My agentic slop goes here. Not intended for anyone else!

morebushelrm

stack/bushel/.gitignore (-9 lines)
_build
.*.swp
**/.claude/settings.local.json
.photos-api
.karakeep-api
KARAKEEP.md
karakeep-src
.DS_Store
.openapi-key
stack/bushel/.ocamlformat (-1 line)
profile=janestreet
stack/bushel/README.md (-89 lines)
# bushel - a reconstruction of livejournal for the 2020s

**WIP spec: if you don't know what this is, it's probably not ready for you yet.
It is extremely work in progress.**

Bushel is an implementation of the classic "webring" of old times, when a
smallish group of people would collaborate to share content while maintaining
their own websites and online identities.

In a classic webring, each node would generate its own styled HTML site and
share data via RSS or Atom. Every node that consumes data has to parse the
Atom feeds, figure out the peer's encodings, and then turn those back into
data structures for the local site.

Bushel instead provides a simpler distributed datamodel that is hopefully more
maintainable in the long term. Bushel uses a "resolvable Markdown" format to
directly share filesystem-based data structures across the webring. This avoids
having to round-trip via Atom's XML, and allows direct sharing of typed,
versioned data structures across nodes owned by different people. Examples of
such data structures include blog posts, wiki entries, social media feeds,
academic papers, events and so on. Every data structure is versioned with
migrations so that older feeds can always be upgraded to newer ones.

## Motivation

Working with open source communities in the modern world is a tremendous
undertaking due to the sheer number of channels and lines of communication we
have to deal with for even simple interactions. A single feature might begin
life in an email thread on a mailing list, then migrate to several GitHub
issues, show up on Hacker News for some random criticism, be pushed to a git
repository or three, be released onto a package manager, get threads on a
Discourse forum, be published as a research paper, have some Jupyter notebooks
released as a tutorial, and finally have a video of a conference talk released.

The above is a pretty common example of _one_ person's workflow, but open
source rarely happens solo these days. Instead, small groups of people
collaborate across these tasks, and this is where tracking the information flow
gets complex and manual across all the communication media.

## Goals

Bushel is intended to provide a framework for:

- **individual** open source contributors to quickly write about their work,
  and mirror it to some of these communication media
- **groups** of contributors to share content via bidirectional
  links (e.g. release announcements or blog posts about work done together),
  with the ability to work in private if they desire
- **organisations** to track feature flows among bigger groups of individuals
  working together across many projects
- **projects** to pull together the contributions of individuals and
  credit them appropriately, while providing a coherent feature flow
  to others.

## Design

The authoring workflow requires minimal work from users by:

- generating **content mirrored from online sources** and converting it
  into a Markdown format ready for human editing
- providing **markdown shortcuts** to easily reference other sources
  and projects in the webring database (e.g. `@avsm` or `@avsm/bushel`);
  see the sketch after this list
- letting **humans quickly edit** those autogenerated files from their
  perspective (e.g. a project might write up a GitHub release differently
  from an individual who is proud of a particular feature they worked on)
- ensuring all HTML **endpoints are version controlled** like
  a wiki, so users can reference either the latest version of a blog post
  or a particular revision
- **bidirectional links**, meaning that if you reference someone else's content,
  their site is rebuilt to reflect your link.
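
To make the shortcut bullet above concrete, here is a minimal sketch of what an
authored note using those references might look like. The note itself is made
up; only the `@handle` and `@handle/project` forms are taken from the text above.

```markdown
<!-- a hypothetical note using the reference shortcuts -->
Shipped a new release of @avsm/bushel this week; thanks to @avsm
for the review. The release notes are mirrored from the GitHub
release and then lightly edited by hand.
```
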
The serving workflow requires a custom server instead of just static
pages, since:

- nodes offer GitHub OAuth to provide per-group privacy settings for content
  that shouldn't immediately be public. This allows, for example, draft
  blog posts to be shared easily among authors and then made public at the
  same time.
- some content can stay private to a group indefinitely, for example design
  discussions. Although open source is eventually released, most small-group
  design is easier when done in private, without the whole Internet giving
  its opinion.
- content can initially be served via HTTPS, but will eventually have bridges
  to other mechanisms such as a CLI, and GraphQL endpoints over the datamodel
  for custom clients.

## Schema

TODO filesystem schema for authoring
stack/bushel/bin/bushel_bibtex.ml (-127 lines)
-
open Cmdliner
-
open Printf
-
-
(** TODO:claude Generate bibtex entry from paper data *)
-
let generate_bibtex_entry paper =
-
let open Bushel.Paper in
-
(* Use slug as the bibtex key/label *)
-
let bibkey = slug paper in
-
let bibtype = try bibtype paper with _ -> "misc" in
-
let title = try title paper with _ -> "Untitled" in
-
let authors =
-
let auth_list = try authors paper with _ -> [] in
-
String.concat " and " auth_list
-
in
-
let year = try year paper with _ -> 0 in
-
-
(* Build the bibtex entry *)
-
let buf = Buffer.create 1024 in
-
Buffer.add_string buf (sprintf "@%s{%s,\n" bibtype bibkey);
-
Buffer.add_string buf (sprintf " title = {%s},\n" title);
-
Buffer.add_string buf (sprintf " author = {%s},\n" authors);
-
Buffer.add_string buf (sprintf " year = {%d}" year);
-
-
(* Add optional fields *)
-
(match String.lowercase_ascii bibtype with
-
| "article" ->
-
(try
-
Buffer.add_string buf (sprintf ",\n journal = {%s}" (journal paper))
-
with _ -> ());
-
(match volume paper with
-
| Some v -> Buffer.add_string buf (sprintf ",\n volume = {%s}" v)
-
| None -> ());
-
(match issue paper with
-
| Some i -> Buffer.add_string buf (sprintf ",\n number = {%s}" i)
-
| None -> ());
-
(match pages paper with
-
| "" -> ()
-
| p -> Buffer.add_string buf (sprintf ",\n pages = {%s}" p))
-
| "inproceedings" ->
-
(try
-
Buffer.add_string buf (sprintf ",\n booktitle = {%s}" (booktitle paper))
-
with _ -> ());
-
(match pages paper with
-
| "" -> ()
-
| p -> Buffer.add_string buf (sprintf ",\n pages = {%s}" p));
-
(match publisher paper with
-
| "" -> ()
-
| p -> Buffer.add_string buf (sprintf ",\n publisher = {%s}" p))
-
| "techreport" ->
-
(try
-
Buffer.add_string buf (sprintf ",\n institution = {%s}" (institution paper))
-
with _ -> ());
-
(match number paper with
-
| Some n -> Buffer.add_string buf (sprintf ",\n number = {%s}" n)
-
| None -> ())
-
| "book" ->
-
(match publisher paper with
-
| "" -> ()
-
| p -> Buffer.add_string buf (sprintf ",\n publisher = {%s}" p));
-
(try
-
Buffer.add_string buf (sprintf ",\n isbn = {%s}" (isbn paper))
-
with _ -> ())
-
| _ -> ());
-
-
(* Add DOI if available *)
-
(match doi paper with
-
| Some d -> Buffer.add_string buf (sprintf ",\n doi = {%s}" d)
-
| None -> ());
-
-
(* Add URL if available *)
-
(match url paper with
-
| Some u -> Buffer.add_string buf (sprintf ",\n url = {%s}" u)
-
| None -> ());
-
-
Buffer.add_string buf "\n}\n";
-
Buffer.contents buf
-
-
(** TODO:claude Main function to export bibtex for all papers *)
-
let export_bibtex base_dir output_file latest_only =
-
(* Load all papers *)
-
let bushel = Bushel.load base_dir in
-
let papers = Bushel.Entry.papers bushel in
-
-
(* Filter to only latest versions if requested *)
-
let papers =
-
if latest_only then
-
List.filter (fun p -> p.Bushel.Paper.latest) papers
-
else
-
papers
-
in
-
-
(* Sort papers by year (most recent first) *)
-
let papers = List.sort Bushel.Paper.compare papers in
-
-
(* Generate bibtex for each paper *)
-
let bibtex_entries = List.map generate_bibtex_entry papers in
-
let bibtex_content = String.concat "\n" bibtex_entries in
-
-
(* Output to file or stdout *)
-
match output_file with
-
| None ->
-
print_string bibtex_content;
-
0
-
| Some file ->
-
let oc = open_out file in
-
output_string oc bibtex_content;
-
close_out oc;
-
printf "Bibtex exported to %s (%d entries)\n" file (List.length papers);
-
0
-
-
(** TODO:claude Command line arguments *)
-
let output_file_arg =
-
let doc = "Output file for bibtex (defaults to stdout)" in
-
Arg.(value & opt (some string) None & info ["o"; "output"] ~docv:"FILE" ~doc)
-
-
let latest_only_arg =
-
let doc = "Export only the latest version of each paper" in
-
Arg.(value & flag & info ["latest"] ~doc)
-
-
(** TODO:claude Command term *)
-
let term =
-
Term.(const export_bibtex $ Bushel_common.base_dir $ output_file_arg $ latest_only_arg)
-
-
let cmd =
-
let doc = "Export bibtex for all papers" in
-
let info = Cmd.info "bibtex" ~doc in
-
Cmd.v info term
stack/bushel/bin/bushel_common.ml (-96 lines)
-
open Cmdliner
-
-
(** TODO:claude Get default base directory from BUSHEL_DATA env variable or current directory *)
-
let get_default_base_dir () =
-
match Sys.getenv_opt "BUSHEL_DATA" with
-
| Some dir -> dir
-
| None -> "."
-
-
(** TODO:claude Optional base directory term with BUSHEL_DATA env variable support *)
-
let base_dir =
-
let doc = "Base directory containing Bushel data (defaults to BUSHEL_DATA env var or current directory)" in
-
Arg.(value & opt dir (get_default_base_dir ()) & info ["d"; "dir"] ~docv:"DIR" ~doc)
-
-
(** TODO:claude Output directory as option *)
-
let output_dir ~default =
-
let doc = "Output directory for generated files" in
-
Arg.(value & opt string default & info ["o"; "output"] ~docv:"DIR" ~doc)
-
-
(** TODO:claude URL term with custom default *)
-
let url_term ~default ~doc =
-
Arg.(value & opt string default & info ["u"; "url"] ~docv:"URL" ~doc)
-
-
(** TODO:claude Overwrite flag *)
-
let overwrite =
-
let doc = "Overwrite existing files" in
-
Arg.(value & flag & info ["overwrite"] ~doc)
-
-
(** TODO:claude Verbose flag *)
-
let verbose =
-
let doc = "Enable verbose output" in
-
Arg.(value & flag & info ["v"; "verbose"] ~doc)
-
-
(** TODO:claude File path term *)
-
let file_term ~default ~doc =
-
Arg.(value & opt string default & info ["f"; "file"] ~docv:"FILE" ~doc)
-
-
(** TODO:claude Channel/handle term *)
-
let channel ~default =
-
let doc = "Channel or handle name" in
-
Arg.(value & opt string default & info ["c"; "channel"] ~docv:"CHANNEL" ~doc)
-
-
(** TODO:claude Optional handle term *)
-
let handle_opt =
-
let doc = "Process specific handle" in
-
Arg.(value & opt (some string) None & info ["h"; "handle"] ~docv:"HANDLE" ~doc)
-
-
(** TODO:claude Tag term for filtering *)
-
let tag =
-
let doc = "Tag to filter or apply" in
-
Arg.(value & opt (some string) None & info ["t"; "tag"] ~docv:"TAG" ~doc)
-
-
(** TODO:claude Limit term *)
-
let limit =
-
let doc = "Limit number of items to process" in
-
Arg.(value & opt (some int) None & info ["l"; "limit"] ~docv:"N" ~doc)
-
-
(** TODO:claude Setup logging with standard options *)
-
let setup_log style_renderer level =
-
Fmt_tty.setup_std_outputs ?style_renderer ();
-
Logs.set_level level;
-
Logs.set_reporter (Logs_fmt.reporter ());
-
()
-
-
(** TODO:claude Common setup term combining logs setup *)
-
let setup_term =
-
Term.(const setup_log $ Fmt_cli.style_renderer () $ Logs_cli.level ())
-
-
(** Keyeio integration for API credential management *)
-
-
(** Create XDG term for bushel *)
-
let xdg_term fs =
-
Xdge.Cmd.term "bushel" fs ()
-
-
(** Create keyeio term for Immiche service *)
-
let immiche_key_term fs =
-
Keyeio.Cmd.term
-
~app_name:"bushel"
-
~fs
-
~service:"immiche"
-
()
-
-
(** Create keyeio term for Karakeepe service *)
-
let karakeepe_key_term fs =
-
Keyeio.Cmd.term
-
~app_name:"bushel"
-
~fs
-
~service:"karakeepe"
-
()
-
-
(** Create keyeio term for Typesense service (includes both typesense and openai keys) *)
-
let typesense_key_term fs =
-
Keyeio.Cmd.term
-
~app_name:"bushel"
-
~fs
-
~service:"typesense"
-
()
stack/bushel/bin/bushel_doi.ml (-279 lines)
-
module ZT = Zotero_translation
-
module J = Ezjsonm
-
open Cmdliner
-
-
(* Extract all DOIs from notes by scanning for doi.org URLs *)
-
let extract_dois_from_notes notes =
-
let doi_url_pattern = Re.Perl.compile_pat "https?://(?:dx\\.)?doi\\.org/([^)\\s\"'>]+)" in
-
let dois = ref [] in
-
List.iter (fun note ->
-
let body = Bushel.Note.body note in
-
let matches = Re.all doi_url_pattern body in
-
List.iter (fun group ->
-
try
-
let encoded_doi = Re.Group.get group 1 in
-
let doi = Uri.pct_decode encoded_doi in
-
if not (List.mem doi !dois) then
-
dois := doi :: !dois
-
with _ -> ()
-
) matches
-
) notes;
-
!dois
-
-
(* Extract publisher URLs from notes (Elsevier, Nature, ACM, Sage, UPenn, Springer, Taylor & Francis) *)
-
let extract_publisher_urls_from_notes notes =
-
(* Matches publisher URLs: linkinghub.elsevier.com, nature.com, journals.sagepub.com, garfield.library.upenn.edu, link.springer.com, tandfonline.com/doi, and dl.acm.org/doi/10.* URLs *)
-
let publisher_pattern = Re.Perl.compile_pat "https?://(?:(?:www\\.)?(?:linkinghub\\.elsevier\\.com|nature\\.com|journals\\.sagepub\\.com|garfield\\.library\\.upenn\\.edu|link\\.springer\\.com)/[^)\\s\"'>]+|(?:dl\\.acm\\.org|(?:www\\.)?tandfonline\\.com)/doi(?:/pdf)?/10\\.[^)\\s\"'>]+)" in
-
let urls = ref [] in
-
List.iter (fun note ->
-
let body = Bushel.Note.body note in
-
let matches = Re.all publisher_pattern body in
-
List.iter (fun group ->
-
try
-
let url = Re.Group.get group 0 in
-
if not (List.mem url !urls) then
-
urls := url :: !urls
-
with _ -> ()
-
) matches
-
) notes;
-
!urls
-
-
(* Resolve a single DOI via Zotero and convert to doi_entry *)
-
let resolve_doi zt ~verbose doi =
-
Printf.printf "Resolving DOI: %s\n%!" doi;
-
let doi_url = Printf.sprintf "https://doi.org/%s" doi in
-
try
-
let json = ZT.json_of_doi zt ~slug:"temp" doi in
-
if verbose then begin
-
Printf.printf " Raw Zotero response:\n%s\n%!" (J.value_to_string json)
-
end;
-
try
-
let keys = J.get_dict (json :> J.value) in
-
let title = J.find json ["title"] |> J.get_string in
-
let authors = J.find json ["author"] |> J.get_list J.get_string in
-
let year = J.find json ["year"] |> J.get_string |> int_of_string in
-
let bibtype = J.find json ["bibtype"] |> J.get_string in
-
let publisher =
-
try
-
(* Try journal first, then booktitle, then publisher *)
-
match List.assoc_opt "journal" keys with
-
| Some j -> J.get_string j
-
| None ->
-
match List.assoc_opt "booktitle" keys with
-
| Some b -> J.get_string b
-
| None ->
-
match List.assoc_opt "publisher" keys with
-
| Some p -> J.get_string p
-
| None -> ""
-
with _ -> ""
-
in
-
let entry = Bushel.Doi_entry.create_resolved ~doi ~title ~authors ~year ~bibtype ~publisher ~source_urls:[doi_url] () in
-
Printf.printf " ✓ Resolved: %s (%d)\n%!" title year;
-
entry
-
with e ->
-
Printf.eprintf " ✗ Failed to parse response for %s: %s\n%!" doi (Printexc.to_string e);
-
Bushel.Doi_entry.create_failed ~doi ~error:(Printexc.to_string e) ~source_urls:[doi_url] ()
-
with exn ->
-
Printf.eprintf " ✗ Failed to resolve %s: %s\n%!" doi (Printexc.to_string exn);
-
Bushel.Doi_entry.create_failed ~doi ~error:(Printexc.to_string exn) ~source_urls:[doi_url] ()
-
-
(* Resolve a publisher URL via Zotero /web endpoint *)
-
let resolve_url zt ~verbose url =
-
Printf.printf "Resolving URL: %s\n%!" url;
-
try
-
(* Use Zotero's resolve_url which calls /web endpoint with the URL directly *)
-
match ZT.resolve_url zt url with
-
| Error (`Msg err) ->
-
Printf.eprintf " ✗ Failed to resolve URL: %s\n%!" err;
-
Bushel.Doi_entry.create_failed ~doi:url ~error:err ~source_urls:[url] ()
-
| Ok json ->
-
if verbose then begin
-
Printf.printf " Raw Zotero response:\n%s\n%!" (J.value_to_string (json :> J.value))
-
end;
-
try
-
(* Extract metadata from the JSON response *)
-
let json_list = match json with
-
| `A lst -> lst
-
| single -> [(single :> J.value)]
-
in
-
match json_list with
-
| [] ->
-
Printf.eprintf " ✗ Empty response\n%!";
-
Bushel.Doi_entry.create_failed ~doi:url ~error:"Empty response" ~source_urls:[url] ()
-
| item :: _ ->
-
(* Extract DOI if present, otherwise use URL *)
-
let doi = try J.find item ["DOI"] |> J.get_string with _ ->
-
try J.find item ["doi"] |> J.get_string with _ -> url
-
in
-
let title = try J.find item ["title"] |> J.get_string with _ ->
-
"Unknown Title"
-
in
-
(* Extract authors from Zotero's "creators" field *)
-
let authors = try
-
J.find item ["creators"] |> J.get_list (fun creator_obj ->
-
try
-
let last_name = J.find creator_obj ["lastName"] |> J.get_string in
-
let first_name = try J.find creator_obj ["firstName"] |> J.get_string with _ -> "" in
-
if first_name = "" then last_name else first_name ^ " " ^ last_name
-
with _ -> "Unknown Author"
-
)
-
with _ -> []
-
in
-
(* Extract year from Zotero's "date" field *)
-
(* Handles both ISO format "2025-07" and text format "November 28, 2023" *)
-
let year = try
-
let date_str = J.find item ["date"] |> J.get_string in
-
(* First try splitting on '-' for ISO dates like "2025-07" or "2024-11-04" *)
-
let parts = String.split_on_char '-' date_str in
-
match parts with
-
| year_str :: _ when String.length year_str = 4 ->
-
(try int_of_string year_str with _ -> 0)
-
| _ ->
-
(* Try splitting on space and comma for dates like "November 28, 2023" *)
-
let space_parts = String.split_on_char ' ' date_str in
-
let year_candidate = List.find_opt (fun s ->
-
let s = String.trim (String.map (fun c -> if c = ',' then ' ' else c) s) in
-
String.length s = 4 && String.for_all (function '0'..'9' -> true | _ -> false) s
-
) space_parts in
-
(match year_candidate with
-
| Some year_str -> int_of_string (String.trim year_str)
-
| None -> 0)
-
with _ -> 0
-
in
-
(* Extract type/bibtype from Zotero's "itemType" field *)
-
let bibtype = try J.find item ["itemType"] |> J.get_string with _ -> "article" in
-
(* Extract publisher/journal from Zotero's "publicationTitle" field *)
-
let publisher = try
-
J.find item ["publicationTitle"] |> J.get_string
-
with _ -> ""
-
in
-
(* Include both the original URL and the DOI URL in source_urls *)
-
let doi_url = if doi = url then [] else [Printf.sprintf "https://doi.org/%s" doi] in
-
let source_urls = url :: doi_url in
-
let entry = Bushel.Doi_entry.create_resolved ~doi ~title ~authors ~year ~bibtype ~publisher ~source_urls () in
-
Printf.printf " ✓ Resolved: %s (%d) [DOI: %s]\n%!" title year doi;
-
entry
-
with e ->
-
Printf.eprintf " ✗ Failed to parse response: %s\n%!" (Printexc.to_string e);
-
Bushel.Doi_entry.create_failed ~doi:url ~error:(Printexc.to_string e) ~source_urls:[url] ()
-
with exn ->
-
Printf.eprintf " ✗ Failed to resolve %s: %s\n%!" url (Printexc.to_string exn);
-
Bushel.Doi_entry.create_failed ~doi:url ~error:(Printexc.to_string exn) ~source_urls:[url] ()
-
-
let run base_dir force verbose =
-
Printf.printf "Loading bushel database...\n%!";
-
let entries = Bushel.load base_dir in
-
let notes = Bushel.Entry.notes entries in
-
-
Printf.printf "Scanning %d notes for DOI URLs...\n%!" (List.length notes);
-
let found_dois = extract_dois_from_notes notes in
-
Printf.printf "Found %d unique DOIs\n%!" (List.length found_dois);
-
-
Printf.printf "Scanning %d notes for publisher URLs...\n%!" (List.length notes);
-
let found_urls = extract_publisher_urls_from_notes notes in
-
Printf.printf "Found %d unique publisher URLs\n%!" (List.length found_urls);
-
-
let data_dir = Bushel.Entry.data_dir entries in
-
let doi_yml_path = Filename.concat data_dir "doi.yml" in
-
Printf.printf "Loading existing DOI cache from %s...\n%!" doi_yml_path;
-
let existing_entries = Bushel.Doi_entry.load doi_yml_path in
-
Printf.printf "Loaded %d cached DOI entries\n%!" (List.length existing_entries);
-
-
(* Filter DOIs that need resolution *)
-
let dois_to_resolve =
-
List.filter (fun doi ->
-
match Bushel.Doi_entry.find_by_doi_including_ignored existing_entries doi with
-
| Some _ when not force ->
-
Printf.printf "Skipping DOI %s (already cached)\n%!" doi;
-
false
-
| Some _ when force ->
-
Printf.printf "Re-resolving DOI %s (--force)\n%!" doi;
-
true
-
| Some _ -> false (* Catch-all for Some case *)
-
| None -> true
-
) found_dois
-
in
-
-
(* Filter URLs that need resolution *)
-
let urls_to_resolve =
-
List.filter (fun url ->
-
match Bushel.Doi_entry.find_by_url_including_ignored existing_entries url with
-
| Some _ when not force ->
-
Printf.printf "Skipping URL %s (already cached)\n%!" url;
-
false
-
| Some _ when force ->
-
Printf.printf "Re-resolving URL %s (--force)\n%!" url;
-
true
-
| Some _ -> false (* Catch-all for Some case *)
-
| None -> true
-
) found_urls
-
in
-
-
if List.length dois_to_resolve = 0 && List.length urls_to_resolve = 0 then begin
-
Printf.printf "No DOIs or URLs to resolve!\n%!";
-
0
-
end else begin
-
Printf.printf "Resolving %d DOI(s) and %d URL(s)...\n%!" (List.length dois_to_resolve) (List.length urls_to_resolve);
-
-
(* Resolve all DOIs and URLs using Eio *)
-
let new_entries = Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
(* Create Zotero Translation client with connection pooling *)
-
let zt = ZT.create ~sw ~env "http://svr-avsm2-eeg-ce:1969" in
-
(* Resolve all DOIs *)
-
let new_doi_entries = List.map (resolve_doi zt ~verbose) dois_to_resolve in
-
(* Resolve all publisher URLs *)
-
let new_url_entries = List.map (resolve_url zt ~verbose) urls_to_resolve in
-
new_doi_entries @ new_url_entries
-
in
-
-
(* Merge with existing entries, combining source_urls for entries with the same DOI *)
-
let all_entries =
-
if force then
-
(* Replace existing entries with new ones - match by DOI *)
-
let is_updated entry =
-
List.exists (fun new_e ->
-
new_e.Bushel.Doi_entry.doi = entry.Bushel.Doi_entry.doi
-
) new_entries
-
in
-
let kept_existing = List.filter (fun e -> not (is_updated e)) existing_entries in
-
kept_existing @ new_entries
-
else
-
(* Merge new entries with existing ones, combining source_urls *)
-
let merged = ref existing_entries in
-
List.iter (fun new_entry ->
-
match Bushel.Doi_entry.find_by_doi_including_ignored !merged new_entry.Bushel.Doi_entry.doi with
-
| Some existing_entry ->
-
(* DOI already exists - merge the entries by combining source_urls and preserving ignore flag *)
-
let combined = Bushel.Doi_entry.merge_entries existing_entry new_entry in
-
merged := combined :: (List.filter (fun e -> e.Bushel.Doi_entry.doi <> new_entry.Bushel.Doi_entry.doi) !merged)
-
| None ->
-
(* New DOI - add it *)
-
merged := new_entry :: !merged
-
) new_entries;
-
!merged
-
in
-
-
(* Save updated cache *)
-
Printf.printf "Saving %d total entries to %s...\n%!" (List.length all_entries) doi_yml_path;
-
Bushel.Doi_entry.save doi_yml_path all_entries;
-
-
Printf.printf "Done!\n%!";
-
0
-
end
-
-
let force_flag =
-
let doc = "Force re-resolution of already cached DOIs" in
-
Arg.(value & flag & info ["force"; "f"] ~doc)
-
-
let verbose_flag =
-
let doc = "Show raw Zotero API responses for debugging" in
-
Arg.(value & flag & info ["verbose"; "v"] ~doc)
-
-
let term =
-
Term.(const run $ Bushel_common.base_dir $ force_flag $ verbose_flag)
-
-
let cmd =
-
let doc = "Resolve DOIs found in notes via Zotero Translation Server" in
-
let info = Cmd.info "doi-resolve" ~doc in
-
Cmd.v info term
stack/bushel/bin/bushel_faces.ml (-123 lines)
-
open Cmdliner
-
open Printf
-
-
(* Get face for a single contact *)
-
let get_face_for_contact immiche_client ~fs output_dir contact =
-
let names = Bushel.Contact.names contact in
-
let handle = Bushel.Contact.handle contact in
-
let output_path = Filename.concat output_dir (handle ^ ".jpg") in
-
-
(* Skip if file already exists *)
-
if Sys.file_exists output_path then
-
`Skipped (sprintf "Thumbnail for '%s' already exists at %s" (List.hd names) output_path)
-
else begin
-
printf "Processing contact: %s (handle: %s)\n%!" (List.hd names) handle;
-
-
(* Try each name in the list until we find a match *)
-
let rec try_names = function
-
| [] ->
-
`Error (sprintf "No person found with any name for contact '%s'" handle)
-
| name :: rest_names ->
-
printf " Trying name: %s\n%!" name;
-
let people = Immiche.search_person immiche_client ~name in
-
(match people with
-
| [] ->
-
printf " No results for '%s', trying next name...\n%!" name;
-
try_names rest_names
-
| person :: _ ->
-
printf " Found match for '%s'\n%!" name;
-
let result = Immiche.download_thumbnail immiche_client
-
~fs ~person_id:person.id ~output_path in
-
(match result with
-
| Ok _ ->
-
`Ok (sprintf "Saved thumbnail for '%s' to %s" name output_path)
-
| Error (`Msg err) ->
-
`Error (sprintf "Error for '%s': %s" name err)))
-
in
-
try_names names
-
end
-
-
(* Process all contacts or a specific one *)
-
let process_contacts ~sw ~env base_dir output_dir specific_handle profile =
-
printf "Loading Bushel database from %s\n%!" base_dir;
-
let db = Bushel.load base_dir in
-
let contacts = Bushel.Entry.contacts db in
-
printf "Found %d contacts\n%!" (List.length contacts);
-
-
(* Get credentials from keyeio profile *)
-
let api_key = Keyeio.Profile.get_required profile ~key:"api_key" in
-
let base_url = Keyeio.Profile.get profile ~key:"base_url"
-
|> Option.value ~default:"https://photos.recoil.org" in
-
-
printf "Connecting to Immich at %s\n%!" base_url;
-
-
(* Create Immiche client for connection pooling *)
-
let requests_session = Requests.create ~sw env in
-
let immiche_client = Immiche.create ~requests_session ~base_url ~api_key in
-
-
(* Ensure output directory exists *)
-
if not (Sys.file_exists output_dir) then Unix.mkdir output_dir 0o755;
-
-
(* Filter contacts based on specific_handle if provided *)
-
let contacts_to_process =
-
match specific_handle with
-
| Some handle ->
-
begin match Bushel.Contact.find_by_handle contacts handle with
-
| Some contact -> [contact]
-
| None ->
-
eprintf "No contact found with handle '%s'\n%!" handle;
-
[]
-
end
-
| None -> contacts
-
in
-
-
(* Process each contact *)
-
let results = List.map
-
(fun contact ->
-
let result = get_face_for_contact immiche_client ~fs:env#fs output_dir contact in
-
(Bushel.Contact.handle contact, result))
-
contacts_to_process
-
in
-
-
(* Print summary *)
-
let ok_count = List.length (List.filter (fun (_, r) -> match r with `Ok _ -> true | _ -> false) results) in
-
let error_count = List.length (List.filter (fun (_, r) -> match r with `Error _ -> true | _ -> false) results) in
-
let skipped_count = List.length (List.filter (fun (_, r) -> match r with `Skipped _ -> true | _ -> false) results) in
-
-
printf "\nSummary:\n";
-
printf " Successfully processed: %d\n" ok_count;
-
printf " Errors: %d\n" error_count;
-
printf " Skipped (already exist): %d\n" skipped_count;
-
-
(* Print detailed results *)
-
if error_count > 0 then begin
-
printf "\nError details:\n";
-
List.iter (fun (handle, result) ->
-
match result with
-
| `Error msg -> printf " %s: %s\n" handle msg
-
| _ -> ())
-
results;
-
end;
-
-
if ok_count > 0 || skipped_count > 0 then 0 else 1
-
-
(* Command line interface *)
-
-
(* Create term given the Eio environment *)
-
let make_term eio_env =
-
let immiche_profile_term = Bushel_common.immiche_key_term eio_env#fs in
-
Term.(
-
const (fun base_dir output_dir handle profile ->
-
try
-
Eio.Switch.run @@ fun sw ->
-
process_contacts ~sw ~env:eio_env base_dir output_dir handle profile
-
with e ->
-
eprintf "Error: %s\n%!" (Printexc.to_string e);
-
1
-
) $ Bushel_common.base_dir $ Bushel_common.output_dir ~default:"." $ Bushel_common.handle_opt $ immiche_profile_term)
-
-
let make_cmd eio_env =
-
let info = Cmd.info "faces" ~doc:"Retrieve face thumbnails for Bushel contacts from Immich" in
-
Cmd.v info (make_term eio_env)
-
-
(* Main entry point removed - accessed through bushel_main.ml *)
stack/bushel/bin/bushel_ideas.ml (-77 lines)
-
open Cmdliner
-
-
(** TODO:claude List completed ideas as markdown bullet list *)
-
let list_ideas_md base_dir =
-
let ideas_dir = Printf.sprintf "%s/ideas" base_dir in
-
let contacts_dir = Printf.sprintf "%s/contacts" base_dir in
-
-
if not (Sys.file_exists ideas_dir) then (
-
Printf.eprintf "Ideas directory not found: %s\n" ideas_dir;
-
1
-
) else (
-
(* Load all contacts *)
-
let contacts =
-
if Sys.file_exists contacts_dir then
-
Sys.readdir contacts_dir
-
|> Array.to_list
-
|> List.filter (String.ends_with ~suffix:".md")
-
|> List.filter_map (fun contact_file ->
-
let filepath = Filename.concat contacts_dir contact_file in
-
try Some (Bushel.Contact.of_md filepath)
-
with e ->
-
Printf.eprintf "Error loading contact %s: %s\n" filepath (Printexc.to_string e);
-
None
-
)
-
else []
-
in
-
-
let idea_files = Sys.readdir ideas_dir
-
|> Array.to_list
-
|> List.filter (String.ends_with ~suffix:".md") in
-
let ideas = List.filter_map (fun idea_file ->
-
let filepath = Filename.concat ideas_dir idea_file in
-
try
-
let idea = Bushel.Idea.of_md filepath in
-
match Bushel.Idea.status idea with
-
| Bushel.Idea.Completed -> Some idea
-
| _ -> None
-
with e ->
-
Printf.eprintf "Error processing %s: %s\n" filepath (Printexc.to_string e);
-
None
-
) idea_files in
-
-
(* Sort by year descending *)
-
let sorted_ideas = List.sort (fun a b ->
-
compare (Bushel.Idea.year b) (Bushel.Idea.year a)
-
) ideas in
-
-
(* Output as markdown bullet list *)
-
List.iter (fun idea ->
-
let student_names =
-
Bushel.Idea.students idea
-
|> List.filter_map (fun handle ->
-
match Bushel.Contact.find_by_handle contacts handle with
-
| Some contact -> Some (Bushel.Contact.name contact)
-
| None ->
-
Printf.eprintf "Warning: contact not found for handle %s\n" handle;
-
Some handle
-
)
-
|> String.concat ", "
-
in
-
let level_str = Bushel.Idea.level_to_string (Bushel.Idea.level idea) in
-
Printf.printf "- %d: \"%s\", %s (%s)\n"
-
(Bushel.Idea.year idea)
-
(Bushel.Idea.title idea)
-
student_names
-
level_str
-
) sorted_ideas;
-
0
-
)
-
-
let term =
-
Term.(const list_ideas_md $ Bushel_common.base_dir)
-
-
let cmd =
-
let doc = "List completed ideas as markdown bullet list" in
-
let info = Cmd.info "ideas-md" ~doc in
-
Cmd.v info term
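
For reference, the `list_ideas_md` printf in the removed file above renders
completed ideas as a Markdown bullet list of roughly this shape. The years,
titles, names and level strings below are invented placeholders, not real data.

```markdown
- 2024: "An Example Project Title", Alice Example, Bob Sample (MPhil)
- 2023: "Another Example Idea", Carol Demo (undergraduate)
```
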
stack/bushel/bin/bushel_info.ml (-202 lines)
-
open Cmdliner
-
open Bushel
-
-
(** TODO:claude List all slugs with their types *)
-
let list_all_slugs entries =
-
let all = Entry.all_entries entries in
-
(* Sort by slug for consistent output *)
-
let sorted = List.sort (fun a b ->
-
String.compare (Entry.slug a) (Entry.slug b)
-
) all in
-
Fmt.pr "@[<v>";
-
Fmt.pr "%a@," (Fmt.styled `Bold Fmt.string) "Available entries:";
-
Fmt.pr "@,";
-
List.iter (fun entry ->
-
let slug = Entry.slug entry in
-
let type_str = Entry.to_type_string entry in
-
let title = Entry.title entry in
-
Fmt.pr " %a %a - %a@,"
-
(Fmt.styled `Cyan Fmt.string) slug
-
(Fmt.styled `Faint Fmt.string) (Printf.sprintf "(%s)" type_str)
-
Fmt.string title
-
) sorted;
-
Fmt.pr "@]@.";
-
0
-
-
(** TODO:claude Main info command implementation *)
-
let info_cmd () base_dir slug_opt =
-
let entries = load base_dir in
-
match slug_opt with
-
| None ->
-
list_all_slugs entries
-
| Some slug ->
-
(* Handle contact handles starting with @ *)
-
if String.starts_with ~prefix:"@" slug then
-
let handle = String.sub slug 1 (String.length slug - 1) in
-
match Contact.find_by_handle (Entry.contacts entries) handle with
-
| None ->
-
Fmt.epr "Error: No contact found with handle '@%s'@." handle;
-
1
-
| Some contact ->
-
Contact.pp Fmt.stdout contact;
-
(* Add thumbnail information for contact *)
-
(match Entry.contact_thumbnail_slug contact with
-
| Some thumb_slug ->
-
Fmt.pr "@.@.";
-
Fmt.pr "@[<v>%a: %s@," (Fmt.styled `Bold Fmt.string) "Thumbnail Slug" thumb_slug;
-
(* Look up the image in srcsetter *)
-
(match Entry.lookup_image entries thumb_slug with
-
| Some img ->
-
let thumbnail_url = Entry.smallest_webp_variant img in
-
Fmt.pr "%a: %s@," (Fmt.styled `Bold Fmt.string) "Thumbnail URL" thumbnail_url;
-
Fmt.pr "%a: %s@," (Fmt.styled `Bold Fmt.string) "Origin" (Srcsetter.origin img);
-
let (w, h) = Srcsetter.dims img in
-
Fmt.pr "%a: %dx%d@," (Fmt.styled `Bold Fmt.string) "Dimensions" w h;
-
let variants = Srcsetter.variants img in
-
if not (Srcsetter.MS.is_empty variants) then begin
-
Fmt.pr "%a:@," (Fmt.styled `Bold Fmt.string) "Variants";
-
Srcsetter.MS.iter (fun name (vw, vh) ->
-
Fmt.pr " %s: %dx%d@," name vw vh
-
) variants
-
end;
-
Fmt.pr "@]"
-
| None ->
-
Fmt.epr "Warning: Contact thumbnail image not in srcsetter: %s@." thumb_slug;
-
Fmt.pr "@]";
-
())
-
| None -> ());
-
(* Add Typesense JSON *)
-
let doc = Typesense.contact_to_document contact in
-
Fmt.pr "@.@.";
-
Fmt.pr "%a:@," (Fmt.styled `Bold Fmt.string) "Typesense Document";
-
Fmt.pr "%s@," (Ezjsonm.value_to_string ~minify:false doc);
-
(* Add backlinks information for contact *)
-
let backlinks = Bushel.Link_graph.get_backlinks_for_slug handle in
-
if backlinks <> [] then begin
-
Fmt.pr "@.@.";
-
Fmt.pr "%a (%d):@," (Fmt.styled `Bold Fmt.string) "Backlinks" (List.length backlinks);
-
List.iter (fun source_slug ->
-
match Entry.lookup entries source_slug with
-
| Some source_entry ->
-
let source_type = Entry.to_type_string source_entry in
-
let source_title = Entry.title source_entry in
-
Fmt.pr " %a %a - %a@,"
-
(Fmt.styled `Cyan Fmt.string) source_slug
-
(Fmt.styled `Faint Fmt.string) (Printf.sprintf "(%s)" source_type)
-
Fmt.string source_title
-
| None ->
-
Fmt.pr " %a %a@,"
-
(Fmt.styled `Cyan Fmt.string) source_slug
-
(Fmt.styled `Red Fmt.string) "(not found)"
-
) backlinks
-
end;
-
Fmt.pr "@.";
-
0
-
else
-
(* Remove leading ':' if present, as slugs are stored without it *)
-
let normalized_slug =
-
if String.starts_with ~prefix:":" slug
-
then String.sub slug 1 (String.length slug - 1)
-
else slug
-
in
-
match Entry.lookup entries normalized_slug with
-
| None ->
-
Fmt.epr "Error: No entry found with slug '%s'@." slug;
-
1
-
| Some entry ->
-
(match entry with
-
| `Paper p -> Paper.pp Fmt.stdout p
-
| `Project p -> Project.pp Fmt.stdout p
-
| `Idea i -> Idea.pp Fmt.stdout i
-
| `Video v -> Video.pp Fmt.stdout v
-
| `Note n -> Note.pp Fmt.stdout n);
-
(* Add thumbnail information if available *)
-
(match Entry.thumbnail_slug entries entry with
-
| Some thumb_slug ->
-
Fmt.pr "@.@.";
-
Fmt.pr "@[<v>%a: %s@," (Fmt.styled `Bold Fmt.string) "Thumbnail Slug" thumb_slug;
-
(* Look up the image in srcsetter *)
-
(match Entry.lookup_image entries thumb_slug with
-
| Some img ->
-
let thumbnail_url = Entry.smallest_webp_variant img in
-
Fmt.pr "%a: %s@," (Fmt.styled `Bold Fmt.string) "Thumbnail URL" thumbnail_url;
-
Fmt.pr "%a: %s@," (Fmt.styled `Bold Fmt.string) "Origin" (Srcsetter.origin img);
-
let (w, h) = Srcsetter.dims img in
-
Fmt.pr "%a: %dx%d@," (Fmt.styled `Bold Fmt.string) "Dimensions" w h;
-
let variants = Srcsetter.variants img in
-
if not (Srcsetter.MS.is_empty variants) then begin
-
Fmt.pr "%a:@," (Fmt.styled `Bold Fmt.string) "Variants";
-
Srcsetter.MS.iter (fun name (vw, vh) ->
-
Fmt.pr " %s: %dx%d@," name vw vh
-
) variants
-
end;
-
Fmt.pr "@]"
-
| None ->
-
Fmt.epr "Warning: Thumbnail image not in srcsetter: %s@." thumb_slug;
-
Fmt.pr "@]";
-
())
-
| None -> ());
-
(* Add Typesense JSON *)
-
let doc = match entry with
-
| `Paper p -> Typesense.paper_to_document entries p
-
| `Project p -> Typesense.project_to_document entries p
-
| `Idea i -> Typesense.idea_to_document entries i
-
| `Video v -> Typesense.video_to_document entries v
-
| `Note n -> Typesense.note_to_document entries n
-
in
-
Fmt.pr "@.@.";
-
Fmt.pr "%a:@," (Fmt.styled `Bold Fmt.string) "Typesense Document";
-
Fmt.pr "%s@," (Ezjsonm.value_to_string ~minify:false doc);
-
(* Add backlinks information *)
-
let backlinks = Bushel.Link_graph.get_backlinks_for_slug normalized_slug in
-
if backlinks <> [] then begin
-
Fmt.pr "@.@.";
-
Fmt.pr "%a (%d):@," (Fmt.styled `Bold Fmt.string) "Backlinks" (List.length backlinks);
-
List.iter (fun source_slug ->
-
match Entry.lookup entries source_slug with
-
| Some source_entry ->
-
let source_type = Entry.to_type_string source_entry in
-
let source_title = Entry.title source_entry in
-
Fmt.pr " %a %a - %a@,"
-
(Fmt.styled `Cyan Fmt.string) source_slug
-
(Fmt.styled `Faint Fmt.string) (Printf.sprintf "(%s)" source_type)
-
Fmt.string source_title
-
| None ->
-
Fmt.pr " %a %a@,"
-
(Fmt.styled `Cyan Fmt.string) source_slug
-
(Fmt.styled `Red Fmt.string) "(not found)"
-
) backlinks
-
end;
-
(* Add references information for notes *)
-
(match entry with
-
| `Note n ->
-
let default_author = match Contact.find_by_handle (Entry.contacts entries) "avsm" with
-
| Some c -> c
-
| None -> failwith "Default author 'avsm' not found"
-
in
-
let references = Md.note_references entries default_author n in
-
if references <> [] then begin
-
Fmt.pr "@.@.";
-
Fmt.pr "%a (%d):@," (Fmt.styled `Bold Fmt.string) "References" (List.length references);
-
List.iter (fun (doi, citation, _is_paper) ->
-
Fmt.pr " %a: %s@,"
-
(Fmt.styled `Cyan Fmt.string) doi
-
citation
-
) references
-
end
-
| _ -> ());
-
Fmt.pr "@.";
-
0
-
-
(** TODO:claude Command line interface definition *)
-
let slug_arg =
-
let doc = "The slug of the entry to display (with or without leading ':'), or contact handle (with '@' prefix). If not provided, lists all available slugs." in
-
Arg.(value & pos 0 (some string) None & info [] ~docv:"SLUG" ~doc)
-
-
let term =
-
Term.(const info_cmd $ Bushel_common.setup_term $ Bushel_common.base_dir $ slug_arg)
-
-
let cmd =
-
let doc = "Display all information for a given slug" in
-
let info = Cmd.info "info" ~doc in
-
Cmd.v info term
stack/bushel/bin/bushel_links.ml (-525 lines)
-
open Cmdliner
-
-
(* Helper function for logging with proper flushing *)
-
let log fmt = Fmt.kstr (fun s -> prerr_string s; flush stderr) fmt
-
let log_verbose verbose fmt =
-
if verbose then Fmt.kstr (fun s -> prerr_string s; flush stderr) fmt
-
else Fmt.kstr (fun _ -> ()) fmt
-
-
(* Initialize a new links.yml file or ensure it exists *)
-
let init_links_file links_file =
-
if Sys.file_exists links_file then
-
print_endline (Fmt.str "Links file %s already exists" links_file)
-
else begin
-
(* Create an empty links file *)
-
Bushel.Link.save_links_file links_file [];
-
print_endline (Fmt.str "Created empty links file: %s" links_file)
-
end;
-
0
-
-
(* Update links.yml from Karakeep *)
-
let update_from_karakeep ~sw ~env base_url profile tag links_file download_assets =
-
let api_key = Keyeio.Profile.get_required profile ~key:"api_key" in
-
let assets_dir = "data/assets" in
-
-
try
-
print_endline (Fmt.str "Fetching links from %s with tag '%s'..." base_url tag);
-
-
(* Prepare tag filter *)
-
let filter_tags = if tag = "" then [] else [tag] in
-
-
(* Fetch bookmarks from Karakeep *)
-
let client = Karakeepe.create ~sw ~env ~api_key ~base_url in
-
let bookmarks = Karakeepe.fetch_all_bookmarks client ~filter_tags () in
-
-
print_endline (Fmt.str "Retrieved %d bookmarks from Karakeep" (List.length bookmarks));
-
-
(* Read existing links if file exists *)
-
let existing_links = Bushel.Link.load_links_file links_file in
-
-
(* Convert bookmarks to bushel links *)
-
let new_links = List.map (fun bookmark ->
-
Karakeepe.to_bushel_link ~base_url bookmark
-
) bookmarks in
-
-
(* Merge with existing links - keep existing dates (karakeep dates may be unreliable) *)
-
let merged_links = Bushel.Link.merge_links existing_links new_links in
-
-
(* Save the updated links file *)
-
Bushel.Link.save_links_file links_file merged_links;
-
-
print_endline (Fmt.str "Updated %s with %d links" links_file (List.length merged_links));
-
-
(* Download assets if requested *)
-
if download_assets then begin
-
print_endline "Downloading assets for bookmarks...";
-
-
(* Ensure the assets directory exists *)
-
(try Unix.mkdir assets_dir 0o755 with Unix.Unix_error (Unix.EEXIST, _, _) -> ());
-
-
(* Process each bookmark with assets *)
-
List.iter (fun bookmark ->
-
(* Extract asset IDs from bookmark *)
-
let assets = bookmark.Karakeepe.assets in
-
-
(* Skip if no assets *)
-
if assets <> [] then
-
(* Process each asset *)
-
List.iter (fun (asset_id, asset_type) ->
-
let asset_dir = Fmt.str "%s/%s" assets_dir asset_id in
-
let asset_file = Fmt.str "%s/asset.bin" asset_dir in
-
let meta_file = Fmt.str "%s/metadata.json" asset_dir in
-
-
(* Skip if the asset already exists *)
-
if not (Sys.file_exists asset_file) then begin
-
(* Create the asset directory *)
-
(try Unix.mkdir asset_dir 0o755 with Unix.Unix_error (Unix.EEXIST, _, _) -> ());
-
-
(* Download the asset *)
-
print_endline (Fmt.str "Downloading %s asset %s..." asset_type asset_id);
-
let data = Karakeepe.fetch_asset client asset_id in
-
-
(* Guess content type based on first bytes *)
-
let content_type =
-
if String.length data >= 4 && String.sub data 0 4 = "\x89PNG" then
-
"image/png"
-
else if String.length data >= 3 && String.sub data 0 3 = "\xFF\xD8\xFF" then
-
"image/jpeg"
-
else if String.length data >= 4 && String.sub data 0 4 = "%PDF" then
-
"application/pdf"
-
else
-
"application/octet-stream"
-
in
-
-
(* Write the asset data *)
-
let oc = open_out_bin asset_file in
-
output_string oc data;
-
close_out oc;
-
-
(* Write metadata file *)
-
let metadata = Fmt.str "{\n \"contentType\": \"%s\",\n \"assetType\": \"%s\"\n}"
-
content_type asset_type in
-
let oc = open_out meta_file in
-
output_string oc metadata;
-
close_out oc
-
end
-
) assets
-
) bookmarks;
-
-
print_endline "Asset download completed.";
-
0
-
end else
-
0
-
with exn ->
-
prerr_endline (Fmt.str "Error fetching bookmarks: %s" (Printexc.to_string exn));
-
1
-
-
(* Extract outgoing links from Bushel entries *)
-
let update_from_bushel bushel_dir links_file include_domains exclude_domains =
-
(* Parse domain filters if provided *)
-
let include_domains_list = match include_domains with
-
| None -> []
-
| Some s -> String.split_on_char ',' s |> List.map String.trim
-
in
-
-
let exclude_domains_list = match exclude_domains with
-
| None -> []
-
| Some s -> String.split_on_char ',' s |> List.map String.trim
-
in
-
-
(* Show filter settings if any *)
-
if include_domains_list <> [] then
-
print_endline (Fmt.str "Including only domains: %s" (String.concat ", " include_domains_list));
-
-
if exclude_domains_list <> [] then
-
print_endline (Fmt.str "Excluding domains: %s" (String.concat ", " exclude_domains_list));
-
-
(* Load all entries from the bushel directory *)
-
let notes_dir = Filename.concat bushel_dir "data/notes" in
-
-
(* Make sure the notes directory exists *)
-
if not (Sys.file_exists notes_dir) then begin
-
prerr_endline (Fmt.str "Error: Notes directory %s does not exist" notes_dir);
-
exit 1
-
end;
-
-
(* Load all entries with fallback *)
-
print_endline (Fmt.str "Loading entries from %s..." bushel_dir);
-
-
let entries_data = Bushel.load bushel_dir in
-
let all_entries = Bushel.Entry.all_entries entries_data in
-
print_endline (Fmt.str "Loaded %d entries" (List.length all_entries));
-
-
(* Extract outgoing links from all entries *)
-
print_endline "Extracting outgoing links...";
-
let extracted_links = ref [] in
-
-
(* Process each entry *)
-
List.iter (fun entry ->
-
let entry_body = Bushel.Entry.body entry in
-
let entry_slug = Bushel.Entry.slug entry in
-
-
(* Skip empty bodies *)
-
if entry_body <> "" then begin
-
let links = Bushel.Entry.extract_external_links entry_body in
-
if links <> [] then begin
-
(* Add each link from this entry *)
-
List.iter (fun url ->
-
(* Try to extract domain from URL *)
-
let domain =
-
try
-
let uri = Uri.of_string url in
-
match Uri.host uri with
-
| Some host -> host
-
| None -> "unknown"
-
with _ -> "unknown"
-
in
-
-
(* Filter by domain if filters are specified *)
-
let include_by_domain =
-
if include_domains_list <> [] then
-
List.exists (fun filter ->
-
domain = filter || String.ends_with ~suffix:filter domain
-
) include_domains_list
-
else true
-
in
-
-
let exclude_by_domain =
-
List.exists (fun filter ->
-
domain = filter || String.ends_with ~suffix:filter domain
-
) exclude_domains_list
-
in
-
-
if include_by_domain && not exclude_by_domain then begin
-
let date = Bushel.Entry.date entry in
-
-
(* Extract tags from the entry *)
-
let entry_tags = Bushel.Tags.tags_of_ent entries_data entry in
-
let tag_strings = List.map Bushel.Tags.to_string entry_tags in
-
-
let link = {
-
Bushel.Link.url;
-
date;
-
description = "";
-
karakeep = None;
-
bushel = Some {
-
Bushel.Link.slugs = [entry_slug];
-
tags = tag_strings
-
};
-
} in
-
extracted_links := link :: !extracted_links
-
end
-
) links
-
end
-
end
-
) all_entries;
-
-
(* Load existing links *)
-
let existing_links = Bushel.Link.load_links_file links_file in
-
-
(* Merge with existing links - prefer bushel entry dates *)
-
let merged_links = Bushel.Link.merge_links ~prefer_new_date:true existing_links !extracted_links in
-
-
(* Save the updated links file *)
-
Bushel.Link.save_links_file links_file merged_links;
-
-
print_endline (Fmt.str "Added %d extracted links from Bushel to %s"
-
(List.length !extracted_links) links_file);
-
print_endline (Fmt.str "Total links in file: %d" (List.length merged_links));
-
0
-
-
(* Helper function to filter links that don't have karakeep data for a specific remote *)
-
let filter_links_without_karakeep base_url links =
-
List.filter (fun link ->
-
match link.Bushel.Link.karakeep with
-
| Some { remote_url; _ } when remote_url = base_url -> false
-
| _ -> true
-
) links
-
-
(* Helper function to apply limit to links if specified *)
-
let apply_limit_to_links limit links =
-
match limit with
-
| Some n when n > 0 ->
-
let rec take_n acc count = function
-
| [] -> List.rev acc
-
| _ when count = 0 -> List.rev acc
-
| x :: xs -> take_n (x :: acc) (count - 1) xs
-
in
-
let limited = take_n [] n links in
-
if List.length links > n then
-
log "Limited to first %d links (out of %d available)\n" n (List.length links);
-
limited
-
| _ -> links
-
-
(* Helper function to prepare tags for a link *)
-
let prepare_tags_for_link tag link =
-
let slug_tags =
-
match link.Bushel.Link.bushel with
-
| Some { slugs; _ } -> List.map (fun slug -> "bushel:" ^ slug) slugs
-
| None -> []
-
in
-
if tag = "" then slug_tags
-
else tag :: slug_tags
-
-
(* Helper function to create batches for parallel processing *)
-
let create_batches max_concurrent links =
-
let rec create_batches_aux links acc =
-
match links with
-
| [] -> List.rev acc
-
| _ ->
-
let batch, rest =
-
if List.length links <= max_concurrent then
-
links, []
-
else
-
let rec take n lst batch =
-
if n = 0 || lst = [] then List.rev batch, lst
-
else take (n-1) (List.tl lst) (List.hd lst :: batch)
-
in
-
take max_concurrent links []
-
in
-
create_batches_aux rest (batch :: acc)
-
in
-
create_batches_aux links []
-
-
(* Helper function to upload a single link to Karakeep *)
-
let upload_single_link ~sw ~env api_key base_url tag verbose updated_links link =
-
let url = link.Bushel.Link.url in
-
let title =
-
if link.Bushel.Link.description <> "" then
-
Some link.Bushel.Link.description
-
else None
-
in
-
let tags = prepare_tags_for_link tag link in
-
-
if verbose then begin
-
log " Uploading: %s\n" url;
-
if tags <> [] then
-
log " Tags: %s\n" (String.concat ", " tags);
-
if title <> None then
-
log " Title: %s\n" (Option.get title);
-
end else begin
-
log "Uploading: %s\n" url;
-
end;
-
-
(* Create the bookmark with tags *)
-
try
-
let client = Karakeepe.create ~sw ~env ~api_key ~base_url in
-
let bookmark = Karakeepe.create_bookmark
-
client
-
~url
-
?title
-
~tags
-
()
-
in
-
-
(* Create updated link with karakeep data *)
-
let updated_link = {
-
link with
-
Bushel.Link.karakeep =
-
Some {
-
Bushel.Link.remote_url = base_url;
-
id = bookmark.id;
-
tags = bookmark.tags;
-
metadata = []; (* Will be populated on next sync *)
-
}
-
} in
-
updated_links := updated_link :: !updated_links;
-
-
if verbose then
-
log " ✓ Added to Karakeep with ID: %s\n" bookmark.id
-
else
-
log " - Added to Karakeep with ID: %s\n" bookmark.id;
-
1 (* Success *)
-
with exn ->
-
if verbose then
-
log " ✗ Error uploading %s: %s\n" url (Printexc.to_string exn)
-
else
-
log " - Error uploading %s: %s\n" url (Printexc.to_string exn);
-
0 (* Failure *)
-
-
(* Helper function to process a batch of links *)
-
let process_batch ~sw ~env api_key base_url tag verbose updated_links batch_num total_batches batch =
-
log_verbose verbose "\nProcessing batch %d/%d (%d links)...\n"
-
(batch_num + 1) total_batches (List.length batch);
-
-
(* Process links in this batch in parallel *)
-
let results = ref [] in
-
Eio.Fiber.all (List.map (fun link ->
-
fun () ->
-
let count = upload_single_link ~sw ~env api_key base_url tag verbose updated_links link in
-
results := count :: !results
-
) batch);
-
List.rev !results
-
-
(* Helper function to update links file with new karakeep data *)
-
let update_links_file links_file original_links updated_links =
-
if !updated_links <> [] then begin
-
(* Replace the updated links in the original list *)
-
let final_links =
-
List.map (fun link ->
-
let url = link.Bushel.Link.url in
-
let updated = List.find_opt (fun ul -> ul.Bushel.Link.url = url) !updated_links in
-
match updated with
-
| Some ul -> ul
-
| None -> link
-
) original_links
-
in
-
-
(* Save the updated links file *)
-
Bushel.Link.save_links_file links_file final_links;
-
-
log "Updated %s with %d new karakeep_ids\n"
-
links_file (List.length !updated_links);
-
end
-
-
(* Upload links to Karakeep that don't already have karakeep data *)
-
let upload_to_karakeep ~sw ~env base_url profile links_file tag max_concurrent delay_seconds limit verbose =
-
let api_key = Keyeio.Profile.get_required profile ~key:"api_key" in
-
(* Load links from file *)
-
log_verbose verbose "Loading links from %s...\n" links_file;
-
let links = Bushel.Link.load_links_file links_file in
-
log_verbose verbose "Loaded %d total links\n" (List.length links);
-
-
(* Filter links that don't have karakeep data for this remote *)
-
log_verbose verbose "Filtering links that don't have karakeep data for %s...\n" base_url;
-
let filtered_links = filter_links_without_karakeep base_url links in
-
log_verbose verbose "Found %d links without karakeep data\n" (List.length filtered_links);
-
-
(* Apply limit if specified *)
-
let links_to_upload = apply_limit_to_links limit filtered_links in
-
-
if links_to_upload = [] then begin
-
log "No links to upload to %s (all links already have karakeep data)\n" base_url;
-
0
-
end else begin
-
log "Found %d links to upload to %s\n" (List.length links_to_upload) base_url;
-
-
(* Split links into batches for parallel processing *)
-
let batches = create_batches max_concurrent links_to_upload in
-
log_verbose verbose "Processing in %d batches of up to %d links each...\n"
-
(List.length batches) max_concurrent;
-
log_verbose verbose "Delay between batches: %.1f seconds\n" delay_seconds;
-
-
(* Process batches and accumulate updated links *)
-
let updated_links = ref [] in
-
-
let result = try
-
let rec process_batches total_count batch_num = function
-
| [] -> total_count
-
| batch :: rest ->
-
let results = process_batch ~sw ~env api_key base_url tag verbose updated_links
-
batch_num (List.length batches) batch in
-
-
(* Count successes in this batch *)
-
let batch_successes = List.fold_left (+) 0 results in
-
let new_total = total_count + batch_successes in
-
-
log_verbose verbose " Batch %d complete: %d/%d successful (Total: %d/%d)\n"
-
(batch_num + 1) batch_successes (List.length batch) new_total (new_total + (List.length links_to_upload - new_total));
-
-
(* Add a delay before processing the next batch *)
-
if rest <> [] then begin
-
log_verbose verbose " Waiting %.1f seconds before next batch...\n" delay_seconds;
-
Eio.Time.sleep (Eio.Stdenv.clock env) delay_seconds;
-
end;
-
process_batches new_total (batch_num + 1) rest
-
in
-
process_batches 0 0 batches
-
with exn ->
-
log "Error during upload operation: %s\n" (Printexc.to_string exn);
-
0
-
in
-
-
(* Update the links file with the new karakeep_ids *)
-
update_links_file links_file links updated_links;
-
-
log "Upload complete. %d/%d links uploaded successfully.\n"
-
result (List.length links_to_upload);
-
-
0
-
end
-
-
(* Common arguments *)
-
let links_file_arg =
-
let doc = "Links YAML file. Defaults to links.yml." in
-
Arg.(value & opt string "links.yml" & info ["file"; "f"] ~doc ~docv:"FILE")
-
-
let base_url_arg =
-
let doc = "Base URL of the Karakeep instance" in
-
let default = "https://hoard.recoil.org" in
-
Arg.(value & opt string default & info ["url"] ~doc ~docv:"URL")
-
-
let tag_arg =
-
let doc = "Tag to filter or apply to bookmarks" in
-
Arg.(value & opt string "" & info ["tag"; "t"] ~doc ~docv:"TAG")
-
-
let download_assets_arg =
-
let doc = "Download assets (screenshots, etc.) from Karakeep" in
-
Arg.(value & flag & info ["download-assets"; "d"] ~doc)
-
-
let base_dir_arg =
-
let doc = "Base directory of the Bushel project" in
-
Arg.(value & opt string "." & info ["dir"; "d"] ~doc ~docv:"DIR")
-
-
let include_domains_arg =
-
let doc = "Only include links to these domains (comma-separated list)" in
-
Arg.(value & opt (some string) None & info ["include"] ~doc ~docv:"DOMAINS")
-
-
let exclude_domains_arg =
-
let doc = "Exclude links to these domains (comma-separated list)" in
-
Arg.(value & opt (some string) None & info ["exclude"] ~doc ~docv:"DOMAINS")
-
-
let concurrent_arg =
-
let doc = "Maximum number of concurrent uploads (default: 5)" in
-
Arg.(value & opt int 5 & info ["concurrent"; "c"] ~doc ~docv:"NUM")
-
-
let delay_arg =
-
let doc = "Delay in seconds between batches (default: 1.0)" in
-
Arg.(value & opt float 1.0 & info ["delay"] ~doc ~docv:"SECONDS")
-
-
let limit_arg =
-
let doc = "Limit number of links to upload (for testing)" in
-
Arg.(value & opt (some int) None & info ["limit"; "l"] ~doc ~docv:"NUM")
-
-
let verbose_arg =
-
let doc = "Show detailed progress information during upload" in
-
Arg.(value & flag & info ["verbose"; "v"] ~doc)
-
-
(* Command definitions *)
-
let make_cmd eio_env =
-
let init_cmd =
-
let doc = "Initialize a new links.yml file" in
-
let info = Cmd.info "init" ~doc in
-
Cmd.v info Term.(const init_links_file $ links_file_arg)
-
in
-
-
let karakeep_cmd =
-
let doc = "Update links.yml with links from Karakeep" in
-
let info = Cmd.info "karakeep" ~doc in
-
let profile_term = Bushel_common.karakeepe_key_term eio_env#fs in
-
Cmd.v info Term.(const (fun base_url profile tag links_file download_assets ->
-
Eio.Switch.run @@ fun sw ->
-
update_from_karakeep ~sw ~env:eio_env base_url profile tag links_file download_assets)
-
$ base_url_arg $ profile_term $ tag_arg $ links_file_arg $ download_assets_arg)
-
in
-
-
let bushel_cmd =
-
let doc = "Update links.yml with outgoing links from Bushel entries" in
-
let info = Cmd.info "bushel" ~doc in
-
Cmd.v info Term.(const update_from_bushel $ base_dir_arg $ links_file_arg $ include_domains_arg $ exclude_domains_arg)
-
in
-
-
let upload_cmd =
-
let doc = "Upload links without karakeep data to Karakeep" in
-
let info = Cmd.info "upload" ~doc in
-
let profile_term = Bushel_common.karakeepe_key_term eio_env#fs in
-
Cmd.v info Term.(const (fun base_url profile links_file tag max_concurrent delay_seconds limit verbose ->
-
Eio.Switch.run @@ fun sw ->
-
upload_to_karakeep ~sw ~env:eio_env base_url profile links_file tag max_concurrent delay_seconds limit verbose)
-
$ base_url_arg $ profile_term $ links_file_arg $ tag_arg $ concurrent_arg $ delay_arg $ limit_arg $ verbose_arg)
-
in
-
-
(* Export the term and cmd for use in main bushel.ml *)
-
let doc = "Manage links between Bushel and Karakeep" in
-
let info = Cmd.info "links" ~doc in
-
Cmd.group info [init_cmd; karakeep_cmd; bushel_cmd; upload_cmd]
stack/bushel/bin/bushel_main.ml (-122 lines)
-
open Cmdliner
-
-
let version = "0.1.0"
-
-
(* Import actual command implementations from submodules *)
-
-
(* Build commands - these need Eio environment *)
-
let build_commands env =
-
(* Faces command *)
-
let faces_cmd = Bushel_faces.make_cmd env in
-
-
(* Links command - uses group structure *)
-
let links_cmd = Bushel_links.make_cmd env in
-
-
(* Obsidian command *)
-
let obsidian_cmd =
-
let doc = "Convert Bushel entries to Obsidian format" in
-
let info = Cmd.info "obsidian" ~version ~doc in
-
Cmd.v info Bushel_obsidian.term
-
in
-
-
(* Paper command *)
-
let paper_cmd =
-
let doc = "Fetch paper metadata from DOI" in
-
let info = Cmd.info "paper" ~version ~doc in
-
Cmd.v info Bushel_paper.term
-
in
-
-
(* Paper classify command *)
-
let paper_classify_cmd = Bushel_paper_classify.cmd in
-
-
(* Paper tex command *)
-
let paper_tex_cmd = Bushel_paper_tex.cmd in
-
-
(* Thumbs command *)
-
let thumbs_cmd =
-
let doc = "Generate thumbnails from paper PDFs" in
-
let info = Cmd.info "thumbs" ~version ~doc in
-
Cmd.v info Bushel_thumbs.term
-
in
-
-
(* Video command *)
-
let video_cmd =
-
let doc = "Fetch videos from PeerTube instances" in
-
let info = Cmd.info "video" ~version ~doc in
-
Cmd.v info Bushel_video.term
-
in
-
-
(* Video thumbs command *)
-
let video_thumbs_cmd = Bushel_video_thumbs.cmd in
-
-
(* Query command *)
-
let query_cmd =
-
let doc = "Query Bushel collections using multisearch" in
-
let info = Cmd.info "query" ~version ~doc in
-
Cmd.v info (Bushel_search.make_term env)
-
in
-
-
(* Bibtex command *)
-
let bibtex_cmd =
-
let doc = "Export bibtex for all papers" in
-
let info = Cmd.info "bibtex" ~version ~doc in
-
Cmd.v info Bushel_bibtex.term
-
in
-
-
(* Ideas command *)
-
let ideas_cmd = Bushel_ideas.cmd in
-
-
(* Info command *)
-
let info_cmd = Bushel_info.cmd in
-
-
(* Missing command *)
-
let missing_cmd = Bushel_missing.cmd in
-
-
(* Note DOI command *)
-
let note_doi_cmd = Bushel_note_doi.cmd in
-
-
(* DOI resolve command *)
-
let doi_cmd = Bushel_doi.cmd in
-
-
(* Main command *)
-
let doc = "Bushel content management toolkit" in
-
let sdocs = Manpage.s_common_options in
-
let man = [
-
`S Manpage.s_description;
-
`P "$(tname) is a unified command-line tool for managing various types of \
-
content in the Bushel system, including papers, videos, links, and more.";
-
`P "$(tname) provides unified access to all Bushel functionality through \
-
integrated subcommands.";
-
`S Manpage.s_commands;
-
`S Manpage.s_common_options;
-
`S "ENVIRONMENT";
-
`P "BUSHEL_CONFIG - Path to configuration file with default settings";
-
`S Manpage.s_authors;
-
`P "Anil Madhavapeddy";
-
`S Manpage.s_bugs;
-
`P "Report bugs at https://github.com/avsm/bushel/issues";
-
] in
-
let info = Cmd.info "bushel" ~version ~doc ~sdocs ~man in
-
Cmd.group info [
-
bibtex_cmd;
-
doi_cmd;
-
faces_cmd;
-
ideas_cmd;
-
info_cmd;
-
links_cmd;
-
missing_cmd;
-
note_doi_cmd;
-
obsidian_cmd;
-
paper_cmd;
-
paper_classify_cmd;
-
paper_tex_cmd;
-
query_cmd;
-
thumbs_cmd;
-
video_cmd;
-
video_thumbs_cmd;
-
]
-
-
let () =
-
Eio_main.run @@ fun env ->
-
let bushel_cmd = build_commands env in
-
exit (Cmd.eval' bushel_cmd)
-185
stack/bushel/bin/bushel_missing.ml
···
-
open Cmdliner
-
open Bushel
-
-
(** Check if an entry has a thumbnail *)
-
let has_thumbnail entries entry =
-
match Entry.thumbnail_slug entries entry with
-
| Some _ -> true
-
| None -> false
-
-
(** Check if an entry has a synopsis or description *)
-
let has_synopsis = function
-
| `Paper p -> Paper.abstract p <> "" (* Papers have abstracts *)
-
| `Note n -> Note.synopsis n <> None (* Notes have optional synopsis *)
-
| `Idea _ -> true (* Ideas don't have synopsis field *)
-
| `Project _ -> true (* Projects don't have synopsis field *)
-
| `Video _ -> true (* Videos don't have synopsis field *)
-
-
(** Check if an entry has tags *)
-
let has_tags = function
-
| `Paper p -> Paper.tags p <> []
-
| `Note n -> Note.tags n <> []
-
| `Idea i -> i.Idea.tags <> [] (* Access record field directly *)
-
| `Project p -> Project.tags p <> []
-
| `Video v -> v.Video.tags <> [] (* Access record field directly *)
-
-
(** Entry with broken references *)
-
type entry_with_broken_refs = {
-
entry : Entry.entry;
-
broken_slugs : string list;
-
broken_contacts : string list;
-
}
-
-
(** Find entries missing thumbnails *)
-
let find_missing_thumbnails entries =
-
let all = Entry.all_entries entries in
-
List.filter (fun entry -> not (has_thumbnail entries entry)) all
-
-
(** Find entries missing synopsis *)
-
let find_missing_synopsis entries =
-
let all = Entry.all_entries entries in
-
List.filter (fun entry -> not (has_synopsis entry)) all
-
-
(** Find entries missing tags *)
-
let find_missing_tags entries =
-
let all = Entry.all_entries entries in
-
List.filter (fun entry -> not (has_tags entry)) all
-
-
(** Find entries with broken slugs or contact handles *)
-
let find_broken_references entries =
-
let all = Entry.all_entries entries in
-
List.filter_map (fun entry ->
-
let body = Entry.body entry in
-
let broken_slugs, broken_contacts = Md.validate_references entries body in
-
if broken_slugs <> [] || broken_contacts <> [] then
-
Some { entry; broken_slugs; broken_contacts }
-
else
-
None
-
) all
-
-
(** Print a list of entries *)
-
let print_entries title entries_list =
-
if entries_list <> [] then begin
-
Fmt.pr "@.%a (%d):@," (Fmt.styled `Bold Fmt.string) title (List.length entries_list);
-
List.iter (fun entry ->
-
let slug = Entry.slug entry in
-
let type_str = Entry.to_type_string entry in
-
let title = Entry.title entry in
-
Fmt.pr " %a %a - %a@,"
-
(Fmt.styled `Cyan Fmt.string) slug
-
(Fmt.styled `Faint Fmt.string) (Printf.sprintf "(%s)" type_str)
-
Fmt.string title
-
) entries_list
-
end
-
-
(** Print entries with broken references *)
-
let print_broken_references title entries_with_broken_refs =
-
if entries_with_broken_refs <> [] then begin
-
Fmt.pr "@.%a (%d):@," (Fmt.styled `Bold Fmt.string) title (List.length entries_with_broken_refs);
-
List.iter (fun { entry; broken_slugs; broken_contacts } ->
-
let slug = Entry.slug entry in
-
let type_str = Entry.to_type_string entry in
-
let entry_title = Entry.title entry in
-
Fmt.pr " %a %a - %a@,"
-
(Fmt.styled `Cyan Fmt.string) slug
-
(Fmt.styled `Faint Fmt.string) (Printf.sprintf "(%s)" type_str)
-
Fmt.string entry_title;
-
if broken_slugs <> [] then
-
Fmt.pr " %a %a@,"
-
(Fmt.styled `Red Fmt.string) "Broken slugs:"
-
(Fmt.list ~sep:Fmt.comma Fmt.string) broken_slugs;
-
if broken_contacts <> [] then
-
Fmt.pr " %a %a@,"
-
(Fmt.styled `Red Fmt.string) "Broken contacts:"
-
(Fmt.list ~sep:Fmt.comma Fmt.string) broken_contacts;
-
) entries_with_broken_refs
-
end
-
-
(** Main missing command implementation *)
-
let missing_cmd () base_dir check_thumbnails check_synopsis check_tags check_refs =
-
let entries = load base_dir in
-
-
let count = ref 0 in
-
-
if check_thumbnails then begin
-
let missing = find_missing_thumbnails entries in
-
print_entries "Entries missing thumbnails" missing;
-
count := !count + List.length missing
-
end;
-
-
if check_synopsis then begin
-
let missing = find_missing_synopsis entries in
-
print_entries "Entries missing synopsis" missing;
-
count := !count + List.length missing
-
end;
-
-
if check_tags then begin
-
let missing = find_missing_tags entries in
-
print_entries "Entries missing tags" missing;
-
count := !count + List.length missing
-
end;
-
-
if check_refs then begin
-
let broken = find_broken_references entries in
-
print_broken_references "Entries with broken references" broken;
-
(* Count total number of broken references, not just entries *)
-
let broken_count = List.fold_left (fun acc { broken_slugs; broken_contacts; _ } ->
-
acc + List.length broken_slugs + List.length broken_contacts
-
) 0 broken in
-
count := !count + broken_count
-
end;
-
-
if !count = 0 then
-
Fmt.pr "@.No missing metadata or broken references found.@."
-
else
-
Fmt.pr "@.Total issues found: %d@." !count;
-
-
0
-
-
(** Command line arguments *)
-
let thumbnails_flag =
-
let doc = "Check for entries missing thumbnails" in
-
Arg.(value & flag & info ["thumbnails"; "t"] ~doc)
-
-
let synopsis_flag =
-
let doc = "Check for entries missing synopsis" in
-
Arg.(value & flag & info ["synopsis"; "s"] ~doc)
-
-
let tags_flag =
-
let doc = "Check for entries missing tags" in
-
Arg.(value & flag & info ["tags"; "g"] ~doc)
-
-
let refs_flag =
-
let doc = "Check for broken slugs and contact handles" in
-
Arg.(value & flag & info ["refs"; "r"] ~doc)
-
-
let term =
-
Term.(const (fun setup base thumbnails synopsis tags refs ->
-
(* If no flags specified, check everything *)
-
let check_all = not (thumbnails || synopsis || tags || refs) in
-
missing_cmd setup base
-
(check_all || thumbnails)
-
(check_all || synopsis)
-
(check_all || tags)
-
(check_all || refs)
-
) $ Bushel_common.setup_term $ Bushel_common.base_dir $ thumbnails_flag $ synopsis_flag $ tags_flag $ refs_flag)
-
-
let cmd =
-
let doc = "List entries with missing metadata or broken references" in
-
let man = [
-
`S Manpage.s_description;
-
`P "This command scans all entries and reports any that are missing thumbnails, synopsis, tags, or have broken slugs/contact handles.";
-
`P "By default, all checks are performed. Use flags to select specific checks.";
-
`S Manpage.s_options;
-
`S Manpage.s_examples;
-
`P "Check for all issues:";
-
`Pre " $(mname) $(tname)";
-
`P "Check only for missing thumbnails:";
-
`Pre " $(mname) $(tname) --thumbnails";
-
`P "Check for missing synopsis and tags:";
-
`Pre " $(mname) $(tname) --synopsis --tags";
-
`P "Check only for broken references:";
-
`Pre " $(mname) $(tname) --refs";
-
] in
-
let info = Cmd.info "missing" ~doc ~man in
-
Cmd.v info term
-131
stack/bushel/bin/bushel_note_doi.ml
···
-
open Cmdliner
-
open Bushel
-
-
(** Generate a roguedoi identifier using Crockford base32 encoding *)
-
let generate_roguedoi () =
-
Random.self_init ();
-
(* Generate a 10-character roguedoi with checksum and split every 5 chars *)
-
let id = Crockford.generate ~length:10 ~split_every:5 ~checksum:true () in
-
Printf.sprintf "10.59999/%s" id
-
-
(** Add DOI to a specific note's frontmatter if it doesn't already have one *)
-
let add_doi_to_note note_path =
-
let content = In_channel.with_open_bin note_path In_channel.input_all in
-
(* Check if note already has a doi: field *)
-
let has_doi =
-
let re = Str.regexp "^doi:" in
-
let lines = String.split_on_char '\n' content in
-
(* trim each line so leading whitespace does not hide a *)
-
(* "doi:" field when matching against the regexp *)
-
List.exists (fun line -> Str.string_match re (String.trim line) 0) lines
-
in
-
if has_doi then begin
-
Fmt.pr "%a: Note already has a DOI, skipping@."
-
(Fmt.styled `Yellow Fmt.string) note_path;
-
false
-
end else begin
-
let roguedoi = generate_roguedoi () in
-
(* Parse the file to extract frontmatter *)
-
match String.split_on_char '\n' content with
-
| "---" :: rest ->
-
(* Find the end of frontmatter *)
-
let rec find_end_fm acc = function
-
| [] -> None
-
| "---" :: body_lines -> Some (List.rev acc, body_lines)
-
| line :: lines -> find_end_fm (line :: acc) lines
-
in
-
(match find_end_fm [] rest with
-
| Some (fm_lines, body_lines) ->
-
(* Add doi field to frontmatter *)
-
let new_fm = fm_lines @ [Printf.sprintf "doi: %s" roguedoi] in
-
let new_content =
-
String.concat "\n" (["---"] @ new_fm @ ["---"] @ body_lines)
-
in
-
Out_channel.with_open_bin note_path (fun oc ->
-
Out_channel.output_string oc new_content
-
);
-
Fmt.pr "%a: Added DOI %a@."
-
(Fmt.styled `Green Fmt.string) note_path
-
(Fmt.styled `Cyan Fmt.string) roguedoi;
-
true
-
| None ->
-
Fmt.epr "%a: Could not parse frontmatter@."
-
(Fmt.styled `Red Fmt.string) note_path;
-
false)
-
| _ ->
-
Fmt.epr "%a: No frontmatter found@."
-
(Fmt.styled `Red Fmt.string) note_path;
-
false
-
end
-
-
(** Main command implementation *)
-
let note_doi_cmd () base_dir dry_run =
-
let entries = load base_dir in
-
let notes = Entry.notes entries in
-
-
(* Filter for perma notes without DOI *)
-
let perma_notes = List.filter (fun n ->
-
Note.perma n && Option.is_none (Note.doi n)
-
) notes in
-
-
if perma_notes = [] then begin
-
Fmt.pr "No permanent notes without DOI found.@.";
-
0
-
end else begin
-
Fmt.pr "@[<v>";
-
Fmt.pr "%a: Found %d permanent notes without DOI@.@."
-
(Fmt.styled `Bold Fmt.string) "Info"
-
(List.length perma_notes);
-
-
let count = ref 0 in
-
List.iter (fun note ->
-
let slug = Note.slug note in
-
let note_path = Printf.sprintf "%s/data/notes/%s.md" base_dir slug in
-
Fmt.pr "Processing %a (%a)...@,"
-
(Fmt.styled `Cyan Fmt.string) slug
-
(Fmt.styled `Faint Fmt.string) (Note.title note);
-
-
if not dry_run then begin
-
if add_doi_to_note note_path then
-
incr count
-
end else begin
-
let roguedoi = generate_roguedoi () in
-
Fmt.pr " Would add DOI: %a@,"
-
(Fmt.styled `Cyan Fmt.string) roguedoi;
-
incr count
-
end
-
) perma_notes;
-
-
Fmt.pr "@.";
-
if dry_run then
-
Fmt.pr "%a: Would add DOI to %d notes (dry run)@."
-
(Fmt.styled `Bold Fmt.string) "Summary"
-
!count
-
else
-
Fmt.pr "%a: Added DOI to %d notes@."
-
(Fmt.styled `Bold Fmt.string) "Summary"
-
!count;
-
Fmt.pr "@]@.";
-
0
-
end
-
-
(** Command line interface definition *)
-
let dry_run_flag =
-
let doc = "Show what would be done without making changes" in
-
Arg.(value & flag & info ["n"; "dry-run"] ~doc)
-
-
let term =
-
Term.(const note_doi_cmd $ Bushel_common.setup_term $ Bushel_common.base_dir $ dry_run_flag)
-
-
let cmd =
-
let doc = "Generate and add DOI identifiers to permanent notes" in
-
let man = [
-
`S Manpage.s_description;
-
`P "This command generates roguedoi identifiers using Crockford base32 encoding \
-
and adds them to the frontmatter of permanent notes (notes with perma: true) \
-
that don't already have a DOI.";
-
`P "Roguedoi format: 10.59999/xxxxx-xxxxx where x is a Crockford base32 character.";
-
`S Manpage.s_options;
-
] in
-
let info = Cmd.info "note-doi" ~doc ~man in
-
Cmd.v info term
-88
stack/bushel/bin/bushel_obsidian.ml
···
-
open Bushel
-
-
let obsidian_links =
-
let inline c = function
-
| Md.Obsidian_link l ->
-
Cmarkit_renderer.Context.string c l;
-
true
-
| _ -> false
-
in
-
Cmarkit_renderer.make ~inline ()
-
;;
-
-
let obsidian_of_doc doc =
-
let default = Cmarkit_commonmark.renderer () in
-
let r = Cmarkit_renderer.compose default obsidian_links in
-
Cmarkit_renderer.doc_to_string r doc
-
;;
-
-
let md_to_obsidian entries md =
-
let open Cmarkit in
-
Doc.of_string ~strict:false ~resolver:Md.with_bushel_links md
-
|> Mapper.map_doc (Mapper.make ~inline:(Md.bushel_inline_mapper_to_obsidian entries) ())
-
|> obsidian_of_doc
-
;;
-
-
let obsidian_output base output_dir =
-
let e = load base in
-
let all = Entry.all_entries e @ Entry.all_papers e in
-
List.iter
-
(fun ent ->
-
let slug =
-
match ent with
-
| `Paper { Paper.latest; slug; ver; _ } when not latest ->
-
Printf.sprintf "%s-%s" slug ver
-
| _ -> Entry.slug ent
-
in
-
let fname = Filename.concat output_dir (slug ^ ".md") in
-
let tags =
-
Tags.tags_of_ent e ent
-
|> List.filter_map (fun tag ->
-
match tag with
-
| `Slug _ -> None
-
| `Set s -> Some (Printf.sprintf "\"#%s\"" s)
-
| `Text s -> Some s
-
| `Contact _ -> None
-
| `Year y -> Some (Printf.sprintf "\"#y%d\"" y))
-
|> List.map (fun s -> "- " ^ s)
-
|> String.concat "\n"
-
in
-
let links =
-
Tags.tags_of_ent e ent
-
|> List.filter_map (fun tag ->
-
match tag with
-
| `Slug s when s <> slug -> Some (Printf.sprintf "- \"[[%s]]\"" s)
-
| `Contact c -> Some (Printf.sprintf "- \"[[@%s]]\"" c)
-
| _ -> None)
-
|> String.concat "\n"
-
|> function
-
| "" -> ""
-
| s -> "linklist:\n" ^ s ^ "\n"
-
in
-
let body = Entry.body ent |> md_to_obsidian e in
-
let buf = Printf.sprintf "---\ntags:\n%s\n%s---\n\n%s" tags links body in
-
Out_channel.with_open_bin fname (fun oc -> output_string oc buf))
-
all;
-
List.iter
-
(fun contact ->
-
let slug = Contact.handle contact in
-
let fname = Filename.concat output_dir ("@" ^ slug ^ ".md") in
-
let buf = String.concat "\n" (Contact.names contact) in
-
Out_channel.with_open_bin fname (fun oc -> output_string oc buf))
-
(Entry.contacts e)
-
;;
-
-
(* Export the term for use in bushel_main.ml *)
-
let term =
-
Cmdliner.Term.(
-
const (fun base_dir output_dir -> obsidian_output base_dir output_dir; 0) $
-
Bushel_common.base_dir $
-
Bushel_common.output_dir ~default:"obsidian"
-
)
-
-
let cmd =
-
let doc = "Generate Obsidian-compatible markdown files" in
-
let info = Cmdliner.Cmd.info "obsidian" ~doc in
-
Cmdliner.Cmd.v info term
-
-
(* Main entry point removed - accessed through bushel_main.ml *)
-76
stack/bushel/bin/bushel_paper.ml
···
-
module ZT = Zotero_translation
-
open Printf
-
module J = Ezjsonm
-
open Cmdliner
-
-
-
let _authors b j =
-
let keys = J.get_dict j in
-
let authors = J.get_list J.get_string (List.assoc "author" keys) in
-
let a =
-
List.fold_left (fun acc a ->
-
match Bushel.Entry.lookup_by_name b a with
-
| Some c -> `String ("@" ^ (Bushel.Contact.handle c)) :: acc
-
| None -> failwith (sprintf "author %s not found" a)
-
) [] authors
-
in
-
J.update j ["author"] (Some (`A a))
-
-
let of_doi zt ~base_dir ~slug ~version doi =
-
let j = ZT.json_of_doi zt ~slug doi in
-
let papers_dir = Printf.sprintf "%s/papers/%s" base_dir slug in
-
(* Ensure papers directory exists *)
-
(try Unix.mkdir papers_dir 0o755 with Unix.Unix_error (Unix.EEXIST, _, _) -> ());
-
-
(* Extract abstract from JSON data *)
-
let abstract = try
-
let keys = Ezjsonm.get_dict (j :> Ezjsonm.value) in
-
match List.assoc_opt "abstract" keys with
-
| Some abstract_json -> Some (Ezjsonm.get_string abstract_json)
-
| None -> None
-
with _ -> None in
-
-
(* Remove abstract from frontmatter - it goes in body *)
-
let keys = Ezjsonm.get_dict (j :> Ezjsonm.value) in
-
let filtered_keys = List.filter (fun (k, _) -> k <> "abstract") keys in
-
let json_without_abstract = `O filtered_keys in
-
-
(* Use library function to generate YAML with abstract in body *)
-
let content = Bushel.Paper.to_yaml ?abstract ~ver:version json_without_abstract in
-
-
let filename = Printf.sprintf "%s.md" version in
-
let filepath = Filename.concat papers_dir filename in
-
let oc = open_out filepath in
-
output_string oc content;
-
close_out oc;
-
Printf.printf "Created paper file: %s\n" filepath
-
-
let slug_arg =
-
let doc = "Slug for the entry." in
-
Arg.(required & pos 0 (some string) None & info [] ~docv:"SLUG" ~doc)
-
-
let version_arg =
-
let doc = "Version of the entry." in
-
Arg.(required & pos 1 (some string) None & info [] ~docv:"VERSION" ~doc)
-
-
let doi_arg =
-
let doc = "DOI of the entry." in
-
Arg.(required & pos 2 (some string) None & info [] ~docv:"DOI" ~doc)
-
-
(* Export the term for use in bushel_main.ml *)
-
let term =
-
Term.(const (fun base slug version doi ->
-
Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
(* Create Zotero Translation client with connection pooling *)
-
let zt = ZT.create ~sw ~env "http://svr-avsm2-eeg-ce:1969" in
-
of_doi zt ~base_dir:base ~slug ~version doi;
-
0
-
) $ Bushel_common.base_dir $ slug_arg $ version_arg $ doi_arg)
-
-
let cmd =
-
let doc = "Generate paper entry from DOI" in
-
let info = Cmd.info "paper" ~doc in
-
Cmd.v info term
-
-
(* Main entry point removed - accessed through bushel_main.ml *)
-57
stack/bushel/bin/bushel_paper_classify.ml
···
-
open Cmdliner
-
-
(** TODO:claude Classify papers based on heuristics and update metadata *)
-
let classify_papers base_dir overwrite =
-
let papers_dir = Printf.sprintf "%s/papers" base_dir in
-
if not (Sys.file_exists papers_dir) then (
-
Printf.eprintf "Papers directory not found: %s\n" papers_dir;
-
1
-
) else (
-
let paper_dirs = Sys.readdir papers_dir |> Array.to_list in
-
List.iter (fun paper_slug ->
-
let paper_path = Filename.concat papers_dir paper_slug in
-
if Sys.is_directory paper_path then (
-
let versions = Sys.readdir paper_path |> Array.to_list
-
|> List.filter (String.ends_with ~suffix:".md") in
-
List.iter (fun version_file ->
-
let filepath = Filename.concat paper_path version_file in
-
let version = Filename.remove_extension version_file in
-
try
-
let paper = Bushel.Paper.of_md ~slug:paper_slug ~ver:version filepath in
-
let predicted_class = Bushel.Paper.classification paper in
-
let class_str = Bushel.Paper.string_of_classification predicted_class in
-
Printf.printf "%s/%s: %s\n" paper_slug version class_str;
-
-
(* Update the file if overwrite is enabled *)
-
if overwrite then (
-
let json_data = Bushel.Paper.raw_json paper in
-
let keys = Ezjsonm.get_dict json_data in
-
let updated_keys = ("classification", `String class_str) ::
-
(List.filter (fun (k, _) -> k <> "classification") keys) in
-
let updated_json = `O updated_keys in
-
let abstract = Some (Bushel.Paper.abstract paper) in
-
let content = Bushel.Paper.to_yaml ?abstract ~ver:version updated_json in
-
let oc = open_out filepath in
-
output_string oc content;
-
close_out oc;
-
Printf.printf " Updated %s\n" filepath
-
)
-
with e ->
-
Printf.eprintf "Error processing %s: %s\n" filepath (Printexc.to_string e)
-
) versions
-
)
-
) paper_dirs;
-
0
-
)
-
-
let overwrite_flag =
-
let doc = "Update paper files with classification metadata" in
-
Arg.(value & flag & info ["overwrite"] ~doc)
-
-
let term =
-
Term.(const classify_papers $ Bushel_common.base_dir $ overwrite_flag)
-
-
let cmd =
-
let doc = "Classify papers as full/short/preprint" in
-
let info = Cmd.info "paper-classify" ~doc in
-
Cmd.v info term
-325
stack/bushel/bin/bushel_paper_tex.ml
···
-
open Printf
-
open Cmdliner
-
-
(** TODO:claude Format author name for LaTeX with initials and full last name *)
-
let format_author_name author =
-
(* Split author name and convert to "F.M.~Lastname" format *)
-
let parts = String.split_on_char ' ' author |> List.filter (fun s -> s <> "") in
-
match List.rev parts with
-
| [] -> ""
-
| lastname :: rest_rev ->
-
let firstname_parts = List.rev rest_rev in
-
let initials = List.map (fun name ->
-
if String.length name > 0 then String.sub name 0 1 ^ "." else ""
-
) firstname_parts in
-
let initials_str = String.concat "" initials in
-
if initials_str = "" then lastname
-
else initials_str ^ "~" ^ lastname
-
-
(** TODO:claude Format author name for LaTeX with underline for target author *)
-
let format_author target_name author =
-
let formatted = format_author_name author in
-
(* Check if author contains target name substring for underlining *)
-
if String.lowercase_ascii author |> fun s ->
-
Re.execp (Re.Perl.compile_pat ~opts:[`Caseless] target_name) s
-
then sprintf "\\underline{%s}" formatted
-
else formatted
-
-
(** TODO:claude Format authors list for LaTeX *)
-
let format_authors target_name authors =
-
match authors with
-
| [] -> ""
-
| [single] -> format_author target_name single
-
| _ ->
-
let formatted = List.map (format_author target_name) authors in
-
String.concat ", " formatted
-
-
(** TODO:claude Escape special LaTeX characters *)
-
let escape_latex str =
-
let replacements = [
-
("&", "\\&");
-
("%", "\\%");
-
("$", "\\$");
-
("#", "\\#");
-
("_", "\\_");
-
("{", "\\{");
-
("}", "\\}");
-
("~", "\\textasciitilde{}");
-
("^", "\\textasciicircum{}");
-
] in
-
List.fold_left (fun s (from, to_) ->
-
Re.replace_string (Re.compile (Re.str from)) ~by:to_ s
-
) str replacements
-
-
(** TODO:claude Clean venue name by removing common prefixes and handling arXiv *)
-
let clean_venue_name venue =
-
(* Special handling for arXiv to avoid redundancy like "arXiv (arXiv:ID)" *)
-
let venue_lower = String.lowercase_ascii venue in
-
if Re.execp (Re.Perl.compile_pat ~opts:[`Caseless] "arxiv") venue_lower then
-
if String.contains venue ':' then
-
(* If it contains arXiv:ID format, just return the ID part *)
-
let parts = String.split_on_char ':' venue in
-
match parts with
-
| _ :: id :: _ -> String.trim id
-
| _ -> venue
-
else venue
-
else
-
let prefixes = [
-
"in proceedings of the ";
-
"proceedings of the ";
-
"in proceedings of ";
-
"proceedings of ";
-
"in the ";
-
"the ";
-
] in
-
let rec remove_prefixes v = function
-
| [] -> v
-
| prefix :: rest ->
-
if String.length v >= String.length prefix &&
-
String.sub (String.lowercase_ascii v) 0 (String.length prefix) = prefix
-
then String.sub v (String.length prefix) (String.length v - String.length prefix)
-
else remove_prefixes v rest
-
in
-
let cleaned = remove_prefixes venue prefixes in
-
(* Capitalize first letter *)
-
if String.length cleaned > 0 then
-
String.mapi (fun i c -> if i = 0 then Char.uppercase_ascii c else c) cleaned
-
else cleaned
-
-
(** TODO:claude Format venue for LaTeX with volume/number details for full papers *)
-
let format_venue paper =
-
let open Bushel.Paper in
-
let classification = classification paper in
-
match bibtype paper with
-
| "article" ->
-
let journal_name = try journal paper |> clean_venue_name |> escape_latex with _ -> "Journal" in
-
if classification = Full then (
-
let vol_info =
-
let vol = volume paper in
-
let num = issue paper in
-
match vol, num with
-
| Some v, Some n -> sprintf ", %s(%s)" v n
-
| Some v, None -> sprintf ", vol. %s" v
-
| None, Some n -> sprintf ", no. %s" n
-
| None, None -> ""
-
in
-
sprintf "\\textit{%s%s}" journal_name vol_info
-
) else
-
sprintf "\\textit{%s}" journal_name
-
| "inproceedings" ->
-
let conf_name = try booktitle paper |> clean_venue_name |> escape_latex with _ -> "Conference" in
-
sprintf "\\textit{%s}" conf_name
-
| "techreport" ->
-
let inst = try institution paper |> escape_latex with _ -> "Institution" in
-
sprintf "\\textit{Technical Report, %s}" inst
-
| "phdthesis" ->
-
let school = try institution paper |> escape_latex with _ -> "University" in
-
sprintf "\\textit{PhD thesis, %s}" school
-
| "mastersthesis" ->
-
let school = try institution paper |> escape_latex with _ -> "University" in
-
sprintf "\\textit{Master's thesis, %s}" school
-
| "book" ->
-
let publisher_str = try Bushel.Paper.publisher paper |> escape_latex with _ -> "" in
-
let edition_str = try
-
let json = Bushel.Paper.raw_json paper in
-
let keys = Ezjsonm.get_dict json in
-
List.assoc "edition" keys |> Ezjsonm.get_string |> escape_latex
-
with _ -> "" in
-
let isbn_str = try Bushel.Paper.isbn paper |> escape_latex with _ -> "" in
-
let venue_info =
-
let base = match publisher_str, edition_str with
-
| pub, ed when pub <> "" && ed <> "" -> sprintf "%s, %s edition" pub ed
-
| pub, _ when pub <> "" -> pub
-
| _, ed when ed <> "" -> sprintf "%s edition" ed
-
| _, _ -> "Book"
-
in
-
if isbn_str <> "" then
-
sprintf "%s, ISBN %s" base isbn_str
-
else
-
base
-
in
-
sprintf "\\textit{%s}" venue_info
-
| "misc" ->
-
(* Try to get meaningful venue info for misc entries *)
-
let journal_str = try Bushel.Paper.journal paper |> clean_venue_name |> escape_latex with _ -> "" in
-
let booktitle_str = try Bushel.Paper.booktitle paper |> clean_venue_name |> escape_latex with _ -> "" in
-
let publisher_str = try Bushel.Paper.publisher paper |> escape_latex with _ -> "" in
-
if journal_str <> "" then
-
sprintf "\\textit{%s}" journal_str
-
else if booktitle_str <> "" then
-
sprintf "\\textit{%s}" booktitle_str
-
else if publisher_str <> "" then
-
sprintf "\\textit{%s}" publisher_str
-
else
-
sprintf "\\textit{Preprint}"
-
| "abstract" ->
-
(* Handle conference abstracts *)
-
let conf_name = try Bushel.Paper.booktitle paper |> clean_venue_name |> escape_latex with _ -> "" in
-
let journal_str = try Bushel.Paper.journal paper |> clean_venue_name |> escape_latex with _ -> "" in
-
if conf_name <> "" then
-
sprintf "\\textit{%s (Abstract)}" conf_name
-
else if journal_str <> "" then
-
sprintf "\\textit{%s (Abstract)}" journal_str
-
else
-
sprintf "\\textit{Conference Abstract}"
-
| _ ->
-
(* Fallback for other types with special arXiv handling *)
-
let journal_str = try Bushel.Paper.journal paper with _ -> "" in
-
let publisher_str = try Bushel.Paper.publisher paper |> escape_latex with _ -> "" in
-
-
(* Special handling for arXiv papers - skip venue, let note handle it *)
-
if String.lowercase_ascii journal_str = "arxiv" then
-
""
-
else if journal_str <> "" then
-
sprintf "\\textit{%s}" (journal_str |> clean_venue_name |> escape_latex)
-
else if publisher_str <> "" then
-
sprintf "\\textit{%s}" publisher_str
-
else
-
sprintf "\\textit{Preprint}"
-
-
(** TODO:claude Generate LaTeX PubItem for a paper *)
-
let generate_latex_entry target_name paper =
-
let open Bushel.Paper in
-
let slug_str = slug paper in
-
let title_str = title paper |> escape_latex in
-
let authors_str = format_authors target_name (authors paper) in
-
let venue_str = format_venue paper in
-
let year_str = year paper |> string_of_int in
-
let month_str =
-
let (_, m, _) = date paper in
-
sprintf "%02d" m
-
in
-
-
(* Check if paper is in the future *)
-
let is_in_press =
-
let paper_time = datetime paper in
-
let now = Ptime_clock.now () in
-
Ptime.compare paper_time now > 0
-
in
-
-
(* Add DOI or PDF link if available, but not for in-press papers unless they have explicit URL *)
-
let title_with_link =
-
if is_in_press then
-
(* For in-press papers, only add link if there's an explicit URL field *)
-
match Bushel.Paper.url paper with
-
| Some u -> sprintf "\\href{%s}{%s}" u title_str
-
| None -> title_str (* No link for in-press papers without explicit URL *)
-
else
-
(* For published papers, use DOI or URL or default PDF link *)
-
match Bushel.Paper.doi paper with
-
| Some doi -> sprintf "\\href{https://doi.org/%s}{%s}" doi title_str
-
| None ->
-
(* Check if there's a URL, otherwise default to PDF link *)
-
let url = match Bushel.Paper.url paper with
-
| Some u -> u
-
| None -> sprintf "https://anil.recoil.org/papers/%s.pdf" slug_str
-
in
-
sprintf "\\href{%s}{%s}" url title_str
-
in
-
-
(* Add "(in press)" if paper is in the future *)
-
let in_press_str = if is_in_press then " \\textit{(in press)}" else "" in
-
-
(* Add note if present *)
-
let note_str = match Bushel.Paper.note paper with
-
| Some n -> sprintf " \\textit{(%s)}" (escape_latex n)
-
| None -> ""
-
in
-
-
sprintf "\\BigGap\n\\PubItemLabeled{%s}\n{``%s,''\n%s,\n%s%s%s,\n\\DatestampYM{%s}{%s}.}\n"
-
slug_str title_with_link authors_str venue_str in_press_str note_str year_str month_str
-
-
(** TODO:claude Generate LaTeX output files for papers *)
-
let generate_tex base_dir output_dir target_name =
-
try
-
let papers = Bushel.load_papers base_dir in
-
let latest_papers = List.filter (fun p -> p.Bushel.Paper.latest) papers in
-
-
(* Extract selected papers first *)
-
let selected_papers = List.filter Bushel.Paper.selected latest_papers in
-
-
(* Group remaining papers by classification, excluding selected ones *)
-
let non_selected_papers = List.filter (fun p -> not (Bushel.Paper.selected p)) latest_papers in
-
let full_papers = List.filter (fun p ->
-
Bushel.Paper.classification p = Bushel.Paper.Full) non_selected_papers in
-
let short_papers = List.filter (fun p ->
-
Bushel.Paper.classification p = Bushel.Paper.Short) non_selected_papers in
-
let preprint_papers = List.filter (fun p ->
-
Bushel.Paper.classification p = Bushel.Paper.Preprint) non_selected_papers in
-
-
(* Sort each group by date, newest first *)
-
let sorted_full = List.sort Bushel.Paper.compare full_papers in
-
let sorted_short = List.sort Bushel.Paper.compare short_papers in
-
let sorted_preprint = List.sort Bushel.Paper.compare preprint_papers in
-
let sorted_selected = List.sort Bushel.Paper.compare selected_papers in
-
-
(* Ensure output directory exists *)
-
(try Unix.mkdir output_dir 0o755 with Unix.Unix_error (Unix.EEXIST, _, _) -> ());
-
-
(* Write papers_full.tex *)
-
let oc_full = open_out (Filename.concat output_dir "papers_full.tex") in
-
List.iter (fun paper ->
-
let latex = generate_latex_entry target_name paper in
-
output_string oc_full latex;
-
output_char oc_full '\n'
-
) sorted_full;
-
close_out oc_full;
-
Printf.printf "Generated %s/papers_full.tex with %d entries\n" output_dir (List.length sorted_full);
-
-
(* Write papers_short.tex *)
-
let oc_short = open_out (Filename.concat output_dir "papers_short.tex") in
-
List.iter (fun paper ->
-
let latex = generate_latex_entry target_name paper in
-
output_string oc_short latex;
-
output_char oc_short '\n'
-
) sorted_short;
-
close_out oc_short;
-
Printf.printf "Generated %s/papers_short.tex with %d entries\n" output_dir (List.length sorted_short);
-
-
(* Write papers_preprint.tex *)
-
let oc_preprint = open_out (Filename.concat output_dir "papers_preprint.tex") in
-
List.iter (fun paper ->
-
let latex = generate_latex_entry target_name paper in
-
output_string oc_preprint latex;
-
output_char oc_preprint '\n'
-
) sorted_preprint;
-
close_out oc_preprint;
-
Printf.printf "Generated %s/papers_preprint.tex with %d entries\n" output_dir (List.length sorted_preprint);
-
-
(* Write papers_selected.tex *)
-
let oc_selected = open_out (Filename.concat output_dir "papers_selected.tex") in
-
List.iter (fun paper ->
-
let latex = generate_latex_entry target_name paper in
-
output_string oc_selected latex;
-
output_char oc_selected '\n'
-
) sorted_selected;
-
close_out oc_selected;
-
Printf.printf "Generated %s/papers_selected.tex with %d entries\n" output_dir (List.length sorted_selected);
-
-
(* Write paper_count.tex *)
-
let total_count = List.length latest_papers in
-
let oc_count = open_out (Filename.concat output_dir "paper_count.tex") in
-
output_string oc_count (sprintf "\\setcounter{pubcounter}{%d}\n" total_count);
-
close_out oc_count;
-
Printf.printf "Generated %s/paper_count.tex with total count: %d\n" output_dir total_count;
-
-
0
-
with e ->
-
Printf.eprintf "Error loading papers: %s\n" (Printexc.to_string e);
-
1
-
-
let output_dir_arg =
-
let doc = "Output directory for generated LaTeX files" in
-
Arg.(value & opt string "." & info ["output"; "o"] ~docv:"DIR" ~doc)
-
-
let target_name_arg =
-
let doc = "Name to underline in author list (e.g., 'Madhavapeddy')" in
-
Arg.(value & opt string "Madhavapeddy" & info ["target"; "t"] ~docv:"NAME" ~doc)
-
-
let term =
-
Term.(const generate_tex $ Bushel_common.base_dir $ output_dir_arg $ target_name_arg)
-
-
let cmd =
-
let doc = "Generate LaTeX publication entries" in
-
let info = Cmd.info "paper-tex" ~doc in
-
Cmd.v info term
-69
stack/bushel/bin/bushel_search.ml
···
-
open Cmdliner
-
-
(** TODO:claude Bushel search command for integration with main CLI *)
-
-
let endpoint_override =
-
let doc = "Override Typesense server endpoint URL" in
-
Arg.(value & opt (some string) None & info ["endpoint"; "e"] ~doc)
-
-
let limit =
-
let doc = "Maximum number of results to return" in
-
Arg.(value & opt int 50 & info ["limit"; "l"] ~doc)
-
-
let offset =
-
let doc = "Number of results to skip (for pagination)" in
-
Arg.(value & opt int 0 & info ["offset"; "o"] ~doc)
-
-
let query_text =
-
let doc = "Search query text" in
-
Arg.(required & pos 0 (some string) None & info [] ~docv:"QUERY" ~doc)
-
-
(** TODO:claude Search function using multisearch *)
-
let search env profile endpoint_override query_text limit offset =
-
(* Get credentials from keyeio profile *)
-
let api_key = Keyeio.Profile.get_required profile ~key:"api_key" in
-
let default_endpoint = "http://localhost:8108" in
-
let endpoint =
-
match endpoint_override with
-
| Some e -> e
-
| None ->
-
Keyeio.Profile.get profile ~key:"endpoint"
-
|> Option.value ~default:default_endpoint
-
in
-
let openai_key = Keyeio.Profile.get_required profile ~key:"openai_key" in
-
-
let config = {
-
Bushel.Typesense.endpoint;
-
api_key;
-
openai_key;
-
} in
-
-
Printf.printf "Searching Typesense at %s\n" config.endpoint;
-
Printf.printf "Query: \"%s\"\n" query_text;
-
Printf.printf "Limit: %d, Offset: %d\n" limit offset;
-
Printf.printf "\n";
-
-
Eio.Switch.run @@ fun sw ->
-
(try
-
let result = Bushel.Typesense.multisearch ~sw ~env config query_text ~limit:50 () in
-
match result with
-
| Ok multisearch_resp ->
-
let combined_response = Bushel.Typesense.combine_multisearch_results multisearch_resp ~limit ~offset () in
-
Printf.printf "Found %d results (%.2fms)\n\n" combined_response.total combined_response.query_time;
-
-
List.iteri (fun i (hit : Bushel.Typesense.search_result) ->
-
Printf.printf "%d. %s (score: %.2f)\n" (i + 1) (Bushel.Typesense.pp_search_result_oneline hit) hit.Bushel.Typesense.score
-
) combined_response.hits;
-
0
-
| Error err ->
-
Format.eprintf "Search error: %a\n" Bushel.Typesense.pp_error err;
-
exit 1
-
with exn ->
-
Printf.eprintf "Error: %s\n" (Printexc.to_string exn);
-
exit 1
-
)
-
-
(** TODO:claude Command line term - takes eio_env from outside *)
-
let make_term eio_env =
-
let profile_term = Bushel_common.typesense_key_term eio_env#fs in
-
Term.(const (search eio_env) $ profile_term $ endpoint_override $ query_text $ limit $ offset)
-70
stack/bushel/bin/bushel_thumbs.ml
···
-
open Printf
-
open Cmdliner
-
-
(** TODO:claude
-
Helper module for ImageMagick operations *)
-
module Imagemagick = struct
-
(* Generate thumbnail from PDF *)
-
let generate_thumbnail ~pdf_path ~size ~output_path =
-
let cmd =
-
sprintf "magick -density 600 -quality 100 %s[0] -gravity North -crop 100%%x50%%+0+0 -resize %s %s"
-
pdf_path size output_path
-
in
-
eprintf "Running: %s\n%!" cmd;
-
Sys.command cmd
-
end
-
-
(** TODO:claude
-
Process a single paper to generate its thumbnail *)
-
let process_paper base_dir output_dir paper =
-
let slug = Bushel.Paper.slug paper in
-
let pdf_path = sprintf "%s/static/papers/%s.pdf" base_dir slug in
-
let thumbnail_path = sprintf "%s/%s.png" output_dir slug in
-
-
(* Skip if thumbnail already exists *)
-
if Sys.file_exists thumbnail_path then (
-
printf "Thumbnail already exists for %s, skipping\n%!" slug
-
) else if Sys.file_exists pdf_path then (
-
try
-
let size = sprintf "2048x" in
-
printf "Generating high-res thumbnail for %s (size: %s)\n%!" slug size;
-
match Imagemagick.generate_thumbnail ~pdf_path ~size ~output_path:thumbnail_path with
-
| 0 -> printf "Successfully generated thumbnail for %s\n%!" slug
-
| n -> eprintf "Error generating thumbnail for %s (exit code: %d)\n%!" slug n
-
with
-
| e -> eprintf "Error processing paper %s: %s\n%!" slug (Printexc.to_string e)
-
) else (
-
eprintf "PDF file not found for paper: %s\n%!" slug
-
)
-
-
(** TODO:claude
-
Main function to process all papers in a directory *)
-
let process_papers base_dir output_dir =
-
(* Create output directory if it doesn't exist *)
-
if not (Sys.file_exists output_dir) then (
-
printf "Creating output directory: %s\n%!" output_dir;
-
Unix.mkdir output_dir 0o755
-
);
-
-
(* Load Bushel entries and get papers *)
-
printf "Loading papers from %s\n%!" base_dir;
-
let e = Bushel.load base_dir in
-
let papers = Bushel.Entry.papers e in
-
-
(* Process each paper *)
-
printf "Found %d papers\n%!" (List.length papers);
-
List.iter (process_paper base_dir output_dir) papers
-
-
(* Command line arguments are now imported from Bushel_common *)
-
-
(* Export the term for use in bushel_main.ml *)
-
let term =
-
Term.(const (fun base_dir output_dir -> process_papers base_dir output_dir; 0) $
-
Bushel_common.base_dir $ Bushel_common.output_dir ~default:".")
-
-
let cmd =
-
let doc = "Generate thumbnails for paper PDFs" in
-
let info = Cmd.info "thumbs" ~doc in
-
Cmd.v info term
-
-
(* Main entry point removed - accessed through bushel_main.ml *)
-244
stack/bushel/bin/bushel_typesense.ml
···
-
open Cmdliner
-
-
(** TODO:claude Bushel Typesense binary with upload and query functionality *)
-
-
let endpoint =
-
let doc = "Typesense server endpoint URL" in
-
Arg.(value & opt string "http://localhost:8108" & info ["endpoint"; "e"] ~doc)
-
-
let api_key =
-
let doc = "Typesense API key for authentication" in
-
Arg.(value & opt string "" & info ["api-key"; "k"] ~doc)
-
-
let openai_key =
-
let doc = "OpenAI API key for embeddings" in
-
Arg.(value & opt string "" & info ["openai-key"; "oa"] ~doc)
-
-
let data_dir =
-
let doc = "Directory containing bushel data files" in
-
Arg.(value & opt string "." & info ["data-dir"; "d"] ~doc)
-
-
(** TODO:claude Main upload function *)
-
let upload endpoint api_key openai_key data_dir =
-
if api_key = "" then (
-
Printf.eprintf "Error: API key is required. Use --api-key or set TYPESENSE_API_KEY environment variable.\n";
-
exit 1
-
);
-
-
if openai_key = "" then (
-
Printf.eprintf "Error: OpenAI API key is required for embeddings. Use --openai-key or set OPENAI_API_KEY environment variable.\n";
-
exit 1
-
);
-
-
let config = Bushel.Typesense.{ endpoint; api_key; openai_key } in
-
-
Printf.printf "Loading bushel data from %s\n" data_dir;
-
let entries = Bushel.load data_dir in
-
-
Printf.printf "Uploading bushel data to Typesense at %s\n" endpoint;
-
-
Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
(try
-
Bushel.Typesense.upload_all ~sw ~env config entries
-
with exn ->
-
Printf.eprintf "Error: %s\n" (Printexc.to_string exn);
-
exit 1
-
)
-
-
-
(** TODO:claude Query function *)
-
let query endpoint api_key query_text collection limit offset =
-
let base_config = Bushel.Typesense.load_config_from_files () in
-
let config = {
-
Bushel.Typesense.endpoint = if endpoint = "" then base_config.endpoint else endpoint;
-
api_key = if api_key = "" then base_config.api_key else api_key;
-
openai_key = base_config.openai_key;
-
} in
-
-
if config.api_key = "" then (
-
Printf.eprintf "Error: API key is required. Use --api-key or set TYPESENSE_API_KEY environment variable.\n";
-
exit 1
-
);
-
-
Printf.printf "Searching Typesense at %s\n" config.endpoint;
-
Printf.printf "Query: \"%s\"\n" query_text;
-
if collection <> "" then Printf.printf "Collection: %s\n" collection;
-
Printf.printf "Limit: %d, Offset: %d\n" limit offset;
-
Printf.printf "\n";
-
-
Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
(try
-
let result = if collection = "" then
-
Bushel.Typesense.search_all ~sw ~env config query_text ~limit ~offset ()
-
else
-
Bushel.Typesense.search_collection ~sw ~env config collection query_text ~limit ~offset ()
-
in
-
match result with
-
| Ok response ->
-
Printf.printf "Found %d results (%.2fms)\n\n" response.total response.query_time;
-
List.iteri (fun i (hit : Bushel.Typesense.search_result) ->
-
Printf.printf "%d. [%s] %s (score: %.2f)\n" (i + 1) hit.collection hit.title hit.score;
-
if hit.content <> "" then Printf.printf " %s\n" hit.content;
-
if hit.highlights <> [] then (
-
Printf.printf " Highlights:\n";
-
List.iter (fun (field, snippets) ->
-
List.iter (fun snippet ->
-
Printf.printf " %s: %s\n" field snippet
-
) snippets
-
) hit.highlights
-
);
-
Printf.printf "\n"
-
) response.hits
-
| Error err ->
-
Format.eprintf "Search error: %a\n" Bushel.Typesense.pp_error err;
-
exit 1
-
with exn ->
-
Printf.eprintf "Error: %s\n" (Printexc.to_string exn);
-
exit 1
-
)
-
-
(** TODO:claude List collections function *)
-
let list endpoint api_key =
-
let base_config = Bushel.Typesense.load_config_from_files () in
-
let config = {
-
Bushel.Typesense.endpoint = if endpoint = "" then base_config.endpoint else endpoint;
-
api_key = if api_key = "" then base_config.api_key else api_key;
-
openai_key = base_config.openai_key;
-
} in
-
-
if config.api_key = "" then (
-
Printf.eprintf "Error: API key is required. Use --api-key or set TYPESENSE_API_KEY environment variable.\n";
-
exit 1
-
);
-
-
Printf.printf "Listing collections at %s\n\n" config.endpoint;
-
-
Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
(try
-
let result = Bushel.Typesense.list_collections ~sw ~env config in
-
match result with
-
| Ok collections ->
-
Printf.printf "Collections:\n";
-
List.iter (fun (name, count) ->
-
Printf.printf " %s (%d documents)\n" name count
-
) collections
-
| Error err ->
-
Format.eprintf "List error: %a\n" Bushel.Typesense.pp_error err;
-
exit 1
-
with exn ->
-
Printf.eprintf "Error: %s\n" (Printexc.to_string exn);
-
exit 1
-
)
-
-
(** TODO:claude Command line arguments for query *)
-
let query_text =
-
let doc = "Search query text" in
-
Arg.(required & pos 0 (some string) None & info [] ~docv:"QUERY" ~doc)
-
-
let collection =
-
let doc = "Specific collection to search (contacts, papers, projects, news, videos, notes, ideas)" in
-
Arg.(value & opt string "" & info ["collection"; "c"] ~doc)
-
-
let limit =
-
let doc = "Maximum number of results to return" in
-
Arg.(value & opt int 10 & info ["limit"; "l"] ~doc)
-
-
let offset =
-
let doc = "Number of results to skip (for pagination)" in
-
Arg.(value & opt int 0 & info ["offset"; "o"] ~doc)
-
-
(** TODO:claude Query command *)
-
let query_cmd =
-
let doc = "Search bushel collections in Typesense" in
-
let man = [
-
`S Manpage.s_description;
-
`P "Search across all or specific bushel collections in Typesense.";
-
`P "The API key can be provided via --api-key flag or TYPESENSE_API_KEY environment variable.";
-
`P "If .typesense-url and .typesense-api files exist, they will be used for configuration.";
-
`S Manpage.s_examples;
-
`P "Search all collections:";
-
`Pre " bushel-typesense query \"machine learning\"";
-
`P "Search specific collection:";
-
`Pre " bushel-typesense query \"OCaml\" --collection papers";
-
`P "Search with pagination:";
-
`Pre " bushel-typesense query \"AI\" --limit 5 --offset 10";
-
] in
-
let info = Cmd.info "query" ~doc ~man in
-
Cmd.v info Term.(const query $ endpoint $ api_key $ query_text $ collection $ limit $ offset)
-
-
(** TODO:claude List command *)
-
let list_cmd =
-
let doc = "List all collections in Typesense" in
-
let man = [
-
`S Manpage.s_description;
-
`P "List all available collections and their document counts.";
-
] in
-
let info = Cmd.info "list" ~doc ~man in
-
Cmd.v info Term.(const list $ endpoint $ api_key)
-
-
(** TODO:claude Updated upload command *)
-
let upload_cmd =
-
let doc = "Upload bushel collections to Typesense search engine" in
-
let man = [
-
`S Manpage.s_description;
-
`P "Upload all bushel object types (contacts, papers, projects, news, videos, notes, ideas) to a Typesense search engine instance.";
-
`P "The API key can be provided via --api-key flag or TYPESENSE_API_KEY environment variable.";
-
`S Manpage.s_examples;
-
`P "Upload to local Typesense instance:";
-
`Pre " bushel-typesense upload --api-key xyz123 --openai-key sk-abc... --data-dir /path/to/data";
-
`P "Upload to remote Typesense instance:";
-
`Pre " bushel-typesense upload --endpoint https://search.example.com --api-key xyz123 --openai-key sk-abc...";
-
] in
-
let info = Cmd.info "upload" ~doc ~man in
-
Cmd.v info Term.(const upload $ endpoint $ api_key $ openai_key $ data_dir)
-
-
(** TODO:claude Main command group *)
-
let main_cmd =
-
let doc = "Bushel Typesense client" in
-
let man = [
-
`S Manpage.s_description;
-
`P "Client for uploading to and querying Bushel collections in Typesense search engine.";
-
`S Manpage.s_commands;
-
`S Manpage.s_common_options;
-
] in
-
let info = Cmd.info "bushel-typesense" ~doc ~man in
-
Cmd.group info [upload_cmd; query_cmd; list_cmd]
-
-
let () =
-
(* Check for API keys in environment if not provided *)
-
let api_key_env = try Some (Sys.getenv "TYPESENSE_API_KEY") with Not_found -> None in
-
let openai_key_env = try Some (Sys.getenv "OPENAI_API_KEY") with Not_found -> None in
-
match api_key_env with
-
| Some key when key <> "" ->
-
(* Override the api_key argument with environment variable *)
-
let api_key = Arg.(value & opt string key & info ["api-key"; "k"] ~doc:"Typesense API key") in
-
let openai_key = match openai_key_env with
-
| Some oa_key when oa_key <> "" -> Arg.(value & opt string oa_key & info ["openai-key"; "oa"] ~doc:"OpenAI API key")
-
| _ -> openai_key
-
in
-
let upload_cmd =
-
let doc = "Upload bushel collections to Typesense search engine" in
-
let info = Cmd.info "upload" ~doc in
-
Cmd.v info Term.(const upload $ endpoint $ api_key $ openai_key $ data_dir)
-
in
-
let query_cmd =
-
let doc = "Search bushel collections in Typesense" in
-
let info = Cmd.info "query" ~doc in
-
Cmd.v info Term.(const query $ endpoint $ api_key $ query_text $ collection $ limit $ offset)
-
in
-
let list_cmd =
-
let doc = "List all collections in Typesense" in
-
let info = Cmd.info "list" ~doc in
-
Cmd.v info Term.(const list $ endpoint $ api_key)
-
in
-
let main_cmd =
-
let doc = "Bushel Typesense client" in
-
let info = Cmd.info "bushel-typesense" ~doc in
-
Cmd.group info [upload_cmd; query_cmd; list_cmd]
-
in
-
exit (Cmd.eval main_cmd)
-
| _ ->
-
exit (Cmd.eval main_cmd)
-131
stack/bushel/bin/bushel_video.ml
···
-
[@@@warning "-26-27-32"]
-
-
open Cmdliner
-
-
let setup_log style_renderer level =
-
Fmt_tty.setup_std_outputs ?style_renderer ();
-
Logs.set_level level;
-
Logs.set_reporter (Logs_fmt.reporter ());
-
()
-
-
let process_videos ~sw ~env output_dir overwrite base_url channel fetch_thumbs thumbs_dir =
-
(* Create PeerTube client for connection pooling *)
-
let requests_session = Requests.create ~sw env in
-
let peertube_client = Peertubee.create ~requests_session ~base_url in
-
-
let all_videos = Peertubee.fetch_all_channel_videos peertube_client channel in
-
Logs.info (fun f -> f "Total videos: %d" (List.length all_videos));
-
-
(* Create thumbnails directory if needed *)
-
(if fetch_thumbs && not (Sys.file_exists thumbs_dir) then
-
Unix.mkdir thumbs_dir 0o755);
-
-
(* Process each video, fetching full details for complete descriptions *)
-
let vids = List.map (fun video ->
-
(* Fetch complete video details to get full description *)
-
let full_video = Peertubee.fetch_video_details peertube_client video.Peertubee.uuid in
-
let (description, published_date, title, url, uuid, slug) =
-
Peertubee.to_bushel_video full_video
-
in
-
Logs.info (fun f -> f "Title: %s, URL: %s" title url);
-
-
(* Download thumbnail if requested *)
-
(if fetch_thumbs then
-
let thumb_path = Filename.concat thumbs_dir (uuid ^ ".jpg") in
-
let fs = Eio.Stdenv.fs env in
-
let result = Peertubee.download_thumbnail peertube_client ~fs full_video thumb_path in
-
match result with
-
| Ok () ->
-
Logs.info (fun f -> f "Downloaded thumbnail for %s to %s" title thumb_path)
-
| Error (`Msg e) ->
-
Logs.warn (fun f -> f "Failed to download thumbnail for %s: %s" title e)
-
);
-
-
{Bushel.Video.description; published_date; title; url; uuid; slug;
-
talk=false; paper=None; project=None; tags=full_video.tags}
-
) all_videos in
-
-
(* Write video files *)
-
List.iter (fun video ->
-
let file_path = Filename.concat output_dir (video.Bushel.Video.uuid ^ ".md") in
-
let file_exists = Sys.file_exists file_path in
-
-
if file_exists then
-
try
-
(* If file exists, load it to preserve specific fields *)
-
let existing_video = Bushel.Video.of_md file_path in
-
(* Create merged video with preserved fields *)
-
let merged_video = {
-
video with
-
tags = existing_video.tags; (* Preserve existing tags *)
-
paper = existing_video.paper; (* Preserve paper field *)
-
project = existing_video.project; (* Preserve project field *)
-
talk = existing_video.talk; (* Preserve talk field *)
-
} in
-
-
(* Write the merged video data *)
-
if overwrite then
-
match Bushel.Video.to_file output_dir merged_video with
-
| Ok () ->
-
Logs.info (fun f -> f "Updated video %s with preserved fields in %s"
-
merged_video.Bushel.Video.title file_path)
-
| Error (`Msg e) ->
-
Logs.err (fun f -> f "Failed to update video %s: %s"
-
merged_video.Bushel.Video.title e)
-
else
-
Logs.info (fun f -> f "Skipping existing video %s (use --overwrite to replace)"
-
video.Bushel.Video.title)
-
with _ ->
-
(* If reading existing file fails, proceed with new data *)
-
if overwrite then
-
match Bushel.Video.to_file output_dir video with
-
| Ok () ->
-
Logs.info (fun f -> f "Wrote video %s to %s (existing file could not be read)"
-
video.Bushel.Video.title file_path)
-
| Error (`Msg e) ->
-
Logs.err (fun f -> f "Failed to write video %s: %s"
-
video.Bushel.Video.title e)
-
else
-
Logs.info (fun f -> f "Skipping existing video %s (use --overwrite to replace)"
-
video.Bushel.Video.title)
-
else
-
(* If file doesn't exist, just write new data *)
-
match Bushel.Video.to_file output_dir video with
-
| Ok () ->
-
Logs.info (fun f -> f "Wrote new video %s to %s"
-
video.Bushel.Video.title file_path)
-
| Error (`Msg e) ->
-
Logs.err (fun f -> f "Failed to write video %s: %s"
-
video.Bushel.Video.title e)
-
) vids
-
-
(* Command line arguments are now imported from Bushel_common *)
-
-
(* Export the term for use in bushel_main.ml *)
-
let term =
-
let fetch_thumbs =
-
let doc = "Download video thumbnails" in
-
Arg.(value & flag & info ["fetch-thumbs"] ~doc)
-
in
-
let thumbs_dir =
-
let doc = "Directory to save thumbnails (default: images/videos)" in
-
Arg.(value & opt string "images/videos" & info ["thumbs-dir"] ~docv:"DIR" ~doc)
-
in
-
Term.(const (fun output_dir overwrite base_url channel fetch_thumbs thumbs_dir () ->
-
Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
process_videos ~sw ~env output_dir overwrite base_url channel fetch_thumbs thumbs_dir; 0)
-
$ Bushel_common.output_dir ~default:"." $
-
Bushel_common.overwrite $
-
Bushel_common.url_term ~default:"https://crank.recoil.org" ~doc:"PeerTube base URL" $
-
Bushel_common.channel ~default:"anil" $
-
fetch_thumbs $
-
thumbs_dir $
-
Bushel_common.setup_term)
-
-
let cmd =
-
let doc = "Fetch and process videos from PeerTube" in
-
let info = Cmd.info "video" ~doc in
-
Cmd.v info term
-
-
(* Main entry point removed - accessed through bushel_main.ml *)
-83
stack/bushel/bin/bushel_video_thumbs.ml
···
-
[@@@warning "-26-27-32"]
-
-
open Cmdliner
-
-
let setup_log style_renderer level =
-
Fmt_tty.setup_std_outputs ?style_renderer ();
-
Logs.set_level level;
-
Logs.set_reporter (Logs_fmt.reporter ());
-
()
-
-
let process_video_thumbs ~sw ~env videos_dir thumbs_dir base_url =
-
(* Ensure thumbnail directory exists *)
-
(if not (Sys.file_exists thumbs_dir) then
-
Unix.mkdir thumbs_dir 0o755);
-
-
(* Create PeerTube client for connection pooling *)
-
let requests_session = Requests.create ~sw env in
-
let peertube_client = Peertubee.create ~requests_session ~base_url in
-
-
(* Read all video markdown files *)
-
let video_files = Sys.readdir videos_dir
-
|> Array.to_list
-
|> List.filter (fun f -> Filename.check_suffix f ".md")
-
|> List.map (fun f -> Filename.concat videos_dir f)
-
in
-
-
Logs.info (fun f -> f "Found %d video files to process" (List.length video_files));
-
-
(* Process each video file *)
-
List.iter (fun video_file ->
-
try
-
(* Load existing video *)
-
let video = Bushel.Video.of_md video_file in
-
let uuid = video.Bushel.Video.uuid in
-
-
Logs.info (fun f -> f "Processing video: %s (UUID: %s)" video.title uuid);
-
-
(* Fetch video details from PeerTube to get thumbnail info *)
-
let peertube_video = Peertubee.fetch_video_details peertube_client uuid in
-
-
(* Download thumbnail *)
-
let thumb_path = Filename.concat thumbs_dir (uuid ^ ".jpg") in
-
let fs = Eio.Stdenv.fs env in
-
let result = Peertubee.download_thumbnail peertube_client ~fs peertube_video thumb_path in
-
-
match result with
-
| Ok () ->
-
Logs.info (fun f -> f "Downloaded thumbnail for %s to %s" video.title thumb_path);
-
-
(* Update video file with thumbnail_url field *)
-
(match Peertubee.thumbnail_url peertube_client peertube_video with
-
| Some url ->
-
Logs.info (fun f -> f "Thumbnail URL: %s" url)
-
| None ->
-
Logs.warn (fun f -> f "No thumbnail URL for video %s" video.title))
-
| Error (`Msg e) ->
-
Logs.err (fun f -> f "Failed to download thumbnail for %s: %s" video.title e)
-
with exn ->
-
Logs.err (fun f -> f "Error processing %s: %s" video_file (Printexc.to_string exn))
-
) video_files
-
-
let term =
-
let videos_dir =
-
let doc = "Directory containing video markdown files" in
-
Arg.(value & opt string "data/videos" & info ["videos-dir"; "d"] ~docv:"DIR" ~doc)
-
in
-
let thumbs_dir =
-
let doc = "Directory to save thumbnails" in
-
Arg.(value & opt string "images/videos" & info ["thumbs-dir"; "t"] ~docv:"DIR" ~doc)
-
in
-
Term.(const (fun videos_dir thumbs_dir base_url () ->
-
Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
process_video_thumbs ~sw ~env videos_dir thumbs_dir base_url; 0)
-
$ videos_dir $
-
thumbs_dir $
-
Bushel_common.url_term ~default:"https://crank.recoil.org" ~doc:"PeerTube base URL" $
-
Bushel_common.setup_term)
-
-
let cmd =
-
let doc = "Download thumbnails for existing videos and update metadata" in
-
let info = Cmd.info "video-thumbs" ~doc in
-
Cmd.v info term
-20
stack/bushel/bin/dune
···
-
(library
-
(name bushel_common)
-
(modules bushel_common)
-
(libraries cmdliner fmt fmt.cli fmt.tty logs logs.cli logs.fmt xdge keyeio eio))
-
-
(executable
-
(name bushel_main)
-
(public_name bushel)
-
(package bushel)
-
(modules bushel_main bushel_bibtex bushel_doi bushel_ideas bushel_info bushel_missing bushel_note_doi bushel_obsidian bushel_paper bushel_paper_classify bushel_paper_tex bushel_video bushel_video_thumbs bushel_thumbs bushel_faces bushel_links bushel_search)
-
(flags (:standard -w -69))
-
(libraries bushel bushel_common cmdliner eio eio_main yaml ezjsonm zotero-translation peertubee fmt fmt.cli fmt.tty logs logs.cli logs.fmt cmarkit karakeepe uri unix ptime.clock.os crockford immiche))
-
-
(executable
-
(name bushel_typesense)
-
(public_name bushel-typesense)
-
(package bushel)
-
(modules bushel_typesense)
-
(flags (:standard -w -69))
-
(libraries bushel bushel_common cmdliner eio eio_main))
-51
stack/bushel/bushel.opam
···
-
# This file is generated by dune, edit dune-project instead
-
opam-version: "2.0"
-
synopsis: "A webring but not as oldskool"
-
description: "This is all still a work in progress"
-
maintainer: ["anil@recoil.org"]
-
authors: ["Anil Madhavapeddy"]
-
license: "ISC"
-
homepage: "https://github.com/avsm/bushel"
-
bug-reports: "https://github.com/avsm/bushel/issues"
-
depends: [
-
"dune" {>= "3.17"}
-
"ocaml" {>= "5.2.0"}
-
"uri"
-
"cmarkit"
-
"ezjsonm"
-
"ptime"
-
"jsont"
-
"bytesrw"
-
"jekyll-format"
-
"yaml"
-
"eio"
-
"eio_main"
-
"requests"
-
"fmt"
-
"peertubee"
-
"karakeepe"
-
"typesense-cliente"
-
"immiche"
-
"xdge"
-
"keyeio"
-
"cmdliner" {>= "2.0.0"}
-
"odoc" {with-doc}
-
]
-
build: [
-
["dune" "subst"] {dev}
-
[
-
"dune"
-
"build"
-
"-p"
-
name
-
"-j"
-
jobs
-
"@install"
-
"@runtest" {with-test}
-
"@doc" {with-doc}
-
]
-
]
-
dev-repo: "git+https://github.com/avsm/bushel.git"
-
pin-depends: [
-
[ "zotero-translation.dev" "git+https://github.com/avsm/zotero-translation.git" ]
-
]
-3
stack/bushel/bushel.opam.template
···
-
pin-depends: [
-
[ "zotero-translation.dev" "git+https://github.com/avsm/zotero-translation.git" ]
-
]
-35
stack/bushel/dune-project
···
-
(lang dune 3.17)
-
(name bushel)
-
-
(source (github avsm/bushel))
-
(license ISC)
-
(authors "Anil Madhavapeddy")
-
(maintainers "anil@recoil.org")
-
-
(generate_opam_files true)
-
-
(package
-
(name bushel)
-
(synopsis "A webring but not as oldskool")
-
(description "This is all still a work in progress")
-
(depends
-
(ocaml (>= "5.2.0"))
-
uri
-
cmarkit
-
ezjsonm
-
ptime
-
jsont
-
bytesrw
-
jekyll-format
-
yaml
-
eio
-
eio_main
-
requests
-
fmt
-
peertubee
-
karakeepe
-
typesense-cliente
-
immiche
-
xdge
-
keyeio
-
(cmdliner (>= 2.0.0))))
-79
stack/bushel/lib/bushel.ml
···
-
module Contact = Contact
-
module Idea = Idea
-
module Note = Note
-
module Paper = Paper
-
module Project = Project
-
module Video = Video
-
module Tags = Tags
-
module Link = Link
-
module Entry = Entry
-
module Util = Util
-
module Srcsetter = Srcsetter
-
module Md = Md
-
module Typesense = Typesense
-
module Link_graph = Link_graph
-
module Description = Description
-
module Doi_entry = Doi_entry
-
-
let map_md base subdir fn =
-
let dir = base ^ "/data/" ^ subdir in
-
Sys.readdir dir
-
|> Array.to_list
-
|> List.filter (fun f -> Filename.check_suffix f ".md")
-
|> List.map (fun e -> fn dir e)
-
;;
-
-
let map_category base c fn = map_md base c (fun dir e -> fn @@ Filename.concat dir e)
-
let dbg l = Printf.eprintf "loading %s\n%!" l
-
-
let load_contacts base = dbg "contacts"; map_category base "contacts" Contact.of_md
-
let load_projects base = dbg "projects"; map_category base "projects" Project.of_md
-
let load_notes base =
-
dbg "notes";
-
let notes_from_notes = map_category base "notes" Note.of_md in
-
let notes_from_news = map_category base "news" Note.of_md in
-
notes_from_notes @ notes_from_news
-
let load_ideas base = dbg "ideas"; map_category base "ideas" Idea.of_md
-
let load_videos base = dbg "videos"; map_category base "videos" Video.of_md
-
-
let load_images base =
-
Printf.eprintf "load images %s/images\n%!" base;
-
try
-
Srcsetter.list_of_json (Util.read_file (base ^ "/images/index.json")) |> Result.get_ok
-
with
-
| _ -> [] (* FIXME log *)
-
;;
-
-
let load_papers base =
-
Printf.eprintf "load papers %s/data/papers\n%!" base;
-
Sys.readdir (base ^ "/data/papers")
-
|> Array.to_list
-
|> List.filter (fun slug -> Sys.is_directory (base ^ "/data/papers/" ^ slug))
-
|> List.map (fun slug ->
-
Sys.readdir (base ^ "/data/papers/" ^ slug)
-
|> Array.to_list
-
|> List.filter (fun ver -> Filename.check_suffix ver ".md")
-
|> List.map (fun ver ->
-
let ver = Filename.chop_extension ver in
-
Paper.of_md ~slug ~ver (base ^ "/data/papers/" ^ slug ^ "/" ^ ver ^ ".md")))
-
|> List.flatten
-
|> Paper.tv
-
;;
-
-
let load base =
-
let images = load_images base in
-
let papers = load_papers base in
-
let contacts = load_contacts base in
-
let projects = load_projects base in
-
let notes = load_notes base in
-
let ideas = load_ideas base in
-
let videos = load_videos base in
-
let entries = Entry.v ~images ~papers ~notes ~projects ~ideas ~videos ~contacts ~data_dir:(base ^ "/data") in
-
(* Build link graph *)
-
Printf.eprintf "Building link_graph...\n%!";
-
let graph = Link_graph.build_link_graph entries in
-
Fmt.epr "%a@." Link_graph.pp_graph graph;
-
Link_graph.set_graph graph;
-
entries
-
;;
-
-27
stack/bushel/lib/bushel.mli
···
-
(** Bushel *)
-
-
module Contact = Contact
-
module Idea = Idea
-
module Note = Note
-
module Paper = Paper
-
module Project = Project
-
module Video = Video
-
module Tags = Tags
-
module Link = Link
-
module Entry = Entry
-
module Util = Util
-
module Md = Md
-
module Srcsetter = Srcsetter
-
module Typesense = Typesense
-
module Link_graph = Link_graph
-
module Description = Description
-
module Doi_entry = Doi_entry
-
-
val load_contacts : string -> Contact.ts
-
val load_projects : string -> Project.ts
-
val load_notes : string -> Note.ts
-
val load_ideas : string -> Idea.ts
-
val load_videos : string -> Video.ts
-
val load_images : string -> Srcsetter.ts
-
val load_papers : string -> Paper.ts
-
val load : string -> Entry.t
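
Usage sketch against the loader interface recorded above — just `Bushel.load` on a site checkout plus `Entry.all_entries`; the site path is a placeholder:

```ocaml
(* Load a site checkout and count the entries, using Bushel.load and
   Entry.all_entries from the interfaces recorded above. *)
let () =
  let entries = Bushel.load "/path/to/site" in
  Printf.printf "loaded %d entries\n%!"
    (List.length (Bushel.Entry.all_entries entries))
```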
-172
stack/bushel/lib/contact.ml
···
-
type t =
-
{ names : string list
-
; handle : string
-
; email : string option
-
; icon : string option
-
; github : string option
-
; twitter : string option
-
; bluesky : string option
-
; mastodon : string option
-
; orcid : string option
-
; url : string option
-
; atom : string list option
-
}
-
-
type ts = t list
-
-
let v ?email ?github ?twitter ?bluesky ?mastodon ?orcid ?icon ?url ?atom handle names =
-
{ names; handle; email; github; twitter; bluesky; mastodon; orcid; url; icon; atom }
-
;;
-
-
let make names email icon github twitter bluesky mastodon orcid url atom =
-
v ?email ?github ?twitter ?bluesky ?mastodon ?orcid ?icon ?url ?atom "" names
-
;;
-
-
let names { names; _ } = names
-
let name { names; _ } = List.hd names
-
let handle { handle; _ } = handle
-
let email { email; _ } = email
-
let icon { icon; _ } = icon
-
let github { github; _ } = github
-
let twitter { twitter; _ } = twitter
-
let bluesky { bluesky; _ } = bluesky
-
let mastodon { mastodon; _ } = mastodon
-
let orcid { orcid; _ } = orcid
-
let url { url; _ } = url
-
let atom { atom; _ } = atom
-
-
let json_t =
-
let open Jsont in
-
let open Jsont.Object in
-
let mem_opt f v ~enc = mem f v ~dec_absent:None ~enc_omit:Option.is_none ~enc in
-
map ~kind:"Contact" make
-
|> mem "names" (list string) ~dec_absent:[] ~enc:names
-
|> mem_opt "email" (some string) ~enc:email
-
|> mem_opt "icon" (some string) ~enc:icon
-
|> mem_opt "github" (some string) ~enc:github
-
|> mem_opt "twitter" (some string) ~enc:twitter
-
|> mem_opt "bluesky" (some string) ~enc:bluesky
-
|> mem_opt "mastodon" (some string) ~enc:mastodon
-
|> mem_opt "orcid" (some string) ~enc:orcid
-
|> mem_opt "url" (some string) ~enc:url
-
|> mem_opt "atom" (some (list string)) ~enc:atom
-
|> finish
-
;;
-
-
let v = Jsont_bytesrw.decode_string (Jsont.list json_t)
-
let compare a b = String.compare a.handle b.handle
-
let find_by_handle ts h = List.find_opt (fun { handle; _ } -> handle = h) ts
-
-
let best_url c =
-
match c.url with
-
| Some v -> Some v
-
| None ->
-
(match c.github with
-
| Some v -> Some ("https://github.com/" ^ v)
-
| None ->
-
(match c.email with
-
| Some v -> Some ("mailto:" ^ v)
-
| None -> None))
-
;;
-
-
let of_md fname =
-
(* TODO fix Jekyll_post to not error on no date *)
-
let fname' = "2000-01-01-" ^ Filename.basename fname in
-
let handle = Filename.basename fname |> Filename.chop_extension in
-
match Jekyll_post.of_string ~fname:fname' (Util.read_file fname) with
-
| Error (`Msg m) -> failwith ("contact_of_md: " ^ m)
-
| Ok jp ->
-
let fields = jp.Jekyll_post.fields |> Jekyll_format.fields_to_yaml in
-
let c = Jsont_bytesrw.decode_string json_t (Ezjsonm.value_to_string fields) in
-
(match c with
-
| Error e -> failwith e
-
| Ok c -> { c with handle })
-
;;
-
-
(* Given a name, turn it lowercase and return the concatenation of the
-
initials of all the words in the name and the full last name. *)
-
let handle_of_name name =
-
let name = String.lowercase_ascii name in
-
let words = String.split_on_char ' ' name in
-
let initials = String.concat "" (List.map (fun w -> String.sub w 0 1) words) in
-
initials ^ List.hd (List.rev words)
-
;;
-
-
(* Case-insensitive lookup for an author: succeeds when the query matches
-
any of a contact's names after lowercasing; fails on no match or ambiguity *)
-
let lookup_by_name ts a =
-
let a = String.lowercase_ascii a in
-
let rec aux acc = function
-
| [] -> acc
-
| t :: ts ->
-
if List.exists (fun n -> String.lowercase_ascii n = a) t.names
-
then aux (t :: acc) ts
-
else aux acc ts
-
in
-
match aux [] ts with
-
| [ a ] -> a
-
| [] -> raise (Failure ("contact.ml: author not found: " ^ a))
-
| _ -> raise (Failure ("ambiguous author: " ^ a))
-
;;
-
-
(* TODO:claude *)
-
let typesense_schema =
-
let open Ezjsonm in
-
dict [
-
("name", string "contacts");
-
("fields", list (fun d -> dict d) [
-
[("name", string "id"); ("type", string "string")];
-
[("name", string "handle"); ("type", string "string")];
-
[("name", string "name"); ("type", string "string")];
-
[("name", string "names"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "email"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "icon"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "github"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "twitter"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "bluesky"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "mastodon"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "orcid"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "url"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "atom"); ("type", string "string[]"); ("optional", bool true)];
-
]);
-
]
-
-
(** TODO:claude Pretty-print a contact with ANSI formatting *)
-
let pp ppf c =
-
let open Fmt in
-
pf ppf "@[<v>";
-
pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Contact";
-
pf ppf "%a: @%a@," (styled `Bold string) "Handle" string (handle c);
-
pf ppf "%a: %a@," (styled `Bold string) "Name" string (name c);
-
let ns = names c in
-
if List.length ns > 1 then
-
pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Aliases" (list ~sep:comma string) (List.tl ns);
-
(match email c with
-
| Some e -> pf ppf "%a: %a@," (styled `Bold string) "Email" string e
-
| None -> ());
-
(match github c with
-
| Some g -> pf ppf "%a: https://github.com/%a@," (styled `Bold string) "GitHub" string g
-
| None -> ());
-
(match twitter c with
-
| Some t -> pf ppf "%a: https://twitter.com/%a@," (styled `Bold string) "Twitter" string t
-
| None -> ());
-
(match bluesky c with
-
| Some b -> pf ppf "%a: %a@," (styled `Bold string) "Bluesky" string b
-
| None -> ());
-
(match mastodon c with
-
| Some m -> pf ppf "%a: %a@," (styled `Bold string) "Mastodon" string m
-
| None -> ());
-
(match orcid c with
-
| Some o -> pf ppf "%a: https://orcid.org/%a@," (styled `Bold string) "ORCID" string o
-
| None -> ());
-
(match url c with
-
| Some u -> pf ppf "%a: %a@," (styled `Bold string) "URL" string u
-
| None -> ());
-
(match icon c with
-
| Some i -> pf ppf "%a: %a@," (styled `Bold string) "Icon" string i
-
| None -> ());
-
(match atom c with
-
| Some atoms when atoms <> [] ->
-
pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Atom Feeds" (list ~sep:comma string) atoms
-
| _ -> ());
-
pf ppf "@]"
-25
stack/bushel/lib/contact.mli
···
-
type t
-
type ts = t list
-
-
val v : string -> (ts, string) result
-
val names : t -> string list
-
val name : t -> string
-
val handle : t -> string
-
val email : t -> string option
-
val icon : t -> string option
-
val github : t -> string option
-
val twitter : t -> string option
-
val bluesky : t -> string option
-
val mastodon : t -> string option
-
val orcid : t -> string option
-
val url : t -> string option
-
val atom : t -> string list option
-
val best_url : t -> string option
-
val find_by_handle : t list -> string -> t option
-
val handle_of_name : string -> string
-
val lookup_by_name : ts -> string -> t
-
val json_t : t Jsont.t
-
val compare : t -> t -> int
-
val of_md : string -> t
-
val typesense_schema : Ezjsonm.value
-
val pp : Format.formatter -> t -> unit
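
Sketch of the Contact API above: parse one contact file and derive a handle from a display name. The file path and the name are placeholders; `handle_of_name` behaviour is as recorded in contact.ml above:

```ocaml
(* Parse one contact file and derive a handle from a display name, using the
   Contact interface above. *)
let () =
  let c = Bushel.Contact.of_md "data/contacts/avsm.md" in
  Printf.printf "handle: @%s\n" (Bushel.Contact.handle c);
  (* handle_of_name concatenates the initials of every word with the full
     last word, so "Anil Madhavapeddy" becomes "ammadhavapeddy". *)
  print_endline (Bushel.Contact.handle_of_name "Anil Madhavapeddy")
```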
-72
stack/bushel/lib/description.ml
···
-
(** Generate descriptive text for bushel entries *)
-
-
(* Helper to format a date as "Month Year" *)
-
let format_date date =
-
let (year, month, _day) = date in
-
let month_name = match month with
-
| 1 -> "January" | 2 -> "February" | 3 -> "March" | 4 -> "April"
-
| 5 -> "May" | 6 -> "June" | 7 -> "July" | 8 -> "August"
-
| 9 -> "September" | 10 -> "October" | 11 -> "November" | 12 -> "December"
-
| _ -> ""
-
in
-
Printf.sprintf "%s %d" month_name year
-
-
(* Generate a descriptive sentence for a paper *)
-
let paper_description (p : Paper.t) ~date_str =
-
let venue = match String.lowercase_ascii (Paper.bibtype p) with
-
| "inproceedings" -> Paper.booktitle p
-
| "article" -> Paper.journal p
-
| "book" ->
-
let pub = Paper.publisher p in
-
if pub = "" then "Book" else "Book by " ^ pub
-
| "techreport" ->
-
(try "Technical report at " ^ Paper.institution p
-
with _ -> "Technical report")
-
| "misc" ->
-
let pub = Paper.publisher p in
-
if pub = "" then "Working paper" else "Working paper at " ^ pub
-
| _ -> "Publication"
-
in
-
Printf.sprintf "Paper in %s (%s)" venue date_str
-
-
(* Generate a descriptive sentence for a note *)
-
let note_description (n : Note.t) ~date_str ~lookup_fn =
-
match Note.slug_ent n with
-
| Some slug_ent ->
-
(match lookup_fn slug_ent with
-
| Some related_title ->
-
Printf.sprintf "Note about %s (%s)" related_title date_str
-
| None -> Printf.sprintf "Research note (%s)" date_str)
-
| None -> Printf.sprintf "Research note (%s)" date_str
-
-
(* Generate a descriptive sentence for an idea *)
-
let idea_description (i : Idea.t) ~date_str =
-
let status_str = String.lowercase_ascii (Idea.status_to_string (Idea.status i)) in
-
let level_str = Idea.level_to_string (Idea.level i) in
-
Printf.sprintf "Research idea (%s, %s level, %s)" status_str level_str date_str
-
-
(* Generate a descriptive sentence for a video *)
-
let video_description (v : Video.t) ~date_str ~lookup_fn =
-
let video_type = if Video.talk v then "Talk video" else "Video" in
-
let context = match Video.paper v with
-
| Some paper_slug ->
-
(match lookup_fn paper_slug with
-
| Some title -> Printf.sprintf " about %s" title
-
| None -> "")
-
| None ->
-
(match Video.project v with
-
| Some project_slug ->
-
(match lookup_fn project_slug with
-
| Some title -> Printf.sprintf " about %s" title
-
| None -> "")
-
| None -> "")
-
in
-
Printf.sprintf "%s%s (%s)" video_type context date_str
-
-
(* Generate a descriptive sentence for a project *)
-
let project_description (pr : Project.t) =
-
let end_str = match pr.Project.finish with
-
| Some year -> string_of_int year
-
| None -> "present"
-
in
-
Printf.sprintf "Project (%d–%s)" pr.Project.start end_str
-19
stack/bushel/lib/description.mli
···
-
(** Generate descriptive text for bushel entries *)
-
-
(** Format a date as "Month Year" *)
-
val format_date : int * int * int -> string
-
-
(** Generate a descriptive sentence for a paper with date string *)
-
val paper_description : Paper.t -> date_str:string -> string
-
-
(** Generate a descriptive sentence for a note with date string and lookup function *)
-
val note_description : Note.t -> date_str:string -> lookup_fn:(string -> string option) -> string
-
-
(** Generate a descriptive sentence for an idea with date string *)
-
val idea_description : Idea.t -> date_str:string -> string
-
-
(** Generate a descriptive sentence for a video with date string and lookup function *)
-
val video_description : Video.t -> date_str:string -> lookup_fn:(string -> string option) -> string
-
-
(** Generate a descriptive sentence for a project *)
-
val project_description : Project.t -> string
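
Tiny check on the date formatter declared above — it renders a `(year, month, day)` triple as "Month Year":

```ocaml
(* Per the Description interface above, this prints "March 2024". *)
let () = print_endline (Bushel.Description.format_date (2024, 3, 1))
```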
-147
stack/bushel/lib/doi_entry.ml
···
-
module J = Ezjsonm
-
-
type status =
-
| Resolved
-
| Failed of string
-
-
type t = {
-
doi: string;
-
title: string;
-
authors: string list;
-
year: int;
-
bibtype: string;
-
publisher: string;
-
resolved_at: string;
-
source_urls: string list;
-
status: status;
-
ignore: bool;
-
}
-
-
type ts = t list
-
-
let create_resolved ~doi ~title ~authors ~year ~bibtype ~publisher ?(source_urls=[]) () =
-
let resolved_at =
-
let now = Ptime_clock.now () in
-
let rfc3339 = Ptime.to_rfc3339 ~space:false ~frac_s:0 now in
-
String.sub rfc3339 0 10 (* Extract YYYY-MM-DD *)
-
in
-
{ doi; title; authors; year; bibtype; publisher; resolved_at; source_urls; status = Resolved; ignore = false }
-
-
let create_failed ~doi ~error ?(source_urls=[]) () =
-
let resolved_at =
-
let now = Ptime_clock.now () in
-
let rfc3339 = Ptime.to_rfc3339 ~space:false ~frac_s:0 now in
-
String.sub rfc3339 0 10 (* Extract YYYY-MM-DD *)
-
in
-
{ doi; title = ""; authors = []; year = 0; bibtype = ""; publisher = "";
-
resolved_at; source_urls; status = Failed error; ignore = false }
-
-
let merge_entries old_entry new_entry =
-
(* Combine source_urls, removing duplicates *)
-
let combined_urls =
-
List.sort_uniq String.compare (old_entry.source_urls @ new_entry.source_urls)
-
in
-
(* Use new_entry's data but with combined URLs and preserve ignore flag from old entry *)
-
{ new_entry with source_urls = combined_urls; ignore = old_entry.ignore }
-
-
let to_yaml_value entry =
-
let status_field = match entry.status with
-
| Resolved -> []
-
| Failed err -> [("error", `String err)]
-
in
-
let source_urls_field = match entry.source_urls with
-
| [] -> []
-
| urls -> [("source_urls", `A (List.map (fun url -> `String url) urls))]
-
in
-
let ignore_field = if entry.ignore then [("ignore", `Bool true)] else [] in
-
let fields = [
-
("doi", `String entry.doi);
-
("resolved_at", `String entry.resolved_at);
-
] @ status_field @ source_urls_field @ ignore_field in
-
let fields = match entry.status with
-
| Resolved ->
-
fields @ [
-
("title", `String entry.title);
-
("authors", `A (List.map (fun a -> `String a) entry.authors));
-
("year", `Float (float_of_int entry.year));
-
("bibtype", `String entry.bibtype);
-
("publisher", `String entry.publisher);
-
]
-
| Failed _ -> fields
-
in
-
`O fields
-
-
let of_yaml_value v =
-
try
-
let doi = J.find v ["doi"] |> J.get_string in
-
let resolved_at = J.find v ["resolved_at"] |> J.get_string in
-
(* Support both old source_url (single) and new source_urls (list) for backwards compatibility *)
-
let source_urls =
-
try
-
J.find v ["source_urls"] |> J.get_list J.get_string
-
with _ ->
-
try
-
let single_url = J.find v ["source_url"] |> J.get_string in
-
[single_url]
-
with _ -> []
-
in
-
let ignore = try J.find v ["ignore"] |> J.get_bool with _ -> false in
-
let error = try Some (J.find v ["error"] |> J.get_string) with _ -> None in
-
match error with
-
| Some err ->
-
{ doi; title = ""; authors = []; year = 0; bibtype = ""; publisher = "";
-
resolved_at; source_urls; status = Failed err; ignore }
-
| None ->
-
let title = J.find v ["title"] |> J.get_string in
-
let authors = J.find v ["authors"] |> J.get_list J.get_string in
-
let year = J.find v ["year"] |> J.get_float |> int_of_float in
-
let bibtype = J.find v ["bibtype"] |> J.get_string in
-
let publisher = J.find v ["publisher"] |> J.get_string in
-
{ doi; title; authors; year; bibtype; publisher; resolved_at; source_urls; status = Resolved; ignore }
-
with e ->
-
Printf.eprintf "Failed to parse DOI entry: %s\n%!" (Printexc.to_string e);
-
failwith "Invalid DOI entry in YAML"
-
-
let load path =
-
if not (Sys.file_exists path) then
-
[]
-
else
-
try
-
let yaml_str = In_channel.with_open_text path In_channel.input_all in
-
match Yaml.of_string yaml_str with
-
| Ok (`A entries) -> List.map of_yaml_value entries
-
| Ok _ -> []
-
| Error (`Msg e) ->
-
Printf.eprintf "Failed to parse %s: %s\n%!" path e;
-
[]
-
with e ->
-
Printf.eprintf "Failed to load %s: %s\n%!" path (Printexc.to_string e);
-
[]
-
-
let save path entries =
-
let yaml_list = `A (List.map to_yaml_value entries) in
-
let yaml_str = Yaml.to_string_exn yaml_list in
-
Out_channel.with_open_text path (fun oc ->
-
Out_channel.output_string oc yaml_str
-
)
-
-
let to_map entries =
-
let map = Hashtbl.create (List.length entries) in
-
List.iter (fun entry -> Hashtbl.add map entry.doi entry) entries;
-
map
-
-
let find_by_doi entries doi =
-
List.find_opt (fun entry -> not entry.ignore && entry.doi = doi) entries
-
-
let find_by_url entries url =
-
List.find_opt (fun entry ->
-
not entry.ignore && List.mem url entry.source_urls
-
) entries
-
-
let find_by_doi_including_ignored entries doi =
-
List.find_opt (fun entry -> entry.doi = doi) entries
-
-
let find_by_url_including_ignored entries url =
-
List.find_opt (fun entry ->
-
List.mem url entry.source_urls
-
) entries
-51
stack/bushel/lib/doi_entry.mli
···
-
(** DOI entries resolved from external sources via Zotero Translation Server *)
-
-
type status =
-
| Resolved (** Successfully resolved from Zotero *)
-
| Failed of string (** Failed to resolve, with error message *)
-
-
type t = {
-
doi: string;
-
title: string;
-
authors: string list;
-
year: int;
-
bibtype: string; (** article, inproceedings, book, etc *)
-
publisher: string; (** journal/conference/publisher name *)
-
resolved_at: string; (** ISO date when resolved *)
-
source_urls: string list; (** All URLs that resolve to this DOI (publisher links, doi.org URLs, etc) *)
-
status: status;
-
ignore: bool; (** If true, skip this entry when looking up references *)
-
}
-
-
type ts = t list
-
-
(** Load DOI entries from YAML file *)
-
val load : string -> ts
-
-
(** Save DOI entries to YAML file *)
-
val save : string -> ts -> unit
-
-
(** Convert list to hashtable for fast lookup by DOI *)
-
val to_map : ts -> (string, t) Hashtbl.t
-
-
(** Find entry by DOI (excludes ignored entries) *)
-
val find_by_doi : ts -> string -> t option
-
-
(** Find entry by source URL (searches through all source_urls, excludes ignored entries) *)
-
val find_by_url : ts -> string -> t option
-
-
(** Find entry by DOI including ignored entries (for resolution checks) *)
-
val find_by_doi_including_ignored : ts -> string -> t option
-
-
(** Find entry by source URL including ignored entries (for resolution checks) *)
-
val find_by_url_including_ignored : ts -> string -> t option
-
-
(** Create a new resolved entry *)
-
val create_resolved : doi:string -> title:string -> authors:string list ->
-
year:int -> bibtype:string -> publisher:string -> ?source_urls:string list -> unit -> t
-
-
(** Create a new failed entry *)
-
val create_failed : doi:string -> error:string -> ?source_urls:string list -> unit -> t
-
-
(** Merge two entries with the same DOI, combining their source_urls *)
-
val merge_entries : t -> t -> t
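
Sketch of a round-trip through the DOI cache interface above: look up a DOI, add a resolved entry if it is missing, write the file back. The path, DOI and bibliographic fields are all placeholders:

```ocaml
(* Round-trip the doi.yml cache using load/find_by_doi/create_resolved/save
   from the Doi_entry interface above. *)
let () =
  let path = "data/doi.yml" in
  let entries = Bushel.Doi_entry.load path in
  match Bushel.Doi_entry.find_by_doi entries "10.9999/example.doi" with
  | Some _ -> print_endline "already resolved"
  | None ->
    let fresh =
      Bushel.Doi_entry.create_resolved ~doi:"10.9999/example.doi"
        ~title:"Placeholder title" ~authors:[ "A. Author" ] ~year:2024
        ~bibtype:"article" ~publisher:"Example Press" ()
    in
    Bushel.Doi_entry.save path (fresh :: entries)
```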
-20
stack/bushel/lib/dune
···
-
(library
-
(name bushel)
-
(public_name bushel)
-
(libraries
-
cmarkit
-
uri
-
jsont
-
jsont.bytesrw
-
ezjsonm
-
ptime
-
yaml.unix
-
jekyll-format
-
eio
-
eio_main
-
requests
-
fmt
-
re
-
ptime.clock
-
ptime.clock.os
-
typesense-cliente))
-446
stack/bushel/lib/entry.ml
···
-
type entry =
-
[ `Paper of Paper.t
-
| `Project of Project.t
-
| `Idea of Idea.t
-
| `Video of Video.t
-
| `Note of Note.t
-
]
-
-
type slugs = (string, entry) Hashtbl.t
-
-
type t =
-
{ slugs : slugs
-
; papers : Paper.ts
-
; old_papers : Paper.ts
-
; notes : Note.ts
-
; projects : Project.ts
-
; ideas : Idea.ts
-
; videos : Video.ts
-
; contacts : Contact.ts
-
; images : Srcsetter.ts
-
; doi_entries : Doi_entry.ts
-
; data_dir : string
-
}
-
-
let contacts { contacts; _ } = contacts
-
let videos { videos; _ } = videos
-
let ideas { ideas; _ } = ideas
-
let papers { papers; _ } = papers
-
let notes { notes; _ } = notes
-
let projects { projects; _ } = projects
-
let images { images; _ } = images
-
let doi_entries { doi_entries; _ } = doi_entries
-
let data_dir { data_dir; _ } = data_dir
-
-
let v ~papers ~notes ~projects ~ideas ~videos ~contacts ~images ~data_dir =
-
let slugs : slugs = Hashtbl.create 42 in
-
let papers, old_papers = List.partition (fun p -> p.Paper.latest) papers in
-
List.iter (fun n -> Hashtbl.add slugs n.Note.slug (`Note n)) notes;
-
List.iter (fun p -> Hashtbl.add slugs p.Project.slug (`Project p)) projects;
-
List.iter (fun i -> Hashtbl.add slugs i.Idea.slug (`Idea i)) ideas;
-
List.iter (fun v -> Hashtbl.add slugs v.Video.slug (`Video v)) videos;
-
List.iter (fun p -> Hashtbl.add slugs p.Paper.slug (`Paper p)) papers;
-
(* Load DOI entries from doi.yml *)
-
let doi_yml_path = Filename.concat data_dir "doi.yml" in
-
let doi_entries = Doi_entry.load doi_yml_path in
-
{ slugs; papers; old_papers; notes; projects; ideas; videos; images; contacts; doi_entries; data_dir }
-
;;
-
-
let lookup { slugs; _ } slug = Hashtbl.find_opt slugs slug
-
let lookup_exn { slugs; _ } slug = Hashtbl.find slugs slug
-
-
let old_papers { old_papers; _ } = old_papers
-
-
let sidebar = function
-
| `Note { Note.sidebar = Some s; _ } -> Some s
-
| _ -> None
-
;;
-
-
let to_type_string = function
-
| `Paper _ -> "paper"
-
| `Note _ -> "note"
-
| `Project _ -> "project"
-
| `Idea _ -> "idea"
-
| `Video _ -> "video"
-
;;
-
-
let synopsis = function
-
| `Note n -> Note.synopsis n
-
| _ -> None
-
;;
-
-
let slug = function
-
| `Paper p -> p.Paper.slug
-
| `Note n -> n.Note.slug
-
| `Project p -> p.Project.slug
-
| `Idea i -> i.Idea.slug
-
| `Video v -> v.Video.slug
-
;;
-
-
let title = function
-
| `Paper p -> Paper.title p
-
| `Note n -> Note.title n
-
| `Project p -> Project.title p
-
| `Idea i -> Idea.title i
-
| `Video v -> Video.title v
-
;;
-
-
let body = function
-
| `Paper _ -> ""
-
| `Note n -> Note.body n
-
| `Project p -> Project.body p
-
| `Idea i -> Idea.body i
-
| `Video _ -> ""
-
;;
-
-
let site_url = function
-
| `Paper p -> "/papers/" ^ p.Paper.slug
-
| `Note n -> "/notes/" ^ n.Note.slug
-
| `Project p -> "/projects/" ^ p.Project.slug
-
| `Idea i -> "/ideas/" ^ i.Idea.slug
-
| `Video v -> "/videos/" ^ v.Video.slug
-
;;
-
-
(** Extract external URLs from markdown content *)
-
let extract_external_links md =
-
let open Cmarkit in
-
let urls = ref [] in
-
-
let is_external_url url =
-
(* XXX FIXME *)
-
let is_bushel_slug = String.starts_with ~prefix:":" in
-
let is_tag_slug = String.starts_with ~prefix:"##" in
-
if is_bushel_slug url || is_tag_slug url then false
-
else
-
try
-
let uri = Uri.of_string url in
-
match Uri.scheme uri with
-
| Some s when s = "http" || s = "https" -> true
-
| Some _ -> true (* Any other scheme is considered external *)
-
| None -> false (* Local references or relative paths *)
-
with _ -> false
-
in
-
-
let inline_mapper _ = function
-
| Inline.Link (lb, _) | Inline.Image (lb, _) ->
-
let ref = Inline.Link.reference lb in
-
(match ref with
-
| `Inline (ld, _) ->
-
(match Link_definition.dest ld with
-
| Some (url, _) when is_external_url url ->
-
urls := url :: !urls;
-
Mapper.default
-
| _ -> Mapper.default)
-
| `Ref (_, _, l) ->
-
(* Get the referenced label definition and extract URL if it exists *)
-
let defs = Doc.defs (Doc.of_string ~strict:false md) in
-
(match Label.Map.find_opt (Label.key l) defs with
-
| Some (Link_definition.Def (ld, _)) ->
-
(match Link_definition.dest ld with
-
| Some (url, _) when is_external_url url ->
-
urls := url :: !urls
-
| _ -> ())
-
| _ -> ());
-
Mapper.default)
-
| Inline.Autolink (autolink, _) ->
-
let url = Inline.Autolink.link autolink |> fst in
-
if not (Inline.Autolink.is_email autolink) && is_external_url url then
-
urls := url :: !urls;
-
Mapper.default
-
| _ -> Mapper.default
-
in
-
-
let mapper = Mapper.make ~inline:inline_mapper () in
-
let doc = Doc.of_string ~strict:false md in
-
let _ = Mapper.map_doc mapper doc in
-
List.sort_uniq String.compare !urls
-
-
let outgoing_links e = extract_external_links (body e)
-
-
let lookup_site_url t slug =
-
match lookup t slug with
-
| Some ent -> site_url ent
-
| None -> ""
-
-
let lookup_title t slug =
-
match lookup t slug with
-
| Some ent -> title ent
-
| None -> ""
-
-
-
let date (x : entry) =
-
match x with
-
| `Paper p -> Paper.date p
-
| `Note n -> Note.date n
-
| `Project p -> p.Project.start, 1, 1
-
| `Idea i -> i.Idea.year, i.Idea.month, 1
-
| `Video v -> Video.date v
-
;;
-
-
let datetime v = date v |> Ptime.of_date |> Option.get
-
-
let year x =
-
match date x with
-
| y, _, _ -> y
-
;;
-
-
let is_index_entry = function
-
| `Note { Note.index_page; _ } -> index_page
-
| _ -> false
-
;;
-
-
let notes_for_slug { notes; _ } slug =
-
List.filter (fun n -> match Note.slug_ent n with Some s -> s = slug | None -> false) notes
-
let all_entries { slugs; _ } = Hashtbl.fold (fun _ v acc -> v :: acc) slugs []
-
-
let all_papers { papers; old_papers; _ } =
-
List.map (fun x -> `Paper x) (papers @ old_papers)
-
;;
-
-
let compare a b =
-
let datetime v = Option.get (Ptime.of_date v) in
-
let da = datetime (date a) in
-
let db = datetime (date b) in
-
if da = db then compare (title a) (title b) else Ptime.compare da db
-
;;
-
-
let lookup_by_name {contacts;_} n =
-
match Contact.lookup_by_name contacts n with
-
| v -> Some v
-
| exception _ -> None
-
-
(** Extract the first image URL from markdown text *)
-
let extract_first_image md =
-
let open Cmarkit in
-
(* Don't use bushel link resolver to avoid circular dependency *)
-
let doc = Doc.of_string md in
-
let found_image = ref None in
-
-
let find_image_in_inline _mapper = function
-
| Inline.Image (img, _) ->
-
(match Inline.Link.reference img with
-
| `Inline (ld, _) ->
-
(match Link_definition.dest ld with
-
| Some (url, _) when !found_image = None ->
-
found_image := Some url;
-
Mapper.default
-
| _ -> Mapper.default)
-
| _ -> Mapper.default)
-
| _ -> Mapper.default
-
in
-
-
let mapper = Mapper.make ~inline:find_image_in_inline () in
-
let _ = Mapper.map_doc mapper doc in
-
!found_image
-
;;
-
-
(** Extract the first video slug from markdown text by looking for bushel video links *)
-
let extract_first_video entries md =
-
let open Cmarkit in
-
let doc = Doc.of_string md in
-
let found_video = ref None in
-
-
let find_video_in_inline _mapper = function
-
| Inline.Link (link, _) ->
-
(match Inline.Link.reference link with
-
| `Inline (ld, _) ->
-
(match Link_definition.dest ld with
-
| Some (url, _) when !found_video = None && String.starts_with ~prefix:":" url ->
-
(* Check if this is a video slug *)
-
let slug = String.sub url 1 (String.length url - 1) in
-
(match lookup entries slug with
-
| Some (`Video v) ->
-
found_video := Some (Video.uuid v);
-
Mapper.default
-
| _ -> Mapper.default)
-
| _ -> Mapper.default)
-
| _ -> Mapper.default)
-
| _ -> Mapper.default
-
in
-
-
let mapper = Mapper.make ~inline:find_video_in_inline () in
-
let _ = Mapper.map_doc mapper doc in
-
!found_video
-
;;
-
-
(** Look up an image in the srcsetter list by slug *)
-
let lookup_image { images; _ } slug =
-
List.find_opt (fun img -> Srcsetter.slug img = slug) images
-
-
(** Get the smallest webp variant from a srcsetter image *)
-
let smallest_webp_variant img =
-
let variants = Srcsetter.variants img in
-
let webp_variants =
-
Srcsetter.MS.bindings variants
-
|> List.filter (fun (name, _) -> String.ends_with ~suffix:".webp" name)
-
in
-
match webp_variants with
-
| [] ->
-
(* No webp variants - use the name field which is always webp *)
-
"/images/" ^ Srcsetter.name img
-
| variants ->
-
(* Find the variant with the smallest width *)
-
let smallest = List.fold_left (fun acc (name, (w, h)) ->
-
match acc with
-
| None -> Some (name, w, h)
-
| Some (_, min_w, _) when w < min_w -> Some (name, w, h)
-
| _ -> acc
-
) None variants in
-
match smallest with
-
| Some (name, _, _) -> "/images/" ^ name
-
| None -> "/images/" ^ Srcsetter.name img
-
-
(** Get thumbnail slug for a contact *)
-
let contact_thumbnail_slug contact =
-
(* Contact images use just the handle as slug *)
-
Some (Contact.handle contact)
-
-
(** Get thumbnail URL for a contact - resolved through srcsetter *)
-
let contact_thumbnail entries contact =
-
match contact_thumbnail_slug contact with
-
| None -> None
-
| Some thumb_slug ->
-
match lookup_image entries thumb_slug with
-
| Some img -> Some (smallest_webp_variant img)
-
| None -> None (* Image not in srcsetter - thumbnails are optional *)
-
-
(** Get thumbnail slug for an entry with fallbacks *)
-
let rec thumbnail_slug entries = function
-
| `Paper p ->
-
(* Slug is just the paper slug, directory is in the origin path *)
-
Some (Paper.slug p)
-
-
| `Video v ->
-
(* Videos use their UUID as the slug *)
-
Some (Video.uuid v)
-
-
| `Project p ->
-
(* Project images use "project-{slug}" format *)
-
Some (Printf.sprintf "project-%s" p.Project.slug)
-
-
| `Idea i ->
-
let is_active = match Idea.status i with
-
| Idea.Available | Idea.Discussion | Idea.Ongoing -> true
-
| Idea.Completed | Idea.Expired -> false
-
in
-
if is_active then
-
(* Use first supervisor's face image *)
-
let supervisors = Idea.supervisors i in
-
match supervisors with
-
| sup :: _ ->
-
let handle = if String.length sup > 0 && sup.[0] = '@'
-
then String.sub sup 1 (String.length sup - 1)
-
else sup
-
in
-
(match Contact.find_by_handle (contacts entries) handle with
-
| Some c ->
-
(* Contact images use just the handle as slug *)
-
Some (Contact.handle c)
-
| None ->
-
(* Fallback to project thumbnail *)
-
let project_slug = Idea.project i in
-
(match lookup entries project_slug with
-
| Some p -> thumbnail_slug entries p
-
| None -> None))
-
| [] ->
-
(* No supervisors, use project thumbnail *)
-
let project_slug = Idea.project i in
-
(match lookup entries project_slug with
-
| Some p -> thumbnail_slug entries p
-
| None -> None)
-
else
-
(* Use project thumbnail for completed/expired ideas *)
-
let project_slug = Idea.project i in
-
(match lookup entries project_slug with
-
| Some p -> thumbnail_slug entries p
-
| None -> None)
-
-
| `Note n ->
-
(* Use titleimage if set, otherwise extract first image from body, then try video, otherwise use slug_ent's thumbnail *)
-
(match Note.titleimage n with
-
| Some slug ->
-
(* Always treat titleimage as a bushel slug (without ':' prefix) *)
-
Some slug
-
| None ->
-
(* Extract first image from markdown body *)
-
match extract_first_image (Note.body n) with
-
| Some url when String.starts_with ~prefix:":" url ->
-
Some (String.sub url 1 (String.length url - 1))
-
| Some _ -> None
-
| None ->
-
(* Try extracting first video from markdown body *)
-
match extract_first_video entries (Note.body n) with
-
| Some video_uuid -> Some video_uuid
-
| None ->
-
(* Fallback to slug_ent's thumbnail if present *)
-
match Note.slug_ent n with
-
| Some slug_ent ->
-
(match lookup entries slug_ent with
-
| Some entry -> thumbnail_slug entries entry
-
| None -> None)
-
| None -> None)
-
-
(** Get thumbnail URL for an entry with fallbacks - resolved through srcsetter *)
-
let thumbnail entries entry =
-
match thumbnail_slug entries entry with
-
| None -> None
-
| Some thumb_slug ->
-
match lookup_image entries thumb_slug with
-
| Some img -> Some (smallest_webp_variant img)
-
| None ->
-
(* For projects, fallback to supervisor faces if project image doesn't exist *)
-
(match entry with
-
| `Project p ->
-
(* Find ideas for this project *)
-
let project_ideas = List.filter (fun idea ->
-
Idea.project idea = ":" ^ p.Project.slug
-
) (ideas entries) in
-
(* Collect all unique supervisors from these ideas *)
-
let all_supervisors =
-
List.fold_left (fun acc idea ->
-
List.fold_left (fun acc2 sup ->
-
if List.mem sup acc2 then acc2 else sup :: acc2
-
) acc (Idea.supervisors idea)
-
) [] project_ideas
-
in
-
(* Split into avsm and others, preferring others first *)
-
let (others, avsm) = List.partition (fun sup ->
-
let handle = if String.length sup > 0 && sup.[0] = '@'
-
then String.sub sup 1 (String.length sup - 1)
-
else sup
-
in
-
handle <> "avsm"
-
) all_supervisors in
-
(* Try supervisors in order: others first, then avsm *)
-
let ordered_supervisors = others @ avsm in
-
(* Try each supervisor's face image *)
-
let rec try_supervisors = function
-
| [] -> None
-
| sup :: rest ->
-
let handle = if String.length sup > 0 && sup.[0] = '@'
-
then String.sub sup 1 (String.length sup - 1)
-
else sup
-
in
-
(match Contact.find_by_handle (contacts entries) handle with
-
| Some c ->
-
(match lookup_image entries (Contact.handle c) with
-
| Some img -> Some (smallest_webp_variant img)
-
| None -> try_supervisors rest)
-
| None -> try_supervisors rest)
-
in
-
try_supervisors ordered_supervisors
-
| _ -> None)
-
-
(** Get thumbnail URL for a note with slug_ent *)
-
let thumbnail_note_with_ent entries note_item =
-
(* Use linked entry's thumbnail if slug_ent is set *)
-
match Note.slug_ent note_item with
-
| Some slug_ent ->
-
(match lookup entries (":" ^ slug_ent) with
-
| Some entry -> thumbnail entries entry
-
| None ->
-
(* Fallback to extracting first image from note body *)
-
extract_first_image (Note.body note_item))
-
| None ->
-
(* No slug_ent, extract from note body *)
-
extract_first_image (Note.body note_item)
-79
stack/bushel/lib/entry.mli
···
-
type entry =
-
[ `Idea of Idea.t
-
| `Note of Note.t
-
| `Paper of Paper.t
-
| `Project of Project.t
-
| `Video of Video.t
-
]
-
-
type slugs = (string, entry) Hashtbl.t
-
type t
-
-
val contacts : t -> Contact.ts
-
val videos : t -> Video.ts
-
val ideas : t -> Idea.ts
-
val papers : t -> Paper.ts
-
val notes : t -> Note.ts
-
val projects : t -> Project.ts
-
val images : t -> Srcsetter.ts
-
val doi_entries : t -> Doi_entry.ts
-
val data_dir : t -> string
-
-
val v
-
: papers:Paper.t list
-
-> notes:Note.ts
-
-> projects:Project.ts
-
-> ideas:Idea.ts
-
-> videos:Video.ts
-
-> contacts:Contact.ts
-
-> images:Srcsetter.ts
-
-> data_dir:string
-
-> t
-
-
val lookup : t -> string -> entry option
-
val lookup_exn : t -> string -> entry
-
val lookup_site_url : t -> string -> string
-
val lookup_title : t -> string -> string
-
val lookup_by_name : t -> string -> Contact.t option
-
val old_papers : t -> Paper.ts
-
val sidebar : [> `Note of Note.t ] -> string option
-
val to_type_string : entry -> string
-
val slug : entry -> string
-
val title : entry -> string
-
val body : entry -> string
-
val extract_external_links : string -> string list
-
val outgoing_links : entry -> string list
-
-
(* FIXME move to view *)
-
val site_url : entry -> string
-
val date : entry -> Ptime.date
-
val datetime : entry -> Ptime.t
-
val year : entry -> int
-
val synopsis : entry -> string option
-
-
val is_index_entry : entry -> bool
-
val notes_for_slug : t -> string -> Note.t list
-
val all_entries : t -> entry list
-
val all_papers : t -> entry list
-
val compare : entry -> entry -> int
-
-
(** Look up an image in the srcsetter list by slug *)
-
val lookup_image : t -> string -> Srcsetter.t option
-
-
(** Get the smallest webp variant from a srcsetter image *)
-
val smallest_webp_variant : Srcsetter.t -> string
-
-
(** Get thumbnail slug for a contact *)
-
val contact_thumbnail_slug : Contact.t -> string option
-
-
(** Get thumbnail URL for a contact - resolved through srcsetter *)
-
val contact_thumbnail : t -> Contact.t -> string option
-
-
(** Get thumbnail slug for an entry with fallbacks *)
-
val thumbnail_slug : t -> entry -> string option
-
-
(** Get thumbnail URL for an entry with fallbacks - resolved through srcsetter *)
-
val thumbnail : t -> entry -> string option
-
-
(** Get thumbnail URL for a note with slug_ent *)
-
val thumbnail_note_with_ent : t -> Note.t -> string option
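
Sketch of the Entry lookup/thumbnail flow above: resolve a slug and pull its thumbnail through the srcsetter-backed fallback chain. The site path and slug are placeholders:

```ocaml
(* Resolve a slug to an entry and fetch its thumbnail via the Entry
   interface above. *)
let () =
  let entries = Bushel.load "/path/to/site" in
  match Bushel.Entry.lookup entries "some-slug" with
  | None -> prerr_endline "no such slug"
  | Some e ->
    Printf.printf "%s (%s)\n" (Bushel.Entry.title e)
      (Bushel.Entry.to_type_string e);
    (match Bushel.Entry.thumbnail entries e with
     | Some url -> Printf.printf "thumbnail: %s\n" url
     | None -> print_endline "no thumbnail")
```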
-223
stack/bushel/lib/idea.ml
···
-
type level =
-
| Any
-
| PartII
-
| MPhil
-
| PhD
-
| Postdoc
-
-
let level_of_yaml = function
-
| `String ("Any" | "any") -> Ok Any
-
| `String ("PartII" | "partii") -> Ok PartII
-
| `String ("MPhil" | "mphil") -> Ok MPhil
-
| `String ("PhD" | "phd") -> Ok PhD
-
| `String ("postdoc" | "Postdoc") -> Ok Postdoc
-
| _ -> Error (`Msg "level_of_yaml")
-
;;
-
-
let level_to_string = function
-
| Any -> "Any"
-
| PartII -> "PartII"
-
| MPhil -> "MPhil"
-
| PhD -> "PhD"
-
| Postdoc -> "postdoctoral"
-
;;
-
-
let level_to_tag = function
-
| Any -> "idea-beginner"
-
| PartII -> "idea-medium"
-
| MPhil -> "idea-hard"
-
| PhD -> "idea-phd"
-
| Postdoc -> "idea-postdoc"
-
;;
-
-
let level_to_yaml s = `String (level_to_string s)
-
-
type status =
-
| Available
-
| Discussion
-
| Ongoing
-
| Completed
-
| Expired
-
-
let status_of_yaml = function
-
| `String ("Available" | "available") -> Ok Available
-
| `String ("Discussion" | "discussion") -> Ok Discussion
-
| `String ("Ongoing" | "ongoing") -> Ok Ongoing
-
| `String ("Completed" | "completed") -> Ok Completed
-
| `String ("Expired" | "expired") -> Ok Expired
-
| _ -> Error (`Msg "status_of_yaml")
-
;;
-
-
let status_to_string = function
-
| Available -> "Available"
-
| Discussion -> "Discussion"
-
| Ongoing -> "Ongoing"
-
| Completed -> "Completed"
-
| Expired -> "Expired"
-
;;
-
-
let status_to_tag = function
-
| Available -> "idea-available"
-
| Discussion -> "idea-discuss"
-
| Ongoing -> "idea-ongoing"
-
| Completed -> "idea-done"
-
| Expired -> "idea-expired"
-
;;
-
-
let status_to_yaml s = `String (status_to_string s)
-
-
type t =
-
{ slug : string
-
; title : string
-
; level : level
-
; project : string
-
; status : status
-
; month: int
-
; year : int
-
; supervisors : string list
-
; students : string list
-
; reading : string
-
; body : string
-
; url : string option
-
; tags : string list
-
}
-
-
type ts = t list
-
-
let title i = i.title
-
let supervisors i = i.supervisors
-
let students i = i.students
-
let reading i = i.reading
-
let status i = i.status
-
let level i = i.level
-
let year i = i.year
-
let body i = i.body
-
let project i = i.project
-
-
let compare a b =
-
match compare a.status b.status with
-
| 0 ->
-
(match a.status with
-
| Completed -> compare b.year a.year
-
| _ ->
-
(match compare a.level b.level with
-
| 0 -> begin
-
match compare b.year a.year with
-
| 0 -> compare b.month a.month
-
| n -> n
-
end
-
| n -> n))
-
| n -> n
-
;;
-
-
let of_md fname =
-
match Jekyll_post.of_string ~fname:(Filename.basename fname) (Util.read_file fname) with
-
| Error _ -> failwith "TODO"
-
| Ok jp ->
-
let fields = jp.Jekyll_post.fields in
-
let y = Jekyll_format.fields_to_yaml fields in
-
let year, month, _ = jp.Jekyll_post.date |> Ptime.to_date in
-
let body = jp.Jekyll_post.body in
-
let string f = Yaml.Util.(find_exn f y |> Option.get |> to_string |> Result.get_ok) in
-
let string' f d =
-
try Yaml.Util.(find_exn f y |> Option.get |> to_string |> Result.get_ok) with
-
| _ -> d
-
in
-
let to_list = function
-
| `A l -> Ok l
-
| _ -> Error (`Msg "to_list")
-
in
-
let strings f =
-
try
-
Yaml.Util.(
-
find_exn f y
-
|> Option.get
-
|> to_list
-
|> Result.get_ok
-
|> List.map (fun x -> to_string x |> Result.get_ok))
-
with
-
| _exn -> []
-
in
-
let level =
-
Yaml.Util.(find_exn "level" y |> Option.get |> level_of_yaml |> Result.get_ok)
-
in
-
let status =
-
Yaml.Util.(find_exn "status" y |> Option.get |> status_of_yaml |> Result.get_ok)
-
in
-
let slug = jp.Jekyll_post.slug in
-
{ slug
-
; title = string "title"
-
; level
-
; project = string "project"
-
; status
-
; supervisors = strings "supervisors"
-
; students = strings "students"
-
; tags = strings "tags"
-
; reading = string' "reading" ""
-
; month
-
; year
-
; body
-
; url = None (* TODO *)
-
}
-
;;
-
-
let lookup ideas slug = List.find_opt (fun i -> i.slug = slug) ideas
-
-
(* TODO:claude *)
-
let typesense_schema =
-
let open Ezjsonm in
-
dict [
-
("name", string "ideas");
-
("fields", list (fun d -> dict d) [
-
[("name", string "id"); ("type", string "string")];
-
[("name", string "title"); ("type", string "string")];
-
[("name", string "description"); ("type", string "string")];
-
[("name", string "year"); ("type", string "int32")];
-
[("name", string "date"); ("type", string "string")];
-
[("name", string "date_timestamp"); ("type", string "int64")];
-
[("name", string "tags"); ("type", string "string[]"); ("facet", bool true)];
-
[("name", string "level"); ("type", string "string"); ("facet", bool true)];
-
[("name", string "status"); ("type", string "string"); ("facet", bool true)];
-
[("name", string "project"); ("type", string "string"); ("facet", bool true)];
-
[("name", string "supervisors"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "body"); ("type", string "string"); ("optional", bool true)];
-
[("name", string "students"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "reading"); ("type", string "string"); ("optional", bool true)];
-
[("name", string "url"); ("type", string "string"); ("optional", bool true)];
-
]);
-
("default_sorting_field", string "date_timestamp");
-
]
-
-
(** TODO:claude Pretty-print an idea with ANSI formatting *)
-
let pp ppf i =
-
let open Fmt in
-
pf ppf "@[<v>";
-
pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Idea";
-
pf ppf "%a: %a@," (styled `Bold string) "Slug" string i.slug;
-
pf ppf "%a: %a@," (styled `Bold string) "Title" string (title i);
-
pf ppf "%a: %a@," (styled `Bold string) "Level" string (level_to_string (level i));
-
pf ppf "%a: %a@," (styled `Bold string) "Status" string (status_to_string (status i));
-
pf ppf "%a: %a@," (styled `Bold string) "Project" string (project i);
-
pf ppf "%a: %04d-%02d@," (styled `Bold string) "Date" (year i) i.month;
-
let sups = supervisors i in
-
if sups <> [] then
-
pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Supervisors" (list ~sep:comma string) sups;
-
let studs = students i in
-
if studs <> [] then
-
pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Students" (list ~sep:comma string) studs;
-
(match i.url with
-
| Some url -> pf ppf "%a: %a@," (styled `Bold string) "URL" string url
-
| None -> ());
-
let t = i.tags in
-
if t <> [] then
-
pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Tags" (list ~sep:comma string) t;
-
let r = reading i in
-
if r <> "" then begin
-
pf ppf "@,";
-
pf ppf "%a:@," (styled `Bold string) "Reading";
-
pf ppf "%a@," string r;
-
end;
-
pf ppf "@,";
-
pf ppf "%a:@," (styled `Bold string) "Body";
-
pf ppf "%a@," string (body i);
-
pf ppf "@]"
-55
stack/bushel/lib/idea.mli
···
-
type level =
-
| Any
-
| PartII
-
| MPhil
-
| PhD
-
| Postdoc
-
-
type status =
-
| Available
-
| Discussion
-
| Ongoing
-
| Completed
-
| Expired
-
-
val level_of_yaml : Ezjsonm.value -> (level, [> `Msg of string ]) result
-
val level_to_string : level -> string
-
val level_to_tag : level -> string
-
val level_to_yaml : level -> Ezjsonm.value
-
val status_of_yaml : Ezjsonm.value -> (status, [> `Msg of string ]) result
-
val status_to_string : status -> string
-
val status_to_tag : status -> string
-
val status_to_yaml : status -> Ezjsonm.value
-
-
type t =
-
{ slug : string
-
; title : string
-
; level : level
-
; project : string
-
; status : status
-
; month : int
-
; year : int
-
; supervisors : string list
-
; students : string list
-
; reading : string
-
; body : string
-
; url : string option
-
; tags : string list
-
}
-
-
type ts = t list
-
-
val title : t -> string
-
val supervisors : t -> string list
-
val students : t -> string list
-
val reading : t -> string
-
val status : t -> status
-
val level : t -> level
-
val year : t -> int
-
val body : t -> string
-
val project : t -> string
-
val compare : t -> t -> int
-
val lookup : t list -> string -> t option
-
val of_md : string -> t
-
val typesense_schema : Ezjsonm.value
-
val pp : Format.formatter -> t -> unit
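
Small check on the Idea YAML conversions above — input is accepted in lower or mixed case, output is the canonical spelling, so this prints "Ongoing":

```ocaml
(* Round-trip an idea status through the conversions declared above. *)
let () =
  match Bushel.Idea.status_of_yaml (`String "ongoing") with
  | Ok s -> print_endline (Bushel.Idea.status_to_string s)
  | Error (`Msg m) -> prerr_endline m
```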
-296
stack/bushel/lib/link.ml
···
-
type karakeep_data = {
-
remote_url : string;
-
id : string;
-
tags : string list;
-
metadata : (string * string) list;
-
}
-
-
type bushel_data = {
-
slugs : string list;
-
tags : string list;
-
}
-
-
type t = {
-
url : string;
-
date : Ptime.date;
-
description : string;
-
karakeep : karakeep_data option;
-
bushel : bushel_data option;
-
}
-
-
type ts = t list
-
-
let url { url; _ } = url
-
let date { date; _ } = date
-
let description { description; _ } = description
-
let datetime v = Option.get @@ Ptime.of_date @@ date v
-
let compare a b = Ptime.compare (datetime b) (datetime a)
-
-
(* Convert YAML to Link.t *)
-
let t_of_yaml = function
-
| `O fields ->
-
let url =
-
match List.assoc_opt "url" fields with
-
| Some (`String v) -> v
-
| _ -> failwith "link: missing or invalid url"
-
in
-
let date =
-
match List.assoc_opt "date" fields with
-
| Some (`String v) -> begin
-
try
-
match Scanf.sscanf v "%04d-%02d-%02d" (fun y m d -> (y, m, d)) with
-
| (y, m, d) -> (y, m, d)
-
with _ ->
-
(* Fall back to RFC3339 parsing for backward compatibility *)
-
v |> Ptime.of_rfc3339 |> Result.get_ok |> fun (a, _, _) -> Ptime.to_date a
-
end
-
| _ -> failwith "link: missing or invalid date"
-
in
-
let description =
-
match List.assoc_opt "description" fields with
-
| Some (`String v) -> v
-
| _ -> ""
-
in
-
let karakeep =
-
match List.assoc_opt "karakeep" fields with
-
| Some (`O k_fields) ->
-
let remote_url =
-
match List.assoc_opt "remote_url" k_fields with
-
| Some (`String v) -> v
-
| _ -> failwith "link: invalid karakeep.remote_url"
-
in
-
let id =
-
match List.assoc_opt "id" k_fields with
-
| Some (`String v) -> v
-
| _ -> failwith "link: invalid karakeep.id"
-
in
-
let tags =
-
match List.assoc_opt "tags" k_fields with
-
| Some (`A tag_list) ->
-
List.fold_left (fun acc tag ->
-
match tag with
-
| `String t -> t :: acc
-
| _ -> acc
-
) [] tag_list
-
|> List.rev
-
| _ -> []
-
in
-
let metadata =
-
match List.assoc_opt "metadata" k_fields with
-
| Some (`O meta_fields) ->
-
List.fold_left (fun acc (k, v) ->
-
match v with
-
| `String value -> (k, value) :: acc
-
| _ -> acc
-
) [] meta_fields
-
| _ -> []
-
in
-
Some { remote_url; id; tags; metadata }
-
| _ -> None
-
in
-
let bushel =
-
match List.assoc_opt "bushel" fields with
-
| Some (`O b_fields) ->
-
let slugs =
-
match List.assoc_opt "slugs" b_fields with
-
| Some (`A slug_list) ->
-
List.fold_left (fun acc slug ->
-
match slug with
-
| `String s -> s :: acc
-
| _ -> acc
-
) [] slug_list
-
|> List.rev
-
| _ -> []
-
in
-
let tags =
-
match List.assoc_opt "tags" b_fields with
-
| Some (`A tag_list) ->
-
List.fold_left (fun acc tag ->
-
match tag with
-
| `String t -> t :: acc
-
| _ -> acc
-
) [] tag_list
-
|> List.rev
-
| _ -> []
-
in
-
Some { slugs; tags }
-
| _ -> None
-
in
-
{ url; date; description; karakeep; bushel }
-
| _ -> failwith "invalid yaml"
-
-
(* Read file contents *)
-
let read_file file = In_channel.(with_open_bin file input_all)
-
-
(* Load links from a YAML file *)
-
let of_md fname =
-
match Yaml.of_string_exn (read_file fname) with
-
| `A links ->
-
List.map t_of_yaml links
-
| `O _ as single_link ->
-
[t_of_yaml single_link]
-
| _ -> failwith "link_of_md: expected array or object"
-
-
(* Convert Link.t to YAML *)
-
let to_yaml t =
-
let (year, month, day) = t.date in
-
let date_str = Printf.sprintf "%04d-%02d-%02d" year month day in
-
-
(* Create base fields *)
-
let base_fields = [
-
("url", `String t.url);
-
("date", `String date_str);
-
] @
-
(if t.description = "" then [] else [("description", `String t.description)])
-
in
-
-
(* Add karakeep data if present *)
-
let karakeep_fields =
-
match t.karakeep with
-
| Some { remote_url; id; tags; metadata } ->
-
let karakeep_obj = [
-
("remote_url", `String remote_url);
-
("id", `String id);
-
] in
-
let karakeep_obj =
-
if tags = [] then karakeep_obj
-
else ("tags", `A (List.map (fun t -> `String t) tags)) :: karakeep_obj
-
in
-
let karakeep_obj =
-
if metadata = [] then karakeep_obj
-
else ("metadata", `O (List.map (fun (k, v) -> (k, `String v)) metadata)) :: karakeep_obj
-
in
-
[("karakeep", `O karakeep_obj)]
-
| None -> []
-
in
-
-
(* Add bushel data if present *)
-
let bushel_fields =
-
match t.bushel with
-
| Some { slugs; tags } ->
-
let bushel_obj = [] in
-
let bushel_obj =
-
if slugs = [] then bushel_obj
-
else ("slugs", `A (List.map (fun s -> `String s) slugs)) :: bushel_obj
-
in
-
let bushel_obj =
-
if tags = [] then bushel_obj
-
else ("tags", `A (List.map (fun t -> `String t) tags)) :: bushel_obj
-
in
-
if bushel_obj = [] then [] else [("bushel", `O bushel_obj)]
-
| None -> []
-
in
-
-
`O (base_fields @ karakeep_fields @ bushel_fields)
-
-
(* Write a link to a file in the output directory *)
-
let to_file output_dir t =
-
let filename =
-
let (y, m, d) = t.date in
-
let hash = Digest.string t.url |> Digest.to_hex in
-
let short_hash = String.sub hash 0 8 in
-
Printf.sprintf "%04d-%02d-%02d-%s.md" y m d short_hash
-
in
-
let file_path = Fpath.v (Filename.concat output_dir filename) in
-
let yaml = to_yaml t in
-
let yaml_str = Yaml.to_string_exn yaml in
-
let content = "---\n" ^ yaml_str ^ "---\n" in
-
Bos.OS.File.write file_path content
-
-
(* Load links from a YAML file *)
-
let load_links_file path =
-
try
-
let yaml_str = In_channel.(with_open_bin path input_all) in
-
match Yaml.of_string_exn yaml_str with
-
| `A links -> List.map t_of_yaml links
-
| _ -> []
-
with _ -> []
-
-
(* Save links to a YAML file *)
-
let save_links_file path links =
-
try
-
let yaml = `A (List.map to_yaml links) in
-
let yaml_str = Yaml.to_string_exn ~len:4200000 yaml in
-
let oc = open_out path in
-
output_string oc yaml_str;
-
close_out oc
-
with e ->
-
Printf.eprintf "Error saving links file: %s\n%!" (Printexc.to_string e);
-
Printf.eprintf "Attempting to save with smaller length limit...\n%!";
-
let yaml = `A (List.map to_yaml links) in
-
let yaml_str = Yaml.to_string_exn ~len:800000 yaml in
-
let oc = open_out path in
-
output_string oc yaml_str;
-
close_out oc
-
-
(* Merge two lists of links, combining metadata from duplicates *)
-
let merge_links ?(prefer_new_date=false) existing new_links =
-
let links_by_url = Hashtbl.create (List.length existing) in
-
-
(* Add existing links to hashtable *)
-
List.iter (fun link ->
-
Hashtbl.replace links_by_url link.url link
-
) existing;
-
-
(* Merge new links with existing ones *)
-
List.iter (fun new_link ->
-
match Hashtbl.find_opt links_by_url new_link.url with
-
| None ->
-
(* New link not in existing links *)
-
Hashtbl.add links_by_url new_link.url new_link
-
| Some old_link ->
-
(* Merge link data, prefer newer data for fields *)
-
let title =
-
if new_link.description <> "" then new_link.description
-
else old_link.description
-
in
-
-
(* Combine karakeep data (prefer new over old) *)
-
let karakeep =
-
match new_link.karakeep, old_link.karakeep with
-
| Some new_k, Some old_k when new_k.remote_url = old_k.remote_url ->
-
(* Same remote, merge the data *)
-
let merged_metadata =
-
let meta_tbl = Hashtbl.create (List.length old_k.metadata) in
-
List.iter (fun (k, v) -> Hashtbl.replace meta_tbl k v) old_k.metadata;
-
List.iter (fun (k, v) -> Hashtbl.replace meta_tbl k v) new_k.metadata;
-
Hashtbl.fold (fun k v acc -> (k, v) :: acc) meta_tbl []
-
in
-
let merged_tags = List.sort_uniq String.compare (old_k.tags @ new_k.tags) in
-
Some { new_k with metadata = merged_metadata; tags = merged_tags }
-
| Some new_k, _ -> Some new_k
-
| None, old_k -> old_k
-
in
-
-
(* Combine bushel data *)
-
let bushel =
-
match new_link.bushel, old_link.bushel with
-
| Some new_b, Some old_b ->
-
(* Merge slugs and tags *)
-
let merged_slugs = List.sort_uniq String.compare (old_b.slugs @ new_b.slugs) in
-
let merged_tags = List.sort_uniq String.compare (old_b.tags @ new_b.tags) in
-
Some { slugs = merged_slugs; tags = merged_tags }
-
| Some new_b, _ -> Some new_b
-
| None, old_b -> old_b
-
in
-
-
(* Combined link - prefer new date when requested (for bushel entries) *)
-
let date =
-
if prefer_new_date then new_link.date
-
else if compare new_link old_link > 0 then new_link.date
-
else old_link.date
-
in
-
let merged_link = {
-
url = new_link.url;
-
date;
-
description = title;
-
karakeep;
-
bushel
-
} in
-
Hashtbl.replace links_by_url new_link.url merged_link
-
) new_links;
-
-
(* Convert hashtable back to list and sort by date *)
-
Hashtbl.to_seq_values links_by_url
-
|> List.of_seq
-
|> List.sort compare
-34
stack/bushel/lib/link.mli
···
-
type karakeep_data = {
-
remote_url : string;
-
id : string;
-
tags : string list;
-
metadata : (string * string) list;
-
}
-
-
type bushel_data = {
-
slugs : string list;
-
tags : string list;
-
}
-
-
type t = {
-
url : string;
-
date : Ptime.date;
-
description : string;
-
karakeep : karakeep_data option;
-
bushel : bushel_data option;
-
}
-
-
type ts = t list
-
-
val compare : t -> t -> int
-
val url : t -> string
-
val date : t -> Ptime.date
-
val datetime : t -> Ptime.t
-
val description : t -> string
-
val of_md : string -> ts
-
val to_yaml : t -> Yaml.value
-
val t_of_yaml : Yaml.value -> t
-
val to_file : string -> t -> (unit, [> `Msg of string]) result
-
val load_links_file : string -> ts
-
val save_links_file : string -> ts -> unit
-
val merge_links : ?prefer_new_date:bool -> ts -> ts -> ts
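
Sketch of the link-merging flow declared above: fold freshly fetched links into the existing links file, preferring the newer dates. Both paths are placeholders:

```ocaml
(* Merge fetched links into the existing links file using the Link
   interface above. *)
let () =
  let existing = Bushel.Link.load_links_file "data/links.yml" in
  let fresh = Bushel.Link.load_links_file "incoming-links.yml" in
  Bushel.Link.save_links_file "data/links.yml"
    (Bushel.Link.merge_links ~prefer_new_date:true existing fresh)
```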
-317
stack/bushel/lib/link_graph.ml
···
-
module StringSet = Set.Make(String)
-
-
type entry_type = [ `Paper | `Project | `Note | `Idea | `Video | `Contact ]
-
-
type internal_link = {
-
source: string;
-
target: string;
-
target_type: entry_type;
-
}
-
-
type external_link = {
-
source: string;
-
domain: string;
-
url: string;
-
}
-
-
type link_graph = {
-
(* All links *)
-
mutable internal_links: internal_link list;
-
mutable external_links: external_link list;
-
-
(* Indices for efficient queries *)
-
outbound: (string, StringSet.t) Hashtbl.t;
-
backlinks: (string, StringSet.t) Hashtbl.t;
-
external_by_entry: (string, StringSet.t) Hashtbl.t;
-
external_by_domain: (string, StringSet.t) Hashtbl.t; (* domain -> source slugs *)
-
}
-
-
let empty_graph () = {
-
internal_links = [];
-
external_links = [];
-
outbound = Hashtbl.create 256;
-
backlinks = Hashtbl.create 256;
-
external_by_entry = Hashtbl.create 256;
-
external_by_domain = Hashtbl.create 64;
-
}
-
-
(* Global storage for the link graph *)
-
let current_graph : link_graph option ref = ref None
-
-
let set_graph graph = current_graph := Some graph
-
let get_graph () = !current_graph
-
-
let entry_type_to_string = function
-
| `Paper -> "paper"
-
| `Project -> "project"
-
| `Note -> "note"
-
| `Idea -> "idea"
-
| `Video -> "video"
-
| `Contact -> "contact"
-
-
(* Query functions *)
-
-
let get_outbound graph slug =
-
try StringSet.elements (Hashtbl.find graph.outbound slug)
-
with Not_found -> []
-
-
let get_backlinks graph slug =
-
try StringSet.elements (Hashtbl.find graph.backlinks slug)
-
with Not_found -> []
-
-
let get_external_links graph slug =
-
try StringSet.elements (Hashtbl.find graph.external_by_entry slug)
-
with Not_found -> []
-
-
let get_entries_linking_to_domain graph domain =
-
try StringSet.elements (Hashtbl.find graph.external_by_domain domain)
-
with Not_found -> []
-
-
(* Query functions that use the global graph *)
-
-
let get_backlinks_for_slug slug =
-
match !current_graph with
-
| None -> []
-
| Some graph -> get_backlinks graph slug
-
-
let get_outbound_for_slug slug =
-
match !current_graph with
-
| None -> []
-
| Some graph -> get_outbound graph slug
-
-
let get_external_links_for_slug slug =
-
match !current_graph with
-
| None -> []
-
| Some graph -> get_external_links graph slug
-
-
(* Pretty printing *)
-
-
let pp_internal_link ppf (link : internal_link) =
-
Fmt.pf ppf "%s -> %s (%s)"
-
link.source
-
link.target
-
(entry_type_to_string link.target_type)
-
-
let pp_external_link ppf (link : external_link) =
-
Fmt.pf ppf "%s -> %s (%s)"
-
link.source
-
link.domain
-
link.url
-
-
let pp_graph ppf graph =
-
Fmt.pf ppf "@[<v>Internal links: %d@,External links: %d@,Entries with outbound: %d@,Entries with backlinks: %d@]"
-
(List.length graph.internal_links)
-
(List.length graph.external_links)
-
(Hashtbl.length graph.outbound)
-
(Hashtbl.length graph.backlinks)
-
-
let entry_type_of_entry = function
-
| `Paper _ -> `Paper
-
| `Project _ -> `Project
-
| `Note _ -> `Note
-
| `Idea _ -> `Idea
-
| `Video _ -> `Video
-
| `Contact _ -> `Contact
-
-
let extract_domain url =
-
try
-
let uri = Uri.of_string url in
-
match Uri.host uri with
-
| Some host -> host
-
| None -> "unknown"
-
with _ -> "unknown"
-
-
let add_to_set_hashtbl tbl key value =
-
let current =
-
try Hashtbl.find tbl key
-
with Not_found -> StringSet.empty
-
in
-
Hashtbl.replace tbl key (StringSet.add value current)
-
-
let build_link_graph entries =
-
let graph = empty_graph () in
-
-
(* Helper to add internal link *)
-
let add_internal_link source target target_type =
-
let link = { source; target; target_type } in
-
graph.internal_links <- link :: graph.internal_links;
-
add_to_set_hashtbl graph.outbound source target;
-
add_to_set_hashtbl graph.backlinks target source
-
in
-
-
(* Helper to add external link *)
-
let add_external_link source url =
-
let domain = extract_domain url in
-
let link = { source; domain; url } in
-
graph.external_links <- link :: graph.external_links;
-
add_to_set_hashtbl graph.external_by_entry source url;
-
add_to_set_hashtbl graph.external_by_domain domain source
-
in
-
-
(* Process each entry *)
-
let process_entry entry =
-
let source_slug = Entry.slug entry in
-
-
(* Get all links from this entry's markdown content *)
-
let md_content = Entry.body entry in
-
let all_links = Md.extract_all_links md_content in
-
-
List.iter (fun link ->
-
if Md.is_bushel_slug link then
-
(* Internal bushel link *)
-
let target_slug = Md.strip_handle link in
-
match Entry.lookup entries target_slug with
-
| Some target_entry ->
-
let target_type = entry_type_of_entry target_entry in
-
add_internal_link source_slug target_slug target_type
-
| None -> ()
-
else if Md.is_contact_slug link then
-
(* Contact link *)
-
let handle = Md.strip_handle link in
-
match Contact.find_by_handle (Entry.contacts entries) handle with
-
| Some c ->
-
let target_slug = Contact.handle c in
-
add_internal_link source_slug target_slug `Contact
-
| None -> ()
-
else if Md.is_tag_slug link then
-
(* Skip tag links *)
-
()
-
else if Md.is_type_filter_slug link then
-
(* Skip type filter links *)
-
()
-
else if String.starts_with ~prefix:"http://" link ||
-
String.starts_with ~prefix:"https://" link then
-
(* External link *)
-
add_external_link source_slug link
-
else
-
(* Skip other links (relative paths, etc) *)
-
()
-
) all_links
-
in
-
-
(* Process all entries *)
-
List.iter process_entry (Entry.all_entries entries);
-
-
(* Process slug_ent references from notes *)
-
let process_note_slug_ent note =
-
match Note.slug_ent note with
-
| Some target_slug ->
-
let source_slug = Note.slug note in
-
(* Look up the target entry by slug *)
-
(match Entry.lookup entries target_slug with
-
| Some target_entry ->
-
let target_type = entry_type_of_entry target_entry in
-
add_internal_link source_slug target_slug target_type
-
| None -> ())
-
| None -> ()
-
in
-
List.iter process_note_slug_ent (Entry.notes entries);
-
-
(* Process projects: field from papers *)
-
let process_paper_projects paper =
-
let source_slug = Paper.slug paper in
-
let project_slugs = Paper.project_slugs paper in
-
List.iter (fun project_slug ->
-
(* Verify the project exists *)
-
match Entry.lookup entries project_slug with
-
| Some (`Project _) ->
-
add_internal_link source_slug project_slug `Project
-
| _ -> ()
-
) project_slugs
-
in
-
List.iter process_paper_projects (Entry.papers entries);
-
-
(* Deduplicate links *)
-
let module LinkSet = Set.Make(struct
-
type t = internal_link
-
let compare (a : internal_link) (b : internal_link) =
-
match String.compare a.source b.source with
-
| 0 -> String.compare a.target b.target
-
| n -> n
-
end) in
-
-
let module ExtLinkSet = Set.Make(struct
-
type t = external_link
-
let compare (a : external_link) (b : external_link) =
-
match String.compare a.source b.source with
-
| 0 -> String.compare a.url b.url
-
| n -> n
-
end) in
-
-
graph.internal_links <- LinkSet.elements (LinkSet.of_list graph.internal_links);
-
graph.external_links <- ExtLinkSet.elements (ExtLinkSet.of_list graph.external_links);
-
-
graph
-
-
(* Export for visualization *)
-
-
let to_json graph entries =
-
(* Build nodes from entries *)
-
let entry_nodes = List.map (fun entry ->
-
let slug = Entry.slug entry in
-
let title = Entry.title entry in
-
let entry_type = entry_type_of_entry entry in
-
`O [
-
("id", `String slug);
-
("title", `String title);
-
("type", `String (entry_type_to_string entry_type));
-
("group", `String "entry");
-
]
-
) (Entry.all_entries entries) in
-
-
(* Build nodes from contacts *)
-
let contact_nodes = List.map (fun contact ->
-
let handle = Contact.handle contact in
-
let name = Contact.name contact in
-
`O [
-
("id", `String handle);
-
("title", `String name);
-
("type", `String "contact");
-
("group", `String "entry");
-
]
-
) (Entry.contacts entries) in
-
-
(* Build domain nodes from external links *)
-
let domain_map = Hashtbl.create 64 in
-
List.iter (fun link ->
-
if not (Hashtbl.mem domain_map link.domain) then
-
Hashtbl.add domain_map link.domain ()
-
) graph.external_links;
-
-
let domain_nodes = Hashtbl.fold (fun domain () acc ->
-
(`O [
-
("id", `String ("domain:" ^ domain));
-
("title", `String domain);
-
("type", `String "domain");
-
("group", `String "domain");
-
]) :: acc
-
) domain_map [] in
-
-
let all_nodes = entry_nodes @ contact_nodes @ domain_nodes in
-
-
(* Build internal links *)
-
let internal_links_json = List.map (fun (link : internal_link) ->
-
`O [
-
("source", `String link.source);
-
("target", `String link.target);
-
("type", `String "internal");
-
]
-
) graph.internal_links in
-
-
(* Build external links (entry -> domain) *)
-
let external_links_json = List.map (fun (link : external_link) ->
-
`O [
-
("source", `String link.source);
-
("target", `String ("domain:" ^ link.domain));
-
("type", `String "external");
-
]
-
) graph.external_links in
-
-
let all_links = internal_links_json @ external_links_json in
-
-
let json = `O [
-
("nodes", `A all_nodes);
-
("links", `A all_links);
-
] in
-
-
Ezjsonm.to_string json
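A rough sketch of how this module looks meant to be driven, assuming an `entries` collection is already loaded elsewhere and that "my-note" is a made-up slug:

```ocaml
let dump_graph entries =
  (* Index every entry, note slug_ent and paper project reference. *)
  let graph = Link_graph.build_link_graph entries in
  (* Register globally so the get_*_for_slug helpers work elsewhere. *)
  Link_graph.set_graph graph;
  (* Entries that link to a given slug. *)
  List.iter print_endline (Link_graph.get_backlinks_for_slug "my-note");
  (* Nodes/links as JSON for a force-directed visualisation. *)
  print_string (Link_graph.to_json graph entries)
```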
-781
stack/bushel/lib/md.ml
···
-
(** Bushel mappers for our Markdown extensions and utilities
-
-
This module provides mappers to convert Bushel markdown extensions to different
-
output formats. There are two main mappers:
-
-
1. {!make_bushel_inline_mapper} - Full sidenote mode for the main website
-
- Converts Bushel links to interactive sidenotes
-
- Includes entry previews, contact info, footnotes
-
- Used for the main site HTML rendering
-
-
2. {!make_bushel_link_only_mapper} - Plain HTML mode for feeds and simple output
-
- Converts Bushel links to regular HTML <a> tags
-
- Automatically cleans up link text that contains Bushel slugs
-
- Used for Atom feeds, RSS, search indexing
-
- Images need .webp extension added (handled by calling code)
-
-
For plain text output (search, LLM), use {!markdown_to_plaintext}.
-
*)
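A minimal sketch of the usual driving code for these mappers, the same shape as `scan_for_slugs` further down; `entries` is assumed to be in scope:

```ocaml
let to_plain_links entries md =
  let open Cmarkit in
  let doc = Doc.of_string ~strict:false ~resolver:Md.with_bushel_links md in
  let defs = Doc.defs doc in
  (* Link-only mode: Bushel references come out as ordinary <a> links. *)
  let mapper = Mapper.make ~inline:(Md.make_bushel_link_only_mapper defs entries) () in
  Mapper.map_doc mapper doc
```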
-
-
(* Sidenote data types - reuse existing Bushel types *)
-
type sidenote_data =
-
| Contact_note of Contact.t * string (* contact data + trigger text *)
-
| Paper_note of Paper.t * string
-
| Idea_note of Idea.t * string
-
| Note_note of Note.t * string
-
| Project_note of Project.t * string
-
| Video_note of Video.t * string
-
| Footnote_note of string * Cmarkit.Block.t * string
-
(* slug, block content, trigger text *)
-
-
type Cmarkit.Inline.t += Side_note of sidenote_data
-
-
let authorlink = Cmarkit.Meta.key ()
-
-
let make_authorlink label =
-
let meta = Cmarkit.Meta.tag authorlink (Cmarkit.Label.meta label) in
-
Cmarkit.Label.with_meta meta label
-
;;
-
-
let sluglink = Cmarkit.Meta.key ()
-
-
let make_sluglink label =
-
let meta = Cmarkit.Meta.tag sluglink (Cmarkit.Label.meta label) in
-
Cmarkit.Label.with_meta meta label
-
;;
-
-
let with_bushel_links = function
-
| `Def _ as ctx -> Cmarkit.Label.default_resolver ctx
-
| `Ref (_, _, (Some _ as def)) -> def
-
| `Ref (_, ref, None) ->
-
let txt = Cmarkit.Label.key ref in
-
(match txt.[0] with
-
| '@' -> Some (make_authorlink ref)
-
| ':' -> Some (make_sluglink ref)
-
| '#' -> if txt.[1] = '#' then Some (make_sluglink ref) else None
-
| _ -> None)
-
;;
-
-
let strip_handle s =
-
if s.[0] = '@' || s.[0] = ':'
-
then String.sub s 1 (String.length s - 1)
-
else if s.[0] = '#' && s.[1] = '#'
-
then String.sub s 2 (String.length s - 2)
-
else s
-
;;
-
-
(* FIXME use Tags *)
-
let is_bushel_slug = String.starts_with ~prefix:":"
-
let is_tag_slug link =
-
String.starts_with ~prefix:"##" link &&
-
not (String.starts_with ~prefix:"###" link)
-
-
let is_type_filter_slug = String.starts_with ~prefix:"###"
-
let is_contact_slug = String.starts_with ~prefix:"@"
-
-
let text_of_inline lb =
-
let open Cmarkit in
-
Inline.to_plain_text ~break_on_soft:false lb
-
|> fun r -> String.concat "\n" (List.map (String.concat "") r)
-
;;
-
-
let link_target_is_bushel ?slugs lb =
-
let open Cmarkit in
-
let ref = Inline.Link.reference lb in
-
match ref with
-
| `Inline (ld, _) ->
-
let dest = Link_definition.dest ld in
-
(match dest with
-
| Some (url, _) when is_bushel_slug url ->
-
(match slugs with
-
| Some s -> Hashtbl.replace s url ()
-
| _ -> ());
-
Some (url, Inline.Link.text lb |> text_of_inline)
-
| Some (url, _) when is_tag_slug url ->
-
(* Return the tag URL unchanged - will be handled by renderer *)
-
Some (url, Inline.Link.text lb |> text_of_inline)
-
| Some (url, _) when is_contact_slug url ->
-
Some (url, Inline.Link.text lb |> text_of_inline)
-
| _ -> None)
-
| _ -> None
-
;;
-
-
let image_target_is_bushel lb =
-
let open Cmarkit in
-
let ref = Inline.Link.reference lb in
-
match ref with
-
| `Inline (ld, _) ->
-
let dest = Link_definition.dest ld in
-
(match dest with
-
| Some (url, _) when is_bushel_slug url ->
-
let alt = Link_definition.title ld in
-
let dir =
-
Inline.Link.text lb
-
|> Inline.to_plain_text ~break_on_soft:false
-
|> fun r -> String.concat "\n" (List.map (String.concat "") r)
-
in
-
Some (url, alt, dir)
-
| _ -> None)
-
| _ -> None
-
;;
-
-
let rewrite_bushel_link_reference entries slug title meta =
-
let open Cmarkit in
-
let s = strip_handle slug in
-
(* Check if it's a tag, contact, or entry *)
-
if is_tag_slug slug then
-
(* Tag link - keep the ## prefix in dest for renderer to detect *)
-
let txt = Inline.Text (title, meta) in
-
let ld = Link_definition.make ~dest:(slug, meta) () in
-
let ll = `Inline (ld, meta) in
-
let ld = Inline.Link.make txt ll in
-
Mapper.ret (Inline.Link (ld, meta))
-
else if is_contact_slug slug then
-
(* Contact sidenote *)
-
match Contact.find_by_handle (Entry.contacts entries) s with
-
| Some c ->
-
let sidenote = Side_note (Contact_note (c, title)) in
-
Mapper.ret sidenote
-
| None ->
-
(* Contact not found, fallback to regular link *)
-
let txt = Inline.Text (title, meta) in
-
let ld = Link_definition.make ~dest:("", meta) () in
-
let ll = `Inline (ld, meta) in
-
let ld = Inline.Link.make txt ll in
-
Mapper.ret (Inline.Link (ld, meta))
-
else
-
(* Check entry type and generate appropriate sidenote *)
-
match Entry.lookup entries s with
-
| Some (`Paper p) ->
-
let sidenote = Side_note (Paper_note (p, title)) in
-
Mapper.ret sidenote
-
| Some (`Idea i) ->
-
let sidenote = Side_note (Idea_note (i, title)) in
-
Mapper.ret sidenote
-
| Some (`Note n) ->
-
let sidenote = Side_note (Note_note (n, title)) in
-
Mapper.ret sidenote
-
| Some (`Project p) ->
-
let sidenote = Side_note (Project_note (p, title)) in
-
Mapper.ret sidenote
-
| Some (`Video v) ->
-
let sidenote = Side_note (Video_note (v, title)) in
-
Mapper.ret sidenote
-
| None ->
-
(* Entry not found, use regular link *)
-
let dest = Entry.lookup_site_url entries s in
-
let txt = Inline.Text (title, meta) in
-
let ld = Link_definition.make ~dest:(dest, meta) () in
-
let ll = `Inline (ld, meta) in
-
let ld = Inline.Link.make txt ll in
-
Mapper.ret (Inline.Link (ld, meta))
-
;;
-
-
let rewrite_bushel_image_reference entries url title dir meta =
-
let open Cmarkit in
-
let dest =
-
match Entry.lookup entries (strip_handle url) with
-
| Some ent -> Entry.site_url ent (* This is a video *)
-
| None -> Printf.sprintf "/images/%s" (strip_handle url)
-
in
-
let txt = Inline.Text (dir, meta) in
-
let ld = Link_definition.make ?title ~dest:(dest, meta) () in
-
let ll = `Inline (ld, meta) in
-
let ld = Inline.Link.make txt ll in
-
let ent_il = Inline.Image (ld, meta) in
-
Mapper.ret ent_il
-
;;
-
-
type Cmarkit.Inline.t += Obsidian_link of string
-
-
let rewrite_label_reference_to_obsidian lb meta =
-
let open Cmarkit in
-
match Inline.Link.referenced_label lb with
-
| None -> Mapper.default
-
| Some l ->
-
let m = Label.meta l in
-
(match Meta.find authorlink m with
-
| Some () ->
-
let slug = Label.key l in
-
let target = Printf.sprintf "[[%s]]" slug in
-
let txt = Obsidian_link target in
-
Mapper.ret txt
-
| None ->
-
(match Meta.find sluglink m with
-
| None -> Mapper.default
-
| Some () ->
-
let slug = Label.key l in
-
if is_bushel_slug slug
-
then (
-
let target = Printf.sprintf "[[%s]]" (strip_handle slug) in
-
let txt = Obsidian_link target in
-
Mapper.ret txt)
-
else if is_tag_slug slug
-
then (
-
let target = Printf.sprintf "#%s" (strip_handle slug) in
-
let txt = Inline.Text (target, meta) in
-
Mapper.ret txt)
-
else Mapper.default))
-
;;
-
-
let make_bushel_link_only_mapper _defs entries =
-
let open Cmarkit in
-
fun _m ->
-
function
-
| Inline.Link (lb, meta) ->
-
(* Convert Bushel link references to regular links (not sidenotes) *)
-
(match link_target_is_bushel lb with
-
| Some (url, title) ->
-
let s = strip_handle url in
-
let dest = Entry.lookup_site_url entries s in
-
(* If title is itself a Bushel slug, use the entry title instead *)
-
let link_text =
-
if is_bushel_slug title then
-
match Entry.lookup entries (strip_handle title) with
-
| Some ent -> Entry.title ent
-
| None -> title
-
else title
-
in
-
let txt = Inline.Text (link_text, meta) in
-
let ld = Link_definition.make ~dest:(dest, meta) () in
-
let ll = `Inline (ld, meta) in
-
let ld = Inline.Link.make txt ll in
-
Mapper.ret (Inline.Link (ld, meta))
-
| None ->
-
(match Inline.Link.referenced_label lb with
-
| Some l ->
-
let m = Label.meta l in
-
(* Check for authorlink (contact) first *)
-
(match Meta.find authorlink m with
-
| Some () ->
-
let slug = Label.key l in
-
let s = strip_handle slug in
-
(match Contact.find_by_handle (Entry.contacts entries) s with
-
| Some c ->
-
let name = Contact.name c in
-
(match Contact.best_url c with
-
| Some dest ->
-
let txt = Inline.Text (name, meta) in
-
let ld = Link_definition.make ~dest:(dest, meta) () in
-
let ll = `Inline (ld, meta) in
-
let ld = Inline.Link.make txt ll in
-
Mapper.ret (Inline.Link (ld, meta))
-
| None ->
-
(* No URL for contact, just use name as text *)
-
let txt = Inline.Text (name, meta) in
-
Mapper.ret txt)
-
| None ->
-
(* Contact not found, use title as fallback text *)
-
let title = Inline.Link.text lb |> text_of_inline in
-
let txt = Inline.Text (title, meta) in
-
Mapper.ret txt)
-
| None ->
-
(* Check for sluglink *)
-
(match Meta.find sluglink m with
-
| Some () ->
-
let slug = Label.key l in
-
if is_bushel_slug slug || is_tag_slug slug || is_contact_slug slug
-
then (
-
let s = strip_handle slug in
-
let dest = Entry.lookup_site_url entries s in
-
let title = Inline.Link.text lb |> text_of_inline in
-
(* If link text is itself a Bushel slug, use the entry title instead *)
-
let link_text =
-
let trimmed = String.trim title in
-
if is_bushel_slug trimmed then
-
match Entry.lookup entries (strip_handle trimmed) with
-
| Some ent -> Entry.title ent
-
| None -> title
-
else title
-
in
-
let txt = Inline.Text (link_text, meta) in
-
let ld = Link_definition.make ~dest:(dest, meta) () in
-
let ll = `Inline (ld, meta) in
-
let ld = Inline.Link.make txt ll in
-
Mapper.ret (Inline.Link (ld, meta)))
-
else Mapper.default
-
| None -> Mapper.default))
-
| None -> Mapper.default))
-
| _ -> Mapper.default
-
;;
-
-
let rewrite_footnote_reference ?footnote_map entries defs lb _meta =
-
let open Cmarkit in
-
match Inline.Link.referenced_label lb with
-
| None -> Mapper.default
-
| Some l ->
-
(match Inline.Link.reference_definition defs lb with
-
| Some (Block.Footnote.Def (fn, _)) ->
-
let label_key = Label.key l in
-
let slug, trigger_text =
-
match footnote_map with
-
| Some fm ->
-
(match Hashtbl.find_opt fm label_key with
-
| Some (slug, text) -> (slug, text)
-
| None ->
-
let num = Hashtbl.length fm + 1 in
-
let slug = Printf.sprintf "fn-%d" num in
-
let text = Printf.sprintf "[%d]" num in
-
Hashtbl.add fm label_key (slug, text);
-
(slug, text))
-
| None ->
-
(* No map provided, use label key as slug *)
-
let slug = Printf.sprintf "fn-%s" (String.sub label_key 1 (String.length label_key - 1)) in
-
let text = "[?]" in
-
(slug, text)
-
in
-
(* Process the block to convert Bushel link references to regular links (not sidenotes) *)
-
let block = Block.Footnote.block fn in
-
let link_mapper = Mapper.make ~inline:(make_bushel_link_only_mapper defs entries) () in
-
let processed_block =
-
match Mapper.map_block link_mapper block with
-
| Some b -> b
-
| None -> block
-
in
-
let sidenote = Side_note (Footnote_note (slug, processed_block, trigger_text)) in
-
Mapper.ret sidenote
-
| _ -> Mapper.default)
-
-
let rewrite_label_reference ?slugs entries lb meta =
-
let open Cmarkit in
-
match Inline.Link.referenced_label lb with
-
| None -> Mapper.default
-
| Some l ->
-
let m = Label.meta l in
-
(match Meta.find authorlink m with
-
| Some () ->
-
let slug = Label.key l in
-
(match Contact.find_by_handle (Entry.contacts entries) (strip_handle slug) with
-
| Some c ->
-
let trigger_text = Contact.name c in
-
let sidenote = Side_note (Contact_note (c, trigger_text)) in
-
Mapper.ret sidenote
-
| None ->
-
(* Contact not found, fallback to text *)
-
let txt = Inline.Text ("Unknown Person", meta) in
-
Mapper.ret txt)
-
| None ->
-
(match Meta.find sluglink m with
-
| None -> Mapper.default
-
| Some () ->
-
let slug = Label.key l in
-
if is_bushel_slug slug
-
then (
-
(match slugs with
-
| Some s -> Hashtbl.replace s slug ()
-
| _ -> ());
-
let s = strip_handle slug in
-
(* Check entry type and generate appropriate sidenote *)
-
match Entry.lookup entries s with
-
| Some (`Paper p) ->
-
let trigger_text = Entry.lookup_title entries s in
-
let sidenote = Side_note (Paper_note (p, trigger_text)) in
-
Mapper.ret sidenote
-
| Some (`Idea i) ->
-
let trigger_text = Entry.lookup_title entries s in
-
let sidenote = Side_note (Idea_note (i, trigger_text)) in
-
Mapper.ret sidenote
-
| Some (`Note n) ->
-
let trigger_text = Entry.lookup_title entries s in
-
let sidenote = Side_note (Note_note (n, trigger_text)) in
-
Mapper.ret sidenote
-
| Some (`Project p) ->
-
let trigger_text = Entry.lookup_title entries s in
-
let sidenote = Side_note (Project_note (p, trigger_text)) in
-
Mapper.ret sidenote
-
| Some (`Video v) ->
-
let trigger_text = Entry.lookup_title entries s in
-
let sidenote = Side_note (Video_note (v, trigger_text)) in
-
Mapper.ret sidenote
-
| None ->
-
(* Entry not found, use regular link *)
-
let target = Entry.lookup_title entries s in
-
let dest = Entry.lookup_site_url entries s in
-
let txt = Inline.Text (target, meta) in
-
let ld = Link_definition.make ~dest:(dest, meta) () in
-
let ll = `Inline (ld, meta) in
-
let ld = Inline.Link.make txt ll in
-
Mapper.ret (Inline.Link (ld, meta)))
-
else if is_tag_slug slug
-
then (
-
let sh = strip_handle slug in
-
(* Use # as dest to prevent navigation, JavaScript will intercept *)
-
let target, dest = sh, "#" in
-
let txt = Inline.Text (target, meta) in
-
let ld = Link_definition.make ~dest:(dest, meta) () in
-
let ll = `Inline (ld, meta) in
-
let ld = Inline.Link.make txt ll in
-
let ent_il = Inline.Link (ld, meta) in
-
Mapper.ret ent_il)
-
else Mapper.default))
-
;;
-
-
let bushel_inline_mapper_to_obsidian entries _m =
-
let open Cmarkit in
-
function
-
| Inline.Link (lb, meta) ->
-
(match link_target_is_bushel lb with
-
| None -> rewrite_label_reference_to_obsidian lb meta
-
| Some (url, title) -> rewrite_bushel_link_reference entries url title meta)
-
| Inline.Image (lb, meta) ->
-
(match image_target_is_bushel lb with
-
| None -> rewrite_label_reference_to_obsidian lb meta
-
| Some (url, alt, dir) -> rewrite_bushel_image_reference entries url alt dir meta)
-
| _ -> Mapper.default
-
;;
-
-
let make_bushel_inline_mapper ?slugs ?footnote_map defs entries =
-
let open Cmarkit in
-
fun _m ->
-
function
-
| Inline.Link (lb, meta) ->
-
(* First check if this is a footnote reference *)
-
(match Inline.Link.referenced_label lb with
-
| Some l when String.starts_with ~prefix:"^" (Label.key l) ->
-
(* This is a footnote reference *)
-
rewrite_footnote_reference ?footnote_map entries defs lb meta
-
| _ ->
-
(* Not a footnote, handle as bushel link *)
-
(match link_target_is_bushel ?slugs lb with
-
| None -> rewrite_label_reference ?slugs entries lb meta
-
| Some (url, title) -> rewrite_bushel_link_reference entries url title meta))
-
| Inline.Image (lb, meta) ->
-
(match image_target_is_bushel lb with
-
| None -> rewrite_label_reference entries lb meta
-
| Some (url, alt, dir) -> rewrite_bushel_image_reference entries url alt dir meta)
-
| _ -> Mapper.default
-
;;
-
-
let scan_for_slugs entries md =
-
let open Cmarkit in
-
let slugs = Hashtbl.create 7 in
-
let doc = Doc.of_string ~strict:false ~resolver:with_bushel_links md in
-
let defs = Doc.defs doc in
-
let _ =
-
Mapper.map_doc (Mapper.make ~inline:(make_bushel_inline_mapper ~slugs defs entries) ()) doc
-
in
-
Hashtbl.fold (fun k () a -> k :: a) slugs []
-
;;
-
-
(** Validation mapper that collects broken references *)
-
let make_validation_mapper entries broken_slugs broken_contacts =
-
let open Cmarkit in
-
fun _m ->
-
function
-
| Inline.Link (lb, _meta) ->
-
(* Check inline bushel links *)
-
(match link_target_is_bushel lb with
-
| Some (url, _title) ->
-
let s = strip_handle url in
-
if is_contact_slug url then
-
(* Validate contact handle *)
-
(match Contact.find_by_handle (Entry.contacts entries) s with
-
| None -> Hashtbl.replace broken_contacts url ()
-
| Some _ -> ())
-
else if is_bushel_slug url then
-
(* Validate entry slug *)
-
(match Entry.lookup entries s with
-
| None -> Hashtbl.replace broken_slugs url ()
-
| Some _ -> ())
-
else ();
-
Mapper.default
-
| None ->
-
(* Check referenced label links *)
-
(match Inline.Link.referenced_label lb with
-
| Some l ->
-
let m = Label.meta l in
-
(* Check for contact reference *)
-
(match Meta.find authorlink m with
-
| Some () ->
-
let slug = Label.key l in
-
let handle = strip_handle slug in
-
(match Contact.find_by_handle (Entry.contacts entries) handle with
-
| None -> Hashtbl.replace broken_contacts slug ()
-
| Some _ -> ());
-
Mapper.default
-
| None ->
-
(* Check for entry slug reference *)
-
(match Meta.find sluglink m with
-
| None -> Mapper.default
-
| Some () ->
-
let slug = Label.key l in
-
if is_bushel_slug slug then (
-
let s = strip_handle slug in
-
match Entry.lookup entries s with
-
| None -> Hashtbl.replace broken_slugs slug ()
-
| Some _ -> ()
-
);
-
Mapper.default))
-
| None -> Mapper.default))
-
| _ -> Mapper.default
-
;;
-
-
(** Validate all bushel references in markdown and return broken ones *)
-
let validate_references entries md =
-
let open Cmarkit in
-
let broken_slugs = Hashtbl.create 7 in
-
let broken_contacts = Hashtbl.create 7 in
-
let doc = Doc.of_string ~strict:false ~resolver:with_bushel_links md in
-
let mapper = Mapper.make ~inline:(make_validation_mapper entries broken_slugs broken_contacts) () in
-
let _ = Mapper.map_doc mapper doc in
-
let slugs = Hashtbl.fold (fun k () a -> k :: a) broken_slugs [] in
-
let contacts = Hashtbl.fold (fun k () a -> k :: a) broken_contacts [] in
-
(slugs, contacts)
-
;;
-
-
(** Extract the first image URL from markdown text *)
-
let extract_first_image md =
-
let open Cmarkit in
-
(* Don't use bushel link resolver to avoid circular dependency with Entry *)
-
let doc = Doc.of_string md in
-
let found_image = ref None in
-
-
let find_image_in_inline _mapper = function
-
| Inline.Image (img, _) ->
-
(match Inline.Link.reference img with
-
| `Inline (ld, _) ->
-
(match Link_definition.dest ld with
-
| Some (url, _) when !found_image = None ->
-
found_image := Some url;
-
Mapper.default
-
| _ -> Mapper.default)
-
| _ -> Mapper.default)
-
| _ -> Mapper.default
-
in
-
-
let mapper = Mapper.make ~inline:find_image_in_inline () in
-
let _ = Mapper.map_doc mapper doc in
-
!found_image
-
;;
-
-
(** Convert markdown text to plain text, resolving bushel links to just their text *)
-
let markdown_to_plaintext _entries text =
-
let open Cmarkit in
-
(* Parse markdown with bushel link resolver *)
-
let doc = Doc.of_string ~resolver:with_bushel_links text in
-
-
(* Convert document blocks to plain text *)
-
let rec block_to_text = function
-
| Block.Blank_line _ -> ""
-
| Block.Thematic_break _ -> "\n---\n"
-
| Block.Paragraph (p, _) ->
-
let inline = Block.Paragraph.inline p in
-
Inline.to_plain_text ~break_on_soft:false inline
-
|> List.map (String.concat "") |> String.concat "\n"
-
| Block.Heading (h, _) ->
-
let inline = Block.Heading.inline h in
-
Inline.to_plain_text ~break_on_soft:false inline
-
|> List.map (String.concat "") |> String.concat "\n"
-
| Block.Block_quote (bq, _) ->
-
let blocks = Block.Block_quote.block bq in
-
block_to_text blocks
-
| Block.List (l, _) ->
-
let items = Block.List'.items l in
-
List.map (fun (item, _) ->
-
let blocks = Block.List_item.block item in
-
block_to_text blocks
-
) items |> String.concat "\n"
-
| Block.Code_block (cb, _) ->
-
let code = Block.Code_block.code cb in
-
String.concat "\n" (List.map Block_line.to_string code)
-
| Block.Html_block _ -> "" (* Skip HTML blocks for search *)
-
| Block.Link_reference_definition _ -> ""
-
| Block.Ext_footnote_definition _ -> ""
-
| Block.Blocks (blocks, _) ->
-
List.map block_to_text blocks |> String.concat "\n"
-
| _ -> ""
-
in
-
let blocks = Doc.block doc in
-
block_to_text blocks
-
;;
-
-
(** Extract all links from markdown text, including from images *)
-
let extract_all_links text =
-
let open Cmarkit in
-
let doc = Doc.of_string ~resolver:with_bushel_links text in
-
let links = ref [] in
-
-
let find_links_in_inline _mapper = function
-
| Inline.Link (lb, _) | Inline.Image (lb, _) ->
-
(* Check for inline link/image destination *)
-
(match Inline.Link.reference lb with
-
| `Inline (ld, _) ->
-
(match Link_definition.dest ld with
-
| Some (url, _) ->
-
links := url :: !links;
-
Mapper.default
-
| None -> Mapper.default)
-
| `Ref _ ->
-
(* For reference-style links/images, check if it has a referenced label *)
-
(match Inline.Link.referenced_label lb with
-
| Some l ->
-
let key = Label.key l in
-
(* Check if it's a bushel-style link *)
-
if String.length key > 0 && (key.[0] = ':' || key.[0] = '@' ||
-
(String.length key > 1 && key.[0] = '#' && key.[1] = '#')) then
-
links := key :: !links;
-
Mapper.default
-
| None -> Mapper.default))
-
| _ -> Mapper.default
-
in
-
-
let mapper = Mapper.make ~inline:find_links_in_inline () in
-
let _ = Mapper.map_doc mapper doc in
-
-
(* Deduplicate *)
-
let module StringSet = Set.Make(String) in
-
StringSet.elements (StringSet.of_list !links)
-
;;
-
-
(* Reference source type for CiTO annotations *)
-
type reference_source =
-
| Paper (* CitesAsSourceDocument *)
-
| Note (* CitesAsRelated *)
-
| External (* Cites *)
-
-
(* Extract references (papers/notes with DOIs) from a note *)
-
let note_references entries default_author note =
-
let refs = ref [] in
-
-
(* Helper to format author name: extract last name from full name *)
-
let format_author_last name =
-
let parts = String.split_on_char ' ' name in
-
List.nth parts (List.length parts - 1)
-
in
-
-
(* Helper to format a citation *)
-
let format_citation ~authors ~year ~title ~publisher =
-
let author_str = match authors with
-
| [] -> ""
-
| [author] -> format_author_last author ^ " "
-
| author :: _ -> (format_author_last author) ^ " et al "
-
in
-
let pub_str = match publisher with
-
| None | Some "" -> ""
-
| Some p -> p ^ ". "
-
in
-
Printf.sprintf "%s(%d). %s. %s" author_str year title pub_str
-
in
-
-
(* Check slug_ent if it exists *)
-
(match Note.slug_ent note with
-
| Some slug ->
-
(match Entry.lookup entries slug with
-
| Some (`Paper p) ->
-
(match Paper.doi p with
-
| Some doi ->
-
let authors = Paper.authors p in
-
let year = Paper.year p in
-
let title = Paper.title p in
-
let publisher = Some (Paper.publisher p) in
-
let citation = format_citation ~authors ~year ~title ~publisher in
-
refs := (doi, citation, Paper) :: !refs
-
| None -> ())
-
| Some (`Note n) ->
-
(match Note.doi n with
-
| Some doi ->
-
let authors = match Note.author n with
-
| Some a -> [a]
-
| None -> [Contact.name default_author]
-
in
-
let (year, _, _) = Note.date n in
-
let title = Note.title n in
-
let publisher = None in
-
let citation = format_citation ~authors ~year ~title ~publisher in
-
refs := (doi, citation, Note) :: !refs
-
| None -> ())
-
| _ -> ())
-
| None -> ());
-
-
(* Scan body for bushel references *)
-
let slugs = scan_for_slugs entries (Note.body note) in
-
List.iter (fun slug ->
-
(* Strip leading : or @ from slug before lookup *)
-
let normalized_slug = strip_handle slug in
-
match Entry.lookup entries normalized_slug with
-
| Some (`Paper p) ->
-
(match Paper.doi p with
-
| Some doi ->
-
let authors = Paper.authors p in
-
let year = Paper.year p in
-
let title = Paper.title p in
-
let publisher = Some (Paper.publisher p) in
-
let citation = format_citation ~authors ~year ~title ~publisher in
-
(* Check if doi already exists in refs *)
-
if not (List.exists (fun (d, _, _) -> d = doi) !refs) then
-
refs := (doi, citation, Paper) :: !refs
-
| None -> ())
-
| Some (`Note n) ->
-
(match Note.doi n with
-
| Some doi ->
-
let authors = match Note.author n with
-
| Some a -> [a]
-
| None -> [Contact.name default_author]
-
in
-
let (year, _, _) = Note.date n in
-
let title = Note.title n in
-
let publisher = None in
-
let citation = format_citation ~authors ~year ~title ~publisher in
-
(* Check if doi already exists in refs *)
-
if not (List.exists (fun (d, _, _) -> d = doi) !refs) then
-
refs := (doi, citation, Note) :: !refs
-
| None -> ())
-
| _ -> ()
-
) slugs;
-
-
(* Scan body for external DOI URLs and resolve from cache *)
-
let body = Note.body note in
-
let doi_url_pattern = Re.Perl.compile_pat "https?://(?:dx\\.)?doi\\.org/([^)\\s\"'>]+)" in
-
let matches = Re.all doi_url_pattern body in
-
let doi_entries = Entry.doi_entries entries in
-
List.iter (fun group ->
-
try
-
let encoded_doi = Re.Group.get group 1 in
-
(* URL decode the DOI *)
-
let doi = Uri.pct_decode encoded_doi in
-
(* Check if doi already exists in refs *)
-
if not (List.exists (fun (d, _, _) -> d = doi) !refs) then
-
(* Look up in DOI cache *)
-
match Doi_entry.find_by_doi doi_entries doi with
-
| Some doi_entry when doi_entry.status = Resolved ->
-
let citation = format_citation
-
~authors:doi_entry.authors
-
~year:doi_entry.year
-
~title:doi_entry.title
-
~publisher:(Some doi_entry.publisher)
-
in
-
refs := (doi, citation, External) :: !refs
-
| _ ->
-
(* Not found in cache, add minimal citation with just the DOI *)
-
refs := (doi, doi, External) :: !refs
-
with _ -> ()
-
) matches;
-
-
(* Scan body for publisher URLs (Elsevier, Nature, ACM, Sage, UPenn, Springer, Taylor & Francis) and resolve from cache *)
-
let publisher_pattern = Re.Perl.compile_pat "https?://(?:(?:www\\.)?(?:linkinghub\\.elsevier\\.com|nature\\.com|journals\\.sagepub\\.com|garfield\\.library\\.upenn\\.edu|link\\.springer\\.com)/[^)\\s\"'>]+|(?:dl\\.acm\\.org|(?:www\\.)?tandfonline\\.com)/doi(?:/pdf)?/10\\.[^)\\s\"'>]+)" in
-
let publisher_matches = Re.all publisher_pattern body in
-
List.iter (fun group ->
-
try
-
let url = Re.Group.get group 0 in
-
(* Look up in DOI cache by source URL *)
-
match Doi_entry.find_by_url doi_entries url with
-
| Some doi_entry when doi_entry.status = Resolved ->
-
let doi = doi_entry.doi in
-
(* Check if this DOI already exists in refs *)
-
if not (List.exists (fun (d, _, _) -> d = doi) !refs) then
-
let citation = format_citation
-
~authors:doi_entry.authors
-
~year:doi_entry.year
-
~title:doi_entry.title
-
~publisher:(Some doi_entry.publisher)
-
in
-
refs := (doi, citation, External) :: !refs
-
| _ ->
-
(* Not found in cache, skip it *)
-
()
-
with _ -> ()
-
) publisher_matches;
-
-
List.rev !refs
-
;;
-
-73
stack/bushel/lib/md.mli
···
-
val make_bushel_inline_mapper
-
: ?slugs:(string, unit) Hashtbl.t
-
-> ?footnote_map:(string, string * string) Hashtbl.t
-
-> Cmarkit.Label.defs
-
-> Entry.t
-
-> 'a
-
-> Cmarkit.Inline.t
-
-> Cmarkit.Inline.t Cmarkit.Mapper.result
-
-
val make_bushel_link_only_mapper
-
: Cmarkit.Label.defs
-
-> Entry.t
-
-> 'a
-
-> Cmarkit.Inline.t
-
-> Cmarkit.Inline.t Cmarkit.Mapper.result
-
-
type Cmarkit.Inline.t += Obsidian_link of string
-
-
type sidenote_data =
-
| Contact_note of Contact.t * string
-
| Paper_note of Paper.t * string
-
| Idea_note of Idea.t * string
-
| Note_note of Note.t * string
-
| Project_note of Project.t * string
-
| Video_note of Video.t * string
-
| Footnote_note of string * Cmarkit.Block.t * string
-
-
type Cmarkit.Inline.t += Side_note of sidenote_data
-
-
val bushel_inline_mapper_to_obsidian
-
: Entry.t
-
-> 'a
-
-> Cmarkit.Inline.t
-
-> Cmarkit.Inline.t Cmarkit.Mapper.result
-
-
val with_bushel_links
-
: [< `Def of Cmarkit.Label.t option * Cmarkit.Label.t
-
| `Ref of 'a * Cmarkit.Label.t * Cmarkit.Label.t option
-
]
-
-> Cmarkit.Label.t option
-
-
val scan_for_slugs : Entry.t -> string -> string list
-
-
(** Validate all bushel references in markdown and return broken ones.
-
Returns (broken_slugs, broken_contacts) where each list contains
-
the full reference string (e.g., ":missing-slug", "@unknown-handle") *)
-
val validate_references : Entry.t -> string -> string list * string list
-
-
(** Extract the first image URL from markdown text *)
-
val extract_first_image : string -> string option
-
-
(** Convert markdown text to plain text, resolving bushel links to just their text *)
-
val markdown_to_plaintext : 'a -> string -> string
-
-
val is_bushel_slug : string -> bool
-
val is_tag_slug : string -> bool
-
val is_type_filter_slug : string -> bool
-
val is_contact_slug : string -> bool
-
val strip_handle : string -> string
-
-
(** Extract all links from markdown text, including from images (internal and external) *)
-
val extract_all_links : string -> string list
-
-
(** Type indicating the source of a reference for CiTO annotation *)
-
type reference_source =
-
| Paper (** CitesAsSourceDocument *)
-
| Note (** CitesAsRelated *)
-
| External (** Cites *)
-
-
(** Extract references (papers/notes with DOIs) from a note.
-
Returns a list of (DOI, citation_string, reference_source) tuples.
-
Citation format: "Lastname (Year). Title. Publisher." (first author's surname, with "et al" when there are several); the DOI itself is returned separately as the first element of each tuple. *)
-
val note_references : Entry.t -> Contact.t -> Note.t -> (string * string * reference_source) list
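A minimal sketch of a lint pass over a note body built from this interface; `entries` and `body` are assumed to be in scope, and the reporting is purely illustrative:

```ocaml
let lint_body entries body =
  (* Broken :slug and @handle references. *)
  let bad_slugs, bad_contacts = Md.validate_references entries body in
  List.iter (Printf.eprintf "broken slug reference: %s\n") bad_slugs;
  List.iter (Printf.eprintf "broken contact reference: %s\n") bad_contacts;
  (* Everything the body links to, external URLs included. *)
  Md.extract_all_links body |> List.iter print_endline;
  (* First image, if any, e.g. for a thumbnail. *)
  Option.iter print_endline (Md.extract_first_image body)
```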
-230
stack/bushel/lib/note.ml
···
-
type t =
-
{ title : string
-
; date : Ptime.date
-
; slug : string
-
; body : string
-
; tags : string list
-
; draft : bool
-
; updated : Ptime.date option
-
; sidebar : string option
-
; index_page : bool
-
; perma : bool (* Permanent article that will receive a DOI *)
-
; doi : string option (* DOI identifier for permanent articles *)
-
; synopsis: string option
-
; titleimage: string option
-
; via : (string * string) option
-
; slug_ent : string option (* Optional reference to another entry *)
-
; source : string option (* Optional source for news-style notes *)
-
; url : string option (* Optional external URL for news-style notes *)
-
; author : string option (* Optional author for news-style notes *)
-
; category : string option (* Optional category for news-style notes *)
-
}
-
-
type ts = t list
-
-
let link { body; via; slug; _ } =
-
match body, via with
-
| "", Some (l, u) -> `Ext (l, u)
-
| "", None -> failwith (slug ^ ": note external without via, via-url")
-
| _, _ -> `Local slug
-
;;
-
-
let origdate { date; _ } = Option.get @@ Ptime.of_date date
-
-
let date { date; updated; _ } =
-
match updated with
-
| None -> date
-
| Some v -> v
-
;;
-
-
let datetime v = Option.get @@ Ptime.of_date @@ date v
-
let compare a b = Ptime.compare (datetime b) (datetime a)
-
let slug { slug; _ } = slug
-
let body { body; _ } = body
-
let title { title; _ } = title
-
let tags { tags; _ } = tags
-
let sidebar { sidebar; _ } = sidebar
-
let synopsis { synopsis; _ } = synopsis
-
let draft { draft; _ } = draft
-
let perma { perma; _ } = perma
-
let doi { doi; _ } = doi
-
let titleimage { titleimage; _ } = titleimage
-
let slug_ent { slug_ent; _ } = slug_ent
-
let source { source; _ } = source
-
let url { url; _ } = url
-
let author { author; _ } = author
-
let category { category; _ } = category
-
let lookup slug notes = List.find (fun n -> n.slug = slug) notes
-
let read_file file = In_channel.(with_open_bin file input_all)
-
let words { body; _ } = Util.count_words body
-
-
-
let of_md fname =
-
(* TODO fix Jekyll_post to basename the fname all the time *)
-
match Jekyll_post.of_string ~fname:(Filename.basename fname) (read_file fname) with
-
| Error (`Msg m) -> failwith ("note_of_md: " ^ m)
-
| Ok jp ->
-
let fields = jp.Jekyll_post.fields in
-
let { Jekyll_post.title; date; slug; body; _ } = jp in
-
let date, _ = Ptime.to_date_time date in
-
let index_page =
-
match Jekyll_format.find "index_page" fields with
-
| Some (`Bool v) -> v
-
| _ -> false
-
in
-
let perma =
-
match Jekyll_format.find "perma" fields with
-
| Some (`Bool v) -> v
-
| _ -> false
-
in
-
let updated =
-
match Jekyll_format.find "updated" fields with
-
| Some (`String v) -> Some (Jekyll_format.parse_date_exn v |> Ptime.to_date)
-
| _ -> None
-
in
-
let draft =
-
match Jekyll_format.find "draft" fields with
-
| Some (`Bool v) -> v
-
| _ -> false
-
in
-
let titleimage =
-
match Jekyll_format.find "titleimage" fields with
-
| Some (`String v) -> Some v
-
| _ -> None
-
in
-
let synopsis =
-
match Jekyll_format.find "synopsis" fields with
-
| Some (`String v) -> Some v
-
| _ -> None
-
in
-
let sidebar =
-
try Some (read_file ("data/sidebar/" ^ Filename.basename fname)) with
-
| _ -> None
-
in
-
let tags =
-
match Jekyll_format.find "tags" fields with
-
| Some (`A l) ->
-
List.filter_map
-
(function
-
| `String s -> Some s
-
| _ -> None)
-
l
-
| _ -> []
-
in
-
let via =
-
match Jekyll_format.find "via" fields, Jekyll_format.find "via-url" fields with
-
| Some (`String a), Some (`String b) -> Some (a, b)
-
| None, Some (`String b) -> Some ("", b)
-
| _ -> None
-
in
-
let slug_ent =
-
match Jekyll_format.find "slug_ent" fields with
-
| Some (`String v) -> Some v
-
| _ -> None
-
in
-
let source =
-
match Jekyll_format.find "source" fields with
-
| Some (`String v) -> Some v
-
| _ -> None
-
in
-
let url =
-
match Jekyll_format.find "url" fields with
-
| Some (`String v) -> Some v
-
| _ -> None
-
in
-
let author =
-
match Jekyll_format.find "author" fields with
-
| Some (`String v) -> Some v
-
| _ -> None
-
in
-
let category =
-
match Jekyll_format.find "category" fields with
-
| Some (`String v) -> Some v
-
| _ -> None
-
in
-
let doi =
-
match Jekyll_format.find "doi" fields with
-
| Some (`String v) -> Some v
-
| _ -> None
-
in
-
{ title; draft; date; slug; synopsis; titleimage; index_page; perma; doi; body; via; updated; tags; sidebar; slug_ent; source; url; author; category }
-
-
(* TODO:claude *)
-
let typesense_schema =
-
let open Ezjsonm in
-
dict [
-
("name", string "notes");
-
("fields", list (fun d -> dict d) [
-
[("name", string "id"); ("type", string "string")];
-
[("name", string "title"); ("type", string "string")];
-
[("name", string "content"); ("type", string "string")];
-
[("name", string "date"); ("type", string "string")];
-
[("name", string "date_timestamp"); ("type", string "int64")];
-
[("name", string "tags"); ("type", string "string[]"); ("facet", bool true)];
-
[("name", string "body"); ("type", string "string"); ("optional", bool true)];
-
[("name", string "draft"); ("type", string "bool")];
-
[("name", string "synopsis"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "thumbnail_url"); ("type", string "string"); ("optional", bool true)];
-
[("name", string "type"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];
-
[("name", string "status"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];
-
[("name", string "related_papers"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "related_projects"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "related_contacts"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "attachments"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "source"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];
-
[("name", string "url"); ("type", string "string"); ("optional", bool true)];
-
[("name", string "author"); ("type", string "string"); ("optional", bool true)];
-
[("name", string "category"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];
-
[("name", string "slug_ent"); ("type", string "string"); ("optional", bool true)];
-
[("name", string "words"); ("type", string "int32"); ("optional", bool true)];
-
]);
-
("default_sorting_field", string "date_timestamp");
-
]
-
-
(** TODO:claude Pretty-print a note with ANSI formatting *)
-
let pp ppf n =
-
let open Fmt in
-
pf ppf "@[<v>";
-
pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Note";
-
pf ppf "%a: %a@," (styled `Bold string) "Slug" string (slug n);
-
pf ppf "%a: %a@," (styled `Bold string) "Title" string (title n);
-
let (year, month, day) = date n in
-
pf ppf "%a: %04d-%02d-%02d@," (styled `Bold string) "Date" year month day;
-
(match n.updated with
-
| Some (y, m, d) -> pf ppf "%a: %04d-%02d-%02d@," (styled `Bold string) "Updated" y m d
-
| None -> ());
-
pf ppf "%a: %b@," (styled `Bold string) "Draft" (draft n);
-
pf ppf "%a: %b@," (styled `Bold string) "Index Page" n.index_page;
-
pf ppf "%a: %b@," (styled `Bold string) "Perma" (perma n);
-
(match doi n with
-
| Some d -> pf ppf "%a: %a@," (styled `Bold string) "DOI" string d
-
| None -> ());
-
(match synopsis n with
-
| Some syn -> pf ppf "%a: %a@," (styled `Bold string) "Synopsis" string syn
-
| None -> ());
-
(match titleimage n with
-
| Some img -> pf ppf "%a: %a@," (styled `Bold string) "Title Image" string img
-
| None -> ());
-
(match n.via with
-
| Some (label, url) ->
-
if label <> "" then
-
pf ppf "%a: %a (%a)@," (styled `Bold string) "Via" string label string url
-
else
-
pf ppf "%a: %a@," (styled `Bold string) "Via" string url
-
| None -> ());
-
let t = tags n in
-
if t <> [] then
-
pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Tags" (list ~sep:comma string) t;
-
(match sidebar n with
-
| Some sb ->
-
pf ppf "@,";
-
pf ppf "%a:@," (styled `Bold string) "Sidebar";
-
pf ppf "%a@," string sb
-
| None -> ());
-
let bd = body n in
-
if bd <> "" then begin
-
pf ppf "@,";
-
pf ppf "%a:@," (styled `Bold string) "Body";
-
pf ppf "%a@," string bd;
-
end;
-
pf ppf "@]"
-49
stack/bushel/lib/note.mli
···
-
type t =
-
{ title : string
-
; date : Ptime.date
-
; slug : string
-
; body : string
-
; tags : string list
-
; draft : bool
-
; updated : Ptime.date option
-
; sidebar : string option
-
; index_page : bool
-
; perma : bool
-
; doi : string option
-
; synopsis: string option
-
; titleimage: string option
-
; via : (string * string) option
-
; slug_ent : string option
-
; source : string option
-
; url : string option
-
; author : string option
-
; category : string option
-
}
-
-
type ts = t list
-
-
val link : t -> [> `Ext of string * string | `Local of string ]
-
val origdate : t -> Ptime.t
-
val date : t -> Ptime.date
-
val datetime : t -> Ptime.t
-
val compare : t -> t -> int
-
val slug : t -> string
-
val body : t -> string
-
val title : t -> string
-
val draft : t -> bool
-
val perma : t -> bool
-
val doi : t -> string option
-
val synopsis : t -> string option
-
val titleimage : t -> string option
-
val slug_ent : t -> string option
-
val source : t -> string option
-
val url : t -> string option
-
val author : t -> string option
-
val category : t -> string option
-
val tags : t -> string list
-
val sidebar : t -> string option
-
val lookup : string -> t list -> t
-
val words : t -> int
-
val of_md : string -> t
-
val typesense_schema : Ezjsonm.value
-
val pp : Format.formatter -> t -> unit
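A minimal sketch of loading and inspecting one note through this interface; the path is made up:

```ocaml
let () =
  let note = Note.of_md "data/notes/2024-01-01-example.md" in
  (* Pretty-print the parsed front matter and body. *)
  Format.printf "%a@." Note.pp note;
  Printf.printf "%d words, %d tags, draft=%b\n"
    (Note.words note) (List.length (Note.tags note)) (Note.draft note)
```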
-373
stack/bushel/lib/paper.ml
···
-
module J = Ezjsonm
-
-
type paper = Ezjsonm.value
-
-
type t =
-
{ slug : string
-
; ver : string
-
; paper : paper
-
; abstract : string
-
; latest : bool
-
}
-
-
type ts = t list
-
-
let key y k = J.find y [ k ]
-
-
let slugs ts =
-
List.fold_left (fun acc t -> if List.mem t.slug acc then acc else t.slug :: acc) [] ts
-
;;
-
-
let slug { slug; _ } = slug
-
let title { paper; _ } : string = key paper "title" |> J.get_string
-
let authors { paper; _ } : string list = key paper "author" |> J.get_list J.get_string
-
-
let project_slugs { paper; _ } : string list =
-
try key paper "projects" |> J.get_list J.get_string with
-
| _ -> []
-
;;
-
-
let slides { paper; _ } : string list =
-
try key paper "slides" |> J.get_list J.get_string with
-
| _ -> []
-
;;
-
-
let bibtype { paper; _ } : string = key paper "bibtype" |> J.get_string
-
-
let journal { paper; _ } =
-
try key paper "journal" |> J.get_string with
-
| Not_found ->
-
failwith
-
(Printf.sprintf "no journal found for %s\n%!" (Ezjsonm.value_to_string paper))
-
;;
-
-
(** TODO:claude Helper to extract raw JSON *)
-
let raw_json { paper; _ } = paper
-
-
let doi { paper; _ } =
-
try Some (key paper "doi" |> J.get_string) with
-
| _ -> None
-
;;
-
-
let volume { paper; _ } =
-
try Some (key paper "volume" |> J.get_string) with
-
| _ -> None
-
;;
-
-
let video { paper; _ } =
-
try Some (key paper "video" |> J.get_string) with
-
| _ -> None
-
;;
-
-
let issue { paper; _ } =
-
try Some (key paper "number" |> J.get_string) with
-
| _ -> None
-
;;
-
-
let url { paper; _ } =
-
try Some (key paper "url" |> J.get_string) with
-
| _ -> None
-
;;
-
-
let pages { paper; _ } = try key paper "pages" |> J.get_string with _ -> ""
-
let abstract { abstract; _ } = abstract
-
-
let institution { paper; _ } =
-
try key paper "institution" |> J.get_string with
-
| Not_found ->
-
failwith
-
(Printf.sprintf "no institution found for %s\n%!" (Ezjsonm.value_to_string paper))
-
;;
-
-
let number { paper; _ } =
-
try Some (key paper "number" |> J.get_string) with
-
| Not_found -> None
-
;;
-
-
let editor { paper; _ } = key paper "editor" |> J.get_string
-
let isbn { paper; _ } = key paper "isbn" |> J.get_string
-
let bib { paper; _ } = key paper "bib" |> J.get_string
-
let year { paper; _ } = key paper "year" |> J.get_string |> int_of_string
-
-
let publisher { paper; _ } =
-
try key paper "publisher" |> J.get_string with
-
| Not_found -> ""
-
;;
-
-
let booktitle { paper; _ } =
-
let r = key paper "booktitle" |> J.get_string |> Bytes.of_string in
-
Bytes.set r 0 (Char.lowercase_ascii (Bytes.get r 0));
-
String.of_bytes r
-
;;
-
-
let date { paper; _ } =
-
let m =
-
try
-
match String.lowercase_ascii (key paper "month" |> J.get_string) with
-
| "jan" -> 1
-
| "feb" -> 2
-
| "mar" -> 3
-
| "apr" -> 4
-
| "may" -> 5
-
| "jun" -> 6
-
| "jul" -> 7
-
| "aug" -> 8
-
| "sep" -> 9
-
| "oct" -> 10
-
| "nov" -> 11
-
| "dec" -> 12
-
| _ -> 1
-
with
-
| Not_found -> 1
-
in
-
let y =
-
try key paper "year" |> J.get_string |> int_of_string with
-
| Not_found ->
-
failwith (Printf.sprintf "no year found for %s" (Ezjsonm.value_to_string paper))
-
in
-
y, m, 1
-
;;
-
-
let datetime p = Option.get @@ Ptime.of_date @@ date p
-
-
let compare p2 p1 =
-
let d1 =
-
Ptime.of_date
-
(try date p1 with
-
| _ -> 1977, 1, 1)
-
|> Option.get
-
in
-
let d2 =
-
Ptime.of_date
-
(try date p2 with
-
| _ -> 1977, 1, 1)
-
|> Option.get
-
in
-
Ptime.compare d1 d2
-
;;
-
-
let get_papers ~slug ts =
-
List.filter (fun p -> p.slug = slug && p.latest <> true) ts |> List.sort compare
-
;;
-
-
let read_file file = In_channel.(with_open_bin file input_all)
-
-
let of_md ~slug ~ver fname =
-
(* TODO fix Jekyll_post to not error on no date *)
-
let fname' = "2000-01-01-" ^ Filename.basename fname in
-
match Jekyll_post.of_string ~fname:fname' (read_file fname) with
-
| Error (`Msg m) -> failwith ("paper_of_md: " ^ m)
-
| Ok jp ->
-
let fields = jp.Jekyll_post.fields |> Jekyll_format.fields_to_yaml in
-
let { Jekyll_post.body; _ } = jp in
-
{ slug; ver; abstract = body; paper = fields; latest = false }
-
;;
-
-
let tv (l : t list) =
-
let h = Hashtbl.create 7 in
-
List.iter
-
(fun { slug; ver; _ } ->
-
match Hashtbl.find_opt h slug with
-
| None -> Hashtbl.add h slug [ ver ]
-
| Some l ->
-
let l = ver :: l in
-
let l = List.sort Stdlib.compare l in
-
Hashtbl.replace h slug l)
-
l;
-
List.map
-
(fun p ->
-
let latest = Hashtbl.find h p.slug |> List.rev |> List.hd in
-
let latest = p.ver = latest in
-
{ p with latest })
-
l
-
;;
-
-
let lookup ts slug = List.find_opt (fun t -> t.slug = slug && t.latest) ts
-
-
let tag_of_bibtype bt =
-
match String.lowercase_ascii bt with
-
| "article" -> "journal"
-
| "inproceedings" -> "conference"
-
| "techreport" -> "report"
-
| "misc" -> "preprint"
-
| "book" -> "book"
-
| x -> x
-
;;
-
-
let tags { paper; _ } =
-
let tags f =
-
try key paper f |> J.get_list J.get_string with
-
| _ -> []
-
in
-
let core = tags "tags" in
-
let extra = tags "keywords" in
-
let projects = tags "projects" in
-
let ty = [ key paper "bibtype" |> J.get_string |> tag_of_bibtype ] in
-
List.flatten [ core; extra; ty; projects ]
-
;;
-
-
let best_url p =
-
if Sys.file_exists (Printf.sprintf "static/papers/%s.pdf" (slug p))
-
then Some (Printf.sprintf "/papers/%s.pdf" (slug p))
-
else url p
-
;;
-
-
(** TODO:claude Classification types for papers *)
-
type classification = Full | Short | Preprint
-
-
let string_of_classification = function
-
| Full -> "full"
-
| Short -> "short"
-
| Preprint -> "preprint"
-
-
let classification_of_string = function
-
| "full" -> Full
-
| "short" -> Short
-
| "preprint" -> Preprint
-
| _ -> Full (* default to full if unknown *)
-
-
(** TODO:claude Get classification from paper metadata, with fallback to heuristic *)
-
let classification { paper; _ } =
-
try
-
key paper "classification" |> J.get_string |> classification_of_string
-
with _ ->
-
(* Fallback to heuristic classification based on venue/bibtype/title *)
-
let bibtype = try key paper "bibtype" |> J.get_string with _ -> "" in
-
let journal = try key paper "journal" |> J.get_string |> String.lowercase_ascii with _ -> "" in
-
let booktitle = try key paper "booktitle" |> J.get_string |> String.lowercase_ascii with _ -> "" in
-
let title_str = try key paper "title" |> J.get_string |> String.lowercase_ascii with _ -> "" in
-
-
(* Helper function to check if text contains any of the patterns *)
-
let contains_any text patterns =
-
List.exists (fun pattern ->
-
let regex = Re.Perl.compile_pat ~opts:[`Caseless] pattern in
-
Re.execp regex text
-
) patterns
-
in
-
-
(* Check for preprint indicators *)
-
let bibtype_lower = String.lowercase_ascii bibtype in
-
if contains_any journal ["arxiv"] || contains_any booktitle ["arxiv"] || bibtype_lower = "misc" || bibtype_lower = "techreport"
-
then Preprint
-
(* Check for workshop/short paper indicators including in title *)
-
else if contains_any journal ["workshop"; "wip"; "poster"; "demo"; "hotdep"; "short"] ||
-
contains_any booktitle ["workshop"; "wip"; "poster"; "demo"; "hotdep"; "short"] ||
-
contains_any title_str ["poster"]
-
then Short
-
(* Default to full paper (journal or conference) *)
-
else Full
-
-
(** TODO:claude Check if paper is marked as selected *)
-
let selected { paper; _ } =
-
try
-
let keys = J.get_dict paper in
-
match List.assoc_opt "selected" keys with
-
| Some (`Bool true) -> true
-
| Some (`String "true") -> true
-
| _ -> false
-
with _ -> false
-
-
(** TODO:claude Get note field from paper metadata *)
-
let note { paper; _ } =
-
try
-
let keys = J.get_dict paper in
-
match List.assoc_opt "note" keys with
-
| Some note_json -> Some (J.get_string note_json)
-
| None -> None
-
with _ -> None
-
-
(* TODO:claude *)
-
let to_yaml ?abstract ~ver:_ json_data =
-
(* Don't add version - it's inferred from filename *)
-
let frontmatter = Yaml.to_string_exn json_data in
-
match abstract with
-
| Some abs ->
-
(* Trim leading/trailing whitespace and normalize blank lines *)
-
let trimmed_abs = String.trim abs in
-
let normalized_abs =
-
(* Replace 3+ consecutive newlines with exactly 2 newlines *)
-
Re.replace_string (Re.compile (Re.seq [Re.char '\n'; Re.char '\n'; Re.rep1 (Re.char '\n')])) ~by:"\n\n" trimmed_abs
-
in
-
if normalized_abs = "" then
-
Printf.sprintf "---\n%s---\n" frontmatter
-
else
-
Printf.sprintf "---\n%s---\n\n%s\n" frontmatter normalized_abs
-
| None -> Printf.sprintf "---\n%s---\n" frontmatter
-
-
(* TODO:claude *)
-
let typesense_schema =
-
let open Ezjsonm in
-
dict [
-
("name", string "papers");
-
("fields", list (fun d -> dict d) [
-
[("name", string "id"); ("type", string "string")];
-
[("name", string "title"); ("type", string "string")];
-
[("name", string "authors"); ("type", string "string[]")];
-
[("name", string "abstract"); ("type", string "string")];
-
[("name", string "date"); ("type", string "string")];
-
[("name", string "date_timestamp"); ("type", string "int64")];
-
[("name", string "tags"); ("type", string "string[]"); ("facet", bool true)];
-
[("name", string "doi"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "arxiv_id"); ("type", string "string"); ("optional", bool true)];
-
[("name", string "pdf_url"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "thumbnail_url"); ("type", string "string"); ("optional", bool true)];
-
[("name", string "journal"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "related_projects"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "related_talks"); ("type", string "string[]"); ("optional", bool true)];
-
]);
-
("default_sorting_field", string "date_timestamp");
-
]
-
-
(** TODO:claude Pretty-print a paper with ANSI formatting *)
-
let pp ppf p =
-
let open Fmt in
-
pf ppf "@[<v>";
-
pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Paper";
-
pf ppf "%a: %a@," (styled `Bold string) "Slug" string (slug p);
-
pf ppf "%a: %a@," (styled `Bold string) "Version" string p.ver;
-
pf ppf "%a: %a@," (styled `Bold string) "Title" string (title p);
-
pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Authors" (list ~sep:comma string) (authors p);
-
pf ppf "%a: %a@," (styled `Bold string) "Year" int (year p);
-
pf ppf "%a: %a@," (styled `Bold string) "Bibtype" string (bibtype p);
-
(match doi p with
-
| Some d -> pf ppf "%a: %a@," (styled `Bold string) "DOI" string d
-
| None -> ());
-
(match url p with
-
| Some u -> pf ppf "%a: %a@," (styled `Bold string) "URL" string u
-
| None -> ());
-
(match video p with
-
| Some v -> pf ppf "%a: %a@," (styled `Bold string) "Video" string v
-
| None -> ());
-
let projs = project_slugs p in
-
if projs <> [] then
-
pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Projects" (list ~sep:comma string) projs;
-
let sl = slides p in
-
if sl <> [] then
-
pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Slides" (list ~sep:comma string) sl;
-
(match bibtype p with
-
| "article" ->
-
pf ppf "%a: %a@," (styled `Bold string) "Journal" string (journal p);
-
(match volume p with
-
| Some vol -> pf ppf "%a: %a@," (styled `Bold string) "Volume" string vol
-
| None -> ());
-
(match issue p with
-
| Some iss -> pf ppf "%a: %a@," (styled `Bold string) "Issue" string iss
-
| None -> ());
-
let pgs = pages p in
-
if pgs <> "" then
-
pf ppf "%a: %a@," (styled `Bold string) "Pages" string pgs;
-
| "inproceedings" ->
-
pf ppf "%a: %a@," (styled `Bold string) "Booktitle" string (booktitle p);
-
let pgs = pages p in
-
if pgs <> "" then
-
pf ppf "%a: %a@," (styled `Bold string) "Pages" string pgs;
-
| "techreport" ->
-
pf ppf "%a: %a@," (styled `Bold string) "Institution" string (institution p);
-
(match number p with
-
| Some num -> pf ppf "%a: %a@," (styled `Bold string) "Number" string num
-
| None -> ());
-
| _ -> ());
-
pf ppf "@,";
-
pf ppf "%a:@," (styled `Bold string) "Abstract";
-
pf ppf "%a@," (styled `Faint string) (abstract p);
-
pf ppf "@]"
-55
stack/bushel/lib/paper.mli
···
-
type paper
-
-
type t =
-
{ slug : string
-
; ver : string
-
; paper : paper
-
; abstract : string
-
; latest : bool
-
}
-
-
type ts = t list
-
-
val tv : t list -> ts
-
val slug : t -> string
-
val title : t -> string
-
val authors : t -> string list
-
val project_slugs : t -> string list
-
val slides : t -> string list
-
val bibtype : t -> string
-
val journal : t -> string
-
val raw_json : t -> Ezjsonm.value
-
val doi : t -> string option
-
val volume : t -> string option
-
val video : t -> string option
-
val issue : t -> string option
-
val url : t -> string option
-
val best_url : t -> string option
-
val pages : t -> string
-
val abstract : t -> string
-
val institution : t -> string
-
val number : t -> string option
-
val editor : t -> string
-
val isbn : t -> string
-
val bib : t -> string
-
val year : t -> int
-
val publisher : t -> string
-
val booktitle : t -> string
-
val tags : t -> string list
-
val date : t -> int * int * int
-
val datetime : t -> Ptime.t
-
val compare : t -> t -> int
-
val get_papers : slug:string -> ts -> ts
-
val slugs : ts -> string list
-
val lookup : ts -> string -> t option
-
val of_md : slug:string -> ver:string -> string -> t
-
val to_yaml : ?abstract:string -> ver:string -> Ezjsonm.value -> string
-
val typesense_schema : Ezjsonm.value
-
-
type classification = Full | Short | Preprint
-
val string_of_classification : classification -> string
-
val classification_of_string : string -> classification
-
val classification : t -> classification
-
val selected : t -> bool
-
val note : t -> string option
-
val pp : Format.formatter -> t -> unit
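A minimal usage sketch of this interface, assuming a `papers : Paper.ts` value has already been loaded elsewhere; the `"unikernels"` slug is made up:

```ocaml
(* Filter to the selected papers, print them, then look one up by slug. *)
let report (papers : Paper.ts) =
  papers
  |> List.filter Paper.selected
  |> List.sort Paper.compare
  |> List.iter (Fmt.pr "%a@.@." Paper.pp);
  match Paper.lookup papers "unikernels" with
  | Some p -> Fmt.pr "%s (%d)@." (Paper.title p) (Paper.year p)
  | None -> prerr_endline "paper not found"
```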
-100
stack/bushel/lib/project.ml
···
-
type t =
-
{ slug : string
-
; title : string
-
; start : int (* year *)
-
; finish : int option
-
; tags : string list
-
; ideas : string
-
; body : string
-
}
-
-
type ts = t list
-
-
let tags p = p.tags
-
-
let compare a b =
-
match compare a.start b.start with
-
| 0 -> compare b.finish a.finish
-
| n -> n
-
;;
-
-
let title { title; _ } = title
-
let body { body; _ } = body
-
let ideas { ideas; _ } = ideas
-
-
let of_md fname =
-
match Jekyll_post.of_string ~fname (Util.read_file fname) with
-
| Error (`Msg m) -> failwith ("Project.of_file: " ^ m)
-
| Ok jp ->
-
let fields = jp.Jekyll_post.fields in
-
let { Jekyll_post.title; date; slug; body; _ } = jp in
-
let (start, _, _), _ = Ptime.to_date_time date in
-
let finish =
-
match Jekyll_format.find "finish" fields with
-
| Some (`String date) ->
-
let date = Jekyll_format.parse_date_exn date in
-
let (finish, _, _), _ = Ptime.to_date_time date in
-
Some finish
-
| _ -> None
-
in
-
let ideas =
-
match Jekyll_format.find "ideas" fields with
-
| Some (`String e) -> e
-
| _ -> failwith ("no ideas key in " ^ fname)
-
in
-
let tags =
-
match Jekyll_format.find "tags" fields with
-
| Some (`A tags) -> List.map Yaml.Util.to_string_exn tags
-
| _ -> []
-
in
-
{ slug; title; start; finish; ideas; tags; body }
-
;;
-
-
let lookup projects slug = List.find_opt (fun p -> p.slug = slug) projects
-
-
(* TODO:claude *)
-
let typesense_schema =
-
let open Ezjsonm in
-
dict [
-
("name", string "projects");
-
("fields", list (fun d -> dict d) [
-
[("name", string "id"); ("type", string "string")];
-
[("name", string "title"); ("type", string "string")];
-
[("name", string "description"); ("type", string "string")];
-
[("name", string "start_year"); ("type", string "int32")];
-
[("name", string "finish_year"); ("type", string "int32"); ("optional", bool true)];
-
[("name", string "date"); ("type", string "string")];
-
[("name", string "date_timestamp"); ("type", string "int64")];
-
[("name", string "tags"); ("type", string "string[]"); ("facet", bool true)];
-
[("name", string "repository_url"); ("type", string "string"); ("optional", bool true)];
-
[("name", string "homepage_url"); ("type", string "string"); ("optional", bool true)];
-
[("name", string "languages"); ("type", string "string[]"); ("facet", bool true); ("optional", bool true)];
-
[("name", string "license"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];
-
[("name", string "status"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];
-
[("name", string "related_papers"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "related_talks"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "body"); ("type", string "string"); ("optional", bool true)];
-
[("name", string "ideas"); ("type", string "string"); ("optional", bool true)];
-
]);
-
("default_sorting_field", string "date_timestamp");
-
]
-
-
(** TODO:claude Pretty-print a project with ANSI formatting *)
-
let pp ppf p =
-
let open Fmt in
-
pf ppf "@[<v>";
-
pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Project";
-
pf ppf "%a: %a@," (styled `Bold string) "Slug" string p.slug;
-
pf ppf "%a: %a@," (styled `Bold string) "Title" string (title p);
-
pf ppf "%a: %d@," (styled `Bold string) "Start" p.start;
-
(match p.finish with
-
| Some year -> pf ppf "%a: %d@," (styled `Bold string) "Finish" year
-
| None -> pf ppf "%a: ongoing@," (styled `Bold string) "Finish");
-
let t = tags p in
-
if t <> [] then
-
pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Tags" (list ~sep:comma string) t;
-
pf ppf "%a: %a@," (styled `Bold string) "Ideas" string (ideas p);
-
pf ppf "@,";
-
pf ppf "%a:@," (styled `Bold string) "Body";
-
pf ppf "%a@," string (body p);
-
pf ppf "@]"
-21
stack/bushel/lib/project.mli
···
-
type t =
-
{ slug : string
-
; title : string
-
; start : int
-
; finish : int option
-
; tags : string list
-
; ideas : string
-
; body : string
-
}
-
-
type ts = t list
-
-
val title : t -> string
-
val body : t -> string
-
val ideas : t -> string
-
val lookup : t list -> string -> t option
-
val tags : t -> string list
-
val compare : t -> t -> int
-
val of_md : string -> t
-
val typesense_schema : Ezjsonm.value
-
val pp : Format.formatter -> t -> unit
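For reference, `of_md` expects Jekyll-style frontmatter with a mandatory `ideas` key (it fails otherwise) plus optional `finish` and `tags`. A hypothetical project file and loader call (the path and field values are made up):

```ocaml
(* Hypothetical data/projects/2016-01-01-bushel.md:
   ---
   title: Bushel
   ideas: resolvable-markdown webring datamodel
   finish: 2026-01-01
   tags:
     - webring
     - ocaml
   ---
   Body text describing the project goes here.
*)
let () =
  let p = Project.of_md "data/projects/2016-01-01-bushel.md" in
  Fmt.pr "%a@." Project.pp p
```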
-44
stack/bushel/lib/srcsetter.ml
···
-
module MS = Map.Make (String)
-
-
type t =
-
{ name : string
-
; slug : string
-
; origin : string
-
; dims : int * int
-
; variants : (int * int) MS.t
-
}
-
-
type ts = t list
-
-
let v name slug origin variants dims = { name; slug; origin; variants; dims }
-
let slug { slug; _ } = slug
-
let origin { origin; _ } = origin
-
let name { name; _ } = name
-
let dims { dims; _ } = dims
-
let variants { variants; _ } = variants
-
-
let dims_json_t =
-
let open Jsont in
-
let dec x y = x, y in
-
let enc (w, h) = function
-
| 0 -> w
-
| _ -> h
-
in
-
t2 ~dec ~enc uint16
-
;;
-
-
let json_t =
-
let open Jsont in
-
let open Jsont.Object in
-
map ~kind:"Entry" v
-
|> mem "name" string ~enc:name
-
|> mem "slug" string ~enc:slug
-
|> mem "origin" string ~enc:origin
-
|> mem "variants" (as_string_map dims_json_t) ~enc:variants
-
|> mem "dims" dims_json_t ~enc:dims
-
|> finish
-
;;
-
-
let list = Jsont.list json_t
-
let list_to_json es = Jsont_bytesrw.encode_string list ~format:Jsont.Indent es
-
let list_of_json = Jsont_bytesrw.decode_string list
-21
stack/bushel/lib/srcsetter.mli
···
-
module MS : Map.S with type key = string
-
-
type t =
-
{ name : string
-
; slug : string
-
; origin : string
-
; dims : int * int
-
; variants : (int * int) MS.t
-
}
-
-
type ts = t list
-
-
val origin : t -> string
-
val slug : t -> string
-
val name : t -> string
-
val dims : t -> int * int
-
val variants : t -> (int * int) MS.t
-
val list_to_json : t list -> (string, string) result
-
val list_of_json : string -> (t list, string) result
-
val json_t : t Jsont.t
-
val list : t list Jsont.t
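A small sketch of the serialisation side, building one entry in memory; the names and dimensions below are made up:

```ocaml
let () =
  let variants = Srcsetter.MS.singleton "cover-320w" (320, 240) in
  let entry =
    { Srcsetter.name = "cover"; slug = "cover"; origin = "originals/cover.jpg"
    ; dims = (1024, 768); variants }
  in
  match Srcsetter.list_to_json [ entry ] with
  | Ok json -> print_string json
  | Error err -> prerr_endline err
```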
-114
stack/bushel/lib/tags.ml
···
-
open Entry
-
-
type t =
-
[ `Slug of string (* :foo points to the specific slug foo *)
-
| `Contact of string (* @foo points to contact foo *)
-
| `Set of string (* #papers points to all Paper entries *)
-
| `Text of string (* foo points to a free text "foo" *)
-
| `Year of int (* a number strictly between 1900 and 2100 is interpreted as a year *)
-
]
-
-
let is_text = function
-
| `Text _ -> true
-
| _ -> false
-
;;
-
-
let is_slug = function
-
| `Slug _ -> true
-
| _ -> false
-
;;
-
-
let is_set = function
-
| `Set _ -> true
-
| _ -> false
-
;;
-
-
let is_year = function
-
| `Year _ -> true
-
| _ -> false
-
;;
-
-
let of_string s : t =
-
if String.length s < 2 then invalid_arg ("Tags.of_string: " ^ s);
-
match s.[0] with
-
| ':' ->
-
let slug = String.sub s 1 (String.length s - 1) in
-
`Slug slug
-
| '@' -> failwith "TODO add contacts to entries"
-
| '#' ->
-
let cl = String.sub s 1 (String.length s - 1) in
-
`Set cl
-
| _ ->
-
(try
-
let x = int_of_string s in
-
if x > 1900 && x < 2100 then `Year x else `Text s
-
with
-
| _ -> `Text s)
-
;;
-
-
let of_string_list l = List.map of_string l
-
-
let to_string = function
-
| `Slug t -> ":" ^ t
-
| `Contact c -> "@" ^ c
-
| `Set s -> "#" ^ s
-
| `Text t -> t
-
| `Year y -> string_of_int y
-
;;
-
-
let to_raw_string = function
-
| `Slug t -> t
-
| `Contact c -> c
-
| `Set s -> s
-
| `Text t -> t
-
| `Year y -> string_of_int y
-
;;
-
-
let pp ppf t = Fmt.string ppf (to_string t)
-
-
let tags_of_ent _entries ent : t list =
-
match ent with
-
| `Paper p -> of_string_list @@ Paper.tags p
-
| `Video v -> of_string_list v.Video.tags
-
| `Project p -> of_string_list @@ Project.tags p
-
| `Note n -> of_string_list @@ Note.tags n
-
| `Idea i -> of_string_list i.Idea.tags
-
;;
-
-
let mentions tags =
-
List.filter
-
(function
-
| `Contact _ | `Slug _ -> true
-
| _ -> false)
-
tags
-
;;
-
-
let mention_entries entries tags =
-
let lk t =
-
try Some (lookup_exn entries t)
-
with Not_found -> Printf.eprintf "mention_entries not found: %s\n%!" t; None
-
in
-
List.filter_map
-
(function
-
| `Slug t -> lk t
-
| _ -> None)
-
tags
-
;;
-
-
let count_tags ?h fn vs =
-
let h =
-
match h with
-
| Some h -> h
-
| None -> Hashtbl.create 42
-
in
-
List.iter
-
(fun ent ->
-
List.iter
-
(fun tag ->
-
match Hashtbl.find_opt h tag with
-
| Some num -> Hashtbl.replace h tag (num + 1)
-
| None -> Hashtbl.add h tag 1)
-
(fn ent))
-
vs;
-
h
-
;;
-25
stack/bushel/lib/tags.mli
···
-
type t =
-
[ `Contact of string
-
| `Set of string
-
| `Slug of string
-
| `Text of string
-
| `Year of int
-
]
-
-
val is_text : t -> bool
-
val is_set : t -> bool
-
val is_slug : t -> bool
-
val is_year : t -> bool
-
val of_string : string -> t
-
val to_string : t -> string
-
val to_raw_string : t -> string
-
val pp : Format.formatter -> t -> unit
-
val mention_entries : Entry.t -> t list -> Entry.entry list
-
val tags_of_ent : Entry.t -> Entry.entry -> t list
-
val mentions : t list -> t list
-
-
val count_tags
-
: ?h:('a, int) Hashtbl.t
-
-> ('b -> 'a list)
-
-> 'b list
-
-> ('a, int) Hashtbl.t
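A parsing sketch for the tag syntax above; `@contact` tags currently raise (per the TODO in `of_string`), so they are omitted here:

```ocaml
let () =
  [ ":bushel"; "#papers"; "2023"; "unikernels" ]
  |> List.map Tags.of_string
  |> List.iter (fun t ->
    Fmt.pr "%a  slug=%b set=%b year=%b@." Tags.pp t
      (Tags.is_slug t) (Tags.is_set t) (Tags.is_year t))
```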
-80
stack/bushel/lib/util.ml
···
-
let first_hunk s =
-
let lines = String.split_on_char '\n' s in
-
let rec aux acc = function
-
| [] -> String.concat "\n" (List.rev acc)
-
| "" :: "" :: _ -> String.concat "\n" (List.rev acc)
-
| line :: rest -> aux (line :: acc) rest
-
in
-
aux [] lines
-
;;
-
-
let first_and_last_hunks s =
-
let lines = String.split_on_char '\n' s in
-
let rec aux acc = function
-
| [] -> String.concat "\n" (List.rev acc), ""
-
| "" :: "" :: rest ->
-
String.concat "\n" (List.rev acc), String.concat "\n" rest
-
| line :: rest -> aux (line :: acc) rest
-
in
-
aux [] lines
-
;;
-
-
(* Find all footnote definition lines in text *)
-
let find_footnote_lines s =
-
let lines = String.split_on_char '\n' s in
-
let is_footnote_def line =
-
String.length line > 3 &&
-
line.[0] = '[' &&
-
line.[1] = '^' &&
-
String.contains line ':' &&
-
let colon_pos = String.index line ':' in
-
colon_pos > 2 && line.[colon_pos - 1] = ']'
-
in
-
let is_continuation line =
-
String.length line > 0 && (line.[0] = ' ' || line.[0] = '\t')
-
in
-
let rec collect_footnotes acc in_footnote = function
-
| [] -> List.rev acc
-
| line :: rest ->
-
if is_footnote_def line then
-
collect_footnotes (line :: acc) true rest
-
else if in_footnote && is_continuation line then
-
collect_footnotes (line :: acc) true rest
-
else
-
collect_footnotes acc false rest
-
in
-
collect_footnotes [] false lines
-
;;
-
-
(* Augment first hunk with footnote definitions from last hunk *)
-
let first_hunk_with_footnotes s =
-
let first, last = first_and_last_hunks s in
-
let footnote_lines = find_footnote_lines last in
-
if footnote_lines = [] then first
-
else first ^ "\n\n" ^ String.concat "\n" footnote_lines
-
;;
-
-
let count_words (text : string) : int =
-
let len = String.length text in
-
let rec count_words_helper (index : int) (in_word : bool) (count : int) : int =
-
if index >= len
-
then if in_word then count + 1 else count
-
else (
-
let char = String.get text index in
-
let is_whitespace =
-
Char.equal char ' '
-
|| Char.equal char '\t'
-
|| Char.equal char '\n'
-
|| Char.equal char '\r'
-
in
-
if is_whitespace
-
then
-
if in_word
-
then count_words_helper (index + 1) false (count + 1)
-
else count_words_helper (index + 1) false count
-
else count_words_helper (index + 1) true count)
-
in
-
count_words_helper 0 false 0
-
;;
-
-
let read_file file = In_channel.(with_open_bin file input_all)
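A sketch of the summary helpers above (assuming `Util` is not restricted by an `.mli`); note the hunk boundary as written is two consecutive blank lines, and footnote definitions from the remainder are re-attached to the summary:

```ocaml
let () =
  let body =
    "Intro paragraph with a note.[^1]\n\n\n\
     The rest of the post, dropped from the summary.\n\n\n\
     [^1]: Footnote definition carried back into the summary."
  in
  let summary = Util.first_hunk_with_footnotes body in
  Printf.printf "%s\n\n(%d words)\n" summary (Util.count_words summary)
```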
-166
stack/bushel/lib/video.ml
···
-
type t =
-
{ slug : string
-
; title : string
-
; published_date : Ptime.t
-
; uuid : string
-
; description : string
-
; url : string
-
; talk : bool
-
; paper : string option
-
; project : string option
-
; tags : string list
-
}
-
-
type ts = t list
-
-
let get_shadow fs k =
-
match List.assoc_opt k fs with
-
| Some v -> Some v
-
| None -> List.assoc_opt ("_" ^ k) fs
-
;;
-
-
let get_shadow_string fs k =
-
match get_shadow fs k with
-
| Some (`String v) -> v
-
| _ -> failwith "invalid yaml"
-
;;
-
-
let get_shadow_bool fs k =
-
match get_shadow fs k with
-
| Some (`Bool v) -> v
-
| _ -> failwith "invalid yaml"
-
;;
-
-
let compare a b = Ptime.compare b.published_date a.published_date
-
let url v = v.url
-
let body { description; _ } = description
-
let title { title; _ } = title
-
let uuid { uuid; _ } = uuid
-
let paper { paper; _ } = paper
-
let project { project; _ } = project
-
let slug { slug; _ } = slug
-
let date { published_date; _ } = published_date |> Ptime.to_date
-
let datetime { published_date; _ } = published_date
-
let talk { talk; _ } = talk
-
-
let t_of_yaml ~description = function
-
| `O fields ->
-
let slug = get_shadow_string fields "uuid" in
-
let title = get_shadow_string fields "title" in
-
let published_date =
-
get_shadow_string fields "published_date"
-
|> Ptime.of_rfc3339
-
|> Result.get_ok
-
|> fun (a, _, _) -> a
-
in
-
let uuid = get_shadow_string fields "uuid" in
-
let url = get_shadow_string fields "url" in
-
let talk =
-
try get_shadow_bool fields "talk" with
-
| _ -> false
-
in
-
let tags =
-
match List.assoc_opt "tags" fields with
-
| Some l -> Ezjsonm.get_list Ezjsonm.get_string l
-
| _ -> []
-
in
-
let paper =
-
try Some (get_shadow_string fields "paper") with
-
| _ -> None
-
in
-
let project =
-
try Some (get_shadow_string fields "project") with
-
| _ -> None
-
in
-
{ slug; title; tags; published_date; uuid; description; talk; paper; project; url }
-
| _ -> failwith "invalid yaml"
-
;;
-
-
let to_yaml t =
-
`O [
-
("title", `String t.title);
-
("description", `String t.description);
-
("url", `String t.url);
-
("uuid", `String t.uuid);
-
("slug", `String t.slug);
-
("published_date", `String (Ptime.to_rfc3339 t.published_date));
-
("talk", `Bool t.talk);
-
("tags", `A (List.map (fun t -> `String t) t.tags));
-
("paper", match t.paper with None -> `Null | Some p -> `String p);
-
("project", match t.project with None -> `Null | Some p -> `String p)
-
]
-
-
let to_file output_dir t =
-
let file_path = Fpath.v (Filename.concat output_dir (t.uuid ^ ".md")) in
-
let yaml = to_yaml t in
-
let yaml_str = Yaml.to_string_exn yaml in
-
let content = "---\n" ^ yaml_str ^ "---\n" in
-
Bos.OS.File.write file_path content
-
;;
-
-
let of_md fname =
-
(* TODO fix Jekyll_post to not error on no date *)
-
let fname' = "2000-01-01-" ^ Filename.basename fname in
-
match Jekyll_post.of_string ~fname:fname' (Util.read_file fname) with
-
| Error (`Msg m) -> failwith ("paper_of_md: " ^ m)
-
| Ok jp ->
-
let fields = jp.Jekyll_post.fields |> Jekyll_format.fields_to_yaml in
-
let { Jekyll_post.body; _ } = jp in
-
t_of_yaml ~description:body fields
-
;;
-
-
(* TODO:claude *)
-
let typesense_schema =
-
let open Ezjsonm in
-
dict [
-
("name", string "videos");
-
("fields", list (fun d -> dict d) [
-
[("name", string "id"); ("type", string "string")];
-
[("name", string "title"); ("type", string "string")];
-
[("name", string "description"); ("type", string "string")];
-
[("name", string "published_date"); ("type", string "string")];
-
[("name", string "date"); ("type", string "string")];
-
[("name", string "date_timestamp"); ("type", string "int64")];
-
[("name", string "tags"); ("type", string "string[]"); ("facet", bool true)];
-
[("name", string "url"); ("type", string "string")];
-
[("name", string "uuid"); ("type", string "string")];
-
[("name", string "is_talk"); ("type", string "bool")];
-
[("name", string "paper"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "project"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "video_url"); ("type", string "string"); ("optional", bool true)];
-
[("name", string "embed_url"); ("type", string "string"); ("optional", bool true)];
-
[("name", string "duration"); ("type", string "int32"); ("optional", bool true)];
-
[("name", string "channel"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];
-
[("name", string "platform"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];
-
[("name", string "views"); ("type", string "int32"); ("optional", bool true)];
-
[("name", string "related_papers"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "related_talks"); ("type", string "string[]"); ("optional", bool true)];
-
]);
-
("default_sorting_field", string "date_timestamp");
-
]
-
-
(** TODO:claude Pretty-print a video with ANSI formatting *)
-
let pp ppf v =
-
let open Fmt in
-
pf ppf "@[<v>";
-
pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Video";
-
pf ppf "%a: %a@," (styled `Bold string) "Slug" string (slug v);
-
pf ppf "%a: %a@," (styled `Bold string) "UUID" string (uuid v);
-
pf ppf "%a: %a@," (styled `Bold string) "Title" string (title v);
-
let (year, month, day) = date v in
-
pf ppf "%a: %04d-%02d-%02d@," (styled `Bold string) "Date" year month day;
-
pf ppf "%a: %a@," (styled `Bold string) "URL" string (url v);
-
pf ppf "%a: %b@," (styled `Bold string) "Talk" (talk v);
-
(match paper v with
-
| Some p -> pf ppf "%a: %a@," (styled `Bold string) "Paper" string p
-
| None -> ());
-
(match project v with
-
| Some p -> pf ppf "%a: %a@," (styled `Bold string) "Project" string p
-
| None -> ());
-
let t = v.tags in
-
if t <> [] then
-
pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Tags" (list ~sep:comma string) t;
-
pf ppf "@,";
-
pf ppf "%a:@," (styled `Bold string) "Description";
-
pf ppf "%a@," string v.description;
-
pf ppf "@]"
-32
stack/bushel/lib/video.mli
···
-
type t =
-
{ slug : string
-
; title : string
-
; published_date : Ptime.t
-
; uuid : string
-
; description : string
-
; url : string
-
; talk : bool
-
; paper : string option
-
; project : string option
-
; tags : string list
-
}
-
-
type ts = t list
-
-
val compare : t -> t -> int
-
val url : t -> string
-
val body : t -> string
-
val title : t -> string
-
val uuid : t -> string
-
val paper : t -> string option
-
val project : t -> string option
-
val slug : t -> string
-
val date : t -> Ptime.date
-
val datetime : t -> Ptime.t
-
val talk : t -> bool
-
val of_md : string -> t
-
val t_of_yaml : description:string -> Yaml.value -> t
-
val to_yaml : t -> Yaml.value
-
val to_file : string -> t -> (unit, [> `Msg of string]) result
-
val typesense_schema : Ezjsonm.value
-
val pp : Format.formatter -> t -> unit
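And a round-trip sketch for this module; the input path and output directory are hypothetical:

```ocaml
let () =
  let v = Video.of_md "data/videos/raw/talk.md" in
  Fmt.pr "%a@." Video.pp v;
  match Video.to_file "data/videos" v with
  | Ok () -> ()
  | Error (`Msg m) -> prerr_endline ("write failed: " ^ m)
```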