-9
stack/bushel/.gitignore
-9
stack/bushel/.gitignore
-1
stack/bushel/.ocamlformat
-1
stack/bushel/.ocamlformat
···
-127
stack/bushel/bin/bushel_bibtex.ml
-127
stack/bushel/bin/bushel_bibtex.ml
···
-67
stack/bushel/bin/bushel_common.ml
-67
stack/bushel/bin/bushel_common.ml
···-(** TODO:claude Get default base directory from BUSHEL_DATA env variable or current directory *)-let doc = "Base directory containing Bushel data (defaults to BUSHEL_DATA env var or current directory)" in
-295
stack/bushel/bin/bushel_doi.ml
-295
stack/bushel/bin/bushel_doi.ml
···-(* Extract publisher URLs from notes (Elsevier, ScienceDirect, IEEE, Nature, ACM, Sage, UPenn, Springer, Taylor & Francis, OUP) *)-(* Matches publisher URLs: linkinghub.elsevier.com, sciencedirect.com/science/article, ieeexplore.ieee.org, academic.oup.com, nature.com, journals.sagepub.com, garfield.library.upenn.edu, link.springer.com, tandfonline.com/doi, and dl.acm.org/doi/10.* URLs *)-let publisher_pattern = Re.Perl.compile_pat "https?://(?:(?:www\\.)?(?:linkinghub\\.elsevier\\.com|(?:www\\.)?sciencedirect\\.com/science/article|ieeexplore\\.ieee\\.org|academic\\.oup\\.com|nature\\.com|journals\\.sagepub\\.com|garfield\\.library\\.upenn\\.edu|link\\.springer\\.com)/[^)\\s\"'>]+|(?:dl\\.acm\\.org|(?:www\\.)?tandfonline\\.com)/doi(?:/pdf)?/10\\.[^)\\s\"'>]+)" in-let entry = Bushel.Doi_entry.create_resolved ~doi ~title ~authors ~year ~bibtype ~publisher ~source_urls:[doi_url] () in-Lwt.return (Bushel.Doi_entry.create_failed ~doi ~error:(Printexc.to_string e) ~source_urls:[doi_url] ())-Lwt.return (Bushel.Doi_entry.create_failed ~doi ~error:(Printexc.to_string exn) ~source_urls:[doi_url] ())-Lwt.return (Bushel.Doi_entry.create_failed ~doi:url ~error:"Empty response" ~source_urls:[url] ())-let entry = Bushel.Doi_entry.create_resolved ~doi ~title ~authors ~year ~bibtype ~publisher ~source_urls () in-Lwt.return (Bushel.Doi_entry.create_failed ~doi:url ~error:(Printexc.to_string e) ~source_urls:[url] ())-Lwt.return (Bushel.Doi_entry.create_failed ~doi:url ~error:(Printexc.to_string exn) ~source_urls:[url] ())-Printf.printf "Resolving %d DOI(s) and %d URL(s)...\n%!" (List.length dois_to_resolve) (List.length urls_to_resolve);-match Bushel.Doi_entry.find_by_doi_including_ignored !merged new_entry.Bushel.Doi_entry.doi with-(* DOI already exists - merge the entries by combining source_urls and preserving ignore flag *)-merged := combined :: (List.filter (fun e -> e.Bushel.Doi_entry.doi <> new_entry.Bushel.Doi_entry.doi) !merged)
-182
stack/bushel/bin/bushel_faces.ml
-182
stack/bushel/bin/bushel_faces.ml
···-Lwt.return (`Skipped (sprintf "Thumbnail for '%s' already exists at %s" (List.hd names) output_path))-let ok_count = List.length (List.filter (fun (_, r) -> match r with `Ok _ -> true | _ -> false) results) in-let error_count = List.length (List.filter (fun (_, r) -> match r with `Error _ -> true | _ -> false) results) in-let skipped_count = List.length (List.filter (fun (_, r) -> match r with `Skipped _ -> true | _ -> false) results) in-) $ Bushel_common.base_dir $ Bushel_common.output_dir ~default:"." $ Bushel_common.handle_opt $-Bushel_common.url_term ~default:"https://photos.recoil.org" ~doc:"Base URL of the Immich instance")-let info = Cmd.info "faces" ~doc:"Retrieve face thumbnails for Bushel contacts from Immich" in
-77
stack/bushel/bin/bushel_ideas.ml
-77
stack/bushel/bin/bushel_ideas.ml
···
-227
stack/bushel/bin/bushel_info.ml
-227
stack/bushel/bin/bushel_info.ml
···-Fmt.pr "%a@," (Fmt.styled `Bold Fmt.string) (if notes_only then "Available notes:" else "Available entries:");-let doc = "The slug of the entry to display (with or without leading ':'), or contact handle (with '@' prefix). If not provided, lists all available slugs." in
-549
stack/bushel/bin/bushel_links.ml
-549
stack/bushel/bin/bushel_links.ml
···-print_endline (Fmt.str "Including only domains: %s" (String.concat ", " include_domains_list));-let merged_links = Bushel.Link.merge_links ~prefer_new_date:true existing_links !extracted_links in-let upload_to_karakeep base_url api_key_opt links_file tag max_concurrent delay_seconds limit verbose =-(batch_num + 1) batch_successes (List.length batch) new_total (new_total + (List.length links_to_upload - new_total));-Cmd.v info Term.(const update_from_karakeep $ base_url_arg $ api_key_arg $ tag_arg $ links_file_arg $ download_assets_arg)-Cmd.v info Term.(const update_from_bushel $ base_dir_arg $ links_file_arg $ include_domains_arg $ exclude_domains_arg)-Cmd.v info Term.(const upload_to_karakeep $ base_url_arg $ api_key_arg $ links_file_arg $ tag_arg $ concurrent_arg $ delay_arg $ limit_arg $ verbose_arg)
-119
stack/bushel/bin/bushel_main.ml
-119
stack/bushel/bin/bushel_main.ml
···
-186
stack/bushel/bin/bushel_missing.ml
-186
stack/bushel/bin/bushel_missing.ml
···-Fmt.pr "@.%a (%d):@," (Fmt.styled `Bold Fmt.string) title (List.length entries_with_broken_refs);-let missing_cmd base_dir check_thumbnails check_synopsis check_tags check_refs _env _xdg _profile =-`P "This command scans all entries and reports any that are missing thumbnails, synopsis, tags, or have broken slugs/contact handles.";
-131
stack/bushel/bin/bushel_note_doi.ml
-131
stack/bushel/bin/bushel_note_doi.ml
···
-88
stack/bushel/bin/bushel_obsidian.ml
-88
stack/bushel/bin/bushel_obsidian.ml
···
-74
stack/bushel/bin/bushel_paper.ml
-74
stack/bushel/bin/bushel_paper.ml
···
-57
stack/bushel/bin/bushel_paper_classify.ml
-57
stack/bushel/bin/bushel_paper_classify.ml
···
-325
stack/bushel/bin/bushel_paper_tex.ml
-325
stack/bushel/bin/bushel_paper_tex.ml
···-let journal_name = try journal paper |> clean_venue_name |> escape_latex with _ -> "Journal" in-let conf_name = try booktitle paper |> clean_venue_name |> escape_latex with _ -> "Conference" in-let journal_str = try Bushel.Paper.journal paper |> clean_venue_name |> escape_latex with _ -> "" in-let booktitle_str = try Bushel.Paper.booktitle paper |> clean_venue_name |> escape_latex with _ -> "" in-let conf_name = try Bushel.Paper.booktitle paper |> clean_venue_name |> escape_latex with _ -> "" in-let journal_str = try Bushel.Paper.journal paper |> clean_venue_name |> escape_latex with _ -> "" in-(* Add DOI or PDF link if available, but not for in-press papers unless they have explicit URL *)-let non_selected_papers = List.filter (fun p -> not (Bushel.Paper.selected p)) latest_papers in-Printf.printf "Generated %s/papers_full.tex with %d entries\n" output_dir (List.length sorted_full);-Printf.printf "Generated %s/papers_short.tex with %d entries\n" output_dir (List.length sorted_short);-Printf.printf "Generated %s/papers_preprint.tex with %d entries\n" output_dir (List.length sorted_preprint);-Printf.printf "Generated %s/papers_selected.tex with %d entries\n" output_dir (List.length sorted_selected);
-48
stack/bushel/bin/bushel_search.ml
-48
stack/bushel/bin/bushel_search.ml
···-Printf.eprintf "Error: API key is required. Use TYPESENSE_API_KEY environment variable or create .typesense-key file.\n";-let combined_response = Bushel.Typesense.combine_multisearch_results multisearch_resp ~limit ~offset () in-Printf.printf "Found %d results (%.2fms)\n\n" combined_response.total combined_response.query_time;-Printf.printf "%d. %s (score: %.2f)\n" (i + 1) (Bushel.Typesense.pp_search_result_oneline hit) hit.Bushel.Typesense.score
-70
stack/bushel/bin/bushel_thumbs.ml
-70
stack/bushel/bin/bushel_thumbs.ml
···-sprintf "magick -density 600 -quality 100 %s[0] -gravity North -crop 100%%x50%%+0+0 -resize %s %s"-Term.(const (fun base_dir output_dir _env _xdg _profile -> process_papers base_dir output_dir; 0) $
-188
stack/bushel/bin/bushel_typesense.ml
-188
stack/bushel/bin/bushel_typesense.ml
···-Printf.eprintf "Error: API key is required. Use TYPESENSE_API_KEY environment variable or create .typesense-key file.\n";-Printf.eprintf "Error: OpenAI API key is required for embeddings. Use OPENAI_API_KEY environment variable or create .openrouter-api file.\n";-Printf.eprintf "Error: API key is required. Use TYPESENSE_API_KEY environment variable or create .typesense-key file.\n";-Printf.eprintf "Error: API key is required. Use TYPESENSE_API_KEY environment variable or create .typesense-key file.\n";-let doc = "Specific collection to search (contacts, papers, projects, notes, videos, ideas)" in-`P "The API key can be read from .typesense-key file or TYPESENSE_API_KEY environment variable.";-`P "Upload all bushel object types (contacts, papers, projects, notes, videos, ideas) to a Typesense search engine instance.";
-138
stack/bushel/bin/bushel_video.ml
-138
stack/bushel/bin/bushel_video.ml
···-Lwt_main.run (process_videos output_dir overwrite base_url channel fetch_thumbs thumbs_dir); 0)
-81
stack/bushel/bin/bushel_video_thumbs.ml
-81
stack/bushel/bin/bushel_video_thumbs.ml
···
-20
stack/bushel/bin/dune
-20
stack/bushel/bin/dune
···-(modules bushel_main bushel_bibtex bushel_ideas bushel_info bushel_missing bushel_note_doi bushel_obsidian bushel_paper_classify bushel_paper_tex bushel_thumbs bushel_search)-(libraries bushel bushel_common cmdliner eio eio_main eiocmd yaml ezjsonm zotero-translation fmt cmarkit uri unix ptime.clock.os crockford))
-51
stack/bushel/bushel.opam
-51
stack/bushel/bushel.opam
···
-3
stack/bushel/bushel.opam.template
-3
stack/bushel/bushel.opam.template
-35
stack/bushel/dune-project
-35
stack/bushel/dune-project
···
-79
stack/bushel/lib/bushel.ml
-79
stack/bushel/lib/bushel.ml
···-let entries = Entry.v ~images ~papers ~notes ~projects ~ideas ~videos ~contacts ~data_dir:(base ^ "/data") in
-27
stack/bushel/lib/bushel.mli
-27
stack/bushel/lib/bushel.mli
···
-172
stack/bushel/lib/contact.ml
-172
stack/bushel/lib/contact.ml
···-pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Aliases" (list ~sep:comma string) (List.tl ns);
-25
stack/bushel/lib/contact.mli
-25
stack/bushel/lib/contact.mli
···
-72
stack/bushel/lib/description.ml
-72
stack/bushel/lib/description.ml
···
-19
stack/bushel/lib/description.mli
-19
stack/bushel/lib/description.mli
···-val note_description : Note.t -> date_str:string -> lookup_fn:(string -> string option) -> string-val video_description : Video.t -> date_str:string -> lookup_fn:(string -> string option) -> string
-147
stack/bushel/lib/doi_entry.ml
-147
stack/bushel/lib/doi_entry.ml
···-{ doi; title; authors; year; bibtype; publisher; resolved_at; source_urls; status = Resolved; ignore = false }-(* Support both old source_url (single) and new source_urls (list) for backwards compatibility *)-{ doi; title; authors; year; bibtype; publisher; resolved_at; source_urls; status = Resolved; ignore }
-51
stack/bushel/lib/doi_entry.mli
-51
stack/bushel/lib/doi_entry.mli
···-source_urls: string list; (** All URLs that resolve to this DOI (publisher links, doi.org URLs, etc) *)
-19
stack/bushel/lib/dune
-19
stack/bushel/lib/dune
-449
stack/bushel/lib/entry.ml
-449
stack/bushel/lib/entry.ml
···-{ slugs; papers; old_papers; notes; projects; ideas; videos; images; contacts; doi_entries; data_dir }-(* Use titleimage if set, otherwise extract first image from body, then try video, otherwise use slug_ent's thumbnail *)
-79
stack/bushel/lib/entry.mli
-79
stack/bushel/lib/entry.mli
···
-223
stack/bushel/lib/idea.ml
-223
stack/bushel/lib/idea.ml
···
-55
stack/bushel/lib/idea.mli
-55
stack/bushel/lib/idea.mli
···
-296
stack/bushel/lib/link.ml
-296
stack/bushel/lib/link.ml
···
-34
stack/bushel/lib/link.mli
-34
stack/bushel/lib/link.mli
···
-317
stack/bushel/lib/link_graph.ml
-317
stack/bushel/lib/link_graph.ml
···-Fmt.pf ppf "@[<v>Internal links: %d@,External links: %d@,Entries with outbound: %d@,Entries with backlinks: %d@]"
-781
stack/bushel/lib/md.ml
-781
stack/bushel/lib/md.ml
···-let mapper = Mapper.make ~inline:(make_validation_mapper entries broken_slugs broken_contacts) () in-(* Scan body for publisher URLs (Elsevier, ScienceDirect, IEEE, Nature, ACM, Sage, UPenn, Springer, Taylor & Francis, OUP) and resolve from cache *)-let publisher_pattern = Re.Perl.compile_pat "https?://(?:(?:www\\.)?(?:linkinghub\\.elsevier\\.com|(?:www\\.)?sciencedirect\\.com/science/article|ieeexplore\\.ieee\\.org|academic\\.oup\\.com|nature\\.com|journals\\.sagepub\\.com|garfield\\.library\\.upenn\\.edu|link\\.springer\\.com)/[^)\\s\"'>]+|(?:dl\\.acm\\.org|(?:www\\.)?tandfonline\\.com)/doi(?:/pdf)?/10\\.[^)\\s\"'>]+)" in
-73
stack/bushel/lib/md.mli
-73
stack/bushel/lib/md.mli
···-val note_references : Entry.t -> Contact.t -> Note.t -> (string * string * reference_source) list
-230
stack/bushel/lib/note.ml
-230
stack/bushel/lib/note.ml
···-{ title; draft; date; slug; synopsis; titleimage; index_page; perma; doi; body; via; updated; tags; sidebar; slug_ent; source; url; author; category }-[("name", string "type"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];-[("name", string "status"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];-[("name", string "source"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];-[("name", string "category"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];
-49
stack/bushel/lib/note.mli
-49
stack/bushel/lib/note.mli
···
-373
stack/bushel/lib/paper.ml
-373
stack/bushel/lib/paper.ml
···-let journal = try key paper "journal" |> J.get_string |> String.lowercase_ascii with _ -> "" in-let booktitle = try key paper "booktitle" |> J.get_string |> String.lowercase_ascii with _ -> "" in-let title_str = try key paper "title" |> J.get_string |> String.lowercase_ascii with _ -> "" in-if contains_any journal ["arxiv"] || contains_any booktitle ["arxiv"] || bibtype_lower = "misc" || bibtype_lower = "techreport"-Re.replace_string (Re.compile (Re.seq [Re.char '\n'; Re.char '\n'; Re.rep1 (Re.char '\n')])) ~by:"\n\n" trimmed_abs-pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Authors" (list ~sep:comma string) (authors p);
-55
stack/bushel/lib/paper.mli
-55
stack/bushel/lib/paper.mli
···
-100
stack/bushel/lib/project.ml
-100
stack/bushel/lib/project.ml
···-[("name", string "languages"); ("type", string "string[]"); ("facet", bool true); ("optional", bool true)];-[("name", string "license"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];-[("name", string "status"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];
-21
stack/bushel/lib/project.mli
-21
stack/bushel/lib/project.mli
···
-44
stack/bushel/lib/srcsetter.ml
-44
stack/bushel/lib/srcsetter.ml
···
-21
stack/bushel/lib/srcsetter.mli
-21
stack/bushel/lib/srcsetter.mli
···
-114
stack/bushel/lib/tags.ml
-114
stack/bushel/lib/tags.ml
···
-25
stack/bushel/lib/tags.mli
-25
stack/bushel/lib/tags.mli
···
-527
stack/bushel/lib/typesense.ml
-527
stack/bushel/lib/typesense.ml
···-let body = if body = "" then None else Some (Requests.Body.of_string Requests.Mime.json body) in-let abstract = Md.markdown_to_plaintext entries (Paper.abstract paper) |> truncate_for_embedding in-let description = Md.markdown_to_plaintext entries (Project.body project) |> truncate_for_embedding in-let description = Md.markdown_to_plaintext entries (Video.body video) |> truncate_for_embedding in-let description = Md.markdown_to_plaintext entries (Idea.body idea) |> truncate_for_embedding in-("contacts", add_embedding_field_to_schema Contact.typesense_schema config ["name"; "names"], (List.map contact_to_document contacts : Ezjsonm.value list));-("papers", add_embedding_field_to_schema Paper.typesense_schema config ["title"; "abstract"; "authors"], (List.map (paper_to_document entries) papers : Ezjsonm.value list));-("videos", add_embedding_field_to_schema Video.typesense_schema config ["title"; "description"], (List.map (video_to_document entries) videos : Ezjsonm.value list));-("projects", add_embedding_field_to_schema Project.typesense_schema config ["title"; "description"; "tags"], (List.map (project_to_document entries) projects : Ezjsonm.value list));-("notes", add_embedding_field_to_schema Note.typesense_schema config ["title"; "content"; "tags"], (List.map (note_to_document entries) notes : Ezjsonm.value list));-("ideas", add_embedding_field_to_schema Idea.typesense_schema config ["title"; "description"; "tags"], (List.map (idea_to_document entries) ideas : Ezjsonm.value list));-let upload_collection ((name, schema, documents) : string * Ezjsonm.value * Ezjsonm.value list) =-if String.contains line ':' && Str.string_match (Str.regexp ".*success.*true.*") line 0 then acc + 1 else acc) 0 lines in-if String.contains line ':' && Str.string_match (Str.regexp ".*success.*false.*") line 0 then acc + 1 else acc) 0 lines in-let failed_lines = List.filter (fun line -> Str.string_match (Str.regexp ".*success.*false.*") line 0) lines in-let search_collection ~sw ~env (config : config) collection_name query ?(limit=10) ?(offset=0) () =-let result = Typesense_client.search_collection client collection_name query ~limit ~offset () in-let combined_response = Typesense_client.combine_multisearch_results multisearch_resp ~limit ~offset () in-let combine_multisearch_results (multisearch_resp : multisearch_response) ?(limit=10) ?(offset=0) () =
-168
stack/bushel/lib/typesense.mli
-168
stack/bushel/lib/typesense.mli
···-let config = { endpoint = "https://search.example.com"; api_key = "xyz123"; openai_key = "sk-..." } in-env:< clock: [> float Eio.Time.clock_ty ] Eio.Resource.t; net: [> [> `Generic ] Eio.Net.ty ] Eio.Resource.t; .. > ->-env:< clock: [> float Eio.Time.clock_ty ] Eio.Resource.t; net: [> [> `Generic ] Eio.Net.ty ] Eio.Resource.t; .. > ->-env:< clock: [> float Eio.Time.clock_ty ] Eio.Resource.t; net: [> [> `Generic ] Eio.Net.ty ] Eio.Resource.t; .. > ->-env:< clock: [> float Eio.Time.clock_ty ] Eio.Resource.t; net: [> [> `Generic ] Eio.Net.ty ] Eio.Resource.t; .. > ->-env:< clock: [> float Eio.Time.clock_ty ] Eio.Resource.t; net: [> [> `Generic ] Eio.Net.ty ] Eio.Resource.t; .. > ->-env:< clock: float Eio.Time.clock_ty Eio.Resource.t; fs: Eio.Fs.dir_ty Eio.Path.t; net: [`Generic | `Unix] Eio.Net.ty Eio.Resource.t; .. > ->-env:< clock: float Eio.Time.clock_ty Eio.Resource.t; fs: Eio.Fs.dir_ty Eio.Path.t; net: [`Generic | `Unix] Eio.Net.ty Eio.Resource.t; .. > ->-env:< clock: float Eio.Time.clock_ty Eio.Resource.t; fs: Eio.Fs.dir_ty Eio.Path.t; net: [`Generic | `Unix] Eio.Net.ty Eio.Resource.t; .. > ->-val combine_multisearch_results : multisearch_response -> ?limit:int -> ?offset:int -> unit -> search_response-env:< clock: float Eio.Time.clock_ty Eio.Resource.t; fs: Eio.Fs.dir_ty Eio.Path.t; net: [`Generic | `Unix] Eio.Net.ty Eio.Resource.t; .. > ->
-80
stack/bushel/lib/util.ml
-80
stack/bushel/lib/util.ml
···
-166
stack/bushel/lib/video.ml
-166
stack/bushel/lib/video.ml
···-[("name", string "channel"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];-[("name", string "platform"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];
-32
stack/bushel/lib/video.mli
-32
stack/bushel/lib/video.mli
···