+9
stack/bushel/.gitignore
+9
stack/bushel/.gitignore
+1
stack/bushel/.ocamlformat
+1
stack/bushel/.ocamlformat
···
···
+127
stack/bushel/bin/bushel_bibtex.ml
+127
stack/bushel/bin/bushel_bibtex.ml
···
···
+76
stack/bushel/bin/bushel_common.ml
+76
stack/bushel/bin/bushel_common.ml
···
···+(** TODO:claude Get default base directory from BUSHEL_DATA env variable or current directory *)+let doc = "Base directory containing Bushel data (defaults to BUSHEL_DATA env var or current directory)" in
+295
stack/bushel/bin/bushel_doi.ml
+295
stack/bushel/bin/bushel_doi.ml
···
···+(* Extract publisher URLs from notes (Elsevier, ScienceDirect, IEEE, Nature, ACM, Sage, UPenn, Springer, Taylor & Francis, OUP) *)+(* Matches publisher URLs: linkinghub.elsevier.com, sciencedirect.com/science/article, ieeexplore.ieee.org, academic.oup.com, nature.com, journals.sagepub.com, garfield.library.upenn.edu, link.springer.com, tandfonline.com/doi, and dl.acm.org/doi/10.* URLs *)+let publisher_pattern = Re.Perl.compile_pat "https?://(?:(?:www\\.)?(?:linkinghub\\.elsevier\\.com|(?:www\\.)?sciencedirect\\.com/science/article|ieeexplore\\.ieee\\.org|academic\\.oup\\.com|nature\\.com|journals\\.sagepub\\.com|garfield\\.library\\.upenn\\.edu|link\\.springer\\.com)/[^)\\s\"'>]+|(?:dl\\.acm\\.org|(?:www\\.)?tandfonline\\.com)/doi(?:/pdf)?/10\\.[^)\\s\"'>]+)" in+let entry = Bushel.Doi_entry.create_resolved ~doi ~title ~authors ~year ~bibtype ~publisher ~source_urls:[doi_url] () in+Lwt.return (Bushel.Doi_entry.create_failed ~doi ~error:(Printexc.to_string e) ~source_urls:[doi_url] ())+Lwt.return (Bushel.Doi_entry.create_failed ~doi ~error:(Printexc.to_string exn) ~source_urls:[doi_url] ())+Lwt.return (Bushel.Doi_entry.create_failed ~doi:url ~error:"Empty response" ~source_urls:[url] ())+let entry = Bushel.Doi_entry.create_resolved ~doi ~title ~authors ~year ~bibtype ~publisher ~source_urls () in+Lwt.return (Bushel.Doi_entry.create_failed ~doi:url ~error:(Printexc.to_string e) ~source_urls:[url] ())+Lwt.return (Bushel.Doi_entry.create_failed ~doi:url ~error:(Printexc.to_string exn) ~source_urls:[url] ())+Printf.printf "Resolving %d DOI(s) and %d URL(s)...\n%!" (List.length dois_to_resolve) (List.length urls_to_resolve);+match Bushel.Doi_entry.find_by_doi_including_ignored !merged new_entry.Bushel.Doi_entry.doi with+(* DOI already exists - merge the entries by combining source_urls and preserving ignore flag *)+merged := combined :: (List.filter (fun e -> e.Bushel.Doi_entry.doi <> new_entry.Bushel.Doi_entry.doi) !merged)
+182
stack/bushel/bin/bushel_faces.ml
+182
stack/bushel/bin/bushel_faces.ml
···
···+Lwt.return (`Skipped (sprintf "Thumbnail for '%s' already exists at %s" (List.hd names) output_path))+let ok_count = List.length (List.filter (fun (_, r) -> match r with `Ok _ -> true | _ -> false) results) in+let error_count = List.length (List.filter (fun (_, r) -> match r with `Error _ -> true | _ -> false) results) in+let skipped_count = List.length (List.filter (fun (_, r) -> match r with `Skipped _ -> true | _ -> false) results) in+) $ Bushel_common.base_dir $ Bushel_common.output_dir ~default:"." $ Bushel_common.handle_opt $+Bushel_common.url_term ~default:"https://photos.recoil.org" ~doc:"Base URL of the Immich instance")+let info = Cmd.info "faces" ~doc:"Retrieve face thumbnails for Bushel contacts from Immich" in
+77
stack/bushel/bin/bushel_ideas.ml
+77
stack/bushel/bin/bushel_ideas.ml
···
···
+227
stack/bushel/bin/bushel_info.ml
+227
stack/bushel/bin/bushel_info.ml
···
···+Fmt.pr "%a@," (Fmt.styled `Bold Fmt.string) (if notes_only then "Available notes:" else "Available entries:");+let doc = "The slug of the entry to display (with or without leading ':'), or contact handle (with '@' prefix). If not provided, lists all available slugs." in+Term.(const info_cmd $ Bushel_common.setup_term $ Bushel_common.base_dir $ notes_only_flag $ slug_arg)
+549
stack/bushel/bin/bushel_links.ml
+549
stack/bushel/bin/bushel_links.ml
···
···+print_endline (Fmt.str "Including only domains: %s" (String.concat ", " include_domains_list));+let merged_links = Bushel.Link.merge_links ~prefer_new_date:true existing_links !extracted_links in+let upload_to_karakeep base_url api_key_opt links_file tag max_concurrent delay_seconds limit verbose =+(batch_num + 1) batch_successes (List.length batch) new_total (new_total + (List.length links_to_upload - new_total));+Cmd.v info Term.(const update_from_karakeep $ base_url_arg $ api_key_arg $ tag_arg $ links_file_arg $ download_assets_arg)+Cmd.v info Term.(const update_from_bushel $ base_dir_arg $ links_file_arg $ include_domains_arg $ exclude_domains_arg)+Cmd.v info Term.(const upload_to_karakeep $ base_url_arg $ api_key_arg $ links_file_arg $ tag_arg $ concurrent_arg $ delay_arg $ limit_arg $ verbose_arg)
+115
stack/bushel/bin/bushel_main.ml
+115
stack/bushel/bin/bushel_main.ml
···
···
+185
stack/bushel/bin/bushel_missing.ml
+185
stack/bushel/bin/bushel_missing.ml
···
···+Fmt.pr "@.%a (%d):@," (Fmt.styled `Bold Fmt.string) title (List.length entries_with_broken_refs);+) $ Bushel_common.setup_term $ Bushel_common.base_dir $ thumbnails_flag $ synopsis_flag $ tags_flag $ refs_flag)+`P "This command scans all entries and reports any that are missing thumbnails, synopsis, tags, or have broken slugs/contact handles.";
+131
stack/bushel/bin/bushel_note_doi.ml
+131
stack/bushel/bin/bushel_note_doi.ml
···
···
+88
stack/bushel/bin/bushel_obsidian.ml
+88
stack/bushel/bin/bushel_obsidian.ml
···
···
+74
stack/bushel/bin/bushel_paper.ml
+74
stack/bushel/bin/bushel_paper.ml
···
···
+57
stack/bushel/bin/bushel_paper_classify.ml
+57
stack/bushel/bin/bushel_paper_classify.ml
···
···
+325
stack/bushel/bin/bushel_paper_tex.ml
+325
stack/bushel/bin/bushel_paper_tex.ml
···
···+let journal_name = try journal paper |> clean_venue_name |> escape_latex with _ -> "Journal" in+let conf_name = try booktitle paper |> clean_venue_name |> escape_latex with _ -> "Conference" in+let journal_str = try Bushel.Paper.journal paper |> clean_venue_name |> escape_latex with _ -> "" in+let booktitle_str = try Bushel.Paper.booktitle paper |> clean_venue_name |> escape_latex with _ -> "" in+let conf_name = try Bushel.Paper.booktitle paper |> clean_venue_name |> escape_latex with _ -> "" in+let journal_str = try Bushel.Paper.journal paper |> clean_venue_name |> escape_latex with _ -> "" in+(* Add DOI or PDF link if available, but not for in-press papers unless they have explicit URL *)+let non_selected_papers = List.filter (fun p -> not (Bushel.Paper.selected p)) latest_papers in+Printf.printf "Generated %s/papers_full.tex with %d entries\n" output_dir (List.length sorted_full);+Printf.printf "Generated %s/papers_short.tex with %d entries\n" output_dir (List.length sorted_short);+Printf.printf "Generated %s/papers_preprint.tex with %d entries\n" output_dir (List.length sorted_preprint);+Printf.printf "Generated %s/papers_selected.tex with %d entries\n" output_dir (List.length sorted_selected);
+69
stack/bushel/bin/bushel_search.ml
+69
stack/bushel/bin/bushel_search.ml
···
···+Printf.eprintf "Error: API key is required. Use --api-key, set TYPESENSE_API_KEY environment variable, or create .typesense-key file.\n";+let combined_response = Bushel.Typesense.combine_multisearch_results multisearch_resp ~limit ~offset () in+Printf.printf "Found %d results (%.2fms)\n\n" combined_response.total combined_response.query_time;+Printf.printf "%d. %s (score: %.2f)\n" (i + 1) (Bushel.Typesense.pp_search_result_oneline hit) hit.Bushel.Typesense.score
+70
stack/bushel/bin/bushel_thumbs.ml
+70
stack/bushel/bin/bushel_thumbs.ml
···
···+sprintf "magick -density 600 -quality 100 %s[0] -gravity North -crop 100%%x50%%+0+0 -resize %s %s"
+248
stack/bushel/bin/bushel_typesense.ml
+248
stack/bushel/bin/bushel_typesense.ml
···
···+Printf.eprintf "Error: API key is required. Use --api-key or set TYPESENSE_API_KEY environment variable.\n";+Printf.eprintf "Error: OpenAI API key is required for embeddings. Use --openai-key or set OPENAI_API_KEY environment variable.\n";+Printf.eprintf "Error: API key is required. Use --api-key or set TYPESENSE_API_KEY environment variable.\n";+Printf.eprintf "Error: API key is required. Use --api-key or set TYPESENSE_API_KEY environment variable.\n";+let doc = "Specific collection to search (contacts, papers, projects, news, videos, notes, ideas)" in+`P "The API key can be provided via --api-key flag or TYPESENSE_API_KEY environment variable.";+`P "Upload all bushel object types (contacts, papers, projects, news, videos, notes, ideas) to a Typesense search engine instance.";+`P "The API key can be provided via --api-key flag or TYPESENSE_API_KEY environment variable.";+`Pre " bushel-typesense upload --api-key xyz123 --openai-key sk-abc... --data-dir /path/to/data";+`Pre " bushel-typesense upload --endpoint https://search.example.com --api-key xyz123 --openai-key sk-abc...";+let api_key = Arg.(value & opt string key & info ["api-key"; "k"] ~doc:"Typesense API key") in+| Some oa_key when oa_key <> "" -> Arg.(value & opt string oa_key & info ["openai-key"; "oa"] ~doc:"OpenAI API key")
+138
stack/bushel/bin/bushel_video.ml
+138
stack/bushel/bin/bushel_video.ml
···
···+Lwt_main.run (process_videos output_dir overwrite base_url channel fetch_thumbs thumbs_dir); 0)
+81
stack/bushel/bin/bushel_video_thumbs.ml
+81
stack/bushel/bin/bushel_video_thumbs.ml
···
···
+20
stack/bushel/bin/dune
+20
stack/bushel/bin/dune
···
···+(modules bushel_main bushel_bibtex bushel_doi bushel_ideas bushel_info bushel_missing bushel_note_doi bushel_obsidian bushel_paper bushel_paper_classify bushel_paper_tex bushel_video bushel_video_thumbs bushel_thumbs bushel_faces bushel_links bushel_search)+(libraries bushel bushel_common cmdliner cohttp-lwt-unix lwt.unix yaml ezjsonm zotero-translation peertube fmt fmt.cli fmt.tty logs logs.cli logs.fmt cmarkit karakeep uri unix ptime.clock.os crockford))
+47
stack/bushel/bushel.opam
+47
stack/bushel/bushel.opam
···
···
+3
stack/bushel/bushel.opam.template
+3
stack/bushel/bushel.opam.template
+68
stack/bushel/dune-project
+68
stack/bushel/dune-project
···
···
+79
stack/bushel/lib/bushel.ml
+79
stack/bushel/lib/bushel.ml
···
···+let entries = Entry.v ~images ~papers ~notes ~projects ~ideas ~videos ~contacts ~data_dir:(base ^ "/data") in
+27
stack/bushel/lib/bushel.mli
+27
stack/bushel/lib/bushel.mli
···
···
+172
stack/bushel/lib/contact.ml
+172
stack/bushel/lib/contact.ml
···
···+pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Aliases" (list ~sep:comma string) (List.tl ns);
+25
stack/bushel/lib/contact.mli
+25
stack/bushel/lib/contact.mli
···
···
+72
stack/bushel/lib/description.ml
+72
stack/bushel/lib/description.ml
···
···
+19
stack/bushel/lib/description.mli
+19
stack/bushel/lib/description.mli
···
···+val note_description : Note.t -> date_str:string -> lookup_fn:(string -> string option) -> string+val video_description : Video.t -> date_str:string -> lookup_fn:(string -> string option) -> string
+147
stack/bushel/lib/doi_entry.ml
+147
stack/bushel/lib/doi_entry.ml
···
···+{ doi; title; authors; year; bibtype; publisher; resolved_at; source_urls; status = Resolved; ignore = false }+(* Support both old source_url (single) and new source_urls (list) for backwards compatibility *)+{ doi; title; authors; year; bibtype; publisher; resolved_at; source_urls; status = Resolved; ignore }
+51
stack/bushel/lib/doi_entry.mli
+51
stack/bushel/lib/doi_entry.mli
···
···+source_urls: string list; (** All URLs that resolve to this DOI (publisher links, doi.org URLs, etc) *)
+19
stack/bushel/lib/dune
+19
stack/bushel/lib/dune
+449
stack/bushel/lib/entry.ml
+449
stack/bushel/lib/entry.ml
···
···+{ slugs; papers; old_papers; notes; projects; ideas; videos; images; contacts; doi_entries; data_dir }+(* Use titleimage if set, otherwise extract first image from body, then try video, otherwise use slug_ent's thumbnail *)
+79
stack/bushel/lib/entry.mli
+79
stack/bushel/lib/entry.mli
···
···
+223
stack/bushel/lib/idea.ml
+223
stack/bushel/lib/idea.ml
···
···
+55
stack/bushel/lib/idea.mli
+55
stack/bushel/lib/idea.mli
···
···
+296
stack/bushel/lib/link.ml
+296
stack/bushel/lib/link.ml
···
···
+34
stack/bushel/lib/link.mli
+34
stack/bushel/lib/link.mli
···
···
+317
stack/bushel/lib/link_graph.ml
+317
stack/bushel/lib/link_graph.ml
···
···+Fmt.pf ppf "@[<v>Internal links: %d@,External links: %d@,Entries with outbound: %d@,Entries with backlinks: %d@]"
+781
stack/bushel/lib/md.ml
+781
stack/bushel/lib/md.ml
···
···+let mapper = Mapper.make ~inline:(make_validation_mapper entries broken_slugs broken_contacts) () in+(* Scan body for publisher URLs (Elsevier, ScienceDirect, IEEE, Nature, ACM, Sage, UPenn, Springer, Taylor & Francis, OUP) and resolve from cache *)+let publisher_pattern = Re.Perl.compile_pat "https?://(?:(?:www\\.)?(?:linkinghub\\.elsevier\\.com|(?:www\\.)?sciencedirect\\.com/science/article|ieeexplore\\.ieee\\.org|academic\\.oup\\.com|nature\\.com|journals\\.sagepub\\.com|garfield\\.library\\.upenn\\.edu|link\\.springer\\.com)/[^)\\s\"'>]+|(?:dl\\.acm\\.org|(?:www\\.)?tandfonline\\.com)/doi(?:/pdf)?/10\\.[^)\\s\"'>]+)" in
+73
stack/bushel/lib/md.mli
+73
stack/bushel/lib/md.mli
···
···+val note_references : Entry.t -> Contact.t -> Note.t -> (string * string * reference_source) list
+230
stack/bushel/lib/note.ml
+230
stack/bushel/lib/note.ml
···
···+{ title; draft; date; slug; synopsis; titleimage; index_page; perma; doi; body; via; updated; tags; sidebar; slug_ent; source; url; author; category }+[("name", string "type"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];+[("name", string "status"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];+[("name", string "source"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];+[("name", string "category"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];
+49
stack/bushel/lib/note.mli
+49
stack/bushel/lib/note.mli
···
···
+373
stack/bushel/lib/paper.ml
+373
stack/bushel/lib/paper.ml
···
···+let journal = try key paper "journal" |> J.get_string |> String.lowercase_ascii with _ -> "" in+let booktitle = try key paper "booktitle" |> J.get_string |> String.lowercase_ascii with _ -> "" in+let title_str = try key paper "title" |> J.get_string |> String.lowercase_ascii with _ -> "" in+if contains_any journal ["arxiv"] || contains_any booktitle ["arxiv"] || bibtype_lower = "misc" || bibtype_lower = "techreport"+Re.replace_string (Re.compile (Re.seq [Re.char '\n'; Re.char '\n'; Re.rep1 (Re.char '\n')])) ~by:"\n\n" trimmed_abs+pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Authors" (list ~sep:comma string) (authors p);
+55
stack/bushel/lib/paper.mli
+55
stack/bushel/lib/paper.mli
···
···
+100
stack/bushel/lib/project.ml
+100
stack/bushel/lib/project.ml
···
···+[("name", string "languages"); ("type", string "string[]"); ("facet", bool true); ("optional", bool true)];+[("name", string "license"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];+[("name", string "status"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];
+21
stack/bushel/lib/project.mli
+21
stack/bushel/lib/project.mli
···
···
+44
stack/bushel/lib/srcsetter.ml
+44
stack/bushel/lib/srcsetter.ml
···
···
+21
stack/bushel/lib/srcsetter.mli
+21
stack/bushel/lib/srcsetter.mli
···
···
+522
stack/bushel/lib/typesense.ml
+522
stack/bushel/lib/typesense.ml
······+let abstract = Md.markdown_to_plaintext entries (Paper.abstract paper) |> truncate_for_embedding in+let description = Md.markdown_to_plaintext entries (Project.body project) |> truncate_for_embedding in+let description = Md.markdown_to_plaintext entries (Video.body video) |> truncate_for_embedding in+let description = Md.markdown_to_plaintext entries (Idea.body idea) |> truncate_for_embedding in+("contacts", add_embedding_field_to_schema Contact.typesense_schema config ["name"; "names"], (List.map contact_to_document contacts : Ezjsonm.value list));+("papers", add_embedding_field_to_schema Paper.typesense_schema config ["title"; "abstract"; "authors"], (List.map (paper_to_document entries) papers : Ezjsonm.value list));+("videos", add_embedding_field_to_schema Video.typesense_schema config ["title"; "description"], (List.map (video_to_document entries) videos : Ezjsonm.value list));+("projects", add_embedding_field_to_schema Project.typesense_schema config ["title"; "description"; "tags"], (List.map (project_to_document entries) projects : Ezjsonm.value list));+("notes", add_embedding_field_to_schema Note.typesense_schema config ["title"; "content"; "tags"], (List.map (note_to_document entries) notes : Ezjsonm.value list));+("ideas", add_embedding_field_to_schema Idea.typesense_schema config ["title"; "description"; "tags"], (List.map (idea_to_document entries) ideas : Ezjsonm.value list));+let upload_collection ((name, schema, documents) : string * Ezjsonm.value * Ezjsonm.value list) =+let* () = Lwt_io.write Lwt_io.stdout (Fmt.str "Creating collection %s with %d documents\n" name (List.length documents)) in+if String.contains line ':' && Str.string_match (Str.regexp ".*success.*true.*") line 0 then acc + 1 else acc) 0 lines in+if String.contains line ':' && Str.string_match (Str.regexp ".*success.*false.*") line 0 then acc + 1 else acc) 0 lines in+let* () = Lwt_io.write Lwt_io.stdout (Fmt.str "Upload results for %s: %d successful, %d failed out of %d total\n"+let failed_lines = List.filter (fun line -> Str.string_match (Str.regexp ".*success.*false.*") line 0) lines in+let* result = Typesense_client.search_collection client_config collection_name query ~limit ~offset () in+| Error (Typesense_client.Http_error (code, msg)) -> Lwt.return_error (Http_error (code, msg))+let combined_response = Typesense_client.combine_multisearch_results multisearch_resp ~limit ~offset () in+| Error (Typesense_client.Http_error (code, msg)) -> Lwt.return_error (Http_error (code, msg))+| Error (Typesense_client.Http_error (code, msg)) -> Lwt.return_error (Http_error (code, msg))+| Error (Typesense_client.Http_error (code, msg)) -> Lwt.return_error (Http_error (code, msg))+let combine_multisearch_results (multisearch_resp : multisearch_response) ?(limit=10) ?(offset=0) () =
+107
stack/bushel/lib/typesense.mli
+107
stack/bushel/lib/typesense.mli
······env:< clock: float Eio.Time.clock_ty Eio.Resource.t; fs: Eio.Fs.dir_ty Eio.Path.t; net: [`Generic | `Unix] Eio.Net.ty Eio.Resource.t; .. > ->
······env:< clock: float Eio.Time.clock_ty Eio.Resource.t; fs: Eio.Fs.dir_ty Eio.Path.t; net: [`Generic | `Unix] Eio.Net.ty Eio.Resource.t; .. > ->+val search_collection : config -> string -> string -> ?limit:int -> ?offset:int -> unit -> (search_response, error) result Lwt.t+val search_all : config -> string -> ?limit:int -> ?offset:int -> unit -> (search_response, error) result Lwt.t+val multisearch : config -> string -> ?limit:int -> unit -> (multisearch_response, error) result Lwt.t+val combine_multisearch_results : multisearch_response -> ?limit:int -> ?offset:int -> unit -> search_response
+80
stack/bushel/lib/util.ml
+80
stack/bushel/lib/util.ml
···
···
+166
stack/bushel/lib/video.ml
+166
stack/bushel/lib/video.ml
···
···+[("name", string "channel"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];+[("name", string "platform"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];
+32
stack/bushel/lib/video.mli
+32
stack/bushel/lib/video.mli
···
···