My agentic slop goes here. Not intended for anyone else!

more

-33
stack/bushel/bin/bushel_main.ml
···
(* Import actual command implementations from submodules *)
-
(* Faces command *)
-
let faces_cmd =
-
let doc = "Retrieve face thumbnails from Immich photo service" in
-
let info = Cmd.info "faces" ~version ~doc in
-
Cmd.v info Bushel_faces.term
-
-
(* Links command - uses group structure *)
-
let links_cmd = Bushel_links.cmd
-
(* Obsidian command: exposes [Bushel_obsidian.term] as the "obsidian"
   subcommand, tagged with the shared [version]. *)
let obsidian_cmd =
  Cmd.v
    (Cmd.info "obsidian" ~version
       ~doc:"Convert Bushel entries to Obsidian format")
    Bushel_obsidian.term
-
(* Paper command *)
-
let paper_cmd =
-
let doc = "Fetch paper metadata from DOI" in
-
let info = Cmd.info "paper" ~version ~doc in
-
Cmd.v info Bushel_paper.term
-
(* Paper classify command: alias for the command value exported by
   [Bushel_paper_classify]; the subcommand is fully defined in that module. *)
let paper_classify_cmd = Bushel_paper_classify.cmd
···
let info = Cmd.info "thumbs" ~version ~doc in
Cmd.v info Bushel_thumbs.term
-
(* Video command *)
-
let video_cmd =
-
let doc = "Fetch videos from PeerTube instances" in
-
let info = Cmd.info "video" ~version ~doc in
-
Cmd.v info Bushel_video.term
-
-
(* Video thumbs command *)
-
let video_thumbs_cmd = Bushel_video_thumbs.cmd
-
(* Query command *)
let query_cmd =
let doc = "Query Bushel collections using multisearch" in
···
(* Note DOI command: alias for the command value exported by
   [Bushel_note_doi]; the subcommand is fully defined in that module. *)
let note_doi_cmd = Bushel_note_doi.cmd
-
(* DOI resolve command *)
-
let doi_cmd = Bushel_doi.cmd
-
(* Main command *)
let bushel_cmd =
let doc = "Bushel content management toolkit" in
···
let info = Cmd.info "bushel" ~version ~doc ~sdocs ~man in
Cmd.group info [
bibtex_cmd;
-
doi_cmd;
-
faces_cmd;
ideas_cmd;
info_cmd;
-
links_cmd;
missing_cmd;
note_doi_cmd;
obsidian_cmd;
-
paper_cmd;
paper_classify_cmd;
paper_tex_cmd;
query_cmd;
thumbs_cmd;
-
video_cmd;
-
video_thumbs_cmd;
]
(* Program entry point: evaluate [bushel_cmd] with [Cmd.eval'] and hand the
   integer it returns to [exit] as the process status. *)
let () = exit (Cmd.eval' bushel_cmd)
+32 -52
stack/bushel/bin/bushel_search.ml
···
open Cmdliner
-
open Lwt.Syntax
-
-
(** TODO:claude Bushel search command for integration with main CLI *)
-
let endpoint =
-
let doc = "Typesense server endpoint URL" in
-
Arg.(value & opt string "" & info ["endpoint"; "e"] ~doc)
-
-
let api_key =
-
let doc = "Typesense API key for authentication" in
-
Arg.(value & opt string "" & info ["api-key"; "k"] ~doc)
-
+
(** Bushel search command for integration with main CLI *)
let limit =
let doc = "Maximum number of results to return" in
···
let doc = "Search query text" in
Arg.(required & pos 0 (some string) None & info [] ~docv:"QUERY" ~doc)
-
(** TODO:claude Search function using multisearch *)
-
let search endpoint api_key query_text limit offset =
-
let base_config = Bushel.Typesense.load_config_from_files () in
-
let config = {
-
Bushel.Typesense.endpoint = if endpoint = "" then base_config.endpoint else endpoint;
-
api_key = if api_key = "" then base_config.api_key else api_key;
-
openai_key = base_config.openai_key;
-
} in
-
+
(** Search function using multisearch *)
+
let search query_text limit offset () =
+
let config = Bushel.Typesense.load_config_from_files () in
+
if config.api_key = "" then (
-
Printf.eprintf "Error: API key is required. Use --api-key, set TYPESENSE_API_KEY environment variable, or create .typesense-key file.\n";
-
exit 1
-
);
-
-
Printf.printf "Searching Typesense at %s\n" config.endpoint;
-
Printf.printf "Query: \"%s\"\n" query_text;
-
Printf.printf "Limit: %d, Offset: %d\n" limit offset;
-
Printf.printf "\n";
-
-
Lwt_main.run (
-
Lwt.catch (fun () ->
-
let* result = Bushel.Typesense.multisearch config query_text ~limit:50 () in
-
match result with
-
| Ok multisearch_resp ->
-
let combined_response = Bushel.Typesense.combine_multisearch_results multisearch_resp ~limit ~offset () in
-
Printf.printf "Found %d results (%.2fms)\n\n" combined_response.total combined_response.query_time;
-
-
List.iteri (fun i (hit : Bushel.Typesense.search_result) ->
-
Printf.printf "%d. %s (score: %.2f)\n" (i + 1) (Bushel.Typesense.pp_search_result_oneline hit) hit.Bushel.Typesense.score
-
) combined_response.hits;
-
Lwt.return_unit
-
| Error err ->
-
Format.eprintf "Search error: %a\n" Bushel.Typesense.pp_error err;
-
exit 1
-
) (fun exn ->
-
Printf.eprintf "Error: %s\n" (Printexc.to_string exn);
-
exit 1
-
)
-
);
-
0
+
Printf.eprintf "Error: API key is required. Use TYPESENSE_API_KEY environment variable or create .typesense-key file.\n";
+
1
+
) else (
+
Printf.printf "Searching Typesense at %s\n" config.endpoint;
+
Printf.printf "Query: \"%s\"\n" query_text;
+
Printf.printf "Limit: %d, Offset: %d\n\n" limit offset;
-
(** TODO:claude Command line term *)
-
let term = Term.(const search $ endpoint $ api_key $ query_text $ limit $ offset)
+
Eio_main.run (fun env ->
+
Eio.Switch.run (fun sw ->
+
let result = Bushel.Typesense.multisearch ~sw ~env config query_text ~limit:50 () in
+
match result with
+
| Ok multisearch_resp ->
+
let combined_response = Bushel.Typesense.combine_multisearch_results multisearch_resp ~limit ~offset () in
+
Printf.printf "Found %d results (%.2fms)\n\n" combined_response.total combined_response.query_time;
+
+
List.iteri (fun i (hit : Bushel.Typesense.search_result) ->
+
Printf.printf "%d. %s (score: %.2f)\n" (i + 1) (Bushel.Typesense.pp_search_result_oneline hit) hit.Bushel.Typesense.score
+
) combined_response.hits
+
| Error err ->
+
Format.eprintf "Search error: %a\n" Bushel.Typesense.pp_error err;
+
exit 1
+
)
+
);
+
0
+
)
+
+
(** Command line term *)
+
let term = Term.(const search $ query_text $ limit $ offset $ Bushel_common.setup_term)
+93 -153
stack/bushel/bin/bushel_typesense.ml
···
open Cmdliner
-
open Lwt.Syntax
-
(** TODO:claude Bushel Typesense binary with upload and query functionality *)
-
-
let endpoint =
-
let doc = "Typesense server endpoint URL" in
-
Arg.(value & opt string "http://localhost:8108" & info ["endpoint"; "e"] ~doc)
-
-
let api_key =
-
let doc = "Typesense API key for authentication" in
-
Arg.(value & opt string "" & info ["api-key"; "k"] ~doc)
-
-
let openai_key =
-
let doc = "OpenAI API key for embeddings" in
-
Arg.(value & opt string "" & info ["openai-key"; "oa"] ~doc)
+
(** Bushel Typesense binary with upload and query functionality *)
(* Optional [--data-dir] / [-d] string option; defaults to ["."]. *)
let data_dir =
  let flag_info =
    Arg.info [ "data-dir"; "d" ] ~doc:"Directory containing bushel data files"
  in
  Arg.(value & opt string "." & flag_info)
-
(** TODO:claude Main upload function *)
-
let upload endpoint api_key openai_key data_dir =
-
if api_key = "" then (
-
Printf.eprintf "Error: API key is required. Use --api-key or set TYPESENSE_API_KEY environment variable.\n";
-
exit 1
-
);
+
(** Main upload function *)
+
let upload env _xdg _profile data_dir openai_key =
+
let config = Bushel.Typesense.load_config_from_files () in
-
if openai_key = "" then (
-
Printf.eprintf "Error: OpenAI API key is required for embeddings. Use --openai-key or set OPENAI_API_KEY environment variable.\n";
-
exit 1
-
);
-
-
let config = Bushel.Typesense.{ endpoint; api_key; openai_key } in
+
let config = { config with
+
openai_key = if openai_key = "" then config.openai_key else openai_key
+
} in
-
Printf.printf "Loading bushel data from %s\n" data_dir;
-
let entries = Bushel.load data_dir in
+
if config.api_key = "" then (
+
Printf.eprintf "Error: API key is required. Use TYPESENSE_API_KEY environment variable or create .typesense-key file.\n";
+
1
+
) else if config.openai_key = "" then (
+
Printf.eprintf "Error: OpenAI API key is required for embeddings. Use OPENAI_API_KEY environment variable or create .openrouter-api file.\n";
+
1
+
) else (
+
Printf.printf "Loading bushel data from %s\n%!" data_dir;
+
let entries = Bushel.load data_dir in
-
Printf.printf "Uploading bushel data to Typesense at %s\n" endpoint;
+
Printf.printf "Uploading bushel data to Typesense at %s\n%!" config.endpoint;
-
Lwt_main.run (
-
Lwt.catch (fun () ->
-
Bushel.Typesense.upload_all config entries
-
) (fun exn ->
-
Printf.eprintf "Error: %s\n" (Printexc.to_string exn);
-
exit 1
-
)
+
Eio.Switch.run (fun sw ->
+
Bushel.Typesense.upload_all ~sw ~env config entries
+
);
+
0
)
-
(** TODO:claude Query function *)
-
let query endpoint api_key query_text collection limit offset =
-
let base_config = Bushel.Typesense.load_config_from_files () in
-
let config = {
-
Bushel.Typesense.endpoint = if endpoint = "" then base_config.endpoint else endpoint;
-
api_key = if api_key = "" then base_config.api_key else api_key;
-
openai_key = base_config.openai_key;
-
} in
-
+
(** Query function *)
+
let query env _xdg _profile query_text collection limit offset =
+
let config = Bushel.Typesense.load_config_from_files () in
+
if config.api_key = "" then (
-
Printf.eprintf "Error: API key is required. Use --api-key or set TYPESENSE_API_KEY environment variable.\n";
-
exit 1
-
);
-
-
Printf.printf "Searching Typesense at %s\n" config.endpoint;
-
Printf.printf "Query: \"%s\"\n" query_text;
-
if collection <> "" then Printf.printf "Collection: %s\n" collection;
-
Printf.printf "Limit: %d, Offset: %d\n" limit offset;
-
Printf.printf "\n";
-
-
Lwt_main.run (
-
Lwt.catch (fun () ->
-
let search_fn = if collection = "" then
-
Bushel.Typesense.search_all config query_text ~limit ~offset
+
Printf.eprintf "Error: API key is required. Use TYPESENSE_API_KEY environment variable or create .typesense-key file.\n";
+
1
+
) else (
+
Printf.printf "Searching Typesense at %s\n%!" config.endpoint;
+
Printf.printf "Query: \"%s\"\n%!" query_text;
+
if collection <> "" then Printf.printf "Collection: %s\n%!" collection;
+
Printf.printf "Limit: %d, Offset: %d\n\n%!" limit offset;
+
+
Eio.Switch.run (fun sw ->
+
let search_fn = if collection = "" then
+
Bushel.Typesense.search_all ~sw ~env config query_text ~limit ~offset
else
-
Bushel.Typesense.search_collection config collection query_text ~limit ~offset
+
Bushel.Typesense.search_collection ~sw ~env config collection query_text ~limit ~offset
in
-
let* result = search_fn () in
+
let result = search_fn () in
match result with
| Ok response ->
-
Printf.printf "Found %d results (%.2fms)\n\n" response.total response.query_time;
+
Printf.printf "Found %d results (%.2fms)\n\n%!" response.total response.query_time;
List.iteri (fun i (hit : Bushel.Typesense.search_result) ->
-
Printf.printf "%d. [%s] %s (score: %.2f)\n" (i + 1) hit.collection hit.title hit.score;
-
if hit.content <> "" then Printf.printf " %s\n" hit.content;
+
Printf.printf "%d. [%s] %s (score: %.2f)\n%!" (i + 1) hit.collection hit.title hit.score;
+
if hit.content <> "" then Printf.printf " %s\n%!" hit.content;
if hit.highlights <> [] then (
-
Printf.printf " Highlights:\n";
+
Printf.printf " Highlights:\n%!";
List.iter (fun (field, snippets) ->
List.iter (fun snippet ->
-
Printf.printf " %s: %s\n" field snippet
+
Printf.printf " %s: %s\n%!" field snippet
) snippets
) hit.highlights
);
-
Printf.printf "\n"
-
) response.hits;
-
Lwt.return_unit
+
Printf.printf "\n%!"
+
) response.hits
| Error err ->
-
Format.eprintf "Search error: %a\n" Bushel.Typesense.pp_error err;
+
Format.eprintf "Search error: %a\n%!" Bushel.Typesense.pp_error err;
exit 1
-
) (fun exn ->
-
Printf.eprintf "Error: %s\n" (Printexc.to_string exn);
-
exit 1
-
)
+
);
+
0
)
-
(** TODO:claude List collections function *)
-
let list endpoint api_key =
-
let base_config = Bushel.Typesense.load_config_from_files () in
-
let config = {
-
Bushel.Typesense.endpoint = if endpoint = "" then base_config.endpoint else endpoint;
-
api_key = if api_key = "" then base_config.api_key else api_key;
-
openai_key = base_config.openai_key;
-
} in
-
+
(** List collections function *)
+
let list env _xdg _profile =
+
let config = Bushel.Typesense.load_config_from_files () in
+
if config.api_key = "" then (
-
Printf.eprintf "Error: API key is required. Use --api-key or set TYPESENSE_API_KEY environment variable.\n";
-
exit 1
-
);
-
-
Printf.printf "Listing collections at %s\n\n" config.endpoint;
-
-
Lwt_main.run (
-
Lwt.catch (fun () ->
-
let* result = Bushel.Typesense.list_collections config in
+
Printf.eprintf "Error: API key is required. Use TYPESENSE_API_KEY environment variable or create .typesense-key file.\n";
+
1
+
) else (
+
Printf.printf "Listing collections at %s\n\n%!" config.endpoint;
+
+
Eio.Switch.run (fun sw ->
+
let result = Bushel.Typesense.list_collections ~sw ~env config in
match result with
| Ok collections ->
-
Printf.printf "Collections:\n";
+
Printf.printf "Collections:\n%!";
List.iter (fun (name, count) ->
-
Printf.printf " %s (%d documents)\n" name count
-
) collections;
-
Lwt.return_unit
+
Printf.printf " %s (%d documents)\n%!" name count
+
) collections
| Error err ->
-
Format.eprintf "List error: %a\n" Bushel.Typesense.pp_error err;
+
Format.eprintf "List error: %a\n%!" Bushel.Typesense.pp_error err;
exit 1
-
) (fun exn ->
-
Printf.eprintf "Error: %s\n" (Printexc.to_string exn);
-
exit 1
-
)
+
);
+
0
)
-
(** TODO:claude Command line arguments for query *)
+
(** Command line arguments *)
+
let openai_key =
+
let doc = "OpenAI API key for embeddings" in
+
Arg.(value & opt string "" & info ["openai-key"; "oa"] ~doc)
+
(* Required positional argument at position 0, shown as QUERY in the help
   output, carrying the search text. *)
let query_text =
  let query_info = Arg.info [] ~docv:"QUERY" ~doc:"Search query text" in
  Arg.(required & pos 0 (some string) None & query_info)
let collection =
-
let doc = "Specific collection to search (contacts, papers, projects, news, videos, notes, ideas)" in
+
let doc = "Specific collection to search (contacts, papers, projects, notes, videos, ideas)" in
Arg.(value & opt string "" & info ["collection"; "c"] ~doc)
let limit =
···
let doc = "Number of results to skip (for pagination)" in
Arg.(value & opt int 0 & info ["offset"; "o"] ~doc)
-
(** TODO:claude Query command *)
+
(** Query command - uses direct Eio_main.run instead of eiocmd for simplicity *)
let query_cmd =
let doc = "Search bushel collections in Typesense" in
let man = [
`S Manpage.s_description;
`P "Search across all or specific bushel collections in Typesense.";
-
`P "The API key can be provided via --api-key flag or TYPESENSE_API_KEY environment variable.";
-
`P "If .typesense-url and .typesense-api files exist, they will be used for configuration.";
+
`P "The API key can be read from .typesense-key file or TYPESENSE_API_KEY environment variable.";
`S Manpage.s_examples;
`P "Search all collections:";
`Pre " bushel-typesense query \"machine learning\"";
···
`Pre " bushel-typesense query \"AI\" --limit 5 --offset 10";
] in
let info = Cmd.info "query" ~doc ~man in
-
Cmd.v info Term.(const query $ endpoint $ api_key $ query_text $ collection $ limit $ offset)
+
let run query_text collection limit offset () =
+
Eio_main.run (fun env -> query env () () query_text collection limit offset)
+
in
+
Cmd.v info Term.(const run $ query_text $ collection $ limit $ offset $ Bushel_common.setup_term)
-
(** TODO:claude List command *)
+
(** List command *)
let list_cmd =
let doc = "List all collections in Typesense" in
let man = [
···
`P "List all available collections and their document counts.";
] in
let info = Cmd.info "list" ~doc ~man in
-
Cmd.v info Term.(const list $ endpoint $ api_key)
+
let run () =
+
Eio_main.run (fun env -> list env () ())
+
in
+
Cmd.v info Term.(const run $ Bushel_common.setup_term)
-
(** TODO:claude Updated upload command *)
+
(** Upload command *)
let upload_cmd =
let doc = "Upload bushel collections to Typesense search engine" in
let man = [
`S Manpage.s_description;
-
`P "Upload all bushel object types (contacts, papers, projects, news, videos, notes, ideas) to a Typesense search engine instance.";
-
`P "The API key can be provided via --api-key flag or TYPESENSE_API_KEY environment variable.";
+
`P "Upload all bushel object types (contacts, papers, projects, notes, videos, ideas) to a Typesense search engine instance.";
+
`P "The API keys can be read from files or environment variables.";
`S Manpage.s_examples;
-
`P "Upload to local Typesense instance:";
-
`Pre " bushel-typesense upload --api-key xyz123 --openai-key sk-abc... --data-dir /path/to/data";
-
`P "Upload to remote Typesense instance:";
-
`Pre " bushel-typesense upload --endpoint https://search.example.com --api-key xyz123 --openai-key sk-abc...";
+
`P "Upload to Typesense instance:";
+
`Pre " bushel-typesense upload --data-dir /path/to/data";
] in
let info = Cmd.info "upload" ~doc ~man in
-
Cmd.v info Term.(const upload $ endpoint $ api_key $ openai_key $ data_dir)
+
let run data_dir openai_key () =
+
Eio_main.run (fun env -> upload env () () data_dir openai_key)
+
in
+
Cmd.v info Term.(const run $ data_dir $ openai_key $ Bushel_common.setup_term)
-
(** TODO:claude Main command group *)
+
(** Main command group *)
let main_cmd =
let doc = "Bushel Typesense client" in
let man = [
···
let info = Cmd.info "bushel-typesense" ~doc ~man in
Cmd.group info [upload_cmd; query_cmd; list_cmd]
-
let () =
-
(* Check for API keys in environment if not provided *)
-
let api_key_env = try Some (Sys.getenv "TYPESENSE_API_KEY") with Not_found -> None in
-
let openai_key_env = try Some (Sys.getenv "OPENAI_API_KEY") with Not_found -> None in
-
match api_key_env with
-
| Some key when key <> "" ->
-
(* Override the api_key argument with environment variable *)
-
let api_key = Arg.(value & opt string key & info ["api-key"; "k"] ~doc:"Typesense API key") in
-
let openai_key = match openai_key_env with
-
| Some oa_key when oa_key <> "" -> Arg.(value & opt string oa_key & info ["openai-key"; "oa"] ~doc:"OpenAI API key")
-
| _ -> openai_key
-
in
-
let upload_cmd =
-
let doc = "Upload bushel collections to Typesense search engine" in
-
let info = Cmd.info "upload" ~doc in
-
Cmd.v info Term.(const upload $ endpoint $ api_key $ openai_key $ data_dir)
-
in
-
let query_cmd =
-
let doc = "Search bushel collections in Typesense" in
-
let info = Cmd.info "query" ~doc in
-
Cmd.v info Term.(const query $ endpoint $ api_key $ query_text $ collection $ limit $ offset)
-
in
-
let list_cmd =
-
let doc = "List all collections in Typesense" in
-
let info = Cmd.info "list" ~doc in
-
Cmd.v info Term.(const list $ endpoint $ api_key)
-
in
-
let main_cmd =
-
let doc = "Bushel Typesense client" in
-
let info = Cmd.info "bushel-typesense" ~doc in
-
Cmd.group info [upload_cmd; query_cmd; list_cmd]
-
in
-
exit (Cmd.eval main_cmd)
-
| _ ->
-
exit (Cmd.eval main_cmd)
+
let () = exit (Cmd.eval' main_cmd)
+3 -3
stack/bushel/bin/dune
···
(name bushel_main)
(public_name bushel)
(package bushel)
-
(modules bushel_main bushel_bibtex bushel_doi bushel_ideas bushel_info bushel_missing bushel_note_doi bushel_obsidian bushel_paper bushel_paper_classify bushel_paper_tex bushel_video bushel_video_thumbs bushel_thumbs bushel_faces bushel_links bushel_search)
+
(modules bushel_main bushel_bibtex bushel_ideas bushel_info bushel_missing bushel_note_doi bushel_obsidian bushel_paper_classify bushel_paper_tex bushel_thumbs bushel_search)
(flags (:standard -w -69))
-
(libraries bushel bushel_common cmdliner cohttp-lwt-unix lwt.unix yaml ezjsonm zotero-translation peertube fmt fmt.cli fmt.tty logs logs.cli logs.fmt cmarkit karakeep uri unix ptime.clock.os crockford))
+
(libraries bushel bushel_common cmdliner eio eio_main yaml ezjsonm zotero-translation fmt fmt.cli fmt.tty logs logs.cli logs.fmt cmarkit uri unix ptime.clock.os crockford))
(executable
(name bushel_typesense)
···
(package bushel)
(modules bushel_typesense)
(flags (:standard -w -69))
-
(libraries bushel bushel_common cmdliner lwt.unix))
+
(libraries bushel bushel_common cmdliner eio eio_main eiocmd))
+9 -5
stack/bushel/bushel.opam
···
"bytesrw"
"jekyll-format"
"yaml"
-
"lwt"
-
"cohttp-lwt-unix"
+
"eio"
+
"eio_main"
+
"requests"
"fmt"
-
"peertube"
-
"karakeep"
-
"typesense-client"
+
"peertubee"
+
"karakeepe"
+
"typesense-cliente"
"cmdliner"
+
"eiocmd"
+
"xdge"
+
"keyeio"
"odoc" {with-doc}
]
build: [
+10 -43
stack/bushel/dune-project
···
bytesrw
jekyll-format
yaml
-
lwt
-
cohttp-lwt-unix
+
eio
+
eio_main
+
requests
fmt
-
peertube
-
karakeep
-
typesense-client
-
cmdliner))
-
-
(package
-
(name peertube)
-
(synopsis "PeerTube API client")
-
(description "Client for interacting with PeerTube instances")
-
(depends
-
(ocaml (>= "5.2.0"))
-
ezjsonm
-
lwt
-
cohttp-lwt-unix
-
ptime
-
fmt))
-
-
(package
-
(name karakeep)
-
(synopsis "Karakeep API client for Bushel")
-
(description "Karakeep API client to retrieve bookmarks from Karakeep instances")
-
(depends
-
(ocaml (>= "5.2.0"))
-
ezjsonm
-
lwt
-
cohttp-lwt-unix
-
ptime
-
fmt))
-
-
(package
-
(name typesense-client)
-
(synopsis "Standalone Typesense client for OCaml")
-
(description "A standalone Typesense client that can be compiled to JavaScript")
-
(depends
-
(ocaml (>= "5.2.0"))
-
ezjsonm
-
lwt
-
cohttp-lwt-unix
-
ptime
-
fmt
-
uri))
+
peertubee
+
karakeepe
+
typesense-cliente
+
cmdliner
+
eiocmd
+
xdge
+
keyeio))
+3 -3
stack/bushel/lib/dune
···
ptime
yaml.unix
jekyll-format
-
lwt
-
cohttp-lwt-unix
+
eio
+
requests
fmt
re
ptime.clock
ptime.clock.os
-
typesense-client))
+
typesense-cliente))
+30 -552
stack/bushel/lib/typesense.ml
···
-
<<<<<<< HEAD
-
(** TODO:claude Typesense API client for Bushel *)
+
(** Typesense API client for Bushel *)
type config = {
endpoint : string;
···
| Json_error msg -> Fmt.pf fmt "JSON error: %s" msg
| Connection_error msg -> Fmt.pf fmt "Connection error: %s" msg
-
(** TODO:claude Create authentication headers for Typesense API *)
+
(** [auth_headers api_key] builds the request headers sent to Typesense:
    starting from [Requests.Headers.empty], it sets ["X-TYPESENSE-API-KEY"]
    to [api_key] and then ["Content-Type"] to ["application/json"]. *)
let auth_headers api_key =
  List.fold_left
    (fun headers (name, value) -> Requests.Headers.set name value headers)
    Requests.Headers.empty
    [
      ("X-TYPESENSE-API-KEY", api_key);
      ("Content-Type", "application/json");
    ]
-
(** TODO:claude Make HTTP request to Typesense API *)
+
(** Make HTTP request to Typesense API *)
let make_request ~sw ~env ?(meth=`GET) ?(body="") config path =
let uri = Uri.of_string (config.endpoint ^ path) in
let headers = auth_headers config.api_key in
···
with exn ->
Error (Connection_error (Printexc.to_string exn))
-
(** TODO:claude Create a collection with given schema *)
+
(** [create_collection ~sw ~env config schema] serialises [schema] with
    [Ezjsonm.value_to_string] and POSTs the resulting text to
    ["/collections"] through [make_request], returning its result as is. *)
let create_collection ~sw ~env config (schema : Ezjsonm.value) =
  make_request ~sw ~env ~meth:`POST
    ~body:(Ezjsonm.value_to_string schema)
    config "/collections"
-
(** TODO:claude Check if collection exists *)
+
(** Check if collection exists *)
let collection_exists ~sw ~env config name =
let result = make_request ~sw ~env config ("/collections/" ^ name) in
match result with
···
| Error (Http_error (404, _)) -> false
| Error _ -> false
-
(** TODO:claude Delete a collection *)
+
(** [delete_collection ~sw ~env config name] issues a DELETE request for
    ["/collections/" ^ name] through [make_request] and returns its result. *)
let delete_collection ~sw ~env config name =
  let path = "/collections/" ^ name in
  make_request ~sw ~env ~meth:`DELETE config path
-
(** TODO:claude Upload documents to a collection in batch *)
+
(** Upload documents to a collection in batch *)
let upload_documents ~sw ~env config collection_name (documents : Ezjsonm.value list) =
let jsonl_lines = List.map (fun doc -> Ezjsonm.value_to_string doc) documents in
let body = String.concat "\n" jsonl_lines in
···
(Printf.sprintf "/collections/%s/documents/import?action=upsert" collection_name)
-
(** TODO:claude Convert Bushel objects to Typesense documents *)
+
(** Convert Bushel objects to Typesense documents *)
-
(** TODO:claude Helper function to truncate long strings for embedding *)
+
(** [truncate_for_embedding ?max_chars text] returns [text] unchanged when it
    is at most [max_chars] bytes long (default 20000); otherwise it returns a
    prefix of [text] that is at most [max_chars] bytes long.

    Lengths are measured in bytes ([String.length]). The cut never lands in
    the middle of a UTF-8 multi-byte character: when the byte at the cut
    position is a continuation byte ([0b10xxxxxx]) the cut is moved back (by
    at most three bytes) to the start of that character. If no character
    start is found within three bytes the input is not valid UTF-8 and the
    plain byte cut is used.

    A negative [max_chars] is treated as [0] rather than raising
    [Invalid_argument]. *)
let truncate_for_embedding ?(max_chars=20000) text =
  let limit = max 0 max_chars in
  if String.length text <= limit then text
  else begin
    (* [limit < String.length text] here, so every index probed below is
       within bounds. *)
    let is_continuation i = Char.code text.[i] land 0xC0 = 0x80 in
    let rec back_off i steps =
      if i > 0 && steps > 0 && is_continuation i then
        back_off (i - 1) (steps - 1)
      else i
    in
    let cut = back_off limit 3 in
    (* Still on a continuation byte: malformed input, keep the byte cut. *)
    let cut = if is_continuation cut then limit else cut in
    String.sub text 0 cut
  end
-
(** TODO:claude Helper function to convert Ptime to Unix timestamp *)
+
(** Helper function to convert Ptime to Unix timestamp *)
let ptime_to_timestamp ptime =
let span = Ptime.to_span ptime in
let seconds = Ptime.Span.to_int_s span in
···
| Some s -> Int64.of_int s
| None -> 0L
-
(** TODO:claude Helper function to convert date tuple to Unix timestamp *)
+
(** Helper function to convert date tuple to Unix timestamp *)
let date_to_timestamp (year, month, day) =
match Ptime.of_date (year, month, day) with
| Some ptime -> ptime_to_timestamp ptime
···
("thumbnail_url", string (Option.value ~default:"" thumbnail_url));
]
-
(** TODO:claude Helper function to add embedding field to schema *)
+
(** Helper function to add embedding field to schema *)
let add_embedding_field_to_schema schema config embedding_from_fields =
let open Ezjsonm in
let fields = get_dict schema |> List.assoc "fields" |> get_list (fun f -> f) in
···
]);
] in
let updated_fields = fields @ [embedding_field] in
-
let updated_schema =
+
let updated_schema =
List.map (fun (k, v) ->
if k = "fields" then (k, list (fun f -> f) updated_fields)
else (k, v)
···
in
dict updated_schema
-
(** TODO:claude Upload all bushel objects to their respective collections *)
+
(** Upload all bushel objects to their respective collections *)
let upload_all ~sw ~env config entries =
print_string "Uploading bushel data to Typesense\n";
···
List.iter upload_collection collections
-
(** TODO:claude Re-export search types from Typesense_cliente *)
+
(** Re-export search types from Typesense_cliente *)
type search_result = Typesense_cliente.search_result = {
id: string;
title: string;
···
query_time: float;
}
-
(** TODO:claude Convert bushel config to client config *)
+
(** [to_client_config config] builds the [Typesense_cliente] configuration
    record from a Bushel [config], copying [endpoint] and [api_key]; the
    remaining Bushel-side fields are not carried over. *)
let to_client_config (config : config) =
  let { endpoint; api_key; _ } = config in
  { Typesense_cliente.endpoint; api_key }
-
(** TODO:claude Search a single collection *)
+
(** Search a single collection *)
let search_collection ~sw ~env (config : config) collection_name query ?(limit=10) ?(offset=0) () =
let client_config = to_client_config config in
let requests_session = Requests.create ~sw env in
···
| Error (Typesense_cliente.Json_error msg) -> Error (Json_error msg)
| Error (Typesense_cliente.Connection_error msg) -> Error (Connection_error msg)
-
(** TODO:claude Search across all collections - use client multisearch *)
+
(** Search across all collections - use client multisearch *)
let search_all ~sw ~env (config : config) query ?(limit=10) ?(offset=0) () =
let client_config = to_client_config config in
let requests_session = Requests.create ~sw env in
···
| Error (Typesense_cliente.Json_error msg) -> Error (Json_error msg)
| Error (Typesense_cliente.Connection_error msg) -> Error (Connection_error msg)
-
(** TODO:claude List all collections *)
+
(** List all collections *)
let list_collections ~sw ~env (config : config) =
let client_config = to_client_config config in
let requests_session = Requests.create ~sw env in
···
| Error (Typesense_cliente.Json_error msg) -> Error (Json_error msg)
| Error (Typesense_cliente.Connection_error msg) -> Error (Connection_error msg)
-
(** TODO:claude Re-export multisearch types from Typesense_cliente *)
+
(** Re-export of [Typesense_cliente.multisearch_response]. The type equation
    keeps this type and the client library's type interchangeable while
    letting callers use the [results] field unqualified. *)
type multisearch_response = Typesense_cliente.multisearch_response = {
(* List of per-search responses; presumably one per queried collection,
   to be confirmed against Typesense_cliente. *)
results: search_response list;
}
-
(** TODO:claude Perform multisearch across all collections *)
+
(** Perform multisearch across all collections *)
let multisearch ~sw ~env (config : config) query ?(limit=10) () =
let client_config = to_client_config config in
let requests_session = Requests.create ~sw env in
···
| Error (Typesense_cliente.Json_error msg) -> Error (Json_error msg)
| Error (Typesense_cliente.Connection_error msg) -> Error (Connection_error msg)
-
(** TODO:claude Combine multisearch results into single result set *)
+
(** [combine_multisearch_results multisearch_resp ?limit ?offset ()] delegates
    to [Typesense_cliente.combine_multisearch_results], forwarding [limit]
    (default 10) and [offset] (default 0) and returning its result
    unchanged. The trailing [unit] lets the optional arguments be omitted. *)
let combine_multisearch_results (multisearch_resp : multisearch_response) ?(limit=10) ?(offset=0) () =
Typesense_cliente.combine_multisearch_results multisearch_resp ~limit ~offset ()
-
(** TODO:claude Load configuration from files *)
+
(** Load configuration from files *)
let load_config_from_files () =
let read_file_if_exists filename =
if Sys.file_exists filename then
···
Some (String.trim content)
else None
in
-
-
let endpoint = match read_file_if_exists ".typesense-url" with
-
| Some url -> url
-
| None -> "http://localhost:8108"
-
in
-
-
let api_key = match read_file_if_exists ".typesense-key" with
-
| Some key -> key
-
| None ->
-
try Sys.getenv "TYPESENSE_API_KEY"
-
with Not_found -> ""
-
in
-
-
let openai_key = match read_file_if_exists ".openrouter-api" with
-
| Some key -> key
-
| None ->
-
try Sys.getenv "OPENAI_API_KEY"
-
with Not_found -> ""
-
in
-
-
{ endpoint; api_key; openai_key }
-
(** TODO:claude Re-export pretty printer from Typesense_cliente *)
-
let pp_search_result_oneline = Typesense_cliente.pp_search_result_oneline
-
||||||| parent of 40c9549 (bushel)
-
=======
-
open Lwt.Syntax
-
open Cohttp_lwt_unix
-
-
(** TODO:claude Typesense API client for Bushel *)
-
-
type config = {
-
endpoint : string;
-
api_key : string;
-
openai_key : string;
-
}
-
-
type error =
-
| Http_error of int * string
-
| Json_error of string
-
| Connection_error of string
-
-
let pp_error fmt = function
-
| Http_error (code, msg) -> Fmt.pf fmt "HTTP %d: %s" code msg
-
| Json_error msg -> Fmt.pf fmt "JSON error: %s" msg
-
| Connection_error msg -> Fmt.pf fmt "Connection error: %s" msg
-
-
(** TODO:claude Create authentication headers for Typesense API *)
-
let auth_headers api_key =
-
Cohttp.Header.of_list [
-
("X-TYPESENSE-API-KEY", api_key);
-
("Content-Type", "application/json");
-
]
-
-
(** TODO:claude Make HTTP request to Typesense API *)
-
let make_request ?(meth=`GET) ?(body="") config path =
-
let uri = Uri.of_string (config.endpoint ^ path) in
-
let headers = auth_headers config.api_key in
-
let body = if body = "" then `Empty else `String body in
-
Lwt.catch (fun () ->
-
let* resp, body = Client.call ~headers ~body meth uri in
-
let status = Cohttp.Code.code_of_status (Response.status resp) in
-
let* body_str = Cohttp_lwt.Body.to_string body in
-
if status >= 200 && status < 300 then
-
Lwt.return_ok body_str
-
else
-
Lwt.return_error (Http_error (status, body_str))
-
) (fun exn ->
-
Lwt.return_error (Connection_error (Printexc.to_string exn))
-
)
-
-
(** TODO:claude Create a collection with given schema *)
-
let create_collection config (schema : Ezjsonm.value) =
-
let body = Ezjsonm.value_to_string schema in
-
make_request ~meth:`POST ~body config "/collections"
-
-
(** TODO:claude Check if collection exists *)
-
let collection_exists config name =
-
let* result = make_request config ("/collections/" ^ name) in
-
match result with
-
| Ok _ -> Lwt.return true
-
| Error (Http_error (404, _)) -> Lwt.return false
-
| Error _ -> Lwt.return false
-
-
(** TODO:claude Delete a collection *)
-
let delete_collection config name =
-
make_request ~meth:`DELETE config ("/collections/" ^ name)
-
-
(** TODO:claude Upload documents to a collection in batch *)
-
let upload_documents config collection_name (documents : Ezjsonm.value list) =
-
let jsonl_lines = List.map (fun doc -> Ezjsonm.value_to_string doc) documents in
-
let body = String.concat "\n" jsonl_lines in
-
make_request ~meth:`POST ~body config
-
(Printf.sprintf "/collections/%s/documents/import?action=upsert" collection_name)
-
-
-
(** TODO:claude Convert Bushel objects to Typesense documents *)
-
-
(** TODO:claude Helper function to truncate long strings for embedding *)
-
let truncate_for_embedding ?(max_chars=20000) text =
-
if String.length text <= max_chars then text
-
else String.sub text 0 max_chars
-
-
(** TODO:claude Helper function to convert Ptime to Unix timestamp *)
-
let ptime_to_timestamp ptime =
-
let span = Ptime.to_span ptime in
-
let seconds = Ptime.Span.to_int_s span in
-
match seconds with
-
| Some s -> Int64.of_int s
-
| None -> 0L
-
-
(** TODO:claude Helper function to convert date tuple to Unix timestamp *)
-
let date_to_timestamp (year, month, day) =
-
match Ptime.of_date (year, month, day) with
-
| Some ptime -> ptime_to_timestamp ptime
-
| None -> 0L
-
-
(** Resolve author handles to full names in a list *)
-
let resolve_author_list contacts authors =
-
List.map (fun author ->
-
(* Strip '@' prefix if present *)
-
let handle =
-
if String.length author > 0 && author.[0] = '@' then
-
String.sub author 1 (String.length author - 1)
-
else
-
author
-
in
-
(* Try to look up as a contact handle *)
-
match Contact.find_by_handle contacts handle with
-
| Some contact -> Contact.name contact
-
| None -> author (* Keep original if not found *)
-
) authors
-
-
(** [contact_to_document contact] builds the JSON document for a contact.
    The handle is used both as ["id"] and ["handle"]; optional attributes
    are encoded as string arrays with zero or one element. *)
let contact_to_document (contact : Contact.t) =
  let module J = Ezjsonm in
  let handle = Contact.handle contact in
  (* [None] becomes an empty array, [Some s] a one-element array. *)
  let optional field = J.list J.string (Option.to_list field) in
  J.dict [
    ("id", J.string handle);
    ("handle", J.string handle);
    ("name", J.string (Contact.name contact));
    ("names", J.list J.string (Contact.names contact));
    ("email", optional (Contact.email contact));
    ("icon", optional (Contact.icon contact));
    ("github", optional (Contact.github contact));
    ("twitter", optional (Contact.twitter contact));
    ("url", optional (Contact.url contact));
  ]
-
-
(** [paper_to_document entries paper] builds the JSON document for a paper.

    Author handles are resolved to contact names, the abstract is converted
    from markdown to plain text and truncated, and ["doi"], ["pdf_url"],
    ["journal"] and ["bibtex"] are read from the paper's raw JSON. Any
    failure while reading a raw field is treated as "field absent". *)
let paper_to_document entries (paper : Paper.t) =
  let module J = Ezjsonm in
  let year, month, day = Paper.date paper in

  (* Top-level member of the raw paper JSON, or [None] on any failure. *)
  let raw_field key =
    try Some (List.assoc key (J.get_dict (Paper.raw_json paper)))
    with _ -> None
  in
  (* Accept either a single string or an array; non-string items are dropped. *)
  let string_array key =
    match raw_field key with
    | Some (`String s) -> [ s ]
    | Some (`A items) ->
      List.filter_map (function `String s -> Some s | _ -> None) items
    | Some _ | None -> []
  in

  let authors =
    resolve_author_list (Entry.contacts entries) (Paper.authors paper)
  in
  let abstract =
    truncate_for_embedding
      (Md.markdown_to_plaintext entries (Paper.abstract paper))
  in

  (* Venue label; falls back to the bare bibtype if anything here raises. *)
  let bibtype = Paper.bibtype paper in
  let metadata =
    try
      let journal = Paper.journal paper in
      match bibtype with
      | "article" -> Printf.sprintf "Journal: %s" journal
      | "inproceedings" -> Printf.sprintf "Proceedings: %s" journal
      | "misc" | "techreport" -> Printf.sprintf "Preprint: %s" journal
      | other -> Printf.sprintf "%s: %s" (String.capitalize_ascii other) journal
    with _ -> bibtype
  in

  let bibtex =
    match raw_field "bibtex" with
    | Some json -> (try J.get_string json with _ -> "")
    | None -> ""
  in

  let thumbnail_url = Entry.thumbnail entries (`Paper paper) in
  J.dict [
    ("id", J.string (Paper.slug paper));
    ("title", J.string (Paper.title paper));
    ("authors", J.list J.string authors);
    ("abstract", J.string abstract);
    ("metadata", J.string metadata);
    ("bibtex", J.string bibtex);
    ("date", J.string (Printf.sprintf "%04d-%02d-%02d" year month day));
    ("date_timestamp", J.int64 (date_to_timestamp (year, month, day)));
    ("tags", J.list J.string (Paper.tags paper));
    ("doi", J.list J.string (string_array "doi"));
    ("pdf_url", J.list J.string (string_array "pdf_url"));
    ("journal", J.list J.string (string_array "journal"));
    ("related_projects", J.list J.string (Paper.project_slugs paper));
    ("thumbnail_url", J.string (Option.value ~default:"" thumbnail_url));
  ]
-
-
(** [project_to_document entries project] builds the JSON document for a
    project. The project only carries a start year, so January 1st of that
    year is used for the ["date"] and ["date_timestamp"] fields. *)
let project_to_document entries (project : Project.t) =
  let module J = Ezjsonm in
  let start_year = project.start in
  let description =
    truncate_for_embedding
      (Md.markdown_to_plaintext entries (Project.body project))
  in
  let thumbnail_url = Entry.thumbnail entries (`Project project) in
  J.dict [
    ("id", J.string project.slug);
    ("title", J.string (Project.title project));
    ("description", J.string description);
    ("start", J.int start_year);
    ("finish", J.option J.int project.finish);
    ("start_year", J.int start_year);
    ("date", J.string (Printf.sprintf "%04d-01-01" start_year));
    ("date_timestamp", J.int64 (date_to_timestamp (start_year, 1, 1)));
    ("tags", J.list J.string (Project.tags project));
    ("thumbnail_url", J.string (Option.value ~default:"" thumbnail_url));
  ]
-
-
(** [video_to_document entries video] builds the JSON document for a video.
    Linked paper and project slugs are replaced by the title of the entry
    they resolve to, or kept as the slug when the lookup fails; each is
    encoded as a string array with zero or one element. *)
let video_to_document entries (video : Video.t) =
  let module J = Ezjsonm in
  let datetime = Video.datetime video in
  let published = Ptime.to_rfc3339 datetime in
  let description =
    truncate_for_embedding
      (Md.markdown_to_plaintext entries (Video.body video))
  in
  (* Title of the entry a slug points at, or the slug itself if unknown. *)
  let title_or_slug slug =
    match Entry.lookup entries slug with
    | Some entry -> Entry.title entry
    | None -> slug
  in
  let linked_title slug_opt =
    J.list J.string (Option.to_list (Option.map title_or_slug slug_opt))
  in
  let thumbnail_url = Entry.thumbnail entries (`Video video) in
  J.dict [
    ("id", J.string (Video.slug video));
    ("title", J.string (Video.title video));
    ("description", J.string description);
    ("published_date", J.string published);
    ("date", J.string published);
    ("date_timestamp", J.int64 (ptime_to_timestamp datetime));
    ("url", J.string (Video.url video));
    ("uuid", J.string (Video.uuid video));
    ("is_talk", J.bool (Video.talk video));
    ("paper", linked_title (Video.paper video));
    ("project", linked_title (Video.project video));
    ("tags", J.list J.string video.tags);
    ("thumbnail_url", J.string (Option.value ~default:"" thumbnail_url));
  ]
-
-
(** [note_to_document entries note] builds the JSON document for a note.
    The body is converted from markdown to plain text and truncated; the
    optional synopsis is encoded as a string array with zero or one
    element. *)
let note_to_document entries (note : Note.t) =
  let module J = Ezjsonm in
  let datetime = Note.datetime note in
  let content =
    truncate_for_embedding (Md.markdown_to_plaintext entries (Note.body note))
  in
  let thumbnail_url = Entry.thumbnail entries (`Note note) in
  let synopsis = Option.to_list (Note.synopsis note) in
  J.dict [
    ("id", J.string (Note.slug note));
    ("title", J.string (Note.title note));
    ("date", J.string (Ptime.to_rfc3339 datetime));
    ("date_timestamp", J.int64 (ptime_to_timestamp datetime));
    ("content", J.string content);
    ("tags", J.list J.string (Note.tags note));
    ("draft", J.bool (Note.draft note));
    ("synopsis", J.list J.string synopsis);
    ("thumbnail_url", J.string (Option.value ~default:"" thumbnail_url));
    ("words", J.int (Note.words note));
  ]
-
-
(** [idea_to_document entries idea] builds the JSON document for an idea.
    Supervisor and student handles are resolved to contact names, the
    project slug is replaced by the project's title when it can be looked
    up, and January 1st of the idea's year is used as its date. *)
let idea_to_document entries (idea : Idea.t) =
  let module J = Ezjsonm in
  let contacts = Entry.contacts entries in
  let year = Idea.year idea in
  let description =
    truncate_for_embedding (Md.markdown_to_plaintext entries (Idea.body idea))
  in
  let people handles = J.list J.string (resolve_author_list contacts handles) in
  let project_slug = Idea.project idea in
  (* Fall back to the raw slug when the project entry is unknown. *)
  let project_title =
    match Entry.lookup entries project_slug with
    | None -> project_slug
    | Some entry -> Entry.title entry
  in
  let thumbnail_url = Entry.thumbnail entries (`Idea idea) in
  J.dict [
    ("id", J.string idea.slug);
    ("title", J.string (Idea.title idea));
    ("description", J.string description);
    ("level", J.string (Idea.level_to_string (Idea.level idea)));
    ("project", J.string project_title);
    ("status", J.string (Idea.status_to_string (Idea.status idea)));
    ("year", J.int year);
    ("date", J.string (Printf.sprintf "%04d-01-01" year));
    ("date_timestamp", J.int64 (date_to_timestamp (year, 1, 1)));
    ("supervisors", people (Idea.supervisors idea));
    ("students", people (Idea.students idea));
    ("tags", J.list J.string idea.tags);
    ("thumbnail_url", J.string (Option.value ~default:"" thumbnail_url));
  ]
-
-
(** [add_embedding_field_to_schema schema config embedding_from_fields]
    returns [schema] with one extra entry appended to its ["fields"] array:
    a [float[]] field named ["embedding"] that is embedded from
    [embedding_from_fields] using the ["openai/text-embedding-3-small"]
    model and [config.openai_key].

    @raise Not_found if [schema] has no ["fields"] member. *)
let add_embedding_field_to_schema schema config embedding_from_fields =
  let module J = Ezjsonm in
  let members = J.get_dict schema in
  let existing_fields = J.get_list Fun.id (List.assoc "fields" members) in
  let embedding_field =
    J.dict [
      ("name", J.string "embedding");
      ("type", J.string "float[]");
      ("embed", J.dict [
        ("from", J.list J.string embedding_from_fields);
        ("model_config", J.dict [
          ("model_name", J.string "openai/text-embedding-3-small");
          ("api_key", J.string config.openai_key);
        ]);
      ]);
    ]
  in
  let fields_json = J.list Fun.id (existing_fields @ [ embedding_field ]) in
  (* Rebuild the object, swapping only the "fields" member. *)
  members
  |> List.map (fun ((key, _) as member) ->
       if key = "fields" then (key, fields_json) else member)
  |> J.dict
-
-
(** [upload_all config entries] rebuilds every Typesense collection
    (contacts, papers, videos, projects, notes, ideas) from [entries].

    For each collection, in order: delete it if it already exists, create it
    from the type's schema plus an embedding field, then upload all
    documents in one import request. Progress and failures are written to
    stdout; a failure in one collection is reported and does not stop the
    remaining collections.

    The import response is scanned line by line for ["success"] markers.
    The two regular expressions used for that are compiled once here rather
    than once per response line. *)
let upload_all config entries =
  let say msg = Lwt_io.write Lwt_io.stdout msg in
  let success_re = Str.regexp ".*success.*true.*" in
  let failure_re = Str.regexp ".*success.*false.*" in
  let matches re line = Str.string_match re line 0 in

  let* () = say "Uploading bushel data to Typesense\n" in

  let with_embedding schema from_fields =
    add_embedding_field_to_schema schema config from_fields
  in
  let collections : (string * Ezjsonm.value * Ezjsonm.value list) list = [
    ("contacts",
     with_embedding Contact.typesense_schema ["name"; "names"],
     List.map contact_to_document (Entry.contacts entries));
    ("papers",
     with_embedding Paper.typesense_schema ["title"; "abstract"; "authors"],
     List.map (paper_to_document entries) (Entry.papers entries));
    ("videos",
     with_embedding Video.typesense_schema ["title"; "description"],
     List.map (video_to_document entries) (Entry.videos entries));
    ("projects",
     with_embedding Project.typesense_schema ["title"; "description"; "tags"],
     List.map (project_to_document entries) (Entry.projects entries));
    ("notes",
     with_embedding Note.typesense_schema ["title"; "content"; "tags"],
     List.map (note_to_document entries) (Entry.notes entries));
    ("ideas",
     with_embedding Idea.typesense_schema ["title"; "description"; "tags"],
     List.map (idea_to_document entries) (Entry.ideas entries));
  ] in

  (* Summarise an import response: one result object per line. *)
  let report_upload name total response =
    let lines = String.split_on_char '\n' response in
    let count re =
      List.fold_left
        (fun acc line ->
          if String.contains line ':' && matches re line then acc + 1 else acc)
        0 lines
    in
    let successes = count success_re in
    let failures = count failure_re in
    let* () =
      say (Fmt.str "Upload results for %s: %d successful, %d failed out of %d total\n"
             name successes failures total)
    in
    if failures > 0 then begin
      let* () = say (Fmt.str "Failed documents in %s:\n" name) in
      let failed_lines = List.filter (matches failure_re) lines in
      Lwt_list.iter_s (fun line -> say (line ^ "\n")) failed_lines
    end
    else Lwt.return_unit
  in

  let upload_collection
      ((name, schema, documents) : string * Ezjsonm.value * Ezjsonm.value list) =
    let* () = say (Fmt.str "Processing collection: %s\n" name) in
    let* exists = collection_exists config name in
    let* () =
      if not exists then Lwt.return_unit
      else begin
        let* () = say (Fmt.str "Collection %s exists, deleting...\n" name) in
        let* deleted = delete_collection config name in
        match deleted with
        | Ok _ -> say (Fmt.str "Deleted collection %s\n" name)
        | Error err ->
          let err_str = Fmt.str "%a" pp_error err in
          say (Fmt.str "Failed to delete collection %s: %s\n" name err_str)
      end
    in
    let* () =
      say (Fmt.str "Creating collection %s with %d documents\n" name
             (List.length documents))
    in
    let* created = create_collection config schema in
    match created with
    | Error err ->
      let err_str = Fmt.str "%a" pp_error err in
      say (Fmt.str "Failed to create collection %s: %s\n" name err_str)
    | Ok _ ->
      let* () = say (Fmt.str "Created collection %s\n" name) in
      begin match documents with
      | [] -> say (Fmt.str "No documents to upload for %s\n" name)
      | _ :: _ ->
        let* uploaded = upload_documents config name documents in
        begin match uploaded with
        | Ok response -> report_upload name (List.length documents) response
        | Error err ->
          let err_str = Fmt.str "%a" pp_error err in
          say (Fmt.str "Failed to upload documents to %s: %s\n" name err_str)
        end
      end
  in

  Lwt_list.iter_s upload_collection collections
-
-
(** A single search hit. Re-exported from [Typesense_client] with a type
    equation, so values of the two types are interchangeable. *)
type search_result = Typesense_client.search_result = {
  id : string;  (** Document ID *)
  title : string;  (** Document title *)
  content : string;  (** Document content/description *)
  score : float;  (** Relevance score *)
  collection : string;  (** Collection name *)
  highlights : (string * string list) list;
      (** Highlighted search terms by field *)
  document : Ezjsonm.value;  (** Raw document for flexible field access *)
}
-
-
(** Search response: the hits plus query metadata. Re-exported from
    [Typesense_client] with a type equation. *)
type search_response = Typesense_client.search_response = {
  hits : search_result list;  (** List of matching documents *)
  total : int;  (** Total number of matches *)
  query_time : float;  (** Query execution time in milliseconds *)
}
-
-
(** [to_client_config config] builds the [Typesense_client] configuration
    record from this module's [config], copying [endpoint] and [api_key]
    (the OpenAI key is not passed on). *)
let to_client_config (config : config) =
  { Typesense_client.endpoint = config.endpoint; api_key = config.api_key }
-
-
(** [search_collection config collection_name query ?limit ?offset ()]
    delegates to [Typesense_client.search_collection] and translates the
    client's error constructors into this module's [error] type. [limit]
    defaults to 10 and [offset] to 0. *)
let search_collection (config : config) collection_name query ?(limit=10) ?(offset=0) () =
  let translate = function
    | Typesense_client.Http_error (code, msg) -> Http_error (code, msg)
    | Typesense_client.Json_error msg -> Json_error msg
    | Typesense_client.Connection_error msg -> Connection_error msg
  in
  Typesense_client.search_collection (to_client_config config)
    collection_name query ~limit ~offset ()
  |> Lwt.map (Result.map_error translate)
-
-
(** [search_all config query ?limit ?offset ()] runs one multisearch across
    all collections and merges the per-collection results into a single
    page with [Typesense_client.combine_multisearch_results]. [limit]
    defaults to 10 and [offset] to 0.

    Each collection is asked for at least [limit + offset] hits (and never
    fewer than 50). The merged page is cut from the per-collection results,
    so a fixed per-collection limit of 50 would silently truncate any page
    with [offset + limit > 50]. *)
let search_all (config : config) query ?(limit=10) ?(offset=0) () =
  let client_config = to_client_config config in
  let per_collection = max 50 (limit + offset) in
  let* result =
    Typesense_client.multisearch client_config query ~limit:per_collection ()
  in
  match result with
  | Ok multisearch_resp ->
    Lwt.return_ok
      (Typesense_client.combine_multisearch_results multisearch_resp
         ~limit ~offset ())
  | Error (Typesense_client.Http_error (code, msg)) ->
    Lwt.return_error (Http_error (code, msg))
  | Error (Typesense_client.Json_error msg) ->
    Lwt.return_error (Json_error msg)
  | Error (Typesense_client.Connection_error msg) ->
    Lwt.return_error (Connection_error msg)
-
-
(** [list_collections config] delegates to
    [Typesense_client.list_collections] and translates the client's error
    constructors into this module's [error] type. *)
let list_collections (config : config) =
  let translate = function
    | Typesense_client.Http_error (code, msg) -> Http_error (code, msg)
    | Typesense_client.Json_error msg -> Json_error msg
    | Typesense_client.Connection_error msg -> Connection_error msg
  in
  Typesense_client.list_collections (to_client_config config)
  |> Lwt.map (Result.map_error translate)
-
-
(** Multisearch response containing results from multiple collections.
    Re-exported from [Typesense_client] with a type equation. *)
type multisearch_response = Typesense_client.multisearch_response = {
  results : search_response list;  (** Results from each collection *)
}
-
-
(** [multisearch config query ?limit ()] delegates to
    [Typesense_client.multisearch] and translates the client's error
    constructors into this module's [error] type. [limit] defaults to 10. *)
let multisearch (config : config) query ?(limit=10) () =
  let translate = function
    | Typesense_client.Http_error (code, msg) -> Http_error (code, msg)
    | Typesense_client.Json_error msg -> Json_error msg
    | Typesense_client.Connection_error msg -> Connection_error msg
  in
  Typesense_client.multisearch (to_client_config config) query ~limit ()
  |> Lwt.map (Result.map_error translate)
-
-
(** [combine_multisearch_results multisearch_resp ?limit ?offset ()] forwards
    to [Typesense_client.combine_multisearch_results]. [limit] defaults to
    10 and [offset] to 0. *)
let combine_multisearch_results (multisearch_resp : multisearch_response) ?(limit=10) ?(offset=0) () =
  let merge = Typesense_client.combine_multisearch_results in
  merge multisearch_resp ~limit ~offset ()
-
-
(** [load_config_from_files ()] builds a [config] from files in the current
    directory, falling back to the environment and then to defaults:

    - [endpoint]: contents of [.typesense-url], else ["http://localhost:8108"];
    - [api_key]: contents of [.typesense-key], else [$TYPESENSE_API_KEY],
      else [""];
    - [openai_key]: contents of [.openrouter-api], else [$OPENAI_API_KEY],
      else [""].

    File contents are trimmed of surrounding whitespace. The channel is
    closed even when reading fails, so a read error no longer leaks the
    file descriptor. *)
let load_config_from_files () =
  let read_file_if_exists filename =
    if Sys.file_exists filename then begin
      let ic = open_in filename in
      Fun.protect
        ~finally:(fun () -> close_in_noerr ic)
        (fun () ->
          Some (String.trim (really_input_string ic (in_channel_length ic))))
    end
    else None
  in
  (* File contents win; otherwise the environment variable; otherwise "". *)
  let from_file_or_env ~file ~env =
    match read_file_if_exists file with
    | Some value -> value
    | None -> Option.value (Sys.getenv_opt env) ~default:""
  in
  let endpoint =
    Option.value (read_file_if_exists ".typesense-url")
      ~default:"http://localhost:8108"
  in
  let api_key = from_file_or_env ~file:".typesense-key" ~env:"TYPESENSE_API_KEY" in
  let openai_key = from_file_or_env ~file:".openrouter-api" ~env:"OPENAI_API_KEY" in
  { endpoint; api_key; openai_key }
-
(** TODO:claude Re-export pretty printer from Typesense_client *)
-
let pp_search_result_oneline = Typesense_client.pp_search_result_oneline
-
>>>>>>> 40c9549 (bushel)
+
(** Re-export pretty printer from Typesense_cliente *)
+
let pp_search_result_oneline = Typesense_cliente.pp_search_result_oneline
+11 -129
stack/bushel/lib/typesense.mli
···
(** Typesense API client for Bushel
-
<<<<<<< HEAD
This module provides an OCaml client for the Typesense search engine API.
It handles collection management and document indexing for all Bushel object
···
Eio.Switch.run (fun sw ->
Typesense.upload_all ~sw ~env config entries))
]}
-
-
TODO:claude *)
+
*)
(** Configuration for connecting to a Typesense server *)
type config = {
···
val pp_error : Format.formatter -> error -> unit
(** Create a collection with the given schema.
-
The schema should follow Typesense's collection schema format.
-
TODO:claude *)
+
The schema should follow Typesense's collection schema format. *)
val create_collection :
sw:Eio.Switch.t ->
env:< clock: [> float Eio.Time.clock_ty ] Eio.Resource.t; net: [> [> `Generic ] Eio.Net.ty ] Eio.Resource.t; .. > ->
···
(string, error) result
(** Check if a collection exists by name.
-
Returns true if the collection exists, false otherwise.
-
TODO:claude *)
+
Returns true if the collection exists, false otherwise. *)
val collection_exists :
sw:Eio.Switch.t ->
env:< clock: [> float Eio.Time.clock_ty ] Eio.Resource.t; net: [> [> `Generic ] Eio.Net.ty ] Eio.Resource.t; .. > ->
···
string ->
bool
-
(** Delete a collection by name.
-
TODO:claude *)
+
(** Delete a collection by name. *)
val delete_collection :
sw:Eio.Switch.t ->
env:< clock: [> float Eio.Time.clock_ty ] Eio.Resource.t; net: [> [> `Generic ] Eio.Net.ty ] Eio.Resource.t; .. > ->
···
(string, error) result
(** Upload documents to a collection in batch using JSONL format.
-
More efficient than uploading documents one by one.
-
TODO:claude *)
+
More efficient than uploading documents one by one. *)
val upload_documents :
sw:Eio.Switch.t ->
env:< clock: [> float Eio.Time.clock_ty ] Eio.Resource.t; net: [> [> `Generic ] Eio.Net.ty ] Eio.Resource.t; .. > ->
···
- Extract all bushel data types from the Entry.t
- Create or recreate collections for each type
- Upload all documents in batches
-
- Report progress to stdout
-
TODO:claude *)
+
- Report progress to stdout *)
val upload_all :
sw:Eio.Switch.t ->
env:< clock: [> float Eio.Time.clock_ty ] Eio.Resource.t; net: [> [> `Generic ] Eio.Net.ty ] Eio.Resource.t; .. > ->
···
query_time: float; (** Query execution time in milliseconds *)
}
-
(** Search a specific collection.
-
TODO:claude *)
+
(** Search a specific collection. *)
val search_collection :
sw:Eio.Switch.t ->
env:< clock: float Eio.Time.clock_ty Eio.Resource.t; fs: Eio.Fs.dir_ty Eio.Path.t; net: [`Generic | `Unix] Eio.Net.ty Eio.Resource.t; .. > ->
···
(search_response, error) result
(** Search across all bushel collections.
-
Results are sorted by relevance score and paginated.
-
TODO:claude *)
+
Results are sorted by relevance score and paginated. *)
val search_all :
sw:Eio.Switch.t ->
env:< clock: float Eio.Time.clock_ty Eio.Resource.t; fs: Eio.Fs.dir_ty Eio.Path.t; net: [`Generic | `Unix] Eio.Net.ty Eio.Resource.t; .. > ->
···
}
(** Perform multisearch across all collections using Typesense's multi_search endpoint.
-
More efficient than individual searches as it's done in a single request.
-
TODO:claude *)
+
More efficient than individual searches as it's done in a single request. *)
val multisearch :
sw:Eio.Switch.t ->
env:< clock: float Eio.Time.clock_ty Eio.Resource.t; fs: Eio.Fs.dir_ty Eio.Path.t; net: [`Generic | `Unix] Eio.Net.ty Eio.Resource.t; .. > ->
···
(multisearch_response, error) result
(** Combine multisearch results into a single result set.
-
Results are sorted by relevance score and paginated.
-
TODO:claude *)
+
Results are sorted by relevance score and paginated. *)
val combine_multisearch_results : multisearch_response -> ?limit:int -> ?offset:int -> unit -> search_response
(** List all collections with document counts.
-
Returns a list of (collection_name, document_count) pairs.
-
TODO:claude *)
+
Returns a list of (collection_name, document_count) pairs. *)
val list_collections :
sw:Eio.Switch.t ->
env:< clock: float Eio.Time.clock_ty Eio.Resource.t; fs: Eio.Fs.dir_ty Eio.Path.t; net: [`Generic | `Unix] Eio.Net.ty Eio.Resource.t; .. > ->
config ->
((string * int) list, error) result
-
||||||| parent of 40c9549 (bushel)
-
=======
-
-
This module provides an OCaml client for the Typesense search engine API.
-
It handles collection management and document indexing for all Bushel object
-
types including contacts, papers, projects, news, videos, notes, and ideas.
-
-
Example usage:
-
{[
-
let config = { endpoint = "https://search.example.com"; api_key = "xyz123"; openai_key = "sk-..." } in
-
Lwt_main.run (Typesense.upload_all config entries)
-
]}
-
-
TODO:claude *)
-
-
(** Configuration for connecting to a Typesense server *)
-
type config = {
-
endpoint : string; (** Typesense server URL (e.g., "https://search.example.com") *)
-
api_key : string; (** API key for authentication *)
-
openai_key : string; (** OpenAI API key for embeddings *)
-
}
-
-
(** Possible errors that can occur during Typesense operations *)
-
type error =
-
| Http_error of int * string (** HTTP error with status code and message *)
-
| Json_error of string (** JSON parsing or encoding error *)
-
| Connection_error of string (** Network connection error *)
-
-
(** Pretty-printer for error types *)
-
val pp_error : Format.formatter -> error -> unit
-
-
(** Create a collection with the given schema.
-
The schema should follow Typesense's collection schema format.
-
TODO:claude *)
-
val create_collection : config -> Ezjsonm.value -> (string, error) result Lwt.t
-
-
(** Check if a collection exists by name.
-
Returns true if the collection exists, false otherwise.
-
TODO:claude *)
-
val collection_exists : config -> string -> bool Lwt.t
-
-
(** Delete a collection by name.
-
TODO:claude *)
-
val delete_collection : config -> string -> (string, error) result Lwt.t
-
-
(** Upload documents to a collection in batch using JSONL format.
-
More efficient than uploading documents one by one.
-
TODO:claude *)
-
val upload_documents : config -> string -> Ezjsonm.value list -> (string, error) result Lwt.t
-
-
(** Upload all bushel objects to Typesense.
-
This function will:
-
- Extract all bushel data types from the Entry.t
-
- Create or recreate collections for each type
-
- Upload all documents in batches
-
- Report progress to stdout
-
TODO:claude *)
-
val upload_all : config -> Entry.t -> unit Lwt.t
-
-
(** Search result structure containing document information and relevance score *)
-
type search_result = {
-
id: string; (** Document ID *)
-
title: string; (** Document title *)
-
content: string; (** Document content/description *)
-
score: float; (** Relevance score *)
-
collection: string; (** Collection name *)
-
highlights: (string * string list) list; (** Highlighted search terms by field *)
-
document: Ezjsonm.value; (** Raw document for flexible field access *)
-
}
-
-
(** Search response containing results and metadata *)
-
type search_response = {
-
hits: search_result list; (** List of matching documents *)
-
total: int; (** Total number of matches *)
-
query_time: float; (** Query execution time in milliseconds *)
-
}
-
-
(** Search a specific collection.
-
TODO:claude *)
-
val search_collection : config -> string -> string -> ?limit:int -> ?offset:int -> unit -> (search_response, error) result Lwt.t
-
-
(** Search across all bushel collections.
-
Results are sorted by relevance score and paginated.
-
TODO:claude *)
-
val search_all : config -> string -> ?limit:int -> ?offset:int -> unit -> (search_response, error) result Lwt.t
-
-
(** Multisearch response containing results from multiple collections *)
-
type multisearch_response = {
-
results: search_response list; (** Results from each collection *)
-
}
-
-
(** Perform multisearch across all collections using Typesense's multi_search endpoint.
-
More efficient than individual searches as it's done in a single request.
-
TODO:claude *)
-
val multisearch : config -> string -> ?limit:int -> unit -> (multisearch_response, error) result Lwt.t
-
-
(** Combine multisearch results into a single result set.
-
Results are sorted by relevance score and paginated.
-
TODO:claude *)
-
val combine_multisearch_results : multisearch_response -> ?limit:int -> ?offset:int -> unit -> search_response
-
-
(** List all collections with document counts.
-
Returns a list of (collection_name, document_count) pairs.
-
TODO:claude *)
-
val list_collections : config -> ((string * int) list, error) result Lwt.t
-
>>>>>>> 40c9549 (bushel)
(** Load configuration from .typesense-url and .typesense-key files.
Falls back to environment variables and defaults.