My agentic slop goes here. Not intended for anyone else!

Changed files
+3630 -12575
claudeio
stack
bushel
cacheio
immich
karakeep
peertubee
requests
requests_json_api
river
typesense-client
zotero-translation
zulip
+1
claudeio/claude.opam
···
"fmt"
"logs"
"ezjsonm"
+
"jsont" {>= "0.2.0"}
"alcotest" {with-test}
"odoc" {with-doc}
]
+1
claudeio/dune-project
···
fmt
logs
ezjsonm
+
(jsont (>= 0.2.0))
(alcotest :with-test)))
+3
claudeio/lib/claude.ml
···
+
module Model = Model
module Content_block = Content_block
module Message = Message
module Control = Control
module Permissions = Permissions
module Hooks = Hooks
+
module Sdk_control = Sdk_control
+
module Structured_output = Structured_output
module Options = Options
module Transport = Transport
module Client = Client
+9
claudeio/lib/claude.mli
···
module Options = Options
(** Configuration options for Claude sessions. *)
+
module Model = Model
+
(** Claude AI model identifiers with type-safe variants. *)
+
module Content_block = Content_block
(** Content blocks for messages (text, tool use, tool results, thinking). *)
···
module Hooks = Hooks
(** Hooks system for event interception. *)
+
+
module Sdk_control = Sdk_control
+
(** SDK control protocol for dynamic configuration. *)
+
+
module Structured_output = Structured_output
+
(** Structured output support using JSON Schema. *)
module Transport = Transport
(** Low-level transport layer for CLI communication. *)
+80 -1
claudeio/lib/client.ml
···
hook_callbacks : (string, Hooks.callback) Hashtbl.t;
mutable next_callback_id : int;
mutable session_id : string option;
+
control_responses : (string, Ezjsonm.value) Hashtbl.t;
+
control_mutex : Eio.Mutex.t;
+
control_condition : Eio.Condition.t;
}
let handle_control_request t control_msg =
···
(* Handle control responses (e.g., initialize response) *)
let request_id = Json_utils.find_string json ["response"; "request_id"] in
Log.debug (fun m -> m "Received control response for request_id: %s" request_id);
-
(* Don't yield control responses as messages, just loop *)
+
(* Store the response and signal waiting threads *)
+
Eio.Mutex.use_rw ~protect:false t.control_mutex (fun () ->
+
Hashtbl.replace t.control_responses request_id json;
+
Eio.Condition.broadcast t.control_condition
+
);
loop ()
| _ ->
···
hook_callbacks;
next_callback_id = 0;
session_id = None;
+
control_responses = Hashtbl.create 16;
+
control_mutex = Eio.Mutex.create ();
+
control_condition = Eio.Condition.create ();
} in
(* Register hooks and send initialize if hooks are configured *)
···
let with_permission_callback t callback =
{ t with permission_callback = Some callback }
+
+
(* Helper to send a control request and wait for response *)
+
let send_control_request t ~request_id request =
+
let open Ezjsonm in
+
(* Send the control request *)
+
let control_msg = Sdk_control.create_request ~request_id ~request in
+
let json = Sdk_control.to_json control_msg in
+
Log.info (fun m -> m "Sending control request: %s" (value_to_string json));
+
Transport.send t.transport json;
+
+
(* Wait for the response with timeout *)
+
let max_wait = 10.0 in (* 10 seconds timeout *)
+
let start_time = Unix.gettimeofday () in
+
+
let rec wait_for_response () =
  let found =
    Eio.Mutex.use_rw ~protect:false t.control_mutex (fun () ->
      match Hashtbl.find_opt t.control_responses request_id with
      | Some response_json ->
        (* Remove it from the table *)
        Hashtbl.remove t.control_responses request_id;
        Some response_json
      | None ->
        let elapsed = Unix.gettimeofday () -. start_time in
        if elapsed > max_wait then
          raise (Failure (Printf.sprintf "Timeout waiting for control response: %s" request_id))
        else (
          (* Release the mutex while waiting for the reader fiber to store
             the response and broadcast; it is re-acquired on wake-up and we
             re-check from the top *)
          Eio.Condition.await t.control_condition t.control_mutex;
          None
        ))
  in
  match found with
  | Some response_json -> response_json
  | None -> wait_for_response ()
in
+
+
let response_json = wait_for_response () in
+
Log.debug (fun m -> m "Received control response: %s" (value_to_string response_json));
+
+
(* Parse the response *)
+
let response = find response_json ["response"] |> Sdk_control.Response.of_json in
+
match response with
+
| Sdk_control.Response.Success s -> s.response
+
| Sdk_control.Response.Error e ->
+
raise (Failure (Printf.sprintf "Control request failed: %s" e.error))
+
+
let set_permission_mode t mode =
+
let request_id = Printf.sprintf "set_perm_mode_%f" (Unix.gettimeofday ()) in
+
let request = Sdk_control.Request.set_permission_mode ~mode in
+
let _response = send_control_request t ~request_id request in
+
Log.info (fun m -> m "Permission mode set to: %a" Permissions.Mode.pp mode)
+
+
let set_model t model =
+
let model_str = Model.to_string model in
+
let request_id = Printf.sprintf "set_model_%f" (Unix.gettimeofday ()) in
+
let request = Sdk_control.Request.set_model ~model:model_str in
+
let _response = send_control_request t ~request_id request in
+
Log.info (fun m -> m "Model set to: %a" Model.pp model)
+
+
let set_model_string t model_str =
+
set_model t (Model.of_string model_str)
+
+
let get_server_info t =
+
let request_id = Printf.sprintf "get_server_info_%f" (Unix.gettimeofday ()) in
+
let request = Sdk_control.Request.get_server_info () in
+
match send_control_request t ~request_id request with
+
| Some response_data ->
+
let server_info = Sdk_control.Server_info.of_json response_data in
+
Log.info (fun m -> m "Retrieved server info: %a" Sdk_control.Server_info.pp server_info);
+
server_info
+
| None ->
+
raise (Failure "No response data from get_server_info request")
+193 -3
claudeio/lib/client.mli
···
+
(** Client interface for interacting with Claude.
+
+
This module provides the high-level client API for sending messages to
+
Claude and receiving responses. It handles the bidirectional streaming
+
protocol, permission callbacks, and hooks.
+
+
{2 Basic Usage}
+
+
{[
+
Eio.Switch.run @@ fun sw ->
+
let client = Client.create ~sw ~process_mgr () in
+
Client.query client "What is 2+2?";
+
+
let messages = Client.receive_all client in
+
List.iter (function
+
| Message.Assistant msg ->
+
Printf.printf "Claude: %s\n" (Message.Assistant.text msg)
+
| _ -> ()
+
) messages
+
]}
+
+
{2 Features}
+
+
- {b Message Streaming}: Messages are streamed lazily via {!Seq.t}
+
- {b Permission Control}: Custom permission callbacks for tool usage
+
- {b Hooks}: Intercept and modify tool execution
+
- {b Dynamic Control}: Change settings mid-conversation
+
- {b Resource Management}: Automatic cleanup via Eio switches
+
+
{2 Message Flow}
+
+
1. Create a client with {!create}
+
2. Send messages with {!query} or {!send_message}
+
3. Receive responses with {!receive} or {!receive_all}
+
4. Continue multi-turn conversations by sending more messages
+
5. Client automatically cleans up when the switch exits
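
A minimal sketch of this flow over two turns, reusing the names from the
Basic Usage example above ([process_mgr] is assumed to be in scope):

{[
  Eio.Switch.run @@ fun sw ->
  let client = Client.create ~sw ~process_mgr () in
  Client.query client "Summarise the design";
  let _first_turn = Client.receive_all client in
  (* Same session: the follow-up sees the earlier context *)
  Client.query client "Now list the open questions";
  let _second_turn = Client.receive_all client in
  ()
]}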
+
+
{2 Advanced Features}
+
+
- Permission discovery mode for understanding required permissions
+
- Mid-conversation model switching and permission mode changes
+
- Server capability introspection *)
+
(** The log source for client operations *)
val src : Logs.Src.t
type t
+
(** The type of Claude clients. *)
-
val create :
-
?options:Options.t ->
-
sw:Eio.Switch.t ->
+
val create :
+
?options:Options.t ->
+
sw:Eio.Switch.t ->
process_mgr:_ Eio.Process.mgr ->
unit -> t
+
(** [create ?options ~sw ~process_mgr ()] creates a new Claude client.
+
+
@param options Configuration options (defaults to {!Options.default})
+
@param sw Eio switch for resource management
+
@param process_mgr Eio process manager for spawning the Claude CLI *)
val query : t -> string -> unit
+
(** [query t prompt] sends a text message to Claude.
+
+
This is a convenience function for simple string messages. For more
+
complex messages with tool results or multiple content blocks, use
+
{!send_message} instead. *)
+
val send_message : t -> Message.t -> unit
+
(** [send_message t msg] sends a message to Claude.
+
+
Supports all message types including user messages with tool results. *)
+
val send_user_message : t -> Message.User.t -> unit
+
(** [send_user_message t msg] sends a user message to Claude. *)
val receive : t -> Message.t Seq.t
+
(** [receive t] returns a lazy sequence of messages from Claude.
+
+
The sequence yields messages as they arrive from Claude, including:
+
- {!Message.Assistant} - Claude's responses
+
- {!Message.System} - System notifications
+
- {!Message.Result} - Final result with usage statistics
+
+
Control messages (permission requests, hook callbacks) are handled
+
internally and not yielded to the sequence. *)
+
val receive_all : t -> Message.t list
+
(** [receive_all t] collects all messages into a list.
+
+
This is a convenience function that consumes the {!receive} sequence.
+
Use this when you want to process all messages at once rather than
+
streaming them. *)
val interrupt : t -> unit
+
(** [interrupt t] sends an interrupt signal to stop Claude's execution. *)
val discover_permissions : t -> t
+
(** [discover_permissions t] enables permission discovery mode.
+
+
In discovery mode, all tool usage is logged but allowed. Use
+
{!get_discovered_permissions} to retrieve the list of permissions
+
that were requested during execution.
+
+
This is useful for understanding what permissions your prompt requires. *)
+
val get_discovered_permissions : t -> Permissions.Rule.t list
+
(** [get_discovered_permissions t] returns permissions discovered during execution.
+
+
Only useful after enabling {!discover_permissions}. *)
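(** For illustration, a sketch combining the two calls above (assuming a client
    set up as in the earlier examples; the prompt is arbitrary):
    {[
      let client =
        Client.discover_permissions (Client.create ~sw ~process_mgr ())
      in
      Client.query client "Tidy up the project layout";
      let _ = Client.receive_all client in
      let rules = Client.get_discovered_permissions client in
      Printf.printf "Discovered %d permission rules\n" (List.length rules)
    ]} *)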
+
val with_permission_callback : t -> Permissions.callback -> t
+
(** [with_permission_callback t callback] updates the permission callback.
+
+
Allows dynamically changing the permission callback without recreating
+
the client. *)
+
+
(** {1 Dynamic Control Methods}
+
+
These methods allow you to change Claude's behavior mid-conversation
+
without recreating the client. This is useful for:
+
+
- Adjusting permission strictness based on user feedback
+
- Switching to faster/cheaper models for simple tasks
+
- Adapting to changing requirements during long conversations
+
- Introspecting server capabilities
+
+
{2 Example: Adaptive Permission Control}
+
+
{[
+
(* Start with strict permissions *)
+
let client = Client.create ~sw ~process_mgr
+
~options:(Options.default
+
|> Options.with_permission_mode Permissions.Mode.Default) ()
+
in
+
+
Client.query client "Analyze this code";
+
let _ = Client.receive_all client in
+
+
(* User approves, switch to auto-accept edits *)
+
Client.set_permission_mode client Permissions.Mode.Accept_edits;
+
+
Client.query client "Now refactor it";
+
let _ = Client.receive_all client in
+
]}
+
+
{2 Example: Model Switching for Efficiency}
+
+
{[
+
(* Use powerful model for complex analysis *)
+
let client = Client.create ~sw ~process_mgr
+
~options:(Options.default |> Options.with_model_string "claude-sonnet-4-5") ()
+
in
+
+
Client.query client "Design a new architecture for this system";
+
let _ = Client.receive_all client in
+
+
(* Switch to faster model for simple tasks *)
+
Client.set_model_string client "claude-haiku-4";
+
+
Client.query client "Now write a README";
+
let _ = Client.receive_all client in
+
]}
+
+
{2 Example: Server Introspection}
+
+
{[
+
let info = Client.get_server_info client in
+
Printf.printf "Claude CLI version: %s\n"
+
(Sdk_control.Server_info.version info);
+
Printf.printf "Capabilities: %s\n"
+
(String.concat ", " (Sdk_control.Server_info.capabilities info));
+
]} *)
+
+
val set_permission_mode : t -> Permissions.Mode.t -> unit
+
(** [set_permission_mode t mode] changes the permission mode mid-conversation.
+
+
This allows switching between permission modes without recreating the client:
+
- {!Permissions.Mode.Default} - Prompt for all permissions
+
- {!Permissions.Mode.Accept_edits} - Auto-accept file edits
+
- {!Permissions.Mode.Plan} - Planning mode with restricted execution
+
- {!Permissions.Mode.Bypass_permissions} - Skip all permission checks
+
+
@raise Failure if the server returns an error *)
+
+
val set_model : t -> Model.t -> unit
+
(** [set_model t model] switches to a different AI model mid-conversation.
+
+
Common models:
+
- [`Sonnet_4_5] - Most capable, balanced performance
+
- [`Opus_4] - Maximum capability for complex tasks
+
- [`Haiku_4] - Fast and cost-effective
+
+
@raise Failure if the model is invalid or unavailable *)
+
+
val set_model_string : t -> string -> unit
+
(** [set_model_string t model] switches to a different AI model using a string.
+
+
This is a convenience function that parses the string using {!Model.of_string}.
+
+
@raise Failure if the model is invalid or unavailable *)
+
+
val get_server_info : t -> Sdk_control.Server_info.t
+
(** [get_server_info t] retrieves server capabilities and metadata.
+
+
Returns information about:
+
- Server version string
+
- Available capabilities
+
- Supported commands
+
- Available output styles
+
+
Useful for feature detection and debugging.
+
+
@raise Failure if the server returns an error *)
+1 -1
claudeio/lib/dune
···
(library
(public_name claude)
(name claude)
-
(libraries eio eio.unix ezjsonm fmt logs))
+
(libraries eio eio.unix ezjsonm fmt logs jsont))
+65 -21
claudeio/lib/message.ml
···
end
module Assistant = struct
+
type error = [
+
| `Authentication_failed
+
| `Billing_error
+
| `Rate_limit
+
| `Invalid_request
+
| `Server_error
+
| `Unknown
+
]
+
+
let error_to_string = function
+
| `Authentication_failed -> "authentication_failed"
+
| `Billing_error -> "billing_error"
+
| `Rate_limit -> "rate_limit"
+
| `Invalid_request -> "invalid_request"
+
| `Server_error -> "server_error"
+
| `Unknown -> "unknown"
+
+
let error_of_string = function
+
| "authentication_failed" -> `Authentication_failed
+
| "billing_error" -> `Billing_error
+
| "rate_limit" -> `Rate_limit
+
| "invalid_request" -> `Invalid_request
+
| "server_error" -> `Server_error
+
| "unknown" | _ -> `Unknown
+
type t = {
content : Content_block.t list;
model : string;
+
error : error option;
}
-
-
let create ~content ~model = { content; model }
+
+
let create ~content ~model ?error () = { content; model; error }
let content t = t.content
let model t = t.model
+
let error t = t.error
let get_text_blocks t =
List.filter_map (function
···
String.concat "\n" (get_text_blocks t)
let to_json t =
+
let msg_fields = [
+
("content", `A (List.map Content_block.to_json t.content));
+
("model", `String t.model);
+
] in
+
let msg_fields = match t.error with
+
| Some err -> ("error", `String (error_to_string err)) :: msg_fields
+
| None -> msg_fields
+
in
`O [
("type", `String "assistant");
-
("message", `O [
-
("content", `A (List.map Content_block.to_json t.content));
-
("model", `String t.model);
-
]);
+
("message", `O msg_fields);
]
let of_json = function
| `O fields ->
let message = List.assoc "message" fields in
-
let content, model = match message with
+
let content, model, error = match message with
| `O msg_fields ->
-
let content =
+
let content =
match List.assoc "content" msg_fields with
| `A blocks -> List.map Content_block.of_json blocks
| _ -> raise (Invalid_argument "Assistant.of_json: invalid content")
in
let model = JU.assoc_string "model" msg_fields in
-
content, model
+
let error =
+
match JU.assoc_string_opt "error" msg_fields with
+
| Some err_str -> Some (error_of_string err_str)
+
| None -> None
+
in
+
content, model, error
| _ -> raise (Invalid_argument "Assistant.of_json: invalid message")
in
-
{ content; model }
+
{ content; model; error }
| _ -> raise (Invalid_argument "Assistant.of_json: expected object")
let pp fmt t =
···
total_cost_usd : float option;
usage : Usage.t option;
result : string option;
+
structured_output : value option;
}
-
let create ~subtype ~duration_ms ~duration_api_ms ~is_error ~num_turns
-
~session_id ?total_cost_usd ?usage ?result () =
+
let create ~subtype ~duration_ms ~duration_api_ms ~is_error ~num_turns
+
~session_id ?total_cost_usd ?usage ?result ?structured_output () =
{ subtype; duration_ms; duration_api_ms; is_error; num_turns;
-
session_id; total_cost_usd; usage; result }
+
session_id; total_cost_usd; usage; result; structured_output }
let subtype t = t.subtype
let duration_ms t = t.duration_ms
···
let total_cost_usd t = t.total_cost_usd
let usage t = t.usage
let result t = t.result
+
let structured_output t = t.structured_output
let to_json t =
let fields = [
···
| Some result -> ("result", `String result) :: fields
| None -> fields
in
+
let fields = match t.structured_output with
+
| Some output -> ("structured_output", output) :: fields
+
| None -> fields
+
in
`O fields
let of_json = function
···
let total_cost_usd = JU.assoc_float_opt "total_cost_usd" fields in
let usage = Option.map Usage.of_json (List.assoc_opt "usage" fields) in
let result = JU.assoc_string_opt "result" fields in
+
let structured_output = List.assoc_opt "structured_output" fields in
{ subtype; duration_ms; duration_api_ms; is_error; num_turns;
-
session_id; total_cost_usd; usage; result }
+
session_id; total_cost_usd; usage; result; structured_output }
| _ -> raise (Invalid_argument "Result.of_json: expected object")
let pp fmt t =
···
let user_with_tool_result ~tool_use_id ~content ?is_error () =
User (User.create_with_tool_result ~tool_use_id ~content ?is_error ())
-
let assistant ~content ~model = Assistant (Assistant.create ~content ~model)
-
let assistant_text ~text ~model =
-
Assistant (Assistant.create ~content:[Content_block.text text] ~model)
+
let assistant ~content ~model ?error () = Assistant (Assistant.create ~content ~model ?error ())
+
let assistant_text ~text ~model ?error () =
+
Assistant (Assistant.create ~content:[Content_block.text text] ~model ?error ())
let system ~subtype ~data = System (System.create ~subtype ~data)
let system_init ~session_id =
···
let data = System.Data.of_assoc [("error", `String error)] in
System (System.create ~subtype:"error" ~data)
-
let result ~subtype ~duration_ms ~duration_api_ms ~is_error ~num_turns
-
~session_id ?total_cost_usd ?usage ?result () =
-
Result (Result.create ~subtype ~duration_ms ~duration_api_ms ~is_error
-
~num_turns ~session_id ?total_cost_usd ?usage ?result ())
+
let result ~subtype ~duration_ms ~duration_api_ms ~is_error ~num_turns
+
~session_id ?total_cost_usd ?usage ?result ?structured_output () =
+
Result (Result.create ~subtype ~duration_ms ~duration_api_ms ~is_error
+
~num_turns ~session_id ?total_cost_usd ?usage ?result ?structured_output ())
let to_json = function
| User t -> User.to_json t
+59 -33
claudeio/lib/message.mli
···
module Assistant : sig
(** Messages from Claude assistant. *)
-
+
+
type error = [
+
| `Authentication_failed (** Authentication with Claude API failed *)
+
| `Billing_error (** Billing or account issue *)
+
| `Rate_limit (** Rate limit exceeded *)
+
| `Invalid_request (** Request was invalid *)
+
| `Server_error (** Internal server error *)
+
| `Unknown (** Unknown error type *)
+
]
+
(** The type of assistant message errors based on Python SDK error types. *)
+
+
val error_to_string : error -> string
+
(** [error_to_string err] converts an error to its string representation. *)
+
+
val error_of_string : string -> error
+
(** [error_of_string s] parses an error string. Unknown strings become [`Unknown]. *)
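(** A tiny sketch of the round-trip behaviour of these conversions (unknown
    strings fall back to [`Unknown]):
    {[
      assert (error_of_string "rate_limit" = `Rate_limit);
      assert (error_of_string "not-a-known-error" = `Unknown)
    ]} *)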
+
type t
(** The type of assistant messages. *)
-
-
val create : content:Content_block.t list -> model:string -> t
-
(** [create ~content ~model] creates an assistant message.
+
+
val create : content:Content_block.t list -> model:string -> ?error:error -> unit -> t
+
(** [create ~content ~model ?error ()] creates an assistant message.
@param content List of content blocks in the response
-
@param model The model identifier used for the response *)
-
+
@param model The model identifier used for the response
+
@param error Optional error that occurred during message generation *)
+
val content : t -> Content_block.t list
(** [content t] returns the content blocks of the assistant message. *)
-
+
val model : t -> string
(** [model t] returns the model identifier. *)
+
+
val error : t -> error option
+
(** [error t] returns the optional error that occurred during message generation. *)
val get_text_blocks : t -> string list
(** [get_text_blocks t] extracts all text content from the message. *)
···
type t
(** The type of result messages. *)
-
val create :
-
subtype:string ->
-
duration_ms:int ->
-
duration_api_ms:int ->
-
is_error:bool ->
-
num_turns:int ->
-
session_id:string ->
-
?total_cost_usd:float ->
-
?usage:Usage.t ->
-
?result:string ->
+
val create :
+
subtype:string ->
+
duration_ms:int ->
+
duration_api_ms:int ->
+
is_error:bool ->
+
num_turns:int ->
+
session_id:string ->
+
?total_cost_usd:float ->
+
?usage:Usage.t ->
+
?result:string ->
+
?structured_output:Ezjsonm.value ->
unit -> t
(** [create ~subtype ~duration_ms ~duration_api_ms ~is_error ~num_turns
~session_id ?total_cost_usd ?usage ?result ?structured_output ()] creates a result message.
···
@param session_id Unique session identifier
@param total_cost_usd Optional total cost in USD
@param usage Optional usage statistics as JSON
-
@param result Optional result string *)
+
@param result Optional result string
+
@param structured_output Optional structured JSON output from Claude *)
val subtype : t -> string
(** [subtype t] returns the subtype of the result. *)
···
val result : t -> string option
(** [result t] returns the optional result string. *)
-
+
+
val structured_output : t -> Ezjsonm.value option
+
(** [structured_output t] returns the optional structured JSON output. *)
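(** A short sketch of consuming this field, where [msg] is a result message
    obtained from the client (illustrative only):
    {[
      match structured_output msg with
      | Some json -> print_endline (Ezjsonm.value_to_string json)
      | None -> print_endline "no structured output requested"
    ]} *)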
+
val to_json : t -> Ezjsonm.value
(** [to_json t] converts the result message to its JSON representation. *)
···
(** [user_with_tool_result ~tool_use_id ~content ?is_error ()] creates a user message
containing a tool result. *)
-
val assistant : content:Content_block.t list -> model:string -> t
-
(** [assistant ~content ~model] creates an assistant message. *)
+
val assistant : content:Content_block.t list -> model:string -> ?error:Assistant.error -> unit -> t
+
(** [assistant ~content ~model ?error ()] creates an assistant message. *)
-
val assistant_text : text:string -> model:string -> t
-
(** [assistant_text ~text ~model] creates an assistant message with only text content. *)
+
val assistant_text : text:string -> model:string -> ?error:Assistant.error -> unit -> t
+
(** [assistant_text ~text ~model ?error ()] creates an assistant message with only text content. *)
val system : subtype:string -> data:System.Data.t -> t
(** [system ~subtype ~data] creates a system message. *)
···
val system_error : error:string -> t
(** [system_error ~error] creates a system error message. *)
-
val result :
-
subtype:string ->
-
duration_ms:int ->
-
duration_api_ms:int ->
-
is_error:bool ->
-
num_turns:int ->
-
session_id:string ->
-
?total_cost_usd:float ->
-
?usage:Result.Usage.t ->
-
?result:string ->
+
val result :
+
subtype:string ->
+
duration_ms:int ->
+
duration_api_ms:int ->
+
is_error:bool ->
+
num_turns:int ->
+
session_id:string ->
+
?total_cost_usd:float ->
+
?usage:Result.Usage.t ->
+
?result:string ->
+
?structured_output:Ezjsonm.value ->
unit -> t
(** [result ~subtype ~duration_ms ~duration_api_ms ~is_error ~num_turns
~session_id ?total_cost_usd ?usage ?result ?structured_output ()] creates a result message. *)
+50 -7
claudeio/lib/options.ml
···
let src = Logs.Src.create "claude.options" ~doc:"Claude configuration options"
module Log = (val Logs.src_log src : Logs.LOG)
+
type setting_source = User | Project | Local
+
type t = {
allowed_tools : string list;
disallowed_tools : string list;
···
append_system_prompt : string option;
permission_mode : Permissions.Mode.t option;
permission_callback : Permissions.callback option;
-
model : string option;
+
model : Model.t option;
cwd : Eio.Fs.dir_ty Eio.Path.t option;
env : (string * string) list;
continue_conversation : bool;
···
extra_args : (string * string option) list;
debug_stderr : Eio.Flow.sink_ty Eio.Flow.sink option;
hooks : Hooks.config option;
+
max_budget_usd : float option;
+
fallback_model : Model.t option;
+
setting_sources : setting_source list option;
+
max_buffer_size : int option;
+
user : string option;
+
output_format : Structured_output.t option;
}
let default = {
···
extra_args = [];
debug_stderr = None;
hooks = None;
+
max_budget_usd = None;
+
fallback_model = None;
+
setting_sources = None;
+
max_buffer_size = None;
+
user = None;
+
output_format = None;
}
let create
···
?(extra_args = [])
?debug_stderr
?hooks
+
?max_budget_usd
+
?fallback_model
+
?setting_sources
+
?max_buffer_size
+
?user
+
?output_format
() =
{ allowed_tools; disallowed_tools; max_thinking_tokens;
system_prompt; append_system_prompt; permission_mode;
permission_callback; model; cwd; env;
continue_conversation; resume; max_turns;
permission_prompt_tool_name; settings; add_dirs;
-
extra_args; debug_stderr; hooks }
+
extra_args; debug_stderr; hooks;
+
max_budget_usd; fallback_model; setting_sources;
+
max_buffer_size; user; output_format }
let allowed_tools t = t.allowed_tools
let disallowed_tools t = t.disallowed_tools
···
let extra_args t = t.extra_args
let debug_stderr t = t.debug_stderr
let hooks t = t.hooks
+
let max_budget_usd t = t.max_budget_usd
+
let fallback_model t = t.fallback_model
+
let setting_sources t = t.setting_sources
+
let max_buffer_size t = t.max_buffer_size
+
let user t = t.user
+
let output_format t = t.output_format
let with_allowed_tools tools t = { t with allowed_tools = tools }
let with_disallowed_tools tools t = { t with disallowed_tools = tools }
···
let with_permission_mode mode t = { t with permission_mode = Some mode }
let with_permission_callback callback t = { t with permission_callback = Some callback }
let with_model model t = { t with model = Some model }
+
let with_model_string model t = { t with model = Some (Model.of_string model) }
let with_cwd cwd t = { t with cwd = Some cwd }
let with_env env t = { t with env }
let with_continue_conversation continue t = { t with continue_conversation = continue }
···
let with_extra_args args t = { t with extra_args = args }
let with_debug_stderr sink t = { t with debug_stderr = Some sink }
let with_hooks hooks t = { t with hooks = Some hooks }
+
let with_max_budget_usd budget t = { t with max_budget_usd = Some budget }
+
let with_fallback_model model t = { t with fallback_model = Some model }
+
let with_fallback_model_string model t = { t with fallback_model = Some (Model.of_string model) }
+
let with_setting_sources sources t = { t with setting_sources = Some sources }
+
let with_no_settings t = { t with setting_sources = Some [] }
+
let with_max_buffer_size size t = { t with max_buffer_size = Some size }
+
let with_user user t = { t with user = Some user }
+
let with_output_format format t = { t with output_format = Some format }
let to_json t =
let fields = [] in
···
| None -> fields
in
let fields = match t.model with
-
| Some m -> ("model", `String m) :: fields
+
| Some m -> ("model", `String (Model.to_string m)) :: fields
| None -> fields
in
let fields =
···
try Some (Permissions.Mode.of_json (List.assoc "permission_mode" fields))
with Not_found -> None
in
-
let model =
-
try Some (get_string (List.assoc "model" fields))
+
let model =
+
try Some (Model.of_string (get_string (List.assoc "model" fields)))
with Not_found -> None
in
let env =
···
add_dirs = [];
extra_args = [];
debug_stderr = None;
-
hooks = None; }
+
hooks = None;
+
max_budget_usd = None;
+
fallback_model = None;
+
setting_sources = None;
+
max_buffer_size = None;
+
user = None;
+
output_format = None; }
| _ -> raise (Invalid_argument "Options.of_json: expected object")
let pp fmt t =
···
Fmt.(option string) t.system_prompt
Fmt.(option string) t.append_system_prompt
Fmt.(option Permissions.Mode.pp) t.permission_mode
-
Fmt.(option string) t.model
+
Fmt.(option Model.pp) t.model
Fmt.(list (pair string string)) t.env
let log_options t =
+177 -10
claudeio/lib/options.mli
···
(** Configuration options for Claude sessions.
-
-
This module provides configuration options for controlling Claude's
-
behavior, including tool permissions, system prompts, models, and
-
execution environment. *)
+
+
This module provides comprehensive configuration options for controlling
+
Claude's behavior, including tool permissions, system prompts, models,
+
execution environment, cost controls, and structured outputs.
+
+
{2 Overview}
+
+
Options control all aspects of Claude's behavior:
+
- {b Permissions}: Which tools Claude can use and how permission is granted
+
- {b Models}: Which AI model to use and fallback options
+
- {b Environment}: Working directory, environment variables, settings
+
- {b Cost Control}: Budget limits to prevent runaway spending
+
- {b Hooks}: Intercept and modify tool execution
+
- {b Structured Output}: JSON schema validation for responses
+
- {b Session Management}: Continue or resume conversations
+
+
{2 Builder Pattern}
+
+
Options use a functional builder pattern - each [with_*] function returns
+
a new options value with the specified field updated:
+
+
{[
+
let options = Options.default
+
|> Options.with_model_string "claude-sonnet-4-5"
+
|> Options.with_max_budget_usd 1.0
+
|> Options.with_permission_mode Permissions.Mode.Accept_edits
+
]}
+
+
{2 Common Configuration Scenarios}
+
+
{3 CI/CD: Isolated, Reproducible Builds}
+
+
{[
+
let ci_config = Options.default
+
|> Options.with_no_settings (* Ignore user config *)
+
|> Options.with_max_budget_usd 0.50 (* 50 cent limit *)
+
|> Options.with_permission_mode
+
Permissions.Mode.Bypass_permissions
+
|> Options.with_model_string "claude-haiku-4"
+
]}
+
+
{3 Production: Cost Control with Fallback}
+
+
{[
+
let prod_config = Options.default
+
|> Options.with_model_string "claude-sonnet-4-5"
+
|> Options.with_fallback_model_string "claude-haiku-4"
+
|> Options.with_max_budget_usd 10.0 (* $10 daily limit *)
+
|> Options.with_max_buffer_size 5_000_000
+
]}
+
+
{3 Development: User Settings with Overrides}
+
+
{[
+
let dev_config = Options.default
+
|> Options.with_setting_sources [User; Project]
+
|> Options.with_max_budget_usd 1.0
+
|> Options.with_permission_mode Permissions.Mode.Default
+
]}
+
+
{3 Structured Output: Type-Safe Responses}
+
+
{[
+
let schema = Ezjsonm.(`O [
+
("type", `String "object");
+
("properties", `O [
+
("count", `O [("type", `String "integer")]);
+
("has_tests", `O [("type", `String "boolean")]);
+
]);
+
])
+
let format = Structured_output.of_json_schema schema
+
+
let analysis_config = Options.default
+
|> Options.with_output_format format
+
|> Options.with_allowed_tools ["Read"; "Glob"; "Grep"]
+
]}
+
+
{2 Advanced Options}
+
+
{3 Budget Control}
+
+
Use {!with_max_budget_usd} to set hard spending limits. Claude will
+
terminate the session if the budget is exceeded, preventing runaway costs.
+
+
{3 Settings Isolation}
+
+
Use {!with_setting_sources} or {!with_no_settings} to control which
+
configuration files are loaded:
+
- [User] - ~/.claude/config
+
- [Project] - .claude/ in project root
+
- [Local] - Current directory settings
+
- [Some \[\]] (via {!with_no_settings}) - No settings, fully isolated
+
+
This is critical for reproducible builds in CI/CD environments.
+
+
{3 Model Fallback}
+
+
Use {!with_fallback_model} to specify an alternative model when the
+
primary model is unavailable or overloaded. This improves reliability. *)
open Ezjsonm
(** The log source for options operations *)
val src : Logs.Src.t
+
(** {1 Types} *)
+
+
type setting_source = User | Project | Local
+
(** Setting source determines which configuration files to load.
+
- [User]: Load user-level settings from ~/.claude/config
+
- [Project]: Load project-level settings from .claude/ in project root
+
- [Local]: Load local settings from current directory *)
+
type t
(** The type of configuration options. *)
···
?append_system_prompt:string ->
?permission_mode:Permissions.Mode.t ->
?permission_callback:Permissions.callback ->
-
?model:string ->
+
?model:Model.t ->
?cwd:Eio.Fs.dir_ty Eio.Path.t ->
?env:(string * string) list ->
?continue_conversation:bool ->
···
?extra_args:(string * string option) list ->
?debug_stderr:Eio.Flow.sink_ty Eio.Flow.sink ->
?hooks:Hooks.config ->
+
?max_budget_usd:float ->
+
?fallback_model:Model.t ->
+
?setting_sources:setting_source list ->
+
?max_buffer_size:int ->
+
?user:string ->
+
?output_format:Structured_output.t ->
unit -> t
(** [create ?allowed_tools ?disallowed_tools ?max_thinking_tokens ?system_prompt
?append_system_prompt ?permission_mode ?permission_callback ?model ?cwd ?env
?continue_conversation ?resume ?max_turns ?permission_prompt_tool_name ?settings
-
?add_dirs ?extra_args ?debug_stderr ()]
+
?add_dirs ?extra_args ?debug_stderr ?hooks ?max_budget_usd ?fallback_model
+
?setting_sources ?max_buffer_size ?user ?output_format ()]
creates a new configuration.
@param allowed_tools List of explicitly allowed tool names
@param disallowed_tools List of explicitly disallowed tool names
···
@param settings Path to settings file
@param add_dirs Additional directories to allow access to
@param extra_args Additional CLI flags to pass through
-
@param debug_stderr Sink for debug output when debug-to-stderr is set *)
+
@param debug_stderr Sink for debug output when debug-to-stderr is set
+
@param hooks Hooks configuration for event interception
+
@param max_budget_usd Hard spending limit in USD (terminates on exceed)
+
@param fallback_model Automatic fallback on primary model unavailability
+
@param setting_sources Control which settings load (user/project/local)
+
@param max_buffer_size Maximum stdout buffer size in bytes
+
@param user Unix user for subprocess execution
+
@param output_format Optional structured output format specification *)
(** {1 Accessors} *)
···
val permission_callback : t -> Permissions.callback option
(** [permission_callback t] returns the optional permission callback. *)
-
val model : t -> string option
+
val model : t -> Model.t option
(** [model t] returns the optional model override. *)
val cwd : t -> Eio.Fs.dir_ty Eio.Path.t option
···
val hooks : t -> Hooks.config option
(** [hooks t] returns the optional hooks configuration. *)
+
val max_budget_usd : t -> float option
+
(** [max_budget_usd t] returns the optional spending limit in USD. *)
+
+
val fallback_model : t -> Model.t option
+
(** [fallback_model t] returns the optional fallback model. *)
+
+
val setting_sources : t -> setting_source list option
+
(** [setting_sources t] returns the optional list of setting sources to load. *)
+
+
val max_buffer_size : t -> int option
+
(** [max_buffer_size t] returns the optional stdout buffer size in bytes. *)
+
+
val user : t -> string option
+
(** [user t] returns the optional Unix user for subprocess execution. *)
+
+
val output_format : t -> Structured_output.t option
+
(** [output_format t] returns the optional structured output format. *)
+
(** {1 Builders} *)
val with_allowed_tools : string list -> t -> t
···
val with_permission_callback : Permissions.callback -> t -> t
(** [with_permission_callback callback t] sets the permission callback. *)
-
val with_model : string -> t -> t
-
(** [with_model model t] sets the model override. *)
+
val with_model : Model.t -> t -> t
+
(** [with_model model t] sets the model override using a typed Model.t. *)
+
+
val with_model_string : string -> t -> t
+
(** [with_model_string model t] sets the model override from a string.
+
The string is parsed using {!Model.of_string}. *)
val with_cwd : Eio.Fs.dir_ty Eio.Path.t -> t -> t
(** [with_cwd cwd t] sets the working directory. *)
···
val with_hooks : Hooks.config -> t -> t
(** [with_hooks hooks t] sets the hooks configuration. *)
+
+
val with_max_budget_usd : float -> t -> t
+
(** [with_max_budget_usd budget t] sets the maximum spending limit in USD.
+
The session will terminate if this limit is exceeded. *)
+
+
val with_fallback_model : Model.t -> t -> t
+
(** [with_fallback_model model t] sets the fallback model using a typed Model.t. *)
+
+
val with_fallback_model_string : string -> t -> t
+
(** [with_fallback_model_string model t] sets the fallback model from a string.
+
The string is parsed using {!Model.of_string}. *)
+
+
val with_setting_sources : setting_source list -> t -> t
+
(** [with_setting_sources sources t] sets which configuration sources to load.
+
Use empty list for isolated environments (e.g., CI/CD). *)
+
+
val with_no_settings : t -> t
+
(** [with_no_settings t] disables all settings loading (user, project, local).
+
Useful for CI/CD environments where you want isolated, reproducible behavior. *)
+
+
val with_max_buffer_size : int -> t -> t
+
(** [with_max_buffer_size size t] sets the maximum stdout buffer size in bytes. *)
+
+
val with_user : string -> t -> t
+
(** [with_user user t] sets the Unix user for subprocess execution. *)
+
+
val with_output_format : Structured_output.t -> t -> t
+
(** [with_output_format format t] sets the structured output format. *)
(** {1 Serialization} *)
+91 -4
claudeio/lib/sdk_control.ml
···
server_name : string;
message : value;
}
-
+
+
type set_model = {
+
subtype : [`Set_model];
+
model : string;
+
}
+
+
type get_server_info = {
+
subtype : [`Get_server_info];
+
}
+
type t =
| Interrupt of interrupt
| Permission of permission
···
| Set_permission_mode of set_permission_mode
| Hook_callback of hook_callback
| Mcp_message of mcp_message
+
| Set_model of set_model
+
| Get_server_info of get_server_info
let interrupt () = Interrupt { subtype = `Interrupt }
···
server_name;
message;
}
-
+
+
let set_model ~model =
+
Set_model { subtype = `Set_model; model }
+
+
let get_server_info () =
+
Get_server_info { subtype = `Get_server_info }
+
let to_json = function
| Interrupt _ ->
`O [("subtype", `String "interrupt")]
···
("server_name", `String m.server_name);
("message", m.message);
]
-
+
| Set_model s ->
+
`O [
+
("subtype", `String "set_model");
+
("model", `String s.model);
+
]
+
| Get_server_info _ ->
+
`O [("subtype", `String "get_server_info")]
+
let of_json = function
| `O fields ->
let subtype = JU.assoc_string "subtype" fields in
···
server_name;
message;
}
+
| "set_model" ->
+
let model = JU.assoc_string "model" fields in
+
Set_model { subtype = `Set_model; model }
+
| "get_server_info" ->
+
Get_server_info { subtype = `Get_server_info }
| _ -> raise (Invalid_argument ("Unknown request subtype: " ^ subtype)))
| _ -> raise (Invalid_argument "Request.of_json: expected object")
···
| Mcp_message m ->
Fmt.pf fmt "@[<2>McpMessage@ { server = %S }@]"
m.server_name
+
| Set_model s ->
+
Fmt.pf fmt "@[<2>SetModel@ { model = %S }@]" s.model
+
| Get_server_info _ ->
+
Fmt.pf fmt "@[<2>GetServerInfo@]"
end
module Response = struct
···
Log.debug (fun m -> m "SDK control request: %a" Request.pp req)
let log_response resp =
-
Log.debug (fun m -> m "SDK control response: %a" Response.pp resp)
+
Log.debug (fun m -> m "SDK control response: %a" Response.pp resp)
+
+
(** Server information *)
+
module Server_info = struct
+
type t = {
+
version : string;
+
capabilities : string list;
+
commands : string list;
+
output_styles : string list;
+
}
+
+
let create ~version ~capabilities ~commands ~output_styles =
+
{ version; capabilities; commands; output_styles }
+
+
let version t = t.version
+
let capabilities t = t.capabilities
+
let commands t = t.commands
+
let output_styles t = t.output_styles
+
+
let of_json = function
+
| `O fields ->
+
let version = JU.assoc_string "version" fields in
+
let capabilities =
+
match List.assoc_opt "capabilities" fields with
+
| Some (`A lst) -> List.map Ezjsonm.get_string lst
+
| _ -> []
+
in
+
let commands =
+
match List.assoc_opt "commands" fields with
+
| Some (`A lst) -> List.map Ezjsonm.get_string lst
+
| _ -> []
+
in
+
let output_styles =
+
match List.assoc_opt "outputStyles" fields with
+
| Some (`A lst) -> List.map Ezjsonm.get_string lst
+
| _ -> []
+
in
+
{ version; capabilities; commands; output_styles }
+
| _ -> raise (Invalid_argument "Server_info.of_json: expected object")
+
+
let to_json t =
+
`O [
+
("version", `String t.version);
+
("capabilities", `A (List.map (fun s -> `String s) t.capabilities));
+
("commands", `A (List.map (fun s -> `String s) t.commands));
+
("outputStyles", `A (List.map (fun s -> `String s) t.output_styles));
+
]
+
+
let pp fmt t =
+
Fmt.pf fmt "@[<2>ServerInfo@ { version = %S;@ capabilities = [%a];@ commands = [%a];@ output_styles = [%a] }@]"
+
t.version
+
Fmt.(list ~sep:(any ", ") (quote string)) t.capabilities
+
Fmt.(list ~sep:(any ", ") (quote string)) t.commands
+
Fmt.(list ~sep:(any ", ") (quote string)) t.output_styles
+
end
+149 -6
claudeio/lib/sdk_control.mli
···
(** SDK Control Protocol for Claude.
-
-
This module defines the typed SDK control protocol for communication
-
between the SDK and Claude, including request and response types. *)
+
+
This module defines the typed SDK control protocol for bidirectional
+
communication between the SDK and the Claude CLI. It handles:
+
+
- Permission requests (tool usage authorization)
+
- Hook callbacks (intercepting and modifying tool execution)
+
- Dynamic control (changing settings mid-conversation)
+
- Server introspection (querying capabilities)
+
+
{2 Protocol Overview}
+
+
The SDK control protocol is a JSON-based request/response protocol that
+
runs alongside the main message stream. It enables:
+
+
1. {b Callbacks}: Claude asks the SDK for permission or hook execution
+
2. {b Control}: SDK changes Claude's behavior dynamically
+
3. {b Introspection}: SDK queries server metadata
+
+
{2 Request/Response Flow}
+
+
{v
+
SDK Claude CLI
+
| |
+
|-- Initialize (with hooks) --> |
+
|<-- Permission Request --------| (for tool usage)
+
|-- Allow/Deny Response ------> |
+
| |
+
|<-- Hook Callback -------------| (pre/post tool)
+
|-- Hook Result -------------> |
+
| |
+
|-- Set Model ---------------> | (dynamic control)
+
|<-- Success Response ----------|
+
| |
+
|-- Get Server Info ----------> |
+
|<-- Server Info Response ------|
+
v}
+
+
{2 Usage}
+
+
Most users won't interact with this module directly. The {!Client} module
+
handles the protocol automatically. However, this module is exposed for:
+
+
- Understanding the control protocol
+
- Implementing custom control logic
+
- Debugging control message flow
+
- Advanced SDK extensions
+
+
{2 Dynamic Control Examples}
+
+
See {!Client.set_permission_mode}, {!Client.set_model}, and
+
{!Client.get_server_info} for high-level APIs that use this protocol. *)
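(** As a debugging aid, a small sketch of building a control request and
    inspecting its wire format, mirroring what {!Client.set_model} does
    internally (the request id here is illustrative):
    {[
      let request = Request.set_model ~model:"claude-haiku-4" in
      let msg = create_request ~request_id:"set_model_1" ~request in
      print_endline (Ezjsonm.value_to_string (to_json msg))
    ]} *)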
open Ezjsonm
···
message : value;
}
(** MCP server message request. *)
-
+
+
type set_model = {
+
subtype : [`Set_model];
+
model : string;
+
}
+
(** Request to change the AI model. *)
+
+
type get_server_info = {
+
subtype : [`Get_server_info];
+
}
+
(** Request to get server information. *)
+
type t =
| Interrupt of interrupt
| Permission of permission
···
| Set_permission_mode of set_permission_mode
| Hook_callback of hook_callback
| Mcp_message of mcp_message
+
| Set_model of set_model
+
| Get_server_info of get_server_info
(** The type of SDK control requests. *)
val interrupt : unit -> t
···
val mcp_message : server_name:string -> message:value -> t
(** [mcp_message ~server_name ~message] creates an MCP message request. *)
-
+
+
val set_model : model:string -> t
+
(** [set_model ~model] creates a model change request. *)
+
+
val get_server_info : unit -> t
+
(** [get_server_info ()] creates a server info request. *)
+
val to_json : t -> value
(** [to_json t] converts a request to JSON. *)
···
(** [log_request req] logs an SDK control request. *)
val log_response : Response.t -> unit
-
(** [log_response resp] logs an SDK control response. *)
+
(** [log_response resp] logs an SDK control response. *)
+
+
(** {1 Server Information}
+
+
Server information provides metadata about the Claude CLI server,
+
including version, capabilities, available commands, and output styles.
+
+
{2 Use Cases}
+
+
- Feature detection: Check if specific capabilities are available
+
- Version compatibility: Ensure minimum version requirements
+
- Debugging: Log server information for troubleshooting
+
- Dynamic adaptation: Adjust SDK behavior based on capabilities
+
+
{2 Example}
+
+
{[
+
let info = Client.get_server_info client in
+
Printf.printf "Claude CLI version: %s\n"
+
(Server_info.version info);
+
+
if List.mem "structured-output" (Server_info.capabilities info) then
+
Printf.printf "Structured output is supported\n"
+
else
+
Printf.printf "Structured output not available\n";
+
]} *)
+
+
module Server_info : sig
+
(** Server information and capabilities. *)
+
+
type t = {
+
version : string;
+
(** Server version string (e.g., "2.0.0") *)
+
+
capabilities : string list;
+
(** Available server capabilities (e.g., "hooks", "structured-output") *)
+
+
commands : string list;
+
(** Available CLI commands *)
+
+
output_styles : string list;
+
(** Supported output formats (e.g., "json", "stream-json") *)
+
}
+
(** Server metadata and capabilities.
+
+
This information is useful for feature detection and debugging. *)
+
+
val create :
+
version:string ->
+
capabilities:string list ->
+
commands:string list ->
+
output_styles:string list ->
+
t
+
(** [create ~version ~capabilities ~commands ~output_styles] creates server info. *)
+
+
val version : t -> string
+
(** [version t] returns the server version. *)
+
+
val capabilities : t -> string list
+
(** [capabilities t] returns the server capabilities. *)
+
+
val commands : t -> string list
+
(** [commands t] returns available commands. *)
+
+
val output_styles : t -> string list
+
(** [output_styles t] returns available output styles. *)
+
+
val of_json : value -> t
+
(** [of_json json] parses server info from JSON.
+
@raise Invalid_argument if the JSON is not valid server info. *)
+
+
val to_json : t -> value
+
(** [to_json t] converts server info to JSON. *)
+
+
val pp : Format.formatter -> t -> unit
+
(** [pp fmt t] pretty-prints the server info. *)
+
end
+45 -9
claudeio/lib/transport.ml
···
sw : Switch.t;
}
+
let setting_source_to_string = function
+
| Options.User -> "user"
+
| Options.Project -> "project"
+
| Options.Local -> "local"
+
let build_command ~claude_path ~options =
let cmd = [claude_path; "--output-format"; "stream-json"; "--verbose"] in
-
+
let cmd = match Options.system_prompt options with
| Some prompt -> cmd @ ["--system-prompt"; prompt]
| None -> cmd
in
-
+
let cmd = match Options.append_system_prompt options with
| Some prompt -> cmd @ ["--append-system-prompt"; prompt]
| None -> cmd
in
-
+
let cmd = match Options.allowed_tools options with
| [] -> cmd
| tools -> cmd @ ["--allowedTools"; String.concat "," tools]
in
-
+
let cmd = match Options.disallowed_tools options with
| [] -> cmd
| tools -> cmd @ ["--disallowedTools"; String.concat "," tools]
in
-
+
let cmd = match Options.model options with
-
| Some model -> cmd @ ["--model"; model]
+
| Some model -> cmd @ ["--model"; Model.to_string model]
| None -> cmd
in
-
+
let cmd = match Options.permission_mode options with
| Some mode ->
let mode_str = Permissions.Mode.to_string mode in
···
| None -> cmd
in
+
(* Advanced configuration options *)
+
let cmd = match Options.max_budget_usd options with
+
| Some budget -> cmd @ ["--max-budget-usd"; Float.to_string budget]
+
| None -> cmd
+
in
+
+
let cmd = match Options.fallback_model options with
+
| Some model -> cmd @ ["--fallback-model"; Model.to_string model]
+
| None -> cmd
+
in
+
+
let cmd = match Options.setting_sources options with
+
| Some sources ->
+
let sources_str = String.concat "," (List.map setting_source_to_string sources) in
+
cmd @ ["--setting-sources"; sources_str]
+
| None -> cmd
+
in
+
+
(* Add JSON Schema if specified *)
+
let cmd = match Options.output_format options with
+
| Some format ->
+
let schema = Structured_output.json_schema format in
+
let schema_str = Ezjsonm.value_to_string schema in
+
cmd @ ["--json-schema"; schema_str]
+
| None -> cmd
+
in
+
(* Use streaming input mode *)
cmd @ ["--input-format"; "stream-json"]
···
let stdin = (stdin_w :> Eio.Flow.sink_ty r) in
let stdin_close = (stdin_w :> [`Close | `Flow] r) in
-
let stdout = Eio.Buf_read.of_flow ~max_size:1_000_000 (stdout_r :> Eio.Flow.source_ty r) in
-
+
let max_size = match Options.max_buffer_size options with
+
| Some size -> size
+
| None -> 1_000_000 (* Default 1MB *)
+
in
+
let stdout = Eio.Buf_read.of_flow ~max_size (stdout_r :> Eio.Flow.source_ty r) in
+
{ process = P process; stdin; stdin_close; stdout; sw }
let send t json =
+1 -1
claudeio/test/camel_jokes.ml
···
let run_claude ~sw ~env name prompt =
Log.info (fun m -> m "🐪 Starting %s..." name);
-
let options = Claude.Options.create ~model:"sonnet" ~allowed_tools:[] () in
+
let options = Claude.Options.create ~model:(Claude.Model.of_string "sonnet") ~allowed_tools:[] () in
let client = Claude.Client.create ~options ~sw
~process_mgr:env#process_mgr
+1 -1
claudeio/test/discovery_demo.ml
···
Log.app (fun m -> m "This will discover what permissions Claude needs.\n");
(* Create client with discovery mode *)
-
let options = Claude.Options.create ~model:"sonnet" () in
+
let options = Claude.Options.create ~model:(Claude.Model.of_string "sonnet") () in
let client = Claude.Client.discover_permissions
(Claude.Client.create ~options ~sw ~process_mgr:env#process_mgr ()) in
+27 -1
claudeio/test/dune
···
(public_name hooks_example)
(name hooks_example)
(modules hooks_example)
-
(libraries claude eio_main cmdliner logs logs.fmt fmt.tty fmt.cli logs.cli))
+
(libraries claude eio_main cmdliner logs logs.fmt fmt.tty fmt.cli logs.cli))
+
+
(executable
+
(public_name dynamic_control_demo)
+
(name dynamic_control_demo)
+
(modules dynamic_control_demo)
+
(libraries claude eio_main cmdliner logs logs.fmt fmt.tty fmt.cli logs.cli))
+
+
(executable
+
(public_name advanced_config_demo)
+
(name advanced_config_demo)
+
(modules advanced_config_demo)
+
(libraries claude eio_main logs logs.fmt fmt.tty))
+
+
(executable
+
(public_name structured_output_demo)
+
(name structured_output_demo)
+
(modules structured_output_demo)
+
(flags (:standard -w -33))
+
(libraries claude eio_main logs logs.fmt fmt.tty))
+
+
(executable
+
(public_name structured_output_simple)
+
(name structured_output_simple)
+
(modules structured_output_simple)
+
(flags (:standard -w -33))
+
(libraries claude eio_main logs logs.fmt fmt.tty))
+1 -1
claudeio/test/hooks_example.ml
···
in
let options = Claude.Options.create
-
~model:"sonnet"
+
~model:(Claude.Model.of_string "sonnet")
~hooks
() in
+1 -1
claudeio/test/permission_demo.ml
···
(* DON'T specify allowed_tools - let the permission callback handle everything.
The Default permission mode with a callback should send requests for all tools. *)
let options = Claude.Options.create
-
~model:"sonnet"
+
~model:(Claude.Model.of_string "sonnet")
~permission_mode:Claude.Permissions.Mode.Default
~permission_callback:interactive_permission_callback
() in
+1 -1
claudeio/test/simple_permission_test.ml
···
(* Create options with permission callback *)
let options = Claude.Options.create
-
~model:"sonnet"
+
~model:(Claude.Model.of_string "sonnet")
~permission_callback:auto_allow_callback
() in
+1 -1
claudeio/test/test_permissions.ml
···
(* Create options with custom permission callback *)
let options = Claude.Options.create
-
~model:"sonnet"
+
~model:(Claude.Model.of_string "sonnet")
~permission_callback:auto_allow_callback
() in
+1
stack/.gitignore
···
+
_build
-9
stack/bushel/.gitignore
···
-
_build
-
.*.swp
-
**/.claude/settings.local.json
-
.photos-api
-
.karakeep-api
-
KARAKEEP.md
-
karakeep-src
-
.DS_Store
-
.openapi-key
-1
stack/bushel/.ocamlformat
···
-
profile=janestreet
-127
stack/bushel/bin/bushel_bibtex.ml
···
-
open Cmdliner
-
open Printf
-
-
(** TODO:claude Generate bibtex entry from paper data *)
-
let generate_bibtex_entry paper =
-
let open Bushel.Paper in
-
(* Use slug as the bibtex key/label *)
-
let bibkey = slug paper in
-
let bibtype = try bibtype paper with _ -> "misc" in
-
let title = try title paper with _ -> "Untitled" in
-
let authors =
-
let auth_list = try authors paper with _ -> [] in
-
String.concat " and " auth_list
-
in
-
let year = try year paper with _ -> 0 in
-
-
(* Build the bibtex entry *)
-
let buf = Buffer.create 1024 in
-
Buffer.add_string buf (sprintf "@%s{%s,\n" bibtype bibkey);
-
Buffer.add_string buf (sprintf " title = {%s},\n" title);
-
Buffer.add_string buf (sprintf " author = {%s},\n" authors);
-
Buffer.add_string buf (sprintf " year = {%d}" year);
-
-
(* Add optional fields *)
-
(match String.lowercase_ascii bibtype with
-
| "article" ->
-
(try
-
Buffer.add_string buf (sprintf ",\n journal = {%s}" (journal paper))
-
with _ -> ());
-
(match volume paper with
-
| Some v -> Buffer.add_string buf (sprintf ",\n volume = {%s}" v)
-
| None -> ());
-
(match issue paper with
-
| Some i -> Buffer.add_string buf (sprintf ",\n number = {%s}" i)
-
| None -> ());
-
(match pages paper with
-
| "" -> ()
-
| p -> Buffer.add_string buf (sprintf ",\n pages = {%s}" p))
-
| "inproceedings" ->
-
(try
-
Buffer.add_string buf (sprintf ",\n booktitle = {%s}" (booktitle paper))
-
with _ -> ());
-
(match pages paper with
-
| "" -> ()
-
| p -> Buffer.add_string buf (sprintf ",\n pages = {%s}" p));
-
(match publisher paper with
-
| "" -> ()
-
| p -> Buffer.add_string buf (sprintf ",\n publisher = {%s}" p))
-
| "techreport" ->
-
(try
-
Buffer.add_string buf (sprintf ",\n institution = {%s}" (institution paper))
-
with _ -> ());
-
(match number paper with
-
| Some n -> Buffer.add_string buf (sprintf ",\n number = {%s}" n)
-
| None -> ())
-
| "book" ->
-
(match publisher paper with
-
| "" -> ()
-
| p -> Buffer.add_string buf (sprintf ",\n publisher = {%s}" p));
-
(try
-
Buffer.add_string buf (sprintf ",\n isbn = {%s}" (isbn paper))
-
with _ -> ())
-
| _ -> ());
-
-
(* Add DOI if available *)
-
(match doi paper with
-
| Some d -> Buffer.add_string buf (sprintf ",\n doi = {%s}" d)
-
| None -> ());
-
-
(* Add URL if available *)
-
(match url paper with
-
| Some u -> Buffer.add_string buf (sprintf ",\n url = {%s}" u)
-
| None -> ());
-
-
Buffer.add_string buf "\n}\n";
-
Buffer.contents buf
-
-
(** TODO:claude Main function to export bibtex for all papers *)
-
let export_bibtex base_dir output_file latest_only _env _xdg _profile =
-
(* Load all papers *)
-
let bushel = Bushel.load base_dir in
-
let papers = Bushel.Entry.papers bushel in
-
-
(* Filter to only latest versions if requested *)
-
let papers =
-
if latest_only then
-
List.filter (fun p -> p.Bushel.Paper.latest) papers
-
else
-
papers
-
in
-
-
(* Sort papers by year (most recent first) *)
-
let papers = List.sort Bushel.Paper.compare papers in
-
-
(* Generate bibtex for each paper *)
-
let bibtex_entries = List.map generate_bibtex_entry papers in
-
let bibtex_content = String.concat "\n" bibtex_entries in
-
-
(* Output to file or stdout *)
-
match output_file with
-
| None ->
-
print_string bibtex_content;
-
0
-
| Some file ->
-
let oc = open_out file in
-
output_string oc bibtex_content;
-
close_out oc;
-
printf "Bibtex exported to %s (%d entries)\n" file (List.length papers);
-
0
-
-
(** TODO:claude Command line arguments *)
-
let output_file_arg =
-
let doc = "Output file for bibtex (defaults to stdout)" in
-
Arg.(value & opt (some string) None & info ["o"; "output"] ~docv:"FILE" ~doc)
-
-
let latest_only_arg =
-
let doc = "Export only the latest version of each paper" in
-
Arg.(value & flag & info ["latest"] ~doc)
-
-
(** TODO:claude Command term *)
-
let term : (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Cmdliner.Term.t =
-
Term.(const export_bibtex $ Bushel_common.base_dir $ output_file_arg $ latest_only_arg)
-
-
let cmd =
-
let doc = "Export bibtex for all papers" in
-
let info = Cmd.info "bibtex" ~doc in
-
Cmd.v info term
-67
stack/bushel/bin/bushel_common.ml
···
-
open Cmdliner
-
-
(** TODO:claude Get default base directory from BUSHEL_DATA env variable or current directory *)
-
let get_default_base_dir () =
-
match Sys.getenv_opt "BUSHEL_DATA" with
-
| Some dir -> dir
-
| None -> "."
-
-
(** TODO:claude Optional base directory term with BUSHEL_DATA env variable support *)
-
let base_dir =
-
let doc = "Base directory containing Bushel data (defaults to BUSHEL_DATA env var or current directory)" in
-
Arg.(value & opt dir (get_default_base_dir ()) & info ["d"; "dir"] ~docv:"DIR" ~doc)
-
-
(** TODO:claude Output directory as option *)
-
let output_dir ~default =
-
let doc = "Output directory for generated files" in
-
Arg.(value & opt string default & info ["o"; "output"] ~docv:"DIR" ~doc)
-
-
(** TODO:claude URL term with custom default *)
-
let url_term ~default ~doc =
-
Arg.(value & opt string default & info ["u"; "url"] ~docv:"URL" ~doc)
-
-
(** TODO:claude API key file term *)
-
let api_key_file ~default =
-
let doc = "File containing API key" in
-
Arg.(value & opt string default & info ["k"; "key-file"] ~docv:"FILE" ~doc)
-
-
(** TODO:claude API key term *)
-
let api_key =
-
let doc = "API key for authentication" in
-
Arg.(value & opt (some string) None & info ["api-key"] ~docv:"KEY" ~doc)
-
-
(** TODO:claude Overwrite flag *)
-
let overwrite =
-
let doc = "Overwrite existing files" in
-
Arg.(value & flag & info ["overwrite"] ~doc)
-
-
(** TODO:claude Verbose flag *)
-
let verbose =
-
let doc = "Enable verbose output" in
-
Arg.(value & flag & info ["v"; "verbose"] ~doc)
-
-
(** TODO:claude File path term *)
-
let file_term ~default ~doc =
-
Arg.(value & opt string default & info ["f"; "file"] ~docv:"FILE" ~doc)
-
-
(** TODO:claude Channel/handle term *)
-
let channel ~default =
-
let doc = "Channel or handle name" in
-
Arg.(value & opt string default & info ["c"; "channel"] ~docv:"CHANNEL" ~doc)
-
-
(** TODO:claude Optional handle term *)
-
let handle_opt =
-
let doc = "Process specific handle" in
-
Arg.(value & opt (some string) None & info ["h"; "handle"] ~docv:"HANDLE" ~doc)
-
-
(** TODO:claude Tag term for filtering *)
-
let tag =
-
let doc = "Tag to filter or apply" in
-
Arg.(value & opt (some string) None & info ["t"; "tag"] ~docv:"TAG" ~doc)
-
-
(** TODO:claude Limit term *)
-
let limit =
-
let doc = "Limit number of items to process" in
-
Arg.(value & opt (some int) None & info ["l"; "limit"] ~docv:"N" ~doc)
-
-
(* Note: Logging setup is now handled by eiocmd for all bushel binaries *)
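These shared terms compose into subcommands with Cmdliner's applicative syntax. A minimal sketch of a hypothetical subcommand reusing base_dir and verbose from above (the subcommand itself is invented for illustration):

open Cmdliner

(* Hypothetical subcommand, only to show reuse of the shared terms above. *)
let scan base_dir verbose =
  if verbose then Printf.printf "Scanning %s\n" base_dir;
  0

let scan_cmd =
  let term = Term.(const scan $ Bushel_common.base_dir $ Bushel_common.verbose) in
  Cmd.v (Cmd.info "scan" ~doc:"Example subcommand built from the shared terms") term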
-295
stack/bushel/bin/bushel_doi.ml
···
-
module ZT = Zotero_translation
-
open Lwt.Infix
-
module J = Ezjsonm
-
open Cmdliner
-
-
(* Extract all DOIs from notes by scanning for doi.org URLs *)
-
let extract_dois_from_notes notes =
-
let doi_url_pattern = Re.Perl.compile_pat "https?://(?:dx\\.)?doi\\.org/([^)\\s\"'>]+)" in
-
let dois = ref [] in
-
List.iter (fun note ->
-
let body = Bushel.Note.body note in
-
let matches = Re.all doi_url_pattern body in
-
List.iter (fun group ->
-
try
-
let encoded_doi = Re.Group.get group 1 in
-
let doi = Uri.pct_decode encoded_doi in
-
if not (List.mem doi !dois) then
-
dois := doi :: !dois
-
with _ -> ()
-
) matches
-
) notes;
-
!dois
-
-
(* Extract publisher URLs from notes (Elsevier, ScienceDirect, IEEE, Nature, ACM, Sage, UPenn, Springer, Taylor & Francis, OUP) *)
-
let extract_publisher_urls_from_notes notes =
-
(* Matches publisher URLs: linkinghub.elsevier.com, sciencedirect.com/science/article, ieeexplore.ieee.org, academic.oup.com, nature.com, journals.sagepub.com, garfield.library.upenn.edu, link.springer.com, tandfonline.com/doi, and dl.acm.org/doi/10.* URLs *)
-
let publisher_pattern = Re.Perl.compile_pat "https?://(?:(?:www\\.)?(?:linkinghub\\.elsevier\\.com|(?:www\\.)?sciencedirect\\.com/science/article|ieeexplore\\.ieee\\.org|academic\\.oup\\.com|nature\\.com|journals\\.sagepub\\.com|garfield\\.library\\.upenn\\.edu|link\\.springer\\.com)/[^)\\s\"'>]+|(?:dl\\.acm\\.org|(?:www\\.)?tandfonline\\.com)/doi(?:/pdf)?/10\\.[^)\\s\"'>]+)" in
-
let urls = ref [] in
-
List.iter (fun note ->
-
let body = Bushel.Note.body note in
-
let matches = Re.all publisher_pattern body in
-
List.iter (fun group ->
-
try
-
let url = Re.Group.get group 0 in
-
if not (List.mem url !urls) then
-
urls := url :: !urls
-
with _ -> ()
-
) matches
-
) notes;
-
!urls
-
-
(* Resolve a single DOI via Zotero and convert to doi_entry *)
-
let resolve_doi zt ~verbose doi =
-
Printf.printf "Resolving DOI: %s\n%!" doi;
-
let doi_url = Printf.sprintf "https://doi.org/%s" doi in
-
Lwt.catch
-
(fun () ->
-
ZT.json_of_doi zt ~slug:"temp" doi >>= fun json ->
-
if verbose then begin
-
Printf.printf " Raw Zotero response:\n%s\n%!" (J.value_to_string json)
-
end;
-
try
-
let keys = J.get_dict (json :> J.value) in
-
let title = J.find json ["title"] |> J.get_string in
-
let authors = J.find json ["author"] |> J.get_list J.get_string in
-
let year = J.find json ["year"] |> J.get_string |> int_of_string in
-
let bibtype = J.find json ["bibtype"] |> J.get_string in
-
let publisher =
-
try
-
(* Try journal first, then booktitle, then proceedingsTitle, then publisher *)
-
match List.assoc_opt "journal" keys with
-
| Some j -> J.get_string j
-
| None ->
-
match List.assoc_opt "booktitle" keys with
-
| Some b -> J.get_string b
-
| None ->
-
match List.assoc_opt "proceedingsTitle" keys with
-
| Some pt -> J.get_string pt
-
| None ->
-
match List.assoc_opt "publisher" keys with
-
| Some p -> J.get_string p
-
| None -> ""
-
with _ -> ""
-
in
-
let entry = Bushel.Doi_entry.create_resolved ~doi ~title ~authors ~year ~bibtype ~publisher ~source_urls:[doi_url] () in
-
Printf.printf " ✓ Resolved: %s (%d)\n%!" title year;
-
Lwt.return entry
-
with e ->
-
Printf.eprintf " ✗ Failed to parse response for %s: %s\n%!" doi (Printexc.to_string e);
-
Lwt.return (Bushel.Doi_entry.create_failed ~doi ~error:(Printexc.to_string e) ~source_urls:[doi_url] ())
-
)
-
(fun exn ->
-
Printf.eprintf " ✗ Failed to resolve %s: %s\n%!" doi (Printexc.to_string exn);
-
Lwt.return (Bushel.Doi_entry.create_failed ~doi ~error:(Printexc.to_string exn) ~source_urls:[doi_url] ())
-
)
-
-
(* Resolve a publisher URL via Zotero /web endpoint *)
-
let resolve_url zt ~verbose url =
-
Printf.printf "Resolving URL: %s\n%!" url;
-
Lwt.catch
-
(fun () ->
-
(* Use Zotero's resolve_url which calls /web endpoint with the URL directly *)
-
ZT.resolve_url zt url >>= function
-
| Error (`Msg err) ->
-
Printf.eprintf " ✗ Failed to resolve URL: %s\n%!" err;
-
Lwt.return (Bushel.Doi_entry.create_failed ~doi:url ~error:err ~source_urls:[url] ())
-
| Ok json ->
-
if verbose then begin
-
Printf.printf " Raw Zotero response:\n%s\n%!" (J.value_to_string json)
-
end;
-
try
-
(* Extract metadata from the JSON response *)
-
let json_list = match json with
-
| `A lst -> lst
-
| single -> [single]
-
in
-
match json_list with
-
| [] ->
-
Printf.eprintf " ✗ Empty response\n%!";
-
Lwt.return (Bushel.Doi_entry.create_failed ~doi:url ~error:"Empty response" ~source_urls:[url] ())
-
| item :: _ ->
-
(* Extract DOI if present, otherwise use URL *)
-
let doi = try J.find item ["DOI"] |> J.get_string with _ ->
-
try J.find item ["doi"] |> J.get_string with _ -> url
-
in
-
let title = try J.find item ["title"] |> J.get_string with _ ->
-
"Unknown Title"
-
in
-
(* Extract authors from Zotero's "creators" field *)
-
let authors = try
-
J.find item ["creators"] |> J.get_list (fun creator_obj ->
-
try
-
let last_name = J.find creator_obj ["lastName"] |> J.get_string in
-
let first_name = try J.find creator_obj ["firstName"] |> J.get_string with _ -> "" in
-
if first_name = "" then last_name else first_name ^ " " ^ last_name
-
with _ -> "Unknown Author"
-
)
-
with _ -> []
-
in
-
(* Extract year from Zotero's "date" field *)
-
(* Handles both ISO format "2025-07" and text format "November 28, 2023" *)
-
let year = try
-
let date_str = J.find item ["date"] |> J.get_string in
-
(* First try splitting on '-' for ISO dates like "2025-07" or "2024-11-04" *)
-
let parts = String.split_on_char '-' date_str in
-
match parts with
-
| year_str :: _ when String.length year_str = 4 ->
-
(try int_of_string year_str with _ -> 0)
-
| _ ->
-
(* Try splitting on space and comma for dates like "November 28, 2023" *)
-
let space_parts = String.split_on_char ' ' date_str in
-
let year_candidate = List.find_opt (fun s ->
-
let s = String.trim (String.map (fun c -> if c = ',' then ' ' else c) s) in
-
String.length s = 4 && String.for_all (function '0'..'9' -> true | _ -> false) s
-
) space_parts in
-
(match year_candidate with
-
| Some year_str -> int_of_string (String.trim year_str)
-
| None -> 0)
-
with _ -> 0
-
in
-
(* Extract type/bibtype from Zotero's "itemType" field *)
-
let bibtype = try J.find item ["itemType"] |> J.get_string with _ -> "article" in
-
(* Extract publisher/journal from Zotero's "publicationTitle" or "proceedingsTitle" field *)
-
let publisher = try
-
J.find item ["publicationTitle"] |> J.get_string
-
with _ ->
-
try J.find item ["proceedingsTitle"] |> J.get_string
-
with _ -> ""
-
in
-
(* Include both the original URL and the DOI URL in source_urls *)
-
let doi_url = if doi = url then [] else [Printf.sprintf "https://doi.org/%s" doi] in
-
let source_urls = url :: doi_url in
-
let entry = Bushel.Doi_entry.create_resolved ~doi ~title ~authors ~year ~bibtype ~publisher ~source_urls () in
-
Printf.printf " ✓ Resolved: %s (%d) [DOI: %s]\n%!" title year doi;
-
Lwt.return entry
-
with e ->
-
Printf.eprintf " ✗ Failed to parse response: %s\n%!" (Printexc.to_string e);
-
Lwt.return (Bushel.Doi_entry.create_failed ~doi:url ~error:(Printexc.to_string e) ~source_urls:[url] ())
-
)
-
(fun exn ->
-
Printf.eprintf " ✗ Failed to resolve %s: %s\n%!" url (Printexc.to_string exn);
-
Lwt.return (Bushel.Doi_entry.create_failed ~doi:url ~error:(Printexc.to_string exn) ~source_urls:[url] ())
-
)
-
-
let run base_dir force verbose =
-
Printf.printf "Loading bushel database...\n%!";
-
let entries = Bushel.load base_dir in
-
let notes = Bushel.Entry.notes entries in
-
-
Printf.printf "Scanning %d notes for DOI URLs...\n%!" (List.length notes);
-
let found_dois = extract_dois_from_notes notes in
-
Printf.printf "Found %d unique DOIs\n%!" (List.length found_dois);
-
-
Printf.printf "Scanning %d notes for publisher URLs...\n%!" (List.length notes);
-
let found_urls = extract_publisher_urls_from_notes notes in
-
Printf.printf "Found %d unique publisher URLs\n%!" (List.length found_urls);
-
-
let data_dir = Bushel.Entry.data_dir entries in
-
let doi_yml_path = Filename.concat data_dir "doi.yml" in
-
Printf.printf "Loading existing DOI cache from %s...\n%!" doi_yml_path;
-
let existing_entries = Bushel.Doi_entry.load doi_yml_path in
-
Printf.printf "Loaded %d cached DOI entries\n%!" (List.length existing_entries);
-
-
(* Filter DOIs that need resolution *)
-
let dois_to_resolve =
-
List.filter (fun doi ->
-
match Bushel.Doi_entry.find_by_doi_including_ignored existing_entries doi with
-
| Some _ when not force ->
-
Printf.printf "Skipping DOI %s (already cached)\n%!" doi;
-
false
-
| Some _ when force ->
-
Printf.printf "Re-resolving DOI %s (--force)\n%!" doi;
-
true
-
| Some _ -> false (* Catch-all for Some case *)
-
| None -> true
-
) found_dois
-
in
-
-
(* Filter URLs that need resolution *)
-
let urls_to_resolve =
-
List.filter (fun url ->
-
match Bushel.Doi_entry.find_by_url_including_ignored existing_entries url with
-
| Some _ when not force ->
-
Printf.printf "Skipping URL %s (already cached)\n%!" url;
-
false
-
| Some _ when force ->
-
Printf.printf "Re-resolving URL %s (--force)\n%!" url;
-
true
-
| Some _ -> false (* Catch-all for Some case *)
-
| None -> true
-
) found_urls
-
in
-
-
if List.length dois_to_resolve = 0 && List.length urls_to_resolve = 0 then begin
-
Printf.printf "No DOIs or URLs to resolve!\n%!";
-
0
-
end else begin
-
Printf.printf "Resolving %d DOI(s) and %d URL(s)...\n%!" (List.length dois_to_resolve) (List.length urls_to_resolve);
-
-
let zt = ZT.v "http://svr-avsm2-eeg-ce:1969" in
-
-
(* Resolve all DOIs *)
-
let resolved_doi_entries_lwt =
-
Lwt_list.map_s (resolve_doi zt ~verbose) dois_to_resolve
-
in
-
-
(* Resolve all publisher URLs *)
-
let resolved_url_entries_lwt =
-
Lwt_list.map_s (resolve_url zt ~verbose) urls_to_resolve
-
in
-
-
let new_doi_entries = Lwt_main.run resolved_doi_entries_lwt in
-
let new_url_entries = Lwt_main.run resolved_url_entries_lwt in
-
let new_entries = new_doi_entries @ new_url_entries in
-
-
(* Merge with existing entries, combining source_urls for entries with the same DOI *)
-
let all_entries =
-
if force then
-
(* Replace existing entries with new ones - match by DOI *)
-
let is_updated entry =
-
List.exists (fun new_e ->
-
new_e.Bushel.Doi_entry.doi = entry.Bushel.Doi_entry.doi
-
) new_entries
-
in
-
let kept_existing = List.filter (fun e -> not (is_updated e)) existing_entries in
-
kept_existing @ new_entries
-
else
-
(* Merge new entries with existing ones, combining source_urls *)
-
let merged = ref existing_entries in
-
List.iter (fun new_entry ->
-
match Bushel.Doi_entry.find_by_doi_including_ignored !merged new_entry.Bushel.Doi_entry.doi with
-
| Some existing_entry ->
-
(* DOI already exists - merge the entries by combining source_urls and preserving ignore flag *)
-
let combined = Bushel.Doi_entry.merge_entries existing_entry new_entry in
-
merged := combined :: (List.filter (fun e -> e.Bushel.Doi_entry.doi <> new_entry.Bushel.Doi_entry.doi) !merged)
-
| None ->
-
(* New DOI - add it *)
-
merged := new_entry :: !merged
-
) new_entries;
-
!merged
-
in
-
-
(* Save updated cache *)
-
Printf.printf "Saving %d total entries to %s...\n%!" (List.length all_entries) doi_yml_path;
-
Bushel.Doi_entry.save doi_yml_path all_entries;
-
-
Printf.printf "Done!\n%!";
-
0
-
end
-
-
let force_flag =
-
let doc = "Force re-resolution of already cached DOIs" in
-
Arg.(value & flag & info ["force"; "f"] ~doc)
-
-
let verbose_flag =
-
let doc = "Show raw Zotero API responses for debugging" in
-
Arg.(value & flag & info ["verbose"; "v"] ~doc)
-
-
let term =
-
Term.(const run $ Bushel_common.base_dir $ force_flag $ verbose_flag)
-
-
let cmd =
-
let doc = "Resolve DOIs found in notes via Zotero Translation Server" in
-
let info = Cmd.info "doi-resolve" ~doc in
-
Cmd.v info term
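The year-extraction heuristic in resolve_url handles both ISO-style dates ("2025-07") and free-text dates ("November 28, 2023"). The same logic as a standalone function, shown only as a sketch:

(* Sketch of the date-to-year heuristic above: try the ISO prefix first,
   then fall back to scanning for a four-digit token. *)
let year_of_date_string date_str =
  let is_year s =
    String.length s = 4
    && String.for_all (function '0' .. '9' -> true | _ -> false) s
  in
  match String.split_on_char '-' date_str with
  | year_str :: _ when String.length year_str = 4 ->
    (try int_of_string year_str with _ -> 0)
  | _ ->
    String.split_on_char ' ' date_str
    |> List.map (fun s ->
         String.trim (String.map (fun c -> if c = ',' then ' ' else c) s))
    |> List.find_opt is_year
    |> (function Some s -> (try int_of_string s with _ -> 0) | None -> 0)

(* year_of_date_string "2025-07" = 2025
   year_of_date_string "November 28, 2023" = 2023 *)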
-182
stack/bushel/bin/bushel_faces.ml
···
-
open Cmdliner
-
open Lwt.Infix
-
open Printf
-
-
(* Type for person response *)
-
type person = {
-
id: string;
-
name: string;
-
thumbnailPath: string option;
-
}
-
-
(* Parse a person from JSON *)
-
let parse_person json =
-
let open Ezjsonm in
-
let id = find json ["id"] |> get_string in
-
let name = find json ["name"] |> get_string in
-
let thumbnailPath =
-
try Some (find json ["thumbnailPath"] |> get_string)
-
with _ -> None
-
in
-
{ id; name; thumbnailPath }
-
-
(* Parse a list of people from JSON response *)
-
let parse_people_response json =
-
let open Ezjsonm in
-
get_list parse_person json
-
-
(* Read API key from file *)
-
let read_api_key file =
-
let ic = open_in file in
-
let key = input_line ic in
-
close_in ic;
-
key
-
-
(* Search for a person by name *)
-
let search_person base_url api_key name =
-
let open Cohttp_lwt_unix in
-
let headers = Cohttp.Header.init_with "X-Api-Key" api_key in
-
let encoded_name = Uri.pct_encode name in
-
let url = Printf.sprintf "%s/api/search/person?name=%s" base_url encoded_name in
-
-
Client.get ~headers (Uri.of_string url) >>= fun (resp, body) ->
-
if resp.status = `OK then
-
Cohttp_lwt.Body.to_string body >>= fun body_str ->
-
let json = Ezjsonm.from_string body_str in
-
Lwt.return (parse_people_response json)
-
else
-
let status_code = Cohttp.Code.code_of_status resp.status in
-
Lwt.fail_with (Printf.sprintf "HTTP error: %d" status_code)
-
-
(* Download thumbnail for a person *)
-
let download_thumbnail base_url api_key person_id output_path =
-
let open Cohttp_lwt_unix in
-
let headers = Cohttp.Header.init_with "X-Api-Key" api_key in
-
let url = Printf.sprintf "%s/api/people/%s/thumbnail" base_url person_id in
-
-
Client.get ~headers (Uri.of_string url) >>= fun (resp, body) ->
-
match resp.status with
-
| `OK ->
-
Cohttp_lwt.Body.to_string body >>= fun img_data ->
-
(* Ensure output directory exists *)
-
(try
-
let dir = Filename.dirname output_path in
-
if not (Sys.file_exists dir) then Unix.mkdir dir 0o755;
-
Lwt.return_unit
-
with _ -> Lwt.return_unit) >>= fun () ->
-
Lwt_io.with_file ~mode:Lwt_io.output output_path
-
(fun oc -> Lwt_io.write oc img_data) >>= fun () ->
-
Lwt.return_ok output_path
-
| _ ->
-
let status_code = Cohttp.Code.code_of_status resp.status in
-
Lwt.return_error (Printf.sprintf "HTTP error: %d" status_code)
-
-
(* Get face for a single contact *)
-
(* TODO:claude *)
-
let get_face_for_contact base_url api_key output_dir contact =
-
let names = Bushel.Contact.names contact in
-
let handle = Bushel.Contact.handle contact in
-
let output_path = Filename.concat output_dir (handle ^ ".jpg") in
-
-
(* Skip if file already exists *)
-
if Sys.file_exists output_path then
-
Lwt.return (`Skipped (sprintf "Thumbnail for '%s' already exists at %s" (List.hd names) output_path))
-
else begin
-
printf "Processing contact: %s (handle: %s)\n%!" (List.hd names) handle;
-
-
(* Try each name in the list until we find a match *)
-
let rec try_names = function
-
| [] ->
-
Lwt.return (`Error (sprintf "No person found with any name for contact '%s'" handle))
-
| name :: rest_names ->
-
printf " Trying name: %s\n%!" name;
-
search_person base_url api_key name >>= function
-
| [] ->
-
printf " No results for '%s', trying next name...\n%!" name;
-
try_names rest_names
-
| person :: _ ->
-
printf " Found match for '%s'\n%!" name;
-
download_thumbnail base_url api_key person.id output_path >>= function
-
| Ok path ->
-
Lwt.return (`Ok (sprintf "Saved thumbnail for '%s' to %s" name path))
-
| Error err ->
-
Lwt.return (`Error (sprintf "Error for '%s': %s" name err))
-
in
-
try_names names
-
end
-
-
(* Process all contacts or a specific one *)
-
let process_contacts base_dir output_dir specific_handle api_key base_url =
-
printf "Loading Bushel database from %s\n%!" base_dir;
-
let db = Bushel.load base_dir in
-
let contacts = Bushel.Entry.contacts db in
-
printf "Found %d contacts\n%!" (List.length contacts);
-
-
(* Ensure output directory exists *)
-
if not (Sys.file_exists output_dir) then Unix.mkdir output_dir 0o755;
-
-
(* Filter contacts based on specific_handle if provided *)
-
let contacts_to_process =
-
match specific_handle with
-
| Some handle ->
-
begin match Bushel.Contact.find_by_handle contacts handle with
-
| Some contact -> [contact]
-
| None ->
-
eprintf "No contact found with handle '%s'\n%!" handle;
-
[]
-
end
-
| None -> contacts
-
in
-
-
(* Process each contact *)
-
let results = Lwt_main.run begin
-
Lwt_list.map_s
-
(fun contact ->
-
get_face_for_contact base_url api_key output_dir contact >>= fun result ->
-
Lwt.return (Bushel.Contact.handle contact, result))
-
contacts_to_process
-
end in
-
-
(* Print summary *)
-
let ok_count = List.length (List.filter (fun (_, r) -> match r with `Ok _ -> true | _ -> false) results) in
-
let error_count = List.length (List.filter (fun (_, r) -> match r with `Error _ -> true | _ -> false) results) in
-
let skipped_count = List.length (List.filter (fun (_, r) -> match r with `Skipped _ -> true | _ -> false) results) in
-
-
printf "\nSummary:\n";
-
printf " Successfully processed: %d\n" ok_count;
-
printf " Errors: %d\n" error_count;
-
printf " Skipped (already exist): %d\n" skipped_count;
-
-
(* Print detailed results *)
-
if error_count > 0 then begin
-
printf "\nError details:\n";
-
List.iter (fun (handle, result) ->
-
match result with
-
| `Error msg -> printf " %s: %s\n" handle msg
-
| _ -> ())
-
results;
-
end;
-
-
if ok_count > 0 || skipped_count > 0 then 0 else 1
-
-
(* Command line interface *)
-
-
(* Export the term for use in main bushel.ml *)
-
let term =
-
Term.(
-
const (fun base_dir output_dir handle api_key_file base_url ->
-
try
-
let api_key = read_api_key api_key_file in
-
process_contacts base_dir output_dir handle api_key base_url
-
with e ->
-
eprintf "Error: %s\n%!" (Printexc.to_string e);
-
1
-
) $ Bushel_common.base_dir $ Bushel_common.output_dir ~default:"." $ Bushel_common.handle_opt $
-
Bushel_common.api_key_file ~default:".photos-api" $
-
Bushel_common.url_term ~default:"https://photos.recoil.org" ~doc:"Base URL of the Immich instance")
-
-
let cmd =
-
let info = Cmd.info "faces" ~doc:"Retrieve face thumbnails for Bushel contacts from Immich" in
-
Cmd.v info term
-
-
(* Main entry point removed - accessed through bushel_main.ml *)
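One caveat in the thumbnail download path above: it creates only a single directory level before writing the file. A recursive variant (hypothetical helper, not in the deleted code) would look like:

(* Sketch: create missing parent directories recursively; the single
   Unix.mkdir call above assumes the parent directory already exists. *)
let rec mkdir_p dir =
  if not (Sys.file_exists dir) then begin
    mkdir_p (Filename.dirname dir);
    try Unix.mkdir dir 0o755
    with Unix.Unix_error (Unix.EEXIST, _, _) -> ()
  end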
-77
stack/bushel/bin/bushel_ideas.ml
···
-
open Cmdliner
-
-
(** TODO:claude List completed ideas as markdown bullet list *)
-
let list_ideas_md base_dir =
-
let ideas_dir = Printf.sprintf "%s/ideas" base_dir in
-
let contacts_dir = Printf.sprintf "%s/contacts" base_dir in
-
-
if not (Sys.file_exists ideas_dir) then (
-
Printf.eprintf "Ideas directory not found: %s\n" ideas_dir;
-
1
-
) else (
-
(* Load all contacts *)
-
let contacts =
-
if Sys.file_exists contacts_dir then
-
Sys.readdir contacts_dir
-
|> Array.to_list
-
|> List.filter (String.ends_with ~suffix:".md")
-
|> List.filter_map (fun contact_file ->
-
let filepath = Filename.concat contacts_dir contact_file in
-
try Some (Bushel.Contact.of_md filepath)
-
with e ->
-
Printf.eprintf "Error loading contact %s: %s\n" filepath (Printexc.to_string e);
-
None
-
)
-
else []
-
in
-
-
let idea_files = Sys.readdir ideas_dir
-
|> Array.to_list
-
|> List.filter (String.ends_with ~suffix:".md") in
-
let ideas = List.filter_map (fun idea_file ->
-
let filepath = Filename.concat ideas_dir idea_file in
-
try
-
let idea = Bushel.Idea.of_md filepath in
-
match Bushel.Idea.status idea with
-
| Bushel.Idea.Completed -> Some idea
-
| _ -> None
-
with e ->
-
Printf.eprintf "Error processing %s: %s\n" filepath (Printexc.to_string e);
-
None
-
) idea_files in
-
-
(* Sort by year descending *)
-
let sorted_ideas = List.sort (fun a b ->
-
compare (Bushel.Idea.year b) (Bushel.Idea.year a)
-
) ideas in
-
-
(* Output as markdown bullet list *)
-
List.iter (fun idea ->
-
let student_names =
-
Bushel.Idea.students idea
-
|> List.filter_map (fun handle ->
-
match Bushel.Contact.find_by_handle contacts handle with
-
| Some contact -> Some (Bushel.Contact.name contact)
-
| None ->
-
Printf.eprintf "Warning: contact not found for handle %s\n" handle;
-
Some handle
-
)
-
|> String.concat ", "
-
in
-
let level_str = Bushel.Idea.level_to_string (Bushel.Idea.level idea) in
-
Printf.printf "- %d: \"%s\", %s (%s)\n"
-
(Bushel.Idea.year idea)
-
(Bushel.Idea.title idea)
-
student_names
-
level_str
-
) sorted_ideas;
-
0
-
)
-
-
let term =
-
Term.(const list_ideas_md $ Bushel_common.base_dir)
-
-
let cmd =
-
let doc = "List completed ideas as markdown bullet list" in
-
let info = Cmd.info "ideas-md" ~doc in
-
Cmd.v info term
-227
stack/bushel/bin/bushel_info.ml
···
-
open Cmdliner
-
open Bushel
-
-
(** Determine the color for a note based on DOI and perma status *)
-
let note_color n =
-
match Note.doi n, Note.perma n with
-
| None, false -> `Red (* No DOI, no perma - red (normal note) *)
-
| None, true -> `Magenta (* Has perma but no DOI - magenta (needs DOI assignment) *)
-
| Some _, true -> `Green (* Has DOI with perma:true - green (correct state) *)
-
| Some _, false -> `Yellow (* Has DOI without perma:true - yellow (bug in metadata) *)
-
-
(** TODO:claude List all slugs with their types *)
-
let list_all_slugs entries ~notes_only =
-
let all = Entry.all_entries entries in
-
(* Filter for notes only if requested *)
-
let filtered = if notes_only then
-
List.filter (fun entry -> match entry with `Note _ -> true | _ -> false) all
-
else all in
-
(* Sort by slug for consistent output *)
-
let sorted = List.sort (fun a b ->
-
String.compare (Entry.slug a) (Entry.slug b)
-
) filtered in
-
Fmt.pr "@[<v>";
-
Fmt.pr "%a@," (Fmt.styled `Bold Fmt.string) (if notes_only then "Available notes:" else "Available entries:");
-
Fmt.pr "@,";
-
List.iter (fun entry ->
-
let slug = Entry.slug entry in
-
let type_str = Entry.to_type_string entry in
-
let title = Entry.title entry in
-
(* Color code notes based on DOI/perma status *)
-
match entry with
-
| `Note n ->
-
let color = note_color n in
-
Fmt.pr " %a %a - %a@,"
-
(Fmt.styled color Fmt.string) slug
-
(Fmt.styled `Faint Fmt.string) (Printf.sprintf "(%s)" type_str)
-
Fmt.string title
-
| _ ->
-
Fmt.pr " %a %a - %a@,"
-
(Fmt.styled `Cyan Fmt.string) slug
-
(Fmt.styled `Faint Fmt.string) (Printf.sprintf "(%s)" type_str)
-
Fmt.string title
-
) sorted;
-
Fmt.pr "@]@.";
-
0
-
-
(** TODO:claude Main info command implementation *)
-
let info_cmd base_dir notes_only slug_opt _env _xdg _profile =
-
let entries = load base_dir in
-
match slug_opt with
-
| None ->
-
list_all_slugs entries ~notes_only
-
| Some slug ->
-
(* Handle contact handles starting with @ *)
-
if String.starts_with ~prefix:"@" slug then
-
let handle = String.sub slug 1 (String.length slug - 1) in
-
match Contact.find_by_handle (Entry.contacts entries) handle with
-
| None ->
-
Fmt.epr "Error: No contact found with handle '@%s'@." handle;
-
1
-
| Some contact ->
-
Contact.pp Fmt.stdout contact;
-
(* Add thumbnail information for contact *)
-
(match Entry.contact_thumbnail_slug contact with
-
| Some thumb_slug ->
-
Fmt.pr "@.@.";
-
Fmt.pr "@[<v>%a: %s@," (Fmt.styled `Bold Fmt.string) "Thumbnail Slug" thumb_slug;
-
(* Look up the image in srcsetter *)
-
(match Entry.lookup_image entries thumb_slug with
-
| Some img ->
-
let thumbnail_url = Entry.smallest_webp_variant img in
-
Fmt.pr "%a: %s@," (Fmt.styled `Bold Fmt.string) "Thumbnail URL" thumbnail_url;
-
Fmt.pr "%a: %s@," (Fmt.styled `Bold Fmt.string) "Origin" (Srcsetter.origin img);
-
let (w, h) = Srcsetter.dims img in
-
Fmt.pr "%a: %dx%d@," (Fmt.styled `Bold Fmt.string) "Dimensions" w h;
-
let variants = Srcsetter.variants img in
-
if not (Srcsetter.MS.is_empty variants) then begin
-
Fmt.pr "%a:@," (Fmt.styled `Bold Fmt.string) "Variants";
-
Srcsetter.MS.iter (fun name (vw, vh) ->
-
Fmt.pr " %s: %dx%d@," name vw vh
-
) variants
-
end;
-
Fmt.pr "@]"
-
| None ->
-
Fmt.epr "Warning: Contact thumbnail image not in srcsetter: %s@." thumb_slug;
-
Fmt.pr "@]";
-
())
-
| None -> ());
-
(* Add Typesense JSON *)
-
let doc = Typesense.contact_to_document contact in
-
Fmt.pr "@.@.";
-
Fmt.pr "%a:@," (Fmt.styled `Bold Fmt.string) "Typesense Document";
-
Fmt.pr "%s@," (Ezjsonm.value_to_string ~minify:false doc);
-
(* Add backlinks information for contact *)
-
let backlinks = Bushel.Link_graph.get_backlinks_for_slug handle in
-
if backlinks <> [] then begin
-
Fmt.pr "@.@.";
-
Fmt.pr "%a (%d):@," (Fmt.styled `Bold Fmt.string) "Backlinks" (List.length backlinks);
-
List.iter (fun source_slug ->
-
match Entry.lookup entries source_slug with
-
| Some source_entry ->
-
let source_type = Entry.to_type_string source_entry in
-
let source_title = Entry.title source_entry in
-
Fmt.pr " %a %a - %a@,"
-
(Fmt.styled `Cyan Fmt.string) source_slug
-
(Fmt.styled `Faint Fmt.string) (Printf.sprintf "(%s)" source_type)
-
Fmt.string source_title
-
| None ->
-
Fmt.pr " %a %a@,"
-
(Fmt.styled `Cyan Fmt.string) source_slug
-
(Fmt.styled `Red Fmt.string) "(not found)"
-
) backlinks
-
end;
-
Fmt.pr "@.";
-
0
-
else
-
(* Remove leading ':' if present, as slugs are stored without it *)
-
let normalized_slug =
-
if String.starts_with ~prefix:":" slug
-
then String.sub slug 1 (String.length slug - 1)
-
else slug
-
in
-
match Entry.lookup entries normalized_slug with
-
| None ->
-
Fmt.epr "Error: No entry found with slug '%s'@." slug;
-
1
-
| Some entry ->
-
(match entry with
-
| `Paper p -> Paper.pp Fmt.stdout p
-
| `Project p -> Project.pp Fmt.stdout p
-
| `Idea i -> Idea.pp Fmt.stdout i
-
| `Video v -> Video.pp Fmt.stdout v
-
| `Note n -> Note.pp Fmt.stdout n);
-
(* Add thumbnail information if available *)
-
(match Entry.thumbnail_slug entries entry with
-
| Some thumb_slug ->
-
Fmt.pr "@.@.";
-
Fmt.pr "@[<v>%a: %s@," (Fmt.styled `Bold Fmt.string) "Thumbnail Slug" thumb_slug;
-
(* Look up the image in srcsetter *)
-
(match Entry.lookup_image entries thumb_slug with
-
| Some img ->
-
let thumbnail_url = Entry.smallest_webp_variant img in
-
Fmt.pr "%a: %s@," (Fmt.styled `Bold Fmt.string) "Thumbnail URL" thumbnail_url;
-
Fmt.pr "%a: %s@," (Fmt.styled `Bold Fmt.string) "Origin" (Srcsetter.origin img);
-
let (w, h) = Srcsetter.dims img in
-
Fmt.pr "%a: %dx%d@," (Fmt.styled `Bold Fmt.string) "Dimensions" w h;
-
let variants = Srcsetter.variants img in
-
if not (Srcsetter.MS.is_empty variants) then begin
-
Fmt.pr "%a:@," (Fmt.styled `Bold Fmt.string) "Variants";
-
Srcsetter.MS.iter (fun name (vw, vh) ->
-
Fmt.pr " %s: %dx%d@," name vw vh
-
) variants
-
end;
-
Fmt.pr "@]"
-
| None ->
-
Fmt.epr "Warning: Thumbnail image not in srcsetter: %s@." thumb_slug;
-
Fmt.pr "@]";
-
())
-
| None -> ());
-
(* Add Typesense JSON *)
-
let doc = match entry with
-
| `Paper p -> Typesense.paper_to_document entries p
-
| `Project p -> Typesense.project_to_document entries p
-
| `Idea i -> Typesense.idea_to_document entries i
-
| `Video v -> Typesense.video_to_document entries v
-
| `Note n -> Typesense.note_to_document entries n
-
in
-
Fmt.pr "@.@.";
-
Fmt.pr "%a:@," (Fmt.styled `Bold Fmt.string) "Typesense Document";
-
Fmt.pr "%s@," (Ezjsonm.value_to_string ~minify:false doc);
-
(* Add backlinks information *)
-
let backlinks = Bushel.Link_graph.get_backlinks_for_slug normalized_slug in
-
if backlinks <> [] then begin
-
Fmt.pr "@.@.";
-
Fmt.pr "%a (%d):@," (Fmt.styled `Bold Fmt.string) "Backlinks" (List.length backlinks);
-
List.iter (fun source_slug ->
-
match Entry.lookup entries source_slug with
-
| Some source_entry ->
-
let source_type = Entry.to_type_string source_entry in
-
let source_title = Entry.title source_entry in
-
Fmt.pr " %a %a - %a@,"
-
(Fmt.styled `Cyan Fmt.string) source_slug
-
(Fmt.styled `Faint Fmt.string) (Printf.sprintf "(%s)" source_type)
-
Fmt.string source_title
-
| None ->
-
Fmt.pr " %a %a@,"
-
(Fmt.styled `Cyan Fmt.string) source_slug
-
(Fmt.styled `Red Fmt.string) "(not found)"
-
) backlinks
-
end;
-
(* Add references information for notes *)
-
(match entry with
-
| `Note n ->
-
let default_author = match Contact.find_by_handle (Entry.contacts entries) "avsm" with
-
| Some c -> c
-
| None -> failwith "Default author 'avsm' not found"
-
in
-
let references = Md.note_references entries default_author n in
-
if references <> [] then begin
-
Fmt.pr "@.@.";
-
Fmt.pr "%a (%d):@," (Fmt.styled `Bold Fmt.string) "References" (List.length references);
-
List.iter (fun (doi, citation, _is_paper) ->
-
Fmt.pr " %a: %s@,"
-
(Fmt.styled `Cyan Fmt.string) doi
-
citation
-
) references
-
end
-
| _ -> ());
-
Fmt.pr "@.";
-
0
-
-
(** TODO:claude Command line interface definition *)
-
let notes_only_flag =
-
let doc = "Show only notes when listing entries" in
-
Arg.(value & flag & info ["notes-only"; "n"] ~doc)
-
-
let slug_arg =
-
let doc = "The slug of the entry to display (with or without leading ':'), or contact handle (with '@' prefix). If not provided, lists all available slugs." in
-
Arg.(value & pos 0 (some string) None & info [] ~docv:"SLUG" ~doc)
-
-
let term : (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Cmdliner.Term.t =
-
Term.(const info_cmd $ Bushel_common.base_dir $ notes_only_flag $ slug_arg)
-
-
let cmd =
-
let doc = "Display all information for a given slug" in
-
let info = Cmd.info "info" ~doc in
-
Cmd.v info term
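The thumbnail-printing block in info_cmd is duplicated for contacts and for regular entries; a shared helper could factor it out. Sketch only, assuming the Entry and Srcsetter accessors already used above:

(* Hypothetical refactor: one printer for both duplicated thumbnail blocks. *)
let pp_thumbnail entries thumb_slug =
  Fmt.pr "@[<v>%a: %s@," (Fmt.styled `Bold Fmt.string) "Thumbnail Slug" thumb_slug;
  (match Entry.lookup_image entries thumb_slug with
   | Some img ->
     Fmt.pr "%a: %s@," (Fmt.styled `Bold Fmt.string) "Thumbnail URL"
       (Entry.smallest_webp_variant img);
     let w, h = Srcsetter.dims img in
     Fmt.pr "%a: %dx%d@," (Fmt.styled `Bold Fmt.string) "Dimensions" w h
   | None ->
     Fmt.epr "Warning: thumbnail image not in srcsetter: %s@." thumb_slug);
  Fmt.pr "@]"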
-549
stack/bushel/bin/bushel_links.ml
···
-
open Cmdliner
-
open Lwt.Infix
-
-
(* Helper function for logging with proper flushing *)
-
let log fmt = Fmt.kstr (fun s -> prerr_string s; flush stderr) fmt
-
let log_verbose verbose fmt =
-
if verbose then Fmt.kstr (fun s -> prerr_string s; flush stderr) fmt
-
else Fmt.kstr (fun _ -> ()) fmt
-
-
(* Initialize a new links.yml file or ensure it exists *)
-
let init_links_file links_file =
-
if Sys.file_exists links_file then
-
print_endline (Fmt.str "Links file %s already exists" links_file)
-
else begin
-
(* Create an empty links file *)
-
Bushel.Link.save_links_file links_file [];
-
print_endline (Fmt.str "Created empty links file: %s" links_file)
-
end;
-
0
-
-
(* Update links.yml from Karakeep *)
-
let update_from_karakeep base_url api_key_opt tag links_file download_assets =
-
match api_key_opt with
-
| None ->
-
prerr_endline "Error: API key is required.";
-
prerr_endline "Please provide one with --api-key or create a ~/.karakeep-api file.";
-
1
-
| Some api_key ->
-
let assets_dir = "data/assets" in
-
-
(* Run the Lwt program *)
-
Lwt_main.run (
-
print_endline (Fmt.str "Fetching links from %s with tag '%s'..." base_url tag);
-
-
(* Prepare tag filter *)
-
let filter_tags = if tag = "" then [] else [tag] in
-
-
(* Fetch bookmarks from Karakeep with error handling *)
-
Lwt.catch
-
(fun () ->
-
Karakeep.fetch_all_bookmarks ~api_key ~filter_tags base_url >>= fun bookmarks ->
-
-
print_endline (Fmt.str "Retrieved %d bookmarks from Karakeep" (List.length bookmarks));
-
-
(* Read existing links if file exists *)
-
let existing_links = Bushel.Link.load_links_file links_file in
-
-
(* Convert bookmarks to bushel links *)
-
let new_links = List.map (fun bookmark ->
-
Karakeep.to_bushel_link ~base_url bookmark
-
) bookmarks in
-
-
(* Merge with existing links - keep existing dates (karakeep dates may be unreliable) *)
-
let merged_links = Bushel.Link.merge_links existing_links new_links in
-
-
(* Save the updated links file *)
-
Bushel.Link.save_links_file links_file merged_links;
-
-
print_endline (Fmt.str "Updated %s with %d links" links_file (List.length merged_links));
-
-
(* Download assets if requested *)
-
if download_assets then begin
-
print_endline "Downloading assets for bookmarks...";
-
-
(* Ensure the assets directory exists *)
-
(try Unix.mkdir assets_dir 0o755 with Unix.Unix_error (Unix.EEXIST, _, _) -> ());
-
-
(* Process each bookmark with assets *)
-
Lwt_list.iter_s (fun bookmark ->
-
(* Extract asset IDs from bookmark *)
-
let assets = bookmark.Karakeep.assets in
-
-
(* Skip if no assets *)
-
if assets = [] then
-
Lwt.return_unit
-
else
-
(* Process each asset *)
-
Lwt_list.iter_s (fun (asset_id, asset_type) ->
-
let asset_dir = Fmt.str "%s/%s" assets_dir asset_id in
-
let asset_file = Fmt.str "%s/asset.bin" asset_dir in
-
let meta_file = Fmt.str "%s/metadata.json" asset_dir in
-
-
(* Skip if the asset already exists *)
-
if Sys.file_exists asset_file then
-
Lwt.return_unit
-
else begin
-
(* Create the asset directory *)
-
(try Unix.mkdir asset_dir 0o755 with Unix.Unix_error (Unix.EEXIST, _, _) -> ());
-
-
(* Download the asset *)
-
print_endline (Fmt.str "Downloading %s asset %s..." asset_type asset_id);
-
Karakeep.fetch_asset ~api_key base_url asset_id >>= fun data ->
-
-
(* Guess content type based on first bytes *)
-
let content_type =
-
if String.length data >= 4 && String.sub data 0 4 = "\x89PNG" then
-
"image/png"
-
else if String.length data >= 3 && String.sub data 0 3 = "\xFF\xD8\xFF" then
-
"image/jpeg"
-
else if String.length data >= 4 && String.sub data 0 4 = "%PDF" then
-
"application/pdf"
-
else
-
"application/octet-stream"
-
in
-
-
(* Write the asset data *)
-
Lwt_io.with_file ~mode:Lwt_io.Output asset_file (fun oc ->
-
Lwt_io.write oc data
-
) >>= fun () ->
-
-
(* Write metadata file *)
-
let metadata = Fmt.str "{\n \"contentType\": \"%s\",\n \"assetType\": \"%s\"\n}"
-
content_type asset_type in
-
Lwt_io.with_file ~mode:Lwt_io.Output meta_file (fun oc ->
-
Lwt_io.write oc metadata
-
)
-
end
-
) assets
-
) bookmarks >>= fun () ->
-
-
print_endline "Asset download completed.";
-
Lwt.return 0
-
end else
-
Lwt.return 0
-
)
-
(fun exn ->
-
prerr_endline (Fmt.str "Error fetching bookmarks: %s" (Printexc.to_string exn));
-
Lwt.return 1
-
)
-
)
-
-
(* Extract outgoing links from Bushel entries *)
-
let update_from_bushel bushel_dir links_file include_domains exclude_domains =
-
(* Parse domain filters if provided *)
-
let include_domains_list = match include_domains with
-
| None -> []
-
| Some s -> String.split_on_char ',' s |> List.map String.trim
-
in
-
-
let exclude_domains_list = match exclude_domains with
-
| None -> []
-
| Some s -> String.split_on_char ',' s |> List.map String.trim
-
in
-
-
(* Show filter settings if any *)
-
if include_domains_list <> [] then
-
print_endline (Fmt.str "Including only domains: %s" (String.concat ", " include_domains_list));
-
-
if exclude_domains_list <> [] then
-
print_endline (Fmt.str "Excluding domains: %s" (String.concat ", " exclude_domains_list));
-
-
(* Load all entries from the bushel directory *)
-
let notes_dir = Filename.concat bushel_dir "data/notes" in
-
-
(* Make sure the notes directory exists *)
-
if not (Sys.file_exists notes_dir) then begin
-
prerr_endline (Fmt.str "Error: Notes directory %s does not exist" notes_dir);
-
exit 1
-
end;
-
-
(* Load all entries with fallback *)
-
print_endline (Fmt.str "Loading entries from %s..." bushel_dir);
-
-
let entries_data = Bushel.load bushel_dir in
-
let all_entries = Bushel.Entry.all_entries entries_data in
-
print_endline (Fmt.str "Loaded %d entries" (List.length all_entries));
-
-
(* Extract outgoing links from all entries *)
-
print_endline "Extracting outgoing links...";
-
let extracted_links = ref [] in
-
-
(* Process each entry *)
-
List.iter (fun entry ->
-
let entry_body = Bushel.Entry.body entry in
-
let entry_slug = Bushel.Entry.slug entry in
-
-
(* Skip empty bodies *)
-
if entry_body <> "" then begin
-
let links = Bushel.Entry.extract_external_links entry_body in
-
if links <> [] then begin
-
(* Add each link from this entry *)
-
List.iter (fun url ->
-
(* Try to extract domain from URL *)
-
let domain =
-
try
-
let uri = Uri.of_string url in
-
match Uri.host uri with
-
| Some host -> host
-
| None -> "unknown"
-
with _ -> "unknown"
-
in
-
-
(* Filter by domain if filters are specified *)
-
let include_by_domain =
-
if include_domains_list <> [] then
-
List.exists (fun filter ->
-
domain = filter || String.ends_with ~suffix:filter domain
-
) include_domains_list
-
else true
-
in
-
-
let exclude_by_domain =
-
List.exists (fun filter ->
-
domain = filter || String.ends_with ~suffix:filter domain
-
) exclude_domains_list
-
in
-
-
if include_by_domain && not exclude_by_domain then begin
-
let date = Bushel.Entry.date entry in
-
-
(* Extract tags from the entry *)
-
let entry_tags = Bushel.Tags.tags_of_ent entries_data entry in
-
let tag_strings = List.map Bushel.Tags.to_string entry_tags in
-
-
let link = {
-
Bushel.Link.url;
-
date;
-
description = "";
-
karakeep = None;
-
bushel = Some {
-
Bushel.Link.slugs = [entry_slug];
-
tags = tag_strings
-
};
-
} in
-
extracted_links := link :: !extracted_links
-
end
-
) links
-
end
-
end
-
) all_entries;
-
-
(* Load existing links *)
-
let existing_links = Bushel.Link.load_links_file links_file in
-
-
(* Merge with existing links - prefer bushel entry dates *)
-
let merged_links = Bushel.Link.merge_links ~prefer_new_date:true existing_links !extracted_links in
-
-
(* Save the updated links file *)
-
Bushel.Link.save_links_file links_file merged_links;
-
-
print_endline (Fmt.str "Added %d extracted links from Bushel to %s"
-
(List.length !extracted_links) links_file);
-
print_endline (Fmt.str "Total links in file: %d" (List.length merged_links));
-
0
-
-
(* Helper function to filter links that don't have karakeep data for a specific remote *)
-
let filter_links_without_karakeep base_url links =
-
List.filter (fun link ->
-
match link.Bushel.Link.karakeep with
-
| Some { remote_url; _ } when remote_url = base_url -> false
-
| _ -> true
-
) links
-
-
(* Helper function to apply limit to links if specified *)
-
let apply_limit_to_links limit links =
-
match limit with
-
| Some n when n > 0 ->
-
let rec take_n acc count = function
-
| [] -> List.rev acc
-
| _ when count = 0 -> List.rev acc
-
| x :: xs -> take_n (x :: acc) (count - 1) xs
-
in
-
let limited = take_n [] n links in
-
if List.length links > n then
-
log "Limited to first %d links (out of %d available)\n" n (List.length links);
-
limited
-
| _ -> links
-
-
(* Helper function to prepare tags for a link *)
-
let prepare_tags_for_link tag link =
-
let slug_tags =
-
match link.Bushel.Link.bushel with
-
| Some { slugs; _ } -> List.map (fun slug -> "bushel:" ^ slug) slugs
-
| None -> []
-
in
-
if tag = "" then slug_tags
-
else tag :: slug_tags
-
-
(* Helper function to create batches for parallel processing *)
-
let create_batches max_concurrent links =
-
let rec create_batches_aux links acc =
-
match links with
-
| [] -> List.rev acc
-
| _ ->
-
let batch, rest =
-
if List.length links <= max_concurrent then
-
links, []
-
else
-
let rec take n lst batch =
-
if n = 0 || lst = [] then List.rev batch, lst
-
else take (n-1) (List.tl lst) (List.hd lst :: batch)
-
in
-
take max_concurrent links []
-
in
-
create_batches_aux rest (batch :: acc)
-
in
-
create_batches_aux links []
-
-
(* Helper function to upload a single link to Karakeep *)
-
let upload_single_link api_key base_url tag verbose updated_links link =
-
let url = link.Bushel.Link.url in
-
let title =
-
if link.Bushel.Link.description <> "" then
-
Some link.Bushel.Link.description
-
else None
-
in
-
let tags = prepare_tags_for_link tag link in
-
-
if verbose then begin
-
log " Uploading: %s\n" url;
-
if tags <> [] then
-
log " Tags: %s\n" (String.concat ", " tags);
-
if title <> None then
-
log " Title: %s\n" (Option.get title);
-
end else begin
-
log "Uploading: %s\n" url;
-
end;
-
-
(* Create the bookmark with tags *)
-
Lwt.catch
-
(fun () ->
-
Karakeep.create_bookmark
-
~api_key
-
~url
-
?title
-
~tags
-
base_url
-
>>= fun bookmark ->
-
-
(* Create updated link with karakeep data *)
-
let updated_link = {
-
link with
-
Bushel.Link.karakeep =
-
Some {
-
Bushel.Link.remote_url = base_url;
-
id = bookmark.id;
-
tags = bookmark.tags;
-
metadata = []; (* Will be populated on next sync *)
-
}
-
} in
-
updated_links := updated_link :: !updated_links;
-
-
if verbose then
-
log " โœ“ Added to Karakeep with ID: %s\n" bookmark.id
-
else
-
log " - Added to Karakeep with ID: %s\n" bookmark.id;
-
Lwt.return 1 (* Success *)
-
)
-
(fun exn ->
-
if verbose then
-
log " โœ— Error uploading %s: %s\n" url (Printexc.to_string exn)
-
else
-
log " - Error uploading %s: %s\n" url (Printexc.to_string exn);
-
Lwt.return 0 (* Failure *)
-
)
-
-
(* Helper function to process a batch of links *)
-
let process_batch api_key base_url tag verbose updated_links batch_num total_batches batch =
-
log_verbose verbose "\nProcessing batch %d/%d (%d links)...\n"
-
(batch_num + 1) total_batches (List.length batch);
-
-
(* Process links in this batch concurrently *)
-
Lwt_list.map_p (upload_single_link api_key base_url tag verbose updated_links) batch
-
-
(* Helper function to update links file with new karakeep data *)
-
let update_links_file links_file original_links updated_links =
-
if !updated_links <> [] then begin
-
(* Replace the updated links in the original list *)
-
let final_links =
-
List.map (fun link ->
-
let url = link.Bushel.Link.url in
-
let updated = List.find_opt (fun ul -> ul.Bushel.Link.url = url) !updated_links in
-
match updated with
-
| Some ul -> ul
-
| None -> link
-
) original_links
-
in
-
-
(* Save the updated links file *)
-
Bushel.Link.save_links_file links_file final_links;
-
-
log "Updated %s with %d new karakeep_ids\n"
-
links_file (List.length !updated_links);
-
end
-
-
(* Upload links to Karakeep that don't already have karakeep data *)
-
let upload_to_karakeep base_url api_key_opt links_file tag max_concurrent delay_seconds limit verbose =
-
match api_key_opt with
-
| None ->
-
log "Error: API key is required.\n";
-
log "Please provide one with --api-key or create a ~/.karakeep-api file.\n";
-
1
-
| Some api_key ->
-
(* Load links from file *)
-
log_verbose verbose "Loading links from %s...\n" links_file;
-
let links = Bushel.Link.load_links_file links_file in
-
log_verbose verbose "Loaded %d total links\n" (List.length links);
-
-
(* Filter links that don't have karakeep data for this remote *)
-
log_verbose verbose "Filtering links that don't have karakeep data for %s...\n" base_url;
-
let filtered_links = filter_links_without_karakeep base_url links in
-
log_verbose verbose "Found %d links without karakeep data\n" (List.length filtered_links);
-
-
(* Apply limit if specified *)
-
let links_to_upload = apply_limit_to_links limit filtered_links in
-
-
if links_to_upload = [] then begin
-
log "No links to upload to %s (all links already have karakeep data)\n" base_url;
-
0
-
end else begin
-
log "Found %d links to upload to %s\n" (List.length links_to_upload) base_url;
-
-
(* Split links into batches for parallel processing *)
-
let batches = create_batches max_concurrent links_to_upload in
-
log_verbose verbose "Processing in %d batches of up to %d links each...\n"
-
(List.length batches) max_concurrent;
-
log_verbose verbose "Delay between batches: %.1f seconds\n" delay_seconds;
-
-
(* Process batches and accumulate updated links *)
-
let updated_links = ref [] in
-
-
let result = Lwt_main.run (
-
Lwt.catch
-
(fun () ->
-
Lwt_list.fold_left_s (fun (total_count, batch_num) batch ->
-
process_batch api_key base_url tag verbose updated_links
-
batch_num (List.length batches) batch >>= fun results ->
-
-
(* Count successes in this batch *)
-
let batch_successes = List.fold_left (+) 0 results in
-
let new_total = total_count + batch_successes in
-
-
log_verbose verbose " Batch %d complete: %d/%d successful (Total: %d/%d)\n"
-
(batch_num + 1) batch_successes (List.length batch) new_total (new_total + (List.length links_to_upload - new_total));
-
-
(* Add a delay before processing the next batch *)
-
if batch_num + 1 < List.length batches then begin
-
log_verbose verbose " Waiting %.1f seconds before next batch...\n" delay_seconds;
-
Lwt_unix.sleep delay_seconds >>= fun () ->
-
Lwt.return (new_total, batch_num + 1)
-
end else
-
Lwt.return (new_total, batch_num + 1)
-
) (0, 0) batches >>= fun (final_count, _) ->
-
Lwt.return final_count
-
)
-
(fun exn ->
-
log "Error during upload operation: %s\n" (Printexc.to_string exn);
-
Lwt.return 0
-
)
-
) in
-
-
(* Update the links file with the new karakeep_ids *)
-
update_links_file links_file links updated_links;
-
-
log "Upload complete. %d/%d links uploaded successfully.\n"
-
result (List.length links_to_upload);
-
-
0
-
end
-
-
(* Common arguments *)
-
let links_file_arg =
-
let doc = "Links YAML file. Defaults to links.yml." in
-
Arg.(value & opt string "links.yml" & info ["file"; "f"] ~doc ~docv:"FILE")
-
-
let base_url_arg =
-
let doc = "Base URL of the Karakeep instance" in
-
let default = "https://hoard.recoil.org" in
-
Arg.(value & opt string default & info ["url"] ~doc ~docv:"URL")
-
-
let api_key_arg =
-
let doc = "API key for Karakeep authentication (ak1_<key_id>_<secret>)" in
-
let get_api_key () =
-
let home = try Sys.getenv "HOME" with Not_found -> "." in
-
let key_path = Filename.concat home ".karakeep-api" in
-
try
-
let ic = open_in key_path in
-
let key = input_line ic in
-
close_in ic;
-
Some (String.trim key)
-
with _ -> None
-
in
-
Arg.(value & opt (some string) (get_api_key ()) & info ["api-key"] ~doc ~docv:"API_KEY")
-
-
let tag_arg =
-
let doc = "Tag to filter or apply to bookmarks" in
-
Arg.(value & opt string "" & info ["tag"; "t"] ~doc ~docv:"TAG")
-
-
let download_assets_arg =
-
let doc = "Download assets (screenshots, etc.) from Karakeep" in
-
Arg.(value & flag & info ["download-assets"; "d"] ~doc)
-
-
let base_dir_arg =
-
let doc = "Base directory of the Bushel project" in
-
Arg.(value & opt string "." & info ["dir"; "d"] ~doc ~docv:"DIR")
-
-
let include_domains_arg =
-
let doc = "Only include links to these domains (comma-separated list)" in
-
Arg.(value & opt (some string) None & info ["include"] ~doc ~docv:"DOMAINS")
-
-
let exclude_domains_arg =
-
let doc = "Exclude links to these domains (comma-separated list)" in
-
Arg.(value & opt (some string) None & info ["exclude"] ~doc ~docv:"DOMAINS")
-
-
let concurrent_arg =
-
let doc = "Maximum number of concurrent uploads (default: 5)" in
-
Arg.(value & opt int 5 & info ["concurrent"; "c"] ~doc ~docv:"NUM")
-
-
let delay_arg =
-
let doc = "Delay in seconds between batches (default: 1.0)" in
-
Arg.(value & opt float 1.0 & info ["delay"] ~doc ~docv:"SECONDS")
-
-
let limit_arg =
-
let doc = "Limit number of links to upload (for testing)" in
-
Arg.(value & opt (some int) None & info ["limit"; "l"] ~doc ~docv:"NUM")
-
-
let verbose_arg =
-
let doc = "Show detailed progress information during upload" in
-
Arg.(value & flag & info ["verbose"; "v"] ~doc)
-
-
(* Command definitions *)
-
let init_cmd =
-
let doc = "Initialize a new links.yml file" in
-
let info = Cmd.info "init" ~doc in
-
Cmd.v info Term.(const init_links_file $ links_file_arg)
-
-
let karakeep_cmd =
-
let doc = "Update links.yml with links from Karakeep" in
-
let info = Cmd.info "karakeep" ~doc in
-
Cmd.v info Term.(const update_from_karakeep $ base_url_arg $ api_key_arg $ tag_arg $ links_file_arg $ download_assets_arg)
-
-
let bushel_cmd =
-
let doc = "Update links.yml with outgoing links from Bushel entries" in
-
let info = Cmd.info "bushel" ~doc in
-
Cmd.v info Term.(const update_from_bushel $ base_dir_arg $ links_file_arg $ include_domains_arg $ exclude_domains_arg)
-
-
let upload_cmd =
-
let doc = "Upload links without karakeep data to Karakeep" in
-
let info = Cmd.info "upload" ~doc in
-
Cmd.v info Term.(const upload_to_karakeep $ base_url_arg $ api_key_arg $ links_file_arg $ tag_arg $ concurrent_arg $ delay_arg $ limit_arg $ verbose_arg)
-
-
(* Export the term and cmd for use in main bushel.ml *)
-
let cmd =
-
let doc = "Manage links between Bushel and Karakeep" in
-
let info = Cmd.info "links" ~doc in
-
Cmd.group info [init_cmd; karakeep_cmd; bushel_cmd; upload_cmd]
-
-
(* For standalone execution *)
-
(* Main entry point removed - accessed through bushel_main.ml *)
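The create_batches helper splits the upload list into groups of at most max_concurrent links. An equivalent, more compact chunking function (sketch only, not in the removed file):

(* Sketch: split [lst] into chunks of at most [n] elements, preserving order. *)
let chunk n lst =
  let rec go acc cur k = function
    | [] -> List.rev (if cur = [] then acc else List.rev cur :: acc)
    | x :: xs when k = 0 -> go (List.rev cur :: acc) [x] (n - 1) xs
    | x :: xs -> go acc (x :: cur) (k - 1) xs
  in
  if n <= 0 then invalid_arg "chunk" else go [] [] n lst

(* chunk 2 [1; 2; 3; 4; 5] = [[1; 2]; [3; 4]; [5]] *)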
-119
stack/bushel/bin/bushel_main.ml
···
-
open Cmdliner
-
-
let version = "0.1.0"
-
-
(* Import actual command implementations from submodules *)
-
-
(* Obsidian command - no API keys needed *)
-
let obsidian_cmd =
-
let doc = "Convert Bushel entries to Obsidian format" in
-
Eiocmd.run
-
~use_keyeio:false
-
~info:(Cmd.info "obsidian" ~version ~doc)
-
~app_name:"bushel"
-
~service:"bushel"
-
Bushel_obsidian.term
-
-
(* Paper classify command *)
-
let paper_classify_cmd = Bushel_paper_classify.cmd
-
-
(* Paper tex command *)
-
let paper_tex_cmd = Bushel_paper_tex.cmd
-
-
(* Thumbs command - no API keys needed *)
-
let thumbs_cmd =
-
let doc = "Generate thumbnails from paper PDFs" in
-
Eiocmd.run
-
~use_keyeio:false
-
~info:(Cmd.info "thumbs" ~version ~doc)
-
~app_name:"bushel"
-
~service:"bushel"
-
Bushel_thumbs.term
-
-
(* Query command - needs Typesense API key *)
-
let query_cmd =
-
let doc = "Query Bushel collections using multisearch" in
-
Eiocmd.run
-
~use_keyeio:true
-
~info:(Cmd.info "query" ~version ~doc)
-
~app_name:"bushel"
-
~service:"bushel"
-
Bushel_search.term
-
-
(* Bibtex command - no API keys needed *)
-
let bibtex_cmd =
-
let doc = "Export bibtex for all papers" in
-
Eiocmd.run
-
~use_keyeio:false
-
~info:(Cmd.info "bibtex" ~version ~doc)
-
~app_name:"bushel"
-
~service:"bushel"
-
Bushel_bibtex.term
-
-
(* Ideas command *)
-
let ideas_cmd = Bushel_ideas.cmd
-
-
(* Info command - no API keys needed *)
-
let info_cmd =
-
let doc = "Display all information for a given slug" in
-
Eiocmd.run
-
~use_keyeio:false
-
~info:(Cmd.info "info" ~version ~doc)
-
~app_name:"bushel"
-
~service:"bushel"
-
Bushel_info.term
-
-
(* Missing command - no API keys needed *)
-
let missing_cmd =
-
let doc = "Check for missing metadata in entries" in
-
Eiocmd.run
-
~use_keyeio:false
-
~info:(Cmd.info "missing" ~version ~doc)
-
~app_name:"bushel"
-
~service:"bushel"
-
Bushel_missing.term
-
-
(* Note DOI command - no API keys needed *)
-
let note_doi_cmd =
-
let doc = "Assign DOIs to notes with perma:true" in
-
Eiocmd.run
-
~use_keyeio:false
-
~info:(Cmd.info "note-doi" ~version ~doc)
-
~app_name:"bushel"
-
~service:"bushel"
-
Bushel_note_doi.term
-
-
(* Main command *)
-
let bushel_cmd =
-
let doc = "Bushel content management toolkit" in
-
let sdocs = Manpage.s_common_options in
-
let man = [
-
`S Manpage.s_description;
-
`P "$(tname) is a unified command-line tool for managing various types of \
-
content in the Bushel system, including papers, videos, links, and more.";
-
`P "$(tname) provides unified access to all Bushel functionality through \
-
integrated subcommands.";
-
`S Manpage.s_commands;
-
`S Manpage.s_common_options;
-
`S "ENVIRONMENT";
-
`P "BUSHEL_CONFIG - Path to configuration file with default settings";
-
`S Manpage.s_authors;
-
`P "Anil Madhavapeddy";
-
`S Manpage.s_bugs;
-
`P "Report bugs at https://github.com/avsm/bushel/issues";
-
] in
-
let info = Cmd.info "bushel" ~version ~doc ~sdocs ~man in
-
Cmd.group info [
-
bibtex_cmd;
-
ideas_cmd;
-
info_cmd;
-
missing_cmd;
-
note_doi_cmd;
-
obsidian_cmd;
-
paper_classify_cmd;
-
paper_tex_cmd;
-
query_cmd;
-
thumbs_cmd;
-
]
-
-
let () = exit (Cmd.eval' bushel_cmd)
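Any further Eio-based subcommand is wired the same way, provided its term has the (env -> xdg -> profile -> int) shape used by Bushel_info.term and friends. A hypothetical sketch (Bushel_export is an assumed module, not part of this tree):

(* Hypothetical: one more subcommand routed through Eiocmd.run as above. *)
let export_cmd =
  let doc = "Export entries (hypothetical example)" in
  Eiocmd.run
    ~use_keyeio:false
    ~info:(Cmd.info "export" ~version ~doc)
    ~app_name:"bushel"
    ~service:"bushel"
    Bushel_export.term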
-186
stack/bushel/bin/bushel_missing.ml
···
-
open Cmdliner
-
open Bushel
-
-
(** Check if an entry has a thumbnail *)
-
let has_thumbnail entries entry =
-
match Entry.thumbnail_slug entries entry with
-
| Some _ -> true
-
| None -> false
-
-
(** Check if an entry has a synopsis or description *)
-
let has_synopsis = function
-
| `Paper p -> Paper.abstract p <> "" (* Papers have abstracts *)
-
| `Note n -> Note.synopsis n <> None (* Notes have optional synopsis *)
-
| `Idea _ -> true (* Ideas don't have synopsis field *)
-
| `Project _ -> true (* Projects don't have synopsis field *)
-
| `Video _ -> true (* Videos don't have synopsis field *)
-
-
(** Check if an entry has tags *)
-
let has_tags = function
-
| `Paper p -> Paper.tags p <> []
-
| `Note n -> Note.tags n <> []
-
| `Idea i -> i.Idea.tags <> [] (* Access record field directly *)
-
| `Project p -> Project.tags p <> []
-
| `Video v -> v.Video.tags <> [] (* Access record field directly *)
-
-
(** Entry with broken references *)
-
type entry_with_broken_refs = {
-
entry : Entry.entry;
-
broken_slugs : string list;
-
broken_contacts : string list;
-
}
-
-
(** Find entries missing thumbnails *)
-
let find_missing_thumbnails entries =
-
let all = Entry.all_entries entries in
-
List.filter (fun entry -> not (has_thumbnail entries entry)) all
-
-
(** Find entries missing synopsis *)
-
let find_missing_synopsis entries =
-
let all = Entry.all_entries entries in
-
List.filter (fun entry -> not (has_synopsis entry)) all
-
-
(** Find entries missing tags *)
-
let find_missing_tags entries =
-
let all = Entry.all_entries entries in
-
List.filter (fun entry -> not (has_tags entry)) all
-
-
(** Find entries with broken slugs or contact handles *)
-
let find_broken_references entries =
-
let all = Entry.all_entries entries in
-
List.filter_map (fun entry ->
-
let body = Entry.body entry in
-
let broken_slugs, broken_contacts = Md.validate_references entries body in
-
if broken_slugs <> [] || broken_contacts <> [] then
-
Some { entry; broken_slugs; broken_contacts }
-
else
-
None
-
) all
-
-
(** Print a list of entries *)
-
let print_entries title entries_list =
-
if entries_list <> [] then begin
-
Fmt.pr "@.%a (%d):@," (Fmt.styled `Bold Fmt.string) title (List.length entries_list);
-
List.iter (fun entry ->
-
let slug = Entry.slug entry in
-
let type_str = Entry.to_type_string entry in
-
let title = Entry.title entry in
-
Fmt.pr " %a %a - %a@,"
-
(Fmt.styled `Cyan Fmt.string) slug
-
(Fmt.styled `Faint Fmt.string) (Printf.sprintf "(%s)" type_str)
-
Fmt.string title
-
) entries_list
-
end
-
-
(** Print entries with broken references *)
-
let print_broken_references title entries_with_broken_refs =
-
if entries_with_broken_refs <> [] then begin
-
Fmt.pr "@.%a (%d):@," (Fmt.styled `Bold Fmt.string) title (List.length entries_with_broken_refs);
-
List.iter (fun { entry; broken_slugs; broken_contacts } ->
-
let slug = Entry.slug entry in
-
let type_str = Entry.to_type_string entry in
-
let entry_title = Entry.title entry in
-
Fmt.pr " %a %a - %a@,"
-
(Fmt.styled `Cyan Fmt.string) slug
-
(Fmt.styled `Faint Fmt.string) (Printf.sprintf "(%s)" type_str)
-
Fmt.string entry_title;
-
if broken_slugs <> [] then
-
Fmt.pr " %a %a@,"
-
(Fmt.styled `Red Fmt.string) "Broken slugs:"
-
(Fmt.list ~sep:Fmt.comma Fmt.string) broken_slugs;
-
if broken_contacts <> [] then
-
Fmt.pr " %a %a@,"
-
(Fmt.styled `Red Fmt.string) "Broken contacts:"
-
(Fmt.list ~sep:Fmt.comma Fmt.string) broken_contacts;
-
) entries_with_broken_refs
-
end
-
-
(** Main missing command implementation *)
-
let missing_cmd base_dir check_thumbnails check_synopsis check_tags check_refs _env _xdg _profile =
-
let entries = load base_dir in
-
-
let count = ref 0 in
-
-
if check_thumbnails then begin
-
let missing = find_missing_thumbnails entries in
-
print_entries "Entries missing thumbnails" missing;
-
count := !count + List.length missing
-
end;
-
-
if check_synopsis then begin
-
let missing = find_missing_synopsis entries in
-
print_entries "Entries missing synopsis" missing;
-
count := !count + List.length missing
-
end;
-
-
if check_tags then begin
-
let missing = find_missing_tags entries in
-
print_entries "Entries missing tags" missing;
-
count := !count + List.length missing
-
end;
-
-
if check_refs then begin
-
let broken = find_broken_references entries in
-
print_broken_references "Entries with broken references" broken;
-
(* Count total number of broken references, not just entries *)
-
let broken_count = List.fold_left (fun acc { broken_slugs; broken_contacts; _ } ->
-
acc + List.length broken_slugs + List.length broken_contacts
-
) 0 broken in
-
count := !count + broken_count
-
end;
-
-
if !count = 0 then
-
Fmt.pr "@.No missing metadata or broken references found.@."
-
else
-
Fmt.pr "@.Total issues found: %d@." !count;
-
-
0
-
-
(** Command line arguments *)
-
let thumbnails_flag =
-
let doc = "Check for entries missing thumbnails" in
-
Arg.(value & flag & info ["thumbnails"; "t"] ~doc)
-
-
let synopsis_flag =
-
let doc = "Check for entries missing synopsis" in
-
Arg.(value & flag & info ["synopsis"; "s"] ~doc)
-
-
let tags_flag =
-
let doc = "Check for entries missing tags" in
-
Arg.(value & flag & info ["tags"; "g"] ~doc)
-
-
let refs_flag =
-
let doc = "Check for broken slugs and contact handles" in
-
Arg.(value & flag & info ["refs"; "r"] ~doc)
-
-
let term : (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Cmdliner.Term.t =
-
Term.(const (fun base thumbnails synopsis tags refs env xdg profile ->
-
(* If no flags specified, check everything *)
-
let check_all = not (thumbnails || synopsis || tags || refs) in
-
missing_cmd base
-
(check_all || thumbnails)
-
(check_all || synopsis)
-
(check_all || tags)
-
(check_all || refs)
-
env xdg profile
-
) $ Bushel_common.base_dir $ thumbnails_flag $ synopsis_flag $ tags_flag $ refs_flag)
-
-
let cmd =
-
let doc = "List entries with missing metadata or broken references" in
-
let man = [
-
`S Manpage.s_description;
-
`P "This command scans all entries and reports any that are missing thumbnails, synopsis, tags, or have broken slugs/contact handles.";
-
`P "By default, all checks are performed. Use flags to select specific checks.";
-
`S Manpage.s_options;
-
`S Manpage.s_examples;
-
`P "Check for all issues:";
-
`Pre " $(mname) $(tname)";
-
`P "Check only for missing thumbnails:";
-
`Pre " $(mname) $(tname) --thumbnails";
-
`P "Check for missing synopsis and tags:";
-
`Pre " $(mname) $(tname) --synopsis --tags";
-
`P "Check only for broken references:";
-
`Pre " $(mname) $(tname) --refs";
-
] in
-
let info = Cmd.info "missing" ~doc ~man in
-
Cmd.v info term
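
A small worked example of the counting rule noted in the comment above (issues are tallied per broken reference, not per affected entry). The slugs and handles here are made up; only the fold mirrors the deleted code.

let () =
  (* Two entries: one with two broken slugs and a broken contact handle, one
     with a single broken slug. The fold mirrors the one above and yields 4. *)
  let broken = [ ([ "dead-slug"; "old-slug" ], [ "gone" ]); ([ "missing" ], []) ] in
  let total =
    List.fold_left
      (fun acc (slugs, contacts) -> acc + List.length slugs + List.length contacts)
      0 broken
  in
  assert (total = 4)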
-131
stack/bushel/bin/bushel_note_doi.ml
···
-
open Cmdliner
-
open Bushel
-
-
(** Generate a roguedoi identifier using Crockford base32 encoding *)
-
let generate_roguedoi () =
-
Random.self_init ();
-
(* Generate a 10-character roguedoi with checksum and split every 5 chars *)
-
let id = Crockford.generate ~length:10 ~split_every:5 ~checksum:true () in
-
Printf.sprintf "10.59999/%s" id
-
-
(** Add DOI to a specific note's frontmatter if it doesn't already have one *)
-
let add_doi_to_note note_path =
-
let content = In_channel.with_open_bin note_path In_channel.input_all in
-
(* Check if note already has a doi: field *)
-
let has_doi = try
-
let _ = String.index content 'd' in
-
let re = Str.regexp "^doi:" in
-
let lines = String.split_on_char '\n' content in
-
List.exists (fun line -> Str.string_match re (String.trim line) 0) lines
-
with Not_found -> false
-
in
-
if has_doi then begin
-
Fmt.pr "%a: Note already has a DOI, skipping@."
-
(Fmt.styled `Yellow Fmt.string) note_path;
-
false
-
end else begin
-
let roguedoi = generate_roguedoi () in
-
(* Parse the file to extract frontmatter *)
-
match String.split_on_char '\n' content with
-
| "---" :: rest ->
-
(* Find the end of frontmatter *)
-
let rec find_end_fm acc = function
-
| [] -> None
-
| "---" :: body_lines -> Some (List.rev acc, body_lines)
-
| line :: lines -> find_end_fm (line :: acc) lines
-
in
-
(match find_end_fm [] rest with
-
| Some (fm_lines, body_lines) ->
-
(* Add doi field to frontmatter *)
-
let new_fm = fm_lines @ [Printf.sprintf "doi: %s" roguedoi] in
-
let new_content =
-
String.concat "\n" (["---"] @ new_fm @ ["---"] @ body_lines)
-
in
-
Out_channel.with_open_bin note_path (fun oc ->
-
Out_channel.output_string oc new_content
-
);
-
Fmt.pr "%a: Added DOI %a@."
-
(Fmt.styled `Green Fmt.string) note_path
-
(Fmt.styled `Cyan Fmt.string) roguedoi;
-
true
-
| None ->
-
Fmt.epr "%a: Could not parse frontmatter@."
-
(Fmt.styled `Red Fmt.string) note_path;
-
false)
-
| _ ->
-
Fmt.epr "%a: No frontmatter found@."
-
(Fmt.styled `Red Fmt.string) note_path;
-
false
-
end
-
-
(** Main command implementation *)
-
let note_doi_cmd base_dir dry_run _env _xdg _profile =
-
let entries = load base_dir in
-
let notes = Entry.notes entries in
-
-
(* Filter for perma notes without DOI *)
-
let perma_notes = List.filter (fun n ->
-
Note.perma n && Option.is_none (Note.doi n)
-
) notes in
-
-
if perma_notes = [] then begin
-
Fmt.pr "No permanent notes without DOI found.@.";
-
0
-
end else begin
-
Fmt.pr "@[<v>";
-
Fmt.pr "%a: Found %d permanent notes without DOI@.@."
-
(Fmt.styled `Bold Fmt.string) "Info"
-
(List.length perma_notes);
-
-
let count = ref 0 in
-
List.iter (fun note ->
-
let slug = Note.slug note in
-
let note_path = Printf.sprintf "%s/data/notes/%s.md" base_dir slug in
-
Fmt.pr "Processing %a (%a)...@,"
-
(Fmt.styled `Cyan Fmt.string) slug
-
(Fmt.styled `Faint Fmt.string) (Note.title note);
-
-
if not dry_run then begin
-
if add_doi_to_note note_path then
-
incr count
-
end else begin
-
let roguedoi = generate_roguedoi () in
-
Fmt.pr " Would add DOI: %a@,"
-
(Fmt.styled `Cyan Fmt.string) roguedoi;
-
incr count
-
end
-
) perma_notes;
-
-
Fmt.pr "@.";
-
if dry_run then
-
Fmt.pr "%a: Would add DOI to %d notes (dry run)@."
-
(Fmt.styled `Bold Fmt.string) "Summary"
-
!count
-
else
-
Fmt.pr "%a: Added DOI to %d notes@."
-
(Fmt.styled `Bold Fmt.string) "Summary"
-
!count;
-
Fmt.pr "@]@.";
-
0
-
end
-
-
(** Command line interface definition *)
-
let dry_run_flag =
-
let doc = "Show what would be done without making changes" in
-
Arg.(value & flag & info ["n"; "dry-run"] ~doc)
-
-
let term : (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Cmdliner.Term.t =
-
Term.(const note_doi_cmd $ Bushel_common.base_dir $ dry_run_flag)
-
-
let cmd =
-
let doc = "Generate and add DOI identifiers to permanent notes" in
-
let man = [
-
`S Manpage.s_description;
-
`P "This command generates roguedoi identifiers using Crockford base32 encoding \
-
and adds them to the frontmatter of permanent notes (notes with perma: true) \
-
that don't already have a DOI.";
-
`P "Roguedoi format: 10.59999/xxxxx-xxxxx where x is a Crockford base32 character.";
-
`S Manpage.s_options;
-
] in
-
let info = Cmd.info "note-doi" ~doc ~man in
-
Cmd.v info term
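
To make the frontmatter handling above concrete, here is a minimal self-contained sketch of the same split-and-append technique: find the two "---" fences, add a doi: field, reassemble. It is an illustration only; the sample note text and DOI placeholder are invented, and the real code above also checks for an existing doi: line first.

(* Minimal sketch of the frontmatter append used above. *)
let add_field ~key ~value content =
  match String.split_on_char '\n' content with
  | "---" :: rest ->
    let rec split_fm acc = function
      | [] -> None
      | "---" :: body -> Some (List.rev acc, body)
      | line :: lines -> split_fm (line :: acc) lines
    in
    (match split_fm [] rest with
     | Some (fm, body) ->
       let fm = fm @ [ Printf.sprintf "%s: %s" key value ] in
       Some (String.concat "\n" (("---" :: fm) @ ("---" :: body)))
     | None -> None)
  | _ -> None

let () =
  let note = "---\ntitle: Example\n---\n\nBody text." in
  match add_field ~key:"doi" ~value:"10.59999/xxxxx-xxxxx" note with
  | Some s -> print_endline s
  | None -> prerr_endline "no frontmatter found"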
-88
stack/bushel/bin/bushel_obsidian.ml
···
-
open Bushel
-
-
let obsidian_links =
-
let inline c = function
-
| Md.Obsidian_link l ->
-
Cmarkit_renderer.Context.string c l;
-
true
-
| _ -> false
-
in
-
Cmarkit_renderer.make ~inline ()
-
;;
-
-
let obsidian_of_doc doc =
-
let default = Cmarkit_commonmark.renderer () in
-
let r = Cmarkit_renderer.compose default obsidian_links in
-
Cmarkit_renderer.doc_to_string r doc
-
;;
-
-
let md_to_obsidian entries md =
-
let open Cmarkit in
-
Doc.of_string ~strict:false ~resolver:Md.with_bushel_links md
-
|> Mapper.map_doc (Mapper.make ~inline:(Md.bushel_inline_mapper_to_obsidian entries) ())
-
|> obsidian_of_doc
-
;;
-
-
let obsidian_output base output_dir =
-
let e = load base in
-
let all = Entry.all_entries e @ Entry.all_papers e in
-
List.iter
-
(fun ent ->
-
let slug =
-
match ent with
-
| `Paper { Paper.latest; slug; ver; _ } when not latest ->
-
Printf.sprintf "%s-%s" slug ver
-
| _ -> Entry.slug ent
-
in
-
let fname = Filename.concat output_dir (slug ^ ".md") in
-
let tags =
-
Tags.tags_of_ent e ent
-
|> List.filter_map (fun tag ->
-
match tag with
-
| `Slug _ -> None
-
| `Set s -> Some (Printf.sprintf "\"#%s\"" s)
-
| `Text s -> Some (Printf.sprintf "%s" s)
-
| `Contact _ -> None
-
| `Year y -> Some (Printf.sprintf "\"#y%d\"" y))
-
|> List.map (fun s -> "- " ^ s)
-
|> String.concat "\n"
-
in
-
let links =
-
Tags.tags_of_ent e ent
-
|> List.filter_map (fun tag ->
-
match tag with
-
| `Slug s when s <> slug -> Some (Printf.sprintf "- \"[[%s]]\"" s)
-
| `Contact c -> Some (Printf.sprintf "- \"[[@%s]]\"" c)
-
| _ -> None)
-
|> String.concat "\n"
-
|> function
-
| "" -> ""
-
| s -> "linklist:\n" ^ s ^ "\n"
-
in
-
let body = Entry.body ent |> md_to_obsidian e in
-
let buf = Printf.sprintf "---\ntags:\n%s\n%s---\n\n%s" tags links body in
-
Out_channel.with_open_bin fname (fun oc -> output_string oc buf))
-
all;
-
List.iter
-
(fun contact ->
-
let slug = Contact.handle contact in
-
let fname = Filename.concat output_dir ("@" ^ slug ^ ".md") in
-
let buf = String.concat "\n" (Contact.names contact) in
-
Out_channel.with_open_bin fname (fun oc -> output_string oc buf))
-
(Entry.contacts e)
-
;;
-
-
(* Export the term for use in main bushel.ml *)
-
let term : (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Cmdliner.Term.t =
-
Cmdliner.Term.(
-
const (fun base_dir output_dir _env _xdg _profile -> obsidian_output base_dir output_dir; 0) $
-
Bushel_common.base_dir $
-
Bushel_common.output_dir ~default:"obsidian"
-
)
-
-
let cmd =
-
let doc = "Generate Obsidian-compatible markdown files" in
-
let info = Cmdliner.Cmd.info "obsidian" ~doc in
-
Cmdliner.Cmd.v info term
-
-
(* Main entry point removed - accessed through bushel_main.ml *)
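
For orientation, this is roughly what the frontmatter assembly above produces for a single exported entry. The tag, link, and body values below are invented; only the "---\ntags:\n..." shape mirrors the deleted code.

(* Minimal sketch of the Obsidian frontmatter assembly, with made-up values. *)
let () =
  let tags = String.concat "\n" [ "- \"#ocaml\""; "- \"#y2024\"" ] in
  let links = "linklist:\n- \"[[some-entry]]\"\n- \"[[@some-contact]]\"\n" in
  let body = "Entry body rendered to Obsidian-flavoured markdown." in
  print_string (Printf.sprintf "---\ntags:\n%s\n%s---\n\n%s" tags links body)
  (* prints:
     ---
     tags:
     - "#ocaml"
     - "#y2024"
     linklist:
     - "[[some-entry]]"
     - "[[@some-contact]]"
     ---

     Entry body rendered to Obsidian-flavoured markdown. *)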
-74
stack/bushel/bin/bushel_paper.ml
···
-
module ZT = Zotero_translation
-
open Lwt.Infix
-
open Printf
-
module J = Ezjsonm
-
open Cmdliner
-
-
-
let _authors b j =
-
let keys = J.get_dict j in
-
let authors = J.get_list J.get_string (List.assoc "author" keys) in
-
let a =
-
List.fold_left (fun acc a ->
-
match Bushel.Entry.lookup_by_name b a with
-
| Some c -> `String ("@" ^ (Bushel.Contact.handle c)) :: acc
-
| None -> failwith (sprintf "author %s not found" a)
-
) [] authors
-
in
-
J.update j ["author"] (Some (`A a))
-
-
let of_doi zt ~base_dir ~slug ~version doi =
-
ZT.json_of_doi zt ~slug doi >>= fun j ->
-
let papers_dir = Printf.sprintf "%s/papers/%s" base_dir slug in
-
(* Ensure papers directory exists *)
-
(try Unix.mkdir papers_dir 0o755 with Unix.Unix_error (Unix.EEXIST, _, _) -> ());
-
-
(* Extract abstract from JSON data *)
-
let abstract = try
-
let keys = Ezjsonm.get_dict (j :> Ezjsonm.value) in
-
match List.assoc_opt "abstract" keys with
-
| Some abstract_json -> Some (Ezjsonm.get_string abstract_json)
-
| None -> None
-
with _ -> None in
-
-
(* Remove abstract from frontmatter - it goes in body *)
-
let keys = Ezjsonm.get_dict (j :> Ezjsonm.value) in
-
let filtered_keys = List.filter (fun (k, _) -> k <> "abstract") keys in
-
let json_without_abstract = `O filtered_keys in
-
-
(* Use library function to generate YAML with abstract in body *)
-
let content = Bushel.Paper.to_yaml ?abstract ~ver:version json_without_abstract in
-
-
let filename = Printf.sprintf "%s.md" version in
-
let filepath = Filename.concat papers_dir filename in
-
let oc = open_out filepath in
-
output_string oc content;
-
close_out oc;
-
Printf.printf "Created paper file: %s\n" filepath;
-
Lwt.return ()
-
-
let slug_arg =
-
let doc = "Slug for the entry." in
-
Arg.(required & pos 0 (some string) None & info [] ~docv:"SLUG" ~doc)
-
-
let version_arg =
-
let doc = "Version of the entry." in
-
Arg.(required & pos 1 (some string) None & info [] ~docv:"VERSION" ~doc)
-
-
let doi_arg =
-
let doc = "DOI of the entry." in
-
Arg.(required & pos 2 (some string) None & info [] ~docv:"DOI" ~doc)
-
-
(* Export the term for use in main bushel.ml *)
-
let term =
-
Term.(const (fun base slug version doi ->
-
let zt = ZT.v "http://svr-avsm2-eeg-ce:1969" in
-
Lwt_main.run @@ of_doi zt ~base_dir:base ~slug ~version doi; 0
-
) $ Bushel_common.base_dir $ slug_arg $ version_arg $ doi_arg)
-
-
let cmd =
-
let doc = "Generate paper entry from DOI" in
-
let info = Cmd.info "paper" ~doc in
-
Cmd.v info term
-
-
(* Main entry point removed - accessed through bushel_main.ml *)
-57
stack/bushel/bin/bushel_paper_classify.ml
···
-
open Cmdliner
-
-
(** TODO:claude Classify papers based on heuristics and update metadata *)
-
let classify_papers base_dir overwrite =
-
let papers_dir = Printf.sprintf "%s/papers" base_dir in
-
if not (Sys.file_exists papers_dir) then (
-
Printf.eprintf "Papers directory not found: %s\n" papers_dir;
-
1
-
) else (
-
let paper_dirs = Sys.readdir papers_dir |> Array.to_list in
-
List.iter (fun paper_slug ->
-
let paper_path = Filename.concat papers_dir paper_slug in
-
if Sys.is_directory paper_path then (
-
let versions = Sys.readdir paper_path |> Array.to_list
-
|> List.filter (String.ends_with ~suffix:".md") in
-
List.iter (fun version_file ->
-
let filepath = Filename.concat paper_path version_file in
-
let version = Filename.remove_extension version_file in
-
try
-
let paper = Bushel.Paper.of_md ~slug:paper_slug ~ver:version filepath in
-
let predicted_class = Bushel.Paper.classification paper in
-
let class_str = Bushel.Paper.string_of_classification predicted_class in
-
Printf.printf "%s/%s: %s\n" paper_slug version class_str;
-
-
(* Update the file if overwrite is enabled *)
-
if overwrite then (
-
let json_data = Bushel.Paper.raw_json paper in
-
let keys = Ezjsonm.get_dict json_data in
-
let updated_keys = ("classification", `String class_str) ::
-
(List.filter (fun (k, _) -> k <> "classification") keys) in
-
let updated_json = `O updated_keys in
-
let abstract = Some (Bushel.Paper.abstract paper) in
-
let content = Bushel.Paper.to_yaml ?abstract ~ver:version updated_json in
-
let oc = open_out filepath in
-
output_string oc content;
-
close_out oc;
-
Printf.printf " Updated %s\n" filepath
-
)
-
with e ->
-
Printf.eprintf "Error processing %s: %s\n" filepath (Printexc.to_string e)
-
) versions
-
)
-
) paper_dirs;
-
0
-
)
-
-
let overwrite_flag =
-
let doc = "Update paper files with classification metadata" in
-
Arg.(value & flag & info ["overwrite"] ~doc)
-
-
let term =
-
Term.(const classify_papers $ Bushel_common.base_dir $ overwrite_flag)
-
-
let cmd =
-
let doc = "Classify papers as full/short/preprint" in
-
let info = Cmd.info "paper-classify" ~doc in
-
Cmd.v info term
-325
stack/bushel/bin/bushel_paper_tex.ml
···
-
open Printf
-
open Cmdliner
-
-
(** TODO:claude Format author name for LaTeX with initials and full last name *)
-
let format_author_name author =
-
(* Split author name and convert to "F.M.~Lastname" format *)
-
let parts = String.split_on_char ' ' author |> List.filter (fun s -> s <> "") in
-
match List.rev parts with
-
| [] -> ""
-
| lastname :: rest_rev ->
-
let firstname_parts = List.rev rest_rev in
-
let initials = List.map (fun name ->
-
if String.length name > 0 then String.sub name 0 1 ^ "." else ""
-
) firstname_parts in
-
let initials_str = String.concat "" initials in
-
if initials_str = "" then lastname
-
else initials_str ^ "~" ^ lastname
-
-
(** TODO:claude Format author name for LaTeX with underline for target author *)
-
let format_author target_name author =
-
let formatted = format_author_name author in
-
(* Check if author contains target name substring for underlining *)
-
if String.lowercase_ascii author |> fun s ->
-
Re.execp (Re.Perl.compile_pat ~opts:[`Caseless] target_name) s
-
then sprintf "\\underline{%s}" formatted
-
else formatted
-
-
(** TODO:claude Format authors list for LaTeX *)
-
let format_authors target_name authors =
-
match authors with
-
| [] -> ""
-
| [single] -> format_author target_name single
-
| _ ->
-
let formatted = List.map (format_author target_name) authors in
-
String.concat ", " formatted
-
-
(** TODO:claude Escape special LaTeX characters *)
-
let escape_latex str =
-
let replacements = [
-
("&", "\\&");
-
("%", "\\%");
-
("$", "\\$");
-
("#", "\\#");
-
("_", "\\_");
-
("{", "\\{");
-
("}", "\\}");
-
("~", "\\textasciitilde{}");
-
("^", "\\textasciicircum{}");
-
] in
-
List.fold_left (fun s (from, to_) ->
-
Re.replace_string (Re.compile (Re.str from)) ~by:to_ s
-
) str replacements
-
-
(** TODO:claude Clean venue name by removing common prefixes and handling arXiv *)
-
let clean_venue_name venue =
-
(* Special handling for arXiv to avoid redundancy like "arXiv (arXiv:ID)" *)
-
let venue_lower = String.lowercase_ascii venue in
-
if Re.execp (Re.Perl.compile_pat ~opts:[`Caseless] "arxiv") venue_lower then
-
if String.contains venue ':' then
-
(* If it contains arXiv:ID format, just return the ID part *)
-
let parts = String.split_on_char ':' venue in
-
match parts with
-
| _ :: id :: _ -> String.trim id
-
| _ -> venue
-
else venue
-
else
-
let prefixes = [
-
"in proceedings of the ";
-
"proceedings of the ";
-
"in proceedings of ";
-
"proceedings of ";
-
"in the ";
-
"the ";
-
] in
-
let rec remove_prefixes v = function
-
| [] -> v
-
| prefix :: rest ->
-
if String.length v >= String.length prefix &&
-
String.sub (String.lowercase_ascii v) 0 (String.length prefix) = prefix
-
then String.sub v (String.length prefix) (String.length v - String.length prefix)
-
else remove_prefixes v rest
-
in
-
let cleaned = remove_prefixes venue prefixes in
-
(* Capitalize first letter *)
-
if String.length cleaned > 0 then
-
String.mapi (fun i c -> if i = 0 then Char.uppercase_ascii c else c) cleaned
-
else cleaned
-
-
(** TODO:claude Format venue for LaTeX with volume/number details for full papers *)
-
let format_venue paper =
-
let open Bushel.Paper in
-
let classification = classification paper in
-
match bibtype paper with
-
| "article" ->
-
let journal_name = try journal paper |> clean_venue_name |> escape_latex with _ -> "Journal" in
-
if classification = Full then (
-
let vol_info =
-
let vol = volume paper in
-
let num = issue paper in
-
match vol, num with
-
| Some v, Some n -> sprintf ", %s(%s)" v n
-
| Some v, None -> sprintf ", vol. %s" v
-
| None, Some n -> sprintf ", no. %s" n
-
| None, None -> ""
-
in
-
sprintf "\\textit{%s%s}" journal_name vol_info
-
) else
-
sprintf "\\textit{%s}" journal_name
-
| "inproceedings" ->
-
let conf_name = try booktitle paper |> clean_venue_name |> escape_latex with _ -> "Conference" in
-
sprintf "\\textit{%s}" conf_name
-
| "techreport" ->
-
let inst = try institution paper |> escape_latex with _ -> "Institution" in
-
sprintf "\\textit{Technical Report, %s}" inst
-
| "phdthesis" ->
-
let school = try institution paper |> escape_latex with _ -> "University" in
-
sprintf "\\textit{PhD thesis, %s}" school
-
| "mastersthesis" ->
-
let school = try institution paper |> escape_latex with _ -> "University" in
-
sprintf "\\textit{Master's thesis, %s}" school
-
| "book" ->
-
let publisher_str = try Bushel.Paper.publisher paper |> escape_latex with _ -> "" in
-
let edition_str = try
-
let json = Bushel.Paper.raw_json paper in
-
let keys = Ezjsonm.get_dict json in
-
List.assoc "edition" keys |> Ezjsonm.get_string |> escape_latex
-
with _ -> "" in
-
let isbn_str = try Bushel.Paper.isbn paper |> escape_latex with _ -> "" in
-
let venue_info =
-
let base = match publisher_str, edition_str with
-
| pub, ed when pub <> "" && ed <> "" -> sprintf "%s, %s edition" pub ed
-
| pub, _ when pub <> "" -> pub
-
| _, ed when ed <> "" -> sprintf "%s edition" ed
-
| _, _ -> "Book"
-
in
-
if isbn_str <> "" then
-
sprintf "%s, ISBN %s" base isbn_str
-
else
-
base
-
in
-
sprintf "\\textit{%s}" venue_info
-
| "misc" ->
-
(* Try to get meaningful venue info for misc entries *)
-
let journal_str = try Bushel.Paper.journal paper |> clean_venue_name |> escape_latex with _ -> "" in
-
let booktitle_str = try Bushel.Paper.booktitle paper |> clean_venue_name |> escape_latex with _ -> "" in
-
let publisher_str = try Bushel.Paper.publisher paper |> escape_latex with _ -> "" in
-
if journal_str <> "" then
-
sprintf "\\textit{%s}" journal_str
-
else if booktitle_str <> "" then
-
sprintf "\\textit{%s}" booktitle_str
-
else if publisher_str <> "" then
-
sprintf "\\textit{%s}" publisher_str
-
else
-
sprintf "\\textit{Preprint}"
-
| "abstract" ->
-
(* Handle conference abstracts *)
-
let conf_name = try Bushel.Paper.booktitle paper |> clean_venue_name |> escape_latex with _ -> "" in
-
let journal_str = try Bushel.Paper.journal paper |> clean_venue_name |> escape_latex with _ -> "" in
-
if conf_name <> "" then
-
sprintf "\\textit{%s (Abstract)}" conf_name
-
else if journal_str <> "" then
-
sprintf "\\textit{%s (Abstract)}" journal_str
-
else
-
sprintf "\\textit{Conference Abstract}"
-
| _ ->
-
(* Fallback for other types with special arXiv handling *)
-
let journal_str = try Bushel.Paper.journal paper with _ -> "" in
-
let publisher_str = try Bushel.Paper.publisher paper |> escape_latex with _ -> "" in
-
-
(* Special handling for arXiv papers - skip venue, let note handle it *)
-
if String.lowercase_ascii journal_str = "arxiv" then
-
""
-
else if journal_str <> "" then
-
sprintf "\\textit{%s}" (journal_str |> clean_venue_name |> escape_latex)
-
else if publisher_str <> "" then
-
sprintf "\\textit{%s}" publisher_str
-
else
-
sprintf "\\textit{Preprint}"
-
-
(** TODO:claude Generate LaTeX PubItem for a paper *)
-
let generate_latex_entry target_name paper =
-
let open Bushel.Paper in
-
let slug_str = slug paper in
-
let title_str = title paper |> escape_latex in
-
let authors_str = format_authors target_name (authors paper) in
-
let venue_str = format_venue paper in
-
let year_str = year paper |> string_of_int in
-
let month_str =
-
let (_, m, _) = date paper in
-
sprintf "%02d" m
-
in
-
-
(* Check if paper is in the future *)
-
let is_in_press =
-
let paper_time = datetime paper in
-
let now = Ptime_clock.now () in
-
Ptime.compare paper_time now > 0
-
in
-
-
(* Add DOI or PDF link if available, but not for in-press papers unless they have explicit URL *)
-
let title_with_link =
-
if is_in_press then
-
(* For in-press papers, only add link if there's an explicit URL field *)
-
match Bushel.Paper.url paper with
-
| Some u -> sprintf "\\href{%s}{%s}" u title_str
-
| None -> title_str (* No link for in-press papers without explicit URL *)
-
else
-
(* For published papers, use DOI or URL or default PDF link *)
-
match Bushel.Paper.doi paper with
-
| Some doi -> sprintf "\\href{https://doi.org/%s}{%s}" doi title_str
-
| None ->
-
(* Check if there's a URL, otherwise default to PDF link *)
-
let url = match Bushel.Paper.url paper with
-
| Some u -> u
-
| None -> sprintf "https://anil.recoil.org/papers/%s.pdf" slug_str
-
in
-
sprintf "\\href{%s}{%s}" url title_str
-
in
-
-
(* Add "(in press)" if paper is in the future *)
-
let in_press_str = if is_in_press then " \\textit{(in press)}" else "" in
-
-
(* Add note if present *)
-
let note_str = match Bushel.Paper.note paper with
-
| Some n -> sprintf " \\textit{(%s)}" (escape_latex n)
-
| None -> ""
-
in
-
-
sprintf "\\BigGap\n\\PubItemLabeled{%s}\n{``%s,''\n%s,\n%s%s%s,\n\\DatestampYM{%s}{%s}.}\n"
-
slug_str title_with_link authors_str venue_str in_press_str note_str year_str month_str
-
-
(** TODO:claude Generate LaTeX output files for papers *)
-
let generate_tex base_dir output_dir target_name =
-
try
-
let papers = Bushel.load_papers base_dir in
-
let latest_papers = List.filter (fun p -> p.Bushel.Paper.latest) papers in
-
-
(* Extract selected papers first *)
-
let selected_papers = List.filter Bushel.Paper.selected latest_papers in
-
-
(* Group remaining papers by classification, excluding selected ones *)
-
let non_selected_papers = List.filter (fun p -> not (Bushel.Paper.selected p)) latest_papers in
-
let full_papers = List.filter (fun p ->
-
Bushel.Paper.classification p = Bushel.Paper.Full) non_selected_papers in
-
let short_papers = List.filter (fun p ->
-
Bushel.Paper.classification p = Bushel.Paper.Short) non_selected_papers in
-
let preprint_papers = List.filter (fun p ->
-
Bushel.Paper.classification p = Bushel.Paper.Preprint) non_selected_papers in
-
-
(* Sort each group by date, newest first *)
-
let sorted_full = List.sort Bushel.Paper.compare full_papers in
-
let sorted_short = List.sort Bushel.Paper.compare short_papers in
-
let sorted_preprint = List.sort Bushel.Paper.compare preprint_papers in
-
let sorted_selected = List.sort Bushel.Paper.compare selected_papers in
-
-
(* Ensure output directory exists *)
-
(try Unix.mkdir output_dir 0o755 with Unix.Unix_error (Unix.EEXIST, _, _) -> ());
-
-
(* Write papers_full.tex *)
-
let oc_full = open_out (Filename.concat output_dir "papers_full.tex") in
-
List.iter (fun paper ->
-
let latex = generate_latex_entry target_name paper in
-
output_string oc_full latex;
-
output_char oc_full '\n'
-
) sorted_full;
-
close_out oc_full;
-
Printf.printf "Generated %s/papers_full.tex with %d entries\n" output_dir (List.length sorted_full);
-
-
(* Write papers_short.tex *)
-
let oc_short = open_out (Filename.concat output_dir "papers_short.tex") in
-
List.iter (fun paper ->
-
let latex = generate_latex_entry target_name paper in
-
output_string oc_short latex;
-
output_char oc_short '\n'
-
) sorted_short;
-
close_out oc_short;
-
Printf.printf "Generated %s/papers_short.tex with %d entries\n" output_dir (List.length sorted_short);
-
-
(* Write papers_preprint.tex *)
-
let oc_preprint = open_out (Filename.concat output_dir "papers_preprint.tex") in
-
List.iter (fun paper ->
-
let latex = generate_latex_entry target_name paper in
-
output_string oc_preprint latex;
-
output_char oc_preprint '\n'
-
) sorted_preprint;
-
close_out oc_preprint;
-
Printf.printf "Generated %s/papers_preprint.tex with %d entries\n" output_dir (List.length sorted_preprint);
-
-
(* Write papers_selected.tex *)
-
let oc_selected = open_out (Filename.concat output_dir "papers_selected.tex") in
-
List.iter (fun paper ->
-
let latex = generate_latex_entry target_name paper in
-
output_string oc_selected latex;
-
output_char oc_selected '\n'
-
) sorted_selected;
-
close_out oc_selected;
-
Printf.printf "Generated %s/papers_selected.tex with %d entries\n" output_dir (List.length sorted_selected);
-
-
(* Write paper_count.tex *)
-
let total_count = List.length latest_papers in
-
let oc_count = open_out (Filename.concat output_dir "paper_count.tex") in
-
output_string oc_count (sprintf "\\setcounter{pubcounter}{%d}\n" total_count);
-
close_out oc_count;
-
Printf.printf "Generated %s/paper_count.tex with total count: %d\n" output_dir total_count;
-
-
0
-
with e ->
-
Printf.eprintf "Error loading papers: %s\n" (Printexc.to_string e);
-
1
-
-
let output_dir_arg =
-
let doc = "Output directory for generated LaTeX files" in
-
Arg.(value & opt string "." & info ["output"; "o"] ~docv:"DIR" ~doc)
-
-
let target_name_arg =
-
let doc = "Name to underline in author list (e.g., 'Madhavapeddy')" in
-
Arg.(value & opt string "Madhavapeddy" & info ["target"; "t"] ~docv:"NAME" ~doc)
-
-
let term =
-
Term.(const generate_tex $ Bushel_common.base_dir $ output_dir_arg $ target_name_arg)
-
-
let cmd =
-
let doc = "Generate LaTeX publication entries" in
-
let info = Cmd.info "paper-tex" ~doc in
-
Cmd.v info term
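
A minimal sketch of the author-name formatting used throughout the LaTeX generation above: initials joined with dots and a LaTeX tie (~) before the surname. The sample names are invented; only the splitting logic mirrors the deleted helper.

(* Minimal sketch of format_author_name; empty words are filtered first, so
   String.sub on the remaining words is safe. *)
let format_author_name author =
  match List.rev (String.split_on_char ' ' author |> List.filter (( <> ) "")) with
  | [] -> ""
  | lastname :: rest_rev ->
    let initials =
      List.rev rest_rev
      |> List.map (fun n -> String.sub n 0 1 ^ ".")
      |> String.concat ""
    in
    if initials = "" then lastname else initials ^ "~" ^ lastname

let () =
  assert (format_author_name "Anil Madhavapeddy" = "A.~Madhavapeddy");
  assert (format_author_name "Jane Q. Public" = "J.Q.~Public");
  assert (format_author_name "Plato" = "Plato")

(* The companion clean_venue_name helper above similarly turns, e.g.,
   "Proceedings of the 14th Workshop on Foo" into "14th Workshop on Foo",
   and for an "arXiv:<id>" venue it keeps only the identifier part. *)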
-48
stack/bushel/bin/bushel_search.ml
···
-
open Cmdliner
-
-
(** Bushel search command for integration with main CLI *)
-
-
let limit =
-
let doc = "Maximum number of results to return" in
-
Arg.(value & opt int 50 & info ["limit"; "l"] ~doc)
-
-
let offset =
-
let doc = "Number of results to skip (for pagination)" in
-
Arg.(value & opt int 0 & info ["offset"; "o"] ~doc)
-
-
let query_text =
-
let doc = "Search query text" in
-
Arg.(required & pos 0 (some string) None & info [] ~docv:"QUERY" ~doc)
-
-
(** Search function using multisearch *)
-
let search query_text limit offset env _xdg _profile =
-
let config = Bushel.Typesense.load_config_from_files () in
-
-
if config.api_key = "" then (
-
Printf.eprintf "Error: API key is required. Use TYPESENSE_API_KEY environment variable or create .typesense-key file.\n";
-
1
-
) else (
-
Printf.printf "Searching Typesense at %s\n" config.endpoint;
-
Printf.printf "Query: \"%s\"\n" query_text;
-
Printf.printf "Limit: %d, Offset: %d\n\n" limit offset;
-
-
Eio.Switch.run (fun sw ->
-
let result = Bushel.Typesense.multisearch ~sw ~env config query_text ~limit:50 () in
-
match result with
-
| Ok multisearch_resp ->
-
let combined_response = Bushel.Typesense.combine_multisearch_results multisearch_resp ~limit ~offset () in
-
Printf.printf "Found %d results (%.2fms)\n\n" combined_response.total combined_response.query_time;
-
-
List.iteri (fun i (hit : Bushel.Typesense.search_result) ->
-
Printf.printf "%d. %s (score: %.2f)\n" (i + 1) (Bushel.Typesense.pp_search_result_oneline hit) hit.Bushel.Typesense.score
-
) combined_response.hits
-
| Error err ->
-
Format.eprintf "Search error: %a\n" Bushel.Typesense.pp_error err;
-
exit 1
-
);
-
0
-
)
-
-
(** Command line term for integration with eiocmd *)
-
let term : (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Cmdliner.Term.t =
-
Term.(const search $ query_text $ limit $ offset)
-70
stack/bushel/bin/bushel_thumbs.ml
···
-
open Printf
-
open Cmdliner
-
-
(** TODO:claude
-
Helper module for ImageMagick operations *)
-
module Imagemagick = struct
-
(* Generate thumbnail from PDF *)
-
let generate_thumbnail ~pdf_path ~size ~output_path =
-
let cmd =
-
sprintf "magick -density 600 -quality 100 %s[0] -gravity North -crop 100%%x50%%+0+0 -resize %s %s"
-
pdf_path size output_path
-
in
-
eprintf "Running: %s\n%!" cmd;
-
Sys.command cmd
-
end
-
-
(** TODO:claude
-
Process a single paper to generate its thumbnail *)
-
let process_paper base_dir output_dir paper =
-
let slug = Bushel.Paper.slug paper in
-
let pdf_path = sprintf "%s/static/papers/%s.pdf" base_dir slug in
-
let thumbnail_path = sprintf "%s/%s.png" output_dir slug in
-
-
(* Skip if thumbnail already exists *)
-
if Sys.file_exists thumbnail_path then (
-
printf "Thumbnail already exists for %s, skipping\n%!" slug
-
) else if Sys.file_exists pdf_path then (
-
try
-
let size = sprintf "2048x" in
-
printf "Generating high-res thumbnail for %s (size: %s)\n%!" slug size;
-
match Imagemagick.generate_thumbnail ~pdf_path ~size ~output_path:thumbnail_path with
-
| 0 -> printf "Successfully generated thumbnail for %s\n%!" slug
-
| n -> eprintf "Error generating thumbnail for %s (exit code: %d)\n%!" slug n
-
with
-
| e -> eprintf "Error processing paper %s: %s\n%!" slug (Printexc.to_string e)
-
) else (
-
eprintf "PDF file not found for paper: %s\n%!" slug
-
)
-
-
(** TODO:claude
-
Main function to process all papers in a directory *)
-
let process_papers base_dir output_dir =
-
(* Create output directory if it doesn't exist *)
-
if not (Sys.file_exists output_dir) then (
-
printf "Creating output directory: %s\n%!" output_dir;
-
Unix.mkdir output_dir 0o755
-
);
-
-
(* Load Bushel entries and get papers *)
-
printf "Loading papers from %s\n%!" base_dir;
-
let e = Bushel.load base_dir in
-
let papers = Bushel.Entry.papers e in
-
-
(* Process each paper *)
-
printf "Found %d papers\n%!" (List.length papers);
-
List.iter (process_paper base_dir output_dir) papers
-
-
(* Command line arguments are now imported from Bushel_common *)
-
-
(* Export the term for use in main bushel.ml *)
-
let term : (Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Cmdliner.Term.t =
-
Term.(const (fun base_dir output_dir _env _xdg _profile -> process_papers base_dir output_dir; 0) $
-
Bushel_common.base_dir $ Bushel_common.output_dir ~default:".")
-
-
let cmd =
-
let doc = "Generate thumbnails for paper PDFs" in
-
let info = Cmd.info "thumbs" ~doc in
-
Cmd.v info term
-
-
(* Main entry point removed - accessed through bushel_main.ml *)
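
For reference, this is the shell command the thumbnail helper above assembles, shown here for a hypothetical paper slug "example-paper": rasterise the first PDF page at 600 DPI, crop the top half, and resize to 2048px wide.

(* Minimal sketch: only the command template is taken from the deleted code;
   the paths are made up. *)
let () =
  let pdf_path = "static/papers/example-paper.pdf"
  and size = "2048x"
  and output_path = "thumbs/example-paper.png" in
  print_endline
    (Printf.sprintf
       "magick -density 600 -quality 100 %s[0] -gravity North -crop 100%%x50%%+0+0 -resize %s %s"
       pdf_path size output_path)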
-188
stack/bushel/bin/bushel_typesense.ml
···
-
open Cmdliner
-
-
(** Bushel Typesense binary with upload and query functionality *)
-
-
let data_dir =
-
let doc = "Directory containing bushel data files" in
-
Arg.(value & opt string "." & info ["data-dir"; "d"] ~doc)
-
-
(** Main upload function *)
-
let upload data_dir openai_key env _xdg _profile =
-
let config = Bushel.Typesense.load_config_from_files () in
-
-
let config = { config with
-
openai_key = if openai_key = "" then config.openai_key else openai_key
-
} in
-
-
if config.api_key = "" then (
-
Printf.eprintf "Error: API key is required. Use TYPESENSE_API_KEY environment variable or create .typesense-key file.\n";
-
1
-
) else if config.openai_key = "" then (
-
Printf.eprintf "Error: OpenAI API key is required for embeddings. Use OPENAI_API_KEY environment variable or create .openrouter-api file.\n";
-
1
-
) else (
-
Printf.printf "Loading bushel data from %s\n%!" data_dir;
-
let entries = Bushel.load data_dir in
-
-
Printf.printf "Uploading bushel data to Typesense at %s\n%!" config.endpoint;
-
-
Eio.Switch.run (fun sw ->
-
Bushel.Typesense.upload_all ~sw ~env config entries
-
);
-
0
-
)
-
-
-
(** Query function *)
-
let query query_text collection limit offset env _xdg _profile =
-
let config = Bushel.Typesense.load_config_from_files () in
-
-
if config.api_key = "" then (
-
Printf.eprintf "Error: API key is required. Use TYPESENSE_API_KEY environment variable or create .typesense-key file.\n";
-
1
-
) else (
-
Printf.printf "Searching Typesense at %s\n%!" config.endpoint;
-
Printf.printf "Query: \"%s\"\n%!" query_text;
-
if collection <> "" then Printf.printf "Collection: %s\n%!" collection;
-
Printf.printf "Limit: %d, Offset: %d\n\n%!" limit offset;
-
-
Eio.Switch.run (fun sw ->
-
let search_fn = if collection = "" then
-
Bushel.Typesense.search_all ~sw ~env config query_text ~limit ~offset
-
else
-
Bushel.Typesense.search_collection ~sw ~env config collection query_text ~limit ~offset
-
in
-
let result = search_fn () in
-
match result with
-
| Ok response ->
-
Printf.printf "Found %d results (%.2fms)\n\n%!" response.total response.query_time;
-
List.iteri (fun i (hit : Bushel.Typesense.search_result) ->
-
Printf.printf "%d. [%s] %s (score: %.2f)\n%!" (i + 1) hit.collection hit.title hit.score;
-
if hit.content <> "" then Printf.printf " %s\n%!" hit.content;
-
if hit.highlights <> [] then (
-
Printf.printf " Highlights:\n%!";
-
List.iter (fun (field, snippets) ->
-
List.iter (fun snippet ->
-
Printf.printf " %s: %s\n%!" field snippet
-
) snippets
-
) hit.highlights
-
);
-
Printf.printf "\n%!"
-
) response.hits
-
| Error err ->
-
Format.eprintf "Search error: %a\n%!" Bushel.Typesense.pp_error err;
-
exit 1
-
);
-
0
-
)
-
-
(** List collections function *)
-
let list env _xdg _profile =
-
let config = Bushel.Typesense.load_config_from_files () in
-
-
if config.api_key = "" then (
-
Printf.eprintf "Error: API key is required. Use TYPESENSE_API_KEY environment variable or create .typesense-key file.\n";
-
1
-
) else (
-
Printf.printf "Listing collections at %s\n\n%!" config.endpoint;
-
-
Eio.Switch.run (fun sw ->
-
let result = Bushel.Typesense.list_collections ~sw ~env config in
-
match result with
-
| Ok collections ->
-
Printf.printf "Collections:\n%!";
-
List.iter (fun (name, count) ->
-
Printf.printf " %s (%d documents)\n%!" name count
-
) collections
-
| Error err ->
-
Format.eprintf "List error: %a\n%!" Bushel.Typesense.pp_error err;
-
exit 1
-
);
-
0
-
)
-
-
(** Command line arguments *)
-
let openai_key =
-
let doc = "OpenAI API key for embeddings" in
-
Arg.(value & opt string "" & info ["openai-key"; "oa"] ~doc)
-
-
let query_text =
-
let doc = "Search query text" in
-
Arg.(required & pos 0 (some string) None & info [] ~docv:"QUERY" ~doc)
-
-
let collection =
-
let doc = "Specific collection to search (contacts, papers, projects, notes, videos, ideas)" in
-
Arg.(value & opt string "" & info ["collection"; "c"] ~doc)
-
-
let limit =
-
let doc = "Maximum number of results to return" in
-
Arg.(value & opt int 10 & info ["limit"; "l"] ~doc)
-
-
let offset =
-
let doc = "Number of results to skip (for pagination)" in
-
Arg.(value & opt int 0 & info ["offset"; "o"] ~doc)
-
-
(** Query command *)
-
let query_cmd =
-
let doc = "Search bushel collections in Typesense" in
-
let man = [
-
`S Manpage.s_description;
-
`P "Search across all or specific bushel collections in Typesense.";
-
`P "The API key can be read from .typesense-key file or TYPESENSE_API_KEY environment variable.";
-
`S Manpage.s_examples;
-
`P "Search all collections:";
-
`Pre " bushel-typesense query \"machine learning\"";
-
`P "Search specific collection:";
-
`Pre " bushel-typesense query \"OCaml\" --collection papers";
-
`P "Search with pagination:";
-
`Pre " bushel-typesense query \"AI\" --limit 5 --offset 10";
-
] in
-
Eiocmd.run
-
~info:(Cmd.info "query" ~doc ~man)
-
~app_name:"bushel-typesense"
-
~service:"typesense"
-
Term.(const query $ query_text $ collection $ limit $ offset)
-
-
(** List command *)
-
let list_cmd =
-
let doc = "List all collections in Typesense" in
-
let man = [
-
`S Manpage.s_description;
-
`P "List all available collections and their document counts.";
-
] in
-
Eiocmd.run
-
~info:(Cmd.info "list" ~doc ~man)
-
~app_name:"bushel-typesense"
-
~service:"typesense"
-
Term.(const list)
-
-
(** Upload command *)
-
let upload_cmd =
-
let doc = "Upload bushel collections to Typesense search engine" in
-
let man = [
-
`S Manpage.s_description;
-
`P "Upload all bushel object types (contacts, papers, projects, notes, videos, ideas) to a Typesense search engine instance.";
-
`P "The API keys can be read from files or environment variables.";
-
`S Manpage.s_examples;
-
`P "Upload to Typesense instance:";
-
`Pre " bushel-typesense upload --data-dir /path/to/data";
-
] in
-
Eiocmd.run
-
~info:(Cmd.info "upload" ~doc ~man)
-
~app_name:"bushel-typesense"
-
~service:"typesense"
-
Term.(const upload $ data_dir $ openai_key)
-
-
(** Main command group *)
-
let main_cmd =
-
let doc = "Bushel Typesense client" in
-
let man = [
-
`S Manpage.s_description;
-
`P "Client for uploading to and querying Bushel collections in Typesense search engine.";
-
`S Manpage.s_commands;
-
`S Manpage.s_common_options;
-
] in
-
let info = Cmd.info "bushel-typesense" ~doc ~man in
-
Cmd.group info [upload_cmd; query_cmd; list_cmd]
-
-
let () = exit (Cmd.eval' main_cmd)
-138
stack/bushel/bin/bushel_video.ml
···
-
[@@@warning "-26-27-32"]
-
-
open Lwt.Infix
-
open Cmdliner
-
-
let setup_log style_renderer level =
-
Fmt_tty.setup_std_outputs ?style_renderer ();
-
Logs.set_level level;
-
Logs.set_reporter (Logs_fmt.reporter ());
-
()
-
-
let process_videos output_dir overwrite base_url channel fetch_thumbs thumbs_dir =
-
Peertube.fetch_all_channel_videos base_url channel >>= fun all_videos ->
-
Logs.info (fun f -> f "Total videos: %d" (List.length all_videos));
-
-
(* Create thumbnails directory if needed *)
-
(if fetch_thumbs && not (Sys.file_exists thumbs_dir) then
-
Unix.mkdir thumbs_dir 0o755);
-
-
(* Process each video, fetching full details for complete descriptions *)
-
Lwt_list.map_s (fun video ->
-
(* Fetch complete video details to get full description *)
-
Peertube.fetch_video_details base_url video.Peertube.uuid >>= fun full_video ->
-
let (description, published_date, title, url, uuid, slug) =
-
Peertube.to_bushel_video full_video
-
in
-
Logs.info (fun f -> f "Title: %s, URL: %s" title url);
-
-
(* Download thumbnail if requested *)
-
(if fetch_thumbs then
-
let thumb_path = Filename.concat thumbs_dir (uuid ^ ".jpg") in
-
Peertube.download_thumbnail base_url full_video thumb_path >>= fun result ->
-
match result with
-
| Ok () ->
-
Logs.info (fun f -> f "Downloaded thumbnail for %s to %s" title thumb_path);
-
Lwt.return_unit
-
| Error (`Msg e) ->
-
Logs.warn (fun f -> f "Failed to download thumbnail for %s: %s" title e);
-
Lwt.return_unit
-
else
-
Lwt.return_unit) >>= fun () ->
-
-
Lwt.return {Bushel.Video.description; published_date; title; url; uuid; slug;
-
talk=false; paper=None; project=None; tags=full_video.tags}
-
) all_videos >>= fun vids ->
-
-
(* Write video files *)
-
Lwt_list.iter_s (fun video ->
-
let file_path = Filename.concat output_dir (video.Bushel.Video.uuid ^ ".md") in
-
let file_exists = Sys.file_exists file_path in
-
-
if file_exists then
-
try
-
(* If file exists, load it to preserve specific fields *)
-
let existing_video = Bushel.Video.of_md file_path in
-
(* Create merged video with preserved fields *)
-
let merged_video = {
-
video with
-
tags = existing_video.tags; (* Preserve existing tags *)
-
paper = existing_video.paper; (* Preserve paper field *)
-
project = existing_video.project; (* Preserve project field *)
-
talk = existing_video.talk; (* Preserve talk field *)
-
} in
-
-
(* Write the merged video data *)
-
if overwrite then
-
match Bushel.Video.to_file output_dir merged_video with
-
| Ok () ->
-
Logs.info (fun f -> f "Updated video %s with preserved fields in %s"
-
merged_video.Bushel.Video.title file_path);
-
Lwt.return_unit
-
| Error (`Msg e) ->
-
Logs.err (fun f -> f "Failed to update video %s: %s"
-
merged_video.Bushel.Video.title e);
-
Lwt.return_unit
-
else begin
-
Logs.info (fun f -> f "Skipping existing video %s (use --overwrite to replace)"
-
video.Bushel.Video.title);
-
Lwt.return_unit
-
end
-
with _ ->
-
(* If reading existing file fails, proceed with new data *)
-
if overwrite then
-
match Bushel.Video.to_file output_dir video with
-
| Ok () ->
-
Logs.info (fun f -> f "Wrote video %s to %s (existing file could not be read)"
-
video.Bushel.Video.title file_path);
-
Lwt.return_unit
-
| Error (`Msg e) ->
-
Logs.err (fun f -> f "Failed to write video %s: %s"
-
video.Bushel.Video.title e);
-
Lwt.return_unit
-
else begin
-
Logs.info (fun f -> f "Skipping existing video %s (use --overwrite to replace)"
-
video.Bushel.Video.title);
-
Lwt.return_unit
-
end
-
else
-
(* If file doesn't exist, just write new data *)
-
match Bushel.Video.to_file output_dir video with
-
| Ok () ->
-
Logs.info (fun f -> f "Wrote new video %s to %s"
-
video.Bushel.Video.title file_path);
-
Lwt.return_unit
-
| Error (`Msg e) ->
-
Logs.err (fun f -> f "Failed to write video %s: %s"
-
video.Bushel.Video.title e);
-
Lwt.return_unit
-
) vids
-
-
(* Command line arguments are now imported from Bushel_common *)
-
-
(* Export the term for use in main bushel.ml *)
-
let term =
-
let fetch_thumbs =
-
let doc = "Download video thumbnails" in
-
Arg.(value & flag & info ["fetch-thumbs"] ~doc)
-
in
-
let thumbs_dir =
-
let doc = "Directory to save thumbnails (default: images/videos)" in
-
Arg.(value & opt string "images/videos" & info ["thumbs-dir"] ~docv:"DIR" ~doc)
-
in
-
Term.(const (fun output_dir overwrite base_url channel fetch_thumbs thumbs_dir () ->
-
Lwt_main.run (process_videos output_dir overwrite base_url channel fetch_thumbs thumbs_dir); 0)
-
$ Bushel_common.output_dir ~default:"." $
-
Bushel_common.overwrite $
-
Bushel_common.url_term ~default:"https://crank.recoil.org" ~doc:"PeerTube base URL" $
-
Bushel_common.channel ~default:"anil" $
-
fetch_thumbs $
-
thumbs_dir $
-
Bushel_common.setup_term)
-
-
let cmd =
-
let doc = "Fetch and process videos from PeerTube" in
-
let info = Cmd.info "video" ~doc in
-
Cmd.v info term
-
-
(* Main entry point removed - accessed through bushel_main.ml *)
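
A minimal sketch of the merge rule used above when a video file already exists on disk: curated fields (tags, paper, project, talk) are kept from the existing file, everything else comes from the freshly fetched metadata. The record type here is a stand-in, not Bushel.Video.t.

type video = { title : string; tags : string list; talk : bool }

(* Keep curated fields from the file on disk, take the rest from the fetch. *)
let merge ~existing ~fetched = { fetched with tags = existing.tags; talk = existing.talk }

let () =
  let existing = { title = "Old title"; tags = [ "ocaml" ]; talk = true } in
  let fetched = { title = "New title"; tags = []; talk = false } in
  let merged = merge ~existing ~fetched in
  assert (merged.title = "New title" && merged.tags = [ "ocaml" ] && merged.talk)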
-81
stack/bushel/bin/bushel_video_thumbs.ml
···
-
[@@@warning "-26-27-32"]
-
-
open Lwt.Infix
-
open Cmdliner
-
-
let setup_log style_renderer level =
-
Fmt_tty.setup_std_outputs ?style_renderer ();
-
Logs.set_level level;
-
Logs.set_reporter (Logs_fmt.reporter ());
-
()
-
-
let process_video_thumbs videos_dir thumbs_dir base_url =
-
(* Ensure thumbnail directory exists *)
-
(if not (Sys.file_exists thumbs_dir) then
-
Unix.mkdir thumbs_dir 0o755);
-
-
(* Read all video markdown files *)
-
let video_files = Sys.readdir videos_dir
-
|> Array.to_list
-
|> List.filter (fun f -> Filename.check_suffix f ".md")
-
|> List.map (fun f -> Filename.concat videos_dir f)
-
in
-
-
Logs.info (fun f -> f "Found %d video files to process" (List.length video_files));
-
-
(* Process each video file *)
-
Lwt_list.iter_s (fun video_file ->
-
try
-
(* Load existing video *)
-
let video = Bushel.Video.of_md video_file in
-
let uuid = video.Bushel.Video.uuid in
-
-
Logs.info (fun f -> f "Processing video: %s (UUID: %s)" video.title uuid);
-
-
(* Fetch video details from PeerTube to get thumbnail info *)
-
Peertube.fetch_video_details base_url uuid >>= fun peertube_video ->
-
-
(* Download thumbnail *)
-
let thumb_path = Filename.concat thumbs_dir (uuid ^ ".jpg") in
-
Peertube.download_thumbnail base_url peertube_video thumb_path >>= fun result ->
-
-
match result with
-
| Ok () ->
-
Logs.info (fun f -> f "Downloaded thumbnail for %s to %s" video.title thumb_path);
-
-
(* Update video file with thumbnail_url field *)
-
(match Peertube.thumbnail_url base_url peertube_video with
-
| Some url ->
-
Logs.info (fun f -> f "Thumbnail URL: %s" url);
-
Lwt.return_unit
-
| None ->
-
Logs.warn (fun f -> f "No thumbnail URL for video %s" video.title);
-
Lwt.return_unit)
-
| Error (`Msg e) ->
-
Logs.err (fun f -> f "Failed to download thumbnail for %s: %s" video.title e);
-
Lwt.return_unit
-
with exn ->
-
Logs.err (fun f -> f "Error processing %s: %s" video_file (Printexc.to_string exn));
-
Lwt.return_unit
-
) video_files
-
-
let term =
-
let videos_dir =
-
let doc = "Directory containing video markdown files" in
-
Arg.(value & opt string "data/videos" & info ["videos-dir"; "d"] ~docv:"DIR" ~doc)
-
in
-
let thumbs_dir =
-
let doc = "Directory to save thumbnails" in
-
Arg.(value & opt string "images/videos" & info ["thumbs-dir"; "t"] ~docv:"DIR" ~doc)
-
in
-
Term.(const (fun videos_dir thumbs_dir base_url () ->
-
Lwt_main.run (process_video_thumbs videos_dir thumbs_dir base_url); 0)
-
$ videos_dir $
-
thumbs_dir $
-
Bushel_common.url_term ~default:"https://crank.recoil.org" ~doc:"PeerTube base URL" $
-
Bushel_common.setup_term)
-
-
let cmd =
-
let doc = "Download thumbnails for existing videos and update metadata" in
-
let info = Cmd.info "video-thumbs" ~doc in
-
Cmd.v info term
-20
stack/bushel/bin/dune
···
-
(library
-
(name bushel_common)
-
(modules bushel_common)
-
(libraries cmdliner))
-
-
(executable
-
(name bushel_main)
-
(public_name bushel)
-
(package bushel)
-
(modules bushel_main bushel_bibtex bushel_ideas bushel_info bushel_missing bushel_note_doi bushel_obsidian bushel_paper_classify bushel_paper_tex bushel_thumbs bushel_search)
-
(flags (:standard -w -69))
-
(libraries bushel bushel_common cmdliner eio eio_main eiocmd yaml ezjsonm zotero-translation fmt cmarkit uri unix ptime.clock.os crockford))
-
-
(executable
-
(name bushel_typesense)
-
(public_name bushel-typesense)
-
(package bushel)
-
(modules bushel_typesense)
-
(flags (:standard -w -69))
-
(libraries bushel bushel_common cmdliner eio eio_main eiocmd))
-51
stack/bushel/bushel.opam
···
-
# This file is generated by dune, edit dune-project instead
-
opam-version: "2.0"
-
synopsis: "A webring but not as oldskool"
-
description: "This is all still a work in progress"
-
maintainer: ["anil@recoil.org"]
-
authors: ["Anil Madhavapeddy"]
-
license: "ISC"
-
homepage: "https://github.com/avsm/bushel"
-
bug-reports: "https://github.com/avsm/bushel/issues"
-
depends: [
-
"dune" {>= "3.17"}
-
"ocaml" {>= "5.2.0"}
-
"uri"
-
"cmarkit"
-
"ezjsonm"
-
"ptime"
-
"jsont"
-
"bytesrw"
-
"jekyll-format"
-
"yaml"
-
"eio"
-
"eio_main"
-
"requests"
-
"fmt"
-
"peertubee"
-
"karakeep"
-
"typesense-client"
-
"cmdliner"
-
"eiocmd"
-
"xdge"
-
"keyeio"
-
"odoc" {with-doc}
-
]
-
build: [
-
["dune" "subst"] {dev}
-
[
-
"dune"
-
"build"
-
"-p"
-
name
-
"-j"
-
jobs
-
"@install"
-
"@runtest" {with-test}
-
"@doc" {with-doc}
-
]
-
]
-
dev-repo: "git+https://github.com/avsm/bushel.git"
-
pin-depends: [
-
[ "zotero-translation.dev" "git+https://github.com/avsm/zotero-translation.git" ]
-
]
-3
stack/bushel/bushel.opam.template
···
-
pin-depends: [
-
[ "zotero-translation.dev" "git+https://github.com/avsm/zotero-translation.git" ]
-
]
-35
stack/bushel/dune-project
···
-
(lang dune 3.17)
-
(name bushel)
-
-
(source (github avsm/bushel))
-
(license ISC)
-
(authors "Anil Madhavapeddy")
-
(maintainers "anil@recoil.org")
-
-
(generate_opam_files true)
-
-
(package
-
(name bushel)
-
(synopsis "A webring but not as oldskool")
-
(description "This is all still a work in progress")
-
(depends
-
(ocaml (>= "5.2.0"))
-
uri
-
cmarkit
-
ezjsonm
-
ptime
-
jsont
-
bytesrw
-
jekyll-format
-
yaml
-
eio
-
eio_main
-
requests
-
fmt
-
peertubee
-
karakeep
-
typesense-client
-
cmdliner
-
eiocmd
-
xdge
-
keyeio))
-79
stack/bushel/lib/bushel.ml
···
-
module Contact = Contact
-
module Idea = Idea
-
module Note = Note
-
module Paper = Paper
-
module Project = Project
-
module Video = Video
-
module Tags = Tags
-
module Link = Link
-
module Entry = Entry
-
module Util = Util
-
module Srcsetter = Srcsetter
-
module Md = Md
-
module Typesense = Typesense
-
module Link_graph = Link_graph
-
module Description = Description
-
module Doi_entry = Doi_entry
-
-
let map_md base subdir fn =
-
let dir = base ^ "/data/" ^ subdir in
-
Sys.readdir dir
-
|> Array.to_list
-
|> List.filter (fun f -> Filename.check_suffix f ".md")
-
|> List.map (fun e -> fn dir e)
-
;;
-
-
let map_category base c fn = map_md base c (fun dir e -> fn @@ Filename.concat dir e)
-
let dbg l = Printf.eprintf "loading %s\n%!" l
-
-
let load_contacts base = dbg "contacts"; map_category base "contacts" Contact.of_md
-
let load_projects base = dbg "projects"; map_category base "projects" Project.of_md
-
let load_notes base =
-
dbg "notes";
-
let notes_from_notes = map_category base "notes" Note.of_md in
-
let notes_from_news = map_category base "news" Note.of_md in
-
notes_from_notes @ notes_from_news
-
let load_ideas base = dbg "ideas"; map_category base "ideas" Idea.of_md
-
let load_videos base = dbg "videos"; map_category base "videos" Video.of_md
-
-
let load_images base =
-
Printf.eprintf "load images %s/images\n%!" base;
-
try
-
Srcsetter.list_of_json (Util.read_file (base ^ "/images/index.json")) |> Result.get_ok
-
with
-
| _ -> [] (* FIXME log *)
-
;;
-
-
let load_papers base =
-
Printf.eprintf "load papers %s/data/papers\n%!" base;
-
Sys.readdir (base ^ "/data/papers")
-
|> Array.to_list
-
|> List.filter (fun slug -> Sys.is_directory (base ^ "/data/papers/" ^ slug))
-
|> List.map (fun slug ->
-
Sys.readdir (base ^ "/data/papers/" ^ slug)
-
|> Array.to_list
-
|> List.filter (fun ver -> Filename.check_suffix ver ".md")
-
|> List.map (fun ver ->
-
let ver = Filename.chop_extension ver in
-
Paper.of_md ~slug ~ver (base ^ "/data/papers/" ^ slug ^ "/" ^ ver ^ ".md")))
-
|> List.flatten
-
|> Paper.tv
-
;;
-
-
let load base =
-
let images = load_images base in
-
let papers = load_papers base in
-
let contacts = load_contacts base in
-
let projects = load_projects base in
-
let notes = load_notes base in
-
let ideas = load_ideas base in
-
let videos = load_videos base in
-
let entries = Entry.v ~images ~papers ~notes ~projects ~ideas ~videos ~contacts ~data_dir:(base ^ "/data") in
-
(* Build link graph *)
-
Printf.eprintf "Building link_graph...\n%!";
-
let graph = Link_graph.build_link_graph entries in
-
Fmt.epr "%a@." Link_graph.pp_graph graph;
-
Link_graph.set_graph graph;
-
entries
-
;;
-
-27
stack/bushel/lib/bushel.mli
···
-
(** Bushel *)
-
-
module Contact = Contact
-
module Idea = Idea
-
module Note = Note
-
module Paper = Paper
-
module Project = Project
-
module Video = Video
-
module Tags = Tags
-
module Link = Link
-
module Entry = Entry
-
module Util = Util
-
module Md = Md
-
module Srcsetter = Srcsetter
-
module Typesense = Typesense
-
module Link_graph = Link_graph
-
module Description = Description
-
module Doi_entry = Doi_entry
-
-
val load_contacts : string -> Contact.ts
-
val load_projects : string -> Project.ts
-
val load_notes : string -> Note.ts
-
val load_ideas : string -> Idea.ts
-
val load_videos : string -> Video.ts
-
val load_images : string -> Srcsetter.ts
-
val load_papers : string -> Paper.ts
-
val load : string -> Entry.t
-172
stack/bushel/lib/contact.ml
···
-
type t =
-
{ names : string list
-
; handle : string
-
; email : string option
-
; icon : string option
-
; github : string option
-
; twitter : string option
-
; bluesky : string option
-
; mastodon : string option
-
; orcid : string option
-
; url : string option
-
; atom : string list option
-
}
-
-
type ts = t list
-
-
let v ?email ?github ?twitter ?bluesky ?mastodon ?orcid ?icon ?url ?atom handle names =
-
{ names; handle; email; github; twitter; bluesky; mastodon; orcid; url; icon; atom }
-
;;
-
-
let make names email icon github twitter bluesky mastodon orcid url atom =
-
v ?email ?github ?twitter ?bluesky ?mastodon ?orcid ?icon ?url ?atom "" names
-
;;
-
-
let names { names; _ } = names
-
let name { names; _ } = List.hd names
-
let handle { handle; _ } = handle
-
let email { email; _ } = email
-
let icon { icon; _ } = icon
-
let github { github; _ } = github
-
let twitter { twitter; _ } = twitter
-
let bluesky { bluesky; _ } = bluesky
-
let mastodon { mastodon; _ } = mastodon
-
let orcid { orcid; _ } = orcid
-
let url { url; _ } = url
-
let atom { atom; _ } = atom
-
-
let json_t =
-
let open Jsont in
-
let open Jsont.Object in
-
let mem_opt f v ~enc = mem f v ~dec_absent:None ~enc_omit:Option.is_none ~enc in
-
map ~kind:"Contact" make
-
|> mem "names" (list string) ~dec_absent:[] ~enc:names
-
|> mem_opt "email" (some string) ~enc:email
-
|> mem_opt "icon" (some string) ~enc:icon
-
|> mem_opt "github" (some string) ~enc:github
-
|> mem_opt "twitter" (some string) ~enc:twitter
-
|> mem_opt "bluesky" (some string) ~enc:bluesky
-
|> mem_opt "mastodon" (some string) ~enc:mastodon
-
|> mem_opt "orcid" (some string) ~enc:orcid
-
|> mem_opt "url" (some string) ~enc:url
-
|> mem_opt "atom" (some (list string)) ~enc:atom
-
|> finish
-
;;
-
-
let v = Jsont_bytesrw.decode_string (Jsont.list json_t)
-
let compare a b = String.compare a.handle b.handle
-
let find_by_handle ts h = List.find_opt (fun { handle; _ } -> handle = h) ts
-
-
let best_url c =
-
match c.url with
-
| Some v -> Some v
-
| None ->
-
(match c.github with
-
| Some v -> Some ("https://github.com/" ^ v)
-
| None ->
-
(match c.email with
-
| Some v -> Some ("mailto:" ^ v)
-
| None -> None))
-
;;
-
-
let of_md fname =
-
(* TODO fix Jekyll_post to not error on no date *)
-
let fname' = "2000-01-01-" ^ Filename.basename fname in
-
let handle = Filename.basename fname |> Filename.chop_extension in
-
match Jekyll_post.of_string ~fname:fname' (Util.read_file fname) with
-
| Error (`Msg m) -> failwith ("contact_of_md: " ^ m)
-
| Ok jp ->
-
let fields = jp.Jekyll_post.fields |> Jekyll_format.fields_to_yaml in
-
let c = Jsont_bytesrw.decode_string json_t (Ezjsonm.value_to_string fields) in
-
(match c with
-
| Error e -> failwith e
-
| Ok c -> { c with handle })
-
;;
-
-
(* Given a name, turn it lowercase and return the concatenation of the
-
initials of all the words in the name and the full last name. *)
-
let handle_of_name name =
-
let name = String.lowercase_ascii name in
-
let words = String.split_on_char ' ' name in
-
let initials = String.concat "" (List.map (fun w -> String.sub w 0 1) words) in
-
initials ^ List.hd (List.rev words)
-
;;
-
-
(* fuzzy lookup for an author. Strip out any non alpha numeric characters while
-
searching for the name *)
-
let lookup_by_name ts a =
-
let a = String.lowercase_ascii a in
-
let rec aux acc = function
-
| [] -> acc
-
| t :: ts ->
-
if List.exists (fun n -> String.lowercase_ascii n = a) t.names
-
then aux (t :: acc) ts
-
else aux acc ts
-
in
-
match aux [] ts with
-
| [ a ] -> a
-
| [] -> raise (Failure ("contact.ml: author not found: " ^ a))
-
| _ -> raise (Failure ("ambiguous author: " ^ a))
-
;;
-
-
(* TODO:claude *)
-
let typesense_schema =
-
let open Ezjsonm in
-
dict [
-
("name", string "contacts");
-
("fields", list (fun d -> dict d) [
-
[("name", string "id"); ("type", string "string")];
-
[("name", string "handle"); ("type", string "string")];
-
[("name", string "name"); ("type", string "string")];
-
[("name", string "names"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "email"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "icon"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "github"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "twitter"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "bluesky"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "mastodon"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "orcid"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "url"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "atom"); ("type", string "string[]"); ("optional", bool true)];
-
]);
-
]
-
-
(** TODO:claude Pretty-print a contact with ANSI formatting *)
-
let pp ppf c =
-
let open Fmt in
-
pf ppf "@[<v>";
-
pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Contact";
-
pf ppf "%a: @%a@," (styled `Bold string) "Handle" string (handle c);
-
pf ppf "%a: %a@," (styled `Bold string) "Name" string (name c);
-
let ns = names c in
-
if List.length ns > 1 then
-
pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Aliases" (list ~sep:comma string) (List.tl ns);
-
(match email c with
-
| Some e -> pf ppf "%a: %a@," (styled `Bold string) "Email" string e
-
| None -> ());
-
(match github c with
-
| Some g -> pf ppf "%a: https://github.com/%a@," (styled `Bold string) "GitHub" string g
-
| None -> ());
-
(match twitter c with
-
| Some t -> pf ppf "%a: https://twitter.com/%a@," (styled `Bold string) "Twitter" string t
-
| None -> ());
-
(match bluesky c with
-
| Some b -> pf ppf "%a: %a@," (styled `Bold string) "Bluesky" string b
-
| None -> ());
-
(match mastodon c with
-
| Some m -> pf ppf "%a: %a@," (styled `Bold string) "Mastodon" string m
-
| None -> ());
-
(match orcid c with
-
| Some o -> pf ppf "%a: https://orcid.org/%a@," (styled `Bold string) "ORCID" string o
-
| None -> ());
-
(match url c with
-
| Some u -> pf ppf "%a: %a@," (styled `Bold string) "URL" string u
-
| None -> ());
-
(match icon c with
-
| Some i -> pf ppf "%a: %a@," (styled `Bold string) "Icon" string i
-
| None -> ());
-
(match atom c with
-
| Some atoms when atoms <> [] ->
-
pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Atom Feeds" (list ~sep:comma string) atoms
-
| _ -> ());
-
pf ppf "@]"
-25
stack/bushel/lib/contact.mli
···
-
type t
-
type ts = t list
-
-
val v : string -> (ts, string) result
-
val names : t -> string list
-
val name : t -> string
-
val handle : t -> string
-
val email : t -> string option
-
val icon : t -> string option
-
val github : t -> string option
-
val twitter : t -> string option
-
val bluesky : t -> string option
-
val mastodon : t -> string option
-
val orcid : t -> string option
-
val url : t -> string option
-
val atom : t -> string list option
-
val best_url : t -> string option
-
val find_by_handle : t list -> string -> t option
-
val handle_of_name : string -> string
-
val lookup_by_name : ts -> string -> t
-
val json_t : t Jsont.t
-
val compare : t -> t -> int
-
val of_md : string -> t
-
val typesense_schema : Ezjsonm.value
-
val pp : Format.formatter -> t -> unit
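
A throwaway usage sketch against this interface (the handle and name below are placeholders; only the signatures above are taken as given):

(* Resolve a contact by exact handle first, falling back to fuzzy name lookup. *)
let report_contact (contacts : Contact.ts) =
  match Contact.find_by_handle contacts "avsm" with
  | Some c -> Fmt.pr "%a@." Contact.pp c
  | None ->
    match Contact.lookup_by_name contacts "Jane Example" with
    | c -> Fmt.pr "%a@." Contact.pp c
    | exception Failure msg -> Fmt.epr "lookup failed: %s@." msg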
-72
stack/bushel/lib/description.ml
···
-
(** Generate descriptive text for bushel entries *)
-
-
(* Helper to format a date as "Month Year" *)
-
let format_date date =
-
let (year, month, _day) = date in
-
let month_name = match month with
-
| 1 -> "January" | 2 -> "February" | 3 -> "March" | 4 -> "April"
-
| 5 -> "May" | 6 -> "June" | 7 -> "July" | 8 -> "August"
-
| 9 -> "September" | 10 -> "October" | 11 -> "November" | 12 -> "December"
-
| _ -> ""
-
in
-
Printf.sprintf "%s %d" month_name year
-
-
(* Generate a descriptive sentence for a paper *)
-
let paper_description (p : Paper.t) ~date_str =
-
let venue = match String.lowercase_ascii (Paper.bibtype p) with
-
| "inproceedings" -> Paper.booktitle p
-
| "article" -> Paper.journal p
-
| "book" ->
-
let pub = Paper.publisher p in
-
if pub = "" then "Book" else "Book by " ^ pub
-
| "techreport" ->
-
(try "Technical report at " ^ Paper.institution p
-
with _ -> "Technical report")
-
| "misc" ->
-
let pub = Paper.publisher p in
-
if pub = "" then "Working paper" else "Working paper at " ^ pub
-
| _ -> "Publication"
-
in
-
Printf.sprintf "Paper in %s (%s)" venue date_str
-
-
(* Generate a descriptive sentence for a note *)
-
let note_description (n : Note.t) ~date_str ~lookup_fn =
-
match Note.slug_ent n with
-
| Some slug_ent ->
-
(match lookup_fn slug_ent with
-
| Some related_title ->
-
Printf.sprintf "Note about %s (%s)" related_title date_str
-
| None -> Printf.sprintf "Research note (%s)" date_str)
-
| None -> Printf.sprintf "Research note (%s)" date_str
-
-
(* Generate a descriptive sentence for an idea *)
-
let idea_description (i : Idea.t) ~date_str =
-
let status_str = String.lowercase_ascii (Idea.status_to_string (Idea.status i)) in
-
let level_str = Idea.level_to_string (Idea.level i) in
-
Printf.sprintf "Research idea (%s, %s level, %s)" status_str level_str date_str
-
-
(* Generate a descriptive sentence for a video *)
-
let video_description (v : Video.t) ~date_str ~lookup_fn =
-
let video_type = if Video.talk v then "Talk video" else "Video" in
-
let context = match Video.paper v with
-
| Some paper_slug ->
-
(match lookup_fn paper_slug with
-
| Some title -> Printf.sprintf " about %s" title
-
| None -> "")
-
| None ->
-
(match Video.project v with
-
| Some project_slug ->
-
(match lookup_fn project_slug with
-
| Some title -> Printf.sprintf " about %s" title
-
| None -> "")
-
| None -> "")
-
in
-
Printf.sprintf "%s%s (%s)" video_type context date_str
-
-
(* Generate a descriptive sentence for a project *)
-
let project_description (pr : Project.t) =
-
let end_str = match pr.Project.finish with
-
| Some year -> string_of_int year
-
| None -> "present"
-
in
-
Printf.sprintf "Project (%dโ€“%s)" pr.Project.start end_str
-19
stack/bushel/lib/description.mli
···
-
(** Generate descriptive text for bushel entries *)
-
-
(** Format a date as "Month Year" *)
-
val format_date : int * int * int -> string
-
-
(** Generate a descriptive sentence for a paper with date string *)
-
val paper_description : Paper.t -> date_str:string -> string
-
-
(** Generate a descriptive sentence for a note with date string and lookup function *)
-
val note_description : Note.t -> date_str:string -> lookup_fn:(string -> string option) -> string
-
-
(** Generate a descriptive sentence for an idea with date string *)
-
val idea_description : Idea.t -> date_str:string -> string
-
-
(** Generate a descriptive sentence for a video with date string and lookup function *)
-
val video_description : Video.t -> date_str:string -> lookup_fn:(string -> string option) -> string
-
-
(** Generate a descriptive sentence for a project *)
-
val project_description : Project.t -> string
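
For reference, a tiny sketch of the date helper above (the day component is ignored by the formatter, so any value works):

let () =
  let date_str = Description.format_date (2024, 3, 15) in
  print_endline date_str  (* prints "March 2024" *)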
-147
stack/bushel/lib/doi_entry.ml
···
-
module J = Ezjsonm
-
-
type status =
-
| Resolved
-
| Failed of string
-
-
type t = {
-
doi: string;
-
title: string;
-
authors: string list;
-
year: int;
-
bibtype: string;
-
publisher: string;
-
resolved_at: string;
-
source_urls: string list;
-
status: status;
-
ignore: bool;
-
}
-
-
type ts = t list
-
-
let create_resolved ~doi ~title ~authors ~year ~bibtype ~publisher ?(source_urls=[]) () =
-
let resolved_at =
-
let now = Ptime_clock.now () in
-
let rfc3339 = Ptime.to_rfc3339 ~space:false ~frac_s:0 now in
-
String.sub rfc3339 0 10 (* Extract YYYY-MM-DD *)
-
in
-
{ doi; title; authors; year; bibtype; publisher; resolved_at; source_urls; status = Resolved; ignore = false }
-
-
let create_failed ~doi ~error ?(source_urls=[]) () =
-
let resolved_at =
-
let now = Ptime_clock.now () in
-
let rfc3339 = Ptime.to_rfc3339 ~space:false ~frac_s:0 now in
-
String.sub rfc3339 0 10 (* Extract YYYY-MM-DD *)
-
in
-
{ doi; title = ""; authors = []; year = 0; bibtype = ""; publisher = "";
-
resolved_at; source_urls; status = Failed error; ignore = false }
-
-
let merge_entries old_entry new_entry =
-
(* Combine source_urls, removing duplicates *)
-
let combined_urls =
-
List.sort_uniq String.compare (old_entry.source_urls @ new_entry.source_urls)
-
in
-
(* Use new_entry's data but with combined URLs and preserve ignore flag from old entry *)
-
{ new_entry with source_urls = combined_urls; ignore = old_entry.ignore }
-
-
let to_yaml_value entry =
-
let status_field = match entry.status with
-
| Resolved -> []
-
| Failed err -> [("error", `String err)]
-
in
-
let source_urls_field = match entry.source_urls with
-
| [] -> []
-
| urls -> [("source_urls", `A (List.map (fun url -> `String url) urls))]
-
in
-
let ignore_field = if entry.ignore then [("ignore", `Bool true)] else [] in
-
let fields = [
-
("doi", `String entry.doi);
-
("resolved_at", `String entry.resolved_at);
-
] @ status_field @ source_urls_field @ ignore_field in
-
let fields = match entry.status with
-
| Resolved ->
-
fields @ [
-
("title", `String entry.title);
-
("authors", `A (List.map (fun a -> `String a) entry.authors));
-
("year", `Float (float_of_int entry.year));
-
("bibtype", `String entry.bibtype);
-
("publisher", `String entry.publisher);
-
]
-
| Failed _ -> fields
-
in
-
`O fields
-
-
let of_yaml_value v =
-
try
-
let doi = J.find v ["doi"] |> J.get_string in
-
let resolved_at = J.find v ["resolved_at"] |> J.get_string in
-
(* Support both old source_url (single) and new source_urls (list) for backwards compatibility *)
-
let source_urls =
-
try
-
J.find v ["source_urls"] |> J.get_list J.get_string
-
with _ ->
-
try
-
let single_url = J.find v ["source_url"] |> J.get_string in
-
[single_url]
-
with _ -> []
-
in
-
let ignore = try J.find v ["ignore"] |> J.get_bool with _ -> false in
-
let error = try Some (J.find v ["error"] |> J.get_string) with _ -> None in
-
match error with
-
| Some err ->
-
{ doi; title = ""; authors = []; year = 0; bibtype = ""; publisher = "";
-
resolved_at; source_urls; status = Failed err; ignore }
-
| None ->
-
let title = J.find v ["title"] |> J.get_string in
-
let authors = J.find v ["authors"] |> J.get_list J.get_string in
-
let year = J.find v ["year"] |> J.get_float |> int_of_float in
-
let bibtype = J.find v ["bibtype"] |> J.get_string in
-
let publisher = J.find v ["publisher"] |> J.get_string in
-
{ doi; title; authors; year; bibtype; publisher; resolved_at; source_urls; status = Resolved; ignore }
-
with e ->
-
Printf.eprintf "Failed to parse DOI entry: %s\n%!" (Printexc.to_string e);
-
failwith "Invalid DOI entry in YAML"
-
-
let load path =
-
if not (Sys.file_exists path) then
-
[]
-
else
-
try
-
let yaml_str = In_channel.with_open_text path In_channel.input_all in
-
match Yaml.of_string yaml_str with
-
| Ok (`A entries) -> List.map of_yaml_value entries
-
| Ok _ -> []
-
| Error (`Msg e) ->
-
Printf.eprintf "Failed to parse %s: %s\n%!" path e;
-
[]
-
with e ->
-
Printf.eprintf "Failed to load %s: %s\n%!" path (Printexc.to_string e);
-
[]
-
-
let save path entries =
-
let yaml_list = `A (List.map to_yaml_value entries) in
-
let yaml_str = Yaml.to_string_exn yaml_list in
-
Out_channel.with_open_text path (fun oc ->
-
Out_channel.output_string oc yaml_str
-
)
-
-
let to_map entries =
-
let map = Hashtbl.create (List.length entries) in
-
List.iter (fun entry -> Hashtbl.add map entry.doi entry) entries;
-
map
-
-
let find_by_doi entries doi =
-
List.find_opt (fun entry -> not entry.ignore && entry.doi = doi) entries
-
-
let find_by_url entries url =
-
List.find_opt (fun entry ->
-
not entry.ignore && List.mem url entry.source_urls
-
) entries
-
-
let find_by_doi_including_ignored entries doi =
-
List.find_opt (fun entry -> entry.doi = doi) entries
-
-
let find_by_url_including_ignored entries url =
-
List.find_opt (fun entry ->
-
List.mem url entry.source_urls
-
) entries
-51
stack/bushel/lib/doi_entry.mli
···
-
(** DOI entries resolved from external sources via Zotero Translation Server *)
-
-
type status =
-
| Resolved (** Successfully resolved from Zotero *)
-
| Failed of string (** Failed to resolve, with error message *)
-
-
type t = {
-
doi: string;
-
title: string;
-
authors: string list;
-
year: int;
-
bibtype: string; (** article, inproceedings, book, etc *)
-
publisher: string; (** journal/conference/publisher name *)
-
resolved_at: string; (** ISO date when resolved *)
-
source_urls: string list; (** All URLs that resolve to this DOI (publisher links, doi.org URLs, etc) *)
-
status: status;
-
ignore: bool; (** If true, skip this entry when looking up references *)
-
}
-
-
type ts = t list
-
-
(** Load DOI entries from YAML file *)
-
val load : string -> ts
-
-
(** Save DOI entries to YAML file *)
-
val save : string -> ts -> unit
-
-
(** Convert list to hashtable for fast lookup by DOI *)
-
val to_map : ts -> (string, t) Hashtbl.t
-
-
(** Find entry by DOI (excludes ignored entries) *)
-
val find_by_doi : ts -> string -> t option
-
-
(** Find entry by source URL (searches through all source_urls, excludes ignored entries) *)
-
val find_by_url : ts -> string -> t option
-
-
(** Find entry by DOI including ignored entries (for resolution checks) *)
-
val find_by_doi_including_ignored : ts -> string -> t option
-
-
(** Find entry by source URL including ignored entries (for resolution checks) *)
-
val find_by_url_including_ignored : ts -> string -> t option
-
-
(** Create a new resolved entry *)
-
val create_resolved : doi:string -> title:string -> authors:string list ->
-
year:int -> bibtype:string -> publisher:string -> ?source_urls:string list -> unit -> t
-
-
(** Create a new failed entry *)
-
val create_failed : doi:string -> error:string -> ?source_urls:string list -> unit -> t
-
-
(** Merge two entries with the same DOI, combining their source_urls *)
-
val merge_entries : t -> t -> t
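
A hypothetical upsert helper on top of this interface: replace any existing entry for the same DOI, merging source URLs via merge_entries. The path handling and the replace-by-DOI policy are assumptions, not part of the module.

let upsert path (fresh : Doi_entry.t) =
  let doi = fresh.Doi_entry.doi in
  let entries = Doi_entry.load path in
  let entries =
    match Doi_entry.find_by_doi_including_ignored entries doi with
    | Some old ->
      (* keep old source_urls and the ignore flag, take the new metadata *)
      Doi_entry.merge_entries old fresh
      :: List.filter (fun e -> e.Doi_entry.doi <> doi) entries
    | None -> fresh :: entries
  in
  Doi_entry.save path entries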
-19
stack/bushel/lib/dune
···
-
(library
-
(name bushel)
-
(public_name bushel)
-
(libraries
-
cmarkit
-
uri
-
jsont
-
jsont.bytesrw
-
ezjsonm
-
ptime
-
yaml.unix
-
jekyll-format
-
eio
-
requests
-
fmt
-
re
-
ptime.clock
-
ptime.clock.os
-
typesense-client))
-449
stack/bushel/lib/entry.ml
···
-
type entry =
-
[ `Paper of Paper.t
-
| `Project of Project.t
-
| `Idea of Idea.t
-
| `Video of Video.t
-
| `Note of Note.t
-
]
-
-
type slugs = (string, entry) Hashtbl.t
-
-
type t =
-
{ slugs : slugs
-
; papers : Paper.ts
-
; old_papers : Paper.ts
-
; notes : Note.ts
-
; projects : Project.ts
-
; ideas : Idea.ts
-
; videos : Video.ts
-
; contacts : Contact.ts
-
; images : Srcsetter.ts
-
; doi_entries : Doi_entry.ts
-
; data_dir : string
-
}
-
-
let contacts { contacts; _ } = contacts
-
let videos { videos; _ } = videos
-
let ideas { ideas; _ } = ideas
-
let papers { papers; _ } = papers
-
let notes { notes; _ } = notes
-
let projects { projects; _ } = projects
-
let images { images; _ } = images
-
let doi_entries { doi_entries; _ } = doi_entries
-
let data_dir { data_dir; _ } = data_dir
-
-
let v ~papers ~notes ~projects ~ideas ~videos ~contacts ~images ~data_dir =
-
let slugs : slugs = Hashtbl.create 42 in
-
let papers, old_papers = List.partition (fun p -> p.Paper.latest) papers in
-
List.iter (fun n -> Hashtbl.add slugs n.Note.slug (`Note n)) notes;
-
List.iter (fun p -> Hashtbl.add slugs p.Project.slug (`Project p)) projects;
-
List.iter (fun i -> Hashtbl.add slugs i.Idea.slug (`Idea i)) ideas;
-
List.iter (fun v -> Hashtbl.add slugs v.Video.slug (`Video v)) videos;
-
List.iter (fun p -> Hashtbl.add slugs p.Paper.slug (`Paper p)) papers;
-
(* Load DOI entries from doi.yml *)
-
let doi_yml_path = Filename.concat data_dir "doi.yml" in
-
let doi_entries = Doi_entry.load doi_yml_path in
-
{ slugs; papers; old_papers; notes; projects; ideas; videos; images; contacts; doi_entries; data_dir }
-
;;
-
-
let lookup { slugs; _ } slug = Hashtbl.find_opt slugs slug
-
let lookup_exn { slugs; _ } slug = Hashtbl.find slugs slug
-
-
let old_papers { old_papers; _ } = old_papers
-
-
let sidebar = function
-
| `Note { Note.sidebar = Some s; _ } -> Some s
-
| _ -> None
-
;;
-
-
let to_type_string = function
-
| `Paper _ -> "paper"
-
| `Note _ -> "note"
-
| `Project _ -> "project"
-
| `Idea _ -> "idea"
-
| `Video _ -> "video"
-
;;
-
-
let synopsis = function
-
| `Note n -> Note.synopsis n
-
| _ -> None
-
;;
-
-
let slug = function
-
| `Paper p -> p.Paper.slug
-
| `Note n -> n.Note.slug
-
| `Project p -> p.Project.slug
-
| `Idea i -> i.Idea.slug
-
| `Video v -> v.Video.slug
-
;;
-
-
let title = function
-
| `Paper p -> Paper.title p
-
| `Note n -> Note.title n
-
| `Project p -> Project.title p
-
| `Idea i -> Idea.title i
-
| `Video v -> Video.title v
-
;;
-
-
let body = function
-
| `Paper _ -> ""
-
| `Note n -> Note.body n
-
| `Project p -> Project.body p
-
| `Idea i -> Idea.body i
-
| `Video _ -> ""
-
;;
-
-
let site_url = function
-
| `Paper p -> "/papers/" ^ p.Paper.slug
-
| `Note n -> "/notes/" ^ n.Note.slug
-
| `Project p -> "/projects/" ^ p.Project.slug
-
| `Idea i -> "/ideas/" ^ i.Idea.slug
-
| `Video v -> "/videos/" ^ v.Video.slug
-
;;
-
-
(** Extract external URLs from markdown content *)
-
let extract_external_links md =
-
let open Cmarkit in
-
let urls = ref [] in
-
-
let is_external_url url =
-
(* XXX FIXME *)
-
let is_bushel_slug = String.starts_with ~prefix:":" in
-
let is_tag_slug = String.starts_with ~prefix:"##" in
-
if is_bushel_slug url || is_tag_slug url then false
-
else
-
try
-
let uri = Uri.of_string url in
-
match Uri.scheme uri with
-
| Some s when s = "http" || s = "https" -> true
-
| Some _ -> true (* Any other scheme is considered external *)
-
| None -> false (* Local references or relative paths *)
-
with _ -> false
-
in
-
-
let inline_mapper _ = function
-
| Inline.Link (lb, _) | Inline.Image (lb, _) ->
-
let ref = Inline.Link.reference lb in
-
(match ref with
-
| `Inline (ld, _) ->
-
(match Link_definition.dest ld with
-
| Some (url, _) when is_external_url url ->
-
urls := url :: !urls;
-
Mapper.default
-
| _ -> Mapper.default)
-
| `Ref (_, _, l) ->
-
(* Get the referenced label definition and extract URL if it exists *)
-
let defs = Doc.defs (Doc.of_string ~strict:false md) in
-
(match Label.Map.find_opt (Label.key l) defs with
-
| Some (Link_definition.Def (ld, _)) ->
-
(match Link_definition.dest ld with
-
| Some (url, _) when is_external_url url ->
-
urls := url :: !urls
-
| _ -> ())
-
| _ -> ());
-
Mapper.default)
-
| Inline.Autolink (autolink, _) ->
-
let url = Inline.Autolink.link autolink |> fst in
-
if not (Inline.Autolink.is_email autolink) && is_external_url url then
-
urls := url :: !urls;
-
Mapper.default
-
| _ -> Mapper.default
-
in
-
-
let mapper = Mapper.make ~inline:inline_mapper () in
-
let doc = Doc.of_string ~strict:false md in
-
let _ = Mapper.map_doc mapper doc in
-
List.sort_uniq String.compare !urls
-
-
let outgoing_links e = extract_external_links (body e)
-
-
let lookup_site_url t slug =
-
match lookup t slug with
-
| Some ent -> site_url ent
-
| None -> ""
-
-
let lookup_title t slug =
-
match lookup t slug with
-
| Some ent -> title ent
-
| None -> ""
-
-
-
let date (x : entry) =
-
match x with
-
| `Paper p -> Paper.date p
-
| `Note n -> Note.date n
-
| `Project p -> p.Project.start, 1, 1
-
| `Idea i -> i.Idea.year, i.Idea.month, 1
-
| `Video v -> Video.date v
-
;;
-
-
let datetime v = date v |> Ptime.of_date |> Option.get
-
-
let year x =
-
match date x with
-
| y, _, _ -> y
-
;;
-
-
let is_index_entry = function
-
| `Note { Note.index_page; _ } -> index_page
-
| _ -> false
-
;;
-
-
let notes_for_slug { notes; _ } slug =
-
List.filter (fun n -> match Note.slug_ent n with Some s -> s = slug | None -> false) notes
-
let all_entries { slugs; _ } = Hashtbl.fold (fun _ v acc -> v :: acc) slugs []
-
-
let all_papers { papers; old_papers; _ } =
-
List.map (fun x -> `Paper x) (papers @ old_papers)
-
;;
-
-
let compare a b =
-
let datetime v = Option.get (Ptime.of_date v) in
-
let da = datetime (date a) in
-
let db = datetime (date b) in
-
if da = db then compare (title a) (title b) else Ptime.compare da db
-
;;
-
-
let lookup_by_name {contacts;_} n =
-
match Contact.lookup_by_name contacts n with
-
| v -> Some v
-
| exception _ -> None
-
-
(** Extract the first image URL from markdown text *)
-
let extract_first_image md =
-
let open Cmarkit in
-
(* Don't use bushel link resolver to avoid circular dependency *)
-
let doc = Doc.of_string md in
-
let found_image = ref None in
-
-
let find_image_in_inline _mapper = function
-
| Inline.Image (img, _) ->
-
(match Inline.Link.reference img with
-
| `Inline (ld, _) ->
-
(match Link_definition.dest ld with
-
| Some (url, _) when !found_image = None ->
-
found_image := Some url;
-
Mapper.default
-
| _ -> Mapper.default)
-
| _ -> Mapper.default)
-
| _ -> Mapper.default
-
in
-
-
let mapper = Mapper.make ~inline:find_image_in_inline () in
-
let _ = Mapper.map_doc mapper doc in
-
!found_image
-
;;
-
-
(** Extract the first video slug from markdown text by looking for bushel video links *)
-
let extract_first_video entries md =
-
let open Cmarkit in
-
let doc = Doc.of_string md in
-
let found_video = ref None in
-
-
let find_video_in_inline _mapper = function
-
| Inline.Link (link, _) ->
-
(match Inline.Link.reference link with
-
| `Inline (ld, _) ->
-
(match Link_definition.dest ld with
-
| Some (url, _) when !found_video = None && String.starts_with ~prefix:":" url ->
-
(* Check if this is a video slug *)
-
let slug = String.sub url 1 (String.length url - 1) in
-
(match lookup entries slug with
-
| Some (`Video v) ->
-
found_video := Some (Video.uuid v);
-
Mapper.default
-
| _ -> Mapper.default)
-
| _ -> Mapper.default)
-
| _ -> Mapper.default)
-
| _ -> Mapper.default
-
in
-
-
let mapper = Mapper.make ~inline:find_video_in_inline () in
-
let _ = Mapper.map_doc mapper doc in
-
!found_video
-
;;
-
-
(** Look up an image in the srcsetter list by slug *)
-
let lookup_image { images; _ } slug =
-
List.find_opt (fun img -> Srcsetter.slug img = slug) images
-
-
(** Get the smallest webp variant from a srcsetter image - prefers size just above 480px *)
-
let smallest_webp_variant img =
-
let variants = Srcsetter.variants img in
-
let webp_variants =
-
Srcsetter.MS.bindings variants
-
|> List.filter (fun (name, _) -> String.ends_with ~suffix:".webp" name)
-
in
-
match webp_variants with
-
| [] ->
-
(* No webp variants - use the name field which is always webp *)
-
"/images/" ^ Srcsetter.name img
-
| variants ->
-
(* Prefer variants with width > 480px, choosing the smallest one above 480 *)
-
let large_variants = List.filter (fun (_, (w, _)) -> w > 480) variants in
-
let candidates = if large_variants = [] then variants else large_variants in
-
(* Find the smallest variant from candidates *)
-
let smallest = List.fold_left (fun acc (name, (w, h)) ->
-
match acc with
-
| None -> Some (name, w, h)
-
| Some (_, min_w, _) when w < min_w -> Some (name, w, h)
-
| _ -> acc
-
) None candidates in
-
match smallest with
-
| Some (name, _, _) -> "/images/" ^ name
-
| None -> "/images/" ^ Srcsetter.name img
-
-
(** Get thumbnail slug for a contact *)
-
let contact_thumbnail_slug contact =
-
(* Contact images use just the handle as slug *)
-
Some (Contact.handle contact)
-
-
(** Get thumbnail URL for a contact - resolved through srcsetter *)
-
let contact_thumbnail entries contact =
-
match contact_thumbnail_slug contact with
-
| None -> None
-
| Some thumb_slug ->
-
match lookup_image entries thumb_slug with
-
| Some img -> Some (smallest_webp_variant img)
-
| None -> None (* Image not in srcsetter - thumbnails are optional *)
-
-
(** Get thumbnail slug for an entry with fallbacks *)
-
let rec thumbnail_slug entries = function
-
| `Paper p ->
-
(* Slug is just the paper slug, directory is in the origin path *)
-
Some (Paper.slug p)
-
-
| `Video v ->
-
(* Videos use their UUID as the slug *)
-
Some (Video.uuid v)
-
-
| `Project p ->
-
(* Project images use "project-{slug}" format *)
-
Some (Printf.sprintf "project-%s" p.Project.slug)
-
-
| `Idea i ->
-
let is_active = match Idea.status i with
-
| Idea.Available | Idea.Discussion | Idea.Ongoing -> true
-
| Idea.Completed | Idea.Expired -> false
-
in
-
if is_active then
-
(* Use first supervisor's face image *)
-
let supervisors = Idea.supervisors i in
-
match supervisors with
-
| sup :: _ ->
-
let handle = if String.length sup > 0 && sup.[0] = '@'
-
then String.sub sup 1 (String.length sup - 1)
-
else sup
-
in
-
(match Contact.find_by_handle (contacts entries) handle with
-
| Some c ->
-
(* Contact images use just the handle as slug *)
-
Some (Contact.handle c)
-
| None ->
-
(* Fallback to project thumbnail *)
-
let project_slug = Idea.project i in
-
(match lookup entries project_slug with
-
| Some p -> thumbnail_slug entries p
-
| None -> None))
-
| [] ->
-
(* No supervisors, use project thumbnail *)
-
let project_slug = Idea.project i in
-
(match lookup entries project_slug with
-
| Some p -> thumbnail_slug entries p
-
| None -> None)
-
else
-
(* Use project thumbnail for completed/expired ideas *)
-
let project_slug = Idea.project i in
-
(match lookup entries project_slug with
-
| Some p -> thumbnail_slug entries p
-
| None -> None)
-
-
| `Note n ->
-
(* Use titleimage if set, otherwise extract first image from body, then try video, otherwise use slug_ent's thumbnail *)
-
(match Note.titleimage n with
-
| Some slug ->
-
(* Always treat titleimage as a bushel slug (without ':' prefix) *)
-
Some slug
-
| None ->
-
(* Extract first image from markdown body *)
-
match extract_first_image (Note.body n) with
-
| Some url when String.starts_with ~prefix:":" url ->
-
Some (String.sub url 1 (String.length url - 1))
-
| Some _ -> None
-
| None ->
-
(* Try extracting first video from markdown body *)
-
match extract_first_video entries (Note.body n) with
-
| Some video_uuid -> Some video_uuid
-
| None ->
-
(* Fallback to slug_ent's thumbnail if present *)
-
match Note.slug_ent n with
-
| Some slug_ent ->
-
(match lookup entries slug_ent with
-
| Some entry -> thumbnail_slug entries entry
-
| None -> None)
-
| None -> None)
-
-
(** Get thumbnail URL for an entry with fallbacks - resolved through srcsetter *)
-
let thumbnail entries entry =
-
match thumbnail_slug entries entry with
-
| None -> None
-
| Some thumb_slug ->
-
match lookup_image entries thumb_slug with
-
| Some img -> Some (smallest_webp_variant img)
-
| None ->
-
(* For projects, fallback to supervisor faces if project image doesn't exist *)
-
(match entry with
-
| `Project p ->
-
(* Find ideas for this project *)
-
let project_ideas = List.filter (fun idea ->
-
Idea.project idea = ":" ^ p.Project.slug
-
) (ideas entries) in
-
(* Collect all unique supervisors from these ideas *)
-
let all_supervisors =
-
List.fold_left (fun acc idea ->
-
List.fold_left (fun acc2 sup ->
-
if List.mem sup acc2 then acc2 else sup :: acc2
-
) acc (Idea.supervisors idea)
-
) [] project_ideas
-
in
-
(* Split into avsm and others, preferring others first *)
-
let (others, avsm) = List.partition (fun sup ->
-
let handle = if String.length sup > 0 && sup.[0] = '@'
-
then String.sub sup 1 (String.length sup - 1)
-
else sup
-
in
-
handle <> "avsm"
-
) all_supervisors in
-
(* Try supervisors in order: others first, then avsm *)
-
let ordered_supervisors = others @ avsm in
-
(* Try each supervisor's face image *)
-
let rec try_supervisors = function
-
| [] -> None
-
| sup :: rest ->
-
let handle = if String.length sup > 0 && sup.[0] = '@'
-
then String.sub sup 1 (String.length sup - 1)
-
else sup
-
in
-
(match Contact.find_by_handle (contacts entries) handle with
-
| Some c ->
-
(match lookup_image entries (Contact.handle c) with
-
| Some img -> Some (smallest_webp_variant img)
-
| None -> try_supervisors rest)
-
| None -> try_supervisors rest)
-
in
-
try_supervisors ordered_supervisors
-
| _ -> None)
-
-
(** Get thumbnail URL for a note with slug_ent *)
-
let thumbnail_note_with_ent entries note_item =
-
(* Use linked entry's thumbnail if slug_ent is set *)
-
match Note.slug_ent note_item with
-
| Some slug_ent ->
-
(match lookup entries (":" ^ slug_ent) with
-
| Some entry -> thumbnail entries entry
-
| None ->
-
(* Fallback to extracting first image from note body *)
-
extract_first_image (Note.body note_item))
-
| None ->
-
(* No slug_ent, extract from note body *)
-
extract_first_image (Note.body note_item)
-79
stack/bushel/lib/entry.mli
···
-
type entry =
-
[ `Idea of Idea.t
-
| `Note of Note.t
-
| `Paper of Paper.t
-
| `Project of Project.t
-
| `Video of Video.t
-
]
-
-
type slugs = (string, entry) Hashtbl.t
-
type t
-
-
val contacts : t -> Contact.ts
-
val videos : t -> Video.ts
-
val ideas : t -> Idea.ts
-
val papers : t -> Paper.ts
-
val notes : t -> Note.ts
-
val projects : t -> Project.ts
-
val images : t -> Srcsetter.ts
-
val doi_entries : t -> Doi_entry.ts
-
val data_dir : t -> string
-
-
val v
-
: papers:Paper.t list
-
-> notes:Note.ts
-
-> projects:Project.ts
-
-> ideas:Idea.ts
-
-> videos:Video.ts
-
-> contacts:Contact.ts
-
-> images:Srcsetter.ts
-
-> data_dir:string
-
-> t
-
-
val lookup : t -> string -> entry option
-
val lookup_exn : t -> string -> entry
-
val lookup_site_url : t -> string -> string
-
val lookup_title : t -> string -> string
-
val lookup_by_name : t -> string -> Contact.t option
-
val old_papers : t -> Paper.ts
-
val sidebar : [> `Note of Note.t ] -> string option
-
val to_type_string : entry -> string
-
val slug : entry -> string
-
val title : entry -> string
-
val body : entry -> string
-
val extract_external_links : string -> string list
-
val outgoing_links : entry -> string list
-
-
(* FIXME move to view *)
-
val site_url : entry -> string
-
val date : entry -> Ptime.date
-
val datetime : entry -> Ptime.t
-
val year : entry -> int
-
val synopsis : entry -> string option
-
-
val is_index_entry : entry -> bool
-
val notes_for_slug : t -> string -> Note.t list
-
val all_entries : t -> entry list
-
val all_papers : t -> entry list
-
val compare : entry -> entry -> int
-
-
(** Look up an image in the srcsetter list by slug *)
-
val lookup_image : t -> string -> Srcsetter.t option
-
-
(** Get the smallest webp variant from a srcsetter image *)
-
val smallest_webp_variant : Srcsetter.t -> string
-
-
(** Get thumbnail slug for a contact *)
-
val contact_thumbnail_slug : Contact.t -> string option
-
-
(** Get thumbnail URL for a contact - resolved through srcsetter *)
-
val contact_thumbnail : t -> Contact.t -> string option
-
-
(** Get thumbnail slug for an entry with fallbacks *)
-
val thumbnail_slug : t -> entry -> string option
-
-
(** Get thumbnail URL for an entry with fallbacks - resolved through srcsetter *)
-
val thumbnail : t -> entry -> string option
-
-
(** Get thumbnail URL for a note with slug_ent *)
-
val thumbnail_note_with_ent : t -> Note.t -> string option
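
A minimal driver sketch for this interface (empty inputs and the "data" directory are placeholders; Entry.v reads doi.yml from data_dir, which Doi_entry.load treats as optional):

let () =
  let entries =
    Entry.v ~papers:[] ~notes:[] ~projects:[] ~ideas:[] ~videos:[]
      ~contacts:[] ~images:[] ~data_dir:"data"
  in
  match Entry.lookup entries "some-slug" with
  | Some e -> Printf.printf "%s: %s\n" (Entry.to_type_string e) (Entry.title e)
  | None -> print_endline "no entry with that slug"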
-223
stack/bushel/lib/idea.ml
···
-
type level =
-
| Any
-
| PartII
-
| MPhil
-
| PhD
-
| Postdoc
-
-
let level_of_yaml = function
-
| `String ("Any" | "any") -> Ok Any
-
| `String ("PartII" | "partii") -> Ok PartII
-
| `String ("MPhil" | "mphil") -> Ok MPhil
-
| `String ("PhD" | "phd") -> Ok PhD
-
| `String ("postdoc" | "Postdoc") -> Ok Postdoc
-
| _ -> Error (`Msg "level_of_yaml")
-
;;
-
-
let level_to_string = function
-
| Any -> "Any"
-
| PartII -> "PartII"
-
| MPhil -> "MPhil"
-
| PhD -> "PhD"
-
| Postdoc -> "postdoctoral"
-
;;
-
-
let level_to_tag = function
-
| Any -> "idea-beginner"
-
| PartII -> "idea-medium"
-
| MPhil -> "idea-hard"
-
| PhD -> "idea-phd"
-
| Postdoc -> "idea-postdoc"
-
;;
-
-
let level_to_yaml s = `String (level_to_string s)
-
-
type status =
-
| Available
-
| Discussion
-
| Ongoing
-
| Completed
-
| Expired
-
-
let status_of_yaml = function
-
| `String ("Available" | "available") -> Ok Available
-
| `String ("Discussion" | "discussion") -> Ok Discussion
-
| `String ("Ongoing" | "ongoing") -> Ok Ongoing
-
| `String ("Completed" | "completed") -> Ok Completed
-
| `String ("Expired" | "expired") -> Ok Expired
-
| _ -> Error (`Msg "status_of_yaml")
-
;;
-
-
let status_to_string = function
-
| Available -> "Available"
-
| Discussion -> "Discussion"
-
| Ongoing -> "Ongoing"
-
| Completed -> "Completed"
-
| Expired -> "Expired"
-
;;
-
-
let status_to_tag = function
-
| Available -> "idea-available"
-
| Discussion -> "idea-discuss"
-
| Ongoing -> "idea-ongoing"
-
| Completed -> "idea-done"
-
| Expired -> "idea-expired"
-
;;
-
-
let status_to_yaml s = `String (status_to_string s)
-
-
type t =
-
{ slug : string
-
; title : string
-
; level : level
-
; project : string
-
; status : status
-
; month: int
-
; year : int
-
; supervisors : string list
-
; students : string list
-
; reading : string
-
; body : string
-
; url : string option
-
; tags : string list
-
}
-
-
type ts = t list
-
-
let title i = i.title
-
let supervisors i = i.supervisors
-
let students i = i.students
-
let reading i = i.reading
-
let status i = i.status
-
let level i = i.level
-
let year i = i.year
-
let body i = i.body
-
let project i = i.project
-
-
let compare a b =
-
match compare a.status b.status with
-
| 0 ->
-
(match a.status with
-
| Completed -> compare b.year a.year
-
| _ ->
-
(match compare a.level b.level with
-
| 0 -> begin
-
match compare b.year a.year with
-
| 0 -> compare b.month a.month
-
| n -> n
-
end
-
| n -> n))
-
| n -> n
-
;;
-
-
let of_md fname =
-
match Jekyll_post.of_string ~fname:(Filename.basename fname) (Util.read_file fname) with
-
| Error _ -> failwith "TODO"
-
| Ok jp ->
-
let fields = jp.Jekyll_post.fields in
-
let y = Jekyll_format.fields_to_yaml fields in
-
let year, month, _ = jp.Jekyll_post.date |> Ptime.to_date in
-
let body = jp.Jekyll_post.body in
-
let string f = Yaml.Util.(find_exn f y |> Option.get |> to_string |> Result.get_ok) in
-
let string' f d =
-
try Yaml.Util.(find_exn f y |> Option.get |> to_string |> Result.get_ok) with
-
| _ -> d
-
in
-
let to_list = function
-
| `A l -> Ok l
-
| _ -> Error (`Msg "to_list")
-
in
-
let strings f =
-
try
-
Yaml.Util.(
-
find_exn f y
-
|> Option.get
-
|> to_list
-
|> Result.get_ok
-
|> List.map (fun x -> to_string x |> Result.get_ok))
-
with
-
| _exn -> []
-
in
-
let level =
-
Yaml.Util.(find_exn "level" y |> Option.get |> level_of_yaml |> Result.get_ok)
-
in
-
let status =
-
Yaml.Util.(find_exn "status" y |> Option.get |> status_of_yaml |> Result.get_ok)
-
in
-
let slug = jp.Jekyll_post.slug in
-
{ slug
-
; title = string "title"
-
; level
-
; project = string "project"
-
; status
-
; supervisors = strings "supervisors"
-
; students = strings "students"
-
; tags = strings "tags"
-
; reading = string' "reading" ""
-
; month
-
; year
-
; body
-
; url = None (* TODO *)
-
}
-
;;
-
-
let lookup ideas slug = List.find_opt (fun i -> i.slug = slug) ideas
-
-
(* TODO:claude *)
-
let typesense_schema =
-
let open Ezjsonm in
-
dict [
-
("name", string "ideas");
-
("fields", list (fun d -> dict d) [
-
[("name", string "id"); ("type", string "string")];
-
[("name", string "title"); ("type", string "string")];
-
[("name", string "description"); ("type", string "string")];
-
[("name", string "year"); ("type", string "int32")];
-
[("name", string "date"); ("type", string "string")];
-
[("name", string "date_timestamp"); ("type", string "int64")];
-
[("name", string "tags"); ("type", string "string[]"); ("facet", bool true)];
-
[("name", string "level"); ("type", string "string"); ("facet", bool true)];
-
[("name", string "status"); ("type", string "string"); ("facet", bool true)];
-
[("name", string "project"); ("type", string "string"); ("facet", bool true)];
-
[("name", string "supervisors"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "body"); ("type", string "string"); ("optional", bool true)];
-
[("name", string "students"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "reading"); ("type", string "string"); ("optional", bool true)];
-
[("name", string "url"); ("type", string "string"); ("optional", bool true)];
-
]);
-
("default_sorting_field", string "date_timestamp");
-
]
-
-
(** TODO:claude Pretty-print an idea with ANSI formatting *)
-
let pp ppf i =
-
let open Fmt in
-
pf ppf "@[<v>";
-
pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Idea";
-
pf ppf "%a: %a@," (styled `Bold string) "Slug" string i.slug;
-
pf ppf "%a: %a@," (styled `Bold string) "Title" string (title i);
-
pf ppf "%a: %a@," (styled `Bold string) "Level" string (level_to_string (level i));
-
pf ppf "%a: %a@," (styled `Bold string) "Status" string (status_to_string (status i));
-
pf ppf "%a: %a@," (styled `Bold string) "Project" string (project i);
-
pf ppf "%a: %04d-%02d@," (styled `Bold string) "Date" (year i) i.month;
-
let sups = supervisors i in
-
if sups <> [] then
-
pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Supervisors" (list ~sep:comma string) sups;
-
let studs = students i in
-
if studs <> [] then
-
pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Students" (list ~sep:comma string) studs;
-
(match i.url with
-
| Some url -> pf ppf "%a: %a@," (styled `Bold string) "URL" string url
-
| None -> ());
-
let t = i.tags in
-
if t <> [] then
-
pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Tags" (list ~sep:comma string) t;
-
let r = reading i in
-
if r <> "" then begin
-
pf ppf "@,";
-
pf ppf "%a:@," (styled `Bold string) "Reading";
-
pf ppf "%a@," string r;
-
end;
-
pf ppf "@,";
-
pf ppf "%a:@," (styled `Bold string) "Body";
-
pf ppf "%a@," string (body i);
-
pf ppf "@]"
-55
stack/bushel/lib/idea.mli
···
-
type level =
-
| Any
-
| PartII
-
| MPhil
-
| PhD
-
| Postdoc
-
-
type status =
-
| Available
-
| Discussion
-
| Ongoing
-
| Completed
-
| Expired
-
-
val level_of_yaml : Ezjsonm.value -> (level, [> `Msg of string ]) result
-
val level_to_string : level -> string
-
val level_to_tag : level -> string
-
val level_to_yaml : level -> Ezjsonm.value
-
val status_of_yaml : Ezjsonm.value -> (status, [> `Msg of string ]) result
-
val status_to_string : status -> string
-
val status_to_tag : status -> string
-
val status_to_yaml : status -> Ezjsonm.value
-
-
type t =
-
{ slug : string
-
; title : string
-
; level : level
-
; project : string
-
; status : status
-
; month : int
-
; year : int
-
; supervisors : string list
-
; students : string list
-
; reading : string
-
; body : string
-
; url : string option
-
; tags : string list
-
}
-
-
type ts = t list
-
-
val title : t -> string
-
val supervisors : t -> string list
-
val students : t -> string list
-
val reading : t -> string
-
val status : t -> status
-
val level : t -> level
-
val year : t -> int
-
val body : t -> string
-
val project : t -> string
-
val compare : t -> t -> int
-
val lookup : t list -> string -> t option
-
val of_md : string -> t
-
val typesense_schema : Ezjsonm.value
-
val pp : Format.formatter -> t -> unit
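
A small sketch of loading ideas through this interface (the file paths are hypothetical; of_md expects Jekyll-style front matter as parsed above):

let print_ideas files =
  let ideas = List.map Idea.of_md files |> List.sort Idea.compare in
  List.iter
    (fun i ->
       Printf.printf "%s [%s, %s]\n" (Idea.title i)
         (Idea.level_to_string (Idea.level i))
         (Idea.status_to_string (Idea.status i)))
    ideas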
-296
stack/bushel/lib/link.ml
···
-
type karakeep_data = {
-
remote_url : string;
-
id : string;
-
tags : string list;
-
metadata : (string * string) list;
-
}
-
-
type bushel_data = {
-
slugs : string list;
-
tags : string list;
-
}
-
-
type t = {
-
url : string;
-
date : Ptime.date;
-
description : string;
-
karakeep : karakeep_data option;
-
bushel : bushel_data option;
-
}
-
-
type ts = t list
-
-
let url { url; _ } = url
-
let date { date; _ } = date
-
let description { description; _ } = description
-
let datetime v = Option.get @@ Ptime.of_date @@ date v
-
let compare a b = Ptime.compare (datetime b) (datetime a)
-
-
(* Convert YAML to Link.t *)
-
let t_of_yaml = function
-
| `O fields ->
-
let url =
-
match List.assoc_opt "url" fields with
-
| Some (`String v) -> v
-
| _ -> failwith "link: missing or invalid url"
-
in
-
let date =
-
match List.assoc_opt "date" fields with
-
| Some (`String v) -> begin
-
try
-
match Scanf.sscanf v "%04d-%02d-%02d" (fun y m d -> (y, m, d)) with
-
| (y, m, d) -> (y, m, d)
-
with _ ->
-
(* Fall back to RFC3339 parsing for backward compatibility *)
-
v |> Ptime.of_rfc3339 |> Result.get_ok |> fun (a, _, _) -> Ptime.to_date a
-
end
-
| _ -> failwith "link: missing or invalid date"
-
in
-
let description =
-
match List.assoc_opt "description" fields with
-
| Some (`String v) -> v
-
| _ -> ""
-
in
-
let karakeep =
-
match List.assoc_opt "karakeep" fields with
-
| Some (`O k_fields) ->
-
let remote_url =
-
match List.assoc_opt "remote_url" k_fields with
-
| Some (`String v) -> v
-
| _ -> failwith "link: invalid karakeep.remote_url"
-
in
-
let id =
-
match List.assoc_opt "id" k_fields with
-
| Some (`String v) -> v
-
| _ -> failwith "link: invalid karakeep.id"
-
in
-
let tags =
-
match List.assoc_opt "tags" k_fields with
-
| Some (`A tag_list) ->
-
List.fold_left (fun acc tag ->
-
match tag with
-
| `String t -> t :: acc
-
| _ -> acc
-
) [] tag_list
-
|> List.rev
-
| _ -> []
-
in
-
let metadata =
-
match List.assoc_opt "metadata" k_fields with
-
| Some (`O meta_fields) ->
-
List.fold_left (fun acc (k, v) ->
-
match v with
-
| `String value -> (k, value) :: acc
-
| _ -> acc
-
) [] meta_fields
-
| _ -> []
-
in
-
Some { remote_url; id; tags; metadata }
-
| _ -> None
-
in
-
let bushel =
-
match List.assoc_opt "bushel" fields with
-
| Some (`O b_fields) ->
-
let slugs =
-
match List.assoc_opt "slugs" b_fields with
-
| Some (`A slug_list) ->
-
List.fold_left (fun acc slug ->
-
match slug with
-
| `String s -> s :: acc
-
| _ -> acc
-
) [] slug_list
-
|> List.rev
-
| _ -> []
-
in
-
let tags =
-
match List.assoc_opt "tags" b_fields with
-
| Some (`A tag_list) ->
-
List.fold_left (fun acc tag ->
-
match tag with
-
| `String t -> t :: acc
-
| _ -> acc
-
) [] tag_list
-
|> List.rev
-
| _ -> []
-
in
-
Some { slugs; tags }
-
| _ -> None
-
in
-
{ url; date; description; karakeep; bushel }
-
| _ -> failwith "invalid yaml"
-
-
(* Read file contents *)
-
let read_file file = In_channel.(with_open_bin file input_all)
-
-
(* Load links from a YAML file *)
-
let of_md fname =
-
match Yaml.of_string_exn (read_file fname) with
-
| `A links ->
-
List.map t_of_yaml links
-
| `O _ as single_link ->
-
[t_of_yaml single_link]
-
| _ -> failwith "link_of_md: expected array or object"
-
-
(* Convert Link.t to YAML *)
-
let to_yaml t =
-
let (year, month, day) = t.date in
-
let date_str = Printf.sprintf "%04d-%02d-%02d" year month day in
-
-
(* Create base fields *)
-
let base_fields = [
-
("url", `String t.url);
-
("date", `String date_str);
-
] @
-
(if t.description = "" then [] else [("description", `String t.description)])
-
in
-
-
(* Add karakeep data if present *)
-
let karakeep_fields =
-
match t.karakeep with
-
| Some { remote_url; id; tags; metadata } ->
-
let karakeep_obj = [
-
("remote_url", `String remote_url);
-
("id", `String id);
-
] in
-
let karakeep_obj =
-
if tags = [] then karakeep_obj
-
else ("tags", `A (List.map (fun t -> `String t) tags)) :: karakeep_obj
-
in
-
let karakeep_obj =
-
if metadata = [] then karakeep_obj
-
else ("metadata", `O (List.map (fun (k, v) -> (k, `String v)) metadata)) :: karakeep_obj
-
in
-
[("karakeep", `O karakeep_obj)]
-
| None -> []
-
in
-
-
(* Add bushel data if present *)
-
let bushel_fields =
-
match t.bushel with
-
| Some { slugs; tags } ->
-
let bushel_obj = [] in
-
let bushel_obj =
-
if slugs = [] then bushel_obj
-
else ("slugs", `A (List.map (fun s -> `String s) slugs)) :: bushel_obj
-
in
-
let bushel_obj =
-
if tags = [] then bushel_obj
-
else ("tags", `A (List.map (fun t -> `String t) tags)) :: bushel_obj
-
in
-
if bushel_obj = [] then [] else [("bushel", `O bushel_obj)]
-
| None -> []
-
in
-
-
`O (base_fields @ karakeep_fields @ bushel_fields)
-
-
(* Write a link to a file in the output directory *)
-
let to_file output_dir t =
-
let filename =
-
let (y, m, d) = t.date in
-
let hash = Digest.string t.url |> Digest.to_hex in
-
let short_hash = String.sub hash 0 8 in
-
Printf.sprintf "%04d-%02d-%02d-%s.md" y m d short_hash
-
in
-
let file_path = Fpath.v (Filename.concat output_dir filename) in
-
let yaml = to_yaml t in
-
let yaml_str = Yaml.to_string_exn yaml in
-
let content = "---\n" ^ yaml_str ^ "---\n" in
-
Bos.OS.File.write file_path content
-
-
(* Load links from a YAML file *)
-
let load_links_file path =
-
try
-
let yaml_str = In_channel.(with_open_bin path input_all) in
-
match Yaml.of_string_exn yaml_str with
-
| `A links -> List.map t_of_yaml links
-
| _ -> []
-
with _ -> []
-
-
(* Save links to a YAML file *)
-
let save_links_file path links =
-
try
-
let yaml = `A (List.map to_yaml links) in
-
let yaml_str = Yaml.to_string_exn ~len:4200000 yaml in
-
let oc = open_out path in
-
output_string oc yaml_str;
-
close_out oc
-
with e ->
-
Printf.eprintf "Error saving links file: %s\n%!" (Printexc.to_string e);
-
Printf.eprintf "Attempting to save with smaller length limit...\n%!";
-
let yaml = `A (List.map to_yaml links) in
-
let yaml_str = Yaml.to_string_exn ~len:800000 yaml in
-
let oc = open_out path in
-
output_string oc yaml_str;
-
close_out oc
-
-
(* Merge two lists of links, combining metadata from duplicates *)
-
let merge_links ?(prefer_new_date=false) existing new_links =
-
let links_by_url = Hashtbl.create (List.length existing) in
-
-
(* Add existing links to hashtable *)
-
List.iter (fun link ->
-
Hashtbl.replace links_by_url link.url link
-
) existing;
-
-
(* Merge new links with existing ones *)
-
List.iter (fun new_link ->
-
match Hashtbl.find_opt links_by_url new_link.url with
-
| None ->
-
(* New link not in existing links *)
-
Hashtbl.add links_by_url new_link.url new_link
-
| Some old_link ->
-
(* Merge link data, prefer newer data for fields *)
-
let title =
-
if new_link.description <> "" then new_link.description
-
else old_link.description
-
in
-
-
(* Combine karakeep data (prefer new over old) *)
-
let karakeep =
-
match new_link.karakeep, old_link.karakeep with
-
| Some new_k, Some old_k when new_k.remote_url = old_k.remote_url ->
-
(* Same remote, merge the data *)
-
let merged_metadata =
-
let meta_tbl = Hashtbl.create (List.length old_k.metadata) in
-
List.iter (fun (k, v) -> Hashtbl.replace meta_tbl k v) old_k.metadata;
-
List.iter (fun (k, v) -> Hashtbl.replace meta_tbl k v) new_k.metadata;
-
Hashtbl.fold (fun k v acc -> (k, v) :: acc) meta_tbl []
-
in
-
let merged_tags = List.sort_uniq String.compare (old_k.tags @ new_k.tags) in
-
Some { new_k with metadata = merged_metadata; tags = merged_tags }
-
| Some new_k, _ -> Some new_k
-
| None, old_k -> old_k
-
in
-
-
(* Combine bushel data *)
-
let bushel =
-
match new_link.bushel, old_link.bushel with
-
| Some new_b, Some old_b ->
-
(* Merge slugs and tags *)
-
let merged_slugs = List.sort_uniq String.compare (old_b.slugs @ new_b.slugs) in
-
let merged_tags = List.sort_uniq String.compare (old_b.tags @ new_b.tags) in
-
Some { slugs = merged_slugs; tags = merged_tags }
-
| Some new_b, _ -> Some new_b
-
| None, old_b -> old_b
-
in
-
-
(* Combined link - prefer new date when requested (for bushel entries) *)
-
let date =
-
if prefer_new_date then new_link.date
-
else if compare new_link old_link > 0 then new_link.date
-
else old_link.date
-
in
-
let merged_link = {
-
url = new_link.url;
-
date;
-
description = title;
-
karakeep;
-
bushel
-
} in
-
Hashtbl.replace links_by_url new_link.url merged_link
-
) new_links;
-
-
(* Convert hashtable back to list and sort by date *)
-
Hashtbl.to_seq_values links_by_url
-
|> List.of_seq
-
|> List.sort compare
-34
stack/bushel/lib/link.mli
···
-
type karakeep_data = {
-
remote_url : string;
-
id : string;
-
tags : string list;
-
metadata : (string * string) list;
-
}
-
-
type bushel_data = {
-
slugs : string list;
-
tags : string list;
-
}
-
-
type t = {
-
url : string;
-
date : Ptime.date;
-
description : string;
-
karakeep : karakeep_data option;
-
bushel : bushel_data option;
-
}
-
-
type ts = t list
-
-
val compare : t -> t -> int
-
val url : t -> string
-
val date : t -> Ptime.date
-
val datetime : t -> Ptime.t
-
val description : t -> string
-
val of_md : string -> ts
-
val to_yaml : t -> Yaml.value
-
val t_of_yaml : Yaml.value -> t
-
val to_file : string -> t -> (unit, [> `Msg of string]) result
-
val load_links_file : string -> ts
-
val save_links_file : string -> ts -> unit
-
val merge_links : ?prefer_new_date:bool -> ts -> ts -> ts
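
A hypothetical import step built on this interface: fold newly scraped links into the existing YAML store, keeping the merge semantics defined in link.ml (both paths are placeholders):

let import_links () =
  let existing = Link.load_links_file "data/links.yml" in
  let incoming = Link.of_md "incoming/new-links.md" in
  let merged = Link.merge_links ~prefer_new_date:false existing incoming in
  Link.save_links_file "data/links.yml" merged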
-317
stack/bushel/lib/link_graph.ml
···
-
module StringSet = Set.Make(String)
-
-
type entry_type = [ `Paper | `Project | `Note | `Idea | `Video | `Contact ]
-
-
type internal_link = {
-
source: string;
-
target: string;
-
target_type: entry_type;
-
}
-
-
type external_link = {
-
source: string;
-
domain: string;
-
url: string;
-
}
-
-
type link_graph = {
-
(* All links *)
-
mutable internal_links: internal_link list;
-
mutable external_links: external_link list;
-
-
(* Indices for efficient queries *)
-
outbound: (string, StringSet.t) Hashtbl.t;
-
backlinks: (string, StringSet.t) Hashtbl.t;
-
external_by_entry: (string, StringSet.t) Hashtbl.t;
-
external_by_domain: (string, StringSet.t) Hashtbl.t; (* domain -> source slugs *)
-
}
-
-
let empty_graph () = {
-
internal_links = [];
-
external_links = [];
-
outbound = Hashtbl.create 256;
-
backlinks = Hashtbl.create 256;
-
external_by_entry = Hashtbl.create 256;
-
external_by_domain = Hashtbl.create 64;
-
}
-
-
(* Global storage for the link graph *)
-
let current_graph : link_graph option ref = ref None
-
-
let set_graph graph = current_graph := Some graph
-
let get_graph () = !current_graph
-
-
let entry_type_to_string = function
-
| `Paper -> "paper"
-
| `Project -> "project"
-
| `Note -> "note"
-
| `Idea -> "idea"
-
| `Video -> "video"
-
| `Contact -> "contact"
-
-
(* Query functions *)
-
-
let get_outbound graph slug =
-
try StringSet.elements (Hashtbl.find graph.outbound slug)
-
with Not_found -> []
-
-
let get_backlinks graph slug =
-
try StringSet.elements (Hashtbl.find graph.backlinks slug)
-
with Not_found -> []
-
-
let get_external_links graph slug =
-
try StringSet.elements (Hashtbl.find graph.external_by_entry slug)
-
with Not_found -> []
-
-
let get_entries_linking_to_domain graph domain =
-
try StringSet.elements (Hashtbl.find graph.external_by_domain domain)
-
with Not_found -> []
-
-
(* Query functions that use the global graph *)
-
-
let get_backlinks_for_slug slug =
-
match !current_graph with
-
| None -> []
-
| Some graph -> get_backlinks graph slug
-
-
let get_outbound_for_slug slug =
-
match !current_graph with
-
| None -> []
-
| Some graph -> get_outbound graph slug
-
-
let get_external_links_for_slug slug =
-
match !current_graph with
-
| None -> []
-
| Some graph -> get_external_links graph slug
-
-
(* Pretty printing *)
-
-
let pp_internal_link ppf (link : internal_link) =
-
Fmt.pf ppf "%s -> %s (%s)"
-
link.source
-
link.target
-
(entry_type_to_string link.target_type)
-
-
let pp_external_link ppf (link : external_link) =
-
Fmt.pf ppf "%s -> %s (%s)"
-
link.source
-
link.domain
-
link.url
-
-
let pp_graph ppf graph =
-
Fmt.pf ppf "@[<v>Internal links: %d@,External links: %d@,Entries with outbound: %d@,Entries with backlinks: %d@]"
-
(List.length graph.internal_links)
-
(List.length graph.external_links)
-
(Hashtbl.length graph.outbound)
-
(Hashtbl.length graph.backlinks)
-
-
let entry_type_of_entry = function
-
| `Paper _ -> `Paper
-
| `Project _ -> `Project
-
| `Note _ -> `Note
-
| `Idea _ -> `Idea
-
| `Video _ -> `Video
-
| `Contact _ -> `Contact
-
-
let extract_domain url =
-
try
-
let uri = Uri.of_string url in
-
match Uri.host uri with
-
| Some host -> host
-
| None -> "unknown"
-
with _ -> "unknown"
-
-
let add_to_set_hashtbl tbl key value =
-
let current =
-
try Hashtbl.find tbl key
-
with Not_found -> StringSet.empty
-
in
-
Hashtbl.replace tbl key (StringSet.add value current)
-
-
let build_link_graph entries =
-
let graph = empty_graph () in
-
-
(* Helper to add internal link *)
-
let add_internal_link source target target_type =
-
let link = { source; target; target_type } in
-
graph.internal_links <- link :: graph.internal_links;
-
add_to_set_hashtbl graph.outbound source target;
-
add_to_set_hashtbl graph.backlinks target source
-
in
-
-
(* Helper to add external link *)
-
let add_external_link source url =
-
let domain = extract_domain url in
-
let link = { source; domain; url } in
-
graph.external_links <- link :: graph.external_links;
-
add_to_set_hashtbl graph.external_by_entry source url;
-
add_to_set_hashtbl graph.external_by_domain domain source
-
in
-
-
(* Process each entry *)
-
let process_entry entry =
-
let source_slug = Entry.slug entry in
-
-
(* Get all links from this entry's markdown content *)
-
let md_content = Entry.body entry in
-
let all_links = Md.extract_all_links md_content in
-
-
List.iter (fun link ->
-
if Md.is_bushel_slug link then
-
(* Internal bushel link *)
-
let target_slug = Md.strip_handle link in
-
match Entry.lookup entries target_slug with
-
| Some target_entry ->
-
let target_type = entry_type_of_entry target_entry in
-
add_internal_link source_slug target_slug target_type
-
| None -> ()
-
else if Md.is_contact_slug link then
-
(* Contact link *)
-
let handle = Md.strip_handle link in
-
match Contact.find_by_handle (Entry.contacts entries) handle with
-
| Some c ->
-
let target_slug = Contact.handle c in
-
add_internal_link source_slug target_slug `Contact
-
| None -> ()
-
else if Md.is_tag_slug link then
-
(* Skip tag links *)
-
()
-
else if Md.is_type_filter_slug link then
-
(* Skip type filter links *)
-
()
-
else if String.starts_with ~prefix:"http://" link ||
-
String.starts_with ~prefix:"https://" link then
-
(* External link *)
-
add_external_link source_slug link
-
else
-
(* Skip other links (relative paths, etc) *)
-
()
-
) all_links
-
in
-
-
(* Process all entries *)
-
List.iter process_entry (Entry.all_entries entries);
-
-
(* Process slug_ent references from notes *)
-
let process_note_slug_ent note =
-
match Note.slug_ent note with
-
| Some target_slug ->
-
let source_slug = Note.slug note in
-
(* Look up the target entry by slug *)
-
(match Entry.lookup entries target_slug with
-
| Some target_entry ->
-
let target_type = entry_type_of_entry target_entry in
-
add_internal_link source_slug target_slug target_type
-
| None -> ())
-
| None -> ()
-
in
-
List.iter process_note_slug_ent (Entry.notes entries);
-
-
(* Process projects: field from papers *)
-
let process_paper_projects paper =
-
let source_slug = Paper.slug paper in
-
let project_slugs = Paper.project_slugs paper in
-
List.iter (fun project_slug ->
-
(* Verify the project exists *)
-
match Entry.lookup entries project_slug with
-
| Some (`Project _) ->
-
add_internal_link source_slug project_slug `Project
-
| _ -> ()
-
) project_slugs
-
in
-
List.iter process_paper_projects (Entry.papers entries);
-
-
(* Deduplicate links *)
-
let module LinkSet = Set.Make(struct
-
type t = internal_link
-
let compare (a : internal_link) (b : internal_link) =
-
match String.compare a.source b.source with
-
| 0 -> String.compare a.target b.target
-
| n -> n
-
end) in
-
-
let module ExtLinkSet = Set.Make(struct
-
type t = external_link
-
let compare (a : external_link) (b : external_link) =
-
match String.compare a.source b.source with
-
| 0 -> String.compare a.url b.url
-
| n -> n
-
end) in
-
-
graph.internal_links <- LinkSet.elements (LinkSet.of_list graph.internal_links);
-
graph.external_links <- ExtLinkSet.elements (ExtLinkSet.of_list graph.external_links);
-
-
graph
-
-
(* Export for visualization *)
-
-
let to_json graph entries =
-
(* Build nodes from entries *)
-
let entry_nodes = List.map (fun entry ->
-
let slug = Entry.slug entry in
-
let title = Entry.title entry in
-
let entry_type = entry_type_of_entry entry in
-
`O [
-
("id", `String slug);
-
("title", `String title);
-
("type", `String (entry_type_to_string entry_type));
-
("group", `String "entry");
-
]
-
) (Entry.all_entries entries) in
-
-
(* Build nodes from contacts *)
-
let contact_nodes = List.map (fun contact ->
-
let handle = Contact.handle contact in
-
let name = Contact.name contact in
-
`O [
-
("id", `String handle);
-
("title", `String name);
-
("type", `String "contact");
-
("group", `String "entry");
-
]
-
) (Entry.contacts entries) in
-
-
(* Build domain nodes from external links *)
-
let domain_map = Hashtbl.create 64 in
-
List.iter (fun link ->
-
if not (Hashtbl.mem domain_map link.domain) then
-
Hashtbl.add domain_map link.domain ()
-
) graph.external_links;
-
-
let domain_nodes = Hashtbl.fold (fun domain () acc ->
-
(`O [
-
("id", `String ("domain:" ^ domain));
-
("title", `String domain);
-
("type", `String "domain");
-
("group", `String "domain");
-
]) :: acc
-
) domain_map [] in
-
-
let all_nodes = entry_nodes @ contact_nodes @ domain_nodes in
-
-
(* Build internal links *)
-
let internal_links_json = List.map (fun (link : internal_link) ->
-
`O [
-
("source", `String link.source);
-
("target", `String link.target);
-
("type", `String "internal");
-
]
-
) graph.internal_links in
-
-
(* Build external links (entry -> domain) *)
-
let external_links_json = List.map (fun (link : external_link) ->
-
`O [
-
("source", `String link.source);
-
("target", `String ("domain:" ^ link.domain));
-
("type", `String "external");
-
]
-
) graph.external_links in
-
-
let all_links = internal_links_json @ external_links_json in
-
-
let json = `O [
-
("nodes", `A all_nodes);
-
("links", `A all_links);
-
] in
-
-
Ezjsonm.to_string json
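
A rough sketch of driving the graph builder above (assumes these functions remain exposed without a restricting mli; the slug is a placeholder):

let summarise entries =
  let graph = Link_graph.build_link_graph entries in
  Link_graph.set_graph graph;
  Fmt.pr "%a@." Link_graph.pp_graph graph;
  (* queries go via the global graph registered with set_graph *)
  List.iter print_endline (Link_graph.get_backlinks_for_slug "some-slug")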
-781
stack/bushel/lib/md.ml
···
-
(** Bushel mappers for our Markdown extensions and utilities
-
-
This module provides mappers to convert Bushel markdown extensions to different
-
output formats. There are two main mappers:
-
-
1. {!make_bushel_inline_mapper} - Full sidenote mode for the main website
-
- Converts Bushel links to interactive sidenotes
-
- Includes entry previews, contact info, footnotes
-
- Used for the main site HTML rendering
-
-
2. {!make_bushel_link_only_mapper} - Plain HTML mode for feeds and simple output
-
- Converts Bushel links to regular HTML <a> tags
-
- Automatically cleans up link text that contains Bushel slugs
-
- Used for Atom feeds, RSS, search indexing
-
- Images need .webp extension added (handled by calling code)
-
-
For plain text output (search, LLM), use {!markdown_to_plaintext}.
-
*)
-
-
(* Sidenote data types - reuse existing Bushel types *)
-
type sidenote_data =
-
| Contact_note of Contact.t * string (* contact data + trigger text *)
-
| Paper_note of Paper.t * string
-
| Idea_note of Idea.t * string
-
| Note_note of Note.t * string
-
| Project_note of Project.t * string
-
| Video_note of Video.t * string
-
| Footnote_note of string * Cmarkit.Block.t * string
-
(* slug, block content, trigger text *)
-
-
type Cmarkit.Inline.t += Side_note of sidenote_data
-
-
let authorlink = Cmarkit.Meta.key ()
-
-
let make_authorlink label =
-
let meta = Cmarkit.Meta.tag authorlink (Cmarkit.Label.meta label) in
-
Cmarkit.Label.with_meta meta label
-
;;
-
-
let sluglink = Cmarkit.Meta.key ()
-
-
let make_sluglink label =
-
let meta = Cmarkit.Meta.tag sluglink (Cmarkit.Label.meta label) in
-
Cmarkit.Label.with_meta meta label
-
;;
-
-
let with_bushel_links = function
-
| `Def _ as ctx -> Cmarkit.Label.default_resolver ctx
-
| `Ref (_, _, (Some _ as def)) -> def
-
| `Ref (_, ref, None) ->
-
let txt = Cmarkit.Label.key ref in
-
(match txt.[0] with
-
| '@' -> Some (make_authorlink ref)
-
| ':' -> Some (make_sluglink ref)
-
| '#' -> if txt.[1] = '#' then Some (make_sluglink ref) else None
-
| _ -> None)
-
;;
-
-
let strip_handle s =
-
if s.[0] = '@' || s.[0] = ':'
-
then String.sub s 1 (String.length s - 1)
-
else if s.[0] = '#' && s.[1] = '#'
-
then String.sub s 2 (String.length s - 2)
-
else s
-
;;
-
-
(* FIXME use Tags *)
-
let is_bushel_slug = String.starts_with ~prefix:":"
-
let is_tag_slug link =
-
String.starts_with ~prefix:"##" link &&
-
not (String.starts_with ~prefix:"###" link)
-
-
let is_type_filter_slug = String.starts_with ~prefix:"###"
-
let is_contact_slug = String.starts_with ~prefix:"@"
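A quick behavioural sketch of the slug predicates and strip_handle on a few hypothetical reference strings, matching the prefixes handled above.

(* Sketch: expected results for hypothetical inputs. *)
let _slug_examples () =
  assert (is_bushel_slug ":unikernels" && strip_handle ":unikernels" = "unikernels");
  assert (is_tag_slug "##ocaml" && strip_handle "##ocaml" = "ocaml");
  assert (is_type_filter_slug "###note");
  assert (is_contact_slug "@avsm" && strip_handle "@avsm" = "avsm")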
-
-
let text_of_inline lb =
-
let open Cmarkit in
-
Inline.to_plain_text ~break_on_soft:false lb
-
|> fun r -> String.concat "\n" (List.map (String.concat "") r)
-
;;
-
-
let link_target_is_bushel ?slugs lb =
-
let open Cmarkit in
-
let ref = Inline.Link.reference lb in
-
match ref with
-
| `Inline (ld, _) ->
-
let dest = Link_definition.dest ld in
-
(match dest with
-
| Some (url, _) when is_bushel_slug url ->
-
(match slugs with
-
| Some s -> Hashtbl.replace s url ()
-
| _ -> ());
-
Some (url, Inline.Link.text lb |> text_of_inline)
-
| Some (url, _) when is_tag_slug url ->
-
(* Return the tag URL unchanged - will be handled by renderer *)
-
Some (url, Inline.Link.text lb |> text_of_inline)
-
| Some (url, _) when is_contact_slug url ->
-
Some (url, Inline.Link.text lb |> text_of_inline)
-
| _ -> None)
-
| _ -> None
-
;;
-
-
let image_target_is_bushel lb =
-
let open Cmarkit in
-
let ref = Inline.Link.reference lb in
-
match ref with
-
| `Inline (ld, _) ->
-
let dest = Link_definition.dest ld in
-
(match dest with
-
| Some (url, _) when is_bushel_slug url ->
-
let alt = Link_definition.title ld in
-
let dir =
-
Inline.Link.text lb
-
|> Inline.to_plain_text ~break_on_soft:false
-
|> fun r -> String.concat "\n" (List.map (String.concat "") r)
-
in
-
Some (url, alt, dir)
-
| _ -> None)
-
| _ -> None
-
;;
-
-
let rewrite_bushel_link_reference entries slug title meta =
-
let open Cmarkit in
-
let s = strip_handle slug in
-
(* Check if it's a tag, contact, or entry *)
-
if is_tag_slug slug then
-
(* Tag link - keep the ## prefix in dest for renderer to detect *)
-
let txt = Inline.Text (title, meta) in
-
let ld = Link_definition.make ~dest:(slug, meta) () in
-
let ll = `Inline (ld, meta) in
-
let ld = Inline.Link.make txt ll in
-
Mapper.ret (Inline.Link (ld, meta))
-
else if is_contact_slug slug then
-
(* Contact sidenote *)
-
match Contact.find_by_handle (Entry.contacts entries) s with
-
| Some c ->
-
let sidenote = Side_note (Contact_note (c, title)) in
-
Mapper.ret sidenote
-
| None ->
-
(* Contact not found, fallback to regular link *)
-
let txt = Inline.Text (title, meta) in
-
let ld = Link_definition.make ~dest:("", meta) () in
-
let ll = `Inline (ld, meta) in
-
let ld = Inline.Link.make txt ll in
-
Mapper.ret (Inline.Link (ld, meta))
-
else
-
(* Check entry type and generate appropriate sidenote *)
-
match Entry.lookup entries s with
-
| Some (`Paper p) ->
-
let sidenote = Side_note (Paper_note (p, title)) in
-
Mapper.ret sidenote
-
| Some (`Idea i) ->
-
let sidenote = Side_note (Idea_note (i, title)) in
-
Mapper.ret sidenote
-
| Some (`Note n) ->
-
let sidenote = Side_note (Note_note (n, title)) in
-
Mapper.ret sidenote
-
| Some (`Project p) ->
-
let sidenote = Side_note (Project_note (p, title)) in
-
Mapper.ret sidenote
-
| Some (`Video v) ->
-
let sidenote = Side_note (Video_note (v, title)) in
-
Mapper.ret sidenote
-
| None ->
-
(* Entry not found, use regular link *)
-
let dest = Entry.lookup_site_url entries s in
-
let txt = Inline.Text (title, meta) in
-
let ld = Link_definition.make ~dest:(dest, meta) () in
-
let ll = `Inline (ld, meta) in
-
let ld = Inline.Link.make txt ll in
-
Mapper.ret (Inline.Link (ld, meta))
-
;;
-
-
let rewrite_bushel_image_reference entries url title dir meta =
-
let open Cmarkit in
-
let dest =
-
match Entry.lookup entries (strip_handle url) with
-
| Some ent -> Entry.site_url ent (* This is a video *)
-
| None -> Printf.sprintf "/images/%s" (strip_handle url)
-
in
-
let txt = Inline.Text (dir, meta) in
-
let ld = Link_definition.make ?title ~dest:(dest, meta) () in
-
let ll = `Inline (ld, meta) in
-
let ld = Inline.Link.make txt ll in
-
let ent_il = Inline.Image (ld, meta) in
-
Mapper.ret ent_il
-
;;
-
-
type Cmarkit.Inline.t += Obsidian_link of string
-
-
let rewrite_label_reference_to_obsidian lb meta =
-
let open Cmarkit in
-
match Inline.Link.referenced_label lb with
-
| None -> Mapper.default
-
| Some l ->
-
let m = Label.meta l in
-
(match Meta.find authorlink m with
-
| Some () ->
-
let slug = Label.key l in
-
let target = Printf.sprintf "[[%s]]" slug in
-
let txt = Obsidian_link target in
-
Mapper.ret txt
-
| None ->
-
(match Meta.find sluglink m with
-
| None -> Mapper.default
-
| Some () ->
-
let slug = Label.key l in
-
if is_bushel_slug slug
-
then (
-
let target = Printf.sprintf "[[%s]]" (strip_handle slug) in
-
let txt = Obsidian_link target in
-
Mapper.ret txt)
-
else if is_tag_slug slug
-
then (
-
let target = Printf.sprintf "#%s" (strip_handle slug) in
-
let txt = Inline.Text (target, meta) in
-
Mapper.ret txt)
-
else Mapper.default))
-
;;
-
-
let make_bushel_link_only_mapper _defs entries =
-
let open Cmarkit in
-
fun _m ->
-
function
-
| Inline.Link (lb, meta) ->
-
(* Convert Bushel link references to regular links (not sidenotes) *)
-
(match link_target_is_bushel lb with
-
| Some (url, title) ->
-
let s = strip_handle url in
-
let dest = Entry.lookup_site_url entries s in
-
(* If title is itself a Bushel slug, use the entry title instead *)
-
let link_text =
-
if is_bushel_slug title then
-
match Entry.lookup entries (strip_handle title) with
-
| Some ent -> Entry.title ent
-
| None -> title
-
else title
-
in
-
let txt = Inline.Text (link_text, meta) in
-
let ld = Link_definition.make ~dest:(dest, meta) () in
-
let ll = `Inline (ld, meta) in
-
let ld = Inline.Link.make txt ll in
-
Mapper.ret (Inline.Link (ld, meta))
-
| None ->
-
(match Inline.Link.referenced_label lb with
-
| Some l ->
-
let m = Label.meta l in
-
(* Check for authorlink (contact) first *)
-
(match Meta.find authorlink m with
-
| Some () ->
-
let slug = Label.key l in
-
let s = strip_handle slug in
-
(match Contact.find_by_handle (Entry.contacts entries) s with
-
| Some c ->
-
let name = Contact.name c in
-
(match Contact.best_url c with
-
| Some dest ->
-
let txt = Inline.Text (name, meta) in
-
let ld = Link_definition.make ~dest:(dest, meta) () in
-
let ll = `Inline (ld, meta) in
-
let ld = Inline.Link.make txt ll in
-
Mapper.ret (Inline.Link (ld, meta))
-
| None ->
-
(* No URL for contact, just use name as text *)
-
let txt = Inline.Text (name, meta) in
-
Mapper.ret txt)
-
| None ->
-
(* Contact not found, use title as fallback text *)
-
let title = Inline.Link.text lb |> text_of_inline in
-
let txt = Inline.Text (title, meta) in
-
Mapper.ret txt)
-
| None ->
-
(* Check for sluglink *)
-
(match Meta.find sluglink m with
-
| Some () ->
-
let slug = Label.key l in
-
if is_bushel_slug slug || is_tag_slug slug || is_contact_slug slug
-
then (
-
let s = strip_handle slug in
-
let dest = Entry.lookup_site_url entries s in
-
let title = Inline.Link.text lb |> text_of_inline in
-
(* If link text is itself a Bushel slug, use the entry title instead *)
-
let link_text =
-
let trimmed = String.trim title in
-
if is_bushel_slug trimmed then
-
match Entry.lookup entries (strip_handle trimmed) with
-
| Some ent -> Entry.title ent
-
| None -> title
-
else title
-
in
-
let txt = Inline.Text (link_text, meta) in
-
let ld = Link_definition.make ~dest:(dest, meta) () in
-
let ll = `Inline (ld, meta) in
-
let ld = Inline.Link.make txt ll in
-
Mapper.ret (Inline.Link (ld, meta)))
-
else Mapper.default
-
| None -> Mapper.default))
-
| None -> Mapper.default))
-
| _ -> Mapper.default
-
;;
-
-
let rewrite_footnote_reference ?footnote_map entries defs lb _meta =
-
let open Cmarkit in
-
match Inline.Link.referenced_label lb with
-
| None -> Mapper.default
-
| Some l ->
-
(match Inline.Link.reference_definition defs lb with
-
| Some (Block.Footnote.Def (fn, _)) ->
-
let label_key = Label.key l in
-
let slug, trigger_text =
-
match footnote_map with
-
| Some fm ->
-
(match Hashtbl.find_opt fm label_key with
-
| Some (slug, text) -> (slug, text)
-
| None ->
-
let num = Hashtbl.length fm + 1 in
-
let slug = Printf.sprintf "fn-%d" num in
-
let text = Printf.sprintf "[%d]" num in
-
Hashtbl.add fm label_key (slug, text);
-
(slug, text))
-
| None ->
-
(* No map provided, use label key as slug *)
-
let slug = Printf.sprintf "fn-%s" (String.sub label_key 1 (String.length label_key - 1)) in
-
let text = "[?]" in
-
(slug, text)
-
in
-
(* Process the block to convert Bushel link references to regular links (not sidenotes) *)
-
let block = Block.Footnote.block fn in
-
let link_mapper = Mapper.make ~inline:(make_bushel_link_only_mapper defs entries) () in
-
let processed_block =
-
match Mapper.map_block link_mapper block with
-
| Some b -> b
-
| None -> block
-
in
-
let sidenote = Side_note (Footnote_note (slug, processed_block, trigger_text)) in
-
Mapper.ret sidenote
-
| _ -> Mapper.default)
-
-
let rewrite_label_reference ?slugs entries lb meta =
-
let open Cmarkit in
-
match Inline.Link.referenced_label lb with
-
| None -> Mapper.default
-
| Some l ->
-
let m = Label.meta l in
-
(match Meta.find authorlink m with
-
| Some () ->
-
let slug = Label.key l in
-
(match Contact.find_by_handle (Entry.contacts entries) (strip_handle slug) with
-
| Some c ->
-
let trigger_text = Contact.name c in
-
let sidenote = Side_note (Contact_note (c, trigger_text)) in
-
Mapper.ret sidenote
-
| None ->
-
(* Contact not found, fallback to text *)
-
let txt = Inline.Text ("Unknown Person", meta) in
-
Mapper.ret txt)
-
| None ->
-
(match Meta.find sluglink m with
-
| None -> Mapper.default
-
| Some () ->
-
let slug = Label.key l in
-
if is_bushel_slug slug
-
then (
-
(match slugs with
-
| Some s -> Hashtbl.replace s slug ()
-
| _ -> ());
-
let s = strip_handle slug in
-
(* Check entry type and generate appropriate sidenote *)
-
match Entry.lookup entries s with
-
| Some (`Paper p) ->
-
let trigger_text = Entry.lookup_title entries s in
-
let sidenote = Side_note (Paper_note (p, trigger_text)) in
-
Mapper.ret sidenote
-
| Some (`Idea i) ->
-
let trigger_text = Entry.lookup_title entries s in
-
let sidenote = Side_note (Idea_note (i, trigger_text)) in
-
Mapper.ret sidenote
-
| Some (`Note n) ->
-
let trigger_text = Entry.lookup_title entries s in
-
let sidenote = Side_note (Note_note (n, trigger_text)) in
-
Mapper.ret sidenote
-
| Some (`Project p) ->
-
let trigger_text = Entry.lookup_title entries s in
-
let sidenote = Side_note (Project_note (p, trigger_text)) in
-
Mapper.ret sidenote
-
| Some (`Video v) ->
-
let trigger_text = Entry.lookup_title entries s in
-
let sidenote = Side_note (Video_note (v, trigger_text)) in
-
Mapper.ret sidenote
-
| None ->
-
(* Entry not found, use regular link *)
-
let target = Entry.lookup_title entries s in
-
let dest = Entry.lookup_site_url entries s in
-
let txt = Inline.Text (target, meta) in
-
let ld = Link_definition.make ~dest:(dest, meta) () in
-
let ll = `Inline (ld, meta) in
-
let ld = Inline.Link.make txt ll in
-
Mapper.ret (Inline.Link (ld, meta)))
-
else if is_tag_slug slug
-
then (
-
let sh = strip_handle slug in
-
(* Use # as dest to prevent navigation, JavaScript will intercept *)
-
let target, dest = sh, "#" in
-
let txt = Inline.Text (target, meta) in
-
let ld = Link_definition.make ~dest:(dest, meta) () in
-
let ll = `Inline (ld, meta) in
-
let ld = Inline.Link.make txt ll in
-
let ent_il = Inline.Link (ld, meta) in
-
Mapper.ret ent_il)
-
else Mapper.default))
-
;;
-
-
let bushel_inline_mapper_to_obsidian entries _m =
-
let open Cmarkit in
-
function
-
| Inline.Link (lb, meta) ->
-
(match link_target_is_bushel lb with
-
| None -> rewrite_label_reference_to_obsidian lb meta
-
| Some (url, title) -> rewrite_bushel_link_reference entries url title meta)
-
| Inline.Image (lb, meta) ->
-
(match image_target_is_bushel lb with
-
| None -> rewrite_label_reference_to_obsidian lb meta
-
| Some (url, alt, dir) -> rewrite_bushel_image_reference entries url alt dir meta)
-
| _ -> Mapper.default
-
;;
-
-
let make_bushel_inline_mapper ?slugs ?footnote_map defs entries =
-
let open Cmarkit in
-
fun _m ->
-
function
-
| Inline.Link (lb, meta) ->
-
(* First check if this is a footnote reference *)
-
(match Inline.Link.referenced_label lb with
-
| Some l when String.starts_with ~prefix:"^" (Label.key l) ->
-
(* This is a footnote reference *)
-
rewrite_footnote_reference ?footnote_map entries defs lb meta
-
| _ ->
-
(* Not a footnote, handle as bushel link *)
-
(match link_target_is_bushel ?slugs lb with
-
| None -> rewrite_label_reference ?slugs entries lb meta
-
| Some (url, title) -> rewrite_bushel_link_reference entries url title meta))
-
| Inline.Image (lb, meta) ->
-
(match image_target_is_bushel lb with
-
| None -> rewrite_label_reference entries lb meta
-
| Some (url, alt, dir) -> rewrite_bushel_image_reference entries url alt dir meta)
-
| _ -> Mapper.default
-
;;
-
-
let scan_for_slugs entries md =
-
let open Cmarkit in
-
let slugs = Hashtbl.create 7 in
-
let doc = Doc.of_string ~strict:false ~resolver:with_bushel_links md in
-
let defs = Doc.defs doc in
-
let _ =
-
Mapper.map_doc (Mapper.make ~inline:(make_bushel_inline_mapper ~slugs defs entries) ()) doc
-
in
-
Hashtbl.fold (fun k () a -> k :: a) slugs []
-
;;
-
-
(** Validation mapper that collects broken references *)
-
let make_validation_mapper entries broken_slugs broken_contacts =
-
let open Cmarkit in
-
fun _m ->
-
function
-
| Inline.Link (lb, _meta) ->
-
(* Check inline bushel links *)
-
(match link_target_is_bushel lb with
-
| Some (url, _title) ->
-
let s = strip_handle url in
-
if is_contact_slug url then
-
(* Validate contact handle *)
-
(match Contact.find_by_handle (Entry.contacts entries) s with
-
| None -> Hashtbl.replace broken_contacts url ()
-
| Some _ -> ())
-
else if is_bushel_slug url then
-
(* Validate entry slug *)
-
(match Entry.lookup entries s with
-
| None -> Hashtbl.replace broken_slugs url ()
-
| Some _ -> ())
-
else ();
-
Mapper.default
-
| None ->
-
(* Check referenced label links *)
-
(match Inline.Link.referenced_label lb with
-
| Some l ->
-
let m = Label.meta l in
-
(* Check for contact reference *)
-
(match Meta.find authorlink m with
-
| Some () ->
-
let slug = Label.key l in
-
let handle = strip_handle slug in
-
(match Contact.find_by_handle (Entry.contacts entries) handle with
-
| None -> Hashtbl.replace broken_contacts slug ()
-
| Some _ -> ());
-
Mapper.default
-
| None ->
-
(* Check for entry slug reference *)
-
(match Meta.find sluglink m with
-
| None -> Mapper.default
-
| Some () ->
-
let slug = Label.key l in
-
if is_bushel_slug slug then (
-
let s = strip_handle slug in
-
match Entry.lookup entries s with
-
| None -> Hashtbl.replace broken_slugs slug ()
-
| Some _ -> ()
-
);
-
Mapper.default))
-
| None -> Mapper.default))
-
| _ -> Mapper.default
-
;;
-
-
(** Validate all bushel references in markdown and return broken ones *)
-
let validate_references entries md =
-
let open Cmarkit in
-
let broken_slugs = Hashtbl.create 7 in
-
let broken_contacts = Hashtbl.create 7 in
-
let doc = Doc.of_string ~strict:false ~resolver:with_bushel_links md in
-
let mapper = Mapper.make ~inline:(make_validation_mapper entries broken_slugs broken_contacts) () in
-
let _ = Mapper.map_doc mapper doc in
-
let slugs = Hashtbl.fold (fun k () a -> k :: a) broken_slugs [] in
-
let contacts = Hashtbl.fold (fun k () a -> k :: a) broken_contacts [] in
-
(slugs, contacts)
-
;;
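A small usage sketch of the validator, assuming an entries : Entry.t and a markdown body are in scope; it simply reports the dangling references.

(* Sketch: log any broken slugs or contact handles found in a markdown body. *)
let warn_broken entries body =
  let broken_slugs, broken_contacts = validate_references entries body in
  List.iter (fun s -> Printf.eprintf "broken slug: %s\n%!" s) broken_slugs;
  List.iter (fun c -> Printf.eprintf "broken contact: %s\n%!" c) broken_contacts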
-
-
(** Extract the first image URL from markdown text *)
-
let extract_first_image md =
-
let open Cmarkit in
-
(* Don't use bushel link resolver to avoid circular dependency with Entry *)
-
let doc = Doc.of_string md in
-
let found_image = ref None in
-
-
let find_image_in_inline _mapper = function
-
| Inline.Image (img, _) ->
-
(match Inline.Link.reference img with
-
| `Inline (ld, _) ->
-
(match Link_definition.dest ld with
-
| Some (url, _) when !found_image = None ->
-
found_image := Some url;
-
Mapper.default
-
| _ -> Mapper.default)
-
| _ -> Mapper.default)
-
| _ -> Mapper.default
-
in
-
-
let mapper = Mapper.make ~inline:find_image_in_inline () in
-
let _ = Mapper.map_doc mapper doc in
-
!found_image
-
;;
-
-
(** Convert markdown text to plain text, resolving bushel links to just their text *)
-
let markdown_to_plaintext _entries text =
-
let open Cmarkit in
-
(* Parse markdown with bushel link resolver *)
-
let doc = Doc.of_string ~resolver:with_bushel_links text in
-
-
(* Convert document blocks to plain text *)
-
let rec block_to_text = function
-
| Block.Blank_line _ -> ""
-
| Block.Thematic_break _ -> "\n---\n"
-
| Block.Paragraph (p, _) ->
-
let inline = Block.Paragraph.inline p in
-
Inline.to_plain_text ~break_on_soft:false inline
-
|> List.map (String.concat "") |> String.concat "\n"
-
| Block.Heading (h, _) ->
-
let inline = Block.Heading.inline h in
-
Inline.to_plain_text ~break_on_soft:false inline
-
|> List.map (String.concat "") |> String.concat "\n"
-
| Block.Block_quote (bq, _) ->
-
let blocks = Block.Block_quote.block bq in
-
block_to_text blocks
-
| Block.List (l, _) ->
-
let items = Block.List'.items l in
-
List.map (fun (item, _) ->
-
let blocks = Block.List_item.block item in
-
block_to_text blocks
-
) items |> String.concat "\n"
-
| Block.Code_block (cb, _) ->
-
let code = Block.Code_block.code cb in
-
String.concat "\n" (List.map Block_line.to_string code)
-
| Block.Html_block _ -> "" (* Skip HTML blocks for search *)
-
| Block.Link_reference_definition _ -> ""
-
| Block.Ext_footnote_definition _ -> ""
-
| Block.Blocks (blocks, _) ->
-
List.map block_to_text blocks |> String.concat "\n"
-
| _ -> ""
-
in
-
let blocks = Doc.block doc in
-
block_to_text blocks
-
;;
-
-
(** Extract all links from markdown text, including from images *)
-
let extract_all_links text =
-
let open Cmarkit in
-
let doc = Doc.of_string ~resolver:with_bushel_links text in
-
let links = ref [] in
-
-
let find_links_in_inline _mapper = function
-
| Inline.Link (lb, _) | Inline.Image (lb, _) ->
-
(* Check for inline link/image destination *)
-
(match Inline.Link.reference lb with
-
| `Inline (ld, _) ->
-
(match Link_definition.dest ld with
-
| Some (url, _) ->
-
links := url :: !links;
-
Mapper.default
-
| None -> Mapper.default)
-
| `Ref _ ->
-
(* For reference-style links/images, check if it has a referenced label *)
-
(match Inline.Link.referenced_label lb with
-
| Some l ->
-
let key = Label.key l in
-
(* Check if it's a bushel-style link *)
-
if String.length key > 0 && (key.[0] = ':' || key.[0] = '@' ||
-
(String.length key > 1 && key.[0] = '#' && key.[1] = '#')) then
-
links := key :: !links;
-
Mapper.default
-
| None -> Mapper.default))
-
| _ -> Mapper.default
-
in
-
-
let mapper = Mapper.make ~inline:find_links_in_inline () in
-
let _ = Mapper.map_doc mapper doc in
-
-
(* Deduplicate *)
-
let module StringSet = Set.Make(String) in
-
StringSet.elements (StringSet.of_list !links)
-
;;
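A sketch tying the three plain-text helpers together for search indexing; entries and note are assumed to be in scope, and the tuple shape is illustrative only.

(* Sketch: derive plain text, a thumbnail candidate and the outgoing links
   from one note's markdown body. *)
let search_fields entries note =
  let body = Note.body note in
  ( markdown_to_plaintext entries body,
    extract_first_image body,
    extract_all_links body )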
-
-
(* Reference source type for CiTO annotations *)
-
type reference_source =
-
| Paper (* CitesAsSourceDocument *)
-
| Note (* CitesAsRelated *)
-
| External (* Cites *)
-
-
(* Extract references (papers/notes with DOIs) from a note *)
-
let note_references entries default_author note =
-
let refs = ref [] in
-
-
(* Helper to format author name: extract last name from full name *)
-
let format_author_last name =
-
let parts = String.split_on_char ' ' name in
-
List.nth parts (List.length parts - 1)
-
in
-
-
(* Helper to format a citation *)
-
let format_citation ~authors ~year ~title ~publisher =
-
let author_str = match authors with
-
| [] -> ""
-
| [author] -> format_author_last author ^ " "
-
| author :: _ -> (format_author_last author) ^ " et al "
-
in
-
let pub_str = match publisher with
-
| None | Some "" -> ""
-
| Some p -> p ^ ". "
-
in
-
Printf.sprintf "%s(%d). %s. %s" author_str year title pub_str
-
in
-
-
(* Check slug_ent if it exists *)
-
(match Note.slug_ent note with
-
| Some slug ->
-
(match Entry.lookup entries slug with
-
| Some (`Paper p) ->
-
(match Paper.doi p with
-
| Some doi ->
-
let authors = Paper.authors p in
-
let year = Paper.year p in
-
let title = Paper.title p in
-
let publisher = Some (Paper.publisher p) in
-
let citation = format_citation ~authors ~year ~title ~publisher in
-
refs := (doi, citation, Paper) :: !refs
-
| None -> ())
-
| Some (`Note n) ->
-
(match Note.doi n with
-
| Some doi ->
-
let authors = match Note.author n with
-
| Some a -> [a]
-
| None -> [Contact.name default_author]
-
in
-
let (year, _, _) = Note.date n in
-
let title = Note.title n in
-
let publisher = None in
-
let citation = format_citation ~authors ~year ~title ~publisher in
-
refs := (doi, citation, Note) :: !refs
-
| None -> ())
-
| _ -> ())
-
| None -> ());
-
-
(* Scan body for bushel references *)
-
let slugs = scan_for_slugs entries (Note.body note) in
-
List.iter (fun slug ->
-
(* Strip leading : or @ from slug before lookup *)
-
let normalized_slug = strip_handle slug in
-
match Entry.lookup entries normalized_slug with
-
| Some (`Paper p) ->
-
(match Paper.doi p with
-
| Some doi ->
-
let authors = Paper.authors p in
-
let year = Paper.year p in
-
let title = Paper.title p in
-
let publisher = Some (Paper.publisher p) in
-
let citation = format_citation ~authors ~year ~title ~publisher in
-
(* Check if doi already exists in refs *)
-
if not (List.exists (fun (d, _, _) -> d = doi) !refs) then
-
refs := (doi, citation, Paper) :: !refs
-
| None -> ())
-
| Some (`Note n) ->
-
(match Note.doi n with
-
| Some doi ->
-
let authors = match Note.author n with
-
| Some a -> [a]
-
| None -> [Contact.name default_author]
-
in
-
let (year, _, _) = Note.date n in
-
let title = Note.title n in
-
let publisher = None in
-
let citation = format_citation ~authors ~year ~title ~publisher in
-
(* Check if doi already exists in refs *)
-
if not (List.exists (fun (d, _, _) -> d = doi) !refs) then
-
refs := (doi, citation, Note) :: !refs
-
| None -> ())
-
| _ -> ()
-
) slugs;
-
-
(* Scan body for external DOI URLs and resolve from cache *)
-
let body = Note.body note in
-
let doi_url_pattern = Re.Perl.compile_pat "https?://(?:dx\\.)?doi\\.org/([^)\\s\"'>]+)" in
-
let matches = Re.all doi_url_pattern body in
-
let doi_entries = Entry.doi_entries entries in
-
List.iter (fun group ->
-
try
-
let encoded_doi = Re.Group.get group 1 in
-
(* URL decode the DOI *)
-
let doi = Uri.pct_decode encoded_doi in
-
(* Check if doi already exists in refs *)
-
if not (List.exists (fun (d, _, _) -> d = doi) !refs) then
-
(* Look up in DOI cache *)
-
match Doi_entry.find_by_doi doi_entries doi with
-
| Some doi_entry when doi_entry.status = Resolved ->
-
let citation = format_citation
-
~authors:doi_entry.authors
-
~year:doi_entry.year
-
~title:doi_entry.title
-
~publisher:(Some doi_entry.publisher)
-
in
-
refs := (doi, citation, External) :: !refs
-
| _ ->
-
(* Not found in cache, add minimal citation with just the DOI *)
-
refs := (doi, doi, External) :: !refs
-
with _ -> ()
-
) matches;
-
-
(* Scan body for publisher URLs (Elsevier, ScienceDirect, IEEE, Nature, ACM, Sage, UPenn, Springer, Taylor & Francis, OUP) and resolve from cache *)
-
let publisher_pattern = Re.Perl.compile_pat "https?://(?:(?:www\\.)?(?:linkinghub\\.elsevier\\.com|(?:www\\.)?sciencedirect\\.com/science/article|ieeexplore\\.ieee\\.org|academic\\.oup\\.com|nature\\.com|journals\\.sagepub\\.com|garfield\\.library\\.upenn\\.edu|link\\.springer\\.com)/[^)\\s\"'>]+|(?:dl\\.acm\\.org|(?:www\\.)?tandfonline\\.com)/doi(?:/pdf)?/10\\.[^)\\s\"'>]+)" in
-
let publisher_matches = Re.all publisher_pattern body in
-
List.iter (fun group ->
-
try
-
let url = Re.Group.get group 0 in
-
(* Look up in DOI cache by source URL *)
-
match Doi_entry.find_by_url doi_entries url with
-
| Some doi_entry when doi_entry.status = Resolved ->
-
let doi = doi_entry.doi in
-
(* Check if this DOI already exists in refs *)
-
if not (List.exists (fun (d, _, _) -> d = doi) !refs) then
-
let citation = format_citation
-
~authors:doi_entry.authors
-
~year:doi_entry.year
-
~title:doi_entry.title
-
~publisher:(Some doi_entry.publisher)
-
in
-
refs := (doi, citation, External) :: !refs
-
| _ ->
-
(* Not found in cache, skip it *)
-
()
-
with _ -> ()
-
) publisher_matches;
-
-
List.rev !refs
-
;;
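A sketch of how the references above might be rendered with their CiTO relations, assuming entries, a default author me and a note are in scope; the relation strings follow the comments on reference_source.

(* Sketch: one line per reference, tagged with its CiTO relation. *)
let cito_lines entries me note =
  note_references entries me note
  |> List.map (fun (doi, citation, src) ->
    let rel = match src with
      | Paper -> "citesAsSourceDocument"
      | Note -> "citesAsRelated"
      | External -> "cites"
    in
    Printf.sprintf "%s: %s https://doi.org/%s" rel citation doi)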
-
-73
stack/bushel/lib/md.mli
···
-
val make_bushel_inline_mapper
-
: ?slugs:(string, unit) Hashtbl.t
-
-> ?footnote_map:(string, string * string) Hashtbl.t
-
-> Cmarkit.Label.defs
-
-> Entry.t
-
-> 'a
-
-> Cmarkit.Inline.t
-
-> Cmarkit.Inline.t Cmarkit.Mapper.result
-
-
val make_bushel_link_only_mapper
-
: Cmarkit.Label.defs
-
-> Entry.t
-
-> 'a
-
-> Cmarkit.Inline.t
-
-> Cmarkit.Inline.t Cmarkit.Mapper.result
-
-
type Cmarkit.Inline.t += Obsidian_link of string
-
-
type sidenote_data =
-
| Contact_note of Contact.t * string
-
| Paper_note of Paper.t * string
-
| Idea_note of Idea.t * string
-
| Note_note of Note.t * string
-
| Project_note of Project.t * string
-
| Video_note of Video.t * string
-
| Footnote_note of string * Cmarkit.Block.t * string
-
-
type Cmarkit.Inline.t += Side_note of sidenote_data
-
-
val bushel_inline_mapper_to_obsidian
-
: Entry.t
-
-> 'a
-
-> Cmarkit.Inline.t
-
-> Cmarkit.Inline.t Cmarkit.Mapper.result
-
-
val with_bushel_links
-
: [< `Def of Cmarkit.Label.t option * Cmarkit.Label.t
-
| `Ref of 'a * Cmarkit.Label.t * Cmarkit.Label.t option
-
]
-
-> Cmarkit.Label.t option
-
-
val scan_for_slugs : Entry.t -> string -> string list
-
-
(** Validate all bushel references in markdown and return broken ones.
-
Returns (broken_slugs, broken_contacts) where each list contains
-
the full reference string (e.g., ":missing-slug", "@unknown-handle") *)
-
val validate_references : Entry.t -> string -> string list * string list
-
-
(** Extract the first image URL from markdown text *)
-
val extract_first_image : string -> string option
-
-
(** Convert markdown text to plain text, resolving bushel links to just their text *)
-
val markdown_to_plaintext : 'a -> string -> string
-
-
val is_bushel_slug : string -> bool
-
val is_tag_slug : string -> bool
-
val is_type_filter_slug : string -> bool
-
val is_contact_slug : string -> bool
-
val strip_handle : string -> string
-
-
(** Extract all links from markdown text, including from images (internal and external) *)
-
val extract_all_links : string -> string list
-
-
(** Type indicating the source of a reference for CiTO annotation *)
-
type reference_source =
-
| Paper (** CitesAsSourceDocument *)
-
| Note (** CitesAsRelated *)
-
| External (** Cites *)
-
-
(** Extract references (papers/notes with DOIs) from a note.
-
Returns a list of (DOI, citation_string, reference_source) tuples.
-
Citation format: "Last, First (Year). Title. Publisher. https://doi.org/the/doi" *)
-
val note_references : Entry.t -> Contact.t -> Note.t -> (string * string * reference_source) list
-230
stack/bushel/lib/note.ml
···
-
type t =
-
{ title : string
-
; date : Ptime.date
-
; slug : string
-
; body : string
-
; tags : string list
-
; draft : bool
-
; updated : Ptime.date option
-
; sidebar : string option
-
; index_page : bool
-
; perma : bool (* Permanent article that will receive a DOI *)
-
; doi : string option (* DOI identifier for permanent articles *)
-
; synopsis: string option
-
; titleimage: string option
-
; via : (string * string) option
-
; slug_ent : string option (* Optional reference to another entry *)
-
; source : string option (* Optional source for news-style notes *)
-
; url : string option (* Optional external URL for news-style notes *)
-
; author : string option (* Optional author for news-style notes *)
-
; category : string option (* Optional category for news-style notes *)
-
}
-
-
type ts = t list
-
-
let link { body; via; slug; _ } =
-
match body, via with
-
| "", Some (l, u) -> `Ext (l, u)
-
| "", None -> failwith (slug ^ ": note external without via, via-url")
-
| _, _ -> `Local slug
-
;;
-
-
let origdate { date; _ } = Option.get @@ Ptime.of_date date
-
-
let date { date; updated; _ } =
-
match updated with
-
| None -> date
-
| Some v -> v
-
;;
-
-
let datetime v = Option.get @@ Ptime.of_date @@ date v
-
let compare a b = Ptime.compare (datetime b) (datetime a)
-
let slug { slug; _ } = slug
-
let body { body; _ } = body
-
let title { title; _ } = title
-
let tags { tags; _ } = tags
-
let sidebar { sidebar; _ } = sidebar
-
let synopsis { synopsis; _ } = synopsis
-
let draft { draft; _ } = draft
-
let perma { perma; _ } = perma
-
let doi { doi; _ } = doi
-
let titleimage { titleimage; _ } = titleimage
-
let slug_ent { slug_ent; _ } = slug_ent
-
let source { source; _ } = source
-
let url { url; _ } = url
-
let author { author; _ } = author
-
let category { category; _ } = category
-
let lookup slug notes = List.find (fun n -> n.slug = slug) notes
-
let read_file file = In_channel.(with_open_bin file input_all)
-
let words { body; _ } = Util.count_words body
-
-
-
let of_md fname =
-
(* TODO fix Jekyll_post to basename the fname all the time *)
-
match Jekyll_post.of_string ~fname:(Filename.basename fname) (read_file fname) with
-
| Error (`Msg m) -> failwith ("note_of_md: " ^ m)
-
| Ok jp ->
-
let fields = jp.Jekyll_post.fields in
-
let { Jekyll_post.title; date; slug; body; _ } = jp in
-
let date, _ = Ptime.to_date_time date in
-
let index_page =
-
match Jekyll_format.find "index_page" fields with
-
| Some (`Bool v) -> v
-
| _ -> false
-
in
-
let perma =
-
match Jekyll_format.find "perma" fields with
-
| Some (`Bool v) -> v
-
| _ -> false
-
in
-
let updated =
-
match Jekyll_format.find "updated" fields with
-
| Some (`String v) -> Some (Jekyll_format.parse_date_exn v |> Ptime.to_date)
-
| _ -> None
-
in
-
let draft =
-
match Jekyll_format.find "draft" fields with
-
| Some (`Bool v) -> v
-
| _ -> false
-
in
-
let titleimage =
-
match Jekyll_format.find "titleimage" fields with
-
| Some (`String v) -> Some v
-
| _ -> None
-
in
-
let synopsis =
-
match Jekyll_format.find "synopsis" fields with
-
| Some (`String v) -> Some v
-
| _ -> None
-
in
-
let sidebar =
-
try Some (read_file ("data/sidebar/" ^ Filename.basename fname)) with
-
| _ -> None
-
in
-
let tags =
-
match Jekyll_format.find "tags" fields with
-
| Some (`A l) ->
-
List.filter_map
-
(function
-
| `String s -> Some s
-
| _ -> None)
-
l
-
| _ -> []
-
in
-
let via =
-
match Jekyll_format.find "via" fields, Jekyll_format.find "via-url" fields with
-
| Some (`String a), Some (`String b) -> Some (a, b)
-
| None, Some (`String b) -> Some ("", b)
-
| _ -> None
-
in
-
let slug_ent =
-
match Jekyll_format.find "slug_ent" fields with
-
| Some (`String v) -> Some v
-
| _ -> None
-
in
-
let source =
-
match Jekyll_format.find "source" fields with
-
| Some (`String v) -> Some v
-
| _ -> None
-
in
-
let url =
-
match Jekyll_format.find "url" fields with
-
| Some (`String v) -> Some v
-
| _ -> None
-
in
-
let author =
-
match Jekyll_format.find "author" fields with
-
| Some (`String v) -> Some v
-
| _ -> None
-
in
-
let category =
-
match Jekyll_format.find "category" fields with
-
| Some (`String v) -> Some v
-
| _ -> None
-
in
-
let doi =
-
match Jekyll_format.find "doi" fields with
-
| Some (`String v) -> Some v
-
| _ -> None
-
in
-
{ title; draft; date; slug; synopsis; titleimage; index_page; perma; doi; body; via; updated; tags; sidebar; slug_ent; source; url; author; category }
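A usage sketch for of_md under an assumed data/notes layout: load every markdown file in a directory, drop drafts, and sort newest first.

(* Sketch only: the directory path and layout are hypothetical. *)
let load_notes dir =
  Sys.readdir dir
  |> Array.to_list
  |> List.filter (fun f -> Filename.check_suffix f ".md")
  |> List.map (fun f -> of_md (Filename.concat dir f))
  |> List.filter (fun n -> not (draft n))
  |> List.sort compare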
-
-
(* TODO:claude *)
-
let typesense_schema =
-
let open Ezjsonm in
-
dict [
-
("name", string "notes");
-
("fields", list (fun d -> dict d) [
-
[("name", string "id"); ("type", string "string")];
-
[("name", string "title"); ("type", string "string")];
-
[("name", string "content"); ("type", string "string")];
-
[("name", string "date"); ("type", string "string")];
-
[("name", string "date_timestamp"); ("type", string "int64")];
-
[("name", string "tags"); ("type", string "string[]"); ("facet", bool true)];
-
[("name", string "body"); ("type", string "string"); ("optional", bool true)];
-
[("name", string "draft"); ("type", string "bool")];
-
[("name", string "synopsis"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "thumbnail_url"); ("type", string "string"); ("optional", bool true)];
-
[("name", string "type"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];
-
[("name", string "status"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];
-
[("name", string "related_papers"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "related_projects"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "related_contacts"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "attachments"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "source"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];
-
[("name", string "url"); ("type", string "string"); ("optional", bool true)];
-
[("name", string "author"); ("type", string "string"); ("optional", bool true)];
-
[("name", string "category"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];
-
[("name", string "slug_ent"); ("type", string "string"); ("optional", bool true)];
-
[("name", string "words"); ("type", string "int32"); ("optional", bool true)];
-
]);
-
("default_sorting_field", string "date_timestamp");
-
]
-
-
(** TODO:claude Pretty-print a note with ANSI formatting *)
-
let pp ppf n =
-
let open Fmt in
-
pf ppf "@[<v>";
-
pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Note";
-
pf ppf "%a: %a@," (styled `Bold string) "Slug" string (slug n);
-
pf ppf "%a: %a@," (styled `Bold string) "Title" string (title n);
-
let (year, month, day) = date n in
-
pf ppf "%a: %04d-%02d-%02d@," (styled `Bold string) "Date" year month day;
-
(match n.updated with
-
| Some (y, m, d) -> pf ppf "%a: %04d-%02d-%02d@," (styled `Bold string) "Updated" y m d
-
| None -> ());
-
pf ppf "%a: %b@," (styled `Bold string) "Draft" (draft n);
-
pf ppf "%a: %b@," (styled `Bold string) "Index Page" n.index_page;
-
pf ppf "%a: %b@," (styled `Bold string) "Perma" (perma n);
-
(match doi n with
-
| Some d -> pf ppf "%a: %a@," (styled `Bold string) "DOI" string d
-
| None -> ());
-
(match synopsis n with
-
| Some syn -> pf ppf "%a: %a@," (styled `Bold string) "Synopsis" string syn
-
| None -> ());
-
(match titleimage n with
-
| Some img -> pf ppf "%a: %a@," (styled `Bold string) "Title Image" string img
-
| None -> ());
-
(match n.via with
-
| Some (label, url) ->
-
if label <> "" then
-
pf ppf "%a: %a (%a)@," (styled `Bold string) "Via" string label string url
-
else
-
pf ppf "%a: %a@," (styled `Bold string) "Via" string url
-
| None -> ());
-
let t = tags n in
-
if t <> [] then
-
pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Tags" (list ~sep:comma string) t;
-
(match sidebar n with
-
| Some sb ->
-
pf ppf "@,";
-
pf ppf "%a:@," (styled `Bold string) "Sidebar";
-
pf ppf "%a@," string sb
-
| None -> ());
-
let bd = body n in
-
if bd <> "" then begin
-
pf ppf "@,";
-
pf ppf "%a:@," (styled `Bold string) "Body";
-
pf ppf "%a@," string bd;
-
end;
-
pf ppf "@]"
-49
stack/bushel/lib/note.mli
···
-
type t =
-
{ title : string
-
; date : Ptime.date
-
; slug : string
-
; body : string
-
; tags : string list
-
; draft : bool
-
; updated : Ptime.date option
-
; sidebar : string option
-
; index_page : bool
-
; perma : bool
-
; doi : string option
-
; synopsis: string option
-
; titleimage: string option
-
; via : (string * string) option
-
; slug_ent : string option
-
; source : string option
-
; url : string option
-
; author : string option
-
; category : string option
-
}
-
-
type ts = t list
-
-
val link : t -> [> `Ext of string * string | `Local of string ]
-
val origdate : t -> Ptime.t
-
val date : t -> Ptime.date
-
val datetime : t -> Ptime.t
-
val compare : t -> t -> int
-
val slug : t -> string
-
val body : t -> string
-
val title : t -> string
-
val draft : t -> bool
-
val perma : t -> bool
-
val doi : t -> string option
-
val synopsis : t -> string option
-
val titleimage : t -> string option
-
val slug_ent : t -> string option
-
val source : t -> string option
-
val url : t -> string option
-
val author : t -> string option
-
val category : t -> string option
-
val tags : t -> string list
-
val sidebar : t -> string option
-
val lookup : string -> t list -> t
-
val words : t -> int
-
val of_md : string -> t
-
val typesense_schema : Ezjsonm.value
-
val pp : Format.formatter -> t -> unit
-373
stack/bushel/lib/paper.ml
···
-
module J = Ezjsonm
-
-
type paper = Ezjsonm.value
-
-
type t =
-
{ slug : string
-
; ver : string
-
; paper : paper
-
; abstract : string
-
; latest : bool
-
}
-
-
type ts = t list
-
-
let key y k = J.find y [ k ]
-
-
let slugs ts =
-
List.fold_left (fun acc t -> if List.mem t.slug acc then acc else t.slug :: acc) [] ts
-
;;
-
-
let slug { slug; _ } = slug
-
let title { paper; _ } : string = key paper "title" |> J.get_string
-
let authors { paper; _ } : string list = key paper "author" |> J.get_list J.get_string
-
-
let project_slugs { paper; _ } : string list =
-
try key paper "projects" |> J.get_list J.get_string with
-
| _ -> []
-
;;
-
-
let slides { paper; _ } : string list =
-
try key paper "slides" |> J.get_list J.get_string with
-
| _ -> []
-
;;
-
-
let bibtype { paper; _ } : string = key paper "bibtype" |> J.get_string
-
-
let journal { paper; _ } =
-
try key paper "journal" |> J.get_string with
-
| Not_found ->
-
failwith
-
(Printf.sprintf "no journal found for %s\n%!" (Ezjsonm.value_to_string paper))
-
;;
-
-
(** TODO:claude Helper to extract raw JSON *)
-
let raw_json { paper; _ } = paper
-
-
let doi { paper; _ } =
-
try Some (key paper "doi" |> J.get_string) with
-
| _ -> None
-
;;
-
-
let volume { paper; _ } =
-
try Some (key paper "volume" |> J.get_string) with
-
| _ -> None
-
;;
-
-
let video { paper; _ } =
-
try Some (key paper "video" |> J.get_string) with
-
| _ -> None
-
;;
-
-
let issue { paper; _ } =
-
try Some (key paper "number" |> J.get_string) with
-
| _ -> None
-
;;
-
-
let url { paper; _ } =
-
try Some (key paper "url" |> J.get_string) with
-
| _ -> None
-
;;
-
-
let pages { paper; _ } = try key paper "pages" |> J.get_string with _ -> ""
-
let abstract { abstract; _ } = abstract
-
-
let institution { paper; _ } =
-
try key paper "institution" |> J.get_string with
-
| Not_found ->
-
failwith
-
(Printf.sprintf "no institution found for %s\n%!" (Ezjsonm.value_to_string paper))
-
;;
-
-
let number { paper; _ } =
-
try Some (key paper "number" |> J.get_string) with
-
| Not_found -> None
-
;;
-
-
let editor { paper; _ } = key paper "editor" |> J.get_string
-
let isbn { paper; _ } = key paper "isbn" |> J.get_string
-
let bib { paper; _ } = key paper "bib" |> J.get_string
-
let year { paper; _ } = key paper "year" |> J.get_string |> int_of_string
-
-
let publisher { paper; _ } =
-
try key paper "publisher" |> J.get_string with
-
| Not_found -> ""
-
;;
-
-
let booktitle { paper; _ } =
-
let r = key paper "booktitle" |> J.get_string |> Bytes.of_string in
-
Bytes.set r 0 (Char.lowercase_ascii (Bytes.get r 0));
-
String.of_bytes r
-
;;
-
-
let date { paper; _ } =
-
let m =
-
try
-
match String.lowercase_ascii (key paper "month" |> J.get_string) with
-
| "jan" -> 1
-
| "feb" -> 2
-
| "mar" -> 3
-
| "apr" -> 4
-
| "may" -> 5
-
| "jun" -> 6
-
| "jul" -> 7
-
| "aug" -> 8
-
| "sep" -> 9
-
| "oct" -> 10
-
| "nov" -> 11
-
| "dec" -> 12
-
| _ -> 1
-
with
-
| Not_found -> 1
-
in
-
let y =
-
try key paper "year" |> J.get_string |> int_of_string with
-
| Not_found ->
-
failwith (Printf.sprintf "no year found for %s" (Ezjsonm.value_to_string paper))
-
in
-
y, m, 1
-
;;
-
-
let datetime p = Option.get @@ Ptime.of_date @@ date p
-
-
let compare p2 p1 =
-
let d1 =
-
Ptime.of_date
-
(try date p1 with
-
| _ -> 1977, 1, 1)
-
|> Option.get
-
in
-
let d2 =
-
Ptime.of_date
-
(try date p2 with
-
| _ -> 1977, 1, 1)
-
|> Option.get
-
in
-
Ptime.compare d1 d2
-
;;
-
-
let get_papers ~slug ts =
-
List.filter (fun p -> p.slug = slug && not p.latest) ts |> List.sort compare
-
;;
-
-
let read_file file = In_channel.(with_open_bin file input_all)
-
-
let of_md ~slug ~ver fname =
-
(* TODO fix Jekyll_post to not error on no date *)
-
let fname' = "2000-01-01-" ^ Filename.basename fname in
-
match Jekyll_post.of_string ~fname:fname' (read_file fname) with
-
| Error (`Msg m) -> failwith ("paper_of_md: " ^ m)
-
| Ok jp ->
-
let fields = jp.Jekyll_post.fields |> Jekyll_format.fields_to_yaml in
-
let { Jekyll_post.body; _ } = jp in
-
{ slug; ver; abstract = body; paper = fields; latest = false }
-
;;
-
-
let tv (l : t list) =
-
let h = Hashtbl.create 7 in
-
List.iter
-
(fun { slug; ver; _ } ->
-
match Hashtbl.find_opt h slug with
-
| None -> Hashtbl.add h slug [ ver ]
-
| Some l ->
-
let l = ver :: l in
-
let l = List.sort Stdlib.compare l in
-
Hashtbl.replace h slug l)
-
l;
-
List.map
-
(fun p ->
-
let latest = Hashtbl.find h p.slug |> List.rev |> List.hd in
-
let latest = p.ver = latest in
-
{ p with latest })
-
l
-
;;
-
-
let lookup ts slug = List.find_opt (fun t -> t.slug = slug && t.latest) ts
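A tiny sketch combining tv and lookup: mark latest versions first, then resolve a slug to its newest copy (the slug value is whatever the caller supplies).

(* Sketch: latest copy of a paper slug, if any. *)
let latest_version papers slug = lookup (tv papers) slug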
-
-
let tag_of_bibtype bt =
-
match String.lowercase_ascii bt with
-
| "article" -> "journal"
-
| "inproceedings" -> "conference"
-
| "techreport" -> "report"
-
| "misc" -> "preprint"
-
| "book" -> "book"
-
| x -> x
-
;;
-
-
let tags { paper; _ } =
-
let tags f =
-
try key paper f |> J.get_list J.get_string with
-
| _ -> []
-
in
-
let core = tags "tags" in
-
let extra = tags "keywords" in
-
let projects = tags "projects" in
-
let ty = [ key paper "bibtype" |> J.get_string |> tag_of_bibtype ] in
-
List.flatten [ core; extra; ty; projects ]
-
;;
-
-
let best_url p =
-
if Sys.file_exists (Printf.sprintf "static/papers/%s.pdf" (slug p))
-
then Some (Printf.sprintf "/papers/%s.pdf" (slug p))
-
else url p
-
;;
-
-
(** TODO:claude Classification types for papers *)
-
type classification = Full | Short | Preprint
-
-
let string_of_classification = function
-
| Full -> "full"
-
| Short -> "short"
-
| Preprint -> "preprint"
-
-
let classification_of_string = function
-
| "full" -> Full
-
| "short" -> Short
-
| "preprint" -> Preprint
-
| _ -> Full (* default to full if unknown *)
-
-
(** TODO:claude Get classification from paper metadata, with fallback to heuristic *)
-
let classification { paper; _ } =
-
try
-
key paper "classification" |> J.get_string |> classification_of_string
-
with _ ->
-
(* Fallback to heuristic classification based on venue/bibtype/title *)
-
let bibtype = try key paper "bibtype" |> J.get_string with _ -> "" in
-
let journal = try key paper "journal" |> J.get_string |> String.lowercase_ascii with _ -> "" in
-
let booktitle = try key paper "booktitle" |> J.get_string |> String.lowercase_ascii with _ -> "" in
-
let title_str = try key paper "title" |> J.get_string |> String.lowercase_ascii with _ -> "" in
-
-
(* Helper function to check if text contains any of the patterns *)
-
let contains_any text patterns =
-
List.exists (fun pattern ->
-
let regex = Re.Perl.compile_pat ~opts:[`Caseless] pattern in
-
Re.execp regex text
-
) patterns
-
in
-
-
(* Check for preprint indicators *)
-
let bibtype_lower = String.lowercase_ascii bibtype in
-
if contains_any journal ["arxiv"] || contains_any booktitle ["arxiv"] || bibtype_lower = "misc" || bibtype_lower = "techreport"
-
then Preprint
-
(* Check for workshop/short paper indicators including in title *)
-
else if contains_any journal ["workshop"; "wip"; "poster"; "demo"; "hotdep"; "short"] ||
-
contains_any booktitle ["workshop"; "wip"; "poster"; "demo"; "hotdep"; "short"] ||
-
contains_any title_str ["poster"]
-
then Short
-
(* Default to full paper (journal or conference) *)
-
else Full
-
-
(** TODO:claude Check if paper is marked as selected *)
-
let selected { paper; _ } =
-
try
-
let keys = J.get_dict paper in
-
match List.assoc_opt "selected" keys with
-
| Some (`Bool true) -> true
-
| Some (`String "true") -> true
-
| _ -> false
-
with _ -> false
-
-
(** TODO:claude Get note field from paper metadata *)
-
let note { paper; _ } =
-
try
-
let keys = J.get_dict paper in
-
match List.assoc_opt "note" keys with
-
| Some note_json -> Some (J.get_string note_json)
-
| None -> None
-
with _ -> None
-
-
(* TODO:claude *)
-
let to_yaml ?abstract ~ver:_ json_data =
-
(* Don't add version - it's inferred from filename *)
-
let frontmatter = Yaml.to_string_exn json_data in
-
match abstract with
-
| Some abs ->
-
(* Trim leading/trailing whitespace and normalize blank lines *)
-
let trimmed_abs = String.trim abs in
-
let normalized_abs =
-
(* Replace 3+ consecutive newlines with exactly 2 newlines *)
-
Re.replace_string (Re.compile (Re.seq [Re.char '\n'; Re.char '\n'; Re.rep1 (Re.char '\n')])) ~by:"\n\n" trimmed_abs
-
in
-
if normalized_abs = "" then
-
Printf.sprintf "---\n%s---\n" frontmatter
-
else
-
Printf.sprintf "---\n%s---\n\n%s\n" frontmatter normalized_abs
-
| None -> Printf.sprintf "---\n%s---\n" frontmatter
-
-
(* TODO:claude *)
-
let typesense_schema =
-
let open Ezjsonm in
-
dict [
-
("name", string "papers");
-
("fields", list (fun d -> dict d) [
-
[("name", string "id"); ("type", string "string")];
-
[("name", string "title"); ("type", string "string")];
-
[("name", string "authors"); ("type", string "string[]")];
-
[("name", string "abstract"); ("type", string "string")];
-
[("name", string "date"); ("type", string "string")];
-
[("name", string "date_timestamp"); ("type", string "int64")];
-
[("name", string "tags"); ("type", string "string[]"); ("facet", bool true)];
-
[("name", string "doi"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "arxiv_id"); ("type", string "string"); ("optional", bool true)];
-
[("name", string "pdf_url"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "thumbnail_url"); ("type", string "string"); ("optional", bool true)];
-
[("name", string "journal"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "related_projects"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "related_talks"); ("type", string "string[]"); ("optional", bool true)];
-
]);
-
("default_sorting_field", string "date_timestamp");
-
]
-
-
(** TODO:claude Pretty-print a paper with ANSI formatting *)
-
let pp ppf p =
-
let open Fmt in
-
pf ppf "@[<v>";
-
pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Paper";
-
pf ppf "%a: %a@," (styled `Bold string) "Slug" string (slug p);
-
pf ppf "%a: %a@," (styled `Bold string) "Version" string p.ver;
-
pf ppf "%a: %a@," (styled `Bold string) "Title" string (title p);
-
pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Authors" (list ~sep:comma string) (authors p);
-
pf ppf "%a: %a@," (styled `Bold string) "Year" int (year p);
-
pf ppf "%a: %a@," (styled `Bold string) "Bibtype" string (bibtype p);
-
(match doi p with
-
| Some d -> pf ppf "%a: %a@," (styled `Bold string) "DOI" string d
-
| None -> ());
-
(match url p with
-
| Some u -> pf ppf "%a: %a@," (styled `Bold string) "URL" string u
-
| None -> ());
-
(match video p with
-
| Some v -> pf ppf "%a: %a@," (styled `Bold string) "Video" string v
-
| None -> ());
-
let projs = project_slugs p in
-
if projs <> [] then
-
pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Projects" (list ~sep:comma string) projs;
-
let sl = slides p in
-
if sl <> [] then
-
pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Slides" (list ~sep:comma string) sl;
-
(match bibtype p with
-
| "article" ->
-
pf ppf "%a: %a@," (styled `Bold string) "Journal" string (journal p);
-
(match volume p with
-
| Some vol -> pf ppf "%a: %a@," (styled `Bold string) "Volume" string vol
-
| None -> ());
-
(match issue p with
-
| Some iss -> pf ppf "%a: %a@," (styled `Bold string) "Issue" string iss
-
| None -> ());
-
let pgs = pages p in
-
if pgs <> "" then
-
pf ppf "%a: %a@," (styled `Bold string) "Pages" string pgs;
-
| "inproceedings" ->
-
pf ppf "%a: %a@," (styled `Bold string) "Booktitle" string (booktitle p);
-
let pgs = pages p in
-
if pgs <> "" then
-
pf ppf "%a: %a@," (styled `Bold string) "Pages" string pgs;
-
| "techreport" ->
-
pf ppf "%a: %a@," (styled `Bold string) "Institution" string (institution p);
-
(match number p with
-
| Some num -> pf ppf "%a: %a@," (styled `Bold string) "Number" string num
-
| None -> ());
-
| _ -> ());
-
pf ppf "@,";
-
pf ppf "%a:@," (styled `Bold string) "Abstract";
-
pf ppf "%a@," (styled `Faint string) (abstract p);
-
pf ppf "@]"
-55
stack/bushel/lib/paper.mli
···
-
type paper
-
-
type t =
-
{ slug : string
-
; ver : string
-
; paper : paper
-
; abstract : string
-
; latest : bool
-
}
-
-
type ts = t list
-
-
val tv : t list -> ts
-
val slug : t -> string
-
val title : t -> string
-
val authors : t -> string list
-
val project_slugs : t -> string list
-
val slides : t -> string list
-
val bibtype : t -> string
-
val journal : t -> string
-
val raw_json : t -> Ezjsonm.value
-
val doi : t -> string option
-
val volume : t -> string option
-
val video : t -> string option
-
val issue : t -> string option
-
val url : t -> string option
-
val best_url : t -> string option
-
val pages : t -> string
-
val abstract : t -> string
-
val institution : t -> string
-
val number : t -> string option
-
val editor : t -> string
-
val isbn : t -> string
-
val bib : t -> string
-
val year : t -> int
-
val publisher : t -> string
-
val booktitle : t -> string
-
val tags : t -> string list
-
val date : t -> int * int * int
-
val datetime : t -> Ptime.t
-
val compare : t -> t -> int
-
val get_papers : slug:string -> ts -> ts
-
val slugs : ts -> string list
-
val lookup : ts -> string -> t option
-
val of_md : slug:string -> ver:string -> string -> t
-
val to_yaml : ?abstract:string -> ver:string -> Ezjsonm.value -> string
-
val typesense_schema : Ezjsonm.value
-
-
type classification = Full | Short | Preprint
-
val string_of_classification : classification -> string
-
val classification_of_string : string -> classification
-
val classification : t -> classification
-
val selected : t -> bool
-
val note : t -> string option
-
val pp : Format.formatter -> t -> unit
-100
stack/bushel/lib/project.ml
···
-
type t =
-
{ slug : string
-
; title : string
-
; start : int (* year *)
-
; finish : int option
-
; tags : string list
-
; ideas : string
-
; body : string
-
}
-
-
type ts = t list
-
-
let tags p = p.tags
-
-
let compare a b =
-
match compare a.start b.start with
-
| 0 -> compare b.finish a.finish
-
| n -> n
-
;;
-
-
let title { title; _ } = title
-
let body { body; _ } = body
-
let ideas { ideas; _ } = ideas
-
-
let of_md fname =
-
match Jekyll_post.of_string ~fname (Util.read_file fname) with
-
| Error (`Msg m) -> failwith ("Project.of_file: " ^ m)
-
| Ok jp ->
-
let fields = jp.Jekyll_post.fields in
-
let { Jekyll_post.title; date; slug; body; _ } = jp in
-
let (start, _, _), _ = Ptime.to_date_time date in
-
let finish =
-
match Jekyll_format.find "finish" fields with
-
| Some (`String date) ->
-
let date = Jekyll_format.parse_date_exn date in
-
let (finish, _, _), _ = Ptime.to_date_time date in
-
Some finish
-
| _ -> None
-
in
-
let ideas =
-
match Jekyll_format.find "ideas" fields with
-
| Some (`String e) -> e
-
| _ -> failwith ("no ideas key in " ^ fname)
-
in
-
let tags =
-
match Jekyll_format.find "tags" fields with
-
| Some (`A tags) -> List.map Yaml.Util.to_string_exn tags
-
| _ -> []
-
in
-
{ slug; title; start; finish; ideas; tags; body }
-
;;
-
-
let lookup projects slug = List.find_opt (fun p -> p.slug = slug) projects
-
-
(* TODO:claude *)
-
let typesense_schema =
-
let open Ezjsonm in
-
dict [
-
("name", string "projects");
-
("fields", list (fun d -> dict d) [
-
[("name", string "id"); ("type", string "string")];
-
[("name", string "title"); ("type", string "string")];
-
[("name", string "description"); ("type", string "string")];
-
[("name", string "start_year"); ("type", string "int32")];
-
[("name", string "finish_year"); ("type", string "int32"); ("optional", bool true)];
-
[("name", string "date"); ("type", string "string")];
-
[("name", string "date_timestamp"); ("type", string "int64")];
-
[("name", string "tags"); ("type", string "string[]"); ("facet", bool true)];
-
[("name", string "repository_url"); ("type", string "string"); ("optional", bool true)];
-
[("name", string "homepage_url"); ("type", string "string"); ("optional", bool true)];
-
[("name", string "languages"); ("type", string "string[]"); ("facet", bool true); ("optional", bool true)];
-
[("name", string "license"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];
-
[("name", string "status"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];
-
[("name", string "related_papers"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "related_talks"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "body"); ("type", string "string"); ("optional", bool true)];
-
[("name", string "ideas"); ("type", string "string"); ("optional", bool true)];
-
]);
-
("default_sorting_field", string "date_timestamp");
-
]
-
-
(** TODO:claude Pretty-print a project with ANSI formatting *)
-
let pp ppf p =
-
let open Fmt in
-
pf ppf "@[<v>";
-
pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Project";
-
pf ppf "%a: %a@," (styled `Bold string) "Slug" string p.slug;
-
pf ppf "%a: %a@," (styled `Bold string) "Title" string (title p);
-
pf ppf "%a: %d@," (styled `Bold string) "Start" p.start;
-
(match p.finish with
-
| Some year -> pf ppf "%a: %d@," (styled `Bold string) "Finish" year
-
| None -> pf ppf "%a: ongoing@," (styled `Bold string) "Finish");
-
let t = tags p in
-
if t <> [] then
-
pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Tags" (list ~sep:comma string) t;
-
pf ppf "%a: %a@," (styled `Bold string) "Ideas" string (ideas p);
-
pf ppf "@,";
-
pf ppf "%a:@," (styled `Bold string) "Body";
-
pf ppf "%a@," string (body p);
-
pf ppf "@]"
-21
stack/bushel/lib/project.mli
···
-
type t =
-
{ slug : string
-
; title : string
-
; start : int
-
; finish : int option
-
; tags : string list
-
; ideas : string
-
; body : string
-
}
-
-
type ts = t list
-
-
val title : t -> string
-
val body : t -> string
-
val ideas : t -> string
-
val lookup : t list -> string -> t option
-
val tags : t -> string list
-
val compare : t -> t -> int
-
val of_md : string -> t
-
val typesense_schema : Ezjsonm.value
-
val pp : Format.formatter -> t -> unit
-44
stack/bushel/lib/srcsetter.ml
···
-
module MS = Map.Make (String)
-
-
type t =
-
{ name : string
-
; slug : string
-
; origin : string
-
; dims : int * int
-
; variants : (int * int) MS.t
-
}
-
-
type ts = t list
-
-
let v name slug origin variants dims = { name; slug; origin; variants; dims }
-
let slug { slug; _ } = slug
-
let origin { origin; _ } = origin
-
let name { name; _ } = name
-
let dims { dims; _ } = dims
-
let variants { variants; _ } = variants
-
-
let dims_json_t =
-
let open Jsont in
-
let dec x y = x, y in
-
let enc (w, h) = function
-
| 0 -> w
-
| _ -> h
-
in
-
t2 ~dec ~enc uint16
-
;;
-
-
let json_t =
-
let open Jsont in
-
let open Jsont.Object in
-
map ~kind:"Entry" v
-
|> mem "name" string ~enc:name
-
|> mem "slug" string ~enc:slug
-
|> mem "origin" string ~enc:origin
-
|> mem "variants" (as_string_map dims_json_t) ~enc:variants
-
|> mem "dims" dims_json_t ~enc:dims
-
|> finish
-
;;
-
-
let list = Jsont.list json_t
-
let list_to_json es = Jsont_bytesrw.encode_string list ~format:Jsont.Indent es
-
let list_of_json = Jsont_bytesrw.decode_string list
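A round-trip sketch for the Jsont codec above: decode a JSON manifest string and re-encode it with indentation; the input string is assumed to come from an existing manifest.

(* Sketch: decode then re-encode an image manifest. *)
let roundtrip s =
  match list_of_json s with
  | Error e -> Error e
  | Ok entries -> list_to_json entries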
-21
stack/bushel/lib/srcsetter.mli
···
-
module MS : Map.S with type key = string
-
-
type t =
-
{ name : string
-
; slug : string
-
; origin : string
-
; dims : int * int
-
; variants : (int * int) MS.t
-
}
-
-
type ts = t list
-
-
val origin : t -> string
-
val slug : t -> string
-
val name : t -> string
-
val dims : t -> int * int
-
val variants : t -> (int * int) MS.t
-
val list_to_json : t list -> (string, string) result
-
val list_of_json : string -> (t list, string) result
-
val json_t : t Jsont.t
-
val list : t list Jsont.t
-114
stack/bushel/lib/tags.ml
···
-
open Entry
-
-
type t =
-
[ `Slug of string (* :foo points to the specific slug foo *)
-
| `Contact of string (* @foo points to contact foo *)
-
| `Set of string (* #papers points to all Paper entries *)
-
| `Text of string (* foo points to a free text "foo" *)
-
| `Year of int (* a number strictly between 1900 and 2100 is interpreted as a year *)
-
]
-
-
let is_text = function
-
| `Text _ -> true
-
| _ -> false
-
;;
-
-
let is_slug = function
-
| `Slug _ -> true
-
| _ -> false
-
;;
-
-
let is_set = function
-
| `Set _ -> true
-
| _ -> false
-
;;
-
-
let is_year = function
-
| `Year _ -> true
-
| _ -> false
-
;;
-
-
let of_string s : t =
-
if String.length s < 2 then invalid_arg ("Tag.of_string: " ^ s);
-
match s.[0] with
-
| ':' ->
-
let slug = String.sub s 1 (String.length s - 1) in
-
`Slug slug
-
| '@' -> failwith "TODO add contacts to entries"
-
| '#' ->
-
let cl = String.sub s 1 (String.length s - 1) in
-
`Set cl
-
| _ ->
-
(try
-
let x = int_of_string s in
-
if x > 1900 && x < 2100 then `Year x else `Text s
-
with
-
| _ -> `Text s)
-
;;
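A behaviour sketch for of_string on a few hypothetical tag strings ('@' handles currently raise, per the TODO above).

(* Sketch: expected parses for hypothetical inputs. *)
let _tag_examples () =
  assert (of_string ":unikernels" = `Slug "unikernels");
  assert (of_string "#papers" = `Set "papers");
  assert (of_string "2019" = `Year 2019);
  assert (of_string "hardware" = `Text "hardware")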
-
-
let of_string_list l = List.map of_string l
-
-
let to_string = function
-
| `Slug t -> ":" ^ t
-
| `Contact c -> "@" ^ c
-
| `Set s -> "#" ^ s
-
| `Text t -> t
-
| `Year y -> string_of_int y
-
;;
-
-
let to_raw_string = function
-
| `Slug t -> t
-
| `Contact c -> c
-
| `Set s -> s
-
| `Text t -> t
-
| `Year y -> string_of_int y
-
;;
-
-
let pp ppf t = Fmt.string ppf (to_string t)
-
-
let tags_of_ent _entries ent : t list =
-
match ent with
-
| `Paper p -> of_string_list @@ Paper.tags p
-
| `Video v -> of_string_list v.Video.tags
-
| `Project p -> of_string_list @@ Project.tags p
-
| `Note n -> of_string_list @@ Note.tags n
-
| `Idea i -> of_string_list i.Idea.tags
-
;;
-
-
let mentions tags =
-
List.filter
-
(function
-
| `Contact _ | `Slug _ -> true
-
| _ -> false)
-
tags
-
;;
-
-
let mention_entries entries tags =
-
let lk t =
-
try Some (lookup_exn entries t)
-
with Not_found -> Printf.eprintf "mention_entries not found: %s\n%!" t; None
-
in
-
List.filter_map
-
(function
-
| `Slug t -> lk t
-
| _ -> None)
-
tags
-
;;
-
-
let count_tags ?h fn vs =
-
let h =
-
match h with
-
| Some h -> h
-
| None -> Hashtbl.create 42
-
in
-
List.iter
-
(fun ent ->
-
List.iter
-
(fun tag ->
-
match Hashtbl.find_opt h tag with
-
| Some num -> Hashtbl.replace h tag (num + 1)
-
| None -> Hashtbl.add h tag 1)
-
(fn ent))
-
vs;
-
h
-
;;
-25
stack/bushel/lib/tags.mli
···
-
type t =
-
[ `Contact of string
-
| `Set of string
-
| `Slug of string
-
| `Text of string
-
| `Year of int
-
]
-
-
val is_text : t -> bool
-
val is_set : t -> bool
-
val is_slug : t -> bool
-
val is_year : t -> bool
-
val of_string : string -> t
-
val to_string : t -> string
-
val to_raw_string : t -> string
-
val pp : Format.formatter -> t -> unit
-
val mention_entries : Entry.t -> t list -> Entry.entry list
-
val tags_of_ent : Entry.t -> Entry.entry -> t list
-
val mentions : t list -> t list
-
-
val count_tags
-
: ?h:('a, int) Hashtbl.t
-
-> ('b -> 'a list)
-
-> 'b list
-
-> ('a, int) Hashtbl.t
-527
stack/bushel/lib/typesense.ml
···
-
(** Typesense API client for Bushel *)
-
-
type config = {
-
endpoint : string;
-
api_key : string;
-
openai_key : string;
-
}
-
-
type error =
-
| Http_error of int * string
-
| Json_error of string
-
| Connection_error of string
-
-
let pp_error fmt = function
-
| Http_error (code, msg) -> Fmt.pf fmt "HTTP %d: %s" code msg
-
| Json_error msg -> Fmt.pf fmt "JSON error: %s" msg
-
| Connection_error msg -> Fmt.pf fmt "Connection error: %s" msg
-
-
(** Create authentication headers for Typesense API *)
-
let auth_headers api_key =
-
Requests.Headers.empty
-
|> Requests.Headers.set "X-TYPESENSE-API-KEY" api_key
-
|> Requests.Headers.set "Content-Type" "application/json"
-
-
(** Make HTTP request to Typesense API *)
-
let make_request ~sw ~env ?(meth=`GET) ?(body="") config path =
-
let uri = Uri.of_string (config.endpoint ^ path) in
-
let headers = auth_headers config.api_key in
-
let body = if body = "" then None else Some (Requests.Body.of_string Requests.Mime.json body) in
-
-
try
-
let response = Requests.One.request ~sw
-
~clock:env#clock ~net:env#net
-
?body
-
~headers
-
~method_:meth
-
(Uri.to_string uri)
-
in
-
-
let status = Requests.Response.status_code response in
-
let body_flow = Requests.Response.body response in
-
let body_str = Eio.Flow.read_all body_flow in
-
-
if status >= 200 && status < 300 then
-
Ok body_str
-
else
-
Error (Http_error (status, body_str))
-
with exn ->
-
Error (Connection_error (Printexc.to_string exn))
-
-
(** Create a collection with given schema *)
-
let create_collection ~sw ~env config (schema : Ezjsonm.value) =
-
let body = Ezjsonm.value_to_string schema in
-
make_request ~sw ~env ~meth:`POST ~body config "/collections"
-
-
(** Check if collection exists *)
-
let collection_exists ~sw ~env config name =
-
let result = make_request ~sw ~env config ("/collections/" ^ name) in
-
match result with
-
| Ok _ -> true
-
| Error (Http_error (404, _)) -> false
-
| Error _ -> false
-
-
(** Delete a collection *)
-
let delete_collection ~sw ~env config name =
-
make_request ~sw ~env ~meth:`DELETE config ("/collections/" ^ name)
-
-
(** Upload documents to a collection in batch *)
-
let upload_documents ~sw ~env config collection_name (documents : Ezjsonm.value list) =
-
let jsonl_lines = List.map (fun doc -> Ezjsonm.value_to_string doc) documents in
-
let body = String.concat "\n" jsonl_lines in
-
make_request ~sw ~env ~meth:`POST ~body config
-
(Printf.sprintf "/collections/%s/documents/import?action=upsert" collection_name)
-
-
-
(** Convert Bushel objects to Typesense documents *)
-
-
(** Helper function to truncate long strings for embedding *)
-
let truncate_for_embedding ?(max_chars=20000) text =
-
if String.length text <= max_chars then text
-
else String.sub text 0 max_chars
-
-
(** Helper function to convert Ptime to Unix timestamp *)
-
let ptime_to_timestamp ptime =
-
let span = Ptime.to_span ptime in
-
let seconds = Ptime.Span.to_int_s span in
-
match seconds with
-
| Some s -> Int64.of_int s
-
| None -> 0L
-
-
(** Helper function to convert date tuple to Unix timestamp *)
-
let date_to_timestamp (year, month, day) =
-
match Ptime.of_date (year, month, day) with
-
| Some ptime -> ptime_to_timestamp ptime
-
| None -> 0L
-
-
(** Resolve author handles to full names in a list *)
-
let resolve_author_list contacts authors =
-
List.map (fun author ->
-
(* Strip '@' prefix if present *)
-
let handle =
-
if String.length author > 0 && author.[0] = '@' then
-
String.sub author 1 (String.length author - 1)
-
else
-
author
-
in
-
(* Try to look up as a contact handle *)
-
match Contact.find_by_handle contacts handle with
-
| Some contact -> Contact.name contact
-
| None -> author (* Keep original if not found *)
-
) authors
-
-
let contact_to_document (contact : Contact.t) =
-
let open Ezjsonm in
-
let safe_string_list_from_opt = function
-
| Some s -> [s]
-
| None -> []
-
in
-
dict [
-
("id", string (Contact.handle contact));
-
("handle", string (Contact.handle contact));
-
("name", string (Contact.name contact));
-
("names", list string (Contact.names contact));
-
("email", list string (safe_string_list_from_opt (Contact.email contact)));
-
("icon", list string (safe_string_list_from_opt (Contact.icon contact)));
-
("github", list string (safe_string_list_from_opt (Contact.github contact)));
-
("twitter", list string (safe_string_list_from_opt (Contact.twitter contact)));
-
("url", list string (safe_string_list_from_opt (Contact.url contact)));
-
]
-
-
let paper_to_document entries (paper : Paper.t) =
-
let date_tuple = Paper.date paper in
-
let contacts = Entry.contacts entries in
-
-
(* Helper to extract string arrays from JSON, handling both single strings and arrays *)
-
let extract_string_array_from_json json_field_name =
-
try
-
(* Access the raw JSON from the paper record *)
-
let paper_json = Paper.raw_json paper in
-
let value = Ezjsonm.get_dict paper_json |> List.assoc json_field_name in
-
match value with
-
| `String s -> [s]
-
| `A l -> List.filter_map (function `String s -> Some s | _ -> None) l
-
| _ -> []
-
with _ -> []
-
in
-
-
(* Resolve author handles to full names *)
-
let authors = resolve_author_list contacts (Paper.authors paper) in
-
-
(* Convert abstract markdown to plain text *)
-
let abstract = Md.markdown_to_plaintext entries (Paper.abstract paper) |> truncate_for_embedding in
-
-
(* Extract publication metadata *)
-
let bibtype = Paper.bibtype paper in
-
let metadata =
-
try
-
match bibtype with
-
| "article" -> Printf.sprintf "Journal: %s" (Paper.journal paper)
-
| "inproceedings" -> Printf.sprintf "Proceedings: %s" (Paper.journal paper)
-
| "misc" | "techreport" -> Printf.sprintf "Preprint: %s" (Paper.journal paper)
-
| _ -> Printf.sprintf "%s: %s" (String.capitalize_ascii bibtype) (Paper.journal paper)
-
with _ -> bibtype
-
in
-
-
(* Get bibtex from raw JSON *)
-
let bibtex =
-
try
-
let paper_json = Paper.raw_json paper in
-
Ezjsonm.get_dict paper_json
-
|> List.assoc "bibtex"
-
|> Ezjsonm.get_string
-
with _ -> ""
-
in
-
-
let thumbnail_url = Entry.thumbnail entries (`Paper paper) in
-
Ezjsonm.dict [
-
("id", Ezjsonm.string (Paper.slug paper));
-
("title", Ezjsonm.string (Paper.title paper));
-
("authors", Ezjsonm.list Ezjsonm.string authors);
-
("abstract", Ezjsonm.string abstract);
-
("metadata", Ezjsonm.string metadata);
-
("bibtex", Ezjsonm.string bibtex);
-
("date", Ezjsonm.string (let y, m, d = date_tuple in Printf.sprintf "%04d-%02d-%02d" y m d));
-
("date_timestamp", Ezjsonm.int64 (date_to_timestamp date_tuple));
-
("tags", Ezjsonm.list Ezjsonm.string (Paper.tags paper));
-
("doi", Ezjsonm.list Ezjsonm.string (extract_string_array_from_json "doi"));
-
("pdf_url", Ezjsonm.list Ezjsonm.string (extract_string_array_from_json "pdf_url"));
-
("journal", Ezjsonm.list Ezjsonm.string (extract_string_array_from_json "journal"));
-
("related_projects", Ezjsonm.list Ezjsonm.string (Paper.project_slugs paper));
-
("thumbnail_url", Ezjsonm.string (Option.value ~default:"" thumbnail_url));
-
]
-
-
let project_to_document entries (project : Project.t) =
-
let open Ezjsonm in
-
(* Use January 1st of start year as the date for sorting *)
-
let date_timestamp = date_to_timestamp (project.start, 1, 1) in
-
-
(* Convert body markdown to plain text *)
-
let description = Md.markdown_to_plaintext entries (Project.body project) |> truncate_for_embedding in
-
-
let thumbnail_url = Entry.thumbnail entries (`Project project) in
-
dict [
-
("id", string project.slug);
-
("title", string (Project.title project));
-
("description", string description);
-
("start", int project.start);
-
("finish", option int project.finish);
-
("start_year", int project.start);
-
("date", string (Printf.sprintf "%04d-01-01" project.start));
-
("date_timestamp", int64 date_timestamp);
-
("tags", list string (Project.tags project));
-
("thumbnail_url", string (Option.value ~default:"" thumbnail_url));
-
]
-
-
let video_to_document entries (video : Video.t) =
-
let open Ezjsonm in
-
let datetime = Video.datetime video in
-
let safe_string_list_from_opt = function
-
| Some s -> [s]
-
| None -> []
-
in
-
-
(* Convert body markdown to plain text *)
-
let description = Md.markdown_to_plaintext entries (Video.body video) |> truncate_for_embedding in
-
-
(* Resolve paper and project slugs to titles *)
-
let paper_title = match Video.paper video with
-
| Some slug ->
-
(match Entry.lookup entries slug with
-
| Some entry -> Some (Entry.title entry)
-
| None -> Some slug) (* Fallback to slug if not found *)
-
| None -> None
-
in
-
let project_title = match Video.project video with
-
| Some slug ->
-
(match Entry.lookup entries slug with
-
| Some entry -> Some (Entry.title entry)
-
| None -> Some slug) (* Fallback to slug if not found *)
-
| None -> None
-
in
-
-
let thumbnail_url = Entry.thumbnail entries (`Video video) in
-
dict [
-
("id", string (Video.slug video));
-
("title", string (Video.title video));
-
("description", string description);
-
("published_date", string (Ptime.to_rfc3339 datetime));
-
("date", string (Ptime.to_rfc3339 datetime));
-
("date_timestamp", int64 (ptime_to_timestamp datetime));
-
("url", string (Video.url video));
-
("uuid", string (Video.uuid video));
-
("is_talk", bool (Video.talk video));
-
("paper", list string (safe_string_list_from_opt paper_title));
-
("project", list string (safe_string_list_from_opt project_title));
-
("tags", list string video.tags);
-
("thumbnail_url", string (Option.value ~default:"" thumbnail_url));
-
]
-
-
let note_to_document entries (note : Note.t) =
-
let open Ezjsonm in
-
let datetime = Note.datetime note in
-
let safe_string_list_from_opt = function
-
| Some s -> [s]
-
| None -> []
-
in
-
-
(* Convert body markdown to plain text *)
-
let content = Md.markdown_to_plaintext entries (Note.body note) |> truncate_for_embedding in
-
-
let thumbnail_url = Entry.thumbnail entries (`Note note) in
-
let word_count = Note.words note in
-
dict [
-
("id", string (Note.slug note));
-
("title", string (Note.title note));
-
("date", string (Ptime.to_rfc3339 datetime));
-
("date_timestamp", int64 (ptime_to_timestamp datetime));
-
("content", string content);
-
("tags", list string (Note.tags note));
-
("draft", bool (Note.draft note));
-
("synopsis", list string (safe_string_list_from_opt (Note.synopsis note)));
-
("thumbnail_url", string (Option.value ~default:"" thumbnail_url));
-
("words", int word_count);
-
]
-
-
let idea_to_document entries (idea : Idea.t) =
-
let open Ezjsonm in
-
let contacts = Entry.contacts entries in
-
(* Use January 1st of the year as the date for sorting *)
-
let date_timestamp = date_to_timestamp (Idea.year idea, 1, 1) in
-
-
(* Convert body markdown to plain text *)
-
let description = Md.markdown_to_plaintext entries (Idea.body idea) |> truncate_for_embedding in
-
-
(* Resolve supervisor and student handles to full names *)
-
let supervisors = resolve_author_list contacts (Idea.supervisors idea) in
-
let students = resolve_author_list contacts (Idea.students idea) in
-
-
(* Resolve project slug to project title *)
-
let project_title =
-
match Entry.lookup entries (Idea.project idea) with
-
| Some entry -> Entry.title entry
-
| None -> Idea.project idea (* Fallback to slug if not found *)
-
in
-
-
let thumbnail_url = Entry.thumbnail entries (`Idea idea) in
-
dict [
-
("id", string idea.slug);
-
("title", string (Idea.title idea));
-
("description", string description);
-
("level", string (Idea.level_to_string (Idea.level idea)));
-
("project", string project_title);
-
("status", string (Idea.status_to_string (Idea.status idea)));
-
("year", int (Idea.year idea));
-
("date", string (Printf.sprintf "%04d-01-01" (Idea.year idea)));
-
("date_timestamp", int64 date_timestamp);
-
("supervisors", list string supervisors);
-
("students", list string students);
-
("tags", list string idea.tags);
-
("thumbnail_url", string (Option.value ~default:"" thumbnail_url));
-
]
-
-
(** Helper function to add embedding field to schema *)
-
let add_embedding_field_to_schema schema config embedding_from_fields =
-
let open Ezjsonm in
-
let fields = get_dict schema |> List.assoc "fields" |> get_list (fun f -> f) in
-
let embedding_field = dict [
-
("name", string "embedding");
-
("type", string "float[]");
-
("embed", dict [
-
("from", list string embedding_from_fields);
-
("model_config", dict [
-
("model_name", string "openai/text-embedding-3-small");
-
("api_key", string config.openai_key);
-
]);
-
]);
-
] in
-
let updated_fields = fields @ [embedding_field] in
-
let updated_schema =
-
List.map (fun (k, v) ->
-
if k = "fields" then (k, list (fun f -> f) updated_fields)
-
else (k, v)
-
) (get_dict schema)
-
in
-
dict updated_schema
-
-
(** Upload all bushel objects to their respective collections *)
-
let upload_all ~sw ~env config entries =
-
print_string "Uploading bushel data to Typesense\n";
-
-
let contacts = Entry.contacts entries in
-
let papers = Entry.papers entries in
-
let projects = Entry.projects entries in
-
let notes = Entry.notes entries in
-
let videos = Entry.videos entries in
-
let ideas = Entry.ideas entries in
-
-
let collections = [
-
("contacts", add_embedding_field_to_schema Contact.typesense_schema config ["name"; "names"], (List.map contact_to_document contacts : Ezjsonm.value list));
-
("papers", add_embedding_field_to_schema Paper.typesense_schema config ["title"; "abstract"; "authors"], (List.map (paper_to_document entries) papers : Ezjsonm.value list));
-
("videos", add_embedding_field_to_schema Video.typesense_schema config ["title"; "description"], (List.map (video_to_document entries) videos : Ezjsonm.value list));
-
("projects", add_embedding_field_to_schema Project.typesense_schema config ["title"; "description"; "tags"], (List.map (project_to_document entries) projects : Ezjsonm.value list));
-
("notes", add_embedding_field_to_schema Note.typesense_schema config ["title"; "content"; "tags"], (List.map (note_to_document entries) notes : Ezjsonm.value list));
-
("ideas", add_embedding_field_to_schema Idea.typesense_schema config ["title"; "description"; "tags"], (List.map (idea_to_document entries) ideas : Ezjsonm.value list));
-
] in
-
-
let upload_collection ((name, schema, documents) : string * Ezjsonm.value * Ezjsonm.value list) =
-
Printf.printf "Processing collection: %s\n%!" name;
-
let exists = collection_exists ~sw ~env config name in
-
(if exists then (
-
Printf.printf "Collection %s exists, deleting...\n%!" name;
-
let result = delete_collection ~sw ~env config name in
-
match result with
-
| Ok _ -> Printf.printf "Deleted collection %s\n%!" name
-
| Error err ->
-
let err_str = Fmt.str "%a" pp_error err in
-
Printf.printf "Failed to delete collection %s: %s\n%!" name err_str
-
));
-
Printf.printf "Creating collection %s with %d documents\n%!" name (List.length documents);
-
let result = create_collection ~sw ~env config schema in
-
match result with
-
| Ok _ ->
-
Printf.printf "Created collection %s\n%!" name;
-
if documents = [] then
-
Printf.printf "No documents to upload for %s\n%!" name
-
else (
-
let result = upload_documents ~sw ~env config name documents in
-
match result with
-
| Ok response ->
-
(* Count successes and failures *)
-
let lines = String.split_on_char '\n' response in
-
let successes = List.fold_left (fun acc line ->
-
if String.contains line ':' && Str.string_match (Str.regexp ".*success.*true.*") line 0 then acc + 1 else acc) 0 lines in
-
let failures = List.fold_left (fun acc line ->
-
if String.contains line ':' && Str.string_match (Str.regexp ".*success.*false.*") line 0 then acc + 1 else acc) 0 lines in
-
Printf.printf "Upload results for %s: %d successful, %d failed out of %d total\n%!"
-
name successes failures (List.length documents);
-
if failures > 0 then (
-
Printf.printf "Failed documents in %s:\n%!" name;
-
let failed_lines = List.filter (fun line -> Str.string_match (Str.regexp ".*success.*false.*") line 0) lines in
-
List.iter (fun line -> Printf.printf "%s\n%!" line) failed_lines
-
)
-
| Error err ->
-
let err_str = Fmt.str "%a" pp_error err in
-
Printf.printf "Failed to upload documents to %s: %s\n%!" name err_str
-
)
-
| Error err ->
-
let err_str = Fmt.str "%a" pp_error err in
-
Printf.printf "Failed to create collection %s: %s\n%!" name err_str
-
in
-
-
List.iter upload_collection collections
-
-
(** Re-export search types from Typesense_client *)
-
type search_result = Typesense_client.search_result = {
-
id: string;
-
title: string;
-
content: string;
-
score: float;
-
collection: string;
-
highlights: (string * string list) list;
-
document: Ezjsonm.value;
-
}
-
-
type search_response = Typesense_client.search_response = {
-
hits: search_result list;
-
total: int;
-
query_time: float;
-
}
-
-
(** Convert bushel config to client config *)
-
let to_client_config (config : config) =
-
Typesense_client.{ endpoint = config.endpoint; api_key = config.api_key }
-
-
(** Search a single collection *)
-
let search_collection ~sw ~env (config : config) collection_name query ?(limit=10) ?(offset=0) () =
-
let client_config = to_client_config config in
-
let requests_session = Requests.create ~sw env in
-
let client = Typesense_client.create ~requests_session ~config:client_config in
-
let result = Typesense_client.search_collection client collection_name query ~limit ~offset () in
-
match result with
-
| Ok response -> Ok response
-
| Error (Typesense_client.Http_error (code, msg)) -> Error (Http_error (code, msg))
-
| Error (Typesense_client.Json_error msg) -> Error (Json_error msg)
-
| Error (Typesense_client.Connection_error msg) -> Error (Connection_error msg)
-
-
(** Search across all collections - use client multisearch *)
-
let search_all ~sw ~env (config : config) query ?(limit=10) ?(offset=0) () =
-
let client_config = to_client_config config in
-
let requests_session = Requests.create ~sw env in
-
let client = Typesense_client.create ~requests_session ~config:client_config in
-
let result = Typesense_client.multisearch client query ~limit:50 () in
-
match result with
-
| Ok multisearch_resp ->
-
let combined_response = Typesense_client.combine_multisearch_results multisearch_resp ~limit ~offset () in
-
Ok combined_response
-
| Error (Typesense_client.Http_error (code, msg)) -> Error (Http_error (code, msg))
-
| Error (Typesense_client.Json_error msg) -> Error (Json_error msg)
-
| Error (Typesense_client.Connection_error msg) -> Error (Connection_error msg)
-
-
(** List all collections *)
-
let list_collections ~sw ~env (config : config) =
-
let client_config = to_client_config config in
-
let requests_session = Requests.create ~sw env in
-
let client = Typesense_client.create ~requests_session ~config:client_config in
-
let result = Typesense_client.list_collections client in
-
match result with
-
| Ok collections -> Ok collections
-
| Error (Typesense_client.Http_error (code, msg)) -> Error (Http_error (code, msg))
-
| Error (Typesense_client.Json_error msg) -> Error (Json_error msg)
-
| Error (Typesense_client.Connection_error msg) -> Error (Connection_error msg)
-
-
(** Re-export multisearch types from Typesense_client *)
-
type multisearch_response = Typesense_client.multisearch_response = {
-
results: search_response list;
-
}
-
-
(** Perform multisearch across all collections *)
-
let multisearch ~sw ~env (config : config) query ?(limit=10) () =
-
let client_config = to_client_config config in
-
let requests_session = Requests.create ~sw env in
-
let client = Typesense_client.create ~requests_session ~config:client_config in
-
let result = Typesense_client.multisearch client query ~limit () in
-
match result with
-
| Ok multisearch_resp -> Ok multisearch_resp
-
| Error (Typesense_client.Http_error (code, msg)) -> Error (Http_error (code, msg))
-
| Error (Typesense_client.Json_error msg) -> Error (Json_error msg)
-
| Error (Typesense_client.Connection_error msg) -> Error (Connection_error msg)
-
-
(** Combine multisearch results into single result set *)
-
let combine_multisearch_results (multisearch_resp : multisearch_response) ?(limit=10) ?(offset=0) () =
-
Typesense_client.combine_multisearch_results multisearch_resp ~limit ~offset ()
-
-
(** Load configuration from files *)
-
let load_config_from_files () =
-
let read_file_if_exists filename =
-
if Sys.file_exists filename then
-
let ic = open_in filename in
-
let content = really_input_string ic (in_channel_length ic) in
-
close_in ic;
-
Some (String.trim content)
-
else None
-
in
-
-
let endpoint = match read_file_if_exists ".typesense-url" with
-
| Some url -> url
-
| None -> "http://localhost:8108"
-
in
-
-
let api_key = match read_file_if_exists ".typesense-key" with
-
| Some key -> key
-
| None ->
-
try Sys.getenv "TYPESENSE_API_KEY"
-
with Not_found -> ""
-
in
-
-
let openai_key = match read_file_if_exists ".openrouter-api" with
-
| Some key -> key
-
| None ->
-
try Sys.getenv "OPENAI_API_KEY"
-
with Not_found -> ""
-
in
-
-
{ endpoint; api_key; openai_key }
-
-
(** Re-export pretty printer from Typesense_client *)
-
let pp_search_result_oneline = Typesense_client.pp_search_result_oneline
-168
stack/bushel/lib/typesense.mli
···
-
(** Typesense API client for Bushel
-
-
This module provides an OCaml client for the Typesense search engine API.
-
It handles collection management and document indexing for all Bushel object
-
types including contacts, papers, projects, news, videos, notes, and ideas.
-
-
Example usage:
-
{[
-
let config = { endpoint = "https://search.example.com"; api_key = "xyz123"; openai_key = "sk-..." } in
-
Eio_main.run (fun env ->
-
Eio.Switch.run (fun sw ->
-
Typesense.upload_all ~sw ~env config entries))
-
]}
-
*)
-
-
(** Configuration for connecting to a Typesense server *)
-
type config = {
-
endpoint : string; (** Typesense server URL (e.g., "https://search.example.com") *)
-
api_key : string; (** API key for authentication *)
-
openai_key : string; (** OpenAI API key for embeddings *)
-
}
-
-
(** Possible errors that can occur during Typesense operations *)
-
type error =
-
| Http_error of int * string (** HTTP error with status code and message *)
-
| Json_error of string (** JSON parsing or encoding error *)
-
| Connection_error of string (** Network connection error *)
-
-
(** Pretty-printer for error types *)
-
val pp_error : Format.formatter -> error -> unit
-
-
(** Create a collection with the given schema.
-
The schema should follow Typesense's collection schema format. *)
-
val create_collection :
-
sw:Eio.Switch.t ->
-
env:< clock: [> float Eio.Time.clock_ty ] Eio.Resource.t; net: [> [> `Generic ] Eio.Net.ty ] Eio.Resource.t; .. > ->
-
config ->
-
Ezjsonm.value ->
-
(string, error) result
-
-
(** Check if a collection exists by name.
-
Returns true if the collection exists, false otherwise. *)
-
val collection_exists :
-
sw:Eio.Switch.t ->
-
env:< clock: [> float Eio.Time.clock_ty ] Eio.Resource.t; net: [> [> `Generic ] Eio.Net.ty ] Eio.Resource.t; .. > ->
-
config ->
-
string ->
-
bool
-
-
(** Delete a collection by name. *)
-
val delete_collection :
-
sw:Eio.Switch.t ->
-
env:< clock: [> float Eio.Time.clock_ty ] Eio.Resource.t; net: [> [> `Generic ] Eio.Net.ty ] Eio.Resource.t; .. > ->
-
config ->
-
string ->
-
(string, error) result
-
-
(** Upload documents to a collection in batch using JSONL format.
-
More efficient than uploading documents one by one. *)
-
val upload_documents :
-
sw:Eio.Switch.t ->
-
env:< clock: [> float Eio.Time.clock_ty ] Eio.Resource.t; net: [> [> `Generic ] Eio.Net.ty ] Eio.Resource.t; .. > ->
-
config ->
-
string ->
-
Ezjsonm.value list ->
-
(string, error) result
-
-
(** Upload all bushel objects to Typesense.
-
This function will:
-
- Extract all bushel data types from the Entry.t
-
- Create or recreate collections for each type
-
- Upload all documents in batches
-
- Report progress to stdout *)
-
val upload_all :
-
sw:Eio.Switch.t ->
-
env:< clock: [> float Eio.Time.clock_ty ] Eio.Resource.t; net: [> [> `Generic ] Eio.Net.ty ] Eio.Resource.t; .. > ->
-
config ->
-
Entry.t ->
-
unit
-
-
(** Search result structure containing document information and relevance score *)
-
type search_result = {
-
id: string; (** Document ID *)
-
title: string; (** Document title *)
-
content: string; (** Document content/description *)
-
score: float; (** Relevance score *)
-
collection: string; (** Collection name *)
-
highlights: (string * string list) list; (** Highlighted search terms by field *)
-
document: Ezjsonm.value; (** Raw document for flexible field access *)
-
}
-
-
(** Search response containing results and metadata *)
-
type search_response = {
-
hits: search_result list; (** List of matching documents *)
-
total: int; (** Total number of matches *)
-
query_time: float; (** Query execution time in milliseconds *)
-
}
-
-
(** Search a specific collection. *)
-
val search_collection :
-
sw:Eio.Switch.t ->
-
env:< clock: float Eio.Time.clock_ty Eio.Resource.t; fs: Eio.Fs.dir_ty Eio.Path.t; net: [`Generic | `Unix] Eio.Net.ty Eio.Resource.t; .. > ->
-
config ->
-
string ->
-
string ->
-
?limit:int ->
-
?offset:int ->
-
unit ->
-
(search_response, error) result
-
-
(** Search across all bushel collections.
-
Results are sorted by relevance score and paginated. *)
-
val search_all :
-
sw:Eio.Switch.t ->
-
env:< clock: float Eio.Time.clock_ty Eio.Resource.t; fs: Eio.Fs.dir_ty Eio.Path.t; net: [`Generic | `Unix] Eio.Net.ty Eio.Resource.t; .. > ->
-
config ->
-
string ->
-
?limit:int ->
-
?offset:int ->
-
unit ->
-
(search_response, error) result
-
-
(** Multisearch response containing results from multiple collections *)
-
type multisearch_response = {
-
results: search_response list; (** Results from each collection *)
-
}
-
-
(** Perform multisearch across all collections using Typesense's multi_search endpoint.
-
More efficient than individual searches as it's done in a single request. *)
-
val multisearch :
-
sw:Eio.Switch.t ->
-
env:< clock: float Eio.Time.clock_ty Eio.Resource.t; fs: Eio.Fs.dir_ty Eio.Path.t; net: [`Generic | `Unix] Eio.Net.ty Eio.Resource.t; .. > ->
-
config ->
-
string ->
-
?limit:int ->
-
unit ->
-
(multisearch_response, error) result
-
-
(** Combine multisearch results into a single result set.
-
Results are sorted by relevance score and paginated. *)
-
val combine_multisearch_results : multisearch_response -> ?limit:int -> ?offset:int -> unit -> search_response
-
-
(** List all collections with document counts.
-
Returns a list of (collection_name, document_count) pairs. *)
-
val list_collections :
-
sw:Eio.Switch.t ->
-
env:< clock: float Eio.Time.clock_ty Eio.Resource.t; fs: Eio.Fs.dir_ty Eio.Path.t; net: [`Generic | `Unix] Eio.Net.ty Eio.Resource.t; .. > ->
-
config ->
-
((string * int) list, error) result
-
-
(** Load configuration from .typesense-url and .typesense-api files.
-
Falls back to environment variables and defaults.
-
TODO:claude *)
-
val load_config_from_files : unit -> config
-
-
(** Pretty-print a search result in a one-line format with relevant information.
-
Shows different fields based on the collection type (papers, videos, etc.).
-
TODO:claude *)
-
val pp_search_result_oneline : search_result -> string
-
-
(** Convert Bushel objects to Typesense documents *)
-
-
val contact_to_document : Contact.t -> Ezjsonm.value
-
val paper_to_document : Entry.t -> Paper.t -> Ezjsonm.value
-
val project_to_document : Entry.t -> Project.t -> Ezjsonm.value
-
val video_to_document : Entry.t -> Video.t -> Ezjsonm.value
-
val note_to_document : Entry.t -> Note.t -> Ezjsonm.value
-
val idea_to_document : Entry.t -> Idea.t -> Ezjsonm.value
-80
stack/bushel/lib/util.ml
···
-
let first_hunk s =
-
let lines = String.split_on_char '\n' s in
-
let rec aux acc = function
-
| [] -> String.concat "\n" (List.rev acc)
-
| "" :: "" :: _ -> String.concat "\n" (List.rev acc)
-
| line :: rest -> aux (line :: acc) rest
-
in
-
aux [] lines
-
;;
-
-
let first_and_last_hunks s =
-
let lines = String.split_on_char '\n' s in
-
let rec aux acc = function
-
| [] -> String.concat "\n" (List.rev acc), ""
-
| "" :: "" :: rest ->
-
String.concat "\n" (List.rev acc), String.concat "\n" (List.rev rest)
-
| line :: rest -> aux (line :: acc) rest
-
in
-
aux [] lines
-
;;
-
-
(* Find all footnote definition lines in text *)
-
let find_footnote_lines s =
-
let lines = String.split_on_char '\n' s in
-
let is_footnote_def line =
-
String.length line > 3 &&
-
line.[0] = '[' &&
-
line.[1] = '^' &&
-
String.contains line ':' &&
-
let colon_pos = String.index line ':' in
-
colon_pos > 2 && line.[colon_pos - 1] = ']'
-
in
-
let is_continuation line =
-
String.length line > 0 && (line.[0] = ' ' || line.[0] = '\t')
-
in
-
let rec collect_footnotes acc in_footnote = function
-
| [] -> List.rev acc
-
| line :: rest ->
-
if is_footnote_def line then
-
collect_footnotes (line :: acc) true rest
-
else if in_footnote && is_continuation line then
-
collect_footnotes (line :: acc) true rest
-
else
-
collect_footnotes acc false rest
-
in
-
collect_footnotes [] false lines
-
;;
-
-
(* Augment first hunk with footnote definitions from last hunk *)
-
let first_hunk_with_footnotes s =
-
let first, last = first_and_last_hunks s in
-
let footnote_lines = find_footnote_lines last in
-
if footnote_lines = [] then first
-
else first ^ "\n\n" ^ String.concat "\n" footnote_lines
-
;;
-
-
let count_words (text : string) : int =
-
let len = String.length text in
-
let rec count_words_helper (index : int) (in_word : bool) (count : int) : int =
-
if index >= len
-
then if in_word then count + 1 else count
-
else (
-
let char = String.get text index in
-
let is_whitespace =
-
Char.equal char ' '
-
|| Char.equal char '\t'
-
|| Char.equal char '\n'
-
|| Char.equal char '\r'
-
in
-
if is_whitespace
-
then
-
if in_word
-
then count_words_helper (index + 1) false (count + 1)
-
else count_words_helper (index + 1) false count
-
else count_words_helper (index + 1) true count)
-
in
-
count_words_helper 0 false 0
-
;;
-
-
let read_file file = In_channel.(with_open_bin file input_all)
-166
stack/bushel/lib/video.ml
···
-
type t =
-
{ slug : string
-
; title : string
-
; published_date : Ptime.t
-
; uuid : string
-
; description : string
-
; url : string
-
; talk : bool
-
; paper : string option
-
; project : string option
-
; tags : string list
-
}
-
-
type ts = t list
-
-
let get_shadow fs k =
-
match List.assoc_opt k fs with
-
| Some v -> Some v
-
| None -> List.assoc_opt ("_" ^ k) fs
-
;;
-
-
let get_shadow_string fs k =
-
match get_shadow fs k with
-
| Some (`String v) -> v
-
| _ -> failwith "invalid yaml"
-
;;
-
-
let get_shadow_bool fs k =
-
match get_shadow fs k with
-
| Some (`Bool v) -> v
-
| _ -> failwith "invalid yaml"
-
;;
-
-
let compare a b = Ptime.compare b.published_date a.published_date
-
let url v = v.url
-
let body { description; _ } = description
-
let title { title; _ } = title
-
let uuid { uuid; _ } = uuid
-
let paper { paper; _ } = paper
-
let project { project; _ } = project
-
let slug { slug; _ } = slug
-
let date { published_date; _ } = published_date |> Ptime.to_date
-
let datetime { published_date; _ } = published_date
-
let talk { talk; _ } = talk
-
-
let t_of_yaml ~description = function
-
| `O fields ->
-
let slug = get_shadow_string fields "uuid" in
-
let title = get_shadow_string fields "title" in
-
let published_date =
-
get_shadow_string fields "published_date"
-
|> Ptime.of_rfc3339
-
|> Result.get_ok
-
|> fun (a, _, _) -> a
-
in
-
let uuid = get_shadow_string fields "uuid" in
-
let url = get_shadow_string fields "url" in
-
let talk =
-
try get_shadow_bool fields "talk" with
-
| _ -> false
-
in
-
let tags =
-
match List.assoc_opt "tags" fields with
-
| Some l -> Ezjsonm.get_list Ezjsonm.get_string l
-
| _ -> []
-
in
-
let paper =
-
try Some (get_shadow_string fields "paper") with
-
| _ -> None
-
in
-
let project =
-
try Some (get_shadow_string fields "project") with
-
| _ -> None
-
in
-
{ slug; title; tags; published_date; uuid; description; talk; paper; project; url }
-
| _ -> failwith "invalid yaml"
-
;;
-
-
let to_yaml t =
-
`O [
-
("title", `String t.title);
-
("description", `String t.description);
-
("url", `String t.url);
-
("uuid", `String t.uuid);
-
("slug", `String t.slug);
-
("published_date", `String (Ptime.to_rfc3339 t.published_date));
-
("talk", `Bool t.talk);
-
("tags", `A (List.map (fun t -> `String t) t.tags));
-
("paper", match t.paper with None -> `Null | Some p -> `String p);
-
("project", match t.project with None -> `Null | Some p -> `String p)
-
]
-
-
let to_file output_dir t =
-
let file_path = Fpath.v (Filename.concat output_dir (t.uuid ^ ".md")) in
-
let yaml = to_yaml t in
-
let yaml_str = Yaml.to_string_exn yaml in
-
let content = "---\n" ^ yaml_str ^ "---\n" in
-
Bos.OS.File.write file_path content
-
;;
-
-
let of_md fname =
-
(* TODO fix Jekyll_post to not error on no date *)
-
let fname' = "2000-01-01-" ^ Filename.basename fname in
-
match Jekyll_post.of_string ~fname:fname' (Util.read_file fname) with
-
| Error (`Msg m) -> failwith ("paper_of_md: " ^ m)
-
| Ok jp ->
-
let fields = jp.Jekyll_post.fields |> Jekyll_format.fields_to_yaml in
-
let { Jekyll_post.body; _ } = jp in
-
t_of_yaml ~description:body fields
-
;;
-
-
(* TODO:claude *)
-
let typesense_schema =
-
let open Ezjsonm in
-
dict [
-
("name", string "videos");
-
("fields", list (fun d -> dict d) [
-
[("name", string "id"); ("type", string "string")];
-
[("name", string "title"); ("type", string "string")];
-
[("name", string "description"); ("type", string "string")];
-
[("name", string "published_date"); ("type", string "string")];
-
[("name", string "date"); ("type", string "string")];
-
[("name", string "date_timestamp"); ("type", string "int64")];
-
[("name", string "tags"); ("type", string "string[]"); ("facet", bool true)];
-
[("name", string "url"); ("type", string "string")];
-
[("name", string "uuid"); ("type", string "string")];
-
[("name", string "is_talk"); ("type", string "bool")];
-
[("name", string "paper"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "project"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "video_url"); ("type", string "string"); ("optional", bool true)];
-
[("name", string "embed_url"); ("type", string "string"); ("optional", bool true)];
-
[("name", string "duration"); ("type", string "int32"); ("optional", bool true)];
-
[("name", string "channel"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];
-
[("name", string "platform"); ("type", string "string"); ("facet", bool true); ("optional", bool true)];
-
[("name", string "views"); ("type", string "int32"); ("optional", bool true)];
-
[("name", string "related_papers"); ("type", string "string[]"); ("optional", bool true)];
-
[("name", string "related_talks"); ("type", string "string[]"); ("optional", bool true)];
-
]);
-
("default_sorting_field", string "date_timestamp");
-
]
-
-
(** TODO:claude Pretty-print a video with ANSI formatting *)
-
let pp ppf v =
-
let open Fmt in
-
pf ppf "@[<v>";
-
pf ppf "%a: %a@," (styled `Bold string) "Type" (styled `Cyan string) "Video";
-
pf ppf "%a: %a@," (styled `Bold string) "Slug" string (slug v);
-
pf ppf "%a: %a@," (styled `Bold string) "UUID" string (uuid v);
-
pf ppf "%a: %a@," (styled `Bold string) "Title" string (title v);
-
let (year, month, day) = date v in
-
pf ppf "%a: %04d-%02d-%02d@," (styled `Bold string) "Date" year month day;
-
pf ppf "%a: %a@," (styled `Bold string) "URL" string (url v);
-
pf ppf "%a: %b@," (styled `Bold string) "Talk" (talk v);
-
(match paper v with
-
| Some p -> pf ppf "%a: %a@," (styled `Bold string) "Paper" string p
-
| None -> ());
-
(match project v with
-
| Some p -> pf ppf "%a: %a@," (styled `Bold string) "Project" string p
-
| None -> ());
-
let t = v.tags in
-
if t <> [] then
-
pf ppf "%a: @[<h>%a@]@," (styled `Bold string) "Tags" (list ~sep:comma string) t;
-
pf ppf "@,";
-
pf ppf "%a:@," (styled `Bold string) "Description";
-
pf ppf "%a@," string v.description;
-
pf ppf "@]"
-32
stack/bushel/lib/video.mli
···
-
type t =
-
{ slug : string
-
; title : string
-
; published_date : Ptime.t
-
; uuid : string
-
; description : string
-
; url : string
-
; talk : bool
-
; paper : string option
-
; project : string option
-
; tags : string list
-
}
-
-
type ts = t list
-
-
val compare : t -> t -> int
-
val url : t -> string
-
val body : t -> string
-
val title : t -> string
-
val uuid : t -> string
-
val paper : t -> string option
-
val project : t -> string option
-
val slug : t -> string
-
val date : t -> Ptime.date
-
val datetime : t -> Ptime.t
-
val talk : t -> bool
-
val of_md : string -> t
-
val t_of_yaml : description:string -> Yaml.value -> t
-
val to_yaml : t -> Yaml.value
-
val to_file : string -> t -> (unit, [> `Msg of string]) result
-
val typesense_schema : Ezjsonm.value
-
val pp : Format.formatter -> t -> unit
+2 -1
stack/cacheio/cacheio.opam
···
"dune" {>= "3.16"}
"eio"
"cmdliner" {>= "2.0.0"}
-
"yojson"
+
"jsont"
+
"bytesrw"
"ptime"
"logs"
"fmt"
+2 -1
stack/cacheio/dune-project
···
dune
eio
(cmdliner (>= 2.0.0))
-
yojson
+
jsont
+
bytesrw
ptime
logs
fmt
+1 -1
stack/cacheio/lib/dune
···
(public_name cacheio)
(name cacheio)
(modules cacheio flags entry stats range chunk)
-
(libraries eio eio_main digestif yojson ptime ptime.clock.os logs fmt xdge cstruct))
+
(libraries eio eio_main digestif jsont jsont.bytesrw ptime ptime.clock.os logs fmt xdge cstruct))
(library
(public_name cacheio.cmd)
+23 -1
stack/cacheio/lib/entry.ml
···
(match t.ttl with
| None -> "never"
| Some exp -> Printf.sprintf "%.1f" exp)
-
Flags.pp t.flags
+
Flags.pp t.flags
+
+
(* Jsont support *)
+
+
(* Helper codec for int64 *)
+
let int64_jsont =
+
let kind = "Int64" in
+
let doc = "64-bit integer as number" in
+
let dec n = Int64.of_float n in
+
let enc i = Int64.to_float i in
+
Jsont.map ~kind ~doc ~dec ~enc Jsont.number
+
+
let jsont =
+
let kind = "Entry" in
+
let doc = "A cache entry" in
+
let make key size mtime ttl flags = { key; size; mtime; ttl; flags } in
+
Jsont.Object.map ~kind ~doc make
+
|> Jsont.Object.mem "key" Jsont.string ~enc:key
+
|> Jsont.Object.mem "size" int64_jsont ~enc:size
+
|> Jsont.Object.mem "mtime" Jsont.number ~enc:mtime
+
|> Jsont.Object.opt_mem "ttl" Jsont.number ~enc:ttl
+
|> Jsont.Object.mem "flags" Flags.jsont ~enc:flags
+
|> Jsont.Object.finish
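Note to self: quick round-trip check for the new Entry codec. This is only a sketch, not part of the diff — the JSON member names come from the codec above, Entry.pp is the existing pretty-printer, and the Jsont_bytesrw.decode_string / encode_string shapes follow how srcsetter.ml used them before it was dropped. Worth remembering that int64_jsont goes through float, so sizes past 2^53 would lose precision.

  (* Sketch: decode a cache entry from a JSON string with Entry.jsont,
     print it, then re-encode it with indentation. *)
  let roundtrip () =
    let json =
      {|{ "key": "blob-01", "size": 4096, "mtime": 1717000000.0,
          "ttl": 3600.0, "flags": ["pinned", "temporary"] }|}
    in
    match Jsont_bytesrw.decode_string Entry.jsont json with
    | Error e -> prerr_endline e
    | Ok entry ->
      Format.printf "%a@." Entry.pp entry;
      (match Jsont_bytesrw.encode_string Entry.jsont ~format:Jsont.Indent entry with
       | Ok s -> print_endline s
       | Error e -> prerr_endline e)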
+6 -1
stack/cacheio/lib/entry.mli
···
(** {1 Pretty Printing} *)
(** Pretty printer for entries *)
-
val pp : Format.formatter -> t -> unit
+
val pp : Format.formatter -> t -> unit
+
+
(** {1 JSON Support} *)
+
+
(** Jsont codec for cache entries *)
+
val jsont : t Jsont.t
+31 -1
stack/cacheio/lib/flags.ml
···
| `Pinned -> "P"
| `Stale -> "S"
| `Temporary -> "T"
-
| `Chunk -> "C") flags))
+
| `Chunk -> "C") flags))
+
+
(* Jsont support *)
+
+
(* JSON codec for individual flags - using string representation *)
+
let flag_jsont =
+
let kind = "Flag" in
+
let doc = "A cache entry flag" in
+
let dec s =
+
match s with
+
| "pinned" -> `Pinned
+
| "stale" -> `Stale
+
| "temporary" -> `Temporary
+
| "chunk" -> `Chunk
+
| _ -> Jsont.Error.msg Jsont.Meta.none "Invalid flag value"
+
in
+
let enc = function
+
| `Pinned -> "pinned"
+
| `Stale -> "stale"
+
| `Temporary -> "temporary"
+
| `Chunk -> "chunk"
+
in
+
Jsont.map ~kind ~doc ~dec ~enc Jsont.string
+
+
(* JSON codec for flag set *)
+
let jsont =
+
let kind = "Flags" in
+
let doc = "A set of cache entry flags" in
+
let dec lst = of_list lst in
+
let enc t = to_list t in
+
Jsont.map ~kind ~doc ~dec ~enc (Jsont.list flag_jsont)
+6 -1
stack/cacheio/lib/flags.mli
···
val pp_flag : Format.formatter -> flag -> unit
(** Pretty printer for flag sets *)
-
val pp : Format.formatter -> t -> unit
+
val pp : Format.formatter -> t -> unit
+
+
(** {1 JSON Support} *)
+
+
(** Jsont codec for flags *)
+
val jsont : t Jsont.t
+26 -1
stack/cacheio/lib/stats.ml
···
t.expired_count
t.pinned_count
t.stale_count
-
t.temporary_count
+
t.temporary_count
+
+
(* Jsont support *)
+
+
(* Helper codec for int64 *)
+
let int64_jsont =
+
let kind = "Int64" in
+
let doc = "64-bit integer as number" in
+
let dec n = Int64.of_float n in
+
let enc i = Int64.to_float i in
+
Jsont.map ~kind ~doc ~dec ~enc Jsont.number
+
+
let jsont =
+
let kind = "Stats" in
+
let doc = "Cache statistics" in
+
let make total_size entry_count expired_count pinned_count stale_count temporary_count =
+
{ total_size; entry_count; expired_count; pinned_count; stale_count; temporary_count }
+
in
+
Jsont.Object.map ~kind ~doc make
+
|> Jsont.Object.mem "total_size" int64_jsont ~enc:total_size
+
|> Jsont.Object.mem "entry_count" Jsont.int ~enc:entry_count
+
|> Jsont.Object.mem "expired_count" Jsont.int ~enc:expired_count
+
|> Jsont.Object.mem "pinned_count" Jsont.int ~enc:pinned_count
+
|> Jsont.Object.mem "stale_count" Jsont.int ~enc:stale_count
+
|> Jsont.Object.mem "temporary_count" Jsont.int ~enc:temporary_count
+
|> Jsont.Object.finish
+6 -1
stack/cacheio/lib/stats.mli
···
(** {1 Pretty Printing} *)
(** Pretty printer for statistics *)
-
val pp : Format.formatter -> t -> unit
+
val pp : Format.formatter -> t -> unit
+
+
(** {1 JSON Support} *)
+
+
(** Jsont codec for cache statistics *)
+
val jsont : t Jsont.t
+1 -1
stack/immich/dune
···
(library
(name immich)
(public_name immich)
-
(libraries eio eio.core requests requests_json_api ezjsonm fmt ptime uri))
+
(libraries eio eio.core requests requests_json_api jsont jsont.bytesrw fmt ptime uri))
+2 -1
stack/immich/dune-project
···
eio
(eio_main (>= 1.0))
requests
-
ezjsonm
+
jsont
+
bytesrw
fmt
ptime
uri))
+46 -40
stack/immich/immich.ml
···
(** {1 Types} *)
-
type 'net t_internal = {
+
type t = {
base_url: string;
api_key: string;
-
requests_session: (float Eio.Time.clock_ty Eio.Resource.t, 'net Eio.Net.ty Eio.Resource.t) Requests.t;
+
requests_session: Requests.t;
}
-
-
type t = [`Generic | `Unix] t_internal
type person = {
id: string;
···
birth_date: string option;
thumbnail_path: string;
is_hidden: bool;
-
}
-
-
type people_response = {
-
total: int;
-
visible: int;
-
people: person list;
+
unknown: Jsont.json;
}
(** {1 Client Creation} *)
···
let requests_session = Requests.set_default_header requests_session "x-api-key" api_key in
{ base_url; api_key; requests_session }
-
(** {1 JSON Parsing} *)
+
(** {1 JSON Codecs} *)
-
(* Parse a single person from JSON *)
-
let parse_person json =
-
let open Ezjsonm in
-
let id = find json ["id"] |> get_string in
-
let name = find json ["name"] |> get_string in
-
let birth_date =
-
try Some (find json ["birthDate"] |> get_string)
-
with _ -> None
+
(* Jsont codec for person *)
+
let person_jsont =
+
let make id name birth_date thumbnail_path is_hidden unknown =
+
{ id; name; birth_date; thumbnail_path; is_hidden; unknown }
in
-
let thumbnail_path = find json ["thumbnailPath"] |> get_string in
-
let is_hidden =
-
try find json ["isHidden"] |> get_bool
-
with _ -> false
-
in
-
{ id; name; birth_date; thumbnail_path; is_hidden }
+
let id p = p.id in
+
let name p = p.name in
+
let birth_date p = p.birth_date in
+
let thumbnail_path p = p.thumbnail_path in
+
let is_hidden p = p.is_hidden in
+
let unknown p = p.unknown in
+
Jsont.Object.map ~kind:"Person" make
+
|> Jsont.Object.mem "id" Jsont.string ~enc:id
+
|> Jsont.Object.mem "name" Jsont.string ~enc:name
+
|> Jsont.Object.opt_mem "birthDate" Jsont.string ~enc:birth_date
+
|> Jsont.Object.mem "thumbnailPath" Jsont.string ~enc:thumbnail_path
+
|> Jsont.Object.mem "isHidden" Jsont.bool ~enc:is_hidden
+
|> Jsont.Object.keep_unknown Jsont.json_mems ~enc:unknown
+
|> Jsont.Object.finish
-
(* Parse people response from JSON *)
-
let parse_people_response json =
-
let open Ezjsonm in
-
let total = find json ["total"] |> get_int in
-
let visible = find json ["visible"] |> get_int in
-
let people_json = find json ["people"] in
-
let people = get_list parse_person people_json in
-
{ total; visible; people }
+
type people_response = {
+
total: int;
+
visible: int;
+
people: person list;
+
unknown: Jsont.json;
+
}
-
(* Parse a list of people from search results *)
-
let parse_person_list json =
-
let open Ezjsonm in
-
get_list parse_person json
+
(* Jsont codec for people_response *)
+
let people_response_jsont =
+
let make total visible people unknown =
+
{ total; visible; people; unknown }
+
in
+
let total r = r.total in
+
let visible r = r.visible in
+
let people r = r.people in
+
let unknown r = r.unknown in
+
Jsont.Object.map ~kind:"PeopleResponse" make
+
|> Jsont.Object.mem "total" Jsont.int ~enc:total
+
|> Jsont.Object.mem "visible" Jsont.int ~enc:visible
+
|> Jsont.Object.mem "people" (Jsont.list person_jsont) ~enc:people
+
|> Jsont.Object.keep_unknown Jsont.json_mems ~enc:unknown
+
|> Jsont.Object.finish
(** {1 API Functions} *)
let fetch_people { base_url; requests_session; _ } =
let open Requests_json_api in
let url = base_url / "api/people" in
-
get_json_exn requests_session url parse_people_response
+
get_json_exn requests_session url people_response_jsont
let fetch_person { base_url; requests_session; _ } ~person_id =
let open Requests_json_api in
let url = base_url / "api/people" / person_id in
-
get_json_exn requests_session url parse_person
+
get_json_exn requests_session url person_jsont
let download_thumbnail { base_url; requests_session; _ } ~fs ~person_id ~output_path =
try
···
let open Requests_json_api in
let encoded_name = Uri.pct_encode name in
let url = sprintf "%s/api/search/person?name=%s" base_url encoded_name in
-
get_json_exn requests_session url parse_person_list
+
get_json_exn requests_session url (Jsont.list person_jsont)
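For later: how this client gets driven end to end after the jsont switch. A sketch only — the base URL and env var are made up, but Requests.create ~sw env mirrors how the other clients in this stack build a session, fetch_people and the record fields are as declared in immich.mli, and get_json_exn raises on HTTP errors so this would just bubble exceptions.

  (* Sketch: fetch and print the people list via the jsont-backed client. *)
  let () =
    Eio_main.run @@ fun env ->
    Eio.Switch.run @@ fun sw ->
    let requests_session = Requests.create ~sw env in
    let client =
      Immich.create ~requests_session
        ~base_url:"https://photos.example.com"
        ~api_key:(Sys.getenv "IMMICH_API_KEY")
    in
    let resp = Immich.fetch_people client in
    Printf.printf "%d people (%d visible)\n" resp.Immich.total resp.Immich.visible;
    List.iter
      (fun (p : Immich.person) -> Printf.printf "- %s (%s)\n" p.Immich.name p.Immich.id)
      resp.Immich.people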
+3 -1
stack/immich/immich.mli
···
birth_date: string option;
thumbnail_path: string;
is_hidden: bool;
+
unknown: Jsont.json; (** Unknown/extra JSON fields preserved during parsing *)
}
(** Type for the people API response *)
···
total: int;
visible: int;
people: person list;
+
unknown: Jsont.json; (** Unknown/extra JSON fields preserved during parsing *)
}
(** {1 Client Creation} *)
···
@return An Immich client configured for the specified instance
*)
val create :
-
requests_session:(float Eio.Time.clock_ty Eio.Resource.t, [`Generic | `Unix] Eio.Net.ty Eio.Resource.t) Requests.t ->
+
requests_session:Requests.t ->
base_url:string ->
api_key:string ->
t
+2 -1
stack/immich/immich.opam
···
"eio"
"eio_main" {>= "1.0"}
"requests"
-
"ezjsonm"
+
"jsont"
+
"bytesrw"
"fmt"
"ptime"
"uri"
+42 -37
stack/karakeep/bin/karakeep_cli.ml
···
let bookmarks =
List.filter (fun (b : Karakeep.bookmark) ->
(match archived with
-
| Some true -> b.archived
-
| Some false -> not b.archived
+
| Some true -> Karakeep.bookmark_archived b
+
| Some false -> not (Karakeep.bookmark_archived b)
| None -> true) &&
(match favourited with
-
| Some true -> b.favourited
-
| Some false -> not b.favourited
+
| Some true -> Karakeep.bookmark_favourited b
+
| Some false -> not (Karakeep.bookmark_favourited b)
| None -> true)
) bookmarks
in
···
Printf.printf "Found %d bookmarks\n\n" (List.length bookmarks);
List.iteri (fun i (b : Karakeep.bookmark) ->
-
Printf.printf "%d. %s\n" (i + 1) b.url;
-
(match b.title with
+
Printf.printf "%d. %s\n" (i + 1) (Karakeep.bookmark_url b);
+
(match Karakeep.bookmark_title b with
| Some title -> Printf.printf " Title: %s\n" title
| None -> ());
-
Printf.printf " ID: %s\n" b.id;
-
Printf.printf " Created: %s\n" (Ptime.to_rfc3339 b.created_at);
-
if b.tags <> [] then
-
Printf.printf " Tags: %s\n" (String.concat ", " b.tags);
-
if b.archived then Printf.printf " [ARCHIVED]\n";
-
if b.favourited then Printf.printf " [FAVOURITED]\n";
-
(match b.summary with
+
Printf.printf " ID: %s\n" (Karakeep.bookmark_id b);
+
Printf.printf " Created: %s\n" (Ptime.to_rfc3339 (Karakeep.bookmark_created_at b));
+
let tags = Karakeep.bookmark_tags b in
+
if tags <> [] then
+
Printf.printf " Tags: %s\n" (String.concat ", " tags);
+
if Karakeep.bookmark_archived b then Printf.printf " [ARCHIVED]\n";
+
if Karakeep.bookmark_favourited b then Printf.printf " [FAVOURITED]\n";
+
(match Karakeep.bookmark_summary b with
| Some s when s <> "" ->
let summary = if String.length s > 100 then String.sub s 0 100 ^ "..." else s in
Printf.printf " Summary: %s\n" summary
···
let client = Karakeep.create ~sw ~env ~api_key ~base_url in
let bookmark = Karakeep.fetch_bookmark_details client bookmark_id in
-
Printf.printf "Bookmark: %s\n" bookmark.url;
-
Printf.printf "ID: %s\n" bookmark.id;
-
(match bookmark.title with
+
Printf.printf "Bookmark: %s\n" (Karakeep.bookmark_url bookmark);
+
Printf.printf "ID: %s\n" (Karakeep.bookmark_id bookmark);
+
(match Karakeep.bookmark_title bookmark with
| Some title -> Printf.printf "Title: %s\n" title
| None -> ());
-
(match bookmark.note with
+
(match Karakeep.bookmark_note bookmark with
| Some note -> Printf.printf "Note: %s\n" note
| None -> ());
-
Printf.printf "Created: %s\n" (Ptime.to_rfc3339 bookmark.created_at);
-
(match bookmark.updated_at with
+
Printf.printf "Created: %s\n" (Ptime.to_rfc3339 (Karakeep.bookmark_created_at bookmark));
+
(match Karakeep.bookmark_updated_at bookmark with
| Some t -> Printf.printf "Updated: %s\n" (Ptime.to_rfc3339 t)
| None -> ());
-
if bookmark.tags <> [] then
-
Printf.printf "Tags: %s\n" (String.concat ", " bookmark.tags);
+
let tags = Karakeep.bookmark_tags bookmark in
+
if tags <> [] then
+
Printf.printf "Tags: %s\n" (String.concat ", " tags);
-
if bookmark.archived then Printf.printf "Status: ARCHIVED\n";
-
if bookmark.favourited then Printf.printf "Status: FAVOURITED\n";
+
if Karakeep.bookmark_archived bookmark then Printf.printf "Status: ARCHIVED\n";
+
if Karakeep.bookmark_favourited bookmark then Printf.printf "Status: FAVOURITED\n";
-
(match bookmark.summary with
+
(match Karakeep.bookmark_summary bookmark with
| Some s when s <> "" -> Printf.printf "\nSummary:\n%s\n" s
| _ -> ());
-
if bookmark.content <> [] then begin
+
let content = Karakeep.bookmark_content bookmark in
+
if content <> [] then begin
Printf.printf "\nContent metadata:\n";
List.iter (fun (k, v) ->
if v <> "null" && v <> "" then
Printf.printf " %s: %s\n" k v
-
) bookmark.content
+
) content
end;
-
if bookmark.assets <> [] then begin
+
let assets = Karakeep.bookmark_assets bookmark in
+
if assets <> [] then begin
Printf.printf "\nAssets:\n";
List.iter (fun (id, asset_type) ->
Printf.printf " %s (%s)\n" id asset_type;
Printf.printf " URL: %s\n" (Karakeep.get_asset_url client id)
-
) bookmark.assets
+
) assets
end;
0
···
in
Printf.printf "โœ“ Bookmark created successfully!\n";
-
Printf.printf "ID: %s\n" bookmark.id;
-
Printf.printf "URL: %s\n" bookmark.url;
-
(match bookmark.title with
+
Printf.printf "ID: %s\n" (Karakeep.bookmark_id bookmark);
+
Printf.printf "URL: %s\n" (Karakeep.bookmark_url bookmark);
+
(match Karakeep.bookmark_title bookmark with
| Some t -> Printf.printf "Title: %s\n" t
| None -> ());
-
if bookmark.tags <> [] then
-
Printf.printf "Tags: %s\n" (String.concat ", " bookmark.tags);
+
let tags = Karakeep.bookmark_tags bookmark in
+
if tags <> [] then
+
Printf.printf "Tags: %s\n" (String.concat ", " tags);
0
with exn ->
Printf.eprintf "Error: %s\n" (Printexc.to_string exn);
···
(if List.length bookmarks = 1 then "" else "s");
List.iteri (fun i (b : Karakeep.bookmark) ->
-
Printf.printf "%d. %s\n" (i + 1) b.url;
-
(match b.title with
+
Printf.printf "%d. %s\n" (i + 1) (Karakeep.bookmark_url b);
+
(match Karakeep.bookmark_title b with
| Some title -> Printf.printf " Title: %s\n" title
| None -> ());
-
Printf.printf " ID: %s\n" b.id;
-
Printf.printf " Tags: %s\n" (String.concat ", " b.tags);
+
Printf.printf " ID: %s\n" (Karakeep.bookmark_id b);
+
Printf.printf " Tags: %s\n" (String.concat ", " (Karakeep.bookmark_tags b));
Printf.printf "\n"
) bookmarks;
0
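Side note on the CLI changes above: with bookmark fields now read through the Karakeep.bookmark_* accessors instead of record dots, downstream helpers take this shape. Sketch only — print_bookmark is hypothetical, but the type and accessor names are the ones the CLI already calls.

  (* Sketch: a small consumer written against the accessor-based API. *)
  let print_bookmark (b : Karakeep.bookmark) =
    Printf.printf "%s (%s)\n" (Karakeep.bookmark_url b) (Karakeep.bookmark_id b);
    (match Karakeep.bookmark_title b with
     | Some title -> Printf.printf "  title: %s\n" title
     | None -> ());
    match Karakeep.bookmark_tags b with
    | [] -> ()
    | tags -> Printf.printf "  tags: %s\n" (String.concat ", " tags)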
+1 -1
stack/karakeep/dune
···
(library
(name karakeep)
(public_name karakeep)
-
(libraries bushel eio eio.core requests requests_json_api ezjsonm fmt ptime uri logs logs.fmt))
+
(libraries eio eio.core requests requests_json_api jsont jsont.bytesrw fmt ptime uri logs logs.fmt))
-1
stack/karakeep/dune-project
···
eio
(eio_main (>= 1.0))
requests
-
ezjsonm
fmt
ptime
uri
+315 -275
stack/karakeep/karakeep.ml
···
(** Karakeep API client implementation (Eio version) *)
-
module J = Ezjsonm
-
let src = Logs.Src.create "karakeepe" ~doc:"Karakeep API client"
module Log = (val Logs.src_log src : Logs.LOG)
+
(** RFC 3339 timestamp support for JSON *)
+
module Rfc3339 = struct
+
let parse s =
+
Ptime.of_rfc3339 s |> Result.to_option |> Option.map (fun (t, _, _) -> t)
+
+
let format t = Ptime.to_rfc3339 ~frac_s:6 ~tz_offset_s:0 t
+
let _pp ppf t = Format.pp_print_string ppf (format t)
+
+
let jsont =
+
let kind = "RFC 3339 timestamp" in
+
let dec meta s =
+
match parse s with
+
| Some t -> t
+
| None ->
+
Jsont.Error.msgf meta "invalid RFC 3339 timestamp: %S" s
+
in
+
let enc = Jsont.Base.enc format in
+
Jsont.Base.string (Jsont.Base.map ~kind ~dec ~enc ())
+
end
+
+
(** Unknown JSON fields - used when keeping unknown members *)
+
let json_mems_empty = Jsont.Object ([], Jsont.Meta.none)
+
(** Type representing a Karakeep client session *)
-
type 'net t_internal = {
+
type t = {
api_key: string;
base_url: string;
-
http_client: (float Eio.Time.clock_ty Eio.Resource.t, 'net Eio.Net.ty Eio.Resource.t) Requests.t;
+
http_client: Requests.t;
}
-
-
type t = [`Generic | `Unix] t_internal
(** Create a new Karakeep client *)
let create ~sw ~env ~api_key ~base_url : t =
let http_client = Requests.create ~sw env in
{ api_key; base_url; http_client }
-
(** Type representing a Karakeep bookmark *)
-
type bookmark = {
-
id: string;
-
title: string option;
-
url: string;
-
note: string option;
-
created_at: Ptime.t;
-
updated_at: Ptime.t option;
-
favourited: bool;
-
archived: bool;
-
tags: string list;
-
tagging_status: string option;
-
summary: string option;
-
content: (string * string) list;
-
assets: (string * string) list;
-
}
+
(** Tag type for bookmark tags *)
+
module Tag = struct
+
type t = {
+
name: string;
+
unknown: Jsont.json;
+
}
-
(** Type for Karakeep API response containing bookmarks *)
-
type bookmark_response = {
-
total: int;
-
data: bookmark list;
-
next_cursor: string option;
-
}
+
let make name unknown = { name; unknown }
+
let name t = t.name
+
let unknown t = t.unknown
-
(** Parse a date string to Ptime.t, defaulting to epoch if invalid *)
-
let parse_date str =
-
match Ptime.of_rfc3339 str with
-
| Ok (date, _, _) -> date
-
| Error _ ->
-
Fmt.epr "Warning: could not parse date '%s'\n" str;
-
(* Default to epoch time *)
-
let span_opt = Ptime.Span.of_d_ps (0, 0L) in
-
match span_opt with
-
| None -> failwith "Internal error: couldn't create epoch time span"
-
| Some span ->
-
match Ptime.of_span span with
-
| Some t -> t
-
| None -> failwith "Internal error: couldn't create epoch time"
+
let jsont =
+
let kind = "Tag" in
+
Jsont.Object.map ~kind make
+
|> Jsont.Object.mem "name" Jsont.string ~enc:name
+
|> Jsont.Object.keep_unknown Jsont.json_mems ~enc:unknown
+
|> Jsont.Object.finish
+
end
-
(** Extract a string field from JSON, returns None if not present or not a string *)
-
let get_string_opt json path =
-
try Some (J.find json path |> J.get_string)
-
with _ -> None
+
(** Content field pair (key-value from content object) *)
+
module ContentField = struct
+
type _t = string * string
-
(** Extract a string list field from JSON, returns empty list if not present *)
-
let get_string_list json path =
-
try
-
let items_json = J.find json path in
-
J.get_list (fun tag -> J.find tag ["name"] |> J.get_string) items_json
-
with _ -> []
+
let _key (k, _) = k
+
let _value (_, v) = v
-
(** Extract a boolean field from JSON, with default value *)
-
let get_bool_def json path default =
-
try J.find json path |> J.get_bool
-
with _ -> default
+
(* Helper to convert Jsont.json to string *)
+
let json_to_string = function
+
| Jsont.String (s, _) -> s
+
| Jsont.Bool (b, _) -> string_of_bool b
+
| Jsont.Number (n, _) -> string_of_float n
+
| Jsont.Null _ -> "null"
+
| _ -> "complex_value"
-
(** Parse a single bookmark from Karakeep JSON *)
-
let parse_bookmark json =
-
let id =
-
try J.find json ["id"] |> J.get_string
-
with e ->
-
Log.err (fun m -> m "Error parsing bookmark ID: %s@.JSON: %s"
-
(Printexc.to_string e) (J.value_to_string json));
-
failwith "Unable to parse bookmark ID"
-
in
+
(* Decode from JSON object members *)
+
let of_json_mems mems =
+
List.map (fun ((k, _meta), v) -> (k, json_to_string v)) mems
+
+
(* Encode to JSON object members *)
+
let to_json_mems fields =
+
List.map (fun (k, v) -> ((k, Jsont.Meta.none), Jsont.String (v, Jsont.Meta.none))) fields
+
end
+
+
(** Asset type *)
+
module Asset = struct
+
type t = {
+
id: string;
+
asset_type: string;
+
unknown: Jsont.json;
+
}
+
+
let make id asset_type unknown = { id; asset_type; unknown }
+
let id t = t.id
+
let asset_type t = t.asset_type
+
let unknown t = t.unknown
-
let title =
-
try Some (J.find json ["title"] |> J.get_string)
-
with _ -> None
-
in
+
let jsont =
+
let kind = "Asset" in
+
Jsont.Object.map ~kind make
+
|> Jsont.Object.mem "id" Jsont.string ~enc:id
+
|> Jsont.Object.mem "assetType" Jsont.string ~enc:asset_type
+
|> Jsont.Object.keep_unknown Jsont.json_mems ~enc:unknown
+
|> Jsont.Object.finish
+
end
-
let url =
-
try J.find json ["url"] |> J.get_string
-
with _ -> try
-
J.find json ["content"; "url"] |> J.get_string
-
with _ -> try
-
J.find json ["content"; "sourceUrl"] |> J.get_string
-
with _ ->
-
match J.find_opt json ["content"; "type"] with
-
| Some (`String "asset") ->
-
(try J.find json ["content"; "sourceUrl"] |> J.get_string
-
with _ ->
-
(match J.find_opt json ["id"] with
-
| Some (`String id) -> "karakeep-asset://" ^ id
-
| _ -> failwith "No URL or asset ID found in bookmark"))
-
| _ ->
-
Log.err (fun m -> m "No URL found in bookmark@.JSON structure: %s"
-
(J.value_to_string json));
-
failwith "No URL found in bookmark"
-
in
+
(** Karakeep bookmark *)
+
module Bookmark = struct
+
type t = {
+
id: string;
+
title: string option;
+
url: string;
+
note: string option;
+
created_at: Ptime.t;
+
updated_at: Ptime.t option;
+
favourited: bool;
+
archived: bool;
+
tags: string list;
+
tagging_status: string option;
+
summary: string option;
+
content: (string * string) list;
+
assets: (string * string) list;
+
}
-
let note = get_string_opt json ["note"] in
+
let id t = t.id
+
let title t = t.title
+
let url t = t.url
+
let note t = t.note
+
let created_at t = t.created_at
+
let updated_at t = t.updated_at
+
let favourited t = t.favourited
+
let archived t = t.archived
+
let tags t = t.tags
+
let tagging_status t = t.tagging_status
+
let summary t = t.summary
+
let content t = t.content
+
let assets t = t.assets
-
let created_at =
-
try J.find json ["createdAt"] |> J.get_string |> parse_date
-
with _ ->
-
try J.find json ["created_at"] |> J.get_string |> parse_date
-
with _ -> failwith "No creation date found"
-
in
+
let jsont =
+
let kind = "Bookmark" in
-
let updated_at =
-
try Some (J.find json ["updatedAt"] |> J.get_string |> parse_date)
-
with _ ->
-
try Some (J.find json ["modifiedAt"] |> J.get_string |> parse_date)
-
with _ -> None
-
in
+
(* Constructor for decoding *)
+
let make id title url note created_at updated_at favourited archived
+
tag_objs tagging_status summary content_obj assets_objs _unknown =
-
let favourited = get_bool_def json ["favourited"] false in
-
let archived = get_bool_def json ["archived"] false in
-
let tags = get_string_list json ["tags"] in
-
let tagging_status = get_string_opt json ["taggingStatus"] in
-
let summary = get_string_opt json ["summary"] in
+
(* Extract tag names from tag objects *)
+
let tags = match tag_objs with
+
| Some tags -> List.map Tag.name tags
+
| None -> []
+
in
-
let content =
-
try
-
let content_json = J.find json ["content"] in
-
let rec extract_fields acc = function
-
| [] -> acc
-
| (k, v) :: rest ->
-
let value = match v with
-
| `String s -> s
-
| `Bool b -> string_of_bool b
-
| `Float f -> string_of_float f
-
| `Null -> "null"
-
| _ -> "complex_value"
-
in
-
extract_fields ((k, value) :: acc) rest
+
(* Extract content fields from JSON object *)
+
let content = match content_obj with
+
| Some (Jsont.Object (mems, _)) -> ContentField.of_json_mems mems
+
| _ -> []
in
-
match content_json with
-
| `O fields -> extract_fields [] fields
-
| _ -> []
-
with _ -> []
-
in
-
let assets =
-
try
-
let assets_json = J.find json ["assets"] in
-
J.get_list (fun asset_json ->
-
let id = J.find asset_json ["id"] |> J.get_string in
-
let asset_type =
-
try J.find asset_json ["assetType"] |> J.get_string
-
with _ -> "unknown"
-
in
-
(id, asset_type)
-
) assets_json
-
with _ -> []
-
in
+
(* Extract asset tuples *)
+
let assets = match assets_objs with
+
| Some asset_list -> List.map (fun a -> (Asset.id a, Asset.asset_type a)) asset_list
+
| None -> []
+
in
-
{ id; title; url; note; created_at; updated_at; favourited; archived; tags;
-
tagging_status; summary; content; assets }
+
(* Handle URL extraction from content if main URL is missing *)
+
let url = match url with
+
| Some u -> u
+
| None ->
+
(* Try to find URL in content *)
+
(match List.assoc_opt "url" content with
+
| Some u -> u
+
| None ->
+
(match List.assoc_opt "sourceUrl" content with
+
| Some u -> u
+
| None ->
+
(* Check if it's an asset type *)
+
(match List.assoc_opt "type" content with
+
| Some "asset" ->
+
(match List.assoc_opt "sourceUrl" content with
+
| Some u -> u
+
| None -> "karakeep-asset://" ^ id)
+
| _ -> "unknown://no-url")))
+
in
+
+
{
+
id;
+
title;
+
url;
+
note;
+
created_at;
+
updated_at;
+
favourited = Option.value ~default:false favourited;
+
archived = Option.value ~default:false archived;
+
tags;
+
tagging_status;
+
summary;
+
content;
+
assets;
+
}
+
in
+
+
Jsont.Object.map ~kind make
+
|> Jsont.Object.mem "id" Jsont.string ~enc:id
+
|> Jsont.Object.opt_mem "title" Jsont.string ~enc:title
+
|> Jsont.Object.opt_mem "url" Jsont.string ~enc:(fun t -> Some t.url)
+
|> Jsont.Object.opt_mem "note" Jsont.string ~enc:note
+
|> Jsont.Object.mem "createdAt" Rfc3339.jsont ~enc:created_at
+
|> Jsont.Object.opt_mem "updatedAt" Rfc3339.jsont ~enc:updated_at
+
|> Jsont.Object.opt_mem "favourited" Jsont.bool ~enc:(fun t -> Some t.favourited)
+
|> Jsont.Object.opt_mem "archived" Jsont.bool ~enc:(fun t -> Some t.archived)
+
|> Jsont.Object.opt_mem "tags" (Jsont.list Tag.jsont)
+
~enc:(fun t -> if t.tags = [] then None else
+
Some (List.map (fun name -> Tag.make name json_mems_empty) t.tags))
+
|> Jsont.Object.opt_mem "taggingStatus" Jsont.string ~enc:tagging_status
+
|> Jsont.Object.opt_mem "summary" Jsont.string ~enc:summary
+
|> Jsont.Object.opt_mem "content" Jsont.json
+
~enc:(fun t -> if t.content = [] then None else
+
Some (Jsont.Object (ContentField.to_json_mems t.content, Jsont.Meta.none)))
+
|> Jsont.Object.opt_mem "assets" (Jsont.list Asset.jsont)
+
~enc:(fun t -> if t.assets = [] then None else
+
Some (List.map (fun (id, asset_type) ->
+
Asset.make id asset_type json_mems_empty) t.assets))
+
|> Jsont.Object.keep_unknown Jsont.json_mems ~enc:(fun _ -> json_mems_empty)
+
|> Jsont.Object.finish
+
end
+
+
(* Compatibility type aliases and accessors *)
+
type bookmark = Bookmark.t
+
let bookmark_id = Bookmark.id
+
let bookmark_title = Bookmark.title
+
let bookmark_url = Bookmark.url
+
let bookmark_note = Bookmark.note
+
let bookmark_created_at = Bookmark.created_at
+
let bookmark_updated_at = Bookmark.updated_at
+
let bookmark_favourited = Bookmark.favourited
+
let bookmark_archived = Bookmark.archived
+
let bookmark_tags = Bookmark.tags
+
let bookmark_tagging_status = Bookmark.tagging_status
+
let bookmark_summary = Bookmark.summary
+
let bookmark_content = Bookmark.content
+
let bookmark_assets = Bookmark.assets
+
+
(** Karakeep API response containing bookmarks *)
+
module BookmarkResponse = struct
+
type t = {
+
total: int;
+
data: bookmark list;
+
next_cursor: string option;
+
}
+
+
let make total data next_cursor = { total; data; next_cursor }
+
let total t = t.total
+
let data t = t.data
+
let next_cursor t = t.next_cursor
+
+
(* Format 1: {total, data, nextCursor} *)
+
let format1_jsont =
+
let kind = "BookmarkResponse" in
+
let make total data next_cursor _unknown =
+
{ total; data; next_cursor }
+
in
+
Jsont.Object.map ~kind make
+
|> Jsont.Object.mem "total" Jsont.int ~enc:total
+
|> Jsont.Object.mem "data" (Jsont.list Bookmark.jsont) ~enc:data
+
|> Jsont.Object.opt_mem "nextCursor" Jsont.string ~enc:next_cursor
+
|> Jsont.Object.keep_unknown Jsont.json_mems ~enc:(fun _ -> json_mems_empty)
+
|> Jsont.Object.finish
+
+
(* Format 2: {bookmarks, nextCursor} *)
+
let format2_jsont =
+
let kind = "BookmarkResponse" in
+
let make data next_cursor _unknown =
+
{ total = List.length data; data; next_cursor }
+
in
+
Jsont.Object.map ~kind make
+
|> Jsont.Object.mem "bookmarks" (Jsont.list Bookmark.jsont) ~enc:data
+
|> Jsont.Object.opt_mem "nextCursor" Jsont.string ~enc:next_cursor
+
|> Jsont.Object.keep_unknown Jsont.json_mems ~enc:(fun _ -> json_mems_empty)
+
|> Jsont.Object.finish
+
end
+
+
(* Compatibility type aliases and accessors *)
+
type bookmark_response = BookmarkResponse.t
+
let response_total = BookmarkResponse.total
+
let response_data = BookmarkResponse.data
+
let response_next_cursor = BookmarkResponse.next_cursor
(** Parse a Karakeep bookmark response - handles multiple API response formats *)
-
let parse_bookmark_response json =
-
Log.debug (fun m -> m "Parsing API response: %s" (J.value_to_string json));
+
let parse_bookmark_response json_str =
+
Log.debug (fun m -> m "Parsing API response (%d bytes)" (String.length json_str));
(* Try format 1: {total: int, data: [...], nextCursor?: string} *)
let try_format1 () =
Log.debug (fun m -> m "Trying format 1: {total, data, nextCursor}");
-
let total = J.find json ["total"] |> J.get_int in
-
let bookmarks_json = J.find json ["data"] in
-
let data = J.get_list parse_bookmark bookmarks_json in
-
let next_cursor =
-
try Some (J.find json ["nextCursor"] |> J.get_string)
-
with _ -> None
-
in
-
Log.debug (fun m -> m "Successfully parsed format 1: %d bookmarks" (List.length data));
-
{ total; data; next_cursor }
+
match Jsont_bytesrw.decode_string' BookmarkResponse.format1_jsont json_str with
+
| Ok response ->
+
Log.debug (fun m -> m "Successfully parsed format 1: %d bookmarks" (List.length response.data));
+
response
+
| Error e ->
+
Log.debug (fun m -> m "Format 1 failed: %s" (Jsont.Error.to_string e));
+
raise Not_found
in
-
(* Try format 2: {bookmarks: [...], nextCursor?: string} - no total field *)
+
(* Try format 2: {bookmarks: [...], nextCursor?: string} *)
let try_format2 () =
Log.debug (fun m -> m "Trying format 2: {bookmarks, nextCursor}");
-
let bookmarks_json = J.find json ["bookmarks"] in
-
let data = J.get_list parse_bookmark bookmarks_json in
-
let next_cursor =
-
try Some (J.find json ["nextCursor"] |> J.get_string)
-
with _ -> None
-
in
-
(* Calculate total from data length when total field is missing *)
-
let total = List.length data in
-
Log.debug (fun m -> m "Successfully parsed format 2: %d bookmarks" total);
-
{ total; data; next_cursor }
+
match Jsont_bytesrw.decode_string' BookmarkResponse.format2_jsont json_str with
+
| Ok response ->
+
Log.debug (fun m -> m "Successfully parsed format 2: %d bookmarks" (List.length response.data));
+
response
+
| Error e ->
+
Log.debug (fun m -> m "Format 2 failed: %s" (Jsont.Error.to_string e));
+
raise Not_found
in
-
(* Try format 3: API error response {error: string, message?: string} *)
-
let try_error_format () =
-
Log.debug (fun m -> m "Checking for API error response");
-
let error = J.find json ["error"] |> J.get_string in
-
let message =
-
try J.find json ["message"] |> J.get_string
-
with _ -> "Unknown error"
-
in
-
Log.err (fun m -> m "API returned error: %s - %s" error message);
-
{ total = 0; data = []; next_cursor = None }
-
in
-
-
(* Try format 4: Plain array at root level *)
+
(* Try format 3: Plain array at root level *)
let try_array_format () =
-
Log.debug (fun m -> m "Trying format 4: array at root");
-
match json with
-
| `A _ ->
-
let data = J.get_list parse_bookmark json in
+
Log.debug (fun m -> m "Trying format 3: array at root");
+
let array_jsont = Jsont.list Bookmark.jsont in
+
match Jsont_bytesrw.decode_string' array_jsont json_str with
+
| Ok data ->
Log.debug (fun m -> m "Successfully parsed array format: %d bookmarks" (List.length data));
-
{ total = List.length data; data; next_cursor = None }
-
| _ -> raise Not_found
+
BookmarkResponse.make (List.length data) data None
+
| Error e ->
+
Log.debug (fun m -> m "Array format failed: %s" (Jsont.Error.to_string e));
+
raise Not_found
in
(* Try each format in order *)
try try_format1 ()
-
with _ -> (
+
with Not_found -> (
try try_format2 ()
-
with _ -> (
-
try try_error_format ()
-
with _ -> (
-
try try_array_format ()
-
with _ ->
-
Log.err (fun m -> m "Failed to parse response in any known format");
-
Log.debug (fun m -> m "JSON keys: %s"
-
(match json with
-
| `O fields -> String.concat ", " (List.map fst fields)
-
| _ -> "not an object"));
-
{ total = 0; data = []; next_cursor = None }
-
)
+
with Not_found -> (
+
try try_array_format ()
+
with Not_found ->
+
Log.err (fun m -> m "Failed to parse response in any known format");
+
Log.debug (fun m -> m "Response preview: %s"
+
(if String.length json_str > 200 then String.sub json_str 0 200 ^ "..." else json_str));
+
BookmarkResponse.make 0 [] None
)
)
···
match Requests_json_api.check_ok response with
| Ok body_str ->
Log.debug (fun m -> m "Received %d bytes of response data" (String.length body_str));
-
(try
-
let json = J.from_string body_str in
-
parse_bookmark_response json
-
with e ->
-
Log.err (fun m -> m "JSON parsing error: %s" (Printexc.to_string e));
-
Log.debug (fun m -> m "Response body (first 200 chars): %s"
-
(if String.length body_str > 200 then String.sub body_str 0 200 ^ "..." else body_str));
-
raise e)
+
parse_bookmark_response body_str
| Error (status_code, _) ->
Log.err (fun m -> m "HTTP error %d" status_code);
failwith (Fmt.str "HTTP error: %d" status_code)
···
| _ -> all_bookmarks
in
-
(* Determine if more pages are available:
-
- If next_cursor is present, there are definitely more pages
-
- If no next_cursor and we got fewer items than page_size, we're done
-
- If no next_cursor and total is reliable (> current count), there may be more *)
+
(* Determine if more pages are available *)
let more_available =
match response.next_cursor with
| Some _ ->
···
let current_count = List.length all_bookmarks in
let got_full_page = List.length response.data = page_size in
let total_indicates_more = response.total > current_count in
-
(* If we got a full page and total indicates more, continue *)
let has_more = got_full_page && total_indicates_more in
if has_more then
Log.debug (fun m -> m "More pages likely available (%d fetched < %d total)"
···
let response = Requests.get client.http_client ~headers url in
match check_ok response with
| Ok body_str ->
-
let json = J.from_string body_str in
-
parse_bookmark json
+
(match Jsont_bytesrw.decode_string' Bookmark.jsont body_str with
+
| Ok bookmark -> bookmark
+
| Error e ->
+
failwith (Fmt.str "Failed to parse bookmark: %s" (Jsont.Error.to_string e)))
| Error (status_code, _) ->
failwith (Fmt.str "HTTP error: %d" status_code)
···
(** Create a new bookmark in Karakeep with optional tags *)
let create_bookmark client ~url ?title ?note ?tags ?(favourited=false) ?(archived=false) () =
+
let meta = Jsont.Meta.none in
let body_obj = [
-
("type", `String "link");
-
("url", `String url);
-
("favourited", `Bool favourited);
-
("archived", `Bool archived);
+
(("type", meta), Jsont.String ("link", meta));
+
(("url", meta), Jsont.String (url, meta));
+
(("favourited", meta), Jsont.Bool (favourited, meta));
+
(("archived", meta), Jsont.Bool (archived, meta));
] in
let body_obj = match title with
-
| Some title_str -> ("title", `String title_str) :: body_obj
+
| Some title_str -> (("title", meta), Jsont.String (title_str, meta)) :: body_obj
| None -> body_obj
in
let body_obj = match note with
-
| Some note_str -> ("note", `String note_str) :: body_obj
+
| Some note_str -> (("note", meta), Jsont.String (note_str, meta)) :: body_obj
| None -> body_obj
in
-
let body_json = `O body_obj in
-
let body_str = J.to_string body_json in
+
let body_json = Jsont.Object (body_obj, meta) in
+
let body_str = match Jsont_bytesrw.encode_string' Jsont.json body_json with
+
| Ok s -> s
+
| Error e -> failwith (Fmt.str "JSON encoding error: %s" (Jsont.Error.to_string e))
+
in
let headers = Requests.Headers.empty
|> Requests.Headers.set "Authorization" ("Bearer " ^ client.api_key)
···
let status_code = Requests.Response.status_code response in
if status_code = 201 || status_code = 200 then begin
let body_str = read_body response in
-
let json = J.from_string body_str in
-
let bookmark = parse_bookmark json in
+
let bookmark = match Jsont_bytesrw.decode_string' Bookmark.jsont body_str with
+
| Ok b -> b
+
| Error e -> failwith (Fmt.str "Failed to parse created bookmark: %s" (Jsont.Error.to_string e))
+
in
match tags with
| Some tag_list when tag_list <> [] ->
let tag_objects = List.map (fun tag_name ->
-
`O [("tagName", `String tag_name)]
+
Jsont.Object ([(("tagName", meta), Jsont.String (tag_name, meta))], meta)
) tag_list in
-
let tags_body = `O [("tags", `A tag_objects)] in
-
let tags_body_str = J.to_string tags_body in
+
let tags_body = Jsont.Object ([(("tags", meta), Jsont.Array (tag_objects, meta))], meta) in
+
let tags_body_str = match Jsont_bytesrw.encode_string' Jsont.json tags_body with
+
| Ok s -> s
+
| Error e -> failwith (Fmt.str "JSON encoding error: %s" (Jsont.Error.to_string e))
+
in
let tags_url = client.base_url / "api/v1/bookmarks" / bookmark.id / "tags" in
let tags_body = Requests.Body.of_string Requests.Mime.json tags_body_str in
···
let error_body = read_body response in
failwith (Fmt.str "Failed to create bookmark. HTTP error: %d. Details: %s" status_code error_body)
end
-
-
(** Convert a Karakeep bookmark to Bushel.Link.t compatible structure *)
-
let to_bushel_link ?base_url bookmark =
-
let description =
-
match bookmark.title with
-
| Some title when title <> "" -> title
-
| _ ->
-
let content_title = List.assoc_opt "title" bookmark.content in
-
match content_title with
-
| Some title when title <> "" && title <> "null" -> title
-
| _ -> bookmark.url
-
in
-
let date = Ptime.to_date bookmark.created_at in
-
-
let metadata =
-
(match bookmark.summary with Some s -> [("summary", s)] | None -> []) @
-
(List.filter_map (fun (id, asset_type) ->
-
match asset_type with
-
| "screenshot" | "bannerImage" -> Some (asset_type, id)
-
| _ -> None
-
) bookmark.assets) @
-
(List.filter_map (fun (k, v) ->
-
if k = "favicon" && v <> "" && v <> "null" then Some ("favicon", v) else None
-
) bookmark.content)
-
in
-
-
let karakeep =
-
match base_url with
-
| Some url ->
-
Some {
-
Bushel.Link.remote_url = url;
-
id = bookmark.id;
-
tags = bookmark.tags;
-
metadata = metadata;
-
}
-
| None -> None
-
in
-
-
let bushel_slugs =
-
List.filter_map (fun tag ->
-
if String.starts_with ~prefix:"bushel:" tag then
-
Some (String.sub tag 7 (String.length tag - 7))
-
else
-
None
-
) bookmark.tags
-
in
-
-
let bushel =
-
if bushel_slugs = [] then None
-
else Some { Bushel.Link.slugs = bushel_slugs; tags = [] }
-
in
-
-
{ Bushel.Link.url = bookmark.url; date; description; karakeep; bushel }
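Note to self: with to_bushel_link gone and the bookmark record made opaque, callers now go through parse_bookmark_response plus the accessors. Rough, untested sketch against this branch (the JSON literal is made up):

(* Sketch only: decode a made-up format-2 response with the new
   string-based parser, then read fields back via the accessors that
   karakeep.mli now exposes instead of the record. *)
let () =
  let body =
    {|{"bookmarks":[{"id":"bk1","url":"https://example.org",
        "createdAt":"2024-01-01T00:00:00Z","tags":[{"name":"ocaml"}]}]}|}
  in
  let resp = Karakeep.parse_bookmark_response body in
  Printf.printf "total=%d\n" (Karakeep.response_total resp);
  List.iter
    (fun b ->
       Printf.printf "%s %s [%s]\n"
         (Karakeep.bookmark_id b)
         (Karakeep.bookmark_url b)
         (String.concat ", " (Karakeep.bookmark_tags b)))
    (Karakeep.response_data resp)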
+23 -28
stack/karakeep/karakeep.mli
···
t
(** Type representing a Karakeep bookmark *)
-
type bookmark = {
-
id: string;
-
title: string option;
-
url: string;
-
note: string option;
-
created_at: Ptime.t;
-
updated_at: Ptime.t option;
-
favourited: bool;
-
archived: bool;
-
tags: string list;
-
tagging_status: string option;
-
summary: string option;
-
content: (string * string) list;
-
assets: (string * string) list;
-
}
+
type bookmark
+
+
(** Bookmark accessors *)
+
val bookmark_id : bookmark -> string
+
val bookmark_title : bookmark -> string option
+
val bookmark_url : bookmark -> string
+
val bookmark_note : bookmark -> string option
+
val bookmark_created_at : bookmark -> Ptime.t
+
val bookmark_updated_at : bookmark -> Ptime.t option
+
val bookmark_favourited : bookmark -> bool
+
val bookmark_archived : bookmark -> bool
+
val bookmark_tags : bookmark -> string list
+
val bookmark_tagging_status : bookmark -> string option
+
val bookmark_summary : bookmark -> string option
+
val bookmark_content : bookmark -> (string * string) list
+
val bookmark_assets : bookmark -> (string * string) list
(** Type for Karakeep API response containing bookmarks *)
-
type bookmark_response = {
-
total: int;
-
data: bookmark list;
-
next_cursor: string option;
-
}
+
type bookmark_response
-
(** Parse a single bookmark from Karakeep JSON *)
-
val parse_bookmark : Ezjsonm.value -> bookmark
+
(** Bookmark response accessors *)
+
val response_total : bookmark_response -> int
+
val response_data : bookmark_response -> bookmark list
+
val response_next_cursor : bookmark_response -> string option
-
(** Parse a Karakeep bookmark response *)
-
val parse_bookmark_response : Ezjsonm.value -> bookmark_response
+
(** Parse a Karakeep bookmark response from a JSON string *)
+
val parse_bookmark_response : string -> bookmark_response
(** Fetch bookmarks from a Karakeep instance with pagination support
@param client Karakeep client instance
···
t ->
string ->
bookmark
-
-
(** Convert a Karakeep bookmark to Bushel.Link.t compatible structure
-
@param base_url Optional base URL of the Karakeep instance (for karakeep_id) *)
-
val to_bushel_link : ?base_url:string -> bookmark -> Bushel.Link.t
(** Fetch an asset from the Karakeep server as a binary string
@param client Karakeep client instance
-1
stack/karakeep/karakeep.opam
···
"eio"
"eio_main" {>= "1.0"}
"requests"
-
"ezjsonm"
"fmt"
"ptime"
"uri"
+8 -4
stack/peertubee/bin/peertubee_cli.ml
···
Printf.printf " Description: %s\n" desc_short
| None -> ());
Printf.printf " Published: %s\n" (Ptime.to_rfc3339 v.published_at);
-
if v.tags <> [] then
-
Printf.printf " Tags: %s\n" (String.concat ", " v.tags);
+
(match v.tags with
+
| Some tags when tags <> [] ->
+
Printf.printf " Tags: %s\n" (String.concat ", " tags)
+
| _ -> ());
Printf.printf "\n"
) videos;
0
···
(match video.originally_published_at with
| Some t -> Printf.printf "Originally published: %s\n" (Ptime.to_rfc3339 t)
| None -> ());
-
if video.tags <> [] then
-
Printf.printf "Tags: %s\n" (String.concat ", " video.tags);
+
(match video.tags with
+
| Some tags when tags <> [] ->
+
Printf.printf "Tags: %s\n" (String.concat ", " tags)
+
| _ -> ());
(match Peertubee.thumbnail_url client video with
| Some url -> Printf.printf "Thumbnail: %s\n" url
| None -> ());
+1 -1
stack/peertubee/dune
···
(library
(name peertubee)
(public_name peertubee)
-
(libraries ezjsonm eio eio.core requests requests_json_api ptime fmt))
+
(libraries jsont jsont.bytesrw eio eio.core requests requests_json_api ptime fmt))
-1
stack/peertubee/dune-project
···
eio
(eio_main (>= 1.0))
requests
-
ezjsonm
fmt
ptime))
+87 -59
stack/peertubee/peertubee.ml
···
(** PeerTube API client implementation (Eio version) *)
-
module J = Ezjsonm
-
(** Type representing a PeerTube client *)
-
type 'net t_internal = {
+
type t = {
base_url: string;
-
requests_session: (float Eio.Time.clock_ty Eio.Resource.t, 'net Eio.Net.ty Eio.Resource.t) Requests.t;
+
requests_session: Requests.t;
}
-
-
type t = [`Generic | `Unix] t_internal
(** Create a new PeerTube client *)
let create ~requests_session ~base_url : t =
···
published_at: Ptime.t;
originally_published_at: Ptime.t option;
thumbnail_path: string option;
-
tags: string list;
+
tags: string list option;
+
unknown: Jsont.json;
}
(** Type for PeerTube API response containing videos *)
type video_response = {
total: int;
data: video list;
+
unknown: Jsont.json;
}
-
(** Parse a date string to Ptime.t, defaulting to epoch if invalid *)
-
let parse_date str =
-
match Ptime.of_rfc3339 str with
-
| Ok (date, _, _) -> date
-
| Error _ ->
-
Fmt.epr "Warning: could not parse date '%s'\n" str;
-
(* Default to epoch time *)
-
let span_opt = Ptime.Span.of_d_ps (0, 0L) in
-
match span_opt with
-
| None -> failwith "Internal error: couldn't create epoch time span"
-
| Some span ->
-
match Ptime.of_span span with
-
| Some t -> t
-
| None -> failwith "Internal error: couldn't create epoch time"
+
(** Accessor functions for video *)
+
let video_id (v : video) = v.id
+
let video_uuid (v : video) = v.uuid
+
let video_name (v : video) = v.name
+
let video_description (v : video) = v.description
+
let video_url (v : video) = v.url
+
let video_embed_path (v : video) = v.embed_path
+
let video_published_at (v : video) = v.published_at
+
let video_originally_published_at (v : video) = v.originally_published_at
+
let video_thumbnail_path (v : video) = v.thumbnail_path
+
let video_tags (v : video) = v.tags
+
let video_unknown (v : video) = v.unknown
-
(** Extract a string field from JSON, returns None if not present or not a string *)
-
let get_string_opt json path =
-
try Some (J.find json path |> J.get_string)
-
with _ -> None
+
(** Accessor functions for video_response *)
+
let video_response_total (vr : video_response) = vr.total
+
let video_response_data (vr : video_response) = vr.data
+
let video_response_unknown (vr : video_response) = vr.unknown
-
(** Extract a string list field from JSON, returns empty list if not present *)
-
let get_string_list json path =
-
try
-
let tags_json = J.find json path in
-
J.get_list J.get_string tags_json
-
with _ -> []
+
(** RFC3339 timestamp codec *)
+
module Rfc3339 = struct
+
let parse s =
+
Ptime.of_rfc3339 s |> Result.to_option |> Option.map (fun (t, _, _) -> t)
-
(** Parse a single video from PeerTube JSON *)
-
let parse_video json =
-
let id = J.find json ["id"] |> J.get_int in
-
let uuid = J.find json ["uuid"] |> J.get_string in
-
let name = J.find json ["name"] |> J.get_string in
-
let description = get_string_opt json ["description"] in
-
let url = J.find json ["url"] |> J.get_string in
-
let embed_path = J.find json ["embedPath"] |> J.get_string in
+
let format t = Ptime.to_rfc3339 ~frac_s:6 ~tz_offset_s:0 t
+
let pp ppf t = Format.pp_print_string ppf (format t)
-
(* Parse dates *)
-
let published_at =
-
J.find json ["publishedAt"] |> J.get_string |> parse_date
+
let jsont =
+
let kind = "RFC 3339 timestamp" in
+
let doc = "An RFC 3339 date-time string" in
+
let dec s =
+
match parse s with
+
| Some t -> t
+
| None ->
+
Jsont.Error.msgf Jsont.Meta.none "%s: invalid RFC 3339 timestamp: %S"
+
kind s
+
in
+
Jsont.map ~kind ~doc ~dec ~enc:format Jsont.string
+
end
+
+
(** Jsont codec for video *)
+
let video_jsont : video Jsont.t =
+
let kind = "PeerTube Video" in
+
let doc = "A PeerTube video object" in
+
+
let make_video id uuid name description url embed_path published_at
+
originally_published_at thumbnail_path tags unknown : video =
+
{ id; uuid; name; description; url; embed_path; published_at;
+
originally_published_at; thumbnail_path; tags; unknown }
in
-
let originally_published_at =
-
match get_string_opt json ["originallyPublishedAt"] with
-
| Some date -> Some (parse_date date)
-
| None -> None
+
Jsont.Object.map ~kind ~doc make_video
+
|> Jsont.Object.mem "id" Jsont.int ~enc:video_id
+
|> Jsont.Object.mem "uuid" Jsont.string ~enc:video_uuid
+
|> Jsont.Object.mem "name" Jsont.string ~enc:video_name
+
|> Jsont.Object.opt_mem "description" Jsont.string ~enc:video_description
+
|> Jsont.Object.mem "url" Jsont.string ~enc:video_url
+
|> Jsont.Object.mem "embedPath" Jsont.string ~enc:video_embed_path
+
|> Jsont.Object.mem "publishedAt" Rfc3339.jsont ~enc:video_published_at
+
|> Jsont.Object.opt_mem "originallyPublishedAt" Rfc3339.jsont ~enc:video_originally_published_at
+
|> Jsont.Object.opt_mem "thumbnailPath" Jsont.string ~enc:video_thumbnail_path
+
|> Jsont.Object.opt_mem "tags" (Jsont.list Jsont.string) ~enc:video_tags
+
|> Jsont.Object.keep_unknown Jsont.json_mems ~enc:video_unknown
+
|> Jsont.Object.finish
+
+
(** Jsont codec for video_response *)
+
let video_response_jsont =
+
let kind = "PeerTube Video Response" in
+
let doc = "A PeerTube API response containing videos" in
+
+
let make_response total data unknown =
+
{ total; data; unknown }
in
-
let thumbnail_path = get_string_opt json ["thumbnailPath"] in
-
let tags = get_string_list json ["tags"] in
+
Jsont.Object.map ~kind ~doc make_response
+
|> Jsont.Object.mem "total" Jsont.int ~enc:video_response_total
+
|> Jsont.Object.mem "data" (Jsont.list video_jsont) ~enc:video_response_data
+
|> Jsont.Object.keep_unknown Jsont.json_mems ~enc:video_response_unknown
+
|> Jsont.Object.finish
-
{ id; uuid; name; description; url; embed_path;
-
published_at; originally_published_at;
-
thumbnail_path; tags }
+
(** Parse a single video from a JSON string *)
+
let parse_video_string s =
+
match Jsont_bytesrw.decode_string' video_jsont s with
+
| Ok video -> video
+
| Error err -> failwith (Jsont.Error.to_string err)
-
(** Parse a PeerTube video response *)
-
let parse_video_response json =
-
let total = J.find json ["total"] |> J.get_int in
-
let videos_json = J.find json ["data"] in
-
let data = J.get_list parse_video videos_json in
-
{ total; data }
+
(** Parse a video response from a JSON string *)
+
let parse_video_response_string s =
+
match Jsont_bytesrw.decode_string' video_response_jsont s with
+
| Ok response -> response
+
| Error err -> failwith (Jsont.Error.to_string err)
(** Fetch videos from a PeerTube instance channel with pagination support
@param count Number of videos to fetch per page
···
let open Requests_json_api in
let url = Printf.sprintf "%s/api/v1/video-channels/%s/videos?count=%d&start=%d"
client.base_url channel count start in
-
get_json_exn client.requests_session url parse_video_response
+
get_json_exn client.requests_session url video_response_jsont
(** Fetch all videos from a PeerTube instance channel using pagination
@param page_size Number of videos to fetch per page
···
let fetch_video_details client uuid =
let open Requests_json_api in
let url = client.base_url / "api/v1/videos" / uuid in
-
get_json_exn client.requests_session url parse_video
+
get_json_exn client.requests_session url video_jsont
(** Convert a PeerTube video to Bushel.Video.t compatible structure *)
let to_bushel_video video =
+34 -6
stack/peertubee/peertubee.mli
···
@param base_url Base URL of the PeerTube instance
@return A PeerTube client configured for the specified instance *)
val create :
-
requests_session:(float Eio.Time.clock_ty Eio.Resource.t, [`Generic | `Unix] Eio.Net.ty Eio.Resource.t) Requests.t ->
+
requests_session:Requests.t ->
base_url:string ->
t
···
published_at: Ptime.t;
originally_published_at: Ptime.t option;
thumbnail_path: string option;
-
tags: string list;
+
tags: string list option;
+
unknown: Jsont.json;
}
(** Type for PeerTube API response containing videos *)
type video_response = {
total: int;
data: video list;
+
unknown: Jsont.json;
}
-
(** Parse a single video from PeerTube JSON *)
-
val parse_video : Ezjsonm.value -> video
+
(** Accessor functions for video *)
+
val video_id : video -> int
+
val video_uuid : video -> string
+
val video_name : video -> string
+
val video_description : video -> string option
+
val video_url : video -> string
+
val video_embed_path : video -> string
+
val video_published_at : video -> Ptime.t
+
val video_originally_published_at : video -> Ptime.t option
+
val video_thumbnail_path : video -> string option
+
val video_tags : video -> string list option
+
val video_unknown : video -> Jsont.json
-
(** Parse a PeerTube video response *)
-
val parse_video_response : Ezjsonm.value -> video_response
+
(** Accessor functions for video_response *)
+
val video_response_total : video_response -> int
+
val video_response_data : video_response -> video list
+
val video_response_unknown : video_response -> Jsont.json
+
+
(** RFC3339 timestamp handling *)
+
module Rfc3339 : sig
+
val parse : string -> Ptime.t option
+
val format : Ptime.t -> string
+
val pp : Format.formatter -> Ptime.t -> unit
+
val jsont : Ptime.t Jsont.t
+
end
+
+
(** Parse a single video from a JSON string *)
+
val parse_video_string : string -> video
+
+
(** Parse a PeerTube video response from a JSON string *)
+
val parse_video_response_string : string -> video_response
(** Fetch videos from a PeerTube instance channel with pagination support
@param client The PeerTube client
-1
stack/peertubee/peertubee.opam
···
"eio"
"eio_main" {>= "1.0"}
"requests"
-
"ezjsonm"
"fmt"
"ptime"
"odoc" {with-doc}
+1 -1
stack/requests/bin/dune
···
(executables
(public_names ocurl)
(names ocurl)
-
(libraries requests eio_main cmdliner logs logs.cli logs.fmt fmt.cli fmt.tty yojson))
+
(libraries requests eio_main cmdliner logs logs.cli logs.fmt fmt.cli fmt.tty jsont jsont.bytesrw))
+17 -16
stack/requests/bin/ocurl.ml
···
let doc = "Show progress bar for downloads" in
Arg.(value & flag & info ["progress-bar"] ~doc)
-
let enable_cache =
-
let doc = "Enable HTTP response caching for GET and HEAD requests" in
-
let env_info = Cmdliner.Cmd.Env.info "OCURL_ENABLE_CACHE" in
-
Arg.(value & flag & info ["enable-cache"] ~env:env_info ~doc)
-
(* Logging setup *)
(* Setup logging using Logs_cli for standard logging options *)
let setup_log =
···
if String.length body_str > 0 &&
(body_str.[0] = '{' || body_str.[0] = '[') then
try
-
let json = Yojson.Safe.from_string body_str in
-
if not quiet then Fmt.pr "[%s]:@." url_str;
-
Fmt.pr "%a@." (Yojson.Safe.pretty_print ~std:true) json
+
match Jsont_bytesrw.decode_string' Jsont.json body_str with
+
| Ok json ->
+
(match Jsont_bytesrw.encode_string' ~format:Jsont.Indent Jsont.json json with
+
| Ok pretty ->
+
if not quiet then Fmt.pr "[%s]:@." url_str;
+
print_string pretty
+
| Error _ ->
+
if not quiet then Fmt.pr "[%s]:@." url_str;
+
print_string body_str)
+
| Error _ ->
+
if not quiet then Fmt.pr "[%s]:@." url_str;
+
print_string body_str
with _ ->
if not quiet then Fmt.pr "[%s]:@." url_str;
print_string body_str
···
Error (url_str, exn)
(* Main function using Requests with concurrent fetching *)
-
let run_request env sw persist_cookies verify_tls enable_cache timeout follow_redirects max_redirects
+
let run_request env sw persist_cookies verify_tls timeout follow_redirects max_redirects
method_ urls headers data json_data output include_headers
auth _show_progress () =
···
(* Create requests instance with configuration *)
let timeout_obj = Option.map (fun t -> Requests.Timeout.create ~total:t ()) timeout in
-
let req = Requests.create ~sw ~xdg ~persist_cookies ~verify_tls ~enable_cache
+
let req = Requests.create ~sw ~xdg ~persist_cookies ~verify_tls
~follow_redirects ~max_redirects ?timeout:timeout_obj env in
(* Set authentication if provided *)
···
(* Main entry point *)
let main method_ urls headers data json_data output include_headers
-
auth show_progress persist_cookies verify_tls enable_cache
+
auth show_progress persist_cookies verify_tls
timeout follow_redirects max_redirects () =
Eio_main.run @@ fun env ->
Mirage_crypto_rng_unix.use_default ();
Switch.run @@ fun sw ->
-
run_request env sw persist_cookies verify_tls enable_cache timeout follow_redirects max_redirects
+
run_request env sw persist_cookies verify_tls timeout follow_redirects max_redirects
method_ urls headers data json_data output include_headers auth
show_progress ()
···
`P "Custom headers:";
`Pre " $(tname) -H 'Accept: application/json' -H 'X-Api-Key: secret' https://api.example.com";
`P "With persistent cookies:";
-
`Pre " $(tname) --persist-cookies --cache-dir ~/.ocurl https://example.com";
-
`P "Enable response caching:";
-
`Pre " $(tname) --enable-cache https://api.example.com";
+
`Pre " $(tname) --persist-cookies https://example.com";
`P "Disable TLS verification (insecure):";
`Pre " $(tname) --no-verify-tls https://self-signed.example.com";
`S "LOGGING OPTIONS";
···
show_progress $
Requests.Cmd.persist_cookies_term app_name $
Requests.Cmd.verify_tls_term app_name $
-
enable_cache $
Requests.Cmd.timeout_term app_name $
Requests.Cmd.follow_redirects_term app_name $
Requests.Cmd.max_redirects_term app_name $
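The pretty-printing trick ocurl now uses (decode any JSON text to Jsont.json, re-encode with Jsont.Indent) also works as a tiny standalone helper; same calls as in the diff, the helper name is mine:

let pretty_json s =
  match Jsont_bytesrw.decode_string' Jsont.json s with
  | Error _ -> s
  | Ok json ->
    (match Jsont_bytesrw.encode_string' ~format:Jsont.Indent Jsont.json json with
     | Ok pretty -> pretty
     | Error _ -> s)

let () = print_string (pretty_json {|{"a":[1,2,3],"b":{"c":true}}|})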
-1
stack/requests/dune-project
···
ca-certs
mirage-crypto-rng-eio
uri
-
yojson
digestif
base64
logs))
+28 -150
stack/requests/lib/body.ml
···
(Eio.Path.native_exn file) (Mime.to_string mime));
File { file; mime }
-
type json =
-
[ `Null | `Bool of bool | `Float of float | `String of string
-
| `A of json list | `O of (string * json) list ]
-
-
let json json_value =
-
(* Encode json value to a JSON string *)
-
let buffer = Buffer.create 1024 in
-
let encoder = Jsonm.encoder ~minify:true (`Buffer buffer) in
-
-
let enc e l =
-
match Jsonm.encode e (`Lexeme l) with
-
| `Ok -> ()
-
| `Partial -> failwith "Unexpected partial with buffer destination"
+
(* For simple JSON encoding, we just take a Jsont.json value and encode it *)
+
let json (json_value : Jsont.json) =
+
let content = match Jsont_bytesrw.encode_string' ~format:Jsont.Minify Jsont.json json_value with
+
| Ok s -> s
+
| Error e ->
+
let msg = Jsont.Error.to_string e in
+
failwith (Printf.sprintf "Failed to encode JSON: %s" msg)
in
+
String { content; mime = Mime.json }
-
let rec encode_value v k e =
-
match v with
-
| `A vs -> encode_array vs k e
-
| `O ms -> encode_object ms k e
-
| `Null | `Bool _ | `Float _ | `String _ as v -> enc e v; k e
-
and encode_array vs k e =
-
enc e `As;
-
encode_array_values vs k e
-
and encode_array_values vs k e =
-
match vs with
-
| v :: vs' -> encode_value v (encode_array_values vs' k) e
-
| [] -> enc e `Ae; k e
-
and encode_object ms k e =
-
enc e `Os;
-
encode_object_members ms k e
-
and encode_object_members ms k e =
-
match ms with
-
| (n, v) :: ms' ->
-
enc e (`Name n);
-
encode_value v (encode_object_members ms' k) e
-
| [] -> enc e `Oe; k e
-
in
-
-
let finish e =
-
match Jsonm.encode e `End with
-
| `Ok -> ()
-
| `Partial -> failwith "Unexpected partial at end"
-
in
-
-
encode_value json_value finish encoder;
-
-
String { content = Buffer.contents buffer; mime = Mime.json }
-
+
(* JSON streaming using jsont - we encode the value to string and stream it *)
module Json_stream_source = struct
-
type encode_state =
-
| Ready (* Ready to encode new lexemes *)
-
| NeedAwait (* Need to send `Await after previous `Partial *)
-
| Finished (* All done *)
-
type t = {
-
encoder : Jsonm.encoder;
-
mutable buffer : bytes;
-
mutable buffer_offset : int;
-
mutable buffer_len : int;
-
mutable pending_lexemes : Jsonm.lexeme Queue.t;
-
mutable encode_state : encode_state;
-
mutable end_signaled : bool;
-
writer : (Jsonm.lexeme -> unit) -> unit;
+
mutable content : string;
+
mutable offset : int;
}
-
let rec single_read t dst =
-
if t.encode_state = Finished && t.buffer_offset >= t.buffer_len then
+
let single_read t dst =
+
if t.offset >= String.length t.content then
raise End_of_file
-
else if t.buffer_offset < t.buffer_len then begin
-
(* We have data in buffer to copy *)
-
let available = t.buffer_len - t.buffer_offset in
+
else begin
+
let available = String.length t.content - t.offset in
let to_copy = min (Cstruct.length dst) available in
-
Cstruct.blit_from_bytes t.buffer t.buffer_offset dst 0 to_copy;
-
t.buffer_offset <- t.buffer_offset + to_copy;
+
Cstruct.blit_from_string t.content t.offset dst 0 to_copy;
+
t.offset <- t.offset + to_copy;
to_copy
-
end else begin
-
(* Buffer empty, need to generate more data *)
-
t.buffer_offset <- 0;
-
t.buffer_len <- 0;
-
Jsonm.Manual.dst t.encoder t.buffer 0 (Bytes.length t.buffer);
-
-
let rec process_encoding () =
-
match t.encode_state with
-
| NeedAwait ->
-
(* Send `Await after previous `Partial *)
-
(match Jsonm.encode t.encoder `Await with
-
| `Ok ->
-
t.encode_state <- Ready;
-
process_encoding ()
-
| `Partial ->
-
t.buffer_len <- Bytes.length t.buffer - Jsonm.Manual.dst_rem t.encoder)
-
| Ready when not (Queue.is_empty t.pending_lexemes) ->
-
(* Encode next lexeme *)
-
let lexeme = Queue.take t.pending_lexemes in
-
(match Jsonm.encode t.encoder (`Lexeme lexeme) with
-
| `Ok ->
-
(* Successfully encoded, continue with next *)
-
process_encoding ()
-
| `Partial ->
-
(* Buffer full, need to flush and await
-
Note: The lexeme is partially encoded in the encoder's internal state,
-
we don't need to re-queue it. After `Await, the encoder continues. *)
-
t.encode_state <- NeedAwait;
-
t.buffer_len <- Bytes.length t.buffer - Jsonm.Manual.dst_rem t.encoder)
-
| Ready when Queue.is_empty t.pending_lexemes && not t.end_signaled ->
-
(* All lexemes done, signal end *)
-
t.end_signaled <- true;
-
(match Jsonm.encode t.encoder `End with
-
| `Ok ->
-
t.encode_state <- Finished;
-
t.buffer_len <- Bytes.length t.buffer - Jsonm.Manual.dst_rem t.encoder
-
| `Partial ->
-
t.encode_state <- NeedAwait;
-
t.buffer_len <- Bytes.length t.buffer - Jsonm.Manual.dst_rem t.encoder)
-
| Ready when t.end_signaled ->
-
(* Continue trying to finish *)
-
(match Jsonm.encode t.encoder `End with
-
| `Ok ->
-
t.encode_state <- Finished;
-
t.buffer_len <- Bytes.length t.buffer - Jsonm.Manual.dst_rem t.encoder
-
| `Partial ->
-
t.encode_state <- NeedAwait;
-
t.buffer_len <- Bytes.length t.buffer - Jsonm.Manual.dst_rem t.encoder)
-
| Finished ->
-
(* All done *)
-
()
-
| _ -> ()
-
in
-
process_encoding ();
-
-
if t.buffer_len > 0 then
-
single_read t dst
-
else if t.encode_state = Finished then
-
raise End_of_file
-
else
-
(* This shouldn't happen - we should always produce some data or be finished *)
-
raise End_of_file
end
let read_methods = []
end
-
let json_stream_source_create writer =
-
let buffer_size = 4096 in
-
let buffer = Bytes.create buffer_size in
-
let encoder = Jsonm.encoder ~minify:true (`Manual) in
-
let pending_lexemes = Queue.create () in
-
-
(* Call the writer to populate the queue *)
-
let encode_lexeme lexeme = Queue.add lexeme pending_lexemes in
-
writer encode_lexeme;
-
-
let t = {
-
Json_stream_source.encoder;
-
buffer;
-
buffer_offset = 0;
-
buffer_len = 0;
-
pending_lexemes;
-
encode_state = Ready;
-
end_signaled = false;
-
writer;
-
} in
+
let json_stream_source_create json_value =
+
(* Encode the entire JSON value to string with minified format *)
+
let content = match Jsont_bytesrw.encode_string' ~format:Jsont.Minify Jsont.json json_value with
+
| Ok s -> s
+
| Error e ->
+
let msg = Jsont.Error.to_string e in
+
failwith (Printf.sprintf "Failed to encode JSON stream: %s" msg)
+
in
+
let t = { Json_stream_source.content; offset = 0 } in
let ops = Eio.Flow.Pi.source (module Json_stream_source) in
Eio.Resource.T (t, ops)
-
let json_stream writer =
-
let source = json_stream_source_create writer in
+
let json_stream json_value =
+
let source = json_stream_source_create json_value in
Stream { source; mime = Mime.json; length = None }
let text content =
+14 -31
stack/requests/lib/body.mli
···
(** {1 Convenience Constructors} *)
-
type json =
-
[ `Null | `Bool of bool | `Float of float | `String of string
-
| `A of json list | `O of (string * json) list ]
-
(** JSON value representation, compatible with Jsonm's json type. *)
-
-
val json : json -> t
-
(** [json value] creates a JSON body from a json value.
+
val json : Jsont.json -> t
+
(** [json value] creates a JSON body from a Jsont.json value.
The value is encoded to a JSON string with Content-Type: application/json.
Example:
{[
-
let body = Body.json (`O [
-
("status", `String "success");
-
("count", `Float 42.);
-
("items", `A [`String "first"; `String "second"])
-
])
+
let meta = Jsont.Meta.none in
+
let body = Body.json (Jsont.Object ([
+
(("status", meta), Jsont.String ("success", meta));
+
(("count", meta), Jsont.Number (42., meta));
+
(("items", meta), Jsont.Array ([Jsont.String ("first", meta); Jsont.String ("second", meta)], meta))
+
], meta))
]}
*)
-
val json_stream : ((Jsonm.lexeme -> unit) -> unit) -> t
-
(** [json_stream writer] creates a streaming JSON body using jsonm.
-
The [writer] function is called with a callback that accepts jsonm lexemes
-
to encode. The body will be streamed as the lexemes are produced.
+
val json_stream : Jsont.json -> t
+
(** [json_stream json_value] creates a streaming JSON body from a Jsont.json value.
+
The JSON value will be encoded to a minified JSON string and streamed.
Example:
{[
-
let body = Body.json_stream (fun encode ->
-
encode `Os; (* Start object *)
-
encode (`Name "users");
-
encode `As; (* Start array *)
-
List.iter (fun user ->
-
encode `Os;
-
encode (`Name "id");
-
encode (`Float (float_of_int user.id));
-
encode (`Name "name");
-
encode (`String user.name);
-
encode `Oe (* End object *)
-
) users;
-
encode `Ae; (* End array *)
-
encode `Oe (* End object *)
-
)
+
let large_data = Jsont.Object ([
+
("users", Jsont.Array ([...], Jsont.Meta.none))
+
], Jsont.Meta.none) in
+
let body = Body.json_stream large_data
]}
*)
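Building Jsont.json literals by hand for Body.json gets noisy because of the Meta.t slots; a few hypothetical wrappers (not part of requests or jsont, names are mine) keep call sites readable:

(* Thin constructors over the Jsont.json AST used throughout this branch. *)
let meta = Jsont.Meta.none
let jstr s = Jsont.String (s, meta)
let jnum f = Jsont.Number (f, meta)
let jarr l = Jsont.Array (l, meta)
let jobj mems =
  Jsont.Object (List.map (fun (k, v) -> ((k, meta), v)) mems, meta)

let _body =
  Requests.Body.json
    (jobj [ "status", jstr "success";
            "count", jnum 42.;
            "items", jarr [ jstr "first"; jstr "second" ] ])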
-420
stack/requests/lib/cache.ml
···
-
let src = Logs.Src.create "requests.cache" ~doc:"HTTP cache with cacheio"
-
module Log = (val Logs.src_log src : Logs.LOG)
-
-
type cached_response = {
-
status : Cohttp.Code.status_code;
-
headers : Cohttp.Header.t;
-
body : string;
-
}
-
-
type t = {
-
sw : Eio.Switch.t;
-
enabled : bool;
-
cache_get_requests : bool;
-
cache_range_requests : bool;
-
cacheio : Cacheio.t option;
-
memory_cache : (string, cached_response * float) Hashtbl.t;
-
}
-
-
let create ~sw ~enabled ?(cache_get_requests=true) ?(cache_range_requests=true) ~cache_dir () =
-
let cacheio =
-
match cache_dir with
-
| Some dir when enabled ->
-
(try
-
Some (Cacheio.create ~base_dir:dir)
-
with e ->
-
Log.warn (fun m -> m "Failed to create cacheio backend: %s. Using memory cache only."
-
(Printexc.to_string e));
-
None)
-
| _ -> None
-
in
-
{ sw; enabled; cache_get_requests; cache_range_requests; cacheio;
-
memory_cache = Hashtbl.create 100 }
-
-
let make_cache_key ~method_ ~url ~headers =
-
let method_str = match method_ with
-
| `GET -> "GET" | `HEAD -> "HEAD"
-
| _ -> "OTHER"
-
in
-
let url_str = Uri.to_string url in
-
let range_str = match Cohttp.Header.get headers "range" with
-
| Some r -> "_range:" ^ r
-
| None -> ""
-
in
-
Printf.sprintf "%s_%s%s" method_str url_str range_str
-
-
let is_cacheable ~method_ ~status ~headers =
-
match method_ with
-
| `GET | `HEAD ->
-
let code = Cohttp.Code.code_of_status status in
-
if code >= 200 && code < 300 then
-
match Cohttp.Header.get headers "cache-control" with
-
| Some cc ->
-
let cc_lower = String.lowercase_ascii cc in
-
let rec contains s sub pos =
-
if pos + String.length sub > String.length s then false
-
else if String.sub s pos (String.length sub) = sub then true
-
else contains s sub (pos + 1)
-
in
-
not (contains cc_lower "no-store" 0 ||
-
contains cc_lower "no-cache" 0 ||
-
contains cc_lower "private" 0)
-
| None -> true
-
else
-
code = 301 || code = 308
-
| _ -> false
-
-
let parse_max_age headers =
-
match Cohttp.Header.get headers "cache-control" with
-
| Some cc ->
-
let parts = String.split_on_char ',' cc |> List.map String.trim in
-
List.find_map (fun part ->
-
let prefix = "max-age=" in
-
if String.starts_with ~prefix part then
-
let value = String.sub part (String.length prefix)
-
(String.length part - String.length prefix) in
-
try Some (float_of_string value) with _ -> None
-
else None
-
) parts
-
| None -> None
-
-
let serialize_metadata ~status ~headers =
-
let status_code = Cohttp.Code.code_of_status status in
-
let headers_assoc = Cohttp.Header.to_list headers in
-
let json = `Assoc [
-
("status_code", `Int status_code);
-
("headers", `Assoc (List.map (fun (k, v) -> (k, `String v)) headers_assoc));
-
] in
-
Yojson.Basic.to_string json
-
-
let deserialize_metadata json_str =
-
try
-
let open Yojson.Basic.Util in
-
let json = Yojson.Basic.from_string json_str in
-
let status_code = json |> member "status_code" |> to_int in
-
let status = Cohttp.Code.status_of_code status_code in
-
let headers_json = json |> member "headers" |> to_assoc in
-
let headers = headers_json
-
|> List.map (fun (k, v) -> (k, to_string v))
-
|> Cohttp.Header.of_list in
-
Some (status, headers)
-
with _ -> None
-
-
let get t ~method_ ~url ~headers =
-
if not t.enabled then None
-
else if method_ = `GET && not t.cache_get_requests then None
-
else
-
let key = make_cache_key ~method_ ~url ~headers in
-
-
(* Try cacheio first *)
-
match t.cacheio with
-
| Some cache ->
-
(* Check for metadata entry *)
-
let metadata_key = key ^ ".meta" in
-
let body_key = key ^ ".body" in
-
-
if Cacheio.exists cache ~key:metadata_key && Cacheio.exists cache ~key:body_key then
-
Eio.Switch.run @@ fun sw ->
-
(* Read metadata *)
-
let metadata_opt = match Cacheio.get cache ~key:metadata_key ~sw with
-
| Some source ->
-
let buf = Buffer.create 256 in
-
Eio.Flow.copy source (Eio.Flow.buffer_sink buf);
-
deserialize_metadata (Buffer.contents buf)
-
| None -> None
-
in
-
-
(match metadata_opt with
-
| Some (status, resp_headers) ->
-
(* Read body *)
-
(match Cacheio.get cache ~key:body_key ~sw with
-
| Some source ->
-
let buf = Buffer.create 4096 in
-
Eio.Flow.copy source (Eio.Flow.buffer_sink buf);
-
let body = Buffer.contents buf in
-
Log.debug (fun m -> m "Cache hit for %s" (Uri.to_string url));
-
Some { status; headers = resp_headers; body }
-
| None ->
-
Log.debug (fun m -> m "Cache body missing for %s" (Uri.to_string url));
-
None)
-
| None ->
-
Log.debug (fun m -> m "Cache metadata missing for %s" (Uri.to_string url));
-
None)
-
else
-
(Log.debug (fun m -> m "Cache miss for %s" (Uri.to_string url));
-
None)
-
| None ->
-
(* Fall back to memory cache *)
-
match Hashtbl.find_opt t.memory_cache key with
-
| Some (response, expiry) when expiry > Unix.gettimeofday () ->
-
Log.debug (fun m -> m "Memory cache hit for %s" (Uri.to_string url));
-
Some response
-
| _ ->
-
Log.debug (fun m -> m "Cache miss for %s" (Uri.to_string url));
-
None
-
-
let get_stream t ~method_ ~url ~headers ~sw =
-
if not t.enabled then None
-
else if method_ = `GET && not t.cache_get_requests then None
-
else
-
let key = make_cache_key ~method_ ~url ~headers in
-
-
match t.cacheio with
-
| Some cache ->
-
let metadata_key = key ^ ".meta" in
-
let body_key = key ^ ".body" in
-
-
if Cacheio.exists cache ~key:metadata_key && Cacheio.exists cache ~key:body_key then
-
(* Read metadata first *)
-
let metadata_opt =
-
match Cacheio.get cache ~key:metadata_key ~sw with
-
| Some source ->
-
let buf = Buffer.create 256 in
-
Eio.Flow.copy source (Eio.Flow.buffer_sink buf);
-
deserialize_metadata (Buffer.contents buf)
-
| None -> None
-
in
-
-
(match metadata_opt with
-
| Some (status, resp_headers) ->
-
(* Return body stream directly *)
-
(match Cacheio.get cache ~key:body_key ~sw with
-
| Some source ->
-
Log.debug (fun m -> m "Streaming cache hit for %s" (Uri.to_string url));
-
Some (status, resp_headers, source)
-
| None -> None)
-
| None -> None)
-
else None
-
| None -> None
-
-
let put t ~method_ ~url ~request_headers ~status ~headers ~body =
-
if not t.enabled then ()
-
else if is_cacheable ~method_ ~status ~headers then
-
let key = make_cache_key ~method_ ~url ~headers:request_headers in
-
let ttl = parse_max_age headers in
-
-
Log.debug (fun m -> m "Caching response for %s (ttl: %s)"
-
(Uri.to_string url)
-
(match ttl with Some t -> Printf.sprintf "%.0fs" t | None -> "3600s"));
-
-
(match t.cacheio with
-
| Some cache ->
-
Eio.Switch.run @@ fun _sw ->
-
let metadata_key = key ^ ".meta" in
-
let metadata = serialize_metadata ~status ~headers in
-
let metadata_source = Eio.Flow.string_source metadata in
-
Cacheio.put cache ~key:metadata_key ~source:metadata_source ~ttl ();
-
-
let body_key = key ^ ".body" in
-
let body_source = Eio.Flow.string_source body in
-
Cacheio.put cache ~key:body_key ~source:body_source ~ttl ()
-
| None -> ());
-
-
let cached_resp = { status; headers; body } in
-
let expiry = Unix.gettimeofday () +. Option.value ttl ~default:3600.0 in
-
Hashtbl.replace t.memory_cache key (cached_resp, expiry)
-
-
let put_stream t ~method_ ~url ~request_headers ~status ~headers ~body_source ~ttl =
-
if not t.enabled then ()
-
else if is_cacheable ~method_ ~status ~headers then
-
let key = make_cache_key ~method_ ~url ~headers:request_headers in
-
-
Log.debug (fun m -> m "Caching streamed response for %s (ttl: %s)"
-
(Uri.to_string url)
-
(match ttl with Some t -> Printf.sprintf "%.0fs" t | None -> "3600s"));
-
-
match t.cacheio with
-
| Some cache ->
-
Eio.Switch.run @@ fun _sw ->
-
-
(* Store metadata *)
-
let metadata_key = key ^ ".meta" in
-
let metadata = serialize_metadata ~status ~headers in
-
let metadata_source = Eio.Flow.string_source metadata in
-
Cacheio.put cache ~key:metadata_key ~source:metadata_source ~ttl ();
-
-
(* Store body directly from source *)
-
let body_key = key ^ ".body" in
-
Cacheio.put cache ~key:body_key ~source:body_source ~ttl ()
-
| None -> ()
-
-
module Range = struct
-
type t = {
-
start : int64;
-
end_ : int64 option; (* None means to end of file *)
-
}
-
-
let of_header header =
-
(* Parse Range: bytes=start-end *)
-
let prefix = "bytes=" in
-
let prefix_len = String.length prefix in
-
if String.length header >= prefix_len &&
-
String.sub header 0 prefix_len = prefix then
-
let range_str = String.sub header prefix_len (String.length header - prefix_len) in
-
match String.split_on_char '-' range_str with
-
| [start; ""] ->
-
(* bytes=N- means from N to end *)
-
(try Some { start = Int64.of_string start; end_ = None }
-
with _ -> None)
-
| [start; end_] ->
-
(* bytes=N-M *)
-
(try Some {
-
start = Int64.of_string start;
-
end_ = Some (Int64.of_string end_)
-
}
-
with _ -> None)
-
| _ -> None
-
else None
-
-
let to_header t =
-
match t.end_ with
-
| None -> Printf.sprintf "bytes=%Ld-" t.start
-
| Some e -> Printf.sprintf "bytes=%Ld-%Ld" t.start e
-
-
let to_cacheio_range t ~total_size =
-
let end_ = match t.end_ with
-
| None -> Int64.pred total_size
-
| Some e -> min e (Int64.pred total_size)
-
in
-
(* Convert to Cacheio.Range.t *)
-
Cacheio.Range.create ~start:t.start ~end_
-
end
-
-
let download_range t ~sw ~url ~range ~on_chunk =
-
let range_header = Range.to_header range in
-
Log.debug (fun m -> m "Range request for %s: %s"
-
(Uri.to_string url) range_header);
-
-
match t.cacheio with
-
| Some cache ->
-
let key = Uri.to_string url in
-
let cacheio_range = Range.to_cacheio_range range ~total_size:Int64.max_int in
-
-
(match Cacheio.get_range cache ~key ~range:cacheio_range ~sw with
-
| `Complete source ->
-
let rec read_chunks () =
-
let chunk = Cstruct.create 8192 in
-
try
-
let n = Eio.Flow.single_read source chunk in
-
if n > 0 then begin
-
on_chunk (Cstruct.to_string ~off:0 ~len:n chunk);
-
read_chunks ()
-
end
-
with End_of_file -> ()
-
in
-
read_chunks ();
-
Some true
-
| `Chunks chunk_sources ->
-
List.iter (fun (_range, source) ->
-
let rec read_chunk () =
-
let chunk = Cstruct.create 8192 in
-
try
-
let n = Eio.Flow.single_read source chunk in
-
if n > 0 then begin
-
on_chunk (Cstruct.to_string ~off:0 ~len:n chunk);
-
read_chunk ()
-
end
-
with End_of_file -> ()
-
in
-
read_chunk ()
-
) chunk_sources;
-
Some true
-
| `Not_found -> None)
-
| None -> None
-
-
let put_chunk t ~url ~range ~data =
-
if not t.enabled || not t.cache_range_requests then ()
-
else
-
match t.cacheio with
-
| Some cache ->
-
let key = Uri.to_string url in
-
let cacheio_range = Range.to_cacheio_range range ~total_size:Int64.max_int in
-
Eio.Switch.run @@ fun _sw ->
-
let source = Eio.Flow.string_source data in
-
Cacheio.put_chunk cache ~key ~range:cacheio_range ~source ()
-
| None ->
-
Log.debug (fun m -> m "Cannot cache chunk for %s: no cacheio backend"
-
(Uri.to_string url))
-
-
let has_complete t ~url ~total_size =
-
if not t.enabled then false
-
else
-
match t.cacheio with
-
| Some cache ->
-
let key = Uri.to_string url in
-
Cacheio.has_complete_chunks cache ~key ~total_size
-
| None -> false
-
-
let missing_ranges t ~url ~total_size =
-
if not t.enabled then
-
[{ Range.start = 0L; end_ = Some (Int64.pred total_size) }]
-
else
-
match t.cacheio with
-
| Some cache ->
-
let key = Uri.to_string url in
-
let cacheio_ranges = Cacheio.missing_ranges cache ~key ~total_size in
-
List.map (fun r ->
-
{ Range.start = Cacheio.Range.start r;
-
end_ = Some (Cacheio.Range.end_ r) }
-
) cacheio_ranges
-
| None ->
-
[{ Range.start = 0L; end_ = Some (Int64.pred total_size) }]
-
-
let coalesce_chunks t ~url =
-
if not t.enabled then false
-
else
-
match t.cacheio with
-
| Some cache ->
-
let key = Uri.to_string url in
-
let promise = Cacheio.coalesce_chunks cache ~key ~verify:true () in
-
(match Eio.Promise.await promise with
-
| Ok () ->
-
Log.info (fun m -> m "Successfully coalesced chunks for %s" key);
-
true
-
| Error exn ->
-
Log.warn (fun m -> m "Failed to coalesce chunks for %s: %s"
-
key (Printexc.to_string exn));
-
false)
-
| None -> false
-
-
let evict t ~url =
-
if not t.enabled then ()
-
else
-
let key = make_cache_key ~method_:`GET ~url ~headers:(Cohttp.Header.init ()) in
-
(match t.cacheio with
-
| Some cache ->
-
Cacheio.delete cache ~key:(key ^ ".meta");
-
Cacheio.delete cache ~key:(key ^ ".body")
-
| None -> ());
-
Log.debug (fun m -> m "Evicting cache for %s" (Uri.to_string url));
-
Hashtbl.remove t.memory_cache key
-
-
let clear t =
-
Log.info (fun m -> m "Clearing entire cache");
-
(match t.cacheio with
-
| Some cache -> Cacheio.clear cache
-
| None -> ());
-
Hashtbl.clear t.memory_cache
-
-
let stats t =
-
let cacheio_stats =
-
match t.cacheio with
-
| Some cache ->
-
let stats = Cacheio.stats cache in
-
`Assoc [
-
("total_entries", `Int (Cacheio.Stats.entry_count stats));
-
("total_bytes", `Int (Int64.to_int (Cacheio.Stats.total_size stats)));
-
("expired_entries", `Int (Cacheio.Stats.expired_count stats));
-
("pinned_entries", `Int (Cacheio.Stats.pinned_count stats));
-
("temporary_entries", `Int (Cacheio.Stats.temporary_count stats));
-
]
-
| None -> `Assoc []
-
in
-
`Assoc [
-
("memory_cache_entries", `Int (Hashtbl.length t.memory_cache));
-
("cache_backend", `String (if Option.is_some t.cacheio then "cacheio" else "memory"));
-
("enabled", `Bool t.enabled);
-
("cache_get_requests", `Bool t.cache_get_requests);
-
("cache_range_requests", `Bool t.cache_range_requests);
-
("cacheio_stats", cacheio_stats);
-
]
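For reference before the cache module goes away below: the range handling it implemented is the plain `bytes=start-end` form of the HTTP Range header (suffix ranges like `bytes=-N` were not supported). A standalone sketch of the same parse/print round trip, with an illustrative `range` type that is not part of the surviving code:

type range = { start : int64; end_ : int64 option }

(* Parse "bytes=N-" (open-ended) or "bytes=N-M" (closed); anything else is rejected. *)
let range_of_header header =
  match String.split_on_char '=' header with
  | [ "bytes"; spec ] ->
    (match String.split_on_char '-' spec with
     | [ s; "" ] ->
       (try Some { start = Int64.of_string s; end_ = None } with _ -> None)
     | [ s; e ] ->
       (try Some { start = Int64.of_string s; end_ = Some (Int64.of_string e) }
        with _ -> None)
     | _ -> None)
  | _ -> None

(* Print back in the same form. *)
let range_to_header r =
  match r.end_ with
  | None -> Printf.sprintf "bytes=%Ld-" r.start
  | Some e -> Printf.sprintf "bytes=%Ld-%Ld" r.start e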
+2 -4
stack/requests/lib/dune
···
cohttp
cohttp-eio
uri
-
jsonm
-
yojson
-
ezjsonm
+
jsont
+
jsont.bytesrw
base64
-
cacheio
cookeio
xdge
logs
+41 -76
stack/requests/lib/requests.ml
···
module Status = Status
module Error = Error
module Retry = Retry
-
module Cache = Cache
(* Note: RNG initialization should be done by the application using
Mirage_crypto_rng_unix.initialize before calling Eio_main.run.
···
(* Main API - Session functionality with connection pooling *)
-
type ('clock, 'net) t = {
+
(* Internal session type with existential type parameters *)
+
type ('clock, 'net) session = {
sw : Eio.Switch.t;
clock : 'clock;
net : 'net;
···
retry : Retry.config option;
persist_cookies : bool;
xdg : Xdge.t option;
-
cache : Cache.t option;
(* Statistics - mutable for tracking across all derived sessions *)
mutable requests_made : int;
···
mutable retries_count : int;
}
+
(* Public type that hides the existential type parameters.
+
We constrain the existentials to ensure they satisfy the requirements
+
of the internal functions. *)
+
type t = T : ([> float Eio.Time.clock_ty] Eio.Resource.t,
+
[> [> `Generic] Eio.Net.ty] Eio.Resource.t) session -> t
+
let create
~sw
?http_pool
···
?(connection_lifetime = 300.0)
?retry
?(persist_cookies = false)
-
?(enable_cache = false)
?xdg
env =
let clock = env#clock in
let net = env#net in
-
let xdg = match xdg, persist_cookies || enable_cache with
+
let xdg = match xdg, persist_cookies with
| Some x, _ -> Some x
| None, true -> Some (Xdge.create env#fs "requests")
| None, false -> None
···
Cookeio.create ()
in
-
let cache = match enable_cache, xdg with
-
| true, Some xdg_ctx ->
-
let cache_dir = Xdge.cache_dir xdg_ctx in
-
Some (Cache.create ~sw ~enabled:true ~cache_dir:(Some cache_dir) ())
-
| true, None ->
-
(* Memory-only cache when no XDG available *)
-
Some (Cache.create ~sw ~enabled:true ~cache_dir:None ())
-
| false, _ -> None
-
in
-
-
{
+
T {
sw;
clock;
net;
···
retry;
persist_cookies;
xdg;
-
cache;
requests_made = 0;
total_time = 0.0;
retries_count = 0;
}
-
let set_default_header t key value =
-
{ t with default_headers = Headers.set key value t.default_headers }
+
let set_default_header (T t) key value =
+
T { t with default_headers = Headers.set key value t.default_headers }
-
let remove_default_header t key =
-
{ t with default_headers = Headers.remove key t.default_headers }
+
let remove_default_header (T t) key =
+
T { t with default_headers = Headers.remove key t.default_headers }
-
let set_auth t auth =
+
let set_auth (T t) auth =
Log.debug (fun m -> m "Setting authentication method");
-
{ t with auth = Some auth }
+
T { t with auth = Some auth }
-
let clear_auth t =
+
let clear_auth (T t) =
Log.debug (fun m -> m "Clearing authentication");
-
{ t with auth = None }
+
T { t with auth = None }
-
let set_timeout t timeout =
+
let set_timeout (T t) timeout =
Log.debug (fun m -> m "Setting timeout: %a" Timeout.pp timeout);
-
{ t with timeout }
+
T { t with timeout }
-
let set_retry t config =
+
let set_retry (T t) config =
Log.debug (fun m -> m "Setting retry config: max_retries=%d" config.Retry.max_retries);
-
{ t with retry = Some config }
+
T { t with retry = Some config }
-
let cookies t = t.cookie_jar
-
let clear_cookies t = Cookeio.clear t.cookie_jar
+
let cookies (T t) = t.cookie_jar
+
let clear_cookies (T t) = Cookeio.clear t.cookie_jar
(* Internal request function using connection pools *)
let make_request_internal t ?headers ?body ?auth ?timeout ?follow_redirects ?max_redirects ~method_ url =
···
| Some b -> Body.Private.to_string b
in
-
(* Check cache for GET and HEAD requests when body is not present *)
-
let cached_response = match t.cache, method_, body with
-
| Some cache, (`GET | `HEAD), None ->
-
Log.debug (fun m -> m "Checking cache for %s request to %s" method_str url);
-
let headers_cohttp = Cohttp.Header.of_list (Headers.to_list headers) in
-
Cache.get cache ~method_ ~url:uri ~headers:headers_cohttp
-
| _ -> None
-
in
-
-
let response = match cached_response with
-
| Some cached ->
-
Log.info (fun m -> m "Cache HIT for %s request to %s" method_str url);
-
(* Convert cached response to Response.t *)
-
let status = Cohttp.Code.code_of_status cached.Cache.status in
-
let resp_headers = Headers.of_list (Cohttp.Header.to_list cached.Cache.headers) in
-
let body_flow = Eio.Flow.string_source cached.Cache.body in
-
Response.Private.make ~sw:t.sw ~status ~headers:resp_headers ~body:body_flow ~url ~elapsed:0.0
-
| None ->
-
Log.info (fun m -> m "Cache MISS or not applicable for %s request to %s" method_str url);
+
let response =
(* Execute request with redirect handling *)
let rec make_with_redirects url_to_fetch redirects_left =
···
let elapsed = Unix.gettimeofday () -. start_time in
Log.info (fun m -> m "Request completed in %.3f seconds" elapsed);
-
(* Store in cache if successful and caching enabled *)
-
(match t.cache with
-
| Some cache when final_status >= 200 && final_status < 300 ->
-
Log.debug (fun m -> m "Storing response in cache for %s" url);
-
let status = Cohttp.Code.status_of_code final_status in
-
let resp_headers_cohttp = Cohttp.Header.of_list (Headers.to_list final_headers) in
-
let headers_cohttp = Cohttp.Header.of_list (Headers.to_list headers) in
-
Cache.put cache ~method_ ~url:uri ~request_headers:headers_cohttp
-
~status ~headers:resp_headers_cohttp ~body:final_body_str
-
| _ -> ());
-
(* Create a flow from the body string *)
let body_flow = Eio.Flow.string_source final_body_str in
···
response
(* Public request function - executes synchronously *)
-
let request t ?headers ?body ?auth ?timeout ?follow_redirects ?max_redirects ~method_ url =
+
let request (T t) ?headers ?body ?auth ?timeout ?follow_redirects ?max_redirects ~method_ url =
+
(* Unpacking (T t) here opens the existential so make_request_internal sees the concrete clock/net types *)
make_request_internal t ?headers ?body ?auth ?timeout
?follow_redirects ?max_redirects ~method_ url
(* Convenience methods *)
-
let get t ?headers ?auth ?timeout ?params url =
+
let get (T t) ?headers ?auth ?timeout ?params url =
let url = match params with
| Some p ->
let uri = Uri.of_string url in
···
Uri.to_string uri
| None -> url
in
-
request t ?headers ?auth ?timeout ~method_:`GET url
+
make_request_internal t ?headers ?auth ?timeout ~method_:`GET url
-
let post t ?headers ?body ?auth ?timeout url =
-
request t ?headers ?body ?auth ?timeout ~method_:`POST url
+
let post (T t) ?headers ?body ?auth ?timeout url =
+
make_request_internal t ?headers ?body ?auth ?timeout ~method_:`POST url
-
let put t ?headers ?body ?auth ?timeout url =
-
request t ?headers ?body ?auth ?timeout ~method_:`PUT url
+
let put (T t) ?headers ?body ?auth ?timeout url =
+
make_request_internal t ?headers ?body ?auth ?timeout ~method_:`PUT url
-
let patch t ?headers ?body ?auth ?timeout url =
-
request t ?headers ?body ?auth ?timeout ~method_:`PATCH url
+
let patch (T t) ?headers ?body ?auth ?timeout url =
+
make_request_internal t ?headers ?body ?auth ?timeout ~method_:`PATCH url
-
let delete t ?headers ?auth ?timeout url =
-
request t ?headers ?auth ?timeout ~method_:`DELETE url
+
let delete (T t) ?headers ?auth ?timeout url =
+
make_request_internal t ?headers ?auth ?timeout ~method_:`DELETE url
-
let head t ?headers ?auth ?timeout url =
-
request t ?headers ?auth ?timeout ~method_:`HEAD url
+
let head (T t) ?headers ?auth ?timeout url =
+
make_request_internal t ?headers ?auth ?timeout ~method_:`HEAD url
-
let options t ?headers ?auth ?timeout url =
-
request t ?headers ?auth ?timeout ~method_:`OPTIONS url
+
let options (T t) ?headers ?auth ?timeout url =
+
make_request_internal t ?headers ?auth ?timeout ~method_:`OPTIONS url
(* Cmdliner integration module *)
module Cmd = struct
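The `T` constructor introduced above is the usual existential-wrapper idiom: one GADT constructor packs the clock and net type parameters away so the public `t` carries no parameters, and each operation unpacks with `(T t)`. A minimal self-contained sketch of the idiom, with illustrative names only (nothing here is from the library):

module Client : sig
  type t                                   (* no type parameters exposed *)
  val create : clock:'clock -> now:('clock -> float) -> t
  val now : t -> float
end = struct
  type 'clock session = { clock : 'clock; now : 'clock -> float }
  type t = T : 'clock session -> t         (* 'clock is hidden inside T *)
  let create ~clock ~now = T { clock; now }
  let now (T s) = s.now s.clock            (* unpack, use, never leak 'clock *)
end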
+37 -29
stack/requests/lib/requests.mli
···
Use Eio.Fiber.both or Eio.Fiber.all for concurrent execution.
*)
-
type ('clock, 'net) t
+
type t
(** A stateful HTTP client that maintains cookies, auth, configuration, and
-
connection pools across requests. *)
+
connection pools across requests. The internal clock and network types are
+
hidden from external users. *)
(** {2 Creation and Configuration} *)
val create :
sw:Eio.Switch.t ->
-
?http_pool:('clock Eio.Time.clock, 'net Eio.Net.t) Conpool.t ->
-
?https_pool:('clock Eio.Time.clock, 'net Eio.Net.t) Conpool.t ->
+
?http_pool:(([> float Eio.Time.clock_ty] as 'clock) Eio.Resource.t,
+
([> [> `Generic] Eio.Net.ty] as 'net) Eio.Resource.t) Conpool.t ->
+
?https_pool:('clock Eio.Resource.t, 'net Eio.Resource.t) Conpool.t ->
?cookie_jar:Cookeio.jar ->
?default_headers:Headers.t ->
?auth:Auth.t ->
···
?connection_lifetime:float ->
?retry:Retry.config ->
?persist_cookies:bool ->
-
?enable_cache:bool ->
?xdg:Xdge.t ->
< clock: 'clock Eio.Resource.t; net: 'net Eio.Resource.t; fs: Eio.Fs.dir_ty Eio.Path.t; .. > ->
-
('clock Eio.Resource.t, 'net Eio.Resource.t) t
+
t
(** Create a new requests instance with persistent state and connection pooling.
All resources are bound to the provided switch and will be cleaned up automatically.
···
@param connection_lifetime Max lifetime of any pooled connection (default: 300s)
@param retry Retry configuration for failed requests
@param persist_cookies Whether to persist cookies to disk (default: false)
-
@param enable_cache Whether to enable HTTP caching (default: false)
-
@param xdg XDG directory context for cookies/cache (required if persist_cookies or enable_cache)
+
@param xdg XDG directory context for cookies (required if persist_cookies=true)
+
+
{b Note:} HTTP caching has been disabled for simplicity. See CACHEIO.md for integration notes
+
if you need to restore caching functionality in the future.
*)
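A minimal usage sketch for the simplified signature, mirroring the test code removed further down (the URL is illustrative; TLS targets additionally need the RNG initialised as noted in requests.ml):

let () =
  Eio_main.run @@ fun env ->
  Eio.Switch.run @@ fun sw ->
  let req = Requests.create ~sw env in
  let resp = Requests.get req "http://example.com/" in
  Printf.printf "status: %d\n" (Requests.Response.status_code resp)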
(** {2 Configuration Management} *)
-
val set_default_header : ('clock, 'net) t -> string -> string -> ('clock, 'net) t
+
val set_default_header : t -> string -> string -> t
(** Add or update a default header. Returns a new session with the updated header.
The original session's connection pools are shared. *)
-
val remove_default_header : ('clock, 'net) t -> string -> ('clock, 'net) t
+
val remove_default_header : t -> string -> t
(** Remove a default header. Returns a new session without the header. *)
-
val set_auth : ('clock, 'net) t -> Auth.t -> ('clock, 'net) t
+
val set_auth : t -> Auth.t -> t
(** Set default authentication. Returns a new session with auth configured. *)
-
val clear_auth : ('clock, 'net) t -> ('clock, 'net) t
+
val clear_auth : t -> t
(** Clear authentication. Returns a new session without auth. *)
-
val set_timeout : ('clock, 'net) t -> Timeout.t -> ('clock, 'net) t
+
val set_timeout : t -> Timeout.t -> t
(** Set default timeout. Returns a new session with the timeout configured. *)
-
val set_retry : ('clock, 'net) t -> Retry.config -> ('clock, 'net) t
+
val set_retry : t -> Retry.config -> t
(** Set retry configuration. Returns a new session with retry configured. *)
(** {2 Request Methods}
···
*)
val request :
-
(_ Eio.Time.clock, _ Eio.Net.t) t ->
+
t ->
?headers:Headers.t ->
?body:Body.t ->
?auth:Auth.t ->
···
(** Make a concurrent HTTP request *)
val get :
-
(_ Eio.Time.clock, _ Eio.Net.t) t ->
+
t ->
?headers:Headers.t ->
?auth:Auth.t ->
?timeout:Timeout.t ->
···
(** Concurrent GET request *)
val post :
-
(_ Eio.Time.clock, _ Eio.Net.t) t ->
+
t ->
?headers:Headers.t ->
?body:Body.t ->
?auth:Auth.t ->
···
(** Concurrent POST request *)
val put :
-
(_ Eio.Time.clock, _ Eio.Net.t) t ->
+
t ->
?headers:Headers.t ->
?body:Body.t ->
?auth:Auth.t ->
···
(** Concurrent PUT request *)
val patch :
-
(_ Eio.Time.clock, _ Eio.Net.t) t ->
+
t ->
?headers:Headers.t ->
?body:Body.t ->
?auth:Auth.t ->
···
(** Concurrent PATCH request *)
val delete :
-
(_ Eio.Time.clock, _ Eio.Net.t) t ->
+
t ->
?headers:Headers.t ->
?auth:Auth.t ->
?timeout:Timeout.t ->
···
(** Concurrent DELETE request *)
val head :
-
(_ Eio.Time.clock, _ Eio.Net.t) t ->
+
t ->
?headers:Headers.t ->
?auth:Auth.t ->
?timeout:Timeout.t ->
···
(** Concurrent HEAD request *)
val options :
-
(_ Eio.Time.clock, _ Eio.Net.t) t ->
+
t ->
?headers:Headers.t ->
?auth:Auth.t ->
?timeout:Timeout.t ->
···
(** {2 Cookie Management} *)
-
val cookies : ('clock, 'net) t -> Cookeio.jar
+
val cookies : t -> Cookeio.jar
(** Get the cookie jar for direct manipulation *)
-
val clear_cookies : ('clock, 'net) t -> unit
+
val clear_cookies : t -> unit
(** Clear all cookies *)
(** {1 Cmdliner Integration} *)
···
user_agent : string option; (** User-Agent header *)
}
-
val create : config -> < clock: ([> float Eio.Time.clock_ty ] as 'clock) Eio.Resource.t; net: ([> [>`Generic] Eio.Net.ty ] as 'net) Eio.Resource.t; fs: Eio.Fs.dir_ty Eio.Path.t; .. > -> Eio.Switch.t -> ('clock Eio.Resource.t, 'net Eio.Resource.t) t
+
val create : config ->
+
< clock: [> float Eio.Time.clock_ty] Eio.Resource.t;
+
net: [> [> `Generic] Eio.Net.ty] Eio.Resource.t;
+
fs: Eio.Fs.dir_ty Eio.Path.t; .. > ->
+
Eio.Switch.t -> t
(** [create config env sw] creates a requests instance from command-line configuration *)
(** {2 Individual Terms} *)
···
Cmd.eval cmd
]} *)
-
val requests_term : string -> < clock: ([> float Eio.Time.clock_ty ] as 'clock) Eio.Resource.t; net: ([> [>`Generic] Eio.Net.ty ] as 'net) Eio.Resource.t; fs: Eio.Fs.dir_ty Eio.Path.t; .. > -> Eio.Switch.t -> ('clock Eio.Resource.t, 'net Eio.Resource.t) t Cmdliner.Term.t
+
val requests_term : string ->
+
< clock: [> float Eio.Time.clock_ty] Eio.Resource.t;
+
net: [> [> `Generic] Eio.Net.ty] Eio.Resource.t;
+
fs: Eio.Fs.dir_ty Eio.Path.t; .. > ->
+
Eio.Switch.t -> t Cmdliner.Term.t
(** [requests_term app_name env sw] creates a term that directly produces a requests instance.
This is a convenience function that combines configuration parsing
···
(** Timeout configuration for requests *)
module Timeout = Timeout
-
-
(** HTTP caching with cache control and range request support *)
-
module Cache = Cache
(** {2 Logging} *)
-1
stack/requests/requests.opam
···
"ca-certs"
"mirage-crypto-rng-eio"
"uri"
-
"yojson"
"digestif"
"base64"
"logs"
-18
stack/requests/test/dune
···
-
(test
-
(name test_requests)
-
(libraries
-
requests
-
alcotest
-
eio
-
eio_main
-
cohttp
-
cohttp-eio
-
uri
-
yojson
-
logs
-
str)
-
(deps
-
(package requests)))(executable
-
(name test_connection_pool)
-
(modules test_connection_pool)
-
(libraries requests eio_main logs logs.fmt conpool))
-52
stack/requests/test/test_connection_pool.ml
···
-
(** Test stateless One API - each request opens a fresh connection *)
-
-
open Eio.Std
-
-
let test_one_stateless () =
-
(* Initialize RNG for TLS *)
-
Mirage_crypto_rng_unix.use_default ();
-
-
Eio_main.run @@ fun env ->
-
Switch.run @@ fun sw ->
-
-
(* Configure logging to see One request activity *)
-
Logs.set_reporter (Logs_fmt.reporter ());
-
Logs.set_level (Some Logs.Info);
-
Logs.Src.set_level Requests.One.src (Some Logs.Info);
-
-
traceln "=== Testing One Stateless API ===\n";
-
traceln "The One API creates fresh connections for each request (no pooling)\n";
-
-
(* Make multiple requests to the same host using stateless One API *)
-
let start_time = Unix.gettimeofday () in
-
-
for i = 1 to 10 do
-
traceln "Request %d:" i;
-
let response = Requests.One.get ~sw
-
~clock:env#clock ~net:env#net
-
"http://example.com"
-
in
-
-
traceln " Status: %d" (Requests.Response.status_code response);
-
traceln " Content-Length: %s"
-
(match Requests.Response.content_length response with
-
| Some len -> Int64.to_string len
-
| None -> "unknown");
-
-
(* Connection is fresh for each request - no pooling *)
-
traceln ""
-
done;
-
-
let elapsed = Unix.gettimeofday () -. start_time in
-
traceln "All 10 requests completed in %.2f seconds" elapsed;
-
traceln "Average: %.2f seconds per request" (elapsed /. 10.0);
-
-
traceln "\n=== Test completed successfully ==="
-
-
let () =
-
try
-
test_one_stateless ()
-
with e ->
-
traceln "Test failed with exception: %s" (Printexc.to_string e);
-
Printexc.print_backtrace stdout;
-
exit 1
-899
stack/requests/test/test_requests.ml
···
-
open Eio_main
-
-
let port = ref 8088
-
-
let get_free_port () =
-
let p = !port in
-
incr port;
-
p
-
-
let string_contains s sub =
-
try
-
let _ = Str.search_forward (Str.regexp_string sub) s 0 in
-
true
-
with Not_found -> false
-
-
module Test_server = struct
-
open Cohttp_eio
-
-
let make_server ~port handler env =
-
let server_socket =
-
Eio.Net.listen env#net ~sw:env#sw ~backlog:128 ~reuse_addr:true
-
(`Tcp (Eio.Net.Ipaddr.V4.loopback, port))
-
in
-
let callback _conn req body =
-
let (resp, body_content) = handler ~request:req ~body in
-
Server.respond_string () ~status:(Http.Response.status resp)
-
~headers:(Http.Response.headers resp)
-
~body:body_content
-
in
-
let server = Server.make ~callback () in
-
Server.run server_socket server ~on_error:(fun exn ->
-
Logs.err (fun m -> m "Server error: %s" (Printexc.to_string exn))
-
)
-
-
let echo_handler ~request ~body =
-
let uri = Http.Request.resource request in
-
let meth = Http.Request.meth request in
-
let headers = Http.Request.headers request in
-
let body_str = Eio.Flow.read_all body in
-
-
let response_body =
-
`Assoc [
-
"method", `String (Cohttp.Code.string_of_method meth);
-
"uri", `String uri;
-
"headers", `Assoc (
-
Cohttp.Header.to_lines headers
-
|> List.map (fun line ->
-
match String.split_on_char ':' line with
-
| [k; v] -> (String.trim k, `String (String.trim v))
-
| _ -> ("", `String line)
-
)
-
);
-
"body", `String body_str;
-
]
-
|> Yojson.Basic.to_string
-
in
-
-
let resp = Http.Response.make ~status:`OK () in
-
let resp_headers = Cohttp.Header.add_unless_exists
-
(Http.Response.headers resp) "content-type" "application/json"
-
in
-
({ resp with headers = resp_headers }, response_body)
-
-
let status_handler status_code ~request:_ ~body:_ =
-
let status = Cohttp.Code.status_of_code status_code in
-
let resp = Http.Response.make ~status () in
-
(resp, "")
-
-
let redirect_handler target_path ~request:_ ~body:_ =
-
let resp = Http.Response.make ~status:`Moved_permanently () in
-
let headers = Cohttp.Header.add
-
(Http.Response.headers resp) "location" target_path
-
in
-
({ resp with headers }, "")
-
-
let cookie_handler ~request ~body:_ =
-
let headers = Http.Request.headers request in
-
let cookies =
-
match Cohttp.Header.get headers "cookie" with
-
| Some cookie_str -> cookie_str
-
| None -> "no cookies"
-
in
-
-
let resp = Http.Response.make ~status:`OK () in
-
let resp_headers =
-
Http.Response.headers resp
-
|> (fun h -> Cohttp.Header.add h "set-cookie" "test_cookie=test_value; Path=/")
-
|> (fun h -> Cohttp.Header.add h "set-cookie" "session=abc123; Path=/; HttpOnly")
-
in
-
({ resp with headers = resp_headers },
-
cookies)
-
-
let auth_handler ~request ~body:_ =
-
let headers = Http.Request.headers request in
-
let auth_result =
-
match Cohttp.Header.get headers "authorization" with
-
| Some auth ->
-
if String.starts_with ~prefix:"Bearer " auth then
-
let token = String.sub auth 7 (String.length auth - 7) in
-
if token = "valid_token" then "authorized"
-
else "invalid token"
-
else if String.starts_with ~prefix:"Basic " auth then
-
"basic auth received"
-
else "unknown auth"
-
| None -> "no auth"
-
in
-
-
let status =
-
if auth_result = "authorized" || auth_result = "basic auth received"
-
then `OK
-
else `Unauthorized
-
in
-
let resp = Http.Response.make ~status () in
-
(resp, auth_result)
-
-
let json_handler ~request:_ ~body =
-
let body_str = Eio.Flow.read_all body in
-
let json =
-
try
-
let parsed = Yojson.Basic.from_string body_str in
-
`Assoc [
-
"received", parsed;
-
"echo", `Bool true;
-
]
-
with _ ->
-
`Assoc [
-
"error", `String "invalid json";
-
"received", `String body_str;
-
]
-
in
-
-
let resp = Http.Response.make ~status:`OK () in
-
let resp_headers = Cohttp.Header.add_unless_exists
-
(Http.Response.headers resp) "content-type" "application/json"
-
in
-
({ resp with headers = resp_headers },
-
Yojson.Basic.to_string json)
-
-
let timeout_handler clock delay ~request:_ ~body:_ =
-
Eio.Time.sleep clock delay;
-
let resp = Http.Response.make ~status:`OK () in
-
(resp,"delayed response")
-
-
let chunked_handler _clock chunks ~request:_ ~body:_ =
-
let resp = Http.Response.make ~status:`OK () in
-
let body_str = String.concat "" chunks in
-
(resp,body_str)
-
-
let large_response_handler size ~request:_ ~body:_ =
-
let data = String.make size 'X' in
-
let resp = Http.Response.make ~status:`OK () in
-
(resp,data)
-
-
let multipart_handler ~request ~body =
-
let headers = Http.Request.headers request in
-
let content_type = Cohttp.Header.get headers "content-type" in
-
let body_str = Eio.Flow.read_all body in
-
-
let result =
-
match content_type with
-
| Some ct when String.starts_with ~prefix:"multipart/form-data" ct ->
-
Printf.sprintf "Multipart received: %d bytes" (String.length body_str)
-
| _ -> "Not multipart"
-
in
-
-
let resp = Http.Response.make ~status:`OK () in
-
(resp,result)
-
-
let router clock ~request ~body =
-
let uri = Http.Request.resource request in
-
match uri with
-
| "/" | "/echo" -> echo_handler ~request ~body
-
| "/status/200" -> status_handler 200 ~request ~body
-
| "/status/404" -> status_handler 404 ~request ~body
-
| "/status/500" -> status_handler 500 ~request ~body
-
| "/redirect" -> redirect_handler "/redirected" ~request ~body
-
| "/redirected" ->
-
let resp = Http.Response.make ~status:`OK () in
-
(resp,"redirect successful")
-
| "/cookies" -> cookie_handler ~request ~body
-
| "/auth" -> auth_handler ~request ~body
-
| "/json" -> json_handler ~request ~body
-
| "/timeout" -> timeout_handler clock 2.0 ~request ~body
-
| "/chunked" ->
-
chunked_handler clock ["chunk1"; "chunk2"; "chunk3"] ~request ~body
-
| "/large" -> large_response_handler 10000 ~request ~body
-
| "/multipart" -> multipart_handler ~request ~body
-
| _ -> status_handler 404 ~request ~body
-
-
let start_server ~port env =
-
Eio.Fiber.fork ~sw:env#sw (fun () ->
-
make_server ~port (router env#clock) env
-
);
-
Eio.Time.sleep env#clock 0.1
-
end
-
-
let test_get_request () =
-
run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let port = get_free_port () in
-
let base_url = Printf.sprintf "http://127.0.0.1:%d" port in
-
-
let test_env = object
-
method clock = env#clock
-
method net = env#net
-
method sw = sw
-
end in
-
Test_server.start_server ~port test_env;
-
-
let req = Requests.create ~sw env in
-
let response = Requests.get req (base_url ^ "/echo") in
-
-
Alcotest.(check int) "GET status" 200 (Requests.Response.status_code response);
-
-
let body_str = Requests.Response.body response |> Eio.Flow.read_all in
-
let json = Yojson.Basic.from_string body_str in
-
let method_str =
-
json |> Yojson.Basic.Util.member "method" |> Yojson.Basic.Util.to_string
-
in
-
-
Alcotest.(check string) "GET method" "GET" method_str
-
-
let test_post_request () =
-
run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let port = get_free_port () in
-
let base_url = Printf.sprintf "http://127.0.0.1:%d" port in
-
-
let test_env = object
-
method clock = env#clock
-
method net = env#net
-
method sw = sw
-
end in
-
Test_server.start_server ~port test_env;
-
-
let req = Requests.create ~sw env in
-
let body = Requests.Body.text "test post data" in
-
let response = Requests.post req ~body (base_url ^ "/echo") in
-
-
Alcotest.(check int) "POST status" 200 (Requests.Response.status_code response);
-
-
let body_str = Requests.Response.body response |> Eio.Flow.read_all in
-
let json = Yojson.Basic.from_string body_str in
-
let received_body =
-
json |> Yojson.Basic.Util.member "body" |> Yojson.Basic.Util.to_string
-
in
-
-
Alcotest.(check string) "POST body" "test post data" received_body
-
-
let test_put_request () =
-
run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let port = get_free_port () in
-
let base_url = Printf.sprintf "http://127.0.0.1:%d" port in
-
-
let test_env = object
-
method clock = env#clock
-
method net = env#net
-
method sw = sw
-
end in
-
Test_server.start_server ~port test_env;
-
-
let req = Requests.create ~sw env in
-
let body = Requests.Body.text "put data" in
-
let response = Requests.put req ~body (base_url ^ "/echo") in
-
-
Alcotest.(check int) "PUT status" 200 (Requests.Response.status_code response);
-
-
let body_str = Requests.Response.body response |> Eio.Flow.read_all in
-
let json = Yojson.Basic.from_string body_str in
-
let method_str =
-
json |> Yojson.Basic.Util.member "method" |> Yojson.Basic.Util.to_string
-
in
-
-
Alcotest.(check string) "PUT method" "PUT" method_str
-
-
let test_delete_request () =
-
run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let port = get_free_port () in
-
let base_url = Printf.sprintf "http://127.0.0.1:%d" port in
-
-
let test_env = object
-
method clock = env#clock
-
method net = env#net
-
method sw = sw
-
end in
-
Test_server.start_server ~port test_env;
-
-
let req = Requests.create ~sw env in
-
let response = Requests.delete req (base_url ^ "/echo") in
-
-
Alcotest.(check int) "DELETE status" 200 (Requests.Response.status_code response);
-
-
let body_str = Requests.Response.body response |> Eio.Flow.read_all in
-
let json = Yojson.Basic.from_string body_str in
-
let method_str =
-
json |> Yojson.Basic.Util.member "method" |> Yojson.Basic.Util.to_string
-
in
-
-
Alcotest.(check string) "DELETE method" "DELETE" method_str
-
-
let test_patch_request () =
-
run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let port = get_free_port () in
-
let base_url = Printf.sprintf "http://127.0.0.1:%d" port in
-
-
let test_env = object
-
method clock = env#clock
-
method net = env#net
-
method sw = sw
-
end in
-
Test_server.start_server ~port test_env;
-
-
let req = Requests.create ~sw env in
-
let body = Requests.Body.of_string Requests.Mime.json {|{"patch": "data"}|} in
-
let response = Requests.patch req ~body (base_url ^ "/echo") in
-
-
Alcotest.(check int) "PATCH status" 200 (Requests.Response.status_code response);
-
-
let body_str = Requests.Response.body response |> Eio.Flow.read_all in
-
let json = Yojson.Basic.from_string body_str in
-
let method_str =
-
json |> Yojson.Basic.Util.member "method" |> Yojson.Basic.Util.to_string
-
in
-
-
Alcotest.(check string) "PATCH method" "PATCH" method_str
-
-
let test_head_request () =
-
run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let port = get_free_port () in
-
let base_url = Printf.sprintf "http://127.0.0.1:%d" port in
-
-
let test_env = object
-
method clock = env#clock
-
method net = env#net
-
method sw = sw
-
end in
-
Test_server.start_server ~port test_env;
-
-
let req = Requests.create ~sw env in
-
let response = Requests.head req (base_url ^ "/echo") in
-
-
Alcotest.(check int) "HEAD status" 200 (Requests.Response.status_code response)
-
-
let test_options_request () =
-
run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let port = get_free_port () in
-
let base_url = Printf.sprintf "http://127.0.0.1:%d" port in
-
-
let test_env = object
-
method clock = env#clock
-
method net = env#net
-
method sw = sw
-
end in
-
Test_server.start_server ~port test_env;
-
-
let req = Requests.create ~sw env in
-
let response = Requests.options req (base_url ^ "/echo") in
-
-
Alcotest.(check int) "OPTIONS status" 200 (Requests.Response.status_code response)
-
-
let test_custom_headers () =
-
run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let port = get_free_port () in
-
let base_url = Printf.sprintf "http://127.0.0.1:%d" port in
-
-
let test_env = object
-
method clock = env#clock
-
method net = env#net
-
method sw = sw
-
end in
-
Test_server.start_server ~port test_env;
-
-
let req = Requests.create ~sw env in
-
let headers =
-
Requests.Headers.empty
-
|> Requests.Headers.set "X-Custom-Header" "custom-value"
-
|> Requests.Headers.set "User-Agent" "test-agent"
-
in
-
let response = Requests.get req ~headers (base_url ^ "/echo") in
-
-
Alcotest.(check int) "Headers status" 200 (Requests.Response.status_code response);
-
-
let body_str = Requests.Response.body response |> Eio.Flow.read_all in
-
let json = Yojson.Basic.from_string body_str in
-
let headers_obj = json |> Yojson.Basic.Util.member "headers" in
-
-
let custom_header =
-
headers_obj
-
|> Yojson.Basic.Util.member "x-custom-header"
-
|> Yojson.Basic.Util.to_string_option
-
|> Option.value ~default:""
-
in
-
-
Alcotest.(check string) "Custom header" "custom-value" custom_header
-
-
let test_query_params () =
-
run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let port = get_free_port () in
-
let base_url = Printf.sprintf "http://127.0.0.1:%d" port in
-
-
let test_env = object
-
method clock = env#clock
-
method net = env#net
-
method sw = sw
-
end in
-
Test_server.start_server ~port test_env;
-
-
let req = Requests.create ~sw env in
-
let params = [("key1", "value1"); ("key2", "value2")] in
-
let response = Requests.get req ~params (base_url ^ "/echo") in
-
-
Alcotest.(check int) "Query params status" 200 (Requests.Response.status_code response);
-
-
let body_str = Requests.Response.body response |> Eio.Flow.read_all in
-
let json = Yojson.Basic.from_string body_str in
-
let uri = json |> Yojson.Basic.Util.member "uri" |> Yojson.Basic.Util.to_string in
-
-
Alcotest.(check bool) "Query params present" true
-
(string_contains uri "key1=value1" && string_contains uri "key2=value2")
-
-
let test_json_body () =
-
run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let port = get_free_port () in
-
let base_url = Printf.sprintf "http://127.0.0.1:%d" port in
-
-
let test_env = object
-
method clock = env#clock
-
method net = env#net
-
method sw = sw
-
end in
-
Test_server.start_server ~port test_env;
-
-
let req = Requests.create ~sw env in
-
let json_data = {|{"name": "test", "value": 42}|} in
-
let body = Requests.Body.of_string Requests.Mime.json json_data in
-
let response = Requests.post req ~body (base_url ^ "/json") in
-
-
Alcotest.(check int) "JSON body status" 200 (Requests.Response.status_code response);
-
-
let body_str = Requests.Response.body response |> Eio.Flow.read_all in
-
let json = Yojson.Basic.from_string body_str in
-
let received = json |> Yojson.Basic.Util.member "received" in
-
let name = received |> Yojson.Basic.Util.member "name" |> Yojson.Basic.Util.to_string in
-
-
Alcotest.(check string) "JSON field" "test" name
-
-
let test_form_data () =
-
run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let port = get_free_port () in
-
let base_url = Printf.sprintf "http://127.0.0.1:%d" port in
-
-
let test_env = object
-
method clock = env#clock
-
method net = env#net
-
method sw = sw
-
end in
-
Test_server.start_server ~port test_env;
-
-
let req = Requests.create ~sw env in
-
let form_data = [("field1", "value1"); ("field2", "value2")] in
-
let body = Requests.Body.form form_data in
-
let response = Requests.post req ~body (base_url ^ "/echo") in
-
-
Alcotest.(check int) "Form data status" 200 (Requests.Response.status_code response);
-
-
let body_str = Requests.Response.body response |> Eio.Flow.read_all in
-
let json = Yojson.Basic.from_string body_str in
-
let received_body =
-
json |> Yojson.Basic.Util.member "body" |> Yojson.Basic.Util.to_string
-
in
-
-
Alcotest.(check bool) "Form data encoded" true
-
(string_contains received_body "field1=value1" &&
-
string_contains received_body "field2=value2")
-
-
let test_status_codes () =
-
run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let port = get_free_port () in
-
let base_url = Printf.sprintf "http://127.0.0.1:%d" port in
-
-
let test_env = object
-
method clock = env#clock
-
method net = env#net
-
method sw = sw
-
end in
-
Test_server.start_server ~port test_env;
-
-
let req = Requests.create ~sw env in
-
-
let resp_200 = Requests.get req (base_url ^ "/status/200") in
-
Alcotest.(check int) "Status 200" 200 (Requests.Response.status_code resp_200);
-
-
let resp_404 = Requests.get req (base_url ^ "/status/404") in
-
Alcotest.(check int) "Status 404" 404 (Requests.Response.status_code resp_404);
-
-
let resp_500 = Requests.get req (base_url ^ "/status/500") in
-
Alcotest.(check int) "Status 500" 500 (Requests.Response.status_code resp_500)
-
-
let test_redirects () =
-
run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let port = get_free_port () in
-
let base_url = Printf.sprintf "http://127.0.0.1:%d" port in
-
-
let test_env = object
-
method clock = env#clock
-
method net = env#net
-
method sw = sw
-
end in
-
Test_server.start_server ~port test_env;
-
-
let req = Requests.create ~sw ~follow_redirects:true env in
-
let response = Requests.get req (base_url ^ "/redirect") in
-
-
Alcotest.(check int) "Redirect followed" 200 (Requests.Response.status_code response);
-
-
let body_str = Requests.Response.body response |> Eio.Flow.read_all in
-
Alcotest.(check string) "Redirect result" "redirect successful" body_str
-
-
let test_no_redirect () =
-
run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let port = get_free_port () in
-
let base_url = Printf.sprintf "http://127.0.0.1:%d" port in
-
-
let test_env = object
-
method clock = env#clock
-
method net = env#net
-
method sw = sw
-
end in
-
Test_server.start_server ~port test_env;
-
-
let req = Requests.create ~sw env in
-
let response = Requests.request req ~follow_redirects:false ~method_:`GET (base_url ^ "/redirect") in
-
-
Alcotest.(check int) "Redirect not followed" 301
-
(Requests.Response.status_code response)
-
-
let test_cookies () =
-
run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let port = get_free_port () in
-
let base_url = Printf.sprintf "http://127.0.0.1:%d" port in
-
-
let test_env = object
-
method clock = env#clock
-
method net = env#net
-
method sw = sw
-
end in
-
Test_server.start_server ~port test_env;
-
-
let req = Requests.create ~sw env in
-
-
let _first_response = Requests.get req (base_url ^ "/cookies") in
-
-
let second_response = Requests.get req (base_url ^ "/cookies") in
-
let body_str = Requests.Response.body second_response |> Eio.Flow.read_all in
-
-
Alcotest.(check bool) "Cookies sent back" true
-
(string_contains body_str "test_cookie=test_value")
-
-
let test_bearer_auth () =
-
run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let port = get_free_port () in
-
let base_url = Printf.sprintf "http://127.0.0.1:%d" port in
-
-
let test_env = object
-
method clock = env#clock
-
method net = env#net
-
method sw = sw
-
end in
-
Test_server.start_server ~port test_env;
-
-
let req = Requests.create ~sw env in
-
let auth = Requests.Auth.bearer ~token:"valid_token" in
-
let response = Requests.get req ~auth (base_url ^ "/auth") in
-
-
Alcotest.(check int) "Bearer auth status" 200 (Requests.Response.status_code response);
-
-
let body_str = Requests.Response.body response |> Eio.Flow.read_all in
-
Alcotest.(check string) "Bearer auth result" "authorized" body_str
-
-
let test_basic_auth () =
-
run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let port = get_free_port () in
-
let base_url = Printf.sprintf "http://127.0.0.1:%d" port in
-
-
let test_env = object
-
method clock = env#clock
-
method net = env#net
-
method sw = sw
-
end in
-
Test_server.start_server ~port test_env;
-
-
let req = Requests.create ~sw env in
-
let auth = Requests.Auth.basic ~username:"user" ~password:"pass" in
-
let response = Requests.get req ~auth (base_url ^ "/auth") in
-
-
Alcotest.(check int) "Basic auth status" 200 (Requests.Response.status_code response);
-
-
let body_str = Requests.Response.body response |> Eio.Flow.read_all in
-
Alcotest.(check string) "Basic auth result" "basic auth received" body_str
-
-
let test_timeout () =
-
run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let port = get_free_port () in
-
let base_url = Printf.sprintf "http://127.0.0.1:%d" port in
-
-
let test_env = object
-
method clock = env#clock
-
method net = env#net
-
method sw = sw
-
end in
-
Test_server.start_server ~port test_env;
-
-
let req = Requests.create ~sw env in
-
let timeout = Requests.Timeout.create ~total:0.5 () in
-
-
let exception_raised =
-
try
-
let _ = Requests.get req ~timeout (base_url ^ "/timeout") in
-
false
-
with _ -> true
-
in
-
-
Alcotest.(check bool) "Timeout triggered" true exception_raised
-
-
let test_concurrent_requests () =
-
run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let port = get_free_port () in
-
let base_url = Printf.sprintf "http://127.0.0.1:%d" port in
-
-
let test_env = object
-
method clock = env#clock
-
method net = env#net
-
method sw = sw
-
end in
-
Test_server.start_server ~port test_env;
-
-
let req = Requests.create ~sw env in
-
-
let r1 = ref None in
-
let r2 = ref None in
-
let r3 = ref None in
-
-
Eio.Fiber.all [
-
(fun () -> r1 := Some (Requests.get req (base_url ^ "/status/200")));
-
(fun () -> r2 := Some (Requests.get req (base_url ^ "/status/404")));
-
(fun () -> r3 := Some (Requests.get req (base_url ^ "/status/500")));
-
];
-
-
let r1 = Option.get !r1 in
-
let r2 = Option.get !r2 in
-
let r3 = Option.get !r3 in
-
-
Alcotest.(check int) "Concurrent 1" 200 (Requests.Response.status_code r1);
-
Alcotest.(check int) "Concurrent 2" 404 (Requests.Response.status_code r2);
-
Alcotest.(check int) "Concurrent 3" 500 (Requests.Response.status_code r3)
-
-
let test_large_response () =
-
run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let port = get_free_port () in
-
let base_url = Printf.sprintf "http://127.0.0.1:%d" port in
-
-
let test_env = object
-
method clock = env#clock
-
method net = env#net
-
method sw = sw
-
end in
-
Test_server.start_server ~port test_env;
-
-
let req = Requests.create ~sw env in
-
let response = Requests.get req (base_url ^ "/large") in
-
-
Alcotest.(check int) "Large response status" 200 (Requests.Response.status_code response);
-
-
let body_str = Requests.Response.body response |> Eio.Flow.read_all in
-
Alcotest.(check int) "Large response size" 10000 (String.length body_str)
-
-
let test_one_module () =
-
run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let port = get_free_port () in
-
let base_url = Printf.sprintf "http://127.0.0.1:%d" port in
-
-
let test_env = object
-
method clock = env#clock
-
method net = env#net
-
method sw = sw
-
end in
-
Test_server.start_server ~port test_env;
-
-
let response = Requests.One.get ~sw
-
~clock:env#clock ~net:env#net
-
(base_url ^ "/echo")
-
in
-
-
Alcotest.(check int) "One module status" 200 (Requests.Response.status_code response)
-
-
let test_multipart () =
-
run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let port = get_free_port () in
-
let base_url = Printf.sprintf "http://127.0.0.1:%d" port in
-
-
let test_env = object
-
method clock = env#clock
-
method net = env#net
-
method sw = sw
-
end in
-
Test_server.start_server ~port test_env;
-
-
let req = Requests.create ~sw env in
-
let parts = [
-
{ Requests.Body.name = "field1";
-
filename = None;
-
content_type = Requests.Mime.text;
-
content = `String "value1" };
-
{ Requests.Body.name = "field2";
-
filename = Some "test.txt";
-
content_type = Requests.Mime.text;
-
content = `String "file content" };
-
] in
-
let body = Requests.Body.multipart parts in
-
let response = Requests.post req ~body (base_url ^ "/multipart") in
-
-
Alcotest.(check int) "Multipart status" 200 (Requests.Response.status_code response);
-
-
let body_str = Requests.Response.body response |> Eio.Flow.read_all in
-
Alcotest.(check bool) "Multipart recognized" true
-
(String.starts_with ~prefix:"Multipart received:" body_str)
-
-
let test_response_headers () =
-
run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let port = get_free_port () in
-
let base_url = Printf.sprintf "http://127.0.0.1:%d" port in
-
-
let test_env = object
-
method clock = env#clock
-
method net = env#net
-
method sw = sw
-
end in
-
Test_server.start_server ~port test_env;
-
-
let req = Requests.create ~sw env in
-
let response = Requests.get req (base_url ^ "/json") in
-
-
let content_type =
-
Requests.Response.headers response
-
|> Requests.Headers.get "content-type"
-
in
-
-
Alcotest.(check (option string)) "Response content-type"
-
(Some "application/json") content_type
-
-
let test_default_headers () =
-
run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let port = get_free_port () in
-
let base_url = Printf.sprintf "http://127.0.0.1:%d" port in
-
-
let test_env = object
-
method clock = env#clock
-
method net = env#net
-
method sw = sw
-
end in
-
Test_server.start_server ~port test_env;
-
-
let default_headers =
-
Requests.Headers.empty
-
|> Requests.Headers.set "X-Default" "default-value"
-
in
-
let req = Requests.create ~sw ~default_headers env in
-
let response = Requests.get req (base_url ^ "/echo") in
-
-
let body_str = Requests.Response.body response |> Eio.Flow.read_all in
-
let json = Yojson.Basic.from_string body_str in
-
let headers_obj = json |> Yojson.Basic.Util.member "headers" in
-
-
let default_header =
-
headers_obj
-
|> Yojson.Basic.Util.member "x-default"
-
|> Yojson.Basic.Util.to_string_option
-
|> Option.value ~default:""
-
in
-
-
Alcotest.(check string) "Default header present" "default-value" default_header
-
-
let test_session_persistence () =
-
run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let port = get_free_port () in
-
let base_url = Printf.sprintf "http://127.0.0.1:%d" port in
-
-
let test_env = object
-
method clock = env#clock
-
method net = env#net
-
method sw = sw
-
end in
-
Test_server.start_server ~port test_env;
-
-
let req = Requests.create ~sw env in
-
-
let req = Requests.set_default_header req "X-Session" "session-123" in
-
-
let auth = Requests.Auth.bearer ~token:"test_token" in
-
let req = Requests.set_auth req auth in
-
-
let response1 = Requests.get req (base_url ^ "/echo") in
-
let body_str1 = Requests.Response.body response1 |> Eio.Flow.read_all in
-
let json1 = Yojson.Basic.from_string body_str1 in
-
let headers1 = json1 |> Yojson.Basic.Util.member "headers" in
-
-
let session_header =
-
headers1
-
|> Yojson.Basic.Util.member "x-session"
-
|> Yojson.Basic.Util.to_string_option
-
|> Option.value ~default:""
-
in
-
-
Alcotest.(check string) "Session header persisted" "session-123" session_header;
-
-
let req = Requests.remove_default_header req "X-Session" in
-
-
let response2 = Requests.get req (base_url ^ "/echo") in
-
let body_str2 = Requests.Response.body response2 |> Eio.Flow.read_all in
-
let json2 = Yojson.Basic.from_string body_str2 in
-
let headers2 = json2 |> Yojson.Basic.Util.member "headers" in
-
-
let session_header2 =
-
headers2
-
|> Yojson.Basic.Util.member "x-session"
-
|> Yojson.Basic.Util.to_string_option
-
in
-
-
Alcotest.(check (option string)) "Session header removed" None session_header2
-
-
let () =
-
Logs.set_reporter (Logs.format_reporter ());
-
Logs.set_level (Some Logs.Warning);
-
-
let open Alcotest in
-
run "Requests Tests" [
-
"HTTP Methods", [
-
test_case "GET request" `Quick test_get_request;
-
test_case "POST request" `Quick test_post_request;
-
test_case "PUT request" `Quick test_put_request;
-
test_case "DELETE request" `Quick test_delete_request;
-
test_case "PATCH request" `Quick test_patch_request;
-
test_case "HEAD request" `Quick test_head_request;
-
test_case "OPTIONS request" `Quick test_options_request;
-
];
-
"Request Features", [
-
test_case "Custom headers" `Quick test_custom_headers;
-
test_case "Query parameters" `Quick test_query_params;
-
test_case "JSON body" `Quick test_json_body;
-
test_case "Form data" `Quick test_form_data;
-
test_case "Multipart upload" `Quick test_multipart;
-
test_case "Default headers" `Quick test_default_headers;
-
];
-
"Response Handling", [
-
test_case "Status codes" `Quick test_status_codes;
-
test_case "Response headers" `Quick test_response_headers;
-
test_case "Large response" `Quick test_large_response;
-
];
-
"Redirects", [
-
test_case "Follow redirects" `Quick test_redirects;
-
test_case "No follow redirects" `Quick test_no_redirect;
-
];
-
"Authentication", [
-
test_case "Bearer auth" `Quick test_bearer_auth;
-
test_case "Basic auth" `Quick test_basic_auth;
-
];
-
"Session Features", [
-
test_case "Cookies" `Quick test_cookies;
-
test_case "Session persistence" `Quick test_session_persistence;
-
];
-
"Advanced", [
-
test_case "Timeout handling" `Quick test_timeout;
-
test_case "Concurrent requests" `Quick test_concurrent_requests;
-
test_case "One module" `Quick test_one_module;
-
];
-
]
+2 -1
stack/requests_json_api/dune-project
···
dune
requests
eio
-
ezjsonm))
+
jsont
+
bytesrw))
+1 -1
stack/requests_json_api/lib/dune
···
(library
(public_name requests_json_api)
(name requests_json_api)
-
(libraries requests eio ezjsonm))
+
(libraries requests eio fmt jsont jsont.bytesrw))
+83 -15
stack/requests_json_api/lib/requests_json_api.ml
···
(** {1 JSON Helpers} *)
-
let parse_json parser body_str =
-
Ezjsonm.from_string body_str |> parser
+
let parse_json decoder body_str =
+
match Jsont_bytesrw.decode_string' decoder body_str with
+
| Ok v -> v
+
| Error e -> failwith (Fmt.str "JSON parse error: %s" (Jsont.Error.to_string e))
-
let parse_json_result parser body_str =
-
try Ok (parse_json parser body_str)
-
with exn -> Error (Printexc.to_string exn)
+
let parse_json_result decoder body_str =
+
match Jsont_bytesrw.decode_string' decoder body_str with
+
| Ok v -> Ok v
+
| Error e -> Error (Jsont.Error.to_string e)
-
let get_json_exn session url parser =
+
let get_json_exn session url decoder =
let response = Requests.get session url in
let status = Requests.Response.status_code response in
if status < 200 || status >= 300 then
failwith (Printf.sprintf "HTTP %d" status);
-
read_body response |> parse_json parser
+
read_body response |> parse_json decoder
-
let get_json session url parser =
+
let get_json session url decoder =
match get_result session url with
| Ok body ->
-
(match parse_json_result parser body with
+
(match parse_json_result decoder body with
| Ok result -> Ok result
| Error msg -> Error (`Json_error msg))
| Error (status, body) -> Error (`Http (status, body))
-
let post_json session url json_value =
-
let body_str = Ezjsonm.value_to_string json_value in
+
let post_json session url jsont_codec value =
+
let body_str = match Jsont_bytesrw.encode_string' jsont_codec value with
+
| Ok s -> s
+
| Error e -> failwith (Fmt.str "JSON encoding error: %s" (Jsont.Error.to_string e))
+
in
let body = Requests.Body.of_string Requests.Mime.json body_str in
Requests.post session url ~body
-
let post_json_exn session url json_value =
-
let response = post_json session url json_value in
+
let post_json_exn session url jsont_codec value =
+
let response = post_json session url jsont_codec value in
let status = Requests.Response.status_code response in
if status < 200 || status >= 300 then
failwith (Printf.sprintf "HTTP %d" status);
read_body response
-
let post_json_result session url json_value =
+
let post_json_result session url jsont_codec value =
try
-
let response = post_json session url json_value in
+
let response = post_json session url jsont_codec value in
check_2xx response
with exn ->
Error (0, Printexc.to_string exn)
+
+
let post_json_decode_exn session url ~req req_value ~resp =
+
let response = post_json session url req req_value in
+
let status = Requests.Response.status_code response in
+
if status < 200 || status >= 300 then
+
failwith (Printf.sprintf "HTTP %d" status);
+
read_body response |> parse_json resp
+
+
let post_json_decode session url ~req req_value ~resp =
+
try
+
let response = post_json session url req req_value in
+
match check_2xx response with
+
| Ok body ->
+
(match parse_json_result resp body with
+
| Ok result -> Ok result
+
| Error msg -> Error (`Json_error msg))
+
| Error (status, body) -> Error (`Http (status, body))
+
with exn ->
+
Error (`Http (0, Printexc.to_string exn))
+
+
let put_json_exn session url jsont_codec value =
+
let body_str = match Jsont_bytesrw.encode_string' jsont_codec value with
+
| Ok s -> s
+
| Error e -> failwith (Fmt.str "JSON encoding error: %s" (Jsont.Error.to_string e))
+
in
+
let body = Requests.Body.of_string Requests.Mime.json body_str in
+
let response = Requests.put session url ~body in
+
let status = Requests.Response.status_code response in
+
if status < 200 || status >= 300 then
+
failwith (Printf.sprintf "HTTP %d" status);
+
read_body response
+
+
let put_json_decode_exn session url ~req req_value ~resp =
+
let body_str = match Jsont_bytesrw.encode_string' req req_value with
+
| Ok s -> s
+
| Error e -> failwith (Fmt.str "JSON encoding error: %s" (Jsont.Error.to_string e))
+
in
+
let body = Requests.Body.of_string Requests.Mime.json body_str in
+
let response = Requests.put session url ~body in
+
let status = Requests.Response.status_code response in
+
if status < 200 || status >= 300 then
+
failwith (Printf.sprintf "HTTP %d" status);
+
read_body response |> parse_json resp
+
+
let patch_json_exn session url jsont_codec value =
+
let body_str = match Jsont_bytesrw.encode_string' jsont_codec value with
+
| Ok s -> s
+
| Error e -> failwith (Fmt.str "JSON encoding error: %s" (Jsont.Error.to_string e))
+
in
+
let body = Requests.Body.of_string Requests.Mime.json body_str in
+
let response = Requests.patch session url ~body in
+
let status = Requests.Response.status_code response in
+
if status < 200 || status >= 300 then
+
failwith (Printf.sprintf "HTTP %d" status);
+
read_body response
+
+
let delete_json_exn session url =
+
let response = Requests.delete session url in
+
let status = Requests.Response.status_code response in
+
if status < 200 || status >= 300 then
+
failwith (Printf.sprintf "HTTP %d" status);
+
read_body response
(** {1 URL Helpers} *)
+51 -13
stack/requests_json_api/lib/requests_json_api.mli
···
{[
open Requests_json_api
+
(* Define a Jsont codec for your type *)
+
type user = { id : int; name : string }
+
+
let user_jsont =
+
Jsont.Object.map (fun id name -> { id; name })
+
|> Jsont.Object.mem "id" Jsont.int ~enc:(fun u -> u.id)
+
|> Jsont.Object.mem "name" Jsont.string ~enc:(fun u -> u.name)
+
|> Jsont.Object.finish
+
+
let users_jsont = Jsont.list user_jsont
+
let fetch_users session =
-
get_json_exn session (base_url / "users") parse_users
+
get_json_exn session (base_url / "users") users_jsont
]}
*)
(** {1 JSON Request Helpers} *)
-
val get_json_exn : (_ Eio.Time.clock, _ Eio.Net.t) Requests.t -> string -> (Ezjsonm.value -> 'a) -> 'a
-
(** [get_json_exn session url parser] makes a GET request, checks status is 2xx,
-
reads and parses JSON body, then applies the parser function.
+
val get_json_exn : Requests.t -> string -> 'a Jsont.t -> 'a
+
(** [get_json_exn session url decoder] makes a GET request, checks status is 2xx,
+
reads and parses JSON body using the provided Jsont decoder.
Raises [Failure] on any error (HTTP, network, or JSON parse). *)
-
val get_json : (_ Eio.Time.clock, _ Eio.Net.t) Requests.t -> string -> (Ezjsonm.value -> 'a) ->
+
val get_json : Requests.t -> string -> 'a Jsont.t ->
('a, [> `Http of int * string | `Json_error of string]) result
(** Like [get_json_exn] but returns [Result] instead of raising exceptions.
Returns [Ok parsed_value] on success, or [Error] with details on failure. *)
-
val post_json : (_ Eio.Time.clock, _ Eio.Net.t) Requests.t -> string -> Ezjsonm.value -> Requests.Response.t
-
(** [post_json session url json_value] creates a JSON request body and POSTs it to the URL.
+
val post_json : Requests.t -> string -> 'a Jsont.t -> 'a -> Requests.Response.t
+
(** [post_json session url codec value] encodes [value] using the Jsont codec and POSTs it to the URL.
Returns the raw response for custom handling. *)
-
val post_json_exn : (_ Eio.Time.clock, _ Eio.Net.t) Requests.t -> string -> Ezjsonm.value -> string
+
val post_json_exn : Requests.t -> string -> 'a Jsont.t -> 'a -> string
(** Like [post_json] but checks status is 2xx and returns the response body as a string.
Raises [Failure] on non-2xx status. *)
-
val post_json_result : (_ Eio.Time.clock, _ Eio.Net.t) Requests.t -> string -> Ezjsonm.value ->
+
val post_json_result : Requests.t -> string -> 'a Jsont.t -> 'a ->
(string, int * string) result
(** Like [post_json_exn] but returns [Result] instead of raising.
[Ok body] on 2xx status, [Error (status, body)] otherwise. *)
+
val post_json_decode_exn : Requests.t -> string ->
+
req:'a Jsont.t -> 'a -> resp:'b Jsont.t -> 'b
+
(** [post_json_decode_exn session url ~req req_value ~resp] encodes [req_value] using the [req] codec,
+
POSTs it to the URL, checks status is 2xx, and decodes the response using the [resp] codec.
+
Raises [Failure] on any error. *)
+
+
val post_json_decode : Requests.t -> string ->
+
req:'a Jsont.t -> 'a -> resp:'b Jsont.t ->
+
('b, [> `Http of int * string | `Json_error of string]) result
+
(** Like [post_json_decode_exn] but returns [Result] instead of raising. *)
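A usage sketch for the request/response round trip; the `item` type, its codec, and the URL are illustrative and not part of the library:

type item = { name : string }

let item_jsont =
  Jsont.Object.map (fun name -> { name })
  |> Jsont.Object.mem "name" Jsont.string ~enc:(fun i -> i.name)
  |> Jsont.Object.finish

let create_item session =
  match
    Requests_json_api.post_json_decode session "https://api.example/items"
      ~req:item_jsont { name = "widget" } ~resp:item_jsont
  with
  | Ok created -> Some created
  | Error (`Http (status, body)) ->
    Printf.eprintf "HTTP %d: %s\n" status body; None
  | Error (`Json_error msg) ->
    Printf.eprintf "bad JSON: %s\n" msg; None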
+
+
val put_json_exn : Requests.t -> string -> 'a Jsont.t -> 'a -> string
+
(** [put_json_exn session url codec value] encodes [value] and PUTs it to the URL.
+
Returns response body. Raises [Failure] on non-2xx status. *)
+
+
val put_json_decode_exn : Requests.t -> string ->
+
req:'a Jsont.t -> 'a -> resp:'b Jsont.t -> 'b
+
(** Like [post_json_decode_exn] but uses PUT method. *)
+
+
val patch_json_exn : Requests.t -> string -> 'a Jsont.t -> 'a -> string
+
(** [patch_json_exn session url codec value] encodes [value] and PATCHes it to the URL.
+
Returns response body. Raises [Failure] on non-2xx status. *)
+
+
val delete_json_exn : Requests.t -> string -> string
+
(** [delete_json_exn session url] makes a DELETE request.
+
Returns response body. Raises [Failure] on non-2xx status. *)
+
(** {1 JSON Parsing Helpers} *)
-
val parse_json : (Ezjsonm.value -> 'a) -> string -> 'a
-
(** [parse_json parser body_str] parses a JSON string and applies the parser function.
+
val parse_json : 'a Jsont.t -> string -> 'a
+
(** [parse_json decoder body_str] parses a JSON string using the provided Jsont decoder.
Raises exception on parse error. *)
-
val parse_json_result : (Ezjsonm.value -> 'a) -> string -> ('a, string) result
+
val parse_json_result : 'a Jsont.t -> string -> ('a, string) result
(** Like [parse_json] but returns [Result] on parse error instead of raising. *)
(** {1 Low-Level Helpers} *)
···
(** [read_body response] reads the entire response body as a string.
Equivalent to [Requests.Response.body response |> Eio.Flow.read_all] *)
-
val get_result : (_ Eio.Time.clock, _ Eio.Net.t) Requests.t -> string -> (string, int * string) result
+
val get_result : Requests.t -> string -> (string, int * string) result
(** [get_result session url] makes a GET request and returns the result.
Returns [Ok body] on 2xx status, [Error (status, body)] otherwise. *)
+2 -1
stack/requests_json_api/requests_json_api.opam
···
"dune" {>= "3.0"}
"requests"
"eio"
-
"ezjsonm"
+
"jsont"
+
"bytesrw"
"odoc" {with-doc}
]
build: [
+1 -1
stack/river/bin/dune
···
(executable
(public_name river-cli)
(name river_cli)
-
(libraries river cmdliner yojson fmt fmt.tty fmt.cli eio eio_main eiocmd unix ptime syndic xdge))
+
(libraries river cmdliner jsont jsont.bytesrw fmt fmt.tty fmt.cli eio eio_main eiocmd unix ptime syndic xdge))
+36 -38
stack/river/bin/river_cli.ml
···
with Eio.Io (Eio.Fs.E (Already_exists _), _) -> ()
) dirs
-
let user_of_json json =
-
let open Yojson.Safe.Util in
-
try
-
let feeds_json = json |> member "feeds" |> to_list in
-
let feeds = List.map (fun feed ->
-
{ River.name = feed |> member "name" |> to_string;
-
url = feed |> member "url" |> to_string }
-
) feeds_json in
-
Some {
-
username = json |> member "username" |> to_string;
-
fullname = json |> member "fullname" |> to_string;
-
email = json |> member "email" |> to_string_option;
-
feeds;
-
last_synced = json |> member "last_synced" |> to_string_option;
-
}
-
with _ -> None
+
(* JSON codecs for user data *)
+
+
(* Codec for River.source (feed) *)
+
let source_jsont =
+
let make name url = { River.name; url } in
+
Jsont.Object.map ~kind:"Source" make
+
|> Jsont.Object.mem "name" Jsont.string ~enc:(fun s -> s.River.name)
+
|> Jsont.Object.mem "url" Jsont.string ~enc:(fun s -> s.River.url)
+
|> Jsont.Object.finish
+
+
(* Codec for user *)
+
let user_jsont =
+
let make username fullname email feeds last_synced =
+
{ username; fullname; email; feeds; last_synced }
+
in
+
Jsont.Object.map ~kind:"User" make
+
|> Jsont.Object.mem "username" Jsont.string ~enc:(fun u -> u.username)
+
|> Jsont.Object.mem "fullname" Jsont.string ~enc:(fun u -> u.fullname)
+
|> Jsont.Object.opt_mem "email" Jsont.string ~enc:(fun u -> u.email)
+
|> Jsont.Object.mem "feeds" (Jsont.list source_jsont) ~enc:(fun u -> u.feeds)
+
|> Jsont.Object.opt_mem "last_synced" Jsont.string ~enc:(fun u -> u.last_synced)
+
|> Jsont.Object.finish
+
+
let user_of_string s =
+
match Jsont_bytesrw.decode_string' user_jsont s with
+
| Ok user -> Some user
+
| Error err ->
+
Log.err (fun m -> m "Failed to parse user JSON: %s" (Jsont.Error.to_string err));
+
None
+
+
let user_to_string user =
+
match Jsont_bytesrw.encode_string' ~format:Jsont.Indent user_jsont user with
+
| Ok s -> s
+
| Error err -> failwith ("Failed to encode user: " ^ Jsont.Error.to_string err)
let load_user state username =
let file = user_file state username in
try
let content = Eio.Path.load file in
-
let json = Yojson.Safe.from_string content in
-
user_of_json json
+
user_of_string content
with
| Eio.Io (Eio.Fs.E (Not_found _), _) -> None
| e ->
Log.err (fun m -> m "Error loading user %s: %s" username (Printexc.to_string e));
None
-
let user_to_json user =
-
let feeds_json = List.map (fun feed ->
-
`Assoc [
-
"name", `String feed.River.name;
-
"url", `String feed.River.url;
-
]
-
) user.feeds in
-
`Assoc [
-
"username", `String user.username;
-
"fullname", `String user.fullname;
-
"email", (match user.email with
-
| Some e -> `String e
-
| None -> `Null);
-
"feeds", `List feeds_json;
-
"last_synced", (match user.last_synced with
-
| Some s -> `String s
-
| None -> `Null);
-
]
-
let save_user state user =
let file = user_file state user.username in
-
let json = user_to_json user |> Yojson.Safe.to_string ~std:true in
+
let json = user_to_string user in
Eio.Path.save ~create:(`Or_truncate 0o644) file json
let list_users state =
+1 -2
stack/river/dune-project
···
lambdasoup
uri
(cmdliner (>= 2.0.0))
-
yojson
fmt
xdge
(jsonfeed (>= 1.1.0))
(jsont (>= 0.2.0))
-
bytesrw
+
(jsont.bytesrw (>= 0.2.0))
(odoc :with-doc)))
+1 -2
stack/river/lib/client.ml
···
module Log = (val Logs.src_log src : Logs.LOG)
type t = {
-
session : (float Eio.Time.clock_ty Eio.Resource.t,
-
[`Generic | `Unix] Eio.Net.ty Eio.Resource.t) Requests.t;
+
session : Requests.t;
}
let create ~sw (env : _ ) =
+1 -2
stack/river/lib/client.mli
···
(** [session t] returns the underlying Requests session.
This is used internally by River's HTTP functions. *)
-
val session : t -> (float Eio.Time.clock_ty Eio.Resource.t,
-
[`Generic | `Unix] Eio.Net.ty Eio.Resource.t) Requests.t
+
val session : t -> Requests.t
+1 -1
stack/river/lib/dune
···
(name river)
(public_name river)
(wrapped false)
-
(libraries eio eio_main requests requests_json_api logs str syndic lambdasoup uri ptime jsonfeed jsont bytesrw))
+
(libraries eio eio_main requests requests_json_api logs str syndic lambdasoup uri ptime jsonfeed jsont jsont.bytesrw cacheio xdge))
+264 -189
stack/river/lib/river_store.ml
···
(*
-
* Persistent storage for Atom feed entries using Cacheio
+
* Persistent storage for Atom feed entries using Cacheio and Jsonfeed
*)
let src = Logs.Src.create "river.store" ~doc:"River persistent storage"
···
(* Types *)
-
type stored_entry = {
-
atom_id : string;
-
title : string;
-
link : Uri.t option;
-
published : Ptime.t option;
-
updated : Ptime.t;
-
author_name : string;
-
author_email : string option;
-
content : string;
+
(* Storage metadata that extends Jsonfeed.Item via unknown fields *)
+
type storage_meta = {
feed_url : string;
feed_name : string;
feed_title : string;
stored_at : Ptime.t;
-
tags : string list;
-
summary : string option;
+
}
+
+
(* A stored entry is a Jsonfeed.Item.t with storage metadata in unknown fields *)
+
type stored_entry = {
+
item : Jsonfeed.Item.t;
+
meta : storage_meta;
}
+
+
(* Stored entry accessors *)
+
let entry_item entry = entry.item
+
let entry_feed_url entry = entry.meta.feed_url
+
let entry_feed_name entry = entry.meta.feed_name
+
let entry_feed_title entry = entry.meta.feed_title
+
let entry_stored_at entry = entry.meta.stored_at
type feed_info = {
url : string;
···
let feed_key = make_feed_key feed_url in
feed_key ^ "/meta.json"
-
(* JSON serialization *)
+
(* JSON serialization using Jsonfeed and Jsont *)
-
let entry_to_json entry =
-
`Assoc [
-
"atom_id", `String entry.atom_id;
-
"title", `String entry.title;
-
"link", (match entry.link with
-
| Some u -> `String (Uri.to_string u)
-
| None -> `Null);
-
"published", (match entry.published with
-
| Some t -> `String (Ptime.to_rfc3339 t)
-
| None -> `Null);
-
"updated", `String (Ptime.to_rfc3339 entry.updated);
-
"author_name", `String entry.author_name;
-
"author_email", (match entry.author_email with Some e -> `String e | None -> `Null);
-
"content", `String entry.content;
-
"feed_url", `String entry.feed_url;
-
"feed_name", `String entry.feed_name;
-
"feed_title", `String entry.feed_title;
-
"stored_at", `String (Ptime.to_rfc3339 entry.stored_at);
-
"tags", `List (List.map (fun t -> `String t) entry.tags);
-
"summary", (match entry.summary with Some s -> `String s | None -> `Null);
-
]
+
(* Storage metadata codec - stores feed info and storage timestamp *)
+
let storage_meta_jsont : storage_meta Jsont.t =
+
Jsont.Object.(
+
map ~kind:"StorageMeta" (fun feed_url feed_name feed_title stored_at : storage_meta ->
+
{ feed_url; feed_name; feed_title; stored_at })
+
|> mem "x_river_feed_url" Jsont.string ~enc:(fun m -> m.feed_url)
+
|> mem "x_river_feed_name" Jsont.string ~enc:(fun m -> m.feed_name)
+
|> mem "x_river_feed_title" Jsont.string ~enc:(fun m -> m.feed_title)
+
|> mem "x_river_stored_at" Jsonfeed.Rfc3339.jsont ~enc:(fun m -> m.stored_at)
+
|> finish
+
)
-
let entry_of_json json =
-
let open Yojson.Safe.Util in
-
let parse_time s =
-
match Ptime.of_rfc3339 s with
-
| Ok (t, _, _) -> t
-
| Error _ -> failwith ("Invalid timestamp: " ^ s)
+
(* Codec for feed_info *)
+
let feed_meta_jsont : feed_info Jsont.t =
+
Jsont.Object.(
+
map ~kind:"FeedInfo" (fun url name title last_updated entry_count : feed_info ->
+
{ url; name; title; last_updated; entry_count })
+
|> mem "url" Jsont.string ~enc:(fun (m : feed_info) -> m.url)
+
|> mem "name" Jsont.string ~enc:(fun m -> m.name)
+
|> mem "title" Jsont.string ~enc:(fun m -> m.title)
+
|> mem "last_updated" Jsonfeed.Rfc3339.jsont ~enc:(fun m -> m.last_updated)
+
|> mem "entry_count" Jsont.int ~enc:(fun m -> m.entry_count)
+
|> finish
+
)
+
+
(* Helper to create item with storage metadata in unknown fields *)
+
let merge_storage_meta item meta =
+
let meta_json = Jsont_bytesrw.encode_string' storage_meta_jsont meta
+
|> Result.get_ok in
+
let meta_unknown = Jsont_bytesrw.decode_string' Jsont.json meta_json
+
|> Result.get_ok in
+
Jsonfeed.Item.create
+
~id:(Jsonfeed.Item.id item)
+
~content:(Jsonfeed.Item.content item)
+
?url:(Jsonfeed.Item.url item)
+
?external_url:(Jsonfeed.Item.external_url item)
+
?title:(Jsonfeed.Item.title item)
+
?summary:(Jsonfeed.Item.summary item)
+
?image:(Jsonfeed.Item.image item)
+
?banner_image:(Jsonfeed.Item.banner_image item)
+
?date_published:(Jsonfeed.Item.date_published item)
+
?date_modified:(Jsonfeed.Item.date_modified item)
+
?authors:(Jsonfeed.Item.authors item)
+
?tags:(Jsonfeed.Item.tags item)
+
?language:(Jsonfeed.Item.language item)
+
?attachments:(Jsonfeed.Item.attachments item)
+
?references:(Jsonfeed.Item.references item)
+
~unknown:meta_unknown
+
()
+
+
(* Helper to extract storage metadata from item's unknown fields *)
+
let extract_storage_meta item =
+
let unknown = Jsonfeed.Item.unknown item in
+
let meta_str = Jsont_bytesrw.encode_string' Jsont.json unknown |> Result.get_ok in
+
match Jsont_bytesrw.decode_string' storage_meta_jsont meta_str with
+
| Ok meta -> meta
+
| Error e -> failwith ("Missing storage metadata: " ^ Jsont.Error.to_string e)
+
+
(* Stored entry codec - just wraps Jsonfeed.Item.jsont *)
+
let stored_entry_jsont : stored_entry Jsont.t =
+
let kind = "StoredEntry" in
+
let of_string s =
+
match Jsont_bytesrw.decode_string' Jsonfeed.Item.jsont s with
+
| Ok item -> Ok { item; meta = extract_storage_meta item }
+
| Error e -> Error (Jsont.Error.to_string e)
+
in
+
let enc entry =
+
let item_with_meta = merge_storage_meta entry.item entry.meta in
+
match Jsont_bytesrw.encode_string' Jsonfeed.Item.jsont item_with_meta with
+
| Ok s -> s
+
| Error e -> failwith ("Failed to encode: " ^ Jsont.Error.to_string e)
in
-
{
-
atom_id = json |> member "atom_id" |> to_string;
-
title = json |> member "title" |> to_string;
-
link = json |> member "link" |> to_string_option |> Option.map Uri.of_string;
-
published = json |> member "published" |> to_string_option |> Option.map parse_time;
-
updated = json |> member "updated" |> to_string |> parse_time;
-
author_name = json |> member "author_name" |> to_string;
-
author_email = json |> member "author_email" |> to_string_option;
-
content = json |> member "content" |> to_string;
-
feed_url = json |> member "feed_url" |> to_string;
-
feed_name = json |> member "feed_name" |> to_string;
-
feed_title = json |> member "feed_title" |> to_string;
-
stored_at = json |> member "stored_at" |> to_string |> parse_time;
-
tags = (try json |> member "tags" |> to_list |> List.map to_string with _ -> []);
-
summary = (try json |> member "summary" |> to_string_option with _ -> None);
-
}
+
Jsont.of_of_string ~kind of_string ~enc
+
+
(* Encode/decode functions *)
+
let entry_to_string entry =
+
match Jsont_bytesrw.encode_string' stored_entry_jsont entry with
+
| Ok s -> s
+
| Error err -> failwith ("Failed to encode entry: " ^ Jsont.Error.to_string err)
+
+
let entry_of_string s =
+
match Jsont_bytesrw.decode_string' stored_entry_jsont s with
+
| Ok entry -> entry
+
| Error err -> failwith ("Failed to parse entry: " ^ Jsont.Error.to_string err)
-
let feed_meta_to_json meta =
-
`Assoc [
-
"url", `String meta.url;
-
"name", `String meta.name;
-
"title", `String meta.title;
-
"last_updated", `String (Ptime.to_rfc3339 meta.last_updated);
-
]
+
let feed_meta_to_string meta =
+
match Jsont_bytesrw.encode_string' feed_meta_jsont meta with
+
| Ok s -> s
+
| Error err -> failwith ("Failed to encode feed metadata: " ^ Jsont.Error.to_string err)
-
let feed_meta_of_json json =
-
let open Yojson.Safe.Util in
-
let parse_time s =
-
match Ptime.of_rfc3339 s with
-
| Ok (t, _, _) -> t
-
| Error _ -> failwith ("Invalid timestamp: " ^ s)
-
in
-
{
-
url = json |> member "url" |> to_string;
-
name = json |> member "name" |> to_string;
-
title = json |> member "title" |> to_string;
-
last_updated = json |> member "last_updated" |> to_string |> parse_time;
-
entry_count = 0; (* Will be counted separately *)
-
}
+
let feed_meta_of_string s =
+
match Jsont_bytesrw.decode_string' feed_meta_jsont s with
+
| Ok meta -> meta
+
| Error err -> failwith ("Failed to parse feed metadata: " ^ Jsont.Error.to_string err)
(* Store creation *)
···
Log.info (fun m -> m "Created River store with XDG at %a" Eio.Path.pp base_dir);
{ cache; base_dir }
-
(* Convert Post.t to stored_entry *)
-
let entry_of_post ~feed_url ~feed_name ~feed_title (post : Post.t) =
-
let atom_id = post.id in (* Use the post's unique ID *)
-
let updated = match post.date with
-
| Some d -> d
-
| None -> Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get
+
(* Convert Post.t to Jsonfeed.Item.t *)
+
let item_of_post ~feed_url ~feed_name ~feed_title (post : Post.t) =
+
let content =
+
let html = Soup.to_string post.content in
+
`Html html
in
-
let published = post.date in
-
{
-
atom_id;
-
title = post.title;
-
link = post.link;
-
published;
-
updated;
-
author_name = post.author;
-
author_email = if post.email = "" then None else Some post.email;
-
content = Soup.to_string post.content;
+
let url = Option.map Uri.to_string post.link in
+
let authors =
+
if post.author = "" then None
+
else Some [Jsonfeed.Author.create ~name:post.author ()]
+
in
+
let tags = if post.tags = [] then None else Some post.tags in
+
let item = Jsonfeed.Item.create
+
~id:post.id
+
~content
+
?url
+
?title:(if post.title = "" then None else Some post.title)
+
?summary:post.summary
+
?date_published:post.date
+
?date_modified:post.date
+
?authors
+
?tags
+
()
+
in
+
let meta = {
feed_url;
feed_name;
feed_title;
stored_at = Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get;
-
tags = post.tags;
-
summary = post.summary;
-
}
+
} in
+
{ item; meta }
-
(* Convert Syndic.Atom.entry to stored_entry *)
-
let entry_of_atom ~feed_url ~feed_name ~feed_title (atom_entry : Syndic.Atom.entry) =
+
(* Convert Syndic.Atom.entry to Jsonfeed.Item.t *)
+
let item_of_atom ~feed_url ~feed_name ~feed_title (atom_entry : Syndic.Atom.entry) =
let atom_id = Uri.to_string atom_entry.id in
-
let updated = atom_entry.updated in
-
let published = match atom_entry.published with
+
let date_modified = atom_entry.updated in
+
let date_published = match atom_entry.published with
| Some p -> Some p
| None -> Some atom_entry.updated
in
-
(* Extract author info - Syndic doesn't expose person record fields,
-
so we'll use placeholders and reconstruct via Atom.author later *)
-
let content = match atom_entry.content with
-
| Some (Syndic.Atom.Text s) -> s
-
| Some (Syndic.Atom.Html (_, s)) -> s
+
(* Extract content *)
+
let content_html = match atom_entry.content with
+
| Some (Syndic.Atom.Text s) -> Some s
+
| Some (Syndic.Atom.Html (_, s)) -> Some s
| Some (Syndic.Atom.Xhtml (_, nodes)) ->
let ns_prefix _ = Some "" in
-
String.concat "" (List.map (Syndic.XML.to_string ~ns_prefix) nodes)
-
| Some (Syndic.Atom.Mime _) | Some (Syndic.Atom.Src _) | None ->
-
(match atom_entry.summary with
-
| Some (Syndic.Atom.Text s) -> s
-
| Some (Syndic.Atom.Html (_, s)) -> s
-
| Some (Syndic.Atom.Xhtml (_, nodes)) ->
-
let ns_prefix _ = Some "" in
-
String.concat "" (List.map (Syndic.XML.to_string ~ns_prefix) nodes)
-
| None -> "")
+
Some (String.concat "" (List.map (Syndic.XML.to_string ~ns_prefix) nodes))
+
| Some (Syndic.Atom.Mime _) | Some (Syndic.Atom.Src _) | None -> None
+
in
+
let content_text = match atom_entry.summary with
+
| Some s -> Some (Util.string_of_text_construct s)
+
| None -> None
+
in
+
let content = match content_html, content_text with
+
| Some h, Some t -> `Both (h, t)
+
| Some h, None -> `Html h
+
| None, Some t -> `Text t
+
| None, None -> `Text "" (* Fallback *)
in
-
let link = try
-
Some (List.find (fun l -> l.Syndic.Atom.rel = Syndic.Atom.Alternate) atom_entry.links).href
+
let url = try
+
Some (Uri.to_string (List.find (fun l -> l.Syndic.Atom.rel = Syndic.Atom.Alternate) atom_entry.links).href)
with Not_found ->
match atom_entry.links with
-
| l :: _ -> Some l.href
+
| l :: _ -> Some (Uri.to_string l.href)
| [] -> None
in
-
(* Extract tags from categories *)
-
let tags = List.map (fun cat -> cat.Syndic.Atom.term) atom_entry.categories in
-
(* Extract summary *)
+
let tags =
+
let cat_tags = List.map (fun cat -> cat.Syndic.Atom.term) atom_entry.categories in
+
if cat_tags = [] then None else Some cat_tags
+
in
let summary = match atom_entry.summary with
| Some s -> Some (Util.string_of_text_construct s)
| None -> None
in
-
{
-
atom_id;
-
title = Util.string_of_text_construct atom_entry.title;
-
link;
-
published;
-
updated;
-
author_name = feed_name; (* Use feed name as fallback *)
-
author_email = None;
-
content;
+
let item = Jsonfeed.Item.create
+
~id:atom_id
+
~content
+
?url
+
~title:(Util.string_of_text_construct atom_entry.title)
+
?summary
+
?date_published
+
~date_modified
+
?tags
+
()
+
in
+
let meta = {
feed_url;
feed_name;
feed_title;
stored_at = Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get;
-
tags;
-
summary;
-
}
+
} in
+
{ item; meta }
(* Feed metadata management *)
let update_feed_meta store ~feed_url ~feed_name ~feed_title ~sw:_ =
···
last_updated = Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get;
entry_count = 0;
} in
-
let json = feed_meta_to_json meta |> Yojson.Safe.to_string in
-
let source = Eio.Flow.string_source json in
+
let json_str = feed_meta_to_string meta in
+
let source = Eio.Flow.string_source json_str in
Cacheio.put store.cache ~key ~source ~ttl:None ();
Log.debug (fun m -> m "Updated feed metadata for %s" feed_url)
···
| Some source ->
try
let json_str = Eio.Buf_read.(parse_exn take_all) source ~max_size:Int.max_int in
-
let json = Yojson.Safe.from_string json_str in
-
Some (feed_meta_of_json json)
+
Some (feed_meta_of_string json_str)
with e ->
Log.err (fun m -> m "Failed to parse feed metadata: %s" (Printexc.to_string e));
None
···
(* Entry storage *)
let store_entry store ~feed_url ~feed_name ~feed_title ~post ~sw =
-
let entry = entry_of_post ~feed_url ~feed_name ~feed_title post in
-
let key = make_entry_key feed_url entry.atom_id in
-
let json = entry_to_json entry |> Yojson.Safe.to_string in
-
let source = Eio.Flow.string_source json in
+
let entry = item_of_post ~feed_url ~feed_name ~feed_title post in
+
let key = make_entry_key feed_url (Jsonfeed.Item.id entry.item) in
+
let json_str = entry_to_string entry in
+
let source = Eio.Flow.string_source json_str in
Cacheio.put store.cache ~key ~source ~ttl:None ();
-
Log.debug (fun m -> m "Stored entry %s for feed %s" entry.atom_id feed_url);
+
Log.debug (fun m -> m "Stored entry %s for feed %s" (Jsonfeed.Item.id entry.item) feed_url);
(* Update feed metadata *)
update_feed_meta store ~feed_url ~feed_name ~feed_title ~sw
···
let store_atom_entries store ~feed_url ~feed_name ~feed_title ~entries ~sw =
Log.info (fun m -> m "Storing %d Atom entries for feed %s" (List.length entries) feed_url);
List.iter (fun atom_entry ->
-
let entry = entry_of_atom ~feed_url ~feed_name ~feed_title atom_entry in
-
let key = make_entry_key feed_url entry.atom_id in
-
let json = entry_to_json entry |> Yojson.Safe.to_string in
-
let source = Eio.Flow.string_source json in
+
let entry = item_of_atom ~feed_url ~feed_name ~feed_title atom_entry in
+
let key = make_entry_key feed_url (Jsonfeed.Item.id entry.item) in
+
let json_str = entry_to_string entry in
+
let source = Eio.Flow.string_source json_str in
Cacheio.put store.cache ~key ~source ~ttl:None ();
-
Log.debug (fun m -> m "Stored Atom entry %s" entry.atom_id);
+
Log.debug (fun m -> m "Stored Atom entry %s" (Jsonfeed.Item.id entry.item));
) entries;
update_feed_meta store ~feed_url ~feed_name ~feed_title ~sw;
Log.info (fun m -> m "Stored %d Atom entries for feed %s" (List.length entries) feed_url)
···
| Some source ->
try
let json_str = Eio.Buf_read.(parse_exn take_all) source ~max_size:Int.max_int in
-
let json = Yojson.Safe.from_string json_str in
-
Some (entry_of_json json)
+
Some (entry_of_string json_str)
with e ->
Log.err (fun m -> m "Failed to parse entry: %s" (Printexc.to_string e));
None
···
| Some source ->
try
let json_str = Eio.Buf_read.(parse_exn take_all) source ~max_size:Int.max_int in
-
let json = Yojson.Safe.from_string json_str in
-
Some (entry_of_json json)
+
Some (entry_of_string json_str)
with e ->
Log.err (fun m -> m "Failed to parse entry from scan: %s" (Printexc.to_string e));
None
else None
) entries in
-
(* Sort by updated time, newest first *)
-
List.sort (fun a b -> Ptime.compare b.updated a.updated) feed_entries
+
(* Sort by date_modified, newest first *)
+
List.sort (fun a b ->
+
let time_a = Jsonfeed.Item.date_modified a.item |> Option.value ~default:a.meta.stored_at in
+
let time_b = Jsonfeed.Item.date_modified b.item |> Option.value ~default:b.meta.stored_at in
+
Ptime.compare time_b time_a
+
) feed_entries
let list_entries_filtered store ~feed_url ?since ?until ?limit ?(sort=`Updated) () =
let entries = list_entries store ~feed_url in
(* Filter by time *)
let entries = match since with
| None -> entries
-
| Some t -> List.filter (fun e -> Ptime.is_later e.updated ~than:t || Ptime.equal e.updated t) entries
+
| Some t -> List.filter (fun e ->
+
let time = Jsonfeed.Item.date_modified e.item |> Option.value ~default:e.meta.stored_at in
+
Ptime.is_later time ~than:t || Ptime.equal time t) entries
in
let entries = match until with
| None -> entries
-
| Some t -> List.filter (fun e -> Ptime.is_earlier e.updated ~than:t || Ptime.equal e.updated t) entries
+
| Some t -> List.filter (fun e ->
+
let time = Jsonfeed.Item.date_modified e.item |> Option.value ~default:e.meta.stored_at in
+
Ptime.is_earlier time ~than:t || Ptime.equal time t) entries
in
(* Sort *)
let entries = match sort with
| `Published -> List.sort (fun a b ->
-
match a.published, b.published with
-
| Some pa, Some pb -> Ptime.compare pb pa
+
let pa = Jsonfeed.Item.date_published a.item in
+
let pb = Jsonfeed.Item.date_published b.item in
+
match pa, pb with
+
| Some ta, Some tb -> Ptime.compare tb ta
| None, Some _ -> 1
| Some _, None -> -1
-
| None, None -> Ptime.compare b.updated a.updated
+
| None, None ->
+
let ta = Jsonfeed.Item.date_modified a.item |> Option.value ~default:a.meta.stored_at in
+
let tb = Jsonfeed.Item.date_modified b.item |> Option.value ~default:b.meta.stored_at in
+
Ptime.compare tb ta
+
) entries
+
| `Updated -> List.sort (fun a b ->
+
let ta = Jsonfeed.Item.date_modified a.item |> Option.value ~default:a.meta.stored_at in
+
let tb = Jsonfeed.Item.date_modified b.item |> Option.value ~default:b.meta.stored_at in
+
Ptime.compare tb ta
) entries
-
| `Updated -> List.sort (fun a b -> Ptime.compare b.updated a.updated) entries
-
| `Stored -> List.sort (fun a b -> Ptime.compare b.stored_at a.stored_at) entries
+
| `Stored -> List.sort (fun a b -> Ptime.compare b.meta.stored_at a.meta.stored_at) entries
in
(* Limit *)
match limit with
···
| Some source ->
try
let json_str = Eio.Buf_read.(parse_exn take_all) source ~max_size:Int.max_int in
-
let json = Yojson.Safe.from_string json_str in
-
Some (entry_of_json json)
+
Some (entry_of_string json_str)
with e ->
Log.err (fun m -> m "Failed to parse entry: %s" (Printexc.to_string e));
None
else None
) entries in
-
let sorted = List.sort (fun a b -> Ptime.compare b.updated a.updated) all_entries in
+
let sorted = List.sort (fun a b ->
+
let ta = Jsonfeed.Item.date_modified a.item |> Option.value ~default:a.meta.stored_at in
+
let tb = Jsonfeed.Item.date_modified b.item |> Option.value ~default:b.meta.stored_at in
+
Ptime.compare tb ta
+
) all_entries in
List.filteri (fun i _ -> i < limit) sorted
let find_entry_by_id store ~id =
···
| Some source ->
(try
let json_str = Eio.Buf_read.(parse_exn take_all) source ~max_size:Int.max_int in
-
let json = Yojson.Safe.from_string json_str in
-
let entry = entry_of_json json in
+
let entry = entry_of_string json_str in
(* Exact ID match only *)
-
if entry.atom_id = id then
+
if Jsonfeed.Item.id entry.item = id then
Some entry
else
None
···
else None
) entries in
(match matching_entry with
-
| Some e -> Log.debug (fun m -> m "Found entry: %s" e.title)
+
| Some e -> Log.debug (fun m -> m "Found entry: %s"
+
(Jsonfeed.Item.title e.item |> Option.value ~default:"(no title)"))
| None -> Log.debug (fun m -> m "No entry found with ID: %s" id));
matching_entry
···
let entries = list_entries store ~feed_url in
let to_delete = List.filteri (fun i _ -> i >= keep) entries in
List.iter (fun entry ->
-
delete_entry store ~feed_url ~atom_id:entry.atom_id
+
delete_entry store ~feed_url ~atom_id:(Jsonfeed.Item.id entry.item)
) to_delete;
let deleted = List.length to_delete in
Log.info (fun m -> m "Pruned %d entries from feed %s (kept %d)" deleted feed_url keep);
···
let prune_old_entries store ~feed_url ~older_than =
let entries = list_entries store ~feed_url in
let to_delete = List.filter (fun e ->
-
Ptime.is_earlier e.updated ~than:older_than
+
let time = Jsonfeed.Item.date_modified e.item |> Option.value ~default:e.meta.stored_at in
+
Ptime.is_earlier time ~than:older_than
) entries in
List.iter (fun entry ->
-
delete_entry store ~feed_url ~atom_id:entry.atom_id
+
delete_entry store ~feed_url ~atom_id:(Jsonfeed.Item.id entry.item)
) to_delete;
let deleted = List.length to_delete in
Log.info (fun m -> m "Pruned %d old entries from feed %s" deleted feed_url);
···
| Some source ->
try
let json_str = Eio.Buf_read.(parse_exn take_all) source ~max_size:Int.max_int in
-
let json = Yojson.Safe.from_string json_str in
-
Some (feed_meta_of_json json)
+
Some (feed_meta_of_string json_str)
with e ->
Log.err (fun m -> m "Failed to parse feed metadata: %s" (Printexc.to_string e));
None
···
| Some n -> list_entries_filtered store ~feed_url ~limit:n ()
in
let atom_entries = List.map (fun entry ->
-
let id = Uri.of_string entry.atom_id in
-
let entry_title : Syndic.Atom.text_construct = Syndic.Atom.Text entry.title in
-
let links = match entry.link with
-
| Some uri -> [Syndic.Atom.link ~rel:Syndic.Atom.Alternate uri]
+
let item = entry.item in
+
let id = Uri.of_string (Jsonfeed.Item.id item) in
+
let entry_title : Syndic.Atom.text_construct =
+
Syndic.Atom.Text (Jsonfeed.Item.title item |> Option.value ~default:"(no title)") in
+
let links = match Jsonfeed.Item.url item with
+
| Some url_str -> [Syndic.Atom.link ~rel:Syndic.Atom.Alternate (Uri.of_string url_str)]
| None -> []
in
-
let entry_content : Syndic.Atom.content = Syndic.Atom.Html (None, entry.content) in
-
let author = Syndic.Atom.author ?email:entry.author_email entry.author_name in
+
let content_str = match Jsonfeed.Item.content item with
+
| `Html h -> h
+
| `Text t -> t
+
| `Both (h, _) -> h
+
in
+
let entry_content : Syndic.Atom.content = Syndic.Atom.Html (None, content_str) in
+
let author_name = match Jsonfeed.Item.authors item with
+
| Some (a :: _) -> Jsonfeed.Author.name a |> Option.value ~default:entry.meta.feed_name
+
| _ -> entry.meta.feed_name
+
in
+
let author = Syndic.Atom.author author_name in
let authors = (author, []) in
-
Syndic.Atom.entry ~id ~title:entry_title ~updated:entry.updated ?published:entry.published
+
let updated = Jsonfeed.Item.date_modified item |> Option.value ~default:entry.meta.stored_at in
+
Syndic.Atom.entry ~id ~title:entry_title ~updated
+
?published:(Jsonfeed.Item.date_published item)
~links ~content:entry_content ~authors ()
) entries in
let feed_title : Syndic.Atom.text_construct = match title with
···
let feed_id = Uri.of_string ("urn:river:archive:" ^ (Digest.string feed_url |> Digest.to_hex)) in
let feed_updated = match entries with
| [] -> Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get
-
| e :: _ -> e.updated
+
| e :: _ -> Jsonfeed.Item.date_modified e.item |> Option.value ~default:e.meta.stored_at
in
{
Syndic.Atom.id = feed_id;
···
(* Pretty printing *)
let pp_entry fmt entry =
+
let item = entry.item in
Format.fprintf fmt "@[<v 2>Entry:@,";
-
Format.fprintf fmt "ID: %s@," entry.atom_id;
-
Format.fprintf fmt "Title: %s@," entry.title;
-
Format.fprintf fmt "Link: %s@," (match entry.link with Some u -> Uri.to_string u | None -> "none");
-
Format.fprintf fmt "Published: %s@," (match entry.published with
-
| Some t -> Ptime.to_rfc3339 t
-
| None -> "unknown");
-
Format.fprintf fmt "Updated: %s@," (Ptime.to_rfc3339 entry.updated);
-
Format.fprintf fmt "Feed: %s (%s)@," entry.feed_name entry.feed_url;
-
Format.fprintf fmt "Stored: %s@]" (Ptime.to_rfc3339 entry.stored_at)
+
Format.fprintf fmt "ID: %s@," (Jsonfeed.Item.id item);
+
Format.fprintf fmt "Title: %s@," (Jsonfeed.Item.title item |> Option.value ~default:"(no title)");
+
Format.fprintf fmt "URL: %s@," (Jsonfeed.Item.url item |> Option.value ~default:"(none)");
+
(match Jsonfeed.Item.date_published item with
+
| Some t -> Format.fprintf fmt "Published: %s@," (Ptime.to_rfc3339 t)
+
| None -> ());
+
(match Jsonfeed.Item.date_modified item with
+
| Some t -> Format.fprintf fmt "Modified: %s@," (Ptime.to_rfc3339 t)
+
| None -> ());
+
Format.fprintf fmt "Feed: %s (%s)@," entry.meta.feed_name entry.meta.feed_url;
+
Format.fprintf fmt "Stored: %s@]" (Ptime.to_rfc3339 entry.meta.stored_at)
let pp_feed_info fmt info =
Format.fprintf fmt "@[<v 2>Feed:@,";
···
List.iter (fun feed ->
Format.fprintf fmt " - %s: %d entries@," feed.name feed.entry_count
) feeds;
-
Format.fprintf fmt "@]"
+
Format.fprintf fmt "@]"
+13 -38
stack/river/lib/river_store.mli
···
(** Abstract type representing the store *)
type t
-
(** Stored entry with resolved URLs and metadata *)
-
type stored_entry = {
-
atom_id : string;
-
(** Unique Atom entry ID (used as key) *)
+
(** Stored entry - combines Jsonfeed.Item with storage metadata *)
+
type stored_entry
-
title : string;
-
(** Entry title *)
+
(** {2 Stored Entry Accessors} *)
-
link : Uri.t option;
-
(** Primary link (resolved against feed base URI) *)
+
val entry_item : stored_entry -> Jsonfeed.Item.t
+
(** Get the underlying Jsonfeed Item *)
-
published : Ptime.t option;
-
(** Publication date *)
+
val entry_feed_url : stored_entry -> string
+
(** Get the source feed URL *)
-
updated : Ptime.t;
-
(** Last update time *)
+
val entry_feed_name : stored_entry -> string
+
(** Get the source feed name *)
-
author_name : string;
-
(** Entry author name *)
+
val entry_feed_title : stored_entry -> string
+
(** Get the source feed title *)
-
author_email : string option;
-
(** Entry author email *)
-
-
content : string;
-
(** HTML content with resolved URLs *)
-
-
feed_url : string;
-
(** URL of the source feed *)
-
-
feed_name : string;
-
(** Name of the source feed *)
-
-
feed_title : string;
-
(** Title of the source feed *)
-
-
stored_at : Ptime.t;
-
(** When this entry was stored *)
-
-
tags : string list;
-
(** Tags associated with the entry *)
-
-
summary : string option;
-
(** Summary/excerpt of the entry *)
-
}
+
val entry_stored_at : stored_entry -> Ptime.t
+
(** Get the storage timestamp *)
(** Feed metadata *)
type feed_info = {
+1 -2
stack/river/river.opam
···
"lambdasoup"
"uri"
"cmdliner" {>= "2.0.0"}
-
"yojson"
"fmt"
"xdge"
"jsonfeed" {>= "1.1.0"}
"jsont" {>= "0.2.0"}
-
"bytesrw"
+
"jsont.bytesrw" {>= "0.2.0"}
"odoc" {with-doc}
]
build: [
+1 -1
stack/typesense-client/dune
···
(library
(public_name typesense-client)
(name typesense_client)
-
(libraries eio requests requests_json_api ezjsonm fmt uri ptime))
+
(libraries eio requests requests_json_api jsont jsont.bytesrw fmt uri ptime))
+2 -1
stack/typesense-client/dune-project
···
(ocaml (>= 4.14))
eio
requests
-
ezjsonm
+
jsont
+
jsont-bytesrw
fmt
uri
ptime))
+2 -1
stack/typesense-client/typesense-client.opam
···
"ocaml" {>= "4.14"}
"eio"
"requests"
-
"ezjsonm"
+
"jsont"
+
"jsont-bytesrw"
"fmt"
"uri"
"ptime"
+178 -100
stack/typesense-client/typesense_client.ml
···
api_key : string;
}
-
(** Internal polymorphic type for the client *)
-
type 'net t_internal = {
+
(** Client type *)
+
type t = {
config: config;
-
requests_session: (float Eio.Time.clock_ty Eio.Resource.t, 'net Eio.Net.ty Eio.Resource.t) Requests.t;
+
requests_session: Requests.t;
}
-
-
(** Public client type *)
-
type t = [`Generic | `Unix] t_internal
(** Create a new Typesense client *)
let create ~requests_session ~config : t =
···
Error (Connection_error (Printexc.to_string exn))
(** Search result types *)
+
type highlight = {
+
field: string;
+
snippets: string list;
+
}
+
type search_result = {
id: string;
title: string;
content: string;
score: float;
collection: string;
-
highlights: (string * string list) list;
-
document: Ezjsonm.value; (* Store raw document for flexible field access *)
+
highlights: highlight list;
+
document: Jsont.json; (* Store raw document for flexible field access *)
}
type search_response = {
···
query_time: float;
}
-
(** Parse search result from JSON *)
-
let parse_search_result collection json =
-
let open Ezjsonm in
-
let document = get_dict json |> List.assoc "document" in
-
let highlights = try get_dict json |> List.assoc "highlights" with _ -> `A [] in
-
let score = try get_dict json |> List.assoc "text_match" |> get_float with _ -> 0.0 in
+
(* Jsont codecs *)
+
+
(** Helper to find a field by name in the fields list *)
+
let find_field field_name fields =
+
List.find_opt (fun ((name, _), _value) -> name = field_name) fields
-
let id = get_dict document |> List.assoc "id" |> get_string in
-
let title = try get_dict document |> List.assoc "title" |> get_string with _ -> "" in
-
let content = try
+
module Highlight = struct
+
let make field snippets = { field; snippets }
+
let field h = h.field
+
let snippets h = h.snippets
+
+
let jsont =
+
Jsont.Object.map ~kind:"Highlight" make
+
|> Jsont.Object.mem "field" Jsont.string ~enc:field
+
|> Jsont.Object.mem "snippets" (Jsont.list Jsont.string) ~enc:snippets
+
|> Jsont.Object.finish
+
end
+
+
module Search_result = struct
+
(* Helper to extract content from document based on collection *)
+
let extract_content collection document =
+
let get_string_field field_name =
+
match document with
+
| Jsont.Object (fields, _) ->
+
(match find_field field_name fields with
+
| Some (_, Jsont.String (s, _)) -> s
+
| _ -> "")
+
| _ -> ""
+
in
match collection with
-
| "papers" -> get_dict document |> List.assoc "abstract" |> get_string
-
| "projects" -> get_dict document |> List.assoc "description" |> get_string
-
| "news" -> get_dict document |> List.assoc "content" |> get_string
-
| "videos" -> get_dict document |> List.assoc "description" |> get_string
-
| "notes" -> get_dict document |> List.assoc "content" |> get_string
-
| "ideas" -> get_dict document |> List.assoc "description" |> get_string
-
| "contacts" -> get_dict document |> List.assoc "name" |> get_string
+
| "papers" -> get_string_field "abstract"
+
| "projects" -> get_string_field "description"
+
| "news" -> get_string_field "content"
+
| "videos" -> get_string_field "description"
+
| "notes" -> get_string_field "content"
+
| "ideas" -> get_string_field "description"
+
| "contacts" -> get_string_field "name"
| _ -> ""
-
with _ -> "" in
+
+
let make collection document highlights text_match =
+
let get_string_field field_name =
+
match document with
+
| Jsont.Object (fields, _) ->
+
(match find_field field_name fields with
+
| Some (_, Jsont.String (s, _)) -> s
+
| _ -> "")
+
| _ -> ""
+
in
+
let id = get_string_field "id" in
+
let title = get_string_field "title" in
+
let content = extract_content collection document in
+
let score = Option.value text_match ~default:0.0 in
+
let highlights = Option.value highlights ~default:[] in
+
{ id; title; content; score; collection; highlights; document }
+
+
let document r = r.document
+
let highlights r = if r.highlights = [] then None else Some r.highlights
+
let score r = if r.score = 0.0 then None else Some r.score
+
+
let jsont collection =
+
Jsont.Object.map ~kind:"SearchResult" (make collection)
+
|> Jsont.Object.mem "document" Jsont.json ~enc:document
+
|> Jsont.Object.opt_mem "highlights" (Jsont.list Highlight.jsont) ~enc:highlights
+
|> Jsont.Object.opt_mem "text_match" Jsont.number ~enc:score
+
|> Jsont.Object.finish
+
end
-
let parse_highlights highlights =
-
try
-
get_list (fun h ->
-
let field = get_dict h |> List.assoc "field" |> get_string in
-
let snippets = get_dict h |> List.assoc "snippets" |> get_list get_string in
-
(field, snippets)
-
) highlights
-
with _ -> []
-
in
+
module Search_response = struct
+
let make hits found search_time_ms =
+
{ hits; total = found; query_time = search_time_ms }
-
{ id; title; content; score; collection; highlights = parse_highlights highlights; document }
+
let hits r = r.hits
+
let total r = r.total
+
let query_time r = r.query_time
-
(** Parse search response from JSON *)
-
let parse_search_response collection json =
-
let open Ezjsonm in
-
let hits = get_dict json |> List.assoc "hits" |> get_list (parse_search_result collection) in
-
let total = get_dict json |> List.assoc "found" |> get_int in
-
let query_time = get_dict json |> List.assoc "search_time_ms" |> get_float in
-
{ hits; total; query_time }
+
let jsont collection =
+
Jsont.Object.map ~kind:"SearchResponse" make
+
|> Jsont.Object.mem "hits" (Jsont.list (Search_result.jsont collection)) ~enc:hits
+
|> Jsont.Object.mem "found" Jsont.int ~enc:total
+
|> Jsont.Object.mem "search_time_ms" Jsont.number ~enc:query_time
+
|> Jsont.Object.finish
+
end
(** Search a single collection *)
let search_collection client collection_name query ?(limit=10) ?(offset=0) () =
···
match make_request client path with
| Ok response_str ->
-
(match Requests_json_api.parse_json_result (parse_search_response collection_name) response_str with
+
(match Jsont_bytesrw.decode_string' (Search_response.jsont collection_name) response_str with
| Ok search_response -> Ok search_response
-
| Error msg -> Error (Json_error msg))
+
| Error error -> Error (Json_error (Jsont.Error.to_string error)))
| Error err -> Error err
(** Helper function to drop n elements from list *)
···
results: search_response list;
}
-
(** Parse multisearch response from JSON *)
-
let parse_multisearch_response json =
-
let open Ezjsonm in
-
let results_json = get_dict json |> List.assoc "results" |> get_list (fun r -> r) in
-
let results = List.mapi (fun i result_json ->
-
let collection_name = match i with
-
| 0 -> "contacts"
-
| 1 -> "news"
-
| 2 -> "notes"
-
| 3 -> "papers"
-
| 4 -> "projects"
-
| 5 -> "ideas"
-
| 6 -> "videos"
-
| _ -> "unknown"
-
in
-
parse_search_response collection_name result_json
-
) results_json in
-
{ results }
+
(* Multisearch response decoder - needs special handling for collection names *)
+
let decode_multisearch_response collections json_str =
+
(* First decode as generic JSON *)
+
match Jsont_bytesrw.decode_string' Jsont.json json_str with
+
| Error e -> Error e
+
| Ok json ->
+
(* Extract the results array *)
+
match json with
+
| Jsont.Object (fields, _) ->
+
(match find_field "results" fields with
+
| Some (_, Jsont.Array (results_array, _)) ->
+
(* Decode each result with its corresponding collection name *)
+
let decode_result idx result_json =
+
let collection = List.nth collections idx in
+
match Jsont_bytesrw.encode_string' ~format:Jsont.Minify Jsont.json result_json with
+
| Error e -> Error e
+
| Ok result_str ->
+
Jsont_bytesrw.decode_string' (Search_response.jsont collection) result_str
+
in
+
let rec decode_all idx acc = function
+
| [] -> Ok (List.rev acc)
+
| hd :: tl ->
+
match decode_result idx hd with
+
| Error e -> Error e
+
| Ok result -> decode_all (idx + 1) (result :: acc) tl
+
in
+
(match decode_all 0 [] results_array with
+
| Ok results -> Ok { results }
+
| Error e -> Error e)
+
| _ -> Error (Jsont.Error.msg Jsont.Meta.none "Missing or invalid results field"))
+
| _ -> Error (Jsont.Error.msg Jsont.Meta.none "Expected JSON object")
(** Perform multisearch across all collections *)
let multisearch client query ?(limit=10) () =
···
("videos", "title,description,channel,platform,tags");
] in
+
(* Build search request objects *)
let searches = List.map (fun collection ->
let query_by = List.assoc collection query_by_collection in
-
Ezjsonm.dict [
-
("collection", Ezjsonm.string collection);
-
("q", Ezjsonm.string query);
-
("query_by", Ezjsonm.string query_by);
-
("exclude_fields", Ezjsonm.string "embedding");
-
("per_page", Ezjsonm.int limit);
-
]
+
Jsont.Object ([
+
(("collection", Jsont.Meta.none), Jsont.String (collection, Jsont.Meta.none));
+
(("q", Jsont.Meta.none), Jsont.String (query, Jsont.Meta.none));
+
(("query_by", Jsont.Meta.none), Jsont.String (query_by, Jsont.Meta.none));
+
(("exclude_fields", Jsont.Meta.none), Jsont.String ("embedding", Jsont.Meta.none));
+
(("per_page", Jsont.Meta.none), Jsont.Number (float_of_int limit, Jsont.Meta.none));
+
], Jsont.Meta.none)
) collections in
-
let body = Ezjsonm.dict [("searches", Ezjsonm.list (fun x -> x) searches)] |> Ezjsonm.value_to_string in
+
let request_obj = Jsont.Object ([
+
(("searches", Jsont.Meta.none), Jsont.Array (searches, Jsont.Meta.none));
+
], Jsont.Meta.none) in
-
match make_request client ~meth:`POST ~body "/multi_search" with
-
| Ok response_str ->
-
(match Requests_json_api.parse_json_result parse_multisearch_response response_str with
-
| Ok multisearch_resp -> Ok multisearch_resp
-
| Error msg -> Error (Json_error msg))
-
| Error err -> Error err
+
match Jsont_bytesrw.encode_string' ~format:Jsont.Minify Jsont.json request_obj with
+
| Error encode_error -> Error (Json_error (Jsont.Error.to_string encode_error))
+
| Ok body ->
+
match make_request client ~meth:`POST ~body "/multi_search" with
+
| Ok response_str ->
+
(match decode_multisearch_response collections response_str with
+
| Ok multisearch_resp -> Ok multisearch_resp
+
| Error error -> Error (Json_error (Jsont.Error.to_string error)))
+
| Error err -> Error err
(** Combine multisearch results into single result set *)
let combine_multisearch_results (multisearch_resp : multisearch_response) ?(limit=10) ?(offset=0) () =
···
(** List all collections *)
let list_collections client =
-
let parse_collections json =
-
Ezjsonm.get_list (fun c ->
-
let name = Ezjsonm.get_dict c |> List.assoc "name" |> Ezjsonm.get_string in
-
let num_docs = Ezjsonm.get_dict c |> List.assoc "num_documents" |> Ezjsonm.get_int in
-
(name, num_docs)
-
) json
-
in
+
let module Collection_info = struct
+
let make name num_documents = (name, num_documents)
+
let name ci = fst ci
+
let num_documents ci = snd ci
+
+
let jsont =
+
Jsont.Object.map ~kind:"CollectionInfo" make
+
|> Jsont.Object.mem "name" Jsont.string ~enc:name
+
|> Jsont.Object.mem "num_documents" Jsont.int ~enc:num_documents
+
|> Jsont.Object.finish
+
end in
+
match make_request client "/collections" with
| Ok response_str ->
-
(match Requests_json_api.parse_json_result parse_collections response_str with
+
(match Jsont_bytesrw.decode_string' (Jsont.list Collection_info.jsont) response_str with
| Ok collections -> Ok collections
-
| Error msg -> Error (Json_error msg))
+
| Error error -> Error (Json_error (Jsont.Error.to_string error)))
| Error err -> Error err
(** Pretty printer utilities *)
(** Extract field value from JSON document or return empty string if not found *)
-
let extract_field_string document field =
-
try
-
let open Ezjsonm in
-
get_dict document |> List.assoc field |> get_string
-
with _ -> ""
+
let extract_field_string document field_name =
+
match document with
+
| Jsont.Object (fields, _) ->
+
(match find_field field_name fields with
+
| Some (_, Jsont.String (s, _)) -> s
+
| _ -> "")
+
| _ -> ""
(** Extract field value from JSON document as string list or return empty list if not found *)
-
let extract_field_string_list document field =
-
try
-
let open Ezjsonm in
-
get_dict document |> List.assoc field |> get_list get_string
-
with _ -> []
+
let extract_field_string_list document field_name =
+
match document with
+
| Jsont.Object (fields, _) ->
+
(match find_field field_name fields with
+
| Some (_, Jsont.Array (items, _)) ->
+
List.filter_map (function
+
| Jsont.String (s, _) -> Some s
+
| _ -> None
+
) items
+
| _ -> [])
+
| _ -> []
(** Extract field value from JSON document as boolean or return false if not found *)
-
let extract_field_bool document field =
-
try
-
let open Ezjsonm in
-
get_dict document |> List.assoc field |> get_bool
-
with _ -> false
+
let extract_field_bool document field_name =
+
match document with
+
| Jsont.Object (fields, _) ->
+
(match find_field field_name fields with
+
| Some (_, Jsont.Bool (b, _)) -> b
+
| _ -> false)
+
| _ -> false
(** Format authors list for display *)
let format_authors authors =
+13 -6
stack/typesense-client/typesense_client.mli
···
@param config Configuration with endpoint and API key
@return A client instance *)
val create :
-
requests_session:(float Eio.Time.clock_ty Eio.Resource.t, [`Generic | `Unix] Eio.Net.ty Eio.Resource.t) Requests.t ->
+
requests_session:Requests.t ->
config:config ->
t
···
val pp_error : Format.formatter -> error -> unit
(** Search result types *)
+
+
(** A highlight snippet from a search result *)
+
type highlight = {
+
field: string;
+
snippets: string list;
+
}
+
type search_result = {
id: string;
title: string;
content: string;
score: float;
collection: string;
-
highlights: (string * string list) list;
-
document: Ezjsonm.value; (* Store raw document for flexible field access *)
+
highlights: highlight list;
+
document: Jsont.json; (* Store raw document for flexible field access *)
}
type search_response = {
···
((string * int) list, error) result
(** Pretty printer utilities *)
-
val extract_field_string : Ezjsonm.value -> string -> string
-
val extract_field_string_list : Ezjsonm.value -> string -> string list
-
val extract_field_bool : Ezjsonm.value -> string -> bool
+
val extract_field_string : Jsont.json -> string -> string
+
val extract_field_string_list : Jsont.json -> string -> string list
+
val extract_field_bool : Jsont.json -> string -> bool
val format_authors : string list -> string
val format_date : string -> string
val format_tags : string list -> string
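
A usage sketch for the new Jsont-based result types and `extract_field_*` helpers, assuming `search_collection` is exported with the signature from the implementation and that a "papers" collection with a "tags" field exists (both are assumptions):

```ocaml
(* Usage sketch: search one collection and print hits. The "papers" collection
   and its "tags" field are assumptions for illustration. *)
let print_paper_hits client query =
  match Typesense_client.search_collection client "papers" query () with
  | Error e -> Format.eprintf "search failed: %a@." Typesense_client.pp_error e
  | Ok resp ->
    List.iter (fun r ->
      let tags =
        Typesense_client.extract_field_string_list r.Typesense_client.document "tags" in
      Printf.printf "%s [%s] (score %.1f)\n"
        r.Typesense_client.title (String.concat ", " tags) r.Typesense_client.score)
      resp.Typesense_client.hits
```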
+1 -1
stack/zotero-translation/dune
···
(library
(name zotero_translation)
(public_name zotero-translation)
-
(libraries astring eio requests ezjsonm fpath uri))
+
(libraries astring eio requests jsont jsont.bytesrw fpath uri))
+2 -1
stack/zotero-translation/dune-project
···
uri
eio
requests
-
ezjsonm
+
jsont
+
(jsont-bytesrw (>= "0.4"))
yaml
astring))
+2 -1
stack/zotero-translation/zotero-translation.opam
···
"uri"
"eio"
"requests"
-
"ezjsonm"
+
"jsont"
+
"jsont-bytesrw" {>= "0.4"}
"yaml"
"astring"
"odoc" {with-doc}
+107 -60
stack/zotero-translation/zotero_translation.ml
···
(** Resolve a DOI from a Zotero translation server *)
-
module J = Ezjsonm
-
(* From the ZTS source code: https://github.com/zotero/translation-server/blob/master/src/formats.js
bibtex: "9cb70025-a888-4a29-a210-93ec52da40d4",
biblatex: "b6e39b57-8942-4d11-8259-342c46ce395f",
···
| true -> Uri.of_string (base_uri ^ "import")
| false -> Uri.of_string (base_uri ^ "/import")
-
type ('clock, 'net) t = {
+
type t = {
base_uri: string;
-
requests_session: ('clock, 'net) Requests.t;
+
requests_session: Requests.t;
}
-
let create ~sw ~env ?requests_session base_uri =
-
let requests_session = match requests_session with
-
| Some session -> session
-
| None -> Requests.create ~sw env
-
in
+
let create ~requests_session base_uri =
{ base_uri; requests_session }
-
let v _base_uri =
-
failwith "Zotero_translation.v is deprecated. Use Zotero_translation.create ~sw ~env base_uri instead"
-
let resolve_doi { base_uri; requests_session } doi =
let body_str = "https://doi.org/" ^ doi in
let uri = web_endp base_uri in
···
let body = Requests.Response.body response |> Eio.Flow.read_all in
if status = 200 then begin
try
-
let doi_json = J.from_string body in
-
Ok doi_json
+
match Jsont_bytesrw.decode_string' Jsont.json body with
+
| Ok doi_json -> Ok doi_json
+
| Error e -> Error (`Msg (Jsont.Error.to_string e))
with exn -> Error (`Msg (Printexc.to_string exn))
end else
Error (`Msg (Format.asprintf "Unexpected HTTP status: %d for %s" status body))
···
let body = Requests.Response.body response |> Eio.Flow.read_all in
if status = 200 then begin
try
-
let url_json = J.from_string body in
-
Ok url_json
+
match Jsont_bytesrw.decode_string' Jsont.json body with
+
| Ok url_json -> Ok url_json
+
| Error e -> Error (`Msg (Jsont.Error.to_string e))
with exn -> Error (`Msg (Printexc.to_string exn))
end else
Error (`Msg (Format.asprintf "Unexpected HTTP status: %d for %s" status body))
···
let body = Requests.Response.body response |> Eio.Flow.read_all in
if status = 200 then begin
try
-
let doi_json = J.from_string body in
-
Ok doi_json
+
match Jsont_bytesrw.decode_string' Jsont.json body with
+
| Ok doi_json -> Ok doi_json
+
| Error e -> Error (`Msg (Jsont.Error.to_string e))
with exn -> Error (`Msg (Printexc.to_string exn))
end else
Error (`Msg (Format.asprintf "Unexpected HTTP status: %d for %s" status body))
let export { base_uri; requests_session } format api =
-
let body_str = J.to_string api in
-
let uri = Uri.with_query' (export_endp base_uri ) ["format", (format_to_string format)] in
-
let body = Requests.Body.of_string Requests.Mime.json body_str in
-
let response = Requests.post requests_session ~body (Uri.to_string uri) in
-
let status = Requests.Response.status_code response in
-
let body = Requests.Response.body response |> Eio.Flow.read_all in
-
if status = 200 then begin
-
try
-
match format with
-
| Bibtex -> Ok (Astring.String.trim body)
-
| _ -> Ok body
-
with exn -> Error (`Msg (Printexc.to_string exn))
-
end else
-
Error (`Msg (Format.asprintf "Unexpected HTTP status: %d for %s" status body))
+
match Jsont_bytesrw.encode_string' Jsont.json api with
+
| Error e -> Error (`Msg (Jsont.Error.to_string e))
+
| Ok body_str ->
+
let uri = Uri.with_query' (export_endp base_uri ) ["format", (format_to_string format)] in
+
let body = Requests.Body.of_string Requests.Mime.json body_str in
+
let response = Requests.post requests_session ~body (Uri.to_string uri) in
+
let status = Requests.Response.status_code response in
+
let body = Requests.Response.body response |> Eio.Flow.read_all in
+
if status = 200 then begin
+
try
+
match format with
+
| Bibtex -> Ok (Astring.String.trim body)
+
| _ -> Ok body
+
with exn -> Error (`Msg (Printexc.to_string exn))
+
end else
+
Error (`Msg (Format.asprintf "Unexpected HTTP status: %d for %s" status body))
let unescape_hex s =
let buf = Buffer.create (String.length s) in
···
| Ok [bib] ->
let f = Bibtex.fields bib |> Bibtex.SM.bindings |> List.map (fun (k,v) -> k, (unescape_bibtex v)) in
let ty = match Bibtex.type' bib with "inbook" -> "book" | x -> x in
-
let v = List.fold_left (fun acc (k,v) -> (k,(`String v))::acc) ["bibtype",`String ty] f in
+
let v = List.fold_left (fun acc (k,v) -> ((k, Jsont.Meta.none), Jsont.String (v, Jsont.Meta.none))::acc)
+
[(("bibtype", Jsont.Meta.none), Jsont.String (ty, Jsont.Meta.none))] f in
v
| Ok _ -> failwith "one bib at a time plz"
let bib_of_doi zt doi =
prerr_endline ("Fetching " ^ doi);
-
let v = match resolve_doi zt doi with
-
| Ok r -> r
+
match resolve_doi zt doi with
| Error (`Msg _) ->
Printf.eprintf "%s failed on /web, trying to /search\n%!" doi;
-
match search_id zt doi with
+
begin match search_id zt doi with
| Error (`Msg e) -> failwith e
-
| Ok r -> r
-
in
-
match export zt Bibtex v with
-
| Error (`Msg e) -> failwith e
-
| Ok r ->
-
print_endline r;
-
r
+
| Ok v ->
+
match export zt Bibtex v with
+
| Error (`Msg e) -> failwith e
+
| Ok r ->
+
print_endline r;
+
r
+
end
+
| Ok v ->
+
match export zt Bibtex v with
+
| Error (`Msg e) -> failwith e
+
| Ok r ->
+
print_endline r;
+
r
+
+
(* Helper to get string from Jsont.json *)
+
let get_string = function
+
| Jsont.String (s, _) -> s
+
| _ -> failwith "Expected string in JSON"
+
+
(* Helper to get list from Jsont.json *)
+
let get_list f = function
+
| Jsont.Array (arr, _) -> List.map f arr
+
| _ -> failwith "Expected array in JSON"
+
+
(* Helper to find a field in Jsont.Object *)
+
let find_field name = function
+
| Jsont.Object (mems, _) ->
+
List.find_map (fun ((k, _), v) -> if k = name then Some v else None) mems
+
| _ -> None
+
+
(* Helper to get a required field as string *)
+
let get_field name json =
+
match find_field name json with
+
| Some v -> get_string v
+
| None -> failwith ("Missing field: " ^ name)
+
+
(* Helper to update a field in a Jsont.Object *)
+
let update_field name value json =
+
match json with
+
| Jsont.Object (mems, meta) ->
+
let mems' =
+
match value with
+
| None -> List.filter (fun ((k, _), _) -> k <> name) mems
+
| Some v ->
+
let without = List.filter (fun ((k, _), _) -> k <> name) mems in
+
((name, Jsont.Meta.none), v) :: without
+
in
+
Jsont.Object (mems', meta)
+
| _ -> json
let split_authors keys =
+
let json = Jsont.Object (keys, Jsont.Meta.none) in
+
let author_str = get_field "author" json in
let authors =
-
List.assoc "author" keys |> J.get_string |>
-
Astring.String.cuts ~empty:false ~sep:" and " |>
+
Astring.String.cuts ~empty:false ~sep:" and " author_str |>
List.map Bibtex.list_value |>
List.map (fun v -> List.rev v |> String.concat " ") |>
-
List.map (fun x -> `String x)
+
List.map (fun x -> Jsont.String (x, Jsont.Meta.none))
in
let keywords =
-
List.assoc_opt "keywords" keys |> function
+
match find_field "keywords" json with
| None -> []
| Some k ->
-
Astring.String.cuts ~empty:false ~sep:", " (J.get_string k) |>
-
List.map (fun x -> `String x)
+
Astring.String.cuts ~empty:false ~sep:", " (get_string k) |>
+
List.map (fun x -> Jsont.String (x, Jsont.Meta.none))
in
-
J.update (`O keys) ["author"] (Some (`A authors)) |> fun j ->
-
J.update j ["keywords"] (match keywords with [] -> None | _ -> Some (`A keywords))
+
let json' = update_field "author" (Some (Jsont.Array (authors, Jsont.Meta.none))) json in
+
let json'' = update_field "keywords"
+
(match keywords with [] -> None | _ -> Some (Jsont.Array (keywords, Jsont.Meta.none))) json' in
+
match json'' with
+
| Jsont.Object (mems, _) -> mems
+
| _ -> failwith "Expected object"
let add_bibtex ~slug y =
-
let (.%{}) = fun y k -> J.find y [k] in
+
let json = Jsont.Object (y, Jsont.Meta.none) in
+
let find_opt k = find_field k json in
let add_if_present k f m =
-
match J.find y [k] with
-
| v -> Bibtex.SM.add k (f v) m
-
| exception Not_found -> m in
-
let string k m = add_if_present k J.get_string m in
-
let authors m = add_if_present "author" (fun j -> J.get_list J.get_string j |> String.concat " and ") m in
+
match find_opt k with
+
| Some v -> Bibtex.SM.add k (f v) m
+
| None -> m
+
in
+
let string k m = add_if_present k get_string m in
+
let authors m = add_if_present "author" (fun j -> get_list get_string j |> String.concat " and ") m in
let cite_key = Astring.String.map (function '-' -> '_' |x -> x) slug in
let fields = Bibtex.SM.empty in
-
let type' = y.%{"bibtype"} |> J.get_string |> String.lowercase_ascii in
+
let type' = get_field "bibtype" json |> String.lowercase_ascii in
let fields = authors fields |> string "title" |> string "doi" |> string "month" |> string "year" |> string "url" in
let fields = match type' with
| "article" -> string "journal" fields |> string "volume" |> string "number" |> string "pages"
···
| "misc" -> string "howpublished" fields
| "techreport" -> string "institution" fields |> string "number" |> string "address"
| b -> prerr_endline ("unknown bibtype " ^ b); fields in
-
Bibtex.v ~type' ~cite_key ~fields () |> Fmt.str "%a" Bibtex.pp |>
-
fun bib -> J.update y ["bib"] (Some (`String bib))
+
let bib = Bibtex.v ~type' ~cite_key ~fields () |> Fmt.str "%a" Bibtex.pp in
+
match update_field "bib" (Some (Jsont.String (bib, Jsont.Meta.none))) json with
+
| Jsont.Object (mems, _) -> mems
+
| _ -> failwith "Expected object"
let json_of_doi zt ~slug doi =
let x = bib_of_doi zt doi in
+15 -23
stack/zotero-translation/zotero_translation.mli
···
(** {1 Interface to the Zotero Translation Server} *)
-
type ('clock, 'net) t
+
type t
type format =
| Bibtex
···
val format_of_string: string -> format option
(** Create a Zotero Translation client.
-
@param requests_session Optional Requests session for connection pooling.
-
If not provided, a new session is created. *)
+
@param requests_session Shared Requests session for connection pooling.
+
@param base_uri Base URI of the Zotero translation server (e.g., "http://localhost:1969"). *)
val create :
-
sw:Eio.Switch.t ->
-
env:< clock: ([> float Eio.Time.clock_ty ] as 'clock) Eio.Resource.t;
-
net: ([> [> `Generic ] Eio.Net.ty ] as 'net) Eio.Resource.t;
-
fs: Eio.Fs.dir_ty Eio.Path.t; .. > ->
-
?requests_session:('clock Eio.Resource.t, 'net Eio.Resource.t) Requests.t ->
-
string -> ('clock Eio.Resource.t, 'net Eio.Resource.t) t
+
requests_session:Requests.t ->
+
string -> t
-
(** Deprecated: use [create] instead *)
-
val v : string -> (_, _) t
-
[@@deprecated "Use create ~sw ~env base_uri instead"]
-
-
val resolve_doi: ([> float Eio.Time.clock_ty ] Eio.Resource.t, [> [> `Generic ] Eio.Net.ty ] Eio.Resource.t) t ->
-
string -> (Ezjsonm.t, [>`Msg of string]) result
+
val resolve_doi: t ->
+
string -> (Jsont.json, [>`Msg of string]) result
-
val resolve_url: ([> float Eio.Time.clock_ty ] Eio.Resource.t, [> [> `Generic ] Eio.Net.ty ] Eio.Resource.t) t ->
-
string -> (Ezjsonm.t, [>`Msg of string]) result
+
val resolve_url: t ->
+
string -> (Jsont.json, [>`Msg of string]) result
-
val search_id: ([> float Eio.Time.clock_ty ] Eio.Resource.t, [> [> `Generic ] Eio.Net.ty ] Eio.Resource.t) t ->
-
string -> (Ezjsonm.t, [>`Msg of string]) result
+
val search_id: t ->
+
string -> (Jsont.json, [>`Msg of string]) result
-
val export: ([> float Eio.Time.clock_ty ] Eio.Resource.t, [> [> `Generic ] Eio.Net.ty ] Eio.Resource.t) t ->
-
format -> Ezjsonm.t -> (string, [>`Msg of string]) result
+
val export: t ->
+
format -> Jsont.json -> (string, [>`Msg of string]) result
-
val json_of_doi : ([> float Eio.Time.clock_ty ] Eio.Resource.t, [> [> `Generic ] Eio.Net.ty ] Eio.Resource.t) t ->
-
slug:string -> string -> Ezjsonm.value
+
val json_of_doi : t ->
+
slug:string -> string -> Jsont.object'
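
A hypothetical end-to-end use of the simplified interface; the server URL and DOI are placeholders, and `Eio_main` plus the `Requests.create ~sw env` setup are assumed to be available to the caller:

```ocaml
(* Hypothetical end-to-end use: resolve a DOI and export it as BibTeX.
   Server URL and DOI are placeholders; Eio_main is assumed to be available. *)
let () =
  Eio_main.run @@ fun env ->
  Eio.Switch.run @@ fun sw ->
  let requests_session = Requests.create ~sw env in
  let zt = Zotero_translation.create ~requests_session "http://localhost:1969" in
  match Zotero_translation.resolve_doi zt "10.1000/example-doi" with
  | Error (`Msg e) -> prerr_endline ("resolve failed: " ^ e)
  | Ok items ->
    match Zotero_translation.export zt Zotero_translation.Bibtex items with
    | Ok bib -> print_endline bib
    | Error (`Msg e) -> prerr_endline ("export failed: " ^ e)
```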
+217
stack/zulip/ARCHITECTURE.md
···
+
# Zulip Library Architecture
+
+
## Overview
+
+
The Zulip OCaml library follows a clean, layered architecture that separates protocol types, encoding concerns, and HTTP communication.
+
+
## Architecture Layers
+
+
```
+
┌─────────────────────────────────────┐
+
│ API Modules (Messages, Channels)    │ ← High-level operations
+
├─────────────────────────────────────┤
+
│ Protocol Types (Message, Channel)   │ ← Business logic types with Jsont codecs
+
├─────────────────────────────────────┤
+
│ Encode Module                       │ ← JSON/Form encoding utilities
+
├─────────────────────────────────────┤
+
│ Client Module                       │ ← HTTP request/response handling
+
├─────────────────────────────────────┤
+
│ Requests Library (EIO-based)        │ ← Low-level HTTP
+
└─────────────────────────────────────┘
+
```
+
+
## Key Design Principles
+
+
### 1. **Protocol Types with Jsont Codecs**
+
+
Each Zulip API type (Message, Channel, User, etc.) has:
+
- A clean OCaml record type
+
- A `jsont` codec that defines bidirectional JSON conversion
+
- Accessor functions
+
- Pretty printer
+
+
Example from `channel.ml`:
+
```ocaml
+
type t = {
+
name : string;
+
description : string;
+
invite_only : bool;
+
history_public_to_subscribers : bool;
+
}
+
+
let jsont =
+
Jsont.Object.map ~kind:"Channel" make
+
|> Jsont.Object.mem "name" Jsont.string ~enc:name
+
|> Jsont.Object.mem "description" Jsont.string ~enc:description
+
|> Jsont.Object.mem "invite_only" Jsont.bool ~enc:invite_only
+
|> Jsont.Object.mem "history_public_to_subscribers" Jsont.bool ~enc:history_public_to_subscribers
+
|> Jsont.Object.finish
+
```
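
The accessors and pretty printer from the list above are one-liners; the sketch below mirrors what `channel.ml` does in this change (the `make` constructor is the function passed to `Jsont.Object.map`):

```ocaml
(* Constructor used by the codec, plus the usual accessors and printer. *)
let make name description invite_only history_public_to_subscribers =
  { name; description; invite_only; history_public_to_subscribers }

let name t = t.name
let description t = t.description
let invite_only t = t.invite_only
let history_public_to_subscribers t = t.history_public_to_subscribers

let pp fmt t =
  Format.fprintf fmt "Channel{name=%s, description=%s}" t.name t.description
```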
+
+
### 2. **Encode Module: Separation of Encoding Concerns**
+
+
The `Encode` module provides clean utilities for converting between OCaml types and wire formats:
+
+
```ocaml
+
(** Convert using a jsont codec *)
+
val to_json_string : 'a Jsont.t -> 'a -> string
+
val to_form_urlencoded : 'a Jsont.t -> 'a -> string
+
val from_json : 'a Jsont.t -> Jsont.json -> ('a, string) result
+
```
+
+
This eliminates the need for:
+
- โŒ Manual JSON tree walking
+
- โŒ Round-trip encodeโ†’decode conversions
+
- โŒ Per-type encoding functions
+
+
### 3. **Request/Response Types with Codecs**
+
+
API operations define request/response types locally with their codecs:
+
+
```ocaml
+
(* In channels.ml *)
+
module Subscribe_request = struct
+
type t = { subscriptions : string list }
+
+
let codec =
+
Jsont.Object.(
+
map ~kind:"SubscribeRequest" (fun subscriptions -> { subscriptions })
+
|> mem "subscriptions" (Jsont.list Jsont.string) ~enc:(fun r -> r.subscriptions)
+
|> finish
+
)
+
end
+
+
let subscribe client ~channels =
+
let req = Subscribe_request.{ subscriptions = channels } in
+
let body = Encode.to_form_urlencoded Subscribe_request.codec req in
+
let content_type = "application/x-www-form-urlencoded" in
+
match Client.request client ~method_:`POST ~path:"/api/v1/users/me/subscriptions"
+
~body ~content_type () with
+
| Ok _json -> Ok ()
+
| Error err -> Error err
+
```
+
+
### 4. **Type-Safe Decoding**
+
+
Response parsing uses codecs directly instead of manual pattern matching:
+
+
```ocaml
+
(* OLD - manual JSON walking *)
+
match json with
+
| Jsont.Object (fields, _) ->
+
let assoc = List.map (fun ((k, _), v) -> (k, v)) fields in
+
(match List.assoc_opt "streams" assoc with
+
| Some (Jsont.Array (channel_list, _)) -> ...
+
+
(* NEW - type-safe codec *)
+
let response_codec =
+
Jsont.Object.(
+
map ~kind:"StreamsResponse" (fun streams -> streams)
+
|> mem "streams" (Jsont.list Channel.jsont) ~enc:(fun x -> x)
+
|> finish
+
)
+
in
+
match Encode.from_json response_codec json with
+
| Ok channels -> Ok channels
+
| Error msg -> Error (...)
+
```
+
+
## Benefits
+
+
### ✅ Type Safety
+
- Jsont codecs ensure correct JSON structure
+
- Compilation errors catch schema mismatches
+
- No runtime type confusion
+
+
### ✅ Maintainability
+
- Protocol changes only require updating codecs
+
- No manual JSON manipulation scattered through code
+
- Clear separation of concerns
+
+
### ✅ Reusability
+
- Codecs can be composed and reused
+
- Encode module works for any jsont-encoded type
+
- Request/response types are self-documenting
+
+
### ✅ Testability
+
- Easy to test encoding/decoding in isolation
+
- Mock responses can be type-checked
+
- Round-trip property testing possible (see the sketch below)
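
A minimal version of such a round-trip check, assuming Alcotest as the test runner and in-library module paths (`Encode.from_json_string` is part of the full `Encode` interface shown further down in this change), might look like:

```ocaml
(* Sketch: encode a Channel through its codec, decode it back,
   and check that the fields survive the round trip. *)
let test_channel_roundtrip () =
  let c = Channel.create ~name:"general" ~description:"Main channel" () in
  let json_str = Encode.to_json_string Channel.jsont c in
  match Encode.from_json_string Channel.jsont json_str with
  | Error msg -> Alcotest.fail msg
  | Ok c' ->
    Alcotest.(check string) "name" (Channel.name c) (Channel.name c');
    Alcotest.(check bool) "invite_only"
      (Channel.invite_only c) (Channel.invite_only c')

let () =
  Alcotest.run "zulip"
    [ ("codecs", [ Alcotest.test_case "channel round-trip" `Quick test_channel_roundtrip ]) ]
```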
+
+
## Migration Pattern
+
+
When adding new API endpoints, follow these steps (a combined sketch appears after the list):
+
+
1. **Define the protocol type with codec**:
+
```ocaml
+
type my_request = { field1: string; field2: int }
+
+
let my_request_codec =
+
Jsont.Object.(
+
map ~kind:"MyRequest" (fun field1 field2 -> { field1; field2 })
+
|> mem "field1" Jsont.string ~enc:(fun r -> r.field1)
+
|> mem "field2" Jsont.int ~enc:(fun r -> r.field2)
+
|> finish
+
)
+
```
+
+
2. **Encode using Encode module**:
+
```ocaml
+
let body = Encode.to_form_urlencoded my_request_codec req in
+
(* or *)
+
let json = Encode.to_json_string my_request_codec req in
+
```
+
+
3. **Decode responses with codec**:
+
```ocaml
+
match Client.request client ~method_:`POST ~path:"/api/..." ~body () with
+
| Ok json ->
+
(match Encode.from_json response_codec json with
+
| Ok data -> Ok data
+
| Error msg -> Error ...)
+
```
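
Putting the three steps together, a new endpoint wrapper stays small. The sketch below is illustrative only: the `/api/v1/typing` path and the `op`/`to` field names are placeholders standing in for whatever the real endpoint requires, not a finished binding.

```ocaml
module Typing_request = struct
  type t = { op : string; to_ : string }

  let codec =
    Jsont.Object.(
      map ~kind:"TypingRequest" (fun op to_ -> { op; to_ })
      |> mem "op" Jsont.string ~enc:(fun r -> r.op)
      |> mem "to" Jsont.string ~enc:(fun r -> r.to_)
      |> finish
    )
end

(* Hypothetical endpoint wrapper following steps 1-3 above. *)
let set_typing client ~recipient ~started =
  let op = if started then "start" else "stop" in
  let req = Typing_request.{ op; to_ = recipient } in
  let body = Encode.to_form_urlencoded Typing_request.codec req in
  let content_type = "application/x-www-form-urlencoded" in
  match Client.request client ~method_:`POST ~path:"/api/v1/typing"
          ~body ~content_type () with
  | Ok _json -> Ok ()
  | Error err -> Error err
```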
+
+
## Comparison with Old Approach
+
+
### Old (Manual JSON Manipulation):
+
```ocaml
+
let send client message =
+
let json = Message.to_json message in (* Round-trip conversion *)
+
let params = match json with
+
| Jsont.Object (fields, _) -> (* Manual pattern matching *)
+
List.fold_left (fun acc ((key, _), value) ->
+
let str_value = match value with (* More pattern matching *)
+
| Jsont.String (s, _) -> s
+
| Jsont.Bool (true, _) -> "true"
+
| _ -> ""
+
in
+
(key, str_value) :: acc
+
) [] fields
+
| _ -> [] in
+
(* ... *)
+
```
+
+
### New (Codec-Based):
+
```ocaml
+
let send client message =
+
let body = Message.to_form_urlencoded message in (* Clean encoding *)
+
let content_type = "application/x-www-form-urlencoded" in
+
match Client.request client ~method_:`POST ~path:"/api/v1/messages"
+
~body ~content_type () with
+
| Ok response -> Message_response.of_json response
+
| Error err -> Error err
+
```
+
+
## Future Enhancements
+
+
- **Validation**: Add validation layers on top of codecs
+
- **Versioning**: Support multiple API versions with codec variants
+
- **Documentation**: Generate API docs from codec definitions
+
- **Testing**: Property-based testing with codec round-trips
+
- **Code Generation**: Consider generating codecs from OpenAPI specs
+
+
## References
+
+
- Jsont library: https://erratique.ch/software/jsont
+
- Zulip REST API: https://zulip.com/api/rest
+
- Original design doc: `CLAUDE.md`
-689
stack/zulip/CLAUDE.md
···
-
I would like to build high quality OCaml bindings to the Zulip REST API,
-
documented at https://zulip.com/api/rest. As another reference, the Python
-
`zulip` library from pip is well maintained.
-
-
My target is to use the OCaml EIO direct-style library, with an idiomatic as
-
possible API that implements it. For JSON parsing, using the jsonm library is
-
right. For HTTPS, use cohttp-eio with the tls-eio library. You have access to
-
an OCaml LSP via MCP which provides type hints and other language server
-
features after you complete a `dune build`.
-
-
# OCaml Zulip Library Design
-
-
Based on analysis of:
-
- Zulip REST API documentation: https://zulip.com/api/rest
-
- Python zulip library: https://github.com/zulip/python-zulip-api
-
- Zulip error handling: https://zulip.com/api/rest-error-handling
-
- Zulip send message API: https://zulip.com/api/send-message
-
-
## Overview
-
The library follows OCaml best practices with abstract types (`type t`) per module, comprehensive constructors/accessors, and proper pretty printers. Each core concept gets its own module with a clean interface.
-
-
## Module Structure
-
-
### Authentication (`Zulip.Auth`)
-
```ocaml
-
type t (* abstract *)
-
-
val create : server_url:string -> email:string -> api_key:string -> t
-
val from_zuliprc : ?path:string -> unit -> (t, Error.t) result
-
val server_url : t -> string
-
val email : t -> string
-
val to_basic_auth_header : t -> string
-
val pp : Format.formatter -> t -> unit
-
```
-
-
### Error Handling (`Zulip.Error`)
-
```ocaml
-
type code =
-
| Invalid_api_key
-
| Request_variable_missing
-
| Bad_request
-
| User_deactivated
-
| Realm_deactivated
-
| Rate_limit_hit
-
| Other of string
-
-
type t (* abstract *)
-
-
val create : code:code -> msg:string -> ?extra:(string * Jsonm.value) list -> unit -> t
-
val code : t -> code
-
val message : t -> string
-
val extra : t -> (string * Jsonm.value) list
-
val pp : Format.formatter -> t -> unit
-
val of_json : Jsonm.value -> t option
-
```
-
-
### Message Types (`Zulip.Message_type`)
-
```ocaml
-
type t = [ `Direct | `Channel ]
-
-
val to_string : t -> string
-
val of_string : string -> t option
-
val pp : Format.formatter -> t -> unit
-
```
-
-
### Message (`Zulip.Message`)
-
```ocaml
-
type t (* abstract *)
-
-
val create :
-
type_:Message_type.t ->
-
to_:string list ->
-
content:string ->
-
?topic:string ->
-
?queue_id:string ->
-
?local_id:string ->
-
?read_by_sender:bool ->
-
unit -> t
-
-
val type_ : t -> Message_type.t
-
val to_ : t -> string list
-
val content : t -> string
-
val topic : t -> string option
-
val queue_id : t -> string option
-
val local_id : t -> string option
-
val read_by_sender : t -> bool
-
val to_json : t -> Jsonm.value
-
val pp : Format.formatter -> t -> unit
-
```
-
-
### Message Response (`Zulip.Message_response`)
-
```ocaml
-
type t (* abstract *)
-
-
val id : t -> int
-
val automatic_new_visibility_policy : t -> string option
-
val of_json : Jsonm.value -> (t, Error.t) result
-
val pp : Format.formatter -> t -> unit
-
```
-
-
### Client (`Zulip.Client`)
-
```ocaml
-
type t (* abstract *)
-
-
val create : #Eio.Env.t -> Auth.t -> t
-
val with_client : #Eio.Env.t -> Auth.t -> (t -> 'a) -> 'a
-
-
val request :
-
t ->
-
method_:[`GET | `POST | `PUT | `DELETE | `PATCH] ->
-
path:string ->
-
?params:(string * string) list ->
-
?body:string ->
-
unit ->
-
(Jsonm.value, Error.t) result
-
```
-
-
### Messages (`Zulip.Messages`)
-
```ocaml
-
val send : Client.t -> Message.t -> (Message_response.t, Error.t) result
-
val edit : Client.t -> message_id:int -> ?content:string -> ?topic:string -> unit -> (unit, Error.t) result
-
val delete : Client.t -> message_id:int -> (unit, Error.t) result
-
val get : Client.t -> message_id:int -> (Jsonm.value, Error.t) result
-
val get_messages :
-
Client.t ->
-
?anchor:string ->
-
?num_before:int ->
-
?num_after:int ->
-
?narrow:string list ->
-
unit ->
-
(Jsonm.value, Error.t) result
-
```
-
-
### Channel (`Zulip.Channel`)
-
```ocaml
-
type t (* abstract *)
-
-
val create :
-
name:string ->
-
description:string ->
-
?invite_only:bool ->
-
?history_public_to_subscribers:bool ->
-
unit -> t
-
-
val name : t -> string
-
val description : t -> string
-
val invite_only : t -> bool
-
val history_public_to_subscribers : t -> bool
-
val to_json : t -> Jsonm.value
-
val of_json : Jsonm.value -> (t, Error.t) result
-
val pp : Format.formatter -> t -> unit
-
```
-
-
### Channels (`Zulip.Channels`)
-
```ocaml
-
val create_channel : Client.t -> Channel.t -> (unit, Error.t) result
-
val delete : Client.t -> name:string -> (unit, Error.t) result
-
val list : Client.t -> (Channel.t list, Error.t) result
-
val subscribe : Client.t -> channels:string list -> (unit, Error.t) result
-
val unsubscribe : Client.t -> channels:string list -> (unit, Error.t) result
-
```
-
-
### User (`Zulip.User`)
-
```ocaml
-
type t (* abstract *)
-
-
val create :
-
email:string ->
-
full_name:string ->
-
?is_active:bool ->
-
?is_admin:bool ->
-
?is_bot:bool ->
-
unit -> t
-
-
val email : t -> string
-
val full_name : t -> string
-
val is_active : t -> bool
-
val is_admin : t -> bool
-
val is_bot : t -> bool
-
val to_json : t -> Jsonm.value
-
val of_json : Jsonm.value -> (t, Error.t) result
-
val pp : Format.formatter -> t -> unit
-
```
-
-
### Users (`Zulip.Users`)
-
```ocaml
-
val list : Client.t -> (User.t list, Error.t) result
-
val get : Client.t -> email:string -> (User.t, Error.t) result
-
val create_user : Client.t -> email:string -> full_name:string -> (unit, Error.t) result
-
val deactivate : Client.t -> email:string -> (unit, Error.t) result
-
```
-
-
### Event Type (`Zulip.Event_type`)
-
```ocaml
-
type t =
-
| Message
-
| Subscription
-
| User_activity
-
| Other of string
-
-
val to_string : t -> string
-
val of_string : string -> t
-
val pp : Format.formatter -> t -> unit
-
```
-
-
### Event (`Zulip.Event`)
-
```ocaml
-
type t (* abstract *)
-
-
val id : t -> int
-
val type_ : t -> Event_type.t
-
val data : t -> Jsonm.value
-
val of_json : Jsonm.value -> (t, Error.t) result
-
val pp : Format.formatter -> t -> unit
-
```
-
-
### Event Queue (`Zulip.Event_queue`)
-
```ocaml
-
type t (* abstract *)
-
-
val register :
-
Client.t ->
-
?event_types:Event_type.t list ->
-
unit ->
-
(t, Error.t) result
-
-
val id : t -> string
-
val get_events : t -> Client.t -> ?last_event_id:int -> unit -> (Event.t list, Error.t) result
-
val delete : t -> Client.t -> (unit, Error.t) result
-
val pp : Format.formatter -> t -> unit
-
```
-
-
## EIO Bot Framework Extension
-
-
Based on analysis of the Python bot framework at:
-
- https://github.com/zulip/python-zulip-api/blob/main/zulip_bots/zulip_bots/lib.py
-
- https://github.com/zulip/python-zulip-api/blob/main/zulip_botserver/zulip_botserver/server.py
-
-
### Bot Handler (`Zulip.Bot`)
-
```ocaml
-
module Storage : sig
-
type t (* abstract *)
-
-
val create : Client.t -> t
-
val get : t -> key:string -> string option
-
val put : t -> key:string -> value:string -> unit
-
val contains : t -> key:string -> bool
-
end
-
-
module Identity : sig
-
type t (* abstract *)
-
-
val full_name : t -> string
-
val email : t -> string
-
val mention_name : t -> string
-
end
-
-
type handler = {
-
handle_message :
-
client:Client.t ->
-
message:Jsonm.value ->
-
response:(Message.t -> unit) ->
-
unit;
-
-
usage : unit -> string;
-
description : unit -> string;
-
}
-
-
type t (* abstract *)
-
-
val create :
-
Client.t ->
-
handler:handler ->
-
?storage:Storage.t ->
-
unit -> t
-
-
val identity : t -> Identity.t
-
val storage : t -> Storage.t
-
val handle_message : t -> Jsonm.value -> unit
-
val send_reply : t -> original_message:Jsonm.value -> content:string -> unit
-
val send_message : t -> Message.t -> unit
-
```
-
-
### Bot Server (`Zulip.Bot_server`)
-
```ocaml
-
module Config : sig
-
type bot_config = {
-
email : string;
-
api_key : string;
-
token : string; (* webhook token *)
-
server_url : string;
-
module_name : string;
-
}
-
-
type t (* abstract *)
-
-
val create : bot_configs:bot_config list -> ?host:string -> ?port:int -> unit -> t
-
val from_file : string -> (t, Error.t) result
-
val from_env : string -> (t, Error.t) result
-
val host : t -> string
-
val port : t -> int
-
val bot_configs : t -> bot_config list
-
end
-
-
type t (* abstract *)
-
-
val create : #Eio.Env.t -> Config.t -> (t, Error.t) result
-
-
val run : t -> unit
-
(* Starts the server using EIO structured concurrency *)
-
-
val with_server : #Eio.Env.t -> Config.t -> (t -> 'a) -> ('a, Error.t) result
-
(* Resource-safe server management *)
-
```
-
-
### Bot Registry (`Zulip.Bot_registry`)
-
```ocaml
-
type bot_module = {
-
name : string;
-
handler : Bot.handler;
-
create_instance : Client.t -> Bot.t;
-
}
-
-
type t (* abstract *)
-
-
val create : unit -> t
-
val register : t -> bot_module -> unit
-
val get_handler : t -> email:string -> Bot.t option
-
val list_bots : t -> string list
-
-
(* Dynamic module loading *)
-
val load_from_file : string -> (bot_module, Error.t) result
-
val load_from_directory : string -> (bot_module list, Error.t) result
-
```
-
-
### Webhook Handler (`Zulip.Webhook`)
-
```ocaml
-
type webhook_event = {
-
bot_email : string;
-
token : string;
-
message : Jsonm.value;
-
trigger : [`Direct_message | `Mention];
-
}
-
-
type response = {
-
content : string option;
-
message_type : Message_type.t option;
-
to_ : string list option;
-
topic : string option;
-
}
-
-
val parse_webhook : string -> (webhook_event, Error.t) result
-
val handle_webhook : Bot_registry.t -> webhook_event -> (response option, Error.t) result
-
```
-
-
### Structured Concurrency Design
-
-
The EIO-based server uses structured concurrency to manage multiple bots safely:
-
-
```ocaml
-
(* Example server implementation using EIO *)
-
let run_server env config =
-
let registry = Bot_registry.create () in
-
-
(* Load and register all configured bots concurrently *)
-
Eio.Switch.run @@ fun sw ->
-
-
(* Start each bot in its own fiber *)
-
List.iter (fun bot_config ->
-
Eio.Fiber.fork ~sw (fun () ->
-
let auth = Auth.create
-
~server_url:bot_config.server_url
-
~email:bot_config.email
-
~api_key:bot_config.api_key in
-
-
Client.with_client env auth @@ fun client ->
-
-
(* Load bot module *)
-
match Bot_registry.load_from_file bot_config.module_name with
-
| Ok bot_module ->
-
let bot = bot_module.create_instance client in
-
Bot_registry.register registry bot_module;
-
-
(* Keep bot alive and handle events *)
-
Event_loop.run client bot
-
| Error e ->
-
Printf.eprintf "Failed to load bot %s: %s\n"
-
bot_config.email (Error.message e)
-
)
-
) (Config.bot_configs config);
-
-
(* Start HTTP server for webhooks *)
-
let server_addr = `Tcp (Eio.Net.Ipaddr.V4.loopback, Config.port config) in
-
Eio.Net.run_server env#net server_addr ~on_error:raise @@ fun flow _addr ->
-
-
(* Handle each webhook request concurrently *)
-
Eio.Switch.run @@ fun req_sw ->
-
Eio.Fiber.fork ~sw:req_sw (fun () ->
-
handle_http_request registry flow
-
)
-
```
-
-
### Event Loop (`Zulip.Event_loop`)
-
```ocaml
-
type t (* abstract *)
-
-
val create : Client.t -> Bot.t -> t
-
-
val run : Client.t -> Bot.t -> unit
-
(* Runs the event loop using real-time events API *)
-
-
val run_webhook_mode : Client.t -> Bot.t -> unit
-
(* Runs in webhook mode, waiting for HTTP callbacks *)
-
-
(* For advanced use cases *)
-
val with_event_loop :
-
Client.t ->
-
Bot.t ->
-
(Event_queue.t -> unit) ->
-
unit
-
```
-
-
## Key EIO Advantages
-
-
1. **Structured Concurrency**: Each bot runs in its own fiber with proper cleanup
-
2. **Resource Safety**: Automatic cleanup of connections, event queues, and HTTP servers
-
3. **Backpressure**: Natural flow control through EIO's cooperative scheduling
-
4. **Error Isolation**: Bot failures don't crash the entire server
-
5. **Graceful Shutdown**: Structured teardown of all resources
-
-
## Design Principles
-
-
1. **Abstract Types**: Each major concept has its own module with abstract `type t`
-
2. **Constructors**: Clear `create` functions with optional parameters
-
3. **Accessors**: All fields accessible via dedicated functions
-
4. **Pretty Printing**: Every type has a `pp` function for debugging
-
5. **JSON Conversion**: Bidirectional JSON conversion where appropriate
-
6. **Error Handling**: Consistent `(_, Error.t) result` return types
-
-
## Authentication Strategy
-
-
- Support zuliprc files and direct credential passing
-
- Abstract `Auth.t` prevents credential leakage
-
- HTTP Basic Auth with proper encoding
-
-
## EIO Integration
-
-
- All operations use EIO's direct-style async
-
- Resource-safe client management with `with_client`
-
- Proper cleanup of connections and event queues
-
-
## Example Usage
-
-
### Simple Message Sending
-
```ocaml
-
let () =
-
Eio_main.run @@ fun env ->
-
let auth = Zulip.Auth.create
-
~server_url:"https://example.zulipchat.com"
-
~email:"bot@example.com"
-
~api_key:"your-api-key" in
-
-
Zulip.Client.with_client env auth @@ fun client ->
-
-
let message = Zulip.Message.create
-
~type_:`Channel
-
~to_:["general"]
-
~content:"Hello from OCaml!"
-
~topic:"Bots"
-
() in
-
-
match Zulip.Messages.send client message with
-
| Ok response ->
-
Printf.printf "Message sent with ID: %d\n"
-
(Zulip.Message_response.id response)
-
| Error error ->
-
Printf.printf "Error: %s\n"
-
(Zulip.Error.message error)
-
```
-
-
### Simple Bot
-
```ocaml
-
let echo_handler = Zulip.Bot.{
-
handle_message = (fun ~client ~message ~response ->
-
let content = extract_content message in
-
let echo_msg = Message.create
-
~type_:`Direct
-
~to_:[sender_email message]
-
~content:("Echo: " ^ content) () in
-
response echo_msg
-
);
-
usage = (fun () -> "Echo bot - repeats your message");
-
description = (fun () -> "A simple echo bot");
-
}
-
-
let () =
-
Eio_main.run @@ fun env ->
-
let auth = Auth.from_zuliprc () |> Result.get_ok in
-
-
Client.with_client env auth @@ fun client ->
-
let bot = Bot.create client ~handler:echo_handler () in
-
Event_loop.run client bot
-
```
-
-
### Multi-Bot Server
-
```ocaml
-
let () =
-
Eio_main.run @@ fun env ->
-
let config = Bot_server.Config.from_file "bots.conf" |> Result.get_ok in
-
-
Bot_server.with_server env config @@ fun server ->
-
Bot_server.run server
-
```
-
-
## Package Dependencies
-
-
- `eio` - Effects-based I/O
-
- `cohttp-eio` - HTTP client with EIO support
-
- `tls-eio` - TLS support for HTTPS
-
- `jsonm` - Streaming JSON codec
-
- `uri` - URI parsing and manipulation
-
- `base64` - Base64 encoding for authentication
-
-
# Architecture Analysis: zulip_bot vs zulip_botserver
-
-
## Library Separation
-
-
### `zulip_bot` - Individual Bot Framework
-
**Purpose**: Library for building and running a single bot instance
-
-
**Key Components**:
-
- `Bot_handler` - Interface for bot logic with EIO environment access
-
- `Bot_runner` - Manages lifecycle of one bot (real-time events or webhook mode)
-
- `Bot_config` - Configuration for a single bot
-
- `Bot_storage` - Simple in-memory storage for bot state
-
-
**Usage Pattern**:
-
```ocaml
-
(* Run a single bot directly *)
-
let my_bot = Bot_handler.create (module My_echo_bot) ~config ~storage ~identity in
-
let runner = Bot_runner.create ~client ~handler:my_bot in
-
Bot_runner.run_realtime runner (* Bot connects to Zulip events API directly *)
-
```
-
-
### `zulip_botserver` - Multi-Bot Server Infrastructure
-
**Purpose**: HTTP server that manages multiple bots via webhooks
-
-
**Key Components**:
-
- `Bot_server` - HTTP server receiving webhook events from Zulip
-
- `Bot_registry` - Manages multiple bot instances
-
- `Server_config` - Configuration for multiple bots + server settings
-
- `Webhook_handler` - Parses incoming webhook requests and routes to appropriate bots
-
-
**Usage Pattern**:
-
```ocaml
-
(* Run a server hosting multiple bots *)
-
let registry = Bot_registry.create () in
-
Bot_registry.register registry echo_bot_module;
-
Bot_registry.register registry weather_bot_module;
-
-
let server = Bot_server.create ~env ~config ~registry in
-
Bot_server.run server (* HTTP server waits for webhook calls *)
-
```
-
-
## EIO Environment Requirements
-
-
### Why Bot Handlers Need Direct EIO Access
-
-
Bot handlers require direct access to the EIO environment for legitimate I/O operations beyond HTTP requests to Zulip:
-
-
1. **Network Operations**: Custom HTTP requests, API calls to external services
-
2. **File System Operations**: Reading configuration files, CSV dictionaries, logs
-
3. **Resource Management**: Proper cleanup via structured concurrency
-
-
### Example: URL Checker Bot
-
```ocaml
-
module Url_checker_bot : Zulip_bot.Bot_handler.Bot_handler = struct
-
let handle_message ~config ~storage ~identity ~message ~env =
-
match parse_command message with
-
| "!check", url ->
-
(* Direct EIO network access needed *)
-
Eio.Switch.run @@ fun sw ->
-
let client = Cohttp_eio.Client.make ~sw env#net in
-
let response = Cohttp_eio.Client.head ~sw client (Uri.of_string url) in
-
let status = Cohttp.Code.code_of_status response.status in
-
Ok (Response.reply ~content:(format_status_message url status))
-
| _ -> Ok Response.none
-
end
-
```
-
-
### Example: CSV Dictionary Bot
-
```ocaml
-
module Csv_dict_bot : Zulip_bot.Bot_handler.Bot_handler = struct
-
let handle_message ~config ~storage ~identity ~message ~env =
-
match parse_command message with
-
| "!lookup", term ->
-
(* Direct EIO file system access needed *)
-
let csv_path = Bot_config.get_required config ~key:"csv_file" in
-
let content = Eio.Path.load env#fs (Eio.Path.parse csv_path) in
-
let matches = search_csv_content content term in
-
Ok (Response.reply ~content:(format_matches matches))
-
| _ -> Ok Response.none
-
end
-
```
-
-
## Refined Bot Handler Interface
-
-
Based on analysis, the current EIO environment plumbing is **essential** and should be cleaned up:
-
-
```ocaml
-
(** Clean bot handler interface with direct EIO access *)
-
module type Bot_handler = sig
-
val initialize : Bot_config.t -> (unit, Zulip.Error.t) result
-
val usage : unit -> string
-
val description : unit -> string
-
-
(** Handle message with full EIO environment access *)
-
val handle_message :
-
config:Bot_config.t ->
-
storage:Bot_storage.t ->
-
identity:Identity.t ->
-
message:Message_context.t ->
-
env:#Eio.Env.t -> (* Essential for custom I/O *)
-
(Response.t, Zulip.Error.t) result
-
end
-
-
type t
-
-
(** Single creation interface *)
-
val create :
-
(module Bot_handler) ->
-
config:Bot_config.t ->
-
storage:Bot_storage.t ->
-
identity:Identity.t ->
-
t
-
-
(** Single message handler requiring EIO environment *)
-
val handle_message : t -> #Eio.Env.t -> Message_context.t -> (Response.t, Zulip.Error.t) result
-
```
-
-
## Storage Strategy
-
-
Bot storage can be simplified to in-memory key-value storage since it's server-side:
-
-
```ocaml
-
(* In zulip_bot - storage per bot instance *)
-
module Bot_storage = struct
-
type t = (string, string) Hashtbl.t (* Simple in-memory key-value *)
-
-
let create () = Hashtbl.create 16
-
let get t ~key = Hashtbl.find_opt t key
-
let put t ~key ~value = Hashtbl.replace t key value
-
let contains t ~key = Hashtbl.mem t key
-
end
-
-
(* In zulip_botserver - storage shared across bots *)
-
module Server_storage = struct
-
type t = (string * string, string) Hashtbl.t (* (bot_email, key) -> value *)
-
-
let create () = Hashtbl.create 64
-
let get t ~bot_email ~key = Hashtbl.find_opt t (bot_email, key)
-
let put t ~bot_email ~key ~value = Hashtbl.replace t (bot_email, key) value
-
end
-
```
-
-
## Interface Cleanup Recommendations
-
-
1. **Remove** the problematic `handle_message` function with mock environment
-
2. **Keep** `handle_message_with_env` but rename to `handle_message`
-
3. **Use** `#Eio.Env.t` constraint for clean typing
-
4. **Document** that bot handlers have full EIO access for custom I/O operations
-
-
This design maintains flexibility for real-world bot functionality while providing clean, type-safe interfaces.
-
-
## Sources and References
-
-
This design is based on comprehensive analysis of:
-
-
1. **Zulip REST API Documentation**:
-
- Main API: https://zulip.com/api/rest
-
- Error Handling: https://zulip.com/api/rest-error-handling
-
- Send Message: https://zulip.com/api/send-message
-
-
2. **Python Zulip Library**:
-
- Main repository: https://github.com/zulip/python-zulip-api
-
- Bot framework: https://github.com/zulip/python-zulip-api/blob/main/zulip_bots/zulip_bots/lib.py
-
- Bot server: https://github.com/zulip/python-zulip-api/blob/main/zulip_botserver/zulip_botserver/server.py
-
-
The design adapts these Python patterns to idiomatic OCaml with abstract types, proper error handling, and EIO's structured concurrency for robust, type-safe Zulip integration.
+2 -2
stack/zulip/dune-project
···
(name ocaml-zulip)
+
(generate_opam_files true)
+
(package
(name zulip)
(synopsis "OCaml bindings for the Zulip REST API")
···
dune
eio
requests
-
ezjsonm
uri
base64
(alcotest :with-test)
···
dune
zulip
eio
-
ezjsonm
(alcotest :with-test)))
(package
+2 -5
stack/zulip/examples/example.ml
···
(match Message.topic message with Some t -> t | None -> "None");
(* Test JSON serialization *)
-
let json = Message.to_json message in
-
Printf.printf "\nMessage JSON: %s\n"
-
(match json with
-
| `O _ -> "JSON object (serialized correctly)"
-
| _ -> "Invalid JSON");
+
let json_str = Message.to_json_string message in
+
Printf.printf "\nMessage JSON: %s\n" json_str;
(* Create client *)
let client = Client.create ~sw env auth in
+4 -3
stack/zulip/examples/test_client.ml
···
| Ok json ->
Printf.printf "Fetched messages successfully!\n";
(match json with
-
| `O fields ->
-
(match List.assoc_opt "messages" fields with
-
| Some (`A messages) ->
+
| Jsont.Object (fields, _) ->
+
let assoc = List.map (fun ((k, _), v) -> (k, v)) fields in
+
(match List.assoc_opt "messages" assoc with
+
| Some (Jsont.Array (messages, _)) ->
Printf.printf "Got %d messages\n" (List.length messages)
| _ -> Printf.printf "No messages field found\n")
| _ -> Printf.printf "Unexpected JSON format\n")
+25 -33
stack/zulip/lib/zulip/lib/channel.ml
···
let invite_only t = t.invite_only
let history_public_to_subscribers t = t.history_public_to_subscribers
-
let to_json t =
-
`O [
-
("name", `String t.name);
-
("description", `String t.description);
-
("invite_only", `Bool t.invite_only);
-
("history_public_to_subscribers", `Bool t.history_public_to_subscribers);
-
]
+
let pp fmt t = Format.fprintf fmt "Channel{name=%s, description=%s}" t.name t.description
-
let of_json json =
-
try
-
match json with
-
| `O fields ->
-
let get_string key =
-
match List.assoc key fields with
-
| `String s -> s
-
| _ -> failwith ("Expected string for " ^ key) in
-
let get_bool key default =
-
match List.assoc_opt key fields with
-
| Some (`Bool b) -> b
-
| None -> default
-
| _ -> failwith ("Expected bool for " ^ key) in
-
-
let name = get_string "name" in
-
let description = get_string "description" in
-
let invite_only = get_bool "invite_only" false in
-
let history_public_to_subscribers = get_bool "history_public_to_subscribers" true in
-
-
Ok { name; description; invite_only; history_public_to_subscribers }
-
| _ ->
-
Error (Zulip_types.create_error ~code:(Other "json_parse_error") ~msg:"Channel JSON must be an object" ())
-
with
-
| exn ->
-
Error (Zulip_types.create_error ~code:(Other "json_parse_error") ~msg:("Channel JSON parsing failed: " ^ Printexc.to_string exn) ())
+
(* Jsont codec for channel *)
+
let jsont =
+
let kind = "Channel" in
+
let doc = "A Zulip channel (stream)" in
+
let make name description invite_only history_public_to_subscribers =
+
{ name; description; invite_only; history_public_to_subscribers }
+
in
+
Jsont.Object.map ~kind ~doc make
+
|> Jsont.Object.mem "name" Jsont.string ~enc:name
+
|> Jsont.Object.mem "description" Jsont.string ~enc:description
+
|> Jsont.Object.mem "invite_only" Jsont.bool ~enc:invite_only
+
|> Jsont.Object.mem "history_public_to_subscribers" Jsont.bool ~enc:history_public_to_subscribers
+
|> Jsont.Object.finish
+
+
(* Decode and encode functions using Encode module *)
+
let of_json json =
+
match Encode.from_json jsont json with
+
| Ok v -> Ok v
+
| Error msg -> Error (Zulip_types.create_error ~code:(Other "json_parse_error") ~msg ())
+
+
let to_json_string t =
+
Encode.to_json_string jsont t
-
let pp fmt t = Format.fprintf fmt "Channel{name=%s, description=%s}" t.name t.description
+
let to_form_urlencoded t =
+
Encode.to_form_urlencoded jsont t
+17 -6
stack/zulip/lib/zulip/lib/channel.mli
···
type t
-
val create :
-
name:string ->
-
description:string ->
-
?invite_only:bool ->
-
?history_public_to_subscribers:bool ->
+
val create :
+
name:string ->
+
description:string ->
+
?invite_only:bool ->
+
?history_public_to_subscribers:bool ->
unit -> t
val name : t -> string
val description : t -> string
val invite_only : t -> bool
val history_public_to_subscribers : t -> bool
-
val to_json : t -> Zulip_types.json
+
+
(** Jsont codec for the channel type *)
+
val jsont : t Jsont.t
+
+
(** Decode from Jsont.json *)
val of_json : Zulip_types.json -> (t, Zulip_types.zerror) result
+
+
(** Encode to JSON string *)
+
val to_json_string : t -> string
+
+
(** Encode to form-urlencoded string *)
+
val to_form_urlencoded : t -> string
+
val pp : Format.formatter -> t -> unit
+53 -43
stack/zulip/lib/zulip/lib/channels.ml
···
-
let create_channel client channel =
-
let body = match Channel.to_json channel with
-
| `O fields ->
-
String.concat "&" (List.map (fun (k, v) ->
-
match v with
-
| `String s -> k ^ "=" ^ Uri.pct_encode s
-
| `Bool b -> k ^ "=" ^ string_of_bool b
-
| _ -> ""
-
) fields)
-
| _ -> "" in
-
match Client.request client ~method_:`POST ~path:"/api/v1/streams" ~body () with
+
let create_channel client channel =
+
let body = Channel.to_form_urlencoded channel in
+
let content_type = "application/x-www-form-urlencoded" in
+
match Client.request client ~method_:`POST ~path:"/api/v1/streams" ~body ~content_type () with
| Ok _json -> Ok ()
| Error err -> Error err
-
let delete client ~name =
+
let delete client ~name =
let encoded_name = Uri.pct_encode name in
match Client.request client ~method_:`DELETE ~path:("/api/v1/streams/" ^ encoded_name) () with
| Ok _json -> Ok ()
| Error err -> Error err
-
let list client =
+
let list client =
+
(* Define response codec *)
+
let response_codec =
+
Jsont.Object.(
+
map ~kind:"StreamsResponse" (fun streams -> streams)
+
|> mem "streams" (Jsont.list Channel.jsont) ~enc:(fun x -> x)
+
|> finish
+
)
+
in
+
match Client.request client ~method_:`GET ~path:"/api/v1/streams" () with
-
| Ok json ->
-
(match json with
-
| `O fields ->
-
(match List.assoc_opt "streams" fields with
-
| Some (`A channel_list) ->
-
let channels = List.fold_left (fun acc channel_json ->
-
match Channel.of_json channel_json with
-
| Ok channel -> channel :: acc
-
| Error _ -> acc
-
) [] channel_list in
-
Ok (List.rev channels)
-
| _ -> Error (Zulip_types.create_error ~code:(Other "api_error") ~msg:"Invalid streams response format" ()))
-
| _ -> Error (Zulip_types.create_error ~code:(Other "api_error") ~msg:"Streams response must be an object" ()))
+
| Ok json ->
+
(match Encode.from_json response_codec json with
+
| Ok channels -> Ok channels
+
| Error msg -> Error (Zulip_types.create_error ~code:(Other "api_error") ~msg ()))
| Error err -> Error err
-
let subscribe client ~channels =
-
let channels_json = `A (List.map (fun name -> `String name) channels) in
-
let body = "subscriptions=" ^ (match channels_json with
-
| `A items -> "[" ^ String.concat "," (List.map (function
-
| `String s -> "\"" ^ s ^ "\""
-
| _ -> "") items) ^ "]"
-
| _ -> "[]") in
-
match Client.request client ~method_:`POST ~path:"/api/v1/users/me/subscriptions" ~body () with
+
(* Request types with jsont codecs *)
+
module Subscribe_request = struct
+
type t = { subscriptions : string list }
+
+
let codec =
+
Jsont.Object.(
+
map ~kind:"SubscribeRequest" (fun subscriptions -> { subscriptions })
+
|> mem "subscriptions" (Jsont.list Jsont.string) ~enc:(fun r -> r.subscriptions)
+
|> finish
+
)
+
end
+
+
module Unsubscribe_request = struct
+
type t = { delete : string list }
+
+
let codec =
+
Jsont.Object.(
+
map ~kind:"UnsubscribeRequest" (fun delete -> { delete })
+
|> mem "delete" (Jsont.list Jsont.string) ~enc:(fun r -> r.delete)
+
|> finish
+
)
+
end
+
+
let subscribe client ~channels =
+
let req = Subscribe_request.{ subscriptions = channels } in
+
let body = Encode.to_form_urlencoded Subscribe_request.codec req in
+
let content_type = "application/x-www-form-urlencoded" in
+
match Client.request client ~method_:`POST ~path:"/api/v1/users/me/subscriptions" ~body ~content_type () with
| Ok _json -> Ok ()
| Error err -> Error err
-
let unsubscribe client ~channels =
-
let channels_json = `A (List.map (fun name -> `String name) channels) in
-
let body = "delete=" ^ (match channels_json with
-
| `A items -> "[" ^ String.concat "," (List.map (function
-
| `String s -> "\"" ^ s ^ "\""
-
| _ -> "") items) ^ "]"
-
| _ -> "[]") in
-
match Client.request client ~method_:`DELETE ~path:"/api/v1/users/me/subscriptions" ~body () with
+
let unsubscribe client ~channels =
+
let req = Unsubscribe_request.{ delete = channels } in
+
let body = Encode.to_form_urlencoded Unsubscribe_request.codec req in
+
let content_type = "application/x-www-form-urlencoded" in
+
match Client.request client ~method_:`DELETE ~path:"/api/v1/users/me/subscriptions" ~body ~content_type () with
| Ok _json -> Ok ()
-
| Error err -> Error err
+
| Error err -> Error err
+17 -16
stack/zulip/lib/zulip/lib/client.ml
···
type t = {
auth : Auth.t;
-
session : (float Eio.Time.clock_ty Eio.Resource.t,
-
[`Generic | `Unix] Eio.Net.ty Eio.Resource.t) Requests.t;
+
session : Requests.t;
}
let create ~sw env auth =
···
Buffer.contents buf
in
-
(* Parse JSON response using Ezjsonm *)
+
(* Parse JSON response using Jsont_bytesrw *)
let json =
-
try
-
Ezjsonm.from_string body_str
-
with Ezjsonm.Parse_error (_, msg) ->
-
Log.err (fun m -> m "JSON parse error: %s" msg);
-
failwith ("JSON parse error: " ^ msg)
+
match Jsont_bytesrw.decode_string' Jsont.json body_str with
+
| Ok j -> j
+
| Error e ->
+
let msg = Jsont.Error.to_string e in
+
Log.err (fun m -> m "JSON parse error: %s" msg);
+
failwith ("JSON parse error: " ^ msg)
in
(* Check for Zulip error response *)
match json with
-
| `O fields ->
-
(match List.assoc_opt "result" fields with
-
| Some (`String "error") ->
-
let msg = match List.assoc_opt "msg" fields with
-
| Some (`String s) -> s
+
| Jsont.Object (fields, _) ->
+
let assoc = List.map (fun ((k, _), v) -> (k, v)) fields in
+
(match List.assoc_opt "result" assoc with
+
| Some (Jsont.String ("error", _)) ->
+
let msg = match List.assoc_opt "msg" assoc with
+
| Some (Jsont.String (s, _)) -> s
| _ -> "Unknown error"
in
-
let code = match List.assoc_opt "code" fields with
-
| Some (`String s) -> Zulip_types.error_code_of_string s
+
let code = match List.assoc_opt "code" assoc with
+
| Some (Jsont.String (s, _)) -> Zulip_types.error_code_of_string s
| _ -> Zulip_types.Other "unknown"
in
Log.warn (fun m -> m "API error: %s (code: %s)" msg
···
~msg:"Invalid JSON response" ()))
let pp fmt t =
-
Format.fprintf fmt "Client(server=%s)" (Auth.server_url t.auth)
+
Format.fprintf fmt "Client(server=%s)" (Auth.server_url t.auth)
+1 -1
stack/zulip/lib/zulip/lib/dune
···
(library
(public_name zulip)
(name zulip)
-
(libraries eio requests ezjsonm uri base64 logs))
+
(libraries eio requests jsont jsont.bytesrw uri base64 logs))
+56
stack/zulip/lib/zulip/lib/encode.ml
···
+
(** Encoding utilities for Zulip API requests *)
+
+
(** Convert a jsont-encoded value to JSON string *)
+
let to_json_string : 'a Jsont.t -> 'a -> string = fun codec value ->
+
match Jsont_bytesrw.encode_string' codec value with
+
| Ok s -> s
+
| Error e -> failwith ("JSON encoding error: " ^ Jsont.Error.to_string e)
+
+
(** Convert a jsont-encoded value to form-urlencoded string *)
+
let to_form_urlencoded : 'a Jsont.t -> 'a -> string = fun codec value ->
+
(* First encode to JSON, then extract fields *)
+
let json_str = to_json_string codec value in
+
match Jsont_bytesrw.decode_string' Jsont.json json_str with
+
| Error e -> failwith ("JSON decode error: " ^ Jsont.Error.to_string e)
+
| Ok (Jsont.Object (fields, _)) ->
+
(* Convert object fields to form-urlencoded *)
+
let encode_value = function
+
| Jsont.String (s, _) -> Some (Uri.pct_encode ~component:`Query_value s)
+
| Jsont.Bool (b, _) -> Some (string_of_bool b)
+
| Jsont.Number (n, _) -> Some (string_of_float n)
+
| Jsont.Null _ -> None
+
| Jsont.Array (items, _) ->
+
(* For arrays, encode as JSON array string *)
+
let array_str = "[" ^ String.concat "," (List.filter_map (function
+
| Jsont.String (s, _) -> Some ("\"" ^ String.escaped s ^ "\"")
+
| Jsont.Number (n, _) -> Some (string_of_float n)
+
| Jsont.Bool (b, _) -> Some (string_of_bool b)
+
| _ -> None
+
) items) ^ "]" in
+
Some array_str
+
| Jsont.Object _ -> None (* Skip nested objects *)
+
in
+
+
let params = List.filter_map (fun ((key, _), value) ->
+
match encode_value value with
+
| Some encoded -> Some (key ^ "=" ^ encoded)
+
| None -> None
+
) fields in
+
+
String.concat "&" params
+
| Ok _ ->
+
failwith "Expected JSON object for form encoding"
+
+
(** Parse JSON string using a jsont codec *)
+
let from_json_string : 'a Jsont.t -> string -> ('a, string) result = fun codec json_str ->
+
match Jsont_bytesrw.decode_string' codec json_str with
+
| Ok v -> Ok v
+
| Error e -> Error (Jsont.Error.to_string e)
+
+
(** Parse a Jsont.json value using a codec *)
+
let from_json : 'a Jsont.t -> Jsont.json -> ('a, string) result = fun codec json ->
+
let json_str = match Jsont_bytesrw.encode_string' Jsont.json json with
+
| Ok s -> s
+
| Error e -> failwith ("Failed to re-encode json: " ^ Jsont.Error.to_string e)
+
in
+
from_json_string codec json_str
+21
stack/zulip/lib/zulip/lib/encode.mli
···
+
(** Encoding utilities for Zulip API requests *)
+
+
(** Convert a value to JSON string using its jsont codec *)
+
val to_json_string : 'a Jsont.t -> 'a -> string
+
+
(** Convert a value to application/x-www-form-urlencoded string using its jsont codec
+
+
The codec should represent a JSON object. Fields will be converted to key=value pairs:
+
- Strings: URL-encoded
+
- Booleans: "true"/"false"
+
- Numbers: string representation
+
- Arrays: JSON array string "[...]"
+
- Null: omitted
+
- Nested objects: omitted *)
+
val to_form_urlencoded : 'a Jsont.t -> 'a -> string
+
+
(** Parse JSON string using a jsont codec *)
+
val from_json_string : 'a Jsont.t -> string -> ('a, string) result
+
+
(** Parse a Jsont.json value using a codec *)
+
val from_json : 'a Jsont.t -> Jsont.json -> ('a, string) result
+35 -24
stack/zulip/lib/zulip/lib/event.ml
···
let type_ t = t.type_
let data t = t.data
-
let of_json json =
+
let pp fmt t = Format.fprintf fmt "Event{id=%d, type=%a}" t.id Event_type.pp t.type_
+
+
(* Helper to extract fields from Jsont.json *)
+
let get_int_field json name =
+
match json with
+
| Jsont.Object (fields, _) ->
+
let assoc = List.map (fun ((k, _), v) -> (k, v)) fields in
+
(match List.assoc_opt name assoc with
+
| Some (Jsont.Number (n, _)) -> int_of_float n
+
| _ -> Jsont.Error.msg Jsont.Meta.none
+
(Format.sprintf "Field '%s' not found or not an int" name))
+
| _ -> Jsont.Error.msg Jsont.Meta.none "Expected JSON object"
+
+
let get_string_field json name =
+
match json with
+
| Jsont.Object (fields, _) ->
+
let assoc = List.map (fun ((k, _), v) -> (k, v)) fields in
+
(match List.assoc_opt name assoc with
+
| Some (Jsont.String (s, _)) -> s
+
| _ -> Jsont.Error.msg Jsont.Meta.none
+
(Format.sprintf "Field '%s' not found or not a string" name))
+
| _ -> Jsont.Error.msg Jsont.Meta.none "Expected JSON object"
+
+
(* Simple decoder that extracts id and type, keeping full JSON as data *)
+
let of_json_direct json =
try
-
match json with
-
| `O fields ->
-
let get_int key =
-
match List.assoc key fields with
-
| `Float f -> int_of_float f
-
| _ -> failwith ("Expected int for " ^ key) in
-
let get_string key =
-
match List.assoc key fields with
-
| `String s -> s
-
| _ -> failwith ("Expected string for " ^ key) in
-
let id = get_int "id" in
-
let type_str = get_string "type" in
-
let type_ = Event_type.of_string type_str in
-
(* The whole event is the data - store it all *)
-
let data = json in
+
let id = get_int_field json "id" in
+
let type_str = get_string_field json "type" in
+
let type_ = Event_type.of_string type_str in
+
Ok { id; type_; data = json }
+
with e ->
+
Error (Zulip_types.create_error ~code:(Other "json_parse_error")
+
~msg:("Event JSON parsing failed: " ^ Printexc.to_string e) ())
-
Ok { id; type_; data }
-
| _ ->
-
Error (Zulip_types.create_error ~code:(Other "json_parse_error") ~msg:"Event JSON must be an object" ())
-
with
-
| exn ->
-
Error (Zulip_types.create_error ~code:(Other "json_parse_error") ~msg:("Event JSON parsing failed: " ^ Printexc.to_string exn) ())
-
-
let pp fmt t = Format.fprintf fmt "Event{id=%d, type=%a}" t.id Event_type.pp t.type_
+
(* Decode function *)
+
let of_json json =
+
of_json_direct json
+76 -42
stack/zulip/lib/zulip/lib/event_queue.ml
···
id : string;
}
+
(* Request/response codecs *)
+
module Register_request = struct
+
type t = { event_types : string list option }
+
+
let codec =
+
Jsont.Object.(
+
map ~kind:"RegisterRequest" (fun event_types -> { event_types })
+
|> opt_mem "event_types" (Jsont.list Jsont.string) ~enc:(fun r -> r.event_types)
+
|> finish
+
)
+
end
+
+
module Register_response = struct
+
type t = { queue_id : string }
+
+
let codec =
+
Jsont.Object.(
+
map ~kind:"RegisterResponse" (fun queue_id -> { queue_id })
+
|> mem "queue_id" Jsont.string ~enc:(fun r -> r.queue_id)
+
|> finish
+
)
+
end
+
let register client ?event_types () =
-
let params = match event_types with
-
| None -> []
-
| Some types ->
-
let types_json = "[" ^
-
String.concat "," (List.map (fun t -> "\"" ^ Event_type.to_string t ^ "\"") types) ^
-
"]"
-
in
-
Log.debug (fun m -> m "Registering with event_types: %s" types_json);
-
[("event_types", types_json)]
-
in
-
match Client.request client ~method_:`POST ~path:"/api/v1/register" ~params () with
-
| Ok json ->
-
(match json with
-
| `O fields ->
-
(match List.assoc_opt "queue_id" fields with
-
| Some (`String queue_id) -> Ok { id = queue_id }
-
| _ -> Error (Zulip_types.create_error ~code:(Other "api_error") ~msg:"Invalid register response: missing queue_id" ()))
-
| _ -> Error (Zulip_types.create_error ~code:(Other "api_error") ~msg:"Register response must be an object" ()))
+
let event_types_str = Option.map (List.map Event_type.to_string) event_types in
+
let req = Register_request.{ event_types = event_types_str } in
+
let body = Encode.to_form_urlencoded Register_request.codec req in
+
let content_type = "application/x-www-form-urlencoded" in
+
+
(match event_types_str with
+
| Some types -> Log.debug (fun m -> m "Registering with event_types: %s" (String.concat "," types))
+
| None -> ());
+
+
match Client.request client ~method_:`POST ~path:"/api/v1/register" ~body ~content_type () with
+
| Ok json ->
+
(match Encode.from_json Register_response.codec json with
+
| Ok response -> Ok { id = response.queue_id }
+
| Error msg -> Error (Zulip_types.create_error ~code:(Other "api_error") ~msg ()))
| Error err -> Error err
let id t = t.id
-
let get_events t client ?last_event_id () =
-
let params = [("queue_id", t.id)] @
+
(* Events response codec - events field is optional (may not be present) *)
+
module Events_response = struct
+
type t = { events : Event.t list }
+
+
let codec =
+
(* Use keep_unknown pattern to handle the whole object and extract events manually *)
+
let make raw_json =
+
match raw_json with
+
| Jsont.Object (fields, _) ->
+
let assoc = List.map (fun ((k, _), v) -> (k, v)) fields in
+
(match List.assoc_opt "events" assoc with
+
| Some (Jsont.Array (items, _)) ->
+
(* Parse each event, skipping failures *)
+
let events = List.fold_left (fun acc item ->
+
match Event.of_json item with
+
| Ok event -> event :: acc
+
| Error _ -> acc
+
) [] items |> List.rev in
+
{ events }
+
| Some _ -> { events = [] }
+
| None -> { events = [] })
+
| _ -> { events = [] }
+
in
+
Jsont.Object.map ~kind:"EventsResponse" make
+
|> Jsont.Object.keep_unknown Jsont.json_mems ~enc:(fun _ -> Jsont.Object ([], Jsont.Meta.none))
+
|> Jsont.Object.finish
+
end
+
+
let get_events t client ?last_event_id () =
+
let params = [("queue_id", t.id)] @
(match last_event_id with
| None -> []
| Some event_id -> [("last_event_id", string_of_int event_id)]) in
match Client.request client ~method_:`GET ~path:"/api/v1/events" ~params () with
-
| Ok json ->
-
(match json with
-
| `O fields ->
-
(match List.assoc_opt "events" fields with
-
| Some (`A event_list) ->
-
Log.debug (fun m -> m "Got %d raw events from API" (List.length event_list));
-
let events = List.fold_left (fun acc event_json ->
-
match Event.of_json event_json with
-
| Ok event -> event :: acc
-
| Error e ->
-
Log.warn (fun m -> m "Failed to parse event: %s" (Zulip_types.error_message e));
-
acc
-
) [] event_list in
-
Ok (List.rev events)
-
| Some _other ->
-
Log.warn (fun m -> m "Events field is not an array");
-
Error (Zulip_types.create_error ~code:(Other "api_error") ~msg:"Invalid events response format" ())
-
| None ->
-
Log.debug (fun m -> m "No events field in response");
-
Ok [])
-
| _ -> Error (Zulip_types.create_error ~code:(Other "api_error") ~msg:"Events response must be an object" ()))
+
| Ok json ->
+
(match Encode.from_json Events_response.codec json with
+
| Ok response ->
+
Log.debug (fun m -> m "Got %d events from API" (List.length response.events));
+
Ok response.events
+
| Error msg ->
+
Log.warn (fun m -> m "Failed to parse events response: %s" msg);
+
Error (Zulip_types.create_error ~code:(Other "api_error") ~msg ()))
| Error err -> Error err
-
let delete t client =
+
let delete t client =
let params = [("queue_id", t.id)] in
match Client.request client ~method_:`DELETE ~path:"/api/v1/events" ~params () with
| Ok _json -> Ok ()
-206
stack/zulip/lib/zulip/lib/jsonu.ml
···
-
(** JSON utility functions for Zulip API *)
-
-
type json = Zulip_types.json
-
-
(** {1 Field extraction utilities} *)
-
-
let get_string fields key =
-
match List.assoc_opt key fields with
-
| Some (`String s) -> Ok s
-
| Some _ -> Error (Zulip_types.create_error ~code:(Other "json_type_error") ~msg:(Printf.sprintf "Field '%s' is not a string" key) ())
-
| None -> Error (Zulip_types.create_error ~code:(Other "json_missing_field") ~msg:(Printf.sprintf "Field '%s' not found" key) ())
-
-
let get_string_default fields key default =
-
match get_string fields key with
-
| Ok s -> s
-
| Error _ -> default
-
-
let get_string_opt fields key =
-
match List.assoc_opt key fields with
-
| Some (`String s) -> Some s
-
| _ -> None
-
-
let to_int_flex = function
-
| `Float f -> int_of_float f
-
| `String s -> (try int_of_string s with _ -> failwith "Invalid integer string")
-
| json -> failwith (Printf.sprintf "Expected int or float, got %s" (match json with
-
| `Null -> "null"
-
| `Bool _ -> "bool"
-
| `O _ -> "object"
-
| `A _ -> "array"
-
| _ -> "unknown"))
-
-
let get_int fields key =
-
match List.assoc_opt key fields with
-
| Some json ->
-
(try Ok (to_int_flex json) with
-
| Failure msg -> Error (Zulip_types.create_error ~code:(Other "json_type_error") ~msg ()))
-
| None -> Error (Zulip_types.create_error ~code:(Other "json_missing_field") ~msg:(Printf.sprintf "Field '%s' not found" key) ())
-
-
let get_int_default fields key default =
-
match get_int fields key with
-
| Ok i -> i
-
| Error _ -> default
-
-
let get_int_opt fields key =
-
match List.assoc_opt key fields with
-
| Some json -> (try Some (to_int_flex json) with _ -> None)
-
| None -> None
-
-
let get_float fields key =
-
match List.assoc_opt key fields with
-
| Some (`Float f) -> Ok f
-
| Some (`String s) ->
-
(try Ok (float_of_string s) with
-
| _ -> Error (Zulip_types.create_error ~code:(Other "json_type_error") ~msg:(Printf.sprintf "Field '%s' is not a valid float" key) ()))
-
| Some _ -> Error (Zulip_types.create_error ~code:(Other "json_type_error") ~msg:(Printf.sprintf "Field '%s' is not a float" key) ())
-
| None -> Error (Zulip_types.create_error ~code:(Other "json_missing_field") ~msg:(Printf.sprintf "Field '%s' not found" key) ())
-
-
let get_float_default fields key default =
-
match get_float fields key with
-
| Ok f -> f
-
| Error _ -> default
-
-
let get_bool fields key =
-
match List.assoc_opt key fields with
-
| Some (`Bool b) -> Ok b
-
| Some _ -> Error (Zulip_types.create_error ~code:(Other "json_type_error") ~msg:(Printf.sprintf "Field '%s' is not a boolean" key) ())
-
| None -> Error (Zulip_types.create_error ~code:(Other "json_missing_field") ~msg:(Printf.sprintf "Field '%s' not found" key) ())
-
-
let get_bool_default fields key default =
-
match get_bool fields key with
-
| Ok b -> b
-
| Error _ -> default
-
-
let get_bool_opt fields key =
-
match List.assoc_opt key fields with
-
| Some (`Bool b) -> Some b
-
| _ -> None
-
-
let get_object fields key =
-
match List.assoc_opt key fields with
-
| Some (`O obj) -> Ok obj
-
| Some _ -> Error (Zulip_types.create_error ~code:(Other "json_type_error") ~msg:(Printf.sprintf "Field '%s' is not an object" key) ())
-
| None -> Error (Zulip_types.create_error ~code:(Other "json_missing_field") ~msg:(Printf.sprintf "Field '%s' not found" key) ())
-
-
let get_object_opt fields key =
-
match List.assoc_opt key fields with
-
| Some (`O obj) -> Some obj
-
| _ -> None
-
-
let get_array fields key =
-
match List.assoc_opt key fields with
-
| Some (`A arr) -> Ok arr
-
| Some _ -> Error (Zulip_types.create_error ~code:(Other "json_type_error") ~msg:(Printf.sprintf "Field '%s' is not an array" key) ())
-
| None -> Error (Zulip_types.create_error ~code:(Other "json_missing_field") ~msg:(Printf.sprintf "Field '%s' not found" key) ())
-
-
let get_array_opt fields key =
-
match List.assoc_opt key fields with
-
| Some (`A arr) -> Some arr
-
| _ -> None
-
-
(** {1 Type conversion utilities} *)
-
-
let to_int_safe = function
-
| `Float f -> Some (int_of_float f)
-
| `String s -> (try Some (int_of_string s) with _ -> None)
-
| _ -> None
-
-
let to_string_safe = function
-
| `String s -> Some s
-
| _ -> None
-
-
let to_bool_safe = function
-
| `Bool b -> Some b
-
| _ -> None
-
-
let to_float_safe = function
-
| `Float f -> Some f
-
| `String s -> (try Some (float_of_string s) with _ -> None)
-
| _ -> None
-
-
(** {1 Object parsing utilities} *)
-
-
let with_object context f = function
-
| `O fields -> f fields
-
| _ -> Error (Zulip_types.create_error ~code:(Other "json_type_error") ~msg:(Printf.sprintf "%s: expected JSON object" context) ())
-
-
let with_array context f json =
-
match json with
-
| `A items ->
-
let rec process acc = function
-
| [] -> Ok (List.rev acc)
-
| item :: rest ->
-
match f item with
-
| Ok v -> process (v :: acc) rest
-
| Error e -> Error e
-
in
-
process [] items
-
| _ -> Error (Zulip_types.create_error ~code:(Other "json_type_error") ~msg:(Printf.sprintf "%s: expected JSON array" context) ())
-
-
(** {1 Construction utilities} *)
-
-
let optional_field key encoder = function
-
| Some value -> Some (key, encoder value)
-
| None -> None
-
-
let optional_fields fields =
-
List.filter_map (fun x -> x) fields
-
-
let string_array strings =
-
`A (List.map (fun s -> `String s) strings)
-
-
let int_array ints =
-
`A (List.map (fun i -> `Float (float_of_int i)) ints)
-
-
(** {1 Error handling} *)
-
-
let json_error msg =
-
Zulip_types.create_error ~code:(Other "json_error") ~msg ()
-
-
let field_missing_error field =
-
Zulip_types.create_error ~code:(Other "json_missing_field") ~msg:(Printf.sprintf "Required field '%s' not found" field) ()
-
-
let type_mismatch_error field expected =
-
Zulip_types.create_error ~code:(Other "json_type_error") ~msg:(Printf.sprintf "Field '%s' type mismatch: expected %s" field expected) ()
-
-
let parse_with_error context f =
-
try f ()
-
with
-
| Failure msg -> Error (Zulip_types.create_error ~code:(Other "json_parse_error") ~msg:(Printf.sprintf "%s: %s" context msg) ())
-
| exn -> Error (Zulip_types.create_error ~code:(Other "json_parse_error") ~msg:(Printf.sprintf "%s: %s" context (Printexc.to_string exn)) ())
-
-
-
(** {1 Debugging utilities} *)
-
-
let to_string_pretty json =
-
let rec aux indent = function
-
| `Null -> "null"
-
| `Bool b -> string_of_bool b
-
| `Float f ->
-
if float_of_int (int_of_float f) = f then
-
string_of_int (int_of_float f)
-
else
-
string_of_float f
-
| `String s -> Printf.sprintf "%S" s
-
| `A [] -> "[]"
-
| `A lst ->
-
let items = List.map (aux (indent ^ " ")) lst in
-
Printf.sprintf "[\n%s%s\n%s]"
-
(indent ^ " ")
-
(String.concat (",\n" ^ indent ^ " ") items)
-
indent
-
| `O [] -> "{}"
-
| `O fields ->
-
let items = List.map (fun (k, v) ->
-
Printf.sprintf "%S: %s" k (aux (indent ^ " ") v)
-
) fields in
-
Printf.sprintf "{\n%s%s\n%s}"
-
(indent ^ " ")
-
(String.concat (",\n" ^ indent ^ " ") items)
-
indent
-
in
-
aux "" json
-
-
let pp fmt json =
-
Format.pp_print_string fmt (to_string_pretty json)
-117
stack/zulip/lib/zulip/lib/jsonu.mli
···
-
(** JSON utility functions for Zulip API
-
-
This module provides common utilities for working with JSON in the Zulip API,
-
reducing boilerplate and providing consistent error handling. *)
-
-
(** {1 Type definitions} *)
-
-
type json = Zulip_types.json
-
-
(** {1 Field extraction utilities} *)
-
-
(** Extract a string field from a JSON object *)
-
val get_string : (string * json) list -> string -> (string, Zulip_types.zerror) result
-
-
(** Extract a string field with a default value *)
-
val get_string_default : (string * json) list -> string -> string -> string
-
-
(** Extract an optional string field *)
-
val get_string_opt : (string * json) list -> string -> string option
-
-
(** Extract an integer field (handles both int and float representations) *)
-
val get_int : (string * json) list -> string -> (int, Zulip_types.zerror) result
-
-
(** Extract an integer field with a default value *)
-
val get_int_default : (string * json) list -> string -> int -> int
-
-
(** Extract an optional integer field *)
-
val get_int_opt : (string * json) list -> string -> int option
-
-
(** Extract a float field *)
-
val get_float : (string * json) list -> string -> (float, Zulip_types.zerror) result
-
-
(** Extract a float field with a default value *)
-
val get_float_default : (string * json) list -> string -> float -> float
-
-
(** Extract a boolean field *)
-
val get_bool : (string * json) list -> string -> (bool, Zulip_types.zerror) result
-
-
(** Extract a boolean field with a default value *)
-
val get_bool_default : (string * json) list -> string -> bool -> bool
-
-
(** Extract an optional boolean field *)
-
val get_bool_opt : (string * json) list -> string -> bool option
-
-
(** Extract a JSON object field *)
-
val get_object : (string * json) list -> string -> ((string * json) list, Zulip_types.zerror) result
-
-
(** Extract an optional JSON object field *)
-
val get_object_opt : (string * json) list -> string -> (string * json) list option
-
-
(** Extract a JSON array field *)
-
val get_array : (string * json) list -> string -> (json list, Zulip_types.zerror) result
-
-
(** Extract an optional JSON array field *)
-
val get_array_opt : (string * json) list -> string -> json list option
-
-
(** {1 Type conversion utilities} *)
-
-
(** Convert JSON to int, handling both int and float representations *)
-
val to_int_flex : json -> int
-
-
(** Safely convert JSON to int *)
-
val to_int_safe : json -> int option
-
-
(** Convert JSON to string *)
-
val to_string_safe : json -> string option
-
-
(** Convert JSON to bool *)
-
val to_bool_safe : json -> bool option
-
-
(** Convert JSON to float *)
-
val to_float_safe : json -> float option
-
-
(** {1 Object parsing utilities} *)
-
-
(** Parse a JSON value as an object, applying a function to its fields *)
-
val with_object : string -> ((string * json) list -> ('a, Zulip_types.zerror) result) -> json -> ('a, Zulip_types.zerror) result
-
-
(** Parse a JSON value as an array, applying a function to each element *)
-
val with_array : string -> (json -> ('a, Zulip_types.zerror) result) -> json -> ('a list, Zulip_types.zerror) result
-
-
(** {1 Construction utilities} *)
-
-
(** Create an optional field for JSON object construction *)
-
val optional_field : string -> ('a -> json) -> 'a option -> (string * json) option
-
-
(** Create a list of optional fields, filtering out None values *)
-
val optional_fields : (string * json) option list -> (string * json) list
-
-
(** Convert a string list to a JSON array *)
-
val string_array : string list -> json
-
-
(** Convert an int list to a JSON array *)
-
val int_array : int list -> json
-
-
(** {1 Error handling} *)
-
-
(** Create a JSON parsing error *)
-
val json_error : string -> Zulip_types.zerror
-
-
(** Create a field missing error *)
-
val field_missing_error : string -> Zulip_types.zerror
-
-
(** Create a type mismatch error *)
-
val type_mismatch_error : string -> string -> Zulip_types.zerror
-
-
(** Wrap a parsing function with exception handling *)
-
val parse_with_error : string -> (unit -> ('a, Zulip_types.zerror) result) -> ('a, Zulip_types.zerror) result
-
-
-
(** {1 Debugging utilities} *)
-
-
(** Convert JSON to a pretty-printed string *)
-
val to_string_pretty : json -> string
-
-
(** Print JSON value for debugging *)
-
val pp : Format.formatter -> json -> unit
-144
stack/zulip/lib/zulip/lib/jsonu_syntax.ml
···
-
(** Syntax module for monadic and applicative JSON parsing *)
-
-
type json = Zulip_types.json
-
type 'a parser = json -> ('a, Zulip_types.zerror) result
-
-
(** Monadic bind operator for sequential parsing with error handling *)
-
let ( let* ) = Result.bind
-
-
(** Map operator for transforming successful results *)
-
let ( let+ ) x f = Result.map f x
-
-
(** Applicative parallel composition *)
-
let ( and+ ) x y =
-
match x, y with
-
| Ok x, Ok y -> Ok (x, y)
-
| Error e, _ | _, Error e -> Error e
-
-
(** Applicative parallel composition for 3 values *)
-
let ( and++ ) xy z =
-
match xy, z with
-
| Ok (x, y), Ok z -> Ok (x, y, z)
-
| Error e, _ | _, Error e -> Error e
-
-
(** Applicative parallel composition for 4 values *)
-
let ( and+++ ) xyz w =
-
match xyz, w with
-
| Ok (x, y, z), Ok w -> Ok (x, y, z, w)
-
| Error e, _ | _, Error e -> Error e
-
-
(** Applicative parallel composition for 5 values *)
-
let ( and++++ ) xyzw v =
-
match xyzw, v with
-
| Ok (x, y, z, w), Ok v -> Ok (x, y, z, w, v)
-
| Error e, _ | _, Error e -> Error e
-
-
(** Alternative operator - try first, if fails try second *)
-
let ( <|> ) x y =
-
match x with
-
| Ok _ -> x
-
| Error _ -> y
-
-
(** Provide a default value if parsing fails *)
-
let ( |? ) x default =
-
match x with
-
| Ok v -> v
-
| Error _ -> default
-
-
(** Convert option to result with error message *)
-
let required name = function
-
| Some v -> Ok v
-
| None -> Error (Zulip_types.create_error ~code:(Other "missing_field") ~msg:(Printf.sprintf "Required field '%s' not found" name) ())
-
-
(** Convert option to result with default *)
-
let default v = function
-
| Some x -> x
-
| None -> v
-
-
(** Lift a pure value into parser context *)
-
let pure x = Ok x
-
-
(** Fail with an error message *)
-
let fail msg = Error (Zulip_types.create_error ~code:(Other "parse_error") ~msg ())
-
-
(** Map over a list with error handling *)
-
let traverse f lst =
-
let rec go acc = function
-
| [] -> Ok (List.rev acc)
-
| x :: xs ->
-
let* v = f x in
-
go (v :: acc) xs
-
in
-
go [] lst
-
-
(** Filter and map over a list, dropping errors *)
-
let filter_map f lst =
-
List.filter_map (fun x ->
-
match f x with
-
| Ok v -> Some v
-
| Error _ -> None
-
) lst
-
-
(** Parse a field with a custom parser *)
-
let field fields key parser =
-
match List.assoc_opt key fields with
-
| Some json -> parser json
-
| None -> Error (Jsonu.field_missing_error key)
-
-
(** Parse an optional field with a custom parser *)
-
let field_opt fields key parser =
-
match List.assoc_opt key fields with
-
| Some json ->
-
(match parser json with
-
| Ok v -> Ok (Some v)
-
| Error _ -> Ok None)
-
| None -> Ok None
-
-
(** Parse a field with a default value if missing or fails *)
-
let field_or fields key parser default =
-
match List.assoc_opt key fields with
-
| Some json ->
-
(match parser json with
-
| Ok v -> Ok v
-
| Error _ -> Ok default)
-
| None -> Ok default
-
-
(** Common parsers *)
-
let string = function
-
| `String s -> Ok s
-
| _ -> Error (Zulip_types.create_error ~code:(Other "type_error") ~msg:"Expected string" ())
-
-
let int = function
-
| `Float f -> Ok (int_of_float f)
-
| `String s ->
-
(try Ok (int_of_string s)
-
with _ -> Error (Zulip_types.create_error ~code:(Other "type_error") ~msg:"Expected integer" ()))
-
| _ -> Error (Zulip_types.create_error ~code:(Other "type_error") ~msg:"Expected integer" ())
-
-
let float = function
-
| `Float f -> Ok f
-
| `String s ->
-
(try Ok (float_of_string s)
-
with _ -> Error (Zulip_types.create_error ~code:(Other "type_error") ~msg:"Expected float" ()))
-
| _ -> Error (Zulip_types.create_error ~code:(Other "type_error") ~msg:"Expected float" ())
-
-
let bool = function
-
| `Bool b -> Ok b
-
| _ -> Error (Zulip_types.create_error ~code:(Other "type_error") ~msg:"Expected boolean" ())
-
-
let array parser = function
-
| `A items -> traverse parser items
-
| _ -> Error (Zulip_types.create_error ~code:(Other "type_error") ~msg:"Expected array" ())
-
-
let object_ = function
-
| `O fields -> Ok fields
-
| _ -> Error (Zulip_types.create_error ~code:(Other "type_error") ~msg:"Expected object" ())
-
-
(** Run a parser on JSON *)
-
let parse parser json = parser json
-
-
(** Run a parser with error context *)
-
let with_context ctx parser json =
-
match parser json with
-
| Ok v -> Ok v
-
| Error e -> Error (Zulip_types.create_error ~code:(Zulip_types.error_code e) ~msg:(Printf.sprintf "%s: %s" ctx (Zulip_types.error_message e)) ())
-96
stack/zulip/lib/zulip/lib/jsonu_syntax.mli
···
-
(** Syntax module for monadic and applicative JSON parsing
-
-
This module provides binding operators and combinators to make JSON parsing
-
more ergonomic and composable. It enables code like:
-
-
{[
-
let parse_user json =
-
with_object "user" @@ fun fields ->
-
let+ user_id = field fields "user_id" int
-
and+ email = field fields "email" string
-
and+ full_name = field fields "full_name" string in
-
{ user_id; email; full_name }
-
]}
-
*)
-
-
type json = Zulip_types.json
-
type 'a parser = json -> ('a, Zulip_types.zerror) result
-
-
(** {1 Binding Operators} *)
-
-
(** Monadic bind operator for sequential parsing with error handling *)
-
val ( let* ) : ('a, 'e) result -> ('a -> ('b, 'e) result) -> ('b, 'e) result
-
-
(** Map operator for transforming successful results *)
-
val ( let+ ) : ('a, 'e) result -> ('a -> 'b) -> ('b, 'e) result
-
-
(** Applicative parallel composition for independent field extraction *)
-
val ( and+ ) : ('a, 'e) result -> ('b, 'e) result -> ('a * 'b, 'e) result
-
val ( and++ ) : ('a * 'b, 'e) result -> ('c, 'e) result -> ('a * 'b * 'c, 'e) result
-
val ( and+++ ) : ('a * 'b * 'c, 'e) result -> ('d, 'e) result -> ('a * 'b * 'c * 'd, 'e) result
-
val ( and++++ ) : ('a * 'b * 'c * 'd, 'e) result -> ('f, 'e) result -> ('a * 'b * 'c * 'd * 'f, 'e) result
-
-
(** {1 Alternative and Default Operators} *)
-
-
(** Alternative operator - try first parser, if fails try second *)
-
val ( <|> ) : ('a, 'e) result -> ('a, 'e) result -> ('a, 'e) result
-
-
(** Provide a default value if parsing fails *)
-
val ( |? ) : ('a, 'e) result -> 'a -> 'a
-
-
(** {1 Field Extraction} *)
-
-
(** Parse a required field with a custom parser *)
-
val field : (string * json) list -> string -> 'a parser -> ('a, Zulip_types.zerror) result
-
-
(** Parse an optional field with a custom parser *)
-
val field_opt : (string * json) list -> string -> 'a parser -> ('a option, Zulip_types.zerror) result
-
-
(** Parse a field with a default value if missing or fails *)
-
val field_or : (string * json) list -> string -> 'a parser -> 'a -> ('a, Zulip_types.zerror) result
-
-
(** {1 Basic Parsers} *)
-
-
(** Parse a JSON string *)
-
val string : string parser
-
-
(** Parse a JSON number as integer (handles both int and float) *)
-
val int : int parser
-
-
(** Parse a JSON number as float *)
-
val float : float parser
-
-
(** Parse a JSON boolean *)
-
val bool : bool parser
-
-
(** Parse a JSON array with a parser for elements *)
-
val array : 'a parser -> 'a list parser
-
-
(** Parse a JSON object to get its fields *)
-
val object_ : json -> ((string * json) list, Zulip_types.zerror) result
-
-
(** {1 Utility Functions} *)
-
-
(** Convert option to result with error message *)
-
val required : string -> 'a option -> ('a, Zulip_types.zerror) result
-
-
(** Get value from option with default *)
-
val default : 'a -> 'a option -> 'a
-
-
(** Lift a pure value into parser context *)
-
val pure : 'a -> ('a, 'e) result
-
-
(** Fail with an error message *)
-
val fail : string -> ('a, Zulip_types.zerror) result
-
-
(** Map over a list with error handling *)
-
val traverse : ('a -> ('b, Zulip_types.zerror) result) -> 'a list -> ('b list, Zulip_types.zerror) result
-
-
(** Filter and map over a list, dropping errors *)
-
val filter_map : ('a -> ('b, Zulip_types.zerror) result) -> 'a list -> 'b list
-
-
(** Run a parser on JSON *)
-
val parse : 'a parser -> json -> ('a, Zulip_types.zerror) result
-
-
(** Run a parser with error context *)
-
val with_context : string -> 'a parser -> 'a parser
+35 -21
stack/zulip/lib/zulip/lib/message.ml
···
let local_id t = t.local_id
let read_by_sender t = t.read_by_sender
-
let to_json t =
-
let base_fields = [
-
("type", `String (Message_type.to_string t.type_));
-
("to", `A (List.map (fun s -> `String s) t.to_));
-
("content", `String t.content);
-
("read_by_sender", `Bool t.read_by_sender);
-
] in
-
let with_topic = match t.topic with
-
| Some topic -> ("topic", `String topic) :: base_fields
-
| None -> base_fields in
-
let with_queue_id = match t.queue_id with
-
| Some qid -> ("queue_id", `String qid) :: with_topic
-
| None -> with_topic in
-
let with_local_id = match t.local_id with
-
| Some lid -> ("local_id", `String lid) :: with_queue_id
-
| None -> with_queue_id in
-
`O with_local_id
+
let pp fmt t = Format.fprintf fmt "Message{type=%a, to=%s, content=%s}"
+
Message_type.pp t.type_
+
(String.concat "," t.to_)
+
t.content
-
let pp fmt t = Format.fprintf fmt "Message{type=%a, to=%s, content=%s}"
-
Message_type.pp t.type_
-
(String.concat "," t.to_)
-
t.content
+
(* Jsont codec for Message_type.t *)
+
let message_type_jsont =
+
let of_string s = match Message_type.of_string s with
+
| Some t -> Ok t
+
| None -> Error (Format.sprintf "Invalid message type: %s" s)
+
in
+
Jsont.of_of_string ~kind:"Message_type.t" of_string ~enc:Message_type.to_string
+
+
(* Jsont codec for the message *)
+
let jsont =
+
let kind = "Message" in
+
let doc = "A Zulip message to be sent" in
+
let make type_ to_ content topic queue_id local_id read_by_sender =
+
{ type_; to_; content; topic; queue_id; local_id; read_by_sender }
+
in
+
Jsont.Object.map ~kind ~doc make
+
|> Jsont.Object.mem "type" message_type_jsont ~enc:type_
+
|> Jsont.Object.mem "to" (Jsont.list Jsont.string) ~enc:to_
+
|> Jsont.Object.mem "content" Jsont.string ~enc:content
+
|> Jsont.Object.opt_mem "topic" Jsont.string ~enc:topic
+
|> Jsont.Object.opt_mem "queue_id" Jsont.string ~enc:queue_id
+
|> Jsont.Object.opt_mem "local_id" Jsont.string ~enc:local_id
+
|> Jsont.Object.mem "read_by_sender" Jsont.bool ~enc:read_by_sender
+
|> Jsont.Object.finish
+
+
(* Encoding functions *)
+
let to_json_string t =
+
Encode.to_json_string jsont t
+
+
let to_form_urlencoded t =
+
Encode.to_form_urlencoded jsont t
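
A minimal usage sketch for the new Message encoders, assuming Message_type.of_string accepts "stream" (its definition isn't in this diff) and that these modules are reachable under the Zulip namespace from outside the library; the stream name, topic and content are made-up example data:

let _message_encoding_sketch () =
  match Message_type.of_string "stream" with
  | None -> Error "\"stream\" is not a known message type"
  | Some type_ ->
      let msg =
        Message.create ~type_ ~to_:[ "general" ] ~content:"Hello from OCaml"
          ~topic:"greetings" ()
      in
      (* both encoders go through the jsont codec defined above *)
      let as_json = Message.to_json_string msg in
      (* Messages.send posts this form body with an explicit content-type *)
      let as_form = Message.to_form_urlencoded msg in
      Ok (as_json, as_form)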
+18 -9
stack/zulip/lib/zulip/lib/message.mli
···
type t
-
val create :
-
type_:Message_type.t ->
-
to_:string list ->
-
content:string ->
-
?topic:string ->
-
?queue_id:string ->
-
?local_id:string ->
-
?read_by_sender:bool ->
+
val create :
+
type_:Message_type.t ->
+
to_:string list ->
+
content:string ->
+
?topic:string ->
+
?queue_id:string ->
+
?local_id:string ->
+
?read_by_sender:bool ->
unit -> t
val type_ : t -> Message_type.t
···
val queue_id : t -> string option
val local_id : t -> string option
val read_by_sender : t -> bool
-
val to_json : t -> Zulip_types.json
+
+
(** Jsont codec for the message type *)
+
val jsont : t Jsont.t
+
+
(** Encode to JSON string *)
+
val to_json_string : t -> string
+
+
(** Encode to form-urlencoded string *)
+
val to_form_urlencoded : t -> string
+
val pp : Format.formatter -> t -> unit
+20 -8
stack/zulip/lib/zulip/lib/message_response.ml
···
let id t = t.id
let automatic_new_visibility_policy t = t.automatic_new_visibility_policy
+
let pp fmt t = Format.fprintf fmt "MessageResponse{id=%d}" t.id
+
+
(* Jsont codec for message response *)
+
let jsont =
+
let kind = "MessageResponse" in
+
let doc = "A Zulip message response" in
+
let make id automatic_new_visibility_policy =
+
{ id; automatic_new_visibility_policy }
+
in
+
Jsont.Object.map ~kind ~doc make
+
|> Jsont.Object.mem "id" Jsont.int ~enc:id
+
|> Jsont.Object.opt_mem "automatic_new_visibility_policy" Jsont.string ~enc:automatic_new_visibility_policy
+
|> Jsont.Object.finish
+
+
(* Decode and encode functions using Encode module *)
let of_json json =
-
Jsonu.with_object "message_response" (fun fields ->
-
match Jsonu.get_int fields "id" with
-
| Error e -> Error e
-
| Ok id ->
-
let automatic_new_visibility_policy = Jsonu.get_string_opt fields "automatic_new_visibility_policy" in
-
Ok { id; automatic_new_visibility_policy }
-
) json
+
match Encode.from_json jsont json with
+
| Ok v -> Ok v
+
| Error msg -> Error (Zulip_types.create_error ~code:(Other "json_parse_error") ~msg ())
-
let pp fmt t = Format.fprintf fmt "MessageResponse{id=%d}" t.id
+
let to_json_string t =
+
Encode.to_json_string jsont t
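
A quick decoding sketch for the response codec above, building a Jsont.json value by hand with the same constructors used elsewhere in this stack (Jsont.Object / Jsont.Number with Jsont.Meta.none); the id value and layout are illustrative only:

let _message_response_sketch () =
  let json =
    Jsont.Object
      ( [ (("id", Jsont.Meta.none), Jsont.Number (42., Jsont.Meta.none)) ],
        Jsont.Meta.none )
  in
  match Message_response.of_json json with
  | Ok r -> Format.printf "%a@." Message_response.pp r
  | Error e -> Format.printf "%a@." Zulip_types.pp_error e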
+5
stack/zulip/lib/zulip/lib/message_response.mli
···
val id : t -> int
val automatic_new_visibility_policy : t -> string option
+
+
(** Jsont codec for message response *)
+
val jsont : t Jsont.t
+
val of_json : Zulip_types.json -> (t, Zulip_types.zerror) result
+
val to_json_string : t -> string
val pp : Format.formatter -> t -> unit
+15 -62
stack/zulip/lib/zulip/lib/messages.ml
···
let send client message =
-
let json = Message.to_json message in
-
let params = match json with
-
| `O fields ->
-
List.fold_left (fun acc (key, value) ->
-
let str_value = match value with
-
| `String s -> s
-
| `Bool true -> "true"
-
| `Bool false -> "false"
-
| `A arr -> String.concat "," (List.map (function `String s -> s | _ -> "") arr)
-
| _ -> ""
-
in
-
(key, str_value) :: acc
-
) [] fields
-
| _ -> [] in
-
-
match Client.request client ~method_:`POST ~path:"/api/v1/messages" ~params () with
+
(* Use form-urlencoded encoding for the message *)
+
let body = Message.to_form_urlencoded message in
+
let content_type = "application/x-www-form-urlencoded" in
+
+
match Client.request client ~method_:`POST ~path:"/api/v1/messages" ~body ~content_type () with
| Ok response -> Message_response.of_json response
| Error err -> Error err
let edit client ~message_id ?content ?topic () =
-
let params =
+
let params =
(("message_id", string_of_int message_id) ::
(match content with Some c -> [("content", c)] | None -> []) @
(match topic with Some t -> [("topic", t)] | None -> [])) in
-
+
match Client.request client ~method_:`PATCH ~path:("/api/v1/messages/" ^ string_of_int message_id) ~params () with
| Ok _ -> Ok ()
| Error err -> Error err
···
let add_reaction client ~message_id ~emoji_name =
let params = [
("emoji_name", emoji_name);
-
("reaction_type", "unicode_emoji");
] in
match Client.request client ~method_:`POST
-
~path:("/api/v1/messages/" ^ string_of_int message_id ^ "/reactions")
-
~params () with
-
| Ok _ -> Ok ()
+
~path:("/api/v1/messages/" ^ string_of_int message_id ^ "/reactions") ~params () with
+
| Ok _json -> Ok ()
| Error err -> Error err
let remove_reaction client ~message_id ~emoji_name =
let params = [
("emoji_name", emoji_name);
-
("reaction_type", "unicode_emoji");
] in
match Client.request client ~method_:`DELETE
-
~path:("/api/v1/messages/" ^ string_of_int message_id ^ "/reactions")
-
~params () with
-
| Ok _ -> Ok ()
+
~path:("/api/v1/messages/" ^ string_of_int message_id ^ "/reactions") ~params () with
+
| Ok _json -> Ok ()
| Error err -> Error err
-
let upload_file client ~filename =
-
(* Read file contents *)
-
let ic = open_in_bin filename in
-
let len = in_channel_length ic in
-
let content = really_input_string ic len in
-
close_in ic;
-
-
(* Extract just the filename from the path *)
-
let basename = Filename.basename filename in
-
-
(* Create multipart form data boundary *)
-
let boundary = "----OCamlZulipBoundary" ^ string_of_float (Unix.gettimeofday ()) in
-
-
(* Build multipart body *)
-
let body = Buffer.create (len + 1024) in
-
Buffer.add_string body ("--" ^ boundary ^ "\r\n");
-
Buffer.add_string body ("Content-Disposition: form-data; name=\"file\"; filename=\"" ^ basename ^ "\"\r\n");
-
Buffer.add_string body "Content-Type: application/octet-stream\r\n";
-
Buffer.add_string body "\r\n";
-
Buffer.add_string body content;
-
Buffer.add_string body ("\r\n--" ^ boundary ^ "--\r\n");
-
-
let body_str = Buffer.contents body in
-
let content_type = "multipart/form-data; boundary=" ^ boundary in
-
-
match Client.request client ~method_:`POST ~path:"/api/v1/user_uploads"
-
~body:body_str ~content_type () with
-
| Ok json ->
-
(* Parse response to extract URI *)
-
(match json with
-
| `O fields ->
-
(match Jsonu.get_string fields "uri" with
-
| Ok uri -> Ok uri
-
| Error e -> Error e)
-
| _ -> Error (Zulip_types.create_error ~code:(Zulip_types.Other "upload_error") ~msg:"Failed to parse upload response" ()))
-
| Error err -> Error err
+
let upload_file _client ~filename:_ =
+
(* TODO: Implement file upload using multipart/form-data *)
+
Error (Zulip_types.create_error ~code:(Other "not_implemented")
+
~msg:"File upload not yet implemented" ())
+25 -18
stack/zulip/lib/zulip/lib/user.ml
···
let is_admin t = t.is_admin
let is_bot t = t.is_bot
-
let to_json t =
-
`O [
-
("email", `String t.email);
-
("full_name", `String t.full_name);
-
("is_active", `Bool t.is_active);
-
("is_admin", `Bool t.is_admin);
-
("is_bot", `Bool t.is_bot);
-
]
+
let pp fmt t = Format.fprintf fmt "User{email=%s, full_name=%s}" t.email t.full_name
+
+
(* Jsont codec for user *)
+
let jsont =
+
let kind = "User" in
+
let doc = "A Zulip user" in
+
let make email full_name is_active is_admin is_bot =
+
{ email; full_name; is_active; is_admin; is_bot }
+
in
+
Jsont.Object.map ~kind ~doc make
+
|> Jsont.Object.mem "email" Jsont.string ~enc:email
+
|> Jsont.Object.mem "full_name" Jsont.string ~enc:full_name
+
|> Jsont.Object.mem "is_active" Jsont.bool ~enc:is_active
+
|> Jsont.Object.mem "is_admin" Jsont.bool ~enc:is_admin
+
|> Jsont.Object.mem "is_bot" Jsont.bool ~enc:is_bot
+
|> Jsont.Object.finish
+
(* Decode and encode functions using Encode module *)
let of_json json =
-
Jsonu.with_object "user" (fun fields ->
-
match Jsonu.get_string fields "email", Jsonu.get_string fields "full_name" with
-
| Ok email, Ok full_name ->
-
let is_active = Jsonu.get_bool_default fields "is_active" true in
-
let is_admin = Jsonu.get_bool_default fields "is_admin" false in
-
let is_bot = Jsonu.get_bool_default fields "is_bot" false in
-
Ok { email; full_name; is_active; is_admin; is_bot }
-
| Error e, _ | _, Error e -> Error e
-
) json
+
match Encode.from_json jsont json with
+
| Ok v -> Ok v
+
| Error msg -> Error (Zulip_types.create_error ~code:(Other "json_parse_error") ~msg ())
+
+
let to_json_string t =
+
Encode.to_json_string jsont t
-
let pp fmt t = Format.fprintf fmt "User{email=%s, full_name=%s}" t.email t.full_name
+
let to_form_urlencoded t =
+
Encode.to_form_urlencoded jsont t
+18 -7
stack/zulip/lib/zulip/lib/user.mli
···
type t
-
val create :
-
email:string ->
-
full_name:string ->
-
?is_active:bool ->
-
?is_admin:bool ->
-
?is_bot:bool ->
+
val create :
+
email:string ->
+
full_name:string ->
+
?is_active:bool ->
+
?is_admin:bool ->
+
?is_bot:bool ->
unit -> t
val email : t -> string
···
val is_active : t -> bool
val is_admin : t -> bool
val is_bot : t -> bool
-
val to_json : t -> Zulip_types.json
+
+
(** Jsont codec for the user type *)
+
val jsont : t Jsont.t
+
+
(** Decode from Jsont.json *)
val of_json : Zulip_types.json -> (t, Zulip_types.zerror) result
+
+
(** Encode to JSON string *)
+
val to_json_string : t -> string
+
+
(** Encode to form-urlencoded string *)
+
val to_form_urlencoded : t -> string
+
val pp : Format.formatter -> t -> unit
+32 -28
stack/zulip/lib/zulip/lib/users.ml
···
-
let list client =
+
let list client =
+
(* Define response codec *)
+
let response_codec =
+
Jsont.Object.(
+
map ~kind:"UsersResponse" (fun members -> members)
+
|> mem "members" (Jsont.list User.jsont) ~enc:(fun x -> x)
+
|> finish
+
)
+
in
+
match Client.request client ~method_:`GET ~path:"/api/v1/users" () with
-
| Ok json ->
-
(match json with
-
| `O fields ->
-
(match List.assoc_opt "members" fields with
-
| Some (`A user_list) ->
-
let users = List.fold_left (fun acc user_json ->
-
match User.of_json user_json with
-
| Ok user -> user :: acc
-
| Error _ -> acc
-
) [] user_list in
-
Ok (List.rev users)
-
| _ -> Error (Zulip_types.create_error ~code:(Other "api_error") ~msg:"Invalid users response format" ()))
-
| _ -> Error (Zulip_types.create_error ~code:(Other "api_error") ~msg:"Users response must be an object" ()))
+
| Ok json ->
+
(match Encode.from_json response_codec json with
+
| Ok users -> Ok users
+
| Error msg -> Error (Zulip_types.create_error ~code:(Other "api_error") ~msg ()))
| Error err -> Error err
let get client ~email =
···
| Error err -> Error err)
| Error err -> Error err
-
let create_user client ~email ~full_name =
-
let body_json = `O [
-
("email", `String email);
-
("full_name", `String full_name);
-
] in
-
let body = match body_json with
-
| `O fields ->
-
String.concat "&" (List.map (fun (k, v) ->
-
match v with
-
| `String s -> k ^ "=" ^ s
-
| _ -> ""
-
) fields)
-
| _ -> "" in
-
match Client.request client ~method_:`POST ~path:"/api/v1/users" ~body () with
+
(* Request type for create_user *)
+
module Create_user_request = struct
+
type t = { email : string; full_name : string }
+
+
let codec =
+
Jsont.Object.(
+
map ~kind:"CreateUserRequest" (fun email full_name -> { email; full_name })
+
|> mem "email" Jsont.string ~enc:(fun r -> r.email)
+
|> mem "full_name" Jsont.string ~enc:(fun r -> r.full_name)
+
|> finish
+
)
+
end
+
+
let create_user client ~email ~full_name =
+
let req = Create_user_request.{ email; full_name } in
+
let body = Encode.to_form_urlencoded Create_user_request.codec req in
+
let content_type = "application/x-www-form-urlencoded" in
+
match Client.request client ~method_:`POST ~path:"/api/v1/users" ~body ~content_type () with
| Ok _json -> Ok ()
| Error err -> Error err
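
For reference, a sketch of what the new create_user body construction looks like, reusing the Create_user_request codec and the Encode helper from this stack; the email/full_name values are made up, and the exact percent-encoding depends on Encode.to_form_urlencoded, which is not shown in this diff:

let _create_user_body_sketch =
  let req = Create_user_request.{ email = "new@example.com"; full_name = "New User" } in
  (* yields something like "email=new%40example.com&full_name=New+User",
     modulo Encode's escaping rules *)
  Encode.to_form_urlencoded Create_user_request.codec req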
+1 -5
stack/zulip/lib/zulip/lib/zulip.ml
···
module Event = Event
module Event_type = Event_type
module Event_queue = Event_queue
-
-
(** JSON utilities with short alias *)
-
module J = Jsonu
-
module Jsonu_syntax = Jsonu_syntax
-
module Jsonu = Jsonu
+
module Encode = Encode
+2 -10
stack/zulip/lib/zulip/lib/zulip.mli
···
module Event_type = Event_type
module Event_queue = Event_queue
-
(** {1 JSON Utilities} *)
-
-
(** JSON utility functions (abbreviated as J for convenience) *)
-
module J = Jsonu
-
-
(** JSON parsing syntax extensions *)
-
module Jsonu_syntax = Jsonu_syntax
-
-
(** Full JSON utilities module *)
-
module Jsonu = Jsonu
+
(** JSON encoding/decoding utilities *)
+
module Encode = Encode
+32 -15
stack/zulip/lib/zulip/lib/zulip_types.ml
···
(** Core types for Zulip API *)
(** JSON type used throughout the API *)
-
type json = [`Null | `Bool of bool | `Float of float | `String of string | `A of json list | `O of (string * json) list]
+
type json = Jsont.json
(** Error codes returned by Zulip API *)
type error_code =
···
let pp_error fmt t = Format.fprintf fmt "Error(%s): %s"
(error_code_to_string t.code) t.message
+
(* Jsont codec for error_code *)
+
let error_code_jsont =
+
let of_string s = Ok (error_code_of_string s) in
+
Jsont.of_of_string ~kind:"ErrorCode" of_string ~enc:error_code_to_string
+
+
(* Jsont codec for zerror *)
+
let zerror_jsont =
+
let kind = "ZulipError" in
+
let make code msg =
+
(* Extra fields are not captured by this codec; error_of_json below re-reads the raw object to fill [extra] *)
+
{ code = error_code_of_string code; message = msg; extra = [] }
+
in
+
let code t = error_code_to_string t.code in
+
let msg t = t.message in
+
Jsont.Object.(
+
map ~kind make
+
|> mem "code" Jsont.string ~enc:code
+
|> mem "msg" Jsont.string ~enc:msg
+
|> finish
+
)
+
let error_of_json json =
-
match json with
-
| `O fields ->
-
(try
-
let code_str = match List.assoc "code" fields with
-
| `String s -> s
-
| _ -> "OTHER" in
-
let msg = match List.assoc "msg" fields with
-
| `String s -> s
-
| _ -> "Unknown error" in
-
let code = error_code_of_string code_str in
-
let extra = List.filter (fun (k, _) -> k <> "code" && k <> "msg" && k <> "result") fields in
-
Some (create_error ~code ~msg ~extra ())
-
with Not_found -> None)
-
| _ -> None
+
match Encode.from_json zerror_jsont json with
+
| Ok err ->
+
(* Extract extra fields by getting all fields except code, msg, result *)
+
(match json with
+
| Jsont.Object (fields, _) ->
+
let assoc = List.map (fun ((k, _), v) -> (k, v)) fields in
+
let extra = List.filter (fun (k, _) -> k <> "code" && k <> "msg" && k <> "result") assoc in
+
Some { err with extra }
+
| _ -> Some err)
+
| Error _ -> None
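
A small sketch of the new error_of_json path, hand-building the JSON with the constructors used in encode_storage_update; the payload loosely mimics Zulip's rate-limit error shape, but the exact fields here are illustrative, not authoritative:

let _error_of_json_sketch () =
  let key k = (k, Jsont.Meta.none) in
  let str s = Jsont.String (s, Jsont.Meta.none) in
  let json =
    Jsont.Object
      ( [ (key "result", str "error");
          (key "code", str "RATE_LIMIT_HIT");
          (key "msg", str "API usage exceeded rate limit");
          (key "retry-after", Jsont.Number (28.7, Jsont.Meta.none)) ],
        Jsont.Meta.none )
  in
  match error_of_json json with
  | Some err ->
      (* "retry-after" lands in [extra]; code/msg/result are filtered out *)
      List.length (error_extra err)
  | None -> 0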
+6 -1
stack/zulip/lib/zulip/lib/zulip_types.mli
···
(** Core types for Zulip API *)
(** JSON type used throughout the API *)
-
type json = [`Null | `Bool of bool | `Float of float | `String of string | `A of json list | `O of (string * json) list]
+
type json = Jsont.json
(** Error codes returned by Zulip API *)
type error_code =
···
val error_message : zerror -> string
val error_extra : zerror -> (string * json) list
val pp_error : Format.formatter -> zerror -> unit
+
+
(** Jsont codecs *)
+
val error_code_jsont : error_code Jsont.t
+
val zerror_jsont : zerror Jsont.t
+
val error_of_json : json -> zerror option
+6 -5
stack/zulip/lib/zulip_bot/lib/bot_runner.ml
···
(* Extract the actual message from the event *)
let message_json, flags =
match event_data with
-
| `O fields ->
-
let msg = match List.assoc_opt "message" fields with
+
| Jsont.Object (fields, _) ->
+
let assoc = List.map (fun ((k, _), v) -> (k, v)) fields in
+
let msg = match List.assoc_opt "message" assoc with
| Some m -> m
| None -> event_data (* Fallback if structure is different *)
in
-
let flgs = match List.assoc_opt "flags" fields with
-
| Some (`A f) -> f
+
let flgs = match List.assoc_opt "flags" assoc with
+
| Some (Jsont.Array (f, _)) -> f
| _ -> []
in
(msg, flgs)
···
(* Check if mentioned *)
let is_mentioned =
-
List.exists (function `String "mentioned" -> true | _ -> false) flags ||
+
List.exists (function Jsont.String ("mentioned", _) -> true | _ -> false) flags ||
Message.is_mentioned message ~user_email:bot_email in
(* Check if it's a private message *)
+54 -30
stack/zulip/lib/zulip_bot/lib/bot_storage.ml
···
mutable dirty_keys : string list;
}
+
(** {1 JSON Codecs for Bot Storage} *)
+
+
(* String map for storage values *)
+
module String_map = Map.Make (String)
+
+
(* Storage response type - {"storage": {...}} *)
+
type storage_response = {
+
storage : string String_map.t;
+
unknown : Jsont.json; (** Unknown/extra JSON fields preserved during parsing *)
+
}
+
+
(* Codec for storage response using Jsont.Object with keep_unknown *)
+
let storage_response_jsont : storage_response Jsont.t =
+
let make storage unknown = { storage; unknown } in
+
let storage_map_jsont =
+
Jsont.Object.map ~kind:"StorageMap" Fun.id
+
|> Jsont.Object.keep_unknown (Jsont.Object.Mems.string_map Jsont.string) ~enc:Fun.id
+
|> Jsont.Object.finish
+
in
+
Jsont.Object.map ~kind:"StorageResponse" make
+
|> Jsont.Object.mem "storage" storage_map_jsont ~enc:(fun r -> r.storage)
+
~dec_absent:String_map.empty
+
|> Jsont.Object.keep_unknown Jsont.json_mems ~enc:(fun r -> r.unknown)
+
|> Jsont.Object.finish
+
let create client ~bot_email =
Log.info (fun m -> m "Creating bot storage for %s" bot_email);
let cache = Hashtbl.create 16 in
···
~path:"/api/v1/bot_storage"
() with
| Ok json ->
-
(match Zulip.Jsonu.get_object_opt (match json with `O f -> f | _ -> []) "storage" with
-
| Some storage_fields ->
-
List.iter (fun (k, v) ->
-
match Zulip.Jsonu.to_string_safe v with
-
| Some value ->
-
Log.debug (fun m -> m "Loaded key from server: %s" k);
-
Hashtbl.add cache k value
-
| None -> ()
-
) storage_fields
-
| None -> ())
+
(match Zulip.Encode.from_json storage_response_jsont json with
+
| Ok response ->
+
String_map.iter (fun k v ->
+
Log.debug (fun m -> m "Loaded key from server: %s" k);
+
Hashtbl.add cache k v
+
) response.storage
+
| Error msg ->
+
Log.warn (fun m -> m "Failed to parse storage response: %s" msg))
| Error e ->
Log.warn (fun m -> m "Failed to load existing storage: %s" (Zulip.error_message e)));
···
let encode_storage_update keys_values =
(* Build the storage object as JSON - the API expects storage={"key": "value"} *)
let storage_obj =
-
List.map (fun (k, v) -> (k, `String v)) keys_values
+
List.map (fun (k, v) -> ((k, Jsont.Meta.none), Jsont.String (v, Jsont.Meta.none))) keys_values
in
-
let json_obj = `O storage_obj in
+
let json_obj = Jsont.Object (storage_obj, Jsont.Meta.none) in
-
(* Convert to JSON string using Ezjsonm *)
-
let json_str = Ezjsonm.to_string json_obj in
+
(* Convert to JSON string using Jsont_bytesrw *)
+
let json_str = Jsont_bytesrw.encode_string' Jsont.json json_obj |> Result.get_ok in
(* Return as form-encoded body: storage=<url-encoded-json> *)
"storage=" ^ Uri.pct_encode json_str
···
~path:"/api/v1/bot_storage"
~params () with
| Ok json ->
-
(match Zulip.Jsonu.get_object_opt (match json with `O f -> f | _ -> []) "storage" with
-
| Some storage_fields ->
-
(match Zulip.Jsonu.get_string_opt storage_fields key with
+
(match Zulip.Encode.from_json storage_response_jsont json with
+
| Ok response ->
+
(match String_map.find_opt key response.storage with
| Some value ->
(* Cache the value *)
Log.debug (fun m -> m "Retrieved key from API: %s" key);
···
| None ->
Log.debug (fun m -> m "Key not found in API: %s" key);
None)
-
| None -> None)
+
| Error msg ->
+
Log.warn (fun m -> m "Failed to parse storage response: %s" msg);
+
None)
| Error e ->
Log.warn (fun m -> m "Error fetching key %s: %s" key (Zulip.error_message e));
None
···
~path:"/api/v1/bot_storage"
() with
| Ok json ->
-
(match json with
-
| `O fields ->
-
(match List.assoc_opt "storage" fields with
-
| Some (`O storage_fields) ->
-
let api_keys = List.map fst storage_fields in
-
(* Merge with cache keys *)
-
let cache_keys = Hashtbl.fold (fun k _ acc -> k :: acc) t.cache [] in
-
let all_keys = List.sort_uniq String.compare (api_keys @ cache_keys) in
-
Ok all_keys
-
| _ -> Ok [])
-
| _ -> Ok [])
+
(match Zulip.Encode.from_json storage_response_jsont json with
+
| Ok response ->
+
let api_keys = String_map.fold (fun k _ acc -> k :: acc) response.storage [] in
+
(* Merge with cache keys *)
+
let cache_keys = Hashtbl.fold (fun k _ acc -> k :: acc) t.cache [] in
+
let all_keys = List.sort_uniq String.compare (api_keys @ cache_keys) in
+
Ok all_keys
+
| Error msg ->
+
Log.warn (fun m -> m "Failed to parse storage response: %s" msg);
+
Ok [])
| Error e -> Error e
(* Flush all dirty keys to API *)
···
| Error e ->
Log.err (fun m -> m "Failed to flush storage: %s" (Zulip.error_message e));
Error e
-
end
+
end
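
A decoding sketch for the storage codec above, written as if inside bot_storage.ml (String_map, storage_response_jsont and Log are local to that file); the key/value pair is made up:

let _storage_decode_sketch () =
  let key k = (k, Jsont.Meta.none) in
  let str s = Jsont.String (s, Jsont.Meta.none) in
  let json =
    Jsont.Object
      ( [ (key "storage",
           Jsont.Object ([ (key "counter", str "3") ], Jsont.Meta.none)) ],
        Jsont.Meta.none )
  in
  match Zulip.Encode.from_json storage_response_jsont json with
  | Ok r -> String_map.iter (fun k v -> Log.debug (fun m -> m "%s=%s" k v)) r.storage
  | Error msg -> Log.warn (fun m -> m "decode failed: %s" msg)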
+1 -1
stack/zulip/lib/zulip_bot/lib/dune
···
(public_name zulip_bot)
(name zulip_bot)
(wrapped true)
-
(libraries zulip unix eio ezjsonm logs mirage-crypto-rng fmt)
+
(libraries zulip unix eio jsont jsont.bytesrw logs mirage-crypto-rng fmt)
(flags (:standard -warn-error -3)))
+192 -88
stack/zulip/lib/zulip_bot/lib/message.ml
···
-
(* Use Jsonm exclusively via Zulip.Jsonu utilities *)
+
(* Message parsing using Jsont codecs *)
let logs_src = Logs.Src.create "zulip_bot.message"
module Log = (val Logs.src_log logs_src : Logs.LOG)
···
email: string;
full_name: string;
short_name: string option;
+
unknown: Jsont.json; (** Unknown/extra JSON fields preserved during parsing *)
}
let user_id t = t.user_id
···
let full_name t = t.full_name
let short_name t = t.short_name
+
(* Jsont codec for User - handles both user_id and id fields *)
+
let jsont : t Jsont.t =
+
let make email full_name short_name unknown =
+
(* user_id will be extracted in a custom way from the object *)
+
fun user_id_opt id_opt ->
+
let user_id = match user_id_opt, id_opt with
+
| Some uid, _ -> uid
+
| None, Some id -> id
+
| None, None -> Jsont.Error.msgf Jsont.Meta.none "Missing user_id or id field"
+
in
+
{ user_id; email; full_name; short_name; unknown }
+
in
+
Jsont.Object.map ~kind:"User" make
+
|> Jsont.Object.mem "email" Jsont.string ~enc:email
+
|> Jsont.Object.mem "full_name" Jsont.string ~enc:full_name
+
|> Jsont.Object.opt_mem "short_name" Jsont.string ~enc:short_name
+
|> Jsont.Object.keep_unknown Jsont.json_mems ~enc:(fun t -> t.unknown)
+
|> Jsont.Object.opt_mem "user_id" Jsont.int ~enc:(fun t -> Some t.user_id)
+
|> Jsont.Object.opt_mem "id" Jsont.int ~enc:(fun _ -> None)
+
|> Jsont.Object.finish
+
let of_json (json : Zulip.json) : (t, Zulip.zerror) result =
-
let open Zulip.Jsonu_syntax in
-
(Zulip.Jsonu.with_object "user" @@ fun fields ->
-
let* user_id = (field fields "user_id" int) <|> (field fields "id" int) in
-
let* email = field fields "email" string in
-
let* full_name = field fields "full_name" string in
-
let* short_name = field_opt fields "short_name" string in
-
Ok { user_id; email; full_name; short_name }) json
+
match Zulip.Encode.from_json jsont json with
+
| Ok v -> Ok v
+
| Error msg -> Error (Zulip.create_error ~code:(Other "json_parse_error") ~msg ())
end
(** Reaction representation *)
···
emoji_code: string;
reaction_type: string;
user_id: int;
+
unknown: Jsont.json; (** Unknown/extra JSON fields preserved during parsing *)
}
let emoji_name t = t.emoji_name
···
let reaction_type t = t.reaction_type
let user_id t = t.user_id
+
(* Jsont codec for Reaction - handles user_id in different locations *)
+
let jsont : t Jsont.t =
+
(* Helper codec for nested user object - extracts just the user_id *)
+
let user_obj_codec =
+
Jsont.Object.map ~kind:"ReactionUser" Fun.id
+
|> Jsont.Object.mem "user_id" Jsont.int ~enc:Fun.id
+
|> Jsont.Object.finish
+
in
+
let make emoji_name emoji_code reaction_type unknown =
+
fun user_id_direct user_obj_nested ->
+
let user_id = match user_id_direct, user_obj_nested with
+
| Some uid, _ -> uid
+
| None, Some uid -> uid
+
| None, None -> Jsont.Error.msgf Jsont.Meta.none "Missing user_id field"
+
in
+
{ emoji_name; emoji_code; reaction_type; user_id; unknown }
+
in
+
Jsont.Object.map ~kind:"Reaction" make
+
|> Jsont.Object.mem "emoji_name" Jsont.string ~enc:emoji_name
+
|> Jsont.Object.mem "emoji_code" Jsont.string ~enc:emoji_code
+
|> Jsont.Object.mem "reaction_type" Jsont.string ~enc:reaction_type
+
|> Jsont.Object.keep_unknown Jsont.json_mems ~enc:(fun t -> t.unknown)
+
|> Jsont.Object.opt_mem "user_id" Jsont.int ~enc:(fun t -> Some t.user_id)
+
|> Jsont.Object.opt_mem "user" user_obj_codec ~enc:(fun _ -> None)
+
|> Jsont.Object.finish
+
let of_json (json : Zulip.json) : (t, Zulip.zerror) result =
-
let open Zulip.Jsonu_syntax in
-
(Zulip.Jsonu.with_object "reaction" @@ fun fields ->
-
let* emoji_name = field fields "emoji_name" string in
-
let* emoji_code = field fields "emoji_code" string in
-
let* reaction_type = field fields "reaction_type" string in
-
let* user_id =
-
(field fields "user_id" int) <|>
-
(match field fields "user" object_ with
-
| Ok user_obj -> field user_obj "user_id" int
-
| Error _ -> fail "user_id not found") in
-
Ok { emoji_name; emoji_code; reaction_type; user_id }) json
+
match Zulip.Encode.from_json jsont json with
+
| Ok v -> Ok v
+
| Error msg -> Error (Zulip.create_error ~code:(Other "json_parse_error") ~msg ())
end
let parse_reaction_json json = Reaction.of_json json
···
(** Helper function to parse common fields *)
let parse_common json =
-
Zulip.Jsonu.parse_with_error "common fields" @@ fun () ->
-
(Zulip.Jsonu.with_object "message" @@ fun fields ->
-
let open Zulip.Jsonu_syntax in
-
let* id = field fields "id" int in
-
let* sender_id = field fields "sender_id" int in
-
let* sender_email = field fields "sender_email" string in
-
let* sender_full_name = field fields "sender_full_name" string in
-
let sender_short_name = field_opt fields "sender_short_name" string |? None in
-
let timestamp = field_or fields "timestamp" float 0.0 |? 0.0 in
-
let content = field_or fields "content" string "" |? "" in
-
let content_type = field_or fields "content_type" string "text/html" |? "text/html" in
+
match json with
+
| Jsont.Object (fields, _) ->
+
let assoc = List.map (fun ((k, _), v) -> (k, v)) fields in
+
let get_int key =
+
match List.assoc_opt key assoc with
+
| Some (Jsont.Number (f, _)) -> Some (int_of_float f)
+
| _ -> None
+
in
+
let get_string key =
+
match List.assoc_opt key assoc with
+
| Some (Jsont.String (s, _)) -> Some s
+
| _ -> None
+
in
+
let get_float key default =
+
match List.assoc_opt key assoc with
+
| Some (Jsont.Number (f, _)) -> f
+
| _ -> default
+
in
+
let get_bool key default =
+
match List.assoc_opt key assoc with
+
| Some (Jsont.Bool (b, _)) -> b
+
| _ -> default
+
in
+
let get_array key =
+
match List.assoc_opt key assoc with
+
| Some (Jsont.Array (arr, _)) -> Some arr
+
| _ -> None
+
in
-
let reactions =
-
match Zulip.Jsonu.get_array_opt fields "reactions" with
-
| Some reactions_json ->
-
List.filter_map (fun r ->
-
match parse_reaction_json r with
-
| Ok reaction -> Some reaction
-
| Error err ->
-
Log.warn (fun m -> m "Failed to parse reaction: %s" (Zulip.error_message err));
-
None
-
) reactions_json
-
| None -> []
-
in
+
(match (get_int "id", get_int "sender_id", get_string "sender_email", get_string "sender_full_name") with
+
| (Some id, Some sender_id, Some sender_email, Some sender_full_name) ->
+
let sender_short_name = get_string "sender_short_name" in
+
let timestamp = get_float "timestamp" 0.0 in
+
let content = get_string "content" |> Option.value ~default:"" in
+
let content_type = get_string "content_type" |> Option.value ~default:"text/html" in
-
let submessages = Zulip.Jsonu.get_array_opt fields "submessages" |> Option.value ~default:[] in
+
let reactions =
+
match get_array "reactions" with
+
| Some reactions_json ->
+
List.filter_map (fun r ->
+
match parse_reaction_json r with
+
| Ok reaction -> Some reaction
+
| Error err ->
+
Log.warn (fun m -> m "Failed to parse reaction: %s" (Zulip.error_message err));
+
None
+
) reactions_json
+
| None -> []
+
in
-
let flags =
-
match Zulip.Jsonu.get_array_opt fields "flags" with
-
| Some flags_json -> List.filter_map Zulip.Jsonu.to_string_safe flags_json
-
| None -> []
-
in
+
let submessages = get_array "submessages" |> Option.value ~default:[] in
-
let is_me_message = field_or fields "is_me_message" bool false |? false in
-
let client = field_or fields "client" string "" |? "" in
-
let gravatar_hash = field_or fields "gravatar_hash" string "" |? "" in
-
let avatar_url = field_opt fields "avatar_url" string |? None in
+
let flags =
+
match get_array "flags" with
+
| Some flags_json ->
+
List.filter_map (fun f ->
+
match f with
+
| Jsont.String (s, _) -> Some s
+
| _ -> None
+
) flags_json
+
| None -> []
+
in
-
Ok {
-
id; sender_id; sender_email; sender_full_name; sender_short_name;
-
timestamp; content; content_type; reactions; submessages;
-
flags; is_me_message; client; gravatar_hash; avatar_url
-
}) json
+
let is_me_message = get_bool "is_me_message" false in
+
let client = get_string "client" |> Option.value ~default:"" in
+
let gravatar_hash = get_string "gravatar_hash" |> Option.value ~default:"" in
+
let avatar_url = get_string "avatar_url" in
+
+
Ok {
+
id; sender_id; sender_email; sender_full_name; sender_short_name;
+
timestamp; content; content_type; reactions; submessages;
+
flags; is_me_message; client; gravatar_hash; avatar_url
+
}
+
| _ -> Error (Zulip.create_error ~code:(Other "json_parse_error") ~msg:"Missing required message fields" ()))
+
| _ -> Error (Zulip.create_error ~code:(Other "json_parse_error") ~msg:"Expected JSON object for message" ())
(** JSON parsing *)
let of_json json =
-
Log.debug (fun m -> m "Parsing message JSON: %s" (Zulip.Jsonu.to_string_pretty json));
+
(* Helper to pretty print JSON without using jsonu *)
+
let json_str =
+
match Jsont_bytesrw.encode_string' Jsont.json json with
+
| Ok s -> s
+
| Error _ -> "<error encoding json>"
+
in
+
Log.debug (fun m -> m "Parsing message JSON: %s" json_str);
-
let open Zulip.Jsonu_syntax in
match parse_common json with
| Error err -> Error (Zulip.error_message err)
| Ok common ->
-
(Zulip.Jsonu.parse_with_error "message type" @@ fun () ->
-
(Zulip.Jsonu.with_object "message" @@ fun fields ->
-
match Zulip.Jsonu.get_string fields "type" with
-
| Ok "private" ->
-
let* recipient_json = field fields "display_recipient" (array (fun x -> Ok x)) in
-
let users = List.filter_map (fun u ->
-
match parse_user_json u with
-
| Ok user -> Some user
-
| Error err ->
-
Log.warn (fun m -> m "Failed to parse user in display_recipient: %s" (Zulip.error_message err));
-
None
-
) recipient_json in
+
match json with
+
| Jsont.Object (fields, _) ->
+
let assoc = List.map (fun ((k, _), v) -> (k, v)) fields in
+
let msg_type =
+
match List.assoc_opt "type" assoc with
+
| Some (Jsont.String (s, _)) -> Some s
+
| _ -> None
+
in
+
(match msg_type with
+
| Some "private" ->
+
(match List.assoc_opt "display_recipient" assoc with
+
| Some (Jsont.Array (recipient_json, _)) ->
+
let users = List.filter_map (fun u ->
+
match parse_user_json u with
+
| Ok user -> Some user
+
| Error err ->
+
Log.warn (fun m -> m "Failed to parse user in display_recipient: %s" (Zulip.error_message err));
+
None
+
) recipient_json in
-
if List.length users = 0 && List.length recipient_json > 0 then
-
fail "Failed to parse any users in display_recipient"
-
else
-
Ok (Private { common; display_recipient = users })
+
if List.length users = 0 && List.length recipient_json > 0 then
+
Error "Failed to parse any users in display_recipient"
+
else
+
Ok (Private { common; display_recipient = users })
+
| _ ->
+
Log.warn (fun m -> m "display_recipient is not an array for private message");
+
Ok (Unknown { common; raw_json = json }))
-
| Ok "stream" ->
-
let* display_recipient = field fields "display_recipient" string in
-
let* stream_id = field fields "stream_id" int in
-
let* subject = field fields "subject" string in
-
Ok (Stream { common; display_recipient; stream_id; subject })
+
| Some "stream" ->
+
let display_recipient =
+
match List.assoc_opt "display_recipient" assoc with
+
| Some (Jsont.String (s, _)) -> Some s
+
| _ -> None
+
in
+
let stream_id =
+
match List.assoc_opt "stream_id" assoc with
+
| Some (Jsont.Number (f, _)) -> Some (int_of_float f)
+
| _ -> None
+
in
+
let subject =
+
match List.assoc_opt "subject" assoc with
+
| Some (Jsont.String (s, _)) -> Some s
+
| _ -> None
+
in
+
(match (display_recipient, stream_id, subject) with
+
| (Some display_recipient, Some stream_id, Some subject) ->
+
Ok (Stream { common; display_recipient; stream_id; subject })
+
| _ ->
+
Log.warn (fun m -> m "Missing required fields for stream message");
+
Ok (Unknown { common; raw_json = json }))
-
| Ok unknown_type ->
-
Log.warn (fun m -> m "Unknown message type: %s" unknown_type);
-
Ok (Unknown { common; raw_json = json })
+
| Some unknown_type ->
+
Log.warn (fun m -> m "Unknown message type: %s" unknown_type);
+
Ok (Unknown { common; raw_json = json })
-
| Error _ ->
-
Log.warn (fun m -> m "No message type field found");
-
Ok (Unknown { common; raw_json = json })
-
) json) |> Result.map_error Zulip.error_message
+
| None ->
+
Log.warn (fun m -> m "No message type field found");
+
Ok (Unknown { common; raw_json = json }))
+
| _ -> Error "Expected JSON object for message"
(** Accessor functions *)
let get_common = function
···
(** Pretty print JSON for debugging *)
let pp_json_debug ppf json =
let open Fmt in
-
let json_str = Zulip.Jsonu.to_string_pretty json in
+
let json_str =
+
match Jsont_bytesrw.encode_string' Jsont.json json with
+
| Ok s -> s
+
| Error _ -> "<error encoding json>"
+
in
pf ppf "@[<v>%a@.%a@]"
(styled `Bold (styled (`Fg `Blue) string)) "Raw JSON:"
(styled (`Fg `Black) string) json_str
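
One more sketch, exercising the User codec's dual "user_id"/"id" handling from inside this file; the hand-built Jsont.json mirrors the constructor usage in parse_common, and the user data is made up:

let _user_codec_sketch () =
  let key k = (k, Jsont.Meta.none) in
  let str s = Jsont.String (s, Jsont.Meta.none) in
  let num f = Jsont.Number (f, Jsont.Meta.none) in
  let json =
    Jsont.Object
      ( [ (key "id", num 7.);
          (key "email", str "alice@example.com");
          (key "full_name", str "Alice Example") ],
        Jsont.Meta.none )
  in
  match User.of_json json with
  | Ok u ->
      (* falls back to "id" because "user_id" is absent *)
      Log.debug (fun m -> m "user %d: %s" (User.user_id u) (User.email u))
  | Error e -> Log.warn (fun m -> m "user parse failed: %s" (Zulip.error_message e))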
+10
stack/zulip/lib/zulip_bot/lib/message.mli
···
email: string;
full_name: string;
short_name: string option;
+
unknown: Jsont.json; (** Unknown/extra JSON fields preserved during parsing *)
}
val user_id : t -> int
val email : t -> string
val full_name : t -> string
val short_name : t -> string option
+
+
(** Jsont codec for User *)
+
val jsont : t Jsont.t
+
val of_json : Zulip.json -> (t, Zulip.zerror) result
end
···
emoji_code: string;
reaction_type: string;
user_id: int;
+
unknown: Jsont.json; (** Unknown/extra JSON fields preserved during parsing *)
}
val emoji_name : t -> string
val emoji_code : t -> string
val reaction_type : t -> string
val user_id : t -> int
+
+
(** Jsont codec for Reaction *)
+
val jsont : t Jsont.t
+
val of_json : Zulip.json -> (t, Zulip.zerror) result
end
+30
stack/zulip/zulip.opam
···
+
# This file is generated by dune, edit dune-project instead
+
opam-version: "2.0"
+
synopsis: "OCaml bindings for the Zulip REST API"
+
description:
+
"High-quality OCaml bindings to the Zulip REST API using EIO for async operations"
+
depends: [
+
"ocaml"
+
"dune" {>= "3.0"}
+
"eio"
+
"requests"
+
"uri"
+
"base64"
+
"alcotest" {with-test}
+
"eio_main" {with-test}
+
"odoc" {with-doc}
+
]
+
build: [
+
["dune" "subst"] {dev}
+
[
+
"dune"
+
"build"
+
"-p"
+
name
+
"-j"
+
jobs
+
"@install"
+
"@runtest" {with-test}
+
"@doc" {with-doc}
+
]
+
]
+26
stack/zulip/zulip_bot.opam
···
+
# This file is generated by dune, edit dune-project instead
+
opam-version: "2.0"
+
synopsis: "OCaml bot framework for Zulip"
+
description: "Interactive bot framework built on the OCaml Zulip library"
+
depends: [
+
"ocaml"
+
"dune" {>= "3.0"}
+
"zulip"
+
"eio"
+
"alcotest" {with-test}
+
"odoc" {with-doc}
+
]
+
build: [
+
["dune" "subst"] {dev}
+
[
+
"dune"
+
"build"
+
"-p"
+
name
+
"-j"
+
jobs
+
"@install"
+
"@runtest" {with-test}
+
"@doc" {with-doc}
+
]
+
]
+29
stack/zulip/zulip_botserver.opam
···
+
# This file is generated by dune, edit dune-project instead
+
opam-version: "2.0"
+
synopsis: "OCaml bot server for running multiple Zulip bots"
+
description:
+
"HTTP server for running multiple Zulip bots with webhook support"
+
depends: [
+
"ocaml"
+
"dune" {>= "3.0"}
+
"zulip"
+
"zulip_bot"
+
"eio"
+
"requests"
+
"alcotest" {with-test}
+
"odoc" {with-doc}
+
]
+
build: [
+
["dune" "subst"] {dev}
+
[
+
"dune"
+
"build"
+
"-p"
+
name
+
"-j"
+
jobs
+
"@install"
+
"@runtest" {with-test}
+
"@doc" {with-doc}
+
]
+
]