My agentic slop goes here. Not intended for anyone else!

sync

+1 -1
stack/river/bin/dune
···
(executable
(public_name river-cli)
(name river_cli)
-
(libraries river cmdliner jsont jsont.bytesrw fmt fmt.tty fmt.cli eio eio_main eiocmd unix ptime syndic xdge))
···
(executable
(public_name river-cli)
(name river_cli)
+
(libraries river river_cmd cmdliner jsont jsont.bytesrw fmt fmt.tty fmt.cli eio eio_main eiocmd unix ptime syndic xdge))
+2 -795
stack/river/bin/river_cli.ml
···
-
(* Logging setup *)
-
let src = Logs.Src.create "river-cli" ~doc:"River CLI application"
-
module Log = (val Logs.src_log src : Logs.LOG)
-
-
(* Types *)
-
type user = {
-
username : string;
-
fullname : string;
-
email : string option;
-
feeds : River.source list;
-
last_synced : string option;
-
}
-
-
type state = {
-
xdg : Xdge.t;
-
}
-
-
(* State directory management *)
-
module State = struct
-
let users_dir state = Eio.Path.(Xdge.state_dir state.xdg / "users")
-
let feeds_dir state = Eio.Path.(Xdge.state_dir state.xdg / "feeds")
-
let user_feeds_dir state = Eio.Path.(feeds_dir state / "user")
-
-
let user_file state username =
-
Eio.Path.(users_dir state / (username ^ ".json"))
-
-
let user_feed_file state username =
-
Eio.Path.(user_feeds_dir state / (username ^ ".xml"))
-
-
let ensure_directories state =
-
let dirs = [
-
users_dir state;
-
feeds_dir state;
-
user_feeds_dir state;
-
] in
-
List.iter (fun dir ->
-
try Eio.Path.mkdir ~perm:0o755 dir
-
with Eio.Io (Eio.Fs.E (Already_exists _), _) -> ()
-
) dirs
-
-
(* JSON codecs for user data *)
-
-
(* Codec for River.source (feed) *)
-
let source_jsont =
-
let make name url = { River.name; url } in
-
Jsont.Object.map ~kind:"Source" make
-
|> Jsont.Object.mem "name" Jsont.string ~enc:(fun s -> s.River.name)
-
|> Jsont.Object.mem "url" Jsont.string ~enc:(fun s -> s.River.url)
-
|> Jsont.Object.finish
-
-
(* Codec for user *)
-
let user_jsont =
-
let make username fullname email feeds last_synced =
-
{ username; fullname; email; feeds; last_synced }
-
in
-
Jsont.Object.map ~kind:"User" make
-
|> Jsont.Object.mem "username" Jsont.string ~enc:(fun u -> u.username)
-
|> Jsont.Object.mem "fullname" Jsont.string ~enc:(fun u -> u.fullname)
-
|> Jsont.Object.opt_mem "email" Jsont.string ~enc:(fun u -> u.email)
-
|> Jsont.Object.mem "feeds" (Jsont.list source_jsont) ~enc:(fun u -> u.feeds)
-
|> Jsont.Object.opt_mem "last_synced" Jsont.string ~enc:(fun u -> u.last_synced)
-
|> Jsont.Object.finish
-
-
let user_of_string s =
-
match Jsont_bytesrw.decode_string' user_jsont s with
-
| Ok user -> Some user
-
| Error err ->
-
Log.err (fun m -> m "Failed to parse user JSON: %s" (Jsont.Error.to_string err));
-
None
-
-
let user_to_string user =
-
match Jsont_bytesrw.encode_string' ~format:Jsont.Indent user_jsont user with
-
| Ok s -> s
-
| Error err -> failwith ("Failed to encode user: " ^ Jsont.Error.to_string err)
-
-
let load_user state username =
-
let file = user_file state username in
-
try
-
let content = Eio.Path.load file in
-
user_of_string content
-
with
-
| Eio.Io (Eio.Fs.E (Not_found _), _) -> None
-
| e ->
-
Log.err (fun m -> m "Error loading user %s: %s" username (Printexc.to_string e));
-
None
-
-
let save_user state user =
-
let file = user_file state user.username in
-
let json = user_to_string user in
-
Eio.Path.save ~create:(`Or_truncate 0o644) file json
-
-
let list_users state =
-
try
-
Eio.Path.read_dir (users_dir state)
-
|> List.filter_map (fun name ->
-
if Filename.check_suffix name ".json" then
-
Some (Filename.chop_suffix name ".json")
-
else None
-
)
-
with _ -> []
-
-
let load_existing_posts state username =
-
let file = user_feed_file state username in
-
try
-
let content = Eio.Path.load file in
-
(* Parse existing Atom feed *)
-
let input = Xmlm.make_input (`String (0, content)) in
-
let feed = Syndic.Atom.parse input in
-
feed.Syndic.Atom.entries
-
with
-
| Eio.Io (Eio.Fs.E (Not_found _), _) -> []
-
| e ->
-
Log.err (fun m -> m "Error loading existing posts for %s: %s"
-
username (Printexc.to_string e));
-
[]
-
-
let save_atom_feed state username entries =
-
let file = user_feed_file state username in
-
let feed : Syndic.Atom.feed = {
-
id = Uri.of_string ("urn:river:user:" ^ username);
-
title = Syndic.Atom.Text username;
-
updated = Ptime.of_float_s (Unix.time ()) |> Option.get;
-
entries;
-
authors = [];
-
categories = [];
-
contributors = [];
-
generator = Some {
-
Syndic.Atom.version = Some "1.0";
-
uri = None;
-
content = "River Feed Aggregator";
-
};
-
icon = None;
-
links = [];
-
logo = None;
-
rights = None;
-
subtitle = None;
-
} in
-
let output = Buffer.create 1024 in
-
Syndic.Atom.output feed (`Buffer output);
-
Eio.Path.save ~create:(`Or_truncate 0o644) file (Buffer.contents output)
-
end
-
-
(* User management commands *)
-
module User = struct
-
let add state ~username ~fullname ~email =
-
match State.load_user state username with
-
| Some _ ->
-
Log.err (fun m -> m "User %s already exists" username);
-
1
-
| None ->
-
let user = { username; fullname; email; feeds = []; last_synced = None } in
-
State.save_user state user;
-
Log.info (fun m -> m "User %s created" username);
-
0
-
-
let remove state ~username =
-
match State.load_user state username with
-
| None ->
-
Log.err (fun m -> m "User %s not found" username);
-
1
-
| Some _ ->
-
(* Remove user file and feed file *)
-
let user_file = State.user_file state username in
-
let feed_file = State.user_feed_file state username in
-
(try Eio.Path.unlink user_file with _ -> ());
-
(try Eio.Path.unlink feed_file with _ -> ());
-
Log.info (fun m -> m "User %s removed" username);
-
0
-
-
let list state =
-
let users = State.list_users state in
-
if users = [] then
-
Printf.printf "No users found\n"
-
else begin
-
Printf.printf "Users:\n";
-
List.iter (fun username ->
-
match State.load_user state username with
-
| Some user ->
-
let email_str = match user.email with
-
| Some e -> " <" ^ e ^ ">"
-
| None -> ""
-
in
-
Printf.printf " %s (%s%s) - %d feeds\n"
-
username user.fullname email_str (List.length user.feeds)
-
| None -> ()
-
) users
-
end;
-
0
-
-
let add_feed state ~username ~name ~url =
-
match State.load_user state username with
-
| None ->
-
Log.err (fun m -> m "User %s not found" username);
-
1
-
| Some user ->
-
let feed = { River.name; url } in
-
if List.exists (fun f -> f.River.url = url) user.feeds then begin
-
Log.err (fun m -> m "Feed %s already exists for user %s" url username);
-
1
-
end else begin
-
let user = { user with feeds = feed :: user.feeds } in
-
State.save_user state user;
-
Log.info (fun m -> m "Feed %s added to user %s" name username);
-
0
-
end
-
-
let remove_feed state ~username ~url =
-
match State.load_user state username with
-
| None ->
-
Log.err (fun m -> m "User %s not found" username);
-
1
-
| Some user ->
-
let feeds = List.filter (fun f -> f.River.url <> url) user.feeds in
-
if List.length feeds = List.length user.feeds then begin
-
Log.err (fun m -> m "Feed %s not found for user %s" url username);
-
1
-
end else begin
-
let user = { user with feeds } in
-
State.save_user state user;
-
Log.info (fun m -> m "Feed removed from user %s" username);
-
0
-
end
-
-
let show state ~username =
-
match State.load_user state username with
-
| None ->
-
Log.err (fun m -> m "User %s not found" username);
-
1
-
| Some user ->
-
Printf.printf "Username: %s\n" user.username;
-
Printf.printf "Full name: %s\n" user.fullname;
-
Printf.printf "Email: %s\n"
-
(Option.value user.email ~default:"(none)");
-
Printf.printf "Last synced: %s\n"
-
(Option.value user.last_synced ~default:"never");
-
Printf.printf "Feeds (%d):\n" (List.length user.feeds);
-
List.iter (fun feed ->
-
Printf.printf " - %s: %s\n" feed.River.name feed.River.url
-
) user.feeds;
-
0
-
end
-
-
(* Sync command *)
-
module Sync = struct
-
let merge_entries ~existing ~new_entries =
-
(* Create a map of new entry IDs for efficient lookup and updates *)
-
let module UriMap = Map.Make(Uri) in
-
let new_entries_map =
-
List.fold_left (fun acc (entry : Syndic.Atom.entry) ->
-
UriMap.add entry.id entry acc
-
) UriMap.empty new_entries
-
in
-
-
(* Update existing entries with new ones if IDs match, otherwise keep existing *)
-
let updated_existing =
-
List.filter_map (fun (entry : Syndic.Atom.entry) ->
-
if UriMap.mem entry.id new_entries_map then
-
None (* Will be replaced by new entry *)
-
else
-
Some entry (* Keep existing entry *)
-
) existing
-
in
-
-
(* Combine new entries with non-replaced existing entries *)
-
let combined = new_entries @ updated_existing in
-
List.sort (fun (a : Syndic.Atom.entry) (b : Syndic.Atom.entry) ->
-
Ptime.compare b.updated a.updated
-
) combined
-
-
let sync_user session state ~username =
-
match State.load_user state username with
-
| None ->
-
Log.err (fun m -> m "User %s not found" username);
-
1
-
| Some user when user.feeds = [] ->
-
Log.info (fun m -> m "No feeds configured for user %s" username);
-
0
-
| Some user ->
-
Log.info (fun m -> m "Syncing feeds for user %s..." username);
-
-
(* Fetch all feeds concurrently using the session *)
-
let fetched_feeds =
-
Eio.Fiber.List.filter_map (fun source ->
-
try
-
Log.info (fun m -> m " Fetching %s (%s)..." source.River.name source.River.url);
-
Some (River.fetch session source)
-
with e ->
-
Log.err (fun m -> m " Failed to fetch %s: %s"
-
source.River.name (Printexc.to_string e));
-
None
-
) user.feeds
-
in
-
-
if fetched_feeds = [] then begin
-
Log.err (fun m -> m "No feeds successfully fetched");
-
1
-
end else begin
-
(* Get posts from fetched feeds *)
-
let posts = River.posts fetched_feeds in
-
Log.info (fun m -> m " Found %d new posts" (List.length posts));
-
-
(* Convert to Atom entries *)
-
let new_entries = River.create_atom_entries posts in
-
-
(* Load existing entries *)
-
let existing = State.load_existing_posts state username in
-
Log.info (fun m -> m " Found %d existing posts" (List.length existing));
-
-
(* Merge entries *)
-
let merged = merge_entries ~existing ~new_entries in
-
Log.info (fun m -> m " Total posts after merge: %d" (List.length merged));
-
-
(* Save updated feed *)
-
State.save_atom_feed state username merged;
-
-
(* Update last_synced timestamp *)
-
let now =
-
let open Unix in
-
let tm = gmtime (time ()) in
-
Printf.sprintf "%04d-%02d-%02dT%02d:%02d:%02dZ"
-
(tm.tm_year + 1900) (tm.tm_mon + 1) tm.tm_mday
-
tm.tm_hour tm.tm_min tm.tm_sec
-
in
-
let user = { user with last_synced = Some now } in
-
State.save_user state user;
-
-
Log.info (fun m -> m "Sync completed for user %s" username);
-
0
-
end
-
-
let sync_all session state =
-
let users = State.list_users state in
-
if users = [] then begin
-
Log.info (fun m -> m "No users to sync");
-
0
-
end else begin
-
Log.info (fun m -> m "Syncing %d users concurrently..." (List.length users));
-
-
let results =
-
Eio.Fiber.List.map (fun username ->
-
let result = sync_user session state ~username in
-
Log.debug (fun m -> m "Completed sync for user");
-
result
-
) users
-
in
-
let failures = List.filter ((<>) 0) results in
-
if failures = [] then begin
-
Log.info (fun m -> m "All users synced successfully");
-
0
-
end else begin
-
Log.err (fun m -> m "Failed to sync %d users" (List.length failures));
-
1
-
end
-
end
-
end
-
-
(* Post listing commands *)
-
module Post = struct
-
let format_date ptime =
-
let open Ptime in
-
let (y, m, d), _ = to_date_time ptime in
-
Printf.sprintf "%02d/%02d/%04d" d m y
-
-
let format_text_construct : Syndic.Atom.text_construct -> string = function
-
| Syndic.Atom.Text s -> s
-
| Syndic.Atom.Html (_, s) -> s
-
| Syndic.Atom.Xhtml (_, _) -> "<xhtml content>"
-
-
let get_content_length (entry : Syndic.Atom.entry) =
-
match entry.content with
-
| Some (Syndic.Atom.Text s) -> String.length s
-
| Some (Syndic.Atom.Html (_, s)) -> String.length s
-
| Some (Syndic.Atom.Xhtml (_, _)) -> 0 (* Could calculate but complex *)
-
| Some (Syndic.Atom.Mime _) -> 0
-
| Some (Syndic.Atom.Src _) -> 0
-
| None -> (
-
match entry.summary with
-
| Some (Syndic.Atom.Text s) -> String.length s
-
| Some (Syndic.Atom.Html (_, s)) -> String.length s
-
| Some (Syndic.Atom.Xhtml (_, _)) -> 0
-
| None -> 0)
-
-
let list state ~username_opt ~limit =
-
match username_opt with
-
| Some username ->
-
(* List posts for a specific user *)
-
(match State.load_user state username with
-
| None ->
-
Log.err (fun m -> m "User %s not found" username);
-
1
-
| Some user ->
-
let entries = State.load_existing_posts state username in
-
if entries = [] then begin
-
Fmt.pr "%a@." Fmt.(styled `Yellow string)
-
("No posts found for user " ^ username);
-
Fmt.pr "%a@." Fmt.(styled `Faint string)
-
("(Run 'river-cli sync " ^ username ^ "' to fetch posts)");
-
0
-
end else begin
-
let to_show = match limit with
-
| Some n -> List.filteri (fun i _ -> i < n) entries
-
| None -> entries
-
in
-
Fmt.pr "%a@."
-
Fmt.(styled `Bold string)
-
(Printf.sprintf "Posts for %s (%d total, showing %d):"
-
user.fullname (List.length entries) (List.length to_show));
-
-
List.iteri (fun i (entry : Syndic.Atom.entry) ->
-
(* Use user's full name for all entries *)
-
let author_name = user.fullname in
-
let content_len = get_content_length entry in
-
let entry_id = Uri.to_string entry.id in
-
Fmt.pr "%a %a@."
-
Fmt.(styled `Cyan string) (Printf.sprintf "[%d]" (i + 1))
-
Fmt.(styled (`Fg `Blue) string) (format_text_construct entry.title);
-
Fmt.pr " %a %a@."
-
Fmt.(styled `Faint string) "ID:"
-
Fmt.(styled `Faint string) entry_id;
-
Fmt.pr " %a - %a - %a chars@."
-
Fmt.(styled `Green string) author_name
-
Fmt.(styled `Magenta string) (format_date entry.updated)
-
Fmt.(styled `Yellow string) (string_of_int content_len)
-
) to_show;
-
0
-
end)
-
| None ->
-
(* List posts from all users *)
-
let users = State.list_users state in
-
if users = [] then begin
-
Fmt.pr "%a@." Fmt.(styled `Yellow string)
-
"No users found";
-
Fmt.pr "%a@." Fmt.(styled `Faint string)
-
"(Run 'river-cli user add' to create a user)";
-
0
-
end else begin
-
(* Load user data to get full names *)
-
let user_map =
-
List.fold_left (fun acc username ->
-
match State.load_user state username with
-
| Some user -> (username, user) :: acc
-
| None -> acc
-
) [] users
-
in
-
-
(* Collect all entries from all users with username tag *)
-
let all_entries =
-
List.concat_map (fun username ->
-
let entries = State.load_existing_posts state username in
-
List.map (fun entry -> (username, entry)) entries
-
) users
-
in
-
-
if all_entries = [] then begin
-
Fmt.pr "%a@." Fmt.(styled `Yellow string)
-
"No posts found for any users";
-
Fmt.pr "%a@." Fmt.(styled `Faint string)
-
"(Run 'river-cli sync' to fetch posts)";
-
0
-
end else begin
-
(* Sort by date (newest first) *)
-
let sorted = List.sort (fun (_, a : string * Syndic.Atom.entry) (_, b) ->
-
Ptime.compare b.updated a.updated
-
) all_entries in
-
-
let to_show = match limit with
-
| Some n -> List.filteri (fun i _ -> i < n) sorted
-
| None -> sorted
-
in
-
-
Fmt.pr "%a@."
-
Fmt.(styled `Bold string)
-
(Printf.sprintf "Posts from all users (%d total, showing %d):"
-
(List.length all_entries) (List.length to_show));
-
-
List.iteri (fun i (username, entry : string * Syndic.Atom.entry) ->
-
(* Use user's full name instead of feed author *)
-
let author_name =
-
match List.assoc_opt username user_map with
-
| Some user -> user.fullname
-
| None ->
-
(* Fallback to entry author if user not found *)
-
let (author, _) = entry.authors in
-
String.trim author.name
-
in
-
let content_len = get_content_length entry in
-
let entry_id = Uri.to_string entry.id in
-
(* Shorten ID for display if it's too long *)
-
Fmt.pr "%a %a@."
-
Fmt.(styled `Cyan string) (Printf.sprintf "[%d]" (i + 1))
-
Fmt.(styled (`Fg `Blue) string) (format_text_construct entry.title);
-
Fmt.pr " %a %a@."
-
Fmt.(styled `Faint string) "ID:"
-
Fmt.(styled `Faint string) entry_id;
-
Fmt.pr " %a - %a - %a chars@."
-
Fmt.(styled `Green string) author_name
-
Fmt.(styled `Magenta string) (format_date entry.updated)
-
Fmt.(styled `Yellow string) (string_of_int content_len)
-
) to_show;
-
0
-
end
-
end
-
end
-
-
(* Cmdliner interface *)
-
open Cmdliner
-
-
let username_arg =
-
let doc = "Username" in
-
Arg.(required & pos 0 (some string) None & info [] ~docv:"USERNAME" ~doc)
-
-
let fullname_arg =
-
let doc = "Full name of the user" in
-
Arg.(required & opt (some string) None & info ["name"; "n"] ~doc)
-
-
let email_arg =
-
let doc = "Email address of the user (optional)" in
-
Arg.(value & opt (some string) None & info ["email"; "e"] ~doc)
-
-
let feed_name_arg =
-
let doc = "Feed name/label" in
-
Arg.(required & opt (some string) None & info ["name"; "n"] ~doc)
-
-
let feed_url_arg =
-
let doc = "Feed URL" in
-
Arg.(required & opt (some string) None & info ["url"; "u"] ~doc)
-
-
(* Note: eiocmd handles all logging setup automatically via Logs_cli *)
-
-
(* User commands - these don't need network, just filesystem access via Xdge *)
-
let user_add_cmd =
-
let doc = "Add a new user" in
-
Eiocmd.run
-
~use_keyeio:false
-
~info:(Cmd.info "add" ~doc)
-
~app_name:"river"
-
~service:"river"
-
Term.(const (fun username fullname email _env xdg _profile ->
-
let state = { xdg } in
-
State.ensure_directories state;
-
User.add state ~username ~fullname ~email
-
) $ username_arg $ fullname_arg $ email_arg)
-
-
let user_remove_cmd =
-
let doc = "Remove a user" in
-
Eiocmd.run
-
~use_keyeio:false
-
~info:(Cmd.info "remove" ~doc)
-
~app_name:"river"
-
~service:"river"
-
Term.(const (fun username _env xdg _profile ->
-
let state = { xdg } in
-
User.remove state ~username
-
) $ username_arg)
-
-
let user_list_cmd =
-
let doc = "List all users" in
-
Eiocmd.run
-
~use_keyeio:false
-
~info:(Cmd.info "list" ~doc)
-
~app_name:"river"
-
~service:"river"
-
Term.(const (fun _env xdg _profile ->
-
let state = { xdg } in
-
User.list state
-
))
-
-
let user_show_cmd =
-
let doc = "Show user details" in
-
Eiocmd.run
-
~use_keyeio:false
-
~info:(Cmd.info "show" ~doc)
-
~app_name:"river"
-
~service:"river"
-
Term.(const (fun username _env xdg _profile ->
-
let state = { xdg } in
-
User.show state ~username
-
) $ username_arg)
-
-
let user_add_feed_cmd =
-
let doc = "Add a feed to a user" in
-
Eiocmd.run
-
~use_keyeio:false
-
~info:(Cmd.info "add-feed" ~doc)
-
~app_name:"river"
-
~service:"river"
-
Term.(const (fun username name url _env xdg _profile ->
-
let state = { xdg } in
-
User.add_feed state ~username ~name ~url
-
) $ username_arg $ feed_name_arg $ feed_url_arg)
-
-
let user_remove_feed_cmd =
-
let doc = "Remove a feed from a user" in
-
Eiocmd.run
-
~use_keyeio:false
-
~info:(Cmd.info "remove-feed" ~doc)
-
~app_name:"river"
-
~service:"river"
-
Term.(const (fun username url _env xdg _profile ->
-
let state = { xdg } in
-
User.remove_feed state ~username ~url
-
) $ username_arg $ feed_url_arg)
-
-
let user_cmd =
-
let doc = "Manage users" in
-
let info = Cmd.info "user" ~doc in
-
Cmd.group info [
-
user_add_cmd;
-
user_remove_cmd;
-
user_list_cmd;
-
user_show_cmd;
-
user_add_feed_cmd;
-
user_remove_feed_cmd;
-
]
-
-
(* Sync command - needs Eio environment for HTTP requests *)
-
let sync_cmd =
-
let doc = "Sync feeds for users" in
-
let username_opt =
-
let doc = "Sync specific user (omit to sync all)" in
-
Arg.(value & pos 0 (some string) None & info [] ~docv:"USERNAME" ~doc)
-
in
-
Eiocmd.run
-
~use_keyeio:false
-
~info:(Cmd.info "sync" ~doc)
-
~app_name:"river"
-
~service:"river"
-
Term.(const (fun username_opt env xdg _profile ->
-
let state = { xdg } in
-
State.ensure_directories state;
-
-
(* Use River.with_session for resource management *)
-
River.with_session env @@ fun session ->
-
match username_opt with
-
| Some username -> Sync.sync_user session state ~username
-
| None -> Sync.sync_all session state
-
) $ username_opt)
-
-
(* List command - doesn't need network, just reads local files *)
-
let list_cmd =
-
let doc = "List recent posts (from all users by default, or specify a user)" in
-
let username_opt_arg =
-
let doc = "Username (optional - defaults to all users)" in
-
Arg.(value & pos 0 (some string) None & info [] ~docv:"USERNAME" ~doc)
-
in
-
let limit_arg =
-
let doc = "Limit number of posts to display (default: all)" in
-
Arg.(value & opt (some int) None & info ["limit"; "n"] ~doc)
-
in
-
Eiocmd.run
-
~use_keyeio:false
-
~info:(Cmd.info "list" ~doc)
-
~app_name:"river"
-
~service:"river"
-
Term.(const (fun username_opt limit _env xdg _profile ->
-
let state = { xdg } in
-
Post.list state ~username_opt ~limit
-
) $ username_opt_arg $ limit_arg)
-
-
(* Info command - show detailed post information *)
-
let info_cmd =
-
let doc = "Display detailed information about a post by ID" in
-
let id_arg =
-
let doc = "Exact post ID to display" in
-
Arg.(required & pos 0 (some string) None & info [] ~docv:"ID" ~doc)
-
in
-
let verbose_flag =
-
let doc = "Show full content and all metadata" in
-
Arg.(value & flag & info ["full"; "f"] ~doc)
-
in
-
Eiocmd.run
-
~use_keyeio:false
-
~info:(Cmd.info "info" ~doc)
-
~app_name:"river"
-
~service:"river"
-
Term.(const (fun id verbose _env xdg _profile ->
-
let state = { xdg } in
-
let users = State.list_users state in
-
-
(* Load all entries from all users *)
-
let all_entries =
-
List.concat_map (fun username ->
-
let entries = State.load_existing_posts state username in
-
List.map (fun entry -> (username, entry)) entries
-
) users
-
in
-
-
(* Find entry with matching ID *)
-
let entry_opt = List.find_opt (fun (_username, entry : string * Syndic.Atom.entry) ->
-
Uri.to_string entry.id = id
-
) all_entries in
-
-
match entry_opt with
-
| None ->
-
Fmt.pr "%a@." Fmt.(styled `Red string) (Printf.sprintf "No post found with ID: %s" id);
-
Fmt.pr "%a@." Fmt.(styled `Faint string) "Hint: Use 'river-cli list' to see available posts and their IDs";
-
1
-
| Some (username, entry) ->
-
(* Get user info for author name *)
-
let user_opt = State.load_user state username in
-
let author_name = match user_opt with
-
| Some user -> user.fullname
-
| None ->
-
let (author, _) = entry.authors in
-
String.trim author.name
-
in
-
-
(* Print header *)
-
Fmt.pr "@.";
-
Fmt.pr "%a@." Fmt.(styled `Bold string)
-
(String.make 70 '=');
-
Fmt.pr " %a@." Fmt.(styled `Bold (styled (`Fg `Blue) string))
-
(Post.format_text_construct entry.title);
-
Fmt.pr "%a@.@." Fmt.(styled `Bold string)
-
(String.make 70 '=');
-
-
(* Basic metadata *)
-
Fmt.pr "%a %s@." Fmt.(styled `Cyan string) "ID: " (Uri.to_string entry.id);
-
-
(* Links *)
-
let links = entry.links in
-
(match links with
-
| [] -> ()
-
| link :: _ ->
-
Fmt.pr "%a %s@." Fmt.(styled `Cyan string) "URL: " (Uri.to_string link.href));
-
-
Fmt.pr "%a %s@." Fmt.(styled `Cyan string) "Author: " author_name;
-
-
Fmt.pr "%a %s@." Fmt.(styled `Cyan string) "Updated: " (Ptime.to_rfc3339 entry.updated);
-
-
(* Summary *)
-
(match entry.summary with
-
| Some summary ->
-
Fmt.pr "@.%a@." Fmt.(styled (`Fg `Yellow) string) "Summary:";
-
let summary_text = Post.format_text_construct summary in
-
Fmt.pr " %s@." summary_text
-
| None -> ());
-
-
(* Content *)
-
(match entry.content with
-
| Some content ->
-
let content_html = match content with
-
| Syndic.Atom.Text s -> s
-
| Syndic.Atom.Html (_, s) -> s
-
| Syndic.Atom.Xhtml (_, nodes) ->
-
String.concat "" (List.map Syndic.XML.to_string nodes)
-
| Syndic.Atom.Mime _ -> "(MIME content)"
-
| Syndic.Atom.Src _ -> "(External content)"
-
in
-
-
(* Extract outgoing links *)
-
let links = Markdown_converter.extract_links content_html in
-
-
(* Convert to markdown *)
-
let content_markdown = Markdown_converter.to_markdown content_html in
-
-
Fmt.pr "@.%a@." Fmt.(styled (`Fg `Green) string) "Content:";
-
if verbose then
-
Fmt.pr "%s@." content_markdown
-
else begin
-
let preview =
-
if String.length content_markdown > 500 then
-
String.sub content_markdown 0 500 ^ "..."
-
else
-
content_markdown
-
in
-
Fmt.pr "%s@." preview;
-
if String.length content_markdown > 500 then
-
Fmt.pr "@.%a@." Fmt.(styled `Faint string) "(Use --full to see full content)"
-
end;
-
-
(* Display outgoing links *)
-
if links <> [] then begin
-
Fmt.pr "@.%a (%d)@." Fmt.(styled (`Fg `Cyan) string) "Outgoing Links:" (List.length links);
-
List.iteri (fun i (href, text) ->
-
let link_text = if text = "" then "(no text)" else text in
-
Fmt.pr " %a %s@."
-
Fmt.(styled `Faint string) (Printf.sprintf "[%d]" (i + 1))
-
(Uri.to_string (Uri.of_string href));
-
if text <> "" && String.length text < 80 then
-
Fmt.pr " %a %s@." Fmt.(styled `Faint string) "→" link_text
-
) links
-
end
-
| None -> ());
-
-
Fmt.pr "@.";
-
0
-
) $ id_arg $ verbose_flag)
-
-
let main_cmd =
-
let doc = "River feed management CLI" in
-
let info = Cmd.info "river-cli" ~version:"1.0" ~doc in
-
Cmd.group info [user_cmd; sync_cmd; list_cmd; info_cmd]
-
-
let () = exit (Cmd.eval' main_cmd)
···
+
(* Ultra-thin binary that delegates all command handling to River_cmd *)
+
let () = exit (Cmdliner.Cmd.eval' River_cmd.main_cmd)
+4
stack/river/cmd/dune
···
···
+
(library
+
(name river_cmd)
+
(public_name river.cmd)
+
(libraries river cmdliner eiocmd fmt logs))
+761
stack/river/cmd/river_cmd.ml
···
···
+
(** River.Cmd - Cmdliner terms for River CLI
+
+
This module provides cmdliner terms that are thin wrappers around
+
the River library functions. All business logic resides in the
+
main River module. *)
+
+
open Cmdliner
+
+
(* Logging setup *)
+
let src = Logs.Src.create "river-cli" ~doc:"River CLI application"
+
module Log = (val Logs.src_log src : Logs.LOG)
+
+
(* User management commands *)
+
module User = struct
+
let add state ~username ~fullname ~email =
+
let user = River.User.make ~username ~fullname ?email () in
+
match River.State.create_user state user with
+
| Ok () ->
+
Log.info (fun m -> m "User %s created" username);
+
0
+
| Error err ->
+
Log.err (fun m -> m "%s" err);
+
1
+
+
let remove state ~username =
+
match River.State.delete_user state ~username with
+
| Ok () ->
+
Log.info (fun m -> m "User %s removed" username);
+
0
+
| Error err ->
+
Log.err (fun m -> m "%s" err);
+
1
+
+
let list state =
+
let users = River.State.list_users state in
+
if users = [] then
+
Printf.printf "No users found\n"
+
else begin
+
Printf.printf "Users:\n";
+
List.iter (fun username ->
+
match River.State.get_user state ~username with
+
| Some user ->
+
let email_str = match River.User.email user with
+
| Some e -> " <" ^ e ^ ">"
+
| None -> ""
+
in
+
Printf.printf " %s (%s%s) - %d feeds\n"
+
username (River.User.fullname user) email_str
+
(List.length (River.User.feeds user))
+
| None -> ()
+
) users
+
end;
+
0
+
+
let add_feed state ~username ~name ~url =
+
match River.State.get_user state ~username with
+
| None ->
+
Log.err (fun m -> m "User %s not found" username);
+
1
+
| Some user ->
+
let source = River.Source.make ~name ~url in
+
let user = River.User.add_feed user source in
+
(match River.State.update_user state user with
+
| Ok () ->
+
Log.info (fun m -> m "Feed %s added to user %s" name username);
+
0
+
| Error err ->
+
Log.err (fun m -> m "%s" err);
+
1)
+
+
let remove_feed state ~username ~url =
+
match River.State.get_user state ~username with
+
| None ->
+
Log.err (fun m -> m "User %s not found" username);
+
1
+
| Some user ->
+
let user = River.User.remove_feed user ~url in
+
(match River.State.update_user state user with
+
| Ok () ->
+
Log.info (fun m -> m "Feed removed from user %s" username);
+
0
+
| Error err ->
+
Log.err (fun m -> m "%s" err);
+
1)
+
+
let show state ~username =
+
match River.State.get_user state ~username with
+
| None ->
+
Log.err (fun m -> m "User %s not found" username);
+
1
+
| Some user ->
+
Printf.printf "Username: %s\n" (River.User.username user);
+
Printf.printf "Full name: %s\n" (River.User.fullname user);
+
Printf.printf "Email: %s\n"
+
(Option.value (River.User.email user) ~default:"(none)");
+
Printf.printf "Last synced: %s\n"
+
(Option.value (River.User.last_synced user) ~default:"never");
+
let feeds = River.User.feeds user in
+
Printf.printf "Feeds (%d):\n" (List.length feeds);
+
List.iter (fun feed ->
+
Printf.printf " - %s: %s\n"
+
(River.Source.name feed) (River.Source.url feed)
+
) feeds;
+
0
+
end
+
+
(* Sync command *)
+
module Sync = struct
+
let sync_user env state ~username =
+
match River.State.sync_user env state ~username with
+
| Ok () ->
+
Log.info (fun m -> m "Sync completed for user %s" username);
+
0
+
| Error err ->
+
Log.err (fun m -> m "Sync failed: %s" err);
+
1
+
+
let sync_all env state =
+
match River.State.sync_all env state with
+
| Ok (success, fail) ->
+
Log.info (fun m -> m "Synced %d users (%d failed)" success fail);
+
if fail = 0 then 0 else 1
+
| Error err ->
+
Log.err (fun m -> m "Sync failed: %s" err);
+
1
+
end
+
+
(* Post listing commands *)
+
module Post = struct
+
let format_date ptime =
+
let open Ptime in
+
let (y, m, d), _ = to_date_time ptime in
+
Printf.sprintf "%02d/%02d/%04d" d m y
+
+
let format_text_construct : Syndic.Atom.text_construct -> string = function
+
| Syndic.Atom.Text s -> s
+
| Syndic.Atom.Html (_, s) -> s
+
| Syndic.Atom.Xhtml (_, _) -> "<xhtml content>"
+
+
let get_content_text (entry : Syndic.Atom.entry) =
+
match entry.content with
+
| Some (Syndic.Atom.Text s) -> Some s
+
| Some (Syndic.Atom.Html (_, s)) -> Some s
+
| Some (Syndic.Atom.Xhtml (_, _)) -> Some "<xhtml content>"
+
| Some (Syndic.Atom.Mime _) -> Some "<mime content>"
+
| Some (Syndic.Atom.Src _) -> Some "<external content>"
+
| None -> None
+
+
let truncate_string s max_len =
+
if String.length s <= max_len then s
+
else String.sub s 0 max_len ^ "..."
+
+
let list state ~username_opt ~limit ~metadata =
+
match username_opt with
+
| Some username ->
+
(* List posts for a specific user *)
+
(match River.State.get_user state ~username with
+
| None ->
+
Log.err (fun m -> m "User %s not found" username);
+
1
+
| Some user ->
+
let entries = River.State.get_user_posts state ~username ?limit () in
+
if entries = [] then begin
+
Fmt.pr "%a@." Fmt.(styled `Yellow string)
+
("No posts found for user " ^ username);
+
Fmt.pr "%a@." Fmt.(styled `Faint string)
+
("(Run 'river-cli sync " ^ username ^ "' to fetch posts)");
+
0
+
end else begin
+
Fmt.pr "@.%a@.@."
+
Fmt.(styled `Bold (styled (`Fg `Cyan) string))
+
(Printf.sprintf "Posts for %s (%d total)"
+
(River.User.fullname user) (List.length entries));
+
+
List.iter (fun (entry : Syndic.Atom.entry) ->
+
let entry_id = Uri.to_string entry.id in
+
+
(* Title and ID on separate lines for clarity *)
+
Fmt.pr "%a@."
+
Fmt.(styled `Bold (styled (`Fg `Blue) string))
+
(format_text_construct entry.title);
+
Fmt.pr " %a %a@."
+
Fmt.(styled `Faint string) "ID:"
+
Fmt.(styled (`Fg `Magenta) string) entry_id;
+
+
if metadata then begin
+
(* Show all metadata *)
+
Fmt.pr " %a %a@."
+
Fmt.(styled `Faint string) "Author:"
+
Fmt.(styled `Green string) (River.User.fullname user);
+
Fmt.pr " %a %a@."
+
Fmt.(styled `Faint string) "Updated:"
+
Fmt.(styled `Yellow string) (format_date entry.updated);
+
+
(* Summary if present *)
+
(match entry.summary with
+
| Some summary ->
+
let summary_text = format_text_construct summary in
+
Fmt.pr " %a %a@."
+
Fmt.(styled `Faint string) "Summary:"
+
Fmt.string (truncate_string summary_text 150)
+
| None -> ());
+
+
(* Content (truncated) *)
+
(match get_content_text entry with
+
| Some content ->
+
let clean = Str.global_replace (Str.regexp "<[^>]*>") "" content in
+
let clean = Str.global_replace (Str.regexp "[ \t\n\r]+") " " clean in
+
Fmt.pr " %a %a@."
+
Fmt.(styled `Faint string) "Content:"
+
Fmt.string (truncate_string (String.trim clean) 200)
+
| None -> ());
+
+
(* Links *)
+
(match entry.links with
+
| [] -> ()
+
| links ->
+
Fmt.pr " %a@." Fmt.(styled `Faint string) "Links:";
+
List.iter (fun link ->
+
Fmt.pr " %a@." Fmt.(styled (`Fg `Cyan) string)
+
(Uri.to_string link.Syndic.Atom.href)
+
) links);
+
+
(* Tags/Categories *)
+
(match entry.categories with
+
| [] -> ()
+
| categories ->
+
Fmt.pr " %a %a@." Fmt.(styled `Faint string) "Tags:"
+
Fmt.(list ~sep:comma (styled (`Fg `Yellow) string))
+
(List.map (fun c -> c.Syndic.Atom.term) categories));
+
end else begin
+
(* Compact view: just author and date *)
+
Fmt.pr " %a %a %a %a@."
+
Fmt.(styled `Faint string) "By"
+
Fmt.(styled `Green string) (River.User.fullname user)
+
Fmt.(styled `Faint string) "on"
+
Fmt.(styled `Yellow string) (format_date entry.updated);
+
end;
+
Fmt.pr "@."
+
) entries;
+
0
+
end)
+
| None ->
+
(* List posts from all users *)
+
let all_posts = River.State.get_all_posts state ?limit () in
+
if all_posts = [] then begin
+
Fmt.pr "%a@." Fmt.(styled `Yellow string)
+
"No posts found for any users";
+
Fmt.pr "%a@." Fmt.(styled `Faint string)
+
"(Run 'river-cli sync' to fetch posts)";
+
0
+
end else begin
+
Fmt.pr "@.%a@.@."
+
Fmt.(styled `Bold (styled (`Fg `Cyan) string))
+
(Printf.sprintf "Posts from all users (%d total)"
+
(List.length all_posts));
+
+
List.iter (fun (username, entry : string * Syndic.Atom.entry) ->
+
let author_name =
+
match River.State.get_user state ~username with
+
| Some user -> River.User.fullname user
+
| None ->
+
let (author, _) = entry.authors in
+
String.trim author.name
+
in
+
let entry_id = Uri.to_string entry.id in
+
+
(* Title and ID on separate lines for clarity *)
+
Fmt.pr "%a@."
+
Fmt.(styled `Bold (styled (`Fg `Blue) string))
+
(format_text_construct entry.title);
+
Fmt.pr " %a %a@."
+
Fmt.(styled `Faint string) "ID:"
+
Fmt.(styled (`Fg `Magenta) string) entry_id;
+
+
if metadata then begin
+
(* Show all metadata *)
+
Fmt.pr " %a %a@."
+
Fmt.(styled `Faint string) "Author:"
+
Fmt.(styled `Green string) author_name;
+
Fmt.pr " %a %a@."
+
Fmt.(styled `Faint string) "Updated:"
+
Fmt.(styled `Yellow string) (format_date entry.updated);
+
+
(* Summary if present *)
+
(match entry.summary with
+
| Some summary ->
+
let summary_text = format_text_construct summary in
+
Fmt.pr " %a %a@."
+
Fmt.(styled `Faint string) "Summary:"
+
Fmt.string (truncate_string summary_text 150)
+
| None -> ());
+
+
(* Content (truncated) *)
+
(match get_content_text entry with
+
| Some content ->
+
let clean = Str.global_replace (Str.regexp "<[^>]*>") "" content in
+
let clean = Str.global_replace (Str.regexp "[ \t\n\r]+") " " clean in
+
Fmt.pr " %a %a@."
+
Fmt.(styled `Faint string) "Content:"
+
Fmt.string (truncate_string (String.trim clean) 200)
+
| None -> ());
+
+
(* Links *)
+
(match entry.links with
+
| [] -> ()
+
| links ->
+
Fmt.pr " %a@." Fmt.(styled `Faint string) "Links:";
+
List.iter (fun link ->
+
Fmt.pr " %a@." Fmt.(styled (`Fg `Cyan) string)
+
(Uri.to_string link.Syndic.Atom.href)
+
) links);
+
+
(* Tags/Categories *)
+
(match entry.categories with
+
| [] -> ()
+
| categories ->
+
Fmt.pr " %a %a@." Fmt.(styled `Faint string) "Tags:"
+
Fmt.(list ~sep:comma (styled (`Fg `Yellow) string))
+
(List.map (fun c -> c.Syndic.Atom.term) categories));
+
end else begin
+
(* Compact view: just author and date *)
+
Fmt.pr " %a %a %a %a@."
+
Fmt.(styled `Faint string) "By"
+
Fmt.(styled `Green string) author_name
+
Fmt.(styled `Faint string) "on"
+
Fmt.(styled `Yellow string) (format_date entry.updated);
+
end;
+
Fmt.pr "@."
+
) all_posts;
+
0
+
end
+
+
let info state ~post_id ~verbose =
+
(* Find the post by ID across all users *)
+
let all_posts = River.State.get_all_posts state () in
+
match List.find_opt (fun (_, entry : string * Syndic.Atom.entry) ->
+
Uri.to_string entry.id = post_id
+
) all_posts with
+
| None ->
+
Log.err (fun m -> m "Post with ID %s not found" post_id);
+
1
+
| Some (username, entry) ->
+
(* Display post information *)
+
Fmt.pr "@.";
+
Fmt.pr "%a@." Fmt.(styled `Bold string) (String.make 70 '=');
+
Fmt.pr " %a@." Fmt.(styled `Bold (styled (`Fg `Blue) string))
+
(format_text_construct entry.title);
+
Fmt.pr "%a@.@." Fmt.(styled `Bold string) (String.make 70 '=');
+
+
(* Author and date *)
+
let author_name =
+
match River.State.get_user state ~username with
+
| Some user -> River.User.fullname user
+
| None ->
+
let (author, _) = entry.authors in
+
String.trim author.name
+
in
+
Fmt.pr "%a %a@." Fmt.(styled `Cyan string) "Author:"
+
Fmt.(styled `Green string) author_name;
+
Fmt.pr "%a %a@." Fmt.(styled `Cyan string) "Published:"
+
Fmt.(styled `Magenta string) (format_date entry.updated);
+
Fmt.pr "%a %a@.@." Fmt.(styled `Cyan string) "ID:"
+
Fmt.(styled `Faint string) post_id;
+
+
(* Summary if present *)
+
(match entry.summary with
+
| Some summary ->
+
Fmt.pr "%a@." Fmt.(styled (`Fg `Yellow) string) "Summary:";
+
Fmt.pr "%s@.@." (format_text_construct summary)
+
| None -> ());
+
+
(* Content *)
+
(match entry.content with
+
| Some content ->
+
let content_str = match content with
+
| Syndic.Atom.Text s -> s
+
| Syndic.Atom.Html (_, s) -> s
+
| Syndic.Atom.Xhtml (_, _) -> "<xhtml content>"
+
| Syndic.Atom.Mime _ -> "<mime content>"
+
| Syndic.Atom.Src _ -> "<external content>"
+
in
+
Fmt.pr "%a@." Fmt.(styled (`Fg `Yellow) string) "Content:";
+
if verbose then begin
+
(* In verbose mode, attempt to convert HTML to markdown *)
+
let markdown = try
+
(* Simple HTML to markdown conversion - just strip tags for now *)
+
let re = Str.regexp "<[^>]*>" in
+
Str.global_replace re "" content_str
+
with _ -> content_str
+
in
+
Fmt.pr "%s@.@." markdown
+
end else begin
+
(* Non-verbose mode: show truncated content *)
+
let max_len = 500 in
+
if String.length content_str > max_len then
+
Fmt.pr "%s...@.@." (String.sub content_str 0 max_len)
+
else
+
Fmt.pr "%s@.@." content_str
+
end
+
| None -> ());
+
+
(* Links *)
+
(match entry.links with
+
| [] -> ()
+
| links ->
+
Fmt.pr "%a@." Fmt.(styled `Cyan string) "Links:";
+
List.iter (fun link ->
+
Fmt.pr " - %s@." (Uri.to_string link.Syndic.Atom.href)
+
) links;
+
Fmt.pr "@.");
+
+
(* Categories/Tags if verbose *)
+
if verbose then begin
+
match entry.categories with
+
| [] -> ()
+
| categories ->
+
Fmt.pr "%a@." Fmt.(styled `Cyan string) "Tags:";
+
List.iter (fun cat ->
+
Fmt.pr " - %s@." cat.Syndic.Atom.term
+
) categories;
+
Fmt.pr "@."
+
end;
+
+
0
+
end
+
+
(* Cmdliner argument definitions *)
+
let username_arg =
+
let doc = "Username" in
+
Arg.(required & pos 0 (some string) None & info [] ~docv:"USERNAME" ~doc)
+
+
let fullname_arg =
+
let doc = "Full name of the user" in
+
Arg.(required & opt (some string) None & info ["name"; "n"] ~doc)
+
+
let email_arg =
+
let doc = "Email address of the user (optional)" in
+
Arg.(value & opt (some string) None & info ["email"; "e"] ~doc)
+
+
let feed_name_arg =
+
let doc = "Feed name/label" in
+
Arg.(required & opt (some string) None & info ["name"; "n"] ~doc)
+
+
let feed_url_arg =
+
let doc = "Feed URL" in
+
Arg.(required & opt (some string) None & info ["url"; "u"] ~doc)
+
+
(* User commands - these don't need network, just filesystem access *)
+
let user_add =
+
Term.(const (fun username fullname email env _xdg _profile ->
+
let state = River.State.create env ~app_name:"river" in
+
User.add state ~username ~fullname ~email
+
) $ username_arg $ fullname_arg $ email_arg)
+
+
let user_remove =
+
Term.(const (fun username env _xdg _profile ->
+
let state = River.State.create env ~app_name:"river" in
+
User.remove state ~username
+
) $ username_arg)
+
+
let user_list =
+
Term.(const (fun env _xdg _profile ->
+
let state = River.State.create env ~app_name:"river" in
+
User.list state
+
))
+
+
let user_show =
+
Term.(const (fun username env _xdg _profile ->
+
let state = River.State.create env ~app_name:"river" in
+
User.show state ~username
+
) $ username_arg)
+
+
let user_add_feed =
+
Term.(const (fun username name url env _xdg _profile ->
+
let state = River.State.create env ~app_name:"river" in
+
User.add_feed state ~username ~name ~url
+
) $ username_arg $ feed_name_arg $ feed_url_arg)
+
+
let user_remove_feed =
+
Term.(const (fun username url env _xdg _profile ->
+
let state = River.State.create env ~app_name:"river" in
+
User.remove_feed state ~username ~url
+
) $ username_arg $ feed_url_arg)
+
+
let user_cmd =
+
let doc = "Manage users" in
+
let info = Cmd.info "user" ~doc in
+
let user_add_cmd =
+
Eiocmd.run
+
~use_keyeio:false
+
~info:(Cmd.info "add" ~doc:"Add a new user")
+
~app_name:"river"
+
~service:"river"
+
user_add
+
in
+
let user_remove_cmd =
+
Eiocmd.run
+
~use_keyeio:false
+
~info:(Cmd.info "remove" ~doc:"Remove a user")
+
~app_name:"river"
+
~service:"river"
+
user_remove
+
in
+
let user_list_cmd =
+
Eiocmd.run
+
~use_keyeio:false
+
~info:(Cmd.info "list" ~doc:"List all users")
+
~app_name:"river"
+
~service:"river"
+
user_list
+
in
+
let user_show_cmd =
+
Eiocmd.run
+
~use_keyeio:false
+
~info:(Cmd.info "show" ~doc:"Show user details")
+
~app_name:"river"
+
~service:"river"
+
user_show
+
in
+
let user_add_feed_cmd =
+
Eiocmd.run
+
~use_keyeio:false
+
~info:(Cmd.info "add-feed" ~doc:"Add a feed to a user")
+
~app_name:"river"
+
~service:"river"
+
user_add_feed
+
in
+
let user_remove_feed_cmd =
+
Eiocmd.run
+
~use_keyeio:false
+
~info:(Cmd.info "remove-feed" ~doc:"Remove a feed from a user")
+
~app_name:"river"
+
~service:"river"
+
user_remove_feed
+
in
+
Cmd.group info [
+
user_add_cmd;
+
user_remove_cmd;
+
user_list_cmd;
+
user_show_cmd;
+
user_add_feed_cmd;
+
user_remove_feed_cmd;
+
]
+
+
(* Sync command - needs Eio environment for HTTP requests *)
+
let sync =
+
let username_opt =
+
let doc = "Sync specific user (omit to sync all)" in
+
Arg.(value & pos 0 (some string) None & info [] ~docv:"USERNAME" ~doc)
+
in
+
Term.(const (fun username_opt env _xdg _profile ->
+
let state = River.State.create env ~app_name:"river" in
+
match username_opt with
+
| Some username -> Sync.sync_user env state ~username
+
| None -> Sync.sync_all env state
+
) $ username_opt)
+
+
(* List command - doesn't need network, just reads local files *)
+
let list =
+
let username_opt_arg =
+
let doc = "Username (optional - defaults to all users)" in
+
Arg.(value & pos 0 (some string) None & info [] ~docv:"USERNAME" ~doc)
+
in
+
let limit_arg =
+
let doc = "Limit number of posts to display (default: all)" in
+
Arg.(value & opt (some int) None & info ["limit"; "n"] ~doc)
+
in
+
let metadata_arg =
+
let doc = "Show all metadata (author, date, summary, content preview, links, tags)" in
+
Arg.(value & flag & info ["metadata"; "m"] ~doc)
+
in
+
Term.(const (fun username_opt limit metadata env _xdg _profile ->
+
let state = River.State.create env ~app_name:"river" in
+
Post.list state ~username_opt ~limit ~metadata
+
) $ username_opt_arg $ limit_arg $ metadata_arg)
+
+
(* Info command - show detailed post information *)
+
let info =
+
let post_id_arg =
+
let doc = "Post ID (URI)" in
+
Arg.(required & pos 0 (some string) None & info [] ~docv:"POST_ID" ~doc)
+
in
+
let full_arg =
+
let doc = "Show full content without truncation" in
+
Arg.(value & flag & info ["full"] ~doc)
+
in
+
Term.(const (fun post_id full env _xdg _profile ->
+
let state = River.State.create env ~app_name:"river" in
+
Post.info state ~post_id ~verbose:full
+
) $ post_id_arg $ full_arg)
+
+
(* Merge command - export merged feed *)
+
let merge =
+
let format_arg =
+
let doc = "Output format: atom or jsonfeed" in
+
Arg.(value & opt string "atom" & info ["format"; "f"] ~doc)
+
in
+
let title_arg =
+
let doc = "Feed title" in
+
Arg.(value & opt string "River Merged Feed" & info ["title"; "t"] ~doc)
+
in
+
let limit_arg =
+
let doc = "Maximum number of entries to include (default: all)" in
+
Arg.(value & opt (some int) None & info ["limit"; "n"] ~doc)
+
in
+
Term.(const (fun format title limit env _xdg _profile ->
+
let state = River.State.create env ~app_name:"river" in
+
let format_type = match String.lowercase_ascii format with
+
| "jsonfeed" | "json" -> `Jsonfeed
+
| _ -> `Atom
+
in
+
match River.State.export_merged_feed state ~title ~format:format_type ?limit () with
+
| Ok output ->
+
print_endline output;
+
0
+
| Error err ->
+
Log.err (fun m -> m "Failed to export merged feed: %s" err);
+
1
+
) $ format_arg $ title_arg $ limit_arg)
+
+
(* Quality command - analyze feed quality *)
+
let quality =
+
let username_arg =
+
let doc = "Username to analyze" in
+
Arg.(required & pos 0 (some string) None & info [] ~docv:"USERNAME" ~doc)
+
in
+
Term.(const (fun username env _xdg _profile ->
+
let state = River.State.create env ~app_name:"river" in
+
match River.State.analyze_user_quality state ~username with
+
| Error err ->
+
Log.err (fun m -> m "%s" err);
+
1
+
| Ok metrics ->
+
(* Display quality metrics *)
+
Fmt.pr "@.";
+
Fmt.pr "%a@." Fmt.(styled `Bold string)
+
(String.make 70 '=');
+
Fmt.pr " %a %s@." Fmt.(styled `Bold (styled (`Fg `Blue) string))
+
"User Quality Analysis:" username;
+
Fmt.pr "%a@.@." Fmt.(styled `Bold string)
+
(String.make 70 '=');
+
+
(* Overall quality score *)
+
let score = River.Quality.quality_score metrics in
+
let score_color = match score with
+
| s when s >= 80.0 -> `Green
+
| s when s >= 60.0 -> `Yellow
+
| s when s >= 40.0 -> `Magenta
+
| _ -> `Red
+
in
+
Fmt.pr "%a %.1f/100.0@.@."
+
Fmt.(styled (`Fg score_color) (styled `Bold string))
+
"Overall Quality Score:"
+
score;
+
+
(* Entry statistics *)
+
Fmt.pr "%a@." Fmt.(styled `Cyan string) "Entry Statistics:";
+
Fmt.pr " Total entries: %d@." (River.Quality.total_entries metrics);
+
Fmt.pr "@.";
+
+
(* Completeness metrics *)
+
Fmt.pr "%a@." Fmt.(styled `Cyan string) "Completeness:";
+
let total = River.Quality.total_entries metrics in
+
let pct entries =
+
float_of_int entries /. float_of_int total *. 100.0
+
in
+
Fmt.pr " Entries with content: %3d/%d (%5.1f%%)@."
+
(River.Quality.entries_with_content metrics)
+
total
+
(pct (River.Quality.entries_with_content metrics));
+
Fmt.pr " Entries with dates: %3d/%d (%5.1f%%)@."
+
(River.Quality.entries_with_date metrics)
+
total
+
(pct (River.Quality.entries_with_date metrics));
+
Fmt.pr " Entries with authors: %3d/%d (%5.1f%%)@."
+
(River.Quality.entries_with_author metrics)
+
total
+
(pct (River.Quality.entries_with_author metrics));
+
Fmt.pr " Entries with summaries:%3d/%d (%5.1f%%)@."
+
(River.Quality.entries_with_summary metrics)
+
total
+
(pct (River.Quality.entries_with_summary metrics));
+
Fmt.pr " Entries with tags: %3d/%d (%5.1f%%)@."
+
(River.Quality.entries_with_tags metrics)
+
total
+
(pct (River.Quality.entries_with_tags metrics));
+
Fmt.pr "@.";
+
+
(* Content statistics *)
+
if River.Quality.entries_with_content metrics > 0 then begin
+
Fmt.pr "%a@." Fmt.(styled `Cyan string) "Content Statistics:";
+
Fmt.pr " Average length: %.0f characters@."
+
(River.Quality.avg_content_length metrics);
+
Fmt.pr " Min length: %d characters@."
+
(River.Quality.min_content_length metrics);
+
Fmt.pr " Max length: %d characters@."
+
(River.Quality.max_content_length metrics);
+
Fmt.pr "@."
+
end;
+
+
(* Posting frequency *)
+
(match River.Quality.posting_frequency_days metrics with
+
| Some freq ->
+
Fmt.pr "%a@." Fmt.(styled `Cyan string) "Posting Frequency:";
+
Fmt.pr " Average: %.1f days between posts@." freq;
+
let posts_per_week = 7.0 /. freq in
+
Fmt.pr " (~%.1f posts per week)@." posts_per_week;
+
Fmt.pr "@."
+
| None ->
+
Fmt.pr "%a@.@." Fmt.(styled `Faint string)
+
"Not enough data to calculate posting frequency");
+
+
Fmt.pr "@.";
+
0
+
) $ username_arg)
+
+
let main_cmd =
+
let doc = "River feed management CLI" in
+
let main_info = Cmd.info "river-cli" ~version:"1.0" ~doc in
+
let sync_cmd =
+
Eiocmd.run
+
~use_keyeio:false
+
~info:(Cmd.info "sync" ~doc:"Sync feeds for users")
+
~app_name:"river"
+
~service:"river"
+
sync
+
in
+
let list_cmd =
+
Eiocmd.run
+
~use_keyeio:false
+
~info:(Cmd.info "list" ~doc:"List recent posts (from all users by default, or specify a user)")
+
~app_name:"river"
+
~service:"river"
+
list
+
in
+
let info_cmd =
+
Eiocmd.run
+
~use_keyeio:false
+
~info:(Cmd.info "info" ~doc:"Show detailed post information")
+
~app_name:"river"
+
~service:"river"
+
info
+
in
+
let merge_cmd =
+
Eiocmd.run
+
~use_keyeio:false
+
~info:(Cmd.info "merge" ~doc:"Export a merged feed combining all users' feeds")
+
~app_name:"river"
+
~service:"river"
+
merge
+
in
+
let quality_cmd =
+
Eiocmd.run
+
~use_keyeio:false
+
~info:(Cmd.info "quality" ~doc:"Analyze feed quality metrics for a user")
+
~app_name:"river"
+
~service:"river"
+
quality
+
in
+
Cmd.group main_info [user_cmd; sync_cmd; list_cmd; info_cmd; merge_cmd; quality_cmd]
+115
stack/river/cmd/river_cmd.mli
···
···
+
(** River.Cmd - Cmdliner terms for River CLI
+
+
This module provides cmdliner terms that are thin wrappers around
+
the River library functions. All business logic resides in the
+
main River module. *)
+
+
(** {1 Cmdliner Terms}
+
+
These terms can be used to build command-line interfaces using
+
Cmdliner and Eiocmd. They handle argument parsing and call into
+
the River library functions. *)
+
+
open Cmdliner
+
+
(** {2 User Management Commands} *)
+
+
val user_add :
+
(Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t
+
(** [user_add] command term for adding a new user.
+
+
Reads: username, fullname, email from command-line arguments.
+
Calls: [River.State.create_user] *)
+
+
val user_remove :
+
(Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t
+
(** [user_remove] command term for removing a user.
+
+
Reads: username from command-line arguments.
+
Calls: [River.State.delete_user] *)
+
+
val user_list :
+
(Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t
+
(** [user_list] command term for listing all users.
+
+
Calls: [River.State.list_users] *)
+
+
val user_show :
+
(Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t
+
(** [user_show] command term for showing user details.
+
+
Reads: username from command-line arguments.
+
Calls: [River.State.get_user] *)
+
+
val user_add_feed :
+
(Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t
+
(** [user_add_feed] command term for adding a feed to a user.
+
+
Reads: username, name, url from command-line arguments.
+
Calls: [River.State.get_user], [River.User.add_feed], [River.State.update_user] *)
+
+
val user_remove_feed :
+
(Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t
+
(** [user_remove_feed] command term for removing a feed from a user.
+
+
Reads: username, url from command-line arguments.
+
Calls: [River.State.get_user], [River.User.remove_feed], [River.State.update_user] *)
+
+
val user_cmd : int Cmd.t
+
(** [user_cmd] is the user management command group containing all user subcommands. *)
+
+
(** {2 Feed Sync Commands} *)
+
+
val sync :
+
(Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t
+
(** [sync] command term for syncing feeds.
+
+
Reads: optional username from command-line arguments.
+
Calls: [River.State.sync_user] or [River.State.sync_all] *)
+
+
(** {2 Post Listing Commands} *)
+
+
val list :
+
(Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t
+
(** [list] command term for listing posts with enhanced formatting.
+
+
Features:
+
- Pretty-printed output with colors using Fmt
+
- Clear ID display (never truncated) for each post
+
- Compact view (default): shows title, ID, author, and date
+
- Metadata view (--metadata/-m): shows all post metadata including summary,
+
content preview (truncated), links, and tags
+
+
Reads: optional username, optional limit, --metadata flag from command-line arguments.
+
Calls: [River.State.get_user_posts] or [River.State.get_all_posts] *)
+
+
val info :
+
(Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t
+
(** [info] command term for showing detailed post information.
+
+
Reads: post ID, --full flag from command-line arguments.
+
Uses Logs for informational output (controlled by -v/--verbose from Eiocmd).
+
Calls: [River.State.get_all_posts] *)
+
+
(** {2 Feed Export Commands} *)
+
+
val merge :
+
(Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t
+
(** [merge] command term for exporting merged feed.
+
+
Reads: format (atom|jsonfeed), title, limit from command-line arguments.
+
Calls: [River.State.export_merged_feed] *)
+
+
(** {2 Quality Analysis Commands} *)
+
+
val quality :
+
(Eio_unix.Stdenv.base -> Xdge.t -> Keyeio.Profile.t -> int) Term.t
+
(** [quality] command term for analyzing feed quality.
+
+
Reads: username from command-line arguments.
+
Calls: [River.State.analyze_user_quality] *)
+
+
(** {2 Main Command} *)
+
+
val main_cmd : int Cmd.t
+
(** [main_cmd] is the main command group containing all River CLI commands. *)
+6 -10
stack/river/example/aggregate_feeds.ml
···
let sources =
-
River.
[
-
{ name = "KC Sivaramakrishnan"; url = "http://kcsrk.info/atom-ocaml.xml" };
-
{
-
name = "Amir Chaudhry";
-
url = "http://amirchaudhry.com/tags/ocamllabs-atom.xml";
-
};
]
let main env =
(* Use River.with_session for proper resource management *)
-
River.with_session env @@ fun session ->
-
let feeds = List.map (River.fetch session) sources in
-
let posts = River.posts feeds in
-
let entries = River.create_atom_entries posts in
let feed =
let authors = [ Syndic.Atom.author "OCaml Blog" ] in
let id = Uri.of_string "https://ocaml.org/atom.xml" in
···
let sources =
[
+
River.Source.make ~name:"KC Sivaramakrishnan" ~url:"http://kcsrk.info/atom-ocaml.xml";
+
River.Source.make ~name:"Amir Chaudhry" ~url:"http://amirchaudhry.com/tags/ocamllabs-atom.xml";
]
let main env =
(* Use River.with_session for proper resource management *)
+
River.Session.with_session env @@ fun session ->
+
let feeds = List.map (River.Feed.fetch session) sources in
+
let posts = River.Post.of_feeds feeds in
+
let entries = River.Format.Atom.entries_of_posts posts in
let feed =
let authors = [ Syndic.Atom.author "OCaml Blog" ] in
let id = Uri.of_string "https://ocaml.org/atom.xml" in
-45
stack/river/lib/client.ml
···
-
(*
-
* Copyright (c) 2014, OCaml.org project
-
* Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
-
*
-
* Permission to use, copy, modify, and distribute this software for any
-
* purpose with or without fee is hereby granted, provided that the above
-
* copyright notice and this permission notice appear in all copies.
-
*
-
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
*)
-
-
(* River HTTP client using Requests *)
-
-
let src = Logs.Src.create "river.client" ~doc:"River HTTP client"
-
module Log = (val Logs.src_log src : Logs.LOG)
-
-
type t = {
-
session : Requests.t;
-
}
-
-
let create ~sw (env : _ ) =
-
Log.info (fun m -> m "Creating River client");
-
let session = Requests.create ~sw
-
~default_headers:(Requests.Headers.of_list [
-
("User-Agent", "OCaml-River/1.0");
-
])
-
~follow_redirects:true
-
~max_redirects:5
-
~verify_tls:true
-
env
-
in
-
{ session }
-
-
let with_client (env : _) f =
-
Eio.Switch.run @@ fun sw ->
-
let client = create ~sw env in
-
f client
-
-
let session t = t.session
···
-57
stack/river/lib/client.mli
···
-
(*
-
* Copyright (c) 2014, OCaml.org project
-
* Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
-
*
-
* Permission to use, copy, modify, and distribute this software for any
-
* purpose with or without fee is hereby granted, provided that the above
-
* copyright notice and this permission notice appear in all copies.
-
*
-
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
*)
-
-
(** River HTTP client using Requests library.
-
-
This module provides a session-based HTTP client for fetching RSS/Atom feeds.
-
The client manages a Requests session with appropriate defaults for feed fetching. *)
-
-
(** The type of a River HTTP client *)
-
type t
-
-
(** [create ~sw env] creates a new River client with a Requests session.
-
-
The session is configured with:
-
- User-Agent: "OCaml-River/1.0"
-
- Automatic redirect following (max 5 redirects)
-
- TLS verification enabled
-
-
@param sw The switch for resource management
-
@param env The Eio environment providing network and time resources *)
-
val create :
-
sw:Eio.Switch.t ->
-
< clock : float Eio.Time.clock_ty Eio.Resource.t;
-
fs : Eio.Fs.dir_ty Eio.Path.t;
-
net : [ `Generic | `Unix ] Eio.Net.ty Eio.Resource.t; .. > ->
-
t
-
-
(** [with_client env f] creates a client and automatically manages its lifecycle.
-
-
This is the recommended way to use the client as it ensures proper cleanup.
-
-
@param env The Eio environment
-
@param f The function to run with the client *)
-
val with_client :
-
< clock : float Eio.Time.clock_ty Eio.Resource.t;
-
fs : Eio.Fs.dir_ty Eio.Path.t;
-
net : [ `Generic | `Unix ] Eio.Net.ty Eio.Resource.t; .. > ->
-
(t -> 'a) -> 'a
-
-
(** [session t] returns the underlying Requests session.
-
-
This is used internally by River's HTTP functions. *)
-
val session : t -> Requests.t
···
+1 -2
stack/river/lib/dune
···
(library
(name river)
(public_name river)
-
(wrapped false)
-
(libraries eio eio_main requests requests_json_api logs str syndic lambdasoup uri ptime jsonfeed jsont jsont.bytesrw cacheio xdge))
···
(library
(name river)
(public_name river)
+
(libraries eio eio_main requests requests_json_api logs str syndic lambdasoup uri ptime jsonfeed jsont jsont.bytesrw xdge cmdliner eiocmd fmt))
-106
stack/river/lib/feed.ml
···
-
(*
-
* Copyright (c) 2014, OCaml.org project
-
* Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
-
*
-
* Permission to use, copy, modify, and distribute this software for any
-
* purpose with or without fee is hereby granted, provided that the above
-
* copyright notice and this permission notice appear in all copies.
-
*
-
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
*)
-
-
let src = Logs.Src.create "river.feed" ~doc:"River feed parsing"
-
module Log = (val Logs.src_log src : Logs.LOG)
-
-
type source = { name : string; url : string }
-
type content = Atom of Syndic.Atom.feed | Rss2 of Syndic.Rss2.channel | Json of Jsonfeed.t
-
-
let string_of_feed = function Atom _ -> "Atom" | Rss2 _ -> "Rss2" | Json _ -> "JSONFeed"
-
-
type t = { name : string; title : string; url : string; content : content }
-
-
let classify_feed ~xmlbase (body : string) =
-
Log.debug (fun m -> m "Attempting to parse feed (%d bytes)" (String.length body));
-
-
(* Quick check - does it look like JSON? *)
-
let looks_like_json =
-
String.length body > 0 &&
-
let first_char = String.get body 0 in
-
first_char = '{' || first_char = '['
-
in
-
-
if looks_like_json then (
-
(* Try JSONFeed first *)
-
Log.debug (fun m -> m "Body looks like JSON, trying JSONFeed parser");
-
match Jsonfeed.of_string body with
-
| Ok jsonfeed ->
-
Log.debug (fun m -> m "Successfully parsed as JSONFeed");
-
Json jsonfeed
-
| Error err ->
-
Log.debug (fun m -> m "Not a JSONFeed: %s" (Jsont.Error.to_string err));
-
(* Fall through to XML parsing *)
-
failwith "Not a valid JSONFeed"
-
) else (
-
(* Try XML formats *)
-
try
-
let feed = Atom (Syndic.Atom.parse ~xmlbase (Xmlm.make_input (`String (0, body)))) in
-
Log.debug (fun m -> m "Successfully parsed as Atom feed");
-
feed
-
with
-
| Syndic.Atom.Error.Error (pos, msg) -> (
-
Log.debug (fun m -> m "Not an Atom feed: %s at position (%d, %d)"
-
msg (fst pos) (snd pos));
-
try
-
let feed = Rss2 (Syndic.Rss2.parse ~xmlbase (Xmlm.make_input (`String (0, body)))) in
-
Log.debug (fun m -> m "Successfully parsed as RSS2 feed");
-
feed
-
with Syndic.Rss2.Error.Error (pos, msg) ->
-
Log.err (fun m -> m "Failed to parse as RSS2: %s at position (%d, %d)"
-
msg (fst pos) (snd pos));
-
failwith "Neither Atom nor RSS2 feed")
-
| Not_found as e ->
-
Log.err (fun m -> m "Not_found exception during Atom feed parsing");
-
Log.err (fun m -> m "Backtrace:\n%s" (Printexc.get_backtrace ()));
-
raise e
-
| e ->
-
Log.err (fun m -> m "Unexpected exception during feed parsing: %s"
-
(Printexc.to_string e));
-
Log.err (fun m -> m "Backtrace:\n%s" (Printexc.get_backtrace ()));
-
raise e
-
)
-
-
let fetch client (source : source) =
-
Log.info (fun m -> m "Fetching feed '%s' from %s" source.name source.url);
-
-
let xmlbase = Uri.of_string @@ source.url in
-
-
(* Use Requests_json_api.get_result for clean Result-based error handling *)
-
let session = Client.session client in
-
let response =
-
match Requests_json_api.get_result session source.url with
-
| Ok body ->
-
Log.info (fun m -> m "Successfully fetched %s (%d bytes)" source.url (String.length body));
-
body
-
| Error (status, msg) ->
-
Log.err (fun m -> m "Failed to fetch feed '%s': HTTP %d - %s" source.name status msg);
-
failwith (Printf.sprintf "HTTP %d: %s" status msg)
-
in
-
-
let content = classify_feed ~xmlbase response in
-
let title =
-
match content with
-
| Atom atom -> Util.string_of_text_construct atom.Syndic.Atom.title
-
| Rss2 ch -> ch.Syndic.Rss2.title
-
| Json jsonfeed -> Jsonfeed.title jsonfeed
-
in
-
-
Log.info (fun m -> m "Successfully fetched %s feed '%s' (title: '%s')"
-
(string_of_feed content) source.name title);
-
-
{ name = source.name; title; content; url = source.url }
···
-309
stack/river/lib/markdown_converter.ml
···
-
(** HTML to Markdown converter using Lambda Soup *)
-
-
(** Extract all links from HTML content *)
-
let extract_links html_str =
-
try
-
let soup = Soup.parse html_str in
-
let links = Soup.select "a[href]" soup in
-
Soup.fold (fun acc link ->
-
match Soup.attribute "href" link with
-
| Some href ->
-
let text = Soup.texts link |> String.concat "" |> String.trim in
-
(href, text) :: acc
-
| None -> acc
-
) [] links
-
|> List.rev
-
with _ -> []
-
-
(** Check if string contains any whitespace *)
-
let has_whitespace s =
-
try
-
let _ = Str.search_forward (Str.regexp "[ \t\n\r]") s 0 in
-
true
-
with Not_found -> false
-
-
(** Clean up excessive newlines and normalize spacing *)
-
let cleanup_markdown s =
-
(* Normalize line endings *)
-
let s = Str.global_replace (Str.regexp "\r\n") "\n" s in
-
-
(* Remove trailing whitespace from each line *)
-
let lines = String.split_on_char '\n' s in
-
let lines = List.map (fun line ->
-
(* Trim trailing spaces but preserve leading spaces for indentation *)
-
let len = String.length line in
-
let rec find_last_non_space i =
-
if i < 0 then -1
-
else if line.[i] = ' ' || line.[i] = '\t' then find_last_non_space (i - 1)
-
else i
-
in
-
let last = find_last_non_space (len - 1) in
-
if last < 0 then ""
-
else String.sub line 0 (last + 1)
-
) lines in
-
-
(* Join back and collapse excessive blank lines *)
-
let s = String.concat "\n" lines in
-
-
(* Replace 3+ consecutive newlines with just 2 *)
-
let s = Str.global_replace (Str.regexp "\n\n\n+") "\n\n" s in
-
-
(* Trim leading and trailing whitespace *)
-
String.trim s
-
-
(** Convert HTML to Markdown using state-based whitespace handling *)
-
let html_to_markdown html_str =
-
try
-
let soup = Soup.parse html_str in
-
let buffer = Buffer.create 256 in
-
-
(* State: track if we need to insert a space before next text *)
-
let need_space = ref false in
-
-
(* Get last character in buffer, if any *)
-
let last_char () =
-
let len = Buffer.length buffer in
-
if len = 0 then None
-
else Some (Buffer.nth buffer (len - 1))
-
in
-
-
(* Add text with proper spacing *)
-
let add_text text =
-
let trimmed = String.trim text in
-
if trimmed <> "" then begin
-
(* Check if text starts with punctuation that shouldn't have space before it *)
-
let starts_with_punctuation =
-
String.length trimmed > 0 &&
-
(match trimmed.[0] with
-
| ',' | '.' | ';' | ':' | '!' | '?' | ')' | ']' | '}' -> true
-
| _ -> false)
-
in
-
-
(* Add space if needed, unless we're before punctuation *)
-
if !need_space && not starts_with_punctuation then begin
-
match last_char () with
-
| Some (' ' | '\n') -> ()
-
| _ -> Buffer.add_char buffer ' '
-
end;
-
Buffer.add_string buffer trimmed;
-
need_space := false
-
end
-
in
-
-
(* Mark that we need space before next text (for inline elements) *)
-
let mark_space_needed () =
-
need_space := has_whitespace (Buffer.contents buffer) || Buffer.length buffer > 0
-
in
-
-
(* Process header with ID/anchor handling *)
-
let process_header level elem =
-
need_space := false;
-
-
(* Check if header contains a link with an ID fragment *)
-
let link_opt = Soup.select_one "a[href]" elem in
-
let anchor_id = match link_opt with
-
| Some link ->
-
(match Soup.attribute "href" link with
-
| Some href ->
-
(* Extract fragment from URL *)
-
let uri = Uri.of_string href in
-
Uri.fragment uri
-
| None -> None)
-
| None -> None
-
in
-
-
(* Add anchor if we found an ID *)
-
(match anchor_id with
-
| Some id when id <> "" ->
-
Buffer.add_string buffer (Printf.sprintf "\n<a name=\"%s\"></a>\n" id)
-
| _ -> ());
-
-
(* Add the header marker *)
-
let marker = String.make level '#' in
-
Buffer.add_string buffer ("\n" ^ marker ^ " ");
-
-
(* Get text content, excluding link tags *)
-
let text = Soup.texts elem |> String.concat " " |> String.trim in
-
Buffer.add_string buffer text;
-
-
Buffer.add_string buffer "\n\n";
-
need_space := false
-
in
-
-
let rec process_node node =
-
match Soup.element node with
-
| Some elem ->
-
let tag = Soup.name elem in
-
(match tag with
-
(* Block elements - reset space tracking *)
-
| "h1" -> process_header 1 elem
-
| "h2" -> process_header 2 elem
-
| "h3" -> process_header 3 elem
-
| "h4" -> process_header 4 elem
-
| "h5" -> process_header 5 elem
-
| "h6" -> process_header 6 elem
-
| "p" ->
-
need_space := false;
-
Soup.children elem |> Soup.iter process_node;
-
Buffer.add_string buffer "\n\n";
-
need_space := false
-
| "br" ->
-
Buffer.add_string buffer "\n";
-
need_space := false
-
(* Inline elements - preserve space tracking *)
-
| "strong" | "b" ->
-
(* Add space before if needed *)
-
if !need_space then begin
-
match last_char () with
-
| Some (' ' | '\n') -> ()
-
| _ -> Buffer.add_char buffer ' '
-
end;
-
Buffer.add_string buffer "**";
-
need_space := false;
-
Soup.children elem |> Soup.iter process_node;
-
Buffer.add_string buffer "**";
-
mark_space_needed ()
-
| "em" | "i" ->
-
(* Add space before if needed *)
-
if !need_space then begin
-
match last_char () with
-
| Some (' ' | '\n') -> ()
-
| _ -> Buffer.add_char buffer ' '
-
end;
-
Buffer.add_string buffer "*";
-
need_space := false;
-
Soup.children elem |> Soup.iter process_node;
-
Buffer.add_string buffer "*";
-
mark_space_needed ()
-
| "code" ->
-
(* Add space before if needed *)
-
if !need_space then begin
-
match last_char () with
-
| Some (' ' | '\n') -> ()
-
| _ -> Buffer.add_char buffer ' '
-
end;
-
Buffer.add_string buffer "`";
-
need_space := false;
-
Soup.children elem |> Soup.iter process_node;
-
Buffer.add_string buffer "`";
-
mark_space_needed ()
-
| "pre" ->
-
need_space := false;
-
Buffer.add_string buffer "\n```\n";
-
Soup.children elem |> Soup.iter process_node;
-
Buffer.add_string buffer "\n```\n\n";
-
need_space := false
-
| "a" ->
-
let text = Soup.texts elem |> String.concat " " |> String.trim in
-
let href = Soup.attribute "href" elem in
-
(match href with
-
| Some href ->
-
(* Add space before link if needed *)
-
if !need_space then begin
-
match last_char () with
-
| Some (' ' | '\n') -> ()
-
| _ -> Buffer.add_char buffer ' '
-
end;
-
need_space := false;
-
-
(* Add the link markdown *)
-
if text = "" then
-
Buffer.add_string buffer (Printf.sprintf "<%s>" href)
-
else
-
Buffer.add_string buffer (Printf.sprintf "[%s](%s)" text href);
-
-
(* Mark that space may be needed after link *)
-
mark_space_needed ()
-
| None ->
-
add_text text)
-
| "ul" | "ol" ->
-
need_space := false;
-
Buffer.add_string buffer "\n";
-
let is_ordered = tag = "ol" in
-
let items = Soup.children elem |> Soup.to_list in
-
List.iteri (fun i item ->
-
match Soup.element item with
-
| Some li when Soup.name li = "li" ->
-
need_space := false;
-
if is_ordered then
-
Buffer.add_string buffer (Printf.sprintf "%d. " (i + 1))
-
else
-
Buffer.add_string buffer "- ";
-
Soup.children li |> Soup.iter process_node;
-
Buffer.add_string buffer "\n"
-
| _ -> ()
-
) items;
-
Buffer.add_string buffer "\n";
-
need_space := false
-
| "blockquote" ->
-
need_space := false;
-
Buffer.add_string buffer "\n> ";
-
Soup.children elem |> Soup.iter process_node;
-
Buffer.add_string buffer "\n\n";
-
need_space := false
-
| "img" ->
-
(* Add space before if needed *)
-
if !need_space then begin
-
match last_char () with
-
| Some (' ' | '\n') -> ()
-
| _ -> Buffer.add_char buffer ' '
-
end;
-
let alt = Soup.attribute "alt" elem |> Option.value ~default:"" in
-
let src = Soup.attribute "src" elem |> Option.value ~default:"" in
-
Buffer.add_string buffer (Printf.sprintf "![%s](%s)" alt src);
-
need_space := false;
-
mark_space_needed ()
-
| "hr" ->
-
need_space := false;
-
Buffer.add_string buffer "\n---\n\n";
-
need_space := false
-
(* Strip these tags but keep content *)
-
| "div" | "span" | "article" | "section" | "header" | "footer"
-
| "main" | "nav" | "aside" | "figure" | "figcaption" | "details" | "summary" ->
-
Soup.children elem |> Soup.iter process_node
-
(* Ignore script, style, etc *)
-
| "script" | "style" | "noscript" -> ()
-
(* Default: just process children *)
-
| _ ->
-
Soup.children elem |> Soup.iter process_node)
-
| None ->
-
(* Text node - handle whitespace properly *)
-
match Soup.leaf_text node with
-
| Some text ->
-
(* If text is only whitespace, mark that we need space *)
-
let trimmed = String.trim text in
-
if trimmed = "" then begin
-
if has_whitespace text then
-
need_space := true
-
end else begin
-
(* Text has content - check if it had leading/trailing whitespace *)
-
let had_leading_ws = has_whitespace text &&
-
(String.length text > 0 &&
-
(text.[0] = ' ' || text.[0] = '\t' || text.[0] = '\n' || text.[0] = '\r')) in
-
-
(* If had leading whitespace, mark we need space *)
-
if had_leading_ws then need_space := true;
-
-
(* Add the text content *)
-
add_text trimmed;
-
-
(* If had trailing whitespace, mark we need space for next *)
-
let had_trailing_ws = has_whitespace text &&
-
(String.length text > 0 &&
-
let last = text.[String.length text - 1] in
-
last = ' ' || last = '\t' || last = '\n' || last = '\r') in
-
if had_trailing_ws then need_space := true
-
end
-
| None -> ()
-
in
-
-
Soup.children soup |> Soup.iter process_node;
-
-
(* Clean up the result *)
-
let result = Buffer.contents buffer in
-
cleanup_markdown result
-
with _ -> html_str
-
-
(** Convert HTML content to clean Markdown *)
-
let to_markdown html_str =
-
html_to_markdown html_str
···
-7
stack/river/lib/markdown_converter.mli
···
-
(** HTML to Markdown converter *)
-
-
(** Extract all links from HTML content as (href, anchor_text) pairs *)
-
val extract_links : string -> (string * string) list
-
-
(** Convert HTML content to clean Markdown format *)
-
val to_markdown : string -> string
···
-80
stack/river/lib/meta.ml
···
-
(*
-
* Copyright (c) 2014, OCaml.org project
-
* Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
-
*
-
* Permission to use, copy, modify, and distribute this software for any
-
* purpose with or without fee is hereby granted, provided that the above
-
* copyright notice and this permission notice appear in all copies.
-
*
-
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
*)
-
-
(** This module determines an image to be used as preview of a website.
-
-
It does this by following the same logic Google+ and other websites use, and
-
described in this article:
-
https://www.raymondcamden.com/2011/07/26/How-are-Facebook-and-Google-creating-link-previews *)
-
-
let og_image html =
-
let open Soup in
-
let soup = parse html in
-
try soup $ "meta[property=og:image]" |> R.attribute "content" |> Option.some
-
with Failure _ -> None
-
-
let image_src html =
-
let open Soup in
-
let soup = parse html in
-
try soup $ "link[rel=\"image_src\"]" |> R.attribute "href" |> Option.some
-
with Failure _ -> None
-
-
let twitter_image html =
-
let open Soup in
-
let soup = parse html in
-
try
-
soup $ "meta[name=\"twitter:image\"]" |> R.attribute "content"
-
|> Option.some
-
with Failure _ -> None
-
-
let og_description html =
-
let open Soup in
-
let soup = parse html in
-
try
-
soup $ "meta[property=og:description]" |> R.attribute "content"
-
|> Option.some
-
with Failure _ -> None
-
-
let description html =
-
let open Soup in
-
let soup = parse html in
-
try
-
soup $ "meta[property=description]" |> R.attribute "content" |> Option.some
-
with Failure _ -> None
-
-
let preview_image html =
-
let preview_image =
-
match og_image html with
-
| None -> (
-
match image_src html with
-
| None -> twitter_image html
-
| Some x -> Some x)
-
| Some x -> Some x
-
in
-
match Option.map String.trim preview_image with
-
| Some "" -> None
-
| Some x -> Some x
-
| None -> None
-
-
let description html =
-
let preview_image =
-
match og_description html with None -> description html | Some x -> Some x
-
in
-
match Option.map String.trim preview_image with
-
| Some "" -> None
-
| Some x -> Some x
-
| None -> None
···
-449
stack/river/lib/post.ml
···
-
(*
-
* Copyright (c) 2014, OCaml.org project
-
* Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
-
*
-
* Permission to use, copy, modify, and distribute this software for any
-
* purpose with or without fee is hereby granted, provided that the above
-
* copyright notice and this permission notice appear in all copies.
-
*
-
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
*)
-
-
let src = Logs.Src.create "river.post" ~doc:"River post processing"
-
module Log = (val Logs.src_log src : Logs.LOG)
-
-
type t = {
-
id : string;
-
title : string;
-
link : Uri.t option;
-
date : Syndic.Date.t option;
-
feed : Feed.t;
-
author : string;
-
email : string;
-
content : Soup.soup Soup.node;
-
mutable link_response : (string, string) result option;
-
tags : string list;
-
summary : string option;
-
}
-
-
(** Generate a stable, unique ID from available data *)
-
let generate_id ?guid ?link ?title ?date ~feed_url () =
-
match guid with
-
| Some id when id <> "" ->
-
(* Use explicit ID/GUID if available *)
-
id
-
| _ ->
-
match link with
-
| Some uri when Uri.to_string uri <> "" ->
-
(* Use permalink as ID (stable and unique) *)
-
Uri.to_string uri
-
| _ ->
-
(* Fallback: hash of feed_url + title + date *)
-
let title_str = Option.value title ~default:"" in
-
let date_str =
-
match date with
-
| Some d -> Ptime.to_rfc3339 d
-
| None -> ""
-
in
-
let composite = Printf.sprintf "%s|%s|%s" feed_url title_str date_str in
-
(* Use SHA256 for stable hashing *)
-
Digest.string composite |> Digest.to_hex
-
-
let post_id post = post.id
-
-
let resolve_links_attr ~xmlbase attr el =
-
Soup.R.attribute attr el
-
|> Uri.of_string
-
|> Syndic.XML.resolve ~xmlbase
-
|> Uri.to_string
-
|> fun value -> Soup.set_attribute attr value el
-
-
(* Things that posts should not contain *)
-
let undesired_tags = [ "style"; "script" ]
-
let undesired_attr = [ "id" ]
-
-
let html_of_text ?xmlbase s =
-
let soup = Soup.parse s in
-
let ($$) = Soup.($$) in
-
soup $$ "a[href]" |> Soup.iter (resolve_links_attr ~xmlbase "href");
-
soup $$ "img[src]" |> Soup.iter (resolve_links_attr ~xmlbase "src");
-
undesired_tags |> List.iter (fun tag -> soup $$ tag |> Soup.iter Soup.delete);
-
soup $$ "*" |> Soup.iter (fun el ->
-
undesired_attr |> List.iter (fun attr -> Soup.delete_attribute attr el));
-
soup
-
-
(* Do not trust sites using XML for HTML content. Convert to string and parse
-
back. (Does not always fix bad HTML unfortunately.) *)
-
let html_of_syndic =
-
let ns_prefix _ = Some "" in
-
fun ?xmlbase h ->
-
html_of_text ?xmlbase
-
(String.concat "" (List.map (Syndic.XML.to_string ~ns_prefix) h))
-
-
let string_of_option = function None -> "" | Some s -> s
-
-
(* Email on the forge contain the name in parenthesis *)
-
let forge_name_re = Str.regexp ".*(\\([^()]*\\))"
-
-
let post_compare p1 p2 =
-
(* Most recent posts first. Posts with no date are always last *)
-
match (p1.date, p2.date) with
-
| Some d1, Some d2 -> Syndic.Date.compare d2 d1
-
| None, Some _ -> 1
-
| Some _, None -> -1
-
| None, None -> 1
-
-
let rec remove n l =
-
if n <= 0 then l else match l with [] -> [] | _ :: tl -> remove (n - 1) tl
-
-
let rec take n = function
-
| [] -> []
-
| e :: tl -> if n > 0 then e :: take (n - 1) tl else []
-
-
(* Blog feed
-
***********************************************************************)
-
-
let post_of_atom ~(feed : Feed.t) (e : Syndic.Atom.entry) =
-
Log.debug (fun m -> m "Processing Atom entry: %s"
-
(Util.string_of_text_construct e.title));
-
-
let link =
-
try
-
Some
-
(List.find (fun l -> l.Syndic.Atom.rel = Syndic.Atom.Alternate) e.links)
-
.href
-
with Not_found -> (
-
Log.debug (fun m -> m "No alternate link found, trying fallback");
-
match e.links with
-
| l :: _ -> Some l.href
-
| [] -> (
-
match Uri.scheme e.id with
-
| Some "http" -> Some e.id
-
| Some "https" -> Some e.id
-
| _ -> None))
-
in
-
let date =
-
match e.published with Some _ -> e.published | None -> Some e.updated
-
in
-
let content =
-
match e.content with
-
| Some (Text s) -> html_of_text s
-
| Some (Html (xmlbase, s)) -> html_of_text ?xmlbase s
-
| Some (Xhtml (xmlbase, h)) -> html_of_syndic ?xmlbase h
-
| Some (Mime _) | Some (Src _) | None -> (
-
match e.summary with
-
| Some (Text s) -> html_of_text s
-
| Some (Html (xmlbase, s)) -> html_of_text ?xmlbase s
-
| Some (Xhtml (xmlbase, h)) -> html_of_syndic ?xmlbase h
-
| None -> Soup.parse "")
-
in
-
let is_valid_author_name name =
-
(* Filter out empty strings and placeholder values like "Unknown" *)
-
let trimmed = String.trim name in
-
trimmed <> "" && trimmed <> "Unknown"
-
in
-
let author_name =
-
(* Fallback chain for author:
-
1. Entry author (if present, not empty, and not "Unknown")
-
2. Feed-level author (from Atom feed metadata)
-
3. Feed title (from Atom feed metadata)
-
4. Source name (manually entered feed name) *)
-
try
-
let author, _ = e.authors in
-
let trimmed = String.trim author.name in
-
if is_valid_author_name author.name then trimmed
-
else raise Not_found (* Try feed-level author *)
-
with Not_found -> (
-
match feed.content with
-
| Feed.Atom atom_feed -> (
-
(* Try feed-level authors *)
-
match atom_feed.Syndic.Atom.authors with
-
| author :: _ when is_valid_author_name author.name ->
-
String.trim author.name
-
| _ ->
-
(* Use feed title *)
-
Util.string_of_text_construct atom_feed.Syndic.Atom.title)
-
| Feed.Rss2 _ | Feed.Json _ ->
-
(* For RSS2 and JSONFeed, use the feed name which is the source name *)
-
feed.name)
-
in
-
(* Extract tags from Atom categories *)
-
let tags =
-
List.map (fun cat -> cat.Syndic.Atom.term) e.categories
-
in
-
(* Extract summary - convert from text_construct to string *)
-
let summary =
-
match e.summary with
-
| Some s -> Some (Util.string_of_text_construct s)
-
| None -> None
-
in
-
(* Generate unique ID *)
-
let guid = Uri.to_string e.id in
-
let title_str = Util.string_of_text_construct e.title in
-
let id =
-
generate_id ~guid ?link ~title:title_str ?date ~feed_url:feed.url ()
-
in
-
{
-
id;
-
title = title_str;
-
link;
-
date;
-
feed;
-
author = author_name;
-
email = "";
-
content;
-
link_response = None;
-
tags;
-
summary;
-
}
-
-
let post_of_rss2 ~(feed : Feed.t) it =
-
let title, content =
-
match it.Syndic.Rss2.story with
-
| All (t, xmlbase, d) -> (
-
( t,
-
match it.content with
-
| _, "" -> html_of_text ?xmlbase d
-
| xmlbase, c -> html_of_text ?xmlbase c ))
-
| Title t ->
-
let xmlbase, c = it.content in
-
(t, html_of_text ?xmlbase c)
-
| Description (xmlbase, d) -> (
-
( "",
-
match it.content with
-
| _, "" -> html_of_text ?xmlbase d
-
| xmlbase, c -> html_of_text ?xmlbase c ))
-
in
-
(* Note: it.link is of type Uri.t option in Syndic *)
-
let link =
-
match (it.guid, it.link) with
-
| Some u, _ when u.permalink -> Some u.data
-
| _, Some _ -> it.link
-
| Some u, _ ->
-
(* Sometimes the guid is indicated with isPermaLink="false" but is
-
nonetheless the only URL we get (e.g. ocamlpro). *)
-
Some u.data
-
| None, None -> None
-
in
-
(* Extract GUID string for ID generation *)
-
let guid_str =
-
match it.guid with
-
| Some u -> Some (Uri.to_string u.data)
-
| None -> None
-
in
-
(* RSS2 doesn't have a categories field exposed, use empty list *)
-
let tags = [] in
-
(* RSS2 doesn't have a separate summary field, so leave it empty *)
-
let summary = None in
-
(* Generate unique ID *)
-
let id =
-
generate_id ?guid:guid_str ?link ~title ?date:it.pubDate ~feed_url:feed.url ()
-
in
-
{
-
id;
-
title;
-
link;
-
feed;
-
author = feed.name;
-
email = string_of_option it.author;
-
content;
-
date = it.pubDate;
-
link_response = None;
-
tags;
-
summary;
-
}
-
-
let post_of_jsonfeed_item ~(feed : Feed.t) (item : Jsonfeed.Item.t) =
-
Log.debug (fun m -> m "Processing JSONFeed item: %s"
-
(Option.value (Jsonfeed.Item.title item) ~default:"Untitled"));
-
-
(* Extract content - prefer HTML, fall back to text *)
-
let content =
-
match Jsonfeed.Item.content item with
-
| `Html html -> html_of_text html
-
| `Text text -> html_of_text text
-
| `Both (html, _text) -> html_of_text html
-
in
-
-
(* Extract author - use first author if multiple *)
-
let author_name, author_email =
-
match Jsonfeed.Item.authors item with
-
| Some (first :: _) ->
-
let name = Jsonfeed.Author.name first |> Option.value ~default:"" in
-
(* JSONFeed authors don't typically have email *)
-
(name, "")
-
| _ ->
-
(* Fall back to feed-level authors or feed title *)
-
(match feed.content with
-
| Feed.Json jsonfeed ->
-
(match Jsonfeed.authors jsonfeed with
-
| Some (first :: _) ->
-
let name = Jsonfeed.Author.name first |> Option.value ~default:feed.title in
-
(name, "")
-
| _ -> (feed.title, ""))
-
| _ -> (feed.title, ""))
-
in
-
-
(* Link - use url field *)
-
let link =
-
Jsonfeed.Item.url item
-
|> Option.map Uri.of_string
-
in
-
-
(* Date *)
-
let date = Jsonfeed.Item.date_published item in
-
-
(* Summary *)
-
let summary = Jsonfeed.Item.summary item in
-
-
(* Tags *)
-
let tags =
-
Jsonfeed.Item.tags item
-
|> Option.value ~default:[]
-
in
-
-
(* Generate unique ID - JSONFeed items always have an id field (required) *)
-
let guid = Jsonfeed.Item.id item in
-
let title_str = Jsonfeed.Item.title item |> Option.value ~default:"Untitled" in
-
let id =
-
generate_id ~guid ?link ~title:title_str ?date ~feed_url:feed.url ()
-
in
-
-
{
-
id;
-
title = title_str;
-
link;
-
date;
-
feed;
-
author = author_name;
-
email = author_email;
-
content;
-
link_response = None;
-
tags;
-
summary;
-
}
-
-
let posts_of_feed c =
-
match c.Feed.content with
-
| Feed.Atom f ->
-
let posts = List.map (post_of_atom ~feed:c) f.Syndic.Atom.entries in
-
Log.debug (fun m -> m "Extracted %d posts from Atom feed '%s'"
-
(List.length posts) c.Feed.name);
-
posts
-
| Feed.Rss2 ch ->
-
let posts = List.map (post_of_rss2 ~feed:c) ch.Syndic.Rss2.items in
-
Log.debug (fun m -> m "Extracted %d posts from RSS2 feed '%s'"
-
(List.length posts) c.Feed.name);
-
posts
-
| Feed.Json jsonfeed ->
-
let items = Jsonfeed.items jsonfeed in
-
let posts = List.map (post_of_jsonfeed_item ~feed:c) items in
-
Log.debug (fun m -> m "Extracted %d posts from JSONFeed '%s'"
-
(List.length posts) c.Feed.name);
-
posts
-
-
let mk_entry post =
-
let content = Syndic.Atom.Html (None, Soup.to_string post.content) in
-
let contributors =
-
[ Syndic.Atom.author ~uri:(Uri.of_string post.feed.url) post.feed.name ]
-
in
-
let links =
-
match post.link with
-
| Some l -> [ Syndic.Atom.link ~rel:Syndic.Atom.Alternate l ]
-
| None -> []
-
in
-
(* TODO: include source *)
-
let id =
-
match post.link with
-
| Some l -> l
-
| None -> Uri.of_string (Digest.to_hex (Digest.string post.title))
-
in
-
let authors = (Syndic.Atom.author ~email:post.email post.author, []) in
-
let title : Syndic.Atom.text_construct = Syndic.Atom.Text post.title in
-
let updated =
-
match post.date with
-
(* Atom entry requires a date but RSS2 does not. So if a date
-
* is not available, just capture the current date. *)
-
| None -> Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get
-
| Some d -> d
-
in
-
Syndic.Atom.entry ~content ~contributors ~links ~id ~authors ~title ~updated
-
()
-
-
let mk_entries posts = List.map mk_entry posts
-
-
let mk_jsonfeed_item post =
-
(* Convert HTML content back to string *)
-
let html = Soup.to_string post.content in
-
let content = `Html html in
-
-
(* Create author *)
-
let authors =
-
if post.author <> "" then
-
let author = Jsonfeed.Author.create ~name:post.author () in
-
Some [author]
-
else
-
None
-
in
-
-
(* Create item *)
-
Jsonfeed.Item.create
-
~id:post.id
-
~content
-
?url:(Option.map Uri.to_string post.link)
-
~title:post.title
-
?summary:post.summary
-
?date_published:post.date
-
?authors
-
~tags:post.tags
-
()
-
-
let mk_jsonfeed_items posts = List.map mk_jsonfeed_item posts
-
-
let get_posts ?n ?(ofs = 0) planet_feeds =
-
Log.info (fun m -> m "Processing %d feeds for posts" (List.length planet_feeds));
-
-
let posts = List.concat @@ List.map posts_of_feed planet_feeds in
-
Log.debug (fun m -> m "Total posts collected: %d" (List.length posts));
-
-
let posts = List.sort post_compare posts in
-
Log.debug (fun m -> m "Posts sorted by date (most recent first)");
-
-
let posts = remove ofs posts in
-
let result =
-
match n with
-
| None ->
-
Log.debug (fun m -> m "Returning all %d posts (offset=%d)"
-
(List.length posts) ofs);
-
posts
-
| Some n ->
-
let limited = take n posts in
-
Log.debug (fun m -> m "Returning %d posts (requested=%d, offset=%d)"
-
(List.length limited) n ofs);
-
limited
-
in
-
result
-
-
(* Fetch the link response and cache it. *)
-
(* TODO: This requires environment for HTTP access
-
let fetch_link env t =
-
match (t.link, t.link_response) with
-
| None, _ -> None
-
| Some _, Some (Ok x) -> Some x
-
| Some _, Some (Error _) -> None
-
| Some link, None -> (
-
try
-
let response = Http.get env (Uri.to_string link) in
-
t.link_response <- Some (Ok response);
-
Some response
-
with _exn ->
-
t.link_response <- Some (Error "");
-
None)
-
*)
-
let fetch_link _ = None
···
+1683 -67
stack/river/lib/river.ml
···
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*)
let src = Logs.Src.create "river" ~doc:"River RSS/Atom aggregator"
module Log = (val Logs.src_log src : Logs.LOG)
-
(* Keep Client module internal *)
-
module Internal_client = Client
-
(* Abstract session type *)
-
type session = Client.t
-
type source = Feed.source = { name : string; url : string }
-
type feed = Feed.t
-
type post = Post.t
-
(* Session management *)
-
let init ~sw env =
-
Log.info (fun m -> m "Initializing River session");
-
Internal_client.create ~sw env
-
let with_session env f =
-
Log.info (fun m -> m "Creating River session");
-
Internal_client.with_client env f
-
(* Feed operations *)
-
let fetch session source =
-
Log.info (fun m -> m "Fetching feed: %s" source.name);
-
Feed.fetch session source
-
let name feed = feed.Feed.name
-
let url feed = feed.Feed.url
-
let posts feeds =
-
Log.info (fun m -> m "Aggregating posts from %d feed(s)" (List.length feeds));
-
let result = Post.get_posts feeds in
-
Log.info (fun m -> m "Aggregated %d posts total" (List.length result));
-
result
-
let title post = post.Post.title
-
let link post = post.Post.link
-
let date post = post.Post.date
-
let feed post = post.Post.feed
-
let author post = post.Post.author
-
let email post = post.Post.email
-
let content post = Soup.to_string post.Post.content
-
let id post = post.Post.id
-
let tags post = post.Post.tags
-
let summary post = post.Post.summary
-
let meta_description _post =
-
(* TODO: This requires environment for HTTP access *)
-
Log.debug (fun m -> m "meta_description not implemented (requires environment)");
-
None
-
let seo_image _post =
-
(* TODO: This requires environment for HTTP access *)
-
Log.debug (fun m -> m "seo_image not implemented (requires environment)");
-
None
-
let create_atom_entries posts =
-
Log.info (fun m -> m "Creating Atom entries for %d posts" (List.length posts));
-
Post.mk_entries posts
-
(* JSONFeed support *)
-
let create_jsonfeed_items posts =
-
Log.info (fun m -> m "Creating JSONFeed items for %d posts" (List.length posts));
-
Post.mk_jsonfeed_items posts
-
let create_jsonfeed ~title ?home_page_url ?feed_url ?description ?icon ?favicon posts =
-
Log.info (fun m -> m "Creating JSONFeed with %d posts" (List.length posts));
-
let items = create_jsonfeed_items posts in
-
Jsonfeed.create ~title ?home_page_url ?feed_url ?description ?icon ?favicon ~items ()
-
let jsonfeed_to_string ?(minify = false) jsonfeed =
-
match Jsonfeed.to_string ~minify jsonfeed with
-
| Ok s -> Ok s
-
| Error err -> Error (Jsont.Error.to_string err)
-
type feed_content =
-
| Atom of Syndic.Atom.feed
-
| Rss2 of Syndic.Rss2.channel
-
| JSONFeed of Jsonfeed.t
-
let feed_content feed =
-
match feed.Feed.content with
-
| Feed.Atom f -> Atom f
-
| Feed.Rss2 ch -> Rss2 ch
-
| Feed.Json jf -> JSONFeed jf
···
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*)
+
(** River RSS/Atom/JSONFeed aggregator library *)
+
let src = Logs.Src.create "river" ~doc:"River RSS/Atom aggregator"
module Log = (val Logs.src_log src : Logs.LOG)
+
(** {1 Internal Utilities} *)
+
+
module Text_extract = struct
+
open Syndic
+
+
(* Remove all tags *)
+
let rec syndic_to_buffer b = function
+
| XML.Node (_, _, subs) -> List.iter (syndic_to_buffer b) subs
+
| XML.Data (_, d) -> Buffer.add_string b d
+
+
let syndic_to_string x =
+
let b = Buffer.create 1024 in
+
List.iter (syndic_to_buffer b) x;
+
Buffer.contents b
+
+
let string_of_text_construct : Atom.text_construct -> string = function
+
| Atom.Text s | Atom.Html (_, s) -> s
+
| Atom.Xhtml (_, x) -> syndic_to_string x
+
end
+
+
module Html_meta = struct
+
[@@@warning "-32"] (* Suppress unused value warnings for internal utilities *)
+
+
(** This module determines an image to be used as preview of a website.
+
+
It does this by following the same logic Google+ and other websites use, and
+
described in this article:
+
https://www.raymondcamden.com/2011/07/26/How-are-Facebook-and-Google-creating-link-previews *)
+
+
let og_image html =
+
let open Soup in
+
let soup = parse html in
+
try soup $ "meta[property=og:image]" |> R.attribute "content" |> Option.some
+
with Failure _ -> None
+
+
let image_src html =
+
let open Soup in
+
let soup = parse html in
+
try soup $ "link[rel=\"image_src\"]" |> R.attribute "href" |> Option.some
+
with Failure _ -> None
+
+
let twitter_image html =
+
let open Soup in
+
let soup = parse html in
+
try
+
soup $ "meta[name=\"twitter:image\"]" |> R.attribute "content"
+
|> Option.some
+
with Failure _ -> None
+
+
let og_description html =
+
let open Soup in
+
let soup = parse html in
+
try
+
soup $ "meta[property=og:description]" |> R.attribute "content"
+
|> Option.some
+
with Failure _ -> None
+
+
let description html =
+
let open Soup in
+
let soup = parse html in
+
try
+
soup $ "meta[property=description]" |> R.attribute "content" |> Option.some
+
with Failure _ -> None
+
+
let preview_image html =
+
let preview_image =
+
match og_image html with
+
| None -> (
+
match image_src html with
+
| None -> twitter_image html
+
| Some x -> Some x)
+
| Some x -> Some x
+
in
+
match Option.map String.trim preview_image with
+
| Some "" -> None
+
| Some x -> Some x
+
| None -> None
+
+
let description html =
+
let preview_image =
+
match og_description html with None -> description html | Some x -> Some x
+
in
+
match Option.map String.trim preview_image with
+
| Some "" -> None
+
| Some x -> Some x
+
| None -> None
+
end
+
+
module Html_markdown = struct
+
[@@@warning "-32"] (* Suppress unused value warnings for internal utilities *)
+
+
(** HTML to Markdown converter using Lambda Soup *)
+
+
(** Extract all links from HTML content *)
+
let extract_links html_str =
+
try
+
let soup = Soup.parse html_str in
+
let links = Soup.select "a[href]" soup in
+
Soup.fold (fun acc link ->
+
match Soup.attribute "href" link with
+
| Some href ->
+
let text = Soup.texts link |> String.concat "" |> String.trim in
+
(href, text) :: acc
+
| None -> acc
+
) [] links
+
|> List.rev
+
with _ -> []
+
+
(** Check if string contains any whitespace *)
+
let has_whitespace s =
+
try
+
let _ = Str.search_forward (Str.regexp "[ \t\n\r]") s 0 in
+
true
+
with Not_found -> false
+
+
(** Clean up excessive newlines and normalize spacing *)
+
let cleanup_markdown s =
+
(* Normalize line endings *)
+
let s = Str.global_replace (Str.regexp "\r\n") "\n" s in
+
+
(* Remove trailing whitespace from each line *)
+
let lines = String.split_on_char '\n' s in
+
let lines = List.map (fun line ->
+
(* Trim trailing spaces but preserve leading spaces for indentation *)
+
let len = String.length line in
+
let rec find_last_non_space i =
+
if i < 0 then -1
+
else if line.[i] = ' ' || line.[i] = '\t' then find_last_non_space (i - 1)
+
else i
+
in
+
let last = find_last_non_space (len - 1) in
+
if last < 0 then ""
+
else String.sub line 0 (last + 1)
+
) lines in
+
+
(* Join back and collapse excessive blank lines *)
+
let s = String.concat "\n" lines in
+
+
(* Replace 3+ consecutive newlines with just 2 *)
+
let s = Str.global_replace (Str.regexp "\n\n\n+") "\n\n" s in
+
+
(* Trim leading and trailing whitespace *)
+
String.trim s
+
+
(** Convert HTML to Markdown using state-based whitespace handling.

    Parses [html_str] with lambdasoup and walks the resulting node tree,
    emitting Markdown into a buffer.  A [need_space] flag remembers
    whether whitespace that HTML parsing collapsed between inline
    elements must be re-inserted before the next run of text.

    On any exception the original [html_str] is returned unchanged, so
    the conversion is best-effort and never raises. *)
let html_to_markdown html_str =
  try
    let soup = Soup.parse html_str in
    let buffer = Buffer.create 256 in

    (* State: track if we need to insert a space before next text *)
    let need_space = ref false in

    (* Get last character in buffer, if any *)
    let last_char () =
      let len = Buffer.length buffer in
      if len = 0 then None
      else Some (Buffer.nth buffer (len - 1))
    in

    (* Add text with proper spacing *)
    let add_text text =
      let trimmed = String.trim text in
      if trimmed <> "" then begin
        (* Check if text starts with punctuation that shouldn't have space before it *)
        let starts_with_punctuation =
          String.length trimmed > 0 &&
          (match trimmed.[0] with
           | ',' | '.' | ';' | ':' | '!' | '?' | ')' | ']' | '}' -> true
           | _ -> false)
        in

        (* Add space if needed, unless we're before punctuation *)
        if !need_space && not starts_with_punctuation then begin
          match last_char () with
          | Some (' ' | '\n') -> ()
          | _ -> Buffer.add_char buffer ' '
        end;
        Buffer.add_string buffer trimmed;
        need_space := false
      end
    in

    (* Mark that we need space before next text (for inline elements).
       NOTE(review): the [|| Buffer.length buffer > 0] disjunct makes this
       true whenever the buffer is non-empty, so the [has_whitespace]
       check only matters when the buffer is empty — confirm intended. *)
    let mark_space_needed () =
      need_space := has_whitespace (Buffer.contents buffer) || Buffer.length buffer > 0
    in

    (* Process header with ID/anchor handling: emits an HTML anchor when
       the header's first link carries a URL fragment, then the "#"-style
       Markdown header with the element's concatenated text. *)
    let process_header level elem =
      need_space := false;

      (* Check if header contains a link with an ID fragment *)
      let link_opt = Soup.select_one "a[href]" elem in
      let anchor_id = match link_opt with
        | Some link ->
          (match Soup.attribute "href" link with
           | Some href ->
             (* Extract fragment from URL *)
             let uri = Uri.of_string href in
             Uri.fragment uri
           | None -> None)
        | None -> None
      in

      (* Add anchor if we found an ID *)
      (match anchor_id with
       | Some id when id <> "" ->
         Buffer.add_string buffer (Printf.sprintf "\n<a name=\"%s\"></a>\n" id)
       | _ -> ());

      (* Add the header marker *)
      let marker = String.make level '#' in
      Buffer.add_string buffer ("\n" ^ marker ^ " ");

      (* Get text content, excluding link tags *)
      let text = Soup.texts elem |> String.concat " " |> String.trim in
      Buffer.add_string buffer text;

      Buffer.add_string buffer "\n\n";
      need_space := false
    in

    (* Depth-first walk over the node tree, dispatching on tag name. *)
    let rec process_node node =
      match Soup.element node with
      | Some elem ->
        let tag = Soup.name elem in
        (match tag with
         (* Block elements - reset space tracking *)
         | "h1" -> process_header 1 elem
         | "h2" -> process_header 2 elem
         | "h3" -> process_header 3 elem
         | "h4" -> process_header 4 elem
         | "h5" -> process_header 5 elem
         | "h6" -> process_header 6 elem
         | "p" ->
           need_space := false;
           Soup.children elem |> Soup.iter process_node;
           Buffer.add_string buffer "\n\n";
           need_space := false
         | "br" ->
           Buffer.add_string buffer "\n";
           need_space := false
         (* Inline elements - preserve space tracking *)
         | "strong" | "b" ->
           (* Add space before if needed *)
           if !need_space then begin
             match last_char () with
             | Some (' ' | '\n') -> ()
             | _ -> Buffer.add_char buffer ' '
           end;
           Buffer.add_string buffer "**";
           need_space := false;
           Soup.children elem |> Soup.iter process_node;
           Buffer.add_string buffer "**";
           mark_space_needed ()
         | "em" | "i" ->
           (* Add space before if needed *)
           if !need_space then begin
             match last_char () with
             | Some (' ' | '\n') -> ()
             | _ -> Buffer.add_char buffer ' '
           end;
           Buffer.add_string buffer "*";
           need_space := false;
           Soup.children elem |> Soup.iter process_node;
           Buffer.add_string buffer "*";
           mark_space_needed ()
         | "code" ->
           (* Add space before if needed *)
           if !need_space then begin
             match last_char () with
             | Some (' ' | '\n') -> ()
             | _ -> Buffer.add_char buffer ' '
           end;
           Buffer.add_string buffer "`";
           need_space := false;
           Soup.children elem |> Soup.iter process_node;
           Buffer.add_string buffer "`";
           mark_space_needed ()
         | "pre" ->
           need_space := false;
           Buffer.add_string buffer "\n```\n";
           Soup.children elem |> Soup.iter process_node;
           Buffer.add_string buffer "\n```\n\n";
           need_space := false
         | "a" ->
           let text = Soup.texts elem |> String.concat " " |> String.trim in
           let href = Soup.attribute "href" elem in
           (match href with
            | Some href ->
              (* Add space before link if needed *)
              if !need_space then begin
                match last_char () with
                | Some (' ' | '\n') -> ()
                | _ -> Buffer.add_char buffer ' '
              end;
              need_space := false;

              (* Add the link markdown: autolink form for empty text *)
              if text = "" then
                Buffer.add_string buffer (Printf.sprintf "<%s>" href)
              else
                Buffer.add_string buffer (Printf.sprintf "[%s](%s)" text href);

              (* Mark that space may be needed after link *)
              mark_space_needed ()
            | None ->
              add_text text)
         | "ul" | "ol" ->
           need_space := false;
           Buffer.add_string buffer "\n";
           let is_ordered = tag = "ol" in
           let items = Soup.children elem |> Soup.to_list in
           (* NOTE(review): [i] counts all child nodes, including
              whitespace text nodes, so ordered-list numbering can skip
              values — confirm whether that is acceptable. *)
           List.iteri (fun i item ->
             match Soup.element item with
             | Some li when Soup.name li = "li" ->
               need_space := false;
               if is_ordered then
                 Buffer.add_string buffer (Printf.sprintf "%d. " (i + 1))
               else
                 Buffer.add_string buffer "- ";
               Soup.children li |> Soup.iter process_node;
               Buffer.add_string buffer "\n"
             | _ -> ()
           ) items;
           Buffer.add_string buffer "\n";
           need_space := false
         | "blockquote" ->
           need_space := false;
           Buffer.add_string buffer "\n> ";
           Soup.children elem |> Soup.iter process_node;
           Buffer.add_string buffer "\n\n";
           need_space := false
         | "img" ->
           (* Add space before if needed *)
           if !need_space then begin
             match last_char () with
             | Some (' ' | '\n') -> ()
             | _ -> Buffer.add_char buffer ' '
           end;
           let alt = Soup.attribute "alt" elem |> Option.value ~default:"" in
           let src = Soup.attribute "src" elem |> Option.value ~default:"" in
           Buffer.add_string buffer (Printf.sprintf "![%s](%s)" alt src);
           need_space := false;
           mark_space_needed ()
         | "hr" ->
           need_space := false;
           Buffer.add_string buffer "\n---\n\n";
           need_space := false
         (* Strip these tags but keep content *)
         | "div" | "span" | "article" | "section" | "header" | "footer"
         | "main" | "nav" | "aside" | "figure" | "figcaption" | "details" | "summary" ->
           Soup.children elem |> Soup.iter process_node
         (* Ignore script, style, etc *)
         | "script" | "style" | "noscript" -> ()
         (* Default: just process children *)
         | _ ->
           Soup.children elem |> Soup.iter process_node)
      | None ->
        (* Text node - handle whitespace properly *)
        match Soup.leaf_text node with
        | Some text ->
          (* If text is only whitespace, mark that we need space *)
          let trimmed = String.trim text in
          if trimmed = "" then begin
            if has_whitespace text then
              need_space := true
          end else begin
            (* Text has content - check if it had leading/trailing whitespace *)
            let had_leading_ws = has_whitespace text &&
              (String.length text > 0 &&
               (text.[0] = ' ' || text.[0] = '\t' || text.[0] = '\n' || text.[0] = '\r')) in

            (* If had leading whitespace, mark we need space *)
            if had_leading_ws then need_space := true;

            (* Add the text content *)
            add_text trimmed;

            (* If had trailing whitespace, mark we need space for next *)
            let had_trailing_ws = has_whitespace text &&
              (String.length text > 0 &&
               let last = text.[String.length text - 1] in
               last = ' ' || last = '\t' || last = '\n' || last = '\r') in
            if had_trailing_ws then need_space := true
          end
        | None -> ()
    in

    Soup.children soup |> Soup.iter process_node;

    (* Clean up the result *)
    let result = Buffer.contents buffer in
    cleanup_markdown result
  with _ -> html_str
+
+
(** [to_markdown html_str] converts HTML content to clean Markdown.
    Thin alias for {!html_to_markdown}. *)
let to_markdown = html_to_markdown
+
end
+
+
(** {1 Feed Sources} *)
+
+
module Source = struct
  (** A feed source: a human-readable [name] paired with the feed [url]. *)
  type t = {
    name : string;
    url : string;
  }

  let make ~name ~url = { name; url }

  (* Accessors via destructuring. *)
  let name { name; _ } = name
  let url { url; _ } = url

  (** JSON codec mapping [{ "name": ..., "url": ... }] objects to {!t}. *)
  let jsont =
    let decode name url = { name; url } in
    let enc_name s = s.name in
    let enc_url s = s.url in
    Jsont.Object.map ~kind:"Source" decode
    |> Jsont.Object.mem "name" Jsont.string ~enc:enc_name
    |> Jsont.Object.mem "url" Jsont.string ~enc:enc_url
    |> Jsont.Object.finish
end
+
+
(** {1 HTTP Session Management} *)
+
+
module Session = struct
  (** An HTTP session used for fetching feeds, wrapping a [Requests]
      client parameterized over Eio's clock and network capabilities. *)
  type t = {
    session : (float Eio.Time.clock_ty Eio.Resource.t,
               [`Generic | `Unix] Eio.Net.ty Eio.Resource.t) Requests.t;
  }

  (** [init ~sw env] creates a session attached to switch [sw], with a
      fixed User-Agent, redirect following (max 5) and TLS verification. *)
  let init ~sw env =
    Log.info (fun m -> m "Initializing River session");
    let headers =
      Requests.Headers.of_list [ ("User-Agent", "OCaml-River/1.0") ]
    in
    {
      session =
        Requests.create ~sw ~default_headers:headers ~follow_redirects:true
          ~max_redirects:5 ~verify_tls:true env;
    }

  (** [with_session env f] runs [f] with a fresh session whose lifetime
      is scoped to an internal switch. *)
  let with_session env f =
    Log.info (fun m -> m "Creating River session");
    Eio.Switch.run (fun sw -> f (init ~sw env))

  (** Expose the underlying [Requests] session. *)
  let get_requests_session { session } = session
end
+
+
(** {1 Feeds and Posts} *)
+
+
module Feed = struct
  (* One constructor per supported syndication format. *)
  type feed_content =
    | Atom of Syndic.Atom.feed
    | Rss2 of Syndic.Rss2.channel
    | Json of Jsonfeed.t

  (* A fetched feed: where it came from, its extracted title, and the
     parsed payload. *)
  type t = {
    source : Source.t;
    title : string;
    content : feed_content;
  }

  (* Human-readable format name, used in log messages. *)
  let string_of_feed = function
    | Atom _ -> "Atom"
    | Rss2 _ -> "Rss2"
    | Json _ -> "JSONFeed"

  (** [classify_feed ~xmlbase body] parses [body] and returns the feed
      content.  Bodies that start with ['{'] or ['['] are parsed only as
      JSONFeed; anything else is tried as Atom, then RSS2.
      @raise Failure when no parser accepts the body. *)
  let classify_feed ~xmlbase (body : string) =
    Log.debug (fun m -> m "Attempting to parse feed (%d bytes)" (String.length body));

    (* Quick check - does it look like JSON? *)
    let looks_like_json =
      String.length body > 0 &&
      let first_char = String.get body 0 in
      first_char = '{' || first_char = '['
    in

    if looks_like_json then (
      (* Try JSONFeed first *)
      Log.debug (fun m -> m "Body looks like JSON, trying JSONFeed parser");
      match Jsonfeed.of_string body with
      | Ok jsonfeed ->
        Log.debug (fun m -> m "Successfully parsed as JSONFeed");
        Json jsonfeed
      | Error err ->
        Log.debug (fun m -> m "Not a JSONFeed: %s" (Jsont.Error.to_string err));
        (* JSON-looking bodies are NOT retried as XML: abort here. *)
        failwith "Not a valid JSONFeed"
    ) else (
      (* Try XML formats *)
      try
        let feed = Atom (Syndic.Atom.parse ~xmlbase (Xmlm.make_input (`String (0, body)))) in
        Log.debug (fun m -> m "Successfully parsed as Atom feed");
        feed
      with
      | Syndic.Atom.Error.Error (pos, msg) -> (
          (* Not Atom: fall back to RSS2. *)
          Log.debug (fun m -> m "Not an Atom feed: %s at position (%d, %d)"
            msg (fst pos) (snd pos));
          try
            let feed = Rss2 (Syndic.Rss2.parse ~xmlbase (Xmlm.make_input (`String (0, body)))) in
            Log.debug (fun m -> m "Successfully parsed as RSS2 feed");
            feed
          with Syndic.Rss2.Error.Error (pos, msg) ->
            Log.err (fun m -> m "Failed to parse as RSS2: %s at position (%d, %d)"
              msg (fst pos) (snd pos));
            failwith "Neither Atom nor RSS2 feed")
      | Not_found as e ->
        (* Log with backtrace, then re-raise: this indicates a parser bug. *)
        Log.err (fun m -> m "Not_found exception during Atom feed parsing");
        Log.err (fun m -> m "Backtrace:\n%s" (Printexc.get_backtrace ()));
        raise e
      | e ->
        Log.err (fun m -> m "Unexpected exception during feed parsing: %s"
          (Printexc.to_string e));
        Log.err (fun m -> m "Backtrace:\n%s" (Printexc.get_backtrace ()));
        raise e
    )

  (** [fetch session source] downloads the feed at [source]'s URL and
      parses it into a {!t}.  Relative links inside the feed are
      resolved against the source URL.
      @raise Failure on HTTP errors or unparseable bodies. *)
  let fetch session source =
    Log.info (fun m -> m "Fetching feed: %s" (Source.name source));

    let xmlbase = Uri.of_string (Source.url source) in

    (* Use Requests_json_api.get_result for clean Result-based error handling *)
    let requests_session = Session.get_requests_session session in
    let response =
      match Requests_json_api.get_result requests_session (Source.url source) with
      | Ok body ->
        Log.info (fun m -> m "Successfully fetched %s (%d bytes)"
          (Source.url source) (String.length body));
        body
      | Error (status, msg) ->
        Log.err (fun m -> m "Failed to fetch feed '%s': HTTP %d - %s"
          (Source.name source) status msg);
        failwith (Printf.sprintf "HTTP %d: %s" status msg)
    in

    let content = classify_feed ~xmlbase response in
    (* Extract a title in a format-specific way. *)
    let title =
      match content with
      | Atom atom -> Text_extract.string_of_text_construct atom.Syndic.Atom.title
      | Rss2 ch -> ch.Syndic.Rss2.title
      | Json jsonfeed -> Jsonfeed.title jsonfeed
    in

    Log.info (fun m -> m "Successfully fetched %s feed '%s' (title: '%s')"
      (string_of_feed content) (Source.name source) title);

    { source; title; content }

  let source t = t.source
end
+
+
(** {1 Posts} *)
+
module Post = struct
  (** A post normalized across Atom, RSS2 and JSONFeed sources. *)
  type t = {
    id : string;                    (* stable unique identifier *)
    title : string;
    link : Uri.t option;            (* permalink, when one exists *)
    date : Syndic.Date.t option;
    feed : Feed.t;                  (* feed the post belongs to *)
    author : string;
    email : string;
    content : Soup.soup Soup.node;  (* sanitized HTML body *)
    mutable link_response : (string, string) result option; (* cached page fetch *)
    tags : string list;
    summary : string option;
  }

  (** Generate a stable, unique ID from available data: prefer an
      explicit GUID, then the permalink, else a digest of
      [feed_url ^ title ^ date]. *)
  let generate_id ?guid ?link ?title ?date ~feed_url () =
    match guid with
    | Some id when id <> "" ->
      (* Use explicit ID/GUID if available *)
      id
    | _ ->
      match link with
      | Some uri when Uri.to_string uri <> "" ->
        (* Use permalink as ID (stable and unique) *)
        Uri.to_string uri
      | _ ->
        (* Fallback: hash of feed_url + title + date.
           Note: [Digest.string] is the stdlib MD5 — stable across runs,
           which is all that matters here (not cryptographic). *)
        let title_str = Option.value title ~default:"" in
        let date_str =
          match date with
          | Some d -> Ptime.to_rfc3339 d
          | None -> ""
        in
        let composite = Printf.sprintf "%s|%s|%s" feed_url title_str date_str in
        Digest.string composite |> Digest.to_hex

  (* Rewrite attribute [attr] of element [el] so its URI is resolved
     against [xmlbase] (absolutizes relative links/images). *)
  let resolve_links_attr ~xmlbase attr el =
    Soup.R.attribute attr el
    |> Uri.of_string
    |> Syndic.XML.resolve ~xmlbase
    |> Uri.to_string
    |> fun value -> Soup.set_attribute attr value el

  (* Things that posts should not contain *)
  let undesired_tags = [ "style"; "script" ]
  let undesired_attr = [ "id" ]

  (** Parse [s] as HTML, absolutize link/image URIs against [xmlbase],
      and strip undesired tags and attributes. *)
  let html_of_text ?xmlbase s =
    let soup = Soup.parse s in
    let ($$) = Soup.($$) in
    soup $$ "a[href]" |> Soup.iter (resolve_links_attr ~xmlbase "href");
    soup $$ "img[src]" |> Soup.iter (resolve_links_attr ~xmlbase "src");
    undesired_tags |> List.iter (fun tag -> soup $$ tag |> Soup.iter Soup.delete);
    soup $$ "*" |> Soup.iter (fun el ->
      undesired_attr |> List.iter (fun attr -> Soup.delete_attribute attr el));
    soup

  (* Do not trust sites using XML for HTML content. Convert to string and parse
     back. (Does not always fix bad HTML unfortunately.) *)
  let html_of_syndic =
    let ns_prefix _ = Some "" in
    fun ?xmlbase h ->
      html_of_text ?xmlbase
        (String.concat "" (List.map (Syndic.XML.to_string ~ns_prefix) h))

  let string_of_option = function None -> "" | Some s -> s

  (* Most recent posts first. Posts with no date are always last.
     FIX: [(None, None)] now yields 0 (was 1).  Returning a nonzero
     value for equal arguments violates the total-order contract that
     [List.sort] requires ([compare x x] must be 0); with 0, undated
     posts keep their relative order under the stable sort. *)
  let post_compare p1 p2 =
    match (p1.date, p2.date) with
    | Some d1, Some d2 -> Syndic.Date.compare d2 d1
    | None, Some _ -> 1
    | Some _, None -> -1
    | None, None -> 0

  (* [remove n l] drops the first [n] elements of [l]. *)
  let rec remove n l =
    if n <= 0 then l else match l with [] -> [] | _ :: tl -> remove (n - 1) tl

  (* [take n l] keeps at most the first [n] elements of [l]. *)
  let rec take n = function
    | [] -> []
    | e :: tl -> if n > 0 then e :: take (n - 1) tl else []

  (** Build a post from an Atom entry. *)
  let post_of_atom ~(feed : Feed.t) (e : Syndic.Atom.entry) =
    Log.debug (fun m -> m "Processing Atom entry: %s"
      (Text_extract.string_of_text_construct e.title));
    (* Prefer the rel=alternate link; fall back to the first link, then
       to the entry id when it is itself an http(s) URL. *)
    let link =
      try
        Some
          (List.find (fun l -> l.Syndic.Atom.rel = Syndic.Atom.Alternate) e.links)
          .href
      with Not_found -> (
        Log.debug (fun m -> m "No alternate link found, trying fallback");
        match e.links with
        | l :: _ -> Some l.href
        | [] -> (
            match Uri.scheme e.id with
            | Some "http" -> Some e.id
            | Some "https" -> Some e.id
            | _ -> None))
    in
    (* Prefer the published date; fall back to the mandatory updated. *)
    let date =
      match e.published with Some _ -> e.published | None -> Some e.updated
    in
    (* Prefer full content; fall back to summary; else an empty body. *)
    let content =
      match e.content with
      | Some (Text s) -> html_of_text s
      | Some (Html (xmlbase, s)) -> html_of_text ?xmlbase s
      | Some (Xhtml (xmlbase, h)) -> html_of_syndic ?xmlbase h
      | Some (Mime _) | Some (Src _) | None -> (
          match e.summary with
          | Some (Text s) -> html_of_text s
          | Some (Html (xmlbase, s)) -> html_of_text ?xmlbase s
          | Some (Xhtml (xmlbase, h)) -> html_of_syndic ?xmlbase h
          | None -> Soup.parse "")
    in
    let is_valid_author_name name =
      (* Filter out empty strings and placeholder values like "Unknown" *)
      let trimmed = String.trim name in
      trimmed <> "" && trimmed <> "Unknown"
    in
    let author_name =
      (* Fallback chain for author:
         1. Entry author (if present, not empty, and not "Unknown")
         2. Feed-level author (from Atom feed metadata)
         3. Feed title (from Atom feed metadata)
         4. Source name (manually entered feed name) *)
      try
        let author, _ = e.authors in
        let trimmed = String.trim author.name in
        if is_valid_author_name author.name then trimmed
        else raise Not_found (* Try feed-level author *)
      with Not_found -> (
        match feed.content with
        | Feed.Atom atom_feed -> (
            (* Try feed-level authors *)
            match atom_feed.Syndic.Atom.authors with
            | author :: _ when is_valid_author_name author.name ->
              String.trim author.name
            | _ ->
              (* Use feed title *)
              Text_extract.string_of_text_construct atom_feed.Syndic.Atom.title)
        | Feed.Rss2 _ | Feed.Json _ ->
          (* For RSS2 and JSONFeed, use the source name *)
          Source.name feed.source)
    in
    (* Extract tags from Atom categories *)
    let tags =
      List.map (fun cat -> cat.Syndic.Atom.term) e.categories
    in
    (* Extract summary - convert from text_construct to string *)
    let summary =
      match e.summary with
      | Some s -> Some (Text_extract.string_of_text_construct s)
      | None -> None
    in
    (* Generate unique ID *)
    let guid = Uri.to_string e.id in
    let title_str = Text_extract.string_of_text_construct e.title in
    let id =
      generate_id ~guid ?link ~title:title_str ?date
        ~feed_url:(Source.url feed.source) ()
    in
    {
      id;
      title = title_str;
      link;
      date;
      feed;
      author = author_name;
      email = "";
      content;
      link_response = None;
      tags;
      summary;
    }

  (** Build a post from an RSS2 item. *)
  let post_of_rss2 ~(feed : Feed.t) it =
    (* RSS2 items may carry a title, a description, or both; prefer the
       content:encoded body when it is non-empty. *)
    let title, content =
      match it.Syndic.Rss2.story with
      | All (t, xmlbase, d) -> (
          ( t,
            match it.content with
            | _, "" -> html_of_text ?xmlbase d
            | xmlbase, c -> html_of_text ?xmlbase c ))
      | Title t ->
        let xmlbase, c = it.content in
        (t, html_of_text ?xmlbase c)
      | Description (xmlbase, d) -> (
          ( "",
            match it.content with
            | _, "" -> html_of_text ?xmlbase d
            | xmlbase, c -> html_of_text ?xmlbase c ))
    in
    (* Note: it.link is of type Uri.t option in Syndic *)
    let link =
      match (it.guid, it.link) with
      | Some u, _ when u.permalink -> Some u.data
      | _, Some _ -> it.link
      | Some u, _ ->
        (* Sometimes the guid is indicated with isPermaLink="false" but is
           nonetheless the only URL we get (e.g. ocamlpro). *)
        Some u.data
      | None, None -> None
    in
    (* Extract GUID string for ID generation *)
    let guid_str =
      match it.guid with
      | Some u -> Some (Uri.to_string u.data)
      | None -> None
    in
    (* RSS2 doesn't have a categories field exposed, use empty list *)
    let tags = [] in
    (* RSS2 doesn't have a separate summary field, so leave it empty *)
    let summary = None in
    (* Generate unique ID *)
    let id =
      generate_id ?guid:guid_str ?link ~title ?date:it.pubDate
        ~feed_url:(Source.url feed.source) ()
    in
    {
      id;
      title;
      link;
      feed;
      author = Source.name feed.source;
      email = string_of_option it.author;
      content;
      date = it.pubDate;
      link_response = None;
      tags;
      summary;
    }

  (** Build a post from a JSONFeed item. *)
  let post_of_jsonfeed_item ~(feed : Feed.t) (item : Jsonfeed.Item.t) =
    Log.debug (fun m -> m "Processing JSONFeed item: %s"
      (Option.value (Jsonfeed.Item.title item) ~default:"Untitled"));
    (* Extract content - prefer HTML, fall back to text *)
    let content =
      match Jsonfeed.Item.content item with
      | `Html html -> html_of_text html
      | `Text text -> html_of_text text
      | `Both (html, _text) -> html_of_text html
    in
    (* Extract author - use first author if multiple *)
    let author_name, author_email =
      match Jsonfeed.Item.authors item with
      | Some (first :: _) ->
        let name = Jsonfeed.Author.name first |> Option.value ~default:"" in
        (* JSONFeed authors don't typically have email *)
        (name, "")
      | _ ->
        (* Fall back to feed-level authors or feed title *)
        (match feed.content with
         | Feed.Json jsonfeed ->
           (match Jsonfeed.authors jsonfeed with
            | Some (first :: _) ->
              let name = Jsonfeed.Author.name first |> Option.value ~default:feed.title in
              (name, "")
            | _ -> (feed.title, ""))
         | _ -> (feed.title, ""))
    in

    (* Link - use url field *)
    let link =
      Jsonfeed.Item.url item
      |> Option.map Uri.of_string
    in

    (* Date *)
    let date = Jsonfeed.Item.date_published item in

    (* Summary *)
    let summary = Jsonfeed.Item.summary item in

    (* Tags *)
    let tags =
      Jsonfeed.Item.tags item
      |> Option.value ~default:[]
    in

    (* Generate unique ID - JSONFeed items always have an id field (required) *)
    let guid = Jsonfeed.Item.id item in
    let title_str = Jsonfeed.Item.title item |> Option.value ~default:"Untitled" in
    let id =
      generate_id ~guid ?link ~title:title_str ?date
        ~feed_url:(Source.url feed.source) ()
    in

    {
      id;
      title = title_str;
      link;
      date;
      feed;
      author = author_name;
      email = author_email;
      content;
      link_response = None;
      tags;
      summary;
    }

  (** Extract all posts of a feed, dispatching on its format. *)
  let posts_of_feed c =
    match c.Feed.content with
    | Feed.Atom f ->
      let posts = List.map (post_of_atom ~feed:c) f.Syndic.Atom.entries in
      Log.debug (fun m -> m "Extracted %d posts from Atom feed '%s'"
        (List.length posts) (Source.name c.source));
      posts
    | Feed.Rss2 ch ->
      let posts = List.map (post_of_rss2 ~feed:c) ch.Syndic.Rss2.items in
      Log.debug (fun m -> m "Extracted %d posts from RSS2 feed '%s'"
        (List.length posts) (Source.name c.source));
      posts
    | Feed.Json jsonfeed ->
      let items = Jsonfeed.items jsonfeed in
      let posts = List.map (post_of_jsonfeed_item ~feed:c) items in
      Log.debug (fun m -> m "Extracted %d posts from JSONFeed '%s'"
        (List.length posts) (Source.name c.source));
      posts

  (** [get_posts ?n ?ofs feeds] collects posts from every feed, sorts
      them most-recent-first, skips [ofs] posts and returns at most [n]
      (all remaining posts when [n] is absent). *)
  let get_posts ?n ?(ofs = 0) planet_feeds =
    Log.info (fun m -> m "Processing %d feeds for posts" (List.length planet_feeds));

    let posts = List.concat @@ List.map posts_of_feed planet_feeds in
    Log.debug (fun m -> m "Total posts collected: %d" (List.length posts));

    let posts = List.sort post_compare posts in
    Log.debug (fun m -> m "Posts sorted by date (most recent first)");

    let posts = remove ofs posts in
    let result =
      match n with
      | None ->
        Log.debug (fun m -> m "Returning all %d posts (offset=%d)"
          (List.length posts) ofs);
        posts
      | Some n ->
        let limited = take n posts in
        Log.debug (fun m -> m "Returning %d posts (requested=%d, offset=%d)"
          (List.length limited) n ofs);
        limited
    in
    result

  let of_feeds feeds = get_posts feeds

  (* Field accessors. *)
  let feed t = t.feed
  let title t = t.title
  let link t = t.link
  let date t = t.date
  let author t = t.author
  let email t = t.email
  let content t = Soup.to_string t.content
  let id t = t.id
  let tags t = t.tags
  let summary t = t.summary

  let meta_description _t =
    (* TODO: This requires environment for HTTP access *)
    Log.debug (fun m -> m "meta_description not implemented (requires environment)");
    None

  let seo_image _t =
    (* TODO: This requires environment for HTTP access *)
    Log.debug (fun m -> m "seo_image not implemented (requires environment)");
    None
end
+
+
(** {1 Format Conversion and Export} *)
+
+
module Format = struct
  (* Atom export: convert posts back into a Syndic Atom feed. *)
  module Atom = struct
    (** Build a Syndic Atom entry from a post.  The feed's source is
        recorded as a contributor; the entry id falls back to a digest
        of the title when the post has no link. *)
    let entry_of_post post =
      let content = Syndic.Atom.Html (None, Post.content post) in
      let contributors =
        [ Syndic.Atom.author ~uri:(Uri.of_string (Source.url (Feed.source (Post.feed post))))
            (Source.name (Feed.source (Post.feed post))) ]
      in
      let links =
        match Post.link post with
        | Some l -> [ Syndic.Atom.link ~rel:Syndic.Atom.Alternate l ]
        | None -> []
      in
      let id =
        match Post.link post with
        | Some l -> l
        | None -> Uri.of_string (Digest.to_hex (Digest.string (Post.title post)))
      in
      let authors = (Syndic.Atom.author ~email:(Post.email post) (Post.author post), []) in
      let title : Syndic.Atom.text_construct = Syndic.Atom.Text (Post.title post) in
      let updated =
        match Post.date post with
        (* Atom entry requires a date but RSS2 does not. So if a date
         * is not available, just capture the current date. *)
        | None -> Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get
        | Some d -> d
      in
      Syndic.Atom.entry ~content ~contributors ~links ~id ~authors ~title ~updated
        ()

    let entries_of_posts posts = List.map entry_of_post posts

    (** Assemble a complete Atom feed from [entries].  [id] defaults to
        the fixed URN "urn:river:merged"; [authors] are (name, email
        option) pairs.  [updated] is set to the current wall-clock time. *)
    let feed_of_entries ~title ?id ?(authors = []) entries =
      let feed_id = match id with
        | Some i -> Uri.of_string i
        | None -> Uri.of_string "urn:river:merged"
      in
      let feed_authors = List.map (fun (name, email) ->
        match email with
        | Some e -> Syndic.Atom.author ~email:e name
        | None -> Syndic.Atom.author name
      ) authors in
      {
        Syndic.Atom.id = feed_id;
        title = Syndic.Atom.Text title;
        (* NOTE(review): uses Unix.time here but Unix.gettimeofday in
           entry_of_post — both are wall-clock seconds; confirm the
           inconsistency is harmless. *)
        updated = Ptime.of_float_s (Unix.time ()) |> Option.get;
        entries;
        authors = feed_authors;
        categories = [];
        contributors = [];
        generator = Some {
          Syndic.Atom.version = Some "1.0";
          uri = None;
          content = "River Feed Aggregator";
        };
        icon = None;
        links = [];
        logo = None;
        rights = None;
        subtitle = None;
      }

    (** Serialize an Atom feed to an XML string. *)
    let to_string feed =
      let output = Buffer.create 4096 in
      Syndic.Atom.output feed (`Buffer output);
      Buffer.contents output
  end

  (* RSS2 export: only possible when the feed was RSS2 to begin with. *)
  module Rss2 = struct
    let of_feed feed =
      match feed.Feed.content with
      | Feed.Rss2 ch -> Some ch
      | _ -> None
  end

  (* JSONFeed export.  Inside this struct, [Jsonfeed] still refers to
     the library module (the binding is not recursive). *)
  module Jsonfeed = struct
    (** Build a JSONFeed item from a post. *)
    let item_of_post post =
      (* Convert HTML content back to string *)
      let html = Post.content post in
      let content = `Html html in

      (* Create author *)
      let authors =
        if Post.author post <> "" then
          let author = Jsonfeed.Author.create ~name:(Post.author post) () in
          Some [author]
        else
          None
      in

      (* Create item *)
      Jsonfeed.Item.create
        ~id:(Post.id post)
        ~content
        ?url:(Option.map Uri.to_string (Post.link post))
        ~title:(Post.title post)
        ?summary:(Post.summary post)
        ?date_published:(Post.date post)
        ?authors
        ~tags:(Post.tags post)
        ()

    let items_of_posts posts = List.map item_of_post posts

    (** Assemble a JSONFeed document from pre-built [items]. *)
    let feed_of_items ~title ?home_page_url ?feed_url ?description ?icon ?favicon items =
      Jsonfeed.create ~title ?home_page_url ?feed_url ?description ?icon ?favicon ~items ()

    (** Assemble a JSONFeed document directly from [posts]. *)
    let feed_of_posts ~title ?home_page_url ?feed_url ?description ?icon ?favicon posts =
      let items = items_of_posts posts in
      feed_of_items ~title ?home_page_url ?feed_url ?description ?icon ?favicon items

    (** Serialize; errors are returned as human-readable strings. *)
    let to_string ?(minify = false) jsonfeed =
      match Jsonfeed.to_string ~minify jsonfeed with
      | Ok s -> Ok s
      | Error err -> Error (Jsont.Error.to_string err)

    let of_feed feed =
      match feed.Feed.content with
      | Feed.Json jf -> Some jf
      | _ -> None
  end
end
+
+
(** {1 User Management} *)
+
+
module User = struct
  (** A registered user together with the feed sources they follow. *)
  type t = {
    username : string;
    fullname : string;
    email : string option;
    feeds : Source.t list;
    last_synced : string option;
  }

  let make ~username ~fullname ?email ?(feeds = []) ?last_synced () =
    { username; fullname; email; feeds; last_synced }

  (* Accessors via destructuring. *)
  let username { username; _ } = username
  let fullname { fullname; _ } = fullname
  let email { email; _ } = email
  let feeds { feeds; _ } = feeds
  let last_synced { last_synced; _ } = last_synced

  (** Prepend [source] to the user's feed list. *)
  let add_feed t source = { t with feeds = source :: t.feeds }

  (** Drop every feed whose URL equals [url]. *)
  let remove_feed t ~url =
    { t with feeds = List.filter (fun s -> Source.url s <> url) t.feeds }

  (** Record [timestamp] as the most recent sync time. *)
  let set_last_synced t timestamp = { t with last_synced = Some timestamp }

  (** JSON codec for {!t}; [email] and [last_synced] are optional
      members. *)
  let jsont =
    let decode username fullname email feeds last_synced =
      { username; fullname; email; feeds; last_synced }
    in
    Jsont.Object.map ~kind:"User" decode
    |> Jsont.Object.mem "username" Jsont.string ~enc:(fun u -> u.username)
    |> Jsont.Object.mem "fullname" Jsont.string ~enc:(fun u -> u.fullname)
    |> Jsont.Object.opt_mem "email" Jsont.string ~enc:(fun u -> u.email)
    |> Jsont.Object.mem "feeds" (Jsont.list Source.jsont) ~enc:(fun u -> u.feeds)
    |> Jsont.Object.opt_mem "last_synced" Jsont.string ~enc:(fun u -> u.last_synced)
    |> Jsont.Object.finish
end
+
+
(** {1 Feed Quality Analysis} *)
+
+
module Quality = struct
  (* Aggregate quality metrics for a list of Atom entries. *)
  type t = {
    total_entries : int;
    entries_with_summary : int;
    entries_with_author : int;
    entries_with_date : int;
    entries_with_content : int;
    entries_with_tags : int;
    avg_content_length : float;         (* mean over entries with content *)
    min_content_length : int;
    max_content_length : int;
    posting_frequency_days : float option; (* mean days between posts *)
    quality_score : float;              (* weighted 0..100 score *)
  }

  let make ~total_entries ~entries_with_summary ~entries_with_author
      ~entries_with_date ~entries_with_content ~entries_with_tags
      ~avg_content_length ~min_content_length ~max_content_length
      ~posting_frequency_days ~quality_score =
    {
      total_entries;
      entries_with_summary;
      entries_with_author;
      entries_with_date;
      entries_with_content;
      entries_with_tags;
      avg_content_length;
      min_content_length;
      max_content_length;
      posting_frequency_days;
      quality_score;
    }

  (* Field accessors. *)
  let total_entries t = t.total_entries
  let entries_with_summary t = t.entries_with_summary
  let entries_with_author t = t.entries_with_author
  let entries_with_date t = t.entries_with_date
  let entries_with_content t = t.entries_with_content
  let entries_with_tags t = t.entries_with_tags
  let avg_content_length t = t.avg_content_length
  let min_content_length t = t.min_content_length
  let max_content_length t = t.max_content_length
  let posting_frequency_days t = t.posting_frequency_days
  let quality_score t = t.quality_score

  (** Get content length from an Atom entry (falls back to summary
      length; XHTML/Mime/Src content counts as 0). *)
  let get_content_length (entry : Syndic.Atom.entry) =
    match entry.content with
    | Some (Syndic.Atom.Text s) -> String.length s
    | Some (Syndic.Atom.Html (_, s)) -> String.length s
    | Some (Syndic.Atom.Xhtml (_, _)) -> 0 (* Could calculate but complex *)
    | Some (Syndic.Atom.Mime _) -> 0
    | Some (Syndic.Atom.Src _) -> 0
    | None -> (
        match entry.summary with
        | Some (Syndic.Atom.Text s) -> String.length s
        | Some (Syndic.Atom.Html (_, s)) -> String.length s
        | Some (Syndic.Atom.Xhtml (_, _)) -> 0
        | None -> 0)

  (** Check if entry has non-empty summary *)
  let has_summary (entry : Syndic.Atom.entry) =
    match entry.summary with
    | Some (Syndic.Atom.Text s) when String.trim s <> "" -> true
    | Some (Syndic.Atom.Html (_, s)) when String.trim s <> "" -> true
    | Some (Syndic.Atom.Xhtml (_, _)) -> true
    | _ -> false

  (** Check if entry has author *)
  let has_author (entry : Syndic.Atom.entry) =
    let (author, _) = entry.authors in
    String.trim author.name <> ""

  (** Check if entry has content *)
  let has_content (entry : Syndic.Atom.entry) =
    get_content_length entry > 0

  (** Check if entry has tags/categories *)
  let has_tags (entry : Syndic.Atom.entry) =
    entry.categories <> []

  (** Calculate quality score from metrics: a weighted average of the
      per-field coverage percentages (content 30%, dates 25%, authors
      20%, summaries 15%, tags 10%), yielding a value in [0, 100]. *)
  let calculate_quality_score t =
    let total = float_of_int t.total_entries in
    if total = 0.0 then 0.0
    else
      let summary_pct = float_of_int t.entries_with_summary /. total *. 100.0 in
      let author_pct = float_of_int t.entries_with_author /. total *. 100.0 in
      let date_pct = float_of_int t.entries_with_date /. total *. 100.0 in
      let content_pct = float_of_int t.entries_with_content /. total *. 100.0 in
      let tags_pct = float_of_int t.entries_with_tags /. total *. 100.0 in

      (* Weighted average: content and dates are most important *)
      let score =
        (content_pct *. 0.30) +.
        (date_pct *. 0.25) +.
        (author_pct *. 0.20) +.
        (summary_pct *. 0.15) +.
        (tags_pct *. 0.10)
      in
      score

  (** [analyze entries] computes quality metrics over Atom [entries].
      @raise Failure when [entries] is empty. *)
  let analyze entries =
    if entries = [] then
      failwith "No entries to analyze"
    else
      let total_entries = List.length entries in

      (* Mutable accumulators for a single pass over the entries. *)
      let entries_with_summary = ref 0 in
      let entries_with_author = ref 0 in
      let entries_with_date = ref total_entries in (* All Atom entries have updated *)
      let entries_with_content = ref 0 in
      let entries_with_tags = ref 0 in
      let content_lengths = ref [] in
      let dates = ref [] in

      List.iter (fun (entry : Syndic.Atom.entry) ->
        if has_summary entry then incr entries_with_summary;
        if has_author entry then incr entries_with_author;
        if has_content entry then begin
          incr entries_with_content;
          content_lengths := get_content_length entry :: !content_lengths
        end;
        if has_tags entry then incr entries_with_tags;
        dates := entry.updated :: !dates
      ) entries;

      (* Calculate content statistics *)
      let avg_content_length, min_content_length, max_content_length =
        if !content_lengths = [] then
          (0.0, 0, 0)
        else
          let sorted = List.sort compare !content_lengths in
          let sum = List.fold_left (+) 0 sorted in
          let avg = float_of_int sum /. float_of_int (List.length sorted) in
          let min_len = List.hd sorted in
          let max_len = List.hd (List.rev sorted) in
          (avg, min_len, max_len)
      in

      (* Calculate posting frequency: mean interval (days) between the
         earliest and latest post, spread across the intervals. *)
      let posting_frequency_days =
        if List.length !dates < 2 then
          None
        else
          try
            let timestamps = List.map Ptime.to_float_s !dates in
            let sorted_timestamps = List.sort compare timestamps in
            let first = List.hd sorted_timestamps in
            let last = List.hd (List.rev sorted_timestamps) in
            let total_days = (last -. first) /. 86400.0 in
            let num_intervals = float_of_int (List.length sorted_timestamps - 1) in
            Some (total_days /. num_intervals)
          with _ -> None
      in

      (* Create metrics record (without quality_score first) *)
      let metrics = {
        total_entries;
        entries_with_summary = !entries_with_summary;
        entries_with_author = !entries_with_author;
        entries_with_date = !entries_with_date;
        entries_with_content = !entries_with_content;
        entries_with_tags = !entries_with_tags;
        avg_content_length;
        min_content_length;
        max_content_length;
        posting_frequency_days;
        quality_score = 0.0; (* Placeholder *)
      } in

      (* Calculate quality score *)
      let quality_score = calculate_quality_score metrics in
      { metrics with quality_score }
end
+
+
(** {1 State Management} *)
+
+
module State = struct
+
type t = {
+
xdg : Xdge.t;
+
}
+
+
module Paths = struct
+
(** Get the users directory path *)
+
let users_dir state = Eio.Path.(Xdge.state_dir state.xdg / "users")
+
+
(** Get the feeds directory path *)
+
let feeds_dir state = Eio.Path.(Xdge.state_dir state.xdg / "feeds")
+
+
(** Get the user feeds directory path *)
+
let user_feeds_dir state = Eio.Path.(feeds_dir state / "user")
+
+
(** Get the path to a user's JSON file *)
+
let user_file state username =
+
Eio.Path.(users_dir state / (username ^ ".json"))
+
+
(** Get the path to a user's Atom feed file *)
+
let user_feed_file state username =
+
Eio.Path.(user_feeds_dir state / (username ^ ".xml"))
+
+
(** Ensure all necessary directories exist *)
+
let ensure_directories state =
+
let dirs = [
+
users_dir state;
+
feeds_dir state;
+
user_feeds_dir state;
+
] in
+
List.iter (fun dir ->
+
try Eio.Path.mkdir ~perm:0o755 dir
+
with Eio.Io (Eio.Fs.E (Already_exists _), _) -> ()
+
) dirs
+
end
+
+
module Json = struct
+
(** Decode a user from JSON string *)
+
let user_of_string s =
+
match Jsont_bytesrw.decode_string' User.jsont s with
+
| Ok user -> Some user
+
| Error err ->
+
Log.err (fun m -> m "Failed to parse user JSON: %s" (Jsont.Error.to_string err));
+
None
+
+
(** Encode a user to JSON string *)
+
let user_to_string user =
+
match Jsont_bytesrw.encode_string' ~format:Jsont.Indent User.jsont user with
+
| Ok s -> s
+
| Error err -> failwith ("Failed to encode user: " ^ Jsont.Error.to_string err)
+
end
+
+
module Storage = struct
+
(** Load a user from disk *)
+
let load_user state username =
+
let file = Paths.user_file state username in
+
try
+
let content = Eio.Path.load file in
+
Json.user_of_string content
+
with
+
| Eio.Io (Eio.Fs.E (Not_found _), _) -> None
+
| e ->
+
Log.err (fun m -> m "Error loading user %s: %s" username (Printexc.to_string e));
+
None
+
+
(** Save a user to disk *)
+
let save_user state user =
+
let file = Paths.user_file state (User.username user) in
+
let json = Json.user_to_string user in
+
Eio.Path.save ~create:(`Or_truncate 0o644) file json
+
+
(** List all usernames *)
+
let list_users state =
+
try
+
Eio.Path.read_dir (Paths.users_dir state)
+
|> List.filter_map (fun name ->
+
if Filename.check_suffix name ".json" then
+
Some (Filename.chop_suffix name ".json")
+
else None
+
)
+
with _ -> []
+
+
(** Load existing Atom entries for a user *)
+
let load_existing_posts state username =
+
let file = Paths.user_feed_file state username in
+
try
+
let content = Eio.Path.load file in
+
(* Parse existing Atom feed *)
+
let input = Xmlm.make_input (`String (0, content)) in
+
let feed = Syndic.Atom.parse input in
+
feed.Syndic.Atom.entries
+
with
+
| Eio.Io (Eio.Fs.E (Not_found _), _) -> []
+
| e ->
+
Log.err (fun m -> m "Error loading existing posts for %s: %s"
+
username (Printexc.to_string e));
+
[]
+
+
(** Save Atom entries for a user *)
+
let save_atom_feed state username entries =
+
let file = Paths.user_feed_file state username in
+
let feed = Format.Atom.feed_of_entries ~title:username entries in
+
let xml = Format.Atom.to_string feed in
+
Eio.Path.save ~create:(`Or_truncate 0o644) file xml
+
+
(** Delete a user and their feed file *)
+
let delete_user state username =
+
let user_file = Paths.user_file state username in
+
let feed_file = Paths.user_feed_file state username in
+
(try Eio.Path.unlink user_file with _ -> ());
+
(try Eio.Path.unlink feed_file with _ -> ())
+
end
+
+
module Sync = struct
+
(** Merge new entries with existing ones, updating matching IDs *)
+
let merge_entries ~existing ~new_entries =
+
(* Create a map of new entry IDs for efficient lookup and updates *)
+
let module UriMap = Map.Make(Uri) in
+
let new_entries_map =
+
List.fold_left (fun acc (entry : Syndic.Atom.entry) ->
+
UriMap.add entry.id entry acc
+
) UriMap.empty new_entries
+
in
+
+
(* Update existing entries with new ones if IDs match, otherwise keep existing *)
+
let updated_existing =
+
List.filter_map (fun (entry : Syndic.Atom.entry) ->
+
if UriMap.mem entry.id new_entries_map then
+
None (* Will be replaced by new entry *)
+
else
+
Some entry (* Keep existing entry *)
+
) existing
+
in
+
+
(* Combine new entries with non-replaced existing entries *)
+
let combined = new_entries @ updated_existing in
+
List.sort (fun (a : Syndic.Atom.entry) (b : Syndic.Atom.entry) ->
+
Ptime.compare b.updated a.updated
+
) combined
+
+
(** Get current timestamp in ISO 8601 format *)
+
let current_timestamp () =
+
let open Unix in
+
let tm = gmtime (time ()) in
+
Printf.sprintf "%04d-%02d-%02dT%02d:%02d:%02dZ"
+
(tm.tm_year + 1900) (tm.tm_mon + 1) tm.tm_mday
+
tm.tm_hour tm.tm_min tm.tm_sec
+
+
(** Sync feeds for a single user *)
+
let sync_user session state ~username =
+
match Storage.load_user state username with
+
| None ->
+
Error (Printf.sprintf "User %s not found" username)
+
| Some user when User.feeds user = [] ->
+
Log.info (fun m -> m "No feeds configured for user %s" username);
+
Ok ()
+
| Some user ->
+
Log.info (fun m -> m "Syncing feeds for user %s..." username);
+
+
(* Fetch all feeds concurrently *)
+
let fetched_feeds =
+
Eio.Fiber.List.filter_map (fun source ->
+
try
+
Log.info (fun m -> m " Fetching %s (%s)..."
+
(Source.name source) (Source.url source));
+
Some (Feed.fetch session source)
+
with e ->
+
Log.err (fun m -> m " Failed to fetch %s: %s"
+
(Source.name source) (Printexc.to_string e));
+
None
+
) (User.feeds user)
+
in
+
+
if fetched_feeds = [] then begin
+
Error "No feeds successfully fetched"
+
end else begin
+
(* Get posts from fetched feeds *)
+
let posts = Post.of_feeds fetched_feeds in
+
Log.info (fun m -> m " Found %d new posts" (List.length posts));
+
+
(* Convert to Atom entries *)
+
let new_entries = Format.Atom.entries_of_posts posts in
+
+
(* Load existing entries *)
+
let existing = Storage.load_existing_posts state username in
+
Log.info (fun m -> m " Found %d existing posts" (List.length existing));
+
+
(* Merge entries *)
+
let merged = merge_entries ~existing ~new_entries in
+
Log.info (fun m -> m " Total posts after merge: %d" (List.length merged));
+
+
(* Save updated feed *)
+
Storage.save_atom_feed state username merged;
+
+
(* Update last_synced timestamp *)
+
let now = current_timestamp () in
+
let user = User.set_last_synced user now in
+
Storage.save_user state user;
+
+
Log.info (fun m -> m "Sync completed for user %s" username);
+
Ok ()
+
end
+
end
+
+
module Export = struct
+
(** Convert Atom entry to JSONFeed item *)
+
let atom_entry_to_jsonfeed_item (entry : Syndic.Atom.entry) =
+
(* Extract ID *)
+
let id = Uri.to_string entry.id in
+
+
(* Extract title *)
+
let title =
+
match entry.title with
+
| Syndic.Atom.Text s -> Some s
+
| Syndic.Atom.Html (_, s) -> Some s
+
| Syndic.Atom.Xhtml (_, _) -> Some "Untitled"
+
in
+
+
(* Extract URL *)
+
let url =
+
match entry.links with
+
| link :: _ -> Some (Uri.to_string link.href)
+
| [] -> None
+
in
+
+
(* Extract content *)
+
let content =
+
match entry.content with
+
| Some (Syndic.Atom.Text s) -> `Text s
+
| Some (Syndic.Atom.Html (_, s)) -> `Html s
+
| Some (Syndic.Atom.Xhtml (_, nodes)) ->
+
let html = String.concat "" (List.map Syndic.XML.to_string nodes) in
+
`Html html
+
| Some (Syndic.Atom.Mime _) | Some (Syndic.Atom.Src _) | None ->
+
`Text ""
+
in
+
+
(* Extract summary *)
+
let summary =
+
match entry.summary with
+
| Some (Syndic.Atom.Text s) when String.trim s <> "" -> Some s
+
| Some (Syndic.Atom.Html (_, s)) when String.trim s <> "" -> Some s
+
| _ -> None
+
in
+
+
(* Extract authors *)
+
let authors =
+
let (author, contributors) = entry.authors in
+
let author_list = author :: contributors in
+
let jsonfeed_authors = List.filter_map (fun (a : Syndic.Atom.author) ->
+
let name = String.trim a.name in
+
if name = "" then None
+
else Some (Jsonfeed.Author.create ~name ())
+
) author_list in
+
if jsonfeed_authors = [] then None else Some jsonfeed_authors
+
in
+
+
(* Extract tags *)
+
let tags =
+
match entry.categories with
+
| [] -> None
+
| cats ->
+
let tag_list = List.map (fun (c : Syndic.Atom.category) ->
+
match c.label with
+
| Some lbl -> lbl
+
| None -> c.term
+
) cats in
+
if tag_list = [] then None else Some tag_list
+
in
+
+
(* Create JSONFeed item *)
+
Jsonfeed.Item.create
+
~id
+
~content
+
?title
+
?url
+
?summary
+
?authors
+
?tags
+
~date_published:entry.updated
+
()
+
+
(** Export entries as JSONFeed *)
+
let export_jsonfeed ~title entries =
+
let items = List.map atom_entry_to_jsonfeed_item entries in
+
let feed = Jsonfeed.create ~title ~items () in
+
match Jsonfeed.to_string ~minify:false feed with
+
| Ok json -> Ok json
+
| Error err -> Error (Printf.sprintf "Failed to serialize JSON Feed: %s" (Jsont.Error.to_string err))
+
end
+
+
let create env ~app_name =
+
let xdg = Xdge.create env#fs app_name in
+
let state = { xdg } in
+
Paths.ensure_directories state;
+
state
+
+
let create_user state user =
+
match Storage.load_user state (User.username user) with
+
| Some _ ->
+
Error (Printf.sprintf "User %s already exists" (User.username user))
+
| None ->
+
Storage.save_user state user;
+
Log.info (fun m -> m "User %s created" (User.username user));
+
Ok ()
+
+
let delete_user state ~username =
+
match Storage.load_user state username with
+
| None ->
+
Error (Printf.sprintf "User %s not found" username)
+
| Some _ ->
+
Storage.delete_user state username;
+
Log.info (fun m -> m "User %s deleted" username);
+
Ok ()
+
+
let get_user state ~username =
+
Storage.load_user state username
+
+
let update_user state user =
+
match Storage.load_user state (User.username user) with
+
| None ->
+
Error (Printf.sprintf "User %s not found" (User.username user))
+
| Some _ ->
+
Storage.save_user state user;
+
Log.info (fun m -> m "User %s updated" (User.username user));
+
Ok ()
+
+
let list_users state =
+
Storage.list_users state
+
+
let sync_user env state ~username =
+
Session.with_session env @@ fun session ->
+
Sync.sync_user session state ~username
+
+
let sync_all env state =
+
let users = Storage.list_users state in
+
if users = [] then begin
+
Log.info (fun m -> m "No users to sync");
+
Ok (0, 0)
+
end else begin
+
Log.info (fun m -> m "Syncing %d users concurrently..." (List.length users));
+
+
Session.with_session env @@ fun session ->
+
let results =
+
Eio.Fiber.List.map (fun username ->
+
match Sync.sync_user session state ~username with
+
| Ok () -> true
+
| Error err ->
+
Log.err (fun m -> m "Failed to sync user %s: %s" username err);
+
false
+
) users
+
in
+
let success_count = List.length (List.filter (fun x -> x) results) in
+
let fail_count = List.length users - success_count in
+
+
if fail_count = 0 then
+
Log.info (fun m -> m "All users synced successfully");
+
+
Ok (success_count, fail_count)
+
end
+
+
let get_user_posts state ~username ?limit () =
+
let entries = Storage.load_existing_posts state username in
+
match limit with
+
| None -> entries
+
| Some n -> List.filteri (fun i _ -> i < n) entries
+
+
let get_all_posts state ?limit () =
+
let users = Storage.list_users state in
+
+
(* Collect all entries from all users with username tag *)
+
let all_entries =
+
List.concat_map (fun username ->
+
let entries = Storage.load_existing_posts state username in
+
List.map (fun entry -> (username, entry)) entries
+
) users
+
in
+
+
(* Sort by date (newest first) *)
+
let sorted = List.sort (fun (_, a : string * Syndic.Atom.entry) (_, b) ->
+
Ptime.compare b.updated a.updated
+
) all_entries in
+
+
match limit with
+
| None -> sorted
+
| Some n -> List.filteri (fun i _ -> i < n) sorted
+
+
let export_merged_feed state ~title ~format ?limit () =
+
let all_posts = get_all_posts state ?limit () in
+
let entries = List.map snd all_posts in
+
+
match format with
+
| `Atom ->
+
let xml = Format.Atom.to_string (Format.Atom.feed_of_entries ~title entries) in
+
Ok xml
+
| `Jsonfeed ->
+
if entries = [] then
+
(* Empty JSONFeed *)
+
let feed = Jsonfeed.create ~title ~items:[] () in
+
match Jsonfeed.to_string ~minify:false feed with
+
| Ok json -> Ok json
+
| Error err -> Error (Printf.sprintf "Failed to serialize JSON Feed: %s" (Jsont.Error.to_string err))
+
else
+
Export.export_jsonfeed ~title entries
+
+
let analyze_user_quality state ~username =
+
match Storage.load_user state username with
+
| None ->
+
Error (Printf.sprintf "User %s not found" username)
+
| Some _ ->
+
let entries = Storage.load_existing_posts state username in
+
if entries = [] then
+
Error "No entries to analyze"
+
else
+
Ok (Quality.analyze entries)
+
end
+376 -107
stack/river/lib/river.mli
···
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*)
-
(** River RSS/Atom feed aggregator *)
-
(** {1 Session Management} *)
-
type session
-
(** An abstract River session for fetching feeds.
-
The session manages HTTP connections and is tied to an Eio switch
-
for proper resource cleanup. *)
-
val init :
-
sw:Eio.Switch.t ->
-
< clock : float Eio.Time.clock_ty Eio.Resource.t;
-
fs : Eio.Fs.dir_ty Eio.Path.t;
-
net : [ `Generic | `Unix ] Eio.Net.ty Eio.Resource.t; .. > ->
-
session
-
(** [init ~sw env] creates a new River session.
-
The session is configured with appropriate defaults for fetching feeds:
-
- User-Agent: "OCaml-River/1.0"
-
- Automatic redirect following (max 5 redirects)
-
- TLS verification enabled
-
@param sw The switch for resource management
-
@param env The Eio environment *)
-
val with_session :
-
< clock : float Eio.Time.clock_ty Eio.Resource.t;
-
fs : Eio.Fs.dir_ty Eio.Path.t;
-
net : [ `Generic | `Unix ] Eio.Net.ty Eio.Resource.t; .. > ->
-
(session -> 'a) -> 'a
-
(** [with_session env f] creates a session and automatically manages its lifecycle.
-
This is the recommended way to use River as it ensures proper cleanup.
-
@param env The Eio environment
-
@param f The function to run with the session *)
-
(** {1 Feed Sources and Fetching} *)
-
type source = { name : string; url : string }
-
(** The source of a feed. *)
-
type feed
-
(** An Atom, RSS2, or JSON Feed. *)
-
type post
-
(** A post from a feed. *)
-
val fetch : session -> source -> feed
-
(** [fetch session source] returns an Atom or RSS feed from a source.
-
@param session The River session
-
@param source The feed source to fetch *)
-
val name : feed -> string
-
(** [name feed] is the name of the feed source passed to [fetch]. *)
-
val url : feed -> string
-
(** [url feed] is the url of the feed source passed to [fetch]. *)
-
val posts : feed list -> post list
-
(** [posts feeds] is the list of deduplicated posts of the given feeds. *)
-
val feed : post -> feed
-
(** [feed post] is the feed the post originates from. *)
-
val title : post -> string
-
(** [title post] is the title of the post. *)
-
val link : post -> Uri.t option
-
(** [link post] is the link of the post. *)
-
val date : post -> Syndic.Date.t option
-
(** [date post] is the date of the post. *)
-
val author : post -> string
-
(** [author post] is the author of the post. *)
-
val email : post -> string
-
(** [email post] is the email of the post. *)
-
val content : post -> string
-
(** [content post] is the content of the post. *)
-
val id : post -> string
-
(** [id post] is the unique identifier of the post. *)
-
val tags : post -> string list
-
(** [tags post] is the list of tags associated with the post. *)
-
val summary : post -> string option
-
(** [summary post] is the summary/excerpt of the post, if available. *)
-
val meta_description : post -> string option
-
(** [meta_description post] is the meta description of the post on the origin
-
site.
-
To get the meta description, we make get the content of [link post] and look
-
for an HTML meta tag with the name "description" or "og:description".*)
-
val seo_image : post -> string option
-
(** [seo_image post] is the image to be used by social networks and links to the
-
post.
-
To get the seo image, we make get the content of [link post] and look for an
-
HTML meta tag with the name "og:image" or "twitter:image". *)
-
val create_atom_entries : post list -> Syndic.Atom.entry list
-
(** [create_atom_feed posts] creates a list of atom entries, which can then be
-
used to create an atom feed that is an aggregate of the posts. *)
-
(** {1 JSON Feed Support} *)
-
val create_jsonfeed_items : post list -> Jsonfeed.Item.t list
-
(** [create_jsonfeed_items posts] creates a list of JSONFeed items from posts. *)
-
val create_jsonfeed :
-
title:string ->
-
?home_page_url:string ->
-
?feed_url:string ->
-
?description:string ->
-
?icon:string ->
-
?favicon:string ->
-
post list ->
-
Jsonfeed.t
-
(** [create_jsonfeed ~title ?home_page_url ?feed_url ?description posts]
-
creates a complete JSONFeed from the given posts.
-
@param title The feed title (required)
-
@param home_page_url The URL of the website the feed represents
-
@param feed_url The URL of the feed itself
-
@param description A description of the feed
-
@param icon URL of an icon for the feed (512x512 recommended)
-
@param favicon URL of a favicon for the feed (64x64 recommended)
-
@param posts The posts to include in the feed *)
-
val jsonfeed_to_string : ?minify:bool -> Jsonfeed.t -> (string, string) result
-
(** [jsonfeed_to_string ?minify jsonfeed] serializes a JSONFeed to a string.
-
@param minify If true, output compact JSON; if false, pretty-print (default: false) *)
-
type feed_content = Atom of Syndic.Atom.feed | Rss2 of Syndic.Rss2.channel | JSONFeed of Jsonfeed.t
-
(** The native format of a feed. *)
-
val feed_content : feed -> feed_content
-
(** [feed_content feed] returns the feed in its native format (Atom, RSS2, or JSONFeed).
-
This allows access to format-specific features like JSONFeed attachments. *)
···
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*)
+
(** River RSS/Atom/JSONFeed aggregator library *)
+
+
(** {1 Feed Sources} *)
+
+
module Source : sig
+
type t
+
(** A feed source with name and URL. *)
+
+
val make : name:string -> url:string -> t
+
(** [make ~name ~url] creates a new feed source. *)
+
+
val name : t -> string
+
(** [name source] returns the feed name/label. *)
+
+
val url : t -> string
+
(** [url source] returns the feed URL. *)
+
+
val jsont : t Jsont.t
+
(** JSON codec for sources. *)
+
end
+
+
(** {1 HTTP Session Management} *)
+
+
module Session : sig
+
type t
+
(** An abstract HTTP session for fetching feeds.
+
+
The session manages HTTP connections and is tied to an Eio switch
+
for proper resource cleanup. *)
+
+
val init :
+
sw:Eio.Switch.t ->
+
< clock : float Eio.Time.clock_ty Eio.Resource.t;
+
fs : Eio.Fs.dir_ty Eio.Path.t;
+
net : [ `Generic | `Unix ] Eio.Net.ty Eio.Resource.t; .. > ->
+
t
+
(** [init ~sw env] creates a new HTTP session.
+
+
The session is configured with appropriate defaults for fetching feeds:
+
- User-Agent: "OCaml-River/1.0"
+
- Automatic redirect following (max 5 redirects)
+
- TLS verification enabled
+
+
@param sw The switch for resource management
+
@param env The Eio environment *)
+
+
val with_session :
+
< clock : float Eio.Time.clock_ty Eio.Resource.t;
+
fs : Eio.Fs.dir_ty Eio.Path.t;
+
net : [ `Generic | `Unix ] Eio.Net.ty Eio.Resource.t; .. > ->
+
(t -> 'a) -> 'a
+
(** [with_session env f] creates a session and automatically manages its lifecycle.
+
+
This is the recommended way to use River as it ensures proper cleanup.
+
+
@param env The Eio environment
+
@param f The function to run with the session *)
+
end
+
+
(** {1 Feeds and Posts} *)
+
+
module Feed : sig
+
type t
+
(** An Atom, RSS2, or JSON Feed. *)
+
+
val fetch : Session.t -> Source.t -> t
+
(** [fetch session source] fetches and parses a feed from the given source.
+
+
@param session The HTTP session
+
@param source The feed source to fetch
+
@raise Failure if the feed cannot be fetched or parsed *)
+
+
val source : t -> Source.t
+
(** [source feed] returns the source this feed was fetched from. *)
+
end
+
+
module Post : sig
+
type t
+
(** A post from a feed. *)
+
+
val of_feeds : Feed.t list -> t list
+
(** [of_feeds feeds] extracts and deduplicates posts from the given feeds.
+
+
Posts are deduplicated by ID. *)
+
+
val feed : t -> Feed.t
+
(** [feed post] returns the feed this post originated from. *)
+
+
val title : t -> string
+
(** [title post] returns the post title. *)
+
+
val link : t -> Uri.t option
+
(** [link post] returns the post link. *)
+
+
val date : t -> Syndic.Date.t option
+
(** [date post] returns the post date. *)
+
+
val author : t -> string
+
(** [author post] returns the post author name. *)
+
+
val email : t -> string
+
(** [email post] returns the post author email. *)
+
+
val content : t -> string
+
(** [content post] returns the post content. *)
+
+
val id : t -> string
+
(** [id post] returns the unique identifier of the post. *)
+
+
val tags : t -> string list
+
(** [tags post] returns the list of tags associated with the post. *)
+
+
val summary : t -> string option
+
(** [summary post] returns the summary/excerpt of the post, if available. *)
+
+
val meta_description : t -> string option
+
(** [meta_description post] returns the meta description from the origin site.
+
+
To get the meta description, we fetch the content of [link post] and look
+
for an HTML meta tag with name "description" or "og:description". *)
+
val seo_image : t -> string option
+
(** [seo_image post] returns the social media image URL.
+
+
To get the SEO image, we fetch the content of [link post] and look for an
+
HTML meta tag with name "og:image" or "twitter:image". *)
+
end
+
+
(** {1 Format Conversion and Export} *)
+
+
module Format : sig
+
(** Feed format conversion and export. *)
+
+
module Atom : sig
+
(** Atom 1.0 format support. *)
+
+
val entry_of_post : Post.t -> Syndic.Atom.entry
+
(** [entry_of_post post] converts a post to an Atom entry. *)
+
+
val entries_of_posts : Post.t list -> Syndic.Atom.entry list
+
(** [entries_of_posts posts] converts posts to Atom entries. *)
+
+
val feed_of_entries :
+
title:string ->
+
?id:string ->
+
?authors:(string * string option) list ->
+
Syndic.Atom.entry list ->
+
Syndic.Atom.feed
+
(** [feed_of_entries ~title entries] creates an Atom feed from entries.
+
+
@param title The feed title
+
@param id Optional feed ID (default: "urn:river:merged")
+
@param authors Optional list of (name, email) tuples *)
+
+
val to_string : Syndic.Atom.feed -> string
+
(** [to_string feed] serializes an Atom feed to XML string. *)
+
end
+
+
module Rss2 : sig
+
(** RSS 2.0 format support. *)
+
+
val of_feed : Feed.t -> Syndic.Rss2.channel option
+
(** [of_feed feed] extracts RSS2 channel if the feed is RSS2 format.
+
+
Returns None if the feed is not RSS2. *)
+
end
+
+
module Jsonfeed : sig
+
(** JSON Feed 1.1 format support. *)
+
+
val item_of_post : Post.t -> Jsonfeed.Item.t
+
(** [item_of_post post] converts a post to a JSONFeed item. *)
+
+
val items_of_posts : Post.t list -> Jsonfeed.Item.t list
+
(** [items_of_posts posts] converts posts to JSONFeed items. *)
+
+
val feed_of_items :
+
title:string ->
+
?home_page_url:string ->
+
?feed_url:string ->
+
?description:string ->
+
?icon:string ->
+
?favicon:string ->
+
Jsonfeed.Item.t list ->
+
Jsonfeed.t
+
(** [feed_of_items ~title items] creates a JSONFeed from items.
+
+
@param title The feed title (required)
+
@param home_page_url The URL of the website the feed represents
+
@param feed_url The URL of the feed itself
+
@param description A description of the feed
+
@param icon URL of an icon for the feed (512x512 recommended)
+
@param favicon URL of a favicon for the feed (64x64 recommended) *)
+
+
val feed_of_posts :
+
title:string ->
+
?home_page_url:string ->
+
?feed_url:string ->
+
?description:string ->
+
?icon:string ->
+
?favicon:string ->
+
Post.t list ->
+
Jsonfeed.t
+
(** [feed_of_posts ~title posts] creates a JSONFeed from posts.
+
+
Convenience function that combines [items_of_posts] and [feed_of_items]. *)
+
+
val to_string : ?minify:bool -> Jsonfeed.t -> (string, string) result
+
(** [to_string ?minify feed] serializes a JSONFeed to JSON string.
+
+
@param minify If true, output compact JSON; if false, pretty-print (default: false) *)
+
+
val of_feed : Feed.t -> Jsonfeed.t option
+
(** [of_feed feed] extracts JSONFeed if the feed is JSONFeed format.
+
+
Returns None if the feed is not JSONFeed. *)
+
end
+
end
+
+
(** {1 User Management} *)
+
+
module User : sig
+
type t
+
(** User configuration with feed subscriptions. *)
+
+
val make :
+
username:string ->
+
fullname:string ->
+
?email:string ->
+
?feeds:Source.t list ->
+
?last_synced:string ->
+
unit ->
+
t
+
(** [make ~username ~fullname ()] creates a new user.
+
+
@param username Unique username identifier
+
@param fullname User's display name
+
@param email Optional email address
+
@param feeds Optional list of feed sources (default: [])
+
@param last_synced Optional ISO 8601 timestamp of last sync *)
+
+
val username : t -> string
+
(** [username user] returns the username. *)
+
val fullname : t -> string
+
(** [fullname user] returns the full name. *)
+
val email : t -> string option
+
(** [email user] returns the email address if set. *)
+
val feeds : t -> Source.t list
+
(** [feeds user] returns the list of subscribed feeds. *)
+
val last_synced : t -> string option
+
(** [last_synced user] returns the last sync timestamp if set. *)
+
val add_feed : t -> Source.t -> t
+
(** [add_feed user source] returns a new user with the feed added. *)
+
val remove_feed : t -> url:string -> t
+
(** [remove_feed user ~url] returns a new user with the feed removed by URL. *)
+
val set_last_synced : t -> string -> t
+
(** [set_last_synced user timestamp] returns a new user with updated sync time. *)
+
val jsont : t Jsont.t
+
(** JSON codec for users. *)
+
end
+
(** {1 Feed Quality Analysis} *)
+
module Quality : sig
+
type t
+
(** Quality metrics for a feed or user's aggregated feed. *)
+
val make :
+
total_entries:int ->
+
entries_with_summary:int ->
+
entries_with_author:int ->
+
entries_with_date:int ->
+
entries_with_content:int ->
+
entries_with_tags:int ->
+
avg_content_length:float ->
+
min_content_length:int ->
+
max_content_length:int ->
+
posting_frequency_days:float option ->
+
quality_score:float ->
+
t
+
(** [make ~total_entries ...] creates quality metrics. *)
+
val total_entries : t -> int
+
val entries_with_summary : t -> int
+
val entries_with_author : t -> int
+
val entries_with_date : t -> int
+
val entries_with_content : t -> int
+
val entries_with_tags : t -> int
+
val avg_content_length : t -> float
+
val min_content_length : t -> int
+
val max_content_length : t -> int
+
val posting_frequency_days : t -> float option
+
val quality_score : t -> float
+
(** Accessors for quality metrics. *)
+
val analyze : Syndic.Atom.entry list -> t
+
(** [analyze entries] computes quality metrics from Atom entries.
+
The quality score is a weighted average of:
+
- Content completeness (40%)
+
- Metadata completeness (30%)
+
- Content richness (30%) *)
+
end
+
(** {1 State Management} *)
+
module State : sig
+
type t
+
(** State handle for managing user data and feeds on disk. *)
+
val create :
+
< fs : Eio.Fs.dir_ty Eio.Path.t; .. > ->
+
app_name:string ->
+
t
+
(** [create env ~app_name] creates a state handle using XDG directories.
+
Data is stored in:
+
- Users: $XDG_STATE_HOME/[app_name]/users/
+
- Feeds: $XDG_STATE_HOME/[app_name]/feeds/user/
+
@param env The Eio environment with filesystem access
+
@param app_name Application name for XDG paths *)
+
(** {2 User Operations} *)
+
val create_user : t -> User.t -> (unit, string) result
+
(** [create_user state user] creates a new user.
+
Returns [Error] if the user already exists. *)
+
val delete_user : t -> username:string -> (unit, string) result
+
(** [delete_user state ~username] deletes a user and their feed data. *)
+
val get_user : t -> username:string -> User.t option
+
(** [get_user state ~username] retrieves a user by username. *)
+
val update_user : t -> User.t -> (unit, string) result
+
(** [update_user state user] saves updated user configuration. *)
+
val list_users : t -> string list
+
(** [list_users state] returns all usernames. *)
+
(** {2 Feed Operations} *)
+
val sync_user :
+
< clock : float Eio.Time.clock_ty Eio.Resource.t;
+
fs : Eio.Fs.dir_ty Eio.Path.t;
+
net : [ `Generic | `Unix ] Eio.Net.ty Eio.Resource.t; .. > ->
+
t ->
+
username:string ->
+
(unit, string) result
+
(** [sync_user env state ~username] fetches all feeds for the user and stores merged result.
+
Posts are fetched concurrently and merged with existing posts.
+
The result is stored as an Atom feed. *)
+
val sync_all :
+
< clock : float Eio.Time.clock_ty Eio.Resource.t;
+
fs : Eio.Fs.dir_ty Eio.Path.t;
+
net : [ `Generic | `Unix ] Eio.Net.ty Eio.Resource.t; .. > ->
+
t ->
+
(int * int, string) result
+
(** [sync_all env state] syncs all users concurrently.
+
Returns [Ok (success_count, fail_count)]. *)
+
val get_user_posts :
+
t ->
+
username:string ->
+
?limit:int ->
+
unit ->
+
Syndic.Atom.entry list
+
(** [get_user_posts state ~username ()] retrieves stored posts for a user.
+
@param limit Optional maximum number of posts to return *)
+
val get_all_posts :
+
t ->
+
?limit:int ->
+
unit ->
+
(string * Syndic.Atom.entry) list
+
(** [get_all_posts state ()] retrieves posts from all users, sorted by date.
+
Returns list of (username, entry) tuples.
+
@param limit Optional maximum number of posts to return *)
+
(** {2 Export} *)
+
val export_merged_feed :
+
t ->
+
title:string ->
+
format:[ `Atom | `Jsonfeed ] ->
+
?limit:int ->
+
unit ->
+
(string, string) result
+
(** [export_merged_feed state ~title ~format ()] exports a merged feed of all users.
+
@param title Feed title
+
@param format Output format
+
@param limit Optional maximum number of entries *)
+
(** {2 Analysis} *)
+
val analyze_user_quality :
+
t ->
+
username:string ->
+
(Quality.t, string) result
+
(** [analyze_user_quality state ~username] analyzes quality metrics for a user's feed. *)
+
end
-656
stack/river/lib/river_store.ml
···
-
(*
-
* Persistent storage for Atom feed entries using Cacheio and Jsonfeed
-
*)
-
-
let src = Logs.Src.create "river.store" ~doc:"River persistent storage"
-
module Log = (val Logs.src_log src : Logs.LOG)
-
-
(* Types *)
-
-
(* Storage metadata that extends Jsonfeed.Item via unknown fields *)
-
type storage_meta = {
-
feed_url : string;
-
feed_name : string;
-
feed_title : string;
-
stored_at : Ptime.t;
-
}
-
-
(* A stored entry is a Jsonfeed.Item.t with storage metadata in unknown fields *)
-
type stored_entry = {
-
item : Jsonfeed.Item.t;
-
meta : storage_meta;
-
}
-
-
(* Stored entry accessors *)
-
let entry_item entry = entry.item
-
let entry_feed_url entry = entry.meta.feed_url
-
let entry_feed_name entry = entry.meta.feed_name
-
let entry_feed_title entry = entry.meta.feed_title
-
let entry_stored_at entry = entry.meta.stored_at
-
-
type feed_info = {
-
url : string;
-
name : string;
-
title : string;
-
last_updated : Ptime.t;
-
entry_count : int;
-
}
-
-
type t = {
-
cache : Cacheio.t;
-
base_dir : Eio.Fs.dir_ty Eio.Path.t;
-
}
-
-
(* Helper functions *)
-
-
let make_feed_key feed_url =
-
(* Use SHA256 hash of feed URL as directory name for safety *)
-
let hash = Digest.string feed_url |> Digest.to_hex in
-
"feeds/" ^ hash
-
-
let make_entry_key feed_url atom_id =
-
(* Store entry under feed directory with atom_id hash *)
-
let feed_key = make_feed_key feed_url in
-
let entry_hash = Digest.string atom_id |> Digest.to_hex in
-
feed_key ^ "/entries/" ^ entry_hash
-
-
let make_feed_meta_key feed_url =
-
let feed_key = make_feed_key feed_url in
-
feed_key ^ "/meta.json"
-
-
(* JSON serialization using Jsonfeed and Jsont *)
-
-
(* Storage metadata codec - stores feed info and storage timestamp *)
-
let storage_meta_jsont : storage_meta Jsont.t =
-
Jsont.Object.(
-
map ~kind:"StorageMeta" (fun feed_url feed_name feed_title stored_at : storage_meta ->
-
{ feed_url; feed_name; feed_title; stored_at })
-
|> mem "x_river_feed_url" Jsont.string ~enc:(fun m -> m.feed_url)
-
|> mem "x_river_feed_name" Jsont.string ~enc:(fun m -> m.feed_name)
-
|> mem "x_river_feed_title" Jsont.string ~enc:(fun m -> m.feed_title)
-
|> mem "x_river_stored_at" Jsonfeed.Rfc3339.jsont ~enc:(fun m -> m.stored_at)
-
|> finish
-
)
-
-
(* Codec for feed_info *)
-
let feed_meta_jsont : feed_info Jsont.t =
-
Jsont.Object.(
-
map ~kind:"FeedInfo" (fun url name title last_updated entry_count : feed_info ->
-
{ url; name; title; last_updated; entry_count })
-
|> mem "url" Jsont.string ~enc:(fun (m : feed_info) -> m.url)
-
|> mem "name" Jsont.string ~enc:(fun m -> m.name)
-
|> mem "title" Jsont.string ~enc:(fun m -> m.title)
-
|> mem "last_updated" Jsonfeed.Rfc3339.jsont ~enc:(fun m -> m.last_updated)
-
|> mem "entry_count" Jsont.int ~enc:(fun m -> m.entry_count)
-
|> finish
-
)
-
-
(* Helper to create item with storage metadata in unknown fields *)
-
let merge_storage_meta item meta =
-
let meta_json = Jsont_bytesrw.encode_string' storage_meta_jsont meta
-
|> Result.get_ok in
-
let meta_unknown = Jsont_bytesrw.decode_string' Jsont.json meta_json
-
|> Result.get_ok in
-
Jsonfeed.Item.create
-
~id:(Jsonfeed.Item.id item)
-
~content:(Jsonfeed.Item.content item)
-
?url:(Jsonfeed.Item.url item)
-
?external_url:(Jsonfeed.Item.external_url item)
-
?title:(Jsonfeed.Item.title item)
-
?summary:(Jsonfeed.Item.summary item)
-
?image:(Jsonfeed.Item.image item)
-
?banner_image:(Jsonfeed.Item.banner_image item)
-
?date_published:(Jsonfeed.Item.date_published item)
-
?date_modified:(Jsonfeed.Item.date_modified item)
-
?authors:(Jsonfeed.Item.authors item)
-
?tags:(Jsonfeed.Item.tags item)
-
?language:(Jsonfeed.Item.language item)
-
?attachments:(Jsonfeed.Item.attachments item)
-
?references:(Jsonfeed.Item.references item)
-
~unknown:meta_unknown
-
()
-
-
(* Helper to extract storage metadata from item's unknown fields *)
-
let extract_storage_meta item =
-
let unknown = Jsonfeed.Item.unknown item in
-
let meta_str = Jsont_bytesrw.encode_string' Jsont.json unknown |> Result.get_ok in
-
match Jsont_bytesrw.decode_string' storage_meta_jsont meta_str with
-
| Ok meta -> meta
-
| Error e -> failwith ("Missing storage metadata: " ^ Jsont.Error.to_string e)
-
-
(* Stored entry codec - just wraps Jsonfeed.Item.jsont *)
-
let stored_entry_jsont : stored_entry Jsont.t =
-
let kind = "StoredEntry" in
-
let of_string s =
-
match Jsont_bytesrw.decode_string' Jsonfeed.Item.jsont s with
-
| Ok item -> Ok { item; meta = extract_storage_meta item }
-
| Error e -> Error (Jsont.Error.to_string e)
-
in
-
let enc entry =
-
let item_with_meta = merge_storage_meta entry.item entry.meta in
-
match Jsont_bytesrw.encode_string' Jsonfeed.Item.jsont item_with_meta with
-
| Ok s -> s
-
| Error e -> failwith ("Failed to encode: " ^ Jsont.Error.to_string e)
-
in
-
Jsont.of_of_string ~kind of_string ~enc
-
-
(* Encode/decode functions *)
-
let entry_to_string entry =
-
match Jsont_bytesrw.encode_string' stored_entry_jsont entry with
-
| Ok s -> s
-
| Error err -> failwith ("Failed to encode entry: " ^ Jsont.Error.to_string err)
-
-
let entry_of_string s =
-
match Jsont_bytesrw.decode_string' stored_entry_jsont s with
-
| Ok entry -> entry
-
| Error err -> failwith ("Failed to parse entry: " ^ Jsont.Error.to_string err)
-
-
let feed_meta_to_string meta =
-
match Jsont_bytesrw.encode_string' feed_meta_jsont meta with
-
| Ok s -> s
-
| Error err -> failwith ("Failed to encode feed metadata: " ^ Jsont.Error.to_string err)
-
-
let feed_meta_of_string s =
-
match Jsont_bytesrw.decode_string' feed_meta_jsont s with
-
| Ok meta -> meta
-
| Error err -> failwith ("Failed to parse feed metadata: " ^ Jsont.Error.to_string err)
-
-
(* Store creation *)
-
-
let create ~base_dir =
-
let cache_dir = Eio.Path.(base_dir / "river_store") in
-
(try
-
Eio.Path.mkdir ~perm:0o755 cache_dir
-
with Eio.Io (Eio.Fs.E (Already_exists _), _) -> ());
-
let cache = Cacheio.create ~base_dir:cache_dir in
-
Log.info (fun m -> m "Created River store at %a" Eio.Path.pp cache_dir);
-
{ cache; base_dir = cache_dir }
-
-
let create_with_xdge xdge =
-
let cache = Cacheio.create_with_xdge xdge in
-
let base_dir = Eio.Path.( / ) (Xdge.cache_dir xdge) "river_store" in
-
Log.info (fun m -> m "Created River store with XDG at %a" Eio.Path.pp base_dir);
-
{ cache; base_dir }
-
-
(* Convert Post.t to Jsonfeed.Item.t *)
-
let item_of_post ~feed_url ~feed_name ~feed_title (post : Post.t) =
-
let content =
-
let html = Soup.to_string post.content in
-
`Html html
-
in
-
let url = Option.map Uri.to_string post.link in
-
let authors =
-
if post.author = "" then None
-
else Some [Jsonfeed.Author.create ~name:post.author ()]
-
in
-
let tags = if post.tags = [] then None else Some post.tags in
-
let item = Jsonfeed.Item.create
-
~id:post.id
-
~content
-
?url
-
?title:(if post.title = "" then None else Some post.title)
-
?summary:post.summary
-
?date_published:post.date
-
?date_modified:post.date
-
?authors
-
?tags
-
()
-
in
-
let meta = {
-
feed_url;
-
feed_name;
-
feed_title;
-
stored_at = Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get;
-
} in
-
{ item; meta }
-
-
(* Convert Syndic.Atom.entry to Jsonfeed.Item.t *)
-
let item_of_atom ~feed_url ~feed_name ~feed_title (atom_entry : Syndic.Atom.entry) =
-
let atom_id = Uri.to_string atom_entry.id in
-
let date_modified = atom_entry.updated in
-
let date_published = match atom_entry.published with
-
| Some p -> Some p
-
| None -> Some atom_entry.updated
-
in
-
(* Extract content *)
-
let content_html = match atom_entry.content with
-
| Some (Syndic.Atom.Text s) -> Some s
-
| Some (Syndic.Atom.Html (_, s)) -> Some s
-
| Some (Syndic.Atom.Xhtml (_, nodes)) ->
-
let ns_prefix _ = Some "" in
-
Some (String.concat "" (List.map (Syndic.XML.to_string ~ns_prefix) nodes))
-
| Some (Syndic.Atom.Mime _) | Some (Syndic.Atom.Src _) | None -> None
-
in
-
let content_text = match atom_entry.summary with
-
| Some s -> Some (Util.string_of_text_construct s)
-
| None -> None
-
in
-
let content = match content_html, content_text with
-
| Some h, Some t -> `Both (h, t)
-
| Some h, None -> `Html h
-
| None, Some t -> `Text t
-
| None, None -> `Text "" (* Fallback *)
-
in
-
let url = try
-
Some (Uri.to_string (List.find (fun l -> l.Syndic.Atom.rel = Syndic.Atom.Alternate) atom_entry.links).href)
-
with Not_found ->
-
match atom_entry.links with
-
| l :: _ -> Some (Uri.to_string l.href)
-
| [] -> None
-
in
-
let tags =
-
let cat_tags = List.map (fun cat -> cat.Syndic.Atom.term) atom_entry.categories in
-
if cat_tags = [] then None else Some cat_tags
-
in
-
let summary = match atom_entry.summary with
-
| Some s -> Some (Util.string_of_text_construct s)
-
| None -> None
-
in
-
let item = Jsonfeed.Item.create
-
~id:atom_id
-
~content
-
?url
-
~title:(Util.string_of_text_construct atom_entry.title)
-
?summary
-
?date_published
-
~date_modified
-
?tags
-
()
-
in
-
let meta = {
-
feed_url;
-
feed_name;
-
feed_title;
-
stored_at = Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get;
-
} in
-
{ item; meta }
-
-
(* Feed metadata management *)
-
let update_feed_meta store ~feed_url ~feed_name ~feed_title ~sw:_ =
-
let key = make_feed_meta_key feed_url in
-
let meta = {
-
url = feed_url;
-
name = feed_name;
-
title = feed_title;
-
last_updated = Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get;
-
entry_count = 0;
-
} in
-
let json_str = feed_meta_to_string meta in
-
let source = Eio.Flow.string_source json_str in
-
Cacheio.put store.cache ~key ~source ~ttl:None ();
-
Log.debug (fun m -> m "Updated feed metadata for %s" feed_url)
-
-
let get_feed_meta store ~feed_url ~sw =
-
let key = make_feed_meta_key feed_url in
-
match Cacheio.get store.cache ~key ~sw with
-
| None -> None
-
| Some source ->
-
try
-
let json_str = Eio.Buf_read.(parse_exn take_all) source ~max_size:Int.max_int in
-
Some (feed_meta_of_string json_str)
-
with e ->
-
Log.err (fun m -> m "Failed to parse feed metadata: %s" (Printexc.to_string e));
-
None
-
-
(* Entry storage *)
-
-
let store_entry store ~feed_url ~feed_name ~feed_title ~post ~sw =
-
let entry = item_of_post ~feed_url ~feed_name ~feed_title post in
-
let key = make_entry_key feed_url (Jsonfeed.Item.id entry.item) in
-
let json_str = entry_to_string entry in
-
let source = Eio.Flow.string_source json_str in
-
Cacheio.put store.cache ~key ~source ~ttl:None ();
-
Log.debug (fun m -> m "Stored entry %s for feed %s" (Jsonfeed.Item.id entry.item) feed_url);
-
(* Update feed metadata *)
-
update_feed_meta store ~feed_url ~feed_name ~feed_title ~sw
-
-
let store_posts store ~feed_url ~feed_name ~feed_title ~posts ~sw =
-
Log.info (fun m -> m "Storing %d posts for feed %s" (List.length posts) feed_url);
-
List.iter (fun post ->
-
store_entry store ~feed_url ~feed_name ~feed_title ~post ~sw
-
) posts;
-
Log.info (fun m -> m "Stored %d entries for feed %s" (List.length posts) feed_url)
-
-
let store_atom_entries store ~feed_url ~feed_name ~feed_title ~entries ~sw =
-
Log.info (fun m -> m "Storing %d Atom entries for feed %s" (List.length entries) feed_url);
-
List.iter (fun atom_entry ->
-
let entry = item_of_atom ~feed_url ~feed_name ~feed_title atom_entry in
-
let key = make_entry_key feed_url (Jsonfeed.Item.id entry.item) in
-
let json_str = entry_to_string entry in
-
let source = Eio.Flow.string_source json_str in
-
Cacheio.put store.cache ~key ~source ~ttl:None ();
-
Log.debug (fun m -> m "Stored Atom entry %s" (Jsonfeed.Item.id entry.item));
-
) entries;
-
update_feed_meta store ~feed_url ~feed_name ~feed_title ~sw;
-
Log.info (fun m -> m "Stored %d Atom entries for feed %s" (List.length entries) feed_url)
-
-
(* Entry retrieval *)
-
-
let get_entry store ~feed_url ~atom_id ~sw =
-
let key = make_entry_key feed_url atom_id in
-
match Cacheio.get store.cache ~key ~sw with
-
| None -> None
-
| Some source ->
-
try
-
let json_str = Eio.Buf_read.(parse_exn take_all) source ~max_size:Int.max_int in
-
Some (entry_of_string json_str)
-
with e ->
-
Log.err (fun m -> m "Failed to parse entry: %s" (Printexc.to_string e));
-
None
-
-
let list_entries store ~feed_url =
-
let feed_key = make_feed_key feed_url in
-
let prefix = feed_key ^ "/entries/" in
-
let entries = Cacheio.scan store.cache in
-
let feed_entries = List.filter_map (fun (cache_entry : Cacheio.Entry.t) ->
-
let key = Cacheio.Entry.key cache_entry in
-
if String.starts_with ~prefix key then
-
Eio.Switch.run @@ fun sw ->
-
match Cacheio.get store.cache ~key ~sw with
-
| None -> None
-
| Some source ->
-
try
-
let json_str = Eio.Buf_read.(parse_exn take_all) source ~max_size:Int.max_int in
-
Some (entry_of_string json_str)
-
with e ->
-
Log.err (fun m -> m "Failed to parse entry from scan: %s" (Printexc.to_string e));
-
None
-
else None
-
) entries in
-
(* Sort by date_modified, newest first *)
-
List.sort (fun a b ->
-
let time_a = Jsonfeed.Item.date_modified a.item |> Option.value ~default:a.meta.stored_at in
-
let time_b = Jsonfeed.Item.date_modified b.item |> Option.value ~default:b.meta.stored_at in
-
Ptime.compare time_b time_a
-
) feed_entries
-
-
let list_entries_filtered store ~feed_url ?since ?until ?limit ?(sort=`Updated) () =
-
let entries = list_entries store ~feed_url in
-
(* Filter by time *)
-
let entries = match since with
-
| None -> entries
-
| Some t -> List.filter (fun e ->
-
let time = Jsonfeed.Item.date_modified e.item |> Option.value ~default:e.meta.stored_at in
-
Ptime.is_later time ~than:t || Ptime.equal time t) entries
-
in
-
let entries = match until with
-
| None -> entries
-
| Some t -> List.filter (fun e ->
-
let time = Jsonfeed.Item.date_modified e.item |> Option.value ~default:e.meta.stored_at in
-
Ptime.is_earlier time ~than:t || Ptime.equal time t) entries
-
in
-
(* Sort *)
-
let entries = match sort with
-
| `Published -> List.sort (fun a b ->
-
let pa = Jsonfeed.Item.date_published a.item in
-
let pb = Jsonfeed.Item.date_published b.item in
-
match pa, pb with
-
| Some ta, Some tb -> Ptime.compare tb ta
-
| None, Some _ -> 1
-
| Some _, None -> -1
-
| None, None ->
-
let ta = Jsonfeed.Item.date_modified a.item |> Option.value ~default:a.meta.stored_at in
-
let tb = Jsonfeed.Item.date_modified b.item |> Option.value ~default:b.meta.stored_at in
-
Ptime.compare tb ta
-
) entries
-
| `Updated -> List.sort (fun a b ->
-
let ta = Jsonfeed.Item.date_modified a.item |> Option.value ~default:a.meta.stored_at in
-
let tb = Jsonfeed.Item.date_modified b.item |> Option.value ~default:b.meta.stored_at in
-
Ptime.compare tb ta
-
) entries
-
| `Stored -> List.sort (fun a b -> Ptime.compare b.meta.stored_at a.meta.stored_at) entries
-
in
-
(* Limit *)
-
match limit with
-
| None -> entries
-
| Some n -> List.filteri (fun i _ -> i < n) entries
-
-
let exists_entry store ~feed_url ~atom_id =
-
let key = make_entry_key feed_url atom_id in
-
Cacheio.exists store.cache ~key
-
-
let get_recent_entries store ?(limit=50) () =
-
let entries = Cacheio.scan store.cache in
-
let all_entries = List.filter_map (fun (cache_entry : Cacheio.Entry.t) ->
-
let key = Cacheio.Entry.key cache_entry in
-
if String.contains key '/' &&
-
String.ends_with ~suffix:"entries/" (String.sub key 0 (String.rindex key '/') ^ "/") then
-
Eio.Switch.run @@ fun sw ->
-
match Cacheio.get store.cache ~key ~sw with
-
| None -> None
-
| Some source ->
-
try
-
let json_str = Eio.Buf_read.(parse_exn take_all) source ~max_size:Int.max_int in
-
Some (entry_of_string json_str)
-
with e ->
-
Log.err (fun m -> m "Failed to parse entry: %s" (Printexc.to_string e));
-
None
-
else None
-
) entries in
-
let sorted = List.sort (fun a b ->
-
let ta = Jsonfeed.Item.date_modified a.item |> Option.value ~default:a.meta.stored_at in
-
let tb = Jsonfeed.Item.date_modified b.item |> Option.value ~default:b.meta.stored_at in
-
Ptime.compare tb ta
-
) all_entries in
-
List.filteri (fun i _ -> i < limit) sorted
-
-
let find_entry_by_id store ~id =
-
Log.debug (fun m -> m "Searching for entry with ID: %s" id);
-
let entries = Cacheio.scan store.cache in
-
let matching_entry = List.find_map (fun (cache_entry : Cacheio.Entry.t) ->
-
let key = Cacheio.Entry.key cache_entry in
-
if String.contains key '/' &&
-
String.ends_with ~suffix:"entries/" (String.sub key 0 (String.rindex key '/') ^ "/") then
-
Eio.Switch.run @@ fun sw ->
-
match Cacheio.get store.cache ~key ~sw with
-
| None -> None
-
| Some source ->
-
(try
-
let json_str = Eio.Buf_read.(parse_exn take_all) source ~max_size:Int.max_int in
-
let entry = entry_of_string json_str in
-
(* Exact ID match only *)
-
if Jsonfeed.Item.id entry.item = id then
-
Some entry
-
else
-
None
-
with e ->
-
Log.err (fun m -> m "Failed to parse entry: %s" (Printexc.to_string e));
-
None)
-
else None
-
) entries in
-
(match matching_entry with
-
| Some e -> Log.debug (fun m -> m "Found entry: %s"
-
(Jsonfeed.Item.title e.item |> Option.value ~default:"(no title)"))
-
| None -> Log.debug (fun m -> m "No entry found with ID: %s" id));
-
matching_entry
-
-
(* Entry management *)
-
-
let delete_entry store ~feed_url ~atom_id =
-
let key = make_entry_key feed_url atom_id in
-
Cacheio.delete store.cache ~key;
-
Log.info (fun m -> m "Deleted entry %s from feed %s" atom_id feed_url)
-
-
let delete_feed store ~feed_url =
-
let feed_key = make_feed_key feed_url in
-
let prefix = feed_key ^ "/" in
-
let entries = Cacheio.scan store.cache in
-
let count = ref 0 in
-
List.iter (fun (cache_entry : Cacheio.Entry.t) ->
-
let key = Cacheio.Entry.key cache_entry in
-
if String.starts_with ~prefix key then begin
-
Cacheio.delete store.cache ~key;
-
incr count
-
end
-
) entries;
-
Log.info (fun m -> m "Deleted feed %s (%d entries)" feed_url !count)
-
-
let prune_entries store ~feed_url ~keep =
-
let entries = list_entries store ~feed_url in
-
let to_delete = List.filteri (fun i _ -> i >= keep) entries in
-
List.iter (fun entry ->
-
delete_entry store ~feed_url ~atom_id:(Jsonfeed.Item.id entry.item)
-
) to_delete;
-
let deleted = List.length to_delete in
-
Log.info (fun m -> m "Pruned %d entries from feed %s (kept %d)" deleted feed_url keep);
-
deleted
-
-
let prune_old_entries store ~feed_url ~older_than =
-
let entries = list_entries store ~feed_url in
-
let to_delete = List.filter (fun e ->
-
let time = Jsonfeed.Item.date_modified e.item |> Option.value ~default:e.meta.stored_at in
-
Ptime.is_earlier time ~than:older_than
-
) entries in
-
List.iter (fun entry ->
-
delete_entry store ~feed_url ~atom_id:(Jsonfeed.Item.id entry.item)
-
) to_delete;
-
let deleted = List.length to_delete in
-
Log.info (fun m -> m "Pruned %d old entries from feed %s" deleted feed_url);
-
deleted
-
-
(* Feed information *)
-
-
let list_feeds store =
-
let feed_entries = Cacheio.scan store.cache in
-
let feed_metas = List.filter_map (fun (cache_entry : Cacheio.Entry.t) ->
-
let key = Cacheio.Entry.key cache_entry in
-
if String.ends_with ~suffix:"/meta.json" key then
-
Eio.Switch.run @@ fun sw ->
-
match Cacheio.get store.cache ~key ~sw with
-
| None -> None
-
| Some source ->
-
try
-
let json_str = Eio.Buf_read.(parse_exn take_all) source ~max_size:Int.max_int in
-
Some (feed_meta_of_string json_str)
-
with e ->
-
Log.err (fun m -> m "Failed to parse feed metadata: %s" (Printexc.to_string e));
-
None
-
else None
-
) feed_entries in
-
(* Count entries for each feed *)
-
List.map (fun meta ->
-
let entries = list_entries store ~feed_url:meta.url in
-
{ meta with entry_count = List.length entries }
-
) feed_metas
-
-
let get_feed_info store ~feed_url =
-
Eio.Switch.run @@ fun sw ->
-
match get_feed_meta store ~feed_url ~sw with
-
| None -> None
-
| Some meta ->
-
let entries = list_entries store ~feed_url in
-
Some { meta with entry_count = List.length entries }
-
-
let stats store =
-
Cacheio.stats store.cache
-
-
(* Maintenance *)
-
-
let expire store =
-
Cacheio.expire store.cache
-
-
let compact _store =
-
(* TODO: Implement compaction logic *)
-
Log.info (fun m -> m "Compaction not yet implemented")
-
-
(* Export/Import *)
-
-
let export_to_atom store ~feed_url ?title ?limit () =
-
let entries = match limit with
-
| None -> list_entries store ~feed_url
-
| Some n -> list_entries_filtered store ~feed_url ~limit:n ()
-
in
-
let atom_entries = List.map (fun entry ->
-
let item = entry.item in
-
let id = Uri.of_string (Jsonfeed.Item.id item) in
-
let entry_title : Syndic.Atom.text_construct =
-
Syndic.Atom.Text (Jsonfeed.Item.title item |> Option.value ~default:"(no title)") in
-
let links = match Jsonfeed.Item.url item with
-
| Some url_str -> [Syndic.Atom.link ~rel:Syndic.Atom.Alternate (Uri.of_string url_str)]
-
| None -> []
-
in
-
let content_str = match Jsonfeed.Item.content item with
-
| `Html h -> h
-
| `Text t -> t
-
| `Both (h, _) -> h
-
in
-
let entry_content : Syndic.Atom.content = Syndic.Atom.Html (None, content_str) in
-
let author_name = match Jsonfeed.Item.authors item with
-
| Some (a :: _) -> Jsonfeed.Author.name a |> Option.value ~default:entry.meta.feed_name
-
| _ -> entry.meta.feed_name
-
in
-
let author = Syndic.Atom.author author_name in
-
let authors = (author, []) in
-
let updated = Jsonfeed.Item.date_modified item |> Option.value ~default:entry.meta.stored_at in
-
Syndic.Atom.entry ~id ~title:entry_title ~updated
-
?published:(Jsonfeed.Item.date_published item)
-
~links ~content:entry_content ~authors ()
-
) entries in
-
let feed_title : Syndic.Atom.text_construct = match title with
-
| Some t -> Syndic.Atom.Text t
-
| None -> Syndic.Atom.Text ("Archive: " ^ feed_url)
-
in
-
let feed_id = Uri.of_string ("urn:river:archive:" ^ (Digest.string feed_url |> Digest.to_hex)) in
-
let feed_updated = match entries with
-
| [] -> Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get
-
| e :: _ -> Jsonfeed.Item.date_modified e.item |> Option.value ~default:e.meta.stored_at
-
in
-
{
-
Syndic.Atom.id = feed_id;
-
title = feed_title;
-
updated = feed_updated;
-
entries = atom_entries;
-
authors = [];
-
categories = [];
-
contributors = [];
-
generator = Some {
-
Syndic.Atom.version = Some "1.0";
-
uri = None;
-
content = "River Store";
-
};
-
icon = None;
-
links = [];
-
logo = None;
-
rights = None;
-
subtitle = None;
-
}
-
-
let import_from_atom store ~feed_url ~feed_name ~feed ~sw =
-
let entries = feed.Syndic.Atom.entries in
-
store_atom_entries store ~feed_url ~feed_name ~feed_title:(Util.string_of_text_construct feed.title) ~entries ~sw;
-
List.length entries
-
-
(* Pretty printing *)
-
-
let pp_entry fmt entry =
-
let item = entry.item in
-
Format.fprintf fmt "@[<v 2>Entry:@,";
-
Format.fprintf fmt "ID: %s@," (Jsonfeed.Item.id item);
-
Format.fprintf fmt "Title: %s@," (Jsonfeed.Item.title item |> Option.value ~default:"(no title)");
-
Format.fprintf fmt "URL: %s@," (Jsonfeed.Item.url item |> Option.value ~default:"(none)");
-
(match Jsonfeed.Item.date_published item with
-
| Some t -> Format.fprintf fmt "Published: %s@," (Ptime.to_rfc3339 t)
-
| None -> ());
-
(match Jsonfeed.Item.date_modified item with
-
| Some t -> Format.fprintf fmt "Modified: %s@," (Ptime.to_rfc3339 t)
-
| None -> ());
-
Format.fprintf fmt "Feed: %s (%s)@," entry.meta.feed_name entry.meta.feed_url;
-
Format.fprintf fmt "Stored: %s@]" (Ptime.to_rfc3339 entry.meta.stored_at)
-
-
let pp_feed_info fmt info =
-
Format.fprintf fmt "@[<v 2>Feed:@,";
-
Format.fprintf fmt "Name: %s@," info.name;
-
Format.fprintf fmt "Title: %s@," info.title;
-
Format.fprintf fmt "URL: %s@," info.url;
-
Format.fprintf fmt "Last updated: %s@," (Ptime.to_rfc3339 info.last_updated);
-
Format.fprintf fmt "Entries: %d@]" info.entry_count
-
-
let pp fmt store =
-
let feeds = list_feeds store in
-
Format.fprintf fmt "@[<v 2>River Store:@,";
-
Format.fprintf fmt "Base dir: %a@," Eio.Path.pp store.base_dir;
-
Format.fprintf fmt "Feeds: %d@," (List.length feeds);
-
List.iter (fun feed ->
-
Format.fprintf fmt " - %s: %d entries@," feed.name feed.entry_count
-
) feeds;
-
Format.fprintf fmt "@]"
···
-218
stack/river/lib/river_store.mli
···
-
(** Persistent storage for Atom feed entries using Cacheio
-
-
River_store provides a persistent, per-feed storage system for Atom entries,
-
enabling long-term archival of feed items that may have expired upstream.
-
-
{2 Key Features}
-
-
- {b Per-feed storage}: Each feed's entries stored independently
-
- {b Atom ID keying}: Entries keyed by their unique Atom ID
-
- {b URL resolution}: Resolves all URLs relative to feed base URI
-
- {b Persistent caching}: Built on Cacheio for reliable file storage
-
- {b Entry management}: List, update, delete, and prune operations
-
- {b Metadata tracking}: Stores feed source, timestamps, and relationships
-
-
{2 Usage Example}
-
-
{[
-
let store = River_store.create ~base_dir:store_dir in
-
-
(* Store entries from a feed *)
-
Eio.Switch.run @@ fun sw ->
-
let feed = River.fetch env source in
-
let posts = River.posts [feed] in
-
River_store.store_posts store ~feed_url:source.url ~posts ~sw ();
-
-
(* List entries for a feed *)
-
let entries = River_store.list_entries store ~feed_url:source.url in
-
List.iter (fun entry ->
-
Printf.printf "%s: %s\n" entry.atom_id entry.title
-
) entries;
-
-
(* Get a specific entry *)
-
match River_store.get_entry store ~feed_url:source.url ~atom_id:"..." ~sw with
-
| Some entry -> (* Use entry *)
-
| None -> (* Not found *)
-
]} *)
-
-
(** {1 Core Types} *)
-
-
(** Abstract type representing the store *)
-
type t
-
-
(** Stored entry - combines Jsonfeed.Item with storage metadata *)
-
type stored_entry
-
-
(** {2 Stored Entry Accessors} *)
-
-
val entry_item : stored_entry -> Jsonfeed.Item.t
-
(** Get the underlying Jsonfeed Item *)
-
-
val entry_feed_url : stored_entry -> string
-
(** Get the source feed URL *)
-
-
val entry_feed_name : stored_entry -> string
-
(** Get the source feed name *)
-
-
val entry_feed_title : stored_entry -> string
-
(** Get the source feed title *)
-
-
val entry_stored_at : stored_entry -> Ptime.t
-
(** Get the storage timestamp *)
-
-
(** Feed metadata *)
-
type feed_info = {
-
url : string;
-
(** Feed URL *)
-
-
name : string;
-
(** Feed name/label *)
-
-
title : string;
-
(** Feed title from metadata *)
-
-
last_updated : Ptime.t;
-
(** Last time feed was synced *)
-
-
entry_count : int;
-
(** Number of stored entries *)
-
}
-
-
(** {1 Store Creation} *)
-
-
(** Create a store at the specified base directory *)
-
val create : base_dir:Eio.Fs.dir_ty Eio.Path.t -> t
-
-
(** Create a store using an Xdge context for XDG-compliant paths *)
-
val create_with_xdge : Xdge.t -> t
-
-
(** {1 Entry Storage} *)
-
-
(** Store a single post entry from a feed *)
-
val store_entry :
-
t ->
-
feed_url:string ->
-
feed_name:string ->
-
feed_title:string ->
-
post:Post.t ->
-
sw:Eio.Switch.t ->
-
unit
-
-
(** Store multiple posts from a feed *)
-
val store_posts :
-
t ->
-
feed_url:string ->
-
feed_name:string ->
-
feed_title:string ->
-
posts:Post.t list ->
-
sw:Eio.Switch.t ->
-
unit
-
-
(** Store entries directly from Syndic.Atom.entry list *)
-
val store_atom_entries :
-
t ->
-
feed_url:string ->
-
feed_name:string ->
-
feed_title:string ->
-
entries:Syndic.Atom.entry list ->
-
sw:Eio.Switch.t ->
-
unit
-
-
(** {1 Entry Retrieval} *)
-
-
(** Get a specific entry by Atom ID *)
-
val get_entry :
-
t ->
-
feed_url:string ->
-
atom_id:string ->
-
sw:Eio.Switch.t ->
-
stored_entry option
-
-
(** List all entries for a feed *)
-
val list_entries : t -> feed_url:string -> stored_entry list
-
-
(** List entries with filtering and sorting options *)
-
val list_entries_filtered :
-
t ->
-
feed_url:string ->
-
?since:Ptime.t ->
-
?until:Ptime.t ->
-
?limit:int ->
-
?sort:[`Published | `Updated | `Stored] ->
-
unit ->
-
stored_entry list
-
-
(** Check if an entry exists *)
-
val exists_entry : t -> feed_url:string -> atom_id:string -> bool
-
-
(** Get the most recent entries across all feeds *)
-
val get_recent_entries : t -> ?limit:int -> unit -> stored_entry list
-
-
(** Find an entry by ID across all feeds (searches by atom_id) *)
-
val find_entry_by_id : t -> id:string -> stored_entry option
-
-
(** {1 Entry Management} *)
-
-
(** Delete a specific entry *)
-
val delete_entry : t -> feed_url:string -> atom_id:string -> unit
-
-
(** Delete all entries for a feed *)
-
val delete_feed : t -> feed_url:string -> unit
-
-
(** Prune old entries (keep most recent N per feed) *)
-
val prune_entries : t -> feed_url:string -> keep:int -> int
-
(** Returns number of entries deleted *)
-
-
(** Prune entries older than a given time *)
-
val prune_old_entries : t -> feed_url:string -> older_than:Ptime.t -> int
-
(** Returns number of entries deleted *)
-
-
(** {1 Feed Information} *)
-
-
(** List all feeds that have stored entries *)
-
val list_feeds : t -> feed_info list
-
-
(** Get information about a specific feed *)
-
val get_feed_info : t -> feed_url:string -> feed_info option
-
-
(** Get statistics about the store *)
-
val stats : t -> Cacheio.Stats.t
-
-
(** {1 Maintenance} *)
-
-
(** Clean up expired entries (respects TTL if set) *)
-
val expire : t -> int
-
(** Returns number of entries expired *)
-
-
(** Compact storage (remove duplicate/orphaned data) *)
-
val compact : t -> unit
-
-
(** Export entries to an Atom feed *)
-
val export_to_atom :
-
t ->
-
feed_url:string ->
-
?title:string ->
-
?limit:int ->
-
unit ->
-
Syndic.Atom.feed
-
-
(** Import entries from an Atom feed *)
-
val import_from_atom :
-
t ->
-
feed_url:string ->
-
feed_name:string ->
-
feed:Syndic.Atom.feed ->
-
sw:Eio.Switch.t ->
-
int
-
(** Returns number of entries imported *)
-
-
(** {1 Pretty Printing} *)
-
-
(** Pretty printer for stored entries *)
-
val pp_entry : Format.formatter -> stored_entry -> unit
-
-
(** Pretty printer for feed info *)
-
val pp_feed_info : Format.formatter -> feed_info -> unit
-
-
(** Pretty printer for the store *)
-
val pp : Format.formatter -> t -> unit
···
-33
stack/river/lib/util.ml
···
-
(*
-
* Copyright (c) 2014, OCaml.org project
-
* Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
-
*
-
* Permission to use, copy, modify, and distribute this software for any
-
* purpose with or without fee is hereby granted, provided that the above
-
* copyright notice and this permission notice appear in all copies.
-
*
-
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
*)
-
-
open Syndic
-
-
(* Remove all tags *)
-
let rec syndic_to_buffer b = function
-
| XML.Node (_, _, subs) -> List.iter (syndic_to_buffer b) subs
-
| XML.Data (_, d) -> Buffer.add_string b d
-
-
let syndic_to_string x =
-
let b = Buffer.create 1024 in
-
List.iter (syndic_to_buffer b) x;
-
Buffer.contents b
-
-
let string_of_text_construct : Atom.text_construct -> string = function
-
(* FIXME: we probably would like to parse the HTML and remove the tags *)
-
| Atom.Text s | Atom.Html (_, s) -> s
-
| Atom.Xhtml (_, x) -> syndic_to_string x
···
+9 -10
stack/river/test/test_eio_river.ml
···
(* Test the Eio-based River library *)
let test_sources =
-
River.
[
-
{ name = "OCaml Planet"; url = "https://ocaml.org/feed.xml" };
]
let main env =
Printf.printf "Testing River library with Eio and Requests...\n";
-
(* Use River.with_session for proper resource management *)
-
River.with_session env @@ fun session ->
(* Test fetching feeds *)
let feeds =
try
-
List.map (River.fetch session) test_sources
with
| Failure msg ->
Printf.printf "Error: %s\n" msg;
···
Printf.printf "Successfully fetched %d feed(s)\n" (List.length feeds);
(* Get posts from feeds *)
-
let posts = River.posts feeds in
Printf.printf "Found %d posts\n" (List.length posts);
(* Show first 3 posts *)
···
List.iteri (fun i post ->
Printf.printf "\nPost %d:\n" (i + 1);
-
Printf.printf " Title: %s\n" (River.title post);
-
Printf.printf " Author: %s\n" (River.author post);
Printf.printf " Date: %s\n"
-
(match River.date post with
| Some _ -> "Date available" (* Syndic.Date doesn't have to_string *)
| None -> "N/A");
Printf.printf " Link: %s\n"
-
(match River.link post with
| Some uri -> Uri.to_string uri
| None -> "N/A")
) first_posts
···
(* Test the Eio-based River library *)
let test_sources =
[
+
River.Source.make ~name:"OCaml Planet" ~url:"https://ocaml.org/feed.xml";
]
let main env =
Printf.printf "Testing River library with Eio and Requests...\n";
+
(* Use River.Session.with_session for proper resource management *)
+
River.Session.with_session env @@ fun session ->
(* Test fetching feeds *)
let feeds =
try
+
List.map (River.Feed.fetch session) test_sources
with
| Failure msg ->
Printf.printf "Error: %s\n" msg;
···
Printf.printf "Successfully fetched %d feed(s)\n" (List.length feeds);
(* Get posts from feeds *)
+
let posts = River.Post.of_feeds feeds in
Printf.printf "Found %d posts\n" (List.length posts);
(* Show first 3 posts *)
···
List.iteri (fun i post ->
Printf.printf "\nPost %d:\n" (i + 1);
+
Printf.printf " Title: %s\n" (River.Post.title post);
+
Printf.printf " Author: %s\n" (River.Post.author post);
Printf.printf " Date: %s\n"
+
(match River.Post.date post with
| Some _ -> "Date available" (* Syndic.Date doesn't have to_string *)
| None -> "N/A");
Printf.printf " Link: %s\n"
+
(match River.Post.link post with
| Some uri -> Uri.to_string uri
| None -> "N/A")
) first_posts
+6 -7
stack/river/test/test_logging.ml
···
Printf.printf "---\n\n"
let test_sources =
-
River.
[
-
{ name = "Test Feed"; url = "https://example.com/feed.xml" };
]
let main env =
(* Test with logging *)
Printf.printf "Testing River library with logging...\n\n";
-
(* Use River.with_session for proper resource management *)
-
River.with_session env @@ fun session ->
(* Demonstrate fetching with logging *)
let feeds =
try
-
List.map (River.fetch session) test_sources
with
| Failure msg ->
Printf.printf "Expected error (for demo): %s\n" msg;
···
if feeds <> [] then begin
(* This would show post aggregation logs *)
-
let posts = River.posts feeds in
Printf.printf "\nFound %d posts\n" (List.length posts);
(* This would show Atom entry creation logs *)
-
let _entries = River.create_atom_entries posts in
Printf.printf "Created Atom entries\n"
end
···
Printf.printf "---\n\n"
let test_sources =
[
+
River.Source.make ~name:"Test Feed" ~url:"https://example.com/feed.xml";
]
let main env =
(* Test with logging *)
Printf.printf "Testing River library with logging...\n\n";
+
(* Use River.Session.with_session for proper resource management *)
+
River.Session.with_session env @@ fun session ->
(* Demonstrate fetching with logging *)
let feeds =
try
+
List.map (River.Feed.fetch session) test_sources
with
| Failure msg ->
Printf.printf "Expected error (for demo): %s\n" msg;
···
if feeds <> [] then begin
(* This would show post aggregation logs *)
+
let posts = River.Post.of_feeds feeds in
Printf.printf "\nFound %d posts\n" (List.length posts);
(* This would show Atom entry creation logs *)
+
let _entries = River.Format.Atom.entries_of_posts posts in
Printf.printf "Created Atom entries\n"
end