My agentic slop goes here. Not intended for anyone else!
1(*
2 * Copyright (c) 2014, OCaml.org project
3 * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 *)
17
18(** State management for sync state and feeds.
19
20 User contact data is read from Sortal on-demand. River only persists
21 sync timestamps and feed data. *)
22
23let src = Logs.Src.create "river" ~doc:"River RSS/Atom aggregator"
24module Log = (val Logs.src_log src : Logs.LOG)
25
26type t = {
27 xdg : Xdge.t;
28 sortal : Sortal.t;
29}
30
31module Paths = struct
32 (** Get the feeds directory path *)
33 let feeds_dir state = Eio.Path.(Xdge.state_dir state.xdg / "feeds")
34
35 (** Get the user feeds directory path *)
36 let user_feeds_dir state = Eio.Path.(feeds_dir state / "user")
37
38 (** Get the sync state file path *)
39 let sync_state_file state = Eio.Path.(Xdge.state_dir state.xdg / "sync_state.json")
40
41 (** Get the path to a user's JSONFeed file *)
42 let user_feed_file state username =
43 Eio.Path.(user_feeds_dir state / (username ^ ".json"))
44
45 (** Get the path to a user's old Atom feed file (for migration) *)
46 let user_feed_file_legacy state username =
47 Eio.Path.(user_feeds_dir state / (username ^ ".xml"))
48
49 (** Ensure all necessary directories exist *)
50 let ensure_directories state =
51 let dirs = [
52 feeds_dir state;
53 user_feeds_dir state;
54 ] in
55 List.iter (fun dir ->
56 try Eio.Path.mkdir ~perm:0o755 dir
57 with Eio.Io (Eio.Fs.E (Already_exists _), _) -> ()
58 ) dirs
59end
60
61(** Sync state management - maps username to last_synced timestamp *)
62module Sync_state = struct
63 let jsont =
64 let pair_t =
65 let make username timestamp = (username, timestamp) in
66 Jsont.Object.map ~kind:"SyncEntry" make
67 |> Jsont.Object.mem "username" Jsont.string ~enc:fst
68 |> Jsont.Object.mem "timestamp" Jsont.string ~enc:snd
69 |> Jsont.Object.finish
70 in
71 Jsont.Object.map ~kind:"SyncState" (fun pairs -> pairs)
72 |> Jsont.Object.mem "synced_users" (Jsont.list pair_t) ~enc:(fun s -> s)
73 |> Jsont.Object.finish
74
75 let load state =
76 let file = Paths.sync_state_file state in
77 try
78 let content = Eio.Path.load file in
79 match Jsont_bytesrw.decode_string' jsont content with
80 | Ok pairs -> pairs
81 | Error err ->
82 Log.warn (fun m -> m "Failed to parse sync state: %s" (Jsont.Error.to_string err));
83 []
84 with
85 | Eio.Io (Eio.Fs.E (Not_found _), _) -> []
86 | e ->
87 Log.err (fun m -> m "Error loading sync state: %s" (Printexc.to_string e));
88 []
89
90 let save state sync_state =
91 let file = Paths.sync_state_file state in
92 match Jsont_bytesrw.encode_string' ~format:Jsont.Indent jsont sync_state with
93 | Ok json -> Eio.Path.save ~create:(`Or_truncate 0o644) file json
94 | Error err -> failwith ("Failed to encode sync state: " ^ Jsont.Error.to_string err)
95
96 let get_timestamp state username =
97 load state |> List.assoc_opt username
98
99 let set_timestamp state username timestamp =
100 let sync_state = load state in
101 let updated = (username, timestamp) :: List.remove_assoc username sync_state in
102 save state updated
103end
104
105(** Category storage - manages custom categories *)
106module Category_storage = struct
107 let categories_file state = Eio.Path.(Xdge.state_dir state.xdg / "categories.json")
108
109 let jsont = Jsont.list Category.jsont
110
111 let load state =
112 let file = categories_file state in
113 try
114 let content = Eio.Path.load file in
115 match Jsont_bytesrw.decode_string' jsont content with
116 | Ok categories -> categories
117 | Error err ->
118 Log.warn (fun m -> m "Failed to parse categories: %s" (Jsont.Error.to_string err));
119 []
120 with
121 | Eio.Io (Eio.Fs.E (Not_found _), _) -> []
122 | e ->
123 Log.err (fun m -> m "Error loading categories: %s" (Printexc.to_string e));
124 []
125
126 let save state categories =
127 let file = categories_file state in
128 match Jsont_bytesrw.encode_string' ~format:Jsont.Indent jsont categories with
129 | Ok json -> Eio.Path.save ~create:(`Or_truncate 0o644) file json
130 | Error err -> failwith ("Failed to encode categories: " ^ Jsont.Error.to_string err)
131
132 let get state id =
133 load state |> List.find_opt (fun cat -> Category.id cat = id)
134
135 let add state category =
136 let categories = load state in
137 let filtered = List.filter (fun cat -> Category.id cat <> Category.id category) categories in
138 save state (category :: filtered)
139
140 let remove state id =
141 let categories = load state in
142 save state (List.filter (fun cat -> Category.id cat <> id) categories)
143end
144
145(** Post-category mapping storage - maps post IDs to category IDs *)
146module Post_category_storage = struct
147 let post_categories_file state = Eio.Path.(Xdge.state_dir state.xdg / "post_categories.json")
148
149 (* Type: list of (post_id, category_ids) pairs *)
150 let jsont =
151 let pair_t =
152 let make post_id category_ids = (post_id, category_ids) in
153 Jsont.Object.map ~kind:"PostCategoryMapping" make
154 |> Jsont.Object.mem "post_id" Jsont.string ~enc:fst
155 |> Jsont.Object.mem "category_ids" (Jsont.list Jsont.string) ~enc:snd
156 |> Jsont.Object.finish
157 in
158 Jsont.list pair_t
159
160 let load state =
161 let file = post_categories_file state in
162 try
163 let content = Eio.Path.load file in
164 match Jsont_bytesrw.decode_string' jsont content with
165 | Ok mappings -> mappings
166 | Error err ->
167 Log.warn (fun m -> m "Failed to parse post categories: %s" (Jsont.Error.to_string err));
168 []
169 with
170 | Eio.Io (Eio.Fs.E (Not_found _), _) -> []
171 | e ->
172 Log.err (fun m -> m "Error loading post categories: %s" (Printexc.to_string e));
173 []
174
175 let save state mappings =
176 let file = post_categories_file state in
177 match Jsont_bytesrw.encode_string' ~format:Jsont.Indent jsont mappings with
178 | Ok json -> Eio.Path.save ~create:(`Or_truncate 0o644) file json
179 | Error err -> failwith ("Failed to encode post categories: " ^ Jsont.Error.to_string err)
180
181 let get state post_id =
182 load state |> List.assoc_opt post_id |> Option.value ~default:[]
183
184 let set state post_id category_ids =
185 let mappings = load state in
186 let filtered = List.remove_assoc post_id mappings in
187 let updated = if category_ids = [] then filtered else (post_id, category_ids) :: filtered in
188 save state updated
189
190 let add state post_id category_id =
191 let current = get state post_id in
192 if List.mem category_id current then ()
193 else set state post_id (category_id :: current)
194
195 let remove state post_id category_id =
196 let current = get state post_id in
197 set state post_id (List.filter ((<>) category_id) current)
198
199 let get_posts_by_category state category_id =
200 load state
201 |> List.filter (fun (_, category_ids) -> List.mem category_id category_ids)
202 |> List.map fst
203
204 let remove_category state category_id =
205 let mappings = load state in
206 let updated = List.filter_map (fun (post_id, category_ids) ->
207 let filtered = List.filter ((<>) category_id) category_ids in
208 if filtered = [] then None else Some (post_id, filtered)
209 ) mappings in
210 save state updated
211end
212
213(** {2 Category Management - Internal functions} *)
214
215let list_categories state =
216 Category_storage.load state
217
218let get_category state ~id =
219 Category_storage.get state id
220
221let add_category state category =
222 try
223 Category_storage.add state category;
224 Ok ()
225 with e ->
226 Error (Printf.sprintf "Failed to add category: %s" (Printexc.to_string e))
227
228let remove_category state ~id =
229 try
230 Category_storage.remove state id;
231 Post_category_storage.remove_category state id;
232 Ok ()
233 with e ->
234 Error (Printf.sprintf "Failed to remove category: %s" (Printexc.to_string e))
235
236let get_post_categories state ~post_id =
237 Post_category_storage.get state post_id
238
239let set_post_categories state ~post_id ~category_ids =
240 try
241 Post_category_storage.set state post_id category_ids;
242 Ok ()
243 with e ->
244 Error (Printf.sprintf "Failed to set post categories: %s" (Printexc.to_string e))
245
246let add_post_category state ~post_id ~category_id =
247 try
248 Post_category_storage.add state post_id category_id;
249 Ok ()
250 with e ->
251 Error (Printf.sprintf "Failed to add post category: %s" (Printexc.to_string e))
252
253let remove_post_category state ~post_id ~category_id =
254 try
255 Post_category_storage.remove state post_id category_id;
256 Ok ()
257 with e ->
258 Error (Printf.sprintf "Failed to remove post category: %s" (Printexc.to_string e))
259
260let get_posts_by_category state ~category_id =
261 Post_category_storage.get_posts_by_category state category_id
262
263module Storage = struct
264 (** List all usernames with feeds from Sortal *)
265 let list_users state =
266 try
267 Sortal.list state.sortal
268 |> List.filter (fun contact -> Sortal.Contact.feeds contact <> None)
269 |> List.map Sortal.Contact.handle
270 with e ->
271 Log.err (fun m -> m "Error listing Sortal users: %s" (Printexc.to_string e));
272 []
273
274 (** Get a user from Sortal with sync state *)
275 let get_user state username =
276 match Sortal.lookup state.sortal username with
277 | None -> None
278 | Some contact ->
279 (* Only return users with feeds *)
280 if Sortal.Contact.feeds contact = None then None
281 else
282 let last_synced = Sync_state.get_timestamp state username in
283 Some (User.of_contact contact ?last_synced ())
284
285 (** Get all users from Sortal with sync state *)
286 let get_all_users state =
287 try
288 Sortal.list state.sortal
289 |> List.filter (fun contact -> Sortal.Contact.feeds contact <> None)
290 |> List.map (fun contact ->
291 let username = Sortal.Contact.handle contact in
292 let last_synced = Sync_state.get_timestamp state username in
293 User.of_contact contact ?last_synced ())
294 with e ->
295 Log.err (fun m -> m "Error getting all users: %s" (Printexc.to_string e));
296 []
297
298 (** Migrate legacy Atom XML feed to JSONFeed format *)
299 let migrate_legacy_feed state username =
300 let legacy_file = Paths.user_feed_file_legacy state username in
301 try
302 let content = Eio.Path.load legacy_file in
303 Log.info (fun m -> m "Migrating legacy Atom feed for %s to JSONFeed" username);
304 (* Parse existing Atom feed *)
305 let input = Xmlm.make_input (`String (0, content)) in
306 let atom_feed = Syndic.Atom.parse input in
307 (* Convert to JSONFeed with extensions *)
308 let jsonfeed = River_jsonfeed.of_atom atom_feed in
309 (* Save as JSONFeed *)
310 let json_file = Paths.user_feed_file state username in
311 (match River_jsonfeed.to_string ~minify:false jsonfeed with
312 | Ok json ->
313 Eio.Path.save ~create:(`Or_truncate 0o644) json_file json;
314 Log.info (fun m -> m "Successfully migrated %s from Atom to JSONFeed" username);
315 (* Rename legacy file to .xml.backup *)
316 let backup_file = Eio.Path.(Paths.user_feeds_dir state / (username ^ ".xml.backup")) in
317 (try
318 Eio.Path.save ~create:(`Or_truncate 0o644) backup_file content;
319 Log.info (fun m -> m "Backed up legacy Atom file to %s.xml.backup" username)
320 with e ->
321 Log.warn (fun m -> m "Failed to backup legacy file: %s" (Printexc.to_string e)));
322 Some jsonfeed
323 | Error err ->
324 Log.err (fun m -> m "Failed to serialize JSONFeed during migration: %s" err);
325 None)
326 with
327 | Eio.Io (Eio.Fs.E (Not_found _), _) -> None
328 | e ->
329 Log.err (fun m -> m "Error migrating legacy feed for %s: %s"
330 username (Printexc.to_string e));
331 None
332
333 (** Load existing JSONFeed for a user (with legacy migration support) *)
334 let load_existing_feed state username =
335 let file = Paths.user_feed_file state username in
336 try
337 let content = Eio.Path.load file in
338 (* Parse JSONFeed *)
339 match River_jsonfeed.of_string content with
340 | Ok jsonfeed -> Some jsonfeed
341 | Error err ->
342 Log.err (fun m -> m "Failed to parse JSONFeed for %s: %s" username err);
343 (* Try migration from legacy Atom *)
344 migrate_legacy_feed state username
345 with
346 | Eio.Io (Eio.Fs.E (Not_found _), _) ->
347 (* JSON file not found, try legacy migration *)
348 migrate_legacy_feed state username
349 | e ->
350 Log.err (fun m -> m "Error loading feed for %s: %s"
351 username (Printexc.to_string e));
352 None
353
354 (** Load existing posts as Atom entries for a user (for backwards compatibility) *)
355 let load_existing_posts state username =
356 match load_existing_feed state username with
357 | None -> []
358 | Some jsonfeed ->
359 (* Convert JSONFeed back to Atom for backwards compatibility *)
360 let atom_feed = River_jsonfeed.to_atom jsonfeed in
361 atom_feed.Syndic.Atom.entries
362
363 (** Save JSONFeed for a user *)
364 let save_jsonfeed state username jsonfeed =
365 let file = Paths.user_feed_file state username in
366 match River_jsonfeed.to_string ~minify:false jsonfeed with
367 | Ok json -> Eio.Path.save ~create:(`Or_truncate 0o644) file json
368 | Error err -> failwith ("Failed to serialize JSONFeed: " ^ err)
369
370 (** Save Atom entries for a user (converts to JSONFeed first) *)
371 let save_atom_feed state username entries =
372 (* Convert Atom entries to JSONFeed with extensions *)
373 let items_with_ext = List.map River_jsonfeed.item_of_atom entries in
374 let items = List.map (fun i -> i.River_jsonfeed.item) items_with_ext in
375
376 (* Create feed extension *)
377 let feed_ext = {
378 River_jsonfeed.feed_subtitle = None;
379 feed_id = "urn:river:user:" ^ username;
380 feed_categories = [];
381 feed_contributors = [];
382 feed_generator = Some {
383 River_jsonfeed.generator_name = "River Feed Aggregator";
384 generator_uri = None;
385 generator_version = Some "1.0";
386 };
387 feed_rights = None;
388 feed_logo = None;
389 } in
390
391 let jsonfeed_inner = Jsonfeed.create ~title:username ~items () in
392 let jsonfeed = { River_jsonfeed.feed = jsonfeed_inner; extension = Some feed_ext } in
393 save_jsonfeed state username jsonfeed
394end
395
396module Sync = struct
397 (** Merge new entries with existing ones, updating matching IDs *)
398 let merge_entries ~existing ~new_entries =
399 (* Create a map of new entry IDs for efficient lookup and updates *)
400 let module UriMap = Map.Make(Uri) in
401 let new_entries_map =
402 List.fold_left (fun acc (entry : Syndic.Atom.entry) ->
403 UriMap.add entry.id entry acc
404 ) UriMap.empty new_entries
405 in
406
407 (* Update existing entries with new ones if IDs match, otherwise keep existing *)
408 let updated_existing =
409 List.filter_map (fun (entry : Syndic.Atom.entry) ->
410 if UriMap.mem entry.id new_entries_map then
411 None (* Will be replaced by new entry *)
412 else
413 Some entry (* Keep existing entry *)
414 ) existing
415 in
416
417 (* Combine new entries with non-replaced existing entries *)
418 let combined = new_entries @ updated_existing in
419 List.sort (fun (a : Syndic.Atom.entry) (b : Syndic.Atom.entry) ->
420 Ptime.compare b.updated a.updated
421 ) combined
422
423 (** Get current timestamp in ISO 8601 format *)
424 let current_timestamp () =
425 let open Unix in
426 let tm = gmtime (time ()) in
427 Printf.sprintf "%04d-%02d-%02dT%02d:%02d:%02dZ"
428 (tm.tm_year + 1900) (tm.tm_mon + 1) tm.tm_mday
429 tm.tm_hour tm.tm_min tm.tm_sec
430
431 (** Sync feeds for a single user *)
432 let sync_user session state ~username =
433 match Storage.get_user state username with
434 | None ->
435 Error (Printf.sprintf "User %s not found" username)
436 | Some user when User.feeds user = [] ->
437 Log.info (fun m -> m "No feeds configured for user %s" username);
438 Ok ()
439 | Some user ->
440 Log.info (fun m -> m "Syncing feeds for user %s..." username);
441
442 (* Fetch all feeds concurrently *)
443 let fetched_feeds =
444 Eio.Fiber.List.filter_map (fun source ->
445 try
446 Log.info (fun m -> m " [%s] Fetching %s (%s)..."
447 username (Source.name source) (Source.url source));
448 Some (Feed.fetch session source)
449 with e ->
450 Log.err (fun m -> m " [%s] Failed to fetch %s: %s"
451 username (Source.name source) (Printexc.to_string e));
452 None
453 ) (User.feeds user)
454 in
455
456 if fetched_feeds = [] then begin
457 Error "No feeds successfully fetched"
458 end else begin
459 (* Get posts from fetched feeds *)
460 let posts = Post.of_feeds fetched_feeds in
461 Log.info (fun m -> m " Found %d new posts" (List.length posts));
462
463 (* Convert to Atom entries *)
464 let new_entries = Format.Atom.entries_of_posts posts in
465
466 (* Load existing entries *)
467 let existing = Storage.load_existing_posts state username in
468 Log.info (fun m -> m " Found %d existing posts" (List.length existing));
469
470 (* Merge entries *)
471 let merged = merge_entries ~existing ~new_entries in
472 Log.info (fun m -> m " Total posts after merge: %d" (List.length merged));
473
474 (* Save updated feed *)
475 Storage.save_atom_feed state username merged;
476
477 (* Update last_synced timestamp *)
478 let now = current_timestamp () in
479 Sync_state.set_timestamp state username now;
480
481 Log.info (fun m -> m "Sync completed for user %s" username);
482 Ok ()
483 end
484end
485
486module Export = struct
487 (** Convert Atom entry to JSONFeed item *)
488 let atom_entry_to_jsonfeed_item (entry : Syndic.Atom.entry) =
489 (* Extract ID *)
490 let id = Uri.to_string entry.id in
491
492 (* Extract title *)
493 let title =
494 match entry.title with
495 | Syndic.Atom.Text s -> Some s
496 | Syndic.Atom.Html (_, s) -> Some s
497 | Syndic.Atom.Xhtml (_, _) -> Some "Untitled"
498 in
499
500 (* Extract URL *)
501 let url =
502 match entry.links with
503 | link :: _ -> Some (Uri.to_string link.href)
504 | [] -> None
505 in
506
507 (* Extract content *)
508 let content =
509 match entry.content with
510 | Some (Syndic.Atom.Text s) -> `Text s
511 | Some (Syndic.Atom.Html (_, s)) -> `Html s
512 | Some (Syndic.Atom.Xhtml (_, nodes)) ->
513 let html = String.concat "" (List.map Syndic.XML.to_string nodes) in
514 `Html html
515 | Some (Syndic.Atom.Mime _) | Some (Syndic.Atom.Src _) | None ->
516 `Text ""
517 in
518
519 (* Extract summary *)
520 let summary =
521 match entry.summary with
522 | Some (Syndic.Atom.Text s) when String.trim s <> "" -> Some s
523 | Some (Syndic.Atom.Html (_, s)) when String.trim s <> "" -> Some s
524 | _ -> None
525 in
526
527 (* Extract authors *)
528 let authors =
529 let (author, contributors) = entry.authors in
530 let author_list = author :: contributors in
531 let jsonfeed_authors = List.filter_map (fun (a : Syndic.Atom.author) ->
532 let name = String.trim a.name in
533 if name = "" then None
534 else Some (Jsonfeed.Author.create ~name ())
535 ) author_list in
536 if jsonfeed_authors = [] then None else Some jsonfeed_authors
537 in
538
539 (* Extract tags *)
540 let tags =
541 match entry.categories with
542 | [] -> None
543 | cats ->
544 let tag_list = List.map (fun (c : Syndic.Atom.category) ->
545 match c.label with
546 | Some lbl -> lbl
547 | None -> c.term
548 ) cats in
549 if tag_list = [] then None else Some tag_list
550 in
551
552 (* Create JSONFeed item *)
553 Jsonfeed.Item.create
554 ~id
555 ~content
556 ?title
557 ?url
558 ?summary
559 ?authors
560 ?tags
561 ~date_published:entry.updated
562 ()
563
564 (** Export entries as JSONFeed *)
565 let export_jsonfeed ~title entries =
566 let items = List.map atom_entry_to_jsonfeed_item entries in
567 let feed = Jsonfeed.create ~title ~items () in
568 match Jsonfeed.to_string ~minify:false feed with
569 | Ok json -> Ok json
570 | Error err -> Error (Printf.sprintf "Failed to serialize JSON Feed: %s" (Jsont.Error.to_string err))
571end
572
573let create env ~app_name =
574 let xdg = Xdge.create env#fs app_name in
575 (* Sortal always uses "sortal" as the app name for shared contact database *)
576 let sortal = Sortal.create env#fs "sortal" in
577 let state = { xdg; sortal } in
578 Paths.ensure_directories state;
579 state
580
581let get_user state ~username =
582 Storage.get_user state username
583
584let get_all_users state =
585 Storage.get_all_users state
586
587let list_users state =
588 Storage.list_users state
589
590let update_sync_state state ~username ~timestamp =
591 Sync_state.set_timestamp state username timestamp;
592 Ok ()
593
594let sync_user env state ~username =
595 Session.with_session env @@ fun session ->
596 Sync.sync_user session state ~username
597
598let sync_all env state =
599 let users = Storage.list_users state in
600 if users = [] then begin
601 Log.info (fun m -> m "No users to sync");
602 Ok (0, 0)
603 end else begin
604 Log.info (fun m -> m "Syncing %d users concurrently..." (List.length users));
605
606 Session.with_session env @@ fun session ->
607 let results =
608 Eio.Fiber.List.map (fun username ->
609 match Sync.sync_user session state ~username with
610 | Ok () -> true
611 | Error err ->
612 Log.err (fun m -> m "Failed to sync user %s: %s" username err);
613 false
614 ) users
615 in
616 let success_count = List.length (List.filter (fun x -> x) results) in
617 let fail_count = List.length users - success_count in
618
619 if fail_count = 0 then
620 Log.info (fun m -> m "All users synced successfully");
621
622 Ok (success_count, fail_count)
623 end
624
625let get_user_posts state ~username ?limit () =
626 let entries = Storage.load_existing_posts state username in
627 match limit with
628 | None -> entries
629 | Some n -> List.filteri (fun i _ -> i < n) entries
630
631let get_all_posts state ?limit () =
632 let users = Storage.list_users state in
633
634 (* Collect all entries from all users with username tag *)
635 let all_entries =
636 List.concat_map (fun username ->
637 let entries = Storage.load_existing_posts state username in
638 List.map (fun entry -> (username, entry)) entries
639 ) users
640 in
641
642 (* Sort by date (newest first) *)
643 let sorted = List.sort (fun (_, a : string * Syndic.Atom.entry) (_, b) ->
644 Ptime.compare b.updated a.updated
645 ) all_entries in
646
647 match limit with
648 | None -> sorted
649 | Some n -> List.filteri (fun i _ -> i < n) sorted
650
651let export_merged_feed state ~title ~format ?limit () =
652 let all_posts = get_all_posts state ?limit () in
653
654 (* Rewrite author metadata from Sortal user info and replace tags with River categories *)
655 let rewrite_entry_author_and_categories username (entry : Syndic.Atom.entry) =
656 let entry = match Storage.get_user state username with
657 | None -> entry
658 | Some user ->
659 (* Get user's full name and email from Sortal *)
660 let fullname = User.fullname user in
661 let email = User.email user in
662 let username = User.username user in
663
664 (* Create new author with Sortal information *)
665 let new_author =
666 match email with
667 | Some email_addr ->
668 Syndic.Atom.author ~email:email_addr ~uri:(Uri.of_string ("https://" ^ username)) fullname
669 | None ->
670 Syndic.Atom.author ~uri:(Uri.of_string ("https://" ^ username)) fullname
671 in
672
673 (* Update entry with new author, keeping existing contributors *)
674 let _, other_authors = entry.authors in
675 { entry with authors = (new_author, other_authors) }
676 in
677
678 (* Replace original blog tags with River categories *)
679 let post_id = Uri.to_string entry.id in
680 let river_category_ids = get_post_categories state ~post_id in
681 (* Deduplicate category IDs and create Atom categories *)
682 let unique_category_ids = List.sort_uniq String.compare river_category_ids in
683 let river_categories = List.filter_map (fun cat_id ->
684 match get_category state ~id:cat_id with
685 | Some cat -> Some (Syndic.Atom.category ~label:(Category.name cat) cat_id)
686 | None -> None
687 ) unique_category_ids in
688
689 { entry with categories = river_categories }
690 in
691
692 let entries = List.map (fun (username, entry) ->
693 rewrite_entry_author_and_categories username entry
694 ) all_posts in
695
696 match format with
697 | `Atom ->
698 let xml = Format.Atom.to_string (Format.Atom.feed_of_entries ~title entries) in
699 Ok xml
700 | `Jsonfeed ->
701 if entries = [] then
702 (* Empty JSONFeed *)
703 let feed = Jsonfeed.create ~title ~items:[] () in
704 match Jsonfeed.to_string ~minify:false feed with
705 | Ok json -> Ok json
706 | Error err -> Error (Printf.sprintf "Failed to serialize JSON Feed: %s" (Jsont.Error.to_string err))
707 else
708 Export.export_jsonfeed ~title entries
709
710let export_html_site state ~output_dir ~title ?(posts_per_page = 25) () =
711 try
712 Log.info (fun m -> m "=== Starting HTML site generation ===");
713 Log.info (fun m -> m "Output directory: %s" (Eio.Path.native_exn output_dir));
714 Log.info (fun m -> m "Site title: %s" title);
715 Log.info (fun m -> m "Posts per page: %d" posts_per_page);
716
717 (* Sanitize a string for use in filenames - replace unsafe characters *)
718 let sanitize_filename s =
719 let buf = Buffer.create (String.length s) in
720 String.iter (fun c ->
721 match c with
722 | '/' | '\\' | ':' | '*' | '?' | '"' | '<' | '>' | '|' -> Buffer.add_char buf '-'
723 | ' ' -> Buffer.add_char buf '-'
724 | c -> Buffer.add_char buf c
725 ) s;
726 Buffer.contents buf
727 in
728
729 (* Create directory structure *)
730 Log.info (fun m -> m "Creating directory structure");
731 let mkdir_if_not_exists dir =
732 try Eio.Path.mkdir ~perm:0o755 dir
733 with Eio.Io (Eio.Fs.E (Already_exists _), _) -> ()
734 in
735 mkdir_if_not_exists output_dir;
736 mkdir_if_not_exists Eio.Path.(output_dir / "authors");
737 mkdir_if_not_exists Eio.Path.(output_dir / "categories");
738 mkdir_if_not_exists Eio.Path.(output_dir / "thumbnails");
739 Log.info (fun m -> m "Directory structure created");
740
741 (* Helper to get and copy author thumbnail *)
742 let get_author_thumbnail username =
743 Log.debug (fun m -> m "Looking up thumbnail for username: %s" username);
744 match Sortal.lookup state.sortal username with
745 | Some contact ->
746 Log.debug (fun m -> m " Found Sortal contact for %s: %s" username (Sortal.Contact.name contact));
747 (match Sortal.thumbnail_path state.sortal contact with
748 | Some src_path ->
749 Log.info (fun m -> m " Copying thumbnail for %s from: %s" username (Eio.Path.native_exn src_path));
750 (* Copy thumbnail to output directory *)
751 let filename = Filename.basename (Eio.Path.native_exn src_path) in
752 let dest_path = Eio.Path.(output_dir / "thumbnails" / filename) in
753 (try
754 Log.debug (fun m -> m " Source path: %s" (Eio.Path.native_exn src_path));
755 Log.debug (fun m -> m " Destination path: %s" (Eio.Path.native_exn dest_path));
756 let content = Eio.Path.load src_path in
757 Eio.Path.save ~create:(`Or_truncate 0o644) dest_path content;
758 Log.info (fun m -> m " Successfully copied thumbnail to: thumbnails/%s" filename);
759 Some ("thumbnails/" ^ filename)
760 with e ->
761 Log.warn (fun m -> m " Failed to copy thumbnail for %s: %s" username (Printexc.to_string e));
762 None)
763 | None ->
764 Log.debug (fun m -> m " No thumbnail set for %s" username);
765 None)
766 | None ->
767 Log.warn (fun m -> m " No Sortal contact found for username: %s" username);
768 None
769 in
770
771 (* Helper to convert Atom entry to a simple record for HTML generation *)
772 let entry_to_html_data username (entry : Syndic.Atom.entry) =
773 let title = Text_extract.string_of_text_construct entry.title in
774 let link = List.find_opt (fun (l : Syndic.Atom.link) ->
775 l.rel = Syndic.Atom.Alternate
776 ) entry.links in
777 let link_uri = match link with
778 | Some l -> Some l.href
779 | None -> if List.length entry.links > 0 then Some (List.hd entry.links).href else None
780 in
781 let content_html = match entry.content with
782 | Some (Syndic.Atom.Text s) -> s
783 | Some (Syndic.Atom.Html (_, s)) -> s
784 | Some (Syndic.Atom.Xhtml (_, nodes)) ->
785 String.concat "" (List.map Syndic.XML.to_string nodes)
786 | Some (Syndic.Atom.Mime _) | Some (Syndic.Atom.Src _) | None -> ""
787 in
788 (* Get author name from Sortal, fallback to entry author *)
789 let author_name = match Sortal.lookup state.sortal username with
790 | Some contact -> Sortal.Contact.name contact
791 | None ->
792 let author, _ = entry.authors in
793 author.name
794 in
795 (* Don't use original blog tags - River categories will be fetched separately *)
796 let post_id = Uri.to_string entry.id in
797 (username, title, author_name, entry.updated, link_uri, content_html, [], post_id)
798 in
799
800 (* Get all posts *)
801 Log.info (fun m -> m "Retrieving all posts from state");
802 let all_posts = get_all_posts state () in
803 let html_data = List.map (fun (username, entry) ->
804 entry_to_html_data username entry
805 ) all_posts in
806
807 let unique_users = List.sort_uniq String.compare (List.map (fun (u, _, _, _, _, _, _, _) -> u) html_data) in
808 Log.info (fun m -> m "Retrieved %d posts from %d users" (List.length html_data) (List.length unique_users));
809 Log.info (fun m -> m "Users: %s" (String.concat ", " unique_users));
810
811 (* Generate main index pages with pagination *)
812 let total_posts = List.length html_data in
813 let total_pages = (total_posts + posts_per_page - 1) / posts_per_page in
814 Log.info (fun m -> m "Generating main index: %d posts across %d pages" total_posts total_pages);
815
816 for page = 1 to total_pages do
817 Log.info (fun m -> m " Generating index page %d/%d" page total_pages);
818 let start_idx = (page - 1) * posts_per_page in
819 let page_posts = List.filteri (fun i _ ->
820 i >= start_idx && i < start_idx + posts_per_page
821 ) html_data in
822
823 let post_htmls = List.map (fun (username, title, _feed_author, date, link, content, _tags, post_id) ->
824 Log.debug (fun m -> m " Processing post: %s by @%s" title username);
825
826 (* Get author name from Sortal, fallback to username *)
827 let author_name = match Sortal.lookup state.sortal username with
828 | Some contact -> Sortal.Contact.name contact
829 | None -> username
830 in
831
832 let post_html =
833 let date_str = Format.Html.format_date date in
834 let link_html = match link with
835 | Some uri ->
836 Printf.sprintf {|<a href="%s">%s</a>|}
837 (Format.Html.html_escape (Uri.to_string uri))
838 (Format.Html.html_escape title)
839 | None -> Format.Html.html_escape title
840 in
841 let excerpt = Format.Html.post_excerpt_from_html content ~max_length:300 in
842 let full_content = Format.Html.full_content_from_html content in
843
844 (* Get River categories for this post *)
845 let river_category_ids = get_post_categories state ~post_id in
846 let river_categories = List.filter_map (fun cat_id ->
847 match get_category state ~id:cat_id with
848 | Some cat -> Some (Category.id cat, Category.name cat)
849 | None -> None
850 ) river_category_ids in
851
852 (* Display only River categories *)
853 let tags_html =
854 match river_categories with
855 | [] -> ""
856 | _ ->
857 let category_links = List.map (fun (cat_id, cat_name) ->
858 Printf.sprintf {|<a href="categories/%s.html">%s</a>|}
859 (Format.Html.html_escape (sanitize_filename cat_id)) (Format.Html.html_escape cat_name)
860 ) river_categories in
861 Printf.sprintf {|<div class="post-tags">%s</div>|}
862 (String.concat "" category_links)
863 in
864 let tags_and_actions =
865 if tags_html = "" then
866 {|<a href="#" class="read-more">Read more</a>|}
867 else
868 Printf.sprintf {|<div class="post-tags-and-actions"><a href="#" class="read-more">Read more</a>%s</div>|}
869 tags_html
870 in
871 let thumbnail_html = match get_author_thumbnail username with
872 | Some thumb_path ->
873 Printf.sprintf {|<a href="authors/%s.html"><img src="%s" alt="%s" class="author-thumbnail"></a>|}
874 (Format.Html.html_escape (sanitize_filename username))
875 (Format.Html.html_escape thumb_path)
876 (Format.Html.html_escape author_name)
877 | None ->
878 Printf.sprintf {|<a href="authors/%s.html"><div class="author-thumbnail" style="background: linear-gradient(135deg, #667eea 0%%, #764ba2 100%%); color: white; display: flex; align-items: center; justify-content: center; font-size: 20px; font-weight: 700;">%s</div></a>|}
879 (Format.Html.html_escape (sanitize_filename username))
880 (String.uppercase_ascii (String.sub author_name 0 1))
881 in
882 Printf.sprintf {|<article class="post">
883 %s
884 <h2 class="post-title">%s</h2>
885 <div class="post-meta-line">By <a href="authors/%s.html">%s</a> · %s</div>
886 <div class="post-excerpt">
887%s
888 </div>
889 <div class="post-full-content">
890%s
891 </div>
892%s
893</article>|}
894 thumbnail_html
895 link_html
896 (Format.Html.html_escape (sanitize_filename username))
897 (Format.Html.html_escape author_name)
898 date_str
899 excerpt
900 full_content
901 tags_and_actions
902 in
903 post_html
904 ) page_posts in
905
906 let page_html = Format.Html.render_posts_page
907 ~title
908 ~posts:post_htmls
909 ~current_page:page
910 ~total_pages
911 ~base_path:""
912 ~nav_current:"posts"
913 in
914
915 let filename = if page = 1 then "index.html"
916 else Printf.sprintf "page-%d.html" page in
917 Eio.Path.save ~create:(`Or_truncate 0o644)
918 Eio.Path.(output_dir / filename)
919 page_html
920 done;
921
922 (* Generate author index *)
923 Log.info (fun m -> m "Generating author index and pages");
924 let authors_map = Hashtbl.create 32 in
925 List.iter (fun (username, _, author, _, _, _, _, _) ->
926 let count = match Hashtbl.find_opt authors_map username with
927 | Some (_, c) -> c + 1
928 | None -> 1
929 in
930 Hashtbl.replace authors_map username (author, count)
931 ) html_data;
932
933 let authors_list = Hashtbl.fold (fun username (author, count) acc ->
934 (username, author, count) :: acc
935 ) authors_map [] |> List.sort (fun (_, a1, _) (_, a2, _) -> String.compare a1 a2) in
936
937 Log.info (fun m -> m "Found %d authors" (List.length authors_list));
938
939 let authors_index_content =
940 (* SVG icon definitions *)
941 let icon_github = {|<svg viewBox="0 0 16 16" fill="currentColor"><path d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0016 8c0-4.42-3.58-8-8-8z"/></svg>|} in
942 let icon_email = {|<svg viewBox="0 0 16 16" fill="currentColor"><path d="M0 4a2 2 0 012-2h12a2 2 0 012 2v8a2 2 0 01-2 2H2a2 2 0 01-2-2V4zm2-1a1 1 0 00-1 1v.217l7 4.2 7-4.2V4a1 1 0 00-1-1H2zm13 2.383l-4.758 2.855L15 11.114v-5.73zm-.034 6.878L9.271 8.82 8 9.583 6.728 8.82l-5.694 3.44A1 1 0 002 13h12a1 1 0 00.966-.739zM1 11.114l4.758-2.876L1 5.383v5.73z"/></svg>|} in
943 let icon_link = {|<svg viewBox="0 0 16 16" fill="currentColor"><path d="M4.715 6.542L3.343 7.914a3 3 0 104.243 4.243l1.828-1.829A3 3 0 008.586 5.5L8 6.086a1.001 1.001 0 00-.154.199 2 2 0 01.861 3.337L6.88 11.45a2 2 0 11-2.83-2.83l.793-.792a4.018 4.018 0 01-.128-1.287z"/><path d="M6.586 4.672A3 3 0 007.414 9.5l.775-.776a2 2 0 01-.896-3.346L9.12 3.55a2 2 0 112.83 2.83l-.793.792c.112.42.155.855.128 1.287l1.372-1.372a3 3 0 10-4.243-4.243L6.586 4.672z"/></svg>|} in
944 let icon_rss = {|<svg viewBox="0 0 16 16" fill="currentColor"><path d="M2 0a2 2 0 00-2 2v12a2 2 0 002 2h12a2 2 0 002-2V2a2 2 0 00-2-2H2zm1.5 2.5c5.523 0 10 4.477 10 10a1 1 0 11-2 0 8 8 0 00-8-8 1 1 0 010-2zm0 4a6 6 0 016 6 1 1 0 11-2 0 4 4 0 00-4-4 1 1 0 010-2zm.5 7a1.5 1.5 0 110-3 1.5 1.5 0 010 3z"/></svg>|} in
945
946 let items = List.map (fun (username, _author, count) ->
947 (* Get Sortal contact data *)
948 let contact_opt = Sortal.lookup state.sortal username in
949
950 (* Get the proper display name from Sortal, fallback to username *)
951 let display_name = match contact_opt with
952 | Some contact -> Sortal.Contact.name contact
953 | None -> username
954 in
955
956 let thumbnail_html = match contact_opt with
957 | Some _contact ->
958 (match get_author_thumbnail username with
959 | Some thumb_path ->
960 Printf.sprintf {|<img src="../%s" alt="%s" class="author-item-thumbnail">|}
961 (Format.Html.html_escape thumb_path)
962 (Format.Html.html_escape display_name)
963 | None ->
964 Printf.sprintf {|<div class="author-item-thumbnail" style="background: linear-gradient(135deg, #667eea 0%%, #764ba2 100%%); color: white; display: flex; align-items: center; justify-content: center; font-size: 16px; font-weight: 700;">%s</div>|}
965 (String.uppercase_ascii (String.sub display_name 0 1)))
966 | None ->
967 Printf.sprintf {|<div class="author-item-thumbnail" style="background: linear-gradient(135deg, #667eea 0%%, #764ba2 100%%); color: white; display: flex; align-items: center; justify-content: center; font-size: 16px; font-weight: 700;">%s</div>|}
968 (String.uppercase_ascii (String.sub display_name 0 1))
969 in
970
971 let links_html = match contact_opt with
972 | Some contact ->
973 let links = [] in
974 let links = match Sortal.Contact.github contact with
975 | Some gh -> (Printf.sprintf {|<a href="https://github.com/%s" class="author-item-link" target="_blank" title="GitHub">%s</a>|} gh icon_github) :: links
976 | None -> links
977 in
978 let links = match Sortal.Contact.url contact with
979 | Some url -> (Printf.sprintf {|<a href="%s" class="author-item-link" target="_blank" title="Website">%s</a>|} url icon_link) :: links
980 | None -> links
981 in
982 let links = match Sortal.Contact.email contact with
983 | Some email -> (Printf.sprintf {|<a href="mailto:%s" class="author-item-link" title="Email">%s</a>|} email icon_email) :: links
984 | None -> links
985 in
986 if links = [] then "" else
987 Printf.sprintf {|<div class="author-item-links">%s</div>|} (String.concat "" (List.rev links))
988 | None -> ""
989 in
990
991 let feed_count = match contact_opt with
992 | Some contact ->
993 (match Sortal.Contact.feeds contact with
994 | Some feeds -> List.length feeds
995 | None -> 0)
996 | None -> 0
997 in
998
999 Printf.sprintf {|<div class="author-item">
1000 %s
1001 <div class="author-item-main">
1002 <div class="author-item-name"><a href="%s.html">%s</a></div>
1003 <div class="author-item-meta">
1004 <span class="author-item-username">@%s</span>
1005 <span class="author-item-stat">%d post%s</span>
1006 %s
1007 %s
1008 </div>
1009 </div>
1010</div>|}
1011 thumbnail_html
1012 (Format.Html.html_escape (sanitize_filename username))
1013 (Format.Html.html_escape display_name)
1014 (Format.Html.html_escape username)
1015 count
1016 (if count = 1 then "" else "s")
1017 (if feed_count > 0 then Printf.sprintf {|<span class="author-item-stat">%s %d feed%s</span>|} icon_rss feed_count (if feed_count = 1 then "" else "s") else "")
1018 links_html
1019 ) authors_list in
1020 Printf.sprintf "<div class=\"author-list\">\n%s\n</div>"
1021 (String.concat "\n" items)
1022 in
1023
1024 let authors_index_html = Format.Html.page_template
1025 ~title:(title ^ " - Authors")
1026 ~nav_current:"authors"
1027 authors_index_content
1028 in
1029 Eio.Path.save ~create:(`Or_truncate 0o644)
1030 Eio.Path.(output_dir / "authors" / "index.html")
1031 authors_index_html;
1032
1033 (* Generate individual author pages *)
1034 Hashtbl.iter (fun username (author, _) ->
1035 let author_posts = List.filter (fun (u, _, _, _, _, _, _, _) -> u = username) html_data in
1036 let author_total = List.length author_posts in
1037 let author_pages = (author_total + posts_per_page - 1) / posts_per_page in
1038 Log.info (fun m -> m " Author: %s (@%s) - %d posts, %d pages" author username author_total author_pages);
1039
1040 (* Generate author header with Sortal data *)
1041 let author_header =
1042 let contact_opt = Sortal.lookup state.sortal username in
1043
1044 (* Get proper display name from Sortal *)
1045 let display_name = match contact_opt with
1046 | Some contact -> Sortal.Contact.name contact
1047 | None -> author
1048 in
1049
1050 (* SVG icons for author header *)
1051 let icon_github = {|<svg width="16" height="16" viewBox="0 0 16 16" fill="currentColor"><path d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0016 8c0-4.42-3.58-8-8-8z"/></svg>|} in
1052 let icon_email = {|<svg width="16" height="16" viewBox="0 0 16 16" fill="currentColor"><path d="M0 4a2 2 0 012-2h12a2 2 0 012 2v8a2 2 0 01-2 2H2a2 2 0 01-2-2V4zm2-1a1 1 0 00-1 1v.217l7 4.2 7-4.2V4a1 1 0 00-1-1H2zm13 2.383l-4.758 2.855L15 11.114v-5.73zm-.034 6.878L9.271 8.82 8 9.583 6.728 8.82l-5.694 3.44A1 1 0 002 13h12a1 1 0 00.966-.739zM1 11.114l4.758-2.876L1 5.383v5.73z"/></svg>|} in
1053 let icon_link = {|<svg width="16" height="16" viewBox="0 0 16 16" fill="currentColor"><path d="M4.715 6.542L3.343 7.914a3 3 0 104.243 4.243l1.828-1.829A3 3 0 008.586 5.5L8 6.086a1.001 1.001 0 00-.154.199 2 2 0 01.861 3.337L6.88 11.45a2 2 0 11-2.83-2.83l.793-.792a4.018 4.018 0 01-.128-1.287z"/><path d="M6.586 4.672A3 3 0 007.414 9.5l.775-.776a2 2 0 01-.896-3.346L9.12 3.55a2 2 0 112.83 2.83l-.793.792c.112.42.155.855.128 1.287l1.372-1.372a3 3 0 10-4.243-4.243L6.586 4.672z"/></svg>|} in
1054
1055 match contact_opt with
1056 | Some contact ->
1057 let thumbnail_html = match get_author_thumbnail username with
1058 | Some thumb_path ->
1059 Printf.sprintf {|<img src="../%s" alt="%s" class="author-header-thumbnail">|}
1060 (Format.Html.html_escape thumb_path)
1061 (Format.Html.html_escape display_name)
1062 | None ->
1063 Printf.sprintf {|<div class="author-header-thumbnail" style="background: linear-gradient(135deg, #667eea 0%%, #764ba2 100%%); color: white; display: flex; align-items: center; justify-content: center; font-size: 36px; font-weight: 700;">%s</div>|}
1064 (String.uppercase_ascii (String.sub display_name 0 1))
1065 in
1066
1067 let links = [] in
1068 let links = match Sortal.Contact.github contact with
1069 | Some gh -> (Printf.sprintf {|<a href="https://github.com/%s" class="author-header-link" target="_blank">%s GitHub</a>|} gh icon_github) :: links
1070 | None -> links
1071 in
1072 let links = match Sortal.Contact.twitter contact with
1073 | Some tw -> (Printf.sprintf {|<a href="https://twitter.com/%s" class="author-header-link" target="_blank">%s Twitter</a>|} tw icon_link) :: links
1074 | None -> links
1075 in
1076 let links = match Sortal.Contact.mastodon contact with
1077 | Some m -> (Printf.sprintf {|<a href="%s" class="author-header-link" target="_blank">%s Mastodon</a>|} m icon_link) :: links
1078 | None -> links
1079 in
1080 let links = match Sortal.Contact.url contact with
1081 | Some url -> (Printf.sprintf {|<a href="%s" class="author-header-link" target="_blank">%s Website</a>|} url icon_link) :: links
1082 | None -> links
1083 in
1084 let links = match Sortal.Contact.email contact with
1085 | Some email -> (Printf.sprintf {|<a href="mailto:%s" class="author-header-link">%s Email</a>|} email icon_email) :: links
1086 | None -> links
1087 in
1088
1089 let links_html = if links = [] then "" else
1090 Printf.sprintf {|<div class="author-header-links">%s</div>|} (String.concat "" (List.rev links))
1091 in
1092
1093 let feed_count = match Sortal.Contact.feeds contact with
1094 | Some feeds -> List.length feeds
1095 | None -> 0
1096 in
1097
1098 Printf.sprintf {|<div class="author-header">
1099 <div class="author-header-main">
1100 %s
1101 <div class="author-header-info">
1102 <div class="author-header-name">%s</div>
1103 <div class="author-header-username">@%s</div>
1104 %s
1105 </div>
1106 </div>
1107 <div class="author-header-stats">
1108 <div class="author-header-stat">
1109 <div class="author-header-stat-value">%d</div>
1110 <div class="author-header-stat-label">Posts</div>
1111 </div>
1112 <div class="author-header-stat">
1113 <div class="author-header-stat-value">%d</div>
1114 <div class="author-header-stat-label">Feeds</div>
1115 </div>
1116 </div>
1117</div>|}
1118 thumbnail_html
1119 (Format.Html.html_escape display_name)
1120 (Format.Html.html_escape username)
1121 links_html
1122 author_total
1123 feed_count
1124 | None ->
1125 Printf.sprintf {|<div class="author-header">
1126 <div class="author-header-main">
1127 <div class="author-header-info">
1128 <div class="author-header-name">%s</div>
1129 <div class="author-header-username">@%s</div>
1130 </div>
1131 </div>
1132 <div class="author-header-stats">
1133 <div class="author-header-stat">
1134 <div class="author-header-stat-value">%d</div>
1135 <div class="author-header-stat-label">Posts</div>
1136 </div>
1137 </div>
1138</div>|}
1139 (Format.Html.html_escape display_name)
1140 (Format.Html.html_escape username)
1141 author_total
1142 in
1143
1144 for page = 1 to author_pages do
1145 let start_idx = (page - 1) * posts_per_page in
1146 let page_posts = List.filteri (fun i _ ->
1147 i >= start_idx && i < start_idx + posts_per_page
1148 ) author_posts in
1149
1150 let post_htmls = List.map (fun (_username, title, author, date, link, content, _tags, post_id) ->
1151 let date_str = Format.Html.format_date date in
1152 let link_html = match link with
1153 | Some uri ->
1154 Printf.sprintf {|<a href="%s">%s</a>|}
1155 (Format.Html.html_escape (Uri.to_string uri))
1156 (Format.Html.html_escape title)
1157 | None -> Format.Html.html_escape title
1158 in
1159 let excerpt = Format.Html.post_excerpt_from_html content ~max_length:300 in
1160 let full_content = Format.Html.full_content_from_html content in
1161
1162 (* Get River categories for this post *)
1163 let river_category_ids = get_post_categories state ~post_id in
1164 let river_categories = List.filter_map (fun cat_id ->
1165 match get_category state ~id:cat_id with
1166 | Some cat -> Some (Category.id cat, Category.name cat)
1167 | None -> None
1168 ) river_category_ids in
1169
1170 (* Display only River categories *)
1171 let tags_html =
1172 match river_categories with
1173 | [] -> ""
1174 | _ ->
1175 let category_links = List.map (fun (cat_id, cat_name) ->
1176 Printf.sprintf {|<a href="../categories/%s.html">%s</a>|}
1177 (Format.Html.html_escape (sanitize_filename cat_id)) (Format.Html.html_escape cat_name)
1178 ) river_categories in
1179 Printf.sprintf {|<div class="post-tags">%s</div>|}
1180 (String.concat "" category_links)
1181 in
1182 let tags_and_actions =
1183 if tags_html = "" then
1184 {|<a href="#" class="read-more">Read more</a>|}
1185 else
1186 Printf.sprintf {|<div class="post-tags-and-actions"><a href="#" class="read-more">Read more</a>%s</div>|}
1187 tags_html
1188 in
1189 Printf.sprintf {|<article class="post">
1190 <h2 class="post-title">%s</h2>
1191 <div class="post-meta">
1192 By %s on %s
1193 </div>
1194 <div class="post-excerpt">
1195%s
1196 </div>
1197 <div class="post-full-content">
1198%s
1199 </div>
1200%s
1201</article>|}
1202 link_html
1203 (Format.Html.html_escape author)
1204 date_str
1205 excerpt
1206 full_content
1207 tags_and_actions
1208 ) page_posts in
1209
1210 let posts_with_header = author_header ^ "\n" ^ String.concat "\n" post_htmls in
1211 let page_html = Format.Html.render_posts_page
1212 ~title:(author ^ " - " ^ title)
1213 ~posts:[posts_with_header]
1214 ~current_page:page
1215 ~total_pages:author_pages
1216 ~base_path:(sanitize_filename username ^ "-")
1217 ~nav_current:"authors"
1218 in
1219
1220 let safe_username = sanitize_filename username in
1221 let filename = if page = 1 then safe_username ^ ".html"
1222 else Printf.sprintf "%s-%d.html" safe_username page in
1223 Eio.Path.save ~create:(`Or_truncate 0o644)
1224 Eio.Path.(output_dir / "authors" / filename)
1225 page_html
1226 done
1227 ) authors_map;
1228
1229 (* Generate category index and pages *)
1230 Log.info (fun m -> m "Generating category index and pages");
1231 let categories_map = Hashtbl.create 32 in
1232 List.iter (fun (_, _, _, _, _, _, tags, post_id) ->
1233 (* Count feed tags *)
1234 List.iter (fun tag ->
1235 let count = match Hashtbl.find_opt categories_map tag with
1236 | Some c -> c + 1
1237 | None -> 1
1238 in
1239 Hashtbl.replace categories_map tag count
1240 ) tags;
1241 (* Count custom categories *)
1242 let custom_cat_ids = get_post_categories state ~post_id in
1243 List.iter (fun cat_id ->
1244 let count = match Hashtbl.find_opt categories_map cat_id with
1245 | Some c -> c + 1
1246 | None -> 1
1247 in
1248 Hashtbl.replace categories_map cat_id count
1249 ) custom_cat_ids
1250 ) html_data;
1251
1252 let categories_list = Hashtbl.fold (fun tag count acc ->
1253 (tag, count) :: acc
1254 ) categories_map [] |> List.sort (fun (t1, _) (t2, _) -> String.compare t1 t2) in
1255
1256 Log.info (fun m -> m "Found %d categories" (List.length categories_list));
1257
1258 let categories_index_content =
1259 let items = List.map (fun (tag, count) ->
1260 Printf.sprintf {|<li><a href="%s.html">%s</a><span class="count">%d post%s</span></li>|}
1261 (Format.Html.html_escape (sanitize_filename tag))
1262 (Format.Html.html_escape tag)
1263 count
1264 (if count = 1 then "" else "s")
1265 ) categories_list in
1266 Printf.sprintf "<ul class=\"category-list\">\n%s\n</ul>"
1267 (String.concat "\n" items)
1268 in
1269
1270 let categories_index_html = Format.Html.page_template
1271 ~title:(title ^ " - Categories")
1272 ~nav_current:"categories"
1273 categories_index_content
1274 in
1275 Eio.Path.save ~create:(`Or_truncate 0o644)
1276 Eio.Path.(output_dir / "categories" / "index.html")
1277 categories_index_html;
1278
1279 (* Generate individual category pages *)
1280 List.iter (fun (tag, count) ->
1281 let tag_posts = List.filter (fun (_, _, _, _, _, _, tags, post_id) ->
1282 (* Check if tag is in feed tags or custom categories *)
1283 let in_feed_tags = List.mem tag tags in
1284 let custom_cat_ids = get_post_categories state ~post_id in
1285 let in_custom_cats = List.exists (fun cat_id ->
1286 match get_category state ~id:cat_id with
1287 | Some cat -> Category.id cat = tag
1288 | None -> false
1289 ) custom_cat_ids in
1290 in_feed_tags || in_custom_cats
1291 ) html_data in
1292
1293 let tag_total = List.length tag_posts in
1294 let tag_pages = (tag_total + posts_per_page - 1) / posts_per_page in
1295 Log.info (fun m -> m " Category: %s - %d posts, %d pages" tag count tag_pages);
1296
1297 for page = 1 to tag_pages do
1298 let start_idx = (page - 1) * posts_per_page in
1299 let page_posts = List.filteri (fun i _ ->
1300 i >= start_idx && i < start_idx + posts_per_page
1301 ) tag_posts in
1302
1303 let post_htmls = List.map (fun (username, title, author, date, link, content, _tags, post_id) ->
1304 let date_str = Format.Html.format_date date in
1305 let link_html = match link with
1306 | Some uri ->
1307 Printf.sprintf {|<a href="%s">%s</a>|}
1308 (Format.Html.html_escape (Uri.to_string uri))
1309 (Format.Html.html_escape title)
1310 | None -> Format.Html.html_escape title
1311 in
1312 let excerpt = Format.Html.post_excerpt_from_html content ~max_length:300 in
1313 let full_content = Format.Html.full_content_from_html content in
1314
1315 (* Get River categories for this post *)
1316 let river_category_ids = get_post_categories state ~post_id in
1317 let river_categories = List.filter_map (fun cat_id ->
1318 match get_category state ~id:cat_id with
1319 | Some cat -> Some (Category.id cat, Category.name cat)
1320 | None -> None
1321 ) river_category_ids in
1322
1323 (* Display only River categories *)
1324 let tags_html =
1325 match river_categories with
1326 | [] -> ""
1327 | _ ->
1328 let category_links = List.map (fun (cat_id, cat_name) ->
1329 Printf.sprintf {|<a href="%s.html">%s</a>|}
1330 (Format.Html.html_escape (sanitize_filename cat_id)) (Format.Html.html_escape cat_name)
1331 ) river_categories in
1332 Printf.sprintf {|<div class="post-tags">%s</div>|}
1333 (String.concat "" category_links)
1334 in
1335 let tags_and_actions =
1336 if tags_html = "" then
1337 {|<a href="#" class="read-more">Read more</a>|}
1338 else
1339 Printf.sprintf {|<div class="post-tags-and-actions"><a href="#" class="read-more">Read more</a>%s</div>|}
1340 tags_html
1341 in
1342 (* Get thumbnail *)
1343 let thumbnail_html = match get_author_thumbnail username with
1344 | Some thumb_path ->
1345 Printf.sprintf {|<a href="../authors/%s.html"><img src="../%s" alt="%s" class="author-thumbnail"></a>|}
1346 (Format.Html.html_escape (sanitize_filename username))
1347 (Format.Html.html_escape thumb_path)
1348 (Format.Html.html_escape author)
1349 | None ->
1350 Printf.sprintf {|<a href="../authors/%s.html"><div class="author-thumbnail" style="background: linear-gradient(135deg, #667eea 0%%, #764ba2 100%%); color: white; display: flex; align-items: center; justify-content: center; font-size: 20px; font-weight: 700;">%s</div></a>|}
1351 (Format.Html.html_escape (sanitize_filename username))
1352 (String.uppercase_ascii (String.sub author 0 (min 1 (String.length author))))
1353 in
1354 Printf.sprintf {|<article class="post">
1355 %s
1356 <h2 class="post-title">%s</h2>
1357 <div class="post-meta-line">By <a href="../authors/%s.html">%s</a> · %s</div>
1358 <div class="post-excerpt">
1359%s
1360 </div>
1361 <div class="post-full-content">
1362%s
1363 </div>
1364%s
1365</article>|}
1366 thumbnail_html
1367 link_html
1368 (Format.Html.html_escape (sanitize_filename username))
1369 (Format.Html.html_escape author)
1370 date_str
1371 excerpt
1372 full_content
1373 tags_and_actions
1374 ) page_posts in
1375
1376 let page_html = Format.Html.render_posts_page
1377 ~title:(tag ^ " - " ^ title)
1378 ~posts:post_htmls
1379 ~current_page:page
1380 ~total_pages:tag_pages
1381 ~base_path:(sanitize_filename tag ^ "-")
1382 ~nav_current:"categories"
1383 in
1384
1385 let safe_tag = sanitize_filename tag in
1386 let filename = if page = 1 then safe_tag ^ ".html"
1387 else Printf.sprintf "%s-%d.html" safe_tag page in
1388 Eio.Path.save ~create:(`Or_truncate 0o644)
1389 Eio.Path.(output_dir / "categories" / filename)
1390 page_html
1391 done
1392 ) categories_list;
1393
1394 (* Generate links page *)
1395 Log.info (fun m -> m "Generating links page");
1396 let all_links = List.concat_map (fun (username, title, author, date, post_link, content, _, _) ->
1397 let links = Html_markdown.extract_links content in
1398 List.map (fun (href, link_text) ->
1399 (href, link_text, username, author, title, post_link, date)
1400 ) links
1401 ) html_data in
1402
1403 Log.info (fun m -> m " Extracted %d total links from all posts" (List.length all_links));
1404
1405 (* Group by URL and track most recent post date *)
1406 let links_map = Hashtbl.create 256 in
1407 List.iter (fun (href, link_text, username, author, post_title, post_link, date) ->
1408 let existing = Hashtbl.find_opt links_map href in
1409 let new_entry = (link_text, username, author, post_title, post_link, date) in
1410 match existing with
1411 | None -> Hashtbl.add links_map href [new_entry]
1412 | Some entries ->
1413 (* Add to list, will sort by date later *)
1414 Hashtbl.replace links_map href (new_entry :: entries)
1415 ) all_links;
1416
1417 (* Sort links by most recent post date *)
1418 let sorted_links = Hashtbl.fold (fun href entries acc ->
1419 (* Get the most recent entry for this URL *)
1420 let sorted_entries = List.sort (fun (_, _, _, _, _, d1) (_, _, _, _, _, d2) ->
1421 Ptime.compare d2 d1
1422 ) entries in
1423 let most_recent = List.hd sorted_entries in
1424 (href, most_recent, entries) :: acc
1425 ) links_map [] |> List.sort (fun (_, (_, _, _, _, _, d1), _) (_, (_, _, _, _, _, d2), _) ->
1426 Ptime.compare d2 d1
1427 ) in
1428
1429 Log.info (fun m -> m " Deduplicated to %d unique links" (List.length sorted_links));
1430
1431 let links_content =
1432 let items = List.map (fun (href, (_link_text, _username, _author, _post_title, _post_link, _date), all_entries) ->
1433 (* Parse URL to extract domain and path *)
1434 let uri = Uri.of_string href in
1435 let domain = match Uri.host uri with
1436 | Some h -> h
1437 | None -> "unknown"
1438 in
1439 let path = Uri.path uri in
1440 let fragment = Uri.fragment uri in
1441
1442 (* Shorten path if too long *)
1443 let shortened_path =
1444 let full_path = path ^ (match fragment with Some f -> "#" ^ f | None -> "") in
1445 if String.length full_path > 40 then
1446 let start = String.sub full_path 0 20 in
1447 let ending = String.sub full_path (String.length full_path - 17) 17 in
1448 start ^ "..." ^ ending
1449 else
1450 full_path
1451 in
1452
1453 let display_text =
1454 if shortened_path = "" || shortened_path = "/" then
1455 Printf.sprintf {|<span class="link-domain">%s</span>|}
1456 (Format.Html.html_escape domain)
1457 else
1458 Printf.sprintf {|<span class="link-domain">%s</span><span class="link-path">%s</span>|}
1459 (Format.Html.html_escape domain)
1460 (Format.Html.html_escape shortened_path)
1461 in
1462
1463 (* Group all backlinks *)
1464 let backlinks_html = List.map (fun (_, _username, author, post_title, post_link, date) ->
1465 let date_str = Format.Html.format_date date in
1466 let post_link_html = match post_link with
1467 | Some uri ->
1468 Printf.sprintf {|<a href="%s" title="%s by %s on %s">%s</a>|}
1469 (Format.Html.html_escape (Uri.to_string uri))
1470 (Format.Html.html_escape post_title)
1471 (Format.Html.html_escape author)
1472 date_str
1473 (Format.Html.html_escape post_title)
1474 | None -> Format.Html.html_escape post_title
1475 in
1476 Printf.sprintf {|<span class="link-backlink"><span class="link-backlink-icon">↩</span>%s</span>|}
1477 post_link_html
1478 ) all_entries |> String.concat "" in
1479
1480 Printf.sprintf {|<div class="link-item">
1481 <div class="link-url"><a href="%s">%s</a></div>
1482 <div class="link-backlinks">%s</div>
1483</div>|}
1484 (Format.Html.html_escape href)
1485 display_text
1486 backlinks_html
1487 ) sorted_links in
1488 String.concat "\n" items
1489 in
1490
1491 let links_html = Format.Html.page_template
1492 ~title:(title ^ " - Links")
1493 ~nav_current:"links"
1494 links_content
1495 in
1496 Eio.Path.save ~create:(`Or_truncate 0o644)
1497 Eio.Path.(output_dir / "links.html")
1498 links_html;
1499
1500 Log.info (fun m -> m "HTML site generated successfully in %s"
1501 (Eio.Path.native_exn output_dir));
1502 Ok ()
1503 with e ->
1504 Error (Printf.sprintf "Failed to generate HTML site: %s" (Printexc.to_string e))
1505
1506let analyze_user_quality state ~username =
1507 match Storage.get_user state username with
1508 | None ->
1509 Error (Printf.sprintf "User %s not found" username)
1510 | Some _ ->
1511 let entries = Storage.load_existing_posts state username in
1512 if entries = [] then
1513 Error "No entries to analyze"
1514 else
1515 Ok (Quality.analyze entries)