My agentic slop goes here. Not intended for anyone else!
at main 63 kB view raw
1(* 2 * Copyright (c) 2014, OCaml.org project 3 * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 *) 17 18(** State management for sync state and feeds. 19 20 User contact data is read from Sortal on-demand. River only persists 21 sync timestamps and feed data. *) 22 23let src = Logs.Src.create "river" ~doc:"River RSS/Atom aggregator" 24module Log = (val Logs.src_log src : Logs.LOG) 25 26type t = { 27 xdg : Xdge.t; 28 sortal : Sortal.t; 29} 30 31module Paths = struct 32 (** Get the feeds directory path *) 33 let feeds_dir state = Eio.Path.(Xdge.state_dir state.xdg / "feeds") 34 35 (** Get the user feeds directory path *) 36 let user_feeds_dir state = Eio.Path.(feeds_dir state / "user") 37 38 (** Get the sync state file path *) 39 let sync_state_file state = Eio.Path.(Xdge.state_dir state.xdg / "sync_state.json") 40 41 (** Get the path to a user's JSONFeed file *) 42 let user_feed_file state username = 43 Eio.Path.(user_feeds_dir state / (username ^ ".json")) 44 45 (** Get the path to a user's old Atom feed file (for migration) *) 46 let user_feed_file_legacy state username = 47 Eio.Path.(user_feeds_dir state / (username ^ ".xml")) 48 49 (** Ensure all necessary directories exist *) 50 let ensure_directories state = 51 let dirs = [ 52 feeds_dir state; 53 user_feeds_dir state; 54 ] in 55 List.iter (fun dir -> 56 try Eio.Path.mkdir ~perm:0o755 dir 57 with Eio.Io (Eio.Fs.E (Already_exists _), _) -> () 58 ) dirs 59end 60 61(** Sync state management - maps username to last_synced timestamp *) 62module Sync_state = struct 63 let jsont = 64 let pair_t = 65 let make username timestamp = (username, timestamp) in 66 Jsont.Object.map ~kind:"SyncEntry" make 67 |> Jsont.Object.mem "username" Jsont.string ~enc:fst 68 |> Jsont.Object.mem "timestamp" Jsont.string ~enc:snd 69 |> Jsont.Object.finish 70 in 71 Jsont.Object.map ~kind:"SyncState" (fun pairs -> pairs) 72 |> Jsont.Object.mem "synced_users" (Jsont.list pair_t) ~enc:(fun s -> s) 73 |> Jsont.Object.finish 74 75 let load state = 76 let file = Paths.sync_state_file state in 77 try 78 let content = Eio.Path.load file in 79 match Jsont_bytesrw.decode_string' jsont content with 80 | Ok pairs -> pairs 81 | Error err -> 82 Log.warn (fun m -> m "Failed to parse sync state: %s" (Jsont.Error.to_string err)); 83 [] 84 with 85 | Eio.Io (Eio.Fs.E (Not_found _), _) -> [] 86 | e -> 87 Log.err (fun m -> m "Error loading sync state: %s" (Printexc.to_string e)); 88 [] 89 90 let save state sync_state = 91 let file = Paths.sync_state_file state in 92 match Jsont_bytesrw.encode_string' ~format:Jsont.Indent jsont sync_state with 93 | Ok json -> Eio.Path.save ~create:(`Or_truncate 0o644) file json 94 | Error err -> failwith ("Failed to encode sync state: " ^ Jsont.Error.to_string err) 95 96 let get_timestamp state username = 97 load state |> List.assoc_opt username 98 99 let set_timestamp state username timestamp = 100 let sync_state = load state in 101 let updated = (username, timestamp) :: List.remove_assoc username sync_state in 102 save state updated 103end 104 105(** Category storage - manages custom categories *) 106module Category_storage = struct 107 let categories_file state = Eio.Path.(Xdge.state_dir state.xdg / "categories.json") 108 109 let jsont = Jsont.list Category.jsont 110 111 let load state = 112 let file = categories_file state in 113 try 114 let content = Eio.Path.load file in 115 match Jsont_bytesrw.decode_string' jsont content with 116 | Ok categories -> categories 117 | Error err -> 118 Log.warn (fun m -> m "Failed to parse categories: %s" (Jsont.Error.to_string err)); 119 [] 120 with 121 | Eio.Io (Eio.Fs.E (Not_found _), _) -> [] 122 | e -> 123 Log.err (fun m -> m "Error loading categories: %s" (Printexc.to_string e)); 124 [] 125 126 let save state categories = 127 let file = categories_file state in 128 match Jsont_bytesrw.encode_string' ~format:Jsont.Indent jsont categories with 129 | Ok json -> Eio.Path.save ~create:(`Or_truncate 0o644) file json 130 | Error err -> failwith ("Failed to encode categories: " ^ Jsont.Error.to_string err) 131 132 let get state id = 133 load state |> List.find_opt (fun cat -> Category.id cat = id) 134 135 let add state category = 136 let categories = load state in 137 let filtered = List.filter (fun cat -> Category.id cat <> Category.id category) categories in 138 save state (category :: filtered) 139 140 let remove state id = 141 let categories = load state in 142 save state (List.filter (fun cat -> Category.id cat <> id) categories) 143end 144 145(** Post-category mapping storage - maps post IDs to category IDs *) 146module Post_category_storage = struct 147 let post_categories_file state = Eio.Path.(Xdge.state_dir state.xdg / "post_categories.json") 148 149 (* Type: list of (post_id, category_ids) pairs *) 150 let jsont = 151 let pair_t = 152 let make post_id category_ids = (post_id, category_ids) in 153 Jsont.Object.map ~kind:"PostCategoryMapping" make 154 |> Jsont.Object.mem "post_id" Jsont.string ~enc:fst 155 |> Jsont.Object.mem "category_ids" (Jsont.list Jsont.string) ~enc:snd 156 |> Jsont.Object.finish 157 in 158 Jsont.list pair_t 159 160 let load state = 161 let file = post_categories_file state in 162 try 163 let content = Eio.Path.load file in 164 match Jsont_bytesrw.decode_string' jsont content with 165 | Ok mappings -> mappings 166 | Error err -> 167 Log.warn (fun m -> m "Failed to parse post categories: %s" (Jsont.Error.to_string err)); 168 [] 169 with 170 | Eio.Io (Eio.Fs.E (Not_found _), _) -> [] 171 | e -> 172 Log.err (fun m -> m "Error loading post categories: %s" (Printexc.to_string e)); 173 [] 174 175 let save state mappings = 176 let file = post_categories_file state in 177 match Jsont_bytesrw.encode_string' ~format:Jsont.Indent jsont mappings with 178 | Ok json -> Eio.Path.save ~create:(`Or_truncate 0o644) file json 179 | Error err -> failwith ("Failed to encode post categories: " ^ Jsont.Error.to_string err) 180 181 let get state post_id = 182 load state |> List.assoc_opt post_id |> Option.value ~default:[] 183 184 let set state post_id category_ids = 185 let mappings = load state in 186 let filtered = List.remove_assoc post_id mappings in 187 let updated = if category_ids = [] then filtered else (post_id, category_ids) :: filtered in 188 save state updated 189 190 let add state post_id category_id = 191 let current = get state post_id in 192 if List.mem category_id current then () 193 else set state post_id (category_id :: current) 194 195 let remove state post_id category_id = 196 let current = get state post_id in 197 set state post_id (List.filter ((<>) category_id) current) 198 199 let get_posts_by_category state category_id = 200 load state 201 |> List.filter (fun (_, category_ids) -> List.mem category_id category_ids) 202 |> List.map fst 203 204 let remove_category state category_id = 205 let mappings = load state in 206 let updated = List.filter_map (fun (post_id, category_ids) -> 207 let filtered = List.filter ((<>) category_id) category_ids in 208 if filtered = [] then None else Some (post_id, filtered) 209 ) mappings in 210 save state updated 211end 212 213(** {2 Category Management - Internal functions} *) 214 215let list_categories state = 216 Category_storage.load state 217 218let get_category state ~id = 219 Category_storage.get state id 220 221let add_category state category = 222 try 223 Category_storage.add state category; 224 Ok () 225 with e -> 226 Error (Printf.sprintf "Failed to add category: %s" (Printexc.to_string e)) 227 228let remove_category state ~id = 229 try 230 Category_storage.remove state id; 231 Post_category_storage.remove_category state id; 232 Ok () 233 with e -> 234 Error (Printf.sprintf "Failed to remove category: %s" (Printexc.to_string e)) 235 236let get_post_categories state ~post_id = 237 Post_category_storage.get state post_id 238 239let set_post_categories state ~post_id ~category_ids = 240 try 241 Post_category_storage.set state post_id category_ids; 242 Ok () 243 with e -> 244 Error (Printf.sprintf "Failed to set post categories: %s" (Printexc.to_string e)) 245 246let add_post_category state ~post_id ~category_id = 247 try 248 Post_category_storage.add state post_id category_id; 249 Ok () 250 with e -> 251 Error (Printf.sprintf "Failed to add post category: %s" (Printexc.to_string e)) 252 253let remove_post_category state ~post_id ~category_id = 254 try 255 Post_category_storage.remove state post_id category_id; 256 Ok () 257 with e -> 258 Error (Printf.sprintf "Failed to remove post category: %s" (Printexc.to_string e)) 259 260let get_posts_by_category state ~category_id = 261 Post_category_storage.get_posts_by_category state category_id 262 263module Storage = struct 264 (** List all usernames with feeds from Sortal *) 265 let list_users state = 266 try 267 Sortal.list state.sortal 268 |> List.filter (fun contact -> Sortal.Contact.feeds contact <> None) 269 |> List.map Sortal.Contact.handle 270 with e -> 271 Log.err (fun m -> m "Error listing Sortal users: %s" (Printexc.to_string e)); 272 [] 273 274 (** Get a user from Sortal with sync state *) 275 let get_user state username = 276 match Sortal.lookup state.sortal username with 277 | None -> None 278 | Some contact -> 279 (* Only return users with feeds *) 280 if Sortal.Contact.feeds contact = None then None 281 else 282 let last_synced = Sync_state.get_timestamp state username in 283 Some (User.of_contact contact ?last_synced ()) 284 285 (** Get all users from Sortal with sync state *) 286 let get_all_users state = 287 try 288 Sortal.list state.sortal 289 |> List.filter (fun contact -> Sortal.Contact.feeds contact <> None) 290 |> List.map (fun contact -> 291 let username = Sortal.Contact.handle contact in 292 let last_synced = Sync_state.get_timestamp state username in 293 User.of_contact contact ?last_synced ()) 294 with e -> 295 Log.err (fun m -> m "Error getting all users: %s" (Printexc.to_string e)); 296 [] 297 298 (** Migrate legacy Atom XML feed to JSONFeed format *) 299 let migrate_legacy_feed state username = 300 let legacy_file = Paths.user_feed_file_legacy state username in 301 try 302 let content = Eio.Path.load legacy_file in 303 Log.info (fun m -> m "Migrating legacy Atom feed for %s to JSONFeed" username); 304 (* Parse existing Atom feed *) 305 let input = Xmlm.make_input (`String (0, content)) in 306 let atom_feed = Syndic.Atom.parse input in 307 (* Convert to JSONFeed with extensions *) 308 let jsonfeed = River_jsonfeed.of_atom atom_feed in 309 (* Save as JSONFeed *) 310 let json_file = Paths.user_feed_file state username in 311 (match River_jsonfeed.to_string ~minify:false jsonfeed with 312 | Ok json -> 313 Eio.Path.save ~create:(`Or_truncate 0o644) json_file json; 314 Log.info (fun m -> m "Successfully migrated %s from Atom to JSONFeed" username); 315 (* Rename legacy file to .xml.backup *) 316 let backup_file = Eio.Path.(Paths.user_feeds_dir state / (username ^ ".xml.backup")) in 317 (try 318 Eio.Path.save ~create:(`Or_truncate 0o644) backup_file content; 319 Log.info (fun m -> m "Backed up legacy Atom file to %s.xml.backup" username) 320 with e -> 321 Log.warn (fun m -> m "Failed to backup legacy file: %s" (Printexc.to_string e))); 322 Some jsonfeed 323 | Error err -> 324 Log.err (fun m -> m "Failed to serialize JSONFeed during migration: %s" err); 325 None) 326 with 327 | Eio.Io (Eio.Fs.E (Not_found _), _) -> None 328 | e -> 329 Log.err (fun m -> m "Error migrating legacy feed for %s: %s" 330 username (Printexc.to_string e)); 331 None 332 333 (** Load existing JSONFeed for a user (with legacy migration support) *) 334 let load_existing_feed state username = 335 let file = Paths.user_feed_file state username in 336 try 337 let content = Eio.Path.load file in 338 (* Parse JSONFeed *) 339 match River_jsonfeed.of_string content with 340 | Ok jsonfeed -> Some jsonfeed 341 | Error err -> 342 Log.err (fun m -> m "Failed to parse JSONFeed for %s: %s" username err); 343 (* Try migration from legacy Atom *) 344 migrate_legacy_feed state username 345 with 346 | Eio.Io (Eio.Fs.E (Not_found _), _) -> 347 (* JSON file not found, try legacy migration *) 348 migrate_legacy_feed state username 349 | e -> 350 Log.err (fun m -> m "Error loading feed for %s: %s" 351 username (Printexc.to_string e)); 352 None 353 354 (** Load existing posts as Atom entries for a user (for backwards compatibility) *) 355 let load_existing_posts state username = 356 match load_existing_feed state username with 357 | None -> [] 358 | Some jsonfeed -> 359 (* Convert JSONFeed back to Atom for backwards compatibility *) 360 let atom_feed = River_jsonfeed.to_atom jsonfeed in 361 atom_feed.Syndic.Atom.entries 362 363 (** Save JSONFeed for a user *) 364 let save_jsonfeed state username jsonfeed = 365 let file = Paths.user_feed_file state username in 366 match River_jsonfeed.to_string ~minify:false jsonfeed with 367 | Ok json -> Eio.Path.save ~create:(`Or_truncate 0o644) file json 368 | Error err -> failwith ("Failed to serialize JSONFeed: " ^ err) 369 370 (** Save Atom entries for a user (converts to JSONFeed first) *) 371 let save_atom_feed state username entries = 372 (* Convert Atom entries to JSONFeed with extensions *) 373 let items_with_ext = List.map River_jsonfeed.item_of_atom entries in 374 let items = List.map (fun i -> i.River_jsonfeed.item) items_with_ext in 375 376 (* Create feed extension *) 377 let feed_ext = { 378 River_jsonfeed.feed_subtitle = None; 379 feed_id = "urn:river:user:" ^ username; 380 feed_categories = []; 381 feed_contributors = []; 382 feed_generator = Some { 383 River_jsonfeed.generator_name = "River Feed Aggregator"; 384 generator_uri = None; 385 generator_version = Some "1.0"; 386 }; 387 feed_rights = None; 388 feed_logo = None; 389 } in 390 391 let jsonfeed_inner = Jsonfeed.create ~title:username ~items () in 392 let jsonfeed = { River_jsonfeed.feed = jsonfeed_inner; extension = Some feed_ext } in 393 save_jsonfeed state username jsonfeed 394end 395 396module Sync = struct 397 (** Merge new entries with existing ones, updating matching IDs *) 398 let merge_entries ~existing ~new_entries = 399 (* Create a map of new entry IDs for efficient lookup and updates *) 400 let module UriMap = Map.Make(Uri) in 401 let new_entries_map = 402 List.fold_left (fun acc (entry : Syndic.Atom.entry) -> 403 UriMap.add entry.id entry acc 404 ) UriMap.empty new_entries 405 in 406 407 (* Update existing entries with new ones if IDs match, otherwise keep existing *) 408 let updated_existing = 409 List.filter_map (fun (entry : Syndic.Atom.entry) -> 410 if UriMap.mem entry.id new_entries_map then 411 None (* Will be replaced by new entry *) 412 else 413 Some entry (* Keep existing entry *) 414 ) existing 415 in 416 417 (* Combine new entries with non-replaced existing entries *) 418 let combined = new_entries @ updated_existing in 419 List.sort (fun (a : Syndic.Atom.entry) (b : Syndic.Atom.entry) -> 420 Ptime.compare b.updated a.updated 421 ) combined 422 423 (** Get current timestamp in ISO 8601 format *) 424 let current_timestamp () = 425 let open Unix in 426 let tm = gmtime (time ()) in 427 Printf.sprintf "%04d-%02d-%02dT%02d:%02d:%02dZ" 428 (tm.tm_year + 1900) (tm.tm_mon + 1) tm.tm_mday 429 tm.tm_hour tm.tm_min tm.tm_sec 430 431 (** Sync feeds for a single user *) 432 let sync_user session state ~username = 433 match Storage.get_user state username with 434 | None -> 435 Error (Printf.sprintf "User %s not found" username) 436 | Some user when User.feeds user = [] -> 437 Log.info (fun m -> m "No feeds configured for user %s" username); 438 Ok () 439 | Some user -> 440 Log.info (fun m -> m "Syncing feeds for user %s..." username); 441 442 (* Fetch all feeds concurrently *) 443 let fetched_feeds = 444 Eio.Fiber.List.filter_map (fun source -> 445 try 446 Log.info (fun m -> m " [%s] Fetching %s (%s)..." 447 username (Source.name source) (Source.url source)); 448 Some (Feed.fetch session source) 449 with e -> 450 Log.err (fun m -> m " [%s] Failed to fetch %s: %s" 451 username (Source.name source) (Printexc.to_string e)); 452 None 453 ) (User.feeds user) 454 in 455 456 if fetched_feeds = [] then begin 457 Error "No feeds successfully fetched" 458 end else begin 459 (* Get posts from fetched feeds *) 460 let posts = Post.of_feeds fetched_feeds in 461 Log.info (fun m -> m " Found %d new posts" (List.length posts)); 462 463 (* Convert to Atom entries *) 464 let new_entries = Format.Atom.entries_of_posts posts in 465 466 (* Load existing entries *) 467 let existing = Storage.load_existing_posts state username in 468 Log.info (fun m -> m " Found %d existing posts" (List.length existing)); 469 470 (* Merge entries *) 471 let merged = merge_entries ~existing ~new_entries in 472 Log.info (fun m -> m " Total posts after merge: %d" (List.length merged)); 473 474 (* Save updated feed *) 475 Storage.save_atom_feed state username merged; 476 477 (* Update last_synced timestamp *) 478 let now = current_timestamp () in 479 Sync_state.set_timestamp state username now; 480 481 Log.info (fun m -> m "Sync completed for user %s" username); 482 Ok () 483 end 484end 485 486module Export = struct 487 (** Convert Atom entry to JSONFeed item *) 488 let atom_entry_to_jsonfeed_item (entry : Syndic.Atom.entry) = 489 (* Extract ID *) 490 let id = Uri.to_string entry.id in 491 492 (* Extract title *) 493 let title = 494 match entry.title with 495 | Syndic.Atom.Text s -> Some s 496 | Syndic.Atom.Html (_, s) -> Some s 497 | Syndic.Atom.Xhtml (_, _) -> Some "Untitled" 498 in 499 500 (* Extract URL *) 501 let url = 502 match entry.links with 503 | link :: _ -> Some (Uri.to_string link.href) 504 | [] -> None 505 in 506 507 (* Extract content *) 508 let content = 509 match entry.content with 510 | Some (Syndic.Atom.Text s) -> `Text s 511 | Some (Syndic.Atom.Html (_, s)) -> `Html s 512 | Some (Syndic.Atom.Xhtml (_, nodes)) -> 513 let html = String.concat "" (List.map Syndic.XML.to_string nodes) in 514 `Html html 515 | Some (Syndic.Atom.Mime _) | Some (Syndic.Atom.Src _) | None -> 516 `Text "" 517 in 518 519 (* Extract summary *) 520 let summary = 521 match entry.summary with 522 | Some (Syndic.Atom.Text s) when String.trim s <> "" -> Some s 523 | Some (Syndic.Atom.Html (_, s)) when String.trim s <> "" -> Some s 524 | _ -> None 525 in 526 527 (* Extract authors *) 528 let authors = 529 let (author, contributors) = entry.authors in 530 let author_list = author :: contributors in 531 let jsonfeed_authors = List.filter_map (fun (a : Syndic.Atom.author) -> 532 let name = String.trim a.name in 533 if name = "" then None 534 else Some (Jsonfeed.Author.create ~name ()) 535 ) author_list in 536 if jsonfeed_authors = [] then None else Some jsonfeed_authors 537 in 538 539 (* Extract tags *) 540 let tags = 541 match entry.categories with 542 | [] -> None 543 | cats -> 544 let tag_list = List.map (fun (c : Syndic.Atom.category) -> 545 match c.label with 546 | Some lbl -> lbl 547 | None -> c.term 548 ) cats in 549 if tag_list = [] then None else Some tag_list 550 in 551 552 (* Create JSONFeed item *) 553 Jsonfeed.Item.create 554 ~id 555 ~content 556 ?title 557 ?url 558 ?summary 559 ?authors 560 ?tags 561 ~date_published:entry.updated 562 () 563 564 (** Export entries as JSONFeed *) 565 let export_jsonfeed ~title entries = 566 let items = List.map atom_entry_to_jsonfeed_item entries in 567 let feed = Jsonfeed.create ~title ~items () in 568 match Jsonfeed.to_string ~minify:false feed with 569 | Ok json -> Ok json 570 | Error err -> Error (Printf.sprintf "Failed to serialize JSON Feed: %s" (Jsont.Error.to_string err)) 571end 572 573let create env ~app_name = 574 let xdg = Xdge.create env#fs app_name in 575 (* Sortal always uses "sortal" as the app name for shared contact database *) 576 let sortal = Sortal.create env#fs "sortal" in 577 let state = { xdg; sortal } in 578 Paths.ensure_directories state; 579 state 580 581let get_user state ~username = 582 Storage.get_user state username 583 584let get_all_users state = 585 Storage.get_all_users state 586 587let list_users state = 588 Storage.list_users state 589 590let update_sync_state state ~username ~timestamp = 591 Sync_state.set_timestamp state username timestamp; 592 Ok () 593 594let sync_user env state ~username = 595 Session.with_session env @@ fun session -> 596 Sync.sync_user session state ~username 597 598let sync_all env state = 599 let users = Storage.list_users state in 600 if users = [] then begin 601 Log.info (fun m -> m "No users to sync"); 602 Ok (0, 0) 603 end else begin 604 Log.info (fun m -> m "Syncing %d users concurrently..." (List.length users)); 605 606 Session.with_session env @@ fun session -> 607 let results = 608 Eio.Fiber.List.map (fun username -> 609 match Sync.sync_user session state ~username with 610 | Ok () -> true 611 | Error err -> 612 Log.err (fun m -> m "Failed to sync user %s: %s" username err); 613 false 614 ) users 615 in 616 let success_count = List.length (List.filter (fun x -> x) results) in 617 let fail_count = List.length users - success_count in 618 619 if fail_count = 0 then 620 Log.info (fun m -> m "All users synced successfully"); 621 622 Ok (success_count, fail_count) 623 end 624 625let get_user_posts state ~username ?limit () = 626 let entries = Storage.load_existing_posts state username in 627 match limit with 628 | None -> entries 629 | Some n -> List.filteri (fun i _ -> i < n) entries 630 631let get_all_posts state ?limit () = 632 let users = Storage.list_users state in 633 634 (* Collect all entries from all users with username tag *) 635 let all_entries = 636 List.concat_map (fun username -> 637 let entries = Storage.load_existing_posts state username in 638 List.map (fun entry -> (username, entry)) entries 639 ) users 640 in 641 642 (* Sort by date (newest first) *) 643 let sorted = List.sort (fun (_, a : string * Syndic.Atom.entry) (_, b) -> 644 Ptime.compare b.updated a.updated 645 ) all_entries in 646 647 match limit with 648 | None -> sorted 649 | Some n -> List.filteri (fun i _ -> i < n) sorted 650 651let export_merged_feed state ~title ~format ?limit () = 652 let all_posts = get_all_posts state ?limit () in 653 654 (* Rewrite author metadata from Sortal user info and replace tags with River categories *) 655 let rewrite_entry_author_and_categories username (entry : Syndic.Atom.entry) = 656 let entry = match Storage.get_user state username with 657 | None -> entry 658 | Some user -> 659 (* Get user's full name and email from Sortal *) 660 let fullname = User.fullname user in 661 let email = User.email user in 662 let username = User.username user in 663 664 (* Create new author with Sortal information *) 665 let new_author = 666 match email with 667 | Some email_addr -> 668 Syndic.Atom.author ~email:email_addr ~uri:(Uri.of_string ("https://" ^ username)) fullname 669 | None -> 670 Syndic.Atom.author ~uri:(Uri.of_string ("https://" ^ username)) fullname 671 in 672 673 (* Update entry with new author, keeping existing contributors *) 674 let _, other_authors = entry.authors in 675 { entry with authors = (new_author, other_authors) } 676 in 677 678 (* Replace original blog tags with River categories *) 679 let post_id = Uri.to_string entry.id in 680 let river_category_ids = get_post_categories state ~post_id in 681 (* Deduplicate category IDs and create Atom categories *) 682 let unique_category_ids = List.sort_uniq String.compare river_category_ids in 683 let river_categories = List.filter_map (fun cat_id -> 684 match get_category state ~id:cat_id with 685 | Some cat -> Some (Syndic.Atom.category ~label:(Category.name cat) cat_id) 686 | None -> None 687 ) unique_category_ids in 688 689 { entry with categories = river_categories } 690 in 691 692 let entries = List.map (fun (username, entry) -> 693 rewrite_entry_author_and_categories username entry 694 ) all_posts in 695 696 match format with 697 | `Atom -> 698 let xml = Format.Atom.to_string (Format.Atom.feed_of_entries ~title entries) in 699 Ok xml 700 | `Jsonfeed -> 701 if entries = [] then 702 (* Empty JSONFeed *) 703 let feed = Jsonfeed.create ~title ~items:[] () in 704 match Jsonfeed.to_string ~minify:false feed with 705 | Ok json -> Ok json 706 | Error err -> Error (Printf.sprintf "Failed to serialize JSON Feed: %s" (Jsont.Error.to_string err)) 707 else 708 Export.export_jsonfeed ~title entries 709 710let export_html_site state ~output_dir ~title ?(posts_per_page = 25) () = 711 try 712 Log.info (fun m -> m "=== Starting HTML site generation ==="); 713 Log.info (fun m -> m "Output directory: %s" (Eio.Path.native_exn output_dir)); 714 Log.info (fun m -> m "Site title: %s" title); 715 Log.info (fun m -> m "Posts per page: %d" posts_per_page); 716 717 (* Sanitize a string for use in filenames - replace unsafe characters *) 718 let sanitize_filename s = 719 let buf = Buffer.create (String.length s) in 720 String.iter (fun c -> 721 match c with 722 | '/' | '\\' | ':' | '*' | '?' | '"' | '<' | '>' | '|' -> Buffer.add_char buf '-' 723 | ' ' -> Buffer.add_char buf '-' 724 | c -> Buffer.add_char buf c 725 ) s; 726 Buffer.contents buf 727 in 728 729 (* Create directory structure *) 730 Log.info (fun m -> m "Creating directory structure"); 731 let mkdir_if_not_exists dir = 732 try Eio.Path.mkdir ~perm:0o755 dir 733 with Eio.Io (Eio.Fs.E (Already_exists _), _) -> () 734 in 735 mkdir_if_not_exists output_dir; 736 mkdir_if_not_exists Eio.Path.(output_dir / "authors"); 737 mkdir_if_not_exists Eio.Path.(output_dir / "categories"); 738 mkdir_if_not_exists Eio.Path.(output_dir / "thumbnails"); 739 Log.info (fun m -> m "Directory structure created"); 740 741 (* Helper to get and copy author thumbnail *) 742 let get_author_thumbnail username = 743 Log.debug (fun m -> m "Looking up thumbnail for username: %s" username); 744 match Sortal.lookup state.sortal username with 745 | Some contact -> 746 Log.debug (fun m -> m " Found Sortal contact for %s: %s" username (Sortal.Contact.name contact)); 747 (match Sortal.thumbnail_path state.sortal contact with 748 | Some src_path -> 749 Log.info (fun m -> m " Copying thumbnail for %s from: %s" username (Eio.Path.native_exn src_path)); 750 (* Copy thumbnail to output directory *) 751 let filename = Filename.basename (Eio.Path.native_exn src_path) in 752 let dest_path = Eio.Path.(output_dir / "thumbnails" / filename) in 753 (try 754 Log.debug (fun m -> m " Source path: %s" (Eio.Path.native_exn src_path)); 755 Log.debug (fun m -> m " Destination path: %s" (Eio.Path.native_exn dest_path)); 756 let content = Eio.Path.load src_path in 757 Eio.Path.save ~create:(`Or_truncate 0o644) dest_path content; 758 Log.info (fun m -> m " Successfully copied thumbnail to: thumbnails/%s" filename); 759 Some ("thumbnails/" ^ filename) 760 with e -> 761 Log.warn (fun m -> m " Failed to copy thumbnail for %s: %s" username (Printexc.to_string e)); 762 None) 763 | None -> 764 Log.debug (fun m -> m " No thumbnail set for %s" username); 765 None) 766 | None -> 767 Log.warn (fun m -> m " No Sortal contact found for username: %s" username); 768 None 769 in 770 771 (* Helper to convert Atom entry to a simple record for HTML generation *) 772 let entry_to_html_data username (entry : Syndic.Atom.entry) = 773 let title = Text_extract.string_of_text_construct entry.title in 774 let link = List.find_opt (fun (l : Syndic.Atom.link) -> 775 l.rel = Syndic.Atom.Alternate 776 ) entry.links in 777 let link_uri = match link with 778 | Some l -> Some l.href 779 | None -> if List.length entry.links > 0 then Some (List.hd entry.links).href else None 780 in 781 let content_html = match entry.content with 782 | Some (Syndic.Atom.Text s) -> s 783 | Some (Syndic.Atom.Html (_, s)) -> s 784 | Some (Syndic.Atom.Xhtml (_, nodes)) -> 785 String.concat "" (List.map Syndic.XML.to_string nodes) 786 | Some (Syndic.Atom.Mime _) | Some (Syndic.Atom.Src _) | None -> "" 787 in 788 (* Get author name from Sortal, fallback to entry author *) 789 let author_name = match Sortal.lookup state.sortal username with 790 | Some contact -> Sortal.Contact.name contact 791 | None -> 792 let author, _ = entry.authors in 793 author.name 794 in 795 (* Don't use original blog tags - River categories will be fetched separately *) 796 let post_id = Uri.to_string entry.id in 797 (username, title, author_name, entry.updated, link_uri, content_html, [], post_id) 798 in 799 800 (* Get all posts *) 801 Log.info (fun m -> m "Retrieving all posts from state"); 802 let all_posts = get_all_posts state () in 803 let html_data = List.map (fun (username, entry) -> 804 entry_to_html_data username entry 805 ) all_posts in 806 807 let unique_users = List.sort_uniq String.compare (List.map (fun (u, _, _, _, _, _, _, _) -> u) html_data) in 808 Log.info (fun m -> m "Retrieved %d posts from %d users" (List.length html_data) (List.length unique_users)); 809 Log.info (fun m -> m "Users: %s" (String.concat ", " unique_users)); 810 811 (* Generate main index pages with pagination *) 812 let total_posts = List.length html_data in 813 let total_pages = (total_posts + posts_per_page - 1) / posts_per_page in 814 Log.info (fun m -> m "Generating main index: %d posts across %d pages" total_posts total_pages); 815 816 for page = 1 to total_pages do 817 Log.info (fun m -> m " Generating index page %d/%d" page total_pages); 818 let start_idx = (page - 1) * posts_per_page in 819 let page_posts = List.filteri (fun i _ -> 820 i >= start_idx && i < start_idx + posts_per_page 821 ) html_data in 822 823 let post_htmls = List.map (fun (username, title, _feed_author, date, link, content, _tags, post_id) -> 824 Log.debug (fun m -> m " Processing post: %s by @%s" title username); 825 826 (* Get author name from Sortal, fallback to username *) 827 let author_name = match Sortal.lookup state.sortal username with 828 | Some contact -> Sortal.Contact.name contact 829 | None -> username 830 in 831 832 let post_html = 833 let date_str = Format.Html.format_date date in 834 let link_html = match link with 835 | Some uri -> 836 Printf.sprintf {|<a href="%s">%s</a>|} 837 (Format.Html.html_escape (Uri.to_string uri)) 838 (Format.Html.html_escape title) 839 | None -> Format.Html.html_escape title 840 in 841 let excerpt = Format.Html.post_excerpt_from_html content ~max_length:300 in 842 let full_content = Format.Html.full_content_from_html content in 843 844 (* Get River categories for this post *) 845 let river_category_ids = get_post_categories state ~post_id in 846 let river_categories = List.filter_map (fun cat_id -> 847 match get_category state ~id:cat_id with 848 | Some cat -> Some (Category.id cat, Category.name cat) 849 | None -> None 850 ) river_category_ids in 851 852 (* Display only River categories *) 853 let tags_html = 854 match river_categories with 855 | [] -> "" 856 | _ -> 857 let category_links = List.map (fun (cat_id, cat_name) -> 858 Printf.sprintf {|<a href="categories/%s.html">%s</a>|} 859 (Format.Html.html_escape (sanitize_filename cat_id)) (Format.Html.html_escape cat_name) 860 ) river_categories in 861 Printf.sprintf {|<div class="post-tags">%s</div>|} 862 (String.concat "" category_links) 863 in 864 let tags_and_actions = 865 if tags_html = "" then 866 {|<a href="#" class="read-more">Read more</a>|} 867 else 868 Printf.sprintf {|<div class="post-tags-and-actions"><a href="#" class="read-more">Read more</a>%s</div>|} 869 tags_html 870 in 871 let thumbnail_html = match get_author_thumbnail username with 872 | Some thumb_path -> 873 Printf.sprintf {|<a href="authors/%s.html"><img src="%s" alt="%s" class="author-thumbnail"></a>|} 874 (Format.Html.html_escape (sanitize_filename username)) 875 (Format.Html.html_escape thumb_path) 876 (Format.Html.html_escape author_name) 877 | None -> 878 Printf.sprintf {|<a href="authors/%s.html"><div class="author-thumbnail" style="background: linear-gradient(135deg, #667eea 0%%, #764ba2 100%%); color: white; display: flex; align-items: center; justify-content: center; font-size: 20px; font-weight: 700;">%s</div></a>|} 879 (Format.Html.html_escape (sanitize_filename username)) 880 (String.uppercase_ascii (String.sub author_name 0 1)) 881 in 882 Printf.sprintf {|<article class="post"> 883 %s 884 <h2 class="post-title">%s</h2> 885 <div class="post-meta-line">By <a href="authors/%s.html">%s</a> · %s</div> 886 <div class="post-excerpt"> 887%s 888 </div> 889 <div class="post-full-content"> 890%s 891 </div> 892%s 893</article>|} 894 thumbnail_html 895 link_html 896 (Format.Html.html_escape (sanitize_filename username)) 897 (Format.Html.html_escape author_name) 898 date_str 899 excerpt 900 full_content 901 tags_and_actions 902 in 903 post_html 904 ) page_posts in 905 906 let page_html = Format.Html.render_posts_page 907 ~title 908 ~posts:post_htmls 909 ~current_page:page 910 ~total_pages 911 ~base_path:"" 912 ~nav_current:"posts" 913 in 914 915 let filename = if page = 1 then "index.html" 916 else Printf.sprintf "page-%d.html" page in 917 Eio.Path.save ~create:(`Or_truncate 0o644) 918 Eio.Path.(output_dir / filename) 919 page_html 920 done; 921 922 (* Generate author index *) 923 Log.info (fun m -> m "Generating author index and pages"); 924 let authors_map = Hashtbl.create 32 in 925 List.iter (fun (username, _, author, _, _, _, _, _) -> 926 let count = match Hashtbl.find_opt authors_map username with 927 | Some (_, c) -> c + 1 928 | None -> 1 929 in 930 Hashtbl.replace authors_map username (author, count) 931 ) html_data; 932 933 let authors_list = Hashtbl.fold (fun username (author, count) acc -> 934 (username, author, count) :: acc 935 ) authors_map [] |> List.sort (fun (_, a1, _) (_, a2, _) -> String.compare a1 a2) in 936 937 Log.info (fun m -> m "Found %d authors" (List.length authors_list)); 938 939 let authors_index_content = 940 (* SVG icon definitions *) 941 let icon_github = {|<svg viewBox="0 0 16 16" fill="currentColor"><path d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0016 8c0-4.42-3.58-8-8-8z"/></svg>|} in 942 let icon_email = {|<svg viewBox="0 0 16 16" fill="currentColor"><path d="M0 4a2 2 0 012-2h12a2 2 0 012 2v8a2 2 0 01-2 2H2a2 2 0 01-2-2V4zm2-1a1 1 0 00-1 1v.217l7 4.2 7-4.2V4a1 1 0 00-1-1H2zm13 2.383l-4.758 2.855L15 11.114v-5.73zm-.034 6.878L9.271 8.82 8 9.583 6.728 8.82l-5.694 3.44A1 1 0 002 13h12a1 1 0 00.966-.739zM1 11.114l4.758-2.876L1 5.383v5.73z"/></svg>|} in 943 let icon_link = {|<svg viewBox="0 0 16 16" fill="currentColor"><path d="M4.715 6.542L3.343 7.914a3 3 0 104.243 4.243l1.828-1.829A3 3 0 008.586 5.5L8 6.086a1.001 1.001 0 00-.154.199 2 2 0 01.861 3.337L6.88 11.45a2 2 0 11-2.83-2.83l.793-.792a4.018 4.018 0 01-.128-1.287z"/><path d="M6.586 4.672A3 3 0 007.414 9.5l.775-.776a2 2 0 01-.896-3.346L9.12 3.55a2 2 0 112.83 2.83l-.793.792c.112.42.155.855.128 1.287l1.372-1.372a3 3 0 10-4.243-4.243L6.586 4.672z"/></svg>|} in 944 let icon_rss = {|<svg viewBox="0 0 16 16" fill="currentColor"><path d="M2 0a2 2 0 00-2 2v12a2 2 0 002 2h12a2 2 0 002-2V2a2 2 0 00-2-2H2zm1.5 2.5c5.523 0 10 4.477 10 10a1 1 0 11-2 0 8 8 0 00-8-8 1 1 0 010-2zm0 4a6 6 0 016 6 1 1 0 11-2 0 4 4 0 00-4-4 1 1 0 010-2zm.5 7a1.5 1.5 0 110-3 1.5 1.5 0 010 3z"/></svg>|} in 945 946 let items = List.map (fun (username, _author, count) -> 947 (* Get Sortal contact data *) 948 let contact_opt = Sortal.lookup state.sortal username in 949 950 (* Get the proper display name from Sortal, fallback to username *) 951 let display_name = match contact_opt with 952 | Some contact -> Sortal.Contact.name contact 953 | None -> username 954 in 955 956 let thumbnail_html = match contact_opt with 957 | Some _contact -> 958 (match get_author_thumbnail username with 959 | Some thumb_path -> 960 Printf.sprintf {|<img src="../%s" alt="%s" class="author-item-thumbnail">|} 961 (Format.Html.html_escape thumb_path) 962 (Format.Html.html_escape display_name) 963 | None -> 964 Printf.sprintf {|<div class="author-item-thumbnail" style="background: linear-gradient(135deg, #667eea 0%%, #764ba2 100%%); color: white; display: flex; align-items: center; justify-content: center; font-size: 16px; font-weight: 700;">%s</div>|} 965 (String.uppercase_ascii (String.sub display_name 0 1))) 966 | None -> 967 Printf.sprintf {|<div class="author-item-thumbnail" style="background: linear-gradient(135deg, #667eea 0%%, #764ba2 100%%); color: white; display: flex; align-items: center; justify-content: center; font-size: 16px; font-weight: 700;">%s</div>|} 968 (String.uppercase_ascii (String.sub display_name 0 1)) 969 in 970 971 let links_html = match contact_opt with 972 | Some contact -> 973 let links = [] in 974 let links = match Sortal.Contact.github contact with 975 | Some gh -> (Printf.sprintf {|<a href="https://github.com/%s" class="author-item-link" target="_blank" title="GitHub">%s</a>|} gh icon_github) :: links 976 | None -> links 977 in 978 let links = match Sortal.Contact.url contact with 979 | Some url -> (Printf.sprintf {|<a href="%s" class="author-item-link" target="_blank" title="Website">%s</a>|} url icon_link) :: links 980 | None -> links 981 in 982 let links = match Sortal.Contact.email contact with 983 | Some email -> (Printf.sprintf {|<a href="mailto:%s" class="author-item-link" title="Email">%s</a>|} email icon_email) :: links 984 | None -> links 985 in 986 if links = [] then "" else 987 Printf.sprintf {|<div class="author-item-links">%s</div>|} (String.concat "" (List.rev links)) 988 | None -> "" 989 in 990 991 let feed_count = match contact_opt with 992 | Some contact -> 993 (match Sortal.Contact.feeds contact with 994 | Some feeds -> List.length feeds 995 | None -> 0) 996 | None -> 0 997 in 998 999 Printf.sprintf {|<div class="author-item"> 1000 %s 1001 <div class="author-item-main"> 1002 <div class="author-item-name"><a href="%s.html">%s</a></div> 1003 <div class="author-item-meta"> 1004 <span class="author-item-username">@%s</span> 1005 <span class="author-item-stat">%d post%s</span> 1006 %s 1007 %s 1008 </div> 1009 </div> 1010</div>|} 1011 thumbnail_html 1012 (Format.Html.html_escape (sanitize_filename username)) 1013 (Format.Html.html_escape display_name) 1014 (Format.Html.html_escape username) 1015 count 1016 (if count = 1 then "" else "s") 1017 (if feed_count > 0 then Printf.sprintf {|<span class="author-item-stat">%s %d feed%s</span>|} icon_rss feed_count (if feed_count = 1 then "" else "s") else "") 1018 links_html 1019 ) authors_list in 1020 Printf.sprintf "<div class=\"author-list\">\n%s\n</div>" 1021 (String.concat "\n" items) 1022 in 1023 1024 let authors_index_html = Format.Html.page_template 1025 ~title:(title ^ " - Authors") 1026 ~nav_current:"authors" 1027 authors_index_content 1028 in 1029 Eio.Path.save ~create:(`Or_truncate 0o644) 1030 Eio.Path.(output_dir / "authors" / "index.html") 1031 authors_index_html; 1032 1033 (* Generate individual author pages *) 1034 Hashtbl.iter (fun username (author, _) -> 1035 let author_posts = List.filter (fun (u, _, _, _, _, _, _, _) -> u = username) html_data in 1036 let author_total = List.length author_posts in 1037 let author_pages = (author_total + posts_per_page - 1) / posts_per_page in 1038 Log.info (fun m -> m " Author: %s (@%s) - %d posts, %d pages" author username author_total author_pages); 1039 1040 (* Generate author header with Sortal data *) 1041 let author_header = 1042 let contact_opt = Sortal.lookup state.sortal username in 1043 1044 (* Get proper display name from Sortal *) 1045 let display_name = match contact_opt with 1046 | Some contact -> Sortal.Contact.name contact 1047 | None -> author 1048 in 1049 1050 (* SVG icons for author header *) 1051 let icon_github = {|<svg width="16" height="16" viewBox="0 0 16 16" fill="currentColor"><path d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0016 8c0-4.42-3.58-8-8-8z"/></svg>|} in 1052 let icon_email = {|<svg width="16" height="16" viewBox="0 0 16 16" fill="currentColor"><path d="M0 4a2 2 0 012-2h12a2 2 0 012 2v8a2 2 0 01-2 2H2a2 2 0 01-2-2V4zm2-1a1 1 0 00-1 1v.217l7 4.2 7-4.2V4a1 1 0 00-1-1H2zm13 2.383l-4.758 2.855L15 11.114v-5.73zm-.034 6.878L9.271 8.82 8 9.583 6.728 8.82l-5.694 3.44A1 1 0 002 13h12a1 1 0 00.966-.739zM1 11.114l4.758-2.876L1 5.383v5.73z"/></svg>|} in 1053 let icon_link = {|<svg width="16" height="16" viewBox="0 0 16 16" fill="currentColor"><path d="M4.715 6.542L3.343 7.914a3 3 0 104.243 4.243l1.828-1.829A3 3 0 008.586 5.5L8 6.086a1.001 1.001 0 00-.154.199 2 2 0 01.861 3.337L6.88 11.45a2 2 0 11-2.83-2.83l.793-.792a4.018 4.018 0 01-.128-1.287z"/><path d="M6.586 4.672A3 3 0 007.414 9.5l.775-.776a2 2 0 01-.896-3.346L9.12 3.55a2 2 0 112.83 2.83l-.793.792c.112.42.155.855.128 1.287l1.372-1.372a3 3 0 10-4.243-4.243L6.586 4.672z"/></svg>|} in 1054 1055 match contact_opt with 1056 | Some contact -> 1057 let thumbnail_html = match get_author_thumbnail username with 1058 | Some thumb_path -> 1059 Printf.sprintf {|<img src="../%s" alt="%s" class="author-header-thumbnail">|} 1060 (Format.Html.html_escape thumb_path) 1061 (Format.Html.html_escape display_name) 1062 | None -> 1063 Printf.sprintf {|<div class="author-header-thumbnail" style="background: linear-gradient(135deg, #667eea 0%%, #764ba2 100%%); color: white; display: flex; align-items: center; justify-content: center; font-size: 36px; font-weight: 700;">%s</div>|} 1064 (String.uppercase_ascii (String.sub display_name 0 1)) 1065 in 1066 1067 let links = [] in 1068 let links = match Sortal.Contact.github contact with 1069 | Some gh -> (Printf.sprintf {|<a href="https://github.com/%s" class="author-header-link" target="_blank">%s GitHub</a>|} gh icon_github) :: links 1070 | None -> links 1071 in 1072 let links = match Sortal.Contact.twitter contact with 1073 | Some tw -> (Printf.sprintf {|<a href="https://twitter.com/%s" class="author-header-link" target="_blank">%s Twitter</a>|} tw icon_link) :: links 1074 | None -> links 1075 in 1076 let links = match Sortal.Contact.mastodon contact with 1077 | Some m -> (Printf.sprintf {|<a href="%s" class="author-header-link" target="_blank">%s Mastodon</a>|} m icon_link) :: links 1078 | None -> links 1079 in 1080 let links = match Sortal.Contact.url contact with 1081 | Some url -> (Printf.sprintf {|<a href="%s" class="author-header-link" target="_blank">%s Website</a>|} url icon_link) :: links 1082 | None -> links 1083 in 1084 let links = match Sortal.Contact.email contact with 1085 | Some email -> (Printf.sprintf {|<a href="mailto:%s" class="author-header-link">%s Email</a>|} email icon_email) :: links 1086 | None -> links 1087 in 1088 1089 let links_html = if links = [] then "" else 1090 Printf.sprintf {|<div class="author-header-links">%s</div>|} (String.concat "" (List.rev links)) 1091 in 1092 1093 let feed_count = match Sortal.Contact.feeds contact with 1094 | Some feeds -> List.length feeds 1095 | None -> 0 1096 in 1097 1098 Printf.sprintf {|<div class="author-header"> 1099 <div class="author-header-main"> 1100 %s 1101 <div class="author-header-info"> 1102 <div class="author-header-name">%s</div> 1103 <div class="author-header-username">@%s</div> 1104 %s 1105 </div> 1106 </div> 1107 <div class="author-header-stats"> 1108 <div class="author-header-stat"> 1109 <div class="author-header-stat-value">%d</div> 1110 <div class="author-header-stat-label">Posts</div> 1111 </div> 1112 <div class="author-header-stat"> 1113 <div class="author-header-stat-value">%d</div> 1114 <div class="author-header-stat-label">Feeds</div> 1115 </div> 1116 </div> 1117</div>|} 1118 thumbnail_html 1119 (Format.Html.html_escape display_name) 1120 (Format.Html.html_escape username) 1121 links_html 1122 author_total 1123 feed_count 1124 | None -> 1125 Printf.sprintf {|<div class="author-header"> 1126 <div class="author-header-main"> 1127 <div class="author-header-info"> 1128 <div class="author-header-name">%s</div> 1129 <div class="author-header-username">@%s</div> 1130 </div> 1131 </div> 1132 <div class="author-header-stats"> 1133 <div class="author-header-stat"> 1134 <div class="author-header-stat-value">%d</div> 1135 <div class="author-header-stat-label">Posts</div> 1136 </div> 1137 </div> 1138</div>|} 1139 (Format.Html.html_escape display_name) 1140 (Format.Html.html_escape username) 1141 author_total 1142 in 1143 1144 for page = 1 to author_pages do 1145 let start_idx = (page - 1) * posts_per_page in 1146 let page_posts = List.filteri (fun i _ -> 1147 i >= start_idx && i < start_idx + posts_per_page 1148 ) author_posts in 1149 1150 let post_htmls = List.map (fun (_username, title, author, date, link, content, _tags, post_id) -> 1151 let date_str = Format.Html.format_date date in 1152 let link_html = match link with 1153 | Some uri -> 1154 Printf.sprintf {|<a href="%s">%s</a>|} 1155 (Format.Html.html_escape (Uri.to_string uri)) 1156 (Format.Html.html_escape title) 1157 | None -> Format.Html.html_escape title 1158 in 1159 let excerpt = Format.Html.post_excerpt_from_html content ~max_length:300 in 1160 let full_content = Format.Html.full_content_from_html content in 1161 1162 (* Get River categories for this post *) 1163 let river_category_ids = get_post_categories state ~post_id in 1164 let river_categories = List.filter_map (fun cat_id -> 1165 match get_category state ~id:cat_id with 1166 | Some cat -> Some (Category.id cat, Category.name cat) 1167 | None -> None 1168 ) river_category_ids in 1169 1170 (* Display only River categories *) 1171 let tags_html = 1172 match river_categories with 1173 | [] -> "" 1174 | _ -> 1175 let category_links = List.map (fun (cat_id, cat_name) -> 1176 Printf.sprintf {|<a href="../categories/%s.html">%s</a>|} 1177 (Format.Html.html_escape (sanitize_filename cat_id)) (Format.Html.html_escape cat_name) 1178 ) river_categories in 1179 Printf.sprintf {|<div class="post-tags">%s</div>|} 1180 (String.concat "" category_links) 1181 in 1182 let tags_and_actions = 1183 if tags_html = "" then 1184 {|<a href="#" class="read-more">Read more</a>|} 1185 else 1186 Printf.sprintf {|<div class="post-tags-and-actions"><a href="#" class="read-more">Read more</a>%s</div>|} 1187 tags_html 1188 in 1189 Printf.sprintf {|<article class="post"> 1190 <h2 class="post-title">%s</h2> 1191 <div class="post-meta"> 1192 By %s on %s 1193 </div> 1194 <div class="post-excerpt"> 1195%s 1196 </div> 1197 <div class="post-full-content"> 1198%s 1199 </div> 1200%s 1201</article>|} 1202 link_html 1203 (Format.Html.html_escape author) 1204 date_str 1205 excerpt 1206 full_content 1207 tags_and_actions 1208 ) page_posts in 1209 1210 let posts_with_header = author_header ^ "\n" ^ String.concat "\n" post_htmls in 1211 let page_html = Format.Html.render_posts_page 1212 ~title:(author ^ " - " ^ title) 1213 ~posts:[posts_with_header] 1214 ~current_page:page 1215 ~total_pages:author_pages 1216 ~base_path:(sanitize_filename username ^ "-") 1217 ~nav_current:"authors" 1218 in 1219 1220 let safe_username = sanitize_filename username in 1221 let filename = if page = 1 then safe_username ^ ".html" 1222 else Printf.sprintf "%s-%d.html" safe_username page in 1223 Eio.Path.save ~create:(`Or_truncate 0o644) 1224 Eio.Path.(output_dir / "authors" / filename) 1225 page_html 1226 done 1227 ) authors_map; 1228 1229 (* Generate category index and pages *) 1230 Log.info (fun m -> m "Generating category index and pages"); 1231 let categories_map = Hashtbl.create 32 in 1232 List.iter (fun (_, _, _, _, _, _, tags, post_id) -> 1233 (* Count feed tags *) 1234 List.iter (fun tag -> 1235 let count = match Hashtbl.find_opt categories_map tag with 1236 | Some c -> c + 1 1237 | None -> 1 1238 in 1239 Hashtbl.replace categories_map tag count 1240 ) tags; 1241 (* Count custom categories *) 1242 let custom_cat_ids = get_post_categories state ~post_id in 1243 List.iter (fun cat_id -> 1244 let count = match Hashtbl.find_opt categories_map cat_id with 1245 | Some c -> c + 1 1246 | None -> 1 1247 in 1248 Hashtbl.replace categories_map cat_id count 1249 ) custom_cat_ids 1250 ) html_data; 1251 1252 let categories_list = Hashtbl.fold (fun tag count acc -> 1253 (tag, count) :: acc 1254 ) categories_map [] |> List.sort (fun (t1, _) (t2, _) -> String.compare t1 t2) in 1255 1256 Log.info (fun m -> m "Found %d categories" (List.length categories_list)); 1257 1258 let categories_index_content = 1259 let items = List.map (fun (tag, count) -> 1260 Printf.sprintf {|<li><a href="%s.html">%s</a><span class="count">%d post%s</span></li>|} 1261 (Format.Html.html_escape (sanitize_filename tag)) 1262 (Format.Html.html_escape tag) 1263 count 1264 (if count = 1 then "" else "s") 1265 ) categories_list in 1266 Printf.sprintf "<ul class=\"category-list\">\n%s\n</ul>" 1267 (String.concat "\n" items) 1268 in 1269 1270 let categories_index_html = Format.Html.page_template 1271 ~title:(title ^ " - Categories") 1272 ~nav_current:"categories" 1273 categories_index_content 1274 in 1275 Eio.Path.save ~create:(`Or_truncate 0o644) 1276 Eio.Path.(output_dir / "categories" / "index.html") 1277 categories_index_html; 1278 1279 (* Generate individual category pages *) 1280 List.iter (fun (tag, count) -> 1281 let tag_posts = List.filter (fun (_, _, _, _, _, _, tags, post_id) -> 1282 (* Check if tag is in feed tags or custom categories *) 1283 let in_feed_tags = List.mem tag tags in 1284 let custom_cat_ids = get_post_categories state ~post_id in 1285 let in_custom_cats = List.exists (fun cat_id -> 1286 match get_category state ~id:cat_id with 1287 | Some cat -> Category.id cat = tag 1288 | None -> false 1289 ) custom_cat_ids in 1290 in_feed_tags || in_custom_cats 1291 ) html_data in 1292 1293 let tag_total = List.length tag_posts in 1294 let tag_pages = (tag_total + posts_per_page - 1) / posts_per_page in 1295 Log.info (fun m -> m " Category: %s - %d posts, %d pages" tag count tag_pages); 1296 1297 for page = 1 to tag_pages do 1298 let start_idx = (page - 1) * posts_per_page in 1299 let page_posts = List.filteri (fun i _ -> 1300 i >= start_idx && i < start_idx + posts_per_page 1301 ) tag_posts in 1302 1303 let post_htmls = List.map (fun (username, title, author, date, link, content, _tags, post_id) -> 1304 let date_str = Format.Html.format_date date in 1305 let link_html = match link with 1306 | Some uri -> 1307 Printf.sprintf {|<a href="%s">%s</a>|} 1308 (Format.Html.html_escape (Uri.to_string uri)) 1309 (Format.Html.html_escape title) 1310 | None -> Format.Html.html_escape title 1311 in 1312 let excerpt = Format.Html.post_excerpt_from_html content ~max_length:300 in 1313 let full_content = Format.Html.full_content_from_html content in 1314 1315 (* Get River categories for this post *) 1316 let river_category_ids = get_post_categories state ~post_id in 1317 let river_categories = List.filter_map (fun cat_id -> 1318 match get_category state ~id:cat_id with 1319 | Some cat -> Some (Category.id cat, Category.name cat) 1320 | None -> None 1321 ) river_category_ids in 1322 1323 (* Display only River categories *) 1324 let tags_html = 1325 match river_categories with 1326 | [] -> "" 1327 | _ -> 1328 let category_links = List.map (fun (cat_id, cat_name) -> 1329 Printf.sprintf {|<a href="%s.html">%s</a>|} 1330 (Format.Html.html_escape (sanitize_filename cat_id)) (Format.Html.html_escape cat_name) 1331 ) river_categories in 1332 Printf.sprintf {|<div class="post-tags">%s</div>|} 1333 (String.concat "" category_links) 1334 in 1335 let tags_and_actions = 1336 if tags_html = "" then 1337 {|<a href="#" class="read-more">Read more</a>|} 1338 else 1339 Printf.sprintf {|<div class="post-tags-and-actions"><a href="#" class="read-more">Read more</a>%s</div>|} 1340 tags_html 1341 in 1342 (* Get thumbnail *) 1343 let thumbnail_html = match get_author_thumbnail username with 1344 | Some thumb_path -> 1345 Printf.sprintf {|<a href="../authors/%s.html"><img src="../%s" alt="%s" class="author-thumbnail"></a>|} 1346 (Format.Html.html_escape (sanitize_filename username)) 1347 (Format.Html.html_escape thumb_path) 1348 (Format.Html.html_escape author) 1349 | None -> 1350 Printf.sprintf {|<a href="../authors/%s.html"><div class="author-thumbnail" style="background: linear-gradient(135deg, #667eea 0%%, #764ba2 100%%); color: white; display: flex; align-items: center; justify-content: center; font-size: 20px; font-weight: 700;">%s</div></a>|} 1351 (Format.Html.html_escape (sanitize_filename username)) 1352 (String.uppercase_ascii (String.sub author 0 (min 1 (String.length author)))) 1353 in 1354 Printf.sprintf {|<article class="post"> 1355 %s 1356 <h2 class="post-title">%s</h2> 1357 <div class="post-meta-line">By <a href="../authors/%s.html">%s</a> · %s</div> 1358 <div class="post-excerpt"> 1359%s 1360 </div> 1361 <div class="post-full-content"> 1362%s 1363 </div> 1364%s 1365</article>|} 1366 thumbnail_html 1367 link_html 1368 (Format.Html.html_escape (sanitize_filename username)) 1369 (Format.Html.html_escape author) 1370 date_str 1371 excerpt 1372 full_content 1373 tags_and_actions 1374 ) page_posts in 1375 1376 let page_html = Format.Html.render_posts_page 1377 ~title:(tag ^ " - " ^ title) 1378 ~posts:post_htmls 1379 ~current_page:page 1380 ~total_pages:tag_pages 1381 ~base_path:(sanitize_filename tag ^ "-") 1382 ~nav_current:"categories" 1383 in 1384 1385 let safe_tag = sanitize_filename tag in 1386 let filename = if page = 1 then safe_tag ^ ".html" 1387 else Printf.sprintf "%s-%d.html" safe_tag page in 1388 Eio.Path.save ~create:(`Or_truncate 0o644) 1389 Eio.Path.(output_dir / "categories" / filename) 1390 page_html 1391 done 1392 ) categories_list; 1393 1394 (* Generate links page *) 1395 Log.info (fun m -> m "Generating links page"); 1396 let all_links = List.concat_map (fun (username, title, author, date, post_link, content, _, _) -> 1397 let links = Html_markdown.extract_links content in 1398 List.map (fun (href, link_text) -> 1399 (href, link_text, username, author, title, post_link, date) 1400 ) links 1401 ) html_data in 1402 1403 Log.info (fun m -> m " Extracted %d total links from all posts" (List.length all_links)); 1404 1405 (* Group by URL and track most recent post date *) 1406 let links_map = Hashtbl.create 256 in 1407 List.iter (fun (href, link_text, username, author, post_title, post_link, date) -> 1408 let existing = Hashtbl.find_opt links_map href in 1409 let new_entry = (link_text, username, author, post_title, post_link, date) in 1410 match existing with 1411 | None -> Hashtbl.add links_map href [new_entry] 1412 | Some entries -> 1413 (* Add to list, will sort by date later *) 1414 Hashtbl.replace links_map href (new_entry :: entries) 1415 ) all_links; 1416 1417 (* Sort links by most recent post date *) 1418 let sorted_links = Hashtbl.fold (fun href entries acc -> 1419 (* Get the most recent entry for this URL *) 1420 let sorted_entries = List.sort (fun (_, _, _, _, _, d1) (_, _, _, _, _, d2) -> 1421 Ptime.compare d2 d1 1422 ) entries in 1423 let most_recent = List.hd sorted_entries in 1424 (href, most_recent, entries) :: acc 1425 ) links_map [] |> List.sort (fun (_, (_, _, _, _, _, d1), _) (_, (_, _, _, _, _, d2), _) -> 1426 Ptime.compare d2 d1 1427 ) in 1428 1429 Log.info (fun m -> m " Deduplicated to %d unique links" (List.length sorted_links)); 1430 1431 let links_content = 1432 let items = List.map (fun (href, (_link_text, _username, _author, _post_title, _post_link, _date), all_entries) -> 1433 (* Parse URL to extract domain and path *) 1434 let uri = Uri.of_string href in 1435 let domain = match Uri.host uri with 1436 | Some h -> h 1437 | None -> "unknown" 1438 in 1439 let path = Uri.path uri in 1440 let fragment = Uri.fragment uri in 1441 1442 (* Shorten path if too long *) 1443 let shortened_path = 1444 let full_path = path ^ (match fragment with Some f -> "#" ^ f | None -> "") in 1445 if String.length full_path > 40 then 1446 let start = String.sub full_path 0 20 in 1447 let ending = String.sub full_path (String.length full_path - 17) 17 in 1448 start ^ "..." ^ ending 1449 else 1450 full_path 1451 in 1452 1453 let display_text = 1454 if shortened_path = "" || shortened_path = "/" then 1455 Printf.sprintf {|<span class="link-domain">%s</span>|} 1456 (Format.Html.html_escape domain) 1457 else 1458 Printf.sprintf {|<span class="link-domain">%s</span><span class="link-path">%s</span>|} 1459 (Format.Html.html_escape domain) 1460 (Format.Html.html_escape shortened_path) 1461 in 1462 1463 (* Group all backlinks *) 1464 let backlinks_html = List.map (fun (_, _username, author, post_title, post_link, date) -> 1465 let date_str = Format.Html.format_date date in 1466 let post_link_html = match post_link with 1467 | Some uri -> 1468 Printf.sprintf {|<a href="%s" title="%s by %s on %s">%s</a>|} 1469 (Format.Html.html_escape (Uri.to_string uri)) 1470 (Format.Html.html_escape post_title) 1471 (Format.Html.html_escape author) 1472 date_str 1473 (Format.Html.html_escape post_title) 1474 | None -> Format.Html.html_escape post_title 1475 in 1476 Printf.sprintf {|<span class="link-backlink"><span class="link-backlink-icon">↩</span>%s</span>|} 1477 post_link_html 1478 ) all_entries |> String.concat "" in 1479 1480 Printf.sprintf {|<div class="link-item"> 1481 <div class="link-url"><a href="%s">%s</a></div> 1482 <div class="link-backlinks">%s</div> 1483</div>|} 1484 (Format.Html.html_escape href) 1485 display_text 1486 backlinks_html 1487 ) sorted_links in 1488 String.concat "\n" items 1489 in 1490 1491 let links_html = Format.Html.page_template 1492 ~title:(title ^ " - Links") 1493 ~nav_current:"links" 1494 links_content 1495 in 1496 Eio.Path.save ~create:(`Or_truncate 0o644) 1497 Eio.Path.(output_dir / "links.html") 1498 links_html; 1499 1500 Log.info (fun m -> m "HTML site generated successfully in %s" 1501 (Eio.Path.native_exn output_dir)); 1502 Ok () 1503 with e -> 1504 Error (Printf.sprintf "Failed to generate HTML site: %s" (Printexc.to_string e)) 1505 1506let analyze_user_quality state ~username = 1507 match Storage.get_user state username with 1508 | None -> 1509 Error (Printf.sprintf "User %s not found" username) 1510 | Some _ -> 1511 let entries = Storage.load_existing_posts state username in 1512 if entries = [] then 1513 Error "No entries to analyze" 1514 else 1515 Ok (Quality.analyze entries)