···
Eio.Fiber.List.filter_map (fun source ->
210
-
Log.info (fun m -> m " Fetching %s (%s)..."
211
-
(Source.name source) (Source.url source));
210
+
Log.info (fun m -> m " [%s] Fetching %s (%s)..."
211
+
username (Source.name source) (Source.url source));
Some (Feed.fetch session source)
214
-
Log.err (fun m -> m " Failed to fetch %s: %s"
215
-
(Source.name source) (Printexc.to_string e));
214
+
Log.err (fun m -> m " [%s] Failed to fetch %s: %s"
215
+
username (Source.name source) (Printexc.to_string e));
···
let export_merged_feed state ~title ~format ?limit () =
let all_posts = get_all_posts state ?limit () in
417
-
let entries = List.map snd all_posts in
418
+
(* Rewrite author metadata from Sortal user info *)
419
+
let rewrite_entry_author username (entry : Syndic.Atom.entry) =
420
+
match Storage.get_user state username with
423
+
(* Get user's full name and email from Sortal *)
424
+
let fullname = User.fullname user in
425
+
let email = User.email user in
426
+
let username = User.username user in
428
+
(* Create new author with Sortal information *)
431
+
| Some email_addr ->
432
+
Syndic.Atom.author ~email:email_addr ~uri:(Uri.of_string ("https://" ^ username)) fullname
434
+
Syndic.Atom.author ~uri:(Uri.of_string ("https://" ^ username)) fullname
437
+
(* Update entry with new author, keeping existing contributors *)
438
+
let _, other_authors = entry.authors in
439
+
{ entry with authors = (new_author, other_authors) }
442
+
let entries = List.map (fun (username, entry) ->
443
+
rewrite_entry_author username entry
···
| Error err -> Error (Printf.sprintf "Failed to serialize JSON Feed: %s" (Jsont.Error.to_string err))
Export.export_jsonfeed ~title entries
460
+
let export_html_site state ~output_dir ~title ?(posts_per_page = 25) () =
462
+
Log.info (fun m -> m "=== Starting HTML site generation ===");
463
+
Log.info (fun m -> m "Output directory: %s" (Eio.Path.native_exn output_dir));
464
+
Log.info (fun m -> m "Site title: %s" title);
465
+
Log.info (fun m -> m "Posts per page: %d" posts_per_page);
467
+
(* Sanitize a string for use in filenames - replace unsafe characters *)
468
+
let sanitize_filename s =
469
+
let buf = Buffer.create (String.length s) in
470
+
String.iter (fun c ->
472
+
| '/' | '\\' | ':' | '*' | '?' | '"' | '<' | '>' | '|' -> Buffer.add_char buf '-'
473
+
| ' ' -> Buffer.add_char buf '-'
474
+
| c -> Buffer.add_char buf c
476
+
Buffer.contents buf
479
+
(* Create directory structure *)
480
+
Log.info (fun m -> m "Creating directory structure");
481
+
let mkdir_if_not_exists dir =
482
+
try Eio.Path.mkdir ~perm:0o755 dir
483
+
with Eio.Io (Eio.Fs.E (Already_exists _), _) -> ()
485
+
mkdir_if_not_exists output_dir;
486
+
mkdir_if_not_exists Eio.Path.(output_dir / "authors");
487
+
mkdir_if_not_exists Eio.Path.(output_dir / "categories");
488
+
mkdir_if_not_exists Eio.Path.(output_dir / "thumbnails");
489
+
Log.info (fun m -> m "Directory structure created");
491
+
(* Helper to get and copy author thumbnail *)
492
+
let get_author_thumbnail username =
493
+
Log.debug (fun m -> m "Looking up thumbnail for username: %s" username);
494
+
match Sortal.lookup state.sortal username with
496
+
Log.debug (fun m -> m " Found Sortal contact for %s: %s" username (Sortal.Contact.name contact));
497
+
(match Sortal.thumbnail_path state.sortal contact with
499
+
Log.info (fun m -> m " Copying thumbnail for %s from: %s" username (Eio.Path.native_exn src_path));
500
+
(* Copy thumbnail to output directory *)
501
+
let filename = Filename.basename (Eio.Path.native_exn src_path) in
502
+
let dest_path = Eio.Path.(output_dir / "thumbnails" / filename) in
504
+
Log.debug (fun m -> m " Source path: %s" (Eio.Path.native_exn src_path));
505
+
Log.debug (fun m -> m " Destination path: %s" (Eio.Path.native_exn dest_path));
506
+
let content = Eio.Path.load src_path in
507
+
Eio.Path.save ~create:(`Or_truncate 0o644) dest_path content;
508
+
Log.info (fun m -> m " Successfully copied thumbnail to: thumbnails/%s" filename);
509
+
Some ("thumbnails/" ^ filename)
511
+
Log.warn (fun m -> m " Failed to copy thumbnail for %s: %s" username (Printexc.to_string e));
514
+
Log.debug (fun m -> m " No thumbnail set for %s" username);
517
+
Log.warn (fun m -> m " No Sortal contact found for username: %s" username);
521
+
(* Helper to convert Atom entry to a simple record for HTML generation *)
522
+
let entry_to_html_data username (entry : Syndic.Atom.entry) =
523
+
let title = Text_extract.string_of_text_construct entry.title in
524
+
let link = List.find_opt (fun (l : Syndic.Atom.link) ->
525
+
l.rel = Syndic.Atom.Alternate
527
+
let link_uri = match link with
528
+
| Some l -> Some l.href
529
+
| None -> if List.length entry.links > 0 then Some (List.hd entry.links).href else None
531
+
let content_html = match entry.content with
532
+
| Some (Syndic.Atom.Text s) -> s
533
+
| Some (Syndic.Atom.Html (_, s)) -> s
534
+
| Some (Syndic.Atom.Xhtml (_, nodes)) ->
535
+
String.concat "" (List.map Syndic.XML.to_string nodes)
536
+
| Some (Syndic.Atom.Mime _) | Some (Syndic.Atom.Src _) | None -> ""
538
+
let author, _ = entry.authors in
539
+
let tags = List.map (fun (c : Syndic.Atom.category) -> c.term) entry.categories in
540
+
(username, title, author.name, entry.updated, link_uri, content_html, tags)
543
+
(* Get all posts *)
544
+
Log.info (fun m -> m "Retrieving all posts from state");
545
+
let all_posts = get_all_posts state () in
546
+
let html_data = List.map (fun (username, entry) ->
547
+
entry_to_html_data username entry
550
+
let unique_users = List.sort_uniq String.compare (List.map (fun (u, _, _, _, _, _, _) -> u) html_data) in
551
+
Log.info (fun m -> m "Retrieved %d posts from %d users" (List.length html_data) (List.length unique_users));
552
+
Log.info (fun m -> m "Users: %s" (String.concat ", " unique_users));
554
+
(* Generate main index pages with pagination *)
555
+
let total_posts = List.length html_data in
556
+
let total_pages = (total_posts + posts_per_page - 1) / posts_per_page in
557
+
Log.info (fun m -> m "Generating main index: %d posts across %d pages" total_posts total_pages);
559
+
for page = 1 to total_pages do
560
+
Log.info (fun m -> m " Generating index page %d/%d" page total_pages);
561
+
let start_idx = (page - 1) * posts_per_page in
562
+
let page_posts = List.filteri (fun i _ ->
563
+
i >= start_idx && i < start_idx + posts_per_page
566
+
let post_htmls = List.map (fun (username, title, author, date, link, content, tags) ->
567
+
Log.debug (fun m -> m " Processing post: %s by %s (@%s)" title author username);
568
+
(* Create a temporary Post-like structure for rendering *)
569
+
(* We'll need to adapt this since we're working with Atom entries *)
571
+
let date_str = Format.Html.format_date date in
572
+
let link_html = match link with
574
+
Printf.sprintf {|<a href="%s">%s</a>|}
575
+
(Format.Html.html_escape (Uri.to_string uri))
576
+
(Format.Html.html_escape title)
577
+
| None -> Format.Html.html_escape title
579
+
let excerpt = Format.Html.post_excerpt_from_html content ~max_length:300 in
580
+
let full_content = Format.Html.full_content_from_html content in
585
+
let tag_links = List.map (fun tag ->
586
+
Printf.sprintf {|<a href="categories/%s.html">%s</a>|}
587
+
(Format.Html.html_escape (sanitize_filename tag)) (Format.Html.html_escape tag)
589
+
Printf.sprintf {|<div class="post-tags">%s</div>|}
590
+
(String.concat "" tag_links)
592
+
let thumbnail_html = match get_author_thumbnail username with
593
+
| Some thumb_path ->
594
+
Printf.sprintf {|<img src="%s" alt="%s" class="author-thumbnail">|}
595
+
(Format.Html.html_escape thumb_path)
596
+
(Format.Html.html_escape author)
599
+
Printf.sprintf {|<article class="post">
600
+
<h2 class="post-title">%s</h2>
601
+
<div class="post-meta">
602
+
%s<div class="post-meta-text">By <a href="authors/%s.html">%s</a> on %s</div>
604
+
<div class="post-excerpt">
607
+
<div class="post-full-content">
610
+
<a href="#" class="read-more">Read more</a>
615
+
(Format.Html.html_escape (sanitize_filename username))
616
+
(Format.Html.html_escape author)
625
+
let page_html = Format.Html.render_posts_page
631
+
~nav_current:"posts"
634
+
let filename = if page = 1 then "index.html"
635
+
else Printf.sprintf "page-%d.html" page in
636
+
Eio.Path.save ~create:(`Or_truncate 0o644)
637
+
Eio.Path.(output_dir / filename)
641
+
(* Generate author index *)
642
+
Log.info (fun m -> m "Generating author index and pages");
643
+
let authors_map = Hashtbl.create 32 in
644
+
List.iter (fun (username, _, author, _, _, _, _) ->
645
+
let count = match Hashtbl.find_opt authors_map username with
646
+
| Some (_, c) -> c + 1
649
+
Hashtbl.replace authors_map username (author, count)
652
+
let authors_list = Hashtbl.fold (fun username (author, count) acc ->
653
+
(username, author, count) :: acc
654
+
) authors_map [] |> List.sort (fun (_, a1, _) (_, a2, _) -> String.compare a1 a2) in
656
+
Log.info (fun m -> m "Found %d authors" (List.length authors_list));
658
+
let authors_index_content =
659
+
let items = List.map (fun (username, author, count) ->
660
+
Printf.sprintf {|<li><a href="%s.html">%s</a><span class="count">%d post%s</span></li>|}
661
+
(Format.Html.html_escape (sanitize_filename username))
662
+
(Format.Html.html_escape author)
664
+
(if count = 1 then "" else "s")
666
+
Printf.sprintf "<ul class=\"author-list\">\n%s\n</ul>"
667
+
(String.concat "\n" items)
670
+
let authors_index_html = Format.Html.page_template
671
+
~title:(title ^ " - Authors")
672
+
~nav_current:"authors"
673
+
authors_index_content
675
+
Eio.Path.save ~create:(`Or_truncate 0o644)
676
+
Eio.Path.(output_dir / "authors" / "index.html")
677
+
authors_index_html;
679
+
(* Generate individual author pages *)
680
+
Hashtbl.iter (fun username (author, _) ->
681
+
let author_posts = List.filter (fun (u, _, _, _, _, _, _) -> u = username) html_data in
682
+
let author_total = List.length author_posts in
683
+
let author_pages = (author_total + posts_per_page - 1) / posts_per_page in
684
+
Log.info (fun m -> m " Author: %s (@%s) - %d posts, %d pages" author username author_total author_pages);
686
+
for page = 1 to author_pages do
687
+
let start_idx = (page - 1) * posts_per_page in
688
+
let page_posts = List.filteri (fun i _ ->
689
+
i >= start_idx && i < start_idx + posts_per_page
692
+
let post_htmls = List.map (fun (_username, title, author, date, link, content, tags) ->
693
+
let date_str = Format.Html.format_date date in
694
+
let link_html = match link with
696
+
Printf.sprintf {|<a href="%s">%s</a>|}
697
+
(Format.Html.html_escape (Uri.to_string uri))
698
+
(Format.Html.html_escape title)
699
+
| None -> Format.Html.html_escape title
701
+
let excerpt = Format.Html.post_excerpt_from_html content ~max_length:300 in
702
+
let full_content = Format.Html.full_content_from_html content in
707
+
let tag_links = List.map (fun tag ->
708
+
Printf.sprintf {|<a href="../categories/%s.html">%s</a>|}
709
+
(Format.Html.html_escape (sanitize_filename tag)) (Format.Html.html_escape tag)
711
+
Printf.sprintf {|<div class="post-tags">%s</div>|}
712
+
(String.concat "" tag_links)
714
+
Printf.sprintf {|<article class="post">
715
+
<h2 class="post-title">%s</h2>
716
+
<div class="post-meta">
719
+
<div class="post-excerpt">
722
+
<div class="post-full-content">
725
+
<a href="#" class="read-more">Read more</a>
729
+
(Format.Html.html_escape author)
736
+
let page_html = Format.Html.render_posts_page
737
+
~title:(author ^ " - " ^ title)
740
+
~total_pages:author_pages
741
+
~base_path:(sanitize_filename username ^ "-")
742
+
~nav_current:"authors"
745
+
let safe_username = sanitize_filename username in
746
+
let filename = if page = 1 then safe_username ^ ".html"
747
+
else Printf.sprintf "%s-%d.html" safe_username page in
748
+
Eio.Path.save ~create:(`Or_truncate 0o644)
749
+
Eio.Path.(output_dir / "authors" / filename)
754
+
(* Generate category index and pages *)
755
+
Log.info (fun m -> m "Generating category index and pages");
756
+
let categories_map = Hashtbl.create 32 in
757
+
List.iter (fun (_, _, _, _, _, _, tags) ->
758
+
List.iter (fun tag ->
759
+
let count = match Hashtbl.find_opt categories_map tag with
763
+
Hashtbl.replace categories_map tag count
767
+
let categories_list = Hashtbl.fold (fun tag count acc ->
768
+
(tag, count) :: acc
769
+
) categories_map [] |> List.sort (fun (t1, _) (t2, _) -> String.compare t1 t2) in
771
+
Log.info (fun m -> m "Found %d categories" (List.length categories_list));
773
+
let categories_index_content =
774
+
let items = List.map (fun (tag, count) ->
775
+
Printf.sprintf {|<li><a href="%s.html">%s</a><span class="count">%d post%s</span></li>|}
776
+
(Format.Html.html_escape (sanitize_filename tag))
777
+
(Format.Html.html_escape tag)
779
+
(if count = 1 then "" else "s")
780
+
) categories_list in
781
+
Printf.sprintf "<ul class=\"category-list\">\n%s\n</ul>"
782
+
(String.concat "\n" items)
785
+
let categories_index_html = Format.Html.page_template
786
+
~title:(title ^ " - Categories")
787
+
~nav_current:"categories"
788
+
categories_index_content
790
+
Eio.Path.save ~create:(`Or_truncate 0o644)
791
+
Eio.Path.(output_dir / "categories" / "index.html")
792
+
categories_index_html;
794
+
(* Generate individual category pages *)
795
+
List.iter (fun (tag, count) ->
796
+
let tag_posts = List.filter (fun (_, _, _, _, _, _, tags) ->
800
+
let tag_total = List.length tag_posts in
801
+
let tag_pages = (tag_total + posts_per_page - 1) / posts_per_page in
802
+
Log.info (fun m -> m " Category: %s - %d posts, %d pages" tag count tag_pages);
804
+
for page = 1 to tag_pages do
805
+
let start_idx = (page - 1) * posts_per_page in
806
+
let page_posts = List.filteri (fun i _ ->
807
+
i >= start_idx && i < start_idx + posts_per_page
810
+
let post_htmls = List.map (fun (username, title, author, date, link, content, tags) ->
811
+
let date_str = Format.Html.format_date date in
812
+
let link_html = match link with
814
+
Printf.sprintf {|<a href="%s">%s</a>|}
815
+
(Format.Html.html_escape (Uri.to_string uri))
816
+
(Format.Html.html_escape title)
817
+
| None -> Format.Html.html_escape title
819
+
let excerpt = Format.Html.post_excerpt_from_html content ~max_length:300 in
820
+
let full_content = Format.Html.full_content_from_html content in
825
+
let tag_links = List.map (fun t ->
826
+
Printf.sprintf {|<a href="%s.html">%s</a>|}
827
+
(Format.Html.html_escape (sanitize_filename t)) (Format.Html.html_escape t)
829
+
Printf.sprintf {|<div class="post-tags">%s</div>|}
830
+
(String.concat "" tag_links)
832
+
Printf.sprintf {|<article class="post">
833
+
<h2 class="post-title">%s</h2>
834
+
<div class="post-meta">
835
+
By <a href="../authors/%s.html">%s</a> on %s
837
+
<div class="post-excerpt">
840
+
<div class="post-full-content">
843
+
<a href="#" class="read-more">Read more</a>
847
+
(Format.Html.html_escape (sanitize_filename username))
848
+
(Format.Html.html_escape author)
855
+
let page_html = Format.Html.render_posts_page
856
+
~title:(tag ^ " - " ^ title)
859
+
~total_pages:tag_pages
860
+
~base_path:(sanitize_filename tag ^ "-")
861
+
~nav_current:"categories"
864
+
let safe_tag = sanitize_filename tag in
865
+
let filename = if page = 1 then safe_tag ^ ".html"
866
+
else Printf.sprintf "%s-%d.html" safe_tag page in
867
+
Eio.Path.save ~create:(`Or_truncate 0o644)
868
+
Eio.Path.(output_dir / "categories" / filename)
873
+
(* Generate links page *)
874
+
Log.info (fun m -> m "Generating links page");
875
+
let all_links = List.concat_map (fun (username, title, author, date, post_link, content, _) ->
876
+
let links = Html_markdown.extract_links content in
877
+
List.map (fun (href, link_text) ->
878
+
(href, link_text, username, author, title, post_link, date)
882
+
Log.info (fun m -> m " Extracted %d total links from all posts" (List.length all_links));
884
+
(* Group by URL and track most recent post date *)
885
+
let links_map = Hashtbl.create 256 in
886
+
List.iter (fun (href, link_text, username, author, post_title, post_link, date) ->
887
+
let existing = Hashtbl.find_opt links_map href in
888
+
let new_entry = (link_text, username, author, post_title, post_link, date) in
889
+
match existing with
890
+
| None -> Hashtbl.add links_map href [new_entry]
892
+
(* Add to list, will sort by date later *)
893
+
Hashtbl.replace links_map href (new_entry :: entries)
896
+
(* Sort links by most recent post date *)
897
+
let sorted_links = Hashtbl.fold (fun href entries acc ->
898
+
(* Get the most recent entry for this URL *)
899
+
let sorted_entries = List.sort (fun (_, _, _, _, _, d1) (_, _, _, _, _, d2) ->
900
+
Ptime.compare d2 d1
902
+
let most_recent = List.hd sorted_entries in
903
+
(href, most_recent, entries) :: acc
904
+
) links_map [] |> List.sort (fun (_, (_, _, _, _, _, d1), _) (_, (_, _, _, _, _, d2), _) ->
905
+
Ptime.compare d2 d1
908
+
Log.info (fun m -> m " Deduplicated to %d unique links" (List.length sorted_links));
910
+
let links_content =
911
+
let items = List.map (fun (href, (link_text, username, author, post_title, post_link, date), all_entries) ->
912
+
let date_str = Format.Html.format_date date in
913
+
let display_text = if link_text = "" || link_text = href then href else link_text in
914
+
let post_link_html = match post_link with
916
+
Printf.sprintf {|<a href="%s">%s</a>|}
917
+
(Format.Html.html_escape (Uri.to_string uri))
918
+
(Format.Html.html_escape post_title)
919
+
| None -> Format.Html.html_escape post_title
921
+
let count_str = if List.length all_entries > 1 then
922
+
Printf.sprintf " (mentioned in %d posts)" (List.length all_entries)
925
+
Printf.sprintf {|<div class="link-item">
926
+
<div class="link-url"><a href="%s">%s</a></div>
927
+
<div class="link-meta">From %s by <a href="authors/%s.html">%s</a> on %s%s</div>
929
+
(Format.Html.html_escape href)
930
+
(Format.Html.html_escape display_text)
932
+
(Format.Html.html_escape (sanitize_filename username))
933
+
(Format.Html.html_escape author)
937
+
String.concat "\n" items
940
+
let links_html = Format.Html.page_template
941
+
~title:(title ^ " - Links")
942
+
~nav_current:"links"
945
+
Eio.Path.save ~create:(`Or_truncate 0o644)
946
+
Eio.Path.(output_dir / "links.html")
949
+
Log.info (fun m -> m "HTML site generated successfully in %s"
950
+
(Eio.Path.native_exn output_dir));
953
+
Error (Printf.sprintf "Failed to generate HTML site: %s" (Printexc.to_string e))
let analyze_user_quality state ~username =
match Storage.get_user state username with