···
Eio.Fiber.List.filter_map (fun source ->
+
Log.info (fun m -> m " [%s] Fetching %s (%s)..."
+
username (Source.name source) (Source.url source));
Some (Feed.fetch session source)
+
Log.err (fun m -> m " [%s] Failed to fetch %s: %s"
+
username (Source.name source) (Printexc.to_string e));
···
let export_merged_feed state ~title ~format ?limit () =
let all_posts = get_all_posts state ?limit () in
+
(* Rewrite author metadata from Sortal user info *)
+
(* Replace the primary author of [entry] with one built from the stored
   user record for [username].  The result of [Storage.get_user] is
   matched below; some match arms are elided from this view. *)
let rewrite_entry_author username (entry : Syndic.Atom.entry) =
+
match Storage.get_user state username with
+
(* Read the full name and email from the user record.  [username] is
   rebound below to [User.username user], shadowing the function
   argument for the rest of this branch. *)
+
let fullname = User.fullname user in
+
let email = User.email user in
+
let username = User.username user in
+
(* Build the replacement author.  Its URI is the https scheme prefix
   concatenated with [username].  Two constructions are visible: one that
   also passes [~email] and one that does not (presumably the two arms of
   a match on [email]).
   NOTE(review): this presumes usernames are host-like strings -- confirm. *)
+
Syndic.Atom.author ~email:email_addr ~uri:(Uri.of_string ("https://" ^ username)) fullname
+
Syndic.Atom.author ~uri:(Uri.of_string ("https://" ^ username)) fullname
+
(* [entry.authors] is a (first, rest) pair.  The previous first author is
   dropped, [new_author] takes its place, and the remaining authors are
   kept unchanged. *)
+
let _, other_authors = entry.authors in
+
{ entry with authors = (new_author, other_authors) }
+
let entries = List.map (fun (username, entry) ->
+
rewrite_entry_author username entry
···
| Error err -> Error (Printf.sprintf "Failed to serialize JSON Feed: %s" (Jsont.Error.to_string err))
Export.export_jsonfeed ~title entries
+
(** [export_html_site state ~output_dir ~title ?posts_per_page ()] renders
    the posts held in [state] as a static HTML site under [output_dir].

    From the code visible in this function, the output consists of:
    - [index.html] plus [page-N.html] for later pages of the main index;
    - [authors/index.html] and a paginated set of pages per author;
    - [categories/index.html] and a paginated set of pages per tag;
    - [links.html], built from links extracted from post content;
    - [thumbnails/], holding copied author thumbnails.

    @param posts_per_page page size for every paginated listing
      (default 25).  NOTE(review): it is used as a divisor when page
      counts are computed, so [0] would raise [Division_by_zero]; whether
      the enclosing handler turns that into [Error] is not visible here --
      confirm, or validate the argument up front.
    @return the last visible branch yields [Error msg], where [msg] embeds
      the printed exception.  The success value is not visible in this
      view. *)
let export_html_site state ~output_dir ~title ?(posts_per_page = 25) () =
+
(* Log the effective settings before any file is written. *)
Log.info (fun m -> m "=== Starting HTML site generation ===");
+
Log.info (fun m -> m "Output directory: %s" (Eio.Path.native_exn output_dir));
+
Log.info (fun m -> m "Site title: %s" title);
+
Log.info (fun m -> m "Posts per page: %d" posts_per_page);
+
(* Sanitize a string for use in filenames - replace unsafe characters *)
+
(* Map [s] to a string usable as a file name by writing characters into
   [buf]: slash, backslash, colon, asterisk, question mark, double quote,
   angle brackets, pipe and space each become a hyphen; every other
   character is copied unchanged.  The code that iterates over [s] and
   returns the buffer contents is elided from this view.
   NOTE(review): the mapping is not injective -- inputs that differ only
   in a replaced character yield the same name, so their generated pages
   would overwrite each other.  No visible arm special-cases dot-only or
   empty names -- confirm whether that matters for usernames and tags. *)
let sanitize_filename s =
+
let buf = Buffer.create (String.length s) in
+
| '/' | '\\' | ':' | '*' | '?' | '"' | '<' | '>' | '|' -> Buffer.add_char buf '-'
+
| ' ' -> Buffer.add_char buf '-'
+
| c -> Buffer.add_char buf c
+
(* Create directory structure *)
+
Log.info (fun m -> m "Creating directory structure");
+
(* Create [dir] with permissions 0o755.  An Eio [Already_exists] file-system
   error is swallowed, so calling this on an existing directory is a no-op.
   That is the only handler visible here, so other exceptions are not caught
   at this point. *)
let mkdir_if_not_exists dir =
+
try Eio.Path.mkdir ~perm:0o755 dir
+
with Eio.Io (Eio.Fs.E (Already_exists _), _) -> ()
+
mkdir_if_not_exists output_dir;
+
mkdir_if_not_exists Eio.Path.(output_dir / "authors");
+
mkdir_if_not_exists Eio.Path.(output_dir / "categories");
+
mkdir_if_not_exists Eio.Path.(output_dir / "thumbnails");
+
Log.info (fun m -> m "Directory structure created");
+
(* Helper to get and copy author thumbnail *)
+
(* Look up the Sortal contact for [username] and, when it has a thumbnail,
   copy that file into the thumbnails directory under [output_dir].
   On success the result is [Some path], where [path] is the thumbnails
   directory name joined with the source file's basename, i.e. a path
   relative to the site root.  The failure branches (copy error, no
   thumbnail, no contact) only show their log calls in this view; their
   result values are elided, presumably [None].
   NOTE(review): this helper is invoked while rendering each post on the
   main index, so the same thumbnail is re-read and re-written once per
   post -- consider caching the result per username. *)
let get_author_thumbnail username =
+
Log.debug (fun m -> m "Looking up thumbnail for username: %s" username);
+
match Sortal.lookup state.sortal username with
+
Log.debug (fun m -> m " Found Sortal contact for %s: %s" username (Sortal.Contact.name contact));
+
(match Sortal.thumbnail_path state.sortal contact with
+
Log.info (fun m -> m " Copying thumbnail for %s from: %s" username (Eio.Path.native_exn src_path));
+
(* The destination name is only the basename of the source path.
   NOTE(review): two contacts whose thumbnail files share a basename
   would overwrite each other in the output directory -- confirm that
   basenames are unique. *)
+
let filename = Filename.basename (Eio.Path.native_exn src_path) in
+
let dest_path = Eio.Path.(output_dir / "thumbnails" / filename) in
+
Log.debug (fun m -> m " Source path: %s" (Eio.Path.native_exn src_path));
+
Log.debug (fun m -> m " Destination path: %s" (Eio.Path.native_exn dest_path));
+
(* The whole file is loaded into memory, then written out, creating or
   truncating the destination with mode 0o644. *)
let content = Eio.Path.load src_path in
+
Eio.Path.save ~create:(`Or_truncate 0o644) dest_path content;
+
Log.info (fun m -> m " Successfully copied thumbnail to: thumbnails/%s" filename);
+
Some ("thumbnails/" ^ filename)
+
Log.warn (fun m -> m " Failed to copy thumbnail for %s: %s" username (Printexc.to_string e));
+
Log.debug (fun m -> m " No thumbnail set for %s" username);
+
Log.warn (fun m -> m " No Sortal contact found for username: %s" username);
+
(* Helper to convert Atom entry to a simple record for HTML generation *)
+
(* Flatten an Atom [entry] into the 7-tuple used by the page generators:
   (username, title, first author name, updated timestamp, optional link
   URI, content as an HTML string, list of category terms). *)
let entry_to_html_data username (entry : Syndic.Atom.entry) =
+
let title = Text_extract.string_of_text_construct entry.title in
+
(* Prefer a link whose relation is [Alternate].  The list being searched
   is elided from this view, presumably [entry.links]. *)
let link = List.find_opt (fun (l : Syndic.Atom.link) ->
+
l.rel = Syndic.Atom.Alternate
+
(* Without an alternate link, fall back to the first link of the entry,
   or [None] when the entry has no links at all. *)
let link_uri = match link with
+
| Some l -> Some l.href
+
| None -> if List.length entry.links > 0 then Some (List.hd entry.links).href else None
+
(* Text and Html bodies are used verbatim; Xhtml nodes are serialised and
   concatenated; Mime, Src and missing content become the empty string.
   NOTE(review): a plain Text body is passed through unescaped although
   the value is later handed to the from_html helpers -- confirm whether
   it should be HTML-escaped first. *)
let content_html = match entry.content with
+
| Some (Syndic.Atom.Text s) -> s
+
| Some (Syndic.Atom.Html (_, s)) -> s
+
| Some (Syndic.Atom.Xhtml (_, nodes)) ->
+
String.concat "" (List.map Syndic.XML.to_string nodes)
+
| Some (Syndic.Atom.Mime _) | Some (Syndic.Atom.Src _) | None -> ""
+
(* Only the first author of the (first, rest) pair is carried forward. *)
let author, _ = entry.authors in
+
let tags = List.map (fun (c : Syndic.Atom.category) -> c.term) entry.categories in
+
(username, title, author.name, entry.updated, link_uri, content_html, tags)
+
Log.info (fun m -> m "Retrieving all posts from state");
+
let all_posts = get_all_posts state () in
+
let html_data = List.map (fun (username, entry) ->
+
entry_to_html_data username entry
+
let unique_users = List.sort_uniq String.compare (List.map (fun (u, _, _, _, _, _, _) -> u) html_data) in
+
Log.info (fun m -> m "Retrieved %d posts from %d users" (List.length html_data) (List.length unique_users));
+
Log.info (fun m -> m "Users: %s" (String.concat ", " unique_users));
+
(* Generate main index pages with pagination *)
+
let total_posts = List.length html_data in
+
let total_pages = (total_posts + posts_per_page - 1) / posts_per_page in
+
Log.info (fun m -> m "Generating main index: %d posts across %d pages" total_posts total_pages);
+
for page = 1 to total_pages do
+
Log.info (fun m -> m " Generating index page %d/%d" page total_pages);
+
let start_idx = (page - 1) * posts_per_page in
+
let page_posts = List.filteri (fun i _ ->
+
i >= start_idx && i < start_idx + posts_per_page
+
let post_htmls = List.map (fun (username, title, author, date, link, content, tags) ->
+
Log.debug (fun m -> m " Processing post: %s by %s (@%s)" title author username);
+
(* Create a temporary Post-like structure for rendering *)
+
(* We'll need to adapt this since we're working with Atom entries *)
+
let date_str = Format.Html.format_date date in
+
let link_html = match link with
+
Printf.sprintf {|<a href="%s">%s</a>|}
+
(Format.Html.html_escape (Uri.to_string uri))
+
(Format.Html.html_escape title)
+
| None -> Format.Html.html_escape title
+
let excerpt = Format.Html.post_excerpt_from_html content ~max_length:300 in
+
let full_content = Format.Html.full_content_from_html content in
+
let tag_links = List.map (fun tag ->
+
Printf.sprintf {|<a href="categories/%s.html">%s</a>|}
+
(Format.Html.html_escape (sanitize_filename tag)) (Format.Html.html_escape tag)
+
Printf.sprintf {|<div class="post-tags">%s</div>|}
+
(String.concat "" tag_links)
+
let thumbnail_html = match get_author_thumbnail username with
+
Printf.sprintf {|<img src="%s" alt="%s" class="author-thumbnail">|}
+
(Format.Html.html_escape thumb_path)
+
(Format.Html.html_escape author)
+
Printf.sprintf {|<article class="post">
+
<h2 class="post-title">%s</h2>
+
<div class="post-meta">
+
%s<div class="post-meta-text">By <a href="authors/%s.html">%s</a> on %s</div>
+
<div class="post-excerpt">
+
<div class="post-full-content">
+
<a href="#" class="read-more">Read more</a>
+
(Format.Html.html_escape (sanitize_filename username))
+
(Format.Html.html_escape author)
+
let page_html = Format.Html.render_posts_page
+
let filename = if page = 1 then "index.html"
+
else Printf.sprintf "page-%d.html" page in
+
Eio.Path.save ~create:(`Or_truncate 0o644)
+
Eio.Path.(output_dir / filename)
+
(* Generate author index *)
+
Log.info (fun m -> m "Generating author index and pages");
+
let authors_map = Hashtbl.create 32 in
+
List.iter (fun (username, _, author, _, _, _, _) ->
+
let count = match Hashtbl.find_opt authors_map username with
+
Hashtbl.replace authors_map username (author, count)
+
let authors_list = Hashtbl.fold (fun username (author, count) acc ->
+
(username, author, count) :: acc
+
) authors_map [] |> List.sort (fun (_, a1, _) (_, a2, _) -> String.compare a1 a2) in
+
Log.info (fun m -> m "Found %d authors" (List.length authors_list));
+
let authors_index_content =
+
let items = List.map (fun (username, author, count) ->
+
Printf.sprintf {|<li><a href="%s.html">%s</a><span class="count">%d post%s</span></li>|}
+
(Format.Html.html_escape (sanitize_filename username))
+
(Format.Html.html_escape author)
+
(if count = 1 then "" else "s")
+
Printf.sprintf "<ul class=\"author-list\">\n%s\n</ul>"
+
(String.concat "\n" items)
+
let authors_index_html = Format.Html.page_template
+
~title:(title ^ " - Authors")
+
Eio.Path.save ~create:(`Or_truncate 0o644)
+
Eio.Path.(output_dir / "authors" / "index.html")
+
(* Generate individual author pages *)
+
Hashtbl.iter (fun username (author, _) ->
+
let author_posts = List.filter (fun (u, _, _, _, _, _, _) -> u = username) html_data in
+
let author_total = List.length author_posts in
+
let author_pages = (author_total + posts_per_page - 1) / posts_per_page in
+
Log.info (fun m -> m " Author: %s (@%s) - %d posts, %d pages" author username author_total author_pages);
+
for page = 1 to author_pages do
+
let start_idx = (page - 1) * posts_per_page in
+
let page_posts = List.filteri (fun i _ ->
+
i >= start_idx && i < start_idx + posts_per_page
+
let post_htmls = List.map (fun (_username, title, author, date, link, content, tags) ->
+
let date_str = Format.Html.format_date date in
+
let link_html = match link with
+
Printf.sprintf {|<a href="%s">%s</a>|}
+
(Format.Html.html_escape (Uri.to_string uri))
+
(Format.Html.html_escape title)
+
| None -> Format.Html.html_escape title
+
let excerpt = Format.Html.post_excerpt_from_html content ~max_length:300 in
+
let full_content = Format.Html.full_content_from_html content in
+
let tag_links = List.map (fun tag ->
+
Printf.sprintf {|<a href="../categories/%s.html">%s</a>|}
+
(Format.Html.html_escape (sanitize_filename tag)) (Format.Html.html_escape tag)
+
Printf.sprintf {|<div class="post-tags">%s</div>|}
+
(String.concat "" tag_links)
+
Printf.sprintf {|<article class="post">
+
<h2 class="post-title">%s</h2>
+
<div class="post-meta">
+
<div class="post-excerpt">
+
<div class="post-full-content">
+
<a href="#" class="read-more">Read more</a>
+
(Format.Html.html_escape author)
+
let page_html = Format.Html.render_posts_page
+
~title:(author ^ " - " ^ title)
+
~total_pages:author_pages
+
~base_path:(sanitize_filename username ^ "-")
+
let safe_username = sanitize_filename username in
+
let filename = if page = 1 then safe_username ^ ".html"
+
else Printf.sprintf "%s-%d.html" safe_username page in
+
Eio.Path.save ~create:(`Or_truncate 0o644)
+
Eio.Path.(output_dir / "authors" / filename)
+
(* Generate category index and pages *)
+
Log.info (fun m -> m "Generating category index and pages");
+
let categories_map = Hashtbl.create 32 in
+
List.iter (fun (_, _, _, _, _, _, tags) ->
+
let count = match Hashtbl.find_opt categories_map tag with
+
Hashtbl.replace categories_map tag count
+
let categories_list = Hashtbl.fold (fun tag count acc ->
+
) categories_map [] |> List.sort (fun (t1, _) (t2, _) -> String.compare t1 t2) in
+
Log.info (fun m -> m "Found %d categories" (List.length categories_list));
+
let categories_index_content =
+
let items = List.map (fun (tag, count) ->
+
Printf.sprintf {|<li><a href="%s.html">%s</a><span class="count">%d post%s</span></li>|}
+
(Format.Html.html_escape (sanitize_filename tag))
+
(Format.Html.html_escape tag)
+
(if count = 1 then "" else "s")
+
Printf.sprintf "<ul class=\"category-list\">\n%s\n</ul>"
+
(String.concat "\n" items)
+
let categories_index_html = Format.Html.page_template
+
~title:(title ^ " - Categories")
+
~nav_current:"categories"
+
categories_index_content
+
Eio.Path.save ~create:(`Or_truncate 0o644)
+
Eio.Path.(output_dir / "categories" / "index.html")
+
(* Generate individual category pages *)
+
List.iter (fun (tag, count) ->
+
let tag_posts = List.filter (fun (_, _, _, _, _, _, tags) ->
+
let tag_total = List.length tag_posts in
+
let tag_pages = (tag_total + posts_per_page - 1) / posts_per_page in
+
Log.info (fun m -> m " Category: %s - %d posts, %d pages" tag count tag_pages);
+
for page = 1 to tag_pages do
+
let start_idx = (page - 1) * posts_per_page in
+
let page_posts = List.filteri (fun i _ ->
+
i >= start_idx && i < start_idx + posts_per_page
+
let post_htmls = List.map (fun (username, title, author, date, link, content, tags) ->
+
let date_str = Format.Html.format_date date in
+
let link_html = match link with
+
Printf.sprintf {|<a href="%s">%s</a>|}
+
(Format.Html.html_escape (Uri.to_string uri))
+
(Format.Html.html_escape title)
+
| None -> Format.Html.html_escape title
+
let excerpt = Format.Html.post_excerpt_from_html content ~max_length:300 in
+
let full_content = Format.Html.full_content_from_html content in
+
let tag_links = List.map (fun t ->
+
Printf.sprintf {|<a href="%s.html">%s</a>|}
+
(Format.Html.html_escape (sanitize_filename t)) (Format.Html.html_escape t)
+
Printf.sprintf {|<div class="post-tags">%s</div>|}
+
(String.concat "" tag_links)
+
Printf.sprintf {|<article class="post">
+
<h2 class="post-title">%s</h2>
+
<div class="post-meta">
+
By <a href="../authors/%s.html">%s</a> on %s
+
<div class="post-excerpt">
+
<div class="post-full-content">
+
<a href="#" class="read-more">Read more</a>
+
(Format.Html.html_escape (sanitize_filename username))
+
(Format.Html.html_escape author)
+
let page_html = Format.Html.render_posts_page
+
~title:(tag ^ " - " ^ title)
+
~base_path:(sanitize_filename tag ^ "-")
+
~nav_current:"categories"
+
let safe_tag = sanitize_filename tag in
+
let filename = if page = 1 then safe_tag ^ ".html"
+
else Printf.sprintf "%s-%d.html" safe_tag page in
+
Eio.Path.save ~create:(`Or_truncate 0o644)
+
Eio.Path.(output_dir / "categories" / filename)
+
(* Generate links page *)
+
Log.info (fun m -> m "Generating links page");
+
let all_links = List.concat_map (fun (username, title, author, date, post_link, content, _) ->
+
let links = Html_markdown.extract_links content in
+
List.map (fun (href, link_text) ->
+
(href, link_text, username, author, title, post_link, date)
+
Log.info (fun m -> m " Extracted %d total links from all posts" (List.length all_links));
+
(* Group by URL and track most recent post date *)
+
let links_map = Hashtbl.create 256 in
+
List.iter (fun (href, link_text, username, author, post_title, post_link, date) ->
+
let existing = Hashtbl.find_opt links_map href in
+
let new_entry = (link_text, username, author, post_title, post_link, date) in
+
| None -> Hashtbl.add links_map href [new_entry]
+
(* Add to list, will sort by date later *)
+
Hashtbl.replace links_map href (new_entry :: entries)
+
(* Sort links by most recent post date *)
+
let sorted_links = Hashtbl.fold (fun href entries acc ->
+
(* Get the most recent entry for this URL *)
+
let sorted_entries = List.sort (fun (_, _, _, _, _, d1) (_, _, _, _, _, d2) ->
+
let most_recent = List.hd sorted_entries in
+
(href, most_recent, entries) :: acc
+
) links_map [] |> List.sort (fun (_, (_, _, _, _, _, d1), _) (_, (_, _, _, _, _, d2), _) ->
+
Log.info (fun m -> m " Deduplicated to %d unique links" (List.length sorted_links));
+
let items = List.map (fun (href, (link_text, username, author, post_title, post_link, date), all_entries) ->
+
let date_str = Format.Html.format_date date in
+
let display_text = if link_text = "" || link_text = href then href else link_text in
+
let post_link_html = match post_link with
+
Printf.sprintf {|<a href="%s">%s</a>|}
+
(Format.Html.html_escape (Uri.to_string uri))
+
(Format.Html.html_escape post_title)
+
| None -> Format.Html.html_escape post_title
+
let count_str = if List.length all_entries > 1 then
+
Printf.sprintf " (mentioned in %d posts)" (List.length all_entries)
+
Printf.sprintf {|<div class="link-item">
+
<div class="link-url"><a href="%s">%s</a></div>
+
<div class="link-meta">From %s by <a href="authors/%s.html">%s</a> on %s%s</div>
+
(Format.Html.html_escape href)
+
(Format.Html.html_escape display_text)
+
(Format.Html.html_escape (sanitize_filename username))
+
(Format.Html.html_escape author)
+
String.concat "\n" items
+
let links_html = Format.Html.page_template
+
~title:(title ^ " - Links")
+
Eio.Path.save ~create:(`Or_truncate 0o644)
+
Eio.Path.(output_dir / "links.html")
+
Log.info (fun m -> m "HTML site generated successfully in %s"
+
(Eio.Path.native_exn output_dir));
+
Error (Printf.sprintf "Failed to generate HTML site: %s" (Printexc.to_string e))
let analyze_user_quality state ~username =
match Storage.get_user state username with