My agentic slop goes here. Not intended for anyone else!

river

+35 -1
stack/river/cmd/river_cmd.ml
···
Log.err (fun m -> m "Failed to export merged feed: %s" err);
1
) $ format_arg $ title_arg $ limit_arg)
+
+
(** Cmdliner term behind the [html] subcommand.

    Takes the output directory as the first positional argument, plus
    [--title]/[-t] and [--posts-per-page]/[-p] options, and asks
    [River.State.export_html_site] to generate the static site there.
    The term evaluates to the process exit code: [0] on success, [1] when
    the arguments are invalid or the export reports an error. *)
let html =
  let output_dir_arg =
    let doc = "Output directory for HTML site" in
    Arg.(required & pos 0 (some string) None & info [] ~docv:"OUTPUT_DIR" ~doc)
  in
  let title_arg =
    let doc = "Site title" in
    Arg.(value & opt string "River Feed" & info ["title"; "t"] ~doc)
  in
  let posts_per_page_arg =
    let doc = "Number of posts per page (default: 25)" in
    Arg.(value & opt int 25 & info ["posts-per-page"; "p"] ~doc)
  in
  let run output_dir_str title posts_per_page env _xdg _profile =
    if posts_per_page <= 0 then begin
      (* The page count is computed by dividing by this value, so zero or a
         negative number cannot produce a usable site; reject it up front. *)
      Log.err (fun m ->
        m "Invalid posts per page %d: must be a positive integer" posts_per_page);
      1
    end else begin
      let state = River.State.create env ~app_name:"river" in
      let output_dir = Eio.Path.(env#fs / output_dir_str) in
      match River.State.export_html_site state ~output_dir ~title ~posts_per_page () with
      | Ok () ->
        Log.info (fun m -> m "HTML site generated in %s" output_dir_str);
        0
      | Error err ->
        Log.err (fun m -> m "Failed to generate HTML site: %s" err);
        1
    end
  in
  Term.(const run $ output_dir_arg $ title_arg $ posts_per_page_arg)
+
let main_cmd =
let doc = "River feed management CLI" in
let main_info = Cmd.info "river-cli" ~version:"1.0" ~doc in
···
~service:"river"
merge
in
-
Cmd.group main_info [user_cmd; sync_cmd; list_cmd; info_cmd; merge_cmd]
+
let html_cmd =
+
Eiocmd.run
+
~use_keyeio:false
+
~info:(Cmd.info "html" ~doc:"Generate a static HTML site from all feeds")
+
~app_name:"river"
+
~service:"river"
+
html
+
in
+
Cmd.group main_info [user_cmd; sync_cmd; list_cmd; info_cmd; merge_cmd; html_cmd]
+1 -1
stack/river/lib/dune
···
(library
(name river)
(public_name river)
-
(libraries eio eio_main requests requests_json_api logs str syndic lambdasoup uri ptime jsonfeed jsont jsont.bytesrw xdge cmdliner eiocmd fmt sortal))
+
(libraries eio eio_main requests requests_json_api logs str syndic lambdasoup uri ptime jsonfeed jsont jsont.bytesrw xdge cmdliner eiocmd fmt sortal cmarkit))
+10 -3
stack/river/lib/feed.ml
···
Log.debug (fun m -> m "Successfully parsed as JSONFeed");
Json jsonfeed
| Error err ->
-
Log.debug (fun m -> m "Not a JSONFeed: %s" (Jsont.Error.to_string err));
+
let err_str = Jsont.Error.to_string err in
+
Log.debug (fun m -> m "Not a JSONFeed: %s" err_str);
(* Fall through to XML parsing *)
-
failwith "Not a valid JSONFeed"
+
failwith (Printf.sprintf "Not a valid JSONFeed: %s" err_str)
) else (
(* Try XML formats *)
try
···
failwith (Printf.sprintf "HTTP %d: %s" status truncated_msg)
in
-
let content = classify_feed ~xmlbase response in
+
let content =
+
try classify_feed ~xmlbase response
+
with Failure msg ->
+
Log.err (fun m -> m "Failed to parse feed '%s' (%s): %s"
+
(Source.name source) (Source.url source) msg);
+
raise (Failure msg)
+
in
let title =
match content with
| Atom atom -> Text_extract.string_of_text_construct atom.Syndic.Atom.title
+653 -1
stack/river/lib/format.ml
···
| None -> Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get
| Some d -> d
in
-
Syndic.Atom.entry ~content ~contributors ~links ~id ~authors ~title ~updated
+
let categories =
+
List.map (fun tag -> Syndic.Atom.category tag) (Post.tags post)
+
in
+
Syndic.Atom.entry ~content ~contributors ~links ~id ~authors ~title ~updated ~categories
()
let entries_of_posts posts = List.map entry_of_post posts
···
| Feed.Json jf -> Some jf
| _ -> None
end
+
+
module Html = struct
+
(** HTML static site generation. *)
+
+
(** Stylesheet shared by every generated page; [page_template] inlines it
    verbatim inside the page's [<style>] element. *)
let css = {|
+
* { margin: 0; padding: 0; box-sizing: border-box; }
+
+
body {
+
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Helvetica, Arial, sans-serif;
+
line-height: 1.5;
+
color: #333;
+
background: #fff;
+
max-width: 800px;
+
margin: 0 auto;
+
padding: 15px;
+
}
+
+
header {
+
border-bottom: 1px solid #e1e4e8;
+
padding-bottom: 10px;
+
margin-bottom: 20px;
+
}
+
+
header h1 {
+
font-size: 22px;
+
font-weight: 600;
+
margin-bottom: 6px;
+
}
+
+
header h1 a {
+
color: #333;
+
text-decoration: none;
+
}
+
+
nav {
+
font-size: 13px;
+
}
+
+
nav a {
+
color: #586069;
+
text-decoration: none;
+
margin-right: 12px;
+
}
+
+
nav a:hover {
+
color: #0366d6;
+
}
+
+
.post {
+
margin-bottom: 25px;
+
padding-bottom: 20px;
+
border-bottom: 1px solid #e1e4e8;
+
}
+
+
.post:last-child {
+
border-bottom: none;
+
}
+
+
.post-title {
+
font-size: 18px;
+
font-weight: 600;
+
margin-bottom: 5px;
+
line-height: 1.3;
+
}
+
+
.post-title a {
+
color: #0366d6;
+
text-decoration: none;
+
}
+
+
.post-title a:hover {
+
text-decoration: underline;
+
}
+
+
.post-meta {
+
font-size: 12px;
+
color: #586069;
+
margin-bottom: 8px;
+
display: flex;
+
align-items: center;
+
gap: 8px;
+
}
+
+
.post-meta a {
+
color: #586069;
+
text-decoration: none;
+
}
+
+
.post-meta a:hover {
+
color: #0366d6;
+
}
+
+
.author-thumbnail {
+
width: 24px;
+
height: 24px;
+
border-radius: 50%;
+
object-fit: cover;
+
}
+
+
.post-meta-text {
+
flex: 1;
+
}
+
+
.post-excerpt {
+
font-size: 14px;
+
color: #24292e;
+
line-height: 1.5;
+
}
+
+
.post-excerpt p {
+
margin-bottom: 8px;
+
}
+
+
.post-excerpt ul, .post-excerpt ol {
+
margin-left: 20px;
+
margin-bottom: 8px;
+
}
+
+
.post-excerpt li {
+
margin-bottom: 3px;
+
}
+
+
.post-excerpt code {
+
background: #f6f8fa;
+
padding: 2px 4px;
+
border-radius: 3px;
+
font-size: 13px;
+
}
+
+
.post-excerpt img {
+
float: right;
+
width: 35%;
+
max-width: 300px;
+
margin: 0 0 10px 15px;
+
border-radius: 4px;
+
cursor: pointer;
+
transition: opacity 0.2s;
+
}
+
+
.post-excerpt img:hover {
+
opacity: 0.9;
+
}
+
+
@media (max-width: 600px) {
+
.post-excerpt img {
+
float: none;
+
width: 100%;
+
max-width: 100%;
+
margin: 10px 0;
+
}
+
}
+
+
.lightbox {
+
display: none;
+
position: fixed;
+
top: 0;
+
left: 0;
+
width: 100%;
+
height: 100%;
+
background: rgba(0, 0, 0, 0.9);
+
z-index: 1000;
+
cursor: pointer;
+
align-items: center;
+
justify-content: center;
+
}
+
+
.lightbox.active {
+
display: flex;
+
}
+
+
.lightbox img {
+
max-width: 95%;
+
max-height: 95%;
+
object-fit: contain;
+
}
+
+
.post-full-content {
+
display: none;
+
font-size: 14px;
+
color: #24292e;
+
line-height: 1.5;
+
margin-top: 10px;
+
}
+
+
.post-full-content.active {
+
display: block;
+
}
+
+
.post-full-content p {
+
margin-bottom: 10px;
+
}
+
+
.post-full-content ul, .post-full-content ol {
+
margin-left: 20px;
+
margin-bottom: 10px;
+
}
+
+
.post-full-content li {
+
margin-bottom: 4px;
+
}
+
+
.post-full-content h1, .post-full-content h2, .post-full-content h3 {
+
margin-top: 15px;
+
margin-bottom: 8px;
+
}
+
+
.post-full-content h1 {
+
font-size: 18px;
+
font-weight: 600;
+
}
+
+
.post-full-content h2 {
+
font-size: 16px;
+
font-weight: 600;
+
}
+
+
.post-full-content h3 {
+
font-size: 15px;
+
font-weight: 600;
+
}
+
+
.post-full-content code {
+
background: #f6f8fa;
+
padding: 2px 4px;
+
border-radius: 3px;
+
font-size: 13px;
+
}
+
+
.post-full-content pre {
+
background: #f6f8fa;
+
padding: 10px;
+
border-radius: 4px;
+
overflow-x: auto;
+
margin-bottom: 10px;
+
}
+
+
.post-full-content pre code {
+
background: none;
+
padding: 0;
+
}
+
+
.post-full-content blockquote {
+
border-left: 3px solid #e1e4e8;
+
padding-left: 12px;
+
margin: 10px 0;
+
color: #586069;
+
}
+
+
.post-full-content img {
+
max-width: 100%;
+
height: auto;
+
margin: 10px 0;
+
border-radius: 4px;
+
}
+
+
.read-more {
+
display: inline-block;
+
color: #0366d6;
+
font-size: 13px;
+
cursor: pointer;
+
text-decoration: none;
+
margin-top: 8px;
+
padding: 4px 8px;
+
border: 1px solid #e1e4e8;
+
border-radius: 3px;
+
background: #f6f8fa;
+
transition: background 0.2s;
+
}
+
+
.read-more:hover {
+
background: #e1e4e8;
+
}
+
+
.read-more::after {
+
content: ' ▼';
+
font-size: 10px;
+
}
+
+
.read-more.active::after {
+
content: ' ▲';
+
}
+
+
.post-tags {
+
margin-top: 8px;
+
font-size: 11px;
+
clear: both;
+
}
+
+
.post-tags a {
+
display: inline-block;
+
background: #f1f8ff;
+
color: #0366d6;
+
padding: 2px 6px;
+
border-radius: 3px;
+
text-decoration: none;
+
margin-right: 4px;
+
margin-bottom: 4px;
+
}
+
+
.post-tags a:hover {
+
background: #dbedff;
+
}
+
+
.pagination {
+
margin-top: 30px;
+
padding-top: 15px;
+
border-top: 1px solid #e1e4e8;
+
text-align: center;
+
font-size: 13px;
+
}
+
+
.pagination a {
+
color: #0366d6;
+
text-decoration: none;
+
margin: 0 8px;
+
}
+
+
.pagination a:hover {
+
text-decoration: underline;
+
}
+
+
.pagination .current {
+
color: #24292e;
+
font-weight: 600;
+
}
+
+
.link-item {
+
margin-bottom: 15px;
+
padding-bottom: 12px;
+
border-bottom: 1px solid #e1e4e8;
+
}
+
+
.link-item:last-child {
+
border-bottom: none;
+
}
+
+
.link-url {
+
font-size: 14px;
+
margin-bottom: 3px;
+
}
+
+
.link-url a {
+
color: #0366d6;
+
text-decoration: none;
+
word-break: break-all;
+
}
+
+
.link-url a:hover {
+
text-decoration: underline;
+
}
+
+
.link-meta {
+
font-size: 11px;
+
color: #586069;
+
}
+
+
.link-meta a {
+
color: #586069;
+
text-decoration: none;
+
}
+
+
.link-meta a:hover {
+
color: #0366d6;
+
}
+
+
.author-list, .category-list {
+
list-style: none;
+
}
+
+
.author-list li, .category-list li {
+
margin-bottom: 12px;
+
padding-bottom: 12px;
+
border-bottom: 1px solid #e1e4e8;
+
}
+
+
.author-list li:last-child, .category-list li:last-child {
+
border-bottom: none;
+
}
+
+
.author-list a, .category-list a {
+
color: #0366d6;
+
text-decoration: none;
+
font-size: 15px;
+
}
+
+
.author-list a:hover, .category-list a:hover {
+
text-decoration: underline;
+
}
+
+
.count {
+
color: #586069;
+
font-size: 12px;
+
margin-left: 6px;
+
}
+
+
footer {
+
margin-top: 40px;
+
padding-top: 15px;
+
border-top: 1px solid #e1e4e8;
+
text-align: center;
+
font-size: 11px;
+
color: #586069;
+
}
+
|}
+
+
(** [html_escape s] returns a copy of [s] in which the characters
    [<], [>], [&], ["] and ['] are replaced by the entities [&lt;], [&gt;],
    [&amp;], [&quot;] and [&#39;]. All other characters are copied as-is. *)
let html_escape s =
  let entity_of = function
    | '<' -> Some "&lt;"
    | '>' -> Some "&gt;"
    | '&' -> Some "&amp;"
    | '"' -> Some "&quot;"
    | '\'' -> Some "&#39;"
    | _ -> None
  in
  let out = Buffer.create (String.length s) in
  let emit ch =
    match entity_of ch with
    | Some entity -> Buffer.add_string out entity
    | None -> Buffer.add_char out ch
  in
  String.iter emit s;
  Buffer.contents out
+
+
(** [format_date date] renders [date] as ["<Month> <day>, <year>"] with the
    English month name, e.g. ["November 23, 2025"]. The calendar date is
    taken in UTC.

    The date is read directly with [Ptime.to_date] rather than converting
    to a float and going through [Unix.gmtime], which avoids the lossy
    float round-trip and opening the whole [Unix] module locally. *)
let format_date date =
  let month_names =
    [| "January"; "February"; "March"; "April"; "May"; "June";
       "July"; "August"; "September"; "October"; "November"; "December" |]
  in
  (* [Ptime.to_date] yields a 1-based month; the table is 0-based. *)
  let year, month, day = Ptime.to_date date in
  Printf.sprintf "%s %d, %d" month_names.(month - 1) day year
+
+
(** [page_template ~title ~nav_current content] wraps [content] in the full
    HTML document used for every page.

    - [title] is HTML-escaped and placed in the [<title>] element.
    - [nav_current] selects which navigation entry receives
      [class="current"]; the recognised values are ["posts"], ["authors"],
      ["categories"] and ["links"]. Any other value highlights nothing.
    - [content] is inserted into [<main>] as-is, without escaping.

    The stylesheet [css] and the lightbox / "Read more" script are inlined.

    NOTE(review): the header and navigation links are relative
    ([index.html], [authors/index.html], ...), so they only resolve
    correctly from pages stored at the site root; confirm how pages written
    into sub-directories are expected to link back. *)
let page_template ~title ~nav_current content =
  (* Attribute text appended to a nav link when it is the active section. *)
  let current_attr section =
    if nav_current = section then " class=\"current\"" else ""
  in
  Printf.sprintf {|<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>%s</title>
<style>%s</style>
</head>
<body>
<header>
<h1><a href="index.html">River Feed</a></h1>
<nav>
<a href="index.html"%s>Posts</a>
<a href="authors/index.html"%s>Authors</a>
<a href="categories/index.html"%s>Categories</a>
<a href="links.html"%s>Links</a>
</nav>
</header>
<main>
%s
</main>
<footer>
Generated by River Feed Aggregator
</footer>
<div class="lightbox" id="lightbox">
<img id="lightbox-img" src="" alt="">
</div>
<script>
(function() {
const lightbox = document.getElementById('lightbox');
const lightboxImg = document.getElementById('lightbox-img');

// Add click handler to all images in excerpts and full content
document.addEventListener('click', function(e) {
if (e.target.tagName === 'IMG' && (e.target.closest('.post-excerpt') || e.target.closest('.post-full-content'))) {
e.preventDefault();
lightboxImg.src = e.target.src;
lightboxImg.alt = e.target.alt;
lightbox.classList.add('active');
}
});

// Close lightbox on click
lightbox.addEventListener('click', function() {
lightbox.classList.remove('active');
lightboxImg.src = '';
});

// Close on escape key
document.addEventListener('keydown', function(e) {
if (e.key === 'Escape' && lightbox.classList.contains('active')) {
lightbox.classList.remove('active');
lightboxImg.src = '';
}
});

// Read more toggle
document.addEventListener('click', function(e) {
if (e.target.classList.contains('read-more')) {
e.preventDefault();
const post = e.target.closest('.post');
const fullContent = post.querySelector('.post-full-content');
const excerpt = post.querySelector('.post-excerpt');

if (fullContent.classList.contains('active')) {
fullContent.classList.remove('active');
excerpt.style.display = 'block';
e.target.textContent = 'Read more';
e.target.classList.remove('active');
} else {
fullContent.classList.add('active');
excerpt.style.display = 'none';
e.target.textContent = 'Show less';
e.target.classList.add('active');
}
}
});
})();
</script>
</body>
</html>|}
    (html_escape title)
    css
    (current_attr "posts")
    (current_attr "authors")
    (current_attr "categories")
    (current_attr "links")
    content
+
+
(** [pagination_html ~current_page ~total_pages ~base_path] builds the
    [<div class="pagination">] block for a paginated listing.

    Returns [""] when there is at most one page. Otherwise the block holds,
    in order: a "Previous" link (only when [current_page > 1]), one entry
    per page from [1] to [total_pages] (the current page as a [<span>], the
    others as links), and a "Next" link (only when
    [current_page < total_pages]).

    Page 1 links to [base_path ^ "index.html"]; page [n] links to
    [base_path ^ "page-<n>.html"]. [base_path] is inserted without
    escaping. *)
let pagination_html ~current_page ~total_pages ~base_path =
  if total_pages <= 1 then ""
  else begin
    (* Target of the link to page [n]; the first page is the index file. *)
    let href_of n =
      if n = 1 then base_path ^ "index.html"
      else Printf.sprintf "%spage-%d.html" base_path n
    in
    let prev_link =
      if current_page > 1 then
        Printf.sprintf {|<a href="%s">← Previous</a>|} (href_of (current_page - 1))
      else ""
    in
    let next_link =
      if current_page < total_pages then
        Printf.sprintf {|<a href="%spage-%d.html">Next →</a>|} base_path (current_page + 1)
      else ""
    in
    let page_entry n =
      if n = current_page then
        Printf.sprintf {| <span class="current">%d</span>|} n
      else
        Printf.sprintf {| <a href="%s">%d</a>|} (href_of n) n
    in
    let numbered =
      List.init total_pages (fun idx -> page_entry (idx + 1))
      |> String.concat ""
    in
    Printf.sprintf {|<div class="pagination">%s%s%s</div>|} prev_link numbered next_link
  end
+
+
(** [full_content_from_html html_content] normalises a post body by
    converting it to markdown with [Html_markdown.html_to_markdown] and
    rendering that markdown back to HTML with Cmarkit in [~safe:true]
    mode. *)
let full_content_from_html html_content =
  let parsed =
    Cmarkit.Doc.of_string (Html_markdown.html_to_markdown html_content)
  in
  Cmarkit_html.of_doc ~safe:true parsed
+
+
(** [post_excerpt_from_html html_content ~max_length] builds a short HTML
    excerpt of a post body.

    The body is first converted to markdown. If the markdown is longer than
    [max_length] bytes it is cut at the first paragraph break (two
    consecutive newlines) found at or after offset [max_length], and ["..."]
    is appended. If no such break exists the markdown is kept whole, so the
    excerpt can be longer than [max_length]. A negative [max_length] is
    treated as [0].

    The resulting markdown is rendered to HTML with Cmarkit in [~safe:true]
    mode, except that headings are emitted as inline [<strong>] elements
    with a reduced font size instead of [<h1>]..[<h6>]. *)
let post_excerpt_from_html html_content ~max_length =
  let markdown = Html_markdown.html_to_markdown html_content in
  let len = String.length markdown in
  (* A negative threshold would start the scan before the string. *)
  let threshold = max 0 max_length in
  let excerpt_md =
    if len <= threshold then markdown
    else begin
      (* Offset of the first "\n\n" at or after [pos], or [len] if none. *)
      let rec find_para_break pos =
        if pos >= len - 1 then len
        else if markdown.[pos] = '\n' && markdown.[pos + 1] = '\n' then pos
        else find_para_break (pos + 1)
      in
      let break_pos = find_para_break threshold in
      if break_pos < len then String.sub markdown 0 break_pos ^ "..."
      else markdown
    end
  in
  let doc = Cmarkit.Doc.of_string excerpt_md in
  (* Renderer that only handles headings, turning them into small inline
     [<strong>] runs; every other block is declined (returns [false]). *)
  let inline_headings =
    let block c = function
      | Cmarkit.Block.Heading (h, _) ->
        let style =
          match Cmarkit.Block.Heading.level h with
          | 1 -> "font-size: 15px; font-weight: 600;"
          | 2 -> "font-size: 14px; font-weight: 600;"
          | _ -> "font-size: 14px; font-weight: 500;"
        in
        Cmarkit_renderer.Context.string c
          (Printf.sprintf "<strong style=\"%s\">" style);
        Cmarkit_renderer.Context.inline c (Cmarkit.Block.Heading.inline h);
        Cmarkit_renderer.Context.string c "</strong> ";
        true
      | _ -> false
    in
    Cmarkit_renderer.make ~block ()
  in
  (* The heading override is tried first; the stock HTML renderer handles
     whatever it declines. *)
  let renderer =
    Cmarkit_renderer.compose (Cmarkit_html.renderer ~safe:true ()) inline_headings
  in
  Cmarkit_renderer.doc_to_string renderer doc
+
+
(** [render_post_html ~post ~author_username] renders [post] as an
    [<article class="post">] fragment.

    The fragment contains the title (linked to the post's URL when it has
    one), a byline linking to [../authors/<author_username>.html], the
    formatted date (or ["No date"]), a 300-byte-threshold excerpt produced
    by [post_excerpt_from_html], and one link per tag pointing at
    [../categories/<tag>.html]. Title, author, username, URL and tags are
    HTML-escaped; the excerpt is inserted as rendered HTML. *)
let render_post_html ~post ~author_username =
  let escaped_title = html_escape (Post.title post) in
  let heading =
    match Post.link post with
    | None -> escaped_title
    | Some uri ->
      Printf.sprintf {|<a href="%s">%s</a>|}
        (html_escape (Uri.to_string uri))
        escaped_title
  in
  let date_text =
    Option.fold ~none:"No date" ~some:format_date (Post.date post)
  in
  let excerpt_html = post_excerpt_from_html (Post.content post) ~max_length:300 in
  let tag_link tag =
    let escaped_tag = html_escape tag in
    Printf.sprintf {|<a href="../categories/%s.html">%s</a>|} escaped_tag escaped_tag
  in
  let tags_block =
    match Post.tags post with
    | [] -> ""
    | tags ->
      tags
      |> List.map tag_link
      |> String.concat ""
      |> Printf.sprintf {|<div class="post-tags">%s</div>|}
  in
  Printf.sprintf {|<article class="post">
<h2 class="post-title">%s</h2>
<div class="post-meta">
By <a href="../authors/%s.html">%s</a> on %s
</div>
<div class="post-excerpt">
%s
</div>
%s
</article>|}
    heading
    (html_escape author_username)
    (html_escape (Post.author post))
    date_text
    excerpt_html
    tags_block
+
+
(** [render_posts_page ~title ~posts ~current_page ~total_pages ~base_path
    ~nav_current] assembles one complete listing page.

    [posts] are already-rendered HTML fragments; they are joined with
    newlines, followed by a newline and the pagination block from
    [pagination_html], and the result is wrapped with [page_template]. *)
let render_posts_page ~title ~posts ~current_page ~total_pages ~base_path ~nav_current =
  let body =
    String.concat "\n"
      [ String.concat "\n" posts;
        pagination_html ~current_page ~total_pages ~base_path ]
  in
  page_template ~title ~nav_current body
+
end
+54
stack/river/lib/format.mli
···
Returns None if the feed is not JSONFeed. *)
end
+
+
module Html : sig
+
(** HTML static site generation. *)
+
+
val format_date : Ptime.t -> string
+
(** [format_date date] formats a date in human-readable format (e.g., "November 23, 2025"). *)
+
+
val html_escape : string -> string
+
(** [html_escape s] escapes HTML special characters in string. *)
+
+
val full_content_from_html : string -> string
+
(** [full_content_from_html html_content] converts HTML content to clean markdown-derived HTML.
+
+
@param html_content The HTML content to convert *)
+
+
val post_excerpt_from_html : string -> max_length:int -> string
+
(** [post_excerpt_from_html html_content ~max_length] generates an excerpt from HTML content.
+
+
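Converts HTML to markdown, cuts the markdown at the first paragraph break found at or after [max_length] (appending "..." when something was cut off), and converts the result back to simple HTML. When no paragraph break follows [max_length], the whole content is kept.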
+
+
@param html_content The HTML content to excerpt
+
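@param max_length Length threshold, measured on the markdown text, after which the excerpt ends at the next paragraph break; the excerpt may therefore be longer than this *)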
+
+
val render_post_html : post:Post.t -> author_username:string -> string
+
(** [render_post_html ~post ~author_username] renders a single post as HTML.
+
+
@param post The post to render
+
@param author_username The username of the author (for linking) *)
+
+
val render_posts_page :
+
title:string ->
+
posts:string list ->
+
current_page:int ->
+
total_pages:int ->
+
base_path:string ->
+
nav_current:string ->
+
string
+
(** [render_posts_page ~title ~posts ~current_page ~total_pages ~base_path ~nav_current]
+
renders a complete HTML page with posts and pagination.
+
+
@param title Page title
+
@param posts List of pre-rendered post HTML strings
+
@param current_page Current page number (1-indexed)
+
@param total_pages Total number of pages
+
@param base_path Prefix prepended verbatim to the pagination link targets [index.html] and [page-N.html] (e.g., "" for root, "authors/" for author pages)
+
@param nav_current Which nav item is current ("posts", "authors", "categories", "links") *)
+
+
val page_template : title:string -> nav_current:string -> string -> string
+
(** [page_template ~title ~nav_current content] wraps content in the HTML page template.
+
+
@param title Page title
+
@param nav_current Which nav item is current
+
@param content The main content HTML *)
+
end
+9 -2
stack/river/lib/html_markdown.ml
···
Soup.fold (fun acc link ->
match Soup.attribute "href" link with
| Some href ->
-
let text = Soup.texts link |> String.concat "" |> String.trim in
-
(href, text) :: acc
+
(* Filter out local anchors and only include absolute external URLs *)
+
let uri = Uri.of_string href in
+
let is_absolute = Uri.scheme uri <> None in
+
let is_local_anchor = String.starts_with ~prefix:"#" href in
+
if is_absolute && not is_local_anchor then
+
let text = Soup.texts link |> String.concat "" |> String.trim in
+
(href, text) :: acc
+
else
+
acc
| None -> acc
) [] links
|> List.rev
+19
stack/river/lib/river.mli
···
@param format Output format
@param limit Optional maximum number of entries *)
+
val export_html_site :
+
t ->
+
output_dir:Eio.Fs.dir_ty Eio.Path.t ->
+
title:string ->
+
?posts_per_page:int ->
+
unit ->
+
(unit, string) result
+
(** [export_html_site state ~output_dir ~title ()] exports a static HTML site.
+
+
Generates a complete static site with:
+
- Paginated post listings
+
- Author index and individual author pages
+
- Category index and individual category pages
+
- Links page showing all outgoing links from posts
+
+
@param output_dir Directory to write HTML files to
+
@param title Site title
+
@param posts_per_page Number of posts per page (default: 25) *)
+
(** {2 Analysis} *)
val analyze_user_quality :
+527 -5
stack/river/lib/state.ml
···
let fetched_feeds =
Eio.Fiber.List.filter_map (fun source ->
try
-
Log.info (fun m -> m " Fetching %s (%s)..."
-
(Source.name source) (Source.url source));
+
Log.info (fun m -> m " [%s] Fetching %s (%s)..."
+
username (Source.name source) (Source.url source));
Some (Feed.fetch session source)
with e ->
-
Log.err (fun m -> m " Failed to fetch %s: %s"
-
(Source.name source) (Printexc.to_string e));
+
Log.err (fun m -> m " [%s] Failed to fetch %s: %s"
+
username (Source.name source) (Printexc.to_string e));
None
) (User.feeds user)
in
···
let export_merged_feed state ~title ~format ?limit () =
let all_posts = get_all_posts state ?limit () in
-
let entries = List.map snd all_posts in
+
+
(* Rewrite author metadata from Sortal user info *)
+
let rewrite_entry_author username (entry : Syndic.Atom.entry) =
+
match Storage.get_user state username with
+
| None -> entry
+
| Some user ->
+
(* Get user's full name and email from Sortal *)
+
let fullname = User.fullname user in
+
let email = User.email user in
+
let username = User.username user in
+
+
(* Create new author with Sortal information *)
+
let new_author =
+
match email with
+
| Some email_addr ->
+
Syndic.Atom.author ~email:email_addr ~uri:(Uri.of_string ("https://" ^ username)) fullname
+
| None ->
+
Syndic.Atom.author ~uri:(Uri.of_string ("https://" ^ username)) fullname
+
in
+
+
(* Update entry with new author, keeping existing contributors *)
+
let _, other_authors = entry.authors in
+
{ entry with authors = (new_author, other_authors) }
+
in
+
+
let entries = List.map (fun (username, entry) ->
+
rewrite_entry_author username entry
+
) all_posts in
match format with
| `Atom ->
···
| Error err -> Error (Printf.sprintf "Failed to serialize JSON Feed: %s" (Jsont.Error.to_string err))
else
Export.export_jsonfeed ~title entries
+
+
let export_html_site state ~output_dir ~title ?(posts_per_page = 25) () =
+
try
+
Log.info (fun m -> m "=== Starting HTML site generation ===");
+
Log.info (fun m -> m "Output directory: %s" (Eio.Path.native_exn output_dir));
+
Log.info (fun m -> m "Site title: %s" title);
+
Log.info (fun m -> m "Posts per page: %d" posts_per_page);
+
+
(* Sanitize a string for use in filenames - replace unsafe characters *)
+
let sanitize_filename s =
+
let buf = Buffer.create (String.length s) in
+
String.iter (fun c ->
+
match c with
+
| '/' | '\\' | ':' | '*' | '?' | '"' | '<' | '>' | '|' -> Buffer.add_char buf '-'
+
| ' ' -> Buffer.add_char buf '-'
+
| c -> Buffer.add_char buf c
+
) s;
+
Buffer.contents buf
+
in
+
+
(* Create directory structure *)
+
Log.info (fun m -> m "Creating directory structure");
+
let mkdir_if_not_exists dir =
+
try Eio.Path.mkdir ~perm:0o755 dir
+
with Eio.Io (Eio.Fs.E (Already_exists _), _) -> ()
+
in
+
mkdir_if_not_exists output_dir;
+
mkdir_if_not_exists Eio.Path.(output_dir / "authors");
+
mkdir_if_not_exists Eio.Path.(output_dir / "categories");
+
mkdir_if_not_exists Eio.Path.(output_dir / "thumbnails");
+
Log.info (fun m -> m "Directory structure created");
+
+
(* Helper to get and copy author thumbnail *)
+
let get_author_thumbnail username =
+
Log.debug (fun m -> m "Looking up thumbnail for username: %s" username);
+
match Sortal.lookup state.sortal username with
+
| Some contact ->
+
Log.debug (fun m -> m " Found Sortal contact for %s: %s" username (Sortal.Contact.name contact));
+
(match Sortal.thumbnail_path state.sortal contact with
+
| Some src_path ->
+
Log.info (fun m -> m " Copying thumbnail for %s from: %s" username (Eio.Path.native_exn src_path));
+
(* Copy thumbnail to output directory *)
+
let filename = Filename.basename (Eio.Path.native_exn src_path) in
+
let dest_path = Eio.Path.(output_dir / "thumbnails" / filename) in
+
(try
+
Log.debug (fun m -> m " Source path: %s" (Eio.Path.native_exn src_path));
+
Log.debug (fun m -> m " Destination path: %s" (Eio.Path.native_exn dest_path));
+
let content = Eio.Path.load src_path in
+
Eio.Path.save ~create:(`Or_truncate 0o644) dest_path content;
+
Log.info (fun m -> m " Successfully copied thumbnail to: thumbnails/%s" filename);
+
Some ("thumbnails/" ^ filename)
+
with e ->
+
Log.warn (fun m -> m " Failed to copy thumbnail for %s: %s" username (Printexc.to_string e));
+
None)
+
| None ->
+
Log.debug (fun m -> m " No thumbnail set for %s" username);
+
None)
+
| None ->
+
Log.warn (fun m -> m " No Sortal contact found for username: %s" username);
+
None
+
in
+
+
(* Helper to convert Atom entry to a simple record for HTML generation *)
+
let entry_to_html_data username (entry : Syndic.Atom.entry) =
+
let title = Text_extract.string_of_text_construct entry.title in
+
let link = List.find_opt (fun (l : Syndic.Atom.link) ->
+
l.rel = Syndic.Atom.Alternate
+
) entry.links in
+
let link_uri = match link with
+
| Some l -> Some l.href
+
| None -> if List.length entry.links > 0 then Some (List.hd entry.links).href else None
+
in
+
let content_html = match entry.content with
+
| Some (Syndic.Atom.Text s) -> s
+
| Some (Syndic.Atom.Html (_, s)) -> s
+
| Some (Syndic.Atom.Xhtml (_, nodes)) ->
+
String.concat "" (List.map Syndic.XML.to_string nodes)
+
| Some (Syndic.Atom.Mime _) | Some (Syndic.Atom.Src _) | None -> ""
+
in
+
let author, _ = entry.authors in
+
let tags = List.map (fun (c : Syndic.Atom.category) -> c.term) entry.categories in
+
(username, title, author.name, entry.updated, link_uri, content_html, tags)
+
in
+
+
(* Get all posts *)
+
Log.info (fun m -> m "Retrieving all posts from state");
+
let all_posts = get_all_posts state () in
+
let html_data = List.map (fun (username, entry) ->
+
entry_to_html_data username entry
+
) all_posts in
+
+
let unique_users = List.sort_uniq String.compare (List.map (fun (u, _, _, _, _, _, _) -> u) html_data) in
+
Log.info (fun m -> m "Retrieved %d posts from %d users" (List.length html_data) (List.length unique_users));
+
Log.info (fun m -> m "Users: %s" (String.concat ", " unique_users));
+
+
(* Generate main index pages with pagination *)
+
let total_posts = List.length html_data in
+
let total_pages = (total_posts + posts_per_page - 1) / posts_per_page in
+
Log.info (fun m -> m "Generating main index: %d posts across %d pages" total_posts total_pages);
+
+
for page = 1 to total_pages do
+
Log.info (fun m -> m " Generating index page %d/%d" page total_pages);
+
let start_idx = (page - 1) * posts_per_page in
+
let page_posts = List.filteri (fun i _ ->
+
i >= start_idx && i < start_idx + posts_per_page
+
) html_data in
+
+
let post_htmls = List.map (fun (username, title, author, date, link, content, tags) ->
+
Log.debug (fun m -> m " Processing post: %s by %s (@%s)" title author username);
+
(* Create a temporary Post-like structure for rendering *)
+
(* We'll need to adapt this since we're working with Atom entries *)
+
let post_html =
+
let date_str = Format.Html.format_date date in
+
let link_html = match link with
+
| Some uri ->
+
Printf.sprintf {|<a href="%s">%s</a>|}
+
(Format.Html.html_escape (Uri.to_string uri))
+
(Format.Html.html_escape title)
+
| None -> Format.Html.html_escape title
+
in
+
let excerpt = Format.Html.post_excerpt_from_html content ~max_length:300 in
+
let full_content = Format.Html.full_content_from_html content in
+
let tags_html =
+
match tags with
+
| [] -> ""
+
| _ ->
+
let tag_links = List.map (fun tag ->
+
Printf.sprintf {|<a href="categories/%s.html">%s</a>|}
+
(Format.Html.html_escape (sanitize_filename tag)) (Format.Html.html_escape tag)
+
) tags in
+
Printf.sprintf {|<div class="post-tags">%s</div>|}
+
(String.concat "" tag_links)
+
in
+
let thumbnail_html = match get_author_thumbnail username with
+
| Some thumb_path ->
+
Printf.sprintf {|<img src="%s" alt="%s" class="author-thumbnail">|}
+
(Format.Html.html_escape thumb_path)
+
(Format.Html.html_escape author)
+
| None -> ""
+
in
+
Printf.sprintf {|<article class="post">
+
<h2 class="post-title">%s</h2>
+
<div class="post-meta">
+
%s<div class="post-meta-text">By <a href="authors/%s.html">%s</a> on %s</div>
+
</div>
+
<div class="post-excerpt">
+
%s
+
</div>
+
<div class="post-full-content">
+
%s
+
</div>
+
<a href="#" class="read-more">Read more</a>
+
%s
+
</article>|}
+
link_html
+
thumbnail_html
+
(Format.Html.html_escape (sanitize_filename username))
+
(Format.Html.html_escape author)
+
date_str
+
excerpt
+
full_content
+
tags_html
+
in
+
post_html
+
) page_posts in
+
+
let page_html = Format.Html.render_posts_page
+
~title
+
~posts:post_htmls
+
~current_page:page
+
~total_pages
+
~base_path:""
+
~nav_current:"posts"
+
in
+
+
let filename = if page = 1 then "index.html"
+
else Printf.sprintf "page-%d.html" page in
+
Eio.Path.save ~create:(`Or_truncate 0o644)
+
Eio.Path.(output_dir / filename)
+
page_html
+
done;
+
+
(* Generate author index *)
+
Log.info (fun m -> m "Generating author index and pages");
+
let authors_map = Hashtbl.create 32 in
+
List.iter (fun (username, _, author, _, _, _, _) ->
+
let count = match Hashtbl.find_opt authors_map username with
+
| Some (_, c) -> c + 1
+
| None -> 1
+
in
+
Hashtbl.replace authors_map username (author, count)
+
) html_data;
+
+
let authors_list = Hashtbl.fold (fun username (author, count) acc ->
+
(username, author, count) :: acc
+
) authors_map [] |> List.sort (fun (_, a1, _) (_, a2, _) -> String.compare a1 a2) in
+
+
Log.info (fun m -> m "Found %d authors" (List.length authors_list));
+
+
let authors_index_content =
+
let items = List.map (fun (username, author, count) ->
+
Printf.sprintf {|<li><a href="%s.html">%s</a><span class="count">%d post%s</span></li>|}
+
(Format.Html.html_escape (sanitize_filename username))
+
(Format.Html.html_escape author)
+
count
+
(if count = 1 then "" else "s")
+
) authors_list in
+
Printf.sprintf "<ul class=\"author-list\">\n%s\n</ul>"
+
(String.concat "\n" items)
+
in
+
+
let authors_index_html = Format.Html.page_template
+
~title:(title ^ " - Authors")
+
~nav_current:"authors"
+
authors_index_content
+
in
+
Eio.Path.save ~create:(`Or_truncate 0o644)
+
Eio.Path.(output_dir / "authors" / "index.html")
+
authors_index_html;
+
+
(* Generate individual author pages *)
+
Hashtbl.iter (fun username (author, _) ->
+
let author_posts = List.filter (fun (u, _, _, _, _, _, _) -> u = username) html_data in
+
let author_total = List.length author_posts in
+
let author_pages = (author_total + posts_per_page - 1) / posts_per_page in
+
Log.info (fun m -> m " Author: %s (@%s) - %d posts, %d pages" author username author_total author_pages);
+
+
for page = 1 to author_pages do
+
let start_idx = (page - 1) * posts_per_page in
+
let page_posts = List.filteri (fun i _ ->
+
i >= start_idx && i < start_idx + posts_per_page
+
) author_posts in
+
+
let post_htmls = List.map (fun (_username, title, author, date, link, content, tags) ->
+
let date_str = Format.Html.format_date date in
+
let link_html = match link with
+
| Some uri ->
+
Printf.sprintf {|<a href="%s">%s</a>|}
+
(Format.Html.html_escape (Uri.to_string uri))
+
(Format.Html.html_escape title)
+
| None -> Format.Html.html_escape title
+
in
+
let excerpt = Format.Html.post_excerpt_from_html content ~max_length:300 in
+
let full_content = Format.Html.full_content_from_html content in
+
let tags_html =
+
match tags with
+
| [] -> ""
+
| _ ->
+
let tag_links = List.map (fun tag ->
+
Printf.sprintf {|<a href="../categories/%s.html">%s</a>|}
+
(Format.Html.html_escape (sanitize_filename tag)) (Format.Html.html_escape tag)
+
) tags in
+
Printf.sprintf {|<div class="post-tags">%s</div>|}
+
(String.concat "" tag_links)
+
in
+
Printf.sprintf {|<article class="post">
+
<h2 class="post-title">%s</h2>
+
<div class="post-meta">
+
By %s on %s
+
</div>
+
<div class="post-excerpt">
+
%s
+
</div>
+
<div class="post-full-content">
+
%s
+
</div>
+
<a href="#" class="read-more">Read more</a>
+
%s
+
</article>|}
+
link_html
+
(Format.Html.html_escape author)
+
date_str
+
excerpt
+
full_content
+
tags_html
+
) page_posts in
+
+
let page_html = Format.Html.render_posts_page
+
~title:(author ^ " - " ^ title)
+
~posts:post_htmls
+
~current_page:page
+
~total_pages:author_pages
+
~base_path:(sanitize_filename username ^ "-")
+
~nav_current:"authors"
+
in
+
+
let safe_username = sanitize_filename username in
+
let filename = if page = 1 then safe_username ^ ".html"
+
else Printf.sprintf "%s-%d.html" safe_username page in
+
Eio.Path.save ~create:(`Or_truncate 0o644)
+
Eio.Path.(output_dir / "authors" / filename)
+
page_html
+
done
+
) authors_map;
+
+
(* Generate category index and pages *)
+
Log.info (fun m -> m "Generating category index and pages");
+
let categories_map = Hashtbl.create 32 in
+
List.iter (fun (_, _, _, _, _, _, tags) ->
+
List.iter (fun tag ->
+
let count = match Hashtbl.find_opt categories_map tag with
+
| Some c -> c + 1
+
| None -> 1
+
in
+
Hashtbl.replace categories_map tag count
+
) tags
+
) html_data;
+
+
let categories_list = Hashtbl.fold (fun tag count acc ->
+
(tag, count) :: acc
+
) categories_map [] |> List.sort (fun (t1, _) (t2, _) -> String.compare t1 t2) in
+
+
Log.info (fun m -> m "Found %d categories" (List.length categories_list));
+
+
let categories_index_content =
+
let items = List.map (fun (tag, count) ->
+
Printf.sprintf {|<li><a href="%s.html">%s</a><span class="count">%d post%s</span></li>|}
+
(Format.Html.html_escape (sanitize_filename tag))
+
(Format.Html.html_escape tag)
+
count
+
(if count = 1 then "" else "s")
+
) categories_list in
+
Printf.sprintf "<ul class=\"category-list\">\n%s\n</ul>"
+
(String.concat "\n" items)
+
in
+
+
let categories_index_html = Format.Html.page_template
+
~title:(title ^ " - Categories")
+
~nav_current:"categories"
+
categories_index_content
+
in
+
Eio.Path.save ~create:(`Or_truncate 0o644)
+
Eio.Path.(output_dir / "categories" / "index.html")
+
categories_index_html;
+
+
(* Generate individual category pages *)
+
List.iter (fun (tag, count) ->
+
let tag_posts = List.filter (fun (_, _, _, _, _, _, tags) ->
+
List.mem tag tags
+
) html_data in
+
+
let tag_total = List.length tag_posts in
+
let tag_pages = (tag_total + posts_per_page - 1) / posts_per_page in
+
Log.info (fun m -> m " Category: %s - %d posts, %d pages" tag count tag_pages);
+
+
for page = 1 to tag_pages do
+
let start_idx = (page - 1) * posts_per_page in
+
let page_posts = List.filteri (fun i _ ->
+
i >= start_idx && i < start_idx + posts_per_page
+
) tag_posts in
+
+
let post_htmls = List.map (fun (username, title, author, date, link, content, tags) ->
+
let date_str = Format.Html.format_date date in
+
let link_html = match link with
+
| Some uri ->
+
Printf.sprintf {|<a href="%s">%s</a>|}
+
(Format.Html.html_escape (Uri.to_string uri))
+
(Format.Html.html_escape title)
+
| None -> Format.Html.html_escape title
+
in
+
let excerpt = Format.Html.post_excerpt_from_html content ~max_length:300 in
+
let full_content = Format.Html.full_content_from_html content in
+
let tags_html =
+
match tags with
+
| [] -> ""
+
| _ ->
+
let tag_links = List.map (fun t ->
+
Printf.sprintf {|<a href="%s.html">%s</a>|}
+
(Format.Html.html_escape (sanitize_filename t)) (Format.Html.html_escape t)
+
) tags in
+
Printf.sprintf {|<div class="post-tags">%s</div>|}
+
(String.concat "" tag_links)
+
in
+
Printf.sprintf {|<article class="post">
+
<h2 class="post-title">%s</h2>
+
<div class="post-meta">
+
By <a href="../authors/%s.html">%s</a> on %s
+
</div>
+
<div class="post-excerpt">
+
%s
+
</div>
+
<div class="post-full-content">
+
%s
+
</div>
+
<a href="#" class="read-more">Read more</a>
+
%s
+
</article>|}
+
link_html
+
(Format.Html.html_escape (sanitize_filename username))
+
(Format.Html.html_escape author)
+
date_str
+
excerpt
+
full_content
+
tags_html
+
) page_posts in
+
+
let page_html = Format.Html.render_posts_page
+
~title:(tag ^ " - " ^ title)
+
~posts:post_htmls
+
~current_page:page
+
~total_pages:tag_pages
+
~base_path:(sanitize_filename tag ^ "-")
+
~nav_current:"categories"
+
in
+
+
let safe_tag = sanitize_filename tag in
+
let filename = if page = 1 then safe_tag ^ ".html"
+
else Printf.sprintf "%s-%d.html" safe_tag page in
+
Eio.Path.save ~create:(`Or_truncate 0o644)
+
Eio.Path.(output_dir / "categories" / filename)
+
page_html
+
done
+
) categories_list;
+
+
(* Generate links page *)
+
Log.info (fun m -> m "Generating links page");
+
let all_links = List.concat_map (fun (username, title, author, date, post_link, content, _) ->
+
let links = Html_markdown.extract_links content in
+
List.map (fun (href, link_text) ->
+
(href, link_text, username, author, title, post_link, date)
+
) links
+
) html_data in
+
+
Log.info (fun m -> m " Extracted %d total links from all posts" (List.length all_links));
+
+
(* Group by URL and track most recent post date *)
+
let links_map = Hashtbl.create 256 in
+
List.iter (fun (href, link_text, username, author, post_title, post_link, date) ->
+
let existing = Hashtbl.find_opt links_map href in
+
let new_entry = (link_text, username, author, post_title, post_link, date) in
+
match existing with
+
| None -> Hashtbl.add links_map href [new_entry]
+
| Some entries ->
+
(* Add to list, will sort by date later *)
+
Hashtbl.replace links_map href (new_entry :: entries)
+
) all_links;
+
+
(* Sort links by most recent post date *)
+
let sorted_links = Hashtbl.fold (fun href entries acc ->
+
(* Get the most recent entry for this URL *)
+
let sorted_entries = List.sort (fun (_, _, _, _, _, d1) (_, _, _, _, _, d2) ->
+
Ptime.compare d2 d1
+
) entries in
+
let most_recent = List.hd sorted_entries in
+
(href, most_recent, entries) :: acc
+
) links_map [] |> List.sort (fun (_, (_, _, _, _, _, d1), _) (_, (_, _, _, _, _, d2), _) ->
+
Ptime.compare d2 d1
+
) in
+
+
Log.info (fun m -> m " Deduplicated to %d unique links" (List.length sorted_links));
+
+
let links_content =
+
let items = List.map (fun (href, (link_text, username, author, post_title, post_link, date), all_entries) ->
+
let date_str = Format.Html.format_date date in
+
let display_text = if link_text = "" || link_text = href then href else link_text in
+
let post_link_html = match post_link with
+
| Some uri ->
+
Printf.sprintf {|<a href="%s">%s</a>|}
+
(Format.Html.html_escape (Uri.to_string uri))
+
(Format.Html.html_escape post_title)
+
| None -> Format.Html.html_escape post_title
+
in
+
let count_str = if List.length all_entries > 1 then
+
Printf.sprintf " (mentioned in %d posts)" (List.length all_entries)
+
else ""
+
in
+
Printf.sprintf {|<div class="link-item">
+
<div class="link-url"><a href="%s">%s</a></div>
+
<div class="link-meta">From %s by <a href="authors/%s.html">%s</a> on %s%s</div>
+
</div>|}
+
(Format.Html.html_escape href)
+
(Format.Html.html_escape display_text)
+
post_link_html
+
(Format.Html.html_escape (sanitize_filename username))
+
(Format.Html.html_escape author)
+
date_str
+
count_str
+
) sorted_links in
+
String.concat "\n" items
+
in
+
+
let links_html = Format.Html.page_template
+
~title:(title ^ " - Links")
+
~nav_current:"links"
+
links_content
+
in
+
Eio.Path.save ~create:(`Or_truncate 0o644)
+
Eio.Path.(output_dir / "links.html")
+
links_html;
+
+
Log.info (fun m -> m "HTML site generated successfully in %s"
+
(Eio.Path.native_exn output_dir));
+
Ok ()
+
with e ->
+
Error (Printf.sprintf "Failed to generate HTML site: %s" (Printexc.to_string e))
let analyze_user_quality state ~username =
match Storage.get_user state username with
+19
stack/river/lib/state.mli
···
@param format Output format
@param limit Optional maximum number of entries *)
+
val export_html_site :
+
t ->
+
output_dir:Eio.Fs.dir_ty Eio.Path.t ->
+
title:string ->
+
?posts_per_page:int ->
+
unit ->
+
(unit, string) result
+
(** [export_html_site state ~output_dir ~title ()] exports a static HTML site.
+
+
Generates a complete static site with:
+
- Paginated post listings
+
- Author index ([authors/index.html]) and individual author pages
+
([authors/NAME.html], with later pages named [authors/NAME-N.html])
+
- Category index ([categories/index.html]) and individual category pages
+
([categories/TAG.html], with later pages named [categories/TAG-N.html])
+
- Links page ([links.html]) showing all outgoing links from posts, grouped
+
by URL and ordered by the date of the most recent post mentioning each
+
+
Author and tag names are passed through a filename sanitiser before being
+
used as file names. The author, category and links pages are saved with
+
truncation, so rerunning the export overwrites earlier output.
+
+
@param output_dir Directory to write HTML files to
+
@param title Site title
+
@param posts_per_page Number of posts per page (default: 25)
+
@return [Ok ()] once the site has been written, or [Error msg] when
+
generation raises an exception; [msg] starts with
+
["Failed to generate HTML site: "] followed by the exception text. *)
+
(** {2 Analysis} *)
val analyze_user_quality :