(* My agentic slop goes here. Not intended for anyone else! *)
(*
 * Copyright (c) 2014, OCaml.org project
 * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *)

(** River RSS/Atom/JSONFeed aggregator library *)

(** {1 Feed Sources} *)

module Source : sig
  type t
  (** A feed source with name and URL. *)

  val make : name:string -> url:string -> t
  (** [make ~name ~url] creates a new feed source.

      @param name The feed name/label
      @param url The feed URL *)

  val name : t -> string
  (** [name source] returns the feed name/label. *)

  val url : t -> string
  (** [url source] returns the feed URL. *)

  val jsont : t Jsont.t
  (** JSON codec for sources. *)
end

(** {1 HTTP Session Management} *)

module Session : sig
  type t
  (** An abstract HTTP session for fetching feeds.

      The session manages HTTP connections and is tied to an Eio switch
      for proper resource cleanup. *)

  val init :
    sw:Eio.Switch.t ->
    < clock : float Eio.Time.clock_ty Eio.Resource.t;
      fs : Eio.Fs.dir_ty Eio.Path.t;
      net : [ `Generic | `Unix ] Eio.Net.ty Eio.Resource.t; .. > ->
    t
  (** [init ~sw env] creates a new HTTP session.

      The session is configured with appropriate defaults for fetching feeds:
      - User-Agent: "OCaml-River/1.0"
      - Automatic redirect following (max 5 redirects)
      - TLS verification enabled

      @param sw The switch for resource management
      @param env The Eio environment; it must provide [clock], [fs] and
                 [net] *)

  val with_session :
    < clock : float Eio.Time.clock_ty Eio.Resource.t;
      fs : Eio.Fs.dir_ty Eio.Path.t;
      net : [ `Generic | `Unix ] Eio.Net.ty Eio.Resource.t; .. > ->
    (t -> 'a) -> 'a
  (** [with_session env f] creates a session, runs [f] with it, and
      automatically manages the session's lifecycle.

      This is the recommended way to use River as it ensures proper cleanup.

      @param env The Eio environment; it must provide [clock], [fs] and
                 [net]
      @param f The function to run with the session
      @return the result of [f] *)
end

(** {1 Feeds and Posts} *)

module Feed : sig
  type t
  (** An Atom, RSS2, or JSON Feed. *)

  val fetch : Session.t -> Source.t -> t
  (** [fetch session source] fetches and parses a feed from the given source.

      @param session The HTTP session
      @param source The feed source to fetch
      @raise Failure if the feed cannot be fetched or parsed *)

  val source : t -> Source.t
  (** [source feed] returns the source this feed was fetched from. *)
end

module Post : sig
  type t
  (** A post from a feed. *)

  val of_feeds : Feed.t list -> t list
  (** [of_feeds feeds] extracts and deduplicates posts from the given feeds.

      Posts are deduplicated by ID. *)

  val feed : t -> Feed.t
  (** [feed post] returns the feed this post originated from. *)

  val title : t -> string
  (** [title post] returns the post title. *)

  val link : t -> Uri.t option
  (** [link post] returns the post link, if any. *)

  val date : t -> Syndic.Date.t option
  (** [date post] returns the post date, if any. *)

  val author : t -> string
  (** [author post] returns the post author name. *)

  val email : t -> string
  (** [email post] returns the post author email. *)

  val content : t -> string
  (** [content post] returns the post content. *)

  val id : t -> string
  (** [id post] returns the unique identifier of the post. *)

  val tags : t -> string list
  (** [tags post] returns the list of tags associated with the post. *)

  val summary : t -> string option
  (** [summary post] returns the summary/excerpt of the post, if available. *)

  val meta_description : t -> string option
  (** [meta_description post] returns the meta description from the origin
      site.

      To get the meta description, we fetch the content of [link post] and
      look for an HTML meta tag with name "description" or
      "og:description". *)

  val seo_image : t -> string option
  (** [seo_image post] returns the social media image URL.

      To get the SEO image, we fetch the content of [link post] and look for
      an HTML meta tag with name "og:image" or "twitter:image". *)
end

(** {1 Format Conversion and Export} *)

module Format : sig
  (** Feed format conversion and export. *)

  module Atom : sig
    (** Atom 1.0 format support. *)

    val entry_of_post : Post.t -> Syndic.Atom.entry
    (** [entry_of_post post] converts a post to an Atom entry. *)

    val entries_of_posts : Post.t list -> Syndic.Atom.entry list
    (** [entries_of_posts posts] converts posts to Atom entries. *)

    val feed_of_entries :
      title:string ->
      ?id:string ->
      ?authors:(string * string option) list ->
      Syndic.Atom.entry list ->
      Syndic.Atom.feed
    (** [feed_of_entries ~title entries] creates an Atom feed from entries.

        @param title The feed title
        @param id Optional feed ID (default: "urn:river:merged")
        @param authors Optional list of (name, email) tuples *)

    val to_string : Syndic.Atom.feed -> string
    (** [to_string feed] serializes an Atom feed to XML string. *)
  end

  module Rss2 : sig
    (** RSS 2.0 format support. *)

    val of_feed : Feed.t -> Syndic.Rss2.channel option
    (** [of_feed feed] extracts RSS2 channel if the feed is RSS2 format.

        Returns [None] if the feed is not RSS2. *)
  end

  module Jsonfeed : sig
    (** JSON Feed 1.1 format support. *)

    val item_of_post : Post.t -> Jsonfeed.Item.t
    (** [item_of_post post] converts a post to a JSONFeed item. *)

    val items_of_posts : Post.t list -> Jsonfeed.Item.t list
    (** [items_of_posts posts] converts posts to JSONFeed items. *)

    val feed_of_items :
      title:string ->
      ?home_page_url:string ->
      ?feed_url:string ->
      ?description:string ->
      ?icon:string ->
      ?favicon:string ->
      Jsonfeed.Item.t list ->
      Jsonfeed.t
    (** [feed_of_items ~title items] creates a JSONFeed from items.

        @param title The feed title (required)
        @param home_page_url The URL of the website the feed represents
        @param feed_url The URL of the feed itself
        @param description A description of the feed
        @param icon URL of an icon for the feed (512x512 recommended)
        @param favicon URL of a favicon for the feed (64x64 recommended) *)

    val feed_of_posts :
      title:string ->
      ?home_page_url:string ->
      ?feed_url:string ->
      ?description:string ->
      ?icon:string ->
      ?favicon:string ->
      Post.t list ->
      Jsonfeed.t
    (** [feed_of_posts ~title posts] creates a JSONFeed from posts.

        Convenience function that combines [items_of_posts] and
        [feed_of_items]; the optional arguments have the same meaning as
        for [feed_of_items]. *)

    val to_string : ?minify:bool -> Jsonfeed.t -> (string, string) result
    (** [to_string ?minify feed] serializes a JSONFeed to JSON string.

        @param minify If true, output compact JSON; if false, pretty-print
                      (default: false) *)

    val of_feed : Feed.t -> Jsonfeed.t option
    (** [of_feed feed] extracts JSONFeed if the feed is JSONFeed format.

        Returns [None] if the feed is not JSONFeed. *)
  end
end

(** {1 JSONFeed with Atom Extensions} *)

module River_jsonfeed : sig
  (** JSONFeed with Atom extension support for River.

      This module provides conversion between Atom feeds and JSONFeed format,
      with custom extensions to preserve Atom-specific metadata that doesn't
      have direct JSONFeed equivalents.

      The extensions follow the JSONFeed specification for custom fields:
      - Prefixed with underscore + letter: [_atom]
      - Contains [about] field with documentation URL
      - Feed readers can safely ignore unknown extensions

      See: https://www.jsonfeed.org/mappingrssandatom/ *)

  (** {2 Extension Types} *)

  (** An Atom category. *)
  type category = {
    term : string;  (** Category term (required in Atom) *)
    scheme : string option;  (** Category scheme/domain *)
    label : string option;  (** Human-readable label *)
  }

  (** A contributor to a feed or to an item. *)
  type contributor = {
    contributor_name : string;  (** Contributor name *)
    contributor_uri : string option;  (** Contributor URI *)
    contributor_email : string option;  (** Contributor email *)
  }

  (** The generator of a feed. *)
  type generator = {
    generator_name : string;  (** Generator name *)
    generator_uri : string option;  (** Generator URI *)
    generator_version : string option;  (** Generator version *)
  }

  (** The source feed of an item. *)
  type source = {
    source_id : string;  (** Source feed ID *)
    source_title : string;  (** Source feed title *)
    source_updated : Ptime.t;  (** Source feed update time *)
  }

  (** The kind of content held by an item. *)
  type content_type =
    | Text  (** Plain text *)
    | Html  (** HTML content *)
    | Xhtml  (** XHTML content *)

  (** Feed-level Atom metadata carried alongside a JSONFeed. *)
  type feed_extension = {
    feed_subtitle : string option;  (** Feed subtitle *)
    feed_id : string;  (** Feed ID *)
    feed_categories : category list;  (** Feed categories *)
    feed_contributors : contributor list;  (** Feed contributors *)
    feed_generator : generator option;  (** Feed generator *)
    feed_rights : string option;  (** Feed rights *)
    feed_logo : string option;  (** Feed logo *)
  }

  (** Item-level Atom metadata carried alongside a JSONFeed item. *)
  type item_extension = {
    item_id : string;  (** Item ID *)
    item_published : Ptime.t option;  (** Publication time *)
    item_contributors : contributor list;  (** Item contributors *)
    item_source : source option;  (** Source feed of the item *)
    item_rights : string option;  (** Item rights *)
    item_categories : category list;  (** Item categories *)
    item_content_type : content_type option;  (** Content type *)
  }

  (* Both [t] and [item] below declare a field named [extension]. OCaml
     resolves which record is meant from the expected type; without type
     information the last-defined record ([item]) is chosen, so a type
     annotation may be needed when building or matching a [t]. *)

  (** A JSONFeed together with its optional feed-level extension. *)
  type t = {
    feed : Jsonfeed.t;  (** The JSONFeed *)
    extension : feed_extension option;  (** Feed-level extension, if any *)
  }

  (** A JSONFeed item together with its optional item-level extension. *)
  type item = {
    item : Jsonfeed.Item.t;  (** The JSONFeed item *)
    extension : item_extension option;  (** Item-level extension, if any *)
  }

  (** {2 Conversion from Atom} *)

  val of_atom : Syndic.Atom.feed -> t
  (** [of_atom feed] converts an Atom feed to JSONFeed with extensions.

      All Atom metadata is preserved using extensions. *)

  val item_of_atom : Syndic.Atom.entry -> item
  (** [item_of_atom entry] converts an Atom entry to JSONFeed item with
      extensions. *)

  (** {2 Conversion from RSS} *)

  val of_rss2 : Syndic.Rss2.channel -> t
  (** [of_rss2 channel] converts an RSS2 channel to JSONFeed. *)

  val item_of_rss2 : Syndic.Rss2.item -> item
  (** [item_of_rss2 item] converts an RSS2 item to JSONFeed item. *)

  (** {2 Conversion to Atom} *)

  val to_atom : t -> Syndic.Atom.feed
  (** [to_atom t] converts JSONFeed with extensions back to Atom feed.

      All original Atom metadata is restored from extensions. *)

  val item_to_atom : item -> Syndic.Atom.entry
  (** [item_to_atom item] converts JSONFeed item with extensions back to
      Atom entry. *)

  (** {2 Serialization} *)

  val to_string : ?minify:bool -> t -> (string, string) result
  (** [to_string ?minify t] serializes to JSON string with extensions. *)

  val of_string : string -> (t, string) result
  (** [of_string s] parses JSON string with extensions. *)

  (** {2 Utilities} *)

  val of_posts : title:string -> Post.t list -> t
  (** [of_posts ~title posts] creates JSONFeed from Post list with Atom
      extensions. *)

  val to_posts : feed:Feed.t -> t -> Post.t list
  (** [to_posts ~feed t] extracts posts from extended JSONFeed. *)
end

(** {1 Category Management} *)

module Category : sig
  (** Custom categories for organizing posts.

      Categories are manually defined and can be assigned to posts for
      organization and filtering. This is separate from feed-extracted
      tags. *)

  type t
  (** A custom category with metadata. *)

  val create :
    id:string ->
    name:string ->
    ?description:string ->
    unit ->
    t
  (** [create ~id ~name ?description ()] creates a new category.

      @param id Unique identifier for the category (e.g., "ocaml-projects")
      @param name Display name (e.g., "OCaml Projects")
      @param description Optional longer description *)

  val id : t -> string
  (** [id category] returns the unique identifier of the category. *)

  val name : t -> string
  (** [name category] returns the display name of the category. *)

  val description : t -> string option
  (** [description category] returns the description, if any. *)

  val to_json : t -> Jsont.json
  (** [to_json category] serializes a category to JSON. *)

  val of_json : Jsont.json -> (t, string) result
  (** [of_json json] deserializes a category from JSON. *)

  val jsont : t Jsont.t
  (** Jsont codec for categories. *)
end

(** {1 User Management} *)

module User : sig
  (** River user composed from Sortal contact data + sync state.

      User data is stored in Sortal and read on-demand. River only persists
      sync timestamps and optional per-user overrides. *)

  type t
  (** A River user composed from Sortal.Contact + sync metadata. *)

  val of_contact : Sortal.Contact.t -> ?last_synced:string -> unit -> t
  (** [of_contact contact ?last_synced ()] creates a River user from a
      Sortal contact.

      @param contact The Sortal contact to base this user on
      @param last_synced Optional ISO 8601 timestamp of last sync *)

  val username : t -> string
  (** [username user] returns the username (from Sortal.Contact.handle). *)

  val fullname : t -> string
  (** [fullname user] returns the full name (from
      Sortal.Contact.primary_name). *)

  val email : t -> string option
  (** [email user] returns the email address (from Sortal.Contact). *)

  val feeds : t -> Source.t list
  (** [feeds user] returns the list of subscribed feeds (from
      Sortal.Contact). *)

  val last_synced : t -> string option
  (** [last_synced user] returns the last sync timestamp if set. *)

  val contact : t -> Sortal.Contact.t
  (** [contact user] returns the underlying Sortal contact. *)

  val set_last_synced : t -> string -> t
  (** [set_last_synced user timestamp] returns a new user with updated sync
      time. *)
end

(** {1 Feed Quality Analysis} *)

module Quality : sig
  type t
  (** Quality metrics for a feed or user's aggregated feed. *)

  val make :
    total_entries:int ->
    entries_with_summary:int ->
    entries_with_author:int ->
    entries_with_date:int ->
    entries_with_content:int ->
    entries_with_tags:int ->
    avg_content_length:float ->
    min_content_length:int ->
    max_content_length:int ->
    posting_frequency_days:float option ->
    quality_score:float ->
    t
  (** [make ~total_entries ...] creates quality metrics.

      Each labelled argument has a same-named accessor below. *)

  val total_entries : t -> int
  (** Accessor for the [total_entries] quality metric. *)

  val entries_with_summary : t -> int
  (** Accessor for the [entries_with_summary] quality metric. *)

  val entries_with_author : t -> int
  (** Accessor for the [entries_with_author] quality metric. *)

  val entries_with_date : t -> int
  (** Accessor for the [entries_with_date] quality metric. *)

  val entries_with_content : t -> int
  (** Accessor for the [entries_with_content] quality metric. *)

  val entries_with_tags : t -> int
  (** Accessor for the [entries_with_tags] quality metric. *)

  val avg_content_length : t -> float
  (** Accessor for the [avg_content_length] quality metric. *)

  val min_content_length : t -> int
  (** Accessor for the [min_content_length] quality metric. *)

  val max_content_length : t -> int
  (** Accessor for the [max_content_length] quality metric. *)

  val posting_frequency_days : t -> float option
  (** Accessor for the [posting_frequency_days] quality metric. *)

  val quality_score : t -> float
  (** Accessor for the [quality_score] quality metric. *)

  val analyze : Syndic.Atom.entry list -> t
  (** [analyze entries] computes quality metrics from Atom entries.

      The quality score is a weighted average of:
      - Content completeness (40%)
      - Metadata completeness (30%)
      - Content richness (30%) *)
end

(** {1 State Management} *)

module State : sig
  type t
  (** State handle for managing sync state and feeds on disk.

      User contact data is read from Sortal on-demand. River only persists
      sync timestamps and feed data. *)

  val create :
    < fs : Eio.Fs.dir_ty Eio.Path.t; .. > ->
    app_name:string ->
    t
  (** [create env ~app_name] creates a state handle using XDG directories.

      Data is stored in:
      - Sync state: $XDG_STATE_HOME/[app_name]/sync_state.json
      - Feeds: $XDG_STATE_HOME/[app_name]/feeds/[username]/

      User contact data is read from Sortal's XDG location.

      @param env The Eio environment with filesystem access
      @param app_name Application name for XDG paths *)

  (** {2 User Operations} *)

  val get_user : t -> username:string -> User.t option
  (** [get_user state ~username] retrieves a user by username.

      This reads contact data from Sortal and combines it with River's sync
      state. Returns [None] if the username doesn't exist in Sortal or has
      no feeds. *)

  val list_users : t -> string list
  (** [list_users state] returns all usernames with feeds from Sortal. *)

  val get_all_users : t -> User.t list
  (** [get_all_users state] returns all users from Sortal with their sync
      state. *)

  val update_sync_state :
    t -> username:string -> timestamp:string -> (unit, string) result
  (** [update_sync_state state ~username ~timestamp] updates the last sync
      timestamp.

      @param username The user to update
      @param timestamp ISO 8601 timestamp of the sync *)

  (** {2 Feed Operations} *)

  val sync_user :
    < clock : float Eio.Time.clock_ty Eio.Resource.t;
      fs : Eio.Fs.dir_ty Eio.Path.t;
      net : [ `Generic | `Unix ] Eio.Net.ty Eio.Resource.t; .. > ->
    t ->
    username:string ->
    (unit, string) result
  (** [sync_user env state ~username] fetches all feeds for the user and
      stores merged result.

      Posts are fetched concurrently and merged with existing posts.
      The result is stored as an Atom feed. *)

  val sync_all :
    < clock : float Eio.Time.clock_ty Eio.Resource.t;
      fs : Eio.Fs.dir_ty Eio.Path.t;
      net : [ `Generic | `Unix ] Eio.Net.ty Eio.Resource.t; .. > ->
    t ->
    (int * int, string) result
  (** [sync_all env state] syncs all users concurrently.

      Returns [Ok (success_count, fail_count)]. *)

  val get_user_posts :
    t ->
    username:string ->
    ?limit:int ->
    unit ->
    Syndic.Atom.entry list
  (** [get_user_posts state ~username ()] retrieves stored posts for a user.

      @param limit Optional maximum number of posts to return *)

  val get_all_posts :
    t ->
    ?limit:int ->
    unit ->
    (string * Syndic.Atom.entry) list
  (** [get_all_posts state ()] retrieves posts from all users, sorted by
      date.

      Returns list of (username, entry) tuples.
      @param limit Optional maximum number of posts to return *)

  (** {2 Export} *)

  val export_merged_feed :
    t ->
    title:string ->
    format:[ `Atom | `Jsonfeed ] ->
    ?limit:int ->
    unit ->
    (string, string) result
  (** [export_merged_feed state ~title ~format ()] exports a merged feed of
      all users.

      @param title Feed title
      @param format Output format
      @param limit Optional maximum number of entries *)

  val export_html_site :
    t ->
    output_dir:Eio.Fs.dir_ty Eio.Path.t ->
    title:string ->
    ?posts_per_page:int ->
    unit ->
    (unit, string) result
  (** [export_html_site state ~output_dir ~title ()] exports a static HTML
      site.

      Generates a complete static site with:
      - Paginated post listings
      - Author index and individual author pages
      - Category index and individual category pages
      - Links page showing all outgoing links from posts

      @param output_dir Directory to write HTML files to
      @param title Site title
      @param posts_per_page Number of posts per page (default: 25) *)

  (** {2 Category Management} *)

  val list_categories : t -> Category.t list
  (** [list_categories state] returns all custom categories. *)

  val get_category : t -> id:string -> Category.t option
  (** [get_category state ~id] retrieves a category by ID. *)

  val add_category : t -> Category.t -> (unit, string) result
  (** [add_category state category] adds or updates a category.

      @param category The category to add/update *)

  val remove_category : t -> id:string -> (unit, string) result
  (** [remove_category state ~id] removes a category.

      This also removes the category from any posts that were tagged with
      it.
      @param id The category ID to remove *)

  val get_post_categories : t -> post_id:string -> string list
  (** [get_post_categories state ~post_id] returns the list of category IDs
      assigned to a post. *)

  val set_post_categories :
    t -> post_id:string -> category_ids:string list -> (unit, string) result
  (** [set_post_categories state ~post_id ~category_ids] sets the categories
      for a post.

      Replaces any existing category assignments for this post.
      @param post_id The post ID to categorize
      @param category_ids List of category IDs to assign *)

  val add_post_category :
    t -> post_id:string -> category_id:string -> (unit, string) result
  (** [add_post_category state ~post_id ~category_id] adds a category to a
      post.

      @param post_id The post ID
      @param category_id The category ID to add *)

  val remove_post_category :
    t -> post_id:string -> category_id:string -> (unit, string) result
  (** [remove_post_category state ~post_id ~category_id] removes a category
      from a post.

      @param post_id The post ID
      @param category_id The category ID to remove *)

  val get_posts_by_category : t -> category_id:string -> string list
  (** [get_posts_by_category state ~category_id] returns all post IDs with
      this category. *)

  (** {2 Analysis} *)

  val analyze_user_quality :
    t ->
    username:string ->
    (Quality.t, string) result
  (** [analyze_user_quality state ~username] analyzes quality metrics for a
      user's feed. *)
end