···
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18
+
(** River RSS/Atom/JSONFeed aggregator library *)
let src = Logs.Src.create "river" ~doc:"River RSS/Atom aggregator"
module Log = (val Logs.src_log src : Logs.LOG)
21
-
(* Keep Client module internal *)
22
-
module Internal_client = Client
23
+
(** {1 Internal Utilities} *)
25
+
module Text_extract = struct
28
+
(* Remove all tags *)
29
+
let rec syndic_to_buffer b = function
30
+
| XML.Node (_, _, subs) -> List.iter (syndic_to_buffer b) subs
31
+
| XML.Data (_, d) -> Buffer.add_string b d
33
+
let syndic_to_string x =
34
+
let b = Buffer.create 1024 in
35
+
List.iter (syndic_to_buffer b) x;
38
+
let string_of_text_construct : Atom.text_construct -> string = function
39
+
| Atom.Text s | Atom.Html (_, s) -> s
40
+
| Atom.Xhtml (_, x) -> syndic_to_string x
43
+
module Html_meta = struct
44
+
[@@@warning "-32"] (* Suppress unused value warnings for internal utilities *)
46
+
(** This module determines an image to be used as preview of a website.
48
+
It does this by following the same logic Google+ and other websites use, and
49
+
described in this article:
50
+
https://www.raymondcamden.com/2011/07/26/How-are-Facebook-and-Google-creating-link-previews *)
54
+
let soup = parse html in
55
+
try soup $ "meta[property=og:image]" |> R.attribute "content" |> Option.some
56
+
with Failure _ -> None
58
+
let image_src html =
60
+
let soup = parse html in
61
+
try soup $ "link[rel=\"image_src\"]" |> R.attribute "href" |> Option.some
62
+
with Failure _ -> None
64
+
let twitter_image html =
66
+
let soup = parse html in
68
+
soup $ "meta[name=\"twitter:image\"]" |> R.attribute "content"
70
+
with Failure _ -> None
72
+
let og_description html =
74
+
let soup = parse html in
76
+
soup $ "meta[property=og:description]" |> R.attribute "content"
78
+
with Failure _ -> None
80
+
let description html =
82
+
let soup = parse html in
84
+
soup $ "meta[property=description]" |> R.attribute "content" |> Option.some
85
+
with Failure _ -> None
87
+
let preview_image html =
89
+
match og_image html with
91
+
match image_src html with
92
+
| None -> twitter_image html
96
+
match Option.map String.trim preview_image with
101
+
let description html =
102
+
let preview_image =
103
+
match og_description html with None -> description html | Some x -> Some x
105
+
match Option.map String.trim preview_image with
111
+
module Html_markdown = struct
112
+
[@@@warning "-32"] (* Suppress unused value warnings for internal utilities *)
114
+
(** HTML to Markdown converter using Lambda Soup *)
116
+
(** Extract all links from HTML content *)
117
+
let extract_links html_str =
119
+
let soup = Soup.parse html_str in
120
+
let links = Soup.select "a[href]" soup in
121
+
Soup.fold (fun acc link ->
122
+
match Soup.attribute "href" link with
124
+
let text = Soup.texts link |> String.concat "" |> String.trim in
125
+
(href, text) :: acc
131
+
(** Check if string contains any whitespace *)
132
+
let has_whitespace s =
134
+
let _ = Str.search_forward (Str.regexp "[ \t\n\r]") s 0 in
136
+
with Not_found -> false
138
+
(** Clean up excessive newlines and normalize spacing *)
139
+
let cleanup_markdown s =
140
+
(* Normalize line endings *)
141
+
let s = Str.global_replace (Str.regexp "\r\n") "\n" s in
143
+
(* Remove trailing whitespace from each line *)
144
+
let lines = String.split_on_char '\n' s in
145
+
let lines = List.map (fun line ->
146
+
(* Trim trailing spaces but preserve leading spaces for indentation *)
147
+
let len = String.length line in
148
+
let rec find_last_non_space i =
150
+
else if line.[i] = ' ' || line.[i] = '\t' then find_last_non_space (i - 1)
153
+
let last = find_last_non_space (len - 1) in
154
+
if last < 0 then ""
155
+
else String.sub line 0 (last + 1)
158
+
(* Join back and collapse excessive blank lines *)
159
+
let s = String.concat "\n" lines in
161
+
(* Replace 3+ consecutive newlines with just 2 *)
162
+
let s = Str.global_replace (Str.regexp "\n\n\n+") "\n\n" s in
164
+
(* Trim leading and trailing whitespace *)
167
+
(** Convert HTML to Markdown using state-based whitespace handling *)
168
+
let html_to_markdown html_str =
170
+
let soup = Soup.parse html_str in
171
+
let buffer = Buffer.create 256 in
173
+
(* State: track if we need to insert a space before next text *)
174
+
let need_space = ref false in
176
+
(* Get last character in buffer, if any *)
178
+
let len = Buffer.length buffer in
179
+
if len = 0 then None
180
+
else Some (Buffer.nth buffer (len - 1))
183
+
(* Add text with proper spacing *)
184
+
let add_text text =
185
+
let trimmed = String.trim text in
186
+
if trimmed <> "" then begin
187
+
(* Check if text starts with punctuation that shouldn't have space before it *)
188
+
let starts_with_punctuation =
189
+
String.length trimmed > 0 &&
190
+
(match trimmed.[0] with
191
+
| ',' | '.' | ';' | ':' | '!' | '?' | ')' | ']' | '}' -> true
195
+
(* Add space if needed, unless we're before punctuation *)
196
+
if !need_space && not starts_with_punctuation then begin
197
+
match last_char () with
198
+
| Some (' ' | '\n') -> ()
199
+
| _ -> Buffer.add_char buffer ' '
201
+
Buffer.add_string buffer trimmed;
202
+
need_space := false
206
+
(* Mark that we need space before next text (for inline elements) *)
207
+
let mark_space_needed () =
208
+
need_space := has_whitespace (Buffer.contents buffer) || Buffer.length buffer > 0
211
+
(* Process header with ID/anchor handling *)
212
+
let process_header level elem =
213
+
need_space := false;
215
+
(* Check if header contains a link with an ID fragment *)
216
+
let link_opt = Soup.select_one "a[href]" elem in
217
+
let anchor_id = match link_opt with
219
+
(match Soup.attribute "href" link with
221
+
(* Extract fragment from URL *)
222
+
let uri = Uri.of_string href in
228
+
(* Add anchor if we found an ID *)
229
+
(match anchor_id with
230
+
| Some id when id <> "" ->
231
+
Buffer.add_string buffer (Printf.sprintf "\n<a name=\"%s\"></a>\n" id)
234
+
(* Add the header marker *)
235
+
let marker = String.make level '#' in
236
+
Buffer.add_string buffer ("\n" ^ marker ^ " ");
238
+
(* Get text content, excluding link tags *)
239
+
let text = Soup.texts elem |> String.concat " " |> String.trim in
240
+
Buffer.add_string buffer text;
242
+
Buffer.add_string buffer "\n\n";
243
+
need_space := false
246
+
let rec process_node node =
247
+
match Soup.element node with
249
+
let tag = Soup.name elem in
251
+
(* Block elements - reset space tracking *)
252
+
| "h1" -> process_header 1 elem
253
+
| "h2" -> process_header 2 elem
254
+
| "h3" -> process_header 3 elem
255
+
| "h4" -> process_header 4 elem
256
+
| "h5" -> process_header 5 elem
257
+
| "h6" -> process_header 6 elem
259
+
need_space := false;
260
+
Soup.children elem |> Soup.iter process_node;
261
+
Buffer.add_string buffer "\n\n";
262
+
need_space := false
264
+
Buffer.add_string buffer "\n";
265
+
need_space := false
266
+
(* Inline elements - preserve space tracking *)
267
+
| "strong" | "b" ->
268
+
(* Add space before if needed *)
269
+
if !need_space then begin
270
+
match last_char () with
271
+
| Some (' ' | '\n') -> ()
272
+
| _ -> Buffer.add_char buffer ' '
274
+
Buffer.add_string buffer "**";
275
+
need_space := false;
276
+
Soup.children elem |> Soup.iter process_node;
277
+
Buffer.add_string buffer "**";
278
+
mark_space_needed ()
280
+
(* Add space before if needed *)
281
+
if !need_space then begin
282
+
match last_char () with
283
+
| Some (' ' | '\n') -> ()
284
+
| _ -> Buffer.add_char buffer ' '
286
+
Buffer.add_string buffer "*";
287
+
need_space := false;
288
+
Soup.children elem |> Soup.iter process_node;
289
+
Buffer.add_string buffer "*";
290
+
mark_space_needed ()
292
+
(* Add space before if needed *)
293
+
if !need_space then begin
294
+
match last_char () with
295
+
| Some (' ' | '\n') -> ()
296
+
| _ -> Buffer.add_char buffer ' '
298
+
Buffer.add_string buffer "`";
299
+
need_space := false;
300
+
Soup.children elem |> Soup.iter process_node;
301
+
Buffer.add_string buffer "`";
302
+
mark_space_needed ()
304
+
need_space := false;
305
+
Buffer.add_string buffer "\n```\n";
306
+
Soup.children elem |> Soup.iter process_node;
307
+
Buffer.add_string buffer "\n```\n\n";
308
+
need_space := false
310
+
let text = Soup.texts elem |> String.concat " " |> String.trim in
311
+
let href = Soup.attribute "href" elem in
314
+
(* Add space before link if needed *)
315
+
if !need_space then begin
316
+
match last_char () with
317
+
| Some (' ' | '\n') -> ()
318
+
| _ -> Buffer.add_char buffer ' '
320
+
need_space := false;
322
+
(* Add the link markdown *)
324
+
Buffer.add_string buffer (Printf.sprintf "<%s>" href)
326
+
Buffer.add_string buffer (Printf.sprintf "[%s](%s)" text href);
328
+
(* Mark that space may be needed after link *)
329
+
mark_space_needed ()
333
+
need_space := false;
334
+
Buffer.add_string buffer "\n";
335
+
let is_ordered = tag = "ol" in
336
+
let items = Soup.children elem |> Soup.to_list in
337
+
List.iteri (fun i item ->
338
+
match Soup.element item with
339
+
| Some li when Soup.name li = "li" ->
340
+
need_space := false;
342
+
Buffer.add_string buffer (Printf.sprintf "%d. " (i + 1))
344
+
Buffer.add_string buffer "- ";
345
+
Soup.children li |> Soup.iter process_node;
346
+
Buffer.add_string buffer "\n"
349
+
Buffer.add_string buffer "\n";
350
+
need_space := false
352
+
need_space := false;
353
+
Buffer.add_string buffer "\n> ";
354
+
Soup.children elem |> Soup.iter process_node;
355
+
Buffer.add_string buffer "\n\n";
356
+
need_space := false
358
+
(* Add space before if needed *)
359
+
if !need_space then begin
360
+
match last_char () with
361
+
| Some (' ' | '\n') -> ()
362
+
| _ -> Buffer.add_char buffer ' '
364
+
let alt = Soup.attribute "alt" elem |> Option.value ~default:"" in
365
+
let src = Soup.attribute "src" elem |> Option.value ~default:"" in
366
+
Buffer.add_string buffer (Printf.sprintf "" alt src);
367
+
need_space := false;
368
+
mark_space_needed ()
370
+
need_space := false;
371
+
Buffer.add_string buffer "\n---\n\n";
372
+
need_space := false
373
+
(* Strip these tags but keep content *)
374
+
| "div" | "span" | "article" | "section" | "header" | "footer"
375
+
| "main" | "nav" | "aside" | "figure" | "figcaption" | "details" | "summary" ->
376
+
Soup.children elem |> Soup.iter process_node
377
+
(* Ignore script, style, etc *)
378
+
| "script" | "style" | "noscript" -> ()
379
+
(* Default: just process children *)
381
+
Soup.children elem |> Soup.iter process_node)
383
+
(* Text node - handle whitespace properly *)
384
+
match Soup.leaf_text node with
386
+
(* If text is only whitespace, mark that we need space *)
387
+
let trimmed = String.trim text in
388
+
if trimmed = "" then begin
389
+
if has_whitespace text then
392
+
(* Text has content - check if it had leading/trailing whitespace *)
393
+
let had_leading_ws = has_whitespace text &&
394
+
(String.length text > 0 &&
395
+
(text.[0] = ' ' || text.[0] = '\t' || text.[0] = '\n' || text.[0] = '\r')) in
397
+
(* If had leading whitespace, mark we need space *)
398
+
if had_leading_ws then need_space := true;
400
+
(* Add the text content *)
403
+
(* If had trailing whitespace, mark we need space for next *)
404
+
let had_trailing_ws = has_whitespace text &&
405
+
(String.length text > 0 &&
406
+
let last = text.[String.length text - 1] in
407
+
last = ' ' || last = '\t' || last = '\n' || last = '\r') in
408
+
if had_trailing_ws then need_space := true
413
+
Soup.children soup |> Soup.iter process_node;
415
+
(* Clean up the result *)
416
+
let result = Buffer.contents buffer in
417
+
cleanup_markdown result
420
+
(** Convert HTML content to clean Markdown *)
421
+
let to_markdown html_str =
422
+
html_to_markdown html_str
425
+
(** {1 Feed Sources} *)
427
+
module Source = struct
433
+
let make ~name ~url = { name; url }
435
+
let name t = t.name
439
+
let make name url = { name; url } in
440
+
Jsont.Object.map ~kind:"Source" make
441
+
|> Jsont.Object.mem "name" Jsont.string ~enc:(fun s -> s.name)
442
+
|> Jsont.Object.mem "url" Jsont.string ~enc:(fun s -> s.url)
443
+
|> Jsont.Object.finish
446
+
(** {1 HTTP Session Management} *)
448
+
module Session = struct
450
+
session : (float Eio.Time.clock_ty Eio.Resource.t,
451
+
[`Generic | `Unix] Eio.Net.ty Eio.Resource.t) Requests.t;
455
+
Log.info (fun m -> m "Initializing River session");
456
+
let session = Requests.create ~sw
457
+
~default_headers:(Requests.Headers.of_list [
458
+
("User-Agent", "OCaml-River/1.0");
460
+
~follow_redirects:true
467
+
let with_session env f =
468
+
Log.info (fun m -> m "Creating River session");
469
+
Eio.Switch.run @@ fun sw ->
470
+
let client = init ~sw env in
473
+
let get_requests_session t = t.session
476
+
(** {1 Feeds and Posts} *)
478
+
module Feed = struct
479
+
type feed_content =
480
+
| Atom of Syndic.Atom.feed
481
+
| Rss2 of Syndic.Rss2.channel
482
+
| Json of Jsonfeed.t
487
+
content : feed_content;
490
+
let string_of_feed = function
493
+
| Json _ -> "JSONFeed"
495
+
let classify_feed ~xmlbase (body : string) =
496
+
Log.debug (fun m -> m "Attempting to parse feed (%d bytes)" (String.length body));
498
+
(* Quick check - does it look like JSON? *)
499
+
let looks_like_json =
500
+
String.length body > 0 &&
501
+
let first_char = String.get body 0 in
502
+
first_char = '{' || first_char = '['
505
+
if looks_like_json then (
506
+
(* Try JSONFeed first *)
507
+
Log.debug (fun m -> m "Body looks like JSON, trying JSONFeed parser");
508
+
match Jsonfeed.of_string body with
510
+
Log.debug (fun m -> m "Successfully parsed as JSONFeed");
513
+
Log.debug (fun m -> m "Not a JSONFeed: %s" (Jsont.Error.to_string err));
514
+
(* Fall through to XML parsing *)
515
+
failwith "Not a valid JSONFeed"
517
+
(* Try XML formats *)
519
+
let feed = Atom (Syndic.Atom.parse ~xmlbase (Xmlm.make_input (`String (0, body)))) in
520
+
Log.debug (fun m -> m "Successfully parsed as Atom feed");
523
+
| Syndic.Atom.Error.Error (pos, msg) -> (
524
+
Log.debug (fun m -> m "Not an Atom feed: %s at position (%d, %d)"
525
+
msg (fst pos) (snd pos));
527
+
let feed = Rss2 (Syndic.Rss2.parse ~xmlbase (Xmlm.make_input (`String (0, body)))) in
528
+
Log.debug (fun m -> m "Successfully parsed as RSS2 feed");
530
+
with Syndic.Rss2.Error.Error (pos, msg) ->
531
+
Log.err (fun m -> m "Failed to parse as RSS2: %s at position (%d, %d)"
532
+
msg (fst pos) (snd pos));
533
+
failwith "Neither Atom nor RSS2 feed")
534
+
| Not_found as e ->
535
+
Log.err (fun m -> m "Not_found exception during Atom feed parsing");
536
+
Log.err (fun m -> m "Backtrace:\n%s" (Printexc.get_backtrace ()));
539
+
Log.err (fun m -> m "Unexpected exception during feed parsing: %s"
540
+
(Printexc.to_string e));
541
+
Log.err (fun m -> m "Backtrace:\n%s" (Printexc.get_backtrace ()));
545
+
let fetch session source =
546
+
Log.info (fun m -> m "Fetching feed: %s" (Source.name source));
548
+
let xmlbase = Uri.of_string (Source.url source) in
550
+
(* Use Requests_json_api.get_result for clean Result-based error handling *)
551
+
let requests_session = Session.get_requests_session session in
553
+
match Requests_json_api.get_result requests_session (Source.url source) with
555
+
Log.info (fun m -> m "Successfully fetched %s (%d bytes)"
556
+
(Source.url source) (String.length body));
558
+
| Error (status, msg) ->
559
+
Log.err (fun m -> m "Failed to fetch feed '%s': HTTP %d - %s"
560
+
(Source.name source) status msg);
561
+
failwith (Printf.sprintf "HTTP %d: %s" status msg)
564
+
let content = classify_feed ~xmlbase response in
567
+
| Atom atom -> Text_extract.string_of_text_construct atom.Syndic.Atom.title
568
+
| Rss2 ch -> ch.Syndic.Rss2.title
569
+
| Json jsonfeed -> Jsonfeed.title jsonfeed
572
+
Log.info (fun m -> m "Successfully fetched %s feed '%s' (title: '%s')"
573
+
(string_of_feed content) (Source.name source) title);
575
+
{ source; title; content }
577
+
let source t = t.source
24
-
(* Abstract session type *)
25
-
type session = Client.t
582
+
module Post = struct
586
+
link : Uri.t option;
587
+
date : Syndic.Date.t option;
591
+
content : Soup.soup Soup.node;
592
+
mutable link_response : (string, string) result option;
593
+
tags : string list;
594
+
summary : string option;
27
-
type source = Feed.source = { name : string; url : string }
597
+
(** Generate a stable, unique ID from available data *)
598
+
let generate_id ?guid ?link ?title ?date ~feed_url () =
600
+
| Some id when id <> "" ->
601
+
(* Use explicit ID/GUID if available *)
605
+
| Some uri when Uri.to_string uri <> "" ->
606
+
(* Use permalink as ID (stable and unique) *)
609
+
(* Fallback: hash of feed_url + title + date *)
610
+
let title_str = Option.value title ~default:"" in
613
+
| Some d -> Ptime.to_rfc3339 d
616
+
let composite = Printf.sprintf "%s|%s|%s" feed_url title_str date_str in
617
+
(* Use SHA256 for stable hashing *)
618
+
Digest.string composite |> Digest.to_hex
31
-
(* Session management *)
33
-
Log.info (fun m -> m "Initializing River session");
34
-
Internal_client.create ~sw env
620
+
let resolve_links_attr ~xmlbase attr el =
621
+
Soup.R.attribute attr el
623
+
|> Syndic.XML.resolve ~xmlbase
625
+
|> fun value -> Soup.set_attribute attr value el
36
-
let with_session env f =
37
-
Log.info (fun m -> m "Creating River session");
38
-
Internal_client.with_client env f
627
+
(* Things that posts should not contain *)
628
+
let undesired_tags = [ "style"; "script" ]
629
+
let undesired_attr = [ "id" ]
40
-
(* Feed operations *)
41
-
let fetch session source =
42
-
Log.info (fun m -> m "Fetching feed: %s" source.name);
43
-
Feed.fetch session source
631
+
let html_of_text ?xmlbase s =
632
+
let soup = Soup.parse s in
633
+
let ($$) = Soup.($$) in
634
+
soup $$ "a[href]" |> Soup.iter (resolve_links_attr ~xmlbase "href");
635
+
soup $$ "img[src]" |> Soup.iter (resolve_links_attr ~xmlbase "src");
636
+
undesired_tags |> List.iter (fun tag -> soup $$ tag |> Soup.iter Soup.delete);
637
+
soup $$ "*" |> Soup.iter (fun el ->
638
+
undesired_attr |> List.iter (fun attr -> Soup.delete_attribute attr el));
45
-
let name feed = feed.Feed.name
46
-
let url feed = feed.Feed.url
641
+
(* Do not trust sites using XML for HTML content. Convert to string and parse
642
+
back. (Does not always fix bad HTML unfortunately.) *)
643
+
let html_of_syndic =
644
+
let ns_prefix _ = Some "" in
646
+
html_of_text ?xmlbase
647
+
(String.concat "" (List.map (Syndic.XML.to_string ~ns_prefix) h))
49
-
Log.info (fun m -> m "Aggregating posts from %d feed(s)" (List.length feeds));
50
-
let result = Post.get_posts feeds in
51
-
Log.info (fun m -> m "Aggregated %d posts total" (List.length result));
649
+
let string_of_option = function None -> "" | Some s -> s
54
-
let title post = post.Post.title
55
-
let link post = post.Post.link
56
-
let date post = post.Post.date
57
-
let feed post = post.Post.feed
58
-
let author post = post.Post.author
59
-
let email post = post.Post.email
60
-
let content post = Soup.to_string post.Post.content
61
-
let id post = post.Post.id
62
-
let tags post = post.Post.tags
63
-
let summary post = post.Post.summary
651
+
let post_compare p1 p2 =
652
+
(* Most recent posts first. Posts with no date are always last *)
653
+
match (p1.date, p2.date) with
654
+
| Some d1, Some d2 -> Syndic.Date.compare d2 d1
655
+
| None, Some _ -> 1
656
+
| Some _, None -> -1
65
-
let meta_description _post =
66
-
(* TODO: This requires environment for HTTP access *)
67
-
Log.debug (fun m -> m "meta_description not implemented (requires environment)");
659
+
let rec remove n l =
660
+
if n <= 0 then l else match l with [] -> [] | _ :: tl -> remove (n - 1) tl
70
-
let seo_image _post =
71
-
(* TODO: This requires environment for HTTP access *)
72
-
Log.debug (fun m -> m "seo_image not implemented (requires environment)");
662
+
let rec take n = function
664
+
| e :: tl -> if n > 0 then e :: take (n - 1) tl else []
75
-
let create_atom_entries posts =
76
-
Log.info (fun m -> m "Creating Atom entries for %d posts" (List.length posts));
77
-
Post.mk_entries posts
666
+
let post_of_atom ~(feed : Feed.t) (e : Syndic.Atom.entry) =
667
+
Log.debug (fun m -> m "Processing Atom entry: %s"
668
+
(Text_extract.string_of_text_construct e.title));
79
-
(* JSONFeed support *)
80
-
let create_jsonfeed_items posts =
81
-
Log.info (fun m -> m "Creating JSONFeed items for %d posts" (List.length posts));
82
-
Post.mk_jsonfeed_items posts
673
+
(List.find (fun l -> l.Syndic.Atom.rel = Syndic.Atom.Alternate) e.links)
675
+
with Not_found -> (
676
+
Log.debug (fun m -> m "No alternate link found, trying fallback");
678
+
| l :: _ -> Some l.href
680
+
match Uri.scheme e.id with
681
+
| Some "http" -> Some e.id
682
+
| Some "https" -> Some e.id
686
+
match e.published with Some _ -> e.published | None -> Some e.updated
689
+
match e.content with
690
+
| Some (Text s) -> html_of_text s
691
+
| Some (Html (xmlbase, s)) -> html_of_text ?xmlbase s
692
+
| Some (Xhtml (xmlbase, h)) -> html_of_syndic ?xmlbase h
693
+
| Some (Mime _) | Some (Src _) | None -> (
694
+
match e.summary with
695
+
| Some (Text s) -> html_of_text s
696
+
| Some (Html (xmlbase, s)) -> html_of_text ?xmlbase s
697
+
| Some (Xhtml (xmlbase, h)) -> html_of_syndic ?xmlbase h
698
+
| None -> Soup.parse "")
700
+
let is_valid_author_name name =
701
+
(* Filter out empty strings and placeholder values like "Unknown" *)
702
+
let trimmed = String.trim name in
703
+
trimmed <> "" && trimmed <> "Unknown"
706
+
(* Fallback chain for author:
707
+
1. Entry author (if present, not empty, and not "Unknown")
708
+
2. Feed-level author (from Atom feed metadata)
709
+
3. Feed title (from Atom feed metadata)
710
+
4. Source name (manually entered feed name) *)
712
+
let author, _ = e.authors in
713
+
let trimmed = String.trim author.name in
714
+
if is_valid_author_name author.name then trimmed
715
+
else raise Not_found (* Try feed-level author *)
716
+
with Not_found -> (
717
+
match feed.content with
718
+
| Feed.Atom atom_feed -> (
719
+
(* Try feed-level authors *)
720
+
match atom_feed.Syndic.Atom.authors with
721
+
| author :: _ when is_valid_author_name author.name ->
722
+
String.trim author.name
724
+
(* Use feed title *)
725
+
Text_extract.string_of_text_construct atom_feed.Syndic.Atom.title)
726
+
| Feed.Rss2 _ | Feed.Json _ ->
727
+
(* For RSS2 and JSONFeed, use the source name *)
728
+
Source.name feed.source)
730
+
(* Extract tags from Atom categories *)
732
+
List.map (fun cat -> cat.Syndic.Atom.term) e.categories
734
+
(* Extract summary - convert from text_construct to string *)
736
+
match e.summary with
737
+
| Some s -> Some (Text_extract.string_of_text_construct s)
740
+
(* Generate unique ID *)
741
+
let guid = Uri.to_string e.id in
742
+
let title_str = Text_extract.string_of_text_construct e.title in
744
+
generate_id ~guid ?link ~title:title_str ?date
745
+
~feed_url:(Source.url feed.source) ()
753
+
author = author_name;
756
+
link_response = None;
84
-
let create_jsonfeed ~title ?home_page_url ?feed_url ?description ?icon ?favicon posts =
85
-
Log.info (fun m -> m "Creating JSONFeed with %d posts" (List.length posts));
86
-
let items = create_jsonfeed_items posts in
87
-
Jsonfeed.create ~title ?home_page_url ?feed_url ?description ?icon ?favicon ~items ()
761
+
let post_of_rss2 ~(feed : Feed.t) it =
762
+
let title, content =
763
+
match it.Syndic.Rss2.story with
764
+
| All (t, xmlbase, d) -> (
766
+
match it.content with
767
+
| _, "" -> html_of_text ?xmlbase d
768
+
| xmlbase, c -> html_of_text ?xmlbase c ))
770
+
let xmlbase, c = it.content in
771
+
(t, html_of_text ?xmlbase c)
772
+
| Description (xmlbase, d) -> (
774
+
match it.content with
775
+
| _, "" -> html_of_text ?xmlbase d
776
+
| xmlbase, c -> html_of_text ?xmlbase c ))
778
+
(* Note: it.link is of type Uri.t option in Syndic *)
780
+
match (it.guid, it.link) with
781
+
| Some u, _ when u.permalink -> Some u.data
782
+
| _, Some _ -> it.link
784
+
(* Sometimes the guid is indicated with isPermaLink="false" but is
785
+
nonetheless the only URL we get (e.g. ocamlpro). *)
787
+
| None, None -> None
789
+
(* Extract GUID string for ID generation *)
792
+
| Some u -> Some (Uri.to_string u.data)
795
+
(* RSS2 doesn't have a categories field exposed, use empty list *)
797
+
(* RSS2 doesn't have a separate summary field, so leave it empty *)
798
+
let summary = None in
799
+
(* Generate unique ID *)
801
+
generate_id ?guid:guid_str ?link ~title ?date:it.pubDate
802
+
~feed_url:(Source.url feed.source) ()
809
+
author = Source.name feed.source;
810
+
email = string_of_option it.author;
813
+
link_response = None;
89
-
let jsonfeed_to_string ?(minify = false) jsonfeed =
90
-
match Jsonfeed.to_string ~minify jsonfeed with
92
-
| Error err -> Error (Jsont.Error.to_string err)
818
+
let post_of_jsonfeed_item ~(feed : Feed.t) (item : Jsonfeed.Item.t) =
819
+
Log.debug (fun m -> m "Processing JSONFeed item: %s"
820
+
(Option.value (Jsonfeed.Item.title item) ~default:"Untitled"));
95
-
| Atom of Syndic.Atom.feed
96
-
| Rss2 of Syndic.Rss2.channel
97
-
| JSONFeed of Jsonfeed.t
822
+
(* Extract content - prefer HTML, fall back to text *)
824
+
match Jsonfeed.Item.content item with
825
+
| `Html html -> html_of_text html
826
+
| `Text text -> html_of_text text
827
+
| `Both (html, _text) -> html_of_text html
99
-
let feed_content feed =
100
-
match feed.Feed.content with
101
-
| Feed.Atom f -> Atom f
102
-
| Feed.Rss2 ch -> Rss2 ch
103
-
| Feed.Json jf -> JSONFeed jf
830
+
(* Extract author - use first author if multiple *)
831
+
let author_name, author_email =
832
+
match Jsonfeed.Item.authors item with
833
+
| Some (first :: _) ->
834
+
let name = Jsonfeed.Author.name first |> Option.value ~default:"" in
835
+
(* JSONFeed authors don't typically have email *)
838
+
(* Fall back to feed-level authors or feed title *)
839
+
(match feed.content with
840
+
| Feed.Json jsonfeed ->
841
+
(match Jsonfeed.authors jsonfeed with
842
+
| Some (first :: _) ->
843
+
let name = Jsonfeed.Author.name first |> Option.value ~default:feed.title in
845
+
| _ -> (feed.title, ""))
846
+
| _ -> (feed.title, ""))
849
+
(* Link - use url field *)
851
+
Jsonfeed.Item.url item
852
+
|> Option.map Uri.of_string
856
+
let date = Jsonfeed.Item.date_published item in
859
+
let summary = Jsonfeed.Item.summary item in
863
+
Jsonfeed.Item.tags item
864
+
|> Option.value ~default:[]
867
+
(* Generate unique ID - JSONFeed items always have an id field (required) *)
868
+
let guid = Jsonfeed.Item.id item in
869
+
let title_str = Jsonfeed.Item.title item |> Option.value ~default:"Untitled" in
871
+
generate_id ~guid ?link ~title:title_str ?date
872
+
~feed_url:(Source.url feed.source) ()
881
+
author = author_name;
882
+
email = author_email;
884
+
link_response = None;
889
+
let posts_of_feed c =
890
+
match c.Feed.content with
892
+
let posts = List.map (post_of_atom ~feed:c) f.Syndic.Atom.entries in
893
+
Log.debug (fun m -> m "Extracted %d posts from Atom feed '%s'"
894
+
(List.length posts) (Source.name c.source));
897
+
let posts = List.map (post_of_rss2 ~feed:c) ch.Syndic.Rss2.items in
898
+
Log.debug (fun m -> m "Extracted %d posts from RSS2 feed '%s'"
899
+
(List.length posts) (Source.name c.source));
901
+
| Feed.Json jsonfeed ->
902
+
let items = Jsonfeed.items jsonfeed in
903
+
let posts = List.map (post_of_jsonfeed_item ~feed:c) items in
904
+
Log.debug (fun m -> m "Extracted %d posts from JSONFeed '%s'"
905
+
(List.length posts) (Source.name c.source));
908
+
let get_posts ?n ?(ofs = 0) planet_feeds =
909
+
Log.info (fun m -> m "Processing %d feeds for posts" (List.length planet_feeds));
911
+
let posts = List.concat @@ List.map posts_of_feed planet_feeds in
912
+
Log.debug (fun m -> m "Total posts collected: %d" (List.length posts));
914
+
let posts = List.sort post_compare posts in
915
+
Log.debug (fun m -> m "Posts sorted by date (most recent first)");
917
+
let posts = remove ofs posts in
921
+
Log.debug (fun m -> m "Returning all %d posts (offset=%d)"
922
+
(List.length posts) ofs);
925
+
let limited = take n posts in
926
+
Log.debug (fun m -> m "Returning %d posts (requested=%d, offset=%d)"
927
+
(List.length limited) n ofs);
932
+
let of_feeds feeds = get_posts feeds
934
+
let feed t = t.feed
935
+
let title t = t.title
936
+
let link t = t.link
937
+
let date t = t.date
938
+
let author t = t.author
939
+
let email t = t.email
940
+
let content t = Soup.to_string t.content
942
+
let tags t = t.tags
943
+
let summary t = t.summary
945
+
let meta_description _t =
946
+
(* TODO: This requires environment for HTTP access *)
947
+
Log.debug (fun m -> m "meta_description not implemented (requires environment)");
951
+
(* TODO: This requires environment for HTTP access *)
952
+
Log.debug (fun m -> m "seo_image not implemented (requires environment)");
956
+
(** {1 Format Conversion and Export} *)
958
+
module Format = struct
959
+
module Atom = struct
960
+
let entry_of_post post =
961
+
let content = Syndic.Atom.Html (None, Post.content post) in
963
+
[ Syndic.Atom.author ~uri:(Uri.of_string (Source.url (Feed.source (Post.feed post))))
964
+
(Source.name (Feed.source (Post.feed post))) ]
967
+
match Post.link post with
968
+
| Some l -> [ Syndic.Atom.link ~rel:Syndic.Atom.Alternate l ]
972
+
match Post.link post with
974
+
| None -> Uri.of_string (Digest.to_hex (Digest.string (Post.title post)))
976
+
let authors = (Syndic.Atom.author ~email:(Post.email post) (Post.author post), []) in
977
+
let title : Syndic.Atom.text_construct = Syndic.Atom.Text (Post.title post) in
979
+
match Post.date post with
980
+
(* Atom entry requires a date but RSS2 does not. So if a date
981
+
* is not available, just capture the current date. *)
982
+
| None -> Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get
985
+
Syndic.Atom.entry ~content ~contributors ~links ~id ~authors ~title ~updated
988
+
let entries_of_posts posts = List.map entry_of_post posts
990
+
let feed_of_entries ~title ?id ?(authors = []) entries =
991
+
let feed_id = match id with
992
+
| Some i -> Uri.of_string i
993
+
| None -> Uri.of_string "urn:river:merged"
995
+
let feed_authors = List.map (fun (name, email) ->
997
+
| Some e -> Syndic.Atom.author ~email:e name
998
+
| None -> Syndic.Atom.author name
1001
+
Syndic.Atom.id = feed_id;
1002
+
title = Syndic.Atom.Text title;
1003
+
updated = Ptime.of_float_s (Unix.time ()) |> Option.get;
1005
+
authors = feed_authors;
1007
+
contributors = [];
1008
+
generator = Some {
1009
+
Syndic.Atom.version = Some "1.0";
1011
+
content = "River Feed Aggregator";
1020
+
let to_string feed =
1021
+
let output = Buffer.create 4096 in
1022
+
Syndic.Atom.output feed (`Buffer output);
1023
+
Buffer.contents output
1026
+
module Rss2 = struct
1027
+
let of_feed feed =
1028
+
match feed.Feed.content with
1029
+
| Feed.Rss2 ch -> Some ch
1033
+
module Jsonfeed = struct
1034
+
let item_of_post post =
1035
+
(* Convert HTML content back to string *)
1036
+
let html = Post.content post in
1037
+
let content = `Html html in
1039
+
(* Create author *)
1041
+
if Post.author post <> "" then
1042
+
let author = Jsonfeed.Author.create ~name:(Post.author post) () in
1049
+
Jsonfeed.Item.create
1050
+
~id:(Post.id post)
1052
+
?url:(Option.map Uri.to_string (Post.link post))
1053
+
~title:(Post.title post)
1054
+
?summary:(Post.summary post)
1055
+
?date_published:(Post.date post)
1057
+
~tags:(Post.tags post)
1060
+
let items_of_posts posts = List.map item_of_post posts
1062
+
let feed_of_items ~title ?home_page_url ?feed_url ?description ?icon ?favicon items =
1063
+
Jsonfeed.create ~title ?home_page_url ?feed_url ?description ?icon ?favicon ~items ()
1065
+
let feed_of_posts ~title ?home_page_url ?feed_url ?description ?icon ?favicon posts =
1066
+
let items = items_of_posts posts in
1067
+
feed_of_items ~title ?home_page_url ?feed_url ?description ?icon ?favicon items
1069
+
let to_string ?(minify = false) jsonfeed =
1070
+
match Jsonfeed.to_string ~minify jsonfeed with
1072
+
| Error err -> Error (Jsont.Error.to_string err)
1074
+
let of_feed feed =
1075
+
match feed.Feed.content with
1076
+
| Feed.Json jf -> Some jf
1081
+
(** {1 User Management} *)
1083
+
module User = struct
1085
+
username : string;
1086
+
fullname : string;
1087
+
email : string option;
1088
+
feeds : Source.t list;
1089
+
last_synced : string option;
1092
+
(** Construct a user record. [feeds] defaults to the empty list;
    [email] and [last_synced] default to [None]. *)
let make ~username ~fullname ?email ?(feeds = []) ?last_synced () =
  {
    username = username;
    fullname = fullname;
    email = email;
    feeds = feeds;
    last_synced = last_synced;
  }
1095
+
(* Field accessors, written with binding-site destructuring. *)
let username { username; _ } = username
let fullname { fullname; _ } = fullname
let email { email; _ } = email
let feeds { feeds; _ } = feeds
let last_synced { last_synced; _ } = last_synced
1101
+
(** Return [t] with [source] prepended to its feed list. *)
let add_feed t source = { t with feeds = source :: t.feeds }
1104
+
let remove_feed t ~url =
1105
+
let feeds = List.filter (fun s -> Source.url s <> url) t.feeds in
1108
+
(** Return [t] with its last-synced marker set to [timestamp]. *)
let set_last_synced t timestamp = { t with last_synced = Some timestamp }
1112
+
let make username fullname email feeds last_synced =
1113
+
{ username; fullname; email; feeds; last_synced }
1115
+
Jsont.Object.map ~kind:"User" make
1116
+
|> Jsont.Object.mem "username" Jsont.string ~enc:(fun u -> u.username)
1117
+
|> Jsont.Object.mem "fullname" Jsont.string ~enc:(fun u -> u.fullname)
1118
+
|> Jsont.Object.opt_mem "email" Jsont.string ~enc:(fun u -> u.email)
1119
+
|> Jsont.Object.mem "feeds" (Jsont.list Source.jsont) ~enc:(fun u -> u.feeds)
1120
+
|> Jsont.Object.opt_mem "last_synced" Jsont.string ~enc:(fun u -> u.last_synced)
1121
+
|> Jsont.Object.finish
1124
+
(** {1 Feed Quality Analysis} *)
1126
+
module Quality = struct
1128
+
total_entries : int;
1129
+
entries_with_summary : int;
1130
+
entries_with_author : int;
1131
+
entries_with_date : int;
1132
+
entries_with_content : int;
1133
+
entries_with_tags : int;
1134
+
avg_content_length : float;
1135
+
min_content_length : int;
1136
+
max_content_length : int;
1137
+
posting_frequency_days : float option;
1138
+
quality_score : float;
1141
+
let make ~total_entries ~entries_with_summary ~entries_with_author
1142
+
~entries_with_date ~entries_with_content ~entries_with_tags
1143
+
~avg_content_length ~min_content_length ~max_content_length
1144
+
~posting_frequency_days ~quality_score =
1147
+
entries_with_summary;
1148
+
entries_with_author;
1149
+
entries_with_date;
1150
+
entries_with_content;
1151
+
entries_with_tags;
1152
+
avg_content_length;
1153
+
min_content_length;
1154
+
max_content_length;
1155
+
posting_frequency_days;
1159
+
(* Metric accessors, written with binding-site destructuring. *)
let total_entries { total_entries; _ } = total_entries
let entries_with_summary { entries_with_summary; _ } = entries_with_summary
let entries_with_author { entries_with_author; _ } = entries_with_author
let entries_with_date { entries_with_date; _ } = entries_with_date
let entries_with_content { entries_with_content; _ } = entries_with_content
let entries_with_tags { entries_with_tags; _ } = entries_with_tags
let avg_content_length { avg_content_length; _ } = avg_content_length
let min_content_length { min_content_length; _ } = min_content_length
let max_content_length { max_content_length; _ } = max_content_length
let posting_frequency_days { posting_frequency_days; _ } = posting_frequency_days
let quality_score { quality_score; _ } = quality_score
1171
+
(** Get content length from an Atom entry *)
1172
+
let get_content_length (entry : Syndic.Atom.entry) =
1173
+
match entry.content with
1174
+
| Some (Syndic.Atom.Text s) -> String.length s
1175
+
| Some (Syndic.Atom.Html (_, s)) -> String.length s
1176
+
| Some (Syndic.Atom.Xhtml (_, _)) -> 0 (* Could calculate but complex *)
1177
+
| Some (Syndic.Atom.Mime _) -> 0
1178
+
| Some (Syndic.Atom.Src _) -> 0
1180
+
match entry.summary with
1181
+
| Some (Syndic.Atom.Text s) -> String.length s
1182
+
| Some (Syndic.Atom.Html (_, s)) -> String.length s
1183
+
| Some (Syndic.Atom.Xhtml (_, _)) -> 0
1186
+
(** Check if entry has non-empty summary *)
1187
+
let has_summary (entry : Syndic.Atom.entry) =
1188
+
match entry.summary with
1189
+
| Some (Syndic.Atom.Text s) when String.trim s <> "" -> true
1190
+
| Some (Syndic.Atom.Html (_, s)) when String.trim s <> "" -> true
1191
+
| Some (Syndic.Atom.Xhtml (_, _)) -> true
1194
+
(** Check if entry has author *)
1195
+
(** [has_author entry] is [true] when the entry's primary author has a
    non-blank name. Only the first author of the pair is consulted. *)
let has_author (entry : Syndic.Atom.entry) =
  let primary, _contributors = entry.authors in
  not (String.equal (String.trim primary.name) "")
1199
+
(** Check if entry has content *)
1200
+
(** [has_content entry] is [true] when the entry carries any measurable
    content, as reported by [get_content_length]. *)
let has_content (entry : Syndic.Atom.entry) = 0 < get_content_length entry
1203
+
(** Check if entry has tags/categories *)
1204
+
(** [has_tags entry] is [true] when the entry lists at least one category. *)
let has_tags (entry : Syndic.Atom.entry) =
  match entry.categories with [] -> false | _ :: _ -> true
1207
+
(** Calculate quality score from metrics *)
1208
+
let calculate_quality_score t =
1209
+
let total = float_of_int t.total_entries in
1210
+
if total = 0.0 then 0.0
1212
+
let summary_pct = float_of_int t.entries_with_summary /. total *. 100.0 in
1213
+
let author_pct = float_of_int t.entries_with_author /. total *. 100.0 in
1214
+
let date_pct = float_of_int t.entries_with_date /. total *. 100.0 in
1215
+
let content_pct = float_of_int t.entries_with_content /. total *. 100.0 in
1216
+
let tags_pct = float_of_int t.entries_with_tags /. total *. 100.0 in
1218
+
(* Weighted average: content and dates are most important *)
1220
+
(content_pct *. 0.30) +.
1221
+
(date_pct *. 0.25) +.
1222
+
(author_pct *. 0.20) +.
1223
+
(summary_pct *. 0.15) +.
1224
+
(tags_pct *. 0.10)
1228
+
let analyze entries =
1229
+
if entries = [] then
1230
+
failwith "No entries to analyze"
1232
+
let total_entries = List.length entries in
1234
+
let entries_with_summary = ref 0 in
1235
+
let entries_with_author = ref 0 in
1236
+
let entries_with_date = ref total_entries in (* All Atom entries have updated *)
1237
+
let entries_with_content = ref 0 in
1238
+
let entries_with_tags = ref 0 in
1239
+
let content_lengths = ref [] in
1240
+
let dates = ref [] in
1242
+
List.iter (fun (entry : Syndic.Atom.entry) ->
1243
+
if has_summary entry then incr entries_with_summary;
1244
+
if has_author entry then incr entries_with_author;
1245
+
if has_content entry then begin
1246
+
incr entries_with_content;
1247
+
content_lengths := get_content_length entry :: !content_lengths
1249
+
if has_tags entry then incr entries_with_tags;
1250
+
dates := entry.updated :: !dates
1253
+
(* Calculate content statistics *)
1254
+
let avg_content_length, min_content_length, max_content_length =
1255
+
if !content_lengths = [] then
1258
+
let sorted = List.sort compare !content_lengths in
1259
+
let sum = List.fold_left (+) 0 sorted in
1260
+
let avg = float_of_int sum /. float_of_int (List.length sorted) in
1261
+
let min_len = List.hd sorted in
1262
+
let max_len = List.hd (List.rev sorted) in
1263
+
(avg, min_len, max_len)
1266
+
(* Calculate posting frequency *)
1267
+
let posting_frequency_days =
1268
+
if List.length !dates < 2 then
1272
+
let timestamps = List.map Ptime.to_float_s !dates in
1273
+
let sorted_timestamps = List.sort compare timestamps in
1274
+
let first = List.hd sorted_timestamps in
1275
+
let last = List.hd (List.rev sorted_timestamps) in
1276
+
let total_days = (last -. first) /. 86400.0 in
1277
+
let num_intervals = float_of_int (List.length sorted_timestamps - 1) in
1278
+
Some (total_days /. num_intervals)
1282
+
(* Create metrics record (without quality_score first) *)
1285
+
entries_with_summary = !entries_with_summary;
1286
+
entries_with_author = !entries_with_author;
1287
+
entries_with_date = !entries_with_date;
1288
+
entries_with_content = !entries_with_content;
1289
+
entries_with_tags = !entries_with_tags;
1290
+
avg_content_length;
1291
+
min_content_length;
1292
+
max_content_length;
1293
+
posting_frequency_days;
1294
+
quality_score = 0.0; (* Placeholder *)
1297
+
(* Calculate quality score *)
1298
+
let quality_score = calculate_quality_score metrics in
1299
+
{ metrics with quality_score }
1302
+
(** {1 State Management} *)
1304
+
module State = struct
1309
+
module Paths = struct
1310
+
(** Directory holding per-user JSON records. *)
let users_dir state = Eio.Path.(Xdge.state_dir state.xdg / "users")

(** Directory holding generated feed files. *)
let feeds_dir state = Eio.Path.(Xdge.state_dir state.xdg / "feeds")

(** Directory holding per-user Atom feed files. *)
let user_feeds_dir state = Eio.Path.(feeds_dir state / "user")

(** JSON file storing [username]'s record. *)
let user_file state username =
  let basename = username ^ ".json" in
  Eio.Path.(users_dir state / basename)

(** Atom XML file storing [username]'s aggregated feed. *)
let user_feed_file state username =
  let basename = username ^ ".xml" in
  Eio.Path.(user_feeds_dir state / basename)
1327
+
(** Ensure all necessary directories exist *)
1328
+
let ensure_directories state =
1332
+
user_feeds_dir state;
1334
+
List.iter (fun dir ->
1335
+
try Eio.Path.mkdir ~perm:0o755 dir
1336
+
with Eio.Io (Eio.Fs.E (Already_exists _), _) -> ()
1340
+
module Json = struct
1341
+
(** Decode a user from JSON string *)
1342
+
let user_of_string s =
1343
+
match Jsont_bytesrw.decode_string' User.jsont s with
1344
+
| Ok user -> Some user
1346
+
Log.err (fun m -> m "Failed to parse user JSON: %s" (Jsont.Error.to_string err));
1349
+
(** Encode a user to JSON string *)
1350
+
let user_to_string user =
1351
+
match Jsont_bytesrw.encode_string' ~format:Jsont.Indent User.jsont user with
1353
+
| Error err -> failwith ("Failed to encode user: " ^ Jsont.Error.to_string err)
1356
+
module Storage = struct
1357
+
(** Load a user from disk *)
1358
+
let load_user state username =
1359
+
let file = Paths.user_file state username in
1361
+
let content = Eio.Path.load file in
1362
+
Json.user_of_string content
1364
+
| Eio.Io (Eio.Fs.E (Not_found _), _) -> None
1366
+
Log.err (fun m -> m "Error loading user %s: %s" username (Printexc.to_string e));
1369
+
(** Save a user to disk *)
1370
+
(** Serialize [user] to JSON and write it to the user's record file,
    creating or truncating it as needed (mode 0o644). *)
let save_user state user =
  let target = Paths.user_file state (User.username user) in
  Eio.Path.save ~create:(`Or_truncate 0o644) target (Json.user_to_string user)
1375
+
(** List all usernames *)
1376
+
let list_users state =
1378
+
Eio.Path.read_dir (Paths.users_dir state)
1379
+
|> List.filter_map (fun name ->
1380
+
if Filename.check_suffix name ".json" then
1381
+
Some (Filename.chop_suffix name ".json")
1386
+
(** Load existing Atom entries for a user *)
1387
+
let load_existing_posts state username =
1388
+
let file = Paths.user_feed_file state username in
1390
+
let content = Eio.Path.load file in
1391
+
(* Parse existing Atom feed *)
1392
+
let input = Xmlm.make_input (`String (0, content)) in
1393
+
let feed = Syndic.Atom.parse input in
1394
+
feed.Syndic.Atom.entries
1396
+
| Eio.Io (Eio.Fs.E (Not_found _), _) -> []
1398
+
Log.err (fun m -> m "Error loading existing posts for %s: %s"
1399
+
username (Printexc.to_string e));
1402
+
(** Save Atom entries for a user *)
1403
+
(** Build an Atom feed titled after [username] from [entries] and write
    its XML serialization to the user's feed file (mode 0o644). *)
let save_atom_feed state username entries =
  let target = Paths.user_feed_file state username in
  let xml =
    entries
    |> Format.Atom.feed_of_entries ~title:username
    |> Format.Atom.to_string
  in
  Eio.Path.save ~create:(`Or_truncate 0o644) target xml
1409
+
(** Delete a user and their feed file *)
1410
+
(** Delete [username]'s JSON record and Atom feed file.

    Best-effort and idempotent: a file that is already absent is silently
    ignored, but any other I/O failure (permissions, disk errors, ...) is
    logged instead of being swallowed, so real problems stay visible.
    Never raises. *)
let delete_user state username =
  (* Unlink one path, distinguishing "already gone" from genuine errors. *)
  let try_unlink path =
    try Eio.Path.unlink path with
    | Eio.Io (Eio.Fs.E (Not_found _), _) -> ()
    | exn ->
        Log.warn (fun m ->
            m "Could not delete %a for user %s: %s" Eio.Path.pp path username
              (Printexc.to_string exn))
  in
  try_unlink (Paths.user_file state username);
  try_unlink (Paths.user_feed_file state username)
1417
+
module Sync = struct
1418
+
(** Merge new entries with existing ones, updating matching IDs *)
1419
+
let merge_entries ~existing ~new_entries =
1420
+
(* Create a map of new entry IDs for efficient lookup and updates *)
1421
+
let module UriMap = Map.Make(Uri) in
1422
+
let new_entries_map =
1423
+
List.fold_left (fun acc (entry : Syndic.Atom.entry) ->
1424
+
UriMap.add entry.id entry acc
1425
+
) UriMap.empty new_entries
1428
+
(* Update existing entries with new ones if IDs match, otherwise keep existing *)
1429
+
let updated_existing =
1430
+
List.filter_map (fun (entry : Syndic.Atom.entry) ->
1431
+
if UriMap.mem entry.id new_entries_map then
1432
+
None (* Will be replaced by new entry *)
1434
+
Some entry (* Keep existing entry *)
1438
+
(* Combine new entries with non-replaced existing entries *)
1439
+
let combined = new_entries @ updated_existing in
1440
+
List.sort (fun (a : Syndic.Atom.entry) (b : Syndic.Atom.entry) ->
1441
+
Ptime.compare b.updated a.updated
1444
+
(** Current UTC time rendered as an ISO 8601 / RFC 3339 string,
    e.g. ["2024-01-31T12:00:00Z"]. *)
let current_timestamp () =
  let now = Unix.gmtime (Unix.time ()) in
  Printf.sprintf "%04d-%02d-%02dT%02d:%02d:%02dZ"
    (now.Unix.tm_year + 1900)
    (now.Unix.tm_mon + 1)
    now.Unix.tm_mday now.Unix.tm_hour now.Unix.tm_min now.Unix.tm_sec
1452
+
(** Sync feeds for a single user *)
1453
+
let sync_user session state ~username =
1454
+
match Storage.load_user state username with
1456
+
Error (Printf.sprintf "User %s not found" username)
1457
+
| Some user when User.feeds user = [] ->
1458
+
Log.info (fun m -> m "No feeds configured for user %s" username);
1461
+
Log.info (fun m -> m "Syncing feeds for user %s..." username);
1463
+
(* Fetch all feeds concurrently *)
1464
+
let fetched_feeds =
1465
+
Eio.Fiber.List.filter_map (fun source ->
1467
+
Log.info (fun m -> m " Fetching %s (%s)..."
1468
+
(Source.name source) (Source.url source));
1469
+
Some (Feed.fetch session source)
1471
+
Log.err (fun m -> m " Failed to fetch %s: %s"
1472
+
(Source.name source) (Printexc.to_string e));
1474
+
) (User.feeds user)
1477
+
if fetched_feeds = [] then begin
1478
+
Error "No feeds successfully fetched"
1480
+
(* Get posts from fetched feeds *)
1481
+
let posts = Post.of_feeds fetched_feeds in
1482
+
Log.info (fun m -> m " Found %d new posts" (List.length posts));
1484
+
(* Convert to Atom entries *)
1485
+
let new_entries = Format.Atom.entries_of_posts posts in
1487
+
(* Load existing entries *)
1488
+
let existing = Storage.load_existing_posts state username in
1489
+
Log.info (fun m -> m " Found %d existing posts" (List.length existing));
1491
+
(* Merge entries *)
1492
+
let merged = merge_entries ~existing ~new_entries in
1493
+
Log.info (fun m -> m " Total posts after merge: %d" (List.length merged));
1495
+
(* Save updated feed *)
1496
+
Storage.save_atom_feed state username merged;
1498
+
(* Update last_synced timestamp *)
1499
+
let now = current_timestamp () in
1500
+
let user = User.set_last_synced user now in
1501
+
Storage.save_user state user;
1503
+
Log.info (fun m -> m "Sync completed for user %s" username);
1508
+
module Export = struct
1509
+
(** Convert Atom entry to JSONFeed item *)
1510
+
let atom_entry_to_jsonfeed_item (entry : Syndic.Atom.entry) =
1512
+
let id = Uri.to_string entry.id in
1514
+
(* Extract title *)
1516
+
match entry.title with
1517
+
| Syndic.Atom.Text s -> Some s
1518
+
| Syndic.Atom.Html (_, s) -> Some s
1519
+
| Syndic.Atom.Xhtml (_, _) -> Some "Untitled"
1524
+
match entry.links with
1525
+
| link :: _ -> Some (Uri.to_string link.href)
1529
+
(* Extract content *)
1531
+
match entry.content with
1532
+
| Some (Syndic.Atom.Text s) -> `Text s
1533
+
| Some (Syndic.Atom.Html (_, s)) -> `Html s
1534
+
| Some (Syndic.Atom.Xhtml (_, nodes)) ->
1535
+
let html = String.concat "" (List.map Syndic.XML.to_string nodes) in
1537
+
| Some (Syndic.Atom.Mime _) | Some (Syndic.Atom.Src _) | None ->
1541
+
(* Extract summary *)
1543
+
match entry.summary with
1544
+
| Some (Syndic.Atom.Text s) when String.trim s <> "" -> Some s
1545
+
| Some (Syndic.Atom.Html (_, s)) when String.trim s <> "" -> Some s
1549
+
(* Extract authors *)
1551
+
let (author, contributors) = entry.authors in
1552
+
let author_list = author :: contributors in
1553
+
let jsonfeed_authors = List.filter_map (fun (a : Syndic.Atom.author) ->
1554
+
let name = String.trim a.name in
1555
+
if name = "" then None
1556
+
else Some (Jsonfeed.Author.create ~name ())
1558
+
if jsonfeed_authors = [] then None else Some jsonfeed_authors
1561
+
(* Extract tags *)
1563
+
match entry.categories with
1566
+
let tag_list = List.map (fun (c : Syndic.Atom.category) ->
1567
+
match c.label with
1571
+
if tag_list = [] then None else Some tag_list
1574
+
(* Create JSONFeed item *)
1575
+
Jsonfeed.Item.create
1583
+
~date_published:entry.updated
1586
+
(** Export entries as JSONFeed *)
1587
+
(** Serialize [entries] as a pretty-printed JSONFeed document titled
    [title]. Returns [Ok json] or [Error msg] describing the failure. *)
let export_jsonfeed ~title entries =
  let feed =
    Jsonfeed.create ~title
      ~items:(List.map atom_entry_to_jsonfeed_item entries)
      ()
  in
  Jsonfeed.to_string ~minify:false feed
  |> Result.map_error (fun err ->
         Printf.sprintf "Failed to serialize JSON Feed: %s"
           (Jsont.Error.to_string err))
1595
+
let create env ~app_name =
1596
+
let xdg = Xdge.create env#fs app_name in
1597
+
let state = { xdg } in
1598
+
Paths.ensure_directories state;
1601
+
let create_user state user =
1602
+
match Storage.load_user state (User.username user) with
1604
+
Error (Printf.sprintf "User %s already exists" (User.username user))
1606
+
Storage.save_user state user;
1607
+
Log.info (fun m -> m "User %s created" (User.username user));
1610
+
let delete_user state ~username =
1611
+
match Storage.load_user state username with
1613
+
Error (Printf.sprintf "User %s not found" username)
1615
+
Storage.delete_user state username;
1616
+
Log.info (fun m -> m "User %s deleted" username);
1619
+
(** Look up [username]'s stored record, if any. *)
let get_user state ~username = Storage.load_user state username
1622
+
let update_user state user =
1623
+
match Storage.load_user state (User.username user) with
1625
+
Error (Printf.sprintf "User %s not found" (User.username user))
1627
+
Storage.save_user state user;
1628
+
Log.info (fun m -> m "User %s updated" (User.username user));
1631
+
(** All usernames currently known to the state store. *)
let list_users state = Storage.list_users state
1634
+
(** Sync [username]'s feeds inside a freshly created session. *)
let sync_user env state ~username =
  Session.with_session env (fun session ->
      Sync.sync_user session state ~username)
1638
+
let sync_all env state =
1639
+
let users = Storage.list_users state in
1640
+
if users = [] then begin
1641
+
Log.info (fun m -> m "No users to sync");
1644
+
Log.info (fun m -> m "Syncing %d users concurrently..." (List.length users));
1646
+
Session.with_session env @@ fun session ->
1648
+
Eio.Fiber.List.map (fun username ->
1649
+
match Sync.sync_user session state ~username with
1652
+
Log.err (fun m -> m "Failed to sync user %s: %s" username err);
1656
+
let success_count = List.length (List.filter (fun x -> x) results) in
1657
+
let fail_count = List.length users - success_count in
1659
+
if fail_count = 0 then
1660
+
Log.info (fun m -> m "All users synced successfully");
1662
+
Ok (success_count, fail_count)
1665
+
let get_user_posts state ~username ?limit () =
1666
+
let entries = Storage.load_existing_posts state username in
1669
+
| Some n -> List.filteri (fun i _ -> i < n) entries
1671
+
let get_all_posts state ?limit () =
1672
+
let users = Storage.list_users state in
1674
+
(* Collect all entries from all users with username tag *)
1676
+
List.concat_map (fun username ->
1677
+
let entries = Storage.load_existing_posts state username in
1678
+
List.map (fun entry -> (username, entry)) entries
1682
+
(* Sort by date (newest first) *)
1683
+
let sorted = List.sort (fun (_, a : string * Syndic.Atom.entry) (_, b) ->
1684
+
Ptime.compare b.updated a.updated
1689
+
| Some n -> List.filteri (fun i _ -> i < n) sorted
1691
+
let export_merged_feed state ~title ~format ?limit () =
1692
+
let all_posts = get_all_posts state ?limit () in
1693
+
let entries = List.map snd all_posts in
1697
+
let xml = Format.Atom.to_string (Format.Atom.feed_of_entries ~title entries) in
1700
+
if entries = [] then
1701
+
(* Empty JSONFeed *)
1702
+
let feed = Jsonfeed.create ~title ~items:[] () in
1703
+
match Jsonfeed.to_string ~minify:false feed with
1704
+
| Ok json -> Ok json
1705
+
| Error err -> Error (Printf.sprintf "Failed to serialize JSON Feed: %s" (Jsont.Error.to_string err))
1707
+
Export.export_jsonfeed ~title entries
1709
+
let analyze_user_quality state ~username =
1710
+
match Storage.load_user state username with
1712
+
Error (Printf.sprintf "User %s not found" username)
1714
+
let entries = Storage.load_existing_posts state username in
1715
+
if entries = [] then
1716
+
Error "No entries to analyze"
1718
+
Ok (Quality.analyze entries)