My agentic slop goes here. Not intended for anyone else!
1(*
2 * Copyright (c) 2014, OCaml.org project
3 * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 *)
17
18(** Feed fetching and parsing. *)
19
20let src = Logs.Src.create "river" ~doc:"River RSS/Atom aggregator"
21module Log = (val Logs.src_log src : Logs.LOG)
22
23type feed_content = River_jsonfeed.t
24
25type t = {
26 source : Source.t;
27 title : string;
28 content : feed_content;
29 original_format : string; (* "Atom", "RSS2", or "JSONFeed" *)
30}
31
32let classify_feed ~xmlbase (body : string) =
33 Log.debug (fun m -> m "Attempting to parse feed (%d bytes)" (String.length body));
34
35 (* Quick check - does it look like JSON? *)
36 let looks_like_json =
37 String.length body > 0 &&
38 let first_char = String.get body 0 in
39 first_char = '{' || first_char = '['
40 in
41
42 if looks_like_json then (
43 (* Try JSONFeed first *)
44 Log.debug (fun m -> m "Body looks like JSON, trying JSONFeed parser");
45 match Jsonfeed.of_string body with
46 | Ok jsonfeed ->
47 Log.debug (fun m -> m "Successfully parsed as JSONFeed");
48 (* Wrap plain JSONFeed with River_jsonfeed (no extensions needed) *)
49 let river_jsonfeed = { River_jsonfeed.feed = jsonfeed; extension = None } in
50 (river_jsonfeed, "JSONFeed")
51 | Error err ->
52 let err_str = Jsont.Error.to_string err in
53 Log.debug (fun m -> m "Not a JSONFeed: %s" err_str);
54 (* Fall through to XML parsing *)
55 failwith (Printf.sprintf "Not a valid JSONFeed: %s" err_str)
56 ) else (
57 (* Try XML formats *)
58 try
59 let atom_feed = Syndic.Atom.parse ~xmlbase (Xmlm.make_input (`String (0, body))) in
60 Log.debug (fun m -> m "Successfully parsed as Atom feed, converting to JSONFeed");
61 (* Convert Atom to JSONFeed with extensions *)
62 let river_jsonfeed = River_jsonfeed.of_atom atom_feed in
63 (river_jsonfeed, "Atom")
64 with
65 | Syndic.Atom.Error.Error (pos, msg) -> (
66 Log.debug (fun m -> m "Not an Atom feed: %s at position (%d, %d)"
67 msg (fst pos) (snd pos));
68 try
69 let rss2_channel = Syndic.Rss2.parse ~xmlbase (Xmlm.make_input (`String (0, body))) in
70 Log.debug (fun m -> m "Successfully parsed as RSS2 feed, converting to JSONFeed");
71 (* Convert RSS2 to JSONFeed *)
72 let river_jsonfeed = River_jsonfeed.of_rss2 rss2_channel in
73 (river_jsonfeed, "RSS2")
74 with Syndic.Rss2.Error.Error (pos, msg) ->
75 Log.err (fun m -> m "Failed to parse as RSS2: %s at position (%d, %d)"
76 msg (fst pos) (snd pos));
77 failwith "Neither Atom nor RSS2 feed")
78 | Not_found as e ->
79 Log.err (fun m -> m "Not_found exception during Atom feed parsing");
80 Log.err (fun m -> m "Backtrace:\n%s" (Printexc.get_backtrace ()));
81 raise e
82 | e ->
83 Log.err (fun m -> m "Unexpected exception during feed parsing: %s"
84 (Printexc.to_string e));
85 Log.err (fun m -> m "Backtrace:\n%s" (Printexc.get_backtrace ()));
86 raise e
87 )
88
89let fetch session source =
90 Log.info (fun m -> m "Fetching feed: %s" (Source.name source));
91
92 let xmlbase = Uri.of_string (Source.url source) in
93
94 (* Use Requests_json_api.get_result for clean Result-based error handling *)
95 let requests_session = Session.get_requests_session session in
96 let response =
97 match Requests_json_api.get_result requests_session (Source.url source) with
98 | Ok body ->
99 Log.info (fun m -> m "Successfully fetched %s (%d bytes)"
100 (Source.url source) (String.length body));
101 body
102 | Error (status, msg) ->
103 let truncated_msg =
104 if String.length msg > 200
105 then String.sub msg 0 200 ^ "..."
106 else msg
107 in
108 Log.err (fun m -> m "Failed to fetch feed '%s': HTTP %d - %s"
109 (Source.name source) status truncated_msg);
110 failwith (Printf.sprintf "HTTP %d: %s" status truncated_msg)
111 in
112
113 let (content, original_format) =
114 try classify_feed ~xmlbase response
115 with Failure msg ->
116 Log.err (fun m -> m "Failed to parse feed '%s' (%s): %s"
117 (Source.name source) (Source.url source) msg);
118 raise (Failure msg)
119 in
120
121 let title = Jsonfeed.title content.River_jsonfeed.feed in
122
123 Log.info (fun m -> m "Successfully fetched %s feed '%s' (title: '%s'), converted to JSONFeed"
124 original_format (Source.name source) title);
125
126 { source; title; content; original_format }
127
128let source t = t.source
129let content t = t.content
130let title t = t.title