My agentic slop goes here. Not intended for anyone else!
1(*
2 * Copyright (c) 2014, OCaml.org project
3 * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 *)
17
18let src = Logs.Src.create "river.feed" ~doc:"River feed parsing"
19module Log = (val Logs.src_log src : Logs.LOG)
20
21type source = { name : string; url : string }
22type content = Atom of Syndic.Atom.feed | Rss2 of Syndic.Rss2.channel | Json of Jsonfeed.t
23
24let string_of_feed = function Atom _ -> "Atom" | Rss2 _ -> "Rss2" | Json _ -> "JSONFeed"
25
26type t = { name : string; title : string; url : string; content : content }
27
28let classify_feed ~xmlbase (body : string) =
29 Log.debug (fun m -> m "Attempting to parse feed (%d bytes)" (String.length body));
30
31 (* Quick check - does it look like JSON? *)
32 let looks_like_json =
33 String.length body > 0 &&
34 let first_char = String.get body 0 in
35 first_char = '{' || first_char = '['
36 in
37
38 if looks_like_json then (
39 (* Try JSONFeed first *)
40 Log.debug (fun m -> m "Body looks like JSON, trying JSONFeed parser");
41 match Jsonfeed.of_string body with
42 | Ok jsonfeed ->
43 Log.debug (fun m -> m "Successfully parsed as JSONFeed");
44 Json jsonfeed
45 | Error err ->
46 Log.debug (fun m -> m "Not a JSONFeed: %s" (Jsont.Error.to_string err));
47 (* Fall through to XML parsing *)
48 failwith "Not a valid JSONFeed"
49 ) else (
50 (* Try XML formats *)
51 try
52 let feed = Atom (Syndic.Atom.parse ~xmlbase (Xmlm.make_input (`String (0, body)))) in
53 Log.debug (fun m -> m "Successfully parsed as Atom feed");
54 feed
55 with
56 | Syndic.Atom.Error.Error (pos, msg) -> (
57 Log.debug (fun m -> m "Not an Atom feed: %s at position (%d, %d)"
58 msg (fst pos) (snd pos));
59 try
60 let feed = Rss2 (Syndic.Rss2.parse ~xmlbase (Xmlm.make_input (`String (0, body)))) in
61 Log.debug (fun m -> m "Successfully parsed as RSS2 feed");
62 feed
63 with Syndic.Rss2.Error.Error (pos, msg) ->
64 Log.err (fun m -> m "Failed to parse as RSS2: %s at position (%d, %d)"
65 msg (fst pos) (snd pos));
66 failwith "Neither Atom nor RSS2 feed")
67 | Not_found as e ->
68 Log.err (fun m -> m "Not_found exception during Atom feed parsing");
69 Log.err (fun m -> m "Backtrace:\n%s" (Printexc.get_backtrace ()));
70 raise e
71 | e ->
72 Log.err (fun m -> m "Unexpected exception during feed parsing: %s"
73 (Printexc.to_string e));
74 Log.err (fun m -> m "Backtrace:\n%s" (Printexc.get_backtrace ()));
75 raise e
76 )
77
78let fetch client (source : source) =
79 Log.info (fun m -> m "Fetching feed '%s' from %s" source.name source.url);
80
81 let xmlbase = Uri.of_string @@ source.url in
82
83 (* Use Requests_json_api.get_result for clean Result-based error handling *)
84 let session = Client.session client in
85 let response =
86 match Requests_json_api.get_result session source.url with
87 | Ok body ->
88 Log.info (fun m -> m "Successfully fetched %s (%d bytes)" source.url (String.length body));
89 body
90 | Error (status, msg) ->
91 Log.err (fun m -> m "Failed to fetch feed '%s': HTTP %d - %s" source.name status msg);
92 failwith (Printf.sprintf "HTTP %d: %s" status msg)
93 in
94
95 let content = classify_feed ~xmlbase response in
96 let title =
97 match content with
98 | Atom atom -> Util.string_of_text_construct atom.Syndic.Atom.title
99 | Rss2 ch -> ch.Syndic.Rss2.title
100 | Json jsonfeed -> Jsonfeed.title jsonfeed
101 in
102
103 Log.info (fun m -> m "Successfully fetched %s feed '%s' (title: '%s')"
104 (string_of_feed content) source.name title);
105
106 { name = source.name; title; content; url = source.url }