My agentic slop goes here. Not intended for anyone else!
at main 5.2 kB view raw
1(* 2 * Copyright (c) 2014, OCaml.org project 3 * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 *) 17 18(** Feed fetching and parsing. *) 19 20let src = Logs.Src.create "river" ~doc:"River RSS/Atom aggregator" 21module Log = (val Logs.src_log src : Logs.LOG) 22 23type feed_content = River_jsonfeed.t 24 25type t = { 26 source : Source.t; 27 title : string; 28 content : feed_content; 29 original_format : string; (* "Atom", "RSS2", or "JSONFeed" *) 30} 31 32let classify_feed ~xmlbase (body : string) = 33 Log.debug (fun m -> m "Attempting to parse feed (%d bytes)" (String.length body)); 34 35 (* Quick check - does it look like JSON? *) 36 let looks_like_json = 37 String.length body > 0 && 38 let first_char = String.get body 0 in 39 first_char = '{' || first_char = '[' 40 in 41 42 if looks_like_json then ( 43 (* Try JSONFeed first *) 44 Log.debug (fun m -> m "Body looks like JSON, trying JSONFeed parser"); 45 match Jsonfeed.of_string body with 46 | Ok jsonfeed -> 47 Log.debug (fun m -> m "Successfully parsed as JSONFeed"); 48 (* Wrap plain JSONFeed with River_jsonfeed (no extensions needed) *) 49 let river_jsonfeed = { River_jsonfeed.feed = jsonfeed; extension = None } in 50 (river_jsonfeed, "JSONFeed") 51 | Error err -> 52 let err_str = Jsont.Error.to_string err in 53 Log.debug (fun m -> m "Not a JSONFeed: %s" err_str); 54 (* Fall through to XML parsing *) 55 failwith (Printf.sprintf "Not a valid JSONFeed: %s" err_str) 56 ) else ( 57 (* Try XML formats *) 58 try 59 let atom_feed = Syndic.Atom.parse ~xmlbase (Xmlm.make_input (`String (0, body))) in 60 Log.debug (fun m -> m "Successfully parsed as Atom feed, converting to JSONFeed"); 61 (* Convert Atom to JSONFeed with extensions *) 62 let river_jsonfeed = River_jsonfeed.of_atom atom_feed in 63 (river_jsonfeed, "Atom") 64 with 65 | Syndic.Atom.Error.Error (pos, msg) -> ( 66 Log.debug (fun m -> m "Not an Atom feed: %s at position (%d, %d)" 67 msg (fst pos) (snd pos)); 68 try 69 let rss2_channel = Syndic.Rss2.parse ~xmlbase (Xmlm.make_input (`String (0, body))) in 70 Log.debug (fun m -> m "Successfully parsed as RSS2 feed, converting to JSONFeed"); 71 (* Convert RSS2 to JSONFeed *) 72 let river_jsonfeed = River_jsonfeed.of_rss2 rss2_channel in 73 (river_jsonfeed, "RSS2") 74 with Syndic.Rss2.Error.Error (pos, msg) -> 75 Log.err (fun m -> m "Failed to parse as RSS2: %s at position (%d, %d)" 76 msg (fst pos) (snd pos)); 77 failwith "Neither Atom nor RSS2 feed") 78 | Not_found as e -> 79 Log.err (fun m -> m "Not_found exception during Atom feed parsing"); 80 Log.err (fun m -> m "Backtrace:\n%s" (Printexc.get_backtrace ())); 81 raise e 82 | e -> 83 Log.err (fun m -> m "Unexpected exception during feed parsing: %s" 84 (Printexc.to_string e)); 85 Log.err (fun m -> m "Backtrace:\n%s" (Printexc.get_backtrace ())); 86 raise e 87 ) 88 89let fetch session source = 90 Log.info (fun m -> m "Fetching feed: %s" (Source.name source)); 91 92 let xmlbase = Uri.of_string (Source.url source) in 93 94 (* Use Requests_json_api.get_result for clean Result-based error handling *) 95 let requests_session = Session.get_requests_session session in 96 let response = 97 match Requests_json_api.get_result requests_session (Source.url source) with 98 | Ok body -> 99 Log.info (fun m -> m "Successfully fetched %s (%d bytes)" 100 (Source.url source) (String.length body)); 101 body 102 | Error (status, msg) -> 103 let truncated_msg = 104 if String.length msg > 200 105 then String.sub msg 0 200 ^ "..." 106 else msg 107 in 108 Log.err (fun m -> m "Failed to fetch feed '%s': HTTP %d - %s" 109 (Source.name source) status truncated_msg); 110 failwith (Printf.sprintf "HTTP %d: %s" status truncated_msg) 111 in 112 113 let (content, original_format) = 114 try classify_feed ~xmlbase response 115 with Failure msg -> 116 Log.err (fun m -> m "Failed to parse feed '%s' (%s): %s" 117 (Source.name source) (Source.url source) msg); 118 raise (Failure msg) 119 in 120 121 let title = Jsonfeed.title content.River_jsonfeed.feed in 122 123 Log.info (fun m -> m "Successfully fetched %s feed '%s' (title: '%s'), converted to JSONFeed" 124 original_format (Source.name source) title); 125 126 { source; title; content; original_format } 127 128let source t = t.source 129let content t = t.content 130let title t = t.title