My agentic slop goes here. Not intended for anyone else!
at jsont 4.3 kB view raw
1(* 2 * Copyright (c) 2014, OCaml.org project 3 * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 *) 17 18let src = Logs.Src.create "river.feed" ~doc:"River feed parsing" 19module Log = (val Logs.src_log src : Logs.LOG) 20 21type source = { name : string; url : string } 22type content = Atom of Syndic.Atom.feed | Rss2 of Syndic.Rss2.channel | Json of Jsonfeed.t 23 24let string_of_feed = function Atom _ -> "Atom" | Rss2 _ -> "Rss2" | Json _ -> "JSONFeed" 25 26type t = { name : string; title : string; url : string; content : content } 27 28let classify_feed ~xmlbase (body : string) = 29 Log.debug (fun m -> m "Attempting to parse feed (%d bytes)" (String.length body)); 30 31 (* Quick check - does it look like JSON? *) 32 let looks_like_json = 33 String.length body > 0 && 34 let first_char = String.get body 0 in 35 first_char = '{' || first_char = '[' 36 in 37 38 if looks_like_json then ( 39 (* Try JSONFeed first *) 40 Log.debug (fun m -> m "Body looks like JSON, trying JSONFeed parser"); 41 match Jsonfeed.of_string body with 42 | Ok jsonfeed -> 43 Log.debug (fun m -> m "Successfully parsed as JSONFeed"); 44 Json jsonfeed 45 | Error err -> 46 Log.debug (fun m -> m "Not a JSONFeed: %s" (Jsont.Error.to_string err)); 47 (* Fall through to XML parsing *) 48 failwith "Not a valid JSONFeed" 49 ) else ( 50 (* Try XML formats *) 51 try 52 let feed = Atom (Syndic.Atom.parse ~xmlbase (Xmlm.make_input (`String (0, body)))) in 53 Log.debug (fun m -> m "Successfully parsed as Atom feed"); 54 feed 55 with 56 | Syndic.Atom.Error.Error (pos, msg) -> ( 57 Log.debug (fun m -> m "Not an Atom feed: %s at position (%d, %d)" 58 msg (fst pos) (snd pos)); 59 try 60 let feed = Rss2 (Syndic.Rss2.parse ~xmlbase (Xmlm.make_input (`String (0, body)))) in 61 Log.debug (fun m -> m "Successfully parsed as RSS2 feed"); 62 feed 63 with Syndic.Rss2.Error.Error (pos, msg) -> 64 Log.err (fun m -> m "Failed to parse as RSS2: %s at position (%d, %d)" 65 msg (fst pos) (snd pos)); 66 failwith "Neither Atom nor RSS2 feed") 67 | Not_found as e -> 68 Log.err (fun m -> m "Not_found exception during Atom feed parsing"); 69 Log.err (fun m -> m "Backtrace:\n%s" (Printexc.get_backtrace ())); 70 raise e 71 | e -> 72 Log.err (fun m -> m "Unexpected exception during feed parsing: %s" 73 (Printexc.to_string e)); 74 Log.err (fun m -> m "Backtrace:\n%s" (Printexc.get_backtrace ())); 75 raise e 76 ) 77 78let fetch client (source : source) = 79 Log.info (fun m -> m "Fetching feed '%s' from %s" source.name source.url); 80 81 let xmlbase = Uri.of_string @@ source.url in 82 83 (* Use Requests_json_api.get_result for clean Result-based error handling *) 84 let session = Client.session client in 85 let response = 86 match Requests_json_api.get_result session source.url with 87 | Ok body -> 88 Log.info (fun m -> m "Successfully fetched %s (%d bytes)" source.url (String.length body)); 89 body 90 | Error (status, msg) -> 91 Log.err (fun m -> m "Failed to fetch feed '%s': HTTP %d - %s" source.name status msg); 92 failwith (Printf.sprintf "HTTP %d: %s" status msg) 93 in 94 95 let content = classify_feed ~xmlbase response in 96 let title = 97 match content with 98 | Atom atom -> Util.string_of_text_construct atom.Syndic.Atom.title 99 | Rss2 ch -> ch.Syndic.Rss2.title 100 | Json jsonfeed -> Jsonfeed.title jsonfeed 101 in 102 103 Log.info (fun m -> m "Successfully fetched %s feed '%s' (title: '%s')" 104 (string_of_feed content) source.name title); 105 106 { name = source.name; title; content; url = source.url }