OCaml library for JSONfeed parsing and creation
1(** JSON Feed format parser and serializer. *) 2 3exception Invalid_feed of string 4 5module Author = Author 6module Attachment = Attachment 7module Hub = Hub 8module Item = Item 9 10type t = { 11 version : string; 12 title : string; 13 home_page_url : string option; 14 feed_url : string option; 15 description : string option; 16 user_comment : string option; 17 next_url : string option; 18 icon : string option; 19 favicon : string option; 20 authors : Author.t list option; 21 language : string option; 22 expired : bool option; 23 hubs : Hub.t list option; 24 items : Item.t list; 25} 26 27let create ~title ?home_page_url ?feed_url ?description ?user_comment 28 ?next_url ?icon ?favicon ?authors ?language ?expired ?hubs ~items () = 29 { 30 version = "https://jsonfeed.org/version/1.1"; 31 title; 32 home_page_url; 33 feed_url; 34 description; 35 user_comment; 36 next_url; 37 icon; 38 favicon; 39 authors; 40 language; 41 expired; 42 hubs; 43 items; 44 } 45 46let version t = t.version 47let title t = t.title 48let home_page_url t = t.home_page_url 49let feed_url t = t.feed_url 50let description t = t.description 51let user_comment t = t.user_comment 52let next_url t = t.next_url 53let icon t = t.icon 54let favicon t = t.favicon 55let authors t = t.authors 56let language t = t.language 57let expired t = t.expired 58let hubs t = t.hubs 59let items t = t.items 60 61(* RFC3339 date utilities *) 62 63let parse_rfc3339 s = 64 match Ptime.of_rfc3339 s with 65 | Ok (t, _, _) -> Some t 66 | Error _ -> None 67 68let format_rfc3339 t = 69 Ptime.to_rfc3339 t 70 71(* JSON parsing and serialization *) 72 73type error = [ `Msg of string ] 74 75let error_msgf fmt = Format.kasprintf (fun s -> Error (`Msg s)) fmt 76 77(* JSON parsing helpers *) 78 79type json_value = 80 | Null 81 | Bool of bool 82 | Float of float 83 | String of string 84 | Array of json_value list 85 | Object of (string * json_value) list 86 87let rec decode_value dec = 88 match Jsonm.decode dec with 89 | `Lexeme `Null -> Null 90 | `Lexeme (`Bool b) -> Bool b 91 | `Lexeme (`Float f) -> Float f 92 | `Lexeme (`String s) -> String s 93 | `Lexeme `Os -> decode_object dec [] 94 | `Lexeme `As -> decode_array dec [] 95 | `Lexeme _ -> Null 96 | `Error err -> raise (Invalid_feed (Format.asprintf "%a" Jsonm.pp_error err)) 97 | `End | `Await -> Null 98 99and decode_object dec acc = 100 match Jsonm.decode dec with 101 | `Lexeme `Oe -> Object (List.rev acc) 102 | `Lexeme (`Name n) -> 103 let v = decode_value dec in 104 decode_object dec ((n, v) :: acc) 105 | `Error err -> raise (Invalid_feed (Format.asprintf "%a" Jsonm.pp_error err)) 106 | _ -> Object (List.rev acc) 107 108and decode_array dec acc = 109 match Jsonm.decode dec with 110 | `Lexeme `Ae -> Array (List.rev acc) 111 | `Lexeme `Os -> 112 let v = decode_object dec [] in 113 decode_array dec (v :: acc) 114 | `Lexeme `As -> 115 let v = decode_array dec [] in 116 decode_array dec (v :: acc) 117 | `Lexeme `Null -> decode_array dec (Null :: acc) 118 | `Lexeme (`Bool b) -> decode_array dec (Bool b :: acc) 119 | `Lexeme (`Float f) -> decode_array dec (Float f :: acc) 120 | `Lexeme (`String s) -> decode_array dec (String s :: acc) 121 | `Error err -> raise (Invalid_feed (Format.asprintf "%a" Jsonm.pp_error err)) 122 | _ -> Array (List.rev acc) 123 124(* Helpers to extract values from JSON *) 125 126let get_string = function String s -> Some s | _ -> None 127let get_bool = function Bool b -> Some b | _ -> None 128let _get_float = function Float f -> Some f | _ -> None 129let get_int = function Float f -> Some (int_of_float f) | _ -> None 130let get_int64 = function Float f -> Some (Int64.of_float f) | _ -> None 131let get_array = function Array arr -> Some arr | _ -> None 132let _get_object = function Object obj -> Some obj | _ -> None 133 134let find_field name obj = List.assoc_opt name obj 135 136let require_field name obj = 137 match find_field name obj with 138 | Some v -> v 139 | None -> raise (Invalid_feed (Printf.sprintf "Missing required field: %s" name)) 140 141let require_string name obj = 142 match require_field name obj |> get_string with 143 | Some s -> s 144 | None -> raise (Invalid_feed (Printf.sprintf "Field %s must be a string" name)) 145 146let optional_string name obj = 147 match find_field name obj with Some v -> get_string v | None -> None 148 149let optional_bool name obj = 150 match find_field name obj with Some v -> get_bool v | None -> None 151 152let optional_int name obj = 153 match find_field name obj with Some v -> get_int v | None -> None 154 155let optional_int64 name obj = 156 match find_field name obj with Some v -> get_int64 v | None -> None 157 158let optional_array name obj = 159 match find_field name obj with Some v -> get_array v | None -> None 160 161(* Parse Author *) 162 163let parse_author_obj obj = 164 let name = optional_string "name" obj in 165 let url = optional_string "url" obj in 166 let avatar = optional_string "avatar" obj in 167 if name = None && url = None && avatar = None then 168 raise (Invalid_feed "Author must have at least one field"); 169 Author.create ?name ?url ?avatar () 170 171let parse_author = function 172 | Object obj -> parse_author_obj obj 173 | _ -> raise (Invalid_feed "Author must be an object") 174 175(* Parse Attachment *) 176 177let parse_attachment_obj obj = 178 let url = require_string "url" obj in 179 let mime_type = require_string "mime_type" obj in 180 let title = optional_string "title" obj in 181 let size_in_bytes = optional_int64 "size_in_bytes" obj in 182 let duration_in_seconds = optional_int "duration_in_seconds" obj in 183 Attachment.create ~url ~mime_type ?title ?size_in_bytes ?duration_in_seconds () 184 185let parse_attachment = function 186 | Object obj -> parse_attachment_obj obj 187 | _ -> raise (Invalid_feed "Attachment must be an object") 188 189(* Parse Hub *) 190 191let parse_hub_obj obj = 192 let type_ = require_string "type" obj in 193 let url = require_string "url" obj in 194 Hub.create ~type_ ~url () 195 196let parse_hub = function 197 | Object obj -> parse_hub_obj obj 198 | _ -> raise (Invalid_feed "Hub must be an object") 199 200(* Parse Item *) 201 202let parse_item_obj obj = 203 let id = require_string "id" obj in 204 205 (* Parse content - at least one required *) 206 let content_html = optional_string "content_html" obj in 207 let content_text = optional_string "content_text" obj in 208 let content = match content_html, content_text with 209 | Some html, Some text -> `Both (html, text) 210 | Some html, None -> `Html html 211 | None, Some text -> `Text text 212 | None, None -> 213 raise (Invalid_feed "Item must have content_html or content_text") 214 in 215 216 let url = optional_string "url" obj in 217 let external_url = optional_string "external_url" obj in 218 let title = optional_string "title" obj in 219 let summary = optional_string "summary" obj in 220 let image = optional_string "image" obj in 221 let banner_image = optional_string "banner_image" obj in 222 223 let date_published = 224 match optional_string "date_published" obj with 225 | Some s -> parse_rfc3339 s 226 | None -> None 227 in 228 229 let date_modified = 230 match optional_string "date_modified" obj with 231 | Some s -> parse_rfc3339 s 232 | None -> None 233 in 234 235 let authors = 236 match optional_array "authors" obj with 237 | Some arr -> 238 let parsed = List.map parse_author arr in 239 if parsed = [] then None else Some parsed 240 | None -> None 241 in 242 243 let tags = 244 match optional_array "tags" obj with 245 | Some arr -> 246 let parsed = List.filter_map get_string arr in 247 if parsed = [] then None else Some parsed 248 | None -> None 249 in 250 251 let language = optional_string "language" obj in 252 253 let attachments = 254 match optional_array "attachments" obj with 255 | Some arr -> 256 let parsed = List.map parse_attachment arr in 257 if parsed = [] then None else Some parsed 258 | None -> None 259 in 260 261 Item.create ~id ~content ?url ?external_url ?title ?summary ?image 262 ?banner_image ?date_published ?date_modified ?authors ?tags ?language 263 ?attachments () 264 265let parse_item = function 266 | Object obj -> parse_item_obj obj 267 | _ -> raise (Invalid_feed "Item must be an object") 268 269(* Parse Feed *) 270 271let parse_feed_obj obj = 272 let version = require_string "version" obj in 273 let title = require_string "title" obj in 274 let home_page_url = optional_string "home_page_url" obj in 275 let feed_url = optional_string "feed_url" obj in 276 let description = optional_string "description" obj in 277 let user_comment = optional_string "user_comment" obj in 278 let next_url = optional_string "next_url" obj in 279 let icon = optional_string "icon" obj in 280 let favicon = optional_string "favicon" obj in 281 let language = optional_string "language" obj in 282 let expired = optional_bool "expired" obj in 283 284 let authors = 285 match optional_array "authors" obj with 286 | Some arr -> 287 let parsed = List.map parse_author arr in 288 if parsed = [] then None else Some parsed 289 | None -> None 290 in 291 292 let hubs = 293 match optional_array "hubs" obj with 294 | Some arr -> 295 let parsed = List.map parse_hub arr in 296 if parsed = [] then None else Some parsed 297 | None -> None 298 in 299 300 let items = 301 match optional_array "items" obj with 302 | Some arr -> List.map parse_item arr 303 | None -> [] 304 in 305 306 { 307 version; 308 title; 309 home_page_url; 310 feed_url; 311 description; 312 user_comment; 313 next_url; 314 icon; 315 favicon; 316 authors; 317 language; 318 expired; 319 hubs; 320 items; 321 } 322 323let of_jsonm dec = 324 try 325 let json = decode_value dec in 326 match json with 327 | Object obj -> Ok (parse_feed_obj obj) 328 | _ -> error_msgf "Feed must be a JSON object" 329 with 330 | Invalid_feed msg -> error_msgf "%s" msg 331 332(* JSON serialization *) 333 334let to_jsonm enc feed = 335 (* Simplified serialization using Jsonm *) 336 let enc_field name value_fn = 337 ignore (Jsonm.encode enc (`Lexeme (`Name name))); 338 value_fn () 339 in 340 341 let enc_string s = 342 ignore (Jsonm.encode enc (`Lexeme (`String s))) 343 in 344 345 let enc_bool b = 346 ignore (Jsonm.encode enc (`Lexeme (`Bool b))) 347 in 348 349 let enc_opt enc_fn = function 350 | None -> () 351 | Some v -> enc_fn v 352 in 353 354 let enc_list enc_fn lst = 355 ignore (Jsonm.encode enc (`Lexeme `As)); 356 List.iter enc_fn lst; 357 ignore (Jsonm.encode enc (`Lexeme `Ae)) 358 in 359 360 let enc_author author = 361 ignore (Jsonm.encode enc (`Lexeme `Os)); 362 (match Author.name author with 363 | Some name -> enc_field "name" (fun () -> enc_string name) 364 | None -> ()); 365 (match Author.url author with 366 | Some url -> enc_field "url" (fun () -> enc_string url) 367 | None -> ()); 368 (match Author.avatar author with 369 | Some avatar -> enc_field "avatar" (fun () -> enc_string avatar) 370 | None -> ()); 371 ignore (Jsonm.encode enc (`Lexeme `Oe)) 372 in 373 374 let enc_attachment att = 375 ignore (Jsonm.encode enc (`Lexeme `Os)); 376 enc_field "url" (fun () -> enc_string (Attachment.url att)); 377 enc_field "mime_type" (fun () -> enc_string (Attachment.mime_type att)); 378 enc_opt (fun title -> enc_field "title" (fun () -> enc_string title)) 379 (Attachment.title att); 380 enc_opt (fun size -> 381 enc_field "size_in_bytes" (fun () -> 382 ignore (Jsonm.encode enc (`Lexeme (`Float (Int64.to_float size)))))) 383 (Attachment.size_in_bytes att); 384 enc_opt (fun dur -> 385 enc_field "duration_in_seconds" (fun () -> 386 ignore (Jsonm.encode enc (`Lexeme (`Float (float_of_int dur)))))) 387 (Attachment.duration_in_seconds att); 388 ignore (Jsonm.encode enc (`Lexeme `Oe)) 389 in 390 391 let enc_hub hub = 392 ignore (Jsonm.encode enc (`Lexeme `Os)); 393 enc_field "type" (fun () -> enc_string (Hub.type_ hub)); 394 enc_field "url" (fun () -> enc_string (Hub.url hub)); 395 ignore (Jsonm.encode enc (`Lexeme `Oe)) 396 in 397 398 let enc_item item = 399 ignore (Jsonm.encode enc (`Lexeme `Os)); 400 enc_field "id" (fun () -> enc_string (Item.id item)); 401 402 (* Encode content *) 403 (match Item.content item with 404 | `Html html -> 405 enc_field "content_html" (fun () -> enc_string html) 406 | `Text text -> 407 enc_field "content_text" (fun () -> enc_string text) 408 | `Both (html, text) -> 409 enc_field "content_html" (fun () -> enc_string html); 410 enc_field "content_text" (fun () -> enc_string text)); 411 412 enc_opt (fun url -> enc_field "url" (fun () -> enc_string url)) 413 (Item.url item); 414 enc_opt (fun url -> enc_field "external_url" (fun () -> enc_string url)) 415 (Item.external_url item); 416 enc_opt (fun title -> enc_field "title" (fun () -> enc_string title)) 417 (Item.title item); 418 enc_opt (fun summary -> enc_field "summary" (fun () -> enc_string summary)) 419 (Item.summary item); 420 enc_opt (fun img -> enc_field "image" (fun () -> enc_string img)) 421 (Item.image item); 422 enc_opt (fun img -> enc_field "banner_image" (fun () -> enc_string img)) 423 (Item.banner_image item); 424 enc_opt (fun date -> enc_field "date_published" (fun () -> enc_string (format_rfc3339 date))) 425 (Item.date_published item); 426 enc_opt (fun date -> enc_field "date_modified" (fun () -> enc_string (format_rfc3339 date))) 427 (Item.date_modified item); 428 enc_opt (fun authors -> 429 enc_field "authors" (fun () -> enc_list enc_author authors)) 430 (Item.authors item); 431 enc_opt (fun tags -> 432 enc_field "tags" (fun () -> enc_list enc_string tags)) 433 (Item.tags item); 434 enc_opt (fun lang -> enc_field "language" (fun () -> enc_string lang)) 435 (Item.language item); 436 enc_opt (fun atts -> 437 enc_field "attachments" (fun () -> enc_list enc_attachment atts)) 438 (Item.attachments item); 439 440 ignore (Jsonm.encode enc (`Lexeme `Oe)) 441 in 442 443 (* Encode the feed *) 444 ignore (Jsonm.encode enc (`Lexeme `Os)); 445 enc_field "version" (fun () -> enc_string feed.version); 446 enc_field "title" (fun () -> enc_string feed.title); 447 enc_opt (fun url -> enc_field "home_page_url" (fun () -> enc_string url)) 448 feed.home_page_url; 449 enc_opt (fun url -> enc_field "feed_url" (fun () -> enc_string url)) 450 feed.feed_url; 451 enc_opt (fun desc -> enc_field "description" (fun () -> enc_string desc)) 452 feed.description; 453 enc_opt (fun comment -> enc_field "user_comment" (fun () -> enc_string comment)) 454 feed.user_comment; 455 enc_opt (fun url -> enc_field "next_url" (fun () -> enc_string url)) 456 feed.next_url; 457 enc_opt (fun icon -> enc_field "icon" (fun () -> enc_string icon)) 458 feed.icon; 459 enc_opt (fun favicon -> enc_field "favicon" (fun () -> enc_string favicon)) 460 feed.favicon; 461 enc_opt (fun authors -> 462 enc_field "authors" (fun () -> enc_list enc_author authors)) 463 feed.authors; 464 enc_opt (fun lang -> enc_field "language" (fun () -> enc_string lang)) 465 feed.language; 466 enc_opt (fun expired -> enc_field "expired" (fun () -> enc_bool expired)) 467 feed.expired; 468 enc_opt (fun hubs -> 469 enc_field "hubs" (fun () -> enc_list enc_hub hubs)) 470 feed.hubs; 471 enc_field "items" (fun () -> enc_list enc_item feed.items); 472 ignore (Jsonm.encode enc (`Lexeme `Oe)); 473 ignore (Jsonm.encode enc `End) 474 475let of_string s = 476 let dec = Jsonm.decoder (`String s) in 477 of_jsonm dec 478 479let to_string ?(minify=false) feed = 480 let buf = Buffer.create 1024 in 481 let enc = Jsonm.encoder ~minify (`Buffer buf) in 482 to_jsonm enc feed; 483 Buffer.contents buf 484 485(* Validation *) 486 487let validate feed = 488 let errors = ref [] in 489 let add_error msg = errors := msg :: !errors in 490 491 (* Check required fields *) 492 if feed.title = "" then 493 add_error "title is required and cannot be empty"; 494 495 (* Check items have unique IDs *) 496 let ids = List.map Item.id feed.items in 497 let unique_ids = List.sort_uniq String.compare ids in 498 if List.length ids <> List.length unique_ids then 499 add_error "items must have unique IDs"; 500 501 (* Validate authors *) 502 (match feed.authors with 503 | Some authors -> 504 List.iteri (fun i author -> 505 if not (Author.is_valid author) then 506 add_error (Printf.sprintf "feed author %d is invalid (needs at least one field)" i) 507 ) authors 508 | None -> ()); 509 510 (* Validate items *) 511 List.iteri (fun i item -> 512 if Item.id item = "" then 513 add_error (Printf.sprintf "item %d has empty ID" i); 514 515 (* Validate item authors *) 516 (match Item.authors item with 517 | Some authors -> 518 List.iteri (fun j author -> 519 if not (Author.is_valid author) then 520 add_error (Printf.sprintf "item %d author %d is invalid" i j) 521 ) authors 522 | None -> ()) 523 ) feed.items; 524 525 if !errors = [] then Ok () 526 else Error (List.rev !errors) 527 528(* Comparison *) 529 530let equal a b = 531 a.version = b.version && 532 a.title = b.title && 533 a.home_page_url = b.home_page_url && 534 a.feed_url = b.feed_url && 535 a.description = b.description && 536 a.user_comment = b.user_comment && 537 a.next_url = b.next_url && 538 a.icon = b.icon && 539 a.favicon = b.favicon && 540 a.language = b.language && 541 a.expired = b.expired && 542 (* Note: We're doing structural equality on items *) 543 List.length a.items = List.length b.items 544 545(* Pretty printing *) 546 547let pp_summary ppf feed = 548 Format.fprintf ppf "%s (%d items)" feed.title (List.length feed.items) 549 550let pp ppf feed = 551 Format.fprintf ppf "Feed: %s" feed.title; 552 (match feed.home_page_url with 553 | Some url -> Format.fprintf ppf " (%s)" url 554 | None -> ()); 555 Format.fprintf ppf "@\n"; 556 557 Format.fprintf ppf " Items: %d@\n" (List.length feed.items); 558 559 (match feed.authors with 560 | Some authors when authors <> [] -> 561 Format.fprintf ppf " Authors: "; 562 List.iteri (fun i author -> 563 if i > 0 then Format.fprintf ppf ", "; 564 Format.fprintf ppf "%a" Author.pp author 565 ) authors; 566 Format.fprintf ppf "@\n" 567 | _ -> ()); 568 569 (match feed.language with 570 | Some lang -> Format.fprintf ppf " Language: %s@\n" lang 571 | None -> ())