OCaml library for JSONfeed parsing and creation
1(** JSON Feed format parser and serializer. *) 2 3exception Invalid_feed of string 4 5module Author = Author 6module Attachment = Attachment 7module Hub = Hub 8module Item = Item 9module Reference = Reference 10module Cito = Cito 11 12type t = { 13 version : string; 14 title : string; 15 home_page_url : string option; 16 feed_url : string option; 17 description : string option; 18 user_comment : string option; 19 next_url : string option; 20 icon : string option; 21 favicon : string option; 22 authors : Author.t list option; 23 language : string option; 24 expired : bool option; 25 hubs : Hub.t list option; 26 items : Item.t list; 27} 28 29let create ~title ?home_page_url ?feed_url ?description ?user_comment 30 ?next_url ?icon ?favicon ?authors ?language ?expired ?hubs ~items () = 31 { 32 version = "https://jsonfeed.org/version/1.1"; 33 title; 34 home_page_url; 35 feed_url; 36 description; 37 user_comment; 38 next_url; 39 icon; 40 favicon; 41 authors; 42 language; 43 expired; 44 hubs; 45 items; 46 } 47 48let version t = t.version 49let title t = t.title 50let home_page_url t = t.home_page_url 51let feed_url t = t.feed_url 52let description t = t.description 53let user_comment t = t.user_comment 54let next_url t = t.next_url 55let icon t = t.icon 56let favicon t = t.favicon 57let authors t = t.authors 58let language t = t.language 59let expired t = t.expired 60let hubs t = t.hubs 61let items t = t.items 62 63(* RFC3339 date utilities *) 64 65let parse_rfc3339 s = 66 match Ptime.of_rfc3339 s with 67 | Ok (t, _, _) -> Some t 68 | Error _ -> None 69 70let format_rfc3339 t = 71 Ptime.to_rfc3339 t 72 73(* JSON parsing and serialization *) 74 75type error = string 76 77let error_msgf fmt = Format.kasprintf (fun s -> Error s) fmt 78 79(* JSON parsing helpers *) 80 81type json_value = 82 | Null 83 | Bool of bool 84 | Float of float 85 | String of string 86 | Array of json_value list 87 | Object of (string * json_value) list 88 89let rec decode_value dec = 90 match Jsonm.decode dec with 91 | `Lexeme `Null -> Null 92 | `Lexeme (`Bool b) -> Bool b 93 | `Lexeme (`Float f) -> Float f 94 | `Lexeme (`String s) -> String s 95 | `Lexeme `Os -> decode_object dec [] 96 | `Lexeme `As -> decode_array dec [] 97 | `Lexeme _ -> Null 98 | `Error err -> raise (Invalid_feed (Format.asprintf "%a" Jsonm.pp_error err)) 99 | `End | `Await -> Null 100 101and decode_object dec acc = 102 match Jsonm.decode dec with 103 | `Lexeme `Oe -> Object (List.rev acc) 104 | `Lexeme (`Name n) -> 105 let v = decode_value dec in 106 decode_object dec ((n, v) :: acc) 107 | `Error err -> raise (Invalid_feed (Format.asprintf "%a" Jsonm.pp_error err)) 108 | _ -> Object (List.rev acc) 109 110and decode_array dec acc = 111 match Jsonm.decode dec with 112 | `Lexeme `Ae -> Array (List.rev acc) 113 | `Lexeme `Os -> 114 let v = decode_object dec [] in 115 decode_array dec (v :: acc) 116 | `Lexeme `As -> 117 let v = decode_array dec [] in 118 decode_array dec (v :: acc) 119 | `Lexeme `Null -> decode_array dec (Null :: acc) 120 | `Lexeme (`Bool b) -> decode_array dec (Bool b :: acc) 121 | `Lexeme (`Float f) -> decode_array dec (Float f :: acc) 122 | `Lexeme (`String s) -> decode_array dec (String s :: acc) 123 | `Error err -> raise (Invalid_feed (Format.asprintf "%a" Jsonm.pp_error err)) 124 | _ -> Array (List.rev acc) 125 126(* Helpers to extract values from JSON *) 127 128let get_string = function String s -> Some s | _ -> None 129let get_bool = function Bool b -> Some b | _ -> None 130let _get_float = function Float f -> Some f | _ -> None 131let get_int = function Float f -> Some (int_of_float f) | _ -> None 132let get_int64 = function Float f -> Some (Int64.of_float f) | _ -> None 133let get_array = function Array arr -> Some arr | _ -> None 134let _get_object = function Object obj -> Some obj | _ -> None 135 136let find_field name obj = List.assoc_opt name obj 137 138let require_field name obj = 139 match find_field name obj with 140 | Some v -> v 141 | None -> raise (Invalid_feed (Printf.sprintf "Missing required field: %s" name)) 142 143let require_string name obj = 144 match require_field name obj |> get_string with 145 | Some s -> s 146 | None -> raise (Invalid_feed (Printf.sprintf "Field %s must be a string" name)) 147 148let optional_string name obj = 149 match find_field name obj with Some v -> get_string v | None -> None 150 151let optional_bool name obj = 152 match find_field name obj with Some v -> get_bool v | None -> None 153 154let optional_int name obj = 155 match find_field name obj with Some v -> get_int v | None -> None 156 157let optional_int64 name obj = 158 match find_field name obj with Some v -> get_int64 v | None -> None 159 160let optional_array name obj = 161 match find_field name obj with Some v -> get_array v | None -> None 162 163(* Parse Author *) 164 165let parse_author_obj obj = 166 let name = optional_string "name" obj in 167 let url = optional_string "url" obj in 168 let avatar = optional_string "avatar" obj in 169 if name = None && url = None && avatar = None then 170 raise (Invalid_feed "Author must have at least one field"); 171 Author.create ?name ?url ?avatar () 172 173let parse_author = function 174 | Object obj -> parse_author_obj obj 175 | _ -> raise (Invalid_feed "Author must be an object") 176 177(* Parse Attachment *) 178 179let parse_attachment_obj obj = 180 let url = require_string "url" obj in 181 let mime_type = require_string "mime_type" obj in 182 let title = optional_string "title" obj in 183 let size_in_bytes = optional_int64 "size_in_bytes" obj in 184 let duration_in_seconds = optional_int "duration_in_seconds" obj in 185 Attachment.create ~url ~mime_type ?title ?size_in_bytes ?duration_in_seconds () 186 187let parse_attachment = function 188 | Object obj -> parse_attachment_obj obj 189 | _ -> raise (Invalid_feed "Attachment must be an object") 190 191(* Parse Hub *) 192 193let parse_hub_obj obj = 194 let type_ = require_string "type" obj in 195 let url = require_string "url" obj in 196 Hub.create ~type_ ~url () 197 198let parse_hub = function 199 | Object obj -> parse_hub_obj obj 200 | _ -> raise (Invalid_feed "Hub must be an object") 201 202(* Parse Item *) 203 204let parse_item_obj obj = 205 let id = require_string "id" obj in 206 207 (* Parse content - at least one required *) 208 let content_html = optional_string "content_html" obj in 209 let content_text = optional_string "content_text" obj in 210 let content = match content_html, content_text with 211 | Some html, Some text -> `Both (html, text) 212 | Some html, None -> `Html html 213 | None, Some text -> `Text text 214 | None, None -> 215 raise (Invalid_feed "Item must have content_html or content_text") 216 in 217 218 let url = optional_string "url" obj in 219 let external_url = optional_string "external_url" obj in 220 let title = optional_string "title" obj in 221 let summary = optional_string "summary" obj in 222 let image = optional_string "image" obj in 223 let banner_image = optional_string "banner_image" obj in 224 225 let date_published = 226 match optional_string "date_published" obj with 227 | Some s -> parse_rfc3339 s 228 | None -> None 229 in 230 231 let date_modified = 232 match optional_string "date_modified" obj with 233 | Some s -> parse_rfc3339 s 234 | None -> None 235 in 236 237 let authors = 238 match optional_array "authors" obj with 239 | Some arr -> 240 let parsed = List.map parse_author arr in 241 if parsed = [] then None else Some parsed 242 | None -> None 243 in 244 245 let tags = 246 match optional_array "tags" obj with 247 | Some arr -> 248 let parsed = List.filter_map get_string arr in 249 if parsed = [] then None else Some parsed 250 | None -> None 251 in 252 253 let language = optional_string "language" obj in 254 255 let attachments = 256 match optional_array "attachments" obj with 257 | Some arr -> 258 let parsed = List.map parse_attachment arr in 259 if parsed = [] then None else Some parsed 260 | None -> None 261 in 262 263 let parse_reference = function 264 | Object obj -> 265 let url = require_string "url" obj in 266 let doi = optional_string "doi" obj in 267 Reference.create ~url ?doi () 268 | _ -> raise (Invalid_feed "Reference must be an object") 269 in 270 271 let references = 272 match optional_array "_references" obj with 273 | Some arr -> 274 let parsed = List.map parse_reference arr in 275 if parsed = [] then None else Some parsed 276 | None -> None 277 in 278 279 Item.create ~id ~content ?url ?external_url ?title ?summary ?image 280 ?banner_image ?date_published ?date_modified ?authors ?tags ?language 281 ?attachments ?references () 282 283let parse_item = function 284 | Object obj -> parse_item_obj obj 285 | _ -> raise (Invalid_feed "Item must be an object") 286 287(* Parse Feed *) 288 289let parse_feed_obj obj = 290 let version = require_string "version" obj in 291 let title = require_string "title" obj in 292 let home_page_url = optional_string "home_page_url" obj in 293 let feed_url = optional_string "feed_url" obj in 294 let description = optional_string "description" obj in 295 let user_comment = optional_string "user_comment" obj in 296 let next_url = optional_string "next_url" obj in 297 let icon = optional_string "icon" obj in 298 let favicon = optional_string "favicon" obj in 299 let language = optional_string "language" obj in 300 let expired = optional_bool "expired" obj in 301 302 let authors = 303 match optional_array "authors" obj with 304 | Some arr -> 305 let parsed = List.map parse_author arr in 306 if parsed = [] then None else Some parsed 307 | None -> None 308 in 309 310 let hubs = 311 match optional_array "hubs" obj with 312 | Some arr -> 313 let parsed = List.map parse_hub arr in 314 if parsed = [] then None else Some parsed 315 | None -> None 316 in 317 318 let items = 319 match optional_array "items" obj with 320 | Some arr -> List.map parse_item arr 321 | None -> [] 322 in 323 324 { 325 version; 326 title; 327 home_page_url; 328 feed_url; 329 description; 330 user_comment; 331 next_url; 332 icon; 333 favicon; 334 authors; 335 language; 336 expired; 337 hubs; 338 items; 339 } 340 341let of_jsonm dec = 342 try 343 let json = decode_value dec in 344 match json with 345 | Object obj -> Ok (parse_feed_obj obj) 346 | _ -> error_msgf "Feed must be a JSON object" 347 with 348 | Invalid_feed msg -> error_msgf "%s" msg 349 350(* JSON serialization *) 351 352let to_jsonm enc feed = 353 let enc_field name value_fn = 354 ignore (Jsonm.encode enc (`Lexeme (`Name name))); 355 value_fn () 356 in 357 358 let enc_string s = 359 ignore (Jsonm.encode enc (`Lexeme (`String s))) 360 in 361 362 let enc_bool b = 363 ignore (Jsonm.encode enc (`Lexeme (`Bool b))) 364 in 365 366 let enc_opt enc_fn = function 367 | None -> () 368 | Some v -> enc_fn v 369 in 370 371 let enc_list enc_fn lst = 372 ignore (Jsonm.encode enc (`Lexeme `As)); 373 List.iter enc_fn lst; 374 ignore (Jsonm.encode enc (`Lexeme `Ae)) 375 in 376 377 let enc_author author = 378 ignore (Jsonm.encode enc (`Lexeme `Os)); 379 (match Author.name author with 380 | Some name -> enc_field "name" (fun () -> enc_string name) 381 | None -> ()); 382 (match Author.url author with 383 | Some url -> enc_field "url" (fun () -> enc_string url) 384 | None -> ()); 385 (match Author.avatar author with 386 | Some avatar -> enc_field "avatar" (fun () -> enc_string avatar) 387 | None -> ()); 388 ignore (Jsonm.encode enc (`Lexeme `Oe)) 389 in 390 391 let enc_attachment att = 392 ignore (Jsonm.encode enc (`Lexeme `Os)); 393 enc_field "url" (fun () -> enc_string (Attachment.url att)); 394 enc_field "mime_type" (fun () -> enc_string (Attachment.mime_type att)); 395 enc_opt (fun title -> enc_field "title" (fun () -> enc_string title)) 396 (Attachment.title att); 397 enc_opt (fun size -> 398 enc_field "size_in_bytes" (fun () -> 399 ignore (Jsonm.encode enc (`Lexeme (`Float (Int64.to_float size)))))) 400 (Attachment.size_in_bytes att); 401 enc_opt (fun dur -> 402 enc_field "duration_in_seconds" (fun () -> 403 ignore (Jsonm.encode enc (`Lexeme (`Float (float_of_int dur)))))) 404 (Attachment.duration_in_seconds att); 405 ignore (Jsonm.encode enc (`Lexeme `Oe)) 406 in 407 408 let enc_reference ref = 409 ignore (Jsonm.encode enc (`Lexeme `Os)); 410 enc_field "url" (fun () -> enc_string (Reference.url ref)); 411 enc_opt (fun doi -> enc_field "doi" (fun () -> enc_string doi)) 412 (Reference.doi ref); 413 enc_opt (fun cito_list -> 414 enc_field "cito" (fun () -> 415 enc_list (fun cito -> enc_string (Cito.to_string cito)) cito_list)) 416 (Reference.cito ref); 417 ignore (Jsonm.encode enc (`Lexeme `Oe)) 418 in 419 420 let enc_hub hub = 421 ignore (Jsonm.encode enc (`Lexeme `Os)); 422 enc_field "type" (fun () -> enc_string (Hub.type_ hub)); 423 enc_field "url" (fun () -> enc_string (Hub.url hub)); 424 ignore (Jsonm.encode enc (`Lexeme `Oe)) 425 in 426 427 let enc_item item = 428 ignore (Jsonm.encode enc (`Lexeme `Os)); 429 enc_field "id" (fun () -> enc_string (Item.id item)); 430 431 (* Encode content *) 432 (match Item.content item with 433 | `Html html -> 434 enc_field "content_html" (fun () -> enc_string html) 435 | `Text text -> 436 enc_field "content_text" (fun () -> enc_string text) 437 | `Both (html, text) -> 438 enc_field "content_html" (fun () -> enc_string html); 439 enc_field "content_text" (fun () -> enc_string text)); 440 441 enc_opt (fun url -> enc_field "url" (fun () -> enc_string url)) 442 (Item.url item); 443 enc_opt (fun url -> enc_field "external_url" (fun () -> enc_string url)) 444 (Item.external_url item); 445 enc_opt (fun title -> enc_field "title" (fun () -> enc_string title)) 446 (Item.title item); 447 enc_opt (fun summary -> enc_field "summary" (fun () -> enc_string summary)) 448 (Item.summary item); 449 enc_opt (fun img -> enc_field "image" (fun () -> enc_string img)) 450 (Item.image item); 451 enc_opt (fun img -> enc_field "banner_image" (fun () -> enc_string img)) 452 (Item.banner_image item); 453 enc_opt (fun date -> enc_field "date_published" (fun () -> enc_string (format_rfc3339 date))) 454 (Item.date_published item); 455 enc_opt (fun date -> enc_field "date_modified" (fun () -> enc_string (format_rfc3339 date))) 456 (Item.date_modified item); 457 enc_opt (fun authors -> 458 enc_field "authors" (fun () -> enc_list enc_author authors)) 459 (Item.authors item); 460 enc_opt (fun tags -> 461 enc_field "tags" (fun () -> enc_list enc_string tags)) 462 (Item.tags item); 463 enc_opt (fun lang -> enc_field "language" (fun () -> enc_string lang)) 464 (Item.language item); 465 enc_opt (fun atts -> 466 enc_field "attachments" (fun () -> enc_list enc_attachment atts)) 467 (Item.attachments item); 468 enc_opt (fun refs -> 469 enc_field "_references" (fun () -> enc_list enc_reference refs)) 470 (Item.references item); 471 472 ignore (Jsonm.encode enc (`Lexeme `Oe)) 473 in 474 475 (* Encode the feed *) 476 ignore (Jsonm.encode enc (`Lexeme `Os)); 477 enc_field "version" (fun () -> enc_string feed.version); 478 enc_field "title" (fun () -> enc_string feed.title); 479 enc_opt (fun url -> enc_field "home_page_url" (fun () -> enc_string url)) 480 feed.home_page_url; 481 enc_opt (fun url -> enc_field "feed_url" (fun () -> enc_string url)) 482 feed.feed_url; 483 enc_opt (fun desc -> enc_field "description" (fun () -> enc_string desc)) 484 feed.description; 485 enc_opt (fun comment -> enc_field "user_comment" (fun () -> enc_string comment)) 486 feed.user_comment; 487 enc_opt (fun url -> enc_field "next_url" (fun () -> enc_string url)) 488 feed.next_url; 489 enc_opt (fun icon -> enc_field "icon" (fun () -> enc_string icon)) 490 feed.icon; 491 enc_opt (fun favicon -> enc_field "favicon" (fun () -> enc_string favicon)) 492 feed.favicon; 493 enc_opt (fun authors -> 494 enc_field "authors" (fun () -> enc_list enc_author authors)) 495 feed.authors; 496 enc_opt (fun lang -> enc_field "language" (fun () -> enc_string lang)) 497 feed.language; 498 enc_opt (fun expired -> enc_field "expired" (fun () -> enc_bool expired)) 499 feed.expired; 500 enc_opt (fun hubs -> 501 enc_field "hubs" (fun () -> enc_list enc_hub hubs)) 502 feed.hubs; 503 enc_field "items" (fun () -> enc_list enc_item feed.items); 504 ignore (Jsonm.encode enc (`Lexeme `Oe)); 505 ignore (Jsonm.encode enc `End) 506 507let of_string s = 508 let dec = Jsonm.decoder (`String s) in 509 of_jsonm dec 510 511let to_string ?(minify=false) feed = 512 let buf = Buffer.create 1024 in 513 let enc = Jsonm.encoder ~minify (`Buffer buf) in 514 to_jsonm enc feed; 515 Buffer.contents buf 516 517(* Validation *) 518 519let validate feed = 520 let errors = ref [] in 521 let add_error msg = errors := msg :: !errors in 522 523 (* Check required fields *) 524 if feed.title = "" then 525 add_error "title is required and cannot be empty"; 526 527 (* Check items have unique IDs *) 528 let ids = List.map Item.id feed.items in 529 let unique_ids = List.sort_uniq String.compare ids in 530 if List.length ids <> List.length unique_ids then 531 add_error "items must have unique IDs"; 532 533 (* Validate authors *) 534 (match feed.authors with 535 | Some authors -> 536 List.iteri (fun i author -> 537 if not (Author.is_valid author) then 538 add_error (Printf.sprintf "feed author %d is invalid (needs at least one field)" i) 539 ) authors 540 | None -> ()); 541 542 (* Validate items *) 543 List.iteri (fun i item -> 544 if Item.id item = "" then 545 add_error (Printf.sprintf "item %d has empty ID" i); 546 547 (* Validate item authors *) 548 (match Item.authors item with 549 | Some authors -> 550 List.iteri (fun j author -> 551 if not (Author.is_valid author) then 552 add_error (Printf.sprintf "item %d author %d is invalid" i j) 553 ) authors 554 | None -> ()) 555 ) feed.items; 556 557 if !errors = [] then Ok () 558 else Error (List.rev !errors) 559 560(* Comparison *) 561 562let equal a b = 563 a.version = b.version && 564 a.title = b.title && 565 a.home_page_url = b.home_page_url && 566 a.feed_url = b.feed_url && 567 a.description = b.description && 568 a.user_comment = b.user_comment && 569 a.next_url = b.next_url && 570 a.icon = b.icon && 571 a.favicon = b.favicon && 572 a.language = b.language && 573 a.expired = b.expired && 574 (* Note: We're doing structural equality on items *) 575 List.length a.items = List.length b.items 576 577(* Pretty printing *) 578 579let pp_summary ppf feed = 580 Format.fprintf ppf "%s (%d items)" feed.title (List.length feed.items) 581 582let pp ppf feed = 583 Format.fprintf ppf "Feed: %s" feed.title; 584 (match feed.home_page_url with 585 | Some url -> Format.fprintf ppf " (%s)" url 586 | None -> ()); 587 Format.fprintf ppf "@\n"; 588 589 Format.fprintf ppf " Items: %d@\n" (List.length feed.items); 590 591 (match feed.authors with 592 | Some authors when authors <> [] -> 593 Format.fprintf ppf " Authors: "; 594 List.iteri (fun i author -> 595 if i > 0 then Format.fprintf ppf ", "; 596 Format.fprintf ppf "%a" Author.pp author 597 ) authors; 598 Format.fprintf ppf "@\n" 599 | _ -> ()); 600 601 (match feed.language with 602 | Some lang -> Format.fprintf ppf " Language: %s@\n" lang 603 | None -> ())