OCaml library for JSONfeed parsing and creation
1(** JSON Feed format parser and serializer. *) 2 3exception Invalid_feed of string 4 5module Author = Author 6module Attachment = Attachment 7module Hub = Hub 8module Item = Item 9module Reference = Reference 10module Cito = Cito 11 12type t = { 13 version : string; 14 title : string; 15 home_page_url : string option; 16 feed_url : string option; 17 description : string option; 18 user_comment : string option; 19 next_url : string option; 20 icon : string option; 21 favicon : string option; 22 authors : Author.t list option; 23 language : string option; 24 expired : bool option; 25 hubs : Hub.t list option; 26 items : Item.t list; 27} 28 29let create ~title ?home_page_url ?feed_url ?description ?user_comment 30 ?next_url ?icon ?favicon ?authors ?language ?expired ?hubs ~items () = 31 { 32 version = "https://jsonfeed.org/version/1.1"; 33 title; 34 home_page_url; 35 feed_url; 36 description; 37 user_comment; 38 next_url; 39 icon; 40 favicon; 41 authors; 42 language; 43 expired; 44 hubs; 45 items; 46 } 47 48let version t = t.version 49let title t = t.title 50let home_page_url t = t.home_page_url 51let feed_url t = t.feed_url 52let description t = t.description 53let user_comment t = t.user_comment 54let next_url t = t.next_url 55let icon t = t.icon 56let favicon t = t.favicon 57let authors t = t.authors 58let language t = t.language 59let expired t = t.expired 60let hubs t = t.hubs 61let items t = t.items 62 63(* RFC3339 date utilities *) 64 65let parse_rfc3339 s = 66 match Ptime.of_rfc3339 s with 67 | Ok (t, _, _) -> Some t 68 | Error _ -> None 69 70let format_rfc3339 t = 71 Ptime.to_rfc3339 t 72 73(* JSON parsing and serialization *) 74 75type error = [ `Msg of string ] 76 77let error_msgf fmt = Format.kasprintf (fun s -> Error (`Msg s)) fmt 78 79(* JSON parsing helpers *) 80 81type json_value = 82 | Null 83 | Bool of bool 84 | Float of float 85 | String of string 86 | Array of json_value list 87 | Object of (string * json_value) list 88 89let rec decode_value dec = 90 match Jsonm.decode dec with 91 | `Lexeme `Null -> Null 92 | `Lexeme (`Bool b) -> Bool b 93 | `Lexeme (`Float f) -> Float f 94 | `Lexeme (`String s) -> String s 95 | `Lexeme `Os -> decode_object dec [] 96 | `Lexeme `As -> decode_array dec [] 97 | `Lexeme _ -> Null 98 | `Error err -> raise (Invalid_feed (Format.asprintf "%a" Jsonm.pp_error err)) 99 | `End | `Await -> Null 100 101and decode_object dec acc = 102 match Jsonm.decode dec with 103 | `Lexeme `Oe -> Object (List.rev acc) 104 | `Lexeme (`Name n) -> 105 let v = decode_value dec in 106 decode_object dec ((n, v) :: acc) 107 | `Error err -> raise (Invalid_feed (Format.asprintf "%a" Jsonm.pp_error err)) 108 | _ -> Object (List.rev acc) 109 110and decode_array dec acc = 111 match Jsonm.decode dec with 112 | `Lexeme `Ae -> Array (List.rev acc) 113 | `Lexeme `Os -> 114 let v = decode_object dec [] in 115 decode_array dec (v :: acc) 116 | `Lexeme `As -> 117 let v = decode_array dec [] in 118 decode_array dec (v :: acc) 119 | `Lexeme `Null -> decode_array dec (Null :: acc) 120 | `Lexeme (`Bool b) -> decode_array dec (Bool b :: acc) 121 | `Lexeme (`Float f) -> decode_array dec (Float f :: acc) 122 | `Lexeme (`String s) -> decode_array dec (String s :: acc) 123 | `Error err -> raise (Invalid_feed (Format.asprintf "%a" Jsonm.pp_error err)) 124 | _ -> Array (List.rev acc) 125 126(* Helpers to extract values from JSON *) 127 128let get_string = function String s -> Some s | _ -> None 129let get_bool = function Bool b -> Some b | _ -> None 130let _get_float = function Float f -> Some f | _ -> None 131let get_int = function Float f -> Some (int_of_float f) | _ -> None 132let get_int64 = function Float f -> Some (Int64.of_float f) | _ -> None 133let get_array = function Array arr -> Some arr | _ -> None 134let _get_object = function Object obj -> Some obj | _ -> None 135 136let find_field name obj = List.assoc_opt name obj 137 138let require_field name obj = 139 match find_field name obj with 140 | Some v -> v 141 | None -> raise (Invalid_feed (Printf.sprintf "Missing required field: %s" name)) 142 143let require_string name obj = 144 match require_field name obj |> get_string with 145 | Some s -> s 146 | None -> raise (Invalid_feed (Printf.sprintf "Field %s must be a string" name)) 147 148let optional_string name obj = 149 match find_field name obj with Some v -> get_string v | None -> None 150 151let optional_bool name obj = 152 match find_field name obj with Some v -> get_bool v | None -> None 153 154let optional_int name obj = 155 match find_field name obj with Some v -> get_int v | None -> None 156 157let optional_int64 name obj = 158 match find_field name obj with Some v -> get_int64 v | None -> None 159 160let optional_array name obj = 161 match find_field name obj with Some v -> get_array v | None -> None 162 163(* Parse Author *) 164 165let parse_author_obj obj = 166 let name = optional_string "name" obj in 167 let url = optional_string "url" obj in 168 let avatar = optional_string "avatar" obj in 169 if name = None && url = None && avatar = None then 170 raise (Invalid_feed "Author must have at least one field"); 171 Author.create ?name ?url ?avatar () 172 173let parse_author = function 174 | Object obj -> parse_author_obj obj 175 | _ -> raise (Invalid_feed "Author must be an object") 176 177(* Parse Attachment *) 178 179let parse_attachment_obj obj = 180 let url = require_string "url" obj in 181 let mime_type = require_string "mime_type" obj in 182 let title = optional_string "title" obj in 183 let size_in_bytes = optional_int64 "size_in_bytes" obj in 184 let duration_in_seconds = optional_int "duration_in_seconds" obj in 185 Attachment.create ~url ~mime_type ?title ?size_in_bytes ?duration_in_seconds () 186 187let parse_attachment = function 188 | Object obj -> parse_attachment_obj obj 189 | _ -> raise (Invalid_feed "Attachment must be an object") 190 191(* Parse Hub *) 192 193let parse_hub_obj obj = 194 let type_ = require_string "type" obj in 195 let url = require_string "url" obj in 196 Hub.create ~type_ ~url () 197 198let parse_hub = function 199 | Object obj -> parse_hub_obj obj 200 | _ -> raise (Invalid_feed "Hub must be an object") 201 202(* Parse Item *) 203 204let parse_item_obj obj = 205 let id = require_string "id" obj in 206 207 (* Parse content - at least one required *) 208 let content_html = optional_string "content_html" obj in 209 let content_text = optional_string "content_text" obj in 210 let content = match content_html, content_text with 211 | Some html, Some text -> `Both (html, text) 212 | Some html, None -> `Html html 213 | None, Some text -> `Text text 214 | None, None -> 215 raise (Invalid_feed "Item must have content_html or content_text") 216 in 217 218 let url = optional_string "url" obj in 219 let external_url = optional_string "external_url" obj in 220 let title = optional_string "title" obj in 221 let summary = optional_string "summary" obj in 222 let image = optional_string "image" obj in 223 let banner_image = optional_string "banner_image" obj in 224 225 let date_published = 226 match optional_string "date_published" obj with 227 | Some s -> parse_rfc3339 s 228 | None -> None 229 in 230 231 let date_modified = 232 match optional_string "date_modified" obj with 233 | Some s -> parse_rfc3339 s 234 | None -> None 235 in 236 237 let authors = 238 match optional_array "authors" obj with 239 | Some arr -> 240 let parsed = List.map parse_author arr in 241 if parsed = [] then None else Some parsed 242 | None -> None 243 in 244 245 let tags = 246 match optional_array "tags" obj with 247 | Some arr -> 248 let parsed = List.filter_map get_string arr in 249 if parsed = [] then None else Some parsed 250 | None -> None 251 in 252 253 let language = optional_string "language" obj in 254 255 let attachments = 256 match optional_array "attachments" obj with 257 | Some arr -> 258 let parsed = List.map parse_attachment arr in 259 if parsed = [] then None else Some parsed 260 | None -> None 261 in 262 263 Item.create ~id ~content ?url ?external_url ?title ?summary ?image 264 ?banner_image ?date_published ?date_modified ?authors ?tags ?language 265 ?attachments () 266 267let parse_item = function 268 | Object obj -> parse_item_obj obj 269 | _ -> raise (Invalid_feed "Item must be an object") 270 271(* Parse Feed *) 272 273let parse_feed_obj obj = 274 let version = require_string "version" obj in 275 let title = require_string "title" obj in 276 let home_page_url = optional_string "home_page_url" obj in 277 let feed_url = optional_string "feed_url" obj in 278 let description = optional_string "description" obj in 279 let user_comment = optional_string "user_comment" obj in 280 let next_url = optional_string "next_url" obj in 281 let icon = optional_string "icon" obj in 282 let favicon = optional_string "favicon" obj in 283 let language = optional_string "language" obj in 284 let expired = optional_bool "expired" obj in 285 286 let authors = 287 match optional_array "authors" obj with 288 | Some arr -> 289 let parsed = List.map parse_author arr in 290 if parsed = [] then None else Some parsed 291 | None -> None 292 in 293 294 let hubs = 295 match optional_array "hubs" obj with 296 | Some arr -> 297 let parsed = List.map parse_hub arr in 298 if parsed = [] then None else Some parsed 299 | None -> None 300 in 301 302 let items = 303 match optional_array "items" obj with 304 | Some arr -> List.map parse_item arr 305 | None -> [] 306 in 307 308 { 309 version; 310 title; 311 home_page_url; 312 feed_url; 313 description; 314 user_comment; 315 next_url; 316 icon; 317 favicon; 318 authors; 319 language; 320 expired; 321 hubs; 322 items; 323 } 324 325let of_jsonm dec = 326 try 327 let json = decode_value dec in 328 match json with 329 | Object obj -> Ok (parse_feed_obj obj) 330 | _ -> error_msgf "Feed must be a JSON object" 331 with 332 | Invalid_feed msg -> error_msgf "%s" msg 333 334(* JSON serialization *) 335 336let to_jsonm enc feed = 337 (* Simplified serialization using Jsonm *) 338 let enc_field name value_fn = 339 ignore (Jsonm.encode enc (`Lexeme (`Name name))); 340 value_fn () 341 in 342 343 let enc_string s = 344 ignore (Jsonm.encode enc (`Lexeme (`String s))) 345 in 346 347 let enc_bool b = 348 ignore (Jsonm.encode enc (`Lexeme (`Bool b))) 349 in 350 351 let enc_opt enc_fn = function 352 | None -> () 353 | Some v -> enc_fn v 354 in 355 356 let enc_list enc_fn lst = 357 ignore (Jsonm.encode enc (`Lexeme `As)); 358 List.iter enc_fn lst; 359 ignore (Jsonm.encode enc (`Lexeme `Ae)) 360 in 361 362 let enc_author author = 363 ignore (Jsonm.encode enc (`Lexeme `Os)); 364 (match Author.name author with 365 | Some name -> enc_field "name" (fun () -> enc_string name) 366 | None -> ()); 367 (match Author.url author with 368 | Some url -> enc_field "url" (fun () -> enc_string url) 369 | None -> ()); 370 (match Author.avatar author with 371 | Some avatar -> enc_field "avatar" (fun () -> enc_string avatar) 372 | None -> ()); 373 ignore (Jsonm.encode enc (`Lexeme `Oe)) 374 in 375 376 let enc_attachment att = 377 ignore (Jsonm.encode enc (`Lexeme `Os)); 378 enc_field "url" (fun () -> enc_string (Attachment.url att)); 379 enc_field "mime_type" (fun () -> enc_string (Attachment.mime_type att)); 380 enc_opt (fun title -> enc_field "title" (fun () -> enc_string title)) 381 (Attachment.title att); 382 enc_opt (fun size -> 383 enc_field "size_in_bytes" (fun () -> 384 ignore (Jsonm.encode enc (`Lexeme (`Float (Int64.to_float size)))))) 385 (Attachment.size_in_bytes att); 386 enc_opt (fun dur -> 387 enc_field "duration_in_seconds" (fun () -> 388 ignore (Jsonm.encode enc (`Lexeme (`Float (float_of_int dur)))))) 389 (Attachment.duration_in_seconds att); 390 ignore (Jsonm.encode enc (`Lexeme `Oe)) 391 in 392 393 let enc_hub hub = 394 ignore (Jsonm.encode enc (`Lexeme `Os)); 395 enc_field "type" (fun () -> enc_string (Hub.type_ hub)); 396 enc_field "url" (fun () -> enc_string (Hub.url hub)); 397 ignore (Jsonm.encode enc (`Lexeme `Oe)) 398 in 399 400 let enc_item item = 401 ignore (Jsonm.encode enc (`Lexeme `Os)); 402 enc_field "id" (fun () -> enc_string (Item.id item)); 403 404 (* Encode content *) 405 (match Item.content item with 406 | `Html html -> 407 enc_field "content_html" (fun () -> enc_string html) 408 | `Text text -> 409 enc_field "content_text" (fun () -> enc_string text) 410 | `Both (html, text) -> 411 enc_field "content_html" (fun () -> enc_string html); 412 enc_field "content_text" (fun () -> enc_string text)); 413 414 enc_opt (fun url -> enc_field "url" (fun () -> enc_string url)) 415 (Item.url item); 416 enc_opt (fun url -> enc_field "external_url" (fun () -> enc_string url)) 417 (Item.external_url item); 418 enc_opt (fun title -> enc_field "title" (fun () -> enc_string title)) 419 (Item.title item); 420 enc_opt (fun summary -> enc_field "summary" (fun () -> enc_string summary)) 421 (Item.summary item); 422 enc_opt (fun img -> enc_field "image" (fun () -> enc_string img)) 423 (Item.image item); 424 enc_opt (fun img -> enc_field "banner_image" (fun () -> enc_string img)) 425 (Item.banner_image item); 426 enc_opt (fun date -> enc_field "date_published" (fun () -> enc_string (format_rfc3339 date))) 427 (Item.date_published item); 428 enc_opt (fun date -> enc_field "date_modified" (fun () -> enc_string (format_rfc3339 date))) 429 (Item.date_modified item); 430 enc_opt (fun authors -> 431 enc_field "authors" (fun () -> enc_list enc_author authors)) 432 (Item.authors item); 433 enc_opt (fun tags -> 434 enc_field "tags" (fun () -> enc_list enc_string tags)) 435 (Item.tags item); 436 enc_opt (fun lang -> enc_field "language" (fun () -> enc_string lang)) 437 (Item.language item); 438 enc_opt (fun atts -> 439 enc_field "attachments" (fun () -> enc_list enc_attachment atts)) 440 (Item.attachments item); 441 442 ignore (Jsonm.encode enc (`Lexeme `Oe)) 443 in 444 445 (* Encode the feed *) 446 ignore (Jsonm.encode enc (`Lexeme `Os)); 447 enc_field "version" (fun () -> enc_string feed.version); 448 enc_field "title" (fun () -> enc_string feed.title); 449 enc_opt (fun url -> enc_field "home_page_url" (fun () -> enc_string url)) 450 feed.home_page_url; 451 enc_opt (fun url -> enc_field "feed_url" (fun () -> enc_string url)) 452 feed.feed_url; 453 enc_opt (fun desc -> enc_field "description" (fun () -> enc_string desc)) 454 feed.description; 455 enc_opt (fun comment -> enc_field "user_comment" (fun () -> enc_string comment)) 456 feed.user_comment; 457 enc_opt (fun url -> enc_field "next_url" (fun () -> enc_string url)) 458 feed.next_url; 459 enc_opt (fun icon -> enc_field "icon" (fun () -> enc_string icon)) 460 feed.icon; 461 enc_opt (fun favicon -> enc_field "favicon" (fun () -> enc_string favicon)) 462 feed.favicon; 463 enc_opt (fun authors -> 464 enc_field "authors" (fun () -> enc_list enc_author authors)) 465 feed.authors; 466 enc_opt (fun lang -> enc_field "language" (fun () -> enc_string lang)) 467 feed.language; 468 enc_opt (fun expired -> enc_field "expired" (fun () -> enc_bool expired)) 469 feed.expired; 470 enc_opt (fun hubs -> 471 enc_field "hubs" (fun () -> enc_list enc_hub hubs)) 472 feed.hubs; 473 enc_field "items" (fun () -> enc_list enc_item feed.items); 474 ignore (Jsonm.encode enc (`Lexeme `Oe)); 475 ignore (Jsonm.encode enc `End) 476 477let of_string s = 478 let dec = Jsonm.decoder (`String s) in 479 of_jsonm dec 480 481let to_string ?(minify=false) feed = 482 let buf = Buffer.create 1024 in 483 let enc = Jsonm.encoder ~minify (`Buffer buf) in 484 to_jsonm enc feed; 485 Buffer.contents buf 486 487(* Validation *) 488 489let validate feed = 490 let errors = ref [] in 491 let add_error msg = errors := msg :: !errors in 492 493 (* Check required fields *) 494 if feed.title = "" then 495 add_error "title is required and cannot be empty"; 496 497 (* Check items have unique IDs *) 498 let ids = List.map Item.id feed.items in 499 let unique_ids = List.sort_uniq String.compare ids in 500 if List.length ids <> List.length unique_ids then 501 add_error "items must have unique IDs"; 502 503 (* Validate authors *) 504 (match feed.authors with 505 | Some authors -> 506 List.iteri (fun i author -> 507 if not (Author.is_valid author) then 508 add_error (Printf.sprintf "feed author %d is invalid (needs at least one field)" i) 509 ) authors 510 | None -> ()); 511 512 (* Validate items *) 513 List.iteri (fun i item -> 514 if Item.id item = "" then 515 add_error (Printf.sprintf "item %d has empty ID" i); 516 517 (* Validate item authors *) 518 (match Item.authors item with 519 | Some authors -> 520 List.iteri (fun j author -> 521 if not (Author.is_valid author) then 522 add_error (Printf.sprintf "item %d author %d is invalid" i j) 523 ) authors 524 | None -> ()) 525 ) feed.items; 526 527 if !errors = [] then Ok () 528 else Error (List.rev !errors) 529 530(* Comparison *) 531 532let equal a b = 533 a.version = b.version && 534 a.title = b.title && 535 a.home_page_url = b.home_page_url && 536 a.feed_url = b.feed_url && 537 a.description = b.description && 538 a.user_comment = b.user_comment && 539 a.next_url = b.next_url && 540 a.icon = b.icon && 541 a.favicon = b.favicon && 542 a.language = b.language && 543 a.expired = b.expired && 544 (* Note: We're doing structural equality on items *) 545 List.length a.items = List.length b.items 546 547(* Pretty printing *) 548 549let pp_summary ppf feed = 550 Format.fprintf ppf "%s (%d items)" feed.title (List.length feed.items) 551 552let pp ppf feed = 553 Format.fprintf ppf "Feed: %s" feed.title; 554 (match feed.home_page_url with 555 | Some url -> Format.fprintf ppf " (%s)" url 556 | None -> ()); 557 Format.fprintf ppf "@\n"; 558 559 Format.fprintf ppf " Items: %d@\n" (List.length feed.items); 560 561 (match feed.authors with 562 | Some authors when authors <> [] -> 563 Format.fprintf ppf " Authors: "; 564 List.iteri (fun i author -> 565 if i > 0 then Format.fprintf ppf ", "; 566 Format.fprintf ppf "%a" Author.pp author 567 ) authors; 568 Format.fprintf ppf "@\n" 569 | _ -> ()); 570 571 (match feed.language with 572 | Some lang -> Format.fprintf ppf " Language: %s@\n" lang 573 | None -> ())