OCaml library for JSONfeed parsing and creation
1(** JSON Feed format parser and serializer. *)
2
3exception Invalid_feed of string
4
5module Author = Author
6module Attachment = Attachment
7module Hub = Hub
8module Item = Item
9module Reference = Reference
10module Cito = Cito
11
12type t = {
13 version : string;
14 title : string;
15 home_page_url : string option;
16 feed_url : string option;
17 description : string option;
18 user_comment : string option;
19 next_url : string option;
20 icon : string option;
21 favicon : string option;
22 authors : Author.t list option;
23 language : string option;
24 expired : bool option;
25 hubs : Hub.t list option;
26 items : Item.t list;
27}
28
29let create ~title ?home_page_url ?feed_url ?description ?user_comment
30 ?next_url ?icon ?favicon ?authors ?language ?expired ?hubs ~items () =
31 {
32 version = "https://jsonfeed.org/version/1.1";
33 title;
34 home_page_url;
35 feed_url;
36 description;
37 user_comment;
38 next_url;
39 icon;
40 favicon;
41 authors;
42 language;
43 expired;
44 hubs;
45 items;
46 }
47
48let version t = t.version
49let title t = t.title
50let home_page_url t = t.home_page_url
51let feed_url t = t.feed_url
52let description t = t.description
53let user_comment t = t.user_comment
54let next_url t = t.next_url
55let icon t = t.icon
56let favicon t = t.favicon
57let authors t = t.authors
58let language t = t.language
59let expired t = t.expired
60let hubs t = t.hubs
61let items t = t.items
62
63(* RFC3339 date utilities *)
64
65let parse_rfc3339 s =
66 match Ptime.of_rfc3339 s with
67 | Ok (t, _, _) -> Some t
68 | Error _ -> None
69
70let format_rfc3339 t =
71 Ptime.to_rfc3339 t
72
73(* JSON parsing and serialization *)
74
75type error = string
76
77let error_msgf fmt = Format.kasprintf (fun s -> Error s) fmt
78
79(* JSON parsing helpers *)
80
81type json_value =
82 | Null
83 | Bool of bool
84 | Float of float
85 | String of string
86 | Array of json_value list
87 | Object of (string * json_value) list
88
89let rec decode_value dec =
90 match Jsonm.decode dec with
91 | `Lexeme `Null -> Null
92 | `Lexeme (`Bool b) -> Bool b
93 | `Lexeme (`Float f) -> Float f
94 | `Lexeme (`String s) -> String s
95 | `Lexeme `Os -> decode_object dec []
96 | `Lexeme `As -> decode_array dec []
97 | `Lexeme _ -> Null
98 | `Error err -> raise (Invalid_feed (Format.asprintf "%a" Jsonm.pp_error err))
99 | `End | `Await -> Null
100
101and decode_object dec acc =
102 match Jsonm.decode dec with
103 | `Lexeme `Oe -> Object (List.rev acc)
104 | `Lexeme (`Name n) ->
105 let v = decode_value dec in
106 decode_object dec ((n, v) :: acc)
107 | `Error err -> raise (Invalid_feed (Format.asprintf "%a" Jsonm.pp_error err))
108 | _ -> Object (List.rev acc)
109
110and decode_array dec acc =
111 match Jsonm.decode dec with
112 | `Lexeme `Ae -> Array (List.rev acc)
113 | `Lexeme `Os ->
114 let v = decode_object dec [] in
115 decode_array dec (v :: acc)
116 | `Lexeme `As ->
117 let v = decode_array dec [] in
118 decode_array dec (v :: acc)
119 | `Lexeme `Null -> decode_array dec (Null :: acc)
120 | `Lexeme (`Bool b) -> decode_array dec (Bool b :: acc)
121 | `Lexeme (`Float f) -> decode_array dec (Float f :: acc)
122 | `Lexeme (`String s) -> decode_array dec (String s :: acc)
123 | `Error err -> raise (Invalid_feed (Format.asprintf "%a" Jsonm.pp_error err))
124 | _ -> Array (List.rev acc)
125
126(* Helpers to extract values from JSON *)
127
128let get_string = function String s -> Some s | _ -> None
129let get_bool = function Bool b -> Some b | _ -> None
130let _get_float = function Float f -> Some f | _ -> None
131let get_int = function Float f -> Some (int_of_float f) | _ -> None
132let get_int64 = function Float f -> Some (Int64.of_float f) | _ -> None
133let get_array = function Array arr -> Some arr | _ -> None
134let _get_object = function Object obj -> Some obj | _ -> None
135
136let find_field name obj = List.assoc_opt name obj
137
138let require_field name obj =
139 match find_field name obj with
140 | Some v -> v
141 | None -> raise (Invalid_feed (Printf.sprintf "Missing required field: %s" name))
142
143let require_string name obj =
144 match require_field name obj |> get_string with
145 | Some s -> s
146 | None -> raise (Invalid_feed (Printf.sprintf "Field %s must be a string" name))
147
148let optional_string name obj =
149 match find_field name obj with Some v -> get_string v | None -> None
150
151let optional_bool name obj =
152 match find_field name obj with Some v -> get_bool v | None -> None
153
154let optional_int name obj =
155 match find_field name obj with Some v -> get_int v | None -> None
156
157let optional_int64 name obj =
158 match find_field name obj with Some v -> get_int64 v | None -> None
159
160let optional_array name obj =
161 match find_field name obj with Some v -> get_array v | None -> None
162
163(* Parse Author *)
164
165let parse_author_obj obj =
166 let name = optional_string "name" obj in
167 let url = optional_string "url" obj in
168 let avatar = optional_string "avatar" obj in
169 if name = None && url = None && avatar = None then
170 raise (Invalid_feed "Author must have at least one field");
171 Author.create ?name ?url ?avatar ()
172
173let parse_author = function
174 | Object obj -> parse_author_obj obj
175 | _ -> raise (Invalid_feed "Author must be an object")
176
177(* Parse Attachment *)
178
179let parse_attachment_obj obj =
180 let url = require_string "url" obj in
181 let mime_type = require_string "mime_type" obj in
182 let title = optional_string "title" obj in
183 let size_in_bytes = optional_int64 "size_in_bytes" obj in
184 let duration_in_seconds = optional_int "duration_in_seconds" obj in
185 Attachment.create ~url ~mime_type ?title ?size_in_bytes ?duration_in_seconds ()
186
187let parse_attachment = function
188 | Object obj -> parse_attachment_obj obj
189 | _ -> raise (Invalid_feed "Attachment must be an object")
190
191(* Parse Hub *)
192
193let parse_hub_obj obj =
194 let type_ = require_string "type" obj in
195 let url = require_string "url" obj in
196 Hub.create ~type_ ~url ()
197
198let parse_hub = function
199 | Object obj -> parse_hub_obj obj
200 | _ -> raise (Invalid_feed "Hub must be an object")
201
202(* Parse Item *)
203
204let parse_item_obj obj =
205 let id = require_string "id" obj in
206
207 (* Parse content - at least one required *)
208 let content_html = optional_string "content_html" obj in
209 let content_text = optional_string "content_text" obj in
210 let content = match content_html, content_text with
211 | Some html, Some text -> `Both (html, text)
212 | Some html, None -> `Html html
213 | None, Some text -> `Text text
214 | None, None ->
215 raise (Invalid_feed "Item must have content_html or content_text")
216 in
217
218 let url = optional_string "url" obj in
219 let external_url = optional_string "external_url" obj in
220 let title = optional_string "title" obj in
221 let summary = optional_string "summary" obj in
222 let image = optional_string "image" obj in
223 let banner_image = optional_string "banner_image" obj in
224
225 let date_published =
226 match optional_string "date_published" obj with
227 | Some s -> parse_rfc3339 s
228 | None -> None
229 in
230
231 let date_modified =
232 match optional_string "date_modified" obj with
233 | Some s -> parse_rfc3339 s
234 | None -> None
235 in
236
237 let authors =
238 match optional_array "authors" obj with
239 | Some arr ->
240 let parsed = List.map parse_author arr in
241 if parsed = [] then None else Some parsed
242 | None -> None
243 in
244
245 let tags =
246 match optional_array "tags" obj with
247 | Some arr ->
248 let parsed = List.filter_map get_string arr in
249 if parsed = [] then None else Some parsed
250 | None -> None
251 in
252
253 let language = optional_string "language" obj in
254
255 let attachments =
256 match optional_array "attachments" obj with
257 | Some arr ->
258 let parsed = List.map parse_attachment arr in
259 if parsed = [] then None else Some parsed
260 | None -> None
261 in
262
263 let parse_reference = function
264 | Object obj ->
265 let url = require_string "url" obj in
266 let doi = optional_string "doi" obj in
267 Reference.create ~url ?doi ()
268 | _ -> raise (Invalid_feed "Reference must be an object")
269 in
270
271 let references =
272 match optional_array "_references" obj with
273 | Some arr ->
274 let parsed = List.map parse_reference arr in
275 if parsed = [] then None else Some parsed
276 | None -> None
277 in
278
279 Item.create ~id ~content ?url ?external_url ?title ?summary ?image
280 ?banner_image ?date_published ?date_modified ?authors ?tags ?language
281 ?attachments ?references ()
282
283let parse_item = function
284 | Object obj -> parse_item_obj obj
285 | _ -> raise (Invalid_feed "Item must be an object")
286
287(* Parse Feed *)
288
289let parse_feed_obj obj =
290 let version = require_string "version" obj in
291 let title = require_string "title" obj in
292 let home_page_url = optional_string "home_page_url" obj in
293 let feed_url = optional_string "feed_url" obj in
294 let description = optional_string "description" obj in
295 let user_comment = optional_string "user_comment" obj in
296 let next_url = optional_string "next_url" obj in
297 let icon = optional_string "icon" obj in
298 let favicon = optional_string "favicon" obj in
299 let language = optional_string "language" obj in
300 let expired = optional_bool "expired" obj in
301
302 let authors =
303 match optional_array "authors" obj with
304 | Some arr ->
305 let parsed = List.map parse_author arr in
306 if parsed = [] then None else Some parsed
307 | None -> None
308 in
309
310 let hubs =
311 match optional_array "hubs" obj with
312 | Some arr ->
313 let parsed = List.map parse_hub arr in
314 if parsed = [] then None else Some parsed
315 | None -> None
316 in
317
318 let items =
319 match optional_array "items" obj with
320 | Some arr -> List.map parse_item arr
321 | None -> []
322 in
323
324 {
325 version;
326 title;
327 home_page_url;
328 feed_url;
329 description;
330 user_comment;
331 next_url;
332 icon;
333 favicon;
334 authors;
335 language;
336 expired;
337 hubs;
338 items;
339 }
340
341let of_jsonm dec =
342 try
343 let json = decode_value dec in
344 match json with
345 | Object obj -> Ok (parse_feed_obj obj)
346 | _ -> error_msgf "Feed must be a JSON object"
347 with
348 | Invalid_feed msg -> error_msgf "%s" msg
349
350(* JSON serialization *)
351
352let to_jsonm enc feed =
353 let enc_field name value_fn =
354 ignore (Jsonm.encode enc (`Lexeme (`Name name)));
355 value_fn ()
356 in
357
358 let enc_string s =
359 ignore (Jsonm.encode enc (`Lexeme (`String s)))
360 in
361
362 let enc_bool b =
363 ignore (Jsonm.encode enc (`Lexeme (`Bool b)))
364 in
365
366 let enc_opt enc_fn = function
367 | None -> ()
368 | Some v -> enc_fn v
369 in
370
371 let enc_list enc_fn lst =
372 ignore (Jsonm.encode enc (`Lexeme `As));
373 List.iter enc_fn lst;
374 ignore (Jsonm.encode enc (`Lexeme `Ae))
375 in
376
377 let enc_author author =
378 ignore (Jsonm.encode enc (`Lexeme `Os));
379 (match Author.name author with
380 | Some name -> enc_field "name" (fun () -> enc_string name)
381 | None -> ());
382 (match Author.url author with
383 | Some url -> enc_field "url" (fun () -> enc_string url)
384 | None -> ());
385 (match Author.avatar author with
386 | Some avatar -> enc_field "avatar" (fun () -> enc_string avatar)
387 | None -> ());
388 ignore (Jsonm.encode enc (`Lexeme `Oe))
389 in
390
391 let enc_attachment att =
392 ignore (Jsonm.encode enc (`Lexeme `Os));
393 enc_field "url" (fun () -> enc_string (Attachment.url att));
394 enc_field "mime_type" (fun () -> enc_string (Attachment.mime_type att));
395 enc_opt (fun title -> enc_field "title" (fun () -> enc_string title))
396 (Attachment.title att);
397 enc_opt (fun size ->
398 enc_field "size_in_bytes" (fun () ->
399 ignore (Jsonm.encode enc (`Lexeme (`Float (Int64.to_float size))))))
400 (Attachment.size_in_bytes att);
401 enc_opt (fun dur ->
402 enc_field "duration_in_seconds" (fun () ->
403 ignore (Jsonm.encode enc (`Lexeme (`Float (float_of_int dur))))))
404 (Attachment.duration_in_seconds att);
405 ignore (Jsonm.encode enc (`Lexeme `Oe))
406 in
407
408 let enc_reference ref =
409 ignore (Jsonm.encode enc (`Lexeme `Os));
410 enc_field "url" (fun () -> enc_string (Reference.url ref));
411 enc_opt (fun doi -> enc_field "doi" (fun () -> enc_string doi))
412 (Reference.doi ref);
413 enc_opt (fun cito_list ->
414 enc_field "cito" (fun () ->
415 enc_list (fun cito -> enc_string (Cito.to_string cito)) cito_list))
416 (Reference.cito ref);
417 ignore (Jsonm.encode enc (`Lexeme `Oe))
418 in
419
420 let enc_hub hub =
421 ignore (Jsonm.encode enc (`Lexeme `Os));
422 enc_field "type" (fun () -> enc_string (Hub.type_ hub));
423 enc_field "url" (fun () -> enc_string (Hub.url hub));
424 ignore (Jsonm.encode enc (`Lexeme `Oe))
425 in
426
427 let enc_item item =
428 ignore (Jsonm.encode enc (`Lexeme `Os));
429 enc_field "id" (fun () -> enc_string (Item.id item));
430
431 (* Encode content *)
432 (match Item.content item with
433 | `Html html ->
434 enc_field "content_html" (fun () -> enc_string html)
435 | `Text text ->
436 enc_field "content_text" (fun () -> enc_string text)
437 | `Both (html, text) ->
438 enc_field "content_html" (fun () -> enc_string html);
439 enc_field "content_text" (fun () -> enc_string text));
440
441 enc_opt (fun url -> enc_field "url" (fun () -> enc_string url))
442 (Item.url item);
443 enc_opt (fun url -> enc_field "external_url" (fun () -> enc_string url))
444 (Item.external_url item);
445 enc_opt (fun title -> enc_field "title" (fun () -> enc_string title))
446 (Item.title item);
447 enc_opt (fun summary -> enc_field "summary" (fun () -> enc_string summary))
448 (Item.summary item);
449 enc_opt (fun img -> enc_field "image" (fun () -> enc_string img))
450 (Item.image item);
451 enc_opt (fun img -> enc_field "banner_image" (fun () -> enc_string img))
452 (Item.banner_image item);
453 enc_opt (fun date -> enc_field "date_published" (fun () -> enc_string (format_rfc3339 date)))
454 (Item.date_published item);
455 enc_opt (fun date -> enc_field "date_modified" (fun () -> enc_string (format_rfc3339 date)))
456 (Item.date_modified item);
457 enc_opt (fun authors ->
458 enc_field "authors" (fun () -> enc_list enc_author authors))
459 (Item.authors item);
460 enc_opt (fun tags ->
461 enc_field "tags" (fun () -> enc_list enc_string tags))
462 (Item.tags item);
463 enc_opt (fun lang -> enc_field "language" (fun () -> enc_string lang))
464 (Item.language item);
465 enc_opt (fun atts ->
466 enc_field "attachments" (fun () -> enc_list enc_attachment atts))
467 (Item.attachments item);
468 enc_opt (fun refs ->
469 enc_field "_references" (fun () -> enc_list enc_reference refs))
470 (Item.references item);
471
472 ignore (Jsonm.encode enc (`Lexeme `Oe))
473 in
474
475 (* Encode the feed *)
476 ignore (Jsonm.encode enc (`Lexeme `Os));
477 enc_field "version" (fun () -> enc_string feed.version);
478 enc_field "title" (fun () -> enc_string feed.title);
479 enc_opt (fun url -> enc_field "home_page_url" (fun () -> enc_string url))
480 feed.home_page_url;
481 enc_opt (fun url -> enc_field "feed_url" (fun () -> enc_string url))
482 feed.feed_url;
483 enc_opt (fun desc -> enc_field "description" (fun () -> enc_string desc))
484 feed.description;
485 enc_opt (fun comment -> enc_field "user_comment" (fun () -> enc_string comment))
486 feed.user_comment;
487 enc_opt (fun url -> enc_field "next_url" (fun () -> enc_string url))
488 feed.next_url;
489 enc_opt (fun icon -> enc_field "icon" (fun () -> enc_string icon))
490 feed.icon;
491 enc_opt (fun favicon -> enc_field "favicon" (fun () -> enc_string favicon))
492 feed.favicon;
493 enc_opt (fun authors ->
494 enc_field "authors" (fun () -> enc_list enc_author authors))
495 feed.authors;
496 enc_opt (fun lang -> enc_field "language" (fun () -> enc_string lang))
497 feed.language;
498 enc_opt (fun expired -> enc_field "expired" (fun () -> enc_bool expired))
499 feed.expired;
500 enc_opt (fun hubs ->
501 enc_field "hubs" (fun () -> enc_list enc_hub hubs))
502 feed.hubs;
503 enc_field "items" (fun () -> enc_list enc_item feed.items);
504 ignore (Jsonm.encode enc (`Lexeme `Oe));
505 ignore (Jsonm.encode enc `End)
506
507let of_string s =
508 let dec = Jsonm.decoder (`String s) in
509 of_jsonm dec
510
511let to_string ?(minify=false) feed =
512 let buf = Buffer.create 1024 in
513 let enc = Jsonm.encoder ~minify (`Buffer buf) in
514 to_jsonm enc feed;
515 Buffer.contents buf
516
517(* Validation *)
518
519let validate feed =
520 let errors = ref [] in
521 let add_error msg = errors := msg :: !errors in
522
523 (* Check required fields *)
524 if feed.title = "" then
525 add_error "title is required and cannot be empty";
526
527 (* Check items have unique IDs *)
528 let ids = List.map Item.id feed.items in
529 let unique_ids = List.sort_uniq String.compare ids in
530 if List.length ids <> List.length unique_ids then
531 add_error "items must have unique IDs";
532
533 (* Validate authors *)
534 (match feed.authors with
535 | Some authors ->
536 List.iteri (fun i author ->
537 if not (Author.is_valid author) then
538 add_error (Printf.sprintf "feed author %d is invalid (needs at least one field)" i)
539 ) authors
540 | None -> ());
541
542 (* Validate items *)
543 List.iteri (fun i item ->
544 if Item.id item = "" then
545 add_error (Printf.sprintf "item %d has empty ID" i);
546
547 (* Validate item authors *)
548 (match Item.authors item with
549 | Some authors ->
550 List.iteri (fun j author ->
551 if not (Author.is_valid author) then
552 add_error (Printf.sprintf "item %d author %d is invalid" i j)
553 ) authors
554 | None -> ())
555 ) feed.items;
556
557 if !errors = [] then Ok ()
558 else Error (List.rev !errors)
559
560(* Comparison *)
561
562let equal a b =
563 a.version = b.version &&
564 a.title = b.title &&
565 a.home_page_url = b.home_page_url &&
566 a.feed_url = b.feed_url &&
567 a.description = b.description &&
568 a.user_comment = b.user_comment &&
569 a.next_url = b.next_url &&
570 a.icon = b.icon &&
571 a.favicon = b.favicon &&
572 a.language = b.language &&
573 a.expired = b.expired &&
574 (* Note: We're doing structural equality on items *)
575 List.length a.items = List.length b.items
576
577(* Pretty printing *)
578
579let pp_summary ppf feed =
580 Format.fprintf ppf "%s (%d items)" feed.title (List.length feed.items)
581
582let pp ppf feed =
583 Format.fprintf ppf "Feed: %s" feed.title;
584 (match feed.home_page_url with
585 | Some url -> Format.fprintf ppf " (%s)" url
586 | None -> ());
587 Format.fprintf ppf "@\n";
588
589 Format.fprintf ppf " Items: %d@\n" (List.length feed.items);
590
591 (match feed.authors with
592 | Some authors when authors <> [] ->
593 Format.fprintf ppf " Authors: ";
594 List.iteri (fun i author ->
595 if i > 0 then Format.fprintf ppf ", ";
596 Format.fprintf ppf "%a" Author.pp author
597 ) authors;
598 Format.fprintf ppf "@\n"
599 | _ -> ());
600
601 (match feed.language with
602 | Some lang -> Format.fprintf ppf " Language: %s@\n" lang
603 | None -> ())