OCaml library for JSONfeed parsing and creation
1(** JSON Feed format parser and serializer. *)
2
3exception Invalid_feed of string
4
5module Author = Author
6module Attachment = Attachment
7module Hub = Hub
8module Item = Item
9module Reference = Reference
10module Cito = Cito
11
12type t = {
13 version : string;
14 title : string;
15 home_page_url : string option;
16 feed_url : string option;
17 description : string option;
18 user_comment : string option;
19 next_url : string option;
20 icon : string option;
21 favicon : string option;
22 authors : Author.t list option;
23 language : string option;
24 expired : bool option;
25 hubs : Hub.t list option;
26 items : Item.t list;
27}
28
29let create ~title ?home_page_url ?feed_url ?description ?user_comment
30 ?next_url ?icon ?favicon ?authors ?language ?expired ?hubs ~items () =
31 {
32 version = "https://jsonfeed.org/version/1.1";
33 title;
34 home_page_url;
35 feed_url;
36 description;
37 user_comment;
38 next_url;
39 icon;
40 favicon;
41 authors;
42 language;
43 expired;
44 hubs;
45 items;
46 }
47
48let version t = t.version
49let title t = t.title
50let home_page_url t = t.home_page_url
51let feed_url t = t.feed_url
52let description t = t.description
53let user_comment t = t.user_comment
54let next_url t = t.next_url
55let icon t = t.icon
56let favicon t = t.favicon
57let authors t = t.authors
58let language t = t.language
59let expired t = t.expired
60let hubs t = t.hubs
61let items t = t.items
62
63(* RFC3339 date utilities *)
64
65let parse_rfc3339 s =
66 match Ptime.of_rfc3339 s with
67 | Ok (t, _, _) -> Some t
68 | Error _ -> None
69
70let format_rfc3339 t =
71 Ptime.to_rfc3339 t
72
73(* JSON parsing and serialization *)
74
75type error = [ `Msg of string ]
76
77let error_msgf fmt = Format.kasprintf (fun s -> Error (`Msg s)) fmt
78
79(* JSON parsing helpers *)
80
81type json_value =
82 | Null
83 | Bool of bool
84 | Float of float
85 | String of string
86 | Array of json_value list
87 | Object of (string * json_value) list
88
89let rec decode_value dec =
90 match Jsonm.decode dec with
91 | `Lexeme `Null -> Null
92 | `Lexeme (`Bool b) -> Bool b
93 | `Lexeme (`Float f) -> Float f
94 | `Lexeme (`String s) -> String s
95 | `Lexeme `Os -> decode_object dec []
96 | `Lexeme `As -> decode_array dec []
97 | `Lexeme _ -> Null
98 | `Error err -> raise (Invalid_feed (Format.asprintf "%a" Jsonm.pp_error err))
99 | `End | `Await -> Null
100
101and decode_object dec acc =
102 match Jsonm.decode dec with
103 | `Lexeme `Oe -> Object (List.rev acc)
104 | `Lexeme (`Name n) ->
105 let v = decode_value dec in
106 decode_object dec ((n, v) :: acc)
107 | `Error err -> raise (Invalid_feed (Format.asprintf "%a" Jsonm.pp_error err))
108 | _ -> Object (List.rev acc)
109
110and decode_array dec acc =
111 match Jsonm.decode dec with
112 | `Lexeme `Ae -> Array (List.rev acc)
113 | `Lexeme `Os ->
114 let v = decode_object dec [] in
115 decode_array dec (v :: acc)
116 | `Lexeme `As ->
117 let v = decode_array dec [] in
118 decode_array dec (v :: acc)
119 | `Lexeme `Null -> decode_array dec (Null :: acc)
120 | `Lexeme (`Bool b) -> decode_array dec (Bool b :: acc)
121 | `Lexeme (`Float f) -> decode_array dec (Float f :: acc)
122 | `Lexeme (`String s) -> decode_array dec (String s :: acc)
123 | `Error err -> raise (Invalid_feed (Format.asprintf "%a" Jsonm.pp_error err))
124 | _ -> Array (List.rev acc)
125
126(* Helpers to extract values from JSON *)
127
128let get_string = function String s -> Some s | _ -> None
129let get_bool = function Bool b -> Some b | _ -> None
130let _get_float = function Float f -> Some f | _ -> None
131let get_int = function Float f -> Some (int_of_float f) | _ -> None
132let get_int64 = function Float f -> Some (Int64.of_float f) | _ -> None
133let get_array = function Array arr -> Some arr | _ -> None
134let _get_object = function Object obj -> Some obj | _ -> None
135
136let find_field name obj = List.assoc_opt name obj
137
138let require_field name obj =
139 match find_field name obj with
140 | Some v -> v
141 | None -> raise (Invalid_feed (Printf.sprintf "Missing required field: %s" name))
142
143let require_string name obj =
144 match require_field name obj |> get_string with
145 | Some s -> s
146 | None -> raise (Invalid_feed (Printf.sprintf "Field %s must be a string" name))
147
148let optional_string name obj =
149 match find_field name obj with Some v -> get_string v | None -> None
150
151let optional_bool name obj =
152 match find_field name obj with Some v -> get_bool v | None -> None
153
154let optional_int name obj =
155 match find_field name obj with Some v -> get_int v | None -> None
156
157let optional_int64 name obj =
158 match find_field name obj with Some v -> get_int64 v | None -> None
159
160let optional_array name obj =
161 match find_field name obj with Some v -> get_array v | None -> None
162
163(* Parse Author *)
164
165let parse_author_obj obj =
166 let name = optional_string "name" obj in
167 let url = optional_string "url" obj in
168 let avatar = optional_string "avatar" obj in
169 if name = None && url = None && avatar = None then
170 raise (Invalid_feed "Author must have at least one field");
171 Author.create ?name ?url ?avatar ()
172
173let parse_author = function
174 | Object obj -> parse_author_obj obj
175 | _ -> raise (Invalid_feed "Author must be an object")
176
177(* Parse Attachment *)
178
179let parse_attachment_obj obj =
180 let url = require_string "url" obj in
181 let mime_type = require_string "mime_type" obj in
182 let title = optional_string "title" obj in
183 let size_in_bytes = optional_int64 "size_in_bytes" obj in
184 let duration_in_seconds = optional_int "duration_in_seconds" obj in
185 Attachment.create ~url ~mime_type ?title ?size_in_bytes ?duration_in_seconds ()
186
187let parse_attachment = function
188 | Object obj -> parse_attachment_obj obj
189 | _ -> raise (Invalid_feed "Attachment must be an object")
190
191(* Parse Hub *)
192
193let parse_hub_obj obj =
194 let type_ = require_string "type" obj in
195 let url = require_string "url" obj in
196 Hub.create ~type_ ~url ()
197
198let parse_hub = function
199 | Object obj -> parse_hub_obj obj
200 | _ -> raise (Invalid_feed "Hub must be an object")
201
202(* Parse Item *)
203
204let parse_item_obj obj =
205 let id = require_string "id" obj in
206
207 (* Parse content - at least one required *)
208 let content_html = optional_string "content_html" obj in
209 let content_text = optional_string "content_text" obj in
210 let content = match content_html, content_text with
211 | Some html, Some text -> `Both (html, text)
212 | Some html, None -> `Html html
213 | None, Some text -> `Text text
214 | None, None ->
215 raise (Invalid_feed "Item must have content_html or content_text")
216 in
217
218 let url = optional_string "url" obj in
219 let external_url = optional_string "external_url" obj in
220 let title = optional_string "title" obj in
221 let summary = optional_string "summary" obj in
222 let image = optional_string "image" obj in
223 let banner_image = optional_string "banner_image" obj in
224
225 let date_published =
226 match optional_string "date_published" obj with
227 | Some s -> parse_rfc3339 s
228 | None -> None
229 in
230
231 let date_modified =
232 match optional_string "date_modified" obj with
233 | Some s -> parse_rfc3339 s
234 | None -> None
235 in
236
237 let authors =
238 match optional_array "authors" obj with
239 | Some arr ->
240 let parsed = List.map parse_author arr in
241 if parsed = [] then None else Some parsed
242 | None -> None
243 in
244
245 let tags =
246 match optional_array "tags" obj with
247 | Some arr ->
248 let parsed = List.filter_map get_string arr in
249 if parsed = [] then None else Some parsed
250 | None -> None
251 in
252
253 let language = optional_string "language" obj in
254
255 let attachments =
256 match optional_array "attachments" obj with
257 | Some arr ->
258 let parsed = List.map parse_attachment arr in
259 if parsed = [] then None else Some parsed
260 | None -> None
261 in
262
263 let parse_reference = function
264 | Object obj ->
265 let url = require_string "url" obj in
266 let doi = optional_string "doi" obj in
267 Reference.create ~url ?doi ()
268 | _ -> raise (Invalid_feed "Reference must be an object")
269 in
270
271 let references =
272 match optional_array "_references" obj with
273 | Some arr ->
274 let parsed = List.map parse_reference arr in
275 if parsed = [] then None else Some parsed
276 | None -> None
277 in
278
279 Item.create ~id ~content ?url ?external_url ?title ?summary ?image
280 ?banner_image ?date_published ?date_modified ?authors ?tags ?language
281 ?attachments ?references ()
282
283let parse_item = function
284 | Object obj -> parse_item_obj obj
285 | _ -> raise (Invalid_feed "Item must be an object")
286
287(* Parse Feed *)
288
289let parse_feed_obj obj =
290 let version = require_string "version" obj in
291 let title = require_string "title" obj in
292 let home_page_url = optional_string "home_page_url" obj in
293 let feed_url = optional_string "feed_url" obj in
294 let description = optional_string "description" obj in
295 let user_comment = optional_string "user_comment" obj in
296 let next_url = optional_string "next_url" obj in
297 let icon = optional_string "icon" obj in
298 let favicon = optional_string "favicon" obj in
299 let language = optional_string "language" obj in
300 let expired = optional_bool "expired" obj in
301
302 let authors =
303 match optional_array "authors" obj with
304 | Some arr ->
305 let parsed = List.map parse_author arr in
306 if parsed = [] then None else Some parsed
307 | None -> None
308 in
309
310 let hubs =
311 match optional_array "hubs" obj with
312 | Some arr ->
313 let parsed = List.map parse_hub arr in
314 if parsed = [] then None else Some parsed
315 | None -> None
316 in
317
318 let items =
319 match optional_array "items" obj with
320 | Some arr -> List.map parse_item arr
321 | None -> []
322 in
323
324 {
325 version;
326 title;
327 home_page_url;
328 feed_url;
329 description;
330 user_comment;
331 next_url;
332 icon;
333 favicon;
334 authors;
335 language;
336 expired;
337 hubs;
338 items;
339 }
340
341let of_jsonm dec =
342 try
343 let json = decode_value dec in
344 match json with
345 | Object obj -> Ok (parse_feed_obj obj)
346 | _ -> error_msgf "Feed must be a JSON object"
347 with
348 | Invalid_feed msg -> error_msgf "%s" msg
349
350(* JSON serialization *)
351
352let to_jsonm enc feed =
353 (* Simplified serialization using Jsonm *)
354 let enc_field name value_fn =
355 ignore (Jsonm.encode enc (`Lexeme (`Name name)));
356 value_fn ()
357 in
358
359 let enc_string s =
360 ignore (Jsonm.encode enc (`Lexeme (`String s)))
361 in
362
363 let enc_bool b =
364 ignore (Jsonm.encode enc (`Lexeme (`Bool b)))
365 in
366
367 let enc_opt enc_fn = function
368 | None -> ()
369 | Some v -> enc_fn v
370 in
371
372 let enc_list enc_fn lst =
373 ignore (Jsonm.encode enc (`Lexeme `As));
374 List.iter enc_fn lst;
375 ignore (Jsonm.encode enc (`Lexeme `Ae))
376 in
377
378 let enc_author author =
379 ignore (Jsonm.encode enc (`Lexeme `Os));
380 (match Author.name author with
381 | Some name -> enc_field "name" (fun () -> enc_string name)
382 | None -> ());
383 (match Author.url author with
384 | Some url -> enc_field "url" (fun () -> enc_string url)
385 | None -> ());
386 (match Author.avatar author with
387 | Some avatar -> enc_field "avatar" (fun () -> enc_string avatar)
388 | None -> ());
389 ignore (Jsonm.encode enc (`Lexeme `Oe))
390 in
391
392 let enc_attachment att =
393 ignore (Jsonm.encode enc (`Lexeme `Os));
394 enc_field "url" (fun () -> enc_string (Attachment.url att));
395 enc_field "mime_type" (fun () -> enc_string (Attachment.mime_type att));
396 enc_opt (fun title -> enc_field "title" (fun () -> enc_string title))
397 (Attachment.title att);
398 enc_opt (fun size ->
399 enc_field "size_in_bytes" (fun () ->
400 ignore (Jsonm.encode enc (`Lexeme (`Float (Int64.to_float size))))))
401 (Attachment.size_in_bytes att);
402 enc_opt (fun dur ->
403 enc_field "duration_in_seconds" (fun () ->
404 ignore (Jsonm.encode enc (`Lexeme (`Float (float_of_int dur))))))
405 (Attachment.duration_in_seconds att);
406 ignore (Jsonm.encode enc (`Lexeme `Oe))
407 in
408
409 let enc_reference ref =
410 ignore (Jsonm.encode enc (`Lexeme `Os));
411 enc_field "url" (fun () -> enc_string (Reference.url ref));
412 enc_opt (fun doi -> enc_field "doi" (fun () -> enc_string doi))
413 (Reference.doi ref);
414 enc_opt (fun cito_list ->
415 enc_field "cito" (fun () ->
416 enc_list (fun cito -> enc_string (Cito.to_string cito)) cito_list))
417 (Reference.cito ref);
418 ignore (Jsonm.encode enc (`Lexeme `Oe))
419 in
420
421 let enc_hub hub =
422 ignore (Jsonm.encode enc (`Lexeme `Os));
423 enc_field "type" (fun () -> enc_string (Hub.type_ hub));
424 enc_field "url" (fun () -> enc_string (Hub.url hub));
425 ignore (Jsonm.encode enc (`Lexeme `Oe))
426 in
427
428 let enc_item item =
429 ignore (Jsonm.encode enc (`Lexeme `Os));
430 enc_field "id" (fun () -> enc_string (Item.id item));
431
432 (* Encode content *)
433 (match Item.content item with
434 | `Html html ->
435 enc_field "content_html" (fun () -> enc_string html)
436 | `Text text ->
437 enc_field "content_text" (fun () -> enc_string text)
438 | `Both (html, text) ->
439 enc_field "content_html" (fun () -> enc_string html);
440 enc_field "content_text" (fun () -> enc_string text));
441
442 enc_opt (fun url -> enc_field "url" (fun () -> enc_string url))
443 (Item.url item);
444 enc_opt (fun url -> enc_field "external_url" (fun () -> enc_string url))
445 (Item.external_url item);
446 enc_opt (fun title -> enc_field "title" (fun () -> enc_string title))
447 (Item.title item);
448 enc_opt (fun summary -> enc_field "summary" (fun () -> enc_string summary))
449 (Item.summary item);
450 enc_opt (fun img -> enc_field "image" (fun () -> enc_string img))
451 (Item.image item);
452 enc_opt (fun img -> enc_field "banner_image" (fun () -> enc_string img))
453 (Item.banner_image item);
454 enc_opt (fun date -> enc_field "date_published" (fun () -> enc_string (format_rfc3339 date)))
455 (Item.date_published item);
456 enc_opt (fun date -> enc_field "date_modified" (fun () -> enc_string (format_rfc3339 date)))
457 (Item.date_modified item);
458 enc_opt (fun authors ->
459 enc_field "authors" (fun () -> enc_list enc_author authors))
460 (Item.authors item);
461 enc_opt (fun tags ->
462 enc_field "tags" (fun () -> enc_list enc_string tags))
463 (Item.tags item);
464 enc_opt (fun lang -> enc_field "language" (fun () -> enc_string lang))
465 (Item.language item);
466 enc_opt (fun atts ->
467 enc_field "attachments" (fun () -> enc_list enc_attachment atts))
468 (Item.attachments item);
469 enc_opt (fun refs ->
470 enc_field "_references" (fun () -> enc_list enc_reference refs))
471 (Item.references item);
472
473 ignore (Jsonm.encode enc (`Lexeme `Oe))
474 in
475
476 (* Encode the feed *)
477 ignore (Jsonm.encode enc (`Lexeme `Os));
478 enc_field "version" (fun () -> enc_string feed.version);
479 enc_field "title" (fun () -> enc_string feed.title);
480 enc_opt (fun url -> enc_field "home_page_url" (fun () -> enc_string url))
481 feed.home_page_url;
482 enc_opt (fun url -> enc_field "feed_url" (fun () -> enc_string url))
483 feed.feed_url;
484 enc_opt (fun desc -> enc_field "description" (fun () -> enc_string desc))
485 feed.description;
486 enc_opt (fun comment -> enc_field "user_comment" (fun () -> enc_string comment))
487 feed.user_comment;
488 enc_opt (fun url -> enc_field "next_url" (fun () -> enc_string url))
489 feed.next_url;
490 enc_opt (fun icon -> enc_field "icon" (fun () -> enc_string icon))
491 feed.icon;
492 enc_opt (fun favicon -> enc_field "favicon" (fun () -> enc_string favicon))
493 feed.favicon;
494 enc_opt (fun authors ->
495 enc_field "authors" (fun () -> enc_list enc_author authors))
496 feed.authors;
497 enc_opt (fun lang -> enc_field "language" (fun () -> enc_string lang))
498 feed.language;
499 enc_opt (fun expired -> enc_field "expired" (fun () -> enc_bool expired))
500 feed.expired;
501 enc_opt (fun hubs ->
502 enc_field "hubs" (fun () -> enc_list enc_hub hubs))
503 feed.hubs;
504 enc_field "items" (fun () -> enc_list enc_item feed.items);
505 ignore (Jsonm.encode enc (`Lexeme `Oe));
506 ignore (Jsonm.encode enc `End)
507
508let of_string s =
509 let dec = Jsonm.decoder (`String s) in
510 of_jsonm dec
511
512let to_string ?(minify=false) feed =
513 let buf = Buffer.create 1024 in
514 let enc = Jsonm.encoder ~minify (`Buffer buf) in
515 to_jsonm enc feed;
516 Buffer.contents buf
517
518(* Validation *)
519
520let validate feed =
521 let errors = ref [] in
522 let add_error msg = errors := msg :: !errors in
523
524 (* Check required fields *)
525 if feed.title = "" then
526 add_error "title is required and cannot be empty";
527
528 (* Check items have unique IDs *)
529 let ids = List.map Item.id feed.items in
530 let unique_ids = List.sort_uniq String.compare ids in
531 if List.length ids <> List.length unique_ids then
532 add_error "items must have unique IDs";
533
534 (* Validate authors *)
535 (match feed.authors with
536 | Some authors ->
537 List.iteri (fun i author ->
538 if not (Author.is_valid author) then
539 add_error (Printf.sprintf "feed author %d is invalid (needs at least one field)" i)
540 ) authors
541 | None -> ());
542
543 (* Validate items *)
544 List.iteri (fun i item ->
545 if Item.id item = "" then
546 add_error (Printf.sprintf "item %d has empty ID" i);
547
548 (* Validate item authors *)
549 (match Item.authors item with
550 | Some authors ->
551 List.iteri (fun j author ->
552 if not (Author.is_valid author) then
553 add_error (Printf.sprintf "item %d author %d is invalid" i j)
554 ) authors
555 | None -> ())
556 ) feed.items;
557
558 if !errors = [] then Ok ()
559 else Error (List.rev !errors)
560
561(* Comparison *)
562
563let equal a b =
564 a.version = b.version &&
565 a.title = b.title &&
566 a.home_page_url = b.home_page_url &&
567 a.feed_url = b.feed_url &&
568 a.description = b.description &&
569 a.user_comment = b.user_comment &&
570 a.next_url = b.next_url &&
571 a.icon = b.icon &&
572 a.favicon = b.favicon &&
573 a.language = b.language &&
574 a.expired = b.expired &&
575 (* Note: We're doing structural equality on items *)
576 List.length a.items = List.length b.items
577
578(* Pretty printing *)
579
580let pp_summary ppf feed =
581 Format.fprintf ppf "%s (%d items)" feed.title (List.length feed.items)
582
583let pp ppf feed =
584 Format.fprintf ppf "Feed: %s" feed.title;
585 (match feed.home_page_url with
586 | Some url -> Format.fprintf ppf " (%s)" url
587 | None -> ());
588 Format.fprintf ppf "@\n";
589
590 Format.fprintf ppf " Items: %d@\n" (List.length feed.items);
591
592 (match feed.authors with
593 | Some authors when authors <> [] ->
594 Format.fprintf ppf " Authors: ";
595 List.iteri (fun i author ->
596 if i > 0 then Format.fprintf ppf ", ";
597 Format.fprintf ppf "%a" Author.pp author
598 ) authors;
599 Format.fprintf ppf "@\n"
600 | _ -> ());
601
602 (match feed.language with
603 | Some lang -> Format.fprintf ppf " Language: %s@\n" lang
604 | None -> ())