OCaml library for JSONfeed parsing and creation
1(** JSON Feed format parser and serializer. *)
2
3exception Invalid_feed of string
4
5module Author = Author
6module Attachment = Attachment
7module Hub = Hub
8module Item = Item
9module Reference = Reference
10module Cito = Cito
11
12type t = {
13 version : string;
14 title : string;
15 home_page_url : string option;
16 feed_url : string option;
17 description : string option;
18 user_comment : string option;
19 next_url : string option;
20 icon : string option;
21 favicon : string option;
22 authors : Author.t list option;
23 language : string option;
24 expired : bool option;
25 hubs : Hub.t list option;
26 items : Item.t list;
27}
28
29let create ~title ?home_page_url ?feed_url ?description ?user_comment
30 ?next_url ?icon ?favicon ?authors ?language ?expired ?hubs ~items () =
31 {
32 version = "https://jsonfeed.org/version/1.1";
33 title;
34 home_page_url;
35 feed_url;
36 description;
37 user_comment;
38 next_url;
39 icon;
40 favicon;
41 authors;
42 language;
43 expired;
44 hubs;
45 items;
46 }
47
48let version t = t.version
49let title t = t.title
50let home_page_url t = t.home_page_url
51let feed_url t = t.feed_url
52let description t = t.description
53let user_comment t = t.user_comment
54let next_url t = t.next_url
55let icon t = t.icon
56let favicon t = t.favicon
57let authors t = t.authors
58let language t = t.language
59let expired t = t.expired
60let hubs t = t.hubs
61let items t = t.items
62
63(* RFC3339 date utilities *)
64
65let parse_rfc3339 s =
66 match Ptime.of_rfc3339 s with
67 | Ok (t, _, _) -> Some t
68 | Error _ -> None
69
70let format_rfc3339 t =
71 Ptime.to_rfc3339 t
72
73(* JSON parsing and serialization *)
74
75type error = [ `Msg of string ]
76
77let error_msgf fmt = Format.kasprintf (fun s -> Error (`Msg s)) fmt
78
79(* JSON parsing helpers *)
80
81type json_value =
82 | Null
83 | Bool of bool
84 | Float of float
85 | String of string
86 | Array of json_value list
87 | Object of (string * json_value) list
88
89let rec decode_value dec =
90 match Jsonm.decode dec with
91 | `Lexeme `Null -> Null
92 | `Lexeme (`Bool b) -> Bool b
93 | `Lexeme (`Float f) -> Float f
94 | `Lexeme (`String s) -> String s
95 | `Lexeme `Os -> decode_object dec []
96 | `Lexeme `As -> decode_array dec []
97 | `Lexeme _ -> Null
98 | `Error err -> raise (Invalid_feed (Format.asprintf "%a" Jsonm.pp_error err))
99 | `End | `Await -> Null
100
101and decode_object dec acc =
102 match Jsonm.decode dec with
103 | `Lexeme `Oe -> Object (List.rev acc)
104 | `Lexeme (`Name n) ->
105 let v = decode_value dec in
106 decode_object dec ((n, v) :: acc)
107 | `Error err -> raise (Invalid_feed (Format.asprintf "%a" Jsonm.pp_error err))
108 | _ -> Object (List.rev acc)
109
110and decode_array dec acc =
111 match Jsonm.decode dec with
112 | `Lexeme `Ae -> Array (List.rev acc)
113 | `Lexeme `Os ->
114 let v = decode_object dec [] in
115 decode_array dec (v :: acc)
116 | `Lexeme `As ->
117 let v = decode_array dec [] in
118 decode_array dec (v :: acc)
119 | `Lexeme `Null -> decode_array dec (Null :: acc)
120 | `Lexeme (`Bool b) -> decode_array dec (Bool b :: acc)
121 | `Lexeme (`Float f) -> decode_array dec (Float f :: acc)
122 | `Lexeme (`String s) -> decode_array dec (String s :: acc)
123 | `Error err -> raise (Invalid_feed (Format.asprintf "%a" Jsonm.pp_error err))
124 | _ -> Array (List.rev acc)
125
126(* Helpers to extract values from JSON *)
127
128let get_string = function String s -> Some s | _ -> None
129let get_bool = function Bool b -> Some b | _ -> None
130let _get_float = function Float f -> Some f | _ -> None
131let get_int = function Float f -> Some (int_of_float f) | _ -> None
132let get_int64 = function Float f -> Some (Int64.of_float f) | _ -> None
133let get_array = function Array arr -> Some arr | _ -> None
134let _get_object = function Object obj -> Some obj | _ -> None
135
136let find_field name obj = List.assoc_opt name obj
137
138let require_field name obj =
139 match find_field name obj with
140 | Some v -> v
141 | None -> raise (Invalid_feed (Printf.sprintf "Missing required field: %s" name))
142
143let require_string name obj =
144 match require_field name obj |> get_string with
145 | Some s -> s
146 | None -> raise (Invalid_feed (Printf.sprintf "Field %s must be a string" name))
147
148let optional_string name obj =
149 match find_field name obj with Some v -> get_string v | None -> None
150
151let optional_bool name obj =
152 match find_field name obj with Some v -> get_bool v | None -> None
153
154let optional_int name obj =
155 match find_field name obj with Some v -> get_int v | None -> None
156
157let optional_int64 name obj =
158 match find_field name obj with Some v -> get_int64 v | None -> None
159
160let optional_array name obj =
161 match find_field name obj with Some v -> get_array v | None -> None
162
163(* Parse Author *)
164
165let parse_author_obj obj =
166 let name = optional_string "name" obj in
167 let url = optional_string "url" obj in
168 let avatar = optional_string "avatar" obj in
169 if name = None && url = None && avatar = None then
170 raise (Invalid_feed "Author must have at least one field");
171 Author.create ?name ?url ?avatar ()
172
173let parse_author = function
174 | Object obj -> parse_author_obj obj
175 | _ -> raise (Invalid_feed "Author must be an object")
176
177(* Parse Attachment *)
178
179let parse_attachment_obj obj =
180 let url = require_string "url" obj in
181 let mime_type = require_string "mime_type" obj in
182 let title = optional_string "title" obj in
183 let size_in_bytes = optional_int64 "size_in_bytes" obj in
184 let duration_in_seconds = optional_int "duration_in_seconds" obj in
185 Attachment.create ~url ~mime_type ?title ?size_in_bytes ?duration_in_seconds ()
186
187let parse_attachment = function
188 | Object obj -> parse_attachment_obj obj
189 | _ -> raise (Invalid_feed "Attachment must be an object")
190
191(* Parse Hub *)
192
193let parse_hub_obj obj =
194 let type_ = require_string "type" obj in
195 let url = require_string "url" obj in
196 Hub.create ~type_ ~url ()
197
198let parse_hub = function
199 | Object obj -> parse_hub_obj obj
200 | _ -> raise (Invalid_feed "Hub must be an object")
201
202(* Parse Item *)
203
204let parse_item_obj obj =
205 let id = require_string "id" obj in
206
207 (* Parse content - at least one required *)
208 let content_html = optional_string "content_html" obj in
209 let content_text = optional_string "content_text" obj in
210 let content = match content_html, content_text with
211 | Some html, Some text -> `Both (html, text)
212 | Some html, None -> `Html html
213 | None, Some text -> `Text text
214 | None, None ->
215 raise (Invalid_feed "Item must have content_html or content_text")
216 in
217
218 let url = optional_string "url" obj in
219 let external_url = optional_string "external_url" obj in
220 let title = optional_string "title" obj in
221 let summary = optional_string "summary" obj in
222 let image = optional_string "image" obj in
223 let banner_image = optional_string "banner_image" obj in
224
225 let date_published =
226 match optional_string "date_published" obj with
227 | Some s -> parse_rfc3339 s
228 | None -> None
229 in
230
231 let date_modified =
232 match optional_string "date_modified" obj with
233 | Some s -> parse_rfc3339 s
234 | None -> None
235 in
236
237 let authors =
238 match optional_array "authors" obj with
239 | Some arr ->
240 let parsed = List.map parse_author arr in
241 if parsed = [] then None else Some parsed
242 | None -> None
243 in
244
245 let tags =
246 match optional_array "tags" obj with
247 | Some arr ->
248 let parsed = List.filter_map get_string arr in
249 if parsed = [] then None else Some parsed
250 | None -> None
251 in
252
253 let language = optional_string "language" obj in
254
255 let attachments =
256 match optional_array "attachments" obj with
257 | Some arr ->
258 let parsed = List.map parse_attachment arr in
259 if parsed = [] then None else Some parsed
260 | None -> None
261 in
262
263 Item.create ~id ~content ?url ?external_url ?title ?summary ?image
264 ?banner_image ?date_published ?date_modified ?authors ?tags ?language
265 ?attachments ()
266
267let parse_item = function
268 | Object obj -> parse_item_obj obj
269 | _ -> raise (Invalid_feed "Item must be an object")
270
271(* Parse Feed *)
272
273let parse_feed_obj obj =
274 let version = require_string "version" obj in
275 let title = require_string "title" obj in
276 let home_page_url = optional_string "home_page_url" obj in
277 let feed_url = optional_string "feed_url" obj in
278 let description = optional_string "description" obj in
279 let user_comment = optional_string "user_comment" obj in
280 let next_url = optional_string "next_url" obj in
281 let icon = optional_string "icon" obj in
282 let favicon = optional_string "favicon" obj in
283 let language = optional_string "language" obj in
284 let expired = optional_bool "expired" obj in
285
286 let authors =
287 match optional_array "authors" obj with
288 | Some arr ->
289 let parsed = List.map parse_author arr in
290 if parsed = [] then None else Some parsed
291 | None -> None
292 in
293
294 let hubs =
295 match optional_array "hubs" obj with
296 | Some arr ->
297 let parsed = List.map parse_hub arr in
298 if parsed = [] then None else Some parsed
299 | None -> None
300 in
301
302 let items =
303 match optional_array "items" obj with
304 | Some arr -> List.map parse_item arr
305 | None -> []
306 in
307
308 {
309 version;
310 title;
311 home_page_url;
312 feed_url;
313 description;
314 user_comment;
315 next_url;
316 icon;
317 favicon;
318 authors;
319 language;
320 expired;
321 hubs;
322 items;
323 }
324
325let of_jsonm dec =
326 try
327 let json = decode_value dec in
328 match json with
329 | Object obj -> Ok (parse_feed_obj obj)
330 | _ -> error_msgf "Feed must be a JSON object"
331 with
332 | Invalid_feed msg -> error_msgf "%s" msg
333
334(* JSON serialization *)
335
336let to_jsonm enc feed =
337 (* Simplified serialization using Jsonm *)
338 let enc_field name value_fn =
339 ignore (Jsonm.encode enc (`Lexeme (`Name name)));
340 value_fn ()
341 in
342
343 let enc_string s =
344 ignore (Jsonm.encode enc (`Lexeme (`String s)))
345 in
346
347 let enc_bool b =
348 ignore (Jsonm.encode enc (`Lexeme (`Bool b)))
349 in
350
351 let enc_opt enc_fn = function
352 | None -> ()
353 | Some v -> enc_fn v
354 in
355
356 let enc_list enc_fn lst =
357 ignore (Jsonm.encode enc (`Lexeme `As));
358 List.iter enc_fn lst;
359 ignore (Jsonm.encode enc (`Lexeme `Ae))
360 in
361
362 let enc_author author =
363 ignore (Jsonm.encode enc (`Lexeme `Os));
364 (match Author.name author with
365 | Some name -> enc_field "name" (fun () -> enc_string name)
366 | None -> ());
367 (match Author.url author with
368 | Some url -> enc_field "url" (fun () -> enc_string url)
369 | None -> ());
370 (match Author.avatar author with
371 | Some avatar -> enc_field "avatar" (fun () -> enc_string avatar)
372 | None -> ());
373 ignore (Jsonm.encode enc (`Lexeme `Oe))
374 in
375
376 let enc_attachment att =
377 ignore (Jsonm.encode enc (`Lexeme `Os));
378 enc_field "url" (fun () -> enc_string (Attachment.url att));
379 enc_field "mime_type" (fun () -> enc_string (Attachment.mime_type att));
380 enc_opt (fun title -> enc_field "title" (fun () -> enc_string title))
381 (Attachment.title att);
382 enc_opt (fun size ->
383 enc_field "size_in_bytes" (fun () ->
384 ignore (Jsonm.encode enc (`Lexeme (`Float (Int64.to_float size))))))
385 (Attachment.size_in_bytes att);
386 enc_opt (fun dur ->
387 enc_field "duration_in_seconds" (fun () ->
388 ignore (Jsonm.encode enc (`Lexeme (`Float (float_of_int dur))))))
389 (Attachment.duration_in_seconds att);
390 ignore (Jsonm.encode enc (`Lexeme `Oe))
391 in
392
393 let enc_hub hub =
394 ignore (Jsonm.encode enc (`Lexeme `Os));
395 enc_field "type" (fun () -> enc_string (Hub.type_ hub));
396 enc_field "url" (fun () -> enc_string (Hub.url hub));
397 ignore (Jsonm.encode enc (`Lexeme `Oe))
398 in
399
400 let enc_item item =
401 ignore (Jsonm.encode enc (`Lexeme `Os));
402 enc_field "id" (fun () -> enc_string (Item.id item));
403
404 (* Encode content *)
405 (match Item.content item with
406 | `Html html ->
407 enc_field "content_html" (fun () -> enc_string html)
408 | `Text text ->
409 enc_field "content_text" (fun () -> enc_string text)
410 | `Both (html, text) ->
411 enc_field "content_html" (fun () -> enc_string html);
412 enc_field "content_text" (fun () -> enc_string text));
413
414 enc_opt (fun url -> enc_field "url" (fun () -> enc_string url))
415 (Item.url item);
416 enc_opt (fun url -> enc_field "external_url" (fun () -> enc_string url))
417 (Item.external_url item);
418 enc_opt (fun title -> enc_field "title" (fun () -> enc_string title))
419 (Item.title item);
420 enc_opt (fun summary -> enc_field "summary" (fun () -> enc_string summary))
421 (Item.summary item);
422 enc_opt (fun img -> enc_field "image" (fun () -> enc_string img))
423 (Item.image item);
424 enc_opt (fun img -> enc_field "banner_image" (fun () -> enc_string img))
425 (Item.banner_image item);
426 enc_opt (fun date -> enc_field "date_published" (fun () -> enc_string (format_rfc3339 date)))
427 (Item.date_published item);
428 enc_opt (fun date -> enc_field "date_modified" (fun () -> enc_string (format_rfc3339 date)))
429 (Item.date_modified item);
430 enc_opt (fun authors ->
431 enc_field "authors" (fun () -> enc_list enc_author authors))
432 (Item.authors item);
433 enc_opt (fun tags ->
434 enc_field "tags" (fun () -> enc_list enc_string tags))
435 (Item.tags item);
436 enc_opt (fun lang -> enc_field "language" (fun () -> enc_string lang))
437 (Item.language item);
438 enc_opt (fun atts ->
439 enc_field "attachments" (fun () -> enc_list enc_attachment atts))
440 (Item.attachments item);
441
442 ignore (Jsonm.encode enc (`Lexeme `Oe))
443 in
444
445 (* Encode the feed *)
446 ignore (Jsonm.encode enc (`Lexeme `Os));
447 enc_field "version" (fun () -> enc_string feed.version);
448 enc_field "title" (fun () -> enc_string feed.title);
449 enc_opt (fun url -> enc_field "home_page_url" (fun () -> enc_string url))
450 feed.home_page_url;
451 enc_opt (fun url -> enc_field "feed_url" (fun () -> enc_string url))
452 feed.feed_url;
453 enc_opt (fun desc -> enc_field "description" (fun () -> enc_string desc))
454 feed.description;
455 enc_opt (fun comment -> enc_field "user_comment" (fun () -> enc_string comment))
456 feed.user_comment;
457 enc_opt (fun url -> enc_field "next_url" (fun () -> enc_string url))
458 feed.next_url;
459 enc_opt (fun icon -> enc_field "icon" (fun () -> enc_string icon))
460 feed.icon;
461 enc_opt (fun favicon -> enc_field "favicon" (fun () -> enc_string favicon))
462 feed.favicon;
463 enc_opt (fun authors ->
464 enc_field "authors" (fun () -> enc_list enc_author authors))
465 feed.authors;
466 enc_opt (fun lang -> enc_field "language" (fun () -> enc_string lang))
467 feed.language;
468 enc_opt (fun expired -> enc_field "expired" (fun () -> enc_bool expired))
469 feed.expired;
470 enc_opt (fun hubs ->
471 enc_field "hubs" (fun () -> enc_list enc_hub hubs))
472 feed.hubs;
473 enc_field "items" (fun () -> enc_list enc_item feed.items);
474 ignore (Jsonm.encode enc (`Lexeme `Oe));
475 ignore (Jsonm.encode enc `End)
476
477let of_string s =
478 let dec = Jsonm.decoder (`String s) in
479 of_jsonm dec
480
481let to_string ?(minify=false) feed =
482 let buf = Buffer.create 1024 in
483 let enc = Jsonm.encoder ~minify (`Buffer buf) in
484 to_jsonm enc feed;
485 Buffer.contents buf
486
487(* Validation *)
488
489let validate feed =
490 let errors = ref [] in
491 let add_error msg = errors := msg :: !errors in
492
493 (* Check required fields *)
494 if feed.title = "" then
495 add_error "title is required and cannot be empty";
496
497 (* Check items have unique IDs *)
498 let ids = List.map Item.id feed.items in
499 let unique_ids = List.sort_uniq String.compare ids in
500 if List.length ids <> List.length unique_ids then
501 add_error "items must have unique IDs";
502
503 (* Validate authors *)
504 (match feed.authors with
505 | Some authors ->
506 List.iteri (fun i author ->
507 if not (Author.is_valid author) then
508 add_error (Printf.sprintf "feed author %d is invalid (needs at least one field)" i)
509 ) authors
510 | None -> ());
511
512 (* Validate items *)
513 List.iteri (fun i item ->
514 if Item.id item = "" then
515 add_error (Printf.sprintf "item %d has empty ID" i);
516
517 (* Validate item authors *)
518 (match Item.authors item with
519 | Some authors ->
520 List.iteri (fun j author ->
521 if not (Author.is_valid author) then
522 add_error (Printf.sprintf "item %d author %d is invalid" i j)
523 ) authors
524 | None -> ())
525 ) feed.items;
526
527 if !errors = [] then Ok ()
528 else Error (List.rev !errors)
529
530(* Comparison *)
531
532let equal a b =
533 a.version = b.version &&
534 a.title = b.title &&
535 a.home_page_url = b.home_page_url &&
536 a.feed_url = b.feed_url &&
537 a.description = b.description &&
538 a.user_comment = b.user_comment &&
539 a.next_url = b.next_url &&
540 a.icon = b.icon &&
541 a.favicon = b.favicon &&
542 a.language = b.language &&
543 a.expired = b.expired &&
544 (* Note: We're doing structural equality on items *)
545 List.length a.items = List.length b.items
546
547(* Pretty printing *)
548
549let pp_summary ppf feed =
550 Format.fprintf ppf "%s (%d items)" feed.title (List.length feed.items)
551
552let pp ppf feed =
553 Format.fprintf ppf "Feed: %s" feed.title;
554 (match feed.home_page_url with
555 | Some url -> Format.fprintf ppf " (%s)" url
556 | None -> ());
557 Format.fprintf ppf "@\n";
558
559 Format.fprintf ppf " Items: %d@\n" (List.length feed.items);
560
561 (match feed.authors with
562 | Some authors when authors <> [] ->
563 Format.fprintf ppf " Authors: ";
564 List.iteri (fun i author ->
565 if i > 0 then Format.fprintf ppf ", ";
566 Format.fprintf ppf "%a" Author.pp author
567 ) authors;
568 Format.fprintf ppf "@\n"
569 | _ -> ());
570
571 (match feed.language with
572 | Some lang -> Format.fprintf ppf " Language: %s@\n" lang
573 | None -> ())