OCaml library for JSONfeed parsing and creation
1(** JSON Feed format parser and serializer. *)
2
3exception Invalid_feed of string
4
5module Author = Author
6module Attachment = Attachment
7module Hub = Hub
8module Item = Item
9
10type t = {
11 version : string;
12 title : string;
13 home_page_url : string option;
14 feed_url : string option;
15 description : string option;
16 user_comment : string option;
17 next_url : string option;
18 icon : string option;
19 favicon : string option;
20 authors : Author.t list option;
21 language : string option;
22 expired : bool option;
23 hubs : Hub.t list option;
24 items : Item.t list;
25}
26
27let create ~title ?home_page_url ?feed_url ?description ?user_comment
28 ?next_url ?icon ?favicon ?authors ?language ?expired ?hubs ~items () =
29 {
30 version = "https://jsonfeed.org/version/1.1";
31 title;
32 home_page_url;
33 feed_url;
34 description;
35 user_comment;
36 next_url;
37 icon;
38 favicon;
39 authors;
40 language;
41 expired;
42 hubs;
43 items;
44 }
45
46let version t = t.version
47let title t = t.title
48let home_page_url t = t.home_page_url
49let feed_url t = t.feed_url
50let description t = t.description
51let user_comment t = t.user_comment
52let next_url t = t.next_url
53let icon t = t.icon
54let favicon t = t.favicon
55let authors t = t.authors
56let language t = t.language
57let expired t = t.expired
58let hubs t = t.hubs
59let items t = t.items
60
61(* RFC3339 date utilities *)
62
63let parse_rfc3339 s =
64 match Ptime.of_rfc3339 s with
65 | Ok (t, _, _) -> Some t
66 | Error _ -> None
67
68let format_rfc3339 t =
69 Ptime.to_rfc3339 t
70
71(* JSON parsing and serialization *)
72
73type error = [ `Msg of string ]
74
75let error_msgf fmt = Format.kasprintf (fun s -> Error (`Msg s)) fmt
76
77(* JSON parsing helpers *)
78
79type json_value =
80 | Null
81 | Bool of bool
82 | Float of float
83 | String of string
84 | Array of json_value list
85 | Object of (string * json_value) list
86
87let rec decode_value dec =
88 match Jsonm.decode dec with
89 | `Lexeme `Null -> Null
90 | `Lexeme (`Bool b) -> Bool b
91 | `Lexeme (`Float f) -> Float f
92 | `Lexeme (`String s) -> String s
93 | `Lexeme `Os -> decode_object dec []
94 | `Lexeme `As -> decode_array dec []
95 | `Lexeme _ -> Null
96 | `Error err -> raise (Invalid_feed (Format.asprintf "%a" Jsonm.pp_error err))
97 | `End | `Await -> Null
98
99and decode_object dec acc =
100 match Jsonm.decode dec with
101 | `Lexeme `Oe -> Object (List.rev acc)
102 | `Lexeme (`Name n) ->
103 let v = decode_value dec in
104 decode_object dec ((n, v) :: acc)
105 | `Error err -> raise (Invalid_feed (Format.asprintf "%a" Jsonm.pp_error err))
106 | _ -> Object (List.rev acc)
107
108and decode_array dec acc =
109 match Jsonm.decode dec with
110 | `Lexeme `Ae -> Array (List.rev acc)
111 | `Lexeme `Os ->
112 let v = decode_object dec [] in
113 decode_array dec (v :: acc)
114 | `Lexeme `As ->
115 let v = decode_array dec [] in
116 decode_array dec (v :: acc)
117 | `Lexeme `Null -> decode_array dec (Null :: acc)
118 | `Lexeme (`Bool b) -> decode_array dec (Bool b :: acc)
119 | `Lexeme (`Float f) -> decode_array dec (Float f :: acc)
120 | `Lexeme (`String s) -> decode_array dec (String s :: acc)
121 | `Error err -> raise (Invalid_feed (Format.asprintf "%a" Jsonm.pp_error err))
122 | _ -> Array (List.rev acc)
123
124(* Helpers to extract values from JSON *)
125
126let get_string = function String s -> Some s | _ -> None
127let get_bool = function Bool b -> Some b | _ -> None
128let _get_float = function Float f -> Some f | _ -> None
129let get_int = function Float f -> Some (int_of_float f) | _ -> None
130let get_int64 = function Float f -> Some (Int64.of_float f) | _ -> None
131let get_array = function Array arr -> Some arr | _ -> None
132let _get_object = function Object obj -> Some obj | _ -> None
133
134let find_field name obj = List.assoc_opt name obj
135
136let require_field name obj =
137 match find_field name obj with
138 | Some v -> v
139 | None -> raise (Invalid_feed (Printf.sprintf "Missing required field: %s" name))
140
141let require_string name obj =
142 match require_field name obj |> get_string with
143 | Some s -> s
144 | None -> raise (Invalid_feed (Printf.sprintf "Field %s must be a string" name))
145
146let optional_string name obj =
147 match find_field name obj with Some v -> get_string v | None -> None
148
149let optional_bool name obj =
150 match find_field name obj with Some v -> get_bool v | None -> None
151
152let optional_int name obj =
153 match find_field name obj with Some v -> get_int v | None -> None
154
155let optional_int64 name obj =
156 match find_field name obj with Some v -> get_int64 v | None -> None
157
158let optional_array name obj =
159 match find_field name obj with Some v -> get_array v | None -> None
160
161(* Parse Author *)
162
163let parse_author_obj obj =
164 let name = optional_string "name" obj in
165 let url = optional_string "url" obj in
166 let avatar = optional_string "avatar" obj in
167 if name = None && url = None && avatar = None then
168 raise (Invalid_feed "Author must have at least one field");
169 Author.create ?name ?url ?avatar ()
170
171let parse_author = function
172 | Object obj -> parse_author_obj obj
173 | _ -> raise (Invalid_feed "Author must be an object")
174
175(* Parse Attachment *)
176
177let parse_attachment_obj obj =
178 let url = require_string "url" obj in
179 let mime_type = require_string "mime_type" obj in
180 let title = optional_string "title" obj in
181 let size_in_bytes = optional_int64 "size_in_bytes" obj in
182 let duration_in_seconds = optional_int "duration_in_seconds" obj in
183 Attachment.create ~url ~mime_type ?title ?size_in_bytes ?duration_in_seconds ()
184
185let parse_attachment = function
186 | Object obj -> parse_attachment_obj obj
187 | _ -> raise (Invalid_feed "Attachment must be an object")
188
189(* Parse Hub *)
190
191let parse_hub_obj obj =
192 let type_ = require_string "type" obj in
193 let url = require_string "url" obj in
194 Hub.create ~type_ ~url ()
195
196let parse_hub = function
197 | Object obj -> parse_hub_obj obj
198 | _ -> raise (Invalid_feed "Hub must be an object")
199
200(* Parse Item *)
201
202let parse_item_obj obj =
203 let id = require_string "id" obj in
204
205 (* Parse content - at least one required *)
206 let content_html = optional_string "content_html" obj in
207 let content_text = optional_string "content_text" obj in
208 let content = match content_html, content_text with
209 | Some html, Some text -> `Both (html, text)
210 | Some html, None -> `Html html
211 | None, Some text -> `Text text
212 | None, None ->
213 raise (Invalid_feed "Item must have content_html or content_text")
214 in
215
216 let url = optional_string "url" obj in
217 let external_url = optional_string "external_url" obj in
218 let title = optional_string "title" obj in
219 let summary = optional_string "summary" obj in
220 let image = optional_string "image" obj in
221 let banner_image = optional_string "banner_image" obj in
222
223 let date_published =
224 match optional_string "date_published" obj with
225 | Some s -> parse_rfc3339 s
226 | None -> None
227 in
228
229 let date_modified =
230 match optional_string "date_modified" obj with
231 | Some s -> parse_rfc3339 s
232 | None -> None
233 in
234
235 let authors =
236 match optional_array "authors" obj with
237 | Some arr ->
238 let parsed = List.map parse_author arr in
239 if parsed = [] then None else Some parsed
240 | None -> None
241 in
242
243 let tags =
244 match optional_array "tags" obj with
245 | Some arr ->
246 let parsed = List.filter_map get_string arr in
247 if parsed = [] then None else Some parsed
248 | None -> None
249 in
250
251 let language = optional_string "language" obj in
252
253 let attachments =
254 match optional_array "attachments" obj with
255 | Some arr ->
256 let parsed = List.map parse_attachment arr in
257 if parsed = [] then None else Some parsed
258 | None -> None
259 in
260
261 Item.create ~id ~content ?url ?external_url ?title ?summary ?image
262 ?banner_image ?date_published ?date_modified ?authors ?tags ?language
263 ?attachments ()
264
265let parse_item = function
266 | Object obj -> parse_item_obj obj
267 | _ -> raise (Invalid_feed "Item must be an object")
268
269(* Parse Feed *)
270
271let parse_feed_obj obj =
272 let version = require_string "version" obj in
273 let title = require_string "title" obj in
274 let home_page_url = optional_string "home_page_url" obj in
275 let feed_url = optional_string "feed_url" obj in
276 let description = optional_string "description" obj in
277 let user_comment = optional_string "user_comment" obj in
278 let next_url = optional_string "next_url" obj in
279 let icon = optional_string "icon" obj in
280 let favicon = optional_string "favicon" obj in
281 let language = optional_string "language" obj in
282 let expired = optional_bool "expired" obj in
283
284 let authors =
285 match optional_array "authors" obj with
286 | Some arr ->
287 let parsed = List.map parse_author arr in
288 if parsed = [] then None else Some parsed
289 | None -> None
290 in
291
292 let hubs =
293 match optional_array "hubs" obj with
294 | Some arr ->
295 let parsed = List.map parse_hub arr in
296 if parsed = [] then None else Some parsed
297 | None -> None
298 in
299
300 let items =
301 match optional_array "items" obj with
302 | Some arr -> List.map parse_item arr
303 | None -> []
304 in
305
306 {
307 version;
308 title;
309 home_page_url;
310 feed_url;
311 description;
312 user_comment;
313 next_url;
314 icon;
315 favicon;
316 authors;
317 language;
318 expired;
319 hubs;
320 items;
321 }
322
323let of_jsonm dec =
324 try
325 let json = decode_value dec in
326 match json with
327 | Object obj -> Ok (parse_feed_obj obj)
328 | _ -> error_msgf "Feed must be a JSON object"
329 with
330 | Invalid_feed msg -> error_msgf "%s" msg
331
332(* JSON serialization *)
333
334let to_jsonm enc feed =
335 (* Simplified serialization using Jsonm *)
336 let enc_field name value_fn =
337 ignore (Jsonm.encode enc (`Lexeme (`Name name)));
338 value_fn ()
339 in
340
341 let enc_string s =
342 ignore (Jsonm.encode enc (`Lexeme (`String s)))
343 in
344
345 let enc_bool b =
346 ignore (Jsonm.encode enc (`Lexeme (`Bool b)))
347 in
348
349 let enc_opt enc_fn = function
350 | None -> ()
351 | Some v -> enc_fn v
352 in
353
354 let enc_list enc_fn lst =
355 ignore (Jsonm.encode enc (`Lexeme `As));
356 List.iter enc_fn lst;
357 ignore (Jsonm.encode enc (`Lexeme `Ae))
358 in
359
360 let enc_author author =
361 ignore (Jsonm.encode enc (`Lexeme `Os));
362 (match Author.name author with
363 | Some name -> enc_field "name" (fun () -> enc_string name)
364 | None -> ());
365 (match Author.url author with
366 | Some url -> enc_field "url" (fun () -> enc_string url)
367 | None -> ());
368 (match Author.avatar author with
369 | Some avatar -> enc_field "avatar" (fun () -> enc_string avatar)
370 | None -> ());
371 ignore (Jsonm.encode enc (`Lexeme `Oe))
372 in
373
374 let enc_attachment att =
375 ignore (Jsonm.encode enc (`Lexeme `Os));
376 enc_field "url" (fun () -> enc_string (Attachment.url att));
377 enc_field "mime_type" (fun () -> enc_string (Attachment.mime_type att));
378 enc_opt (fun title -> enc_field "title" (fun () -> enc_string title))
379 (Attachment.title att);
380 enc_opt (fun size ->
381 enc_field "size_in_bytes" (fun () ->
382 ignore (Jsonm.encode enc (`Lexeme (`Float (Int64.to_float size))))))
383 (Attachment.size_in_bytes att);
384 enc_opt (fun dur ->
385 enc_field "duration_in_seconds" (fun () ->
386 ignore (Jsonm.encode enc (`Lexeme (`Float (float_of_int dur))))))
387 (Attachment.duration_in_seconds att);
388 ignore (Jsonm.encode enc (`Lexeme `Oe))
389 in
390
391 let enc_hub hub =
392 ignore (Jsonm.encode enc (`Lexeme `Os));
393 enc_field "type" (fun () -> enc_string (Hub.type_ hub));
394 enc_field "url" (fun () -> enc_string (Hub.url hub));
395 ignore (Jsonm.encode enc (`Lexeme `Oe))
396 in
397
398 let enc_item item =
399 ignore (Jsonm.encode enc (`Lexeme `Os));
400 enc_field "id" (fun () -> enc_string (Item.id item));
401
402 (* Encode content *)
403 (match Item.content item with
404 | `Html html ->
405 enc_field "content_html" (fun () -> enc_string html)
406 | `Text text ->
407 enc_field "content_text" (fun () -> enc_string text)
408 | `Both (html, text) ->
409 enc_field "content_html" (fun () -> enc_string html);
410 enc_field "content_text" (fun () -> enc_string text));
411
412 enc_opt (fun url -> enc_field "url" (fun () -> enc_string url))
413 (Item.url item);
414 enc_opt (fun url -> enc_field "external_url" (fun () -> enc_string url))
415 (Item.external_url item);
416 enc_opt (fun title -> enc_field "title" (fun () -> enc_string title))
417 (Item.title item);
418 enc_opt (fun summary -> enc_field "summary" (fun () -> enc_string summary))
419 (Item.summary item);
420 enc_opt (fun img -> enc_field "image" (fun () -> enc_string img))
421 (Item.image item);
422 enc_opt (fun img -> enc_field "banner_image" (fun () -> enc_string img))
423 (Item.banner_image item);
424 enc_opt (fun date -> enc_field "date_published" (fun () -> enc_string (format_rfc3339 date)))
425 (Item.date_published item);
426 enc_opt (fun date -> enc_field "date_modified" (fun () -> enc_string (format_rfc3339 date)))
427 (Item.date_modified item);
428 enc_opt (fun authors ->
429 enc_field "authors" (fun () -> enc_list enc_author authors))
430 (Item.authors item);
431 enc_opt (fun tags ->
432 enc_field "tags" (fun () -> enc_list enc_string tags))
433 (Item.tags item);
434 enc_opt (fun lang -> enc_field "language" (fun () -> enc_string lang))
435 (Item.language item);
436 enc_opt (fun atts ->
437 enc_field "attachments" (fun () -> enc_list enc_attachment atts))
438 (Item.attachments item);
439
440 ignore (Jsonm.encode enc (`Lexeme `Oe))
441 in
442
443 (* Encode the feed *)
444 ignore (Jsonm.encode enc (`Lexeme `Os));
445 enc_field "version" (fun () -> enc_string feed.version);
446 enc_field "title" (fun () -> enc_string feed.title);
447 enc_opt (fun url -> enc_field "home_page_url" (fun () -> enc_string url))
448 feed.home_page_url;
449 enc_opt (fun url -> enc_field "feed_url" (fun () -> enc_string url))
450 feed.feed_url;
451 enc_opt (fun desc -> enc_field "description" (fun () -> enc_string desc))
452 feed.description;
453 enc_opt (fun comment -> enc_field "user_comment" (fun () -> enc_string comment))
454 feed.user_comment;
455 enc_opt (fun url -> enc_field "next_url" (fun () -> enc_string url))
456 feed.next_url;
457 enc_opt (fun icon -> enc_field "icon" (fun () -> enc_string icon))
458 feed.icon;
459 enc_opt (fun favicon -> enc_field "favicon" (fun () -> enc_string favicon))
460 feed.favicon;
461 enc_opt (fun authors ->
462 enc_field "authors" (fun () -> enc_list enc_author authors))
463 feed.authors;
464 enc_opt (fun lang -> enc_field "language" (fun () -> enc_string lang))
465 feed.language;
466 enc_opt (fun expired -> enc_field "expired" (fun () -> enc_bool expired))
467 feed.expired;
468 enc_opt (fun hubs ->
469 enc_field "hubs" (fun () -> enc_list enc_hub hubs))
470 feed.hubs;
471 enc_field "items" (fun () -> enc_list enc_item feed.items);
472 ignore (Jsonm.encode enc (`Lexeme `Oe));
473 ignore (Jsonm.encode enc `End)
474
475let of_string s =
476 let dec = Jsonm.decoder (`String s) in
477 of_jsonm dec
478
479let to_string ?(minify=false) feed =
480 let buf = Buffer.create 1024 in
481 let enc = Jsonm.encoder ~minify (`Buffer buf) in
482 to_jsonm enc feed;
483 Buffer.contents buf
484
485(* Validation *)
486
487let validate feed =
488 let errors = ref [] in
489 let add_error msg = errors := msg :: !errors in
490
491 (* Check required fields *)
492 if feed.title = "" then
493 add_error "title is required and cannot be empty";
494
495 (* Check items have unique IDs *)
496 let ids = List.map Item.id feed.items in
497 let unique_ids = List.sort_uniq String.compare ids in
498 if List.length ids <> List.length unique_ids then
499 add_error "items must have unique IDs";
500
501 (* Validate authors *)
502 (match feed.authors with
503 | Some authors ->
504 List.iteri (fun i author ->
505 if not (Author.is_valid author) then
506 add_error (Printf.sprintf "feed author %d is invalid (needs at least one field)" i)
507 ) authors
508 | None -> ());
509
510 (* Validate items *)
511 List.iteri (fun i item ->
512 if Item.id item = "" then
513 add_error (Printf.sprintf "item %d has empty ID" i);
514
515 (* Validate item authors *)
516 (match Item.authors item with
517 | Some authors ->
518 List.iteri (fun j author ->
519 if not (Author.is_valid author) then
520 add_error (Printf.sprintf "item %d author %d is invalid" i j)
521 ) authors
522 | None -> ())
523 ) feed.items;
524
525 if !errors = [] then Ok ()
526 else Error (List.rev !errors)
527
528(* Comparison *)
529
530let equal a b =
531 a.version = b.version &&
532 a.title = b.title &&
533 a.home_page_url = b.home_page_url &&
534 a.feed_url = b.feed_url &&
535 a.description = b.description &&
536 a.user_comment = b.user_comment &&
537 a.next_url = b.next_url &&
538 a.icon = b.icon &&
539 a.favicon = b.favicon &&
540 a.language = b.language &&
541 a.expired = b.expired &&
542 (* Note: We're doing structural equality on items *)
543 List.length a.items = List.length b.items
544
545(* Pretty printing *)
546
547let pp_summary ppf feed =
548 Format.fprintf ppf "%s (%d items)" feed.title (List.length feed.items)
549
550let pp ppf feed =
551 Format.fprintf ppf "Feed: %s" feed.title;
552 (match feed.home_page_url with
553 | Some url -> Format.fprintf ppf " (%s)" url
554 | None -> ());
555 Format.fprintf ppf "@\n";
556
557 Format.fprintf ppf " Items: %d@\n" (List.length feed.items);
558
559 (match feed.authors with
560 | Some authors when authors <> [] ->
561 Format.fprintf ppf " Authors: ";
562 List.iteri (fun i author ->
563 if i > 0 then Format.fprintf ppf ", ";
564 Format.fprintf ppf "%a" Author.pp author
565 ) authors;
566 Format.fprintf ppf "@\n"
567 | _ -> ());
568
569 (match feed.language with
570 | Some lang -> Format.fprintf ppf " Language: %s@\n" lang
571 | None -> ())