My agentic slop goes here. Not intended for anyone else!
1open Syndic_common.XML
2open Syndic_common.Util
3module XML = Syndic_xml
4module Atom = Syndic_atom
5module Date = Syndic_date
6module Error = Syndic_error
7
8type image =
9 { url: Uri.t
10 ; title: string
11 ; link: Uri.t
12 ; width: int
13 ; (* default 88 *)
14 height: int
15 ; (* default 31 *)
16 description: string option }
17
18type image' =
19 [ `URL of Uri.t
20 | `Title of string
21 | `Link of Uri.t
22 | `Width of int
23 | `Height of int
24 | `Description of string ]
25
26let make_image ~pos (l : [< image'] list) =
27 let url =
28 match find (function `URL _ -> true | _ -> false) l with
29 | Some (`URL u) -> u
30 | _ ->
31 raise
32 (Error.Error
33 (pos, "<image> elements MUST contains exactly one <url> element"))
34 in
35 let title =
36 match find (function `Title _ -> true | _ -> false) l with
37 | Some (`Title t) -> t
38 | _ ->
39 raise
40 (Error.Error
41 (pos, "<image> elements MUST contains exactly one <title> element"))
42 in
43 let link =
44 match find (function `Link _ -> true | _ -> false) l with
45 | Some (`Link l) -> l
46 | _ ->
47 raise
48 (Error.Error
49 (pos, "<image> elements MUST contains exactly one <link> element"))
50 in
51 let width =
52 match find (function `Width _ -> true | _ -> false) l with
53 | Some (`Width w) -> w
54 | _ -> 88
55 (* cf. RFC *)
56 in
57 let height =
58 match find (function `Height _ -> true | _ -> false) l with
59 | Some (`Height h) -> h
60 | _ -> 31
61 (* cf. RFC *)
62 in
63 let description =
64 match find (function `Description _ -> true | _ -> false) l with
65 | Some (`Description s) -> Some s
66 | _ -> None
67 in
68 `Image ({url; title; link; width; height; description} : image)
69
70let url_of_xml ~xmlbase a = `URL (XML.resolve ~xmlbase (Uri.of_string a))
71let url_of_xml' ~xmlbase a = `URL (xmlbase, a)
72
73let image_url_of_xml ~xmlbase (pos, _tag, datas) =
74 try url_of_xml ~xmlbase (get_leaf datas) with Not_found ->
75 raise
76 (Error.Error (pos, "The content of <uri> MUST be a non-empty string"))
77
78let image_title_of_xml ~xmlbase:_ (_pos, _tag, datas) =
79 `Title (try get_leaf datas with Not_found -> "")
80
81let image_link_of_xml ~xmlbase (pos, _tag, datas) =
82 try `Link (XML.resolve ~xmlbase (Uri.of_string (get_leaf datas)))
83 with Not_found ->
84 raise
85 (Error.Error (pos, "The content of <link> MUST be a non-empty string"))
86
87let image_size_of_xml ~max ~xmlbase:_ (pos, tag, datas) =
88 try
89 let size = int_of_string (get_leaf datas) in
90 if size > max then
91 raise
92 (Error.Error
93 ( pos
94 , "size of "
95 ^ get_tag_name tag
96 ^ " exceeded (max is "
97 ^ string_of_int max
98 ^ ")" ))
99 else size
100 with
101 | Not_found ->
102 raise
103 (Error.Error
104 ( pos
105 , "The content of <"
106 ^ get_tag_name tag
107 ^ "> MUST be a non-empty string" ))
108 | Failure _ ->
109 raise
110 (Error.Error
111 (pos, "The content of <" ^ get_tag_name tag ^ "> MUST be an integer"))
112
113let image_width_of_xml ~xmlbase a =
114 `Width (image_size_of_xml ~max:144 ~xmlbase a)
115
116let image_height_of_xml ~xmlbase a =
117 `Height (image_size_of_xml ~max:400 ~xmlbase a)
118
119let image_description_of_xml ~xmlbase:_ (pos, _tag, datas) =
120 try `Description (get_leaf datas) with Not_found ->
121 raise
122 (Error.Error
123 (pos, "The content of <description> MUST be a non-empty string"))
124
125let image_of_xml =
126 let data_producer =
127 [ ("url", image_url_of_xml)
128 ; ("title", image_title_of_xml)
129 ; ("link", image_link_of_xml)
130 ; ("width", image_width_of_xml)
131 ; ("height", image_height_of_xml)
132 ; ("description", image_description_of_xml) ]
133 in
134 generate_catcher ~data_producer make_image
135
136let image_of_xml' =
137 let data_producer =
138 [ ("url", dummy_of_xml ~ctor:url_of_xml')
139 ; ("title", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Title a))
140 ; ("link", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Link (xmlbase, a)))
141 ; ("width", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Width a))
142 ; ("height", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Height a))
143 ; ("description", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Description a))
144 ]
145 in
146 generate_catcher ~data_producer (fun ~pos:_ x -> `Image x)
147
148type cloud = {uri: Uri.t; registerProcedure: string; protocol: string}
149
150type cloud' =
151 [ `Domain of string
152 | `Port of string
153 | `Path of string
154 | `RegisterProcedure of string
155 | `Protocol of string ]
156
157let make_cloud ~pos (l : [< cloud'] list) =
158 let domain =
159 match find (function `Domain _ -> true | _ -> false) l with
160 | Some (`Domain u) -> u
161 | _ ->
162 raise
163 (Error.Error (pos, "Cloud elements MUST have a 'domain' attribute"))
164 in
165 let port =
166 match find (function `Port _ -> true | _ -> false) l with
167 | Some (`Port p) -> int_of_string p
168 | _ ->
169 raise
170 (Error.Error (pos, "Cloud elements MUST have a 'port' attribute"))
171 in
172 let path =
173 match find (function `Path _ -> true | _ -> false) l with
174 | Some (`Path p) -> p
175 | _ ->
176 raise
177 (Error.Error (pos, "Cloud elements MUST have a 'path' attribute"))
178 in
179 let registerProcedure =
180 match find (function `RegisterProcedure _ -> true | _ -> false) l with
181 | Some (`RegisterProcedure r) -> r
182 | _ ->
183 raise
184 (Error.Error
185 (pos, "Cloud elements MUST have a 'registerProcedure' attribute"))
186 in
187 let protocol =
188 match find (function `Protocol _ -> true | _ -> false) l with
189 | Some (`Protocol p) -> p
190 | _ ->
191 raise
192 (Error.Error (pos, "Cloud elements MUST have a 'protocol' attribute"))
193 in
194 let uri = Uri.make ~host:domain ~port ~path () in
195 `Cloud ({uri; registerProcedure; protocol} : cloud)
196
197let cloud_attr_producer =
198 [ ("domain", fun ~xmlbase:_ a -> `Domain a)
199 ; ("port", fun ~xmlbase:_ a -> `Port a)
200 ; ("path", fun ~xmlbase:_ a -> `Path a)
201 ; (* XXX: it's RFC compliant ? *)
202 ("registerProcedure", fun ~xmlbase:_ a -> `RegisterProcedure a)
203 ; ("protocol", fun ~xmlbase:_ a -> `Protocol a) ]
204
205let cloud_of_xml =
206 generate_catcher ~attr_producer:cloud_attr_producer make_cloud
207
208let cloud_of_xml' =
209 generate_catcher ~attr_producer:cloud_attr_producer (fun ~pos:_ x -> `Cloud x)
210
211type textinput = {title: string; description: string; name: string; link: Uri.t}
212
213type textinput' =
214 [`Title of string | `Description of string | `Name of string | `Link of Uri.t]
215
216let make_textinput ~pos (l : [< textinput'] list) =
217 let title =
218 match find (function `Title _ -> true | _ -> false) l with
219 | Some (`Title t) -> t
220 | _ ->
221 raise
222 (Error.Error
223 ( pos
224 , "<textinput> elements MUST contains exactly one <title> element"
225 ))
226 in
227 let description =
228 match find (function `Description _ -> true | _ -> false) l with
229 | Some (`Description s) -> s
230 | _ ->
231 raise
232 (Error.Error
233 ( pos
234 , "<textinput> elements MUST contains exactly one <description> \
235 element" ))
236 in
237 let name =
238 match find (function `Name _ -> true | _ -> false) l with
239 | Some (`Name s) -> s
240 | _ ->
241 raise
242 (Error.Error
243 ( pos
244 , "<textinput> elements MUST contains exactly one <name> element"
245 ))
246 in
247 let link =
248 match find (function `Link _ -> true | _ -> false) l with
249 | Some (`Link u) -> u
250 | _ ->
251 raise
252 (Error.Error
253 ( pos
254 , "<textinput> elements MUST contains exactly one <link> element"
255 ))
256 in
257 `TextInput ({title; description; name; link} : textinput)
258
259let textinput_title_of_xml ~xmlbase:_ (pos, _tag, datas) =
260 try `Title (get_leaf datas) with Not_found ->
261 raise
262 (Error.Error (pos, "The content of <title> MUST be a non-empty string"))
263
264let textinput_description_of_xml ~xmlbase:_ (pos, _tag, datas) =
265 try `Description (get_leaf datas) with Not_found ->
266 raise
267 (Error.Error
268 (pos, "The content of <description> MUST be a non-empty string"))
269
270let textinput_name_of_xml ~xmlbase:_ (pos, _tag, datas) =
271 try `Name (get_leaf datas) with Not_found ->
272 raise
273 (Error.Error (pos, "The content of <name> MUST be a non-empty string"))
274
275let textinput_link_of_xml ~xmlbase (pos, _tag, datas) =
276 try `Link (XML.resolve ~xmlbase (Uri.of_string (get_leaf datas)))
277 with Not_found ->
278 raise
279 (Error.Error (pos, "The content of <link> MUST be a non-empty string"))
280
281let textinput_of_xml =
282 let data_producer =
283 [ ("title", textinput_title_of_xml)
284 ; ("description", textinput_description_of_xml)
285 ; ("name", textinput_name_of_xml)
286 ; ("link", textinput_link_of_xml) ]
287 in
288 generate_catcher ~data_producer make_textinput
289
290let textinput_of_xml' =
291 let data_producer =
292 [ ("title", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Title a))
293 ; ("description", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Description a))
294 ; ("name", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Name a))
295 ; ("link", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Link (xmlbase, a))) ]
296 in
297 generate_catcher ~data_producer (fun ~pos:_ x -> `TextInput x)
298
299type category = {data: string; domain: Uri.t option}
300type category' = [`Data of string | `Domain of Uri.t]
301
302let make_category ~pos:_ (l : [< category'] list) =
303 let data =
304 match find (function `Data _ -> true | _ -> false) l with
305 | Some (`Data s) -> s
306 | _ -> ""
307 in
308 let domain =
309 match find (function `Domain _ -> true | _ -> false) l with
310 | Some (`Domain d) -> Some d
311 | _ -> None
312 in
313 `Category ({data; domain} : category)
314
315let category_of_xml =
316 let attr_producer =
317 [("domain", fun ~xmlbase:_ a -> `Domain (Uri.of_string a))]
318 in
319 let leaf_producer ~xmlbase:_ _pos data = `Data data in
320 generate_catcher ~attr_producer ~leaf_producer make_category
321
322let category_of_xml' =
323 let attr_producer = [("domain", fun ~xmlbase:_ a -> `Domain a)] in
324 let leaf_producer ~xmlbase:_ _pos data = `Data data in
325 generate_catcher ~attr_producer ~leaf_producer (fun ~pos:_ x -> `Category x)
326
327type enclosure = {url: Uri.t; length: int; mime: string}
328type enclosure' = [`URL of Uri.t | `Length of string | `Mime of string]
329
330let make_enclosure ~pos (l : [< enclosure'] list) =
331 let url =
332 match find (function `URL _ -> true | _ -> false) l with
333 | Some (`URL u) -> u
334 | _ ->
335 raise
336 (Error.Error (pos, "Enclosure elements MUST have a 'url' attribute"))
337 in
338 let length =
339 match find (function `Length _ -> true | _ -> false) l with
340 | Some (`Length l) -> int_of_string l
341 | _ ->
342 raise
343 (Error.Error
344 (pos, "Enclosure elements MUST have a 'length' attribute"))
345 in
346 let mime =
347 match find (function `Mime _ -> true | _ -> false) l with
348 | Some (`Mime m) -> m
349 | _ ->
350 raise
351 (Error.Error (pos, "Enclosure elements MUST have a 'type' attribute"))
352 in
353 `Enclosure ({url; length; mime} : enclosure)
354
355let enclosure_of_xml =
356 let attr_producer =
357 [ ("url", url_of_xml)
358 ; ("length", fun ~xmlbase:_ a -> `Length a)
359 ; ("type", fun ~xmlbase:_ a -> `Mime a) ]
360 in
361 generate_catcher ~attr_producer make_enclosure
362
363let enclosure_of_xml' =
364 let attr_producer =
365 [ ("url", url_of_xml')
366 ; ("length", fun ~xmlbase:_ a -> `Length a)
367 ; ("type", fun ~xmlbase:_ a -> `Mime a) ]
368 in
369 generate_catcher ~attr_producer (fun ~pos:_ x -> `Enclosure x)
370
371type guid = {data: Uri.t; (* must be uniq *) permalink: bool (* default true *)}
372type guid' = [`Data of Uri.t option * string | `Permalink of string]
373
374(* Some RSS2 server output <guid isPermaLink="false"></guid> ! *)
375let make_guid ~pos:_ (l : [< guid'] list) =
376 let permalink =
377 match find (function `Permalink _ -> true | _ -> false) l with
378 | Some (`Permalink b) -> bool_of_string b
379 | _ -> true
380 (* cf. RFC *)
381 in
382 match find (function `Data _ -> true | _ -> false) l with
383 | Some (`Data (xmlbase, u)) ->
384 if u = "" then `Guid None
385 else
386 (* When the GUID is declared as a permlink, resolve it using xml:base *)
387 let data =
388 if permalink then XML.resolve ~xmlbase (Uri.of_string u)
389 else Uri.of_string u
390 in
391 `Guid (Some ({data; permalink} : guid))
392 | _ -> `Guid None
393
394let guid_of_xml, guid_of_xml' =
395 let attr_producer = [("isPermaLink", fun ~xmlbase:_ a -> `Permalink a)] in
396 let leaf_producer ~xmlbase _pos data = `Data (xmlbase, data) in
397 ( generate_catcher ~attr_producer ~leaf_producer make_guid
398 , generate_catcher ~attr_producer ~leaf_producer (fun ~pos:_ x -> `Guid x) )
399
400type source = {data: string; url: Uri.t}
401type source' = [`Data of string | `URL of Uri.t]
402
403let make_source ~pos (l : [< source'] list) =
404 let data =
405 match find (function `Data _ -> true | _ -> false) l with
406 | Some (`Data s) -> s
407 | _ ->
408 raise
409 (Error.Error
410 (pos, "The content of <source> MUST be a non-empty string"))
411 in
412 let url =
413 match find (function `URL _ -> true | _ -> false) l with
414 | Some (`URL u) -> u
415 | _ ->
416 raise
417 (Error.Error (pos, "Source elements MUST have a 'url' attribute"))
418 in
419 `Source ({data; url} : source)
420
421let source_of_xml =
422 let attr_producer = [("url", url_of_xml)] in
423 let leaf_producer ~xmlbase:_ _pos data = `Data data in
424 generate_catcher ~attr_producer ~leaf_producer make_source
425
426let source_of_xml' =
427 let attr_producer = [("url", url_of_xml')] in
428 let leaf_producer ~xmlbase:_ _pos data = `Data data in
429 generate_catcher ~attr_producer ~leaf_producer (fun ~pos:_ x -> `Source x)
430
431type story =
432 | All of string * Uri.t option * string
433 | Title of string
434 | Description of Uri.t option * string
435
436type item =
437 { story: story
438 ; content: Uri.t option * string
439 ; link: Uri.t option
440 ; author: string option
441 ; (* e-mail *)
442 categories: category list
443 ; comments: Uri.t option
444 ; enclosure: enclosure option
445 ; guid: guid option
446 ; pubDate: Date.t option
447 ; (* date *)
448 source: source option }
449
450[@@@warning "-34"]
451
452type item' =
453 [ `Title of string
454 | `Description of Uri.t option * string (* xmlbase, description *)
455 | `Content of Uri.t option * string
456 | `Link of Uri.t
457 | `Author of string (* e-mail *)
458 | `Category of category
459 | `Comments of Uri.t
460 | `Enclosure of enclosure
461 | `Guid of guid
462 | `PubDate of Date.t
463 | `Source of source ]
464
465let make_item ~pos (l : _ list) =
466 let story =
467 match
468 ( find (function `Title _ -> true | _ -> false) l
469 , find (function `Description _ -> true | _ -> false) l )
470 with
471 | Some (`Title t), Some (`Description (x, d)) -> All (t, x, d)
472 | Some (`Title t), _ -> Title t
473 | _, Some (`Description (x, d)) -> Description (x, d)
474 | _, _ ->
475 raise (Error.Error (pos, "Item expected <title> or <description> tag"))
476 in
477 let content =
478 match find (function `Content _ -> true | _ -> false) l with
479 | Some (`Content (x, c)) -> (x, c)
480 | _ -> (None, "")
481 in
482 let link =
483 match find (function `Link _ -> true | _ -> false) l with
484 | Some (`Link l) -> l
485 | _ -> None
486 in
487 let author =
488 match find (function `Author _ -> true | _ -> false) l with
489 | Some (`Author a) -> Some a
490 | _ -> None
491 in
492 let categories =
493 let fn = fun acc -> function `Category x -> x :: acc | _ -> acc in
494 List.fold_left fn [] l |> List.rev
495 in
496 let comments =
497 match find (function `Comments _ -> true | _ -> false) l with
498 | Some (`Comments c) -> Some c
499 | _ -> None
500 in
501 let enclosure =
502 match find (function `Enclosure _ -> true | _ -> false) l with
503 | Some (`Enclosure e) -> Some e
504 | _ -> None
505 in
506 let guid =
507 match find (function `Guid _ -> true | _ -> false) l with
508 | Some (`Guid g) -> g
509 | _ -> None
510 in
511 let pubDate =
512 match find (function `PubDate _ -> true | _ -> false) l with
513 | Some (`PubDate p) -> Some p
514 | _ -> None
515 in
516 let source =
517 match find (function `Source _ -> true | _ -> false) l with
518 | Some (`Source s) -> Some s
519 | _ -> None
520 in
521 `Item
522 ( { story
523 ; content
524 ; link
525 ; author
526 ; categories
527 ; comments
528 ; enclosure
529 ; guid
530 ; pubDate
531 ; source }
532 : item )
533
534let item_title_of_xml ~xmlbase:_ (pos, _tag, datas) =
535 try `Title (get_leaf datas) with Not_found ->
536 raise
537 (Error.Error (pos, "The content of <title> MUST be a non-empty string"))
538
539let item_description_of_xml ~xmlbase (_pos, _tag, datas) =
540 `Description (xmlbase, try get_leaf datas with Not_found -> "")
541
542let item_content_of_xml ~xmlbase (_pos, _tag, datas) =
543 `Content (xmlbase, try get_leaf datas with Not_found -> "")
544
545let item_link_of_xml ~xmlbase (_pos, _tag, datas) =
546 `Link
547 ( try Some (XML.resolve ~xmlbase (Uri.of_string (get_leaf datas)))
548 with Not_found -> None )
549
550let item_author_of_xml ~xmlbase:_ (pos, _tag, datas) =
551 try `Author (get_leaf datas) with Not_found ->
552 raise
553 (Error.Error (pos, "The content of <author> MUST be a non-empty string"))
554
555let item_comments_of_xml ~xmlbase (pos, _tag, datas) =
556 try `Comments (XML.resolve ~xmlbase (Uri.of_string (get_leaf datas)))
557 with Not_found ->
558 raise
559 (Error.Error (pos, "The content of <comments> MUST be a non-empty string"))
560
561let item_pubdate_of_xml ~xmlbase:_ (pos, _tag, datas) =
562 try `PubDate (Date.of_rfc822 (get_leaf datas)) with Not_found ->
563 raise
564 (Error.Error (pos, "The content of <pubDate> MUST be a non-empty string"))
565
566let item_namespaces = [""; "http://purl.org/rss/1.0/modules/content/"]
567
568let item_of_xml =
569 let data_producer =
570 [ ("title", item_title_of_xml)
571 ; ("description", item_description_of_xml)
572 ; (* <content:encoded> where
573 xmlns:content="http://purl.org/rss/1.0/modules/content/" *)
574 ("encoded", item_content_of_xml)
575 ; ("link", item_link_of_xml)
576 ; ("author", item_author_of_xml)
577 ; ("category", category_of_xml)
578 ; ("comments", item_comments_of_xml)
579 ; ("enclosure", enclosure_of_xml)
580 ; ("guid", guid_of_xml)
581 ; ("pubDate", item_pubdate_of_xml)
582 ; ("source", source_of_xml) ]
583 in
584 generate_catcher ~data_producer make_item ~namespaces:item_namespaces
585
586let item_of_xml' =
587 let data_producer =
588 [ ("title", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Title a))
589 ; ("description", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Description a))
590 ; ("encoded", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Content a))
591 ; ("link", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Link (xmlbase, a)))
592 ; ("author", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Author a))
593 ; ("category", category_of_xml')
594 ; ("comments", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Comments a))
595 ; ("enclosure", enclosure_of_xml')
596 ; ("guid", guid_of_xml')
597 ; ("pubdate", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `PubDate a))
598 ; ("source", source_of_xml') ]
599 in
600 generate_catcher ~data_producer
601 (fun ~pos:_ x -> `Item x)
602 ~namespaces:item_namespaces
603
604type channel =
605 { title: string
606 ; link: Uri.t
607 ; description: string
608 ; language: string option
609 ; copyright: string option
610 ; managingEditor: string option
611 ; webMaster: string option
612 ; pubDate: Date.t option
613 ; lastBuildDate: Date.t option
614 ; category: string option
615 ; generator: string option
616 ; docs: Uri.t option
617 ; cloud: cloud option
618 ; ttl: int option
619 ; image: image option
620 ; rating: int option
621 ; textInput: textinput option
622 ; skipHours: int option
623 ; skipDays: int option
624 ; items: item list }
625
626type channel' =
627 [ `Title of string
628 | `Link of Uri.t
629 | `Description of string
630 | `Language of string
631 | `Copyright of string
632 | `ManagingEditor of string
633 | `WebMaster of string
634 | `PubDate of Date.t
635 | `LastBuildDate of Date.t
636 | `Category of string
637 | `Generator of string
638 | `Docs of Uri.t
639 | `Cloud of cloud
640 | `TTL of int
641 | `Image of image
642 | `Rating of int
643 | `TextInput of textinput
644 | `SkipHours of int
645 | `SkipDays of int
646 | `Item of item ]
647
648let make_channel ~pos (l : [< channel'] list) =
649 let title =
650 match find (function `Title _ -> true | _ -> false) l with
651 | Some (`Title t) -> t
652 | _ ->
653 raise
654 (Error.Error
655 ( pos
656 , "<channel> elements MUST contains exactly one <title> element"
657 ))
658 in
659 let link =
660 match find (function `Link _ -> true | _ -> false) l with
661 | Some (`Link l) -> l
662 | _ ->
663 raise
664 (Error.Error
665 ( pos
666 , "<channel> elements MUST contains exactly one <link> element" ))
667 in
668 let description =
669 match find (function `Description _ -> true | _ -> false) l with
670 | Some (`Description l) -> l
671 | _ ->
672 raise
673 (Error.Error
674 ( pos
675 , "<channel> elements MUST contains exactly one <description> \
676 element" ))
677 in
678 let language =
679 match find (function `Language _ -> true | _ -> false) l with
680 | Some (`Language a) -> Some a
681 | _ -> None
682 in
683 let copyright =
684 match find (function `Copyright _ -> true | _ -> false) l with
685 | Some (`Copyright a) -> Some a
686 | _ -> None
687 in
688 let managingEditor =
689 match find (function `ManagingEditor _ -> true | _ -> false) l with
690 | Some (`ManagingEditor a) -> Some a
691 | _ -> None
692 in
693 let webMaster =
694 match find (function `WebMaster _ -> true | _ -> false) l with
695 | Some (`WebMaster a) -> Some a
696 | _ -> None
697 in
698 let pubDate =
699 match find (function `PubDate _ -> true | _ -> false) l with
700 | Some (`PubDate a) -> Some a
701 | _ -> None
702 in
703 let lastBuildDate =
704 match find (function `LastBuildDate _ -> true | _ -> false) l with
705 | Some (`LastBuildDate a) -> Some a
706 | _ -> None
707 in
708 let category =
709 match find (function `Category _ -> true | _ -> false) l with
710 | Some (`Category a) -> Some a
711 | _ -> None
712 in
713 let generator =
714 match find (function `Generator _ -> true | _ -> false) l with
715 | Some (`Generator a) -> Some a
716 | _ -> None
717 in
718 let docs =
719 match find (function `Docs _ -> true | _ -> false) l with
720 | Some (`Docs a) -> Some a
721 | _ -> None
722 in
723 let cloud =
724 match find (function `Cloud _ -> true | _ -> false) l with
725 | Some (`Cloud a) -> Some a
726 | _ -> None
727 in
728 let ttl =
729 match find (function `TTL _ -> true | _ -> false) l with
730 | Some (`TTL a) -> Some a
731 | _ -> None
732 in
733 let image =
734 match find (function `Image _ -> true | _ -> false) l with
735 | Some (`Image a) -> Some a
736 | _ -> None
737 in
738 let rating =
739 match find (function `Rating _ -> true | _ -> false) l with
740 | Some (`Rating a) -> Some a
741 | _ -> None
742 in
743 let textInput =
744 match find (function `TextInput _ -> true | _ -> false) l with
745 | Some (`TextInput a) -> Some a
746 | _ -> None
747 in
748 let skipHours =
749 match find (function `SkipHours _ -> true | _ -> false) l with
750 | Some (`SkipHours a) -> Some a
751 | _ -> None
752 in
753 let skipDays =
754 match find (function `SkipDays _ -> true | _ -> false) l with
755 | Some (`SkipDays a) -> Some a
756 | _ -> None
757 in
758 let items =
759 List.fold_left (fun acc -> function `Item x -> x :: acc | _ -> acc) [] l
760 in
761 ( { title
762 ; link
763 ; description
764 ; language
765 ; copyright
766 ; managingEditor
767 ; webMaster
768 ; pubDate
769 ; lastBuildDate
770 ; category
771 ; generator
772 ; docs
773 ; cloud
774 ; ttl
775 ; image
776 ; rating
777 ; textInput
778 ; skipHours
779 ; skipDays
780 ; items }
781 : channel )
782
783let channel_title_of_xml ~xmlbase:_ (pos, _tag, datas) =
784 try `Title (get_leaf datas) with Not_found ->
785 raise
786 (Error.Error (pos, "The content of <title> MUST be a non-empty string"))
787
788let channel_description_of_xml ~xmlbase:_ (_pos, _tag, datas) =
789 `Description (try get_leaf datas with Not_found -> "")
790
791let channel_link_of_xml ~xmlbase (pos, _tag, datas) =
792 try `Link (XML.resolve ~xmlbase (Uri.of_string (get_leaf datas)))
793 with Not_found ->
794 raise
795 (Error.Error (pos, "The content of <link> MUST be a non-empty string"))
796
797let channel_language_of_xml ~xmlbase:_ (pos, _tag, datas) =
798 try `Language (get_leaf datas) with Not_found ->
799 raise
800 (Error.Error (pos, "The content of <language> MUST be a non-empty string"))
801
802let channel_copyright_of_xml ~xmlbase:_ (_pos, _tag, datas) =
803 try `Copyright (get_leaf datas) with Not_found -> `Copyright ""
804
805(* XXX(dinosaure): aempty copyright is allowed. *)
806
807let channel_managingeditor_of_xml ~xmlbase:_ (pos, _tag, datas) =
808 try `ManagingEditor (get_leaf datas) with Not_found ->
809 raise
810 (Error.Error
811 (pos, "The content of <managingEditor> MUST be a non-empty string"))
812
813let channel_webmaster_of_xml ~xmlbase:_ (pos, _tag, datas) =
814 try `WebMaster (get_leaf datas) with Not_found ->
815 raise
816 (Error.Error
817 (pos, "The content of <webMaster> MUST be a non-empty string"))
818
819let channel_pubdate_of_xml ~xmlbase:_ (pos, _tag, datas) =
820 try `PubDate (Date.of_rfc822 (get_leaf datas)) with Not_found ->
821 raise
822 (Error.Error (pos, "The content of <pubDate> MUST be a non-empty string"))
823
824let channel_lastbuilddate_of_xml ~xmlbase:_ (pos, _tag, datas) =
825 try `LastBuildDate (Date.of_rfc822 (get_leaf datas)) with Not_found ->
826 raise
827 (Error.Error
828 (pos, "The content of <lastBuildDate> MUST be a non-empty string"))
829
830let channel_category_of_xml ~xmlbase:_ (pos, _tag, datas) =
831 try `Category (get_leaf datas) with Not_found ->
832 raise
833 (Error.Error (pos, "The content of <category> MUST be a non-empty string"))
834
835let channel_generator_of_xml ~xmlbase:_ (pos, _tag, datas) =
836 try `Generator (get_leaf datas) with Not_found ->
837 raise
838 (Error.Error
839 (pos, "The content of <generator> MUST be a non-empty string"))
840
841let channel_docs_of_xml ~xmlbase (pos, _tag, datas) =
842 try `Docs (XML.resolve ~xmlbase (Uri.of_string (get_leaf datas)))
843 with Not_found ->
844 raise
845 (Error.Error (pos, "The content of <docs> MUST be a non-empty string"))
846
847let channel_ttl_of_xml ~xmlbase:_ (pos, _tag, datas) =
848 try `TTL (int_of_string (get_leaf datas)) with _ ->
849 raise
850 (Error.Error
851 ( pos
852 , "The content of <ttl> MUST be a non-empty string representing an \
853 integer" ))
854
855let channel_rating_of_xml ~xmlbase:_ (pos, _tag, datas) =
856 try `Rating (int_of_string (get_leaf datas)) with _ ->
857 raise
858 (Error.Error
859 ( pos
860 , "The content of <rating> MUST be a non-empty string representing \
861 an integer" ))
862
863let channel_skipHours_of_xml ~xmlbase:_ (pos, _tag, datas) =
864 try `SkipHours (int_of_string (get_leaf datas)) with _ ->
865 raise
866 (Error.Error
867 ( pos
868 , "The content of <skipHours> MUST be a non-empty string \
869 representing an integer" ))
870
871let channel_skipDays_of_xml ~xmlbase:_ (pos, _tag, datas) =
872 try `SkipDays (int_of_string (get_leaf datas)) with _ ->
873 raise
874 (Error.Error
875 ( pos
876 , "The content of <skipDays> MUST be a non-empty string representing \
877 an integer" ))
878
879let channel_of_xml =
880 let data_producer =
881 [ ("title", channel_title_of_xml)
882 ; ("link", channel_link_of_xml)
883 ; ("description", channel_description_of_xml)
884 ; ("Language", channel_language_of_xml)
885 ; ("copyright", channel_copyright_of_xml)
886 ; ("managingeditor", channel_managingeditor_of_xml)
887 ; ("webmaster", channel_webmaster_of_xml)
888 ; ("pubdate", channel_pubdate_of_xml)
889 ; ("lastbuilddate", channel_lastbuilddate_of_xml)
890 ; ("category", channel_category_of_xml)
891 ; ("generator", channel_generator_of_xml)
892 ; ("docs", channel_docs_of_xml)
893 ; ("cloud", cloud_of_xml)
894 ; ("ttl", channel_ttl_of_xml)
895 ; ("image", image_of_xml)
896 ; ("rating", channel_rating_of_xml)
897 ; ("textinput", textinput_of_xml)
898 ; ("skiphours", channel_skipHours_of_xml)
899 ; ("skipdays", channel_skipDays_of_xml)
900 ; ("item", item_of_xml) ]
901 in
902 generate_catcher ~data_producer make_channel
903
904let channel_of_xml' =
905 let data_producer =
906 [ ("title", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Title a))
907 ; ("link", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Link (xmlbase, a)))
908 ; ("description", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Description a))
909 ; ("Language", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Language a))
910 ; ("copyright", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Copyright a))
911 ; ( "managingeditor"
912 , dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `ManagingEditor a) )
913 ; ("webmaster", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `WebMaster a))
914 ; ("pubdate", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `PubDate a))
915 ; ( "lastbuilddate"
916 , dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `LastBuildDate a) )
917 ; ("category", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Category a))
918 ; ("generator", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Generator a))
919 ; ("docs", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Docs a))
920 ; ("cloud", cloud_of_xml')
921 ; ("ttl", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `TTL a))
922 ; ("image", image_of_xml')
923 ; ("rating", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Rating a))
924 ; ("textinput", textinput_of_xml')
925 ; ("skiphours", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `SkipHours a))
926 ; ("skipdays", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `SkipDays a))
927 ; ("item", item_of_xml') ]
928 in
929 generate_catcher ~data_producer (fun ~pos:_ x -> x)
930
931let find_channel l =
932 find
933 (function
934 | XML.Node (_pos, tag, _data) -> tag_is tag "channel"
935 | XML.Data _ -> false)
936 l
937
938let parse ?xmlbase input =
939 match XML.of_xmlm input |> snd with
940 | XML.Node (pos, tag, data) -> (
941 if tag_is tag "channel" then channel_of_xml ~xmlbase (pos, tag, data)
942 else
943 match find_channel data with
944 | Some (XML.Node (p, t, d)) -> channel_of_xml ~xmlbase (p, t, d)
945 | Some (XML.Data _) | _ ->
946 raise
947 (Error.Error
948 ( (0, 0)
949 , "document MUST contains exactly one <channel> element" )) )
950 | _ ->
951 raise
952 (Error.Error
953 ((0, 0), "document MUST contains exactly one <channel> element"))
954
955let read ?xmlbase fname =
956 let fh = open_in fname in
957 try
958 let x = parse ?xmlbase (XML.input_of_channel fh) in
959 close_in fh ; x
960 with e -> close_in fh ; raise e
961
962type uri = Uri.t option * string
963
964let unsafe ?xmlbase input =
965 match XML.of_xmlm input |> snd with
966 | XML.Node (pos, tag, data) -> (
967 if tag_is tag "channel" then
968 `Channel (channel_of_xml' ~xmlbase (pos, tag, data))
969 else
970 match find_channel data with
971 | Some (XML.Node (p, t, d)) ->
972 `Channel (channel_of_xml' ~xmlbase (p, t, d))
973 | Some (XML.Data _) | None -> `Channel [] )
974 | _ -> `Channel []
975
976(* Conversion to Atom *)
977
978let map_option o f = match o with None -> None | Some v -> Some (f v)
979
980(* Assume ASCII or a superset like UTF-8. *)
981let valid_local_part =
982 let is_valid c =
983 let c = Char.unsafe_chr c in
984 ('a' <= c && c <= 'z')
985 || ('A' <= c && c <= 'Z')
986 || ('0' <= c && c <= '9')
987 || c = '.'
988 (* shouldn't be the 1st char and not appear twice consecutively *)
989 || c = '!'
990 || c = '#'
991 || c = '$'
992 || c = '%'
993 || c = '&'
994 || c = '\''
995 || c = '*'
996 || c = '+'
997 || c = '-'
998 || c = '/'
999 || c = '='
1000 || c = '?'
1001 || c = '^'
1002 || c = '_'
1003 || c = '`'
1004 || c = '{'
1005 || c = '|'
1006 || c = '}'
1007 || c = '~'
1008 in
1009 Array.init 256 is_valid
1010
1011let is_valid_local_part c = valid_local_part.(Char.code c)
1012
1013let valid_domain_part =
1014 let is_valid c =
1015 let c = Char.unsafe_chr c in
1016 ('a' <= c && c <= 'z')
1017 || ('A' <= c && c <= 'Z')
1018 || ('0' <= c && c <= '9')
1019 || c = '.'
1020 || c = '.'
1021 in
1022 Array.init 256 is_valid
1023
1024let is_valid_domain_part c = valid_domain_part.(Char.code c)
1025
1026(* Valid range [s.[i]], [i0 ≤ i < i1]. *)
1027let sub_no_braces s i0 i1 =
1028 let i0 = if s.[i0] = '(' then i0 + 1 else i0 in
1029 let i1 = if s.[i1 - 1] = ')' then i1 - 1 else i1 in
1030 String.sub s i0 (i1 - i0)
1031
1032(* The item author sometimes contains the name and email under the form "name
1033 <email>" or "email (name)". Try to extract both compnents. *)
1034let extract_name_email a =
1035 try
1036 let i = String.index a '@' in
1037 (* or Not_found *)
1038 let len = String.length a in
1039 let i0 = ref (i - 1) in
1040 while !i0 >= 0 && is_valid_local_part a.[!i0] do
1041 decr i0
1042 done ;
1043 incr i0 ;
1044 (* !i0 >= 0 is the first char of the possible email. *)
1045 let i1 = ref (i + 1) in
1046 while !i1 < len && is_valid_domain_part a.[!i1] do
1047 incr i1
1048 done ;
1049 if !i0 < i && i + 1 < !i1 then (
1050 let email = String.sub a !i0 (!i1 - !i0) in
1051 if !i0 > 0 && a.[!i0 - 1] = '<' then decr i0 ;
1052 if !i1 < len && a.[!i1] = '>' then incr i1 ;
1053 while !i1 < len && a.[!i1] = ' ' do
1054 incr i1
1055 done ;
1056 (* skip spaces *)
1057 let name =
1058 if !i0 <= 0 then
1059 if !i1 >= len then email (* no name *) else sub_no_braces a !i1 len
1060 else
1061 (* !i0 > 0 *)
1062 let name0 = String.trim (String.sub a 0 !i0) in
1063 if !i1 >= len then name0 else name0 ^ String.sub a !i1 (len - !i1)
1064 in
1065 (name, Some email) )
1066 else (a, None)
1067 with Not_found -> (a, None)
1068
1069let looks_like_a_link u =
1070 (Uri.scheme u = Some "http" || Uri.scheme u = Some "https")
1071 && match Uri.host u with None | Some "" -> false | Some _ -> true
1072
1073let entry_of_item ch_link ch_updated (it : item) : Atom.entry =
1074 let author =
1075 match it.author with
1076 | Some a ->
1077 let name, email = extract_name_email a in
1078 {Atom.name; uri= None; email}
1079 | None ->
1080 (* If no author is specified for the item, there is little one can do
1081 just using the RSS2 feed. The user will have to set it using Atom
1082 convenience functions. *)
1083 {Atom.name= ""; uri= None; email= None}
1084 in
1085 let categories =
1086 let fn (c : category) = { Atom.term= c.data; scheme= map_option c.domain (fun d -> d); label= None } in
1087 List.map fn it.categories
1088 in
1089 let (title : Atom.title), content =
1090 match it.story with
1091 | All (t, xmlbase, d) ->
1092 let content =
1093 match it.content with
1094 | _, "" -> if d = "" then None else Some (Atom.Html (xmlbase, d))
1095 | x, c -> Some (Atom.Html (x, c))
1096 in
1097 (Atom.Text t, content)
1098 | Title t ->
1099 let content =
1100 match it.content with
1101 | _, "" -> None
1102 | x, c -> Some (Atom.Html (x, c))
1103 in
1104 (Atom.Text t, content)
1105 | Description (xmlbase, d) ->
1106 let content =
1107 match it.content with
1108 | _, "" -> if d = "" then None else Some (Atom.Html (xmlbase, d))
1109 | x, c -> Some (Atom.Html (x, c))
1110 in
1111 (Atom.Text "", content)
1112 in
1113 let id =
1114 match it.guid with
1115 | Some g ->
1116 if g.permalink || looks_like_a_link g.data then g.data
1117 else
1118 let d = Digest.to_hex (Digest.string (Uri.to_string g.data)) in
1119 Uri.with_fragment ch_link (Some d)
1120 | None ->
1121 (* The [it.link] may not be a permanent link and may also be used by
1122 other items. We use a digest to make it unique. *)
1123 let link = match it.link with Some l -> l | None -> ch_link in
1124 let s =
1125 match it.story with
1126 | All (t, _, d) -> t ^ d
1127 | Title t -> t
1128 | Description (_, d) -> d
1129 in
1130 let d = Digest.to_hex (Digest.string s) in
1131 Uri.with_fragment link (Some d)
1132 in
1133 let links =
1134 match (it.guid, it.link) with
1135 | Some g, _ when g.permalink -> [Atom.link g.data ~rel:Atom.Alternate]
1136 | _, Some l -> [Atom.link l ~rel:Atom.Alternate]
1137 | Some g, _ ->
1138 (* Sometimes the guid sets [l.permalink = false] but is nonetheless the
1139 only URI we have. *)
1140 if looks_like_a_link g.data then [Atom.link g.data ~rel:Atom.Alternate]
1141 else []
1142 | _, None -> []
1143 in
1144 let links =
1145 match it.comments with
1146 | Some l ->
1147 { Atom.href= l
1148 ; rel= Atom.Related
1149 ; type_media= None
1150 ; hreflang= None
1151 ; title= ""
1152 ; length= None }
1153 :: links
1154 | None -> links
1155 in
1156 let links =
1157 match it.enclosure with
1158 | Some e ->
1159 { Atom.href= e.url
1160 ; rel= Atom.Enclosure
1161 ; type_media= Some e.mime
1162 ; hreflang= None
1163 ; title= ""
1164 ; length= Some e.length }
1165 :: links
1166 | None -> links
1167 in
1168 let source =
1169 match it.source with
1170 | Some s ->
1171 Some
1172 { Atom.authors= [author]
1173 ; (* Best guess *)
1174 categories= []
1175 ; contributors= []
1176 ; generator= None
1177 ; icon= None
1178 ; id= ch_link
1179 ; (* declared as the ID of the whole channel *)
1180 links=
1181 [ { Atom.href= s.url
1182 ; rel= Atom.Related
1183 ; type_media= None
1184 ; hreflang= None
1185 ; title= ""
1186 ; length= None } ]
1187 ; logo= None
1188 ; rights= None
1189 ; subtitle= None
1190 ; title= Atom.Text s.data
1191 ; updated= None }
1192 | None -> None
1193 in
1194 { Atom.authors= (author, [])
1195 ; categories
1196 ; content
1197 ; contributors= []
1198 ; id
1199 ; links
1200 ; published= None
1201 ; rights= None
1202 ; source
1203 ; summary= None
1204 ; title
1205 ; updated= (match it.pubDate with Some d -> d | None -> ch_updated) }
1206
1207let more_recent_of_item date (it : item) =
1208 match (date, it.pubDate) with
1209 | _, None -> date
1210 | None, Some _ -> it.pubDate
1211 | Some d, Some di -> if Date.compare d di >= 0 then date else it.pubDate
1212
1213let max_date_opt d = function None -> d | Some d' -> Date.max d d'
1214
1215let to_atom ?self (ch : channel) : Atom.feed =
1216 let contributors =
1217 match ch.webMaster with
1218 | Some p -> [{Atom.name= "Webmaster"; uri= None; email= Some p}]
1219 | None -> []
1220 in
1221 let contributors =
1222 match ch.managingEditor with
1223 | Some p ->
1224 {Atom.name= "Managing Editor"; uri= None; email= Some p}
1225 :: contributors
1226 | None -> contributors
1227 in
1228 let links =
1229 [ { Atom.href= ch.link
1230 ; rel= Atom.Related
1231 ; type_media= Some "text/html"
1232 ; hreflang= None
1233 ; title= ch.title
1234 ; length= None } ]
1235 in
1236 let links =
1237 match self with
1238 | Some self ->
1239 { Atom.href= self
1240 ; rel= Atom.Self
1241 ; type_media= Some "application/rss+xml"
1242 ; hreflang= None
1243 ; title= ch.title
1244 ; length= None }
1245 :: links
1246 | None -> links
1247 in
1248 let updated =
1249 match List.fold_left more_recent_of_item None ch.items with
1250 | None -> max_date_opt Date.epoch ch.lastBuildDate
1251 | Some d -> max_date_opt d ch.lastBuildDate
1252 in
1253 { Atom.authors= []
1254 ; categories=
1255 ( match ch.category with
1256 | None -> []
1257 | Some c -> [{Atom.term= c; scheme= None; label= None}] )
1258 ; contributors
1259 ; generator=
1260 map_option ch.generator (fun g ->
1261 {Atom.content= g; version= None; uri= None} )
1262 ; icon= None
1263 ; id= ch.link
1264 ; (* FIXME: Best we can do? *)
1265 links
1266 ; logo= map_option ch.image (fun i -> i.url)
1267 ; rights= map_option ch.copyright (fun c -> (Atom.Text c : Atom.rights))
1268 ; subtitle= None
1269 ; title= Atom.Text ch.title
1270 ; updated
1271 ; entries= List.map (entry_of_item ch.link updated) ch.items }