My agentic slop goes here. Not intended for anyone else!
1open Syndic_common.XML
2open Syndic_common.Util
3module XML = Syndic_xml
4module Error = Syndic_error
5module Date = Syndic_date
6
7let atom_ns = "http://www.w3.org/2005/Atom"
8let xhtml_ns = "http://www.w3.org/1999/xhtml"
9let namespaces = [atom_ns]
10
11type rel = Alternate | Related | Self | Enclosure | Via | Link of Uri.t
12
13type link =
14 { href: Uri.t
15 ; rel: rel
16 ; type_media: string option
17 ; hreflang: string option
18 ; title: string
19 ; length: int option }
20
21let link ?type_media ?hreflang ?(title = "") ?length ?(rel = Alternate) href =
22 {href; rel; type_media; hreflang; title; length}
23
24type link' =
25 [ `HREF of Uri.t
26 | `Rel of string
27 | `Type of string
28 | `HREFLang of string
29 | `Title of string
30 | `Length of string ]
31
32(* The actual XML content is supposed to be inside a <div> which is NOT part of
33 the content. *)
34let rec get_xml_content xml0 = function
35 | XML.Data (_, s) :: tl ->
36 if only_whitespace s then get_xml_content xml0 tl
37 else xml0 (* unexpected *)
38 | XML.Node (_pos, tag, data) :: tl when tag_is tag "div" ->
39 let is_space =
40 List.for_all
41 (function XML.Data (_, s) -> only_whitespace s | _ -> false)
42 tl
43 in
44 if is_space then data else xml0
45 | _ -> xml0
46
47let no_namespace = Some ""
48let rm_namespace _ = no_namespace
49
50(* For HTML, the spec says the whole content needs to be escaped
51 http://tools.ietf.org/html/rfc4287#section-3.1.1.2 (some feeds use <![CDATA[
52 ]]>) so a single data item should be present. If not, assume the HTML was
53 properly parsed and convert it back to a string as it should. *)
54let get_html_content html =
55 match html with
56 | [XML.Data (_, d)] -> d
57 | h ->
58 (* It is likely that, when the HTML was parsed, the Atom namespace was
59 applied. Remove it. *)
60 String.concat "" (List.map (XML.to_string ~ns_prefix:rm_namespace) h)
61
62type text_construct =
63 | Text of string
64 | Html of Uri.t option * string
65 | Xhtml of Uri.t option * XML.t list
66
67let text_construct_of_xml ~xmlbase
68 ((_pos, (_tag, attr), data) : XML.pos * XML.tag * t list) =
69 let xmlbase = xmlbase_of_attr ~xmlbase attr in
70 match find (fun a -> attr_is a "type") attr with
71 | Some (_, "html") -> Html (xmlbase, get_html_content data)
72 | Some (_, "application/xhtml+xml") | Some (_, "xhtml") ->
73 Xhtml (xmlbase, get_xml_content data data)
74 | _ -> Text (get_leaf data)
75
76type author = {name: string; uri: Uri.t option; email: string option}
77
78let empty_author = {name= ""; uri= None; email= None}
79let not_empty_author a = a.name <> "" || a.uri <> None || a.email <> None
80let author ?uri ?email name = {uri; email; name}
81
82type person' = [`Name of string | `URI of Uri.t | `Email of string]
83
84let make_person datas ~pos:_ (l : [< person'] list) =
85 (* element atom:name { text } *)
86 let name =
87 match find (function `Name _ -> true | _ -> false) l with
88 | Some (`Name s) -> s
89 | _ ->
90 (* The spec mandates that <author><name>name</name></author> but
91 several feeds just do <author>name</author> *)
92 get_leaf datas
93 in
94 (* element atom:uri { atomUri }? *)
95 let uri =
96 match find (function `URI _ -> true | _ -> false) l with
97 | Some (`URI u) -> Some u
98 | _ -> None
99 in
100 (* element atom:email { atomEmailAddress }? *)
101 let email =
102 match find (function `Email _ -> true | _ -> false) l with
103 | Some (`Email e) -> Some e
104 | _ -> None
105 in
106 ({name; uri; email} : author)
107
108let make_author datas ~pos a = `Author (make_person datas ~pos a)
109
110let person_name_of_xml ~xmlbase:_ (_pos, _tag, datas) =
111 `Name (try get_leaf datas with Not_found -> "")
112
113(* mandatory ? *)
114
115let person_uri_of_xml ~xmlbase (pos, _tag, datas) =
116 try `URI (XML.resolve ~xmlbase (Uri.of_string (get_leaf datas)))
117 with Not_found ->
118 raise
119 (Error.Error (pos, "The content of <uri> MUST be a non-empty string"))
120
121let person_email_of_xml ~xmlbase:_ (_pos, _tag, datas) =
122 `Email (try get_leaf datas with Not_found -> "")
123
124(* mandatory ? *)
125
126(* {[ atomAuthor = element atom:author { atomPersonConstruct } ]} where
127
128 atomPersonConstruct = atomCommonAttributes, (element atom:name { text } &
129 element atom:uri { atomUri }? & element atom:email { atomEmailAddress }? &
130 extensionElement * )
131
132 This specification assigns no significance to the order of appearance of the
133 child elements in a Person construct. *)
134let person_data_producer =
135 [ ("name", person_name_of_xml)
136 ; ("uri", person_uri_of_xml)
137 ; ("email", person_email_of_xml) ]
138
139let author_of_xml ~xmlbase ((_, _, datas) as xml) =
140 generate_catcher ~namespaces ~data_producer:person_data_producer
141 (make_author datas) ~xmlbase xml
142
143type uri = Uri.t option * string
144type person = [`Email of string | `Name of string | `URI of uri] list
145
146let person_data_producer' =
147 [ ("name", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Name a))
148 ; ("uri", dummy_of_xml ~ctor:(fun ~xmlbase a -> `URI (xmlbase, a)))
149 ; ("email", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Email a)) ]
150
151let author_of_xml' =
152 generate_catcher ~namespaces ~data_producer:person_data_producer'
153 (fun ~pos:_ x -> `Author x )
154
155type category = {term: string; scheme: Uri.t option; label: string option}
156
157let category ?scheme ?label term = {scheme; label; term}
158
159type category' = [`Term of string | `Scheme of Uri.t | `Label of string]
160
161let make_category ~pos (l : [< category'] list) =
162 (* attribute term { text } *)
163 let term =
164 match find (function `Term _ -> true | _ -> false) l with
165 | Some (`Term t) -> t
166 | _ ->
167 raise
168 (Error.Error (pos, "Category elements MUST have a 'term' attribute"))
169 in
170 (* attribute scheme { atomUri }? *)
171 let scheme =
172 match find (function `Scheme _ -> true | _ -> false) l with
173 | Some (`Scheme u) -> Some u
174 | _ -> None
175 in
176 (* attribute label { text }? *)
177 let label =
178 match find (function `Label _ -> true | _ -> false) l with
179 | Some (`Label l) -> Some l
180 | _ -> None
181 in
182 `Category ({term; scheme; label} : category)
183
184let scheme_of_xml ~xmlbase a = `Scheme (XML.resolve ~xmlbase (Uri.of_string a))
185
186(* atomCategory = element atom:category { atomCommonAttributes, attribute term
187 { text }, attribute scheme { atomUri }?, attribute label { text }?,
188 undefinedContent } *)
189let category_attr_producer =
190 [ ("term", fun ~xmlbase:_ a -> `Term a)
191 ; ("label", fun ~xmlbase:_ a -> `Label a) ]
192
193let category_of_xml =
194 let attr_producer = ("scheme", scheme_of_xml) :: category_attr_producer in
195 generate_catcher ~attr_producer make_category
196
197let category_of_xml' =
198 let attr_producer =
199 ("scheme", fun ~xmlbase:_ a -> `Scheme a) :: category_attr_producer
200 in
201 generate_catcher ~attr_producer (fun ~pos:_ x -> `Category x)
202
203let make_contributor datas ~pos a = `Contributor (make_person datas ~pos a)
204
205let contributor_of_xml ~xmlbase ((_, _, datas) as xml) =
206 generate_catcher ~namespaces ~data_producer:person_data_producer
207 (make_contributor datas) ~xmlbase xml
208
209let contributor_of_xml' =
210 generate_catcher ~namespaces ~data_producer:person_data_producer'
211 (fun ~pos:_ x -> `Contributor x )
212
213type generator = {version: string option; uri: Uri.t option; content: string}
214
215let generator ?uri ?version content = {uri; version; content}
216
217type generator' = [`URI of Uri.t | `Version of string | `Content of string]
218
219let make_generator ~pos (l : [< generator'] list) =
220 (* text *)
221 let content =
222 match find (function `Content _ -> true | _ -> false) l with
223 | Some (`Content c) -> c
224 | _ ->
225 raise
226 (Error.Error
227 (pos, "The content of <generator> MUST be a non-empty string"))
228 in
229 (* attribute version { text }? *)
230 let version =
231 match find (function `Version _ -> true | _ -> false) l with
232 | Some (`Version v) -> Some v
233 | _ -> None
234 in
235 (* attribute uri { atomUri }? *)
236 let uri =
237 match find (function `URI _ -> true | _ -> false) l with
238 | Some (`URI u) -> Some u
239 | _ -> None
240 in
241 `Generator ({version; uri; content} : generator)
242
243(* URI, if present, MUST be an IRI reference [RFC3987]. The definition of "IRI"
244 excludes relative references but we resolve it anyway in case this is not
245 respected by the generator. *)
246let generator_uri_of_xml ~xmlbase a =
247 `URI (XML.resolve ~xmlbase (Uri.of_string a))
248
249(* atomGenerator = element atom:generator { atomCommonAttributes, attribute uri
250 { atomUri }?, attribute version { text }?, text } *)
251let generator_of_xml =
252 let attr_producer =
253 [("version", fun ~xmlbase:_ a -> `Version a); ("uri", generator_uri_of_xml)]
254 in
255 let leaf_producer ~xmlbase:_ _pos data = `Content data in
256 generate_catcher ~attr_producer ~leaf_producer make_generator
257
258let generator_of_xml' =
259 let attr_producer =
260 [ ("version", fun ~xmlbase:_ a -> `Version a)
261 ; ("uri", fun ~xmlbase a -> `URI (xmlbase, a)) ]
262 in
263 let leaf_producer ~xmlbase:_ _pos data = `Content data in
264 generate_catcher ~attr_producer ~leaf_producer (fun ~pos:_ x -> `Generator x)
265
266type icon = Uri.t
267
268let make_icon ~pos (l : Uri.t list) =
269 (* (atomUri) *)
270 let uri =
271 match l with
272 | u :: _ -> u
273 | [] ->
274 raise
275 (Error.Error (pos, "The content of <icon> MUST be a non-empty string"))
276 in
277 `Icon uri
278
279(* atomIcon = element atom:icon { atomCommonAttributes, } *)
280let icon_of_xml =
281 let leaf_producer ~xmlbase _pos data =
282 XML.resolve ~xmlbase (Uri.of_string data)
283 in
284 generate_catcher ~leaf_producer make_icon
285
286let icon_of_xml' =
287 let leaf_producer ~xmlbase _pos data = `URI (xmlbase, data) in
288 generate_catcher ~leaf_producer (fun ~pos:_ x -> `Icon x)
289
290type id = Uri.t
291
292let make_id ~pos (l : string list) =
293 (* (atomUri) *)
294 let id =
295 match l with
296 | u :: _ -> Uri.of_string u
297 | [] ->
298 raise
299 (Error.Error (pos, "The content of <id> MUST be a non-empty string"))
300 in
301 `ID id
302
303(* atomId = element atom:id { atomCommonAttributes, (atomUri) } *)
304let id_of_xml, id_of_xml' =
305 let leaf_producer ~xmlbase:_ _pos data = data in
306 ( generate_catcher ~leaf_producer make_id
307 , generate_catcher ~leaf_producer (fun ~pos:_ x -> `ID x) )
308
309let rel_of_string s =
310 match String.lowercase_ascii (String.trim s) with
311 | "alternate" -> Alternate
312 | "related" -> Related
313 | "self" -> Self
314 | "enclosure" -> Enclosure
315 | "via" -> Via
316 | uri ->
317 (* RFC 4287 § 4.2.7.2: the use of a relative reference other than a
318 simple name is not allowed. Thus no need to resolve against xml:base. *)
319 Link (Uri.of_string uri)
320
321let make_link ~pos (l : [< link'] list) =
322 (* attribute href { atomUri } *)
323 let href =
324 match find (function `HREF _ -> true | _ -> false) l with
325 | Some (`HREF u) -> u
326 | _ ->
327 raise (Error.Error (pos, "Link elements MUST have a 'href' attribute"))
328 in
329 (* attribute rel { atomNCName | atomUri }? *)
330 let rel =
331 match find (function `Rel _ -> true | _ -> false) l with
332 | Some (`Rel r) -> rel_of_string r
333 | _ -> Alternate
334 (* cf. RFC 4287 § 4.2.7.2 *)
335 in
336 (* attribute type { atomMediaType }? *)
337 let type_media =
338 match find (function `Type _ -> true | _ -> false) l with
339 | Some (`Type t) -> Some t
340 | _ -> None
341 in
342 (* attribute hreflang { atomLanguageTag }? *)
343 let hreflang =
344 match find (function `HREFLang _ -> true | _ -> false) l with
345 | Some (`HREFLang l) -> Some l
346 | _ -> None
347 in
348 (* attribute title { text }? *)
349 let title =
350 match find (function `Title _ -> true | _ -> false) l with
351 | Some (`Title s) -> s
352 | _ -> ""
353 in
354 (* attribute length { text }? *)
355 let length =
356 match find (function `Length _ -> true | _ -> false) l with
357 | Some (`Length i) -> Some (int_of_string i)
358 | _ -> None
359 in
360 `Link ({href; rel; type_media; hreflang; title; length} : link)
361
362let link_href_of_xml ~xmlbase a =
363 `HREF (XML.resolve ~xmlbase (Uri.of_string a))
364
365(* atomLink = element atom:link { atomCommonAttributes, attribute href {
366 atomUri }, attribute rel { atomNCName | atomUri }?, attribute type {
367 atomMediaType }?, attribute hreflang { atomLanguageTag }?, attribute title {
368 text }?, attribute length { text }?, undefinedContent } *)
369let link_attr_producer =
370 [ ("rel", fun ~xmlbase:_ a -> `Rel a)
371 ; ("type", fun ~xmlbase:_ a -> `Type a)
372 ; ("hreflang", fun ~xmlbase:_ a -> `HREFLang a)
373 ; ("title", fun ~xmlbase:_ a -> `Title a)
374 ; ("length", fun ~xmlbase:_ a -> `Length a) ]
375
376let link_of_xml =
377 let attr_producer = ("href", link_href_of_xml) :: link_attr_producer in
378 generate_catcher ~attr_producer make_link
379
380let link_of_xml' =
381 let attr_producer =
382 ("href", fun ~xmlbase:_ a -> `HREF a) :: link_attr_producer
383 in
384 generate_catcher ~attr_producer (fun ~pos:_ x -> `Link x)
385
386type logo = Uri.t
387
388let make_logo ~pos (l : Uri.t list) =
389 (* (atomUri) *)
390 let uri =
391 match l with
392 | u :: _ -> u
393 | [] ->
394 raise
395 (Error.Error (pos, "The content of <logo> MUST be a non-empty string"))
396 in
397 `Logo uri
398
399(* atomLogo = element atom:logo { atomCommonAttributes, (atomUri) } *)
400let logo_of_xml =
401 let leaf_producer ~xmlbase _pos data =
402 XML.resolve ~xmlbase (Uri.of_string data)
403 in
404 generate_catcher ~leaf_producer make_logo
405
406let logo_of_xml' =
407 let leaf_producer ~xmlbase _pos data = `URI (xmlbase, data) in
408 generate_catcher ~leaf_producer (fun ~pos:_ x -> `Logo x)
409
410type published = Date.t
411type published' = [`Date of string]
412
413let make_published ~pos (l : [< published'] list) =
414 (* atom:published { atomDateConstruct } *)
415 let date =
416 match find (fun (`Date _) -> true) l with
417 | Some (`Date d) -> Date.of_rfc3339 d
418 | _ ->
419 raise
420 (Error.Error
421 (pos, "The content of <published> MUST be a non-empty string"))
422 in
423 `Published date
424
425(* atomPublished = element atom:published { atomDateConstruct } *)
426let published_of_xml, published_of_xml' =
427 let leaf_producer ~xmlbase:_ _pos data = `Date data in
428 ( generate_catcher ~leaf_producer make_published
429 , generate_catcher ~leaf_producer (fun ~pos:_ x -> `Published x) )
430
431type rights = text_construct
432
433let rights_of_xml ~xmlbase a = `Rights (text_construct_of_xml ~xmlbase a)
434
435(* atomRights = element atom:rights { atomTextConstruct } *)
436let rights_of_xml' ~xmlbase:_
437 ((_pos, (_tag, _attr), data) : XML.pos * XML.tag * t list) =
438 `Rights data
439
440type title = text_construct
441
442let title_of_xml ~xmlbase a = `Title (text_construct_of_xml ~xmlbase a)
443
444(* atomTitle = element atom:title { atomTextConstruct } *)
445let title_of_xml' ~xmlbase:_
446 ((_pos, (_tag, _attr), data) : XML.pos * XML.tag * t list) =
447 `Title data
448
449type subtitle = text_construct
450
451let subtitle_of_xml ~xmlbase a = `Subtitle (text_construct_of_xml ~xmlbase a)
452
453(* atomSubtitle = element atom:subtitle { atomTextConstruct } *)
454let subtitle_of_xml' ~xmlbase:_
455 ((_pos, (_tag, _attr), data) : XML.pos * XML.tag * t list) =
456 `Subtitle data
457
458type updated = Date.t
459type updated' = [`Date of string]
460
461let make_updated ~pos (l : [< updated'] list) =
462 (* atom:updated { atomDateConstruct } *)
463 let updated =
464 match find (fun (`Date _) -> true) l with
465 | Some (`Date d) -> Date.of_rfc3339 d
466 | _ ->
467 raise
468 (Error.Error
469 (pos, "The content of <updated> MUST be a non-empty string"))
470 in
471 `Updated updated
472
473(* atomUpdated = element atom:updated { atomDateConstruct } *)
474let updated_of_xml, updated_of_xml' =
475 let leaf_producer ~xmlbase:_ _pos data = `Date data in
476 ( generate_catcher ~leaf_producer make_updated
477 , generate_catcher ~leaf_producer (fun ~pos:_ x -> `Updated x) )
478
479type source =
480 { authors: author list
481 ; categories: category list
482 ; contributors: author list
483 ; generator: generator option
484 ; icon: icon option
485 ; id: id
486 ; links: link list
487 ; logo: logo option
488 ; rights: rights option
489 ; subtitle: subtitle option
490 ; title: title
491 ; updated: updated option }
492
493let source ?(categories = []) ?(contributors = []) ?generator ?icon
494 ?(links = []) ?logo ?rights ?subtitle ?updated ~authors ~id title =
495 { authors
496 ; categories
497 ; contributors
498 ; generator
499 ; icon
500 ; id
501 ; links
502 ; logo
503 ; rights
504 ; subtitle
505 ; title
506 ; updated }
507
508type source' =
509 [ `Author of author
510 | `Category of category
511 | `Contributor of author
512 | `Generator of generator
513 | `Icon of icon
514 | `ID of id
515 | `Link of link
516 | `Logo of logo
517 | `Subtitle of subtitle
518 | `Title of title
519 | `Rights of rights
520 | `Updated of updated ]
521
522let make_source ~pos (l : [< source'] list) =
523 (* atomAuthor* *)
524 let authors =
525 List.fold_left
526 (fun acc -> function `Author x -> x :: acc | _ -> acc)
527 [] l
528 in
529 (* atomCategory* *)
530 let categories =
531 List.fold_left
532 (fun acc -> function `Category x -> x :: acc | _ -> acc)
533 [] l
534 in
535 (* atomContributor* *)
536 let contributors =
537 List.fold_left
538 (fun acc -> function `Contributor x -> x :: acc | _ -> acc)
539 [] l
540 in
541 (* atomGenerator? *)
542 let generator =
543 match find (function `Generator _ -> true | _ -> false) l with
544 | Some (`Generator g) -> Some g
545 | _ -> None
546 in
547 (* atomIcon? *)
548 let icon =
549 match find (function `Icon _ -> true | _ -> false) l with
550 | Some (`Icon u) -> Some u
551 | _ -> None
552 in
553 (* atomId? *)
554 let id =
555 match find (function `ID _ -> true | _ -> false) l with
556 | Some (`ID i) -> i
557 | _ ->
558 raise
559 (Error.Error
560 (pos, "<source> elements MUST contains exactly one <id> elements"))
561 in
562 (* atomLink* *)
563 let links =
564 List.fold_left (fun acc -> function `Link x -> x :: acc | _ -> acc) [] l
565 in
566 (* atomLogo? *)
567 let logo =
568 match find (function `Logo _ -> true | _ -> false) l with
569 | Some (`Logo u) -> Some u
570 | _ -> None
571 in
572 (* atomRights? *)
573 let rights =
574 match find (function `Rights _ -> true | _ -> false) l with
575 | Some (`Rights r) -> Some r
576 | _ -> None
577 in
578 (* atomSubtitle? *)
579 let subtitle =
580 match find (function `Subtitle _ -> true | _ -> false) l with
581 | Some (`Subtitle s) -> Some s
582 | _ -> None
583 in
584 (* atomTitle? *)
585 let title =
586 match find (function `Title _ -> true | _ -> false) l with
587 | Some (`Title s) -> s
588 | _ ->
589 raise
590 (Error.Error
591 ( pos
592 , "<source> elements MUST contains exactly one <title> elements"
593 ))
594 in
595 (* atomUpdated? *)
596 let updated =
597 match find (function `Updated _ -> true | _ -> false) l with
598 | Some (`Updated d) -> Some d
599 | _ -> None
600 in
601 `Source
602 ( { authors
603 ; categories
604 ; contributors
605 ; generator
606 ; icon
607 ; id
608 ; links
609 ; logo
610 ; rights
611 ; subtitle
612 ; title
613 ; updated }
614 : source )
615
616(* atomSource = element atom:source { atomCommonAttributes, (atomAuthor* &
617 atomCategory* & atomContributor* & atomGenerator? & atomIcon? & atomId? &
618 atomLink* & atomLogo? & atomRights? & atomSubtitle? & atomTitle? &
619 atomUpdated? & extensionElement * ) } *)
620let source_of_xml =
621 let data_producer =
622 [ ("author", author_of_xml)
623 ; ("category", category_of_xml)
624 ; ("contributor", contributor_of_xml)
625 ; ("generator", generator_of_xml)
626 ; ("icon", icon_of_xml); ("id", id_of_xml); ("link", link_of_xml)
627 ; ("logo", logo_of_xml); ("rights", rights_of_xml)
628 ; ("subtitle", subtitle_of_xml)
629 ; ("title", title_of_xml)
630 ; ("updated", updated_of_xml) ]
631 in
632 generate_catcher ~namespaces ~data_producer make_source
633
634let source_of_xml' =
635 let data_producer =
636 [ ("author", author_of_xml')
637 ; ("category", category_of_xml')
638 ; ("contributor", contributor_of_xml')
639 ; ("generator", generator_of_xml')
640 ; ("icon", icon_of_xml'); ("id", id_of_xml'); ("link", link_of_xml')
641 ; ("logo", logo_of_xml'); ("rights", rights_of_xml')
642 ; ("subtitle", subtitle_of_xml')
643 ; ("title", title_of_xml')
644 ; ("updated", updated_of_xml') ]
645 in
646 generate_catcher ~namespaces ~data_producer (fun ~pos:_ x -> `Source x)
647
648type mime = string
649
650type content =
651 | Text of string
652 | Html of Uri.t option * string
653 | Xhtml of Uri.t option * Syndic_xml.t list
654 | Mime of mime * string
655 | Src of mime option * Uri.t
656
657[@@@warning "-34"]
658
659type content' = [`Type of string | `SRC of string | `Data of Syndic_xml.t list]
660
661(* atomInlineTextContent = element atom:content { atomCommonAttributes,
662 attribute type { "text" | "html" }?, (text)* }
663
664 atomInlineXHTMLContent = element atom:content { atomCommonAttributes,
665 attribute type { "xhtml" }, xhtmlDiv }
666
667 atomInlineOtherContent = element atom:content { atomCommonAttributes,
668 attribute type { atomMediaType }?, (text|anyElement)* }
669
670 atomOutOfLineContent = element atom:content { atomCommonAttributes,
671 attribute type { atomMediaType }?, attribute src { atomUri }, empty }
672
673 atomContent = atomInlineTextContent | atomInlineXHTMLContent |
674 atomInlineOtherContent | atomOutOfLineContent *)
675let content_of_xml ~xmlbase
676 ((_pos, (_tag, attr), data) : XML.pos * XML.tag * t list) =
677 (* MIME ::= attribute type { "text" | "html" }? | attribute type { "xhtml" }
678 | attribute type { atomMediaType }? *)
679 (* attribute src { atomUri } | none If src s present, [data] MUST be empty. *)
680 match find (fun a -> attr_is a "src") attr with
681 | Some (_, src) ->
682 let mime =
683 match find (fun a -> attr_is a "type") attr with
684 | Some (_, ty) -> Some ty
685 | None -> None
686 in
687 `Content (Src (mime, XML.resolve ~xmlbase (Uri.of_string src)))
688 | None ->
689 (* (text)*
690 * | xhtmlDiv
691 * | (text|anyElement)*
692 * | none *)
693 `Content
694 ( match find (fun a -> attr_is a "type") attr with
695 | Some (_, "text") | None -> Text (get_leaf data)
696 | Some (_, "html") -> Html (xmlbase, get_html_content data)
697 | Some (_, "xhtml") -> Xhtml (xmlbase, get_xml_content data data)
698 | Some (_, mime) -> Mime (mime, get_leaf data) )
699
700let content_of_xml' ~xmlbase:_
701 ((_pos, (_tag, attr), data) : XML.pos * XML.tag * t list) =
702 let l =
703 match find (fun a -> attr_is a "src") attr with
704 | Some (_, src) -> [`SRC src]
705 | None -> []
706 in
707 let l =
708 match find (fun a -> attr_is a "type") attr with
709 | Some (_, ty) -> `Type ty :: l
710 | None -> l
711 in
712 `Content (`Data data :: l)
713
714type summary = text_construct
715
716(* atomSummary = element atom:summary { atomTextConstruct } *)
717let summary_of_xml ~xmlbase a = `Summary (text_construct_of_xml ~xmlbase a)
718
719let summary_of_xml' ~xmlbase:_ ((_, (_, _), data) : XML.pos * XML.tag * t list)
720 =
721 `Summary data
722
723type entry =
724 { authors: author * author list
725 ; categories: category list
726 ; content: content option
727 ; contributors: author list
728 ; id: id
729 ; links: link list
730 ; published: published option
731 ; rights: rights option
732 ; source: source option
733 ; summary: summary option
734 ; title: title
735 ; updated: updated }
736
737let entry ?(categories = []) ?content ?(contributors = []) ?(links = [])
738 ?published ?rights ?source ?summary ~id ~authors ~title ~updated () =
739 { authors
740 ; categories
741 ; content
742 ; contributors
743 ; id
744 ; links
745 ; published
746 ; rights
747 ; source
748 ; summary
749 ; title
750 ; updated }
751
752type entry' =
753 [ `Author of author
754 | `Category of category
755 | `Contributor of author
756 | `ID of id
757 | `Link of link
758 | `Published of published
759 | `Rights of rights
760 | `Source of source
761 | `Content of content
762 | `Summary of summary
763 | `Title of title
764 | `Updated of updated ]
765
766module LinkOrder : Set.OrderedType with type t = string * string = struct
767 type t = string * string
768
769 let compare (a : t) (b : t) =
770 match compare (fst a) (fst b) with 0 -> compare (snd a) (snd b) | n -> n
771end
772
773module LinkSet = Set.Make (LinkOrder)
774
775let uniq_link_alternate ~pos (l : link list) =
776 let string_of_duplicate_link {href; type_media; hreflang; _}
777 (type_media', hreflang') =
778 let ty = (function Some a -> a | None -> "(none)") type_media in
779 let hl = (function Some a -> a | None -> "(none)") hreflang in
780 let ty' = (function "" -> "(none)" | s -> s) type_media' in
781 let hl' = (function "" -> "(none)" | s -> s) hreflang' in
782 Printf.sprintf
783 "Duplicate link between <link href=\"%s\" hreflang=\"%s\" type=\"%s\" \
784 ..> and <link hreflang=\"%s\" type=\"%s\" ..>"
785 (Uri.to_string href) hl ty hl' ty'
786 in
787 let raise_error link link' =
788 raise (Error.Error (pos, string_of_duplicate_link link link'))
789 in
790 let rec aux acc = function
791 | [] -> l
792 | ({rel; type_media= Some ty; hreflang= Some hl; _} as x) :: r
793 when rel = Alternate ->
794 if LinkSet.mem (ty, hl) acc then
795 raise_error x (LinkSet.find (ty, hl) acc)
796 else aux (LinkSet.add (ty, hl) acc) r
797 | ({rel; type_media= None; hreflang= Some hl; _} as x) :: r
798 when rel = Alternate ->
799 if LinkSet.mem ("", hl) acc then
800 raise_error x (LinkSet.find ("", hl) acc)
801 else aux (LinkSet.add ("", hl) acc) r
802 | ({rel; type_media= Some ty; hreflang= None; _} as x) :: r
803 when rel = Alternate ->
804 if LinkSet.mem (ty, "") acc then
805 raise_error x (LinkSet.find (ty, "") acc)
806 else aux (LinkSet.add (ty, "") acc) r
807 | ({rel; type_media= None; hreflang= None; _} as x) :: r
808 when rel = Alternate ->
809 if LinkSet.mem ("", "") acc then
810 raise_error x (LinkSet.find ("", "") acc)
811 else aux (LinkSet.add ("", "") acc) r
812 | _ :: r -> aux acc r
813 in
814 aux LinkSet.empty l
815
816type feed' =
817 [ `Author of author
818 | `Category of category
819 | `Contributor of author
820 | `Generator of generator
821 | `Icon of icon
822 | `ID of id
823 | `Link of link
824 | `Logo of logo
825 | `Rights of rights
826 | `Subtitle of subtitle
827 | `Title of title
828 | `Updated of updated
829 | `Entry of entry ]
830
831let dummy_name = "\000"
832
833let make_entry ~pos l =
834 let authors =
835 List.fold_left
836 (fun acc -> function `Author x -> x :: acc | _ -> acc)
837 [] l
838 in
839 (* atomSource? *)
840 let sources =
841 List.fold_left
842 (fun acc -> function `Source x -> x :: acc | _ -> acc)
843 [] l
844 in
845 let source =
846 match sources with
847 | [] -> None
848 | [s] -> Some s
849 | _ ->
850 (* RFC 4287 § 4.1.2 *)
851 let msg =
852 "<entry> elements MUST NOT contain more than one <source> element."
853 in
854 raise (Error.Error (pos, msg))
855 in
856 let authors =
857 match (authors, source) with
858 | a0 :: a, _ -> (a0, a)
859 | [], Some (s : source) -> (
860 (* If an atom:entry element does not contain atom:author elements, then
861 the atom:author elements of the contained atom:source element are
862 considered to apply. http://tools.ietf.org/html/rfc4287#section-4.2.1 *)
863 match s.authors with
864 | a0 :: a -> (a0, a)
865 | [] ->
866 let msg =
867 "<entry> does not contain an <author> and its <source> neither does"
868 in
869 raise (Error.Error (pos, msg)) )
870 | [], None -> ({name= dummy_name; uri= None; email= None}, [])
871 (* unacceptable value, see fix_author below *)
872 (* atomCategory* *)
873 in
874 let categories =
875 List.fold_left
876 (fun acc -> function `Category x -> x :: acc | _ -> acc)
877 [] l
878 (* atomContributor* *)
879 in
880 let contributors =
881 List.fold_left
882 (fun acc -> function `Contributor x -> x :: acc | _ -> acc)
883 [] l
884 in
885 (* atomId *)
886 let id =
887 match find (function `ID _ -> true | _ -> false) l with
888 | Some (`ID i) -> i
889 | _ ->
890 raise
891 (Error.Error
892 (pos, "<entry> elements MUST contains exactly one <id> elements"))
893 (* atomLink* *)
894 in
895 let links =
896 List.fold_left (fun acc -> function `Link x -> x :: acc | _ -> acc) [] l
897 in
898 (* atomPublished? *)
899 let published =
900 match find (function `Published _ -> true | _ -> false) l with
901 | Some (`Published s) -> Some s
902 | _ -> None
903 in
904 (* atomRights? *)
905 let rights =
906 match find (function `Rights _ -> true | _ -> false) l with
907 | Some (`Rights r) -> Some r
908 | _ -> None
909 in
910 (* atomContent? *)
911 let content =
912 match find (function `Content _ -> true | _ -> false) l with
913 | Some (`Content c) -> Some c
914 | _ -> None
915 in
916 (* atomSummary? *)
917 let summary =
918 match find (function `Summary _ -> true | _ -> false) l with
919 | Some (`Summary s) -> Some s
920 | _ -> None
921 in
922 (* atomTitle *)
923 let title =
924 match find (function `Title _ -> true | _ -> false) l with
925 | Some (`Title t) -> t
926 | _ ->
927 raise
928 (Error.Error
929 ( pos
930 , "<entry> elements MUST contains exactly one <title> elements" ))
931 in
932 (* atomUpdated *)
933 let updated =
934 match find (function `Updated _ -> true | _ -> false) l with
935 | Some (`Updated u) -> u
936 | _ ->
937 raise
938 (Error.Error
939 ( pos
940 , "<entry> elements MUST contains exactly one <updated> elements"
941 ))
942 in
943 `Entry
944 ( pos
945 , ( { authors
946 ; categories
947 ; content
948 ; contributors
949 ; id
950 ; links= uniq_link_alternate ~pos links
951 ; published
952 ; rights
953 ; source
954 ; summary
955 ; title
956 ; updated }
957 : entry ) )
958
959(* atomEntry = element atom:entry { atomCommonAttributes, (atomAuthor* &
960 atomCategory* & atomContent? & atomContributor* & atomId & atomLink* &
961 atomPublished? & atomRights? & atomSource? & atomSummary? & atomTitle &
962 atomUpdated & extensionElement * ) } *)
963let entry_of_xml =
964 let data_producer =
965 [ ("author", author_of_xml)
966 ; ("category", category_of_xml)
967 ; ("contributor", contributor_of_xml)
968 ; ("id", id_of_xml); ("link", link_of_xml)
969 ; ("published", published_of_xml)
970 ; ("rights", rights_of_xml); ("source", source_of_xml)
971 ; ("content", content_of_xml)
972 ; ("summary", summary_of_xml)
973 ; ("title", title_of_xml)
974 ; ("updated", updated_of_xml) ]
975 in
976 generate_catcher ~namespaces ~data_producer make_entry
977
978let entry_of_xml' =
979 let data_producer =
980 [ ("author", author_of_xml')
981 ; ("category", category_of_xml')
982 ; ("contributor", contributor_of_xml')
983 ; ("id", id_of_xml'); ("link", link_of_xml')
984 ; ("published", published_of_xml')
985 ; ("rights", rights_of_xml'); ("source", source_of_xml')
986 ; ("content", content_of_xml')
987 ; ("summary", summary_of_xml')
988 ; ("title", title_of_xml')
989 ; ("updated", updated_of_xml') ]
990 in
991 generate_catcher ~namespaces ~data_producer (fun ~pos:_ x -> `Entry x)
992
993type feed =
994 { authors: author list
995 ; categories: category list
996 ; contributors: author list
997 ; generator: generator option
998 ; icon: icon option
999 ; id: id
1000 ; links: link list
1001 ; logo: logo option
1002 ; rights: rights option
1003 ; subtitle: subtitle option
1004 ; title: title
1005 ; updated: updated
1006 ; entries: entry list }
1007
1008let feed ?(authors = []) ?(categories = []) ?(contributors = []) ?generator
1009 ?icon ?(links = []) ?logo ?rights ?subtitle ~id ~title ~updated entries =
1010 { authors
1011 ; categories
1012 ; contributors
1013 ; generator
1014 ; icon
1015 ; id
1016 ; links
1017 ; logo
1018 ; rights
1019 ; subtitle
1020 ; title
1021 ; updated
1022 ; entries }
1023
1024let make_feed ~pos (l : _ list) =
1025 (* atomAuthor* *)
1026 let authors =
1027 List.fold_left
1028 (fun acc -> function `Author x -> x :: acc | _ -> acc)
1029 [] l
1030 in
1031 (* atomCategory* *)
1032 let categories =
1033 List.fold_left
1034 (fun acc -> function `Category x -> x :: acc | _ -> acc)
1035 [] l
1036 in
1037 (* atomContributor* *)
1038 let contributors =
1039 List.fold_left
1040 (fun acc -> function `Contributor x -> x :: acc | _ -> acc)
1041 [] l
1042 in
1043 (* atomLink* *)
1044 let links =
1045 List.fold_left (fun acc -> function `Link x -> x :: acc | _ -> acc) [] l
1046 in
1047 (* atomGenerator? *)
1048 let generator =
1049 match find (function `Generator _ -> true | _ -> false) l with
1050 | Some (`Generator g) -> Some g
1051 | _ -> None
1052 in
1053 (* atomIcon? *)
1054 let icon =
1055 match find (function `Icon _ -> true | _ -> false) l with
1056 | Some (`Icon i) -> Some i
1057 | _ -> None
1058 in
1059 (* atomId *)
1060 let id =
1061 match find (function `ID _ -> true | _ -> false) l with
1062 | Some (`ID i) -> i
1063 | _ ->
1064 raise
1065 (Error.Error
1066 (pos, "<feed> elements MUST contains exactly one <id> elements"))
1067 in
1068 (* atomLogo? *)
1069 let logo =
1070 match find (function `Logo _ -> true | _ -> false) l with
1071 | Some (`Logo l) -> Some l
1072 | _ -> None
1073 in
1074 (* atomRights? *)
1075 let rights =
1076 match find (function `Rights _ -> true | _ -> false) l with
1077 | Some (`Rights r) -> Some r
1078 | _ -> None
1079 in
1080 (* atomSubtitle? *)
1081 let subtitle =
1082 match find (function `Subtitle _ -> true | _ -> false) l with
1083 | Some (`Subtitle s) -> Some s
1084 | _ -> None
1085 in
1086 (* atomTitle *)
1087 let title =
1088 match find (function `Title _ -> true | _ -> false) l with
1089 | Some (`Title t) -> t
1090 | _ ->
1091 raise
1092 (Error.Error
1093 (pos, "<feed> elements MUST contains exactly one <title> elements"))
1094 in
1095 (* atomUpdated *)
1096 let updated =
1097 match find (function `Updated _ -> true | _ -> false) l with
1098 | Some (`Updated u) -> u
1099 | _ ->
1100 raise
1101 (Error.Error
1102 ( pos
1103 , "<feed> elements MUST contains exactly one <updated> elements"
1104 ))
1105 in
1106 (* atomEntry* *)
1107 let fix_author _pos (e : entry) =
1108 match e.authors with
1109 | a, [] when a.name = dummy_name -> (
1110 (* In an Atom Feed Document, the atom:author elements of the containing
1111 atom:feed element are considered to apply to the entry if there are no
1112 atom:author elements in the locations described above.
1113 http://tools.ietf.org/html/rfc4287#section-4.2.1 *)
1114 match authors with
1115 | a0 :: a -> {e with authors= (a0, a)}
1116 | [] ->
1117 (* RFC 4287 requires at least one author, but many real-world feeds
1118 omit this. Be lenient and use an empty author rather than failing. *)
1119 {e with authors= (empty_author, [])} )
1120 | _ -> e
1121 in
1122 let entries =
1123 List.fold_left
1124 (fun acc -> function `Entry (pos, e) -> fix_author pos e :: acc
1125 | _ -> acc )
1126 [] l
1127 in
1128 ( { authors
1129 ; categories
1130 ; contributors
1131 ; generator
1132 ; icon
1133 ; id
1134 ; links
1135 ; logo
1136 ; rights
1137 ; subtitle
1138 ; title
1139 ; updated
1140 ; entries }
1141 : feed )
1142
1143(* atomFeed = element atom:feed { atomCommonAttributes, (atomAuthor* &
1144 atomCategory* & atomContributor* & atomGenerator? & atomIcon? & atomId &
1145 atomLink* & atomLogo? & atomRights? & atomSubtitle? & atomTitle &
1146 atomUpdated & extensionElement * ), atomEntry* } *)
1147
1148let feed_of_xml =
1149 let data_producer =
1150 [ ("author", author_of_xml)
1151 ; ("category", category_of_xml)
1152 ; ("contributor", contributor_of_xml)
1153 ; ("generator", generator_of_xml)
1154 ; ("icon", icon_of_xml); ("id", id_of_xml); ("link", link_of_xml)
1155 ; ("logo", logo_of_xml); ("rights", rights_of_xml)
1156 ; ("subtitle", subtitle_of_xml)
1157 ; ("title", title_of_xml)
1158 ; ("updated", updated_of_xml)
1159 ; ("entry", entry_of_xml) ]
1160 in
1161 generate_catcher ~namespaces ~data_producer make_feed
1162
1163let feed_of_xml' =
1164 let data_producer =
1165 [ ("author", author_of_xml')
1166 ; ("category", category_of_xml')
1167 ; ("contributor", contributor_of_xml')
1168 ; ("generator", generator_of_xml')
1169 ; ("icon", icon_of_xml'); ("id", id_of_xml'); ("link", link_of_xml')
1170 ; ("logo", logo_of_xml'); ("rights", rights_of_xml')
1171 ; ("subtitle", subtitle_of_xml')
1172 ; ("title", title_of_xml')
1173 ; ("updated", updated_of_xml')
1174 ; ("entry", entry_of_xml') ]
1175 in
1176 generate_catcher ~namespaces ~data_producer (fun ~pos:_ x -> x)
1177
1178(* Remove all tags *)
1179let rec add_to_buffer buf = function
1180 | XML.Node (_, _, subs) -> List.iter (add_to_buffer buf) subs
1181 | XML.Data (_, d) -> Buffer.add_string buf d
1182
1183let xhtml_to_string xhtml =
1184 let buf = Buffer.create 128 in
1185 List.iter (add_to_buffer buf) xhtml ;
1186 Buffer.contents buf
1187
1188let string_of_text_construct = function
1189 (* FIXME: Once we use a proper HTML library, we probably would like to parse
1190 the HTML and remove the tags *)
1191 | (Text s : text_construct) | Html (_, s) -> s
1192 | Xhtml (_, x) -> xhtml_to_string x
1193
1194let parse ?self ?xmlbase input =
1195 let feed =
1196 match XML.of_xmlm input |> snd with
1197 | XML.Node (pos, tag, datas) when tag_is tag "feed" ->
1198 feed_of_xml ~xmlbase (pos, tag, datas)
1199 | _ ->
1200 raise
1201 (Error.Error
1202 ((0, 0), "document MUST contains exactly one <feed> element"))
1203 in
1204 (* FIXME: the spec says that an entry can appear as the top-level element *)
1205 match self with
1206 | None -> feed
1207 | Some self ->
1208 if List.exists (fun l -> l.rel = Self) feed.links then feed
1209 else
1210 let links =
1211 { href= self
1212 ; rel= Self
1213 ; type_media= Some "application/atom+xml"
1214 ; hreflang= None
1215 ; title= string_of_text_construct feed.title
1216 ; length= None }
1217 :: feed.links
1218 in
1219 {feed with links}
1220
1221let read ?self ?xmlbase fname =
1222 let fh = open_in fname in
1223 try
1224 let x = parse ?self ?xmlbase (XML.input_of_channel fh) in
1225 close_in fh ; x
1226 with e -> close_in fh ; raise e
1227
1228let set_self_link feed ?hreflang ?length url =
1229 match List.partition (fun l -> l.rel = Self) feed.links with
1230 | l :: _, links ->
1231 let hreflang =
1232 match hreflang with None -> l.hreflang | Some _ -> hreflang
1233 in
1234 let length = match length with None -> l.length | Some _ -> length in
1235 let self = {l with href= url; hreflang; length} in
1236 {feed with links= self :: links}
1237 | [], links ->
1238 let links =
1239 { href= url
1240 ; rel= Self
1241 ; type_media= Some "application/atom+xml"
1242 ; hreflang
1243 ; title= string_of_text_construct feed.title
1244 ; length }
1245 :: links
1246 in
1247 {feed with links}
1248
1249let get_self_link feed =
1250 try Some (List.find (fun l -> l.rel = Self) feed.links) with Not_found ->
1251 None
1252
1253let unsafe ?xmlbase input =
1254 match XML.of_xmlm input |> snd with
1255 | XML.Node (pos, tag, datas) when tag_is tag "feed" ->
1256 `Feed (feed_of_xml' ~xmlbase (pos, tag, datas))
1257 | _ -> `Feed []
1258
1259let remove_empty_authors a = List.filter not_empty_author a
1260
1261(* [normalize_authors a authors] returns (a', authors') where [authors'] is
1262 [authors] where the empty authors and the author [a] have been removed and
1263 [a'] is [a] possibly completed with the information found for [a] in
1264 [authors]. *)
1265let rec normalize_authors (a : author) = function
1266 | [] -> (a, [])
1267 | a0 :: tl ->
1268 if not_empty_author a0 then
1269 if a0.name = a.name then
1270 (* Merge [a0] and [a]. *)
1271 let uri = match a.uri with None -> a0.uri | Some _ -> a.uri in
1272 let email =
1273 match a.email with None -> a0.email | Some _ -> a.email
1274 in
1275 normalize_authors {name= a.name; uri; email} tl
1276 else
1277 let a', authors' = normalize_authors a tl in
1278 (a', a0 :: authors')
1279 else normalize_authors a tl
1280
1281(* drop the empty author *)
1282
1283let set_main_author_entry author (e : entry) =
1284 (* If the entry has a source, then [author] should be ignored and the one
1285 from the [source] should be used instead. *)
1286 let author, author_ok, source =
1287 match e.source with
1288 | None -> (author, true, None)
1289 | Some s -> (
1290 let s_authors = remove_empty_authors s.authors in
1291 let s_contributors = remove_empty_authors s.contributors in
1292 let s =
1293 Some {s with authors= s_authors; contributors= s_contributors}
1294 in
1295 (* A source exists. If it contains no author, one should not change the
1296 entry authors with [author] because that may wrongly attribute the
1297 post. *)
1298 match s_authors with
1299 | [] -> (author, false, s)
1300 | s_author :: _ -> (s_author, true, s) )
1301 in
1302 let a0, a = e.authors in
1303 let authors =
1304 match remove_empty_authors (a0 :: a) with
1305 | a0 :: a -> (a0, a)
1306 | [] -> ((if author_ok then author else empty_author), [])
1307 in
1308 let contributors = remove_empty_authors e.contributors in
1309 {e with authors; contributors; source}
1310
1311let set_main_author feed author =
1312 let author, authors = normalize_authors author feed.authors in
1313 let contributors = remove_empty_authors feed.contributors in
1314 let entries = List.map (set_main_author_entry author) feed.entries in
1315 {feed with authors= author :: authors; contributors; entries}
1316
1317(* Conversion to XML *)
1318
1319(* Tag with the Atom namespace *)
1320let atom name : XML.tag = ((atom_ns, name), [])
1321
1322let add_attr_xmlbase ~xmlbase attrs =
1323 match xmlbase with
1324 | Some u -> ((Xmlm.ns_xml, "base"), Uri.to_string u) :: attrs
1325 | None -> attrs
1326
1327let text_construct_to_xml tag_name (t : text_construct) =
1328 match t with
1329 | Text t ->
1330 XML.Node
1331 ( dummy_pos
1332 , ((atom_ns, tag_name), [(("", "type"), "text")])
1333 , [XML.Data (dummy_pos, t)] )
1334 | Html (xmlbase, t) ->
1335 let attr = add_attr_xmlbase ~xmlbase [(("", "type"), "html")] in
1336 XML.Node
1337 (dummy_pos, ((atom_ns, tag_name), attr), [XML.Data (dummy_pos, t)])
1338 | Xhtml (xmlbase, x) ->
1339 let div =
1340 XML.Node
1341 (dummy_pos, ((xhtml_ns, "div"), [(("", "xmlns"), xhtml_ns)]), x)
1342 in
1343 let attr = add_attr_xmlbase ~xmlbase [(("", "type"), "xhtml")] in
1344 XML.Node (dummy_pos, ((atom_ns, tag_name), attr), [div])
1345
1346let person_to_xml name (a : author) =
1347 XML.Node
1348 ( dummy_pos
1349 , atom name
1350 , [node_data (atom "name") a.name]
1351 |> add_node_uri (atom "uri") a.uri
1352 |> add_node_data (atom "email") a.email )
1353
1354let author_to_xml a = person_to_xml "author" a
1355let contributor_to_xml a = person_to_xml "contributor" a
1356
1357let category_to_xml (c : category) =
1358 let attrs =
1359 [(("", "term"), c.term)]
1360 |> add_attr_uri ("", "scheme") c.scheme
1361 |> add_attr ("", "label") c.label
1362 in
1363 XML.Node (dummy_pos, ((atom_ns, "category"), attrs), [])
1364
1365let generator_to_xml (g : generator) =
1366 let attr =
1367 [] |> add_attr ("", "version") g.version |> add_attr_uri ("", "uri") g.uri
1368 in
1369 XML.Node
1370 ( dummy_pos
1371 , ((atom_ns, "generator"), attr)
1372 , [XML.Data (dummy_pos, g.content)] )
1373
1374let string_of_rel = function
1375 | Alternate -> "alternate"
1376 | Related -> "related"
1377 | Self -> "self"
1378 | Enclosure -> "enclosure"
1379 | Via -> "via"
1380 | Link l -> Uri.to_string l
1381
1382let link_to_xml (l : link) =
1383 let attr =
1384 [(("", "href"), Uri.to_string l.href); (("", "rel"), string_of_rel l.rel)]
1385 |> add_attr ("", "type") l.type_media
1386 |> add_attr ("", "hreflang") l.hreflang
1387 in
1388 let attr = if l.title = "" then attr else (("", "title"), l.title) :: attr in
1389 let attr =
1390 match l.length with
1391 | Some len -> (("", "length"), string_of_int len) :: attr
1392 | None -> attr
1393 in
1394 XML.Node (dummy_pos, ((atom_ns, "link"), attr), [])
1395
1396let add_node_date tag date nodes =
1397 match date with
1398 | None -> nodes
1399 | Some d -> node_data tag (Date.to_rfc3339 d) :: nodes
1400
1401let source_to_xml (s : source) =
1402 let nodes =
1403 node_data (atom "id") (Uri.to_string s.id)
1404 :: text_construct_to_xml "title" s.title
1405 :: List.map author_to_xml s.authors
1406 |> add_nodes_rev_map category_to_xml s.categories
1407 |> add_nodes_rev_map contributor_to_xml s.contributors
1408 |> add_node_option generator_to_xml s.generator
1409 |> add_node_option (node_uri (atom "icon")) s.icon
1410 |> add_nodes_rev_map link_to_xml s.links
1411 |> add_node_option (node_uri (atom "logo")) s.logo
1412 |> add_node_option (text_construct_to_xml "rights") s.rights
1413 |> add_node_option (text_construct_to_xml "subtitle") s.subtitle
1414 |> add_node_date (atom "updated") s.updated
1415 in
1416 XML.Node (dummy_pos, atom "source", nodes)
1417
1418let content_to_xml (c : content) =
1419 match c with
1420 | Text t ->
1421 XML.Node
1422 ( dummy_pos
1423 , ((atom_ns, "content"), [(("", "type"), "text")])
1424 , [XML.Data (dummy_pos, t)] )
1425 | Html (xmlbase, t) ->
1426 let attrs = add_attr_xmlbase ~xmlbase [(("", "type"), "html")] in
1427 XML.Node
1428 (dummy_pos, ((atom_ns, "content"), attrs), [XML.Data (dummy_pos, t)])
1429 | Xhtml (xmlbase, x) ->
1430 let div =
1431 XML.Node
1432 (dummy_pos, ((xhtml_ns, "div"), [(("", "xmlns"), xhtml_ns)]), x)
1433 in
1434 let attrs = add_attr_xmlbase ~xmlbase [(("", "type"), "xhtml")] in
1435 XML.Node (dummy_pos, ((atom_ns, "content"), attrs), [div])
1436 | Mime (mime, d) ->
1437 XML.Node
1438 ( dummy_pos
1439 , ((atom_ns, "content"), [(("", "type"), mime)])
1440 , [XML.Data (dummy_pos, d)] )
1441 | Src (mime, uri) ->
1442 let attr =
1443 [(("", "src"), Uri.to_string uri)] |> add_attr ("", "type") mime
1444 in
1445 XML.Node (dummy_pos, ((atom_ns, "content"), attr), [])
1446
1447let entry_to_xml (e : entry) =
1448 let a0, a = e.authors in
1449 let nodes =
1450 node_data (atom "id") (Uri.to_string e.id)
1451 :: text_construct_to_xml "title" e.title
1452 :: node_data (atom "updated") (Date.to_rfc3339 e.updated)
1453 :: author_to_xml a0
1454 :: List.map author_to_xml a
1455 |> add_nodes_rev_map category_to_xml e.categories
1456 |> add_node_option content_to_xml e.content
1457 |> add_nodes_rev_map contributor_to_xml e.contributors
1458 |> add_nodes_rev_map link_to_xml e.links
1459 |> add_node_date (atom "published") e.published
1460 |> add_node_option (text_construct_to_xml "rights") e.rights
1461 |> add_node_option source_to_xml e.source
1462 |> add_node_option (text_construct_to_xml "summary") e.summary
1463 in
1464 XML.Node (dummy_pos, atom "entry", nodes)
1465
1466let to_xml (f : feed) =
1467 let nodes =
1468 node_data (atom "id") (Uri.to_string f.id)
1469 :: text_construct_to_xml "title" f.title
1470 :: node_data (atom "updated") (Date.to_rfc3339 f.updated)
1471 :: List.map entry_to_xml f.entries
1472 |> add_nodes_rev_map author_to_xml (List.rev f.authors)
1473 |> add_nodes_rev_map category_to_xml f.categories
1474 |> add_nodes_rev_map contributor_to_xml f.contributors
1475 |> add_node_option generator_to_xml f.generator
1476 |> add_node_option (node_uri (atom "icon")) f.icon
1477 |> add_nodes_rev_map link_to_xml f.links
1478 |> add_node_option (node_uri (atom "logo")) f.logo
1479 |> add_node_option (text_construct_to_xml "rights") f.rights
1480 |> add_node_option (text_construct_to_xml "subtitle") f.subtitle
1481 in
1482 XML.Node (dummy_pos, ((atom_ns, "feed"), [(("", "xmlns"), atom_ns)]), nodes)
1483
1484(* Atom and XHTML have been declared well in the above XML representation. One
1485 can remove them. *)
1486let output_ns_prefix s = if s = atom_ns || s = xhtml_ns then Some "" else None
1487
1488let output feed dest =
1489 let o = XML.make_output dest ~ns_prefix:output_ns_prefix in
1490 XML.to_xmlm (to_xml feed) o
1491
1492let write feed fname =
1493 let fh = open_out fname in
1494 try
1495 output feed (`Channel fh) ;
1496 close_out fh
1497 with e -> close_out fh ; raise e
1498
1499(* Comparing entries *)
1500
1501let entry_date e = match e.published with Some d -> d | None -> e.updated
1502
1503let ascending (e1 : entry) (e2 : entry) =
1504 Date.compare (entry_date e1) (entry_date e2)
1505
1506let descending (e1 : entry) (e2 : entry) =
1507 Date.compare (entry_date e2) (entry_date e1)
1508
1509(* Feed aggregation *)
1510
1511let syndic_generator =
1512 { version= Some Syndic_conf.version
1513 ; uri= Some Syndic_conf.homepage
1514 ; content= "OCaml Syndic.Atom feed aggregator" }
1515
1516let ocaml_icon = Uri.of_string "http://ocaml.org/img/colour-icon-170x148.png"
1517let default_title : text_construct = Text "Syndic.Atom aggregated feed"
1518
1519let[@warning "-32"] is_alternate_Atom (l : link) =
1520 match l.type_media with
1521 | None -> false
1522 | Some ty -> ty = "application/atom+xml" && l.rel = Alternate
1523
1524let add_entries_of_feed entries feed : entry list =
1525 let source_of_feed =
1526 Some
1527 { authors= feed.authors
1528 ; categories= feed.categories
1529 ; contributors= feed.contributors
1530 ; generator= feed.generator
1531 ; icon= feed.icon
1532 ; id= feed.id
1533 ; links= feed.links
1534 ; logo= feed.logo
1535 ; rights= feed.rights
1536 ; subtitle= feed.subtitle
1537 ; title= feed.title
1538 ; updated= Some feed.updated }
1539 in
1540 let add_entry entries (e : entry) =
1541 match e.source with
1542 | Some _ -> e :: entries (* if a source is present, do not overwrite it. *)
1543 | None -> {e with source= source_of_feed} :: entries
1544 in
1545 List.fold_left add_entry entries feed.entries
1546
1547let entries_of_feeds feeds = List.fold_left add_entries_of_feed [] feeds
1548
1549let more_recent d1 (e : entry) =
1550 if Date.compare d1 e.updated >= 0 then d1 else e.updated
1551
1552let aggregate ?self ?id ?updated ?subtitle ?(title = default_title)
1553 ?(sort = `Newest_first) ?n feeds : feed =
1554 let entries = entries_of_feeds feeds in
1555 let entries =
1556 match sort with
1557 | `Newest_first -> List.sort descending entries
1558 | `Oldest_first -> List.sort ascending entries
1559 | `None -> entries
1560 in
1561 let entries = match n with Some n -> take entries n | None -> entries in
1562 let id =
1563 match id with
1564 | Some id -> id
1565 | None ->
1566 (* Collect all ids of the entries and "digest" them. *)
1567 let b = Buffer.create 4096 in
1568 let add_id (e : entry) = Buffer.add_string b (Uri.to_string e.id) in
1569 List.iter add_id entries ;
1570 let d = Digest.to_hex (Digest.string (Buffer.contents b)) in
1571 (* FIXME: use urn:uuid *)
1572 Uri.of_string ("urn:md5:" ^ d)
1573 in
1574 let links =
1575 match self with
1576 | Some u ->
1577 [ link u
1578 ~title:(string_of_text_construct title)
1579 ~rel:Self ~type_media:"application/atom+xml" ]
1580 | None -> []
1581 in
1582 let updated =
1583 match updated with
1584 | Some d -> d
1585 | None -> (
1586 (* Use the more recent date of the entries. *)
1587 match entries with
1588 | [] -> Date.epoch
1589 | e0 :: el -> List.fold_left more_recent e0.updated el )
1590 in
1591 { authors= []
1592 ; categories= []
1593 ; contributors= []
1594 ; generator= Some syndic_generator
1595 ; icon= Some ocaml_icon
1596 ; id
1597 ; links
1598 ; logo= None
1599 ; rights= None
1600 ; subtitle
1601 ; title
1602 ; updated
1603 ; entries }