My agentic slop goes here. Not intended for anyone else!
at main 50 kB view raw
1open Syndic_common.XML 2open Syndic_common.Util 3module XML = Syndic_xml 4module Error = Syndic_error 5module Date = Syndic_date 6 7let atom_ns = "http://www.w3.org/2005/Atom" 8let xhtml_ns = "http://www.w3.org/1999/xhtml" 9let namespaces = [atom_ns] 10 11type rel = Alternate | Related | Self | Enclosure | Via | Link of Uri.t 12 13type link = 14 { href: Uri.t 15 ; rel: rel 16 ; type_media: string option 17 ; hreflang: string option 18 ; title: string 19 ; length: int option } 20 21let link ?type_media ?hreflang ?(title = "") ?length ?(rel = Alternate) href = 22 {href; rel; type_media; hreflang; title; length} 23 24type link' = 25 [ `HREF of Uri.t 26 | `Rel of string 27 | `Type of string 28 | `HREFLang of string 29 | `Title of string 30 | `Length of string ] 31 32(* The actual XML content is supposed to be inside a <div> which is NOT part of 33 the content. *) 34let rec get_xml_content xml0 = function 35 | XML.Data (_, s) :: tl -> 36 if only_whitespace s then get_xml_content xml0 tl 37 else xml0 (* unexpected *) 38 | XML.Node (_pos, tag, data) :: tl when tag_is tag "div" -> 39 let is_space = 40 List.for_all 41 (function XML.Data (_, s) -> only_whitespace s | _ -> false) 42 tl 43 in 44 if is_space then data else xml0 45 | _ -> xml0 46 47let no_namespace = Some "" 48let rm_namespace _ = no_namespace 49 50(* For HTML, the spec says the whole content needs to be escaped 51 http://tools.ietf.org/html/rfc4287#section-3.1.1.2 (some feeds use <![CDATA[ 52 ]]>) so a single data item should be present. If not, assume the HTML was 53 properly parsed and convert it back to a string as it should. *) 54let get_html_content html = 55 match html with 56 | [XML.Data (_, d)] -> d 57 | h -> 58 (* It is likely that, when the HTML was parsed, the Atom namespace was 59 applied. Remove it. *) 60 String.concat "" (List.map (XML.to_string ~ns_prefix:rm_namespace) h) 61 62type text_construct = 63 | Text of string 64 | Html of Uri.t option * string 65 | Xhtml of Uri.t option * XML.t list 66 67let text_construct_of_xml ~xmlbase 68 ((_pos, (_tag, attr), data) : XML.pos * XML.tag * t list) = 69 let xmlbase = xmlbase_of_attr ~xmlbase attr in 70 match find (fun a -> attr_is a "type") attr with 71 | Some (_, "html") -> Html (xmlbase, get_html_content data) 72 | Some (_, "application/xhtml+xml") | Some (_, "xhtml") -> 73 Xhtml (xmlbase, get_xml_content data data) 74 | _ -> Text (get_leaf data) 75 76type author = {name: string; uri: Uri.t option; email: string option} 77 78let empty_author = {name= ""; uri= None; email= None} 79let not_empty_author a = a.name <> "" || a.uri <> None || a.email <> None 80let author ?uri ?email name = {uri; email; name} 81 82type person' = [`Name of string | `URI of Uri.t | `Email of string] 83 84let make_person datas ~pos:_ (l : [< person'] list) = 85 (* element atom:name { text } *) 86 let name = 87 match find (function `Name _ -> true | _ -> false) l with 88 | Some (`Name s) -> s 89 | _ -> 90 (* The spec mandates that <author><name>name</name></author> but 91 several feeds just do <author>name</author> *) 92 get_leaf datas 93 in 94 (* element atom:uri { atomUri }? *) 95 let uri = 96 match find (function `URI _ -> true | _ -> false) l with 97 | Some (`URI u) -> Some u 98 | _ -> None 99 in 100 (* element atom:email { atomEmailAddress }? *) 101 let email = 102 match find (function `Email _ -> true | _ -> false) l with 103 | Some (`Email e) -> Some e 104 | _ -> None 105 in 106 ({name; uri; email} : author) 107 108let make_author datas ~pos a = `Author (make_person datas ~pos a) 109 110let person_name_of_xml ~xmlbase:_ (_pos, _tag, datas) = 111 `Name (try get_leaf datas with Not_found -> "") 112 113(* mandatory ? *) 114 115let person_uri_of_xml ~xmlbase (pos, _tag, datas) = 116 try `URI (XML.resolve ~xmlbase (Uri.of_string (get_leaf datas))) 117 with Not_found -> 118 raise 119 (Error.Error (pos, "The content of <uri> MUST be a non-empty string")) 120 121let person_email_of_xml ~xmlbase:_ (_pos, _tag, datas) = 122 `Email (try get_leaf datas with Not_found -> "") 123 124(* mandatory ? *) 125 126(* {[ atomAuthor = element atom:author { atomPersonConstruct } ]} where 127 128 atomPersonConstruct = atomCommonAttributes, (element atom:name { text } & 129 element atom:uri { atomUri }? & element atom:email { atomEmailAddress }? & 130 extensionElement * ) 131 132 This specification assigns no significance to the order of appearance of the 133 child elements in a Person construct. *) 134let person_data_producer = 135 [ ("name", person_name_of_xml) 136 ; ("uri", person_uri_of_xml) 137 ; ("email", person_email_of_xml) ] 138 139let author_of_xml ~xmlbase ((_, _, datas) as xml) = 140 generate_catcher ~namespaces ~data_producer:person_data_producer 141 (make_author datas) ~xmlbase xml 142 143type uri = Uri.t option * string 144type person = [`Email of string | `Name of string | `URI of uri] list 145 146let person_data_producer' = 147 [ ("name", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Name a)) 148 ; ("uri", dummy_of_xml ~ctor:(fun ~xmlbase a -> `URI (xmlbase, a))) 149 ; ("email", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Email a)) ] 150 151let author_of_xml' = 152 generate_catcher ~namespaces ~data_producer:person_data_producer' 153 (fun ~pos:_ x -> `Author x ) 154 155type category = {term: string; scheme: Uri.t option; label: string option} 156 157let category ?scheme ?label term = {scheme; label; term} 158 159type category' = [`Term of string | `Scheme of Uri.t | `Label of string] 160 161let make_category ~pos (l : [< category'] list) = 162 (* attribute term { text } *) 163 let term = 164 match find (function `Term _ -> true | _ -> false) l with 165 | Some (`Term t) -> t 166 | _ -> 167 raise 168 (Error.Error (pos, "Category elements MUST have a 'term' attribute")) 169 in 170 (* attribute scheme { atomUri }? *) 171 let scheme = 172 match find (function `Scheme _ -> true | _ -> false) l with 173 | Some (`Scheme u) -> Some u 174 | _ -> None 175 in 176 (* attribute label { text }? *) 177 let label = 178 match find (function `Label _ -> true | _ -> false) l with 179 | Some (`Label l) -> Some l 180 | _ -> None 181 in 182 `Category ({term; scheme; label} : category) 183 184let scheme_of_xml ~xmlbase a = `Scheme (XML.resolve ~xmlbase (Uri.of_string a)) 185 186(* atomCategory = element atom:category { atomCommonAttributes, attribute term 187 { text }, attribute scheme { atomUri }?, attribute label { text }?, 188 undefinedContent } *) 189let category_attr_producer = 190 [ ("term", fun ~xmlbase:_ a -> `Term a) 191 ; ("label", fun ~xmlbase:_ a -> `Label a) ] 192 193let category_of_xml = 194 let attr_producer = ("scheme", scheme_of_xml) :: category_attr_producer in 195 generate_catcher ~attr_producer make_category 196 197let category_of_xml' = 198 let attr_producer = 199 ("scheme", fun ~xmlbase:_ a -> `Scheme a) :: category_attr_producer 200 in 201 generate_catcher ~attr_producer (fun ~pos:_ x -> `Category x) 202 203let make_contributor datas ~pos a = `Contributor (make_person datas ~pos a) 204 205let contributor_of_xml ~xmlbase ((_, _, datas) as xml) = 206 generate_catcher ~namespaces ~data_producer:person_data_producer 207 (make_contributor datas) ~xmlbase xml 208 209let contributor_of_xml' = 210 generate_catcher ~namespaces ~data_producer:person_data_producer' 211 (fun ~pos:_ x -> `Contributor x ) 212 213type generator = {version: string option; uri: Uri.t option; content: string} 214 215let generator ?uri ?version content = {uri; version; content} 216 217type generator' = [`URI of Uri.t | `Version of string | `Content of string] 218 219let make_generator ~pos (l : [< generator'] list) = 220 (* text *) 221 let content = 222 match find (function `Content _ -> true | _ -> false) l with 223 | Some (`Content c) -> c 224 | _ -> 225 raise 226 (Error.Error 227 (pos, "The content of <generator> MUST be a non-empty string")) 228 in 229 (* attribute version { text }? *) 230 let version = 231 match find (function `Version _ -> true | _ -> false) l with 232 | Some (`Version v) -> Some v 233 | _ -> None 234 in 235 (* attribute uri { atomUri }? *) 236 let uri = 237 match find (function `URI _ -> true | _ -> false) l with 238 | Some (`URI u) -> Some u 239 | _ -> None 240 in 241 `Generator ({version; uri; content} : generator) 242 243(* URI, if present, MUST be an IRI reference [RFC3987]. The definition of "IRI" 244 excludes relative references but we resolve it anyway in case this is not 245 respected by the generator. *) 246let generator_uri_of_xml ~xmlbase a = 247 `URI (XML.resolve ~xmlbase (Uri.of_string a)) 248 249(* atomGenerator = element atom:generator { atomCommonAttributes, attribute uri 250 { atomUri }?, attribute version { text }?, text } *) 251let generator_of_xml = 252 let attr_producer = 253 [("version", fun ~xmlbase:_ a -> `Version a); ("uri", generator_uri_of_xml)] 254 in 255 let leaf_producer ~xmlbase:_ _pos data = `Content data in 256 generate_catcher ~attr_producer ~leaf_producer make_generator 257 258let generator_of_xml' = 259 let attr_producer = 260 [ ("version", fun ~xmlbase:_ a -> `Version a) 261 ; ("uri", fun ~xmlbase a -> `URI (xmlbase, a)) ] 262 in 263 let leaf_producer ~xmlbase:_ _pos data = `Content data in 264 generate_catcher ~attr_producer ~leaf_producer (fun ~pos:_ x -> `Generator x) 265 266type icon = Uri.t 267 268let make_icon ~pos (l : Uri.t list) = 269 (* (atomUri) *) 270 let uri = 271 match l with 272 | u :: _ -> u 273 | [] -> 274 raise 275 (Error.Error (pos, "The content of <icon> MUST be a non-empty string")) 276 in 277 `Icon uri 278 279(* atomIcon = element atom:icon { atomCommonAttributes, } *) 280let icon_of_xml = 281 let leaf_producer ~xmlbase _pos data = 282 XML.resolve ~xmlbase (Uri.of_string data) 283 in 284 generate_catcher ~leaf_producer make_icon 285 286let icon_of_xml' = 287 let leaf_producer ~xmlbase _pos data = `URI (xmlbase, data) in 288 generate_catcher ~leaf_producer (fun ~pos:_ x -> `Icon x) 289 290type id = Uri.t 291 292let make_id ~pos (l : string list) = 293 (* (atomUri) *) 294 let id = 295 match l with 296 | u :: _ -> Uri.of_string u 297 | [] -> 298 raise 299 (Error.Error (pos, "The content of <id> MUST be a non-empty string")) 300 in 301 `ID id 302 303(* atomId = element atom:id { atomCommonAttributes, (atomUri) } *) 304let id_of_xml, id_of_xml' = 305 let leaf_producer ~xmlbase:_ _pos data = data in 306 ( generate_catcher ~leaf_producer make_id 307 , generate_catcher ~leaf_producer (fun ~pos:_ x -> `ID x) ) 308 309let rel_of_string s = 310 match String.lowercase_ascii (String.trim s) with 311 | "alternate" -> Alternate 312 | "related" -> Related 313 | "self" -> Self 314 | "enclosure" -> Enclosure 315 | "via" -> Via 316 | uri -> 317 (* RFC 4287 § 4.2.7.2: the use of a relative reference other than a 318 simple name is not allowed. Thus no need to resolve against xml:base. *) 319 Link (Uri.of_string uri) 320 321let make_link ~pos (l : [< link'] list) = 322 (* attribute href { atomUri } *) 323 let href = 324 match find (function `HREF _ -> true | _ -> false) l with 325 | Some (`HREF u) -> u 326 | _ -> 327 raise (Error.Error (pos, "Link elements MUST have a 'href' attribute")) 328 in 329 (* attribute rel { atomNCName | atomUri }? *) 330 let rel = 331 match find (function `Rel _ -> true | _ -> false) l with 332 | Some (`Rel r) -> rel_of_string r 333 | _ -> Alternate 334 (* cf. RFC 4287 § 4.2.7.2 *) 335 in 336 (* attribute type { atomMediaType }? *) 337 let type_media = 338 match find (function `Type _ -> true | _ -> false) l with 339 | Some (`Type t) -> Some t 340 | _ -> None 341 in 342 (* attribute hreflang { atomLanguageTag }? *) 343 let hreflang = 344 match find (function `HREFLang _ -> true | _ -> false) l with 345 | Some (`HREFLang l) -> Some l 346 | _ -> None 347 in 348 (* attribute title { text }? *) 349 let title = 350 match find (function `Title _ -> true | _ -> false) l with 351 | Some (`Title s) -> s 352 | _ -> "" 353 in 354 (* attribute length { text }? *) 355 let length = 356 match find (function `Length _ -> true | _ -> false) l with 357 | Some (`Length i) -> Some (int_of_string i) 358 | _ -> None 359 in 360 `Link ({href; rel; type_media; hreflang; title; length} : link) 361 362let link_href_of_xml ~xmlbase a = 363 `HREF (XML.resolve ~xmlbase (Uri.of_string a)) 364 365(* atomLink = element atom:link { atomCommonAttributes, attribute href { 366 atomUri }, attribute rel { atomNCName | atomUri }?, attribute type { 367 atomMediaType }?, attribute hreflang { atomLanguageTag }?, attribute title { 368 text }?, attribute length { text }?, undefinedContent } *) 369let link_attr_producer = 370 [ ("rel", fun ~xmlbase:_ a -> `Rel a) 371 ; ("type", fun ~xmlbase:_ a -> `Type a) 372 ; ("hreflang", fun ~xmlbase:_ a -> `HREFLang a) 373 ; ("title", fun ~xmlbase:_ a -> `Title a) 374 ; ("length", fun ~xmlbase:_ a -> `Length a) ] 375 376let link_of_xml = 377 let attr_producer = ("href", link_href_of_xml) :: link_attr_producer in 378 generate_catcher ~attr_producer make_link 379 380let link_of_xml' = 381 let attr_producer = 382 ("href", fun ~xmlbase:_ a -> `HREF a) :: link_attr_producer 383 in 384 generate_catcher ~attr_producer (fun ~pos:_ x -> `Link x) 385 386type logo = Uri.t 387 388let make_logo ~pos (l : Uri.t list) = 389 (* (atomUri) *) 390 let uri = 391 match l with 392 | u :: _ -> u 393 | [] -> 394 raise 395 (Error.Error (pos, "The content of <logo> MUST be a non-empty string")) 396 in 397 `Logo uri 398 399(* atomLogo = element atom:logo { atomCommonAttributes, (atomUri) } *) 400let logo_of_xml = 401 let leaf_producer ~xmlbase _pos data = 402 XML.resolve ~xmlbase (Uri.of_string data) 403 in 404 generate_catcher ~leaf_producer make_logo 405 406let logo_of_xml' = 407 let leaf_producer ~xmlbase _pos data = `URI (xmlbase, data) in 408 generate_catcher ~leaf_producer (fun ~pos:_ x -> `Logo x) 409 410type published = Date.t 411type published' = [`Date of string] 412 413let make_published ~pos (l : [< published'] list) = 414 (* atom:published { atomDateConstruct } *) 415 let date = 416 match find (fun (`Date _) -> true) l with 417 | Some (`Date d) -> Date.of_rfc3339 d 418 | _ -> 419 raise 420 (Error.Error 421 (pos, "The content of <published> MUST be a non-empty string")) 422 in 423 `Published date 424 425(* atomPublished = element atom:published { atomDateConstruct } *) 426let published_of_xml, published_of_xml' = 427 let leaf_producer ~xmlbase:_ _pos data = `Date data in 428 ( generate_catcher ~leaf_producer make_published 429 , generate_catcher ~leaf_producer (fun ~pos:_ x -> `Published x) ) 430 431type rights = text_construct 432 433let rights_of_xml ~xmlbase a = `Rights (text_construct_of_xml ~xmlbase a) 434 435(* atomRights = element atom:rights { atomTextConstruct } *) 436let rights_of_xml' ~xmlbase:_ 437 ((_pos, (_tag, _attr), data) : XML.pos * XML.tag * t list) = 438 `Rights data 439 440type title = text_construct 441 442let title_of_xml ~xmlbase a = `Title (text_construct_of_xml ~xmlbase a) 443 444(* atomTitle = element atom:title { atomTextConstruct } *) 445let title_of_xml' ~xmlbase:_ 446 ((_pos, (_tag, _attr), data) : XML.pos * XML.tag * t list) = 447 `Title data 448 449type subtitle = text_construct 450 451let subtitle_of_xml ~xmlbase a = `Subtitle (text_construct_of_xml ~xmlbase a) 452 453(* atomSubtitle = element atom:subtitle { atomTextConstruct } *) 454let subtitle_of_xml' ~xmlbase:_ 455 ((_pos, (_tag, _attr), data) : XML.pos * XML.tag * t list) = 456 `Subtitle data 457 458type updated = Date.t 459type updated' = [`Date of string] 460 461let make_updated ~pos (l : [< updated'] list) = 462 (* atom:updated { atomDateConstruct } *) 463 let updated = 464 match find (fun (`Date _) -> true) l with 465 | Some (`Date d) -> Date.of_rfc3339 d 466 | _ -> 467 raise 468 (Error.Error 469 (pos, "The content of <updated> MUST be a non-empty string")) 470 in 471 `Updated updated 472 473(* atomUpdated = element atom:updated { atomDateConstruct } *) 474let updated_of_xml, updated_of_xml' = 475 let leaf_producer ~xmlbase:_ _pos data = `Date data in 476 ( generate_catcher ~leaf_producer make_updated 477 , generate_catcher ~leaf_producer (fun ~pos:_ x -> `Updated x) ) 478 479type source = 480 { authors: author list 481 ; categories: category list 482 ; contributors: author list 483 ; generator: generator option 484 ; icon: icon option 485 ; id: id 486 ; links: link list 487 ; logo: logo option 488 ; rights: rights option 489 ; subtitle: subtitle option 490 ; title: title 491 ; updated: updated option } 492 493let source ?(categories = []) ?(contributors = []) ?generator ?icon 494 ?(links = []) ?logo ?rights ?subtitle ?updated ~authors ~id title = 495 { authors 496 ; categories 497 ; contributors 498 ; generator 499 ; icon 500 ; id 501 ; links 502 ; logo 503 ; rights 504 ; subtitle 505 ; title 506 ; updated } 507 508type source' = 509 [ `Author of author 510 | `Category of category 511 | `Contributor of author 512 | `Generator of generator 513 | `Icon of icon 514 | `ID of id 515 | `Link of link 516 | `Logo of logo 517 | `Subtitle of subtitle 518 | `Title of title 519 | `Rights of rights 520 | `Updated of updated ] 521 522let make_source ~pos (l : [< source'] list) = 523 (* atomAuthor* *) 524 let authors = 525 List.fold_left 526 (fun acc -> function `Author x -> x :: acc | _ -> acc) 527 [] l 528 in 529 (* atomCategory* *) 530 let categories = 531 List.fold_left 532 (fun acc -> function `Category x -> x :: acc | _ -> acc) 533 [] l 534 in 535 (* atomContributor* *) 536 let contributors = 537 List.fold_left 538 (fun acc -> function `Contributor x -> x :: acc | _ -> acc) 539 [] l 540 in 541 (* atomGenerator? *) 542 let generator = 543 match find (function `Generator _ -> true | _ -> false) l with 544 | Some (`Generator g) -> Some g 545 | _ -> None 546 in 547 (* atomIcon? *) 548 let icon = 549 match find (function `Icon _ -> true | _ -> false) l with 550 | Some (`Icon u) -> Some u 551 | _ -> None 552 in 553 (* atomId? *) 554 let id = 555 match find (function `ID _ -> true | _ -> false) l with 556 | Some (`ID i) -> i 557 | _ -> 558 raise 559 (Error.Error 560 (pos, "<source> elements MUST contains exactly one <id> elements")) 561 in 562 (* atomLink* *) 563 let links = 564 List.fold_left (fun acc -> function `Link x -> x :: acc | _ -> acc) [] l 565 in 566 (* atomLogo? *) 567 let logo = 568 match find (function `Logo _ -> true | _ -> false) l with 569 | Some (`Logo u) -> Some u 570 | _ -> None 571 in 572 (* atomRights? *) 573 let rights = 574 match find (function `Rights _ -> true | _ -> false) l with 575 | Some (`Rights r) -> Some r 576 | _ -> None 577 in 578 (* atomSubtitle? *) 579 let subtitle = 580 match find (function `Subtitle _ -> true | _ -> false) l with 581 | Some (`Subtitle s) -> Some s 582 | _ -> None 583 in 584 (* atomTitle? *) 585 let title = 586 match find (function `Title _ -> true | _ -> false) l with 587 | Some (`Title s) -> s 588 | _ -> 589 raise 590 (Error.Error 591 ( pos 592 , "<source> elements MUST contains exactly one <title> elements" 593 )) 594 in 595 (* atomUpdated? *) 596 let updated = 597 match find (function `Updated _ -> true | _ -> false) l with 598 | Some (`Updated d) -> Some d 599 | _ -> None 600 in 601 `Source 602 ( { authors 603 ; categories 604 ; contributors 605 ; generator 606 ; icon 607 ; id 608 ; links 609 ; logo 610 ; rights 611 ; subtitle 612 ; title 613 ; updated } 614 : source ) 615 616(* atomSource = element atom:source { atomCommonAttributes, (atomAuthor* & 617 atomCategory* & atomContributor* & atomGenerator? & atomIcon? & atomId? & 618 atomLink* & atomLogo? & atomRights? & atomSubtitle? & atomTitle? & 619 atomUpdated? & extensionElement * ) } *) 620let source_of_xml = 621 let data_producer = 622 [ ("author", author_of_xml) 623 ; ("category", category_of_xml) 624 ; ("contributor", contributor_of_xml) 625 ; ("generator", generator_of_xml) 626 ; ("icon", icon_of_xml); ("id", id_of_xml); ("link", link_of_xml) 627 ; ("logo", logo_of_xml); ("rights", rights_of_xml) 628 ; ("subtitle", subtitle_of_xml) 629 ; ("title", title_of_xml) 630 ; ("updated", updated_of_xml) ] 631 in 632 generate_catcher ~namespaces ~data_producer make_source 633 634let source_of_xml' = 635 let data_producer = 636 [ ("author", author_of_xml') 637 ; ("category", category_of_xml') 638 ; ("contributor", contributor_of_xml') 639 ; ("generator", generator_of_xml') 640 ; ("icon", icon_of_xml'); ("id", id_of_xml'); ("link", link_of_xml') 641 ; ("logo", logo_of_xml'); ("rights", rights_of_xml') 642 ; ("subtitle", subtitle_of_xml') 643 ; ("title", title_of_xml') 644 ; ("updated", updated_of_xml') ] 645 in 646 generate_catcher ~namespaces ~data_producer (fun ~pos:_ x -> `Source x) 647 648type mime = string 649 650type content = 651 | Text of string 652 | Html of Uri.t option * string 653 | Xhtml of Uri.t option * Syndic_xml.t list 654 | Mime of mime * string 655 | Src of mime option * Uri.t 656 657[@@@warning "-34"] 658 659type content' = [`Type of string | `SRC of string | `Data of Syndic_xml.t list] 660 661(* atomInlineTextContent = element atom:content { atomCommonAttributes, 662 attribute type { "text" | "html" }?, (text)* } 663 664 atomInlineXHTMLContent = element atom:content { atomCommonAttributes, 665 attribute type { "xhtml" }, xhtmlDiv } 666 667 atomInlineOtherContent = element atom:content { atomCommonAttributes, 668 attribute type { atomMediaType }?, (text|anyElement)* } 669 670 atomOutOfLineContent = element atom:content { atomCommonAttributes, 671 attribute type { atomMediaType }?, attribute src { atomUri }, empty } 672 673 atomContent = atomInlineTextContent | atomInlineXHTMLContent | 674 atomInlineOtherContent | atomOutOfLineContent *) 675let content_of_xml ~xmlbase 676 ((_pos, (_tag, attr), data) : XML.pos * XML.tag * t list) = 677 (* MIME ::= attribute type { "text" | "html" }? | attribute type { "xhtml" } 678 | attribute type { atomMediaType }? *) 679 (* attribute src { atomUri } | none If src s present, [data] MUST be empty. *) 680 match find (fun a -> attr_is a "src") attr with 681 | Some (_, src) -> 682 let mime = 683 match find (fun a -> attr_is a "type") attr with 684 | Some (_, ty) -> Some ty 685 | None -> None 686 in 687 `Content (Src (mime, XML.resolve ~xmlbase (Uri.of_string src))) 688 | None -> 689 (* (text)* 690 * | xhtmlDiv 691 * | (text|anyElement)* 692 * | none *) 693 `Content 694 ( match find (fun a -> attr_is a "type") attr with 695 | Some (_, "text") | None -> Text (get_leaf data) 696 | Some (_, "html") -> Html (xmlbase, get_html_content data) 697 | Some (_, "xhtml") -> Xhtml (xmlbase, get_xml_content data data) 698 | Some (_, mime) -> Mime (mime, get_leaf data) ) 699 700let content_of_xml' ~xmlbase:_ 701 ((_pos, (_tag, attr), data) : XML.pos * XML.tag * t list) = 702 let l = 703 match find (fun a -> attr_is a "src") attr with 704 | Some (_, src) -> [`SRC src] 705 | None -> [] 706 in 707 let l = 708 match find (fun a -> attr_is a "type") attr with 709 | Some (_, ty) -> `Type ty :: l 710 | None -> l 711 in 712 `Content (`Data data :: l) 713 714type summary = text_construct 715 716(* atomSummary = element atom:summary { atomTextConstruct } *) 717let summary_of_xml ~xmlbase a = `Summary (text_construct_of_xml ~xmlbase a) 718 719let summary_of_xml' ~xmlbase:_ ((_, (_, _), data) : XML.pos * XML.tag * t list) 720 = 721 `Summary data 722 723type entry = 724 { authors: author * author list 725 ; categories: category list 726 ; content: content option 727 ; contributors: author list 728 ; id: id 729 ; links: link list 730 ; published: published option 731 ; rights: rights option 732 ; source: source option 733 ; summary: summary option 734 ; title: title 735 ; updated: updated } 736 737let entry ?(categories = []) ?content ?(contributors = []) ?(links = []) 738 ?published ?rights ?source ?summary ~id ~authors ~title ~updated () = 739 { authors 740 ; categories 741 ; content 742 ; contributors 743 ; id 744 ; links 745 ; published 746 ; rights 747 ; source 748 ; summary 749 ; title 750 ; updated } 751 752type entry' = 753 [ `Author of author 754 | `Category of category 755 | `Contributor of author 756 | `ID of id 757 | `Link of link 758 | `Published of published 759 | `Rights of rights 760 | `Source of source 761 | `Content of content 762 | `Summary of summary 763 | `Title of title 764 | `Updated of updated ] 765 766module LinkOrder : Set.OrderedType with type t = string * string = struct 767 type t = string * string 768 769 let compare (a : t) (b : t) = 770 match compare (fst a) (fst b) with 0 -> compare (snd a) (snd b) | n -> n 771end 772 773module LinkSet = Set.Make (LinkOrder) 774 775let uniq_link_alternate ~pos (l : link list) = 776 let string_of_duplicate_link {href; type_media; hreflang; _} 777 (type_media', hreflang') = 778 let ty = (function Some a -> a | None -> "(none)") type_media in 779 let hl = (function Some a -> a | None -> "(none)") hreflang in 780 let ty' = (function "" -> "(none)" | s -> s) type_media' in 781 let hl' = (function "" -> "(none)" | s -> s) hreflang' in 782 Printf.sprintf 783 "Duplicate link between <link href=\"%s\" hreflang=\"%s\" type=\"%s\" \ 784 ..> and <link hreflang=\"%s\" type=\"%s\" ..>" 785 (Uri.to_string href) hl ty hl' ty' 786 in 787 let raise_error link link' = 788 raise (Error.Error (pos, string_of_duplicate_link link link')) 789 in 790 let rec aux acc = function 791 | [] -> l 792 | ({rel; type_media= Some ty; hreflang= Some hl; _} as x) :: r 793 when rel = Alternate -> 794 if LinkSet.mem (ty, hl) acc then 795 raise_error x (LinkSet.find (ty, hl) acc) 796 else aux (LinkSet.add (ty, hl) acc) r 797 | ({rel; type_media= None; hreflang= Some hl; _} as x) :: r 798 when rel = Alternate -> 799 if LinkSet.mem ("", hl) acc then 800 raise_error x (LinkSet.find ("", hl) acc) 801 else aux (LinkSet.add ("", hl) acc) r 802 | ({rel; type_media= Some ty; hreflang= None; _} as x) :: r 803 when rel = Alternate -> 804 if LinkSet.mem (ty, "") acc then 805 raise_error x (LinkSet.find (ty, "") acc) 806 else aux (LinkSet.add (ty, "") acc) r 807 | ({rel; type_media= None; hreflang= None; _} as x) :: r 808 when rel = Alternate -> 809 if LinkSet.mem ("", "") acc then 810 raise_error x (LinkSet.find ("", "") acc) 811 else aux (LinkSet.add ("", "") acc) r 812 | _ :: r -> aux acc r 813 in 814 aux LinkSet.empty l 815 816type feed' = 817 [ `Author of author 818 | `Category of category 819 | `Contributor of author 820 | `Generator of generator 821 | `Icon of icon 822 | `ID of id 823 | `Link of link 824 | `Logo of logo 825 | `Rights of rights 826 | `Subtitle of subtitle 827 | `Title of title 828 | `Updated of updated 829 | `Entry of entry ] 830 831let dummy_name = "\000" 832 833let make_entry ~pos l = 834 let authors = 835 List.fold_left 836 (fun acc -> function `Author x -> x :: acc | _ -> acc) 837 [] l 838 in 839 (* atomSource? *) 840 let sources = 841 List.fold_left 842 (fun acc -> function `Source x -> x :: acc | _ -> acc) 843 [] l 844 in 845 let source = 846 match sources with 847 | [] -> None 848 | [s] -> Some s 849 | _ -> 850 (* RFC 4287 § 4.1.2 *) 851 let msg = 852 "<entry> elements MUST NOT contain more than one <source> element." 853 in 854 raise (Error.Error (pos, msg)) 855 in 856 let authors = 857 match (authors, source) with 858 | a0 :: a, _ -> (a0, a) 859 | [], Some (s : source) -> ( 860 (* If an atom:entry element does not contain atom:author elements, then 861 the atom:author elements of the contained atom:source element are 862 considered to apply. http://tools.ietf.org/html/rfc4287#section-4.2.1 *) 863 match s.authors with 864 | a0 :: a -> (a0, a) 865 | [] -> 866 let msg = 867 "<entry> does not contain an <author> and its <source> neither does" 868 in 869 raise (Error.Error (pos, msg)) ) 870 | [], None -> ({name= dummy_name; uri= None; email= None}, []) 871 (* unacceptable value, see fix_author below *) 872 (* atomCategory* *) 873 in 874 let categories = 875 List.fold_left 876 (fun acc -> function `Category x -> x :: acc | _ -> acc) 877 [] l 878 (* atomContributor* *) 879 in 880 let contributors = 881 List.fold_left 882 (fun acc -> function `Contributor x -> x :: acc | _ -> acc) 883 [] l 884 in 885 (* atomId *) 886 let id = 887 match find (function `ID _ -> true | _ -> false) l with 888 | Some (`ID i) -> i 889 | _ -> 890 raise 891 (Error.Error 892 (pos, "<entry> elements MUST contains exactly one <id> elements")) 893 (* atomLink* *) 894 in 895 let links = 896 List.fold_left (fun acc -> function `Link x -> x :: acc | _ -> acc) [] l 897 in 898 (* atomPublished? *) 899 let published = 900 match find (function `Published _ -> true | _ -> false) l with 901 | Some (`Published s) -> Some s 902 | _ -> None 903 in 904 (* atomRights? *) 905 let rights = 906 match find (function `Rights _ -> true | _ -> false) l with 907 | Some (`Rights r) -> Some r 908 | _ -> None 909 in 910 (* atomContent? *) 911 let content = 912 match find (function `Content _ -> true | _ -> false) l with 913 | Some (`Content c) -> Some c 914 | _ -> None 915 in 916 (* atomSummary? *) 917 let summary = 918 match find (function `Summary _ -> true | _ -> false) l with 919 | Some (`Summary s) -> Some s 920 | _ -> None 921 in 922 (* atomTitle *) 923 let title = 924 match find (function `Title _ -> true | _ -> false) l with 925 | Some (`Title t) -> t 926 | _ -> 927 raise 928 (Error.Error 929 ( pos 930 , "<entry> elements MUST contains exactly one <title> elements" )) 931 in 932 (* atomUpdated *) 933 let updated = 934 match find (function `Updated _ -> true | _ -> false) l with 935 | Some (`Updated u) -> u 936 | _ -> 937 raise 938 (Error.Error 939 ( pos 940 , "<entry> elements MUST contains exactly one <updated> elements" 941 )) 942 in 943 `Entry 944 ( pos 945 , ( { authors 946 ; categories 947 ; content 948 ; contributors 949 ; id 950 ; links= uniq_link_alternate ~pos links 951 ; published 952 ; rights 953 ; source 954 ; summary 955 ; title 956 ; updated } 957 : entry ) ) 958 959(* atomEntry = element atom:entry { atomCommonAttributes, (atomAuthor* & 960 atomCategory* & atomContent? & atomContributor* & atomId & atomLink* & 961 atomPublished? & atomRights? & atomSource? & atomSummary? & atomTitle & 962 atomUpdated & extensionElement * ) } *) 963let entry_of_xml = 964 let data_producer = 965 [ ("author", author_of_xml) 966 ; ("category", category_of_xml) 967 ; ("contributor", contributor_of_xml) 968 ; ("id", id_of_xml); ("link", link_of_xml) 969 ; ("published", published_of_xml) 970 ; ("rights", rights_of_xml); ("source", source_of_xml) 971 ; ("content", content_of_xml) 972 ; ("summary", summary_of_xml) 973 ; ("title", title_of_xml) 974 ; ("updated", updated_of_xml) ] 975 in 976 generate_catcher ~namespaces ~data_producer make_entry 977 978let entry_of_xml' = 979 let data_producer = 980 [ ("author", author_of_xml') 981 ; ("category", category_of_xml') 982 ; ("contributor", contributor_of_xml') 983 ; ("id", id_of_xml'); ("link", link_of_xml') 984 ; ("published", published_of_xml') 985 ; ("rights", rights_of_xml'); ("source", source_of_xml') 986 ; ("content", content_of_xml') 987 ; ("summary", summary_of_xml') 988 ; ("title", title_of_xml') 989 ; ("updated", updated_of_xml') ] 990 in 991 generate_catcher ~namespaces ~data_producer (fun ~pos:_ x -> `Entry x) 992 993type feed = 994 { authors: author list 995 ; categories: category list 996 ; contributors: author list 997 ; generator: generator option 998 ; icon: icon option 999 ; id: id 1000 ; links: link list 1001 ; logo: logo option 1002 ; rights: rights option 1003 ; subtitle: subtitle option 1004 ; title: title 1005 ; updated: updated 1006 ; entries: entry list } 1007 1008let feed ?(authors = []) ?(categories = []) ?(contributors = []) ?generator 1009 ?icon ?(links = []) ?logo ?rights ?subtitle ~id ~title ~updated entries = 1010 { authors 1011 ; categories 1012 ; contributors 1013 ; generator 1014 ; icon 1015 ; id 1016 ; links 1017 ; logo 1018 ; rights 1019 ; subtitle 1020 ; title 1021 ; updated 1022 ; entries } 1023 1024let make_feed ~pos (l : _ list) = 1025 (* atomAuthor* *) 1026 let authors = 1027 List.fold_left 1028 (fun acc -> function `Author x -> x :: acc | _ -> acc) 1029 [] l 1030 in 1031 (* atomCategory* *) 1032 let categories = 1033 List.fold_left 1034 (fun acc -> function `Category x -> x :: acc | _ -> acc) 1035 [] l 1036 in 1037 (* atomContributor* *) 1038 let contributors = 1039 List.fold_left 1040 (fun acc -> function `Contributor x -> x :: acc | _ -> acc) 1041 [] l 1042 in 1043 (* atomLink* *) 1044 let links = 1045 List.fold_left (fun acc -> function `Link x -> x :: acc | _ -> acc) [] l 1046 in 1047 (* atomGenerator? *) 1048 let generator = 1049 match find (function `Generator _ -> true | _ -> false) l with 1050 | Some (`Generator g) -> Some g 1051 | _ -> None 1052 in 1053 (* atomIcon? *) 1054 let icon = 1055 match find (function `Icon _ -> true | _ -> false) l with 1056 | Some (`Icon i) -> Some i 1057 | _ -> None 1058 in 1059 (* atomId *) 1060 let id = 1061 match find (function `ID _ -> true | _ -> false) l with 1062 | Some (`ID i) -> i 1063 | _ -> 1064 raise 1065 (Error.Error 1066 (pos, "<feed> elements MUST contains exactly one <id> elements")) 1067 in 1068 (* atomLogo? *) 1069 let logo = 1070 match find (function `Logo _ -> true | _ -> false) l with 1071 | Some (`Logo l) -> Some l 1072 | _ -> None 1073 in 1074 (* atomRights? *) 1075 let rights = 1076 match find (function `Rights _ -> true | _ -> false) l with 1077 | Some (`Rights r) -> Some r 1078 | _ -> None 1079 in 1080 (* atomSubtitle? *) 1081 let subtitle = 1082 match find (function `Subtitle _ -> true | _ -> false) l with 1083 | Some (`Subtitle s) -> Some s 1084 | _ -> None 1085 in 1086 (* atomTitle *) 1087 let title = 1088 match find (function `Title _ -> true | _ -> false) l with 1089 | Some (`Title t) -> t 1090 | _ -> 1091 raise 1092 (Error.Error 1093 (pos, "<feed> elements MUST contains exactly one <title> elements")) 1094 in 1095 (* atomUpdated *) 1096 let updated = 1097 match find (function `Updated _ -> true | _ -> false) l with 1098 | Some (`Updated u) -> u 1099 | _ -> 1100 raise 1101 (Error.Error 1102 ( pos 1103 , "<feed> elements MUST contains exactly one <updated> elements" 1104 )) 1105 in 1106 (* atomEntry* *) 1107 let fix_author _pos (e : entry) = 1108 match e.authors with 1109 | a, [] when a.name = dummy_name -> ( 1110 (* In an Atom Feed Document, the atom:author elements of the containing 1111 atom:feed element are considered to apply to the entry if there are no 1112 atom:author elements in the locations described above. 1113 http://tools.ietf.org/html/rfc4287#section-4.2.1 *) 1114 match authors with 1115 | a0 :: a -> {e with authors= (a0, a)} 1116 | [] -> 1117 (* RFC 4287 requires at least one author, but many real-world feeds 1118 omit this. Be lenient and use an empty author rather than failing. *) 1119 {e with authors= (empty_author, [])} ) 1120 | _ -> e 1121 in 1122 let entries = 1123 List.fold_left 1124 (fun acc -> function `Entry (pos, e) -> fix_author pos e :: acc 1125 | _ -> acc ) 1126 [] l 1127 in 1128 ( { authors 1129 ; categories 1130 ; contributors 1131 ; generator 1132 ; icon 1133 ; id 1134 ; links 1135 ; logo 1136 ; rights 1137 ; subtitle 1138 ; title 1139 ; updated 1140 ; entries } 1141 : feed ) 1142 1143(* atomFeed = element atom:feed { atomCommonAttributes, (atomAuthor* & 1144 atomCategory* & atomContributor* & atomGenerator? & atomIcon? & atomId & 1145 atomLink* & atomLogo? & atomRights? & atomSubtitle? & atomTitle & 1146 atomUpdated & extensionElement * ), atomEntry* } *) 1147 1148let feed_of_xml = 1149 let data_producer = 1150 [ ("author", author_of_xml) 1151 ; ("category", category_of_xml) 1152 ; ("contributor", contributor_of_xml) 1153 ; ("generator", generator_of_xml) 1154 ; ("icon", icon_of_xml); ("id", id_of_xml); ("link", link_of_xml) 1155 ; ("logo", logo_of_xml); ("rights", rights_of_xml) 1156 ; ("subtitle", subtitle_of_xml) 1157 ; ("title", title_of_xml) 1158 ; ("updated", updated_of_xml) 1159 ; ("entry", entry_of_xml) ] 1160 in 1161 generate_catcher ~namespaces ~data_producer make_feed 1162 1163let feed_of_xml' = 1164 let data_producer = 1165 [ ("author", author_of_xml') 1166 ; ("category", category_of_xml') 1167 ; ("contributor", contributor_of_xml') 1168 ; ("generator", generator_of_xml') 1169 ; ("icon", icon_of_xml'); ("id", id_of_xml'); ("link", link_of_xml') 1170 ; ("logo", logo_of_xml'); ("rights", rights_of_xml') 1171 ; ("subtitle", subtitle_of_xml') 1172 ; ("title", title_of_xml') 1173 ; ("updated", updated_of_xml') 1174 ; ("entry", entry_of_xml') ] 1175 in 1176 generate_catcher ~namespaces ~data_producer (fun ~pos:_ x -> x) 1177 1178(* Remove all tags *) 1179let rec add_to_buffer buf = function 1180 | XML.Node (_, _, subs) -> List.iter (add_to_buffer buf) subs 1181 | XML.Data (_, d) -> Buffer.add_string buf d 1182 1183let xhtml_to_string xhtml = 1184 let buf = Buffer.create 128 in 1185 List.iter (add_to_buffer buf) xhtml ; 1186 Buffer.contents buf 1187 1188let string_of_text_construct = function 1189 (* FIXME: Once we use a proper HTML library, we probably would like to parse 1190 the HTML and remove the tags *) 1191 | (Text s : text_construct) | Html (_, s) -> s 1192 | Xhtml (_, x) -> xhtml_to_string x 1193 1194let parse ?self ?xmlbase input = 1195 let feed = 1196 match XML.of_xmlm input |> snd with 1197 | XML.Node (pos, tag, datas) when tag_is tag "feed" -> 1198 feed_of_xml ~xmlbase (pos, tag, datas) 1199 | _ -> 1200 raise 1201 (Error.Error 1202 ((0, 0), "document MUST contains exactly one <feed> element")) 1203 in 1204 (* FIXME: the spec says that an entry can appear as the top-level element *) 1205 match self with 1206 | None -> feed 1207 | Some self -> 1208 if List.exists (fun l -> l.rel = Self) feed.links then feed 1209 else 1210 let links = 1211 { href= self 1212 ; rel= Self 1213 ; type_media= Some "application/atom+xml" 1214 ; hreflang= None 1215 ; title= string_of_text_construct feed.title 1216 ; length= None } 1217 :: feed.links 1218 in 1219 {feed with links} 1220 1221let read ?self ?xmlbase fname = 1222 let fh = open_in fname in 1223 try 1224 let x = parse ?self ?xmlbase (XML.input_of_channel fh) in 1225 close_in fh ; x 1226 with e -> close_in fh ; raise e 1227 1228let set_self_link feed ?hreflang ?length url = 1229 match List.partition (fun l -> l.rel = Self) feed.links with 1230 | l :: _, links -> 1231 let hreflang = 1232 match hreflang with None -> l.hreflang | Some _ -> hreflang 1233 in 1234 let length = match length with None -> l.length | Some _ -> length in 1235 let self = {l with href= url; hreflang; length} in 1236 {feed with links= self :: links} 1237 | [], links -> 1238 let links = 1239 { href= url 1240 ; rel= Self 1241 ; type_media= Some "application/atom+xml" 1242 ; hreflang 1243 ; title= string_of_text_construct feed.title 1244 ; length } 1245 :: links 1246 in 1247 {feed with links} 1248 1249let get_self_link feed = 1250 try Some (List.find (fun l -> l.rel = Self) feed.links) with Not_found -> 1251 None 1252 1253let unsafe ?xmlbase input = 1254 match XML.of_xmlm input |> snd with 1255 | XML.Node (pos, tag, datas) when tag_is tag "feed" -> 1256 `Feed (feed_of_xml' ~xmlbase (pos, tag, datas)) 1257 | _ -> `Feed [] 1258 1259let remove_empty_authors a = List.filter not_empty_author a 1260 1261(* [normalize_authors a authors] returns (a', authors') where [authors'] is 1262 [authors] where the empty authors and the author [a] have been removed and 1263 [a'] is [a] possibly completed with the information found for [a] in 1264 [authors]. *) 1265let rec normalize_authors (a : author) = function 1266 | [] -> (a, []) 1267 | a0 :: tl -> 1268 if not_empty_author a0 then 1269 if a0.name = a.name then 1270 (* Merge [a0] and [a]. *) 1271 let uri = match a.uri with None -> a0.uri | Some _ -> a.uri in 1272 let email = 1273 match a.email with None -> a0.email | Some _ -> a.email 1274 in 1275 normalize_authors {name= a.name; uri; email} tl 1276 else 1277 let a', authors' = normalize_authors a tl in 1278 (a', a0 :: authors') 1279 else normalize_authors a tl 1280 1281(* drop the empty author *) 1282 1283let set_main_author_entry author (e : entry) = 1284 (* If the entry has a source, then [author] should be ignored and the one 1285 from the [source] should be used instead. *) 1286 let author, author_ok, source = 1287 match e.source with 1288 | None -> (author, true, None) 1289 | Some s -> ( 1290 let s_authors = remove_empty_authors s.authors in 1291 let s_contributors = remove_empty_authors s.contributors in 1292 let s = 1293 Some {s with authors= s_authors; contributors= s_contributors} 1294 in 1295 (* A source exists. If it contains no author, one should not change the 1296 entry authors with [author] because that may wrongly attribute the 1297 post. *) 1298 match s_authors with 1299 | [] -> (author, false, s) 1300 | s_author :: _ -> (s_author, true, s) ) 1301 in 1302 let a0, a = e.authors in 1303 let authors = 1304 match remove_empty_authors (a0 :: a) with 1305 | a0 :: a -> (a0, a) 1306 | [] -> ((if author_ok then author else empty_author), []) 1307 in 1308 let contributors = remove_empty_authors e.contributors in 1309 {e with authors; contributors; source} 1310 1311let set_main_author feed author = 1312 let author, authors = normalize_authors author feed.authors in 1313 let contributors = remove_empty_authors feed.contributors in 1314 let entries = List.map (set_main_author_entry author) feed.entries in 1315 {feed with authors= author :: authors; contributors; entries} 1316 1317(* Conversion to XML *) 1318 1319(* Tag with the Atom namespace *) 1320let atom name : XML.tag = ((atom_ns, name), []) 1321 1322let add_attr_xmlbase ~xmlbase attrs = 1323 match xmlbase with 1324 | Some u -> ((Xmlm.ns_xml, "base"), Uri.to_string u) :: attrs 1325 | None -> attrs 1326 1327let text_construct_to_xml tag_name (t : text_construct) = 1328 match t with 1329 | Text t -> 1330 XML.Node 1331 ( dummy_pos 1332 , ((atom_ns, tag_name), [(("", "type"), "text")]) 1333 , [XML.Data (dummy_pos, t)] ) 1334 | Html (xmlbase, t) -> 1335 let attr = add_attr_xmlbase ~xmlbase [(("", "type"), "html")] in 1336 XML.Node 1337 (dummy_pos, ((atom_ns, tag_name), attr), [XML.Data (dummy_pos, t)]) 1338 | Xhtml (xmlbase, x) -> 1339 let div = 1340 XML.Node 1341 (dummy_pos, ((xhtml_ns, "div"), [(("", "xmlns"), xhtml_ns)]), x) 1342 in 1343 let attr = add_attr_xmlbase ~xmlbase [(("", "type"), "xhtml")] in 1344 XML.Node (dummy_pos, ((atom_ns, tag_name), attr), [div]) 1345 1346let person_to_xml name (a : author) = 1347 XML.Node 1348 ( dummy_pos 1349 , atom name 1350 , [node_data (atom "name") a.name] 1351 |> add_node_uri (atom "uri") a.uri 1352 |> add_node_data (atom "email") a.email ) 1353 1354let author_to_xml a = person_to_xml "author" a 1355let contributor_to_xml a = person_to_xml "contributor" a 1356 1357let category_to_xml (c : category) = 1358 let attrs = 1359 [(("", "term"), c.term)] 1360 |> add_attr_uri ("", "scheme") c.scheme 1361 |> add_attr ("", "label") c.label 1362 in 1363 XML.Node (dummy_pos, ((atom_ns, "category"), attrs), []) 1364 1365let generator_to_xml (g : generator) = 1366 let attr = 1367 [] |> add_attr ("", "version") g.version |> add_attr_uri ("", "uri") g.uri 1368 in 1369 XML.Node 1370 ( dummy_pos 1371 , ((atom_ns, "generator"), attr) 1372 , [XML.Data (dummy_pos, g.content)] ) 1373 1374let string_of_rel = function 1375 | Alternate -> "alternate" 1376 | Related -> "related" 1377 | Self -> "self" 1378 | Enclosure -> "enclosure" 1379 | Via -> "via" 1380 | Link l -> Uri.to_string l 1381 1382let link_to_xml (l : link) = 1383 let attr = 1384 [(("", "href"), Uri.to_string l.href); (("", "rel"), string_of_rel l.rel)] 1385 |> add_attr ("", "type") l.type_media 1386 |> add_attr ("", "hreflang") l.hreflang 1387 in 1388 let attr = if l.title = "" then attr else (("", "title"), l.title) :: attr in 1389 let attr = 1390 match l.length with 1391 | Some len -> (("", "length"), string_of_int len) :: attr 1392 | None -> attr 1393 in 1394 XML.Node (dummy_pos, ((atom_ns, "link"), attr), []) 1395 1396let add_node_date tag date nodes = 1397 match date with 1398 | None -> nodes 1399 | Some d -> node_data tag (Date.to_rfc3339 d) :: nodes 1400 1401let source_to_xml (s : source) = 1402 let nodes = 1403 node_data (atom "id") (Uri.to_string s.id) 1404 :: text_construct_to_xml "title" s.title 1405 :: List.map author_to_xml s.authors 1406 |> add_nodes_rev_map category_to_xml s.categories 1407 |> add_nodes_rev_map contributor_to_xml s.contributors 1408 |> add_node_option generator_to_xml s.generator 1409 |> add_node_option (node_uri (atom "icon")) s.icon 1410 |> add_nodes_rev_map link_to_xml s.links 1411 |> add_node_option (node_uri (atom "logo")) s.logo 1412 |> add_node_option (text_construct_to_xml "rights") s.rights 1413 |> add_node_option (text_construct_to_xml "subtitle") s.subtitle 1414 |> add_node_date (atom "updated") s.updated 1415 in 1416 XML.Node (dummy_pos, atom "source", nodes) 1417 1418let content_to_xml (c : content) = 1419 match c with 1420 | Text t -> 1421 XML.Node 1422 ( dummy_pos 1423 , ((atom_ns, "content"), [(("", "type"), "text")]) 1424 , [XML.Data (dummy_pos, t)] ) 1425 | Html (xmlbase, t) -> 1426 let attrs = add_attr_xmlbase ~xmlbase [(("", "type"), "html")] in 1427 XML.Node 1428 (dummy_pos, ((atom_ns, "content"), attrs), [XML.Data (dummy_pos, t)]) 1429 | Xhtml (xmlbase, x) -> 1430 let div = 1431 XML.Node 1432 (dummy_pos, ((xhtml_ns, "div"), [(("", "xmlns"), xhtml_ns)]), x) 1433 in 1434 let attrs = add_attr_xmlbase ~xmlbase [(("", "type"), "xhtml")] in 1435 XML.Node (dummy_pos, ((atom_ns, "content"), attrs), [div]) 1436 | Mime (mime, d) -> 1437 XML.Node 1438 ( dummy_pos 1439 , ((atom_ns, "content"), [(("", "type"), mime)]) 1440 , [XML.Data (dummy_pos, d)] ) 1441 | Src (mime, uri) -> 1442 let attr = 1443 [(("", "src"), Uri.to_string uri)] |> add_attr ("", "type") mime 1444 in 1445 XML.Node (dummy_pos, ((atom_ns, "content"), attr), []) 1446 1447let entry_to_xml (e : entry) = 1448 let a0, a = e.authors in 1449 let nodes = 1450 node_data (atom "id") (Uri.to_string e.id) 1451 :: text_construct_to_xml "title" e.title 1452 :: node_data (atom "updated") (Date.to_rfc3339 e.updated) 1453 :: author_to_xml a0 1454 :: List.map author_to_xml a 1455 |> add_nodes_rev_map category_to_xml e.categories 1456 |> add_node_option content_to_xml e.content 1457 |> add_nodes_rev_map contributor_to_xml e.contributors 1458 |> add_nodes_rev_map link_to_xml e.links 1459 |> add_node_date (atom "published") e.published 1460 |> add_node_option (text_construct_to_xml "rights") e.rights 1461 |> add_node_option source_to_xml e.source 1462 |> add_node_option (text_construct_to_xml "summary") e.summary 1463 in 1464 XML.Node (dummy_pos, atom "entry", nodes) 1465 1466let to_xml (f : feed) = 1467 let nodes = 1468 node_data (atom "id") (Uri.to_string f.id) 1469 :: text_construct_to_xml "title" f.title 1470 :: node_data (atom "updated") (Date.to_rfc3339 f.updated) 1471 :: List.map entry_to_xml f.entries 1472 |> add_nodes_rev_map author_to_xml (List.rev f.authors) 1473 |> add_nodes_rev_map category_to_xml f.categories 1474 |> add_nodes_rev_map contributor_to_xml f.contributors 1475 |> add_node_option generator_to_xml f.generator 1476 |> add_node_option (node_uri (atom "icon")) f.icon 1477 |> add_nodes_rev_map link_to_xml f.links 1478 |> add_node_option (node_uri (atom "logo")) f.logo 1479 |> add_node_option (text_construct_to_xml "rights") f.rights 1480 |> add_node_option (text_construct_to_xml "subtitle") f.subtitle 1481 in 1482 XML.Node (dummy_pos, ((atom_ns, "feed"), [(("", "xmlns"), atom_ns)]), nodes) 1483 1484(* Atom and XHTML have been declared well in the above XML representation. One 1485 can remove them. *) 1486let output_ns_prefix s = if s = atom_ns || s = xhtml_ns then Some "" else None 1487 1488let output feed dest = 1489 let o = XML.make_output dest ~ns_prefix:output_ns_prefix in 1490 XML.to_xmlm (to_xml feed) o 1491 1492let write feed fname = 1493 let fh = open_out fname in 1494 try 1495 output feed (`Channel fh) ; 1496 close_out fh 1497 with e -> close_out fh ; raise e 1498 1499(* Comparing entries *) 1500 1501let entry_date e = match e.published with Some d -> d | None -> e.updated 1502 1503let ascending (e1 : entry) (e2 : entry) = 1504 Date.compare (entry_date e1) (entry_date e2) 1505 1506let descending (e1 : entry) (e2 : entry) = 1507 Date.compare (entry_date e2) (entry_date e1) 1508 1509(* Feed aggregation *) 1510 1511let syndic_generator = 1512 { version= Some Syndic_conf.version 1513 ; uri= Some Syndic_conf.homepage 1514 ; content= "OCaml Syndic.Atom feed aggregator" } 1515 1516let ocaml_icon = Uri.of_string "http://ocaml.org/img/colour-icon-170x148.png" 1517let default_title : text_construct = Text "Syndic.Atom aggregated feed" 1518 1519let[@warning "-32"] is_alternate_Atom (l : link) = 1520 match l.type_media with 1521 | None -> false 1522 | Some ty -> ty = "application/atom+xml" && l.rel = Alternate 1523 1524let add_entries_of_feed entries feed : entry list = 1525 let source_of_feed = 1526 Some 1527 { authors= feed.authors 1528 ; categories= feed.categories 1529 ; contributors= feed.contributors 1530 ; generator= feed.generator 1531 ; icon= feed.icon 1532 ; id= feed.id 1533 ; links= feed.links 1534 ; logo= feed.logo 1535 ; rights= feed.rights 1536 ; subtitle= feed.subtitle 1537 ; title= feed.title 1538 ; updated= Some feed.updated } 1539 in 1540 let add_entry entries (e : entry) = 1541 match e.source with 1542 | Some _ -> e :: entries (* if a source is present, do not overwrite it. *) 1543 | None -> {e with source= source_of_feed} :: entries 1544 in 1545 List.fold_left add_entry entries feed.entries 1546 1547let entries_of_feeds feeds = List.fold_left add_entries_of_feed [] feeds 1548 1549let more_recent d1 (e : entry) = 1550 if Date.compare d1 e.updated >= 0 then d1 else e.updated 1551 1552let aggregate ?self ?id ?updated ?subtitle ?(title = default_title) 1553 ?(sort = `Newest_first) ?n feeds : feed = 1554 let entries = entries_of_feeds feeds in 1555 let entries = 1556 match sort with 1557 | `Newest_first -> List.sort descending entries 1558 | `Oldest_first -> List.sort ascending entries 1559 | `None -> entries 1560 in 1561 let entries = match n with Some n -> take entries n | None -> entries in 1562 let id = 1563 match id with 1564 | Some id -> id 1565 | None -> 1566 (* Collect all ids of the entries and "digest" them. *) 1567 let b = Buffer.create 4096 in 1568 let add_id (e : entry) = Buffer.add_string b (Uri.to_string e.id) in 1569 List.iter add_id entries ; 1570 let d = Digest.to_hex (Digest.string (Buffer.contents b)) in 1571 (* FIXME: use urn:uuid *) 1572 Uri.of_string ("urn:md5:" ^ d) 1573 in 1574 let links = 1575 match self with 1576 | Some u -> 1577 [ link u 1578 ~title:(string_of_text_construct title) 1579 ~rel:Self ~type_media:"application/atom+xml" ] 1580 | None -> [] 1581 in 1582 let updated = 1583 match updated with 1584 | Some d -> d 1585 | None -> ( 1586 (* Use the more recent date of the entries. *) 1587 match entries with 1588 | [] -> Date.epoch 1589 | e0 :: el -> List.fold_left more_recent e0.updated el ) 1590 in 1591 { authors= [] 1592 ; categories= [] 1593 ; contributors= [] 1594 ; generator= Some syndic_generator 1595 ; icon= Some ocaml_icon 1596 ; id 1597 ; links 1598 ; logo= None 1599 ; rights= None 1600 ; subtitle 1601 ; title 1602 ; updated 1603 ; entries }