My agentic slop goes here. Not intended for anyone else!
at main 39 kB view raw
1open Syndic_common.XML 2open Syndic_common.Util 3module XML = Syndic_xml 4module Atom = Syndic_atom 5module Date = Syndic_date 6module Error = Syndic_error 7 8type image = 9 { url: Uri.t 10 ; title: string 11 ; link: Uri.t 12 ; width: int 13 ; (* default 88 *) 14 height: int 15 ; (* default 31 *) 16 description: string option } 17 18type image' = 19 [ `URL of Uri.t 20 | `Title of string 21 | `Link of Uri.t 22 | `Width of int 23 | `Height of int 24 | `Description of string ] 25 26let make_image ~pos (l : [< image'] list) = 27 let url = 28 match find (function `URL _ -> true | _ -> false) l with 29 | Some (`URL u) -> u 30 | _ -> 31 raise 32 (Error.Error 33 (pos, "<image> elements MUST contains exactly one <url> element")) 34 in 35 let title = 36 match find (function `Title _ -> true | _ -> false) l with 37 | Some (`Title t) -> t 38 | _ -> 39 raise 40 (Error.Error 41 (pos, "<image> elements MUST contains exactly one <title> element")) 42 in 43 let link = 44 match find (function `Link _ -> true | _ -> false) l with 45 | Some (`Link l) -> l 46 | _ -> 47 raise 48 (Error.Error 49 (pos, "<image> elements MUST contains exactly one <link> element")) 50 in 51 let width = 52 match find (function `Width _ -> true | _ -> false) l with 53 | Some (`Width w) -> w 54 | _ -> 88 55 (* cf. RFC *) 56 in 57 let height = 58 match find (function `Height _ -> true | _ -> false) l with 59 | Some (`Height h) -> h 60 | _ -> 31 61 (* cf. RFC *) 62 in 63 let description = 64 match find (function `Description _ -> true | _ -> false) l with 65 | Some (`Description s) -> Some s 66 | _ -> None 67 in 68 `Image ({url; title; link; width; height; description} : image) 69 70let url_of_xml ~xmlbase a = `URL (XML.resolve ~xmlbase (Uri.of_string a)) 71let url_of_xml' ~xmlbase a = `URL (xmlbase, a) 72 73let image_url_of_xml ~xmlbase (pos, _tag, datas) = 74 try url_of_xml ~xmlbase (get_leaf datas) with Not_found -> 75 raise 76 (Error.Error (pos, "The content of <uri> MUST be a non-empty string")) 77 78let image_title_of_xml ~xmlbase:_ (_pos, _tag, datas) = 79 `Title (try get_leaf datas with Not_found -> "") 80 81let image_link_of_xml ~xmlbase (pos, _tag, datas) = 82 try `Link (XML.resolve ~xmlbase (Uri.of_string (get_leaf datas))) 83 with Not_found -> 84 raise 85 (Error.Error (pos, "The content of <link> MUST be a non-empty string")) 86 87let image_size_of_xml ~max ~xmlbase:_ (pos, tag, datas) = 88 try 89 let size = int_of_string (get_leaf datas) in 90 if size > max then 91 raise 92 (Error.Error 93 ( pos 94 , "size of " 95 ^ get_tag_name tag 96 ^ " exceeded (max is " 97 ^ string_of_int max 98 ^ ")" )) 99 else size 100 with 101 | Not_found -> 102 raise 103 (Error.Error 104 ( pos 105 , "The content of <" 106 ^ get_tag_name tag 107 ^ "> MUST be a non-empty string" )) 108 | Failure _ -> 109 raise 110 (Error.Error 111 (pos, "The content of <" ^ get_tag_name tag ^ "> MUST be an integer")) 112 113let image_width_of_xml ~xmlbase a = 114 `Width (image_size_of_xml ~max:144 ~xmlbase a) 115 116let image_height_of_xml ~xmlbase a = 117 `Height (image_size_of_xml ~max:400 ~xmlbase a) 118 119let image_description_of_xml ~xmlbase:_ (pos, _tag, datas) = 120 try `Description (get_leaf datas) with Not_found -> 121 raise 122 (Error.Error 123 (pos, "The content of <description> MUST be a non-empty string")) 124 125let image_of_xml = 126 let data_producer = 127 [ ("url", image_url_of_xml) 128 ; ("title", image_title_of_xml) 129 ; ("link", image_link_of_xml) 130 ; ("width", image_width_of_xml) 131 ; ("height", image_height_of_xml) 132 ; ("description", image_description_of_xml) ] 133 in 134 generate_catcher ~data_producer make_image 135 136let image_of_xml' = 137 let data_producer = 138 [ ("url", dummy_of_xml ~ctor:url_of_xml') 139 ; ("title", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Title a)) 140 ; ("link", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Link (xmlbase, a))) 141 ; ("width", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Width a)) 142 ; ("height", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Height a)) 143 ; ("description", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Description a)) 144 ] 145 in 146 generate_catcher ~data_producer (fun ~pos:_ x -> `Image x) 147 148type cloud = {uri: Uri.t; registerProcedure: string; protocol: string} 149 150type cloud' = 151 [ `Domain of string 152 | `Port of string 153 | `Path of string 154 | `RegisterProcedure of string 155 | `Protocol of string ] 156 157let make_cloud ~pos (l : [< cloud'] list) = 158 let domain = 159 match find (function `Domain _ -> true | _ -> false) l with 160 | Some (`Domain u) -> u 161 | _ -> 162 raise 163 (Error.Error (pos, "Cloud elements MUST have a 'domain' attribute")) 164 in 165 let port = 166 match find (function `Port _ -> true | _ -> false) l with 167 | Some (`Port p) -> int_of_string p 168 | _ -> 169 raise 170 (Error.Error (pos, "Cloud elements MUST have a 'port' attribute")) 171 in 172 let path = 173 match find (function `Path _ -> true | _ -> false) l with 174 | Some (`Path p) -> p 175 | _ -> 176 raise 177 (Error.Error (pos, "Cloud elements MUST have a 'path' attribute")) 178 in 179 let registerProcedure = 180 match find (function `RegisterProcedure _ -> true | _ -> false) l with 181 | Some (`RegisterProcedure r) -> r 182 | _ -> 183 raise 184 (Error.Error 185 (pos, "Cloud elements MUST have a 'registerProcedure' attribute")) 186 in 187 let protocol = 188 match find (function `Protocol _ -> true | _ -> false) l with 189 | Some (`Protocol p) -> p 190 | _ -> 191 raise 192 (Error.Error (pos, "Cloud elements MUST have a 'protocol' attribute")) 193 in 194 let uri = Uri.make ~host:domain ~port ~path () in 195 `Cloud ({uri; registerProcedure; protocol} : cloud) 196 197let cloud_attr_producer = 198 [ ("domain", fun ~xmlbase:_ a -> `Domain a) 199 ; ("port", fun ~xmlbase:_ a -> `Port a) 200 ; ("path", fun ~xmlbase:_ a -> `Path a) 201 ; (* XXX: it's RFC compliant ? *) 202 ("registerProcedure", fun ~xmlbase:_ a -> `RegisterProcedure a) 203 ; ("protocol", fun ~xmlbase:_ a -> `Protocol a) ] 204 205let cloud_of_xml = 206 generate_catcher ~attr_producer:cloud_attr_producer make_cloud 207 208let cloud_of_xml' = 209 generate_catcher ~attr_producer:cloud_attr_producer (fun ~pos:_ x -> `Cloud x) 210 211type textinput = {title: string; description: string; name: string; link: Uri.t} 212 213type textinput' = 214 [`Title of string | `Description of string | `Name of string | `Link of Uri.t] 215 216let make_textinput ~pos (l : [< textinput'] list) = 217 let title = 218 match find (function `Title _ -> true | _ -> false) l with 219 | Some (`Title t) -> t 220 | _ -> 221 raise 222 (Error.Error 223 ( pos 224 , "<textinput> elements MUST contains exactly one <title> element" 225 )) 226 in 227 let description = 228 match find (function `Description _ -> true | _ -> false) l with 229 | Some (`Description s) -> s 230 | _ -> 231 raise 232 (Error.Error 233 ( pos 234 , "<textinput> elements MUST contains exactly one <description> \ 235 element" )) 236 in 237 let name = 238 match find (function `Name _ -> true | _ -> false) l with 239 | Some (`Name s) -> s 240 | _ -> 241 raise 242 (Error.Error 243 ( pos 244 , "<textinput> elements MUST contains exactly one <name> element" 245 )) 246 in 247 let link = 248 match find (function `Link _ -> true | _ -> false) l with 249 | Some (`Link u) -> u 250 | _ -> 251 raise 252 (Error.Error 253 ( pos 254 , "<textinput> elements MUST contains exactly one <link> element" 255 )) 256 in 257 `TextInput ({title; description; name; link} : textinput) 258 259let textinput_title_of_xml ~xmlbase:_ (pos, _tag, datas) = 260 try `Title (get_leaf datas) with Not_found -> 261 raise 262 (Error.Error (pos, "The content of <title> MUST be a non-empty string")) 263 264let textinput_description_of_xml ~xmlbase:_ (pos, _tag, datas) = 265 try `Description (get_leaf datas) with Not_found -> 266 raise 267 (Error.Error 268 (pos, "The content of <description> MUST be a non-empty string")) 269 270let textinput_name_of_xml ~xmlbase:_ (pos, _tag, datas) = 271 try `Name (get_leaf datas) with Not_found -> 272 raise 273 (Error.Error (pos, "The content of <name> MUST be a non-empty string")) 274 275let textinput_link_of_xml ~xmlbase (pos, _tag, datas) = 276 try `Link (XML.resolve ~xmlbase (Uri.of_string (get_leaf datas))) 277 with Not_found -> 278 raise 279 (Error.Error (pos, "The content of <link> MUST be a non-empty string")) 280 281let textinput_of_xml = 282 let data_producer = 283 [ ("title", textinput_title_of_xml) 284 ; ("description", textinput_description_of_xml) 285 ; ("name", textinput_name_of_xml) 286 ; ("link", textinput_link_of_xml) ] 287 in 288 generate_catcher ~data_producer make_textinput 289 290let textinput_of_xml' = 291 let data_producer = 292 [ ("title", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Title a)) 293 ; ("description", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Description a)) 294 ; ("name", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Name a)) 295 ; ("link", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Link (xmlbase, a))) ] 296 in 297 generate_catcher ~data_producer (fun ~pos:_ x -> `TextInput x) 298 299type category = {data: string; domain: Uri.t option} 300type category' = [`Data of string | `Domain of Uri.t] 301 302let make_category ~pos:_ (l : [< category'] list) = 303 let data = 304 match find (function `Data _ -> true | _ -> false) l with 305 | Some (`Data s) -> s 306 | _ -> "" 307 in 308 let domain = 309 match find (function `Domain _ -> true | _ -> false) l with 310 | Some (`Domain d) -> Some d 311 | _ -> None 312 in 313 `Category ({data; domain} : category) 314 315let category_of_xml = 316 let attr_producer = 317 [("domain", fun ~xmlbase:_ a -> `Domain (Uri.of_string a))] 318 in 319 let leaf_producer ~xmlbase:_ _pos data = `Data data in 320 generate_catcher ~attr_producer ~leaf_producer make_category 321 322let category_of_xml' = 323 let attr_producer = [("domain", fun ~xmlbase:_ a -> `Domain a)] in 324 let leaf_producer ~xmlbase:_ _pos data = `Data data in 325 generate_catcher ~attr_producer ~leaf_producer (fun ~pos:_ x -> `Category x) 326 327type enclosure = {url: Uri.t; length: int; mime: string} 328type enclosure' = [`URL of Uri.t | `Length of string | `Mime of string] 329 330let make_enclosure ~pos (l : [< enclosure'] list) = 331 let url = 332 match find (function `URL _ -> true | _ -> false) l with 333 | Some (`URL u) -> u 334 | _ -> 335 raise 336 (Error.Error (pos, "Enclosure elements MUST have a 'url' attribute")) 337 in 338 let length = 339 match find (function `Length _ -> true | _ -> false) l with 340 | Some (`Length l) -> int_of_string l 341 | _ -> 342 raise 343 (Error.Error 344 (pos, "Enclosure elements MUST have a 'length' attribute")) 345 in 346 let mime = 347 match find (function `Mime _ -> true | _ -> false) l with 348 | Some (`Mime m) -> m 349 | _ -> 350 raise 351 (Error.Error (pos, "Enclosure elements MUST have a 'type' attribute")) 352 in 353 `Enclosure ({url; length; mime} : enclosure) 354 355let enclosure_of_xml = 356 let attr_producer = 357 [ ("url", url_of_xml) 358 ; ("length", fun ~xmlbase:_ a -> `Length a) 359 ; ("type", fun ~xmlbase:_ a -> `Mime a) ] 360 in 361 generate_catcher ~attr_producer make_enclosure 362 363let enclosure_of_xml' = 364 let attr_producer = 365 [ ("url", url_of_xml') 366 ; ("length", fun ~xmlbase:_ a -> `Length a) 367 ; ("type", fun ~xmlbase:_ a -> `Mime a) ] 368 in 369 generate_catcher ~attr_producer (fun ~pos:_ x -> `Enclosure x) 370 371type guid = {data: Uri.t; (* must be uniq *) permalink: bool (* default true *)} 372type guid' = [`Data of Uri.t option * string | `Permalink of string] 373 374(* Some RSS2 server output <guid isPermaLink="false"></guid> ! *) 375let make_guid ~pos:_ (l : [< guid'] list) = 376 let permalink = 377 match find (function `Permalink _ -> true | _ -> false) l with 378 | Some (`Permalink b) -> bool_of_string b 379 | _ -> true 380 (* cf. RFC *) 381 in 382 match find (function `Data _ -> true | _ -> false) l with 383 | Some (`Data (xmlbase, u)) -> 384 if u = "" then `Guid None 385 else 386 (* When the GUID is declared as a permlink, resolve it using xml:base *) 387 let data = 388 if permalink then XML.resolve ~xmlbase (Uri.of_string u) 389 else Uri.of_string u 390 in 391 `Guid (Some ({data; permalink} : guid)) 392 | _ -> `Guid None 393 394let guid_of_xml, guid_of_xml' = 395 let attr_producer = [("isPermaLink", fun ~xmlbase:_ a -> `Permalink a)] in 396 let leaf_producer ~xmlbase _pos data = `Data (xmlbase, data) in 397 ( generate_catcher ~attr_producer ~leaf_producer make_guid 398 , generate_catcher ~attr_producer ~leaf_producer (fun ~pos:_ x -> `Guid x) ) 399 400type source = {data: string; url: Uri.t} 401type source' = [`Data of string | `URL of Uri.t] 402 403let make_source ~pos (l : [< source'] list) = 404 let data = 405 match find (function `Data _ -> true | _ -> false) l with 406 | Some (`Data s) -> s 407 | _ -> 408 raise 409 (Error.Error 410 (pos, "The content of <source> MUST be a non-empty string")) 411 in 412 let url = 413 match find (function `URL _ -> true | _ -> false) l with 414 | Some (`URL u) -> u 415 | _ -> 416 raise 417 (Error.Error (pos, "Source elements MUST have a 'url' attribute")) 418 in 419 `Source ({data; url} : source) 420 421let source_of_xml = 422 let attr_producer = [("url", url_of_xml)] in 423 let leaf_producer ~xmlbase:_ _pos data = `Data data in 424 generate_catcher ~attr_producer ~leaf_producer make_source 425 426let source_of_xml' = 427 let attr_producer = [("url", url_of_xml')] in 428 let leaf_producer ~xmlbase:_ _pos data = `Data data in 429 generate_catcher ~attr_producer ~leaf_producer (fun ~pos:_ x -> `Source x) 430 431type story = 432 | All of string * Uri.t option * string 433 | Title of string 434 | Description of Uri.t option * string 435 436type item = 437 { story: story 438 ; content: Uri.t option * string 439 ; link: Uri.t option 440 ; author: string option 441 ; (* e-mail *) 442 categories: category list 443 ; comments: Uri.t option 444 ; enclosure: enclosure option 445 ; guid: guid option 446 ; pubDate: Date.t option 447 ; (* date *) 448 source: source option } 449 450[@@@warning "-34"] 451 452type item' = 453 [ `Title of string 454 | `Description of Uri.t option * string (* xmlbase, description *) 455 | `Content of Uri.t option * string 456 | `Link of Uri.t 457 | `Author of string (* e-mail *) 458 | `Category of category 459 | `Comments of Uri.t 460 | `Enclosure of enclosure 461 | `Guid of guid 462 | `PubDate of Date.t 463 | `Source of source ] 464 465let make_item ~pos (l : _ list) = 466 let story = 467 match 468 ( find (function `Title _ -> true | _ -> false) l 469 , find (function `Description _ -> true | _ -> false) l ) 470 with 471 | Some (`Title t), Some (`Description (x, d)) -> All (t, x, d) 472 | Some (`Title t), _ -> Title t 473 | _, Some (`Description (x, d)) -> Description (x, d) 474 | _, _ -> 475 raise (Error.Error (pos, "Item expected <title> or <description> tag")) 476 in 477 let content = 478 match find (function `Content _ -> true | _ -> false) l with 479 | Some (`Content (x, c)) -> (x, c) 480 | _ -> (None, "") 481 in 482 let link = 483 match find (function `Link _ -> true | _ -> false) l with 484 | Some (`Link l) -> l 485 | _ -> None 486 in 487 let author = 488 match find (function `Author _ -> true | _ -> false) l with 489 | Some (`Author a) -> Some a 490 | _ -> None 491 in 492 let categories = 493 let fn = fun acc -> function `Category x -> x :: acc | _ -> acc in 494 List.fold_left fn [] l |> List.rev 495 in 496 let comments = 497 match find (function `Comments _ -> true | _ -> false) l with 498 | Some (`Comments c) -> Some c 499 | _ -> None 500 in 501 let enclosure = 502 match find (function `Enclosure _ -> true | _ -> false) l with 503 | Some (`Enclosure e) -> Some e 504 | _ -> None 505 in 506 let guid = 507 match find (function `Guid _ -> true | _ -> false) l with 508 | Some (`Guid g) -> g 509 | _ -> None 510 in 511 let pubDate = 512 match find (function `PubDate _ -> true | _ -> false) l with 513 | Some (`PubDate p) -> Some p 514 | _ -> None 515 in 516 let source = 517 match find (function `Source _ -> true | _ -> false) l with 518 | Some (`Source s) -> Some s 519 | _ -> None 520 in 521 `Item 522 ( { story 523 ; content 524 ; link 525 ; author 526 ; categories 527 ; comments 528 ; enclosure 529 ; guid 530 ; pubDate 531 ; source } 532 : item ) 533 534let item_title_of_xml ~xmlbase:_ (pos, _tag, datas) = 535 try `Title (get_leaf datas) with Not_found -> 536 raise 537 (Error.Error (pos, "The content of <title> MUST be a non-empty string")) 538 539let item_description_of_xml ~xmlbase (_pos, _tag, datas) = 540 `Description (xmlbase, try get_leaf datas with Not_found -> "") 541 542let item_content_of_xml ~xmlbase (_pos, _tag, datas) = 543 `Content (xmlbase, try get_leaf datas with Not_found -> "") 544 545let item_link_of_xml ~xmlbase (_pos, _tag, datas) = 546 `Link 547 ( try Some (XML.resolve ~xmlbase (Uri.of_string (get_leaf datas))) 548 with Not_found -> None ) 549 550let item_author_of_xml ~xmlbase:_ (pos, _tag, datas) = 551 try `Author (get_leaf datas) with Not_found -> 552 raise 553 (Error.Error (pos, "The content of <author> MUST be a non-empty string")) 554 555let item_comments_of_xml ~xmlbase (pos, _tag, datas) = 556 try `Comments (XML.resolve ~xmlbase (Uri.of_string (get_leaf datas))) 557 with Not_found -> 558 raise 559 (Error.Error (pos, "The content of <comments> MUST be a non-empty string")) 560 561let item_pubdate_of_xml ~xmlbase:_ (pos, _tag, datas) = 562 try `PubDate (Date.of_rfc822 (get_leaf datas)) with Not_found -> 563 raise 564 (Error.Error (pos, "The content of <pubDate> MUST be a non-empty string")) 565 566let item_namespaces = [""; "http://purl.org/rss/1.0/modules/content/"] 567 568let item_of_xml = 569 let data_producer = 570 [ ("title", item_title_of_xml) 571 ; ("description", item_description_of_xml) 572 ; (* <content:encoded> where 573 xmlns:content="http://purl.org/rss/1.0/modules/content/" *) 574 ("encoded", item_content_of_xml) 575 ; ("link", item_link_of_xml) 576 ; ("author", item_author_of_xml) 577 ; ("category", category_of_xml) 578 ; ("comments", item_comments_of_xml) 579 ; ("enclosure", enclosure_of_xml) 580 ; ("guid", guid_of_xml) 581 ; ("pubDate", item_pubdate_of_xml) 582 ; ("source", source_of_xml) ] 583 in 584 generate_catcher ~data_producer make_item ~namespaces:item_namespaces 585 586let item_of_xml' = 587 let data_producer = 588 [ ("title", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Title a)) 589 ; ("description", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Description a)) 590 ; ("encoded", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Content a)) 591 ; ("link", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Link (xmlbase, a))) 592 ; ("author", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Author a)) 593 ; ("category", category_of_xml') 594 ; ("comments", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Comments a)) 595 ; ("enclosure", enclosure_of_xml') 596 ; ("guid", guid_of_xml') 597 ; ("pubdate", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `PubDate a)) 598 ; ("source", source_of_xml') ] 599 in 600 generate_catcher ~data_producer 601 (fun ~pos:_ x -> `Item x) 602 ~namespaces:item_namespaces 603 604type channel = 605 { title: string 606 ; link: Uri.t 607 ; description: string 608 ; language: string option 609 ; copyright: string option 610 ; managingEditor: string option 611 ; webMaster: string option 612 ; pubDate: Date.t option 613 ; lastBuildDate: Date.t option 614 ; category: string option 615 ; generator: string option 616 ; docs: Uri.t option 617 ; cloud: cloud option 618 ; ttl: int option 619 ; image: image option 620 ; rating: int option 621 ; textInput: textinput option 622 ; skipHours: int option 623 ; skipDays: int option 624 ; items: item list } 625 626type channel' = 627 [ `Title of string 628 | `Link of Uri.t 629 | `Description of string 630 | `Language of string 631 | `Copyright of string 632 | `ManagingEditor of string 633 | `WebMaster of string 634 | `PubDate of Date.t 635 | `LastBuildDate of Date.t 636 | `Category of string 637 | `Generator of string 638 | `Docs of Uri.t 639 | `Cloud of cloud 640 | `TTL of int 641 | `Image of image 642 | `Rating of int 643 | `TextInput of textinput 644 | `SkipHours of int 645 | `SkipDays of int 646 | `Item of item ] 647 648let make_channel ~pos (l : [< channel'] list) = 649 let title = 650 match find (function `Title _ -> true | _ -> false) l with 651 | Some (`Title t) -> t 652 | _ -> 653 raise 654 (Error.Error 655 ( pos 656 , "<channel> elements MUST contains exactly one <title> element" 657 )) 658 in 659 let link = 660 match find (function `Link _ -> true | _ -> false) l with 661 | Some (`Link l) -> l 662 | _ -> 663 raise 664 (Error.Error 665 ( pos 666 , "<channel> elements MUST contains exactly one <link> element" )) 667 in 668 let description = 669 match find (function `Description _ -> true | _ -> false) l with 670 | Some (`Description l) -> l 671 | _ -> 672 raise 673 (Error.Error 674 ( pos 675 , "<channel> elements MUST contains exactly one <description> \ 676 element" )) 677 in 678 let language = 679 match find (function `Language _ -> true | _ -> false) l with 680 | Some (`Language a) -> Some a 681 | _ -> None 682 in 683 let copyright = 684 match find (function `Copyright _ -> true | _ -> false) l with 685 | Some (`Copyright a) -> Some a 686 | _ -> None 687 in 688 let managingEditor = 689 match find (function `ManagingEditor _ -> true | _ -> false) l with 690 | Some (`ManagingEditor a) -> Some a 691 | _ -> None 692 in 693 let webMaster = 694 match find (function `WebMaster _ -> true | _ -> false) l with 695 | Some (`WebMaster a) -> Some a 696 | _ -> None 697 in 698 let pubDate = 699 match find (function `PubDate _ -> true | _ -> false) l with 700 | Some (`PubDate a) -> Some a 701 | _ -> None 702 in 703 let lastBuildDate = 704 match find (function `LastBuildDate _ -> true | _ -> false) l with 705 | Some (`LastBuildDate a) -> Some a 706 | _ -> None 707 in 708 let category = 709 match find (function `Category _ -> true | _ -> false) l with 710 | Some (`Category a) -> Some a 711 | _ -> None 712 in 713 let generator = 714 match find (function `Generator _ -> true | _ -> false) l with 715 | Some (`Generator a) -> Some a 716 | _ -> None 717 in 718 let docs = 719 match find (function `Docs _ -> true | _ -> false) l with 720 | Some (`Docs a) -> Some a 721 | _ -> None 722 in 723 let cloud = 724 match find (function `Cloud _ -> true | _ -> false) l with 725 | Some (`Cloud a) -> Some a 726 | _ -> None 727 in 728 let ttl = 729 match find (function `TTL _ -> true | _ -> false) l with 730 | Some (`TTL a) -> Some a 731 | _ -> None 732 in 733 let image = 734 match find (function `Image _ -> true | _ -> false) l with 735 | Some (`Image a) -> Some a 736 | _ -> None 737 in 738 let rating = 739 match find (function `Rating _ -> true | _ -> false) l with 740 | Some (`Rating a) -> Some a 741 | _ -> None 742 in 743 let textInput = 744 match find (function `TextInput _ -> true | _ -> false) l with 745 | Some (`TextInput a) -> Some a 746 | _ -> None 747 in 748 let skipHours = 749 match find (function `SkipHours _ -> true | _ -> false) l with 750 | Some (`SkipHours a) -> Some a 751 | _ -> None 752 in 753 let skipDays = 754 match find (function `SkipDays _ -> true | _ -> false) l with 755 | Some (`SkipDays a) -> Some a 756 | _ -> None 757 in 758 let items = 759 List.fold_left (fun acc -> function `Item x -> x :: acc | _ -> acc) [] l 760 in 761 ( { title 762 ; link 763 ; description 764 ; language 765 ; copyright 766 ; managingEditor 767 ; webMaster 768 ; pubDate 769 ; lastBuildDate 770 ; category 771 ; generator 772 ; docs 773 ; cloud 774 ; ttl 775 ; image 776 ; rating 777 ; textInput 778 ; skipHours 779 ; skipDays 780 ; items } 781 : channel ) 782 783let channel_title_of_xml ~xmlbase:_ (pos, _tag, datas) = 784 try `Title (get_leaf datas) with Not_found -> 785 raise 786 (Error.Error (pos, "The content of <title> MUST be a non-empty string")) 787 788let channel_description_of_xml ~xmlbase:_ (_pos, _tag, datas) = 789 `Description (try get_leaf datas with Not_found -> "") 790 791let channel_link_of_xml ~xmlbase (pos, _tag, datas) = 792 try `Link (XML.resolve ~xmlbase (Uri.of_string (get_leaf datas))) 793 with Not_found -> 794 raise 795 (Error.Error (pos, "The content of <link> MUST be a non-empty string")) 796 797let channel_language_of_xml ~xmlbase:_ (pos, _tag, datas) = 798 try `Language (get_leaf datas) with Not_found -> 799 raise 800 (Error.Error (pos, "The content of <language> MUST be a non-empty string")) 801 802let channel_copyright_of_xml ~xmlbase:_ (_pos, _tag, datas) = 803 try `Copyright (get_leaf datas) with Not_found -> `Copyright "" 804 805(* XXX(dinosaure): aempty copyright is allowed. *) 806 807let channel_managingeditor_of_xml ~xmlbase:_ (pos, _tag, datas) = 808 try `ManagingEditor (get_leaf datas) with Not_found -> 809 raise 810 (Error.Error 811 (pos, "The content of <managingEditor> MUST be a non-empty string")) 812 813let channel_webmaster_of_xml ~xmlbase:_ (pos, _tag, datas) = 814 try `WebMaster (get_leaf datas) with Not_found -> 815 raise 816 (Error.Error 817 (pos, "The content of <webMaster> MUST be a non-empty string")) 818 819let channel_pubdate_of_xml ~xmlbase:_ (pos, _tag, datas) = 820 try `PubDate (Date.of_rfc822 (get_leaf datas)) with Not_found -> 821 raise 822 (Error.Error (pos, "The content of <pubDate> MUST be a non-empty string")) 823 824let channel_lastbuilddate_of_xml ~xmlbase:_ (pos, _tag, datas) = 825 try `LastBuildDate (Date.of_rfc822 (get_leaf datas)) with Not_found -> 826 raise 827 (Error.Error 828 (pos, "The content of <lastBuildDate> MUST be a non-empty string")) 829 830let channel_category_of_xml ~xmlbase:_ (pos, _tag, datas) = 831 try `Category (get_leaf datas) with Not_found -> 832 raise 833 (Error.Error (pos, "The content of <category> MUST be a non-empty string")) 834 835let channel_generator_of_xml ~xmlbase:_ (pos, _tag, datas) = 836 try `Generator (get_leaf datas) with Not_found -> 837 raise 838 (Error.Error 839 (pos, "The content of <generator> MUST be a non-empty string")) 840 841let channel_docs_of_xml ~xmlbase (pos, _tag, datas) = 842 try `Docs (XML.resolve ~xmlbase (Uri.of_string (get_leaf datas))) 843 with Not_found -> 844 raise 845 (Error.Error (pos, "The content of <docs> MUST be a non-empty string")) 846 847let channel_ttl_of_xml ~xmlbase:_ (pos, _tag, datas) = 848 try `TTL (int_of_string (get_leaf datas)) with _ -> 849 raise 850 (Error.Error 851 ( pos 852 , "The content of <ttl> MUST be a non-empty string representing an \ 853 integer" )) 854 855let channel_rating_of_xml ~xmlbase:_ (pos, _tag, datas) = 856 try `Rating (int_of_string (get_leaf datas)) with _ -> 857 raise 858 (Error.Error 859 ( pos 860 , "The content of <rating> MUST be a non-empty string representing \ 861 an integer" )) 862 863let channel_skipHours_of_xml ~xmlbase:_ (pos, _tag, datas) = 864 try `SkipHours (int_of_string (get_leaf datas)) with _ -> 865 raise 866 (Error.Error 867 ( pos 868 , "The content of <skipHours> MUST be a non-empty string \ 869 representing an integer" )) 870 871let channel_skipDays_of_xml ~xmlbase:_ (pos, _tag, datas) = 872 try `SkipDays (int_of_string (get_leaf datas)) with _ -> 873 raise 874 (Error.Error 875 ( pos 876 , "The content of <skipDays> MUST be a non-empty string representing \ 877 an integer" )) 878 879let channel_of_xml = 880 let data_producer = 881 [ ("title", channel_title_of_xml) 882 ; ("link", channel_link_of_xml) 883 ; ("description", channel_description_of_xml) 884 ; ("Language", channel_language_of_xml) 885 ; ("copyright", channel_copyright_of_xml) 886 ; ("managingeditor", channel_managingeditor_of_xml) 887 ; ("webmaster", channel_webmaster_of_xml) 888 ; ("pubdate", channel_pubdate_of_xml) 889 ; ("lastbuilddate", channel_lastbuilddate_of_xml) 890 ; ("category", channel_category_of_xml) 891 ; ("generator", channel_generator_of_xml) 892 ; ("docs", channel_docs_of_xml) 893 ; ("cloud", cloud_of_xml) 894 ; ("ttl", channel_ttl_of_xml) 895 ; ("image", image_of_xml) 896 ; ("rating", channel_rating_of_xml) 897 ; ("textinput", textinput_of_xml) 898 ; ("skiphours", channel_skipHours_of_xml) 899 ; ("skipdays", channel_skipDays_of_xml) 900 ; ("item", item_of_xml) ] 901 in 902 generate_catcher ~data_producer make_channel 903 904let channel_of_xml' = 905 let data_producer = 906 [ ("title", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Title a)) 907 ; ("link", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Link (xmlbase, a))) 908 ; ("description", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Description a)) 909 ; ("Language", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Language a)) 910 ; ("copyright", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Copyright a)) 911 ; ( "managingeditor" 912 , dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `ManagingEditor a) ) 913 ; ("webmaster", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `WebMaster a)) 914 ; ("pubdate", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `PubDate a)) 915 ; ( "lastbuilddate" 916 , dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `LastBuildDate a) ) 917 ; ("category", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Category a)) 918 ; ("generator", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Generator a)) 919 ; ("docs", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Docs a)) 920 ; ("cloud", cloud_of_xml') 921 ; ("ttl", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `TTL a)) 922 ; ("image", image_of_xml') 923 ; ("rating", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `Rating a)) 924 ; ("textinput", textinput_of_xml') 925 ; ("skiphours", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `SkipHours a)) 926 ; ("skipdays", dummy_of_xml ~ctor:(fun ~xmlbase:_ a -> `SkipDays a)) 927 ; ("item", item_of_xml') ] 928 in 929 generate_catcher ~data_producer (fun ~pos:_ x -> x) 930 931let find_channel l = 932 find 933 (function 934 | XML.Node (_pos, tag, _data) -> tag_is tag "channel" 935 | XML.Data _ -> false) 936 l 937 938let parse ?xmlbase input = 939 match XML.of_xmlm input |> snd with 940 | XML.Node (pos, tag, data) -> ( 941 if tag_is tag "channel" then channel_of_xml ~xmlbase (pos, tag, data) 942 else 943 match find_channel data with 944 | Some (XML.Node (p, t, d)) -> channel_of_xml ~xmlbase (p, t, d) 945 | Some (XML.Data _) | _ -> 946 raise 947 (Error.Error 948 ( (0, 0) 949 , "document MUST contains exactly one <channel> element" )) ) 950 | _ -> 951 raise 952 (Error.Error 953 ((0, 0), "document MUST contains exactly one <channel> element")) 954 955let read ?xmlbase fname = 956 let fh = open_in fname in 957 try 958 let x = parse ?xmlbase (XML.input_of_channel fh) in 959 close_in fh ; x 960 with e -> close_in fh ; raise e 961 962type uri = Uri.t option * string 963 964let unsafe ?xmlbase input = 965 match XML.of_xmlm input |> snd with 966 | XML.Node (pos, tag, data) -> ( 967 if tag_is tag "channel" then 968 `Channel (channel_of_xml' ~xmlbase (pos, tag, data)) 969 else 970 match find_channel data with 971 | Some (XML.Node (p, t, d)) -> 972 `Channel (channel_of_xml' ~xmlbase (p, t, d)) 973 | Some (XML.Data _) | None -> `Channel [] ) 974 | _ -> `Channel [] 975 976(* Conversion to Atom *) 977 978let map_option o f = match o with None -> None | Some v -> Some (f v) 979 980(* Assume ASCII or a superset like UTF-8. *) 981let valid_local_part = 982 let is_valid c = 983 let c = Char.unsafe_chr c in 984 ('a' <= c && c <= 'z') 985 || ('A' <= c && c <= 'Z') 986 || ('0' <= c && c <= '9') 987 || c = '.' 988 (* shouldn't be the 1st char and not appear twice consecutively *) 989 || c = '!' 990 || c = '#' 991 || c = '$' 992 || c = '%' 993 || c = '&' 994 || c = '\'' 995 || c = '*' 996 || c = '+' 997 || c = '-' 998 || c = '/' 999 || c = '=' 1000 || c = '?' 1001 || c = '^' 1002 || c = '_' 1003 || c = '`' 1004 || c = '{' 1005 || c = '|' 1006 || c = '}' 1007 || c = '~' 1008 in 1009 Array.init 256 is_valid 1010 1011let is_valid_local_part c = valid_local_part.(Char.code c) 1012 1013let valid_domain_part = 1014 let is_valid c = 1015 let c = Char.unsafe_chr c in 1016 ('a' <= c && c <= 'z') 1017 || ('A' <= c && c <= 'Z') 1018 || ('0' <= c && c <= '9') 1019 || c = '.' 1020 || c = '.' 1021 in 1022 Array.init 256 is_valid 1023 1024let is_valid_domain_part c = valid_domain_part.(Char.code c) 1025 1026(* Valid range [s.[i]], [i0 ≤ i < i1]. *) 1027let sub_no_braces s i0 i1 = 1028 let i0 = if s.[i0] = '(' then i0 + 1 else i0 in 1029 let i1 = if s.[i1 - 1] = ')' then i1 - 1 else i1 in 1030 String.sub s i0 (i1 - i0) 1031 1032(* The item author sometimes contains the name and email under the form "name 1033 <email>" or "email (name)". Try to extract both compnents. *) 1034let extract_name_email a = 1035 try 1036 let i = String.index a '@' in 1037 (* or Not_found *) 1038 let len = String.length a in 1039 let i0 = ref (i - 1) in 1040 while !i0 >= 0 && is_valid_local_part a.[!i0] do 1041 decr i0 1042 done ; 1043 incr i0 ; 1044 (* !i0 >= 0 is the first char of the possible email. *) 1045 let i1 = ref (i + 1) in 1046 while !i1 < len && is_valid_domain_part a.[!i1] do 1047 incr i1 1048 done ; 1049 if !i0 < i && i + 1 < !i1 then ( 1050 let email = String.sub a !i0 (!i1 - !i0) in 1051 if !i0 > 0 && a.[!i0 - 1] = '<' then decr i0 ; 1052 if !i1 < len && a.[!i1] = '>' then incr i1 ; 1053 while !i1 < len && a.[!i1] = ' ' do 1054 incr i1 1055 done ; 1056 (* skip spaces *) 1057 let name = 1058 if !i0 <= 0 then 1059 if !i1 >= len then email (* no name *) else sub_no_braces a !i1 len 1060 else 1061 (* !i0 > 0 *) 1062 let name0 = String.trim (String.sub a 0 !i0) in 1063 if !i1 >= len then name0 else name0 ^ String.sub a !i1 (len - !i1) 1064 in 1065 (name, Some email) ) 1066 else (a, None) 1067 with Not_found -> (a, None) 1068 1069let looks_like_a_link u = 1070 (Uri.scheme u = Some "http" || Uri.scheme u = Some "https") 1071 && match Uri.host u with None | Some "" -> false | Some _ -> true 1072 1073let entry_of_item ch_link ch_updated (it : item) : Atom.entry = 1074 let author = 1075 match it.author with 1076 | Some a -> 1077 let name, email = extract_name_email a in 1078 {Atom.name; uri= None; email} 1079 | None -> 1080 (* If no author is specified for the item, there is little one can do 1081 just using the RSS2 feed. The user will have to set it using Atom 1082 convenience functions. *) 1083 {Atom.name= ""; uri= None; email= None} 1084 in 1085 let categories = 1086 let fn (c : category) = { Atom.term= c.data; scheme= map_option c.domain (fun d -> d); label= None } in 1087 List.map fn it.categories 1088 in 1089 let (title : Atom.title), content = 1090 match it.story with 1091 | All (t, xmlbase, d) -> 1092 let content = 1093 match it.content with 1094 | _, "" -> if d = "" then None else Some (Atom.Html (xmlbase, d)) 1095 | x, c -> Some (Atom.Html (x, c)) 1096 in 1097 (Atom.Text t, content) 1098 | Title t -> 1099 let content = 1100 match it.content with 1101 | _, "" -> None 1102 | x, c -> Some (Atom.Html (x, c)) 1103 in 1104 (Atom.Text t, content) 1105 | Description (xmlbase, d) -> 1106 let content = 1107 match it.content with 1108 | _, "" -> if d = "" then None else Some (Atom.Html (xmlbase, d)) 1109 | x, c -> Some (Atom.Html (x, c)) 1110 in 1111 (Atom.Text "", content) 1112 in 1113 let id = 1114 match it.guid with 1115 | Some g -> 1116 if g.permalink || looks_like_a_link g.data then g.data 1117 else 1118 let d = Digest.to_hex (Digest.string (Uri.to_string g.data)) in 1119 Uri.with_fragment ch_link (Some d) 1120 | None -> 1121 (* The [it.link] may not be a permanent link and may also be used by 1122 other items. We use a digest to make it unique. *) 1123 let link = match it.link with Some l -> l | None -> ch_link in 1124 let s = 1125 match it.story with 1126 | All (t, _, d) -> t ^ d 1127 | Title t -> t 1128 | Description (_, d) -> d 1129 in 1130 let d = Digest.to_hex (Digest.string s) in 1131 Uri.with_fragment link (Some d) 1132 in 1133 let links = 1134 match (it.guid, it.link) with 1135 | Some g, _ when g.permalink -> [Atom.link g.data ~rel:Atom.Alternate] 1136 | _, Some l -> [Atom.link l ~rel:Atom.Alternate] 1137 | Some g, _ -> 1138 (* Sometimes the guid sets [l.permalink = false] but is nonetheless the 1139 only URI we have. *) 1140 if looks_like_a_link g.data then [Atom.link g.data ~rel:Atom.Alternate] 1141 else [] 1142 | _, None -> [] 1143 in 1144 let links = 1145 match it.comments with 1146 | Some l -> 1147 { Atom.href= l 1148 ; rel= Atom.Related 1149 ; type_media= None 1150 ; hreflang= None 1151 ; title= "" 1152 ; length= None } 1153 :: links 1154 | None -> links 1155 in 1156 let links = 1157 match it.enclosure with 1158 | Some e -> 1159 { Atom.href= e.url 1160 ; rel= Atom.Enclosure 1161 ; type_media= Some e.mime 1162 ; hreflang= None 1163 ; title= "" 1164 ; length= Some e.length } 1165 :: links 1166 | None -> links 1167 in 1168 let source = 1169 match it.source with 1170 | Some s -> 1171 Some 1172 { Atom.authors= [author] 1173 ; (* Best guess *) 1174 categories= [] 1175 ; contributors= [] 1176 ; generator= None 1177 ; icon= None 1178 ; id= ch_link 1179 ; (* declared as the ID of the whole channel *) 1180 links= 1181 [ { Atom.href= s.url 1182 ; rel= Atom.Related 1183 ; type_media= None 1184 ; hreflang= None 1185 ; title= "" 1186 ; length= None } ] 1187 ; logo= None 1188 ; rights= None 1189 ; subtitle= None 1190 ; title= Atom.Text s.data 1191 ; updated= None } 1192 | None -> None 1193 in 1194 { Atom.authors= (author, []) 1195 ; categories 1196 ; content 1197 ; contributors= [] 1198 ; id 1199 ; links 1200 ; published= None 1201 ; rights= None 1202 ; source 1203 ; summary= None 1204 ; title 1205 ; updated= (match it.pubDate with Some d -> d | None -> ch_updated) } 1206 1207let more_recent_of_item date (it : item) = 1208 match (date, it.pubDate) with 1209 | _, None -> date 1210 | None, Some _ -> it.pubDate 1211 | Some d, Some di -> if Date.compare d di >= 0 then date else it.pubDate 1212 1213let max_date_opt d = function None -> d | Some d' -> Date.max d d' 1214 1215let to_atom ?self (ch : channel) : Atom.feed = 1216 let contributors = 1217 match ch.webMaster with 1218 | Some p -> [{Atom.name= "Webmaster"; uri= None; email= Some p}] 1219 | None -> [] 1220 in 1221 let contributors = 1222 match ch.managingEditor with 1223 | Some p -> 1224 {Atom.name= "Managing Editor"; uri= None; email= Some p} 1225 :: contributors 1226 | None -> contributors 1227 in 1228 let links = 1229 [ { Atom.href= ch.link 1230 ; rel= Atom.Related 1231 ; type_media= Some "text/html" 1232 ; hreflang= None 1233 ; title= ch.title 1234 ; length= None } ] 1235 in 1236 let links = 1237 match self with 1238 | Some self -> 1239 { Atom.href= self 1240 ; rel= Atom.Self 1241 ; type_media= Some "application/rss+xml" 1242 ; hreflang= None 1243 ; title= ch.title 1244 ; length= None } 1245 :: links 1246 | None -> links 1247 in 1248 let updated = 1249 match List.fold_left more_recent_of_item None ch.items with 1250 | None -> max_date_opt Date.epoch ch.lastBuildDate 1251 | Some d -> max_date_opt d ch.lastBuildDate 1252 in 1253 { Atom.authors= [] 1254 ; categories= 1255 ( match ch.category with 1256 | None -> [] 1257 | Some c -> [{Atom.term= c; scheme= None; label= None}] ) 1258 ; contributors 1259 ; generator= 1260 map_option ch.generator (fun g -> 1261 {Atom.content= g; version= None; uri= None} ) 1262 ; icon= None 1263 ; id= ch.link 1264 ; (* FIXME: Best we can do? *) 1265 links 1266 ; logo= map_option ch.image (fun i -> i.url) 1267 ; rights= map_option ch.copyright (fun c -> (Atom.Text c : Atom.rights)) 1268 ; subtitle= None 1269 ; title= Atom.Text ch.title 1270 ; updated 1271 ; entries= List.map (entry_of_item ch.link updated) ch.items }