My agentic slop goes here. Not intended for anyone else!
1(** [Syndic.Atom]: {{: http://tools.ietf.org/html/rfc4287} RFC 4287} compliant
2 Atom parser. *)
3
4module Error : module type of Syndic_error
(** Error reporting. This submodule exposes exactly the signature of
    [Syndic_error]; the [Error.*] exceptions mentioned in the documentation of
    {!parse} refer to it. *)
5
6(** {2 Structure of Atom document} *)
7
8(** A {{:http://tools.ietf.org/html/rfc4287#section-3.1}text construct}. It
9 contains human-readable text, usually in small quantities. The content of
10 Text constructs is Language-Sensitive.
11
12 Since the constructors [Text], [Html] or [Xhtml] are shadowed by those of
13 the same name in the definition of {!type:content}, you may need a type
14 annotation to disambiguate the two. *)
15type text_construct =
16 | Text of string (** [Text(content)] *)
17 | Html of Uri.t option * string
18 (** [Html(xmlbase, content)] where the content is left unparsed. *)
19 | Xhtml of Uri.t option * Syndic_xml.t list (** [Xhtml(xmlbase, content)] *)
20
21(** Describes a person, corporation, or similar entity (hereafter, 'person')
22 that indicates the author of the entry or feed. {{:
23 http://tools.ietf.org/html/rfc4287#section-3.2} See RFC 4287 § 3.2}.
24 Person constructs allow extension Metadata elements (see {{:
25 http://tools.ietf.org/html/rfc4287#section-6.4}Section 6.4}).
26
27 They are used for authors
28 ({{:http://tools.ietf.org/html/rfc4287#section-4.2.1} See RFC 4287
29 § 4.2.1}) and contributors
30 ({{:http://tools.ietf.org/html/rfc4287#section-4.2.3} See RFC 4287
31 § 4.2.3}) *)
32type author = {name: string; uri: Uri.t option; email: string option}
33
34val author : ?uri:Uri.t -> ?email:string -> string -> author
(** [author ?uri ?email name] builds an {!type:author} value. Going by the
    types, the positional string is the [name] field and [uri] / [email] fill
    the optional fields of the same name.

    NOTE(review): what an omitted optional argument becomes is not visible in
    this interface (presumably [None]) — confirm against the implementation. *)
35
36(** The [category] element conveys information about a category associated with
    an entry or feed. This specification assigns no meaning to the content (if
    any) of this element. {{:http://tools.ietf.org/html/rfc4287#section-4.2.2}
    See RFC 4287 § 4.2.2}.

    - [term] is a string that identifies the category to which the entry or
      feed belongs. {{: http://tools.ietf.org/html/rfc4287#section-4.2.2.2} See
      RFC 4287 § 4.2.2.2}
    - [scheme], if present, is an IRI that identifies a categorization scheme.
      {{: http://tools.ietf.org/html/rfc4287#section-4.2.2.3} See RFC 4287 §
      4.2.2.3}
    - [label], if present, is a human-readable label for display in end-user
      applications. The content of the "label" attribute is
      Language-Sensitive.
      {{: http://tools.ietf.org/html/rfc4287#section-4.2.2.1} See RFC 4287 §
      4.2.2.1} *)
50type category = {term: string; scheme: Uri.t option; label: string option}
51
52val category : ?scheme:Uri.t -> ?label:string -> string -> category
(** [category ?scheme ?label term] builds a {!type:category} value. Going by
    the types, the positional string is the [term] field and [scheme] /
    [label] fill the optional fields of the same name.

    NOTE(review): what an omitted optional argument becomes is not visible in
    this interface (presumably [None]) — confirm against the implementation. *)
53
54(** The [generator] element's content identifies the agent used to generate a
    feed, for debugging and other purposes.

    - [content] is a human-readable name for the generating agent.
    - [uri], if present, SHOULD produce a representation that is relevant to
      that agent.
    - [version], if present, indicates the version of the generating agent.

    See {{: http://tools.ietf.org/html/rfc4287#section-4.2.4}RFC 4287 §
    4.2.4}. *)
62type generator = {version: string option; uri: Uri.t option; content: string}
63
64val generator : ?uri:Uri.t -> ?version:string -> string -> generator
(** [generator ?uri ?version content] builds a {!type:generator} value. Going
    by the types, the positional string is the [content] field and [uri] /
    [version] fill the optional fields of the same name.

    NOTE(review): what an omitted optional argument becomes is not visible in
    this interface (presumably [None]) — confirm against the implementation. *)
65
66(** The [icon] element's content is an IRI reference [RFC3987] that identifies
67 an image that provides iconic visual identification for a feed.
68
69 The image SHOULD have an aspect ratio of one (horizontal) to one (vertical)
70 and SHOULD be suitable for presentation at a small size.
71
72 {{:http://tools.ietf.org/html/rfc4287#section-4.2.5} See RFC 4287 § 4.2.5} *)
73type icon = Uri.t
74
75(** The [id] element conveys a permanent, universally unique identifier for an
    entry or feed.

    Its content MUST be an IRI, as defined by [RFC3987]. Note that the
    definition of "IRI" excludes relative references. Though the IRI might use
    a dereferencable scheme, Atom Processors MUST NOT assume it can be
    dereferenced.

    The RFC states further requirements on identifiers, but they are not
    needed here, or at least cannot be checked here.

    {{: http://tools.ietf.org/html/rfc4287#section-4.2.6} See RFC 4287 §
    4.2.6} *)
88type id = Uri.t
89
90(** Indicates the link relation type. See {{:
91 http://tools.ietf.org/html/rfc4287#section-4.2.7.2} RFC 4287 § 4.2.7.2}. *)
92type rel =
93 | Alternate
94 (** Signifies that the URI in the value of the link [href] field
95 identifies an alternate version of the resource described by the
96 containing element. *)
97 | Related
98 (** Signifies that the URI in the value of the link [href] field
99 identifies a resource related to the resource described by the
100 containing element. *)
101 | Self
102 (** Signifies that the URI in the value of the link [href] field
103 identifies a resource equivalent to the containing element. *)
104 | Enclosure
105 (** Signifies that the IRI in the value of the link [href] field
106 identifies a related resource that is potentially large in size and
107 might require special handling. When [Enclosure] is specified, the
108 length attribute SHOULD be provided. *)
109 | Via
110 (** Signifies that the IRI in the value of the link [href] field
111 identifies a resource that is the source of the information provided
112 in the containing element. *)
113 | Link of Uri.t
114 (** The URI MUST be non-empty and match either the "isegment-nz-nc" or
115 the "IRI" production in {{:http://tools.ietf.org/html/rfc3987}
116 RFC3987}. Note that use of a relative reference other than a simple
117 name is not allowed. *)
118
119(** [link] defines a reference from an entry or feed to a Web resource. See {{:
    http://tools.ietf.org/html/rfc4287#section-4.2.7} RFC 4287 § 4.2.7}.

    - [href] contains the link's IRI. The value MUST be an IRI reference,
      {{:http://tools.ietf.org/html/rfc3987} RFC3987}. See {{:
      http://tools.ietf.org/html/rfc4287#section-4.2.7.1} RFC 4287 § 4.2.7.1}.
    - [type_media] is an advisory media type: it is a hint about the type of
      the representation that is expected to be returned when the value of the
      href attribute is dereferenced. Note that the type attribute does not
      override the actual media type returned with the representation. The
      value of [type_media], if given, MUST conform to the syntax of a MIME
      media type, {{:http://tools.ietf.org/html/rfc4287#ref-MIMEREG} MIMEREG}.
      See {{: http://tools.ietf.org/html/rfc4287#section-4.2.7.3} RFC 4287 §
      4.2.7.3}.
    - [hreflang] describes the language of the resource pointed to by the href
      attribute. When used together with the [rel=Alternate], it implies a
      translated version of the entry. The value of [hreflang] MUST be a
      language tag, {{:http://tools.ietf.org/html/rfc3066} RFC3066}. See {{:
      http://tools.ietf.org/html/rfc4287#section-4.2.7.4} RFC 4287 § 4.2.7.4}.
    - [title] conveys human-readable information about the link. The content
      of the "title" attribute is Language-Sensitive. The value [""] means
      that no title is provided. See {{:
      http://tools.ietf.org/html/rfc4287#section-4.2.7.5} RFC 4287 § 4.2.7.5}.
    - [length] indicates an advisory length of the linked content in octets;
      it is a hint about the content length of the representation returned
      when the IRI in the href attribute is mapped to a URI and dereferenced.
      Note that the length attribute does not override the actual content
      length of the representation as reported by the underlying protocol. See
      {{: http://tools.ietf.org/html/rfc4287#section-4.2.7.6} RFC 4287 §
      4.2.7.6}. *)
147type link =
148 { href: Uri.t
149 ; rel: rel
150 ; type_media: string option
151 ; hreflang: string option
152 ; title: string
153 ; length: int option }
154
155val link :
156 ?type_media:string
157 -> ?hreflang:string
158 -> ?title:string
159 -> ?length:int
160 -> ?rel:rel
161 -> Uri.t
162 -> link
163(** [link uri] creates a link element.

    @param rel The [rel] attribute of the link. It defaults to [Alternate]
    since {{:http://tools.ietf.org/html/rfc4287#section-4.2.7.2} RFC 4287 §
    4.2.7.2} says that {i if the "rel" attribute is not present, the link
    element MUST be interpreted as if the link relation type is "alternate".}

    The other optional arguments default to "not specified". For
    [type_media], [hreflang] and [length] that is [None]; [title] is a plain
    [string] in {!type:link}, for which [""] is the documented "no title"
    value (presumably the default here — confirm against the implementation). *)
171
172(** [logo] is an IRI reference [RFC3987] that identifies an image that provides
173 visual identification for a feed.
174
175 The image SHOULD have an aspect ratio of 2 (horizontal) to 1 (vertical).
176
177 {{: http://tools.ietf.org/html/rfc4287#section-4.2.8} See RFC 4287 §
178 4.2.8} *)
179type logo = Uri.t
180
181(** [published] is a Date construct indicating an instant in time associated
182 with an event early in the life cycle of the entry.
183
184 Typically, [published] will be associated with the initial creation or
185 first availability of the resource.
186
187 {{: http://tools.ietf.org/html/rfc4287#section-4.2.9} See RFC 4287 §
188 4.2.9} *)
189type published = Syndic_date.t
190
191(** [rights] is a Text construct that conveys information about rights held in
192 and over an entry or feed. The [rights] element SHOULD NOT be used to
193 convey machine-readable licensing information.
194
195 If an atom:entry element does not contain an atom:rights element, then the
196 atom:rights element of the containing atom:feed element, if present, is
197 considered to apply to the entry.
198
199 See {{: http://tools.ietf.org/html/rfc4287#section-4.2.10} RFC 4287 §
200 4.2.10 } *)
201type rights = text_construct
202
203(** [title] is a Text construct that conveys a human-readable title for an
204 entry or feed. {{: http://tools.ietf.org/html/rfc4287#section-4.2.14} See
205 RFC 4287 § 4.2.14 } *)
206type title = text_construct
207
208(** [subtitle] is a Text construct that conveys a human-readable description or
209 subtitle for a feed. {{: http://tools.ietf.org/html/rfc4287#section-4.2.12}
210 See RFC 4287 § 4.2.12 } *)
211type subtitle = text_construct
212
213(** [updated] is a Date construct indicating the most recent instant in time
214 when an entry or feed was modified in a way the publisher considers
215 significant. Therefore, not all modifications necessarily result in a
216 changed [updated] value.
217
218 Publishers MAY change the value of this element over time.
219
220 {{: http://tools.ietf.org/html/rfc4287#section-4.2.15} See RFC 4287 §
221 4.2.15 } *)
222type updated = Syndic_date.t
223
224(** If an {!entry} is copied from one feed into another feed, then the source
225 {!feed}'s metadata (all child elements of atom:feed other than the
226 atom:entry elements) MAY be preserved within the copied entry by adding an
227 atom:source child element, if it is not already present in the entry, and
228 including some or all of the source feed's Metadata elements as the
229 atom:source element's children. Such metadata SHOULD be preserved if the
230 source atom:feed contains any of the child elements atom:author,
231 atom:contributor, atom:rights, or atom:category and those child elements
232 are not present in the source atom:entry.
233
234 {{: http://tools.ietf.org/html/rfc4287#section-4.2.11} See RFC 4287 §
235 4.2.11 }
236
237 The atom:source element is designed to allow the aggregation of entries
238 from different feeds while retaining information about an entry's source
239 feed. For this reason, Atom Processors that are performing such aggregation
240 SHOULD include at least the required feed-level Metadata fields ([id],
241 [title], and [updated]) in the [source] element.
242
243 {{: http://tools.ietf.org/html/rfc4287#section-4.1.2} See RFC 4287 § 4.1.2
244 for more details.} *)
245type source =
246 { authors: author list
247 ; categories: category list
248 ; contributors: author list
249 (** {{: http://tools.ietf.org/html/rfc4287#section-4.2.3} See RFC 4287
250 § 4.2.3 } *)
251 ; generator: generator option
252 ; icon: icon option
253 ; id: id
254 ; links: link list
255 ; logo: logo option
256 ; rights: rights option
257 ; subtitle: subtitle option
258 ; title: title
259 ; updated: updated option }
260
261val source :
262 ?categories:category list
263 -> ?contributors:author list
264 -> ?generator:generator
265 -> ?icon:icon
266 -> ?links:link list
267 -> ?logo:logo
268 -> ?rights:rights
269 -> ?subtitle:subtitle
270 -> ?updated:updated
271 -> authors:author list
272 -> id:id
273 -> title
274 -> source
(** [source ~authors ~id title] builds a {!type:source} value. Every labelled
    or optional argument carries the name of the record field it is meant for,
    and the positional argument has the type of the [title] field.

    NOTE(review): the values used for omitted optional arguments are not
    visible in this interface (presumably [[]] for the lists and [None] for
    the others) — confirm against the implementation. *)
275
276(** A MIME type that conforms to the syntax of a MIME media type, but MUST NOT
    be a composite type (see Section 4.2.6 of [MIMEREG]).

    {{: http://tools.ietf.org/html/rfc4287#section-4.1.3.1} See RFC 4287 §
    4.1.3.1 } *)
281type mime = string
282
283(** [content] either contains or links to the content of the entry. The value
284 of [content] is Language-Sensitive. {{:
285 http://tools.ietf.org/html/rfc4287#section-4.1.3} See RFC 4287 § 4.1.3}
286
287 - [Text], [Html], [Xhtml] or [Mime] means that the content was part of the
288 document and is provided as an argument. The first argument to [Html] and
289 [Xhtml] is the possible xml:base value.
290 {{:http://tools.ietf.org/html/rfc4287#section-3.1.1} See RFC 4287 § 3.1.1}
291 - [Src(m, iri)] means that the content is to be found at [iri] and has MIME
292 type [m]. Atom Processors MAY use the IRI to retrieve the content and MAY
293 choose to ignore remote content or to present it in a different manner than
294 local content. The value of [m] is advisory; that is to say, when the
295 corresponding URI (mapped from an IRI, if necessary) is dereferenced, if
296 the server providing that content also provides a media type, the
297 server-provided media type is authoritative. See {{:
298 http://tools.ietf.org/html/rfc4287#section-4.1.3.2} RFC 4287 § 4.1.3.2} *)
299type content =
300 | Text of string
301 | Html of Uri.t option * string
302 | Xhtml of Uri.t option * Syndic_xml.t list
303 | Mime of mime * string
304 | Src of mime option * Uri.t
305
306(** [summary] is a Text construct that conveys a short summary, abstract, or
307 excerpt of an entry.
308
309 It is not advisable for [summary] to duplicate {!title} or {!content}
310 because Atom Processors might assume there is a useful summary when there
311 is none.
312
313 {{: http://tools.ietf.org/html/rfc4287#section-4.2.13} See RFC 4287 §
314 4.2.13 } *)
315type summary = text_construct
316
317(** [entry] represents an individual entry, acting as a container for metadata
    and data associated with the entry. This element can appear as a child of
    the atom:feed element, or it can appear as the document (i.e., top-level)
    element of a stand-alone Atom Entry Document.

    The specification mandates that each entry contains an author unless it
    contains some sources or the feed contains an author element. This library
    ensures that the authors are properly dispatched to all locations.

    The following child elements are defined by this specification (note that
    it requires the presence of some of these elements):

    - if [content = None], then [links] MUST contain at least one element with
      a rel attribute value of [Alternate].
    - There MUST NOT be more than one element of [links] with a rel attribute
      value of [Alternate] that has the same combination of type and hreflang
      attribute values.
    - There MAY be additional elements of [links] beyond those described
      above.
    - There MUST be a [summary] in either of the following cases:
      {ul
       {- the atom:entry contains an atom:content that has a "src" attribute
          (and is thus empty).}
       {- the atom:entry contains content that is encoded in Base64; i.e., the
          "type" attribute of atom:content is a MIME media type [MIMEREG], but
          is not an XML media type [RFC3023], does not begin with "text/", and
          does not end with "/xml" or "+xml".}}

    {{: http://tools.ietf.org/html/rfc4287#section-4.1.2} See RFC 4287 §
    4.1.2} *)
343type entry =
344 { authors: author * author list
345 ; categories: category list
346 ; content: content option
347 ; contributors: author list
348 ; id: id
349 ; links: link list
350 ; published: published option
351 ; rights: rights option
352 ; source: source option
353 ; summary: summary option
354 ; title: title
355 ; updated: updated }
356
357val entry :
358 ?categories:category list
359 -> ?content:content
360 -> ?contributors:author list
361 -> ?links:link list
362 -> ?published:published
363 -> ?rights:rights
364 -> ?source:source
365 -> ?summary:summary
366 -> id:id
367 -> authors:author * author list
368 -> title:title
369 -> updated:updated
370 -> unit
371 -> entry
(** [entry ~id ~authors ~title ~updated ()] builds an {!type:entry} value.
    Every labelled or optional argument carries the name of the record field
    it is meant for. [authors] is a pair made of one author and a list of
    further authors, so at least one author is always supplied. The final
    [unit] argument is what lets the optional arguments be omitted.

    NOTE(review): the values used for omitted optional arguments are not
    visible in this interface (presumably [[]] for the lists and [None] for
    the others) — confirm against the implementation. *)
372
373(** [feed] is the document (i.e., top-level) element of an Atom Feed Document,
    acting as a container for metadata and data associated with the feed. Its
    element children consist of metadata elements followed by zero or more
    atom:entry child elements.

    - one of the [links] SHOULD have a [rel] attribute value of [Self]. This
      is the preferred URI for retrieving Atom Feed Documents representing
      this Atom feed.
    - There MUST NOT be more than one element of [links] with a rel attribute
      value of [Alternate] that has the same combination of type and hreflang
      attribute values.
    - There may be additional elements in [links] beyond those described
      above.

    If multiple {!entry} elements with the same {!id} value appear in an Atom
    Feed Document, they represent the same entry. Their {!updated} timestamps
    SHOULD be different. If an Atom Feed Document contains multiple entries
    with the same {!id}, Atom Processors MAY choose to display all of them or
    some subset of them. One typical behavior would be to display only the
    entry with the latest {!updated} timestamp.

    {{: http://tools.ietf.org/html/rfc4287#section-4.1.1} See RFC 4287 §
    4.1.1} *)
394type feed =
395 { authors: author list
396 ; categories: category list
397 ; contributors: author list
398 ; generator: generator option
399 ; icon: icon option
400 ; id: id
401 ; links: link list
402 ; logo: logo option
403 ; rights: rights option
404 ; subtitle: subtitle option
405 ; title: title
406 ; updated: updated
407 ; entries: entry list }
408
409val feed :
410 ?authors:author list
411 -> ?categories:category list
412 -> ?contributors:author list
413 -> ?generator:generator
414 -> ?icon:icon
415 -> ?links:link list
416 -> ?logo:logo
417 -> ?rights:rights
418 -> ?subtitle:subtitle
419 -> id:id
420 -> title:title
421 -> updated:updated
422 -> entry list
423 -> feed
(** [feed ~id ~title ~updated entries] builds a {!type:feed} value. Every
    labelled or optional argument carries the name of the record field it is
    meant for, and the positional argument has the type of the [entries]
    field.

    NOTE(review): the values used for omitted optional arguments are not
    visible in this interface (presumably [[]] for the lists and [None] for
    the others) — confirm against the implementation. *)
424
425(** {2 Input and output} *)
426
427val parse : ?self:Uri.t -> ?xmlbase:Uri.t -> Xmlm.input -> feed
428(** [parse xml] returns the feed corresponding to [xml]. Beware that [xml] is
429 mutable, so when the parsing fails, one has to create a new copy of [xml]
430 to use it with another function. If you retrieve [xml] from a URL, you
431 should use that URL as [~xmlbase].
432
433 Raise [Error.Expected], [Expected_Data] or [Error.Duplicate_Link] if [xml]
434 is not a valid Atom document.
435
436 @param xmlbase default xml:base to resolve relative URLs (of course
437 xml:base attributes in the XML Atom document take precedence over this).
438 See {{:http://www.w3.org/TR/xmlbase/}XML Base}.
439
440 @param self the URI from where the current feed was retrieved. Giving this
441 information will add an entry to [links] with [rel = Self] unless one
442 already exists. *)
443
444val read : ?self:Uri.t -> ?xmlbase:Uri.t -> string -> feed
445(** [read fname] reads the file named [fname] and parses it. For the optional
    parameters, see {!parse}. *)
447
448val to_xml : feed -> Syndic_xml.t
449(** [to_xml f] converts the feed [f] to an XML tree. *)
450
451val output : feed -> Xmlm.dest -> unit
452(** [output f dest] writes the XML tree of the feed [f] to [dest]. *)
453
454val write : feed -> string -> unit
455(** [write f fname] writes the XML tree of the feed [f] to the file named
456 [fname]. *)
457
458(** {2 Convenience functions} *)
459
460val ascending : entry -> entry -> int
461(** Compare entries so that older dates are smaller. The date of the entry is
462 taken from the [published] field, if available, or otherwise [updated] is
463 used. *)
464
465val descending : entry -> entry -> int
466(** Compare entries so that more recent dates are smaller. The date of the
467 entry is taken from the [published] field, if available, or otherwise
468 [updated] is used. *)
469
470val aggregate :
471 ?self:Uri.t
472 -> ?id:id
473 -> ?updated:updated
474 -> ?subtitle:subtitle
475 -> ?title:text_construct
476 -> ?sort:[`Newest_first | `Oldest_first | `None]
477 -> ?n:int
478 -> feed list
479 -> feed
480(** [aggregate feeds] returns a single feed containing all the posts in
    [feeds]. In order to track the origin of each post in the aggregated feed,
    it is recommended that each feed in [feeds] possesses a link with
    [rel = Self] so that the [source] added to each entry contains a link to
    the original feed. If an entry contains a [source], it will {i not} be
    overwritten.

    @param self The preferred URI for retrieving this aggregated Atom Feed.
    While not mandatory, it is good practice to set this.

    @param id the universally unique identifier for the aggregated feed. If it
    is not provided a URN is built from the [feeds] IDs.

    @param sort whether to sort the entries of the final feed. The default is
    [`Newest_first] because it is generally desired.

    @param n number of entries of the (sorted) aggregated feed to return. *)
495
496val set_self_link : feed -> ?hreflang:string -> ?length:int -> Uri.t -> feed
497(** [set_self_link feed url] adds or replaces the URI in the self link of the
    feed. You can also set the [hreflang] and [length] of the self link. *)
499
500val get_self_link : feed -> link option
501(** [get_self_link feed] returns the self link of the feed, if any is
    present. *)
502
503val set_main_author : feed -> author -> feed
504(** [set_main_author feed author] will add [author] in front of the list of
    authors of the [feed] (if an author with the same name already exists, the
    optional information is merged, the values in [author] taking precedence).
    Also removes all empty authors (name = "" and no URI, no email) and
    replaces them with [author] if no author is left and an author is
    mandatory. *)
509
510(**/**)
511
512(** A URI is given by [(xmlbase, uri)]. The value of [xmlbase], if not [None],
    gives the base URI against which [uri] must be resolved if it is
    relative. *)
514type uri = Uri.t option * string
515
(** Raw description of a person, as carried by the [`Author] and
    [`Contributor] cases in the result of {!unsafe}: a list of attributes,
    each tagged [`Name], [`Email] or [`URI]. *)
516type person = [`Email of string | `Name of string | `URI of uri] list
517
518val unsafe :
519 ?xmlbase:Uri.t
520 -> Xmlm.input
521 -> [> `Feed of [> `Author of person
522 | `Category of [> `Label of string
523 | `Scheme of string
524 | `Term of string ]
525 list
526 | `Contributor of person
527 | `Entry of [> `Author of person
528 | `Category of [> `Label of string
529 | `Scheme of string
530 | `Term of string ]
531 list
532 | `Content of [> `Data of Syndic_xml.t list
533 | `SRC of string
534 | `Type of string ]
535 list
536 | `Contributor of person
537 | `ID of string list
538 | `Link of [> `HREF of string
539 | `HREFLang of string
540 | `Length of string
541 | `Rel of string
542 | `Title of string
543 | `Type of string ]
544 list
545 | `Published of [> `Date of string] list
546 | `Rights of Syndic_xml.t list
547 | `Source of [> `Author of person
548 | `Category of [> `Label of string
549 | `Scheme of string
550 | `Term of string ]
551 list
552 | `Contributor of person
553 | `Generator of [> `Content of string
554 | `URI of uri
555 | `Version of string
556 ]
557 list
558 | `ID of string list
559 | `Icon of [> `URI of uri] list
560 | `Link of [> `HREF of string
561 | `HREFLang of string
562 | `Length of string
563 | `Rel of string
564 | `Title of string
565 | `Type of string ]
566 list
567 | `Logo of [> `URI of uri] list
568 | `Rights of Syndic_xml.t list
569 | `Subtitle of Syndic_xml.t list
570 | `Title of Syndic_xml.t list
571 | `Updated of [> `Date of string]
572 list ]
573 list
574 | `Summary of Syndic_xml.t list
575 | `Title of Syndic_xml.t list
576 | `Updated of [> `Date of string] list ]
577 list
578 | `Generator of [> `Content of string
579 | `URI of uri
580 | `Version of string ]
581 list
582 | `ID of string list
583 | `Icon of [> `URI of uri] list
584 | `Link of [> `HREF of string
585 | `HREFLang of string
586 | `Length of string
587 | `Rel of string
588 | `Title of string
589 | `Type of string ]
590 list
591 | `Logo of [> `URI of uri] list
592 | `Rights of Syndic_xml.t list
593 | `Subtitle of Syndic_xml.t list
594 | `Title of Syndic_xml.t list
595 | `Updated of [> `Date of string] list ]
596 list ]
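597(** [unsafe xml] analyses [xml] without verification: as the type shows, the
    result is a raw tree of polymorphic variants in which dates, lengths,
    identifiers, etc. are still plain strings. Enjoy! *)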