(* My agentic slop goes here. Not intended for anyone else! *)
(** [Syndic.Rss2]: compliant with
    {{:http://www.rssboard.org/rss-specification} RSS 2.0}. *)

(** Errors reported by this module; same signature as [Syndic_error]. *)
module Error : module type of Syndic_error

(** An [image] is an optional sub-element of {!channel}, which contains three
    required ([url], [title], [link]) and three optional ([width], [height],
    [description]) sub-elements.

    {{:http://www.rssboard.org/rss-specification#ltimagegtSubelementOfLtchannelgt}
    See RSS 2.0 about <image>}. *)
type image =
  { url: Uri.t
        (** The URL of a GIF, JPEG or PNG image that represents the channel. *)
  ; title: string
        (** Describes the image. It's used in the ALT attribute of the HTML
            <img> tag when the channel is rendered in HTML. *)
  ; link: Uri.t
        (** The URL of the site; when the channel is rendered, the image is a
            link to the site. (Note: in practice the image [title] and [link]
            should have the same value as the {!channel}'s [title] and
            [link].) *)
  ; width: int
        (** Width of the image in pixels. Maximum value is 144, default value
            is 88. *)
  ; height: int
        (** Height of the image in pixels. Maximum value is 400, default value
            is 31. *)
  ; description: string option
        (** Text that is included in the TITLE attribute of the link formed
            around the image in the HTML rendering. *) }

(** [cloud] is an optional sub-element of {!channel}. It specifies a web
    service that supports the rssCloud interface which can be implemented in
    HTTP-POST, XML-RPC or SOAP 1.1.

    Its purpose is to allow processes to register with a cloud to be notified
    of updates to the channel, implementing a lightweight publish-subscribe
    protocol for RSS feeds.

    {{:http://www.rssboard.org/rss-specification#ltcloudgtSubelementOfLtchannelgt}
    See RSS 2.0 about <cloud>}

    {[
      <cloud domain="rpc.sys.com" port="80" path="/RPC2"
             registerProcedure="myCloud.rssPleaseNotify" protocol="xml-rpc" />
    ]}

    In this example, to request notification on the channel it appears in, you
    would send an XML-RPC message to rpc.sys.com on port 80, with a path of
    /RPC2. The procedure to call is myCloud.rssPleaseNotify. *)
type cloud =
  { uri: Uri.t  (** The URI of the cloud (domain, port, path). *)
  ; registerProcedure: string
        (** The name of the procedure to call to request notification
            ([myCloud.rssPleaseNotify] in the example above). *)
  ; protocol: string
        (** The protocol used to talk to the cloud ([xml-rpc] in the example
            above). *) }

(** A {!channel} may optionally contain a [textInput] sub-element, which
    contains four required sub-elements.

    The purpose of the <textInput> element is something of a mystery. You can
    use it to specify a search engine box. Or to allow a reader to provide
    feedback. Most aggregators ignore it.

    {{:http://www.rssboard.org/rss-specification#lttextinputgtSubelementOfLtchannelgt}
    See RSS 2.0 about <textInput>} *)
type textinput =
  { title: string
        (** The label of the Submit button in the text input area. *)
  ; description: string  (** Explains the text input area. *)
  ; name: string
        (** The name of the text object in the text input area. *)
  ; link: Uri.t
        (** The URL of the CGI script that processes text input requests. *) }

(** [category] is an optional sub-element of {!item}.

    - [data] is a forward-slash-separated string that identifies a hierarchic
      location in the indicated taxonomy. Processors may establish conventions
      for the interpretation of categories.
    - [domain], if provided, identifies a categorization taxonomy.

    {{:http://www.rssboard.org/rss-specification#ltcategorygtSubelementOfLtitemgt}
    See RSS 2.0 about <category>}

    Two examples are provided below:

    {[ <category>Grateful Dead</category> ]}

    {[ <category domain="http://www.fool.com/cusips">MSFT</category> ]}

    You may include as many category elements as you need to, for different
    domains, and to have an item cross-referenced in different parts of the
    same domain. *)
type category = {data: string; domain: Uri.t option}

(** [enclosure] is an optional sub-element of {!item}. It has three required
    attributes.

    - [url] says where the enclosure is located (must be an http url),
    - [length] says how big it is in bytes, and
    - [mime] says what its type is, a standard MIME type.

    {{:http://www.rssboard.org/rss-specification#ltenclosuregtSubelementOfLtitemgt}
    See RSS 2.0 about <enclosure>}

    {[
      <enclosure url="http://www.scripting.com/mp3s/weatherReportSuite.mp3"
                 length="12216320" type="audio/mpeg" />
    ]} *)
type enclosure = {url: Uri.t; length: int; mime: string}

(** [guid] is an optional sub-element of {!item}. "guid" stands for globally
    unique identifier. It's a string that uniquely identifies the item. When
    present, an aggregator may choose to use this string to determine if an
    item is new.

    {{:http://www.rssboard.org/rss-specification#ltguidgtSubelementOfLtitemgt}
    See RSS 2.0 about <guid>}

    {[ <guid>http://some.server.com/weblogItem3207</guid> ]}

    There are no rules for the syntax of a guid. Aggregators must view them as
    a string. It's up to the source of the feed to establish the uniqueness of
    the string.

    If [permalink] is [true], the reader may assume that it is a permalink to
    the item, that is, a url that can be opened in a Web browser, that points
    to the full item described by the <item> element. An example:

    {[
      <guid isPermaLink="true">http://inessential.com/2002/09/01.php#a2</guid>
    ]}

    If [permalink] is [false], the guid may not be assumed to be a url, or a
    url to anything in particular. *)
type guid =
  { data: Uri.t  (** Must be unique. *)
  ; permalink: bool  (** Default: [true]. *) }

(** [source] is an optional sub-element of {!item}.

    - [data] is the name of the RSS channel that the item came from, derived
      from its <title>.
    - [url] links to the XMLization of the source.

    The purpose of this element is to propagate credit for links, to publicize
    the sources of news items. It can be used in the Post command of an
    aggregator. It should be generated automatically when forwarding an item
    from an aggregator to a weblog authoring tool.

    {{:http://www.rssboard.org/rss-specification#ltsourcegtSubelementOfLtitemgt}
    See RSS 2.0 about <source>}

    {[ <source url="http://www.tomalak.org/links2.xml">Tomalak's Realm</source> ]} *)
type source = {data: string; url: Uri.t}

(** The textual part of an {!item}: its title, its description, or both.

    In the constructors below, [xmlbase] is presumably the value of xml:base
    in scope for the description, usable to resolve relative URIs, as for the
    [content] field of {!item} (to be confirmed against the implementation). *)
type story =
  | All of string * Uri.t option * string
      (** [All(title, xmlbase, description)] *)
  | Title of string  (** [Title(title)] *)
  | Description of Uri.t option * string
      (** [Description(xmlbase, description)] *)

(** A {!channel} may contain any number of [item]s. An item may represent a
    "story" — much like a story in a newspaper or magazine; if so its
    description is a synopsis of the story, and the link points to the full
    story. An item may also be complete in itself, if so, the description
    contains the text (entity-encoded HTML is allowed; see examples), and the
    link and title may be omitted.

    - [story]: The title of the item and/or its synopsis. See {!story}.
    - [content]: The possible full story ([(_,"")] if not present).
      (Extension of RSS2, see http://purl.org/rss/1.0/modules/content/) The
      first element of the couple is the possible value of xml:base. It can be
      used to resolve URIs.
    - [link]: The URL of the item.
    - [author]: Email address of the author of the item.
    - [categories]: Includes the item in one or more categories. See
      {!category}.
    - [comments]: URL of a page for comments relating to the item.
    - [enclosure]: Describes a media object that is attached to the item. See
      {!enclosure}.
    - [guid]: A string that uniquely identifies the item. See {!guid}.
    - [pubDate]: Indicates when the item was published.
    - [source]: The RSS channel that the item came from. See {!source}.

    {{:http://www.rssboard.org/rss-specification#hrelementsOfLtitemgt} See RSS
    2.0 about <item>} *)
type item =
  { story: story
  ; content: Uri.t option * string
  ; link: Uri.t option
  ; author: string option
  ; categories: category list
  ; comments: Uri.t option
  ; enclosure: enclosure option
  ; guid: guid option
  ; pubDate: Syndic_date.t option
  ; source: source option }

(** An RSS channel. Here's a list of the required channel elements, each with
    a brief description.

    - [title]: The name of the channel. It's how people refer to your service.
      If you have an HTML website that contains the same information as your
      RSS file, the title of your channel should be the same as the title of
      your website.
    - [link]: The URL to the HTML website corresponding to the channel.
    - [description]: Phrase or sentence describing the channel.

    Here's a list of optional channel elements.

    - [language]: The language the channel is written in. This allows
      aggregators to group all Italian language sites, for example, on a
      single page. A list of allowable values for this element was provided by
      Netscape. You may also use values defined by the W3C.
    - [copyright]: Copyright notice for content in the channel.
    - [managingEditor]: Email address for person responsible for editorial
      content.
    - [webMaster]: Email address for person responsible for technical issues
      relating to channel.
    - [pubDate]: The publication date for the content in the channel. For
      example, the New York Times publishes on a daily basis, the publication
      date flips once every 24 hours. That's when the pubDate of the channel
      changes. All date-times in RSS conform to the Date and Time
      Specification of RFC 822, with the exception that the year may be
      expressed with two characters or four characters (four preferred).
    - [lastBuildDate]: The last time the content of the channel changed.
    - [category]: Specify one or more categories that the channel belongs to.
      Follows the same rules as the <item>-level category element. See
      {!category}.
    - [generator]: A string indicating the program used to generate the
      channel.
    - [docs]: A URL that points to the documentation for the format used in
      the RSS file. It's probably a pointer to
      [http://www.rssboard.org/rss-specification]. It's for people who might
      stumble across an RSS file on a Web server 25 years from now and wonder
      what it is.
    - [cloud]: Allows processes to register with a cloud to be notified of
      updates to the channel, implementing a lightweight publish-subscribe
      protocol for RSS feeds. See {!cloud}.
    - [ttl]: ttl stands for time to live. It's a number of minutes that
      indicates how long a channel can be cached before refreshing from the
      source.
    - [image]: Specifies a GIF, JPEG or PNG image that can be displayed with
      the channel. See {!image}.
    - [rating]: The PICS rating for the channel.
    - [textInput]: Specifies a text input box that can be displayed with the
      channel. See {!textinput}.
    - [skipHours]: A hint for aggregators telling them which hours they can
      skip. This element contains up to 24 <hour> sub-elements whose value is
      a number between 0 and 23, representing a time in GMT, when aggregators,
      if they support the feature, may not read the channel on hours listed in
      the <skipHours> element. The hour beginning at midnight is hour zero.
    - [skipDays]: A hint for aggregators telling them which days they can
      skip. This element contains up to seven <day> sub-elements whose value
      is Monday, Tuesday, Wednesday, Thursday, Friday, Saturday or Sunday.
      Aggregators may not read the channel during days listed in the
      <skipDays> element.

    Finally, [items] holds the {!item}s of the channel.

    {{:http://www.rssboard.org/rss-specification#requiredChannelElements} See
    RSS 2.0 about <channel>} *)
type channel =
  { title: string
  ; link: Uri.t
  ; description: string
  ; language: string option
  ; copyright: string option
  ; managingEditor: string option
  ; webMaster: string option
  ; pubDate: Syndic_date.t option
  ; lastBuildDate: Syndic_date.t option
  ; category: string option
  ; generator: string option
  ; docs: Uri.t option
  ; cloud: cloud option
  ; ttl: int option
        (** {{:http://www.rssboard.org/rss-specification#ltttlgtSubelementOfLtchannelgt}
            See RSS 2.0 about <ttl>} *)
  ; image: image option
  ; rating: int option
        (* NOTE(review): a PICS rating is normally a textual label, so [int]
           looks suspicious here; confirm before relying on this field. *)
  ; textInput: textinput option
  ; skipHours: int option
        (* NOTE(review): the element may list several hours, yet a single
           [int] is stored; confirm which value is kept. *)
  ; skipDays: int option
        (* NOTE(review): the element lists day names, yet a single [int] is
           stored; confirm the encoding. *)
  ; items: item list }

val parse : ?xmlbase:Uri.t -> Xmlm.input -> channel
(** [parse xml] returns the channel corresponding to [xml].

    Raises [Error.Expected], [Error.Size_Exceeded] or [Error.Item_expectation]
    if [xml] is not a valid RSS2 document.

    @param xmlbase
      presumably the base URI against which relative URIs found in [xml] are
      resolved (to be confirmed against the implementation). *)

val read : ?xmlbase:Uri.t -> string -> channel
(** [read fname] reads the file named [fname] and parses it. For the optional
    parameters, see {!parse}. *)

val to_atom : ?self:Uri.t -> channel -> Syndic_atom.feed
(** [to_atom ch] returns an Atom feed that (mostly) contains the same
    information.

    @param self
      the URI from where the current feed was retrieved. Contrary to Atom,
      RSS2 has no provision to store the URI of the feed itself. Giving this
      information will add an entry to the [links] field of the Atom feed with
      [rel = Self]. *)

(**/**)

(** A URI is given by [(xmlbase, uri)]. The value of [xmlbase], if not [None],
    gives the base URI against which [uri] must be resolved if it is relative. *)
type uri = Uri.t option * string

val unsafe :
     ?xmlbase:Uri.t
  -> Xmlm.input
  -> [> `Channel of
        [> `Category of string
        | `Cloud of
          [> `Domain of string
          | `Path of string
          | `Port of string
          | `Protocol of string
          | `RegisterProcedure of string ]
          list
        | `Copyright of string
        | `Description of string
        | `Docs of string
        | `Generator of string
        | `Image of
          [> `Description of string
          | `Height of string
          | `Link of uri
          | `Title of string
          | `URL of uri
          | `Width of string ]
          list
        | `Item of
          [> `Author of string
          | `Category of [> `Data of string | `Domain of string] list
          | `Comments of string
          | `Description of string
          | `Content of string
          | `Enclosure of
            [> `Length of string | `Mime of string | `URL of uri] list
          | `Guid of [> `Data of uri | `Permalink of string] list
          | `Link of uri
          | `PubDate of string
          | `Source of [> `Data of string | `URL of uri] list
          | `Title of string ]
          list
        | `Language of string
        | `LastBuildDate of string
        | `Link of uri
        | `ManagingEditor of string
        | `PubDate of string
        | `Rating of string
        | `SkipDays of string
        | `SkipHours of string
        | `TTL of string
        | `TextInput of
          [> `Description of string
          | `Link of uri
          | `Name of string
          | `Title of string ]
          list
        | `Title of string
        | `WebMaster of string ]
        list ]
(** [unsafe xml] analyses [xml] without verification: the result is a tree of
    polymorphic variants, one per element or attribute, whose payloads are
    left as raw strings (or as {!uri} pairs for URIs) rather than converted
    to the typed records used by {!channel}.

    [xmlbase] is presumably the initial base URI recorded in the {!uri} pairs
    (to be confirmed against the implementation). *)