My agentic slop goes here. Not intended for anyone else!

vendor river

+52
stack/river/.github/workflows/ci.yml
···
+
name: CI
+
+
on:
+
push:
+
branches: [ main ]
+
pull_request:
+
branches: [ main ]
+
+
jobs:
+
build-and-test:
+
strategy:
+
fail-fast: false
+
+
matrix:
+
os:
+
- macos-latest
+
- ubuntu-latest
+
- windows-latest
+
+
ocaml-compiler:
+
- 4.12.x
+
+
runs-on: ${{ matrix.os }}
+
+
steps:
+
+
- name: Checkout code
+
uses: actions/checkout@v2
+
+
- name: Use OCaml ${{ matrix.ocaml-compiler }}
+
uses: ocaml/setup-ocaml@v2
+
with:
+
ocaml-compiler: ${{ matrix.ocaml-compiler }}
+
dune-cache: ${{ matrix.os != 'macos-latest' }}
+
+
- name: Install ocamlformat
+
run: opam install ocamlformat.0.18.0
+
if: ${{ matrix.os == 'ubuntu-latest' }}
+
+
- name: Install opam packages
+
run: opam install . --with-test
+
+
- name: Check formatting
+
run: make fmt
+
if: ${{ matrix.os == 'ubuntu-latest' && always() }}
+
+
- name: Run build
+
run: make build
+
+
- name: Run the unit tests
+
run: make test
+
timeout-minutes: 1
+9
stack/river/.gitignore
···
+
# Dune generated files
+
_build/
+
*.install
+
+
# Merlin configuring file for Vim and Emacs
+
.merlin
+
+
# Local OPAM switch
+
_opam/
+4
stack/river/.ocamlformat
···
+
version = 0.20.1
+
profile = conventional
+
parse-docstrings = true
+
wrap-comments = true
+29
stack/river/CHANGES.md
···
+
# 0.4 - 2024-11-08
+
+
- Replace ocamlnet HTML parser with Lambda Soup (#15, @aantron)
+
+
# 0.3 - 2023-11-21
+
+
- Fall back to entry id if entry links doesn't exist (#11, @sabine)
+
+
# 0.2 - 2022-04-14
+
+
- Build with dune.
+
- Make the types abstract and add accessor functions.
+
- Support fetching meta description and SEO image from the posts links.
+
+
# 0.1.3 - 2015-07-28
+
+
- Make river compatible with the latest syndic API
+
+
# 0.1.2 - 2015-03-24
+
+
- Refactoring modules.
+
+
# 0.1.1 - 2015-03-19
+
+
- Upgrading version number.
+
+
# 0.1 - 2015-03-15
+
+
- Initial release
+13
stack/river/LICENSE
···
+
Copyright (c) 2015, KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
+
+
Permission to use, copy, modify, and/or distribute this software for any
+
purpose with or without fee is hereby granted, provided that the above
+
copyright notice and this permission notice appear in all copies.
+
+
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+43
stack/river/Makefile
···
+
.DEFAULT_GOAL := all
+
+
.PHONY: all
+
all:
+
opam exec -- dune build --root . @install
+
+
.PHONY: deps
+
deps: ## Install development dependencies
+
opam install -y dune-release ocamlformat utop ocaml-lsp-server
+
opam install --deps-only --with-test --with-doc -y .
+
+
.PHONY: create_switch
+
create_switch: ## Create an opam switch without any dependency
+
opam switch create . --no-install -y
+
+
.PHONY: switch
+
switch: ## Create an opam switch and install development dependencies
+
opam install . --deps-only --with-doc --with-test
+
opam install -y dune-release ocamlformat utop ocaml-lsp-server
+
+
.PHONY: build
+
build: ## Build the project, including non installable libraries and executables
+
opam exec -- dune build --root .
+
+
.PHONY: test
+
test: ## Run the unit tests
+
opam exec -- dune runtest --root .
+
+
.PHONY: clean
+
clean: ## Clean build artifacts and other generated files
+
opam exec -- dune clean --root .
+
+
.PHONY: doc
+
doc: ## Generate odoc documentation
+
opam exec -- dune build --root . @doc
+
+
.PHONY: fmt
+
fmt: ## Format the codebase with ocamlformat
+
opam exec -- dune build --root . --auto-promote @fmt
+
+
.PHONY: watch
+
watch: ## Watch for the filesystem and rebuild on every change
+
opam exec -- dune build --root . --watch
+58
stack/river/README.md
···
+
# River
+
+
[![Actions Status](https://github.com/kayceesrk/river/workflows/CI/badge.svg)](https://github.com/kayceesrk/river/actions)
+
+
RSS2 and Atom feed aggregator for OCaml
+
+
+
## Features
+
+
- Performs deduplication.
+
- Supports pagination and generating well-formed html prefix snippets.
+
- Support for generating aggregate feeds.
+
- Sorts the posts from most recent to oldest.
+
- Depends on Lambda Soup for html parsing.
+
+
## Installation
+
+
```bash
+
opam install river
+
```
+
+
## Usage
+
+
Here's an example program that aggregates the feeds from different sources:
+
+
```ocaml
+
let sources =
+
River.
+
[
+
{ name = "KC Sivaramakrishnan"; url = "http://kcsrk.info/atom-ocaml.xml" };
+
{
+
name = "Amir Chaudhry";
+
url = "http://amirchaudhry.com/tags/ocamllabs-atom.xml";
+
};
+
]
+
+
let () =
+
let feeds = List.map River.fetch sources in
+
let posts = River.posts feeds in
+
let entries = River.create_atom_entries posts in
+
let feed =
+
let authors = [ Syndic.Atom.author "OCaml Blog" ] in
+
let id = Uri.of_string "https://ocaml.org/atom.xml" in
+
let links = [ Syndic.Atom.link ~rel:Self id ] in
+
let title : Syndic.Atom.text_construct =
+
Text "OCaml Blog: Read the latest OCaml news from the community."
+
in
+
let updated = Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get in
+
Syndic.Atom.feed ~authors ~links ~id ~title ~updated entries
+
in
+
let out_channel = open_out "example/atom.xml" in
+
Syndic.Atom.output feed (`Channel out_channel);
+
close_out out_channel
+
```
+
+
## Contributing
+
+
Take a look at our [Contributing Guide](CONTRIBUTING.md).
+38
stack/river/dune-project
···
+
(lang dune 3.0)
+
+
(name river)
+
+
(documentation "https://kayceesrk.github.io/river/")
+
+
(source
+
(github kayceesrk/river))
+
+
(license MIT)
+
+
(authors "KC Sivaramakrishnan <sk826@cl.cam.ac.uk>")
+
+
(maintainers "KC Sivaramakrishnan <sk826@cl.cam.ac.uk>")
+
+
(generate_opam_files true)
+
+
(package
+
(name river)
+
(synopsis "RSS2 and Atom feed aggregator for OCaml")
+
(description "RSS2 and Atom feed aggregator for OCaml")
+
(depends
+
(ocaml
+
(>= 4.08.0))
+
dune
+
(syndic
+
(>= 1.5))
+
(cohttp
+
(>= 5.0.0))
+
(cohttp-lwt
+
(>= 5.0.0))
+
(cohttp-lwt-unix
+
(>= 5.0.0))
+
ptime
+
lwt
+
ocamlnet
+
lambdasoup
+
(odoc :with-doc)))
+31
stack/river/example/aggregate_feeds.ml
···
+
let sources =
+
River.
+
[
+
{ name = "KC Sivaramakrishnan"; url = "http://kcsrk.info/atom-ocaml.xml" };
+
{
+
name = "Amir Chaudhry";
+
url = "http://amirchaudhry.com/tags/ocamllabs-atom.xml";
+
};
+
]
+
+
let main () =
+
let feeds = List.map River.fetch sources in
+
let posts = River.posts feeds in
+
let entries = River.create_atom_entries posts in
+
let feed =
+
let authors = [ Syndic.Atom.author "OCaml Blog" ] in
+
let id = Uri.of_string "https://ocaml.org/atom.xml" in
+
let links = [ Syndic.Atom.link ~rel:Self id ] in
+
let title : Syndic.Atom.text_construct =
+
Text "OCaml Blog: Read the latest OCaml news from the community."
+
in
+
let updated = Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get in
+
Syndic.Atom.feed ~authors ~links ~id ~title ~updated entries
+
in
+
let out_channel = open_out "example/atom.xml" in
+
Syndic.Atom.output feed (`Channel out_channel);
+
close_out out_channel
+
+
let () =
+
Printexc.record_backtrace true;
+
main ()
+3
stack/river/example/dune
···
+
(executable
+
(name aggregate_feeds)
+
(libraries river))
+4
stack/river/lib/dune
···
+
(library
+
(name river)
+
(public_name river)
+
(libraries cohttp cohttp-lwt cohttp-lwt-unix str syndic lambdasoup))
+40
stack/river/lib/feed.ml
···
+
(*
+
* Copyright (c) 2014, OCaml.org project
+
* Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
+
*
+
* Permission to use, copy, modify, and distribute this software for any
+
* purpose with or without fee is hereby granted, provided that the above
+
* copyright notice and this permission notice appear in all copies.
+
*
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
*)
+
+
type source = { name : string; url : string }
+
type content = Atom of Syndic.Atom.feed | Rss2 of Syndic.Rss2.channel
+
+
let string_of_feed = function Atom _ -> "Atom" | Rss2 _ -> "Rss2"
+
+
type t = { name : string; title : string; url : string; content : content }
+
+
let classify_feed ~xmlbase (xml : string) =
+
try Atom (Syndic.Atom.parse ~xmlbase (Xmlm.make_input (`String (0, xml))))
+
with Syndic.Atom.Error.Error _ -> (
+
try Rss2 (Syndic.Rss2.parse ~xmlbase (Xmlm.make_input (`String (0, xml))))
+
with Syndic.Rss2.Error.Error _ -> failwith "Neither Atom nor RSS2 feed")
+
+
let fetch (source : source) =
+
let xmlbase = Uri.of_string @@ source.url in
+
let response = Http.get source.url in
+
let content = classify_feed ~xmlbase response in
+
let title =
+
match content with
+
| Atom atom -> Util.string_of_text_construct atom.Syndic.Atom.title
+
| Rss2 ch -> ch.Syndic.Rss2.title
+
in
+
{ name = source.name; title; content; url = source.url }
+73
stack/river/lib/http.ml
···
+
(*
+
* Copyright (c) 2014, OCaml.org project
+
* Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
+
*
+
* Permission to use, copy, modify, and distribute this software for any
+
* purpose with or without fee is hereby granted, provided that the above
+
* copyright notice and this permission notice appear in all copies.
+
*
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
*)
+
+
(* Download urls and cache them — especially during development, it slows down
+
the rendering to download over and over again the same URL. *)
+
+
open Printf
+
open Lwt
+
open Cohttp
+
open Cohttp.Response
+
open Cohttp.Code
+
+
exception Status_unhandled of string
+
exception Timeout
+
+
let max_num_redirects = 5
+
+
let get_location_exn headers =
+
match Header.get headers "location" with
+
| Some x -> x
+
| None -> raise @@ Status_unhandled "Location HTTP header not found"
+
+
let rec get_uri uri = function
+
| 0 -> raise (Status_unhandled "Too many redirects")
+
| n ->
+
let main =
+
Cohttp_lwt_unix.Client.get uri >>= fun (resp, body) ->
+
match resp.status with
+
| `OK -> Cohttp_lwt.Body.to_string body
+
| `Found | `See_other | `Moved_permanently | `Temporary_redirect
+
| `Permanent_redirect -> (
+
let l = Uri.of_string @@ get_location_exn resp.headers in
+
match Uri.host l with
+
| Some _ -> get_uri l (n - 1)
+
| None ->
+
let host = Uri.host uri in
+
let scheme = Uri.scheme uri in
+
let new_uri = Uri.with_scheme (Uri.with_host l host) scheme in
+
get_uri new_uri (n - 1))
+
| _ -> raise @@ Status_unhandled (string_of_status resp.status)
+
in
+
let timeout =
+
Lwt_unix.sleep (float_of_int 3) >>= fun () -> Lwt.fail Timeout
+
in
+
Lwt.pick [ main; timeout ]
+
+
let get url =
+
eprintf "Downloading %s ... %!" url;
+
try
+
let data = Lwt_main.run @@ get_uri (Uri.of_string url) max_num_redirects in
+
eprintf "done %!\n";
+
data
+
with
+
| (Status_unhandled s | Failure s) as e ->
+
eprintf "Failed: %s\n" s;
+
raise e
+
| Timeout as e ->
+
eprintf "Failed: Timeout\n";
+
raise e
+28
stack/river/lib/http.mli
···
+
(*
+
* Copyright (c) 2014, OCaml.org project
+
* Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
+
*
+
* Permission to use, copy, modify, and distribute this software for any
+
* purpose with or without fee is hereby granted, provided that the above
+
* copyright notice and this permission notice appear in all copies.
+
*
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
*)
+
+
exception Status_unhandled of string
+
exception Timeout
+
+
val get : string -> string
+
(** [get uri] returns the body of the response of the HTTP GET request on [uri].
+
+
If the answer of is a redirection, it will follow the redirections up to 5
+
redirects.
+
+
The answer is cached for [cache_secs] seconds, where [cache_secs] is 3600
+
seconds (1 hour) by default. *)
+80
stack/river/lib/meta.ml
···
+
(*
+
* Copyright (c) 2014, OCaml.org project
+
* Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
+
*
+
* Permission to use, copy, modify, and distribute this software for any
+
* purpose with or without fee is hereby granted, provided that the above
+
* copyright notice and this permission notice appear in all copies.
+
*
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
*)
+
+
(** This module determines an image to be used as preview of a website.
+
+
It does this by following the same logic Google+ and other websites use, and
+
described in this article:
+
https://www.raymondcamden.com/2011/07/26/How-are-Facebook-and-Google-creating-link-previews *)
+
+
let og_image html =
+
let open Soup in
+
let soup = parse html in
+
try soup $ "meta[property=og:image]" |> R.attribute "content" |> Option.some
+
with Failure _ -> None
+
+
let image_src html =
+
let open Soup in
+
let soup = parse html in
+
try soup $ "link[rel=\"image_src\"]" |> R.attribute "href" |> Option.some
+
with Failure _ -> None
+
+
let twitter_image html =
+
let open Soup in
+
let soup = parse html in
+
try
+
soup $ "meta[name=\"twitter:image\"]" |> R.attribute "content"
+
|> Option.some
+
with Failure _ -> None
+
+
let og_description html =
+
let open Soup in
+
let soup = parse html in
+
try
+
soup $ "meta[property=og:description]" |> R.attribute "content"
+
|> Option.some
+
with Failure _ -> None
+
+
let description html =
+
let open Soup in
+
let soup = parse html in
+
try
+
soup $ "meta[property=description]" |> R.attribute "content" |> Option.some
+
with Failure _ -> None
+
+
let preview_image html =
+
let preview_image =
+
match og_image html with
+
| None -> (
+
match image_src html with
+
| None -> twitter_image html
+
| Some x -> Some x)
+
| Some x -> Some x
+
in
+
match Option.map String.trim preview_image with
+
| Some "" -> None
+
| Some x -> Some x
+
| None -> None
+
+
let description html =
+
let preview_image =
+
match og_description html with None -> description html | Some x -> Some x
+
in
+
match Option.map String.trim preview_image with
+
| Some "" -> None
+
| Some x -> Some x
+
| None -> None
+215
stack/river/lib/post.ml
···
+
(*
+
* Copyright (c) 2014, OCaml.org project
+
* Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
+
*
+
* Permission to use, copy, modify, and distribute this software for any
+
* purpose with or without fee is hereby granted, provided that the above
+
* copyright notice and this permission notice appear in all copies.
+
*
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
*)
+
+
type t = {
+
title : string;
+
link : Uri.t option;
+
date : Syndic.Date.t option;
+
feed : Feed.t;
+
author : string;
+
email : string;
+
content : Soup.soup Soup.node;
+
mutable link_response : (string, string) result option;
+
}
+
+
let resolve_links_attr ~xmlbase attr el =
+
Soup.R.attribute attr el
+
|> Uri.of_string
+
|> Syndic.XML.resolve ~xmlbase
+
|> Uri.to_string
+
|> fun value -> Soup.set_attribute attr value el
+
+
(* Things that posts should not contain *)
+
let undesired_tags = [ "style"; "script" ]
+
let undesired_attr = [ "id" ]
+
+
let html_of_text ?xmlbase s =
+
let soup = Soup.parse s in
+
let ($$) = Soup.($$) in
+
soup $$ "a[href]" |> Soup.iter (resolve_links_attr ~xmlbase "href");
+
soup $$ "img[src]" |> Soup.iter (resolve_links_attr ~xmlbase "src");
+
undesired_tags |> List.iter (fun tag -> soup $$ tag |> Soup.iter Soup.delete);
+
soup $$ "*" |> Soup.iter (fun el ->
+
undesired_attr |> List.iter (fun attr -> Soup.delete_attribute attr el));
+
soup
+
+
(* Do not trust sites using XML for HTML content. Convert to string and parse
+
back. (Does not always fix bad HTML unfortunately.) *)
+
let html_of_syndic =
+
let ns_prefix _ = Some "" in
+
fun ?xmlbase h ->
+
html_of_text ?xmlbase
+
(String.concat "" (List.map (Syndic.XML.to_string ~ns_prefix) h))
+
+
let string_of_option = function None -> "" | Some s -> s
+
+
(* Email on the forge contain the name in parenthesis *)
+
let forge_name_re = Str.regexp ".*(\\([^()]*\\))"
+
+
let post_compare p1 p2 =
+
(* Most recent posts first. Posts with no date are always last *)
+
match (p1.date, p2.date) with
+
| Some d1, Some d2 -> Syndic.Date.compare d2 d1
+
| None, Some _ -> 1
+
| Some _, None -> -1
+
| None, None -> 1
+
+
let rec remove n l =
+
if n <= 0 then l else match l with [] -> [] | _ :: tl -> remove (n - 1) tl
+
+
let rec take n = function
+
| [] -> []
+
| e :: tl -> if n > 0 then e :: take (n - 1) tl else []
+
+
(* Blog feed
+
***********************************************************************)
+
+
let post_of_atom ~(feed : Feed.t) (e : Syndic.Atom.entry) =
+
let link =
+
try
+
Some
+
(List.find (fun l -> l.Syndic.Atom.rel = Syndic.Atom.Alternate) e.links)
+
.href
+
with Not_found -> (
+
match e.links with
+
| l :: _ -> Some l.href
+
| [] -> (
+
match Uri.scheme e.id with
+
| Some "http" -> Some e.id
+
| Some "https" -> Some e.id
+
| _ -> None))
+
in
+
let date =
+
match e.published with Some _ -> e.published | None -> Some e.updated
+
in
+
let content =
+
match e.content with
+
| Some (Text s) -> html_of_text s
+
| Some (Html (xmlbase, s)) -> html_of_text ?xmlbase s
+
| Some (Xhtml (xmlbase, h)) -> html_of_syndic ?xmlbase h
+
| Some (Mime _) | Some (Src _) | None -> (
+
match e.summary with
+
| Some (Text s) -> html_of_text s
+
| Some (Html (xmlbase, s)) -> html_of_text ?xmlbase s
+
| Some (Xhtml (xmlbase, h)) -> html_of_syndic ?xmlbase h
+
| None -> Soup.parse "")
+
in
+
let author, _ = e.authors in
+
{
+
title = Util.string_of_text_construct e.title;
+
link;
+
date;
+
feed;
+
author = author.name;
+
email = "";
+
content;
+
link_response = None;
+
}
+
+
let post_of_rss2 ~(feed : Feed.t) it =
+
let title, content =
+
match it.Syndic.Rss2.story with
+
| All (t, xmlbase, d) -> (
+
( t,
+
match it.content with
+
| _, "" -> html_of_text ?xmlbase d
+
| xmlbase, c -> html_of_text ?xmlbase c ))
+
| Title t ->
+
let xmlbase, c = it.content in
+
(t, html_of_text ?xmlbase c)
+
| Description (xmlbase, d) -> (
+
( "",
+
match it.content with
+
| _, "" -> html_of_text ?xmlbase d
+
| xmlbase, c -> html_of_text ?xmlbase c ))
+
in
+
let link =
+
match (it.guid, it.link) with
+
| Some u, _ when u.permalink -> Some u.data
+
| _, Some _ -> it.link
+
| Some u, _ ->
+
(* Sometimes the guid is indicated with isPermaLink="false" but is
+
nonetheless the only URL we get (e.g. ocamlpro). *)
+
Some u.data
+
| None, None -> None
+
in
+
{
+
title;
+
link;
+
feed;
+
author = feed.name;
+
email = string_of_option it.author;
+
content;
+
date = it.pubDate;
+
link_response = None;
+
}
+
+
let posts_of_feed c =
+
match c.Feed.content with
+
| Feed.Atom f -> List.map (post_of_atom ~feed:c) f.Syndic.Atom.entries
+
| Feed.Rss2 ch -> List.map (post_of_rss2 ~feed:c) ch.Syndic.Rss2.items
+
+
let mk_entry post =
+
let content = Syndic.Atom.Html (None, Soup.to_string post.content) in
+
let contributors =
+
[ Syndic.Atom.author ~uri:(Uri.of_string post.feed.url) post.feed.name ]
+
in
+
let links =
+
match post.link with
+
| Some l -> [ Syndic.Atom.link ~rel:Syndic.Atom.Alternate l ]
+
| None -> []
+
in
+
(* TODO: include source *)
+
let id =
+
match post.link with
+
| Some l -> l
+
| None -> Uri.of_string (Digest.to_hex (Digest.string post.title))
+
in
+
let authors = (Syndic.Atom.author ~email:post.email post.author, []) in
+
let title : Syndic.Atom.text_construct = Syndic.Atom.Text post.title in
+
let updated =
+
match post.date with
+
(* Atom entry requires a date but RSS2 does not. So if a date
+
* is not available, just capture the current date. *)
+
| None -> Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get
+
| Some d -> d
+
in
+
Syndic.Atom.entry ~content ~contributors ~links ~id ~authors ~title ~updated
+
()
+
+
let mk_entries posts = List.map mk_entry posts
+
+
let get_posts ?n ?(ofs = 0) planet_feeds =
+
let posts = List.concat @@ List.map posts_of_feed planet_feeds in
+
let posts = List.sort post_compare posts in
+
let posts = remove ofs posts in
+
match n with None -> posts | Some n -> take n posts
+
+
(* Fetch the link response and cache it. *)
+
let fetch_link t =
+
match (t.link, t.link_response) with
+
| None, _ -> None
+
| Some _, Some (Ok x) -> Some x
+
| Some _, Some (Error _) -> None
+
| Some link, None -> (
+
try
+
let response = Http.get (Uri.to_string link) in
+
t.link_response <- Some (Ok response);
+
Some response
+
with _exn ->
+
t.link_response <- Some (Error "");
+
None)
+44
stack/river/lib/river.ml
···
+
(*
+
* Copyright (c) 2014, OCaml.org project
+
* Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
+
*
+
* Permission to use, copy, modify, and distribute this software for any
+
* purpose with or without fee is hereby granted, provided that the above
+
* copyright notice and this permission notice appear in all copies.
+
*
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
*)
+
+
type source = Feed.source = { name : string; url : string }
+
type feed = Feed.t
+
type post = Post.t
+
+
let fetch = Feed.fetch
+
let name feed = feed.Feed.name
+
let url feed = feed.Feed.url
+
let posts feeds = Post.get_posts feeds
+
let title post = post.Post.title
+
let link post = post.Post.link
+
let date post = post.Post.date
+
let feed post = post.Post.feed
+
let author post = post.Post.author
+
let email post = post.Post.email
+
let content post = Soup.to_string post.Post.content
+
+
let meta_description post =
+
match Post.fetch_link post with
+
| None -> None
+
| Some response -> Meta.description response
+
+
let seo_image post =
+
match Post.fetch_link post with
+
| None -> None
+
| Some response -> Meta.preview_image response
+
+
let create_atom_entries = Post.mk_entries
+73
stack/river/lib/river.mli
···
+
(*
+
* Copyright (c) 2014, OCaml.org project
+
* Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
+
*
+
* Permission to use, copy, modify, and distribute this software for any
+
* purpose with or without fee is hereby granted, provided that the above
+
* copyright notice and this permission notice appear in all copies.
+
*
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
*)
+
+
type source = { name : string; url : string }
+
(** The source of a feed. *)
+
+
type feed
+
type post
+
+
val fetch : source -> feed
+
(** [fetch source] returns an Atom or RSS feed from a source. *)
+
+
val name : feed -> string
+
(** [name feed] is the name of the feed source passed to [fetch]. *)
+
+
val url : feed -> string
+
(** [url feed] is the url of the feed source passed to [fetch]. *)
+
+
val posts : feed list -> post list
+
(** [posts feeds] is the list of deduplicated posts of the given feeds. *)
+
+
val feed : post -> feed
+
(** [feed post] is the feed the post originates from. *)
+
+
val title : post -> string
+
(** [title post] is the title of the post. *)
+
+
val link : post -> Uri.t option
+
(** [link post] is the link of the post. *)
+
+
val date : post -> Syndic.Date.t option
+
(** [date post] is the date of the post. *)
+
+
val author : post -> string
+
(** [author post] is the author of the post. *)
+
+
val email : post -> string
+
(** [email post] is the email of the post. *)
+
+
val content : post -> string
+
(** [content post] is the content of the post. *)
+
+
val meta_description : post -> string option
+
(** [meta_description post] is the meta description of the post on the origin
+
site.
+
+
To get the meta description, we make get the content of [link post] and look
+
for an HTML meta tag with the name "description" or "og:description".*)
+
+
val seo_image : post -> string option
+
(** [seo_image post] is the image to be used by social networks and links to the
+
post.
+
+
To get the seo image, we make get the content of [link post] and look for an
+
HTML meta tag with the name "og:image" or "twitter:image". *)
+
+
val create_atom_entries : post list -> Syndic.Atom.entry list
+
(** [create_atom_feed posts] creates a list of atom entries, which can then be
+
used to create an atom feed that is an aggregate of the posts. *)
+33
stack/river/lib/util.ml
···
+
(*
+
* Copyright (c) 2014, OCaml.org project
+
* Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
+
*
+
* Permission to use, copy, modify, and distribute this software for any
+
* purpose with or without fee is hereby granted, provided that the above
+
* copyright notice and this permission notice appear in all copies.
+
*
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
*)
+
+
open Syndic
+
+
(* Remove all tags *)
+
let rec syndic_to_buffer b = function
+
| XML.Node (_, _, subs) -> List.iter (syndic_to_buffer b) subs
+
| XML.Data (_, d) -> Buffer.add_string b d
+
+
let syndic_to_string x =
+
let b = Buffer.create 1024 in
+
List.iter (syndic_to_buffer b) x;
+
Buffer.contents b
+
+
let string_of_text_construct : Atom.text_construct -> string = function
+
(* FIXME: we probably would like to parse the HTML and remove the tags *)
+
| Atom.Text s | Atom.Html (_, s) -> s
+
| Atom.Xhtml (_, x) -> syndic_to_string x
+37
stack/river/river.opam
···
+
# This file is generated by dune, edit dune-project instead
+
opam-version: "2.0"
+
synopsis: "RSS2 and Atom feed aggregator for OCaml"
+
description: "RSS2 and Atom feed aggregator for OCaml"
+
maintainer: ["KC Sivaramakrishnan <sk826@cl.cam.ac.uk>"]
+
authors: ["KC Sivaramakrishnan <sk826@cl.cam.ac.uk>"]
+
license: "MIT"
+
homepage: "https://github.com/kayceesrk/river"
+
doc: "https://kayceesrk.github.io/river/"
+
bug-reports: "https://github.com/kayceesrk/river/issues"
+
depends: [
+
"ocaml" {>= "4.08.0"}
+
"dune" {>= "3.0"}
+
"syndic" {>= "1.5"}
+
"cohttp" {>= "5.0.0"}
+
"cohttp-lwt" {>= "5.0.0"}
+
"cohttp-lwt-unix" {>= "5.0.0"}
+
"ptime"
+
"lwt"
+
"lambdasoup"
+
"odoc" {with-doc}
+
]
+
build: [
+
["dune" "subst"] {dev}
+
[
+
"dune"
+
"build"
+
"-p"
+
name
+
"-j"
+
jobs
+
"@install"
+
"@runtest" {with-test}
+
"@doc" {with-doc}
+
]
+
]
+
dev-repo: "git+https://github.com/kayceesrk/river.git"