My agentic slop goes here. Not intended for anyone else!
1(*
2 * Copyright (c) 2014, OCaml.org project
3 * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 *)
17
18(** Internal utility for HTML meta tag extraction *)
19
20[@@@warning "-32"] (* Suppress unused value warnings for internal utilities *)
21
22(** This module determines an image to be used as preview of a website.
23
24 It does this by following the same logic Google+ and other websites use, and
25 described in this article:
26 https://www.raymondcamden.com/2011/07/26/How-are-Facebook-and-Google-creating-link-previews *)
27
28let og_image html =
29 let open Soup in
30 let soup = parse html in
31 try soup $ "meta[property=og:image]" |> R.attribute "content" |> Option.some
32 with Failure _ -> None
33
34let image_src html =
35 let open Soup in
36 let soup = parse html in
37 try soup $ "link[rel=\"image_src\"]" |> R.attribute "href" |> Option.some
38 with Failure _ -> None
39
40let twitter_image html =
41 let open Soup in
42 let soup = parse html in
43 try
44 soup $ "meta[name=\"twitter:image\"]" |> R.attribute "content"
45 |> Option.some
46 with Failure _ -> None
47
48let og_description html =
49 let open Soup in
50 let soup = parse html in
51 try
52 soup $ "meta[property=og:description]" |> R.attribute "content"
53 |> Option.some
54 with Failure _ -> None
55
56let description html =
57 let open Soup in
58 let soup = parse html in
59 try
60 soup $ "meta[property=description]" |> R.attribute "content" |> Option.some
61 with Failure _ -> None
62
63let preview_image html =
64 let preview_image =
65 match og_image html with
66 | None -> (
67 match image_src html with
68 | None -> twitter_image html
69 | Some x -> Some x)
70 | Some x -> Some x
71 in
72 match Option.map String.trim preview_image with
73 | Some "" -> None
74 | Some x -> Some x
75 | None -> None
76
77let description html =
78 let preview_image =
79 match og_description html with None -> description html | Some x -> Some x
80 in
81 match Option.map String.trim preview_image with
82 | Some "" -> None
83 | Some x -> Some x
84 | None -> None