My agentic slop goes here. Not intended for anyone else!
at main 2.7 kB view raw
1(* 2 * Copyright (c) 2014, OCaml.org project 3 * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 *) 17 18(** Internal utility for HTML meta tag extraction *) 19 20[@@@warning "-32"] (* Suppress unused value warnings for internal utilities *) 21 22(** This module determines an image to be used as preview of a website. 23 24 It does this by following the same logic Google+ and other websites use, and 25 described in this article: 26 https://www.raymondcamden.com/2011/07/26/How-are-Facebook-and-Google-creating-link-previews *) 27 28let og_image html = 29 let open Soup in 30 let soup = parse html in 31 try soup $ "meta[property=og:image]" |> R.attribute "content" |> Option.some 32 with Failure _ -> None 33 34let image_src html = 35 let open Soup in 36 let soup = parse html in 37 try soup $ "link[rel=\"image_src\"]" |> R.attribute "href" |> Option.some 38 with Failure _ -> None 39 40let twitter_image html = 41 let open Soup in 42 let soup = parse html in 43 try 44 soup $ "meta[name=\"twitter:image\"]" |> R.attribute "content" 45 |> Option.some 46 with Failure _ -> None 47 48let og_description html = 49 let open Soup in 50 let soup = parse html in 51 try 52 soup $ "meta[property=og:description]" |> R.attribute "content" 53 |> Option.some 54 with Failure _ -> None 55 56let description html = 57 let open Soup in 58 let soup = parse html in 59 try 60 soup $ "meta[property=description]" |> R.attribute "content" |> Option.some 61 with Failure _ -> None 62 63let preview_image html = 64 let preview_image = 65 match og_image html with 66 | None -> ( 67 match image_src html with 68 | None -> twitter_image html 69 | Some x -> Some x) 70 | Some x -> Some x 71 in 72 match Option.map String.trim preview_image with 73 | Some "" -> None 74 | Some x -> Some x 75 | None -> None 76 77let description html = 78 let preview_image = 79 match og_description html with None -> description html | Some x -> Some x 80 in 81 match Option.map String.trim preview_image with 82 | Some "" -> None 83 | Some x -> Some x 84 | None -> None