My agentic slop goes here. Not intended for anyone else!
at main 12 kB view raw
1(** Resolve a DOI from a Zotero translation server *) 2 3(* From the ZTS source code: https://github.com/zotero/translation-server/blob/master/src/formats.js 4 bibtex: "9cb70025-a888-4a29-a210-93ec52da40d4", 5 biblatex: "b6e39b57-8942-4d11-8259-342c46ce395f", 6 bookmarks: "4e7119e0-02be-4848-86ef-79a64185aad8", 7 coins: "05d07af9-105a-4572-99f6-a8e231c0daef", 8 csljson: "bc03b4fe-436d-4a1f-ba59-de4d2d7a63f7", 9 csv: "25f4c5e2-d790-4daa-a667-797619c7e2f2", 10 endnote_xml: "eb7059a4-35ec-4961-a915-3cf58eb9784b", 11 evernote: "18dd188a-9afc-4cd6-8775-1980c3ce0fbf", 12 mods: "0e2235e7-babf-413c-9acf-f27cce5f059c", 13 rdf_bibliontology: "14763d25-8ba0-45df-8f52-b8d1108e7ac9", 14 rdf_dc: "6e372642-ed9d-4934-b5d1-c11ac758ebb7", 15 rdf_zotero: "14763d24-8ba0-45df-8f52-b8d1108e7ac9", 16 refer: "881f60f2-0802-411a-9228-ce5f47b64c7d", 17 refworks_tagged: "1a3506da-a303-4b0a-a1cd-f216e6138d86", 18 ris: "32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7", 19 tei: "032ae9b7-ab90-9205-a479-baf81f49184a", 20 wikipedia: "3f50aaac-7acc-4350-acd0-59cb77faf620" 21 *) 22type format = 23 | Bibtex 24 | Biblatex 25 | Bookmarks 26 | Coins 27 | Csljson 28 | Csv 29 | Endnote_xml 30 | Evernote 31 | Mods 32 | Rdf_bibliontology 33 | Rdf_dc 34 | Rdf_zotero 35 | Refer 36 | Refworks_tagged 37 | Ris 38 | Tei 39 | Wikipedia 40 41let format_to_string = function 42 | Bibtex -> "bibtex" 43 | Biblatex -> "biblatex" 44 | Bookmarks -> "bookmarks" 45 | Coins -> "coins" 46 | Csljson -> "csljson" 47 | Csv -> "csv" 48 | Endnote_xml -> "endnote_xml" 49 | Evernote -> "evernote" 50 | Mods -> "mods" 51 | Rdf_bibliontology -> "rdf_bibliontology" 52 | Rdf_dc -> "rdf_dc" 53 | Rdf_zotero -> "rdf_zotero" 54 | Refer -> "refer" 55 | Refworks_tagged -> "refworks_tagged" 56 | Ris -> "ris" 57 | Tei -> "tei" 58 | Wikipedia -> "wikipedia" 59 60let format_of_string = function 61 | "bibtex" -> Some Bibtex 62 | "biblatex" -> Some Biblatex 63 | "bookmarks" -> Some Bookmarks 64 | "coins" -> Some Coins 65 | "csljson" -> Some Csljson 66 | "csv" -> Some Csv 67 | "endnote_xml" -> Some Endnote_xml 68 | "evernote" -> Some Evernote 69 | "mods" -> Some Mods 70 | "rdf_bibliontology" -> Some Rdf_bibliontology 71 | "rdf_dc" -> Some Rdf_dc 72 | "rdf_zotero" -> Some Rdf_zotero 73 | "refer" -> Some Refer 74 | "refworks_tagged" -> Some Refworks_tagged 75 | "ris" -> Some Ris 76 | "tei" -> Some Tei 77 | "wikipedia" -> Some Wikipedia 78 | _ -> None 79 80let web_endp base_uri = 81 match String.ends_with ~suffix:"/" base_uri with 82 | true -> Uri.of_string (base_uri ^ "web") 83 | false -> Uri.of_string (base_uri ^ "/web") 84 85let export_endp base_uri = 86 match String.ends_with ~suffix:"/" base_uri with 87 | true -> Uri.of_string (base_uri ^ "export") 88 | false -> Uri.of_string (base_uri ^ "/export") 89 90let search_endp base_uri = 91 match String.ends_with ~suffix:"/" base_uri with 92 | true -> Uri.of_string (base_uri ^ "search") 93 | false -> Uri.of_string (base_uri ^ "/search") 94 95let _import_endp base_uri = 96 match String.ends_with ~suffix:"/" base_uri with 97 | true -> Uri.of_string (base_uri ^ "import") 98 | false -> Uri.of_string (base_uri ^ "/import") 99 100type ('clock, 'net) t = { 101 base_uri: string; 102 requests_session: ('clock, 'net) Requests.t; 103} 104 105let create ~requests_session base_uri = 106 { base_uri; requests_session } 107 108let resolve_doi { base_uri; requests_session } doi = 109 let body_str = "https://doi.org/" ^ doi in 110 let uri = web_endp base_uri in 111 let body = Requests.Body.text body_str in 112 let response = Requests.post requests_session ~body (Uri.to_string uri) in 113 let status = Requests.Response.status_code response in 114 let body = Requests.Response.body response |> Eio.Flow.read_all in 115 if status = 200 then begin 116 try 117 match Jsont_bytesrw.decode_string' Jsont.json body with 118 | Ok doi_json -> Ok doi_json 119 | Error e -> Error (`Msg (Jsont.Error.to_string e)) 120 with exn -> Error (`Msg (Printexc.to_string exn)) 121 end else 122 Error (`Msg (Format.asprintf "Unexpected HTTP status: %d for %s" status body)) 123 124let resolve_url { base_uri; requests_session } url = 125 let body_str = url in 126 let uri = web_endp base_uri in 127 let body = Requests.Body.text body_str in 128 let response = Requests.post requests_session ~body (Uri.to_string uri) in 129 let status = Requests.Response.status_code response in 130 let body = Requests.Response.body response |> Eio.Flow.read_all in 131 if status = 200 then begin 132 try 133 match Jsont_bytesrw.decode_string' Jsont.json body with 134 | Ok url_json -> Ok url_json 135 | Error e -> Error (`Msg (Jsont.Error.to_string e)) 136 with exn -> Error (`Msg (Printexc.to_string exn)) 137 end else 138 Error (`Msg (Format.asprintf "Unexpected HTTP status: %d for %s" status body)) 139 140let search_id { base_uri; requests_session } doi = 141 let body_str = "https://doi.org/" ^ doi in 142 let uri = search_endp base_uri in 143 let body = Requests.Body.text body_str in 144 let response = Requests.post requests_session ~body (Uri.to_string uri) in 145 let status = Requests.Response.status_code response in 146 let body = Requests.Response.body response |> Eio.Flow.read_all in 147 if status = 200 then begin 148 try 149 match Jsont_bytesrw.decode_string' Jsont.json body with 150 | Ok doi_json -> Ok doi_json 151 | Error e -> Error (`Msg (Jsont.Error.to_string e)) 152 with exn -> Error (`Msg (Printexc.to_string exn)) 153 end else 154 Error (`Msg (Format.asprintf "Unexpected HTTP status: %d for %s" status body)) 155 156let export { base_uri; requests_session } format api = 157 match Jsont_bytesrw.encode_string' Jsont.json api with 158 | Error e -> Error (`Msg (Jsont.Error.to_string e)) 159 | Ok body_str -> 160 let uri = Uri.with_query' (export_endp base_uri ) ["format", (format_to_string format)] in 161 let body = Requests.Body.of_string Requests.Mime.json body_str in 162 let response = Requests.post requests_session ~body (Uri.to_string uri) in 163 let status = Requests.Response.status_code response in 164 let body = Requests.Response.body response |> Eio.Flow.read_all in 165 if status = 200 then begin 166 try 167 match format with 168 | Bibtex -> Ok (Astring.String.trim body) 169 | _ -> Ok body 170 with exn -> Error (`Msg (Printexc.to_string exn)) 171 end else 172 Error (`Msg (Format.asprintf "Unexpected HTTP status: %d for %s" status body)) 173 174let unescape_hex s = 175 let buf = Buffer.create (String.length s) in 176 let rec aux i = 177 if i >= String.length s then 178 Buffer.contents buf 179 else 180 if s.[i] = '\\' && i+3 < String.length s && s.[i+1] = 'x' then 181 let hex = String.sub s (i+2) 2 in 182 let char_code = int_of_string ("0x" ^ hex) in 183 Buffer.add_char buf (char_of_int char_code); 184 aux (i+4) 185 else begin 186 Buffer.add_char buf s.[i]; 187 aux (i+1) 188 end 189 in aux 0 190 191let unescape_bibtex s = 192 unescape_hex s |> 193 String.split_on_char '{' |> String.concat "" |> 194 String.split_on_char '}' |> String.concat "" 195 196let fields_of_bib bib = 197 match Bibtex.of_string bib with 198 | Error e -> 199 prerr_endline bib; 200 Fmt.epr "%a\n%!" Bibtex.pp_error e; 201 failwith "bib parse err TODO" 202 | Ok [bib] -> 203 let f = Bibtex.fields bib |> Bibtex.SM.bindings |> List.map (fun (k,v) -> k, (unescape_bibtex v)) in 204 let ty = match Bibtex.type' bib with "inbook" -> "book" | x -> x in 205 let v = List.fold_left (fun acc (k,v) -> ((k, Jsont.Meta.none), Jsont.String (v, Jsont.Meta.none))::acc) 206 [(("bibtype", Jsont.Meta.none), Jsont.String (ty, Jsont.Meta.none))] f in 207 v 208 | Ok _ -> failwith "one bib at a time plz" 209 210let bib_of_doi zt doi = 211 prerr_endline ("Fetching " ^ doi); 212 match resolve_doi zt doi with 213 | Error (`Msg _) -> 214 Printf.eprintf "%s failed on /web, trying to /search\n%!" doi; 215 begin match search_id zt doi with 216 | Error (`Msg e) -> failwith e 217 | Ok v -> 218 match export zt Bibtex v with 219 | Error (`Msg e) -> failwith e 220 | Ok r -> 221 print_endline r; 222 r 223 end 224 | Ok v -> 225 match export zt Bibtex v with 226 | Error (`Msg e) -> failwith e 227 | Ok r -> 228 print_endline r; 229 r 230 231(* Helper to get string from Jsont.json *) 232let get_string = function 233 | Jsont.String (s, _) -> s 234 | _ -> failwith "Expected string in JSON" 235 236(* Helper to get list from Jsont.json *) 237let get_list f = function 238 | Jsont.Array (arr, _) -> List.map f arr 239 | _ -> failwith "Expected array in JSON" 240 241(* Helper to find a field in Jsont.Object *) 242let find_field name = function 243 | Jsont.Object (mems, _) -> 244 List.find_map (fun ((k, _), v) -> if k = name then Some v else None) mems 245 | _ -> None 246 247(* Helper to get a required field as string *) 248let get_field name json = 249 match find_field name json with 250 | Some v -> get_string v 251 | None -> failwith ("Missing field: " ^ name) 252 253(* Helper to update a field in a Jsont.Object *) 254let update_field name value json = 255 match json with 256 | Jsont.Object (mems, meta) -> 257 let mems' = 258 match value with 259 | None -> List.filter (fun ((k, _), _) -> k <> name) mems 260 | Some v -> 261 let without = List.filter (fun ((k, _), _) -> k <> name) mems in 262 ((name, Jsont.Meta.none), v) :: without 263 in 264 Jsont.Object (mems', meta) 265 | _ -> json 266 267let split_authors keys = 268 let json = Jsont.Object (keys, Jsont.Meta.none) in 269 let author_str = get_field "author" json in 270 let authors = 271 Astring.String.cuts ~empty:false ~sep:" and " author_str |> 272 List.map Bibtex.list_value |> 273 List.map (fun v -> List.rev v |> String.concat " ") |> 274 List.map (fun x -> Jsont.String (x, Jsont.Meta.none)) 275 in 276 let keywords = 277 match find_field "keywords" json with 278 | None -> [] 279 | Some k -> 280 Astring.String.cuts ~empty:false ~sep:", " (get_string k) |> 281 List.map (fun x -> Jsont.String (x, Jsont.Meta.none)) 282 in 283 let json' = update_field "author" (Some (Jsont.Array (authors, Jsont.Meta.none))) json in 284 let json'' = update_field "keywords" 285 (match keywords with [] -> None | _ -> Some (Jsont.Array (keywords, Jsont.Meta.none))) json' in 286 match json'' with 287 | Jsont.Object (mems, _) -> mems 288 | _ -> failwith "Expected object" 289 290let add_bibtex ~slug y = 291 let json = Jsont.Object (y, Jsont.Meta.none) in 292 let find_opt k = find_field k json in 293 let add_if_present k f m = 294 match find_opt k with 295 | Some v -> Bibtex.SM.add k (f v) m 296 | None -> m 297 in 298 let string k m = add_if_present k get_string m in 299 let authors m = add_if_present "author" (fun j -> get_list get_string j |> String.concat " and ") m in 300 let cite_key = Astring.String.map (function '-' -> '_' |x -> x) slug in 301 let fields = Bibtex.SM.empty in 302 let type' = get_field "bibtype" json |> String.lowercase_ascii in 303 let fields = authors fields |> string "title" |> string "doi" |> string "month" |> string "year" |> string "url" in 304 let fields = match type' with 305 | "article" -> string "journal" fields |> string "volume" |> string "number" |> string "pages" 306 | "inproceedings" | "incollection" -> string "booktitle" fields |> string "editor" |> string "address" |> string "series" |> 307 string "number" |> string "volume" |> string "organization" |> string "publisher" |> string "pages" 308 | "book" -> string "editor" fields |> string "publisher" |> string "volume" |> string "pages" 309 | "misc" -> string "howpublished" fields 310 | "techreport" -> string "institution" fields |> string "number" |> string "address" 311 | b -> prerr_endline ("unknown bibtype " ^ b); fields in 312 let bib = Bibtex.v ~type' ~cite_key ~fields () |> Fmt.str "%a" Bibtex.pp in 313 match update_field "bib" (Some (Jsont.String (bib, Jsont.Meta.none))) json with 314 | Jsont.Object (mems, _) -> mems 315 | _ -> failwith "Expected object" 316 317let json_of_doi zt ~slug doi = 318 let x = bib_of_doi zt doi in 319 let x = fields_of_bib x in 320 split_authors x |> add_bibtex ~slug