My agentic slop goes here. Not intended for anyone else!
1(** Resolve a DOI from a Zotero translation server *)
2
3(* From the ZTS source code: https://github.com/zotero/translation-server/blob/master/src/formats.js
4 bibtex: "9cb70025-a888-4a29-a210-93ec52da40d4",
5 biblatex: "b6e39b57-8942-4d11-8259-342c46ce395f",
6 bookmarks: "4e7119e0-02be-4848-86ef-79a64185aad8",
7 coins: "05d07af9-105a-4572-99f6-a8e231c0daef",
8 csljson: "bc03b4fe-436d-4a1f-ba59-de4d2d7a63f7",
9 csv: "25f4c5e2-d790-4daa-a667-797619c7e2f2",
10 endnote_xml: "eb7059a4-35ec-4961-a915-3cf58eb9784b",
11 evernote: "18dd188a-9afc-4cd6-8775-1980c3ce0fbf",
12 mods: "0e2235e7-babf-413c-9acf-f27cce5f059c",
13 rdf_bibliontology: "14763d25-8ba0-45df-8f52-b8d1108e7ac9",
14 rdf_dc: "6e372642-ed9d-4934-b5d1-c11ac758ebb7",
15 rdf_zotero: "14763d24-8ba0-45df-8f52-b8d1108e7ac9",
16 refer: "881f60f2-0802-411a-9228-ce5f47b64c7d",
17 refworks_tagged: "1a3506da-a303-4b0a-a1cd-f216e6138d86",
18 ris: "32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7",
19 tei: "032ae9b7-ab90-9205-a479-baf81f49184a",
20 wikipedia: "3f50aaac-7acc-4350-acd0-59cb77faf620"
21 *)
22type format =
23 | Bibtex
24 | Biblatex
25 | Bookmarks
26 | Coins
27 | Csljson
28 | Csv
29 | Endnote_xml
30 | Evernote
31 | Mods
32 | Rdf_bibliontology
33 | Rdf_dc
34 | Rdf_zotero
35 | Refer
36 | Refworks_tagged
37 | Ris
38 | Tei
39 | Wikipedia
40
41let format_to_string = function
42 | Bibtex -> "bibtex"
43 | Biblatex -> "biblatex"
44 | Bookmarks -> "bookmarks"
45 | Coins -> "coins"
46 | Csljson -> "csljson"
47 | Csv -> "csv"
48 | Endnote_xml -> "endnote_xml"
49 | Evernote -> "evernote"
50 | Mods -> "mods"
51 | Rdf_bibliontology -> "rdf_bibliontology"
52 | Rdf_dc -> "rdf_dc"
53 | Rdf_zotero -> "rdf_zotero"
54 | Refer -> "refer"
55 | Refworks_tagged -> "refworks_tagged"
56 | Ris -> "ris"
57 | Tei -> "tei"
58 | Wikipedia -> "wikipedia"
59
60let format_of_string = function
61 | "bibtex" -> Some Bibtex
62 | "biblatex" -> Some Biblatex
63 | "bookmarks" -> Some Bookmarks
64 | "coins" -> Some Coins
65 | "csljson" -> Some Csljson
66 | "csv" -> Some Csv
67 | "endnote_xml" -> Some Endnote_xml
68 | "evernote" -> Some Evernote
69 | "mods" -> Some Mods
70 | "rdf_bibliontology" -> Some Rdf_bibliontology
71 | "rdf_dc" -> Some Rdf_dc
72 | "rdf_zotero" -> Some Rdf_zotero
73 | "refer" -> Some Refer
74 | "refworks_tagged" -> Some Refworks_tagged
75 | "ris" -> Some Ris
76 | "tei" -> Some Tei
77 | "wikipedia" -> Some Wikipedia
78 | _ -> None
79
80let web_endp base_uri =
81 match String.ends_with ~suffix:"/" base_uri with
82 | true -> Uri.of_string (base_uri ^ "web")
83 | false -> Uri.of_string (base_uri ^ "/web")
84
85let export_endp base_uri =
86 match String.ends_with ~suffix:"/" base_uri with
87 | true -> Uri.of_string (base_uri ^ "export")
88 | false -> Uri.of_string (base_uri ^ "/export")
89
90let search_endp base_uri =
91 match String.ends_with ~suffix:"/" base_uri with
92 | true -> Uri.of_string (base_uri ^ "search")
93 | false -> Uri.of_string (base_uri ^ "/search")
94
95let _import_endp base_uri =
96 match String.ends_with ~suffix:"/" base_uri with
97 | true -> Uri.of_string (base_uri ^ "import")
98 | false -> Uri.of_string (base_uri ^ "/import")
99
100type ('clock, 'net) t = {
101 base_uri: string;
102 requests_session: ('clock, 'net) Requests.t;
103}
104
105let create ~requests_session base_uri =
106 { base_uri; requests_session }
107
108let resolve_doi { base_uri; requests_session } doi =
109 let body_str = "https://doi.org/" ^ doi in
110 let uri = web_endp base_uri in
111 let body = Requests.Body.text body_str in
112 let response = Requests.post requests_session ~body (Uri.to_string uri) in
113 let status = Requests.Response.status_code response in
114 let body = Requests.Response.body response |> Eio.Flow.read_all in
115 if status = 200 then begin
116 try
117 match Jsont_bytesrw.decode_string' Jsont.json body with
118 | Ok doi_json -> Ok doi_json
119 | Error e -> Error (`Msg (Jsont.Error.to_string e))
120 with exn -> Error (`Msg (Printexc.to_string exn))
121 end else
122 Error (`Msg (Format.asprintf "Unexpected HTTP status: %d for %s" status body))
123
124let resolve_url { base_uri; requests_session } url =
125 let body_str = url in
126 let uri = web_endp base_uri in
127 let body = Requests.Body.text body_str in
128 let response = Requests.post requests_session ~body (Uri.to_string uri) in
129 let status = Requests.Response.status_code response in
130 let body = Requests.Response.body response |> Eio.Flow.read_all in
131 if status = 200 then begin
132 try
133 match Jsont_bytesrw.decode_string' Jsont.json body with
134 | Ok url_json -> Ok url_json
135 | Error e -> Error (`Msg (Jsont.Error.to_string e))
136 with exn -> Error (`Msg (Printexc.to_string exn))
137 end else
138 Error (`Msg (Format.asprintf "Unexpected HTTP status: %d for %s" status body))
139
140let search_id { base_uri; requests_session } doi =
141 let body_str = "https://doi.org/" ^ doi in
142 let uri = search_endp base_uri in
143 let body = Requests.Body.text body_str in
144 let response = Requests.post requests_session ~body (Uri.to_string uri) in
145 let status = Requests.Response.status_code response in
146 let body = Requests.Response.body response |> Eio.Flow.read_all in
147 if status = 200 then begin
148 try
149 match Jsont_bytesrw.decode_string' Jsont.json body with
150 | Ok doi_json -> Ok doi_json
151 | Error e -> Error (`Msg (Jsont.Error.to_string e))
152 with exn -> Error (`Msg (Printexc.to_string exn))
153 end else
154 Error (`Msg (Format.asprintf "Unexpected HTTP status: %d for %s" status body))
155
156let export { base_uri; requests_session } format api =
157 match Jsont_bytesrw.encode_string' Jsont.json api with
158 | Error e -> Error (`Msg (Jsont.Error.to_string e))
159 | Ok body_str ->
160 let uri = Uri.with_query' (export_endp base_uri ) ["format", (format_to_string format)] in
161 let body = Requests.Body.of_string Requests.Mime.json body_str in
162 let response = Requests.post requests_session ~body (Uri.to_string uri) in
163 let status = Requests.Response.status_code response in
164 let body = Requests.Response.body response |> Eio.Flow.read_all in
165 if status = 200 then begin
166 try
167 match format with
168 | Bibtex -> Ok (Astring.String.trim body)
169 | _ -> Ok body
170 with exn -> Error (`Msg (Printexc.to_string exn))
171 end else
172 Error (`Msg (Format.asprintf "Unexpected HTTP status: %d for %s" status body))
173
174let unescape_hex s =
175 let buf = Buffer.create (String.length s) in
176 let rec aux i =
177 if i >= String.length s then
178 Buffer.contents buf
179 else
180 if s.[i] = '\\' && i+3 < String.length s && s.[i+1] = 'x' then
181 let hex = String.sub s (i+2) 2 in
182 let char_code = int_of_string ("0x" ^ hex) in
183 Buffer.add_char buf (char_of_int char_code);
184 aux (i+4)
185 else begin
186 Buffer.add_char buf s.[i];
187 aux (i+1)
188 end
189 in aux 0
190
191let unescape_bibtex s =
192 unescape_hex s |>
193 String.split_on_char '{' |> String.concat "" |>
194 String.split_on_char '}' |> String.concat ""
195
196let fields_of_bib bib =
197 match Bibtex.of_string bib with
198 | Error e ->
199 prerr_endline bib;
200 Fmt.epr "%a\n%!" Bibtex.pp_error e;
201 failwith "bib parse err TODO"
202 | Ok [bib] ->
203 let f = Bibtex.fields bib |> Bibtex.SM.bindings |> List.map (fun (k,v) -> k, (unescape_bibtex v)) in
204 let ty = match Bibtex.type' bib with "inbook" -> "book" | x -> x in
205 let v = List.fold_left (fun acc (k,v) -> ((k, Jsont.Meta.none), Jsont.String (v, Jsont.Meta.none))::acc)
206 [(("bibtype", Jsont.Meta.none), Jsont.String (ty, Jsont.Meta.none))] f in
207 v
208 | Ok _ -> failwith "one bib at a time plz"
209
210let bib_of_doi zt doi =
211 prerr_endline ("Fetching " ^ doi);
212 match resolve_doi zt doi with
213 | Error (`Msg _) ->
214 Printf.eprintf "%s failed on /web, trying to /search\n%!" doi;
215 begin match search_id zt doi with
216 | Error (`Msg e) -> failwith e
217 | Ok v ->
218 match export zt Bibtex v with
219 | Error (`Msg e) -> failwith e
220 | Ok r ->
221 print_endline r;
222 r
223 end
224 | Ok v ->
225 match export zt Bibtex v with
226 | Error (`Msg e) -> failwith e
227 | Ok r ->
228 print_endline r;
229 r
230
231(* Helper to get string from Jsont.json *)
232let get_string = function
233 | Jsont.String (s, _) -> s
234 | _ -> failwith "Expected string in JSON"
235
236(* Helper to get list from Jsont.json *)
237let get_list f = function
238 | Jsont.Array (arr, _) -> List.map f arr
239 | _ -> failwith "Expected array in JSON"
240
241(* Helper to find a field in Jsont.Object *)
242let find_field name = function
243 | Jsont.Object (mems, _) ->
244 List.find_map (fun ((k, _), v) -> if k = name then Some v else None) mems
245 | _ -> None
246
247(* Helper to get a required field as string *)
248let get_field name json =
249 match find_field name json with
250 | Some v -> get_string v
251 | None -> failwith ("Missing field: " ^ name)
252
253(* Helper to update a field in a Jsont.Object *)
254let update_field name value json =
255 match json with
256 | Jsont.Object (mems, meta) ->
257 let mems' =
258 match value with
259 | None -> List.filter (fun ((k, _), _) -> k <> name) mems
260 | Some v ->
261 let without = List.filter (fun ((k, _), _) -> k <> name) mems in
262 ((name, Jsont.Meta.none), v) :: without
263 in
264 Jsont.Object (mems', meta)
265 | _ -> json
266
267let split_authors keys =
268 let json = Jsont.Object (keys, Jsont.Meta.none) in
269 let author_str = get_field "author" json in
270 let authors =
271 Astring.String.cuts ~empty:false ~sep:" and " author_str |>
272 List.map Bibtex.list_value |>
273 List.map (fun v -> List.rev v |> String.concat " ") |>
274 List.map (fun x -> Jsont.String (x, Jsont.Meta.none))
275 in
276 let keywords =
277 match find_field "keywords" json with
278 | None -> []
279 | Some k ->
280 Astring.String.cuts ~empty:false ~sep:", " (get_string k) |>
281 List.map (fun x -> Jsont.String (x, Jsont.Meta.none))
282 in
283 let json' = update_field "author" (Some (Jsont.Array (authors, Jsont.Meta.none))) json in
284 let json'' = update_field "keywords"
285 (match keywords with [] -> None | _ -> Some (Jsont.Array (keywords, Jsont.Meta.none))) json' in
286 match json'' with
287 | Jsont.Object (mems, _) -> mems
288 | _ -> failwith "Expected object"
289
290let add_bibtex ~slug y =
291 let json = Jsont.Object (y, Jsont.Meta.none) in
292 let find_opt k = find_field k json in
293 let add_if_present k f m =
294 match find_opt k with
295 | Some v -> Bibtex.SM.add k (f v) m
296 | None -> m
297 in
298 let string k m = add_if_present k get_string m in
299 let authors m = add_if_present "author" (fun j -> get_list get_string j |> String.concat " and ") m in
300 let cite_key = Astring.String.map (function '-' -> '_' |x -> x) slug in
301 let fields = Bibtex.SM.empty in
302 let type' = get_field "bibtype" json |> String.lowercase_ascii in
303 let fields = authors fields |> string "title" |> string "doi" |> string "month" |> string "year" |> string "url" in
304 let fields = match type' with
305 | "article" -> string "journal" fields |> string "volume" |> string "number" |> string "pages"
306 | "inproceedings" | "incollection" -> string "booktitle" fields |> string "editor" |> string "address" |> string "series" |>
307 string "number" |> string "volume" |> string "organization" |> string "publisher" |> string "pages"
308 | "book" -> string "editor" fields |> string "publisher" |> string "volume" |> string "pages"
309 | "misc" -> string "howpublished" fields
310 | "techreport" -> string "institution" fields |> string "number" |> string "address"
311 | b -> prerr_endline ("unknown bibtype " ^ b); fields in
312 let bib = Bibtex.v ~type' ~cite_key ~fields () |> Fmt.str "%a" Bibtex.pp in
313 match update_field "bib" (Some (Jsont.String (bib, Jsont.Meta.none))) json with
314 | Jsont.Object (mems, _) -> mems
315 | _ -> failwith "Expected object"
316
317let json_of_doi zt ~slug doi =
318 let x = bib_of_doi zt doi in
319 let x = fields_of_bib x in
320 split_authors x |> add_bibtex ~slug