commit bf5d789370fac19a58db376436f3f8a6ab95bdbe · anil.recoil.org/slop

stack/zotero-translation/.gitignore

stack/zotero-translation/.ocamlformat

···

       1
       1
       +
       version=0.26.2

stack/zotero-translation/README.md

···

       1
       1
       +
       Partial OCaml interface to the [Zotero Translation Server](https://github.com/zotero/translation-server).

     

       2
       2
       +
       

     

       3
       3
       +
       This one uses Lwt so it's compatible with 5.2.0+jst (which doesn't have effects yet).

     

       4
       4
       +
       

     

       5
       5
       +
       It also vendors in a chunk of B0 to be standalone.

     

       6
       6
       +
       

     

       7
       7
       +
       This isn't really suitable for use by anyone yet except for Anil's personal website.

+639

stack/zotero-translation/bibtex.ml

···

       1
       1
       +
       (*---------------------------------------------------------------------------

     

       2
       2
       +
          Copyright (c) 2019 University of Bern. All rights reserved.

     

       3
       3
       +
          Distributed under the ISC license, see terms at the end of the file.

     

       4
       4
       +
         ---------------------------------------------------------------------------*)

     

       5
       5
       +
       

     

       6
       6
       +
       (* File-wide: disable warnings 27 (unused variable), 32 (unused value),
          33 (unused open) and 34 (unused type).
          NOTE(review): presumably needed because this file vendors code of
          which only a part is used -- confirm before narrowing. *)
       [@@@warning "-27-32-33-34"]

     

       7
       7
       +
       

     

       8
       8
       +
       (* Maps keyed by strings. *)
       module SM = Map.Make(String)

     

       9
       9
       +
       

     

       10
       10
       +
       module Err_msg = struct
         (* Building blocks for error messages: enumeration formatters,
            unknown-value messages and spelling suggestions. *)

         let pf = Format.fprintf
         let pp_sp = Format.pp_print_space

         (* Formatter that prints nothing. *)
         let pp_nop _ () = ()

         (* [pp_any fmt ppf v] prints the format [fmt] on [ppf] and ignores [v]. *)
         let pp_any fmt ppf _ = pf ppf fmt

         (* [pp_op_enum op ~empty pp_v ppf vs] prints the elements of [vs] with
            [pp_v], separated by commas, the last two being joined by the word
            [op]. A singleton is printed alone and the empty list is handed to
            [empty], which prints nothing by default. *)
         let pp_op_enum op ?(empty = pp_nop) pp_v ppf vs =
           (* Only ever called on lists of at least two elements. *)
           let rec pp_rest = function
           | [] -> assert false
           | [penultimate; final] ->
               pf ppf "%a@ %s@ %a" pp_v penultimate op pp_v final
           | item :: rest -> pf ppf "%a,@ " pp_v item; pp_rest rest
           in
           match vs with
           | [] -> empty ppf ()
           | [single] -> pp_v ppf single
           | several -> pp_rest several

         let pp_and_enum ?empty pp_v ppf vs = pp_op_enum "and" ?empty pp_v ppf vs
         let pp_or_enum ?empty pp_v ppf vs = pp_op_enum "or" ?empty pp_v ppf vs

         (* Prints a hint listing the alternatives [vs]; nothing if there are
            none. *)
         let pp_did_you_mean pp_v ppf vs = match vs with
         | [] -> ()
         | _ :: _ -> pf ppf "Did@ you@ mean %a ?" (pp_or_enum pp_v) vs

         (* Prints the admissible values [vs]; nothing if there are none. *)
         let pp_must_be pp_v ppf vs = match vs with
         | [] -> ()
         | _ :: _ -> pf ppf "Must be %a." (pp_or_enum pp_v) vs

         (* [pp_unknown ~kind pp_v ppf v] reports [v] as an unknown value; [kind]
            prints what sort of thing [v] is. *)
         let pp_unknown ~kind pp_v ppf v = pf ppf "Unknown %a %a." kind () pp_v v

         (* Like [pp_unknown] but, when [hints] is not empty, follows the message
            with a space and the output of [hint pp_v] on [hints]. *)
         let pp_unknown' ~kind pp_v ~hint ppf (v, hints) =
           pp_unknown ~kind pp_v ppf v;
           match hints with
           | [] -> ()
           | _ :: _ -> pp_sp ppf (); hint pp_v ppf hints

         (* [min_by f a b] is whichever of [a] and [b] has the smaller image by
            [f] and [max_by f a b] the one with the larger image. On a tie
            [min_by] returns [a] and [max_by] returns [b]. *)
         let min_by f a b = match f a <= f b with true -> a | false -> b
         let max_by f a b = match f a <= f b with true -> b | false -> a

         (* [edit_distance s0 s1] is the number of single byte insertions,
            deletions and substitutions needed to turn one string into the
            other. Only two rows of the dynamic programming matrix are kept;
            they are indexed by the shorter string. *)
         let edit_distance s0 s1 =
           let shorter = min_by String.length s0 s1 in
           let longer = max_by String.length s0 s1 in
           let m = String.length shorter and n = String.length longer in
           let above = ref (Array.init (m + 1) (fun k -> k)) in
           let current = ref (Array.make (m + 1) 0) in
           for i = 1 to n do
             let prev = !above and row = !current in
             row.(0) <- i;
             for j = 1 to m do
               row.(j) <-
                 if Char.equal shorter.[j - 1] longer.[i - 1]
                 then prev.(j - 1)
                 else 1 + min prev.(j - 1) (min prev.(j) row.(j - 1))
             done;
             (* The row just filled becomes the reference for the next one. *)
             above := row;
             current := prev
           done;
           (!above).(m)

         (* [suggest ~dist candidates s] are the elements of [candidates] whose
            edit distance to [s] is minimal, in their original order, provided
            that minimal distance does not exceed [dist] (defaults to 2). *)
         let suggest ?(dist = 2) candidates s =
           let keep_closest ((best, names) as acc) name =
             let d = edit_distance s name in
             if d < best then d, [name] else
             if d = best then best, name :: names else
             acc
           in
           let best, names = List.fold_left keep_closest (max_int, []) candidates in
           if best > dist (* too far to be a plausible typo *) then [] else
           List.rev names
       end

     

       69
       69
       +
       

     

       70
       70
       +
       module Tloc = struct
         (* Text locations: a byte range in a file together with line
            information for the start and the end of the range. *)

         type fpath = string
         let pp_path = Format.pp_print_string

         type pos = int
         type line = int
         type line_pos = line * pos
         (* For lines we keep the byte position just after the newline that
            starts them. Text editors still expect tools to report visual
            columns; with these byte positions a column can be approximated by
            subtracting the line byte position from a byte location. This is
            only correct on US-ASCII data. Best would be to be able to give
            editors [sbyte] and [ebyte] directly. *)

         let l v = v

         (* NOTE(review): [sline] and [eline] are declared [pos * line] but the
            formatters below read [fst] as the line number and [snd] as the line
            byte position, i.e. the [line_pos] layout. Both are [int * int]. *)
         type t =
           { file : fpath;
             sbyte : pos; ebyte : pos;
             sline : pos * line; eline : pos * line }

         let no_file = "-"

         let v ~file ~sbyte ~ebyte ~sline ~eline =
           { file; sbyte; ebyte; sline; eline }

         let file l = l.file
         let sbyte l = l.sbyte
         let ebyte l = l.ebyte
         let sline l = l.sline
         let eline l = l.eline

         (* The location used when none is known: no file and every position
            set to -1. *)
         let nil =
           { file = no_file; sbyte = -1; ebyte = -1;
             sline = (-1, -1); eline = (-1, -1) }

         (* [merge l0 l1] spans from the smallest start to the largest end of
            [l0] and [l1]. The file is that of [l0]. *)
         let merge l0 l1 =
           let first = if l0.sbyte < l1.sbyte then l0 else l1 in
           let last = if l0.ebyte < l1.ebyte then l1 else l0 in
           { file = l0.file;
             sbyte = first.sbyte; sline = first.sline;
             ebyte = last.ebyte; eline = last.eline }

         (* [to_start l] is [l] collapsed on its start. *)
         let to_start l = { l with ebyte = l.sbyte; eline = l.sline }

         (* [to_end l] is [l] collapsed on its end. *)
         let to_end l = { l with sbyte = l.ebyte; sline = l.eline }

         (* [restart ~at e] is [e] with its start replaced by the start of
            [at]. *)
         let restart ~at:s e = { e with sbyte = s.sbyte; sline = s.sline }

         let pf = Format.fprintf

         (* Formats a location in the style of OCaml compiler messages. A
            negative [ebyte] is reported as n/a. *)
         let pp_ocaml ppf l =
           if l.ebyte < 0
           then pf ppf "File \"%a\", line n/a, characters n/a" pp_path l.file
           else begin
             let first_line, first_line_pos = l.sline in
             let last_line, last_line_pos = l.eline in
             let pp_lines ppf () =
               if first_line = last_line
               then pf ppf "line %d" first_line
               else pf ppf "lines %d-%d" first_line last_line
             in
             (* "characters" represent positions (insertion points) not
                columns *)
             let pos_s = l.sbyte - first_line_pos in
             let pos_e = l.ebyte - last_line_pos + 1 in
             pf ppf "File \"%a\", %a, characters %d-%d"
               pp_path l.file pp_lines () pos_s pos_e
           end

         (* Formats a location as [file:line.col-col] or
            [file:line.col-line.col], with one-based columns. A negative
            [ebyte] prints the file name only. *)
         let pp_gnu ppf l =
           if l.ebyte < 0 then pf ppf "%a:" pp_path l.file else
           let first_line, first_line_pos = l.sline in
           let last_line, last_line_pos = l.eline in
           let col_s = l.sbyte - first_line_pos + 1 in
           let col_e = l.ebyte - last_line_pos + 1 in
           let pp_range ppf () =
             if first_line = last_line
             then pf ppf "%d.%d-%d" first_line col_s col_e
             else pf ppf "%d.%d-%d.%d" first_line col_s last_line col_e
           in
           pf ppf "%a:%a" pp_path l.file pp_range ()

         (* Dumps the raw fields, for debugging. *)
         let pp_dump ppf l =
           let first_line, first_line_pos = l.sline in
           let last_line, last_line_pos = l.eline in
           pf ppf "[bytes %d;%d][lines %d;%d][lbytes %d;%d]"
             l.sbyte l.ebyte first_line last_line first_line_pos last_line_pos

         let pp = pp_gnu
       end

     

       153
       153
       +
       

     

       154
       154
       +
       module Utf_8 = struct
         (* Classification of the first byte of a UTF-8 sequence. [L1] to [L4_*]
            give the sequence length; the suffix names the first byte (or range
            of first bytes) for the cases that need a dedicated treatment. [E]
            marks bytes that cannot start a sequence. *)
         type case =
         | L1 | L2 | L3_E0 | L3_E1_EC_or_EE_EF | L3_ED | L4_F0 | L4_F1_F3 | L4_F4 | E

         (* [case.(b)] is the classification of first byte [b], for [b] in
            [0x00;0xFF]. The ranges are those of
            https://tools.ietf.org/html/rfc3629#section-4 *)
         let case =
           let of_first_byte b = match Char.chr b with
           | '\x00' .. '\x7F' -> L1
           | '\xC2' .. '\xDF' -> L2
           | '\xE0' -> L3_E0
           | '\xE1' .. '\xEC' | '\xEE' .. '\xEF' -> L3_E1_EC_or_EE_EF
           | '\xED' -> L3_ED
           | '\xF0' -> L4_F0
           | '\xF1' .. '\xF3' -> L4_F1_F3
           | '\xF4' -> L4_F4
           | _ (* 0x80-0xC1 and 0xF5-0xFF *) -> E
           in
           Array.init 256 of_first_byte
       end

     

       199
       199
       +
       

     

       200
       200
       +
       module Tdec = struct
         (* UTF-8 text decoder. A decoder walks an input string byte by byte,
            keeps track of the current byte position and line, and accumulates
            the bytes of the token being lexed in a buffer. *)

         type 'a fmt = Format.formatter -> 'a -> unit

         (* Error message formatters, re-exported from [Err_msg]. *)
         let pp_and_enum = Err_msg.pp_and_enum
         let pp_or_enum = Err_msg.pp_or_enum
         let pp_did_you_mean = Err_msg.pp_did_you_mean
         let pp_must_be = Err_msg.pp_must_be
         let pp_unknown = Err_msg.pp_unknown
         let pp_unknown' = Err_msg.pp_unknown'

         (* Decoders *)

         type t =
           { file : Tloc.fpath; (* file name reported in locations *)
             i : string; (* input being decoded *)
             tok : Buffer.t; (* bytes of the token being built *)
             mutable pos : int; (* position of the next byte to decode *)
             mutable line : int; (* current line number, the first one is 1 *)
             mutable line_pos : int; (* position of the current line first byte *) }

         (* [create ~file i] is a decoder positioned on the first byte of [i].
            [file] defaults to [Tloc.no_file]. *)
         let create ?(file = Tloc.no_file) i =
           { file; i; tok = Buffer.create 255; pos = 0; line = 1; line_pos = 0 }

         (* Location *)

         let file d = d.file
         let pos d = d.pos

         (* [line d] is the current line number and the byte position at which
            that line starts. *)
         let line d = d.line, d.line_pos

         let loc d ~sbyte ~ebyte ~sline ~eline =
           Tloc.v ~file:d.file ~sbyte ~ebyte ~sline ~eline

         (* [loc_to_here d ~sbyte ~sline] starts at the given position and ends
            at the current decoding position. *)
         let loc_to_here d ~sbyte ~sline =
           loc d ~sbyte ~ebyte:d.pos ~sline ~eline:(d.line, d.line_pos)

         (* [loc_here d] is the empty range at the current decoding position. *)
         let loc_here d = loc_to_here d ~sbyte:d.pos ~sline:(d.line, d.line_pos)

         (* Errors *)

         exception Err of Tloc.t * string

         (* [err loc msg] raises [Err (loc, msg)] without recording a
            backtrace. *)
         let err loc msg = raise_notrace (Err (loc, msg))

         (* [err_to_here d ~sbyte ~sline fmt ...] raises [Err] with the formatted
            message and the location returned by [loc_to_here]. *)
         let err_to_here d ~sbyte ~sline fmt =
           Format.kasprintf (err (loc_to_here d ~sbyte ~sline)) fmt

         (* [err_here d fmt ...] raises [Err] with the formatted message at the
            current decoding position. *)
         let err_here d fmt = Format.kasprintf (err (loc_here d)) fmt
         let err_suggest = Err_msg.suggest

         (* Lexing *)

         (* Updates the line information for the byte about to be accepted.
            CR, LF and CRLF each count as a single newline: the LF of a CRLF
            pair only moves the line start. *)
         let incr_line d = match d.i.[d.pos] with (* assert (not (eoi d)) *)
         | '\r' -> d.line <- d.line + 1; d.line_pos <- d.pos + 1
         | '\n' ->
             (if d.pos = 0 || d.i.[d.pos - 1] <> '\r' then d.line <- d.line + 1);
             d.line_pos <- d.pos + 1;
         | _ -> ()
         [@@ ocaml.inline]

         (* [eoi d] is [true] once the whole input has been consumed. *)
         let eoi d = d.pos >= String.length d.i [@@ ocaml.inline]

         (* [byte d] is the byte at the current position or [0xFFFF] at the end
            of input. *)
         let byte d = if eoi d then 0xFFFF else Char.code d.i.[d.pos] [@@ ocaml.inline]

         (* Consumes the current byte. Must not be called at the end of input. *)
         let accept_byte d = incr_line d; d.pos <- d.pos + 1
         [@@ ocaml.inline]

         (* [accept_utf_8 accept d] consumes one well-formed UTF-8 sequence by
            calling [accept d] once per byte of the sequence. Raises [Err] at
            the offending position on a byte that is illegal there or if the
            input ends inside the sequence.

            The byte ranges are those of RFC 3629, section 4. The second byte of
            sequences starting with E0, ED, F0 and F4 is restricted to rule out
            overlong encodings, surrogates and values above U+10FFFF. The bounds
            are inclusive on both sides; they were previously tested with a
            signed subtraction that let bytes below the lower bound through and
            refused the upper bound itself. Sequences starting with F4 also lacked
            their last two continuation bytes. *)
         let accept_utf_8 accept d =
           let illegal d = match byte d with
           | 0xFFFF -> err_here d "UTF-8 decoding error: unexpected end of input"
           | b -> err_here d "UTF-8 decoding error: byte %02x illegal here" b
           in
           (* The end of input marker 0xFFFF is outside of every range, so a
              truncated sequence ends up in [illegal]. *)
           let accept_range ~min ~max d =
             let b = byte d in
             if min <= b && b <= max then accept d else illegal d
           in
           let accept_tail d = accept_range ~min:0x80 ~max:0xBF d in
           match byte d with
           | 0xFFFF -> illegal d
           | b ->
               match Utf_8.case.(b) with
               | Utf_8.L1 -> accept d
               | Utf_8.L2 -> accept d; accept_tail d
               | Utf_8.L3_E0 ->
                   accept d; accept_range ~min:0xA0 ~max:0xBF d; accept_tail d
               | Utf_8.L3_E1_EC_or_EE_EF ->
                   accept d; accept_tail d; accept_tail d
               | Utf_8.L3_ED ->
                   accept d; accept_range ~min:0x80 ~max:0x9F d; accept_tail d
               | Utf_8.L4_F0 ->
                   accept d; accept_range ~min:0x90 ~max:0xBF d;
                   accept_tail d; accept_tail d
               | Utf_8.L4_F1_F3 ->
                   accept d; accept_tail d; accept_tail d; accept_tail d
               | Utf_8.L4_F4 ->
                   accept d; accept_range ~min:0x80 ~max:0x8F d;
                   accept_tail d; accept_tail d
               | Utf_8.E -> illegal d

         (* Consumes one UTF-8 encoded character. *)
         let accept_uchar d = accept_utf_8 accept_byte d

         (* Tokenizer *)

         (* Empties the token buffer. *)
         let tok_reset d = Buffer.reset d.tok [@@ ocaml.inline]

         (* Returns the contents of the token buffer and empties it. *)
         let tok_pop d = let t = Buffer.contents d.tok in tok_reset d; t
         [@@ ocaml.inline]

         (* Appends the current byte to the token and consumes it. Must not be
            called at the end of input. *)
         let tok_accept_byte d = Buffer.add_char d.tok d.i.[d.pos]; accept_byte d
         [@@ ocaml.inline]

         (* Appends one UTF-8 encoded character of the input to the token and
            consumes it. Errors like [accept_utf_8]. *)
         let tok_accept_uchar d = accept_utf_8 tok_accept_byte d [@@ ocaml.inline]

         (* The following add to the token without consuming input. *)
         let tok_add_byte d b = Buffer.add_char d.tok (Char.chr b) [@@ ocaml.inline]
         let tok_add_bytes d s = Buffer.add_string d.tok s [@@ ocaml.inline]
         let tok_add_char d c = Buffer.add_char d.tok c [@@ ocaml.inline]

         (* [buffer_add_uchar b u] appends the UTF-8 encoding of [u] to [b]. *)
         let buffer_add_uchar b u = Buffer.add_utf_8_uchar b u

         let tok_add_uchar d u = buffer_add_uchar d.tok u
       end

     

       330
       330
       +
       

     

       331
       331
       +
       module Url = struct

     

       332
       332
       +
         type scheme = string

     

       333
       333
       +
         type authority = string

     

       334
       334
       +
         type path = string

     

       335
       335
       +
         type query = string

     

       336
       336
       +
         type fragment = string

     

       337
       337
       +
         type t = string

     

       338
       338
       +
       

     

       339
       339
       +
         let string_subrange ?(first = 0) ?last s =

     

       340
       340
       +
           let max = String.length s - 1 in

     

       341
       341
       +
           let last = match last with

     

       342
       342
       +
           | None -> max

     

       343
       343
       +
           | Some l when l > max -> max

     

       344
       344
       +
           | Some l -> l

     

       345
       345
       +
           in

     

       346
       346
       +
           let first = if first < 0 then 0 else first in

     

       347
       347
       +
           if first > last then "" else String.sub s first (last - first + 1)

     

       348
       348
       +
       

     

       349
       349
       +
         let white = function ' ' | '\t' .. '\r'  -> true | _ -> false

     

       350
       350
       +
         let alpha = function 'A' .. 'Z' | 'a' .. 'z' -> true | _ -> false

     

       351
       351
       +
         let digit = function '0' .. '9' -> true | _ -> false

     

       352
       352
       +
       

     

       353
       353
       +
         let scheme_char c =

     

       354
       354
       +
           alpha c || digit c || Char.equal c '+' || Char.equal c '-' ||

     

       355
       355
       +
           Char.equal '.' c

     

       356
       356
       +
       

     

       357
       357
       +
         let find_scheme_colon u =

     

       358
       358
       +
           if u = "" || not (alpha u.[0]) then None else

     

       359
       359
       +
           let max = String.length u - 1 in

     

       360
       360
       +
           let i = ref 1 in

     

       361
       361
       +
           while !i <= max && scheme_char u.[!i] do incr i done;

     

       362
       362
       +
           if !i > max || u.[!i] <> ':' then None else Some !i

     

       363
       363
       +
       

     

       364
       364
       +
         let find_authority_last ~start u =

     

       365
       365
       +
           let max = String.length u - 1 in

     

       366
       366
       +
           if start > max then None else

     

       367
       367
       +
           if start + 1 > max then Some (start - 1) else

     

       368
       368
       +
           if not (u.[start] = '/' && u.[start + 1] = '/') then Some (start - 1) else

     

       369
       369
       +
           let i = ref (start + 2) in

     

       370
       370
       +
           while (!i <= max && u.[!i] <> '/' && u.[!i] <> '?' && u.[!i] <> '#')

     

       371
       371
       +
           do incr i done;

     

       372
       372
       +
           Some (!i - 1)

     

       373
       373
       +
       

     

       374
       374
       +
         let scheme u = match find_scheme_colon u with

     

       375
       375
       +
           | None -> None | Some i -> Some (String.sub u 0 i)

     

       376
       376
       +
       

     

       377
       377
       +
         (* [path_first u] is the index of the first character of the path of
            [u], i.e. the character that follows the scheme and authority. It is
            [None] if that position is past the end of [u] or starts a query
            or a fragment. *)
         let path_first u =
           let after_scheme = match find_scheme_colon u with
           | Some colon -> colon + 1
           | None -> 0
           in
           let first = match find_authority_last ~start:after_scheme u with
           | Some last -> last + 1
           | None -> after_scheme
           in
           if first >= String.length u then None else
           match u.[first] with
           | '#' | '?' -> None
           | _ -> Some first

     

       386
       386
       +
         

     

       387
       387
       +
         (* [path_last u ~first] is the index of the last character of the path
            of [u] that starts at index [first]: the character before the first
            question mark or hash found after [first], or the last character
            of [u] if there is none. *)
         let path_last u ~first =
           let len = String.length u in
           let rec scan i =
             if i >= len then i else
             match u.[i] with
             | '?' | '#' -> i
             | _ -> scan (i + 1)
           in
           scan (first + 1) - 1

     

       392
       392
       +
         

     

       393
       393
       +
         (* [path u] is the path of [u], if any: the substring delimited by
            [path_first] and [path_last]. *)
         let path u =
           let extract first =
             string_subrange ~first ~last:(path_last u ~first) u
           in
           Option.map extract (path_first u)

     

       396
       396
       +
       end

     

       397
       397
       +
       

     

       398
       398
       +
       (* [escape s] is [s] with the LaTeX special characters escaped.
          The characters [& % $ # _ { }] are prefixed with a backslash. Tilde,
          circumflex and backslash are replaced by the corresponding text
          command. Each command is terminated by an empty group so that it does
          not merge with letters that follow: without it ["a~b"] would become
          the undefined control sequence [\textasciitildeb]. [s] itself is
          returned when it has nothing to escape. *)
       let escape = (* The escape rules are a bit unclear. These are those of LaTeX *)
         let tilde_esc = "\\textasciitilde{}" in
         let circ_esc = "\\textasciicircum{}" in
         let bslash_esc = "\\textbackslash{}" in
         let is_special = function
         | '&' | '%' | '$' | '#' | '_' | '{' | '}' | '~' | '^' | '\\' -> true
         | _ -> false
         in
         let add_escaped b = function
         | '&' | '%' | '$' | '#' | '_' | '{' | '}' as c ->
             Buffer.add_char b '\\'; Buffer.add_char b c
         | '~' -> Buffer.add_string b tilde_esc
         | '^' -> Buffer.add_string b circ_esc
         | '\\' -> Buffer.add_string b bslash_esc
         | c -> Buffer.add_char b c
         in
         fun s ->
           (* Fast path: no allocation when there is nothing to escape. *)
           if not (String.exists is_special s) then s else
           let b = Buffer.create (2 * String.length s) in
           String.iter (add_escaped b) s;
           Buffer.contents b

     

       443
       443
       +
       

     

       444
       444
       +
       (* TODO unescape on decode. *)

     

       445
       445
       +
       

     

       446
       446
       +
       (* A BibTeX entry. *)
       type t =
         { type' : string; (* entry type, e.g. the token decoded after the at sign *)
           cite_key : string; (* citation key *)
           fields : string SM.t; (* field values indexed by field name *)
           loc : Tloc.t; (* source location; [Tloc.nil] for entries made with [v] *)
         }

     

       451
       451
       +
       

     

       452
       452
       +
       (* [v ~type' ~cite_key ~fields ()] is an entry with the given components
          and no source location ([Tloc.nil]). *)
       let v ~type' ~cite_key ~fields () =
         let loc = Tloc.nil in
         { type'; cite_key; fields; loc }

     

       453
       453
       +
       

     

       454
       454
       +
       (* Entry accessors, one per record field. *)
       let type' { type'; _ } = type'
       let cite_key { cite_key; _ } = cite_key
       let fields { fields; _ } = fields
       let loc { loc; _ } = loc

     

       458
       458
       +
       (* [pp ppf e] formats entry [e] in BibTeX syntax: the entry type and
          citation key followed by one [name = {value}] binding per field, with
          values passed through [escape]. *)
       let pp ppf { type'; cite_key; fields; _ } =
         let pp_binding ppf (name, value) =
           Fmt.pf ppf "@[<h>%s = {%s}@]" name (escape value)
         in
         let pp_fields = Fmt.iter_bindings ~sep:Fmt.comma SM.iter pp_binding in
         Fmt.pf ppf "@[<v2>@%s{%s,@,%a}@]" type' cite_key pp_fields fields

     

       462
       462
       +
       

     

       463
       463
       +
       (* Field values *)

     

       464
       464
       +
       

     

       465
       465
       +
       (* [list_value s] splits [s] on commas, trims each element and drops
          the elements that are empty after trimming. *)
       let list_value s =
         let non_empty item = match String.trim item with
         | "" -> None
         | item -> Some item
         in
         String.split_on_char ',' s |> List.filter_map non_empty

     

       468
       468
       +
       

     

       469
       469
       +
       (* [doi e] is the trimmed value of the [doi] field of [e], if the field
          exists and is not blank. When the value has an URL scheme, only its
          path is kept (if it has one).
          NOTE(review): for a value like [https://host/10.x] the path starts at
          the slash that follows the authority, so the result seems to keep a
          leading slash -- confirm this is what callers expect. *)
       let doi e =
         let non_blank s = match String.trim s with
         | "" -> None
         | s -> Some s
         in
         (* chop scheme and authority in case there is one *)
         let chop doi = match Url.scheme doi with
         | None -> doi
         | Some _ -> Option.value (Url.path doi) ~default:doi
         in
         Option.bind (SM.find_opt "doi" e.fields) (fun doi -> non_blank (chop doi))

     

       483
       483
       +
       

     

       484
       484
       +
       (* [keywords e] is the [keywords] field of [e] split with [list_value]. *)
       let keywords { fields; _ } = match SM.find_opt "keywords" fields with
       | None -> None
       | Some value -> Some (list_value value)

       (* [annote e] is the raw [annote] field of [e]. *)
       let annote { fields; _ } = SM.find_opt "annote" fields

     

       486
       486
       +
       

     

       487
       487
       +
       (* Codec *)

     

       488
       488
       +
       

     

       489
       489
       +
       (* Decoding errors are a message paired with the location it is
          reported at. *)
       type error_kind = string
       type error = error_kind * Tloc.t

     

       491
       491
       +
       

     

       492
       492
       +
       (* [pp_error ppf (msg, loc)] formats the location [loc] followed by a
          colon, then on the next line of the vertical box the word Error and
          the message [msg]. *)
       let pp_error ppf (msg, loc) =
         Fmt.pf ppf "@[<v>%a:@,%a: %s@]" Tloc.pp loc Fmt.string "Error" msg

     

       495
       495
       +
       

     

       496
       496
       +
       (* [curr_char d] resets the token buffer of [d], accepts the current
          character into it and pops the result.
          TODO better escaping (this is for error reports) *)
       let curr_char d =
         Tdec.tok_reset d;
         Tdec.tok_accept_uchar d;
         Tdec.tok_pop d

     

       498
       498
       +
       

     

       499
       499
       +
       (* Decoding error helpers. They all report through [Tdec.err_here] (error
          at the current position) or [Tdec.err_to_here] (error spanning from
          [sbyte]/[sline] to the current position); presumably these raise
          [Tdec.Err], which [of_string] catches. *)

       let err_illegal_uchar d =
         let culprit = curr_char d in
         Tdec.err_here d "illegal character: %s" culprit

       let err_illegal_byte d b = Tdec.err_here d "illegal character U+%04X" b

       let err_expected d what = Tdec.err_here d "expected %s" what

       let err_eoi msg d ~sbyte ~sline =
         Tdec.err_to_here d ~sbyte ~sline "end of input: %s" msg

       (* End of input reached in the middle of an entry, a field or a value. *)
       let err_eoi_entry d ~sbyte ~sline =
         err_eoi "unclosed BibTeX entry" d ~sbyte ~sline

       let err_eoi_field d ~sbyte ~sline =
         err_eoi "unfinished BibTeX entry field" d ~sbyte ~sline

       let err_eoi_value d ~sbyte ~sline =
         err_eoi "unfinished BibTeX field value" d ~sbyte ~sline

       let err_brace d ~sbyte ~sline =
         Tdec.err_to_here d ~sbyte ~sline "incorrect brace {} nesting"

     

       510
       510
       +
       

     

       511
       511
       +
       (* [dec_byte d] is the current byte of [d]. Control bytes 0x00-0x08,
          0x0E-0x1F and 0x7F are reported with [err_illegal_byte]. *)
       let dec_byte d =
         let b = Tdec.byte d in
         let is_illegal =
           (0x00 <= b && b <= 0x08) || (0x0E <= b && b <= 0x1F) || b = 0x7F
         in
         if is_illegal then err_illegal_byte d b else b
       [@@ ocaml.inline]

     

       516
       516
       +
       

     

       517
       517
       +
       (* [skip_white d] accepts bytes of [d] as long as they are a space,
          tab, line feed, vertical tab, form feed or carriage return. *)
       let rec skip_white d =
         let is_white = function
         | 0x20 | 0x09 | 0x0A | 0x0B | 0x0C | 0x0D -> true
         | _ -> false
         in
         if is_white (dec_byte d) then (Tdec.accept_byte d; skip_white d)

     

       520
       520
       +
       

     

       521
       521
       +
       (* [dec_token ~stop d] accumulates characters of [d] into the token
          buffer and pops it when the current byte is [stop], a parenthesis, a
          semicolon, a double quote, white space or the 0xFFFF byte that the
          decoders here treat as end of input. The terminating byte is not
          accepted. *)
       let dec_token ~stop d =
         let ends_token = function
         | 0x28 | 0x29 | 0x3B | 0x22
         | 0x20 | 0x09 | 0x0A | 0x0B | 0x0C | 0x0D
         | 0xFFFF -> true
         | c -> c = stop
         in
         let rec loop () =
           if ends_token (dec_byte d) then Tdec.tok_pop d
           else (Tdec.tok_accept_uchar d; loop ())
         in
         loop ()

     

       530
       530
       +
       

     

       531
       531
       +
       (* [dec_string ~sbyte ~sline ~stop d] accumulates characters up to the
          byte [stop], accepts [stop] and pops the token. Reaching 0xFFFF first
          is reported as an unfinished value starting at [sbyte]/[sline]. *)
       let rec dec_string ~sbyte ~sline ~stop d =
         let c = dec_byte d in
         if c = 0xFFFF then err_eoi_value ~sbyte ~sline d
         else if c = stop then (Tdec.accept_byte d; Tdec.tok_pop d)
         else (Tdec.tok_accept_uchar d; dec_string ~sbyte ~sline ~stop d)

     

       535
       535
       +
       

     

       536
       536
       +
       (* [dec_tex depth ~sbyte ~sline d] decodes a brace-delimited value whose
          opening brace was already accepted. [depth] counts the nested braces
          currently open: nested braces are kept in the token, the closing brace
          at depth 0 is accepted and ends the value. Reaching 0xFFFF first is
          reported as an unfinished value starting at [sbyte]/[sline]. *)
       let rec dec_tex depth ~sbyte ~sline d = match dec_byte d with
       | 0xFFFF -> err_eoi_value ~sbyte ~sline d
       | 0x007D when depth = 0 -> Tdec.accept_byte d; Tdec.tok_pop d
       | 0x007D -> Tdec.tok_accept_uchar d; dec_tex (depth - 1) ~sbyte ~sline d
       | 0x007B -> Tdec.tok_accept_uchar d; dec_tex (depth + 1) ~sbyte ~sline d
       | _ -> Tdec.tok_accept_uchar d; dec_tex depth ~sbyte ~sline d

     

       544
       544
       +
       

     

       545
       545
       +
       (* [dec_value d] decodes a field value: a brace-delimited value, a
          double-quoted string or a bare token (e.g. a number).
          A bare token ends at a comma but also at the closing brace of the
          entry: otherwise the last field of an entry like [year = 2020}] would
          swallow the brace and the entry would be reported as unclosed. It
          also ends on the delimiters [dec_token] stops at (parentheses,
          semicolon, double quote, white space, end of input). *)
       let dec_value d =
         let sbyte = Tdec.pos d and sline = Tdec.line d in
         match dec_byte d with
         | 0x007B (* { *) -> Tdec.accept_byte d; dec_tex 0 ~sbyte ~sline d
         | 0x0022 -> Tdec.accept_byte d; dec_string ~sbyte ~sline ~stop:0x0022 d
         | _ ->
             let rec bare d = match dec_byte d with
             | 0x002C (* , *) | 0x007D (* } *)
             | 0x28 | 0x29 | 0x3B | 0x22
             | 0x20 | 0x09 | 0x0A | 0x0B | 0x0C | 0x0D
             | 0xFFFF -> Tdec.tok_pop d
             | _ -> Tdec.tok_accept_uchar d; bare d
             in
             bare d

     

       551
       551
       +
       

     

       552
       552
       +
       (* [dec_field d acc] decodes one [name = value] field and adds it to
          [acc] under the ASCII-lowercased field name. White space is allowed
          around the equal sign. *)
       let dec_field d acc =
         let sbyte = Tdec.pos d and sline = Tdec.line d in
         let id = dec_token ~stop:0x003D (* = *) d in
         skip_white d;
         match dec_byte d with
         | 0xFFFF -> err_eoi_field ~sbyte ~sline d
         | 0x003D (* = *) ->
             Tdec.accept_byte d;
             skip_white d;
             if dec_byte d = 0xFFFF then err_eoi_field ~sbyte ~sline d else
             let value = dec_value d in
             SM.add (String.lowercase_ascii id) value acc
         | _ -> err_expected d "'='"

     

       567
       567
       +
       

     

       568
       568
       +
       (* [dec_fields ~sbyte ~sline d acc] decodes comma-separated fields into
          [acc] until the closing brace of the entry, which is left unaccepted
          for the caller. A comma directly followed by the closing brace is
          allowed. [sbyte]/[sline] locate the start of the entry for end of
          input errors. *)
       let rec dec_fields ~sbyte ~sline d acc =
         let after_field acc =
           skip_white d;
           match dec_byte d with
           | 0x002C (* , *) -> Tdec.accept_byte d; dec_fields ~sbyte ~sline d acc
           | 0x007D (* } *) -> acc
           | 0xFFFF -> err_eoi_entry ~sbyte ~sline d
           | _ -> err_expected d "',' or '}'"
         in
         skip_white d;
         match dec_byte d with
         | 0xFFFF -> err_eoi_entry ~sbyte ~sline d
         | 0x007D (* } *) -> acc
         | _ -> after_field (dec_field d acc)

     

       581
       581
       +
       

     

       582
       582
       +
       (* [dec_entry d] decodes one entry; the current byte of [d] must be the
          at sign that starts it. The entry type, the opening brace, the
          citation key and a comma are expected in that order, followed by the
          fields and the closing brace. White space is tolerated between the
          entry type and the opening brace and between the opening brace and
          the citation key, as commonly found in BibTeX files. The location of
          the result spans from the at sign to the closing brace. *)
       let dec_entry d =
         let sbyte = Tdec.pos d and sline = Tdec.line d in
         Tdec.accept_byte d (* @ *);
         let type' = dec_token ~stop:0x007B d (* { *) in
         skip_white d;
         match dec_byte d with
         | 0x007B ->
             Tdec.accept_byte d;
             skip_white d;
             let cite_key = dec_token ~stop:0x002C d (* , *) in
             skip_white d;
             begin match dec_byte d with
             | 0x002C (* , *) ->
                 Tdec.accept_byte d;
                 let fields = dec_fields ~sbyte ~sline d SM.empty in
                 (* [dec_fields] stops on the closing brace without accepting
                    it: take the location first, then consume the brace. *)
                 let loc = Tdec.loc_to_here d ~sbyte ~sline in
                 Tdec.accept_byte d;
                 { type'; cite_key; fields; loc }
             | _ -> err_expected d "','"
             end
         | _ -> err_expected d "'{'"

     

       601
       601
       +
       

     

       602
       602
       +
       (* [dec_entries d] decodes white space separated entries until the end
          of input and returns them in input order. Anything other than an at
          sign between entries is reported as an illegal character. *)
       let dec_entries d =
         let rec loop rev_entries =
           skip_white d;
           match dec_byte d with
           | 0x0040 (* @ *) ->
               let entry = dec_entry d in
               loop (entry :: rev_entries)
           | 0xFFFF -> List.rev rev_entries
           | _ -> err_illegal_uchar d
         in
         loop []

     

       611
       611
       +
       

     

       612
       612
       +
       (* [of_string ~file s] decodes the entries of [s]. [file] (default
          [Fpath.v "-"]) is handed to the decoder for locations. A [Tdec.Err]
          raised during decoding is returned as an [Error]. *)
       let of_string ?(file = Fpath.v "-") s =
         match dec_entries (Tdec.create ~file:(Fpath.to_string file) s) with
         | entries -> Ok entries
         | exception Tdec.Err (loc, msg) -> Error (msg, loc)

     

       618
       618
       +
       

     

       619
       619
       +
       (* [of_string' ?file s] is [of_string ?file s] with the error rendered
          to a string by [pp_error]. *)
       let of_string' ?file s = match of_string ?file s with
       | Ok entries -> Ok entries
       | Error e -> Error (Fmt.str "%a" pp_error e)

     

       622
       622
       +
       

     

       623
       623
       +
       (* [to_string es] formats the entries [es] with [pp] in a vertical box. *)
       let to_string es =
         let pp_entries = Fmt.list pp in
         Fmt.str "@[<v>%a@]" pp_entries es

     

       624
       624
       +
       

     

       625
       625
       +
       (*---------------------------------------------------------------------------

     

       626
       626
       +
          Copyright (c) 2019 University of Bern

     

       627
       627
       +
       

     

       628
       628
       +
          Permission to use, copy, modify, and/or distribute this software for any

     

       629
       629
       +
          purpose with or without fee is hereby granted, provided that the above

     

       630
       630
       +
          copyright notice and this permission notice appear in all copies.

     

       631
       631
       +
       

     

       632
       632
       +
          THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES

     

       633
       633
       +
          WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF

     

       634
       634
       +
          MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR

     

       635
       635
       +
          ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES

     

       636
       636
       +
          WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN

     

       637
       637
       +
          ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF

     

       638
       638
       +
          OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

     

       639
       639
       +
         ---------------------------------------------------------------------------*)

+92

stack/zotero-translation/bibtex.mli

···

       1
       1
       +
       (*---------------------------------------------------------------------------

     

       2
       2
       +
          Copyright (c) 2019 University of Bern. All rights reserved.

     

       3
       3
       +
          Distributed under the ISC license, see terms at the end of the file.

     

       4
       4
       +
         ---------------------------------------------------------------------------*)

     

       5
       5
       +
       

     

       6
       6
       +
       (** {{:https://www.ctan.org/pkg/bibtex}BibT{_E}X} codec.

     

       7
       7
       +
       

     

       8
       8
       +
           {b Limitations.} At the moment [@string], [@preamble]

     

       9
       9
       +
           and [@comment] are not supported. For values we assume UTF-8 without

     

       10
       10
       +
           escape sequences. Nested braces are handled though. *)

     

       11
       11
       +
       

     

       12
       12
       +
       val escape : string -> string

     

       13
       13
       +
       (** [escape s] escapes [s] for BibT{_E}X. *)

     

       14
       14
       +
       

     

       15
       15
       +
       type t

     

       16
       16
       +
       (** The type for bibtex entries. *)

     

       17
       17
       +
       

     

       18
       18
       +
       module Tloc : sig

     

       19
       19
       +
         type t

     

       20
       20
       +
       end

     

       21
       21
       +
       

     

       22
       22
       +
       module SM : Map.S with type key := string

     

       23
       23
       +
       

     

       24
       24
       +
       val v :

     

       25
       25
       +
         type':string -> cite_key:string -> fields:string SM.t -> unit -> t

     

       26
       26
       +
       (** [v ~type' ~cite_key ~fields ()] is an entry of type [type'], citation key [cite_key],

     

       27
       27
       +
           and with fields [fields]. *)

     

       28
       28
       +
       

     

       29
       29
       +
       val type' : t -> string

     

       30
       30
       +
       (** [type' e] is the type of entry. *)

     

       31
       31
       +
       

     

       32
       32
       +
       val cite_key : t -> string

     

       33
       33
       +
       (** [cite_key e] is the citation key of the entry. *)

     

       34
       34
       +
       

     

       35
       35
       +
       val fields : t -> string SM.t

     

       36
       36
       +
       (** [fields e] are the BibTeX fields. Fields are lowercased according

     

       37
       37
       +
           to [String.lowercase_ascii]. *)

     

       38
       38
       +
       

     

       39
       39
       +
       val pp : t Fmt.t

     

       40
       40
       +
       (** [pp] formats an entry using BibT{_E}X syntax. *)

     

       41
       41
       +
       

     

       42
       42
       +
       (** {1:fields Field queries} *)

     

       43
       43
       +
       

     

       44
       44
       +
       val list_value : string -> string list

     

       45
       45
       +
       (** [list_value] splits on comma and trims the results. *)

     

       46
       46
       +
       

     

       47
       47
       +
       val doi : t -> string option

     

       48
       48
       +
       (** [doi e] is the [doi] field of e. Note that if the field happens to

     

       49
       49
       +
           hold a URI, the scheme and authority are stripped. *)

     

       50
       50
       +
       

     

       51
       51
       +
       val keywords : t -> string list option

     

       52
       52
       +
       (** [keywords e] is the comma separated [keywords] field. *)

     

       53
       53
       +
       

     

       54
       54
       +
       val annote : t -> string option

     

       55
       55
       +
       (** [annote e] is the [annote] field. *)

     

       56
       56
       +
       

     

       57
       57
       +
       (** {1:codec Codec} *)

     

       58
       58
       +
       

     

       59
       59
       +
       type error_kind

     

       60
       60
       +
       (** The type for kinds of decoding errors. *)

     

       61
       61
       +
       

     

       62
       62
       +
       type error = error_kind * Tloc.t

     

       63
       63
       +
       (** The type for errors. The error and its location. *)

     

       64
       64
       +
       

     

       65
       65
       +
       val pp_error : error Fmt.t
       (** [pp_error] formats an error: its location followed by the error
           message. *)

     

       66
       66
       +
       

     

       67
       67
       +
       val of_string : ?file:Fpath.t -> string -> (t list, error) result

     

       68
       68
       +
       (** [of_string ~file s] parses entries from [s] assuming it

     

       69
       69
       +
           was read from [file] (defaults to [Fpath.v "-"]). *)

     

       70
       70
       +
       

     

       71
       71
       +
       val of_string' : ?file:Fpath.t -> string -> (t list, string) result

     

       72
       72
       +
       (** [of_string'] is like {!of_string} but converts the error to an

     

       73
       73
       +
           error message. *)

     

       74
       74
       +
       

     

       75
       75
       +
       val to_string : t list -> string

     

       76
       76
       +
       (** [to_string es] formats the list of entries using BibT{_E}X syntax. *)

     

       77
       77
       +
       

     

       78
       78
       +
       (*---------------------------------------------------------------------------

     

       79
       79
       +
          Copyright (c) 2019 University of Bern

     

       80
       80
       +
       

     

       81
       81
       +
          Permission to use, copy, modify, and/or distribute this software for any

     

       82
       82
       +
          purpose with or without fee is hereby granted, provided that the above

     

       83
       83
       +
          copyright notice and this permission notice appear in all copies.

     

       84
       84
       +
       

     

       85
       85
       +
          THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES

     

       86
       86
       +
          WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF

     

       87
       87
       +
          MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR

     

       88
       88
       +
          ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES

     

       89
       89
       +
          WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN

     

       90
       90
       +
          ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF

     

       91
       91
       +
          OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

     

       92
       92
       +
         ---------------------------------------------------------------------------*)

stack/zotero-translation/dune

···

       1
       1
       +
       (library

     

       2
       2
       +
         (name zotero_translation)

     

       3
       3
       +
         (public_name zotero-translation)

     

       4
       4
       +
         (libraries astring cohttp-lwt-unix ezjsonm http fpath))

+21

stack/zotero-translation/dune-project

···

       1
       1
       +
       (lang dune 3.17)

     

       2
       2
       +
       (name zotero-translation)

     

       3
       3
       +
       

     

       4
       4
       +
       (source (github avsm/zotero-translation))

     

       5
       5
       +
       (license ISC)

     

       6
       6
       +
       (authors "Anil Madhavapeddy")

     

       7
       7
       +
       (maintainers "anil@recoil.org")

     

       8
       8
       +
       

     

       9
       9
       +
       (generate_opam_files true)

     

       10
       10
       +
       

     

       11
       11
       +
       (package

     

       12
       12
       +
        (name zotero-translation)

     

       13
       13
       +
        (synopsis "API client to the Zotero translation server")

     

       14
       14
       +
        (description "This is all still a work in progress")

     

       15
       15
       +
        (depends

     

       16
       16
       +
         (ocaml (>= "5.1.0"))

     

       17
       17
       +
         uri

     

       18
       18
       +
         http

     

       19
       19
       +
         cohttp-lwt-unix

     

       20
       20
       +
         ezjsonm

     

       21
       21
       +
         yaml))

+34

stack/zotero-translation/zotero-translation.opam

···

       1
       1
       +
       # This file is generated by dune, edit dune-project instead

     

       2
       2
       +
       opam-version: "2.0"

     

       3
       3
       +
       synopsis: "API client to the Zotero translation server"

     

       4
       4
       +
       description: "This is all still a work in progress"

     

       5
       5
       +
       maintainer: ["anil@recoil.org"]

     

       6
       6
       +
       authors: ["Anil Madhavapeddy"]

     

       7
       7
       +
       license: "ISC"

     

       8
       8
       +
       homepage: "https://github.com/avsm/zotero-translation"

     

       9
       9
       +
       bug-reports: "https://github.com/avsm/zotero-translation/issues"

     

       10
       10
       +
       depends: [

     

       11
       11
       +
         "dune" {>= "3.17"}

     

       12
       12
       +
         "ocaml" {>= "5.1.0"}

     

       13
       13
       +
         "uri"

     

       14
       14
       +
         "http"

     

       15
       15
       +
         "cohttp-lwt-unix"

     

       16
       16
       +
         "ezjsonm"

     

       17
       17
       +
         "yaml"

     

       18
       18
       +
         "odoc" {with-doc}

     

       19
       19
       +
       ]

     

       20
       20
       +
       build: [

     

       21
       21
       +
         ["dune" "subst"] {dev}

     

       22
       22
       +
         [

     

       23
       23
       +
           "dune"

     

       24
       24
       +
           "build"

     

       25
       25
       +
           "-p"

     

       26
       26
       +
           name

     

       27
       27
       +
           "-j"

     

       28
       28
       +
           jobs

     

       29
       29
       +
           "@install"

     

       30
       30
       +
           "@runtest" {with-test}

     

       31
       31
       +
           "@doc" {with-doc}

     

       32
       32
       +
         ]

     

       33
       33
       +
       ]

     

       34
       34
       +
       dev-repo: "git+https://github.com/avsm/zotero-translation.git"

+275

stack/zotero-translation/zotero_translation.ml

···

       1
       1
       +
       (** Resolve a DOI from a Zotero translation server *)

     

       2
       2
       +
       

     

       3
       3
       +
       module C = Cohttp

     

       4
       4
       +
       module CL = Cohttp_lwt

     

       5
       5
       +
       module CLU = Cohttp_lwt_unix.Client

     

       6
       6
       +
       module J = Ezjsonm

     

       7
       7
       +
       

     

       8
       8
       +
       (* From the ZTS source code:  https://github.com/zotero/translation-server/blob/master/src/formats.js

     

       9
       9
       +
         bibtex: "9cb70025-a888-4a29-a210-93ec52da40d4",

     

       10
       10
       +
       	biblatex: "b6e39b57-8942-4d11-8259-342c46ce395f",

     

       11
       11
       +
       	bookmarks: "4e7119e0-02be-4848-86ef-79a64185aad8",

     

       12
       12
       +
       	coins: "05d07af9-105a-4572-99f6-a8e231c0daef",

     

       13
       13
       +
       	csljson: "bc03b4fe-436d-4a1f-ba59-de4d2d7a63f7",

     

       14
       14
       +
       	csv: "25f4c5e2-d790-4daa-a667-797619c7e2f2",

     

       15
       15
       +
       	endnote_xml: "eb7059a4-35ec-4961-a915-3cf58eb9784b",

     

       16
       16
       +
       	evernote: "18dd188a-9afc-4cd6-8775-1980c3ce0fbf",

     

       17
       17
       +
       	mods: "0e2235e7-babf-413c-9acf-f27cce5f059c",

     

       18
       18
       +
       	rdf_bibliontology: "14763d25-8ba0-45df-8f52-b8d1108e7ac9",

     

       19
       19
       +
       	rdf_dc: "6e372642-ed9d-4934-b5d1-c11ac758ebb7",

     

       20
       20
       +
       	rdf_zotero: "14763d24-8ba0-45df-8f52-b8d1108e7ac9",

     

       21
       21
       +
       	refer: "881f60f2-0802-411a-9228-ce5f47b64c7d",

     

       22
       22
       +
       	refworks_tagged: "1a3506da-a303-4b0a-a1cd-f216e6138d86",

     

       23
       23
       +
       	ris: "32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7",

     

       24
       24
       +
       	tei: "032ae9b7-ab90-9205-a479-baf81f49184a",

     

       25
       25
       +
       	wikipedia: "3f50aaac-7acc-4350-acd0-59cb77faf620"

     

       26
       26
       +
        *)

     

       27
       27
       +
       (** Export formats that can be requested from the translation server.
           The constructors mirror, one for one, the keys of the ZTS
           [formats.js] table quoted in the comment above. *)
       type format =
         | Bibtex | Biblatex | Bookmarks | Coins
         | Csljson | Csv | Endnote_xml | Evernote
         | Mods | Rdf_bibliontology | Rdf_dc | Rdf_zotero
         | Refer | Refworks_tagged | Ris | Tei
         | Wikipedia

     

       45
       45
       +
       

     

       46
       46
       +
       (** [format_to_string fmt] is the lowercase name of [fmt]; [export]
           sends this as the value of the [format] query parameter. *)
       let format_to_string fmt =
         match fmt with
         | Bibtex -> "bibtex"
         | Biblatex -> "biblatex"
         | Bookmarks -> "bookmarks"
         | Coins -> "coins"
         | Csljson -> "csljson"
         | Csv -> "csv"
         | Endnote_xml -> "endnote_xml"
         | Evernote -> "evernote"
         | Mods -> "mods"
         | Rdf_bibliontology -> "rdf_bibliontology"
         | Rdf_dc -> "rdf_dc"
         | Rdf_zotero -> "rdf_zotero"
         | Refer -> "refer"
         | Refworks_tagged -> "refworks_tagged"
         | Ris -> "ris"
         | Tei -> "tei"
         | Wikipedia -> "wikipedia"

     

       64
       64
       +
       

     

       65
       65
       +
       (** [format_of_string name] is the format whose {!format_to_string}
           name is exactly [name] (case-sensitive), or [None] when [name]
           is not one of the known format names. *)
       let format_of_string name =
         List.assoc_opt name
           [ ("bibtex", Bibtex);
             ("biblatex", Biblatex);
             ("bookmarks", Bookmarks);
             ("coins", Coins);
             ("csljson", Csljson);
             ("csv", Csv);
             ("endnote_xml", Endnote_xml);
             ("evernote", Evernote);
             ("mods", Mods);
             ("rdf_bibliontology", Rdf_bibliontology);
             ("rdf_dc", Rdf_dc);
             ("rdf_zotero", Rdf_zotero);
             ("refer", Refer);
             ("refworks_tagged", Refworks_tagged);
             ("ris", Ris);
             ("tei", Tei);
             ("wikipedia", Wikipedia) ]

     

       84
       84
       +
       

     

       85
       85
       +
       (* URI of the server's [web] endpoint: "web" appended to [base_uri],
          with a "/" inserted only when [base_uri] does not already end in one. *)
       let web_endp base_uri =
         let sep = if String.ends_with ~suffix:"/" base_uri then "" else "/" in
         Uri.of_string (base_uri ^ sep ^ "web")

     

       89
       89
       +
       

     

       90
       90
       +
       (* URI of the server's [export] endpoint: "export" appended to [base_uri],
          with a "/" inserted only when [base_uri] does not already end in one. *)
       let export_endp base_uri =
         let sep = if String.ends_with ~suffix:"/" base_uri then "" else "/" in
         Uri.of_string (base_uri ^ sep ^ "export")

     

       94
       94
       +
       

     

       95
       95
       +
       (* URI of the server's [search] endpoint: "search" appended to [base_uri],
          with a "/" inserted only when [base_uri] does not already end in one. *)
       let search_endp base_uri =
         let sep = if String.ends_with ~suffix:"/" base_uri then "" else "/" in
         Uri.of_string (base_uri ^ sep ^ "search")

     

       99
       99
       +
       

     

       100
       100
       +
       (* URI of the server's [import] endpoint, built like the other endpoint
          helpers. The leading underscore marks it as currently unused. *)
       let _import_endp base_uri =
         let sep = if String.ends_with ~suffix:"/" base_uri then "" else "/" in
         Uri.of_string (base_uri ^ sep ^ "import")

     

       104
       104
       +
       

     

       105
       105
       +
       open Lwt.Infix

     

       106
       106
       +
       

     

       107
       107
       +
       (* The Eio version has more in here, hence I'm just keeping this around. *)

     

       108
       108
       +
       type t = { base_uri : string  (** Base URI the endpoint paths are appended to. *) }

     

       111
       111
       +
       

     

       112
       112
       +
       (* [v base_uri] is a handle on the translation server at [base_uri].
          The string is stored as given; a trailing "/" is optional because
          the endpoint helpers add one when it is missing. *)
       let v base_uri = { base_uri }

     

       113
       113
       +
       

     

       114
       114
       +
       (* [post_for_json uri text] POSTs [text] to [uri] with content type
          [text/plain] and reads the whole response body.

          - On HTTP status [`OK] the body is parsed as JSON; a parse failure is
            returned as [Error (`Msg _)] carrying the printed exception.
          - Any other status is returned as [Error (`Msg _)] mentioning the
            status and the response body.

          Shared by [resolve_doi], [resolve_url] and [search_id], which used to
          each carry their own copy of this logic. *)
       let post_for_json uri text =
         let headers = C.Header.init_with "content-type" "text/plain" in
         CLU.call ~headers ~body:(CL.Body.of_string text) `POST uri
         >>= fun (resp, body) ->
         let status = C.Response.status resp in
         Cohttp_lwt.Body.to_string body >>= fun body ->
         if status = `OK then (
           match J.from_string body with
           | json -> Lwt.return_ok json
           | exception exn -> Lwt.return_error (`Msg (Printexc.to_string exn)))
         else
           Lwt.return_error (`Msg (Format.asprintf "Unexpected HTTP status: %a for %s" Http.Status.pp status body))

       (* [resolve_doi t doi] asks the [web] endpoint to translate the page at
          ["https://doi.org/" ^ doi] and returns the JSON it answers with. *)
       let resolve_doi { base_uri } doi =
         post_for_json (web_endp base_uri) ("https://doi.org/" ^ doi)

       (* [resolve_url t url] asks the [web] endpoint to translate [url], which
          is sent verbatim as the request body. *)
       let resolve_url { base_uri } url =
         post_for_json (web_endp base_uri) url

       (* [search_id t doi] sends ["https://doi.org/" ^ doi] to the [search]
          endpoint and returns the JSON it answers with. *)
       let search_id { base_uri } doi =
         post_for_json (search_endp base_uri) ("https://doi.org/" ^ doi)

     

       160
       160
       +
       

     

       161
       161
       +
       (* [export t format api] POSTs the JSON document [api] (content type
          [application/json]) to the [export] endpoint with [?format=<name>]
          and returns the response body as a string.

          For [Bibtex] the body is whitespace-trimmed; every other format is
          returned untouched. A non-[`OK] status yields [Error (`Msg _)]
          mentioning the status and the response body.

          The previous [try ... with] around the success branch guarded code
          that cannot raise, so it has been dropped. *)
       let export { base_uri } format api =
         let headers = C.Header.init_with "content-type" "application/json" in
         let uri =
           Uri.with_query' (export_endp base_uri) [ ("format", format_to_string format) ]
         in
         let body = CL.Body.of_string (J.to_string api) in
         CLU.call ~headers ~body `POST uri >>= fun (resp, body) ->
         let status = C.Response.status resp in
         Cohttp_lwt.Body.to_string body >>= fun body ->
         if status = `OK then
           Lwt.return_ok
             (match format with Bibtex -> Astring.String.trim body | _ -> body)
         else
           Lwt.return_error (`Msg (Format.asprintf "Unexpected HTTP status: %a for %s" Http.Status.pp status body))

     

       176
       176
       +
       

     

       177
       177
       +
       (* [unescape_hex s] replaces every [\xHH] escape in [s] (a backslash,
          an ['x'], then exactly two hexadecimal digits) by the byte with that
          code. Everything else is copied through unchanged.

          A backslash-x that is not followed by two valid hex digits is left
          as-is instead of raising [Failure] from [int_of_string], which is
          what happened before on input such as ["\\xzz"]. *)
       let unescape_hex s =
         let len = String.length s in
         let buf = Buffer.create len in
         (* Numeric value of one hex digit, or [None] for any other character. *)
         let hex_digit = function
           | '0' .. '9' as c -> Some (Char.code c - Char.code '0')
           | 'a' .. 'f' as c -> Some (Char.code c - Char.code 'a' + 10)
           | 'A' .. 'F' as c -> Some (Char.code c - Char.code 'A' + 10)
           | _ -> None
         in
         let rec aux i =
           if i >= len then Buffer.contents buf
           else
             let decoded =
               (* [i + 3 < len] guarantees all four characters of the escape exist. *)
               if s.[i] = '\\' && i + 3 < len && s.[i + 1] = 'x' then
                 match (hex_digit s.[i + 2], hex_digit s.[i + 3]) with
                 | Some hi, Some lo -> Some (Char.chr ((hi * 16) + lo))
                 | _ -> None
               else None
             in
             match decoded with
             | Some c ->
                 Buffer.add_char buf c;
                 aux (i + 4)
             | None ->
                 Buffer.add_char buf s.[i];
                 aux (i + 1)
         in
         aux 0

     

       193
       193
       +
       

     

       194
       194
       +
       (* [unescape_bibtex s] decodes [\xHH] escapes via [unescape_hex] and then
          deletes every ['{'] and ['}'] from the result. *)
       let unescape_bibtex s =
         let strip c text = String.concat "" (String.split_on_char c text) in
         s |> unescape_hex |> strip '{' |> strip '}'

     

       198
       198
       +
       

     

       199
       199
       +
       (* [fields_of_bib bib] parses [bib] with [Bibtex.of_string] and, when it
          holds exactly one entry, resolves to that entry's fields as
          [(key, `String value)] pairs (values passed through [unescape_bibtex],
          bindings in reverse order) followed by a final ["bibtype"] pair.
          The entry type ["inbook"] is reported as ["book"].

          The promise is rejected with [Failure] when parsing fails (the input
          and the parse error are printed on stderr first) or when the input
          does not contain exactly one entry. *)
       let fields_of_bib bib =
         match Bibtex.of_string bib with
         | Ok [ entry ] ->
             let ty =
               match Bibtex.type' entry with "inbook" -> "book" | other -> other
             in
             let rev_fields =
               Bibtex.fields entry |> Bibtex.SM.bindings
               |> List.rev_map (fun (k, v) -> (k, `String (unescape_bibtex v)))
             in
             Lwt.return (rev_fields @ [ ("bibtype", `String ty) ])
         | Ok _ -> Lwt.fail_with "one bib at a time plz"
         | Error e ->
             prerr_endline bib;
             Fmt.epr "%a\n%!" Bibtex.pp_error e;
             Lwt.fail_with "bib parse err TODO"

     

       211
       211
       +
       

     

       212
       212
       +
       (* [bib_of_doi zt doi] resolves [doi] to a BibTeX string.

          It first tries [resolve_doi]; if that returns an error it logs to
          stderr and falls back to [search_id]. The resulting JSON is then
          exported as [Bibtex]. The BibTeX text is echoed on stdout before
          being returned. An error from the fallback or from the export
          rejects the promise with [Failure]. *)
       let bib_of_doi zt doi =
         prerr_endline ("Fetching " ^ doi);
         (* Turn an [Error (`Msg _)] result into a rejected promise. *)
         let or_fail = function
           | Ok r -> Lwt.return r
           | Error (`Msg e) -> Lwt.fail_with e
         in
         let lookup =
           resolve_doi zt doi >>= function
           | Ok json -> Lwt.return json
           | Error (`Msg _) ->
               Printf.eprintf "%s failed on /web, trying to /search\n%!" doi;
               search_id zt doi >>= or_fail
         in
         lookup >>= fun json ->
         export zt Bibtex json >>= or_fail >>= fun bibtex ->
         print_endline bibtex;
         Lwt.return bibtex

     

       230
       230
       +
       

     

       231
       231
       +
       (* [split_authors keys] turns the association list [keys] into a JSON
          object in which:

          - ["author"], when present, is split on [" and "]; each piece goes
            through [Bibtex.list_value], is reversed and re-joined with spaces
            (presumably turning "Last, First" into "First Last" -- confirm
            against [Bibtex.list_value]), and the result is stored as an array
            of strings;
          - ["keywords"], when present, is split on [", "] into an array of
            strings; the key is removed when no keyword remains.

          An entry without an ["author"] field is now left without one rather
          than raising [Not_found]; [add_bibtex] already treats the author as
          optional. *)
       let split_authors keys =
         let strings xs = `A (List.map (fun x -> `String x) xs) in
         let cuts ~sep j = Astring.String.cuts ~empty:false ~sep (J.get_string j) in
         let with_authors =
           match List.assoc_opt "author" keys with
           | None -> `O keys
           | Some a ->
               let names =
                 cuts ~sep:" and " a
                 |> List.map (fun name ->
                        Bibtex.list_value name |> List.rev |> String.concat " ")
               in
               J.update (`O keys) [ "author" ] (Some (strings names))
         in
         let keywords =
           match List.assoc_opt "keywords" keys with
           | None -> None
           | Some k -> (
               match cuts ~sep:", " k with [] -> None | ks -> Some (strings ks))
         in
         J.update with_authors [ "keywords" ] keywords

     

       248
       248
       +
       

     

       249
       249
       +
       (* [add_bibtex ~slug y] renders a BibTeX entry from the JSON object [y]
          and returns [y] with that text stored under the ["bib"] key.

          - The cite key is [slug] with every ['-'] replaced by ['_'].
          - The entry type is the lowercased ["bibtype"] member of [y]
            (looked up with [J.find], so it must be present).
          - ["author"] (an array of strings, joined with [" and "]) and a set
            of common string fields are copied when present, followed by the
            fields specific to the entry type. An unrecognised type is
            reported on stderr and gets only the common fields. *)
       let add_bibtex ~slug y =
         (* Copy member [k] of [y] into map [m] through [conv]; skip it if absent. *)
         let copy_with conv m k =
           match J.find y [ k ] with
           | v -> Bibtex.SM.add k (conv v) m
           | exception Not_found -> m
         in
         let copy_strings ks m = List.fold_left (copy_with J.get_string) m ks in
         let join_authors j = String.concat " and " (J.get_list J.get_string j) in
         let cite_key = Astring.String.map (function '-' -> '_' | c -> c) slug in
         let type' = String.lowercase_ascii (J.get_string (J.find y [ "bibtype" ])) in
         let common =
           copy_with join_authors Bibtex.SM.empty "author"
           |> copy_strings [ "title"; "doi"; "month"; "year"; "url" ]
         in
         let extra =
           match type' with
           | "article" -> [ "journal"; "volume"; "number"; "pages" ]
           | "inproceedings" | "incollection" ->
               [ "booktitle"; "editor"; "address"; "series"; "number"; "volume";
                 "organization"; "publisher"; "pages" ]
           | "book" -> [ "editor"; "publisher"; "volume"; "pages" ]
           | "misc" -> [ "howpublished" ]
           | "techreport" -> [ "institution"; "number"; "address" ]
           | unknown ->
               prerr_endline ("unknown bibtype " ^ unknown);
               []
         in
         let fields = copy_strings extra common in
         let bib = Fmt.str "%a" Bibtex.pp (Bibtex.v ~type' ~cite_key ~fields ()) in
         J.update y [ "bib" ] (Some (`String bib))

     

       271
       271
       +
       

     

       272
       272
       +
       (* [json_of_doi zt ~slug doi] fetches the BibTeX for [doi], parses it
          into fields, normalises authors and keywords, and attaches a
          re-rendered BibTeX entry whose cite key is derived from [slug]. *)
       let json_of_doi zt ~slug doi =
         bib_of_doi zt doi >>= fields_of_bib >>= fun keys ->
         Lwt.return (add_bibtex ~slug (split_authors keys))

+37

stack/zotero-translation/zotero_translation.mli

···

       1
       1
       +
       (** {1 Interface to the Zotero Translation Server} *)

     

       2
       2
       +
       

     

       3
       3
       +
       (** A handle on one translation server; create it with {!v}. *)
       type t

     

       4
       4
       +
       

     

       5
       5
       +
       (** Export formats accepted by {!export}. *)
       type format =
         | Bibtex | Biblatex | Bookmarks | Coins
         | Csljson | Csv | Endnote_xml | Evernote
         | Mods | Rdf_bibliontology | Rdf_dc | Rdf_zotero
         | Refer | Refworks_tagged | Ris | Tei
         | Wikipedia

     

       23
       23
       +
       

     

       24
       24
       +
       (** [format_to_string f] is the lowercase name of [f], e.g. ["bibtex"]
           for [Bibtex]. *)
       val format_to_string: format -> string

     

       25
       25
       +
       (** [format_of_string s] is the inverse of {!format_to_string}; it is
           [None] when [s] is not exactly one of the known lowercase names. *)
       val format_of_string: string -> format option

     

       26
       26
       +
       

     

       27
       27
       +
       (** [v base_uri] is a handle on the translation server reachable at
           [base_uri]. A trailing ["/"] on [base_uri] is optional. *)
       val v :  string -> t

     

       28
       28
       +
       

     

       29
       29
       +
       (** [resolve_doi t doi] POSTs ["https://doi.org/" ^ doi] to the server's
           [web] endpoint and returns the JSON response. A non-OK HTTP status
           or a body that is not valid JSON is reported as [Error (`Msg _)]. *)
       val resolve_doi: t -> string -> ([>Ezjsonm.t], [>`Msg of string]) Lwt_result.t

     

       30
       30
       +
       

     

       31
       31
       +
       (** [resolve_url t url] POSTs [url] to the server's [web] endpoint and
           returns the JSON response. A non-OK HTTP status or a body that is
           not valid JSON is reported as [Error (`Msg _)]. *)
       val resolve_url: t -> string -> ([>Ezjsonm.t], [>`Msg of string]) Lwt_result.t

     

       32
       32
       +
       

     

       33
       33
       +
       (** [search_id t doi] POSTs ["https://doi.org/" ^ doi] to the server's
           [search] endpoint and returns the JSON response. A non-OK HTTP
           status or a body that is not valid JSON is reported as
           [Error (`Msg _)]. *)
       val search_id: t -> string -> ([>Ezjsonm.t], [>`Msg of string]) Lwt_result.t

     

       34
       34
       +
       

     

       35
       35
       +
       (** [export t format json] POSTs [json] to the server's [export] endpoint,
           asking for [format], and returns the response body. For [Bibtex] the
           body is whitespace-trimmed. A non-OK HTTP status is reported as
           [Error (`Msg _)]. *)
       val export: t -> format -> Ezjsonm.t -> (string, [>`Msg of string]) Lwt_result.t

     

       36
       36
       +
       

     

       37
       37
       +
       (** [json_of_doi t ~slug doi] looks [doi] up on the server, exports it as
           BibTeX, and returns a JSON object of the entry's fields plus a
           ["bib"] member holding a re-rendered BibTeX entry whose cite key is
           [slug] with ['-'] replaced by ['_'].

           Failures (lookup, export, or BibTeX parsing) reject the returned
           promise rather than being returned as a [result]. The function also
           writes progress and the fetched BibTeX to stderr/stdout. *)
       val json_of_doi : t -> slug:string -> string -> Ezjsonm.value Lwt.t