···
1
+
(*---------------------------------------------------------------------------
2
+
Copyright (c) 2019 University of Bern. All rights reserved.
3
+
Distributed under the ISC license, see terms at the end of the file.
4
+
---------------------------------------------------------------------------*)
6
+
[@@@warning "-27-32-33-34"]
8
+
module SM = Map.Make(String)
10
+
module Err_msg = struct
11
+
let pf = Format.fprintf
12
+
let pp_sp = Format.pp_print_space
13
+
let pp_nop _ () = ()
14
+
let pp_any fmt ppf _ = pf ppf fmt
16
+
let pp_op_enum op ?(empty = pp_nop) pp_v ppf = function
17
+
| [] -> empty ppf ()
20
+
let rec loop ppf = function
21
+
| [v0; v1] -> pf ppf "%a@ %s@ %a" pp_v v0 op pp_v v1
22
+
| v :: vs -> pf ppf "%a,@ " pp_v v; loop ppf vs
23
+
| [] -> assert false
27
+
let pp_and_enum ?empty pp_v ppf vs = pp_op_enum "and" ?empty pp_v ppf vs
28
+
let pp_or_enum ?empty pp_v ppf vs = pp_op_enum "or" ?empty pp_v ppf vs
29
+
let pp_did_you_mean pp_v ppf = function
30
+
| [] -> () | vs -> pf ppf "Did@ you@ mean %a ?" (pp_or_enum pp_v) vs
32
+
let pp_must_be pp_v ppf = function
33
+
| [] -> () | vs -> pf ppf "Must be %a." (pp_or_enum pp_v) vs
35
+
let pp_unknown ~kind pp_v ppf v = pf ppf "Unknown %a %a." kind () pp_v v
36
+
let pp_unknown' ~kind pp_v ~hint ppf (v, hints) = match hints with
37
+
| [] -> pp_unknown ~kind pp_v ppf v
38
+
| hints -> pp_unknown ~kind pp_v ppf v; pp_sp ppf (); (hint pp_v) ppf hints
40
+
let min_by f a b = if f a <= f b then a else b
41
+
let max_by f a b = if f a <= f b then b else a
43
+
let edit_distance s0 s1 =
44
+
let minimum a b c = min a (min b c) in
45
+
let s0 = min_by String.length s0 s1 (* row *)
46
+
and s1 = max_by String.length s0 s1 in (* column *)
47
+
let m = String.length s0 and n = String.length s1 in
48
+
let rec rows row0 row i =
49
+
if i > n then row0.(m) else begin
52
+
if s0.[j - 1] = s1.[i - 1] then row.(j) <- row0.(j - 1) else
53
+
row.(j) <-minimum (row0.(j - 1) + 1) (row0.(j) + 1) (row.(j - 1) + 1)
55
+
rows row row0 (i + 1)
57
+
rows (Array.init (m + 1) (fun x -> x)) (Array.make (m + 1) 0) 1
59
+
let suggest ?(dist = 2) candidates s =
60
+
let add (min, acc) name =
61
+
let d = edit_distance s name in
62
+
if d = min then min, (name :: acc) else
63
+
if d < min then d, [name] else
66
+
let d, suggs = List.fold_left add (max_int, []) candidates in
67
+
if d <= dist (* suggest only if not too far *) then List.rev suggs else []
70
+
module Tloc = struct
72
+
let pp_path = Format.pp_print_string
76
+
type line_pos = line * pos
77
+
(* For lines we keep the byte position just after the newlinexs. It
78
+
editors are still expecting tools to compute visual columns which
79
+
is stupid. By keeping these byte positions we can approximate
80
+
columns by subtracting the line byte position from the byte
81
+
location. This will only be correct on US-ASCII data though. Best
82
+
would be to be able to give them [sbyte] and [ebyte]. *)
87
+
sbyte : pos; ebyte : pos;
88
+
sline : pos * line; eline : pos * line }
91
+
let v ~file ~sbyte ~ebyte ~sline ~eline = { file; sbyte; ebyte; sline; eline }
93
+
let sbyte l = l.sbyte
94
+
let ebyte l = l.ebyte
95
+
let sline l = l.sline
96
+
let eline l = l.eline
99
+
let lnil = (-1, pnil) in
100
+
v ~file:no_file ~sbyte:pnil ~ebyte:pnil ~sline:lnil ~eline:lnil
104
+
if l0.sbyte < l1.sbyte then l0.sbyte, l0.sline else l1.sbyte, l1.sline
107
+
if l0.ebyte < l1.ebyte then l1.ebyte, l1.eline else l0.ebyte, l0.eline
109
+
v ~file:l0.file ~sbyte ~ebyte ~sline ~eline
112
+
v ~file:l.file ~sbyte:l.sbyte ~ebyte:l.sbyte ~sline:l.sline ~eline:l.sline
115
+
v ~file:l.file ~sbyte:l.ebyte ~ebyte:l.ebyte ~sline:l.eline ~eline:l.eline
117
+
let restart ~at:s e =
118
+
v ~file:e.file ~sbyte:s.sbyte ~ebyte:e.ebyte ~sline:s.sline ~eline:e.eline
120
+
let pf = Format.fprintf
121
+
let pp_ocaml ppf l = match l.ebyte < 0 with
122
+
| true -> pf ppf "File \"%a\", line n/a, characters n/a" pp_path l.file
124
+
let pp_lines ppf l = match fst l.sline = fst l.eline with
125
+
| true -> pf ppf "line %d" (fst l.sline)
126
+
| false -> pf ppf "lines %d-%d" (fst l.sline) (fst l.eline)
128
+
(* "characters" represent positions (insertion points) not columns *)
129
+
let pos_s = l.sbyte - snd l.sline in
130
+
let pos_e = l.ebyte - snd l.eline + 1 in
131
+
pf ppf "File \"%a\", %a, characters %d-%d"
132
+
pp_path l.file pp_lines l pos_s pos_e
134
+
let pp_gnu ppf l = match l.ebyte < 0 with
135
+
| true -> pf ppf "%a:" pp_path l.file
137
+
let pp_lines ppf l =
138
+
let col_s = l.sbyte - snd l.sline + 1 in
139
+
let col_e = l.ebyte - snd l.eline + 1 in
140
+
match fst l.sline = fst l.eline with
141
+
| true -> pf ppf "%d.%d-%d" (fst l.sline) col_s col_e
143
+
pf ppf "%d.%d-%d.%d" (fst l.sline) col_s (fst l.eline) col_e
145
+
pf ppf "%a:%a" pp_path l.file pp_lines l
147
+
let pp_dump ppf l =
148
+
pf ppf "[bytes %d;%d][lines %d;%d][lbytes %d;%d]"
149
+
l.sbyte l.ebyte (fst l.sline) (fst l.eline) (snd l.sline) (snd l.eline)
154
+
module Utf_8 = struct
156
+
| L1 | L2 | L3_E0 | L3_E1_EC_or_EE_EF | L3_ED | L4_F0 | L4_F1_F3 | L4_F4 | E
160
+
(* See https://tools.ietf.org/html/rfc3629#section-4 *)
161
+
Printf.printf "[|";
162
+
for i = 0 to 255 do
163
+
if i mod 16 = 0 then Printf.printf "\n";
164
+
if 0x00 <= i && i <= 0x7F then Printf.printf "L1; " else
165
+
if 0xC2 <= i && i <= 0xDF then Printf.printf "L2; " else
166
+
if 0xE0 = i then Printf.printf "L3_E0; " else
167
+
if 0xE1 <= i && i <= 0xEC || 0xEE <= i && i <= 0xEF
168
+
then Printf.printf "L3_E1_EC_or_EE_EF; " else
169
+
if 0xED = i then Printf.printf "L3_ED;" else
170
+
if 0xF0 = i then Printf.printf "L4_F0; " else
171
+
if 0xF1 <= i && i <= 0xF3 then Printf.printf "L4_F1_F3; " else
172
+
if 0xF4 = i then Printf.printf "L4_F4; " else
173
+
Printf.printf "E; "
175
+
Printf.printf "\n|]"
178
+
L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
179
+
L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
180
+
L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
181
+
L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
182
+
L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
183
+
L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
184
+
L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
185
+
L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
186
+
E; E; E; E; E; E; E; E; E; E; E; E; E; E; E; E;
187
+
E; E; E; E; E; E; E; E; E; E; E; E; E; E; E; E;
188
+
E; E; E; E; E; E; E; E; E; E; E; E; E; E; E; E;
189
+
E; E; E; E; E; E; E; E; E; E; E; E; E; E; E; E;
190
+
E; E; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2;
191
+
L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2;
192
+
L3_E0; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF;
193
+
L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF;
194
+
L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF;
195
+
L3_E1_EC_or_EE_EF; L3_ED;L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF;
196
+
L4_F0; L4_F1_F3; L4_F1_F3; L4_F1_F3; L4_F4; E; E; E; E; E; E; E; E; E; E; E;
200
+
module Tdec = struct
201
+
type 'a fmt = Format.formatter -> 'a -> unit
202
+
let pp_did_you_mean = Err_msg.pp_did_you_mean
203
+
let pp_and_enum = Err_msg.pp_and_enum
204
+
let pp_or_enum = Err_msg.pp_or_enum
205
+
let pp_did_you_mean = Err_msg.pp_did_you_mean
206
+
let pp_must_be = Err_msg.pp_must_be
207
+
let pp_unknown = Err_msg.pp_unknown
208
+
let pp_unknown' = Err_msg.pp_unknown'
213
+
{ file : Tloc.fpath; i : string; tok : Buffer.t;
214
+
mutable pos : int; mutable line : int; mutable line_pos : int; }
216
+
let create ?(file = Tloc.no_file) i =
217
+
{ file; i; tok = Buffer.create 255; pos = 0; line = 1; line_pos = 0 }
221
+
let file d = d.file
223
+
let line d = d.line, d.line_pos
225
+
let loc d ~sbyte ~ebyte ~sline ~eline =
226
+
Tloc.v ~file:d.file ~sbyte ~ebyte ~sline ~eline
228
+
let loc_to_here d ~sbyte ~sline =
229
+
loc d ~sbyte ~ebyte:d.pos ~sline ~eline:(d.line, d.line_pos)
231
+
let loc_here d = loc_to_here d ~sbyte:d.pos ~sline:(d.line, d.line_pos)
235
+
exception Err of Tloc.t * string
237
+
let err loc msg = raise_notrace (Err (loc, msg))
238
+
let err_to_here d ~sbyte ~sline fmt =
239
+
Format.kasprintf (err (loc_to_here d ~sbyte ~sline)) fmt
241
+
let err_here d fmt = Format.kasprintf (err (loc_here d)) fmt
242
+
let err_suggest = Err_msg.suggest
246
+
let incr_line d = match d.i.[d.pos] with (* assert (not (eoi d)) *)
247
+
| '\r' -> d.line <- d.line + 1; d.line_pos <- d.pos + 1
249
+
(if d.pos = 0 || d.i.[d.pos - 1] <> '\r' then d.line <- d.line + 1);
250
+
d.line_pos <- d.pos + 1;
254
+
let eoi d = d.pos >= String.length d.i [@@ ocaml.inline]
255
+
let byte d = if eoi d then 0xFFFF else Char.code d.i.[d.pos] [@@ ocaml.inline]
256
+
let accept_byte d = incr_line d; d.pos <- d.pos + 1
259
+
let accept_utf_8 accept d =
260
+
let err d = match byte d with
261
+
| 0xFFFF -> err_here d "UTF-8 decoding error: unexpected end of input"
262
+
| b -> err_here d "UTF-8 decoding error: byte %02x illegal here" b
264
+
let accept_tail d = if (byte d lsr 6 = 0b10) then accept d else err d in
268
+
(* If a subsequent [byte d] invocation is 0xFFFF we get to [err]. *)
269
+
match Utf_8.case.(b) with
271
+
| L2 -> accept d; accept_tail d
274
+
if (byte d - 0xA0 < 0xBF - 0xA0) then accept d else err d;
276
+
| L3_E1_EC_or_EE_EF -> accept d; accept_tail d; accept_tail d
279
+
if (byte d - 0x80 < 0x9F - 0x80) then accept d else err d;
283
+
if (byte d - 0x90 < 0xBF - 0x90) then accept d else err d;
284
+
accept_tail d; accept_tail d
287
+
accept_tail d; accept_tail d; accept_tail d;
290
+
if (byte d - 0x80 < 0x8F - 0x80) then accept d else err d;
293
+
let accept_uchar d = accept_utf_8 accept_byte d
297
+
let tok_reset d = Buffer.reset d.tok [@@ ocaml.inline]
298
+
let tok_pop d = let t = Buffer.contents d.tok in tok_reset d; t
301
+
let tok_accept_byte d =
302
+
Buffer.add_char d.tok d.i.[d.pos]; accept_byte d; [@@ ocaml.inline]
304
+
let tok_accept_uchar d = accept_utf_8 tok_accept_byte d [@@ ocaml.inline]
305
+
let tok_add_byte d b = Buffer.add_char d.tok (Char.chr b) [@@ ocaml.inline]
306
+
let tok_add_bytes d s = Buffer.add_string d.tok s [@@ ocaml.inline]
307
+
let tok_add_char d c = Buffer.add_char d.tok c [@@ ocaml.inline]
309
+
let buffer_add_uchar b u = match Uchar.to_int u with
310
+
(* XXX From 4.06 use Buffer.add_utf_8_uchar *)
311
+
| u when u < 0 -> assert false
312
+
| u when u <= 0x007F ->
313
+
Buffer.add_char b (Char.unsafe_chr u)
314
+
| u when u <= 0x07FF ->
315
+
Buffer.add_char b (Char.unsafe_chr (0xC0 lor (u lsr 6)));
316
+
Buffer.add_char b (Char.unsafe_chr (0x80 lor (u land 0x3F)));
317
+
| u when u <= 0xFFFF ->
318
+
Buffer.add_char b (Char.unsafe_chr (0xE0 lor (u lsr 12)));
319
+
Buffer.add_char b (Char.unsafe_chr (0x80 lor ((u lsr 6) land 0x3F)));
320
+
Buffer.add_char b (Char.unsafe_chr (0x80 lor (u land 0x3F)));
321
+
| u when u <= 0x10FFFF ->
322
+
Buffer.add_char b (Char.unsafe_chr (0xF0 lor (u lsr 18)));
323
+
Buffer.add_char b (Char.unsafe_chr (0x80 lor ((u lsr 12) land 0x3F)));
324
+
Buffer.add_char b (Char.unsafe_chr (0x80 lor ((u lsr 6) land 0x3F)));
325
+
Buffer.add_char b (Char.unsafe_chr (0x80 lor (u land 0x3F)))
326
+
| _ -> assert false
328
+
let tok_add_uchar d u = buffer_add_uchar d.tok u
331
+
module Url = struct
332
+
type scheme = string
333
+
type authority = string
335
+
type query = string
336
+
type fragment = string
339
+
let string_subrange ?(first = 0) ?last s =
340
+
let max = String.length s - 1 in
341
+
let last = match last with
343
+
| Some l when l > max -> max
346
+
let first = if first < 0 then 0 else first in
347
+
if first > last then "" else String.sub s first (last - first + 1)
349
+
let white = function ' ' | '\t' .. '\r' -> true | _ -> false
350
+
let alpha = function 'A' .. 'Z' | 'a' .. 'z' -> true | _ -> false
351
+
let digit = function '0' .. '9' -> true | _ -> false
353
+
let scheme_char c =
354
+
alpha c || digit c || Char.equal c '+' || Char.equal c '-' ||
357
+
let find_scheme_colon u =
358
+
if u = "" || not (alpha u.[0]) then None else
359
+
let max = String.length u - 1 in
361
+
while !i <= max && scheme_char u.[!i] do incr i done;
362
+
if !i > max || u.[!i] <> ':' then None else Some !i
364
+
let find_authority_last ~start u =
365
+
let max = String.length u - 1 in
366
+
if start > max then None else
367
+
if start + 1 > max then Some (start - 1) else
368
+
if not (u.[start] = '/' && u.[start + 1] = '/') then Some (start - 1) else
369
+
let i = ref (start + 2) in
370
+
while (!i <= max && u.[!i] <> '/' && u.[!i] <> '?' && u.[!i] <> '#')
374
+
let scheme u = match find_scheme_colon u with
375
+
| None -> None | Some i -> Some (String.sub u 0 i)
378
+
let start = match find_scheme_colon u with
379
+
| None -> 0 | Some i -> i + 1
381
+
let first = match find_authority_last ~start u with
382
+
| None -> start | Some last -> last + 1
384
+
let max = String.length u - 1 in
385
+
if first > max || u.[first] = '#' || u.[first] = '?' then None else Some first
387
+
let path_last u ~first =
388
+
let max = String.length u - 1 in
389
+
let i = ref (first + 1) in
390
+
while (!i <= max && u.[!i] <> '?' && u.[!i] <> '#') do incr i done;
393
+
let path u = match path_first u with
395
+
| Some first -> Some (string_subrange ~first ~last:(path_last u ~first) u)
398
+
let escape = (* The escape rules are a bit unclear. These are those of LaTeX *)
399
+
let byte_replaced_length char_len s =
400
+
let rec loop s max i l = match i > max with
402
+
| false -> loop s max (i + 1) (l + char_len s.[i])
404
+
loop s (String.length s - 1) 0 0
406
+
let byte_replace set_char s ~len ~replaced_len =
407
+
let b = Bytes.create replaced_len in
408
+
let rec loop s max i k = match i > max with
409
+
| true -> Bytes.unsafe_to_string b
410
+
| false -> loop s max (i + 1) (set_char b k s.[i])
412
+
loop s (len - 1) 0 0
414
+
let byte_escaper char_len set_char s =
415
+
let len = String.length s in
416
+
let replaced_len = byte_replaced_length char_len s in
417
+
match replaced_len = len with
419
+
| false -> byte_replace set_char s ~len ~replaced_len
421
+
let tilde_esc = "\\textasciitilde" in
422
+
let tilde_len = String.length tilde_esc in
423
+
let circ_esc = "\\textasciicircum" in
424
+
let circ_len = String.length circ_esc in
425
+
let bslash_esc = "\\textbackslash" in
426
+
let bslash_len = String.length bslash_esc in
427
+
let char_len = function
428
+
| '&' | '%' | '$' | '#' | '_' | '{' | '}' -> 2
431
+
| '\\' -> bslash_len
434
+
let set_char b i = function
435
+
| '&' | '%' | '$' | '#' | '_' | '{' | '}' as c ->
436
+
Bytes.set b i '\\'; Bytes.set b (i + 1) c; i + 2
437
+
| '~' -> Bytes.blit_string tilde_esc 0 b i tilde_len; i + tilde_len
438
+
| '^' -> Bytes.blit_string circ_esc 0 b i circ_len; i + circ_len
439
+
| '\\' -> Bytes.blit_string bslash_esc 0 b i bslash_len; i + bslash_len
440
+
| c -> Bytes.set b i c; i + 1
442
+
byte_escaper char_len set_char
444
+
(* TODO unescape on decode. *)
449
+
fields : string SM.t;
452
+
let v ~type' ~cite_key ~fields () = { type'; cite_key; fields; loc = Tloc.nil }
454
+
let type' e = e.type'
455
+
let cite_key e = e.cite_key
456
+
let fields e = e.fields
459
+
let pp_field ppf (k, v) = Fmt.pf ppf "@[<h>%s = {%s}@]" k (escape v) in
460
+
Fmt.pf ppf "@[<v2>@%s{%s,@,%a}@]" e.type' e.cite_key
461
+
(Fmt.iter_bindings ~sep:Fmt.comma SM.iter pp_field) e.fields
466
+
List.filter (fun s -> s <> "") @@
467
+
List.map String.trim (String.split_on_char ',' s)
469
+
let doi e = match SM.find_opt "doi" e.fields with
472
+
let ret doi = match String.trim doi with
476
+
(* chop scheme and authority in case there is one *)
477
+
match Url.scheme doi with
480
+
match Url.path doi with
484
+
let keywords e = Option.map list_value (SM.find_opt "keywords" e.fields)
485
+
let annote e = SM.find_opt "annote" e.fields
489
+
type error_kind = string
490
+
type error = error_kind * Tloc.t
492
+
let pp_error ppf (err, l) =
493
+
Fmt.pf ppf "@[<v>%a:@,%a: %s@]"
494
+
Tloc.pp l Fmt.string "Error" err
496
+
let curr_char d = (* TODO better escaping (this is for error reports) *)
497
+
Tdec.tok_reset d; Tdec.tok_accept_uchar d; Tdec.tok_pop d
499
+
let err_illegal_uchar d = Tdec.err_here d "illegal character: %s" (curr_char d)
500
+
let err_illegal_byte d b = Tdec.err_here d "illegal character U+%04X" b
501
+
let err_expected d exp = Tdec.err_here d "expected %s" exp
502
+
let err_eoi msg d ~sbyte ~sline =
503
+
Tdec.err_to_here d ~sbyte ~sline "end of input: %s" msg
505
+
let err_eoi_entry = err_eoi "unclosed BibTeX entry"
506
+
let err_eoi_field = err_eoi "unfinished BibTeX entry field"
507
+
let err_eoi_value = err_eoi "unfinished BibTeX field value"
508
+
let err_brace d ~sbyte ~sline =
509
+
Tdec.err_to_here d ~sbyte ~sline "incorrect brace {} nesting"
511
+
let dec_byte d = match Tdec.byte d with
512
+
| c when 0x00 <= c && c <= 0x08 || 0x0E <= c && c <= 0x1F || c = 0x7F ->
513
+
err_illegal_byte d c
517
+
let rec skip_white d = match dec_byte d with
518
+
| 0x20 | 0x09 | 0x0A | 0x0B | 0x0C | 0x0D -> Tdec.accept_byte d; skip_white d
521
+
let dec_token ~stop d =
522
+
let rec loop d = match dec_byte d with
523
+
| 0x28 | 0x29 | 0x3B | 0x22
524
+
| 0x20 | 0x09 | 0x0A | 0x0B | 0x0C | 0x0D
525
+
| 0xFFFF -> Tdec.tok_pop d
526
+
| c when c = stop -> Tdec.tok_pop d
527
+
| _ -> Tdec.tok_accept_uchar d; loop d
531
+
let rec dec_string ~sbyte ~sline ~stop d = match dec_byte d with
532
+
| 0xFFFF -> err_eoi_value ~sbyte ~sline d
533
+
| c when c = stop -> Tdec.accept_byte d; Tdec.tok_pop d
534
+
| _ -> Tdec.tok_accept_uchar d; dec_string ~sbyte ~sline ~stop d
536
+
let rec dec_tex i ~sbyte ~sline d = match dec_byte d with
537
+
| 0xFFFF -> err_eoi_value ~sbyte ~sline d
539
+
if i = 0 then (Tdec.accept_byte d; Tdec.tok_pop d) else
540
+
(Tdec.tok_accept_uchar d; dec_tex (i - 1) ~sbyte ~sline d)
542
+
let i = if c = 0x007B then i + 1 else i in
543
+
Tdec.tok_accept_uchar d; dec_tex i ~sbyte ~sline d
546
+
let sbyte = Tdec.pos d and sline = Tdec.line d in
547
+
match dec_byte d with
548
+
| 0x007B (* { *) -> Tdec.accept_byte d; dec_tex 0 ~sbyte ~sline d
549
+
| 0x0022 -> Tdec.accept_byte d; dec_string ~sbyte ~sline ~stop:0x0022 d
550
+
| _ -> dec_token ~stop:0x002C d
552
+
let dec_field d acc =
553
+
let sbyte = Tdec.pos d and sline = Tdec.line d in
554
+
let id = dec_token ~stop:0x003D (* = *) d in
556
+
match dec_byte d with
557
+
| 0xFFFF -> err_eoi_field ~sbyte ~sline d
558
+
| 0x003D (* = *) ->
559
+
Tdec.accept_byte d;
561
+
begin match dec_byte d with
562
+
| 0xFFFF -> err_eoi_field ~sbyte ~sline d
564
+
SM.add (String.lowercase_ascii id) (dec_value d) acc
566
+
| _ -> err_expected d "'='"
568
+
let rec dec_fields ~sbyte ~sline d acc =
570
+
match dec_byte d with
571
+
| 0xFFFF -> err_eoi_entry ~sbyte ~sline d
572
+
| 0x007D (* } *) -> acc
574
+
let acc = dec_field d acc in
576
+
match dec_byte d with
577
+
| 0x002C (* , *) -> Tdec.accept_byte d; dec_fields ~sbyte ~sline d acc
578
+
| 0x007D (* } *) -> acc
579
+
| 0xFFFF -> err_eoi_entry ~sbyte ~sline d
580
+
| b -> err_expected d "',' or '}'"
583
+
let sbyte = Tdec.pos d and sline = Tdec.line d in
584
+
Tdec.accept_byte d (* @ *);
585
+
let type' = dec_token ~stop:0x007B d (* { *) in
586
+
match dec_byte d with
588
+
Tdec.accept_byte d;
589
+
let cite_key = dec_token ~stop:0x002C d (* , *) in
591
+
begin match dec_byte d with
592
+
| 0x002C (* , *) ->
593
+
Tdec.accept_byte d;
594
+
let fields = dec_fields ~sbyte ~sline d SM.empty in
595
+
let loc = Tdec.loc_to_here d ~sbyte ~sline in
596
+
Tdec.accept_byte d;
597
+
{ type'; cite_key; fields; loc }
598
+
| _ -> err_expected d "','"
600
+
| _ -> err_expected d "'{'"
602
+
let dec_entries d =
603
+
let rec loop d acc =
605
+
match dec_byte d with
606
+
| 0x0040 (* @ *) -> loop d (dec_entry d :: acc)
607
+
| 0xFFFF -> List.rev acc
608
+
| b -> err_illegal_uchar d
612
+
let of_string ?(file = Fpath.v "-") s =
614
+
let file = Fpath.to_string file in
615
+
let d = Tdec.create ~file s in
617
+
with Tdec.Err (loc, msg) -> Error (msg, loc)
619
+
let of_string' ?file s =
620
+
Result.map_error (fun e -> Fmt.str "%a" pp_error e) @@
621
+
(of_string ?file s)
623
+
let to_string es = Fmt.str "@[<v>%a@]" (Fmt.list pp) es
625
+
(*---------------------------------------------------------------------------
626
+
Copyright (c) 2019 University of Bern
628
+
Permission to use, copy, modify, and/or distribute this software for any
629
+
purpose with or without fee is hereby granted, provided that the above
630
+
copyright notice and this permission notice appear in all copies.
632
+
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
633
+
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
634
+
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
635
+
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
636
+
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
637
+
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
638
+
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
639
+
---------------------------------------------------------------------------*)