My agentic slop goes here. Not intended for anyone else!
1(*---------------------------------------------------------------------------
2 Copyright (c) 2019 University of Bern. All rights reserved.
3 Distributed under the ISC license, see terms at the end of the file.
4 ---------------------------------------------------------------------------*)
5
6[@@@warning "-27-32-33-34"]
7
8module SM = Map.Make(String)
9
10module Err_msg = struct
11 let pf = Format.fprintf
12 let pp_sp = Format.pp_print_space
13 let pp_nop _ () = ()
14 let pp_any fmt ppf _ = pf ppf fmt
15
16 let pp_op_enum op ?(empty = pp_nop) pp_v ppf = function
17 | [] -> empty ppf ()
18 | [v] -> pp_v ppf v
19 | _ as vs ->
20 let rec loop ppf = function
21 | [v0; v1] -> pf ppf "%a@ %s@ %a" pp_v v0 op pp_v v1
22 | v :: vs -> pf ppf "%a,@ " pp_v v; loop ppf vs
23 | [] -> assert false
24 in
25 loop ppf vs
26
27 let pp_and_enum ?empty pp_v ppf vs = pp_op_enum "and" ?empty pp_v ppf vs
28 let pp_or_enum ?empty pp_v ppf vs = pp_op_enum "or" ?empty pp_v ppf vs
29 let pp_did_you_mean pp_v ppf = function
30 | [] -> () | vs -> pf ppf "Did@ you@ mean %a ?" (pp_or_enum pp_v) vs
31
32 let pp_must_be pp_v ppf = function
33 | [] -> () | vs -> pf ppf "Must be %a." (pp_or_enum pp_v) vs
34
35 let pp_unknown ~kind pp_v ppf v = pf ppf "Unknown %a %a." kind () pp_v v
36 let pp_unknown' ~kind pp_v ~hint ppf (v, hints) = match hints with
37 | [] -> pp_unknown ~kind pp_v ppf v
38 | hints -> pp_unknown ~kind pp_v ppf v; pp_sp ppf (); (hint pp_v) ppf hints
39
40 let min_by f a b = if f a <= f b then a else b
41 let max_by f a b = if f a <= f b then b else a
42
43 let edit_distance s0 s1 =
44 let minimum a b c = min a (min b c) in
45 let s0 = min_by String.length s0 s1 (* row *)
46 and s1 = max_by String.length s0 s1 in (* column *)
47 let m = String.length s0 and n = String.length s1 in
48 let rec rows row0 row i =
49 if i > n then row0.(m) else begin
50 row.(0) <- i;
51 for j = 1 to m do
52 if s0.[j - 1] = s1.[i - 1] then row.(j) <- row0.(j - 1) else
53 row.(j) <-minimum (row0.(j - 1) + 1) (row0.(j) + 1) (row.(j - 1) + 1)
54 done;
55 rows row row0 (i + 1)
56 end in
57 rows (Array.init (m + 1) (fun x -> x)) (Array.make (m + 1) 0) 1
58
59 let suggest ?(dist = 2) candidates s =
60 let add (min, acc) name =
61 let d = edit_distance s name in
62 if d = min then min, (name :: acc) else
63 if d < min then d, [name] else
64 min, acc
65 in
66 let d, suggs = List.fold_left add (max_int, []) candidates in
67 if d <= dist (* suggest only if not too far *) then List.rev suggs else []
68end
69
70module Tloc = struct
71 type fpath = string
72 let pp_path = Format.pp_print_string
73
74 type pos = int
75 type line = int
76 type line_pos = line * pos
77 (* For lines we keep the byte position just after the newlinexs. It
78 editors are still expecting tools to compute visual columns which
79 is stupid. By keeping these byte positions we can approximate
80 columns by subtracting the line byte position from the byte
81 location. This will only be correct on US-ASCII data though. Best
82 would be to be able to give them [sbyte] and [ebyte]. *)
83
84 let l v = v
85 type t =
86 { file : fpath;
87 sbyte : pos; ebyte : pos;
88 sline : pos * line; eline : pos * line }
89
90 let no_file = "-"
91 let v ~file ~sbyte ~ebyte ~sline ~eline = { file; sbyte; ebyte; sline; eline }
92 let file l = l.file
93 let sbyte l = l.sbyte
94 let ebyte l = l.ebyte
95 let sline l = l.sline
96 let eline l = l.eline
97 let nil =
98 let pnil = -1 in
99 let lnil = (-1, pnil) in
100 v ~file:no_file ~sbyte:pnil ~ebyte:pnil ~sline:lnil ~eline:lnil
101
102 let merge l0 l1 =
103 let sbyte, sline =
104 if l0.sbyte < l1.sbyte then l0.sbyte, l0.sline else l1.sbyte, l1.sline
105 in
106 let ebyte, eline =
107 if l0.ebyte < l1.ebyte then l1.ebyte, l1.eline else l0.ebyte, l0.eline
108 in
109 v ~file:l0.file ~sbyte ~ebyte ~sline ~eline
110
111 let to_start l =
112 v ~file:l.file ~sbyte:l.sbyte ~ebyte:l.sbyte ~sline:l.sline ~eline:l.sline
113
114 let to_end l =
115 v ~file:l.file ~sbyte:l.ebyte ~ebyte:l.ebyte ~sline:l.eline ~eline:l.eline
116
117 let restart ~at:s e =
118 v ~file:e.file ~sbyte:s.sbyte ~ebyte:e.ebyte ~sline:s.sline ~eline:e.eline
119
120 let pf = Format.fprintf
121 let pp_ocaml ppf l = match l.ebyte < 0 with
122 | true -> pf ppf "File \"%a\", line n/a, characters n/a" pp_path l.file
123 | false ->
124 let pp_lines ppf l = match fst l.sline = fst l.eline with
125 | true -> pf ppf "line %d" (fst l.sline)
126 | false -> pf ppf "lines %d-%d" (fst l.sline) (fst l.eline)
127 in
128 (* "characters" represent positions (insertion points) not columns *)
129 let pos_s = l.sbyte - snd l.sline in
130 let pos_e = l.ebyte - snd l.eline + 1 in
131 pf ppf "File \"%a\", %a, characters %d-%d"
132 pp_path l.file pp_lines l pos_s pos_e
133
134 let pp_gnu ppf l = match l.ebyte < 0 with
135 | true -> pf ppf "%a:" pp_path l.file
136 | false ->
137 let pp_lines ppf l =
138 let col_s = l.sbyte - snd l.sline + 1 in
139 let col_e = l.ebyte - snd l.eline + 1 in
140 match fst l.sline = fst l.eline with
141 | true -> pf ppf "%d.%d-%d" (fst l.sline) col_s col_e
142 | false ->
143 pf ppf "%d.%d-%d.%d" (fst l.sline) col_s (fst l.eline) col_e
144 in
145 pf ppf "%a:%a" pp_path l.file pp_lines l
146
147 let pp_dump ppf l =
148 pf ppf "[bytes %d;%d][lines %d;%d][lbytes %d;%d]"
149 l.sbyte l.ebyte (fst l.sline) (fst l.eline) (snd l.sline) (snd l.eline)
150
151 let pp = pp_gnu
152end
153
154module Utf_8 = struct
155 type case =
156 | L1 | L2 | L3_E0 | L3_E1_EC_or_EE_EF | L3_ED | L4_F0 | L4_F1_F3 | L4_F4 | E
157
158 let case =
159(*
160 (* See https://tools.ietf.org/html/rfc3629#section-4 *)
161 Printf.printf "[|";
162 for i = 0 to 255 do
163 if i mod 16 = 0 then Printf.printf "\n";
164 if 0x00 <= i && i <= 0x7F then Printf.printf "L1; " else
165 if 0xC2 <= i && i <= 0xDF then Printf.printf "L2; " else
166 if 0xE0 = i then Printf.printf "L3_E0; " else
167 if 0xE1 <= i && i <= 0xEC || 0xEE <= i && i <= 0xEF
168 then Printf.printf "L3_E1_EC_or_EE_EF; " else
169 if 0xED = i then Printf.printf "L3_ED;" else
170 if 0xF0 = i then Printf.printf "L4_F0; " else
171 if 0xF1 <= i && i <= 0xF3 then Printf.printf "L4_F1_F3; " else
172 if 0xF4 = i then Printf.printf "L4_F4; " else
173 Printf.printf "E; "
174 done;
175 Printf.printf "\n|]"
176*)
177 [|
178 L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
179 L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
180 L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
181 L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
182 L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
183 L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
184 L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
185 L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
186 E; E; E; E; E; E; E; E; E; E; E; E; E; E; E; E;
187 E; E; E; E; E; E; E; E; E; E; E; E; E; E; E; E;
188 E; E; E; E; E; E; E; E; E; E; E; E; E; E; E; E;
189 E; E; E; E; E; E; E; E; E; E; E; E; E; E; E; E;
190 E; E; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2;
191 L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2;
192 L3_E0; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF;
193 L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF;
194 L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF;
195 L3_E1_EC_or_EE_EF; L3_ED;L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF;
196 L4_F0; L4_F1_F3; L4_F1_F3; L4_F1_F3; L4_F4; E; E; E; E; E; E; E; E; E; E; E;
197 |]
198end
199
200module Tdec = struct
201 type 'a fmt = Format.formatter -> 'a -> unit
202 let pp_did_you_mean = Err_msg.pp_did_you_mean
203 let pp_and_enum = Err_msg.pp_and_enum
204 let pp_or_enum = Err_msg.pp_or_enum
205 let pp_did_you_mean = Err_msg.pp_did_you_mean
206 let pp_must_be = Err_msg.pp_must_be
207 let pp_unknown = Err_msg.pp_unknown
208 let pp_unknown' = Err_msg.pp_unknown'
209
210 (* Decoders *)
211
212 type t =
213 { file : Tloc.fpath; i : string; tok : Buffer.t;
214 mutable pos : int; mutable line : int; mutable line_pos : int; }
215
216 let create ?(file = Tloc.no_file) i =
217 { file; i; tok = Buffer.create 255; pos = 0; line = 1; line_pos = 0 }
218
219 (* Location *)
220
221 let file d = d.file
222 let pos d = d.pos
223 let line d = d.line, d.line_pos
224
225 let loc d ~sbyte ~ebyte ~sline ~eline =
226 Tloc.v ~file:d.file ~sbyte ~ebyte ~sline ~eline
227
228 let loc_to_here d ~sbyte ~sline =
229 loc d ~sbyte ~ebyte:d.pos ~sline ~eline:(d.line, d.line_pos)
230
231 let loc_here d = loc_to_here d ~sbyte:d.pos ~sline:(d.line, d.line_pos)
232
233 (* Errors *)
234
235 exception Err of Tloc.t * string
236
237 let err loc msg = raise_notrace (Err (loc, msg))
238 let err_to_here d ~sbyte ~sline fmt =
239 Format.kasprintf (err (loc_to_here d ~sbyte ~sline)) fmt
240
241 let err_here d fmt = Format.kasprintf (err (loc_here d)) fmt
242 let err_suggest = Err_msg.suggest
243
244 (* Lexing *)
245
246 let incr_line d = match d.i.[d.pos] with (* assert (not (eoi d)) *)
247 | '\r' -> d.line <- d.line + 1; d.line_pos <- d.pos + 1
248 | '\n' ->
249 (if d.pos = 0 || d.i.[d.pos - 1] <> '\r' then d.line <- d.line + 1);
250 d.line_pos <- d.pos + 1;
251 | _ -> ()
252 [@@ ocaml.inline]
253
254 let eoi d = d.pos >= String.length d.i [@@ ocaml.inline]
255 let byte d = if eoi d then 0xFFFF else Char.code d.i.[d.pos] [@@ ocaml.inline]
256 let accept_byte d = incr_line d; d.pos <- d.pos + 1
257 [@@ ocaml.inline]
258
259 let accept_utf_8 accept d =
260 let err d = match byte d with
261 | 0xFFFF -> err_here d "UTF-8 decoding error: unexpected end of input"
262 | b -> err_here d "UTF-8 decoding error: byte %02x illegal here" b
263 in
264 let accept_tail d = if (byte d lsr 6 = 0b10) then accept d else err d in
265 match byte d with
266 | 0xFFFF -> err d
267 | b ->
268 (* If a subsequent [byte d] invocation is 0xFFFF we get to [err]. *)
269 match Utf_8.case.(b) with
270 | L1 -> accept d
271 | L2 -> accept d; accept_tail d
272 | L3_E0 ->
273 accept d;
274 if (byte d - 0xA0 < 0xBF - 0xA0) then accept d else err d;
275 accept_tail d
276 | L3_E1_EC_or_EE_EF -> accept d; accept_tail d; accept_tail d
277 | L3_ED ->
278 accept d;
279 if (byte d - 0x80 < 0x9F - 0x80) then accept d else err d;
280 accept_tail d
281 | L4_F0 ->
282 accept d;
283 if (byte d - 0x90 < 0xBF - 0x90) then accept d else err d;
284 accept_tail d; accept_tail d
285 | L4_F1_F3 ->
286 accept d;
287 accept_tail d; accept_tail d; accept_tail d;
288 | L4_F4 ->
289 accept d;
290 if (byte d - 0x80 < 0x8F - 0x80) then accept d else err d;
291 | E -> err d
292
293 let accept_uchar d = accept_utf_8 accept_byte d
294
295 (* Tokenizer *)
296
297 let tok_reset d = Buffer.reset d.tok [@@ ocaml.inline]
298 let tok_pop d = let t = Buffer.contents d.tok in tok_reset d; t
299 [@@ ocaml.inline]
300
301 let tok_accept_byte d =
302 Buffer.add_char d.tok d.i.[d.pos]; accept_byte d; [@@ ocaml.inline]
303
304 let tok_accept_uchar d = accept_utf_8 tok_accept_byte d [@@ ocaml.inline]
305 let tok_add_byte d b = Buffer.add_char d.tok (Char.chr b) [@@ ocaml.inline]
306 let tok_add_bytes d s = Buffer.add_string d.tok s [@@ ocaml.inline]
307 let tok_add_char d c = Buffer.add_char d.tok c [@@ ocaml.inline]
308
309 let buffer_add_uchar b u = match Uchar.to_int u with
310 (* XXX From 4.06 use Buffer.add_utf_8_uchar *)
311 | u when u < 0 -> assert false
312 | u when u <= 0x007F ->
313 Buffer.add_char b (Char.unsafe_chr u)
314 | u when u <= 0x07FF ->
315 Buffer.add_char b (Char.unsafe_chr (0xC0 lor (u lsr 6)));
316 Buffer.add_char b (Char.unsafe_chr (0x80 lor (u land 0x3F)));
317 | u when u <= 0xFFFF ->
318 Buffer.add_char b (Char.unsafe_chr (0xE0 lor (u lsr 12)));
319 Buffer.add_char b (Char.unsafe_chr (0x80 lor ((u lsr 6) land 0x3F)));
320 Buffer.add_char b (Char.unsafe_chr (0x80 lor (u land 0x3F)));
321 | u when u <= 0x10FFFF ->
322 Buffer.add_char b (Char.unsafe_chr (0xF0 lor (u lsr 18)));
323 Buffer.add_char b (Char.unsafe_chr (0x80 lor ((u lsr 12) land 0x3F)));
324 Buffer.add_char b (Char.unsafe_chr (0x80 lor ((u lsr 6) land 0x3F)));
325 Buffer.add_char b (Char.unsafe_chr (0x80 lor (u land 0x3F)))
326 | _ -> assert false
327
328 let tok_add_uchar d u = buffer_add_uchar d.tok u
329end
330
331module Url = struct
332 type scheme = string
333 type authority = string
334 type path = string
335 type query = string
336 type fragment = string
337 type t = string
338
339 let string_subrange ?(first = 0) ?last s =
340 let max = String.length s - 1 in
341 let last = match last with
342 | None -> max
343 | Some l when l > max -> max
344 | Some l -> l
345 in
346 let first = if first < 0 then 0 else first in
347 if first > last then "" else String.sub s first (last - first + 1)
348
349 let white = function ' ' | '\t' .. '\r' -> true | _ -> false
350 let alpha = function 'A' .. 'Z' | 'a' .. 'z' -> true | _ -> false
351 let digit = function '0' .. '9' -> true | _ -> false
352
353 let scheme_char c =
354 alpha c || digit c || Char.equal c '+' || Char.equal c '-' ||
355 Char.equal '.' c
356
357 let find_scheme_colon u =
358 if u = "" || not (alpha u.[0]) then None else
359 let max = String.length u - 1 in
360 let i = ref 1 in
361 while !i <= max && scheme_char u.[!i] do incr i done;
362 if !i > max || u.[!i] <> ':' then None else Some !i
363
364 let find_authority_last ~start u =
365 let max = String.length u - 1 in
366 if start > max then None else
367 if start + 1 > max then Some (start - 1) else
368 if not (u.[start] = '/' && u.[start + 1] = '/') then Some (start - 1) else
369 let i = ref (start + 2) in
370 while (!i <= max && u.[!i] <> '/' && u.[!i] <> '?' && u.[!i] <> '#')
371 do incr i done;
372 Some (!i - 1)
373
374 let scheme u = match find_scheme_colon u with
375 | None -> None | Some i -> Some (String.sub u 0 i)
376
377 let path_first u =
378 let start = match find_scheme_colon u with
379 | None -> 0 | Some i -> i + 1
380 in
381 let first = match find_authority_last ~start u with
382 | None -> start | Some last -> last + 1
383 in
384 let max = String.length u - 1 in
385 if first > max || u.[first] = '#' || u.[first] = '?' then None else Some first
386
387 let path_last u ~first =
388 let max = String.length u - 1 in
389 let i = ref (first + 1) in
390 while (!i <= max && u.[!i] <> '?' && u.[!i] <> '#') do incr i done;
391 !i - 1
392
393 let path u = match path_first u with
394 | None -> None
395 | Some first -> Some (string_subrange ~first ~last:(path_last u ~first) u)
396end
397
398let escape = (* The escape rules are a bit unclear. These are those of LaTeX *)
399 let byte_replaced_length char_len s =
400 let rec loop s max i l = match i > max with
401 | true -> l
402 | false -> loop s max (i + 1) (l + char_len s.[i])
403 in
404 loop s (String.length s - 1) 0 0
405 in
406 let byte_replace set_char s ~len ~replaced_len =
407 let b = Bytes.create replaced_len in
408 let rec loop s max i k = match i > max with
409 | true -> Bytes.unsafe_to_string b
410 | false -> loop s max (i + 1) (set_char b k s.[i])
411 in
412 loop s (len - 1) 0 0
413 in
414 let byte_escaper char_len set_char s =
415 let len = String.length s in
416 let replaced_len = byte_replaced_length char_len s in
417 match replaced_len = len with
418 | true -> s
419 | false -> byte_replace set_char s ~len ~replaced_len
420 in
421 let tilde_esc = "\\textasciitilde" in
422 let tilde_len = String.length tilde_esc in
423 let circ_esc = "\\textasciicircum" in
424 let circ_len = String.length circ_esc in
425 let bslash_esc = "\\textbackslash" in
426 let bslash_len = String.length bslash_esc in
427 let char_len = function
428 | '&' | '%' | '$' | '#' | '_' | '{' | '}' -> 2
429 | '~' -> tilde_len
430 | '^' -> circ_len
431 | '\\' -> bslash_len
432 | _ -> 1
433 in
434 let set_char b i = function
435 | '&' | '%' | '$' | '#' | '_' | '{' | '}' as c ->
436 Bytes.set b i '\\'; Bytes.set b (i + 1) c; i + 2
437 | '~' -> Bytes.blit_string tilde_esc 0 b i tilde_len; i + tilde_len
438 | '^' -> Bytes.blit_string circ_esc 0 b i circ_len; i + circ_len
439 | '\\' -> Bytes.blit_string bslash_esc 0 b i bslash_len; i + bslash_len
440 | c -> Bytes.set b i c; i + 1
441 in
442 byte_escaper char_len set_char
443
444(* TODO unescape on decode. *)
445
446type t =
447 { type' : string;
448 cite_key : string;
449 fields : string SM.t;
450 loc : Tloc.t; }
451
452let v ~type' ~cite_key ~fields () = { type'; cite_key; fields; loc = Tloc.nil }
453
454let type' e = e.type'
455let cite_key e = e.cite_key
456let fields e = e.fields
457let loc e = e.loc
458let pp ppf e =
459 let pp_field ppf (k, v) = Fmt.pf ppf "@[<h>%s = {%s}@]" k (escape v) in
460 Fmt.pf ppf "@[<v2>@%s{%s,@,%a}@]" e.type' e.cite_key
461 (Fmt.iter_bindings ~sep:Fmt.comma SM.iter pp_field) e.fields
462
463(* Field values *)
464
465let list_value s =
466 List.filter (fun s -> s <> "") @@
467 List.map String.trim (String.split_on_char ',' s)
468
469let doi e = match SM.find_opt "doi" e.fields with
470| None -> None
471| Some doi ->
472 let ret doi = match String.trim doi with
473 | "" -> None
474 | doi -> Some doi
475 in
476 (* chop scheme and authority in case there is one *)
477 match Url.scheme doi with
478 | None -> ret doi
479 | Some _ ->
480 match Url.path doi with
481 | None -> ret doi
482 | Some p -> ret p
483
484let keywords e = Option.map list_value (SM.find_opt "keywords" e.fields)
485let annote e = SM.find_opt "annote" e.fields
486
487(* Codec *)
488
489type error_kind = string
490type error = error_kind * Tloc.t
491
492let pp_error ppf (err, l) =
493 Fmt.pf ppf "@[<v>%a:@,%a: %s@]"
494 Tloc.pp l Fmt.string "Error" err
495
496let curr_char d = (* TODO better escaping (this is for error reports) *)
497 Tdec.tok_reset d; Tdec.tok_accept_uchar d; Tdec.tok_pop d
498
499let err_illegal_uchar d = Tdec.err_here d "illegal character: %s" (curr_char d)
500let err_illegal_byte d b = Tdec.err_here d "illegal character U+%04X" b
501let err_expected d exp = Tdec.err_here d "expected %s" exp
502let err_eoi msg d ~sbyte ~sline =
503 Tdec.err_to_here d ~sbyte ~sline "end of input: %s" msg
504
505let err_eoi_entry = err_eoi "unclosed BibTeX entry"
506let err_eoi_field = err_eoi "unfinished BibTeX entry field"
507let err_eoi_value = err_eoi "unfinished BibTeX field value"
508let err_brace d ~sbyte ~sline =
509 Tdec.err_to_here d ~sbyte ~sline "incorrect brace {} nesting"
510
511let dec_byte d = match Tdec.byte d with
512| c when 0x00 <= c && c <= 0x08 || 0x0E <= c && c <= 0x1F || c = 0x7F ->
513 err_illegal_byte d c
514| c -> c
515[@@ ocaml.inline]
516
517let rec skip_white d = match dec_byte d with
518| 0x20 | 0x09 | 0x0A | 0x0B | 0x0C | 0x0D -> Tdec.accept_byte d; skip_white d
519| _ -> ()
520
521let dec_token ~stop d =
522 let rec loop d = match dec_byte d with
523 | 0x28 | 0x29 | 0x3B | 0x22
524 | 0x20 | 0x09 | 0x0A | 0x0B | 0x0C | 0x0D
525 | 0xFFFF -> Tdec.tok_pop d
526 | c when c = stop -> Tdec.tok_pop d
527 | _ -> Tdec.tok_accept_uchar d; loop d
528 in
529 loop d
530
531let rec dec_string ~sbyte ~sline ~stop d = match dec_byte d with
532| 0xFFFF -> err_eoi_value ~sbyte ~sline d
533| c when c = stop -> Tdec.accept_byte d; Tdec.tok_pop d
534| _ -> Tdec.tok_accept_uchar d; dec_string ~sbyte ~sline ~stop d
535
536let rec dec_tex i ~sbyte ~sline d = match dec_byte d with
537| 0xFFFF -> err_eoi_value ~sbyte ~sline d
538| 0x007D ->
539 if i = 0 then (Tdec.accept_byte d; Tdec.tok_pop d) else
540 (Tdec.tok_accept_uchar d; dec_tex (i - 1) ~sbyte ~sline d)
541| c ->
542 let i = if c = 0x007B then i + 1 else i in
543 Tdec.tok_accept_uchar d; dec_tex i ~sbyte ~sline d
544
545let dec_value d =
546 let sbyte = Tdec.pos d and sline = Tdec.line d in
547 match dec_byte d with
548 | 0x007B (* { *) -> Tdec.accept_byte d; dec_tex 0 ~sbyte ~sline d
549 | 0x0022 -> Tdec.accept_byte d; dec_string ~sbyte ~sline ~stop:0x0022 d
550 | _ -> dec_token ~stop:0x002C d
551
552let dec_field d acc =
553 let sbyte = Tdec.pos d and sline = Tdec.line d in
554 let id = dec_token ~stop:0x003D (* = *) d in
555 skip_white d;
556 match dec_byte d with
557 | 0xFFFF -> err_eoi_field ~sbyte ~sline d
558 | 0x003D (* = *) ->
559 Tdec.accept_byte d;
560 skip_white d;
561 begin match dec_byte d with
562 | 0xFFFF -> err_eoi_field ~sbyte ~sline d
563 | _ ->
564 SM.add (String.lowercase_ascii id) (dec_value d) acc
565 end
566 | _ -> err_expected d "'='"
567
568let rec dec_fields ~sbyte ~sline d acc =
569 skip_white d;
570 match dec_byte d with
571 | 0xFFFF -> err_eoi_entry ~sbyte ~sline d
572 | 0x007D (* } *) -> acc
573 | _ ->
574 let acc = dec_field d acc in
575 skip_white d;
576 match dec_byte d with
577 | 0x002C (* , *) -> Tdec.accept_byte d; dec_fields ~sbyte ~sline d acc
578 | 0x007D (* } *) -> acc
579 | 0xFFFF -> err_eoi_entry ~sbyte ~sline d
580 | b -> err_expected d "',' or '}'"
581
582let dec_entry d =
583 let sbyte = Tdec.pos d and sline = Tdec.line d in
584 Tdec.accept_byte d (* @ *);
585 let type' = dec_token ~stop:0x007B d (* { *) in
586 match dec_byte d with
587 | 0x007B ->
588 Tdec.accept_byte d;
589 let cite_key = dec_token ~stop:0x002C d (* , *) in
590 skip_white d;
591 begin match dec_byte d with
592 | 0x002C (* , *) ->
593 Tdec.accept_byte d;
594 let fields = dec_fields ~sbyte ~sline d SM.empty in
595 let loc = Tdec.loc_to_here d ~sbyte ~sline in
596 Tdec.accept_byte d;
597 { type'; cite_key; fields; loc }
598 | _ -> err_expected d "','"
599 end
600 | _ -> err_expected d "'{'"
601
602let dec_entries d =
603 let rec loop d acc =
604 skip_white d;
605 match dec_byte d with
606 | 0x0040 (* @ *) -> loop d (dec_entry d :: acc)
607 | 0xFFFF -> List.rev acc
608 | b -> err_illegal_uchar d
609 in
610 loop d []
611
612let of_string ?(file = Fpath.v "-") s =
613 try
614 let file = Fpath.to_string file in
615 let d = Tdec.create ~file s in
616 Ok (dec_entries d)
617 with Tdec.Err (loc, msg) -> Error (msg, loc)
618
619let of_string' ?file s =
620 Result.map_error (fun e -> Fmt.str "%a" pp_error e) @@
621 (of_string ?file s)
622
623let to_string es = Fmt.str "@[<v>%a@]" (Fmt.list pp) es
624
625(*---------------------------------------------------------------------------
626 Copyright (c) 2019 University of Bern
627
628 Permission to use, copy, modify, and/or distribute this software for any
629 purpose with or without fee is hereby granted, provided that the above
630 copyright notice and this permission notice appear in all copies.
631
632 THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
633 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
634 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
635 ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
636 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
637 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
638 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
639 ---------------------------------------------------------------------------*)