My agentic slop goes here. Not intended for anyone else!
at main 22 kB view raw
1(*--------------------------------------------------------------------------- 2 Copyright (c) 2019 University of Bern. All rights reserved. 3 Distributed under the ISC license, see terms at the end of the file. 4 ---------------------------------------------------------------------------*) 5 6[@@@warning "-27-32-33-34"] 7 8module SM = Map.Make(String) 9 10module Err_msg = struct 11 let pf = Format.fprintf 12 let pp_sp = Format.pp_print_space 13 let pp_nop _ () = () 14 let pp_any fmt ppf _ = pf ppf fmt 15 16 let pp_op_enum op ?(empty = pp_nop) pp_v ppf = function 17 | [] -> empty ppf () 18 | [v] -> pp_v ppf v 19 | _ as vs -> 20 let rec loop ppf = function 21 | [v0; v1] -> pf ppf "%a@ %s@ %a" pp_v v0 op pp_v v1 22 | v :: vs -> pf ppf "%a,@ " pp_v v; loop ppf vs 23 | [] -> assert false 24 in 25 loop ppf vs 26 27 let pp_and_enum ?empty pp_v ppf vs = pp_op_enum "and" ?empty pp_v ppf vs 28 let pp_or_enum ?empty pp_v ppf vs = pp_op_enum "or" ?empty pp_v ppf vs 29 let pp_did_you_mean pp_v ppf = function 30 | [] -> () | vs -> pf ppf "Did@ you@ mean %a ?" (pp_or_enum pp_v) vs 31 32 let pp_must_be pp_v ppf = function 33 | [] -> () | vs -> pf ppf "Must be %a." (pp_or_enum pp_v) vs 34 35 let pp_unknown ~kind pp_v ppf v = pf ppf "Unknown %a %a." kind () pp_v v 36 let pp_unknown' ~kind pp_v ~hint ppf (v, hints) = match hints with 37 | [] -> pp_unknown ~kind pp_v ppf v 38 | hints -> pp_unknown ~kind pp_v ppf v; pp_sp ppf (); (hint pp_v) ppf hints 39 40 let min_by f a b = if f a <= f b then a else b 41 let max_by f a b = if f a <= f b then b else a 42 43 let edit_distance s0 s1 = 44 let minimum a b c = min a (min b c) in 45 let s0 = min_by String.length s0 s1 (* row *) 46 and s1 = max_by String.length s0 s1 in (* column *) 47 let m = String.length s0 and n = String.length s1 in 48 let rec rows row0 row i = 49 if i > n then row0.(m) else begin 50 row.(0) <- i; 51 for j = 1 to m do 52 if s0.[j - 1] = s1.[i - 1] then row.(j) <- row0.(j - 1) else 53 row.(j) <-minimum (row0.(j - 1) + 1) (row0.(j) + 1) (row.(j - 1) + 1) 54 done; 55 rows row row0 (i + 1) 56 end in 57 rows (Array.init (m + 1) (fun x -> x)) (Array.make (m + 1) 0) 1 58 59 let suggest ?(dist = 2) candidates s = 60 let add (min, acc) name = 61 let d = edit_distance s name in 62 if d = min then min, (name :: acc) else 63 if d < min then d, [name] else 64 min, acc 65 in 66 let d, suggs = List.fold_left add (max_int, []) candidates in 67 if d <= dist (* suggest only if not too far *) then List.rev suggs else [] 68end 69 70module Tloc = struct 71 type fpath = string 72 let pp_path = Format.pp_print_string 73 74 type pos = int 75 type line = int 76 type line_pos = line * pos 77 (* For lines we keep the byte position just after the newlinexs. It 78 editors are still expecting tools to compute visual columns which 79 is stupid. By keeping these byte positions we can approximate 80 columns by subtracting the line byte position from the byte 81 location. This will only be correct on US-ASCII data though. Best 82 would be to be able to give them [sbyte] and [ebyte]. *) 83 84 let l v = v 85 type t = 86 { file : fpath; 87 sbyte : pos; ebyte : pos; 88 sline : pos * line; eline : pos * line } 89 90 let no_file = "-" 91 let v ~file ~sbyte ~ebyte ~sline ~eline = { file; sbyte; ebyte; sline; eline } 92 let file l = l.file 93 let sbyte l = l.sbyte 94 let ebyte l = l.ebyte 95 let sline l = l.sline 96 let eline l = l.eline 97 let nil = 98 let pnil = -1 in 99 let lnil = (-1, pnil) in 100 v ~file:no_file ~sbyte:pnil ~ebyte:pnil ~sline:lnil ~eline:lnil 101 102 let merge l0 l1 = 103 let sbyte, sline = 104 if l0.sbyte < l1.sbyte then l0.sbyte, l0.sline else l1.sbyte, l1.sline 105 in 106 let ebyte, eline = 107 if l0.ebyte < l1.ebyte then l1.ebyte, l1.eline else l0.ebyte, l0.eline 108 in 109 v ~file:l0.file ~sbyte ~ebyte ~sline ~eline 110 111 let to_start l = 112 v ~file:l.file ~sbyte:l.sbyte ~ebyte:l.sbyte ~sline:l.sline ~eline:l.sline 113 114 let to_end l = 115 v ~file:l.file ~sbyte:l.ebyte ~ebyte:l.ebyte ~sline:l.eline ~eline:l.eline 116 117 let restart ~at:s e = 118 v ~file:e.file ~sbyte:s.sbyte ~ebyte:e.ebyte ~sline:s.sline ~eline:e.eline 119 120 let pf = Format.fprintf 121 let pp_ocaml ppf l = match l.ebyte < 0 with 122 | true -> pf ppf "File \"%a\", line n/a, characters n/a" pp_path l.file 123 | false -> 124 let pp_lines ppf l = match fst l.sline = fst l.eline with 125 | true -> pf ppf "line %d" (fst l.sline) 126 | false -> pf ppf "lines %d-%d" (fst l.sline) (fst l.eline) 127 in 128 (* "characters" represent positions (insertion points) not columns *) 129 let pos_s = l.sbyte - snd l.sline in 130 let pos_e = l.ebyte - snd l.eline + 1 in 131 pf ppf "File \"%a\", %a, characters %d-%d" 132 pp_path l.file pp_lines l pos_s pos_e 133 134 let pp_gnu ppf l = match l.ebyte < 0 with 135 | true -> pf ppf "%a:" pp_path l.file 136 | false -> 137 let pp_lines ppf l = 138 let col_s = l.sbyte - snd l.sline + 1 in 139 let col_e = l.ebyte - snd l.eline + 1 in 140 match fst l.sline = fst l.eline with 141 | true -> pf ppf "%d.%d-%d" (fst l.sline) col_s col_e 142 | false -> 143 pf ppf "%d.%d-%d.%d" (fst l.sline) col_s (fst l.eline) col_e 144 in 145 pf ppf "%a:%a" pp_path l.file pp_lines l 146 147 let pp_dump ppf l = 148 pf ppf "[bytes %d;%d][lines %d;%d][lbytes %d;%d]" 149 l.sbyte l.ebyte (fst l.sline) (fst l.eline) (snd l.sline) (snd l.eline) 150 151 let pp = pp_gnu 152end 153 154module Utf_8 = struct 155 type case = 156 | L1 | L2 | L3_E0 | L3_E1_EC_or_EE_EF | L3_ED | L4_F0 | L4_F1_F3 | L4_F4 | E 157 158 let case = 159(* 160 (* See https://tools.ietf.org/html/rfc3629#section-4 *) 161 Printf.printf "[|"; 162 for i = 0 to 255 do 163 if i mod 16 = 0 then Printf.printf "\n"; 164 if 0x00 <= i && i <= 0x7F then Printf.printf "L1; " else 165 if 0xC2 <= i && i <= 0xDF then Printf.printf "L2; " else 166 if 0xE0 = i then Printf.printf "L3_E0; " else 167 if 0xE1 <= i && i <= 0xEC || 0xEE <= i && i <= 0xEF 168 then Printf.printf "L3_E1_EC_or_EE_EF; " else 169 if 0xED = i then Printf.printf "L3_ED;" else 170 if 0xF0 = i then Printf.printf "L4_F0; " else 171 if 0xF1 <= i && i <= 0xF3 then Printf.printf "L4_F1_F3; " else 172 if 0xF4 = i then Printf.printf "L4_F4; " else 173 Printf.printf "E; " 174 done; 175 Printf.printf "\n|]" 176*) 177 [| 178 L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; 179 L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; 180 L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; 181 L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; 182 L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; 183 L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; 184 L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; 185 L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; 186 E; E; E; E; E; E; E; E; E; E; E; E; E; E; E; E; 187 E; E; E; E; E; E; E; E; E; E; E; E; E; E; E; E; 188 E; E; E; E; E; E; E; E; E; E; E; E; E; E; E; E; 189 E; E; E; E; E; E; E; E; E; E; E; E; E; E; E; E; 190 E; E; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; 191 L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; 192 L3_E0; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; 193 L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; 194 L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; 195 L3_E1_EC_or_EE_EF; L3_ED;L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; 196 L4_F0; L4_F1_F3; L4_F1_F3; L4_F1_F3; L4_F4; E; E; E; E; E; E; E; E; E; E; E; 197 |] 198end 199 200module Tdec = struct 201 type 'a fmt = Format.formatter -> 'a -> unit 202 let pp_did_you_mean = Err_msg.pp_did_you_mean 203 let pp_and_enum = Err_msg.pp_and_enum 204 let pp_or_enum = Err_msg.pp_or_enum 205 let pp_did_you_mean = Err_msg.pp_did_you_mean 206 let pp_must_be = Err_msg.pp_must_be 207 let pp_unknown = Err_msg.pp_unknown 208 let pp_unknown' = Err_msg.pp_unknown' 209 210 (* Decoders *) 211 212 type t = 213 { file : Tloc.fpath; i : string; tok : Buffer.t; 214 mutable pos : int; mutable line : int; mutable line_pos : int; } 215 216 let create ?(file = Tloc.no_file) i = 217 { file; i; tok = Buffer.create 255; pos = 0; line = 1; line_pos = 0 } 218 219 (* Location *) 220 221 let file d = d.file 222 let pos d = d.pos 223 let line d = d.line, d.line_pos 224 225 let loc d ~sbyte ~ebyte ~sline ~eline = 226 Tloc.v ~file:d.file ~sbyte ~ebyte ~sline ~eline 227 228 let loc_to_here d ~sbyte ~sline = 229 loc d ~sbyte ~ebyte:d.pos ~sline ~eline:(d.line, d.line_pos) 230 231 let loc_here d = loc_to_here d ~sbyte:d.pos ~sline:(d.line, d.line_pos) 232 233 (* Errors *) 234 235 exception Err of Tloc.t * string 236 237 let err loc msg = raise_notrace (Err (loc, msg)) 238 let err_to_here d ~sbyte ~sline fmt = 239 Format.kasprintf (err (loc_to_here d ~sbyte ~sline)) fmt 240 241 let err_here d fmt = Format.kasprintf (err (loc_here d)) fmt 242 let err_suggest = Err_msg.suggest 243 244 (* Lexing *) 245 246 let incr_line d = match d.i.[d.pos] with (* assert (not (eoi d)) *) 247 | '\r' -> d.line <- d.line + 1; d.line_pos <- d.pos + 1 248 | '\n' -> 249 (if d.pos = 0 || d.i.[d.pos - 1] <> '\r' then d.line <- d.line + 1); 250 d.line_pos <- d.pos + 1; 251 | _ -> () 252 [@@ ocaml.inline] 253 254 let eoi d = d.pos >= String.length d.i [@@ ocaml.inline] 255 let byte d = if eoi d then 0xFFFF else Char.code d.i.[d.pos] [@@ ocaml.inline] 256 let accept_byte d = incr_line d; d.pos <- d.pos + 1 257 [@@ ocaml.inline] 258 259 let accept_utf_8 accept d = 260 let err d = match byte d with 261 | 0xFFFF -> err_here d "UTF-8 decoding error: unexpected end of input" 262 | b -> err_here d "UTF-8 decoding error: byte %02x illegal here" b 263 in 264 let accept_tail d = if (byte d lsr 6 = 0b10) then accept d else err d in 265 match byte d with 266 | 0xFFFF -> err d 267 | b -> 268 (* If a subsequent [byte d] invocation is 0xFFFF we get to [err]. *) 269 match Utf_8.case.(b) with 270 | L1 -> accept d 271 | L2 -> accept d; accept_tail d 272 | L3_E0 -> 273 accept d; 274 if (byte d - 0xA0 < 0xBF - 0xA0) then accept d else err d; 275 accept_tail d 276 | L3_E1_EC_or_EE_EF -> accept d; accept_tail d; accept_tail d 277 | L3_ED -> 278 accept d; 279 if (byte d - 0x80 < 0x9F - 0x80) then accept d else err d; 280 accept_tail d 281 | L4_F0 -> 282 accept d; 283 if (byte d - 0x90 < 0xBF - 0x90) then accept d else err d; 284 accept_tail d; accept_tail d 285 | L4_F1_F3 -> 286 accept d; 287 accept_tail d; accept_tail d; accept_tail d; 288 | L4_F4 -> 289 accept d; 290 if (byte d - 0x80 < 0x8F - 0x80) then accept d else err d; 291 | E -> err d 292 293 let accept_uchar d = accept_utf_8 accept_byte d 294 295 (* Tokenizer *) 296 297 let tok_reset d = Buffer.reset d.tok [@@ ocaml.inline] 298 let tok_pop d = let t = Buffer.contents d.tok in tok_reset d; t 299 [@@ ocaml.inline] 300 301 let tok_accept_byte d = 302 Buffer.add_char d.tok d.i.[d.pos]; accept_byte d; [@@ ocaml.inline] 303 304 let tok_accept_uchar d = accept_utf_8 tok_accept_byte d [@@ ocaml.inline] 305 let tok_add_byte d b = Buffer.add_char d.tok (Char.chr b) [@@ ocaml.inline] 306 let tok_add_bytes d s = Buffer.add_string d.tok s [@@ ocaml.inline] 307 let tok_add_char d c = Buffer.add_char d.tok c [@@ ocaml.inline] 308 309 let buffer_add_uchar b u = match Uchar.to_int u with 310 (* XXX From 4.06 use Buffer.add_utf_8_uchar *) 311 | u when u < 0 -> assert false 312 | u when u <= 0x007F -> 313 Buffer.add_char b (Char.unsafe_chr u) 314 | u when u <= 0x07FF -> 315 Buffer.add_char b (Char.unsafe_chr (0xC0 lor (u lsr 6))); 316 Buffer.add_char b (Char.unsafe_chr (0x80 lor (u land 0x3F))); 317 | u when u <= 0xFFFF -> 318 Buffer.add_char b (Char.unsafe_chr (0xE0 lor (u lsr 12))); 319 Buffer.add_char b (Char.unsafe_chr (0x80 lor ((u lsr 6) land 0x3F))); 320 Buffer.add_char b (Char.unsafe_chr (0x80 lor (u land 0x3F))); 321 | u when u <= 0x10FFFF -> 322 Buffer.add_char b (Char.unsafe_chr (0xF0 lor (u lsr 18))); 323 Buffer.add_char b (Char.unsafe_chr (0x80 lor ((u lsr 12) land 0x3F))); 324 Buffer.add_char b (Char.unsafe_chr (0x80 lor ((u lsr 6) land 0x3F))); 325 Buffer.add_char b (Char.unsafe_chr (0x80 lor (u land 0x3F))) 326 | _ -> assert false 327 328 let tok_add_uchar d u = buffer_add_uchar d.tok u 329end 330 331module Url = struct 332 type scheme = string 333 type authority = string 334 type path = string 335 type query = string 336 type fragment = string 337 type t = string 338 339 let string_subrange ?(first = 0) ?last s = 340 let max = String.length s - 1 in 341 let last = match last with 342 | None -> max 343 | Some l when l > max -> max 344 | Some l -> l 345 in 346 let first = if first < 0 then 0 else first in 347 if first > last then "" else String.sub s first (last - first + 1) 348 349 let white = function ' ' | '\t' .. '\r' -> true | _ -> false 350 let alpha = function 'A' .. 'Z' | 'a' .. 'z' -> true | _ -> false 351 let digit = function '0' .. '9' -> true | _ -> false 352 353 let scheme_char c = 354 alpha c || digit c || Char.equal c '+' || Char.equal c '-' || 355 Char.equal '.' c 356 357 let find_scheme_colon u = 358 if u = "" || not (alpha u.[0]) then None else 359 let max = String.length u - 1 in 360 let i = ref 1 in 361 while !i <= max && scheme_char u.[!i] do incr i done; 362 if !i > max || u.[!i] <> ':' then None else Some !i 363 364 let find_authority_last ~start u = 365 let max = String.length u - 1 in 366 if start > max then None else 367 if start + 1 > max then Some (start - 1) else 368 if not (u.[start] = '/' && u.[start + 1] = '/') then Some (start - 1) else 369 let i = ref (start + 2) in 370 while (!i <= max && u.[!i] <> '/' && u.[!i] <> '?' && u.[!i] <> '#') 371 do incr i done; 372 Some (!i - 1) 373 374 let scheme u = match find_scheme_colon u with 375 | None -> None | Some i -> Some (String.sub u 0 i) 376 377 let path_first u = 378 let start = match find_scheme_colon u with 379 | None -> 0 | Some i -> i + 1 380 in 381 let first = match find_authority_last ~start u with 382 | None -> start | Some last -> last + 1 383 in 384 let max = String.length u - 1 in 385 if first > max || u.[first] = '#' || u.[first] = '?' then None else Some first 386 387 let path_last u ~first = 388 let max = String.length u - 1 in 389 let i = ref (first + 1) in 390 while (!i <= max && u.[!i] <> '?' && u.[!i] <> '#') do incr i done; 391 !i - 1 392 393 let path u = match path_first u with 394 | None -> None 395 | Some first -> Some (string_subrange ~first ~last:(path_last u ~first) u) 396end 397 398let escape = (* The escape rules are a bit unclear. These are those of LaTeX *) 399 let byte_replaced_length char_len s = 400 let rec loop s max i l = match i > max with 401 | true -> l 402 | false -> loop s max (i + 1) (l + char_len s.[i]) 403 in 404 loop s (String.length s - 1) 0 0 405 in 406 let byte_replace set_char s ~len ~replaced_len = 407 let b = Bytes.create replaced_len in 408 let rec loop s max i k = match i > max with 409 | true -> Bytes.unsafe_to_string b 410 | false -> loop s max (i + 1) (set_char b k s.[i]) 411 in 412 loop s (len - 1) 0 0 413 in 414 let byte_escaper char_len set_char s = 415 let len = String.length s in 416 let replaced_len = byte_replaced_length char_len s in 417 match replaced_len = len with 418 | true -> s 419 | false -> byte_replace set_char s ~len ~replaced_len 420 in 421 let tilde_esc = "\\textasciitilde" in 422 let tilde_len = String.length tilde_esc in 423 let circ_esc = "\\textasciicircum" in 424 let circ_len = String.length circ_esc in 425 let bslash_esc = "\\textbackslash" in 426 let bslash_len = String.length bslash_esc in 427 let char_len = function 428 | '&' | '%' | '$' | '#' | '_' | '{' | '}' -> 2 429 | '~' -> tilde_len 430 | '^' -> circ_len 431 | '\\' -> bslash_len 432 | _ -> 1 433 in 434 let set_char b i = function 435 | '&' | '%' | '$' | '#' | '_' | '{' | '}' as c -> 436 Bytes.set b i '\\'; Bytes.set b (i + 1) c; i + 2 437 | '~' -> Bytes.blit_string tilde_esc 0 b i tilde_len; i + tilde_len 438 | '^' -> Bytes.blit_string circ_esc 0 b i circ_len; i + circ_len 439 | '\\' -> Bytes.blit_string bslash_esc 0 b i bslash_len; i + bslash_len 440 | c -> Bytes.set b i c; i + 1 441 in 442 byte_escaper char_len set_char 443 444(* TODO unescape on decode. *) 445 446type t = 447 { type' : string; 448 cite_key : string; 449 fields : string SM.t; 450 loc : Tloc.t; } 451 452let v ~type' ~cite_key ~fields () = { type'; cite_key; fields; loc = Tloc.nil } 453 454let type' e = e.type' 455let cite_key e = e.cite_key 456let fields e = e.fields 457let loc e = e.loc 458let pp ppf e = 459 let pp_field ppf (k, v) = Fmt.pf ppf "@[<h>%s = {%s}@]" k (escape v) in 460 Fmt.pf ppf "@[<v2>@%s{%s,@,%a}@]" e.type' e.cite_key 461 (Fmt.iter_bindings ~sep:Fmt.comma SM.iter pp_field) e.fields 462 463(* Field values *) 464 465let list_value s = 466 List.filter (fun s -> s <> "") @@ 467 List.map String.trim (String.split_on_char ',' s) 468 469let doi e = match SM.find_opt "doi" e.fields with 470| None -> None 471| Some doi -> 472 let ret doi = match String.trim doi with 473 | "" -> None 474 | doi -> Some doi 475 in 476 (* chop scheme and authority in case there is one *) 477 match Url.scheme doi with 478 | None -> ret doi 479 | Some _ -> 480 match Url.path doi with 481 | None -> ret doi 482 | Some p -> ret p 483 484let keywords e = Option.map list_value (SM.find_opt "keywords" e.fields) 485let annote e = SM.find_opt "annote" e.fields 486 487(* Codec *) 488 489type error_kind = string 490type error = error_kind * Tloc.t 491 492let pp_error ppf (err, l) = 493 Fmt.pf ppf "@[<v>%a:@,%a: %s@]" 494 Tloc.pp l Fmt.string "Error" err 495 496let curr_char d = (* TODO better escaping (this is for error reports) *) 497 Tdec.tok_reset d; Tdec.tok_accept_uchar d; Tdec.tok_pop d 498 499let err_illegal_uchar d = Tdec.err_here d "illegal character: %s" (curr_char d) 500let err_illegal_byte d b = Tdec.err_here d "illegal character U+%04X" b 501let err_expected d exp = Tdec.err_here d "expected %s" exp 502let err_eoi msg d ~sbyte ~sline = 503 Tdec.err_to_here d ~sbyte ~sline "end of input: %s" msg 504 505let err_eoi_entry = err_eoi "unclosed BibTeX entry" 506let err_eoi_field = err_eoi "unfinished BibTeX entry field" 507let err_eoi_value = err_eoi "unfinished BibTeX field value" 508let err_brace d ~sbyte ~sline = 509 Tdec.err_to_here d ~sbyte ~sline "incorrect brace {} nesting" 510 511let dec_byte d = match Tdec.byte d with 512| c when 0x00 <= c && c <= 0x08 || 0x0E <= c && c <= 0x1F || c = 0x7F -> 513 err_illegal_byte d c 514| c -> c 515[@@ ocaml.inline] 516 517let rec skip_white d = match dec_byte d with 518| 0x20 | 0x09 | 0x0A | 0x0B | 0x0C | 0x0D -> Tdec.accept_byte d; skip_white d 519| _ -> () 520 521let dec_token ~stop d = 522 let rec loop d = match dec_byte d with 523 | 0x28 | 0x29 | 0x3B | 0x22 524 | 0x20 | 0x09 | 0x0A | 0x0B | 0x0C | 0x0D 525 | 0xFFFF -> Tdec.tok_pop d 526 | c when c = stop -> Tdec.tok_pop d 527 | _ -> Tdec.tok_accept_uchar d; loop d 528 in 529 loop d 530 531let rec dec_string ~sbyte ~sline ~stop d = match dec_byte d with 532| 0xFFFF -> err_eoi_value ~sbyte ~sline d 533| c when c = stop -> Tdec.accept_byte d; Tdec.tok_pop d 534| _ -> Tdec.tok_accept_uchar d; dec_string ~sbyte ~sline ~stop d 535 536let rec dec_tex i ~sbyte ~sline d = match dec_byte d with 537| 0xFFFF -> err_eoi_value ~sbyte ~sline d 538| 0x007D -> 539 if i = 0 then (Tdec.accept_byte d; Tdec.tok_pop d) else 540 (Tdec.tok_accept_uchar d; dec_tex (i - 1) ~sbyte ~sline d) 541| c -> 542 let i = if c = 0x007B then i + 1 else i in 543 Tdec.tok_accept_uchar d; dec_tex i ~sbyte ~sline d 544 545let dec_value d = 546 let sbyte = Tdec.pos d and sline = Tdec.line d in 547 match dec_byte d with 548 | 0x007B (* { *) -> Tdec.accept_byte d; dec_tex 0 ~sbyte ~sline d 549 | 0x0022 -> Tdec.accept_byte d; dec_string ~sbyte ~sline ~stop:0x0022 d 550 | _ -> dec_token ~stop:0x002C d 551 552let dec_field d acc = 553 let sbyte = Tdec.pos d and sline = Tdec.line d in 554 let id = dec_token ~stop:0x003D (* = *) d in 555 skip_white d; 556 match dec_byte d with 557 | 0xFFFF -> err_eoi_field ~sbyte ~sline d 558 | 0x003D (* = *) -> 559 Tdec.accept_byte d; 560 skip_white d; 561 begin match dec_byte d with 562 | 0xFFFF -> err_eoi_field ~sbyte ~sline d 563 | _ -> 564 SM.add (String.lowercase_ascii id) (dec_value d) acc 565 end 566 | _ -> err_expected d "'='" 567 568let rec dec_fields ~sbyte ~sline d acc = 569 skip_white d; 570 match dec_byte d with 571 | 0xFFFF -> err_eoi_entry ~sbyte ~sline d 572 | 0x007D (* } *) -> acc 573 | _ -> 574 let acc = dec_field d acc in 575 skip_white d; 576 match dec_byte d with 577 | 0x002C (* , *) -> Tdec.accept_byte d; dec_fields ~sbyte ~sline d acc 578 | 0x007D (* } *) -> acc 579 | 0xFFFF -> err_eoi_entry ~sbyte ~sline d 580 | b -> err_expected d "',' or '}'" 581 582let dec_entry d = 583 let sbyte = Tdec.pos d and sline = Tdec.line d in 584 Tdec.accept_byte d (* @ *); 585 let type' = dec_token ~stop:0x007B d (* { *) in 586 match dec_byte d with 587 | 0x007B -> 588 Tdec.accept_byte d; 589 let cite_key = dec_token ~stop:0x002C d (* , *) in 590 skip_white d; 591 begin match dec_byte d with 592 | 0x002C (* , *) -> 593 Tdec.accept_byte d; 594 let fields = dec_fields ~sbyte ~sline d SM.empty in 595 let loc = Tdec.loc_to_here d ~sbyte ~sline in 596 Tdec.accept_byte d; 597 { type'; cite_key; fields; loc } 598 | _ -> err_expected d "','" 599 end 600 | _ -> err_expected d "'{'" 601 602let dec_entries d = 603 let rec loop d acc = 604 skip_white d; 605 match dec_byte d with 606 | 0x0040 (* @ *) -> loop d (dec_entry d :: acc) 607 | 0xFFFF -> List.rev acc 608 | b -> err_illegal_uchar d 609 in 610 loop d [] 611 612let of_string ?(file = Fpath.v "-") s = 613 try 614 let file = Fpath.to_string file in 615 let d = Tdec.create ~file s in 616 Ok (dec_entries d) 617 with Tdec.Err (loc, msg) -> Error (msg, loc) 618 619let of_string' ?file s = 620 Result.map_error (fun e -> Fmt.str "%a" pp_error e) @@ 621 (of_string ?file s) 622 623let to_string es = Fmt.str "@[<v>%a@]" (Fmt.list pp) es 624 625(*--------------------------------------------------------------------------- 626 Copyright (c) 2019 University of Bern 627 628 Permission to use, copy, modify, and/or distribute this software for any 629 purpose with or without fee is hereby granted, provided that the above 630 copyright notice and this permission notice appear in all copies. 631 632 THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 633 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 634 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 635 ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 636 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 637 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 638 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 639 ---------------------------------------------------------------------------*)