(* Pure OCaml Yaml 1.2 reader and writer using Bytesrw. *)
(*---------------------------------------------------------------------------
  Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
  SPDX-License-Identifier: ISC
 ---------------------------------------------------------------------------*)

(** {1 Error Handling}

    Comprehensive error reporting for YAML parsing and emission.

    This module provides detailed error types that correspond to various
    failure modes in YAML processing, as specified in the
    {{:https://yaml.org/spec/1.2.2/}YAML 1.2.2 specification}.

    Each error includes:
    - A classification of the error type ({!type:kind})
    - Optional source location information ({!type:Span.t})
    - A context stack showing where the error occurred
    - Optional source text for error display

    See also
    {{:https://yaml.org/spec/1.2.2/#31-processes}Section 3.1 (Processes)} for
    background on the YAML processing model. *)

(** {2 Error Classification} *)

(** Error kinds are organized by the processing stage where they occur:
    - Scanner errors: Lexical analysis failures (character-level)
    - Parser errors: Syntax errors in event stream
    - Loader errors: Semantic errors during representation construction
    - Emitter errors: Failures during YAML generation *)
type kind =
  (* Scanner errors - see {{:https://yaml.org/spec/1.2.2/#51-character-set}Section 5.1} *)
  | Unexpected_character of char
      (** Invalid character in input. See
          {{:https://yaml.org/spec/1.2.2/#51-character-set}Section 5.1
           (Character Set)}. *)
  | Unexpected_eof  (** Premature end of input. *)
  | Invalid_escape_sequence of string
      (** Invalid escape in double-quoted string. See
          {{:https://yaml.org/spec/1.2.2/#57-escaped-characters}Section 5.7
           (Escaped Characters)}. *)
  | Invalid_unicode_escape of string
      (** Invalid Unicode escape sequence (\uXXXX or \UXXXXXXXX). *)
  | Invalid_hex_escape of string
      (** Invalid hexadecimal escape sequence (\xXX). *)
  | Invalid_tag of string
      (** Malformed tag syntax. See
          {{:https://yaml.org/spec/1.2.2/#691-node-tags}Section 6.9.1 (Node
           Tags)}. *)
  | Invalid_anchor of string
      (** Malformed anchor name. See
          {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section
           3.2.2.2 (Anchors and Aliases)}. *)
  | Invalid_alias of string
      (** Malformed alias reference. See
          {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section
           3.2.2.2 (Anchors and Aliases)}. *)
  | Invalid_comment
      (** Comment not properly separated from content. See
          {{:https://yaml.org/spec/1.2.2/#66-comments}Section 6.6 (Comments)}.
      *)
  | Unclosed_single_quote
      (** Unterminated single-quoted scalar. See
          {{:https://yaml.org/spec/1.2.2/#732-single-quoted-style}Section
           7.3.2 (Single-Quoted Style)}. *)
  | Unclosed_double_quote
      (** Unterminated double-quoted scalar. See
          {{:https://yaml.org/spec/1.2.2/#731-double-quoted-style}Section
           7.3.1 (Double-Quoted Style)}. *)
  | Unclosed_flow_sequence
      (** Missing closing bracket \] for flow sequence. See
          {{:https://yaml.org/spec/1.2.2/#741-flow-sequences}Section 7.4.1
           (Flow Sequences)}. *)
  | Unclosed_flow_mapping
      (** Missing closing brace \} for flow mapping. See
          {{:https://yaml.org/spec/1.2.2/#742-flow-mappings}Section 7.4.2
           (Flow Mappings)}. *)
  | Invalid_indentation of int * int
      (** Incorrect indentation level (expected, got). See
          {{:https://yaml.org/spec/1.2.2/#61-indentation-spaces}Section 6.1
           (Indentation Spaces)}. *)
  | Invalid_flow_indentation
      (** Content in flow collection must be indented. See
          {{:https://yaml.org/spec/1.2.2/#74-flow-collection-styles}Section
           7.4 (Flow Collection Styles)}. *)
  | Tab_in_indentation
      (** Tab character used for indentation (only spaces allowed). See
          {{:https://yaml.org/spec/1.2.2/#61-indentation-spaces}Section 6.1
           (Indentation Spaces)}. *)
  | Invalid_block_scalar_header of string
      (** Malformed block scalar header (| or >). See
          {{:https://yaml.org/spec/1.2.2/#81-block-scalar-styles}Section 8.1
           (Block Scalar Styles)}. *)
  | Invalid_quoted_scalar_indentation of string
      (** Incorrect indentation in quoted scalar. The payload is the complete
          message; it is displayed verbatim. *)
  | Invalid_directive of string
      (** Malformed directive. See
          {{:https://yaml.org/spec/1.2.2/#68-directives}Section 6.8
           (Directives)}. *)
  | Invalid_yaml_version of string
      (** Unsupported YAML version in %YAML directive. See
          {{:https://yaml.org/spec/1.2.2/#681-yaml-directives}Section 6.8.1
           (YAML Directives)}. *)
  | Invalid_tag_directive of string
      (** Malformed %TAG directive. See
          {{:https://yaml.org/spec/1.2.2/#682-tag-directives}Section 6.8.2
           (TAG Directives)}. *)
  | Reserved_directive of string
      (** Reserved directive name. See
          {{:https://yaml.org/spec/1.2.2/#68-directives}Section 6.8
           (Directives)}. *)
  | Illegal_flow_key_line
      (** Key and colon must be on same line in flow context. See
          {{:https://yaml.org/spec/1.2.2/#742-flow-mappings}Section 7.4.2
           (Flow Mappings)}. *)
  | Block_sequence_disallowed
      (** Block sequence entries not allowed in this context. See
          {{:https://yaml.org/spec/1.2.2/#82-block-collection-styles}Section
           8.2 (Block Collection Styles)}. *)
  (* Parser errors - see {{:https://yaml.org/spec/1.2.2/#31-processes}Section 3.1 (Processes)} *)
  | Unexpected_token of string  (** Unexpected token in event stream. *)
  | Expected_document_start
      (** Expected document start marker (---). See
          {{:https://yaml.org/spec/1.2.2/#912-document-markers}Section 9.1.2
           (Document Markers)}. *)
  | Expected_document_end
      (** Expected document end marker (...). See
          {{:https://yaml.org/spec/1.2.2/#912-document-markers}Section 9.1.2
           (Document Markers)}. *)
  | Expected_block_entry
      (** Expected block sequence entry marker (-). See
          {{:https://yaml.org/spec/1.2.2/#821-block-sequences}Section 8.2.1
           (Block Sequences)}. *)
  | Expected_key
      (** Expected mapping key. See
          {{:https://yaml.org/spec/1.2.2/#822-block-mappings}Section 8.2.2
           (Block Mappings)}. *)
  | Expected_value
      (** Expected mapping value after colon. See
          {{:https://yaml.org/spec/1.2.2/#822-block-mappings}Section 8.2.2
           (Block Mappings)}. *)
  | Expected_node  (** Expected a YAML node. *)
  | Expected_scalar  (** Expected a scalar value. *)
  | Expected_sequence_end
      (** Expected closing bracket \] for flow sequence. See
          {{:https://yaml.org/spec/1.2.2/#741-flow-sequences}Section 7.4.1
           (Flow Sequences)}. *)
  | Expected_mapping_end
      (** Expected closing brace \} for flow mapping. See
          {{:https://yaml.org/spec/1.2.2/#742-flow-mappings}Section 7.4.2
           (Flow Mappings)}. *)
  | Duplicate_anchor of string
      (** Anchor name defined multiple times. See
          {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section
           3.2.2.2 (Anchors and Aliases)}. *)
  | Undefined_alias of string
      (** Alias references non-existent anchor. See
          {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section
           3.2.2.2 (Anchors and Aliases)}. *)
  | Alias_cycle of string
      (** Circular reference in alias chain. See
          {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section
           3.2.2.2 (Anchors and Aliases)}. *)
  | Multiple_documents
      (** Multiple documents found when single document expected. See
          {{:https://yaml.org/spec/1.2.2/#912-document-markers}Section 9.1.2
           (Document Markers)}. *)
  | Mapping_key_too_long
      (** Mapping key exceeds maximum length (1024 characters). *)
  (* Loader errors - see {{:https://yaml.org/spec/1.2.2/#31-processes}Section 3.1 (Processes)} *)
  | Invalid_scalar_conversion of string * string
      (** Cannot convert scalar value to target type (value, target type). See
          {{:https://yaml.org/spec/1.2.2/#103-core-schema}Section 10.3 (Core
           Schema)}. *)
  | Type_mismatch of string * string
      (** Value has wrong type for operation (expected, got). *)
  | Unresolved_alias of string
      (** Alias encountered during conversion but not resolved. See
          {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section
           3.2.2.2 (Anchors and Aliases)}. *)
  | Key_not_found of string  (** Mapping key not found. *)
  | Alias_expansion_node_limit of int
      (** Alias expansion exceeded maximum node count (protection against
          billion laughs attack). See
          {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section
           3.2.2.2 (Anchors and Aliases)}.

          The "billion laughs attack" (also known as an XML bomb) is a
          denial-of-service attack where a small YAML document expands to
          enormous size through recursive alias expansion. This limit prevents
          such attacks. *)
  | Alias_expansion_depth_limit of int
      (** Alias expansion exceeded maximum nesting depth (protection against
          deeply nested aliases). See
          {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section
           3.2.2.2 (Anchors and Aliases)}. *)
  (* Emitter errors *)
  | Invalid_encoding of string
      (** Invalid character encoding specified. See
          {{:https://yaml.org/spec/1.2.2/#51-character-set}Section 5.1
           (Character Set)}. *)
  | Scalar_contains_invalid_chars of string
      (** Scalar contains characters invalid for chosen style. *)
  | Anchor_not_set  (** Attempted to emit alias before anchor was defined. *)
  | Invalid_state of string
      (** Emitter in invalid state for requested operation. *)
  (* Generic *)
  | Custom of string  (** Custom error message. *)

(** {2 Error Value} *)

(** Full error information including classification, location, and context. *)
type t = {
  kind : kind;  (** The specific error classification. *)
  span : Span.t option;
      (** Source location where the error occurred (if available). *)
  context : string list;
      (** Context stack showing the processing path leading to the error.
          {!with_context} pushes new entries at the head of this list. *)
  source : string option;
      (** Source text for displaying the error in context. *)
}

(** {2 Exception} *)

(** The main exception type raised by all yamlrw operations.

    All parsing, loading, and emitting errors are reported by raising this
    exception with detailed error information. *)
exception Yamlrw_error of t

(** {2 Error Construction} *)

(** [make ?span ?context ?source kind] constructs an error value.

    @param span Source location
    @param context Context stack (defaults to empty)
    @param source Source text
    @param kind Error classification *)
let make ?span ?(context = []) ?source kind = { kind; span; context; source }

(** [raise ?span ?context ?source kind] constructs and raises an error.

    This is the primary way to report errors in yamlrw. Note that this
    definition shadows [Stdlib.raise] for the rest of the module.

    @param span Source location
    @param context Context stack
    @param source Source text
    @param kind Error classification
    @raise Yamlrw_error always *)
let raise ?span ?context ?source kind =
  Stdlib.raise (Yamlrw_error (make ?span ?context ?source kind))

(** [raise_at pos kind] raises an error at a specific position.

    The position is turned into a span with [Span.point].

    @param pos Source position
    @param kind Error classification
    @raise Yamlrw_error always *)
let raise_at pos kind =
  let span = Span.point pos in
  raise ~span kind

(** [raise_span span kind] raises an error at a specific span.

    @param span Source span
    @param kind Error classification
    @raise Yamlrw_error always *)
let raise_span span kind = raise ~span kind

(** [with_context ctx f] executes [f ()] and adds [ctx] to any raised error's
    context.

    This is useful for tracking the processing path through nested structures.
    Only {!Yamlrw_error} is intercepted; any other exception propagates
    untouched.

    @param ctx Context description (e.g., "parsing mapping key")
    @param f Function to execute
    @raise Yamlrw_error
      if [f ()] raised it, with [ctx] pushed at the head of its context list *)
let with_context ctx f =
  try f ()
  with Yamlrw_error e ->
    (* Capture the backtrace before doing anything else so the re-raised
       exception still points at the original raise site. *)
    let bt = Printexc.get_raw_backtrace () in
    Printexc.raise_with_backtrace
      (Yamlrw_error { e with context = ctx :: e.context })
      bt

(** {2 Error Formatting} *)

(** [kind_to_string kind] converts an error kind to a human-readable string. *)
let kind_to_string = function
  | Unexpected_character c -> Printf.sprintf "unexpected character %C" c
  | Unexpected_eof -> "unexpected end of input"
  | Invalid_escape_sequence s -> Printf.sprintf "invalid escape sequence: %s" s
  | Invalid_unicode_escape s -> Printf.sprintf "invalid unicode escape: %s" s
  | Invalid_hex_escape s -> Printf.sprintf "invalid hex escape: %s" s
  | Invalid_tag s -> Printf.sprintf "invalid tag: %s" s
  | Invalid_anchor s -> Printf.sprintf "invalid anchor: %s" s
  | Invalid_alias s -> Printf.sprintf "invalid alias: %s" s
  | Invalid_comment ->
      "comments must be separated from other tokens by whitespace"
  | Unclosed_single_quote -> "unclosed single quote"
  | Unclosed_double_quote -> "unclosed double quote"
  | Unclosed_flow_sequence -> "unclosed flow sequence '['"
  | Unclosed_flow_mapping -> "unclosed flow mapping '{'"
  | Invalid_indentation (expected, got) ->
      Printf.sprintf "invalid indentation: expected %d, got %d" expected got
  | Invalid_flow_indentation -> "invalid indentation in flow construct"
  | Tab_in_indentation -> "tab character in indentation"
  | Invalid_block_scalar_header s ->
      Printf.sprintf "invalid block scalar header: %s" s
  (* The payload already is the full message. *)
  | Invalid_quoted_scalar_indentation s -> s
  | Invalid_directive s -> Printf.sprintf "invalid directive: %s" s
  | Invalid_yaml_version s -> Printf.sprintf "invalid YAML version: %s" s
  | Invalid_tag_directive s -> Printf.sprintf "invalid TAG directive: %s" s
  | Reserved_directive s -> Printf.sprintf "reserved directive: %s" s
  | Illegal_flow_key_line ->
      "key and ':' must be on the same line in flow context"
  | Block_sequence_disallowed ->
      "block sequence entries are not allowed in this context"
  | Unexpected_token s -> Printf.sprintf "unexpected token: %s" s
  | Expected_document_start -> "expected document start '---'"
  | Expected_document_end -> "expected document end '...'"
  | Expected_block_entry -> "expected block entry '-'"
  | Expected_key -> "expected mapping key"
  | Expected_value -> "expected mapping value"
  | Expected_node -> "expected node"
  | Expected_scalar -> "expected scalar"
  | Expected_sequence_end -> "expected sequence end ']'"
  | Expected_mapping_end -> "expected mapping end '}'"
  | Duplicate_anchor s -> Printf.sprintf "duplicate anchor: &%s" s
  | Undefined_alias s -> Printf.sprintf "undefined alias: *%s" s
  | Alias_cycle s -> Printf.sprintf "alias cycle detected: *%s" s
  | Multiple_documents -> "multiple documents found when single expected"
  | Mapping_key_too_long -> "mapping key too long (max 1024 characters)"
  | Invalid_scalar_conversion (value, typ) ->
      Printf.sprintf "cannot convert %S to %s" value typ
  | Type_mismatch (expected, got) ->
      Printf.sprintf "type mismatch: expected %s, got %s" expected got
  | Unresolved_alias s -> Printf.sprintf "unresolved alias: *%s" s
  | Key_not_found s -> Printf.sprintf "key not found: %s" s
  | Alias_expansion_node_limit n ->
      Printf.sprintf "alias expansion exceeded node limit (%d nodes)" n
  | Alias_expansion_depth_limit n ->
      Printf.sprintf "alias expansion exceeded depth limit (%d levels)" n
  | Invalid_encoding s -> Printf.sprintf "invalid encoding: %s" s
  | Scalar_contains_invalid_chars s ->
      Printf.sprintf "scalar contains invalid characters: %s" s
  | Anchor_not_set -> "anchor not set"
  | Invalid_state s -> Printf.sprintf "invalid state: %s" s
  | Custom s -> s

(** [to_string t] converts an error to a human-readable string.

    Includes error kind, source location (if available), and context stack.
    The context entries are printed in the reverse of their list order,
    separated by [" > "]. *)
let to_string t =
  let loc =
    match t.span with None -> "" | Some span -> " at " ^ Span.to_string span
  in
  let ctx =
    match t.context with
    | [] -> ""
    | ctxs -> " (in " ^ String.concat " > " (List.rev ctxs) ^ ")"
  in
  kind_to_string t.kind ^ loc ^ ctx

(* Make uncaught [Yamlrw_error]s readable in [Printexc.to_string] output and
   in the default uncaught-exception handler. Registered here, after
   [to_string], so the message carries the real error description rather
   than a generic placeholder. *)
let () =
  Printexc.register_printer (function
    | Yamlrw_error e -> Some (Printf.sprintf "Yamlrw_error: %s" (to_string e))
    | _ -> None)

(** [pp fmt t] pretty-prints an error to a formatter. *)
let pp fmt t = Format.fprintf fmt "Yamlrw error: %s" (to_string t)

(** [pp_with_source ~source fmt t] pretty-prints an error with source context.

    Shows the error message followed by the relevant source line with a caret
    (^) pointing to the error location. When [t] has no span, or the span's
    start line does not exist in [source], only the message is printed.

    @param source The source text
    @param fmt Output formatter
    @param t The error to display *)
let pp_with_source ~source fmt t =
  (* [nth_line n] is the [n]th line of [source], counting from 1. The guard
     keeps [List.nth_opt] from raising on a negative index. *)
  let nth_line n =
    if n < 1 then None
    else List.nth_opt (String.split_on_char '\n' source) (n - 1)
  in
  pp fmt t;
  match t.span with
  | None -> ()
  | Some span -> (
      match nth_line span.start.line with
      | None -> ()
      | Some line ->
          Format.fprintf fmt "\n %d | %s\n" span.start.line line;
          (* Blank gutter as wide as the printed line number, so the caret
             row lines up with the source row whatever the digit count. *)
          let gutter =
            String.make (String.length (string_of_int span.start.line)) ' '
          in
          (* NOTE(review): the [- 1] assumes columns are 1-based; confirm
             against [Span]. Clamped so a column of 0 cannot make
             [String.make] raise [Invalid_argument]. *)
          let padding = String.make (max 0 (span.start.column - 1)) ' ' in
          Format.fprintf fmt " %s | %s^" gutter padding)