(* Pure OCaml Yaml 1.2 reader and writer using Bytesrw *)
(*---------------------------------------------------------------------------
  Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
  SPDX-License-Identifier: ISC
 ---------------------------------------------------------------------------*)

(** {1 Error Handling}

    Comprehensive error reporting for YAML parsing and emission.

    This module provides detailed error types that correspond to various
    failure modes in YAML processing, as specified in the
    {{:https://yaml.org/spec/1.2.2/}YAML 1.2.2 specification}.

    Each error includes:
    - A classification of the error type ({!type:kind})
    - Optional source location information ({!type:Span.t})
    - A context stack showing where the error occurred
    - Optional source text for error display

    See also {{:https://yaml.org/spec/1.2.2/#31-processes}Section 3.1 (Processes)}
    for background on the YAML processing model. *)

(** {2 Error Classification}

    Error kinds are organized by the processing stage where they occur:
    - Scanner errors: Lexical analysis failures (character-level)
    - Parser errors: Syntax errors in event stream
    - Loader errors: Semantic errors during representation construction
    - Emitter errors: Failures during YAML generation *)
type kind =
  (* Scanner errors - see {{:https://yaml.org/spec/1.2.2/#51-character-set}Section 5.1} *)
  | Unexpected_character of char
      (** Invalid character in input. See
          {{:https://yaml.org/spec/1.2.2/#51-character-set}Section 5.1 (Character Set)}. *)
  | Unexpected_eof
      (** Premature end of input. *)
  | Invalid_escape_sequence of string
      (** Invalid escape in double-quoted string. See
          {{:https://yaml.org/spec/1.2.2/#57-escaped-characters}Section 5.7 (Escaped Characters)}. *)
  | Invalid_unicode_escape of string
      (** Invalid Unicode escape sequence (\uXXXX or \UXXXXXXXX). *)
  | Invalid_hex_escape of string
      (** Invalid hexadecimal escape sequence (\xXX). *)
  | Invalid_tag of string
      (** Malformed tag syntax. See
          {{:https://yaml.org/spec/1.2.2/#681-node-tags}Section 6.8.1 (Node Tags)}. *)
  | Invalid_anchor of string
      (** Malformed anchor name. See
          {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 (Anchors and Aliases)}. *)
  | Invalid_alias of string
      (** Malformed alias reference. See
          {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 (Anchors and Aliases)}. *)
  | Invalid_comment
      (** Comment not properly separated from content. See
          {{:https://yaml.org/spec/1.2.2/#62-comments}Section 6.2 (Comments)}. *)
  | Unclosed_single_quote
      (** Unterminated single-quoted scalar. See
          {{:https://yaml.org/spec/1.2.2/#72-single-quoted-style}Section 7.2 (Single-Quoted Style)}. *)
  | Unclosed_double_quote
      (** Unterminated double-quoted scalar. See
          {{:https://yaml.org/spec/1.2.2/#73-double-quoted-style}Section 7.3 (Double-Quoted Style)}. *)
  | Unclosed_flow_sequence
      (** Missing closing bracket \] for flow sequence. See
          {{:https://yaml.org/spec/1.2.2/#742-flow-sequences}Section 7.4.2 (Flow Sequences)}. *)
  | Unclosed_flow_mapping
      (** Missing closing brace \} for flow mapping. See
          {{:https://yaml.org/spec/1.2.2/#743-flow-mappings}Section 7.4.3 (Flow Mappings)}. *)
  | Invalid_indentation of int * int
      (** Incorrect indentation level (expected, got). See
          {{:https://yaml.org/spec/1.2.2/#61-indentation-spaces}Section 6.1 (Indentation Spaces)}. *)
  | Invalid_flow_indentation
      (** Content in flow collection must be indented. See
          {{:https://yaml.org/spec/1.2.2/#74-flow-styles}Section 7.4 (Flow Styles)}. *)
  | Tab_in_indentation
      (** Tab character used for indentation (only spaces allowed). See
          {{:https://yaml.org/spec/1.2.2/#61-indentation-spaces}Section 6.1 (Indentation Spaces)}. *)
  | Invalid_block_scalar_header of string
      (** Malformed block scalar header (| or >). See
          {{:https://yaml.org/spec/1.2.2/#81-block-scalar-styles}Section 8.1 (Block Scalar Styles)}. *)
  | Invalid_quoted_scalar_indentation of string
      (** Incorrect indentation in quoted scalar. *)
  | Invalid_directive of string
      (** Malformed directive. See
          {{:https://yaml.org/spec/1.2.2/#68-directives}Section 6.8 (Directives)}. *)
  | Invalid_yaml_version of string
      (** Unsupported YAML version in %YAML directive. See
          {{:https://yaml.org/spec/1.2.2/#681-yaml-directives}Section 6.8.1 (YAML Directives)}. *)
  | Invalid_tag_directive of string
      (** Malformed %TAG directive. See
          {{:https://yaml.org/spec/1.2.2/#682-tag-directives}Section 6.8.2 (TAG Directives)}. *)
  | Reserved_directive of string
      (** Reserved directive name. See
          {{:https://yaml.org/spec/1.2.2/#683-reserved-directives}Section 6.8.3 (Reserved Directives)}. *)
  | Illegal_flow_key_line
      (** Key and colon must be on same line in flow context. See
          {{:https://yaml.org/spec/1.2.2/#743-flow-mappings}Section 7.4.3 (Flow Mappings)}. *)
  | Block_sequence_disallowed
      (** Block sequence entries not allowed in this context. See
          {{:https://yaml.org/spec/1.2.2/#82-block-collection-styles}Section 8.2 (Block Collection Styles)}. *)

  (* Parser errors - see {{:https://yaml.org/spec/1.2.2/#3-processing-yaml-information}Section 3 (Processing)} *)
  | Unexpected_token of string
      (** Unexpected token in event stream. *)
  | Expected_document_start
      (** Expected document start marker (---). See
          {{:https://yaml.org/spec/1.2.2/#912-document-markers}Section 9.1.2 (Document Markers)}. *)
  | Expected_document_end
      (** Expected document end marker (...). See
          {{:https://yaml.org/spec/1.2.2/#912-document-markers}Section 9.1.2 (Document Markers)}. *)
  | Expected_block_entry
      (** Expected block sequence entry marker (-). See
          {{:https://yaml.org/spec/1.2.2/#821-block-sequences}Section 8.2.1 (Block Sequences)}. *)
  | Expected_key
      (** Expected mapping key. See
          {{:https://yaml.org/spec/1.2.2/#822-block-mappings}Section 8.2.2 (Block Mappings)}. *)
  | Expected_value
      (** Expected mapping value after colon. See
          {{:https://yaml.org/spec/1.2.2/#822-block-mappings}Section 8.2.2 (Block Mappings)}. *)
  | Expected_node
      (** Expected a YAML node. *)
  | Expected_scalar
      (** Expected a scalar value. *)
  | Expected_sequence_end
      (** Expected closing bracket \] for flow sequence. See
          {{:https://yaml.org/spec/1.2.2/#742-flow-sequences}Section 7.4.2 (Flow Sequences)}. *)
  | Expected_mapping_end
      (** Expected closing brace \} for flow mapping. See
          {{:https://yaml.org/spec/1.2.2/#743-flow-mappings}Section 7.4.3 (Flow Mappings)}. *)
  | Duplicate_anchor of string
      (** Anchor name defined multiple times. See
          {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 (Anchors and Aliases)}. *)
  | Undefined_alias of string
      (** Alias references non-existent anchor. See
          {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 (Anchors and Aliases)}. *)
  | Alias_cycle of string
      (** Circular reference in alias chain. See
          {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 (Anchors and Aliases)}. *)
  | Multiple_documents
      (** Multiple documents found when single document expected. See
          {{:https://yaml.org/spec/1.2.2/#912-document-markers}Section 9.1.2 (Document Markers)}. *)
  | Mapping_key_too_long
      (** Mapping key exceeds maximum length (1024 characters). *)

  (* Loader errors - see {{:https://yaml.org/spec/1.2.2/#31-processes}Section 3.1 (Processes)} *)
  | Invalid_scalar_conversion of string * string
      (** Cannot convert scalar value to target type (value, target type).
          See {{:https://yaml.org/spec/1.2.2/#103-core-schema}Section 10.3 (Core Schema)}. *)
  | Type_mismatch of string * string
      (** Value has wrong type for operation (expected, got). *)
  | Unresolved_alias of string
      (** Alias encountered during conversion but not resolved.
          See {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 (Anchors and Aliases)}. *)
  | Key_not_found of string
      (** Mapping key not found. *)
  | Alias_expansion_node_limit of int
      (** Alias expansion exceeded maximum node count (protection against billion laughs attack).
          See {{:https://yaml.org/spec/1.2.2/#321-processes}Section 3.2.1 (Processes)}.

          The "billion laughs attack" (also known as an XML bomb) is a denial-of-service
          attack where a small YAML document expands to enormous size through recursive
          alias expansion. This limit prevents such attacks. *)
  | Alias_expansion_depth_limit of int
      (** Alias expansion exceeded maximum nesting depth (protection against deeply nested aliases).
          See {{:https://yaml.org/spec/1.2.2/#321-processes}Section 3.2.1 (Processes)}. *)

  (* Emitter errors *)
  | Invalid_encoding of string
      (** Invalid character encoding specified. See
          {{:https://yaml.org/spec/1.2.2/#51-character-set}Section 5.1 (Character Set)}. *)
  | Scalar_contains_invalid_chars of string
      (** Scalar contains characters invalid for chosen style. *)
  | Anchor_not_set
      (** Attempted to emit alias before anchor was defined. *)
  | Invalid_state of string
      (** Emitter in invalid state for requested operation. *)

  (* Generic *)
  | Custom of string
      (** Custom error message. *)

(** {2 Error Value}

    Full error information including classification, location, and context. *)
type t = {
  kind : kind;
      (** The specific error classification. *)
  span : Span.t option;
      (** Source location where the error occurred (if available). *)
  context : string list;
      (** Context stack showing the processing path leading to the error. *)
  source : string option;
      (** Source text for displaying the error in context. *)
}

(** {2 Exception}

    The main exception type raised by all yamlrw operations.

    All parsing, loading, and emitting errors are reported by raising
    this exception with detailed error information. *)
exception Yamlrw_error of t

(** {2 Error Construction} *)

(** [make ?span ?context ?source kind] constructs an error value.

    @param span Source location
    @param context Context stack (defaults to empty)
    @param source Source text
    @param kind Error classification *)
let make ?span ?(context = []) ?source kind =
  { kind; span; context; source }

(** [raise ?span ?context ?source kind] constructs and raises an error.

    This is the primary way to report errors in yamlrw. Note that this
    definition shadows [Stdlib.raise] for the rest of the module.

    @param span Source location
    @param context Context stack
    @param source Source text
    @param kind Error classification
    @raise Yamlrw_error *)
let raise ?span ?context ?source kind =
  Stdlib.raise (Yamlrw_error (make ?span ?context ?source kind))

(** [raise_at pos kind] raises an error at a specific position.

    The position is turned into a span with [Span.point].

    @param pos Source position
    @param kind Error classification
    @raise Yamlrw_error *)
let raise_at pos kind =
  let span = Span.point pos in
  raise ~span kind

(** [raise_span span kind] raises an error at a specific span.

    @param span Source span
    @param kind Error classification
    @raise Yamlrw_error *)
let raise_span span kind =
  raise ~span kind

(** [with_context ctx f] executes [f ()] and adds [ctx] to any raised error's context.

    This is useful for tracking the processing path through nested structures.
    [ctx] is prepended to the context list of the error. Exceptions other
    than {!Yamlrw_error} propagate unchanged.

    @param ctx Context description (e.g., "parsing mapping key")
    @param f Function to execute
    @raise Yamlrw_error re-raised with [ctx] prepended to its context *)
let with_context ctx f =
  try f () with
  | Yamlrw_error e ->
      (* Capture the backtrace before doing anything else so that the
         re-raise keeps pointing at the original failure site. *)
      let bt = Printexc.get_raw_backtrace () in
      Printexc.raise_with_backtrace
        (Yamlrw_error { e with context = ctx :: e.context })
        bt

(** {2 Error Formatting} *)

(** [kind_to_string kind] converts an error kind to a human-readable string. *)
let kind_to_string = function
  | Unexpected_character c -> Printf.sprintf "unexpected character %C" c
  | Unexpected_eof -> "unexpected end of input"
  | Invalid_escape_sequence s -> Printf.sprintf "invalid escape sequence: %s" s
  | Invalid_unicode_escape s -> Printf.sprintf "invalid unicode escape: %s" s
  | Invalid_hex_escape s -> Printf.sprintf "invalid hex escape: %s" s
  | Invalid_tag s -> Printf.sprintf "invalid tag: %s" s
  | Invalid_anchor s -> Printf.sprintf "invalid anchor: %s" s
  | Invalid_alias s -> Printf.sprintf "invalid alias: %s" s
  | Invalid_comment -> "comments must be separated from other tokens by whitespace"
  | Unclosed_single_quote -> "unclosed single quote"
  | Unclosed_double_quote -> "unclosed double quote"
  | Unclosed_flow_sequence -> "unclosed flow sequence '['"
  | Unclosed_flow_mapping -> "unclosed flow mapping '{'"
  | Invalid_indentation (expected, got) ->
      Printf.sprintf "invalid indentation: expected %d, got %d" expected got
  | Invalid_flow_indentation -> "invalid indentation in flow construct"
  | Tab_in_indentation -> "tab character in indentation"
  | Invalid_block_scalar_header s ->
      Printf.sprintf "invalid block scalar header: %s" s
  | Invalid_quoted_scalar_indentation s ->
      (* The payload already is the complete message. *)
      s
  | Invalid_directive s -> Printf.sprintf "invalid directive: %s" s
  | Invalid_yaml_version s -> Printf.sprintf "invalid YAML version: %s" s
  | Invalid_tag_directive s -> Printf.sprintf "invalid TAG directive: %s" s
  | Reserved_directive s -> Printf.sprintf "reserved directive: %s" s
  | Illegal_flow_key_line -> "key and ':' must be on the same line in flow context"
  | Block_sequence_disallowed -> "block sequence entries are not allowed in this context"
  | Unexpected_token s -> Printf.sprintf "unexpected token: %s" s
  | Expected_document_start -> "expected document start '---'"
  | Expected_document_end -> "expected document end '...'"
  | Expected_block_entry -> "expected block entry '-'"
  | Expected_key -> "expected mapping key"
  | Expected_value -> "expected mapping value"
  | Expected_node -> "expected node"
  | Expected_scalar -> "expected scalar"
  | Expected_sequence_end -> "expected sequence end ']'"
  | Expected_mapping_end -> "expected mapping end '}'"
  | Duplicate_anchor s -> Printf.sprintf "duplicate anchor: &%s" s
  | Undefined_alias s -> Printf.sprintf "undefined alias: *%s" s
  | Alias_cycle s -> Printf.sprintf "alias cycle detected: *%s" s
  | Multiple_documents -> "multiple documents found when single expected"
  | Mapping_key_too_long -> "mapping key too long (max 1024 characters)"
  | Invalid_scalar_conversion (value, typ) ->
      Printf.sprintf "cannot convert %S to %s" value typ
  | Type_mismatch (expected, got) ->
      Printf.sprintf "type mismatch: expected %s, got %s" expected got
  | Unresolved_alias s -> Printf.sprintf "unresolved alias: *%s" s
  | Key_not_found s -> Printf.sprintf "key not found: %s" s
  | Alias_expansion_node_limit n ->
      Printf.sprintf "alias expansion exceeded node limit (%d nodes)" n
  | Alias_expansion_depth_limit n ->
      Printf.sprintf "alias expansion exceeded depth limit (%d levels)" n
  | Invalid_encoding s -> Printf.sprintf "invalid encoding: %s" s
  | Scalar_contains_invalid_chars s ->
      Printf.sprintf "scalar contains invalid characters: %s" s
  | Anchor_not_set -> "anchor not set"
  | Invalid_state s -> Printf.sprintf "invalid state: %s" s
  | Custom s -> s

(* Printer used by [Printexc.to_string] and for uncaught exceptions.
   It is registered after [kind_to_string] so that every error kind is
   rendered with its real message rather than a generic placeholder. *)
let () =
  Printexc.register_printer (function
    | Yamlrw_error e ->
        let loc =
          match e.span with
          | None -> ""
          | Some span -> " at " ^ Span.to_string span
        in
        Some (Printf.sprintf "Yamlrw_error: %s%s" (kind_to_string e.kind) loc)
    | _ -> None)
(** [to_string t] converts an error to a human-readable string.

    The result is the message for [t.kind], followed by [" at <span>"]
    when a span is present, followed by [" (in a > b > ...)"] when the
    context stack is non-empty. The stored context list is reversed
    before being joined. *)
let to_string t =
  let loc =
    match t.span with
    | None -> ""
    | Some span -> " at " ^ Span.to_string span
  in
  let ctx =
    match t.context with
    | [] -> ""
    | ctxs -> " (in " ^ String.concat " > " (List.rev ctxs) ^ ")"
  in
  kind_to_string t.kind ^ loc ^ ctx

(** [pp fmt t] pretty-prints an error to a formatter. *)
let pp fmt t =
  Format.fprintf fmt "Yamlrw error: %s" (to_string t)

(** [pp_with_source ~source fmt t] pretty-prints an error with source context.

    Shows the error message followed by the relevant source line with
    a caret (^) pointing to the error location. Nothing beyond the message
    is printed when [t] has no span or when the span start line is not a
    valid line of [source].

    @param source The source text
    @param fmt Output formatter
    @param t The error to display *)
let pp_with_source ~source fmt t =
  (* Line numbers are 1-based here; anything outside the text yields [None].
     The explicit lower bound matters because [List.nth_opt] raises on a
     negative index. *)
  let extract_line source line_num =
    if line_num < 1 then None
    else List.nth_opt (String.split_on_char '\n' source) (line_num - 1)
  in
  pp fmt t;
  match t.span with
  | None -> ()
  | Some span -> (
      match extract_line source span.start.line with
      | None -> ()
      | Some line ->
          let line_no = string_of_int span.start.line in
          (* Blank gutter of the same width as the printed line number so
             the caret row lines up for any number of digits. *)
          let gutter = String.make (String.length line_no) ' ' in
          (* Assumes columns are 1-based (TODO confirm against Span); the
             clamp keeps a column of 0 or less from raising in String.make. *)
          let padding = String.make (max 0 (span.start.column - 1)) ' ' in
          Format.fprintf fmt "\n %s | %s\n" line_no line;
          Format.fprintf fmt " %s | %s^" gutter padding)