Pure OCaml Yaml 1.2 reader and writer using Bytesrw
1(*---------------------------------------------------------------------------
2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3 SPDX-License-Identifier: ISC
4 ---------------------------------------------------------------------------*)
5
6(** {1 Error Handling}
7
8 Comprehensive error reporting for YAML parsing and emission.
9
10 This module provides detailed error types that correspond to various failure
11 modes in YAML processing, as specified in the
12 {{:https://yaml.org/spec/1.2.2/}YAML 1.2.2 specification}.
13
14 Each error includes:
15 - A classification of the error type ({!type:kind})
16 - Optional source location information ({!type:Span.t})
17 - A context stack showing where the error occurred
18 - Optional source text for error display
19
20 See also
21 {{:https://yaml.org/spec/1.2.2/#31-processes}Section 3.1 (Processes)} for
22 background on the YAML processing model. *)
23
(** {2 Error Classification}

    Error kinds are organized by the processing stage where they occur:
    - Scanner errors: Lexical analysis failures (character-level)
    - Parser errors: Syntax errors in event stream
    - Loader errors: Semantic errors during representation construction
    - Emitter errors: Failures during YAML generation
    - Generic: [Custom] carries a free-form message

    Section references point into the YAML 1.2.2 specification. *)
type kind =
  (* Scanner errors - see {{:https://yaml.org/spec/1.2.2/#51-character-set}Section 5.1} *)
  | Unexpected_character of char
      (** Invalid character in input. See
          {{:https://yaml.org/spec/1.2.2/#51-character-set}Section 5.1
           (Character Set)}. *)
  | Unexpected_eof  (** Premature end of input. *)
  | Invalid_escape_sequence of string
      (** Invalid escape in double-quoted string. See
          {{:https://yaml.org/spec/1.2.2/#57-escaped-characters}Section 5.7
           (Escaped Characters)}. *)
  | Invalid_unicode_escape of string
      (** Invalid Unicode escape sequence (\uXXXX or \UXXXXXXXX). *)
  | Invalid_hex_escape of string
      (** Invalid hexadecimal escape sequence (\xXX). *)
  | Invalid_tag of string
      (** Malformed tag syntax. See
          {{:https://yaml.org/spec/1.2.2/#691-node-tags}Section 6.9.1 (Node
           Tags)}. *)
  | Invalid_anchor of string
      (** Malformed anchor name. See
          {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section
           3.2.2.2 (Anchors and Aliases)}. *)
  | Invalid_alias of string
      (** Malformed alias reference. See
          {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section
           3.2.2.2 (Anchors and Aliases)}. *)
  | Invalid_comment
      (** Comment not properly separated from content. See
          {{:https://yaml.org/spec/1.2.2/#66-comments}Section 6.6 (Comments)}.
      *)
  | Unclosed_single_quote
      (** Unterminated single-quoted scalar. See
          {{:https://yaml.org/spec/1.2.2/#732-single-quoted-style}Section 7.3.2
           (Single-Quoted Style)}. *)
  | Unclosed_double_quote
      (** Unterminated double-quoted scalar. See
          {{:https://yaml.org/spec/1.2.2/#731-double-quoted-style}Section 7.3.1
           (Double-Quoted Style)}. *)
  | Unclosed_flow_sequence
      (** Missing closing bracket \] for flow sequence. See
          {{:https://yaml.org/spec/1.2.2/#741-flow-sequences}Section 7.4.1 (Flow
           Sequences)}. *)
  | Unclosed_flow_mapping
      (** Missing closing brace \} for flow mapping. See
          {{:https://yaml.org/spec/1.2.2/#742-flow-mappings}Section 7.4.2 (Flow
           Mappings)}. *)
  | Invalid_indentation of int * int
      (** Incorrect indentation level (expected, got). See
          {{:https://yaml.org/spec/1.2.2/#61-indentation-spaces}Section 6.1
           (Indentation Spaces)}. *)
  | Invalid_flow_indentation
      (** Content in flow collection must be indented. See
          {{:https://yaml.org/spec/1.2.2/#74-flow-collection-styles}Section 7.4
           (Flow Collection Styles)}. *)
  | Tab_in_indentation
      (** Tab character used for indentation (only spaces allowed). See
          {{:https://yaml.org/spec/1.2.2/#61-indentation-spaces}Section 6.1
           (Indentation Spaces)}. *)
  | Invalid_block_scalar_header of string
      (** Malformed block scalar header (| or >). See
          {{:https://yaml.org/spec/1.2.2/#81-block-scalar-styles}Section 8.1
           (Block Scalar Styles)}. *)
  | Invalid_quoted_scalar_indentation of string
      (** Incorrect indentation in quoted scalar. The payload is the complete
          message: [kind_to_string] returns it without adding a prefix. *)
  | Invalid_directive of string
      (** Malformed directive. See
          {{:https://yaml.org/spec/1.2.2/#68-directives}Section 6.8
           (Directives)}. *)
  | Invalid_yaml_version of string
      (** Unsupported YAML version in %YAML directive. See
          {{:https://yaml.org/spec/1.2.2/#681-yaml-directives}Section 6.8.1
           (YAML Directives)}. *)
  | Invalid_tag_directive of string
      (** Malformed %TAG directive. See
          {{:https://yaml.org/spec/1.2.2/#682-tag-directives}Section 6.8.2 (TAG
           Directives)}. *)
  | Reserved_directive of string
      (** Reserved directive name. Reserved directives are described in
          {{:https://yaml.org/spec/1.2.2/#68-directives}Section 6.8
           (Directives)}. *)
  | Illegal_flow_key_line
      (** Key and colon must be on same line in flow context. See
          {{:https://yaml.org/spec/1.2.2/#742-flow-mappings}Section 7.4.2 (Flow
           Mappings)}. *)
  | Block_sequence_disallowed
      (** Block sequence entries not allowed in this context. See
          {{:https://yaml.org/spec/1.2.2/#82-block-collection-styles}Section 8.2
           (Block Collection Styles)}. *)
  (* Parser errors - see {{:https://yaml.org/spec/1.2.2/#chapter-3-processes-and-models}Chapter 3 (Processes and Models)} *)
  | Unexpected_token of string  (** Unexpected token in event stream. *)
  | Expected_document_start
      (** Expected document start marker (---). See
          {{:https://yaml.org/spec/1.2.2/#912-document-markers}Section 9.1.2
           (Document Markers)}. *)
  | Expected_document_end
      (** Expected document end marker (...). See
          {{:https://yaml.org/spec/1.2.2/#912-document-markers}Section 9.1.2
           (Document Markers)}. *)
  | Expected_block_entry
      (** Expected block sequence entry marker (-). See
          {{:https://yaml.org/spec/1.2.2/#821-block-sequences}Section 8.2.1
           (Block Sequences)}. *)
  | Expected_key
      (** Expected mapping key. See
          {{:https://yaml.org/spec/1.2.2/#822-block-mappings}Section 8.2.2
           (Block Mappings)}. *)
  | Expected_value
      (** Expected mapping value after colon. See
          {{:https://yaml.org/spec/1.2.2/#822-block-mappings}Section 8.2.2
           (Block Mappings)}. *)
  | Expected_node  (** Expected a YAML node. *)
  | Expected_scalar  (** Expected a scalar value. *)
  | Expected_sequence_end
      (** Expected closing bracket \] for flow sequence. See
          {{:https://yaml.org/spec/1.2.2/#741-flow-sequences}Section 7.4.1 (Flow
           Sequences)}. *)
  | Expected_mapping_end
      (** Expected closing brace \} for flow mapping. See
          {{:https://yaml.org/spec/1.2.2/#742-flow-mappings}Section 7.4.2 (Flow
           Mappings)}. *)
  | Duplicate_anchor of string
      (** Anchor name defined multiple times. See
          {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section
           3.2.2.2 (Anchors and Aliases)}. *)
  | Undefined_alias of string
      (** Alias references non-existent anchor. See
          {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section
           3.2.2.2 (Anchors and Aliases)}. *)
  | Alias_cycle of string
      (** Circular reference in alias chain. See
          {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section
           3.2.2.2 (Anchors and Aliases)}. *)
  | Multiple_documents
      (** Multiple documents found when single document expected. See
          {{:https://yaml.org/spec/1.2.2/#912-document-markers}Section 9.1.2
           (Document Markers)}. *)
  | Mapping_key_too_long
      (** Mapping key exceeds maximum length (1024 characters). *)
  (* Loader errors - see {{:https://yaml.org/spec/1.2.2/#31-processes}Section 3.1 (Processes)} *)
  | Invalid_scalar_conversion of string * string
      (** Cannot convert scalar value to target type (value, target type). See
          {{:https://yaml.org/spec/1.2.2/#103-core-schema}Section 10.3 (Core
           Schema)}. *)
  | Type_mismatch of string * string
      (** Value has wrong type for operation (expected, got). *)
  | Unresolved_alias of string
      (** Alias encountered during conversion but not resolved. See
          {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section
           3.2.2.2 (Anchors and Aliases)}. *)
  | Key_not_found of string  (** Mapping key not found. *)
  | Alias_expansion_node_limit of int
      (** Alias expansion exceeded maximum node count (protection against
          billion laughs attack). See
          {{:https://yaml.org/spec/1.2.2/#31-processes}Section 3.1
           (Processes)}.

          The "billion laughs attack" (also known as an XML bomb) is a
          denial-of-service attack where a small YAML document expands to
          enormous size through recursive alias expansion. This limit prevents
          such attacks. *)
  | Alias_expansion_depth_limit of int
      (** Alias expansion exceeded maximum nesting depth (protection against
          deeply nested aliases). See
          {{:https://yaml.org/spec/1.2.2/#31-processes}Section 3.1
           (Processes)}. *)
  (* Emitter errors *)
  | Invalid_encoding of string
      (** Invalid character encoding specified. See
          {{:https://yaml.org/spec/1.2.2/#51-character-set}Section 5.1
           (Character Set)}. *)
  | Scalar_contains_invalid_chars of string
      (** Scalar contains characters invalid for chosen style. *)
  | Anchor_not_set  (** Attempted to emit alias before anchor was defined. *)
  | Invalid_state of string
      (** Emitter in invalid state for requested operation. *)
  (* Generic *)
  | Custom of string  (** Custom error message. *)
209
type t = {
  kind : kind;  (** The specific error classification. *)
  span : Span.t option;
      (** Source location where the error occurred (if available). *)
  context : string list;
      (** Context stack showing the processing path leading to the error.

          [with_context] pushes each enclosing context onto the front of this
          list, so the head is the outermost context. [to_string] prints the
          list reversed. *)
  source : string option;
      (** Source text for displaying the error in context. *)
}
(** {2 Error Value}

    Full error information including classification, location, and context.
    Values are built with [make]; every field except [kind] is optional
    there. *)
222
exception Yamlrw_error of t
(** {2 Exception}

    The main exception type raised by all yamlrw operations.

    All parsing, loading, and emitting errors are reported by raising this
    exception with detailed error information. In this module it is raised by
    [raise], [raise_at] and [raise_span], and re-raised with an extra context
    entry by [with_context]. *)
230
(* Teach [Printexc] how to render [Yamlrw_error], so that uncaught errors and
   [Printexc.to_string] show more than the bare constructor name.

   The rendered text is "Yamlrw_error: " followed by a description and, when a
   span is attached, " at " plus [Span.to_string] of that span.

   NOTE(review): only [Custom] kinds contribute their message; every other
   kind is described by the fixed word "error". The richer [kind_to_string]
   is defined further down the file and is therefore not in scope here. *)
let () =
  let describe = function
    | Yamlrw_error { kind; span; _ } ->
        let what = match kind with Custom msg -> msg | _ -> "error" in
        let where =
          match span with
          | Some located -> " at " ^ Span.to_string located
          | None -> ""
        in
        Some (Printf.sprintf "Yamlrw_error: %s%s" what where)
    | _ -> None
  in
  Printexc.register_printer describe
244
245(** {2 Error Construction} *)
246
(** [make ?span ?context ?source kind] builds an error value without raising
    it.

    @param span Source location; absent when omitted
    @param context Context stack; the empty list when omitted
    @param source Source text; absent when omitted
    @param kind Error classification *)
let make ?span ?context ?source kind =
  let context = match context with Some stack -> stack | None -> [] in
  { source; context; span; kind }
254
(** [raise ?span ?context ?source kind] builds an error with {!make} and
    raises it wrapped in {!Yamlrw_error}. It never returns.

    This definition shadows [Stdlib.raise] for the rest of the module.

    @param span Source location
    @param context Context stack
    @param source Source text
    @param kind Error classification
    @raise Yamlrw_error always *)
let raise ?span ?context ?source kind =
  let error = make ?span ?context ?source kind in
  Stdlib.raise (Yamlrw_error error)
266
(** [raise_at pos kind] raises an error located at the single position [pos].

    The position is turned into a span with [Span.point]; no context or source
    text is attached.

    @param pos Source position
    @param kind Error classification
    @raise Yamlrw_error always *)
let raise_at pos kind = raise ~span:(Span.point pos) kind
275
(** [raise_span span kind] raises an error covering [span].

    No context or source text is attached.

    @param span Source span
    @param kind Error classification
    @raise Yamlrw_error always *)
let raise_span span = fun kind -> raise ~span kind
282
(** [with_context ctx f] runs [f ()] and returns its result. If [f] raises
    {!Yamlrw_error}, the error is re-raised with [ctx] pushed onto the front of
    its context list; any other exception propagates untouched.

    Nested calls therefore leave the outermost context at the head of the list
    and the innermost one at the tail.

    The re-raise keeps the backtrace of the original failure, so debugging
    output still points at the place where the error was first raised rather
    than at this wrapper.

    @param ctx Context description (e.g., "parsing mapping key")
    @param f Function to execute
    @raise Yamlrw_error if [f] raised it, with [ctx] added to the context *)
let with_context ctx f =
  try f ()
  with Yamlrw_error e ->
    (* Grab the backtrace before doing anything else: it describes the most
       recently raised exception and later work could overwrite it. *)
    let backtrace = Printexc.get_raw_backtrace () in
    Printexc.raise_with_backtrace
      (Yamlrw_error { e with context = ctx :: e.context })
      backtrace
294
295(** {2 Error Formatting} *)
296
(** [kind_to_string kind] renders [kind] as a short human-readable message.

    Kinds that carry a payload embed it in the message. [Custom] and
    [Invalid_quoted_scalar_indentation] return their payload as the whole
    message, without any prefix. *)
let kind_to_string kind =
  let msg = Printf.sprintf in
  match kind with
  (* Scanner errors *)
  | Unexpected_character ch -> msg "unexpected character %C" ch
  | Unexpected_eof -> "unexpected end of input"
  | Invalid_escape_sequence detail -> msg "invalid escape sequence: %s" detail
  | Invalid_unicode_escape detail -> msg "invalid unicode escape: %s" detail
  | Invalid_hex_escape detail -> msg "invalid hex escape: %s" detail
  | Invalid_tag detail -> msg "invalid tag: %s" detail
  | Invalid_anchor detail -> msg "invalid anchor: %s" detail
  | Invalid_alias detail -> msg "invalid alias: %s" detail
  | Invalid_comment ->
      "comments must be separated from other tokens by whitespace"
  | Unclosed_single_quote -> "unclosed single quote"
  | Unclosed_double_quote -> "unclosed double quote"
  | Unclosed_flow_sequence -> "unclosed flow sequence '['"
  | Unclosed_flow_mapping -> "unclosed flow mapping '{'"
  | Invalid_indentation (wanted, actual) ->
      msg "invalid indentation: expected %d, got %d" wanted actual
  | Invalid_flow_indentation -> "invalid indentation in flow construct"
  | Tab_in_indentation -> "tab character in indentation"
  | Invalid_block_scalar_header detail ->
      msg "invalid block scalar header: %s" detail
  (* The payload already is the full message. *)
  | Invalid_quoted_scalar_indentation detail -> detail
  | Invalid_directive detail -> msg "invalid directive: %s" detail
  | Invalid_yaml_version detail -> msg "invalid YAML version: %s" detail
  | Invalid_tag_directive detail -> msg "invalid TAG directive: %s" detail
  | Reserved_directive detail -> msg "reserved directive: %s" detail
  | Illegal_flow_key_line ->
      "key and ':' must be on the same line in flow context"
  | Block_sequence_disallowed ->
      "block sequence entries are not allowed in this context"
  (* Parser errors *)
  | Unexpected_token detail -> msg "unexpected token: %s" detail
  | Expected_document_start -> "expected document start '---'"
  | Expected_document_end -> "expected document end '...'"
  | Expected_block_entry -> "expected block entry '-'"
  | Expected_key -> "expected mapping key"
  | Expected_value -> "expected mapping value"
  | Expected_node -> "expected node"
  | Expected_scalar -> "expected scalar"
  | Expected_sequence_end -> "expected sequence end ']'"
  | Expected_mapping_end -> "expected mapping end '}'"
  | Duplicate_anchor name -> msg "duplicate anchor: &%s" name
  | Undefined_alias name -> msg "undefined alias: *%s" name
  | Alias_cycle name -> msg "alias cycle detected: *%s" name
  | Multiple_documents -> "multiple documents found when single expected"
  | Mapping_key_too_long -> "mapping key too long (max 1024 characters)"
  (* Loader errors *)
  | Invalid_scalar_conversion (value, target) ->
      msg "cannot convert %S to %s" value target
  | Type_mismatch (wanted, actual) ->
      msg "type mismatch: expected %s, got %s" wanted actual
  | Unresolved_alias name -> msg "unresolved alias: *%s" name
  | Key_not_found key -> msg "key not found: %s" key
  | Alias_expansion_node_limit count ->
      msg "alias expansion exceeded node limit (%d nodes)" count
  | Alias_expansion_depth_limit count ->
      msg "alias expansion exceeded depth limit (%d levels)" count
  (* Emitter errors *)
  | Invalid_encoding detail -> msg "invalid encoding: %s" detail
  | Scalar_contains_invalid_chars detail ->
      msg "scalar contains invalid characters: %s" detail
  | Anchor_not_set -> "anchor not set"
  | Invalid_state detail -> msg "invalid state: %s" detail
  (* Generic *)
  | Custom text -> text
359
(** [to_string t] renders an error as a single human-readable string.

    The result is the {!kind_to_string} message, followed by the span location
    when one is present, and then by the context stack in parentheses when it
    is non-empty. Context entries are printed in the reverse of their stored
    order, which puts the innermost context added by {!with_context} first. *)
let to_string t =
  let location =
    match t.span with
    | Some span -> " at " ^ Span.to_string span
    | None -> ""
  in
  let trail =
    match List.rev t.context with
    | [] -> ""
    | frames -> String.concat "" [ " (in "; String.concat " > " frames; ")" ]
  in
  String.concat "" [ kind_to_string t.kind; location; trail ]
373
(** [pp fmt t] prints the {!to_string} rendering of [t] on [fmt], prefixed
    with "Yamlrw error: ". No trailing newline is written. *)
let pp fmt t =
  let message = to_string t in
  Format.fprintf fmt "Yamlrw error: %s" message
376
(** [pp_with_source ~source fmt t] pretty-prints an error with source context.

    Prints the same message as {!pp}. When [t] carries a span whose start line
    exists in [source], the message is followed by that line, prefixed with
    its line number, and by a second row holding a caret ([^]) under the start
    column. Nothing extra is printed when there is no span or the line number
    falls outside [source].

    The gutter of the caret row is padded to the width of the line number, so
    the caret stays aligned whatever the number of digits. A start column
    below 1 is clamped to the first column instead of raising
    [Invalid_argument], and a carriage return left at the end of the line by
    CRLF input is not echoed.

    Lines are taken to be 1-based and separated by a newline character; the
    column is treated as 1-based as well.

    @param source The source text the error positions refer to
    @param fmt Output formatter
    @param t The error to display *)
let pp_with_source ~source fmt t =
  (* Drop the carriage return that splitting CRLF text on newline leaves. *)
  let strip_cr line =
    let len = String.length line in
    if len > 0 && line.[len - 1] = '\r' then String.sub line 0 (len - 1)
    else line
  in
  (* [List.nth_opt] rejects negative indices, hence the explicit guard. *)
  let extract_line line_num =
    if line_num < 1 then None
    else
      match List.nth_opt (String.split_on_char '\n' source) (line_num - 1) with
      | Some line -> Some (strip_cr line)
      | None -> None
  in
  pp fmt t;
  match t.span with
  | None -> ()
  | Some span -> (
      let line_num = span.start.line in
      match extract_line line_num with
      | None -> ()
      | Some line ->
          (* The source row starts with a space, the line number and a space
             before the bar; give the caret row the same width. *)
          let gutter =
            String.make (String.length (string_of_int line_num) + 2) ' '
          in
          let padding = String.make (max 0 (span.start.column - 1)) ' ' in
          Format.fprintf fmt "\n %d | %s\n" line_num line;
          Format.fprintf fmt "%s| %s^" gutter padding)