Pure OCaml Yaml 1.2 reader and writer using Bytesrw
1(*---------------------------------------------------------------------------
2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3 SPDX-License-Identifier: ISC
4 ---------------------------------------------------------------------------*)
5
6(** {1 Error Handling}
7
8 Comprehensive error reporting for YAML parsing and emission.
9
10 This module provides detailed error types that correspond to various
11 failure modes in YAML processing, as specified in the
12 {{:https://yaml.org/spec/1.2.2/}YAML 1.2.2 specification}.
13
14 Each error includes:
15 - A classification of the error type ({!type:kind})
16 - Optional source location information ({!type:Span.t})
17 - A context stack showing where the error occurred
18 - Optional source text for error display
19
20 See also {{:https://yaml.org/spec/1.2.2/#31-processes}Section 3.1 (Processes)}
21 for background on the YAML processing model. *)
22
(** {2 Error Classification}

    Error kinds are organized by the processing stage where they occur:
    - Scanner errors: Lexical analysis failures (character-level)
    - Parser errors: Syntax errors in event stream
    - Loader errors: Semantic errors during representation construction
    - Emitter errors: Failures during YAML generation

    A final [Custom] constructor carries a free-form message for errors
    that fit none of the above. The message rendered for each kind is
    defined by {!kind_to_string}. *)
type kind =
  (* Scanner errors - see {{:https://yaml.org/spec/1.2.2/#51-character-set}Section 5.1} *)
  | Unexpected_character of char
  (** Invalid character in input. See
      {{:https://yaml.org/spec/1.2.2/#51-character-set}Section 5.1 (Character Set)}. *)
  | Unexpected_eof
  (** Premature end of input. *)
  | Invalid_escape_sequence of string
  (** Invalid escape in double-quoted string. See
      {{:https://yaml.org/spec/1.2.2/#57-escaped-characters}Section 5.7 (Escaped Characters)}. *)
  | Invalid_unicode_escape of string
  (** Invalid Unicode escape sequence (\uXXXX or \UXXXXXXXX). See
      {{:https://yaml.org/spec/1.2.2/#57-escaped-characters}Section 5.7 (Escaped Characters)}. *)
  | Invalid_hex_escape of string
  (** Invalid hexadecimal escape sequence (\xXX). See
      {{:https://yaml.org/spec/1.2.2/#57-escaped-characters}Section 5.7 (Escaped Characters)}. *)
  | Invalid_tag of string
  (** Malformed tag syntax. See
      {{:https://yaml.org/spec/1.2.2/#691-node-tags}Section 6.9.1 (Node Tags)}. *)
  | Invalid_anchor of string
  (** Malformed anchor name. See
      {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 (Anchors and Aliases)}. *)
  | Invalid_alias of string
  (** Malformed alias reference. See
      {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 (Anchors and Aliases)}. *)
  | Invalid_comment
  (** Comment not properly separated from content. See
      {{:https://yaml.org/spec/1.2.2/#66-comments}Section 6.6 (Comments)}. *)
  | Unclosed_single_quote
  (** Unterminated single-quoted scalar. See
      {{:https://yaml.org/spec/1.2.2/#732-single-quoted-style}Section 7.3.2 (Single-Quoted Style)}. *)
  | Unclosed_double_quote
  (** Unterminated double-quoted scalar. See
      {{:https://yaml.org/spec/1.2.2/#731-double-quoted-style}Section 7.3.1 (Double-Quoted Style)}. *)
  | Unclosed_flow_sequence
  (** Missing closing bracket \] for flow sequence. See
      {{:https://yaml.org/spec/1.2.2/#741-flow-sequences}Section 7.4.1 (Flow Sequences)}. *)
  | Unclosed_flow_mapping
  (** Missing closing brace \} for flow mapping. See
      {{:https://yaml.org/spec/1.2.2/#742-flow-mappings}Section 7.4.2 (Flow Mappings)}. *)
  | Invalid_indentation of int * int
  (** Incorrect indentation level (expected, got). See
      {{:https://yaml.org/spec/1.2.2/#61-indentation-spaces}Section 6.1 (Indentation Spaces)}. *)
  | Invalid_flow_indentation
  (** Content in flow collection must be indented. See
      {{:https://yaml.org/spec/1.2.2/#74-flow-collection-styles}Section 7.4 (Flow Collection Styles)}. *)
  | Tab_in_indentation
  (** Tab character used for indentation (only spaces allowed). See
      {{:https://yaml.org/spec/1.2.2/#61-indentation-spaces}Section 6.1 (Indentation Spaces)}. *)
  | Invalid_block_scalar_header of string
  (** Malformed block scalar header (| or >). See
      {{:https://yaml.org/spec/1.2.2/#81-block-scalar-styles}Section 8.1 (Block Scalar Styles)}. *)
  | Invalid_quoted_scalar_indentation of string
  (** Incorrect indentation in quoted scalar. The payload is the complete
      message: {!kind_to_string} returns it without adding a prefix. *)
  | Invalid_directive of string
  (** Malformed directive. See
      {{:https://yaml.org/spec/1.2.2/#68-directives}Section 6.8 (Directives)}. *)
  | Invalid_yaml_version of string
  (** Unsupported YAML version in %YAML directive. See
      {{:https://yaml.org/spec/1.2.2/#681-yaml-directives}Section 6.8.1 (YAML Directives)}. *)
  | Invalid_tag_directive of string
  (** Malformed %TAG directive. See
      {{:https://yaml.org/spec/1.2.2/#682-tag-directives}Section 6.8.2 (TAG Directives)}. *)
  | Reserved_directive of string
  (** Reserved directive name. Reserved directives are described in
      {{:https://yaml.org/spec/1.2.2/#68-directives}Section 6.8 (Directives)}. *)
  | Illegal_flow_key_line
  (** Key and colon must be on same line in flow context. See
      {{:https://yaml.org/spec/1.2.2/#742-flow-mappings}Section 7.4.2 (Flow Mappings)}. *)
  | Block_sequence_disallowed
  (** Block sequence entries not allowed in this context. See
      {{:https://yaml.org/spec/1.2.2/#82-block-collection-styles}Section 8.2 (Block Collection Styles)}. *)

  (* Parser errors - see {{:https://yaml.org/spec/1.2.2/#chapter-3-processes-and-models}Chapter 3 (Processes and Models)} *)
  | Unexpected_token of string
  (** Unexpected token in event stream. *)
  | Expected_document_start
  (** Expected document start marker (---). See
      {{:https://yaml.org/spec/1.2.2/#912-document-markers}Section 9.1.2 (Document Markers)}. *)
  | Expected_document_end
  (** Expected document end marker (...). See
      {{:https://yaml.org/spec/1.2.2/#912-document-markers}Section 9.1.2 (Document Markers)}. *)
  | Expected_block_entry
  (** Expected block sequence entry marker (-). See
      {{:https://yaml.org/spec/1.2.2/#821-block-sequences}Section 8.2.1 (Block Sequences)}. *)
  | Expected_key
  (** Expected mapping key. See
      {{:https://yaml.org/spec/1.2.2/#822-block-mappings}Section 8.2.2 (Block Mappings)}. *)
  | Expected_value
  (** Expected mapping value after colon. See
      {{:https://yaml.org/spec/1.2.2/#822-block-mappings}Section 8.2.2 (Block Mappings)}. *)
  | Expected_node
  (** Expected a YAML node. *)
  | Expected_scalar
  (** Expected a scalar value. *)
  | Expected_sequence_end
  (** Expected closing bracket \] for flow sequence. See
      {{:https://yaml.org/spec/1.2.2/#741-flow-sequences}Section 7.4.1 (Flow Sequences)}. *)
  | Expected_mapping_end
  (** Expected closing brace \} for flow mapping. See
      {{:https://yaml.org/spec/1.2.2/#742-flow-mappings}Section 7.4.2 (Flow Mappings)}. *)
  | Duplicate_anchor of string
  (** Anchor name defined multiple times. The payload is the anchor name
      without the leading ampersand. See
      {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 (Anchors and Aliases)}. *)
  | Undefined_alias of string
  (** Alias references non-existent anchor. The payload is the alias name
      without the leading asterisk. See
      {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 (Anchors and Aliases)}. *)
  | Alias_cycle of string
  (** Circular reference in alias chain. The payload is the alias name
      without the leading asterisk. See
      {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 (Anchors and Aliases)}. *)
  | Multiple_documents
  (** Multiple documents found when single document expected. See
      {{:https://yaml.org/spec/1.2.2/#912-document-markers}Section 9.1.2 (Document Markers)}. *)
  | Mapping_key_too_long
  (** Mapping key exceeds maximum length (1024 characters). *)

  (* Loader errors - see {{:https://yaml.org/spec/1.2.2/#31-processes}Section 3.1 (Processes)} *)
  | Invalid_scalar_conversion of string * string
  (** Cannot convert scalar value to target type (value, target type).
      See {{:https://yaml.org/spec/1.2.2/#103-core-schema}Section 10.3 (Core Schema)}. *)
  | Type_mismatch of string * string
  (** Value has wrong type for operation (expected, got). *)
  | Unresolved_alias of string
  (** Alias encountered during conversion but not resolved. The payload is
      the alias name without the leading asterisk.
      See {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 (Anchors and Aliases)}. *)
  | Key_not_found of string
  (** Mapping key not found. *)
  | Alias_expansion_node_limit of int
  (** Alias expansion exceeded maximum node count (protection against billion laughs attack).
      The payload is the node limit that was exceeded.
      See {{:https://yaml.org/spec/1.2.2/#31-processes}Section 3.1 (Processes)}.

      The "billion laughs attack" (also known as an XML bomb) is a denial-of-service
      attack where a small YAML document expands to enormous size through recursive
      alias expansion. This limit prevents such attacks. *)
  | Alias_expansion_depth_limit of int
  (** Alias expansion exceeded maximum nesting depth (protection against deeply nested aliases).
      The payload is the depth limit that was exceeded.
      See {{:https://yaml.org/spec/1.2.2/#31-processes}Section 3.1 (Processes)}. *)

  (* Emitter errors *)
  | Invalid_encoding of string
  (** Invalid character encoding specified. See
      {{:https://yaml.org/spec/1.2.2/#51-character-set}Section 5.1 (Character Set)}. *)
  | Scalar_contains_invalid_chars of string
  (** Scalar contains characters invalid for chosen style. *)
  | Anchor_not_set
  (** Attempted to emit alias before anchor was defined. *)
  | Invalid_state of string
  (** Emitter in invalid state for requested operation. *)

  (* Generic *)
  | Custom of string
  (** Custom error message. The payload is used verbatim as the message. *)
180
(** {2 Error Value}

    Full error information including classification, location, and context.

    Values of this type are built with {!make} and carried as the payload
    of {!Yamlrw_error}. *)
type t = {
  kind : kind;
  (** The specific error classification. *)
  span : Span.t option;
  (** Source location where the error occurred (if available). *)
  context : string list;
  (** Context stack showing the processing path leading to the error.

      {!with_context} pushes each new entry at the head of this list, so
      the head is the entry added last. {!to_string} prints the entries in
      reverse list order, joined with [" > "]. *)
  source : string option;
  (** Source text for displaying the error in context.

      Within this module the field is only stored by {!make};
      {!pp_with_source} takes the source text as a separate argument and
      does not read this field. *)
}
194
(** {2 Exception}

    The main exception type raised by all yamlrw operations.

    All parsing, loading, and emitting errors are reported by raising
    this exception with detailed error information.

    Within this module it is raised by {!raise}, {!raise_at} and
    {!raise_span}, and re-raised with an extended context by
    {!with_context}. The payload can be rendered with {!to_string},
    {!pp} or {!pp_with_source}. *)
exception Yamlrw_error of t
202
(* Register a [Printexc] printer for [Yamlrw_error] at module
   initialisation, so that an uncaught error is shown with its message and
   location rather than as an opaque constructor application.

   NOTE(review): only [Custom] kinds print their message here; every other
   kind is rendered as the literal word "error". The full rendering lives in
   [kind_to_string], which is defined further down in this file and is
   therefore not in scope at this point. Consider moving this registration
   below [to_string] and delegating to it. *)
let () =
  let describe = function
    | Yamlrw_error { kind; span; _ } ->
        let message =
          match kind with
          | Custom text -> text
          | _ -> "error"
        in
        let location =
          match span with
          | Some sp -> " at " ^ Span.to_string sp
          | None -> ""
        in
        Some ("Yamlrw_error: " ^ message ^ location)
    | _ -> None
  in
  Printexc.register_printer describe
213
(** {2 Error Construction} *)

(** [make ?span ?context ?source kind] builds an error record of type {!t}
    without raising it.

    @param span Source location; the [span] field is [None] when omitted
    @param context Context stack; the empty list when omitted
    @param source Source text; the [source] field is [None] when omitted
    @param kind Error classification
    @return the assembled error value *)
let make ?span ?context ?source kind =
  let context =
    match context with
    | Some stack -> stack
    | None -> []
  in
  { kind; span; context; source }
224
(** [raise ?span ?context ?source kind] builds an error with {!make} and
    raises it wrapped in {!Yamlrw_error}. It never returns normally.

    This is the primary way to report errors in yamlrw.

    Note that this definition shadows [Stdlib.raise] for the remainder of
    the module, which is why the standard function is referred to by its
    qualified name.

    @param span Source location
    @param context Context stack
    @param source Source text
    @param kind Error classification
    @raise Yamlrw_error always *)
let raise ?span ?context ?source kind =
  let err = make ?span ?context ?source kind in
  Stdlib.raise (Yamlrw_error err)
236
(** [raise_at pos kind] raises an error of the given [kind] whose span is
    [Span.point pos]. The context is empty and no source text is attached.

    @param pos Source position
    @param kind Error classification
    @raise Yamlrw_error always *)
let raise_at pos kind = raise ~span:(Span.point pos) kind
245
(** [raise_span span kind] raises an error of the given [kind] located at
    [span]. The context is empty and no source text is attached.

    @param span Source span
    @param kind Error classification
    @raise Yamlrw_error always *)
let raise_span span kind = raise ?span:(Some span) kind
253
(** [with_context ctx f] executes [f ()] and adds [ctx] to any raised error's context.

    This is useful for tracking the processing path through nested structures.

    If [f ()] returns normally, its result is returned unchanged. If it
    raises {!Yamlrw_error}, the error is re-raised with [ctx] pushed at the
    head of its [context] list; all other fields are kept. The backtrace of
    the original raise is preserved, so diagnostics still point at the place
    where the error was first detected. Any other exception propagates
    untouched.

    @param ctx Context description (e.g., "parsing mapping key")
    @param f Function to execute
    @return the result of [f ()]
    @raise Yamlrw_error if [f ()] raises it, with [ctx] added to the context *)
let with_context ctx f =
  try f () with
  | Yamlrw_error e ->
      (* Capture the backtrace before doing anything else: it describes the
         most recently raised exception and would otherwise be replaced by
         a trace starting at this handler. *)
      let backtrace = Printexc.get_raw_backtrace () in
      Printexc.raise_with_backtrace
        (Yamlrw_error { e with context = ctx :: e.context })
        backtrace
264
(** {2 Error Formatting} *)

(** [kind_to_string kind] renders [kind] as a short human-readable message.

    Kinds that carry a payload embed it in the message. [Custom] and
    [Invalid_quoted_scalar_indentation] return their payload verbatim.
    The result contains neither location nor context; see {!to_string}
    for the complete rendering. *)
let kind_to_string kind =
  (* [detail label payload] is "label: payload". *)
  let detail label payload = label ^ ": " ^ payload in
  match kind with
  (* Scanner errors *)
  | Unexpected_character ch -> Printf.sprintf "unexpected character %C" ch
  | Unexpected_eof -> "unexpected end of input"
  | Invalid_escape_sequence seq -> detail "invalid escape sequence" seq
  | Invalid_unicode_escape seq -> detail "invalid unicode escape" seq
  | Invalid_hex_escape seq -> detail "invalid hex escape" seq
  | Invalid_tag tag -> detail "invalid tag" tag
  | Invalid_anchor name -> detail "invalid anchor" name
  | Invalid_alias name -> detail "invalid alias" name
  | Invalid_comment ->
      "comments must be separated from other tokens by whitespace"
  | Unclosed_single_quote -> "unclosed single quote"
  | Unclosed_double_quote -> "unclosed double quote"
  | Unclosed_flow_sequence -> "unclosed flow sequence '['"
  | Unclosed_flow_mapping -> "unclosed flow mapping '{'"
  | Invalid_indentation (want, have) ->
      Printf.sprintf "invalid indentation: expected %d, got %d" want have
  | Invalid_flow_indentation -> "invalid indentation in flow construct"
  | Tab_in_indentation -> "tab character in indentation"
  | Invalid_block_scalar_header header ->
      detail "invalid block scalar header" header
  | Invalid_quoted_scalar_indentation message -> message
  | Invalid_directive text -> detail "invalid directive" text
  | Invalid_yaml_version version -> detail "invalid YAML version" version
  | Invalid_tag_directive text -> detail "invalid TAG directive" text
  | Reserved_directive name -> detail "reserved directive" name
  | Illegal_flow_key_line ->
      "key and ':' must be on the same line in flow context"
  | Block_sequence_disallowed ->
      "block sequence entries are not allowed in this context"
  (* Parser errors *)
  | Unexpected_token token -> detail "unexpected token" token
  | Expected_document_start -> "expected document start '---'"
  | Expected_document_end -> "expected document end '...'"
  | Expected_block_entry -> "expected block entry '-'"
  | Expected_key -> "expected mapping key"
  | Expected_value -> "expected mapping value"
  | Expected_node -> "expected node"
  | Expected_scalar -> "expected scalar"
  | Expected_sequence_end -> "expected sequence end ']'"
  | Expected_mapping_end -> "expected mapping end '}'"
  | Duplicate_anchor name -> detail "duplicate anchor" ("&" ^ name)
  | Undefined_alias name -> detail "undefined alias" ("*" ^ name)
  | Alias_cycle name -> detail "alias cycle detected" ("*" ^ name)
  | Multiple_documents -> "multiple documents found when single expected"
  | Mapping_key_too_long -> "mapping key too long (max 1024 characters)"
  (* Loader errors *)
  | Invalid_scalar_conversion (value, target) ->
      Printf.sprintf "cannot convert %S to %s" value target
  | Type_mismatch (want, have) ->
      detail "type mismatch" ("expected " ^ want ^ ", got " ^ have)
  | Unresolved_alias name -> detail "unresolved alias" ("*" ^ name)
  | Key_not_found key -> detail "key not found" key
  | Alias_expansion_node_limit limit ->
      Printf.sprintf "alias expansion exceeded node limit (%d nodes)" limit
  | Alias_expansion_depth_limit limit ->
      Printf.sprintf "alias expansion exceeded depth limit (%d levels)" limit
  (* Emitter errors *)
  | Invalid_encoding name -> detail "invalid encoding" name
  | Scalar_contains_invalid_chars text ->
      detail "scalar contains invalid characters" text
  | Anchor_not_set -> "anchor not set"
  | Invalid_state state -> detail "invalid state" state
  (* Generic *)
  | Custom message -> message
327
(** [to_string t] renders a complete one-line description of the error [t].

    The result is the message from {!kind_to_string}, followed by
    [" at <span>"] when a span is present, followed by
    [" (in a > b > ...)"] when the context stack is non-empty. Context
    entries are printed in reverse list order, i.e. the last element of
    [t.context] comes first. *)
let to_string t =
  let where =
    match t.span with
    | Some span -> Printf.sprintf " at %s" (Span.to_string span)
    | None -> ""
  in
  let trail =
    match List.rev t.context with
    | [] -> ""
    | path -> Printf.sprintf " (in %s)" (String.concat " > " path)
  in
  String.concat "" [ kind_to_string t.kind; where; trail ]
341
(** [pp fmt t] writes ["Yamlrw error: "] followed by [to_string t] to the
    formatter [fmt]. No trailing newline is emitted. *)
let pp fmt t =
  (* Render first so that nothing is written if rendering fails. *)
  let message = to_string t in
  Format.pp_print_string fmt "Yamlrw error: ";
  Format.pp_print_string fmt message
345
(** [pp_with_source ~source fmt t] pretty-prints an error with source context.

    Prints the same message as {!pp}. If [t] has a span and the span's
    start line exists in [source], the message is followed by that source
    line, prefixed with its line number, and by a second line carrying a
    caret (^) under the start column.

    Lines are numbered from 1 and [source] is split on ['\n'] only. The
    caret line's gutter is padded to the width of the printed line number,
    so the caret stays aligned for any number of digits. The caret is
    indented by [column - 1] spaces, clamped at zero, so a start column
    below 1 puts the caret at the left edge instead of raising
    [Invalid_argument].

    Nothing beyond the message is printed when [t] has no span or when the
    start line is outside [source]. The [source] field of [t] is not
    consulted.

    @param source The source text
    @param fmt Output formatter
    @param t The error to display *)
let pp_with_source ~source fmt t =
  (* Line [line_num] (1-based) of [text], or [None] when out of range. *)
  let extract_line text line_num =
    if line_num < 1 then None
    else List.nth_opt (String.split_on_char '\n' text) (line_num - 1)
  in
  pp fmt t;
  match t.span with
  | None -> ()
  | Some span -> (
      match extract_line source span.start.line with
      | None -> ()
      | Some line ->
          Format.fprintf fmt "\n %d | %s\n" span.start.line line;
          (* Blank gutter as wide as the line number printed above. *)
          let gutter =
            String.make (String.length (string_of_int span.start.line)) ' '
          in
          (* String.make raises Invalid_argument on a negative length. *)
          let padding = String.make (max 0 (span.start.column - 1)) ' ' in
          Format.fprintf fmt " %s | %s^" gutter padding)
372 Format.fprintf fmt " | %s^" padding