Pure OCaml YAML 1.2 reader and writer using Bytesrw
1(*---------------------------------------------------------------------------
2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3 SPDX-License-Identifier: ISC
4 ---------------------------------------------------------------------------*)
5
6(** {1 Yamlrw - A Pure OCaml YAML Parser and Emitter}
7
8 Yamlrw is a pure OCaml implementation of YAML 1.1/1.2 parsing and emission.
9 It provides both a high-level JSON-compatible interface and a lower-level
10 streaming API for fine-grained control.
11
12 {2 Quick Start}
13
14 Parse a YAML string:
15 {[
16 let value = Yamlrw.of_string "name: Alice\nage: 30" in
17 match value with
18 | `O [("name", `String "Alice"); ("age", `Float 30.)] -> ...
19 | _ -> ...
20 ]}
21
22 Serialize to YAML:
23 {[
24 let yaml = `O [("name", `String "Bob"); ("active", `Bool true)] in
25 let s = Yamlrw.to_string yaml in
26 (* "name: Bob\nactive: true\n" *)
27 ]}
28
29 Use the Util module for convenient access:
30 {[
31 let name = Yamlrw.Util.(get_string (get "name" value)) in
32 let age = Yamlrw.Util.(get_int (get "age" value)) in
33 ]} *)
34
35
36(** {2 Error Handling} *)
37
38module Error = Error
(** Error descriptions; an {!Error.t} is the payload of {!Yamlrw_error}. *)
39
40exception Yamlrw_error of Error.t
41(** Raised on parse or emit errors; the {!Error.t} payload describes the failure. *)
42
43
44(** {2 Core Types} *)
45
46type value = [
47 | `Null (** YAML null, ~, or empty values *)
48 | `Bool of bool (** YAML booleans (true, false, yes, no, on, off) *)
49 | `Float of float (** All YAML numbers (integers stored as floats) *)
50 | `String of string (** YAML strings *)
51 | `A of value list (** YAML sequences/arrays *)
52 | `O of (string * value) list (** YAML mappings/objects with string keys *)
53]
54(** JSON-compatible YAML representation. Use this for simple data interchange.
55
56 This type is structurally equivalent to {!Value.t} and compatible with the
57 ezjsonm representation. For additional operations, see {!Value} and {!Util}.

 Every number, integers included, is carried as [`Float]; {!Util.as_int} and
 {!Util.get_int} read an integer back out. Objects are association lists of
 [(key, value)] pairs. Use {!to_json} and {!of_json} to convert from and to
 {!type:yaml}. *)
58
59type yaml = [
60 | `Scalar of Scalar.t (** YAML scalar value with style and metadata *)
61 | `Alias of string (** Alias reference to an anchored node *)
62 | `A of yaml Sequence.t (** YAML sequence with style and metadata *)
63 | `O of (yaml, yaml) Mapping.t (** YAML mapping with style and metadata *)
64]
65(** Full YAML representation preserving anchors, tags, and aliases.
66
67 This type is structurally equivalent to {!Yaml.t}. Use this when you need
68 access to YAML-specific features like anchors and aliases for node reuse,
69 type tags for custom types, scalar styles (plain, quoted, literal, folded),
70 and collection styles (block vs flow).
71
72 For additional operations, see {!Yaml}, {!Scalar}, {!Sequence}, and {!Mapping}.

 Unlike {!type:value}, mapping keys here are themselves {!type:yaml} nodes.
 Convert to a {!type:value} with {!to_json}, and build a {!type:yaml} from a
 {!type:value} with {!of_json}. *)
73
74type document = {
75 version : (int * int) option; (** Optional YAML version directive (e.g., (1, 2) for YAML 1.2) *)
76 tags : (string * string) list; (** TAG directives mapping handles to prefixes *)
77 root : yaml option; (** Root content of the document *)
78 implicit_start : bool; (** Whether the document start marker (---) is implicit *)
79 implicit_end : bool; (** Whether the document end marker (...) is implicit *)
80}
81(** A YAML document with directives and metadata.
82
83 This type is structurally equivalent to {!Document.t}. A YAML stream can
84 contain multiple documents, each separated by document markers.
85
86 For additional operations, see {!Document}.

 Lists of documents are produced by {!documents_of_string} and
 {!documents_of_buffer}, and consumed by {!documents_to_string} and
 {!documents_to_buffer}. *)
87
88
89(** {2 Character Encoding} *)
90
91module Encoding = Encoding
(** Character encodings. An {!Encoding.t} is accepted by the [?encoding]
 argument of the serialization functions and by {!Stream.stream_start}. *)
92
93
94(** {2 Parsing} *)
95
96type version = [ `V1_1 | `V1_2 ]
97(** YAML specification version: [`V1_1] for YAML 1.1, [`V1_2] for YAML 1.2.
 Accepted by the [?version] argument of {!Stream.document_start}. *)
98
99val default_max_alias_nodes : int
100(** Default maximum nodes during alias expansion (10 million).
 This is the same figure documented as the default of the [?max_nodes]
 arguments in this interface. *)
101
102val default_max_alias_depth : int
103(** Default maximum alias nesting depth (100).
 This is the same figure documented as the default of the [?max_depth]
 arguments in this interface. *)
104
105val of_string :
106 ?resolve_aliases:bool ->
107 ?max_nodes:int ->
108 ?max_depth:int ->
109 string -> value
110(** Parse a YAML string into a JSON-compatible value.
111
 For input holding several documents use {!documents_of_string}; to keep
 anchors, tags and styles use {!yaml_of_string}.

112 @param resolve_aliases Whether to expand aliases (default: true)
113 @param max_nodes Maximum nodes during alias expansion (default: 10M)
114 @param max_depth Maximum alias nesting depth (default: 100)
115 @raise Yamlrw_error on parse error or if multiple documents found *)
116
117val yaml_of_string :
118 ?resolve_aliases:bool ->
119 ?max_nodes:int ->
120 ?max_depth:int ->
121 string -> yaml
122(** Parse a YAML string preserving full YAML metadata (anchors, tags, etc).
123
124 By default, aliases are NOT resolved, preserving the document structure.
 This is the opposite default from {!of_string}. The result can be turned
 into a {!type:value} with {!to_json}. For input holding several documents
 use {!documents_of_string}.
125
126 @param resolve_aliases Whether to expand aliases (default: false)
127 @param max_nodes Maximum nodes during alias expansion (default: 10M)
128 @param max_depth Maximum alias nesting depth (default: 100)
129 @raise Yamlrw_error on parse error or if multiple documents found *)
130
131val documents_of_string : string -> document list
132(** Parse a multi-document YAML stream.
133
134 Use this when your YAML input contains multiple documents separated
135 by document markers (---).
136
 Each element is a {!type:document}; its [root] field, when present, is a
 {!type:yaml} tree that can be converted with {!to_json}.

137 @raise Yamlrw_error on parse error *)
138
139
140(** {2 Formatting Styles} *)
141
142module Scalar_style = Scalar_style
(** Scalar styles. A {!Scalar_style.t} is accepted by the [?scalar_style]
 argument of the serialization functions and by [?style] in {!Stream.scalar}. *)
143
144module Layout_style = Layout_style
(** Collection layout styles. A {!Layout_style.t} is accepted by the
 [?layout_style] argument of the serialization functions and by [?style] in
 {!Stream.sequence_start} and {!Stream.mapping_start}. *)
145
146
147(** {2 Serialization} *)
148
149val to_buffer :
150 ?encoding:Encoding.t ->
151 ?scalar_style:Scalar_style.t ->
152 ?layout_style:Layout_style.t ->
153 ?buffer:Buffer.t ->
154 value -> Buffer.t
155(** Serialize a value to a buffer. See {!to_string} for a variant returning
 a string.
156
157 @param encoding Output encoding (default: UTF-8)
158 @param scalar_style Preferred scalar style (default: Any)
159 @param layout_style Preferred layout style (default: Any)
160 @param buffer Optional buffer to append to (allocates new one if not provided)
161 @return The buffer containing the serialized YAML *)
162
163val to_string :
164 ?encoding:Encoding.t ->
165 ?scalar_style:Scalar_style.t ->
166 ?layout_style:Layout_style.t ->
167 value -> string
168(** Serialize a value to a YAML string. See {!to_buffer} to append to an
 existing [Buffer.t] instead.
169
170 @param encoding Output encoding (default: UTF-8)
171 @param scalar_style Preferred scalar style (default: Any)
172 @param layout_style Preferred layout style (default: Any) *)
173
174val yaml_to_buffer :
175 ?encoding:Encoding.t ->
176 ?scalar_style:Scalar_style.t ->
177 ?layout_style:Layout_style.t ->
178 ?buffer:Buffer.t ->
179 yaml -> Buffer.t
180(** Serialize a full YAML value to a buffer. This is the {!type:yaml}
 counterpart of {!to_buffer}; see {!yaml_to_string} for a variant returning
 a string.
181
182 @param encoding Output encoding (default: UTF-8)
183 @param scalar_style Preferred scalar style (default: Any)
184 @param layout_style Preferred layout style (default: Any)
185 @param buffer Optional buffer to append to (allocates new one if not provided)
186 @return The buffer containing the serialized YAML *)
187
188val yaml_to_string :
189 ?encoding:Encoding.t ->
190 ?scalar_style:Scalar_style.t ->
191 ?layout_style:Layout_style.t ->
192 yaml -> string
193(** Serialize a full YAML value to a string. This is the {!type:yaml}
 counterpart of {!to_string}; see {!yaml_to_buffer} to append to an existing
 [Buffer.t] instead.
194
195 @param encoding Output encoding (default: UTF-8)
196 @param scalar_style Preferred scalar style (default: Any)
197 @param layout_style Preferred layout style (default: Any) *)
198
199val documents_to_buffer :
200 ?encoding:Encoding.t ->
201 ?scalar_style:Scalar_style.t ->
202 ?layout_style:Layout_style.t ->
203 ?resolve_aliases:bool ->
204 ?buffer:Buffer.t ->
205 document list -> Buffer.t
206(** Serialize multiple documents to a buffer. See {!documents_to_string} for
 a variant returning a string, and {!documents_of_string} for the parsing
 direction.
207
208 @param encoding Output encoding (default: UTF-8)
209 @param scalar_style Preferred scalar style (default: Any)
210 @param layout_style Preferred layout style (default: Any)
211 @param resolve_aliases Whether to expand aliases (default: true)
212 @param buffer Optional buffer to append to (allocates new one if not provided)
213 @return The buffer containing the serialized YAML *)
214
215val documents_to_string :
216 ?encoding:Encoding.t ->
217 ?scalar_style:Scalar_style.t ->
218 ?layout_style:Layout_style.t ->
219 ?resolve_aliases:bool ->
220 document list -> string
221(** Serialize multiple documents to a YAML stream. See
 {!documents_to_buffer} to append to an existing [Buffer.t] instead.
222
223 @param encoding Output encoding (default: UTF-8)
224 @param scalar_style Preferred scalar style (default: Any)
225 @param layout_style Preferred layout style (default: Any)
226 @param resolve_aliases Whether to expand aliases (default: true) *)
227
228(** {2 Buffer Parsing} *)
229
230val of_buffer :
231 ?resolve_aliases:bool ->
232 ?max_nodes:int ->
233 ?max_depth:int ->
234 Buffer.t -> value
235(** Parse YAML from a buffer into a JSON-compatible value. Takes the same
 optional arguments as {!of_string}, with a [Buffer.t] as input.
236
237 @param resolve_aliases Whether to expand aliases (default: true)
238 @param max_nodes Maximum nodes during alias expansion (default: 10M)
239 @param max_depth Maximum alias nesting depth (default: 100)
240 @raise Yamlrw_error on parse error or if multiple documents found *)
241
242val yaml_of_buffer :
243 ?resolve_aliases:bool ->
244 ?max_nodes:int ->
245 ?max_depth:int ->
246 Buffer.t -> yaml
247(** Parse YAML from a buffer preserving full YAML metadata. Takes the same
 optional arguments as {!yaml_of_string}, with a [Buffer.t] as input.
248
249 @param resolve_aliases Whether to expand aliases (default: false)
250 @param max_nodes Maximum nodes during alias expansion (default: 10M)
251 @param max_depth Maximum alias nesting depth (default: 100)
252 @raise Yamlrw_error on parse error or if multiple documents found *)
253
254val documents_of_buffer : Buffer.t -> document list
255(** Parse a multi-document YAML stream from a buffer. See
 {!documents_of_string} for the variant taking a string.
256
257 @raise Yamlrw_error on parse error *)
258
259
260(** {2 Conversion} *)
261
262val to_json :
263 ?resolve_aliases:bool ->
264 ?max_nodes:int ->
265 ?max_depth:int ->
266 yaml -> value
267(** Convert full YAML to JSON-compatible value.
268
 {!type:value} objects only have string keys, whereas {!type:yaml} mapping
 keys are themselves {!type:yaml} nodes. NOTE(review): the "complex keys"
 error below presumably covers keys that cannot be turned into a string
 (for example a sequence or mapping used as a key); confirm against the
 implementation.

269 @param resolve_aliases Whether to expand aliases (default: true)
270 @param max_nodes Maximum nodes during alias expansion (default: 10M)
271 @param max_depth Maximum alias nesting depth (default: 100)
272 @raise Yamlrw_error if alias limits exceeded or complex keys found *)
273
274val of_json : value -> yaml
275(** Convert JSON-compatible value to full YAML representation.
 This is the opposite direction of {!to_json}. *)
276
277
278(** {2 Pretty Printing & Equality} *)
279
280val pp : Format.formatter -> value -> unit
281(** [pp ppf v] pretty-prints the value [v] on the formatter [ppf].
 NOTE(review): the output syntax is not specified in this interface. *)
282
283val equal : value -> value -> bool
284(** [equal a b] tests equality of the two values [a] and [b].
 NOTE(review): whether the order of object keys is significant is not
 specified in this interface. *)
285
286
287(** {2 Util - Value Combinators}
288
289 Combinators for working with {!type:value} values.
290
291 This module provides constructors, accessors, and transformations
292 for JSON-compatible YAML values. *)
293
294module Util : sig
295 type t = Value.t
296 (** Alias for {!Value.t}, which is structurally equivalent to {!type:value}. *)
297
298 (** {3 Type Error} *)
299
300 exception Type_error of string * t
301 (** Raised when a value has unexpected type.
302 [Type_error (expected, actual_value)]: [expected] is a string describing
 what was expected and [actual_value] is the offending value. *)
303
304 (** {3 Constructors} *)
305
306 val null : t
307 (** The null value ([`Null]). *)
308
309 val bool : bool -> t
310 (** Create a boolean value ([`Bool]). *)
311
312 val int : int -> t
313 (** Create an integer value (stored as float, i.e. [`Float]). *)
314
315 val float : float -> t
316 (** Create a float value ([`Float]). *)
317
318 val string : string -> t
319 (** Create a string value ([`String]). *)
320
321 val strings : string list -> t
322 (** Create a list value from a list of OCaml strings. *)
323
324 val list : t list -> t
325 (** Create a list value ([`A]). *)
326
327 val obj : (string * t) list -> t
328 (** Create an object value ([`O]) from key-value pairs. *)
329
330 (** {3 Type Predicates} *)
331
332 val is_null : t -> bool
333 (** Check if value is null. *)
334
335 val is_bool : t -> bool
336 (** Check if value is a boolean. *)
337
338 val is_number : t -> bool
339 (** Check if value is a number. *)
340
341 val is_string : t -> bool
342 (** Check if value is a string. *)
343
344 val is_list : t -> bool
345 (** Check if value is a list. *)
346
347 val is_obj : t -> bool
348 (** Check if value is an object. *)
349
350 (** {3 Safe Accessors}
351
352 These return [None] if the value has the wrong type. *)
353
354 val as_null : t -> unit option
355 (** Get unit if value is null. *)
356
357 val as_bool : t -> bool option
358 (** Get boolean value. *)
359
360 val as_float : t -> float option
361 (** Get float value. *)
362
363 val as_string : t -> string option
364 (** Get string value. *)
365
366 val as_list : t -> t list option
367 (** Get list value. *)
368
369 val as_obj : t -> (string * t) list option
370 (** Get object as association list. *)
371
372 val as_int : t -> int option
373 (** Get integer value if float is an exact integer; [None] otherwise. *)
374
375 (** {3 Unsafe Accessors}
376
377 These raise {!Type_error} if the value has the wrong type. *)
378
379 val get_null : t -> unit
380 (** Get unit or raise {!Type_error}. *)
381
382 val get_bool : t -> bool
383 (** Get boolean or raise {!Type_error}. *)
384
385 val get_float : t -> float
386 (** Get float or raise {!Type_error}. *)
387
388 val get_string : t -> string
389 (** Get string or raise {!Type_error}. *)
390
391 val get_list : t -> t list
392 (** Get list or raise {!Type_error}. *)
393
394 val get_obj : t -> (string * t) list
395 (** Get object or raise {!Type_error}. *)
396
397 val get_int : t -> int
398 (** Get integer or raise {!Type_error}.
 NOTE(review): presumably also raises when the number is not an exact
 integer, mirroring {!as_int}; confirm. *)
399
400 (** {3 Object Operations} *)
401
402 val mem : string -> t -> bool
403 (** [mem key obj] checks if [key] exists in object [obj].
404 Returns [false] if [obj] is not an object. *)
405
406 val find : string -> t -> t option
407 (** [find key obj] looks up [key] in object [obj].
408 Returns [None] if key not found or if [obj] is not an object. *)
409
410 val get : string -> t -> t
411 (** [get key obj] looks up [key] in object [obj].
412 See {!find} for a variant returning an option.
 NOTE(review): the behaviour when [obj] is not an object is not stated here.
 @raise Not_found if key not found *)
413
414 val keys : t -> string list
415 (** Get all keys from an object.
416 @raise Type_error if not an object *)
417
418 val values : t -> t list
419 (** Get all values from an object.
420 @raise Type_error if not an object *)
421
422 val update : string -> t -> t -> t
423 (** [update key value obj] sets [key] to [value] in [obj].
424 Adds the key if it doesn't exist.
425 @raise Type_error if [obj] is not an object *)
426
427 val remove : string -> t -> t
428 (** [remove key obj] removes [key] from [obj].
429 @raise Type_error if [obj] is not an object *)
430
431 val combine : t -> t -> t
432 (** [combine obj1 obj2] merges two objects, with [obj2] values taking precedence.
433 @raise Type_error if either argument is not an object *)
434
435 (** {3 List Operations} *)
436
437 val map : (t -> t) -> t -> t
438 (** [map f lst] applies [f] to each element of list [lst].
439 @raise Type_error if [lst] is not a list *)
440
441 val mapi : (int -> t -> t) -> t -> t
442 (** [mapi f lst] applies [f i x] to each element [x] at index [i].
443 @raise Type_error if [lst] is not a list *)
444
445 val filter : (t -> bool) -> t -> t
446 (** [filter pred lst] keeps elements satisfying [pred].
447 @raise Type_error if [lst] is not a list *)
448
449 val fold : ('a -> t -> 'a) -> 'a -> t -> 'a
450 (** [fold f init lst] folds [f] over list [lst].
451 @raise Type_error if [lst] is not a list *)
452
453 val nth : int -> t -> t option
454 (** [nth n lst] gets element at index [n].
455 Returns [None] if [lst] is not a list or index out of bounds. *)
456
457 val length : t -> int
458 (** Get the length of a list or object. Returns 0 for other types. *)
459
460 val flatten : t -> t
461 (** Flatten a list of lists into a single list.
462 Non-list elements are kept as-is.
463 @raise Type_error if not a list *)
464
465 (** {3 Path Operations} *)
466
467 val get_path : string list -> t -> t option
468 (** [get_path ["a"; "b"; "c"] obj] looks up nested path [obj.a.b.c].
469 Returns [None] if any key is not found. *)
470
471 val get_path_exn : string list -> t -> t
472 (** Like {!get_path} but raises instead of returning [None].
 @raise Not_found if path not found *)
473
474 (** {3 Iteration} *)
475
476 val iter_obj : (string -> t -> unit) -> t -> unit
477 (** [iter_obj f obj] calls [f key value] for each pair in [obj].
478 @raise Type_error if [obj] is not an object *)
479
480 val iter_list : (t -> unit) -> t -> unit
481 (** [iter_list f lst] calls [f] on each element of [lst].
482 @raise Type_error if [lst] is not a list *)
483
484 val fold_obj : ('a -> string -> t -> 'a) -> 'a -> t -> 'a
485 (** [fold_obj f init obj] folds over object key-value pairs.
486 @raise Type_error if [obj] is not an object *)
487
488 (** {3 Mapping} *)
489
490 val map_obj : (string -> t -> t) -> t -> t
491 (** [map_obj f obj] maps [f key value] over each pair in [obj].
492 @raise Type_error if [obj] is not an object *)
493
494 val filter_obj : (string -> t -> bool) -> t -> t
495 (** [filter_obj pred obj] keeps pairs satisfying [pred key value].
496 @raise Type_error if [obj] is not an object *)
497
498 (** {3 Conversion Helpers}
499
500 Get values with optional defaults. If no default is provided and the type
501 doesn't match, these raise {!Type_error}. *)
502
503 val to_bool : ?default:bool -> t -> bool
504 (** Get boolean or return default.
505 @raise Type_error if type doesn't match and no default provided *)
506
507 val to_int : ?default:int -> t -> int
508 (** Get integer or return default.
509 @raise Type_error if type doesn't match and no default provided *)
510
511 val to_float : ?default:float -> t -> float
512 (** Get float or return default.
513 @raise Type_error if type doesn't match and no default provided *)
514
515 val to_string : ?default:string -> t -> string
516 (** Get string or return default.
517 @raise Type_error if type doesn't match and no default provided *)
518
519 val to_list : ?default:t list -> t -> t list
520 (** Get list or return default.
521 @raise Type_error if type doesn't match and no default provided *)
522end
523
524
525(** {2 Stream - Low-Level Event API}
526
527 Low-level streaming API for event-based YAML processing.
528
529 This is useful for:
530 - Processing very large YAML files incrementally
531 - Building custom YAML transformers
532 - Fine-grained control over YAML emission *)
533
534module Stream : sig
535
536 (** {3 Event Types} *)
537
538 type event = Event.t
539 (** A parsing or emitting event. *)
540
541 type position = Position.t
542 (** A position in the source (line, column, byte offset). *)
543
544 type event_result = {
545 event : event; (** The event that was parsed *)
546 start_pos : position; (** Source position where the event starts *)
547 end_pos : position; (** Source position where the event ends *)
548 }
549 (** Result of parsing an event with its source location. *)
550
551 (** {3 Parsing} *)
552
553 type parser
554 (** A streaming YAML parser. *)
555
556 val parser : string -> parser
557 (** Create a parser from a string. *)
558
559 val next : parser -> event_result option
560 (** Get the next event from the parser, together with its start and end
561 positions. Returns [None] when parsing is complete. *)
562
563 val iter : (event -> position -> position -> unit) -> parser -> unit
564 (** [iter f parser] calls [f event start_pos end_pos] for each event. *)
565
566 val fold : ('a -> event -> 'a) -> 'a -> parser -> 'a
567 (** [fold f init parser] folds [f] over all events. Unlike {!iter}, [f] is
 not given the source positions. *)
568
569 (** {3 Emitting} *)
570
571 type emitter
572 (** A streaming YAML emitter. *)
573
574 val emitter : ?len:int -> unit -> emitter
575 (** Create a new emitter.
 NOTE(review): [len] is undocumented; presumably the initial size of the
 internal output buffer. Confirm against the implementation. *)
576
577 val contents : emitter -> string
578 (** Get the emitted YAML string. *)
579
580 val emit : emitter -> event -> unit
581 (** Emit an event.
582 @raise Yamlrw_error if the event sequence is invalid *)
583
584 (** {3 Event Emission Helpers} *)
585
586 val stream_start : emitter -> Encoding.t -> unit
587 (** Emit a stream start event with the given encoding. *)
588
589 val stream_end : emitter -> unit
590 (** Emit a stream end event. *)
591
592 val document_start : emitter -> ?version:version -> ?implicit:bool -> unit -> unit
593 (** Emit a document start event.
594 @param version YAML version directive
595 @param implicit Whether start marker is implicit (default: true) *)
596
597 val document_end : emitter -> ?implicit:bool -> unit -> unit
598 (** Emit a document end event.
599 @param implicit Whether end marker is implicit (default: true) *)
600
601 val scalar : emitter -> ?anchor:string -> ?tag:string -> ?style:Scalar_style.t -> string -> unit
602 (** Emit a scalar value; the positional string is the scalar's content.
603 @param anchor Optional anchor name
604 @param tag Optional type tag
605 @param style Scalar style (default: Any) *)
606
607 val alias : emitter -> string -> unit
608 (** Emit an alias reference; the string is the name of the anchor referred to. *)
609
610 val sequence_start : emitter -> ?anchor:string -> ?tag:string -> ?style:Layout_style.t -> unit -> unit
611 (** Emit a sequence start event. Close the sequence with {!sequence_end}.
612 @param anchor Optional anchor name
613 @param tag Optional type tag
614 @param style Layout style (default: Any) *)
615
616 val sequence_end : emitter -> unit
617 (** Emit a sequence end event. *)
618
619 val mapping_start : emitter -> ?anchor:string -> ?tag:string -> ?style:Layout_style.t -> unit -> unit
620 (** Emit a mapping start event. Close the mapping with {!mapping_end}.
621 @param anchor Optional anchor name
622 @param tag Optional type tag
623 @param style Layout style (default: Any) *)
624
625 val mapping_end : emitter -> unit
626 (** Emit a mapping end event. *)
627end
628
629
630(** {2 Internal Modules}
631
632 These modules are exposed for advanced use cases requiring
633 fine-grained control over parsing, emission, or data structures.
634
635 For typical usage, prefer the top-level functions and {!Util}. *)
636
637module Position = Position
638(** Source position tracking. {!Stream.position} is an alias for {!Position.t}. *)
639
640module Span = Span
641(** Source span (range of positions). *)
642
643module Chomping = Chomping
644(** Block scalar chomping modes. *)
645
646module Tag = Tag
647(** YAML type tags. *)
648
649module Value = Value
650(** JSON-compatible value type and operations. {!Value.t} is structurally
 equivalent to the top-level {!type:value}. *)
651
652module Scalar = Scalar
653(** YAML scalar with metadata; the payload of [`Scalar] in {!type:yaml}. *)
654
655module Sequence = Sequence
656(** YAML sequence with metadata; the payload of [`A] in {!type:yaml}. *)
657
658module Mapping = Mapping
659(** YAML mapping with metadata; the payload of [`O] in {!type:yaml}. *)
660
661module Yaml = Yaml
662(** Full YAML value type. {!Yaml.t} is structurally equivalent to the
 top-level {!type:yaml}. *)
663
664module Document = Document
665(** YAML document with directives. {!Document.t} is structurally equivalent
 to the top-level {!type:document}. *)
666
667module Token = Token
668(** Lexical tokens. *)
669
670module Scanner = Scanner
671(** Lexical scanner. *)
672
673module Event = Event
674(** Parser events. {!Stream.event} is an alias for {!Event.t}. *)
675
676module Parser = Parser
677(** Event-based parser. *)
678
679module Loader = Loader
680(** Document loader. *)
681
682module Emitter = Emitter
683(** Event-based emitter. *)
684
685module Input = Input
686(** Input stream utilities. *)
687
688module Serialize = Serialize
689(** Buffer serialization utilities. *)