Pure OCaml Yaml 1.2 reader and writer using Bytesrw
1(*---------------------------------------------------------------------------
2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3 SPDX-License-Identifier: ISC
4 ---------------------------------------------------------------------------*)
5
6(** {1 Yamlrw - A Pure OCaml YAML Parser and Emitter} *)
7
8(** {2 Error Handling} *)
9
10module Error = Error
11
12exception Yamlrw_error = Error.Yamlrw_error
13
14
15(** {2 Core Types} *)
16
17(** JSON-compatible YAML representation. Use this for simple data interchange.
18
19 This type is structurally equivalent to {!Value.t} and compatible with the
20 ezjsonm representation. For additional operations, see {!Value} and {!Util}. *)
21type value = [
22 | `Null (** YAML null, ~, or empty values *)
23 | `Bool of bool (** YAML booleans (true, false, yes, no, on, off) *)
24 | `Float of float (** All YAML numbers (integers stored as floats) *)
25 | `String of string (** YAML strings *)
26 | `A of value list (** YAML sequences/arrays *)
27 | `O of (string * value) list (** YAML mappings/objects with string keys *)
28]
29
30(** Full YAML representation preserving anchors, tags, and aliases.
31
32 This type is structurally equivalent to {!Yaml.t}. Use this when you need
33 access to YAML-specific features like anchors and aliases for node reuse,
34 type tags for custom types, scalar styles (plain, quoted, literal, folded),
35 and collection styles (block vs flow).
36
37 For additional operations, see {!Yaml}, {!Scalar}, {!Sequence}, and {!Mapping}. *)
38type yaml = [
39 | `Scalar of Scalar.t (** YAML scalar value with style and metadata *)
40 | `Alias of string (** Alias reference to an anchored node *)
41 | `A of yaml Sequence.t (** YAML sequence with style and metadata *)
42 | `O of (yaml, yaml) Mapping.t (** YAML mapping with style and metadata *)
43]
44
45(** A YAML document with directives and metadata.
46
47 This type is structurally equivalent to {!Document.t}. A YAML stream can
48 contain multiple documents, each separated by document markers.
49
50 For additional operations, see {!Document}. *)
51type document = {
52 version : (int * int) option; (** Optional YAML version directive (e.g., (1, 2) for YAML 1.2) *)
53 tags : (string * string) list; (** TAG directives mapping handles to prefixes *)
54 root : yaml option; (** Root content of the document *)
55 implicit_start : bool; (** Whether the document start marker (---) is implicit *)
56 implicit_end : bool; (** Whether the document end marker (...) is implicit *)
57}
58
59
60(** {2 Character Encoding} *)
61
62module Encoding = Encoding
63
64
65(** {2 Parsing} *)
66
67type version = [ `V1_1 | `V1_2 ]
68
69(** Default maximum nodes during alias expansion (10 million). *)
70let default_max_alias_nodes = Yaml.default_max_alias_nodes
71
72(** Default maximum alias nesting depth (100). *)
73let default_max_alias_depth = Yaml.default_max_alias_depth
74
75let of_string
76 ?(resolve_aliases = true)
77 ?(max_nodes = default_max_alias_nodes)
78 ?(max_depth = default_max_alias_depth)
79 s : value =
80 (Loader.value_of_string ~resolve_aliases ~max_nodes ~max_depth s :> value)
81(** Parse a YAML string into a JSON-compatible value.
82
83 @param resolve_aliases Whether to expand aliases (default: true)
84 @param max_nodes Maximum nodes during alias expansion (default: 10M)
85 @param max_depth Maximum alias nesting depth (default: 100)
86 @raise Yamlrw_error on parse error or if multiple documents found *)
87
88let yaml_of_string
89 ?(resolve_aliases = false)
90 ?(max_nodes = default_max_alias_nodes)
91 ?(max_depth = default_max_alias_depth)
92 s : yaml =
93 (Loader.yaml_of_string ~resolve_aliases ~max_nodes ~max_depth s :> yaml)
94(** Parse a YAML string preserving full YAML metadata (anchors, tags, etc).
95
96 By default, aliases are NOT resolved, preserving the document structure.
97
98 @param resolve_aliases Whether to expand aliases (default: false)
99 @param max_nodes Maximum nodes during alias expansion (default: 10M)
100 @param max_depth Maximum alias nesting depth (default: 100)
101 @raise Yamlrw_error on parse error or if multiple documents found *)
102
103let documents_of_string s : document list =
104 let docs = Loader.documents_of_string s in
105 List.map (fun (d : Document.t) : document -> {
106 version = d.version;
107 tags = d.tags;
108 root = (d.root :> yaml option);
109 implicit_start = d.implicit_start;
110 implicit_end = d.implicit_end;
111 }) docs
112(** Parse a multi-document YAML stream.
113
114 Use this when your YAML input contains multiple documents separated
115 by document markers (---).
116
117 @raise Yamlrw_error on parse error *)
118
119
120(** {2 Formatting Styles} *)
121
122module Scalar_style = Scalar_style
123
124module Layout_style = Layout_style
125
126
127(** {2 Serialization} *)
128
129let make_config ~encoding ~scalar_style ~layout_style =
130 { Emitter.default_config with encoding; scalar_style; layout_style }
131
132let to_buffer
133 ?(encoding = `Utf8)
134 ?(scalar_style = `Any)
135 ?(layout_style = `Any)
136 ?buffer
137 (value : value) =
138 let config = make_config ~encoding ~scalar_style ~layout_style in
139 Serialize.value_to_buffer ~config ?buffer (value :> Value.t)
140(** Serialize a value to a buffer.
141
142 @param encoding Output encoding (default: UTF-8)
143 @param scalar_style Preferred scalar style (default: Any)
144 @param layout_style Preferred layout style (default: Any)
145 @param buffer Optional buffer to append to (allocates new one if not provided)
146 @return The buffer containing the serialized YAML *)
147
148let to_string
149 ?(encoding = `Utf8)
150 ?(scalar_style = `Any)
151 ?(layout_style = `Any)
152 (value : value) =
153 Buffer.contents (to_buffer ~encoding ~scalar_style ~layout_style value)
154(** Serialize a value to a YAML string.
155
156 @param encoding Output encoding (default: UTF-8)
157 @param scalar_style Preferred scalar style (default: Any)
158 @param layout_style Preferred layout style (default: Any) *)
159
160let yaml_to_buffer
161 ?(encoding = `Utf8)
162 ?(scalar_style = `Any)
163 ?(layout_style = `Any)
164 ?buffer
165 (yaml : yaml) =
166 let config = make_config ~encoding ~scalar_style ~layout_style in
167 Serialize.yaml_to_buffer ~config ?buffer (yaml :> Yaml.t)
168(** Serialize a full YAML value to a buffer.
169
170 @param encoding Output encoding (default: UTF-8)
171 @param scalar_style Preferred scalar style (default: Any)
172 @param layout_style Preferred layout style (default: Any)
173 @param buffer Optional buffer to append to (allocates new one if not provided)
174 @return The buffer containing the serialized YAML *)
175
176let yaml_to_string
177 ?(encoding = `Utf8)
178 ?(scalar_style = `Any)
179 ?(layout_style = `Any)
180 (yaml : yaml) =
181 Buffer.contents (yaml_to_buffer ~encoding ~scalar_style ~layout_style yaml)
182(** Serialize a full YAML value to a string.
183
184 @param encoding Output encoding (default: UTF-8)
185 @param scalar_style Preferred scalar style (default: Any)
186 @param layout_style Preferred layout style (default: Any) *)
187
188let documents_to_buffer
189 ?(encoding = `Utf8)
190 ?(scalar_style = `Any)
191 ?(layout_style = `Any)
192 ?(resolve_aliases = true)
193 ?buffer
194 (documents : document list) =
195 let config = make_config ~encoding ~scalar_style ~layout_style in
196 let docs' = List.map (fun (d : document) : Document.t -> {
197 Document.version = d.version;
198 Document.tags = d.tags;
199 Document.root = (d.root :> Yaml.t option);
200 Document.implicit_start = d.implicit_start;
201 Document.implicit_end = d.implicit_end;
202 }) documents in
203 Serialize.documents_to_buffer ~config ~resolve_aliases ?buffer docs'
204(** Serialize multiple documents to a buffer.
205
206 @param encoding Output encoding (default: UTF-8)
207 @param scalar_style Preferred scalar style (default: Any)
208 @param layout_style Preferred layout style (default: Any)
209 @param resolve_aliases Whether to expand aliases (default: true)
210 @param buffer Optional buffer to append to (allocates new one if not provided)
211 @return The buffer containing the serialized YAML *)
212
213let documents_to_string
214 ?(encoding = `Utf8)
215 ?(scalar_style = `Any)
216 ?(layout_style = `Any)
217 ?(resolve_aliases = true)
218 (documents : document list) =
219 Buffer.contents (documents_to_buffer ~encoding ~scalar_style ~layout_style ~resolve_aliases documents)
220(** Serialize multiple documents to a YAML stream.
221
222 @param encoding Output encoding (default: UTF-8)
223 @param scalar_style Preferred scalar style (default: Any)
224 @param layout_style Preferred layout style (default: Any)
225 @param resolve_aliases Whether to expand aliases (default: true) *)
226
227(** {2 Buffer Parsing} *)
228
229let of_buffer
230 ?(resolve_aliases = true)
231 ?(max_nodes = default_max_alias_nodes)
232 ?(max_depth = default_max_alias_depth)
233 buffer : value =
234 of_string ~resolve_aliases ~max_nodes ~max_depth (Buffer.contents buffer)
235(** Parse YAML from a buffer into a JSON-compatible value.
236
237 @param resolve_aliases Whether to expand aliases (default: true)
238 @param max_nodes Maximum nodes during alias expansion (default: 10M)
239 @param max_depth Maximum alias nesting depth (default: 100)
240 @raise Yamlrw_error on parse error or if multiple documents found *)
241
242let yaml_of_buffer
243 ?(resolve_aliases = false)
244 ?(max_nodes = default_max_alias_nodes)
245 ?(max_depth = default_max_alias_depth)
246 buffer : yaml =
247 yaml_of_string ~resolve_aliases ~max_nodes ~max_depth (Buffer.contents buffer)
248(** Parse YAML from a buffer preserving full YAML metadata.
249
250 @param resolve_aliases Whether to expand aliases (default: false)
251 @param max_nodes Maximum nodes during alias expansion (default: 10M)
252 @param max_depth Maximum alias nesting depth (default: 100)
253 @raise Yamlrw_error on parse error or if multiple documents found *)
254
255let documents_of_buffer buffer : document list =
256 documents_of_string (Buffer.contents buffer)
257(** Parse a multi-document YAML stream from a buffer.
258
259 @raise Yamlrw_error on parse error *)
260
261
262(** {2 Conversion} *)
263
264let to_json
265 ?(resolve_aliases = true)
266 ?(max_nodes = default_max_alias_nodes)
267 ?(max_depth = default_max_alias_depth)
268 (yaml : yaml) : value =
269 (Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth (yaml :> Yaml.t) :> value)
270(** Convert full YAML to JSON-compatible value.
271
272 @param resolve_aliases Whether to expand aliases (default: true)
273 @param max_nodes Maximum nodes during alias expansion (default: 10M)
274 @param max_depth Maximum alias nesting depth (default: 100)
275 @raise Yamlrw_error if alias limits exceeded or complex keys found *)
276
277let of_json (value : value) : yaml =
278 (Yaml.of_value (value :> Value.t) :> yaml)
279(** Convert JSON-compatible value to full YAML representation. *)
280
281
282(** {2 Pretty Printing & Equality} *)
283
284let pp = Value.pp
285(** Pretty-print a value. *)
286
287let equal = Value.equal
288(** Test equality of two values. *)
289
290
291(** {2 Util - Value Combinators} *)
292
293module Util = struct
294 (** Combinators for working with {!type:value} values.
295
296 This module provides constructors, accessors, and transformations
297 for JSON-compatible YAML values. *)
298
299 type t = Value.t
300
301 (** {3 Type Error} *)
302
303 exception Type_error of string * t
304 (** Raised when a value has unexpected type.
305 [Type_error (expected, actual_value)] *)
306
307 let type_error expected v = raise (Type_error (expected, v))
308
309 (** {3 Constructors} *)
310
311 let null : t = `Null
312 let bool b : t = `Bool b
313 let int n : t = `Float (Float.of_int n)
314 let float f : t = `Float f
315 let string s : t = `String s
316 let strings ss : t = `A (List.map (fun s -> `String s) ss)
317 let list vs : t = `A vs
318 let obj pairs : t = `O pairs
319
320 (** {3 Type Predicates} *)
321
322 let is_null = function `Null -> true | _ -> false
323 let is_bool = function `Bool _ -> true | _ -> false
324 let is_number = function `Float _ -> true | _ -> false
325 let is_string = function `String _ -> true | _ -> false
326 let is_list = function `A _ -> true | _ -> false
327 let is_obj = function `O _ -> true | _ -> false
328
329 (** {3 Safe Accessors} *)
330
331 let as_null = function `Null -> Some () | _ -> None
332 let as_bool = function `Bool b -> Some b | _ -> None
333 let as_float = function `Float f -> Some f | _ -> None
334 let as_string = function `String s -> Some s | _ -> None
335 let as_list = function `A l -> Some l | _ -> None
336 let as_obj = function `O o -> Some o | _ -> None
337
338 let as_int = function
339 | `Float f ->
340 let i = Float.to_int f in
341 if Float.equal (Float.of_int i) f then Some i else None
342 | _ -> None
343
344 (** {3 Unsafe Accessors} *)
345
346 let get_null v = match v with `Null -> () | _ -> type_error "null" v
347 let get_bool v = match v with `Bool b -> b | _ -> type_error "bool" v
348 let get_float v = match v with `Float f -> f | _ -> type_error "float" v
349 let get_string v = match v with `String s -> s | _ -> type_error "string" v
350 let get_list v = match v with `A l -> l | _ -> type_error "list" v
351 let get_obj v = match v with `O o -> o | _ -> type_error "object" v
352
353 let get_int v =
354 match as_int v with
355 | Some i -> i
356 | None -> type_error "int" v
357
358 (** {3 Object Operations} *)
359
360 let mem key = function
361 | `O pairs -> List.exists (fun (k, _) -> k = key) pairs
362 | _ -> false
363
364 let find key = function
365 | `O pairs -> List.assoc_opt key pairs
366 | _ -> None
367
368 let get key v =
369 match find key v with
370 | Some v -> v
371 | None -> raise Not_found
372
373 let keys v = match v with
374 | `O pairs -> List.map fst pairs
375 | _ -> type_error "object" v
376
377 let values v = match v with
378 | `O pairs -> List.map snd pairs
379 | _ -> type_error "object" v
380
381 let update key value = function
382 | `O pairs ->
383 let rec go = function
384 | [] -> [(key, value)]
385 | (k, _) :: rest when k = key -> (key, value) :: rest
386 | kv :: rest -> kv :: go rest
387 in
388 `O (go pairs)
389 | v -> type_error "object" v
390
391 let remove key = function
392 | `O pairs -> `O (List.filter (fun (k, _) -> k <> key) pairs)
393 | v -> type_error "object" v
394
395 let combine v1 v2 =
396 match v1, v2 with
397 | `O o1, `O o2 -> `O (o1 @ o2)
398 | `O _, _ -> type_error "object" v2
399 | _, _ -> type_error "object" v1
400
401 (** {3 List Operations} *)
402
403 let map f = function
404 | `A l -> `A (List.map f l)
405 | v -> type_error "list" v
406
407 let mapi f = function
408 | `A l -> `A (List.mapi f l)
409 | v -> type_error "list" v
410
411 let filter pred = function
412 | `A l -> `A (List.filter pred l)
413 | v -> type_error "list" v
414
415 let fold f init = function
416 | `A l -> List.fold_left f init l
417 | v -> type_error "list" v
418
419 let nth n = function
420 | `A l -> List.nth_opt l n
421 | _ -> None
422
423 let length = function
424 | `A l -> List.length l
425 | `O o -> List.length o
426 | _ -> 0
427
428 let flatten = function
429 | `A l ->
430 `A (List.concat_map (function `A inner -> inner | v -> [v]) l)
431 | v -> type_error "list" v
432
433 (** {3 Path Operations} *)
434
435 let rec get_path path v =
436 match path with
437 | [] -> Some v
438 | key :: rest ->
439 match find key v with
440 | Some child -> get_path rest child
441 | None -> None
442
443 let get_path_exn path v =
444 match get_path path v with
445 | Some v -> v
446 | None -> raise Not_found
447
448 (** {3 Iteration} *)
449
450 let iter_obj f = function
451 | `O pairs -> List.iter (fun (k, v) -> f k v) pairs
452 | v -> type_error "object" v
453
454 let iter_list f = function
455 | `A l -> List.iter f l
456 | v -> type_error "list" v
457
458 let fold_obj f init = function
459 | `O pairs -> List.fold_left (fun acc (k, v) -> f acc k v) init pairs
460 | v -> type_error "object" v
461
462 (** {3 Mapping} *)
463
464 let map_obj f = function
465 | `O pairs -> `O (List.map (fun (k, v) -> (k, f k v)) pairs)
466 | v -> type_error "object" v
467
468 let filter_obj pred = function
469 | `O pairs -> `O (List.filter (fun (k, v) -> pred k v) pairs)
470 | v -> type_error "object" v
471
472 (** {3 Conversion Helpers} *)
473
474 let to_bool ?default v =
475 match as_bool v, default with
476 | Some b, _ -> b
477 | None, Some d -> d
478 | None, None -> type_error "bool" v
479
480 let to_int ?default v =
481 match as_int v, default with
482 | Some i, _ -> i
483 | None, Some d -> d
484 | None, None -> type_error "int" v
485
486 let to_float ?default v =
487 match as_float v, default with
488 | Some f, _ -> f
489 | None, Some d -> d
490 | None, None -> type_error "float" v
491
492 let to_string ?default v =
493 match as_string v, default with
494 | Some s, _ -> s
495 | None, Some d -> d
496 | None, None -> type_error "string" v
497
498 let to_list ?default v =
499 match as_list v, default with
500 | Some l, _ -> l
501 | None, Some d -> d
502 | None, None -> type_error "list" v
503end
504
505
506(** {2 Stream - Low-Level Event API} *)
507
508module Stream = struct
509 (** Low-level streaming API for event-based YAML processing.
510
511 This is useful for:
512 - Processing very large YAML files incrementally
513 - Building custom YAML transformers
514 - Fine-grained control over YAML emission *)
515
516 (** {3 Event Types} *)
517
518 type event = Event.t
519 (** A parsing or emitting event. *)
520
521 type position = Position.t
522 (** A position in the source (line, column, byte offset). *)
523
524 (** Result of parsing an event. *)
525 type event_result = {
526 event : event;
527 start_pos : position;
528 end_pos : position;
529 }
530
531 (** {3 Parsing} *)
532
533 type parser = Parser.t
534 (** A streaming YAML parser. *)
535
536 let parser s = Parser.of_string s
537 (** Create a parser from a string. *)
538
539 let next p =
540 match Parser.next p with
541 | Some { event; span } ->
542 Some {
543 event;
544 start_pos = span.start;
545 end_pos = span.stop;
546 }
547 | None -> None
548 (** Get the next event from the parser.
549 Returns [None] when parsing is complete. *)
550
551 let iter f p =
552 let rec go () =
553 match next p with
554 | Some { event; start_pos; end_pos } ->
555 f event start_pos end_pos;
556 go ()
557 | None -> ()
558 in
559 go ()
560 (** Iterate over all events from the parser. *)
561
562 let fold f init p =
563 let rec go acc =
564 match Parser.next p with
565 | Some { event; _ } -> go (f acc event)
566 | None -> acc
567 in
568 go init
569 (** Fold over all events from the parser. *)
570
571 (** {3 Emitting} *)
572
573 type emitter = Emitter.t
574 (** A streaming YAML emitter. *)
575
576 let emitter ?len:_ () = Emitter.create ()
577 (** Create a new emitter. *)
578
579 let contents e = Emitter.contents e
580 (** Get the emitted YAML string. *)
581
582 let emit e ev = Emitter.emit e ev
583 (** Emit an event. *)
584
585 (** {3 Event Emission Helpers} *)
586
587 let stream_start e enc =
588 Emitter.emit e (Event.Stream_start { encoding = enc })
589
590 let stream_end e =
591 Emitter.emit e Event.Stream_end
592
593 let document_start e ?version ?(implicit = true) () =
594 let version = match version with
595 | Some `V1_1 -> Some (1, 1)
596 | Some `V1_2 -> Some (1, 2)
597 | None -> None
598 in
599 Emitter.emit e (Event.Document_start { version; implicit })
600
601 let document_end e ?(implicit = true) () =
602 Emitter.emit e (Event.Document_end { implicit })
603
604 let scalar e ?anchor ?tag ?(style = `Any) value =
605 Emitter.emit e (Event.Scalar {
606 anchor;
607 tag;
608 value;
609 plain_implicit = true;
610 quoted_implicit = true;
611 style;
612 })
613
614 let alias e name =
615 Emitter.emit e (Event.Alias { anchor = name })
616
617 let sequence_start e ?anchor ?tag ?(style = `Any) () =
618 Emitter.emit e (Event.Sequence_start {
619 anchor;
620 tag;
621 implicit = true;
622 style;
623 })
624
625 let sequence_end e =
626 Emitter.emit e Event.Sequence_end
627
628 let mapping_start e ?anchor ?tag ?(style = `Any) () =
629 Emitter.emit e (Event.Mapping_start {
630 anchor;
631 tag;
632 implicit = true;
633 style;
634 })
635
636 let mapping_end e =
637 Emitter.emit e Event.Mapping_end
638end
639
640
641(** {2 Internal Modules} *)
642
643(** These modules are exposed for advanced use cases requiring
644 fine-grained control over parsing, emission, or data structures.
645
646 For typical usage, prefer the top-level functions and {!Util}. *)
647
648(** Source position tracking. *)
649module Position = Position
650
651(** Source span (range of positions). *)
652module Span = Span
653
654(** Block scalar chomping modes. *)
655module Chomping = Chomping
656
657(** YAML type tags. *)
658module Tag = Tag
659
660(** JSON-compatible value type and operations. *)
661module Value = Value
662
663(** YAML scalar with metadata. *)
664module Scalar = Scalar
665
666(** YAML sequence with metadata. *)
667module Sequence = Sequence
668
669(** YAML mapping with metadata. *)
670module Mapping = Mapping
671
672(** Full YAML value type. *)
673module Yaml = Yaml
674
675(** YAML document with directives. *)
676module Document = Document
677
678(** Lexical tokens. *)
679module Token = Token
680
681(** Lexical scanner. *)
682module Scanner = Scanner
683
684(** Parser events. *)
685module Event = Event
686
687(** Event-based parser. *)
688module Parser = Parser
689
690(** Document loader. *)
691module Loader = Loader
692
693(** Event-based emitter. *)
694module Emitter = Emitter
695
696(** Input stream utilities. *)
697module Input = Input
698
699(** Buffer serialization utilities. *)
700module Serialize = Serialize