Pure OCaml YAML 1.2 reader and writer using Bytesrw

initial import

Changed files
+15879
.tangled
workflows
bin
lib
tests
+18
.gitignore
···
+
# OCaml build artifacts
+
_build/
+
*.install
+
*.merlin
+
+
# Third-party sources (fetch locally with opam source)
+
third_party/
+
+
# Editor and OS files
+
.DS_Store
+
*.swp
+
*~
+
.vscode/
+
.idea/
+
+
# Opam local switch
+
_opam/
+
bytesrw-eio
+1
.ocamlformat
···
+
version=0.28.1
+49
.tangled/workflows/build.yml
···
+
when:
+
- event: ["push", "pull_request"]
+
branch: ["main"]
+
+
engine: nixery
+
+
dependencies:
+
nixpkgs:
+
- shell
+
- stdenv
+
- findutils
+
- binutils
+
- libunwind
+
- ncurses
+
- opam
+
- git
+
- gawk
+
- gnupatch
+
- gnum4
+
- gnumake
+
- gnutar
+
- gnused
+
- gnugrep
+
- diffutils
+
- gzip
+
- bzip2
+
- gcc
+
- ocaml
+
+
steps:
+
- name: opam
+
command: |
+
opam init --disable-sandboxing -a -y
+
- name: switch
+
command: |
+
opam install . --confirm-level=unsafe-yes --deps-only
+
- name: build
+
command: |
+
opam exec -- dune build
+
- name: switch-test
+
command: |
+
opam install . --confirm-level=unsafe-yes --deps-only --with-test
+
- name: test
+
command: |
+
opam exec -- dune runtest --verbose
+
- name: doc
+
command: |
+
opam install -y odoc
+
opam exec -- dune build @doc
+15
LICENSE.md
···
+
ISC License
+
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>
+
+
Permission to use, copy, modify, and distribute this software for any
+
purpose with or without fee is hereby granted, provided that the above
+
copyright notice and this permission notice appear in all copies.
+
+
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+94
README.md
···
+
# Yamlrw
+
+
A pure OCaml implementation of YAML 1.2 parsing and emission.
+
+
## Features
+
+
- **Pure OCaml**: No C bindings, works on all OCaml platforms
+
- **YAML 1.2 Compliant**: Full support for the YAML 1.2 specification
+
- **High-level API**: JSON-compatible value representation for simple use cases
+
- **Low-level Streaming**: Event-based parsing for fine-grained control
+
- **Multiple I/O Backends**:
+
- `yamlrw`: Core library with bytesrw-based I/O
+
- `yamlrw-unix`: Unix file and channel operations
+
- `yamlrw-eio`: Eio-based streaming I/O (OCaml 5.0+)
+
+
## Installation
+
+
```bash
+
opam install yamlrw
+
# For Eio support:
+
opam install yamlrw-eio
+
# For Unix support:
+
opam install yamlrw-unix
+
```
+
+
## Quick Start
+
+
### Parsing YAML
+
+
```ocaml
+
let value = Yamlrw.of_string "name: Alice\nage: 30" in
+
match value with
+
| `O [("name", `String "Alice"); ("age", `Float 30.)] ->
+
print_endline "Parsed successfully"
+
| _ ->
+
print_endline "Unexpected structure"
+
```
+
+
### Emitting YAML
+
+
```ocaml
+
let yaml = `O [
+
("name", `String "Bob");
+
("active", `Bool true);
+
("tags", `A [`String "developer"; `String "ocaml"])
+
] in
+
let s = Yamlrw.to_string yaml in
+
print_endline s
+
(* Output:
+
name: Bob
+
active: true
+
tags:
+
- developer
+
- ocaml
+
*)
+
```
+
+
### Using the Utility Functions
+
+
```ocaml
+
open Yamlrw.Util
+
+
let config = Yamlrw.of_string_exn "
+
server:
+
host: localhost
+
port: 8080
+
" in
+
+
let host = get_string (get "host" (get "server" config)) in
+
let port = get_int (get "port" (get "server" config)) in
+
Printf.printf "Server: %s:%d\n" host port
+
```
+
+
## Command-line Tool
+
+
The `yamlcat` binary validates and pretty-prints YAML files:
+
+
```bash
+
yamlcat input.yaml
+
```
+
+
## API Documentation
+
+
Build the documentation with:
+
+
```bash
+
opam exec -- dune build @doc
+
```
+
+
## License
+
+
ISC License - see [LICENSE.md](LICENSE.md) for details.
+
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>
+5
bin/dune
···
+
(executable
+
(name yamlcat)
+
(package yamlrw)
+
(libraries cmdliner yamlrw)
+
(public_name yamlcat))
+197
bin/yamlcat.ml
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** yamlcat - parse and reprint YAML files *)
+
+
open Cmdliner
+
+
(* Output rendering modes, selected by --format or its shorthand flags. *)
type output_format = Yaml | Json | Flow | Debug
+
+
(** [escape_json_string buf s] appends [s] to [buf] as a JSON string
    literal, surrounding quotes included. Control characters use JSON
    [\uXXXX] / short escapes; bytes >= 0x80 pass through unchanged (raw
    UTF-8 is valid inside JSON strings). OCaml's [%S] is deliberately
    not used: it emits decimal [\DDD] escapes, which are illegal JSON. *)
let escape_json_string buf s =
  Buffer.add_char buf '"';
  String.iter (fun c ->
    match c with
    | '"' -> Buffer.add_string buf "\\\""
    | '\\' -> Buffer.add_string buf "\\\\"
    | '\n' -> Buffer.add_string buf "\\n"
    | '\r' -> Buffer.add_string buf "\\r"
    | '\t' -> Buffer.add_string buf "\\t"
    | '\b' -> Buffer.add_string buf "\\b"
    | '\012' -> Buffer.add_string buf "\\f"
    | c when Char.code c < 0x20 ->
      Buffer.add_string buf (Printf.sprintf "\\u%04x" (Char.code c))
    | c -> Buffer.add_char buf c
  ) s;
  Buffer.add_char buf '"'

(** [float_to_json f] renders a finite float without losing precision:
    integral values print with no fraction; otherwise the shorter of
    %.15g / %.17g that parses back to exactly [f] is used ([%g] alone
    keeps only 6 significant digits). *)
let float_to_json f =
  if Float.is_integer f && Float.abs f < 1e15 then
    Printf.sprintf "%.0f" f
  else
    let s = Printf.sprintf "%.15g" f in
    if float_of_string s = f then s else Printf.sprintf "%.17g" f

(** [json_to_string buf v] appends the JSON rendering of [v] to [buf].
    NaN and infinities have no JSON representation and are emitted as
    [null], matching common serializers. *)
let rec json_to_string buf v =
  match v with
  | `Null -> Buffer.add_string buf "null"
  | `Bool b -> Buffer.add_string buf (if b then "true" else "false")
  | `Float f ->
    if Float.is_finite f then Buffer.add_string buf (float_to_json f)
    else Buffer.add_string buf "null"
  | `String s -> escape_json_string buf s
  | `A items ->
    Buffer.add_char buf '[';
    List.iteri (fun i item ->
      if i > 0 then Buffer.add_string buf ", ";
      json_to_string buf item
    ) items;
    Buffer.add_char buf ']'
  | `O pairs ->
    Buffer.add_char buf '{';
    List.iteri (fun i (k, v) ->
      if i > 0 then Buffer.add_string buf ", ";
      escape_json_string buf k;
      Buffer.add_string buf ": ";
      json_to_string buf v
    ) pairs;
    Buffer.add_char buf '}'

(** [value_to_json v] is the JSON text for the YAML value [v]. *)
let value_to_json v =
  let buf = Buffer.create 256 in
  json_to_string buf v;
  Buffer.contents buf
+
+
(** [process_string ~format ~resolve_aliases ~max_nodes ~max_depth content]
    parses [content] as a (possibly multi-document) YAML stream and prints
    each document to stdout in the requested [format]. On a parse error it
    prints a message to stderr and exits the process with status 1. *)
let process_string ~format ~resolve_aliases ~max_nodes ~max_depth content =
  try
    (* Always parse as multi-document stream *)
    let documents = Yamlrw.documents_of_string content in

    match format with
    | Yaml ->
      (* Convert through Value to apply tag-based type coercion *)
      let first = ref true in
      List.iter (fun (doc : Yamlrw.document) ->
        (* "---" separates documents; none before the first one. *)
        if not !first then print_string "---\n";
        first := false;
        match doc.root with
        | None -> print_endline ""
        | Some yaml ->
          let value = Yamlrw.to_json ~resolve_aliases ~max_nodes ~max_depth yaml in
          print_string (Yamlrw.to_string value)
      ) documents
    | Flow ->
      (* Convert through Value to apply tag-based type coercion *)
      let first = ref true in
      List.iter (fun (doc : Yamlrw.document) ->
        if not !first then print_string "---\n";
        first := false;
        match doc.root with
        | None -> print_endline ""
        | Some yaml ->
          let value = Yamlrw.to_json ~resolve_aliases ~max_nodes ~max_depth yaml in
          print_string (Yamlrw.to_string ~layout_style:`Flow value)
      ) documents
    | Json ->
      let first = ref true in
      List.iter (fun (doc : Yamlrw.document) ->
        match doc.root with
        | None -> ()
        | Some yaml ->
          (* NOTE(review): unlike the Yaml/Flow branches, empty documents
             are skipped entirely here, and the "---" separator is printed
             only between non-empty documents. *)
          if not !first then print_endline "---";
          first := false;
          let value = Yamlrw.to_json ~resolve_aliases ~max_nodes ~max_depth yaml in
          print_endline (value_to_json value)
      ) documents
    | Debug ->
      List.iteri (fun i (doc : Yamlrw.document) ->
        Format.printf "Document %d:@." (i + 1);
        (* Convert back to Document.t for printing *)
        let doc' : Yamlrw.Document.t = {
          Yamlrw.Document.version = doc.version;
          Yamlrw.Document.tags = doc.tags;
          Yamlrw.Document.root = (doc.root :> Yamlrw.Yaml.t option);
          Yamlrw.Document.implicit_start = doc.implicit_start;
          Yamlrw.Document.implicit_end = doc.implicit_end;
        } in
        Format.printf "%a@." Yamlrw.Document.pp doc'
      ) documents
  with
  | Yamlrw.Yamlrw_error e ->
    Printf.eprintf "Error: %s\n" (Yamlrw.Error.to_string e);
    exit 1
+
+
(** [process_file ~format ~resolve_aliases ~max_nodes ~max_depth filename]
    reads [filename] in full (stdin when the name is ["-"]) and hands the
    contents to {!process_string}. *)
let process_file ~format ~resolve_aliases ~max_nodes ~max_depth filename =
  let content =
    match filename with
    | "-" -> In_channel.input_all In_channel.stdin
    | path -> In_channel.with_open_text path In_channel.input_all
  in
  process_string ~format ~resolve_aliases ~max_nodes ~max_depth content

(** Command driver: process each named file in order, defaulting to stdin
    when no file was given. [_all] is accepted for CLI wiring but unused. *)
let run format _all resolve_aliases max_nodes max_depth files =
  let inputs = match files with [] -> ["-"] | fs -> fs in
  List.iter (process_file ~format ~resolve_aliases ~max_nodes ~max_depth) inputs;
  `Ok ()
+
+
(* Command-line arguments *)
+
+
(* --format/-f: explicit output format selection by name. *)
let format_arg =
  let doc = "Output format: yaml (default), json, flow, or debug." in
  let formats = [
    ("yaml", Yaml);
    ("json", Json);
    ("flow", Flow);
    ("debug", Debug);
  ] in
  Arg.(value & opt (enum formats) Yaml & info ["format"; "f"] ~docv:"FORMAT" ~doc)

(* --json: shorthand flag; takes precedence over --format when set. *)
let json_arg =
  let doc = "Output as JSON (shorthand for --format=json)." in
  Arg.(value & flag & info ["json"] ~doc)

(* --flow: shorthand flag for flow-style YAML output. *)
let flow_arg =
  let doc = "Output in flow style (shorthand for --format=flow)." in
  Arg.(value & flag & info ["flow"] ~doc)

(* --debug: shorthand flag for dumping the internal document structure. *)
let debug_arg =
  let doc = "Output internal representation (shorthand for --format=debug)." in
  Arg.(value & flag & info ["debug"] ~doc)

(* --all/-a: declared for the CLI; currently unused by [run]. *)
let all_arg =
  let doc = "Output all documents (for multi-document YAML)." in
  Arg.(value & flag & info ["all"; "a"] ~doc)

(* --no-resolve-aliases: negative flag, inverted into [resolve_aliases]. *)
let no_resolve_aliases_arg =
  let doc = "Don't resolve aliases (keep them as references)." in
  Arg.(value & flag & info ["no-resolve-aliases"] ~doc)

(* --max-nodes: cap on alias-expansion size (billion-laughs protection). *)
let max_nodes_arg =
  let doc = "Maximum number of nodes during alias expansion (default: 10000000). \
             Protection against billion laughs attack." in
  Arg.(value & opt int Yamlrw.default_max_alias_nodes & info ["max-nodes"] ~docv:"N" ~doc)

(* --max-depth: cap on alias nesting depth. *)
let max_depth_arg =
  let doc = "Maximum alias nesting depth (default: 100). \
             Protection against deeply nested alias chains." in
  Arg.(value & opt int Yamlrw.default_max_alias_depth & info ["max-depth"] ~docv:"N" ~doc)

(* Positional arguments: zero or more input files; "-" means stdin. *)
let files_arg =
  let doc = "YAML file(s) to process. Use '-' for stdin." in
  Arg.(value & pos_all file [] & info [] ~docv:"FILE" ~doc)
+
+
(** Resolve the effective output format: the shorthand flags take
    precedence over --format, checked in the order json, flow, debug. *)
let combined_format format json flow debug =
  match json, flow, debug with
  | true, _, _ -> Json
  | false, true, _ -> Flow
  | false, false, true -> Debug
  | false, false, false -> format
+
+
(** Assemble the Cmdliner term: gather all argument values, resolve the
    effective format and alias handling, then dispatch to [run]. *)
let term =
  let combine format json flow debug all no_resolve max_nodes max_depth files =
    let format = combined_format format json flow debug in
    (* The CLI exposes the negative flag; the internal API is positive. *)
    let resolve_aliases = not no_resolve in
    run format all resolve_aliases max_nodes max_depth files
  in
  Term.(ret (const combine $ format_arg $ json_arg $ flow_arg $ debug_arg $
             all_arg $ no_resolve_aliases_arg $ max_nodes_arg $ max_depth_arg $ files_arg))
+
+
(** Command metadata and man page for yamlcat. *)
let info =
  let doc = "Parse and reprint YAML files" in
  let man = [
    `S Manpage.s_description;
    `P "$(tname) parses YAML files and reprints them in various formats. \
        It can be used to validate YAML, convert between styles, or convert to JSON.";
    `S Manpage.s_examples;
    `P "Parse and reprint a YAML file:";
    `Pre "  $(tname) config.yaml";
    `P "Convert YAML to JSON:";
    `Pre "  $(tname) --json config.yaml";
    `P "Process multi-document YAML:";
    `Pre "  $(tname) --all multi.yaml";
    `P "Limit alias expansion (protection against malicious YAML):";
    `Pre "  $(tname) --max-nodes 1000 --max-depth 10 untrusted.yaml";
    `S Manpage.s_bugs;
    (* Keep in sync with the bug_reports field in dune-project; the
       previous URL pointed at the unrelated avsm/ocaml-yaml project. *)
    `P "Report bugs at https://tangled.org/@anil.recoil.org/ocaml-yamlrw/issues";
  ] in
  Cmd.info "yamlcat" ~version:"0.1.0" ~doc ~man

(** Entry point: evaluate the command line and exit with its status. *)
let () = exit (Cmd.eval (Cmd.v info term))
+4
dune
···
+
; Root dune file
+
+
; Ignore third_party directory (for fetched dependency sources)
+
(data_only_dirs third_party)
+52
dune-project
···
+
(lang dune 3.18)
+
(name yamlrw)
+
+
(generate_opam_files true)
+
+
(license ISC)
+
(authors "Anil Madhavapeddy")
+
(homepage "https://tangled.org/@anil.recoil.org/ocaml-yamlrw")
+
(maintainers "Anil Madhavapeddy <anil@recoil.org>")
+
(bug_reports "https://tangled.org/@anil.recoil.org/ocaml-yamlrw/issues")
+
(maintenance_intent "(latest)")
+
+
(package
+
(name yamlrw)
+
(synopsis "Pure OCaml YAML 1.2 parser and emitter")
+
(description "\
+
Yamlrw is a pure OCaml implementation of YAML 1.2 parsing and emission. \
+
It provides both a high-level JSON-compatible interface for simple data interchange \
+
and a lower-level streaming API for fine-grained control over parsing and emission. \
+
The library works on all OCaml platforms without C dependencies.")
+
(depends
+
(ocaml (>= 4.14.0))
+
bytesrw
+
cmdliner
+
(odoc :with-doc)
+
(alcotest :with-test)))
+
+
(package
+
(name yamlrw-unix)
+
(synopsis "Unix I/O for Yamlrw")
+
(description "\
+
Unix file and channel operations for Yamlrw. \
+
Provides convenient functions for reading and writing YAML files using Unix I/O.")
+
(depends
+
(ocaml (>= 4.14.0))
+
yamlrw
+
bytesrw
+
(odoc :with-doc)))
+
+
(package
+
(name yamlrw-eio)
+
(synopsis "Eio support for Yamlrw")
+
(description "\
+
Eio-based streaming I/O for Yamlrw. \
+
Provides efficient async YAML parsing and emission using the Eio effects-based concurrency library. \
+
Requires OCaml 5.0 or later.")
+
(depends
+
(ocaml (>= 5.0.0))
+
yamlrw
+
bytesrw-eio
+
(eio (>= 1.1))
+
(odoc :with-doc)))
+46
lib/char_class.ml
···
+
(*---------------------------------------------------------------------------
  Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
  SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(** Character classification predicates used by the YAML scanner. *)

(** [is_break c] is [true] iff [c] is a line break (LF or CR). *)
let is_break = function '\n' | '\r' -> true | _ -> false

(** [is_blank c] is [true] iff [c] is an inline blank (space or tab). *)
let is_blank = function ' ' | '\t' -> true | _ -> false

(** [is_whitespace c] is [true] iff [c] is a break or a blank. *)
let is_whitespace c = is_break c || is_blank c

(** [is_digit c] is [true] iff [c] is an ASCII decimal digit. *)
let is_digit = function '0' .. '9' -> true | _ -> false

(** [is_hex c] is [true] iff [c] is an ASCII hexadecimal digit. *)
let is_hex = function
  | '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' -> true
  | _ -> false

(** [is_alpha c] is [true] iff [c] is an ASCII letter. *)
let is_alpha = function 'a' .. 'z' | 'A' .. 'Z' -> true | _ -> false

(** [is_alnum c] is [true] iff [c] is an ASCII letter or digit. *)
let is_alnum c = is_alpha c || is_digit c

(** [is_indicator c] is [true] iff [c] is one of the YAML indicator
    characters. *)
let is_indicator = function
  | '-' | '?' | ':' | ',' | '[' | ']' | '{' | '}'
  | '#' | '&' | '*' | '!' | '|' | '>' | '\'' | '"'
  | '%' | '@' | '`' -> true
  | _ -> false

(** [is_flow_indicator c] is [true] iff [c] is an indicator that is
    significant inside flow context. *)
let is_flow_indicator = function
  | ',' | '[' | ']' | '{' | '}' -> true
  | _ -> false
+31
lib/chomping.ml
···
+
(*---------------------------------------------------------------------------
  Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
  SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(** Block scalar chomping indicators *)

type t =
  | Strip (** Remove final line break and trailing empty lines *)
  | Clip (** Keep final line break, remove trailing empty lines (default) *)
  | Keep (** Keep final line break and trailing empty lines *)

(** [to_string t] is a lowercase name for [t]. *)
let to_string = function
  | Strip -> "strip"
  | Clip -> "clip"
  | Keep -> "keep"

(** Pretty-printer for {!t}. *)
let pp fmt t = Format.pp_print_string fmt (to_string t)

(** [of_char c] decodes a chomping indicator character. [Clip] is the
    default and has no character, so only ['-'] and ['+'] decode. *)
let of_char = function
  | '-' -> Some Strip
  | '+' -> Some Keep
  | _ -> None

(** [to_char t] is the indicator character for [t], or [None] for the
    default [Clip]. *)
let to_char = function
  | Strip -> Some '-'
  | Clip -> None
  | Keep -> Some '+'

(** Equality on chomping indicators. *)
let equal a b = a = b
+59
lib/document.ml
···
+
(*---------------------------------------------------------------------------
  Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
  SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(** YAML document with directives and content *)

type t = {
  version : (int * int) option;
  (* %YAML directive as (major, minor), when present. *)
  tags : (string * string) list;
  (* %TAG directives as (handle, prefix) pairs. *)
  root : Yaml.t option;
  (* Root node; [None] for an empty document. *)
  implicit_start : bool;
  (* True when the document start marker "---" was implicit. *)
  implicit_end : bool;
  (* True when the document end marker "..." was implicit. *)
}

(** [make ?version ?tags ?implicit_start ?implicit_end root] builds a
    document around [root]; markers default to implicit and [tags] to
    the empty list. *)
let make
    ?(version : (int * int) option)
    ?(tags : (string * string) list = [])
    ?(implicit_start = true)
    ?(implicit_end = true)
    root =
  { version; tags; root; implicit_start; implicit_end }

(* Field accessors. *)
let version t = t.version
let tags t = t.tags
let root t = t.root
let implicit_start t = t.implicit_start
let implicit_end t = t.implicit_end

(* Functional updates: each returns a copy with one field replaced. *)
let with_version version t = { t with version = Some version }
let with_tags tags t = { t with tags }
let with_root root t = { t with root = Some root }

(** Pretty-print a document, including directives, marker implicitness
    and the root node (via {!Yaml.pp}). *)
let pp fmt t =
  Format.fprintf fmt "@[<v 2>document(@,";
  (* %YAML directive, when present. *)
  (match t.version with
  | Some (maj, min) -> Format.fprintf fmt "version=%d.%d,@ " maj min
  | None -> ());
  (* %TAG directives, only when non-empty. *)
  if t.tags <> [] then begin
    Format.fprintf fmt "tags=[";
    List.iteri (fun i (h, p) ->
      if i > 0 then Format.fprintf fmt ", ";
      Format.fprintf fmt "%s -> %s" h p
    ) t.tags;
    Format.fprintf fmt "],@ "
  end;
  Format.fprintf fmt "implicit_start=%b,@ " t.implicit_start;
  Format.fprintf fmt "implicit_end=%b,@ " t.implicit_end;
  (match t.root with
  | Some root -> Format.fprintf fmt "root=%a" Yaml.pp root
  | None -> Format.fprintf fmt "root=<empty>");
  Format.fprintf fmt "@]@,)"

(** Structural equality; node comparison is delegated to {!Yaml.equal}. *)
let equal a b =
  Option.equal (fun (a1, a2) (b1, b2) -> a1 = b1 && a2 = b2) a.version b.version &&
  List.equal (fun (h1, p1) (h2, p2) -> h1 = h2 && p1 = p2) a.tags b.tags &&
  Option.equal Yaml.equal a.root b.root &&
  a.implicit_start = b.implicit_start &&
  a.implicit_end = b.implicit_end
+35
lib/dune
···
+
(library
+
(name yamlrw)
+
(public_name yamlrw)
+
(libraries bytesrw)
+
(modules
+
; Core types
+
position
+
span
+
error
+
encoding
+
scalar_style
+
layout_style
+
chomping
+
char_class
+
quoting
+
; Streaming
+
input
+
token
+
scanner
+
event
+
parser
+
emitter
+
; Value types
+
tag
+
value
+
scalar
+
sequence
+
mapping
+
yaml
+
document
+
; Conversion
+
loader
+
serialize
+
; Main module
+
yamlrw))
+4
lib/eio/dune
···
+
(library
+
(name yamlrw_eio)
+
(public_name yamlrw-eio)
+
(libraries yamlrw bytesrw-eio eio))
+268
lib/eio/yamlrw_eio.ml
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** Yamlrw Eio - Streaming YAML parsing and emitting with Eio
+
+
This module provides Eio-compatible streaming YAML parsing and emitting.
+
It uses bytesrw adapters to convert Eio sources/sinks to the standard
+
YAML scanner/parser/emitter, eliminating code duplication. *)
+
+
open Yamlrw
+
+
(** {1 Types} *)
+
+
(* Re-exported aliases so users of this module need not reference the
   core [Yamlrw] submodules directly. *)
type value = Value.t
type yaml = Yaml.t
type document = Document.t
type event = Event.t
+
+
(** {1 Reading from Eio Sources} *)
+
+
module Read = struct
  (** Parse YAML from Eio sources/flows *)

  (** Create a scanner from an Eio flow. The flow is wrapped in a
      bytesrw reader so data is pulled incrementally. *)
  let scanner_of_flow flow =
    let reader = Bytesrw_eio.bytes_reader_of_flow flow in
    let input = Input.of_reader reader in
    Scanner.of_input input

  (** Create a parser from an Eio flow *)
  let parser_of_flow flow =
    Parser.of_scanner (scanner_of_flow flow)

  (** Parse a JSON-compatible value from an Eio flow.

      @param resolve_aliases Whether to expand aliases (default: true)
      @param max_nodes Maximum nodes during alias expansion (default: 10M)
      @param max_depth Maximum alias nesting depth (default: 100) *)
  let value
      ?(resolve_aliases = true)
      ?(max_nodes = Yaml.default_max_alias_nodes)
      ?(max_depth = Yaml.default_max_alias_depth)
      flow =
    let parser = parser_of_flow flow in
    Loader.value_of_parser
      ~resolve_aliases ~max_nodes ~max_depth
      (fun () -> Parser.next parser)

  (** Parse a full YAML value from an Eio flow.

      By default, aliases are NOT resolved, preserving the document structure.

      @param resolve_aliases Whether to expand aliases (default: false)
      @param max_nodes Maximum nodes during alias expansion (default: 10M)
      @param max_depth Maximum alias nesting depth (default: 100) *)
  let yaml
      ?(resolve_aliases = false)
      ?(max_nodes = Yaml.default_max_alias_nodes)
      ?(max_depth = Yaml.default_max_alias_depth)
      flow =
    let parser = parser_of_flow flow in
    Loader.yaml_of_parser
      ~resolve_aliases ~max_nodes ~max_depth
      (fun () -> Parser.next parser)

  (** Parse multiple YAML documents from an Eio flow. *)
  let documents flow =
    let parser = parser_of_flow flow in
    Loader.documents_of_parser (fun () -> Parser.next parser)

  (** {2 Event-Based Streaming} *)

  (** A streaming event reader backed by a flow *)
  type event_reader = {
    parser : Parser.t;
  }

  (** Create an event reader from an Eio flow.
      This reads data incrementally as events are requested. *)
  let event_reader flow =
    { parser = parser_of_flow flow }

  (** Get the next event from an event reader.
      Returns [None] when parsing is complete. *)
  let next_event reader =
    Parser.next reader.parser

  (** Iterate over all events from a flow.

      @param f Called with each event and its source span *)
  let iter_events f flow =
    let parser = parser_of_flow flow in
    Parser.iter (fun ev -> f ev.event ev.span) parser

  (** Fold over all events from a flow. *)
  let fold_events f init flow =
    let parser = parser_of_flow flow in
    (* Tail-recursive accumulation over the event stream. *)
    let rec loop acc =
      match Parser.next parser with
      | Some ev -> loop (f acc ev.event)
      | None -> acc
    in
    loop init

  (** Iterate over documents from a flow, calling [f] for each document. *)
  let iter_documents f flow =
    let parser = parser_of_flow flow in
    Loader.iter_documents_parser f (fun () -> Parser.next parser)

  (** Fold over documents from a flow. *)
  let fold_documents f init flow =
    let parser = parser_of_flow flow in
    Loader.fold_documents_parser f init (fun () -> Parser.next parser)
end
+
+
(** {1 Writing to Eio Sinks} *)
+
+
module Write = struct
  (** Emit YAML to Eio sinks/flows using true streaming output. *)

  (** Write a JSON-compatible value to an Eio flow.

      Uses the emitter's native Writer support for streaming output.

      @param encoding Output encoding (default: UTF-8)
      @param scalar_style Preferred scalar style (default: Any)
      @param layout_style Preferred layout style (default: Any) *)
  let value
      ?(encoding = `Utf8)
      ?(scalar_style = `Any)
      ?(layout_style = `Any)
      flow
      (v : value) =
    let config = { Emitter.default_config with encoding; scalar_style; layout_style } in
    let writer = Bytesrw_eio.bytes_writer_of_flow flow in
    Serialize.value_to_writer ~config writer v

  (** Write a full YAML value to an Eio flow.

      @param encoding Output encoding (default: UTF-8)
      @param scalar_style Preferred scalar style (default: Any)
      @param layout_style Preferred layout style (default: Any) *)
  let yaml
      ?(encoding = `Utf8)
      ?(scalar_style = `Any)
      ?(layout_style = `Any)
      flow
      (v : yaml) =
    let config = { Emitter.default_config with encoding; scalar_style; layout_style } in
    let writer = Bytesrw_eio.bytes_writer_of_flow flow in
    Serialize.yaml_to_writer ~config writer v

  (** Write multiple YAML documents to an Eio flow.

      @param encoding Output encoding (default: UTF-8)
      @param scalar_style Preferred scalar style (default: Any)
      @param layout_style Preferred layout style (default: Any)
      @param resolve_aliases Whether to expand aliases (default: true) *)
  let documents
      ?(encoding = `Utf8)
      ?(scalar_style = `Any)
      ?(layout_style = `Any)
      ?(resolve_aliases = true)
      flow
      docs =
    let config = { Emitter.default_config with encoding; scalar_style; layout_style } in
    let writer = Bytesrw_eio.bytes_writer_of_flow flow in
    Serialize.documents_to_writer ~config ~resolve_aliases writer docs

  (** {2 Event-Based Streaming} *)

  (** A streaming event writer that writes directly to a flow *)
  type event_writer = {
    emitter : Emitter.t;
  }

  (** Create an event writer that writes directly to a flow.
      Events are written incrementally as they are emitted.

      @param encoding Output encoding (default: UTF-8)
      @param scalar_style Preferred scalar style (default: Any)
      @param layout_style Preferred layout style (default: Any) *)
  let event_writer
      ?(encoding = `Utf8)
      ?(scalar_style = `Any)
      ?(layout_style = `Any)
      flow =
    let config = { Emitter.default_config with encoding; scalar_style; layout_style } in
    let writer = Bytesrw_eio.bytes_writer_of_flow flow in
    { emitter = Emitter.of_writer ~config writer }

  (** Emit a single event to the writer. *)
  let emit ew ev =
    Emitter.emit ew.emitter ev

  (** Flush the writer by sending end-of-data. *)
  let flush ew =
    Emitter.flush ew.emitter

  (** Emit events from a list to a flow. The writer is flushed after
      the last event. *)
  let emit_all flow events =
    let ew = event_writer flow in
    List.iter (emit ew) events;
    flush ew
end
+
+
(** {1 Convenience Functions} *)
+
+
(** Read a value from a file path. The file is opened for the duration
    of the parse and closed on return or exception. *)
let of_file
    ?(resolve_aliases = true)
    ?(max_nodes = Yaml.default_max_alias_nodes)
    ?(max_depth = Yaml.default_max_alias_depth)
    ~fs
    path =
  Eio.Path.with_open_in Eio.Path.(fs / path) @@ fun flow ->
  Read.value ~resolve_aliases ~max_nodes ~max_depth flow

(** Read full YAML from a file path. Aliases are preserved by default. *)
let yaml_of_file
    ?(resolve_aliases = false)
    ?(max_nodes = Yaml.default_max_alias_nodes)
    ?(max_depth = Yaml.default_max_alias_depth)
    ~fs
    path =
  Eio.Path.with_open_in Eio.Path.(fs / path) @@ fun flow ->
  Read.yaml ~resolve_aliases ~max_nodes ~max_depth flow

(** Read documents from a file path *)
let documents_of_file ~fs path =
  Eio.Path.with_open_in Eio.Path.(fs / path) @@ fun flow ->
  Read.documents flow

(** Write a value to a file path. The file is created (or truncated)
    with mode 0o644. *)
let to_file
    ?(encoding = `Utf8)
    ?(scalar_style = `Any)
    ?(layout_style = `Any)
    ~fs
    path
    v =
  Eio.Path.with_open_out ~create:(`Or_truncate 0o644) Eio.Path.(fs / path) @@ fun flow ->
  Write.value ~encoding ~scalar_style ~layout_style flow v

(** Write full YAML to a file path. Same creation semantics as {!to_file}. *)
let yaml_to_file
    ?(encoding = `Utf8)
    ?(scalar_style = `Any)
    ?(layout_style = `Any)
    ~fs
    path
    v =
  Eio.Path.with_open_out ~create:(`Or_truncate 0o644) Eio.Path.(fs / path) @@ fun flow ->
  Write.yaml ~encoding ~scalar_style ~layout_style flow v

(** Write documents to a file path. Same creation semantics as {!to_file}. *)
let documents_to_file
    ?(encoding = `Utf8)
    ?(scalar_style = `Any)
    ?(layout_style = `Any)
    ?(resolve_aliases = true)
    ~fs
    path
    docs =
  Eio.Path.with_open_out ~create:(`Or_truncate 0o644) Eio.Path.(fs / path) @@ fun flow ->
  Write.documents ~encoding ~scalar_style ~layout_style ~resolve_aliases flow docs
+265
lib/eio/yamlrw_eio.mli
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** Yamlrw Eio - Streaming YAML parsing and emitting with Eio
+
+
This library provides Eio-based streaming support for YAML parsing
+
and emitting. It uses bytesrw adapters that read/write directly to
+
Eio flows, with bytesrw handling internal buffering.
+
+
{2 Quick Start}
+
+
Read YAML from a file:
+
{[
+
Eio_main.run @@ fun env ->
+
let fs = Eio.Stdenv.fs env in
+
let value = Yamlrw_eio.of_file ~fs "config.yaml" in
+
...
+
]}
+
+
Write YAML to a flow:
+
{[
+
Eio_main.run @@ fun env ->
+
let fs = Eio.Stdenv.fs env in
+
Eio.Path.with_open_out Eio.Path.(fs / "output.yaml") @@ fun flow ->
+
Yamlrw_eio.Write.value flow (`O [("name", `String "test")])
+
]}
+
+
Stream events incrementally:
+
{[
+
Eio_main.run @@ fun env ->
+
let fs = Eio.Stdenv.fs env in
+
Eio.Path.with_open_in Eio.Path.(fs / "data.yaml") @@ fun flow ->
+
Yamlrw_eio.Read.iter_events (fun event span ->
+
Format.printf "Event at %a@." Yamlrw.Span.pp span
+
) flow
+
]}
+
+
{2 Streaming Architecture}
+
+
This library uses bytesrw for direct I/O with Eio flows:
+
+
- {b Reading}: Data is read directly from the flow as the
+
parser requests it. Bytesrw handles internal buffering.
+
+
- {b Writing}: Output is written directly to the flow.
+
Bytesrw handles chunking and buffering. *)
+
+
(** {1 Types} *)
+
+
type value = Yamlrw.Value.t
+
(** JSON-compatible YAML value *)
+
+
type yaml = Yamlrw.Yaml.t
+
(** Full YAML value with metadata *)
+
+
type document = Yamlrw.Document.t
+
(** YAML document with directives *)
+
+
type event = Yamlrw.Event.t
+
(** Parser/emitter event *)
+
+
(** {1 Reading from Eio Sources} *)
+
+
module Read : sig
+
(** Parse YAML from Eio flows.
+
+
All functions read data incrementally from the underlying flow,
+
without loading the entire file into memory first. *)
+
+
(** {2 High-Level Parsing} *)
+
+
val value :
+
?resolve_aliases:bool ->
+
?max_nodes:int ->
+
?max_depth:int ->
+
_ Eio.Flow.source -> value
+
(** Parse a JSON-compatible value from an Eio flow.
+
+
@param resolve_aliases Whether to expand aliases (default: true)
+
@param max_nodes Maximum nodes during alias expansion (default: 10M)
+
@param max_depth Maximum alias nesting depth (default: 100) *)
+
+
val yaml :
+
?resolve_aliases:bool ->
+
?max_nodes:int ->
+
?max_depth:int ->
+
_ Eio.Flow.source -> yaml
+
(** Parse a full YAML value from an Eio flow.
+
+
By default, aliases are NOT resolved, preserving the document structure.
+
+
@param resolve_aliases Whether to expand aliases (default: false)
+
@param max_nodes Maximum nodes during alias expansion (default: 10M)
+
@param max_depth Maximum alias nesting depth (default: 100) *)
+
+
val documents : _ Eio.Flow.source -> document list
+
(** Parse multiple YAML documents from an Eio flow. *)
+
+
(** {2 Event-Based Streaming} *)
+
+
type event_reader
+
(** A streaming event reader backed by a flow.
+
Events are parsed incrementally as requested. *)
+
+
val event_reader : _ Eio.Flow.source -> event_reader
+
(** Create an event reader from an Eio flow. *)
+
+
val next_event : event_reader -> Yamlrw.Event.spanned option
+
(** Get the next event from an event reader.
+
Returns [None] when parsing is complete. *)
+
+
val iter_events :
+
(event -> Yamlrw.Span.t -> unit) ->
+
_ Eio.Flow.source -> unit
+
(** Iterate over all events from a flow. *)
+
+
val fold_events :
+
('a -> event -> 'a) -> 'a ->
+
_ Eio.Flow.source -> 'a
+
(** Fold over all events from a flow. *)
+
+
val iter_documents :
+
(document -> unit) ->
+
_ Eio.Flow.source -> unit
+
(** Iterate over documents from a flow, calling [f] for each document. *)
+
+
val fold_documents :
+
('a -> document -> 'a) -> 'a ->
+
_ Eio.Flow.source -> 'a
+
(** Fold over documents from a flow. *)
+
end
+
+
(** {1 Writing to Eio Sinks} *)
+
+
module Write : sig
  (** Emit YAML to Eio flows.

      All functions write data directly to the underlying flow. *)

  (** {2 High-Level Emission} *)

  val value :
    ?encoding:Yamlrw.Encoding.t ->
    ?scalar_style:Yamlrw.Scalar_style.t ->
    ?layout_style:Yamlrw.Layout_style.t ->
    _ Eio.Flow.sink -> value -> unit
  (** [value sink v] writes the JSON-compatible value [v] to the Eio flow
      [sink].

      @param encoding Output encoding (default: UTF-8)
      @param scalar_style Preferred scalar style (default: Any)
      @param layout_style Preferred layout style (default: Any) *)

  val yaml :
    ?encoding:Yamlrw.Encoding.t ->
    ?scalar_style:Yamlrw.Scalar_style.t ->
    ?layout_style:Yamlrw.Layout_style.t ->
    _ Eio.Flow.sink -> yaml -> unit
  (** [yaml sink y] writes the full YAML value [y] (including anchors, tags
      and styles) to the Eio flow [sink].

      @param encoding Output encoding (default: UTF-8)
      @param scalar_style Preferred scalar style (default: Any)
      @param layout_style Preferred layout style (default: Any) *)

  val documents :
    ?encoding:Yamlrw.Encoding.t ->
    ?scalar_style:Yamlrw.Scalar_style.t ->
    ?layout_style:Yamlrw.Layout_style.t ->
    ?resolve_aliases:bool ->
    _ Eio.Flow.sink -> document list -> unit
  (** [documents sink docs] writes multiple YAML documents to the Eio flow
      [sink].

      @param encoding Output encoding (default: UTF-8)
      @param scalar_style Preferred scalar style (default: Any)
      @param layout_style Preferred layout style (default: Any)
      @param resolve_aliases Whether to expand aliases (default: true) *)

  (** {2 Event-Based Streaming} *)

  type event_writer
  (** A streaming event writer backed by a flow.
      Events are written incrementally to the underlying flow. *)

  val event_writer :
    ?encoding:Yamlrw.Encoding.t ->
    ?scalar_style:Yamlrw.Scalar_style.t ->
    ?layout_style:Yamlrw.Layout_style.t ->
    _ Eio.Flow.sink -> event_writer
  (** Create an event writer that writes directly to a flow.
      Events are written incrementally as they are emitted.

      @param encoding Output encoding (default: UTF-8)
      @param scalar_style Preferred scalar style (default: Any)
      @param layout_style Preferred layout style (default: Any) *)

  val emit : event_writer -> event -> unit
  (** Emit a single event to the writer. *)

  val flush : event_writer -> unit
  (** Flush the writer by sending end-of-data.  After flushing, no further
      events should be emitted on this writer. *)

  val emit_all : _ Eio.Flow.sink -> event list -> unit
  (** Emit every event of a list, in order, to a flow. *)
end
+
+
(** {1 Convenience Functions} *)

val of_file :
  ?resolve_aliases:bool ->
  ?max_nodes:int ->
  ?max_depth:int ->
  fs:_ Eio.Path.t ->
  string -> value
(** [of_file ~fs path] reads a JSON-compatible value from the file at [path].

    @param fs The filesystem path (e.g., [Eio.Stdenv.fs env] or [Eio.Stdenv.cwd env]) *)

val yaml_of_file :
  ?resolve_aliases:bool ->
  ?max_nodes:int ->
  ?max_depth:int ->
  fs:_ Eio.Path.t ->
  string -> yaml
(** [yaml_of_file ~fs path] reads full YAML (anchors, tags, styles) from the
    file at [path].

    @param fs The filesystem path (e.g., [Eio.Stdenv.fs env] or [Eio.Stdenv.cwd env]) *)

val documents_of_file :
  fs:_ Eio.Path.t ->
  string -> document list
(** [documents_of_file ~fs path] reads all documents from the file at [path].

    NOTE(review): unlike {!of_file} and {!yaml_of_file}, this takes no
    [?resolve_aliases]/[?max_nodes]/[?max_depth] options — confirm whether
    the asymmetry is intentional.

    @param fs The filesystem path (e.g., [Eio.Stdenv.fs env] or [Eio.Stdenv.cwd env]) *)

val to_file :
  ?encoding:Yamlrw.Encoding.t ->
  ?scalar_style:Yamlrw.Scalar_style.t ->
  ?layout_style:Yamlrw.Layout_style.t ->
  fs:_ Eio.Path.t ->
  string -> value -> unit
(** [to_file ~fs path v] writes the JSON-compatible value [v] to the file at
    [path].

    @param fs The filesystem path (e.g., [Eio.Stdenv.fs env] or [Eio.Stdenv.cwd env]) *)

val yaml_to_file :
  ?encoding:Yamlrw.Encoding.t ->
  ?scalar_style:Yamlrw.Scalar_style.t ->
  ?layout_style:Yamlrw.Layout_style.t ->
  fs:_ Eio.Path.t ->
  string -> yaml -> unit
(** [yaml_to_file ~fs path y] writes the full YAML value [y] to the file at
    [path].

    @param fs The filesystem path (e.g., [Eio.Stdenv.fs env] or [Eio.Stdenv.cwd env]) *)

val documents_to_file :
  ?encoding:Yamlrw.Encoding.t ->
  ?scalar_style:Yamlrw.Scalar_style.t ->
  ?layout_style:Yamlrw.Layout_style.t ->
  ?resolve_aliases:bool ->
  fs:_ Eio.Path.t ->
  string -> document list -> unit
(** [documents_to_file ~fs path docs] writes the documents [docs] to the file
    at [path].

    @param fs The filesystem path (e.g., [Eio.Stdenv.fs env] or [Eio.Stdenv.cwd env]) *)
+581
lib/emitter.ml
···
+
(*---------------------------------------------------------------------------
   Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(** Emitter - converts YAML data structures to string output

    The emitter can write to either a Buffer (default) or directly to a
    bytesrw Bytes.Writer for streaming output. *)

(** Static configuration fixed at emitter creation. *)
type config = {
  encoding : Encoding.t;  (** Target character encoding. *)
  scalar_style : Scalar_style.t;  (** Preferred scalar style. *)
  layout_style : Layout_style.t;  (** Preferred block/flow layout. *)
  indent : int;  (** Spaces added per block nesting level. *)
  width : int;
  (** Preferred line width.
      NOTE(review): not consulted anywhere in this file — confirm whether
      line wrapping is implemented elsewhere or simply unimplemented. *)
  canonical : bool;
  (** Canonical-form flag.
      NOTE(review): not consulted anywhere in this file. *)
}

(* Defaults: UTF-8, styles left to the emitter's choice, 2-space indent. *)
let default_config = {
  encoding = `Utf8;
  scalar_style = `Any;
  layout_style = `Any;
  indent = 2;
  width = 80;
  canonical = false;
}

(** State machine driving [emit]: tracks what syntactic position the next
    event will be rendered in. *)
type state =
  | Initial
  | Stream_started
  | Document_started
  | In_block_sequence of int (* indent level *)
  | In_block_mapping_key of int
  | In_block_mapping_value of int
  | In_block_mapping_first_key of int (* first key after "- ", no indent needed *)
  | In_flow_sequence
  | In_flow_mapping_key
  | In_flow_mapping_value
  | Document_ended
  | Stream_ended

(** Output sink - either a Buffer or a bytesrw Writer *)
type sink =
  | Buffer_sink of Buffer.t
  | Writer_sink of Bytesrw.Bytes.Writer.t

(** Mutable emitter: configuration, sink, and the current rendering state. *)
type t = {
  config : config;
  sink : sink;
  mutable state : state;  (* current position in the state machine *)
  mutable states : state list;  (* stack of enclosing states *)
  mutable indent : int;  (* current indentation column *)
  mutable flow_level : int;  (* > 0 while inside [...] or {...} *)
  mutable need_separator : bool;  (* a ", " is due before the next flow item *)
}
+
+
(* All constructors share the same pristine emitter state; only the sink
   differs, so build the record in one place instead of duplicating it. *)
let make_emitter config sink =
  {
    config;
    sink;
    state = Initial;
    states = [];
    indent = 0;
    flow_level = 0;
    need_separator = false;
  }

(** [create ?config ()] is an emitter that accumulates output into an
    internal buffer; retrieve the result with [contents]. *)
let create ?(config = default_config) () =
  make_emitter config (Buffer_sink (Buffer.create 1024))

(** Create an emitter that writes directly to a Bytes.Writer *)
let of_writer ?(config = default_config) writer =
  make_emitter config (Writer_sink writer)
+
+
(** [contents t] is everything accumulated so far.  A streaming
    ([Writer_sink]) emitter retains nothing, so its contents are always
    the empty string. *)
let contents t =
  match t.sink with
  | Writer_sink _ -> "" (* No accumulated content for writer sink *)
  | Buffer_sink buf -> Buffer.contents buf

(** [reset t] discards any buffered output and returns the emitter to its
    pristine state so it can be reused. *)
let reset t =
  (match t.sink with
  | Writer_sink _ -> ()
  | Buffer_sink buf -> Buffer.clear buf);
  t.state <- Initial;
  t.states <- [];
  t.indent <- 0;
  t.flow_level <- 0;
  t.need_separator <- false
+
+
(** Output helpers - write to appropriate sink *)

(** Append the string [s] to the emitter's sink. *)
let write t s =
  match t.sink with
  | Writer_sink w -> Bytesrw.Bytes.Writer.write_string w s
  | Buffer_sink buf -> Buffer.add_string buf s

(** Append the single character [c] to the emitter's sink. *)
let write_char t c =
  match t.sink with
  | Writer_sink w -> Bytesrw.Bytes.Writer.write_bytes w (Bytes.make 1 c)
  | Buffer_sink buf -> Buffer.add_char buf c

(** Emit [t.indent] spaces; small indents go char-by-char to avoid
    allocating an intermediate string. *)
let write_indent t =
  let n = t.indent in
  if n > 8 then write t (String.make n ' ')
  else
    for _ = 1 to n do
      write_char t ' '
    done

(** Emit a line break. *)
let write_newline t = write_char t '\n'

(** Save the current state on the stack and switch to [s]. *)
let push_state t s =
  t.states <- t.state :: t.states;
  t.state <- s

(** Restore the most recently pushed state; an exhausted stack means the
    stream is over. *)
let pop_state t =
  match t.states with
  | [] -> t.state <- Stream_ended
  | s :: rest ->
      t.state <- s;
      t.states <- rest
+
+
(** Escape a string for double-quoted output.

    Returns [value] unchanged (no allocation) when nothing needs escaping;
    otherwise builds the escaped copy in a buffer to batch writes.

    Fix over the original: DEL (0x7F) is escaped as [\x7f].  YAML 1.2
    restricts printable characters to x09, x0A, x0D and x20-x7E (plus
    non-ASCII ranges), so DEL may not appear raw in a double-quoted scalar. *)
let escape_double_quoted value =
  let len = String.length value in
  (* First pass: check whether any escaping is needed at all. *)
  let needs_escape = ref false in
  for i = 0 to len - 1 do
    match value.[i] with
    | '"' | '\\' | '\n' | '\r' | '\t' | '\x7f' -> needs_escape := true
    | c when c < ' ' -> needs_escape := true
    | _ -> ()
  done;
  if not !needs_escape then value
  else begin
    let buf = Buffer.create (len + len / 4) in
    for i = 0 to len - 1 do
      match value.[i] with
      | '"' -> Buffer.add_string buf "\\\""
      | '\\' -> Buffer.add_string buf "\\\\"
      | '\n' -> Buffer.add_string buf "\\n"
      | '\r' -> Buffer.add_string buf "\\r"
      | '\t' -> Buffer.add_string buf "\\t"
      | '\x7f' -> Buffer.add_string buf "\\x7f"
      (* Remaining C0 control characters get a generic hex escape. *)
      | c when c < ' ' -> Buffer.add_string buf (Printf.sprintf "\\x%02x" (Char.code c))
      | c -> Buffer.add_char buf c
    done;
    Buffer.contents buf
  end
+
+
(** Escape a string for single-quoted output.

    In YAML single-quoted style the only escape is doubling the quote
    itself; the input is returned untouched when it contains none. *)
let escape_single_quoted value =
  if not (String.contains value '\'') then value
  else begin
    let n = String.length value in
    let out = Buffer.create (n + n / 8) in
    String.iter
      (fun ch ->
        if Char.equal ch '\'' then Buffer.add_string out "''"
        else Buffer.add_char out ch)
      value;
    Buffer.contents out
  end
+
+
(** Write scalar with appropriate quoting.

    When [style] is [`Any] the concrete style is chosen by
    [Quoting.choose_style]; otherwise the caller's style is used as-is.

    NOTE(review): the [`Folded] branch emits each source line on its own
    output line, exactly like [`Literal].  When re-parsed, folded style
    joins single line breaks with spaces, so a multi-line value emitted as
    [`Folded] would not round-trip its newlines — confirm intended. *)
let write_scalar t ?(style = `Any) value =
  match (match style with `Any -> Quoting.choose_style value | s -> s) with
  | `Plain | `Any ->
      write t value
  | `Single_quoted ->
      write_char t '\'';
      write t (escape_single_quoted value);
      write_char t '\''
  | `Double_quoted ->
      write_char t '"';
      write t (escape_double_quoted value);
      write_char t '"'
  | `Literal ->
      (* Block literal: header line, then each line indented verbatim. *)
      write t "|";
      write_newline t;
      String.split_on_char '\n' value |> List.iter (fun line ->
        write_indent t;
        write t line;
        write_newline t
      )
  | `Folded ->
      write t ">";
      write_newline t;
      String.split_on_char '\n' value |> List.iter (fun line ->
        write_indent t;
        write t line;
        write_newline t
      )
+
+
(** Emit ["&name "] when an anchor is present; nothing otherwise. *)
let write_anchor t anchor =
  Option.iter
    (fun name ->
      write_char t '&';
      write t name;
      write_char t ' ')
    anchor

(** Emit ["!tag "] unless the node's tag is implicit (or absent). *)
let write_tag t ~implicit tag =
  if not implicit then
    Option.iter
      (fun tag_str ->
        write_char t '!';
        write t tag_str;
        write_char t ' ')
      tag
+
+
(** [emit t ev] feeds one event [ev] into the emitter, writing its textual
    rendering to the sink and advancing the state machine.

    Flow vs block output: once [t.flow_level > 0] every nested node is
    rendered in flow style ([\[...\]] / [{...}]); block style applies only
    at flow level 0.  [t.need_separator] tracks whether a ", " is due
    before the next flow item. *)
let emit t (ev : Event.t) =
  match ev with
  | Event.Stream_start _ ->
      t.state <- Stream_started

  | Event.Stream_end ->
      t.state <- Stream_ended

  | Event.Document_start { version; implicit } ->
      (* Explicit documents get an optional %YAML directive and "---". *)
      if not implicit then begin
        (match version with
        | Some (maj, min) ->
            write t (Printf.sprintf "%%YAML %d.%d\n" maj min)
        | None -> ());
        write t "---";
        write_newline t
      end;
      t.state <- Document_started

  | Event.Document_end { implicit } ->
      if not implicit then begin
        write t "...";
        write_newline t
      end;
      t.state <- Document_ended

  | Event.Alias { anchor } ->
      if t.flow_level > 0 then begin
        if t.need_separator then write t ", ";
        t.need_separator <- true;
        write_char t '*';
        write t anchor
      end else begin
        (match t.state with
        | In_block_sequence _ ->
            write_indent t;
            write t "- *";
            write t anchor;
            write_newline t
        | In_block_mapping_key _ ->
            write_indent t;
            write_char t '*';
            write t anchor;
            write t ": ";
            (* NOTE(review): ": " is written here, yet the scalar
               value branch below also prepends a space — an alias used
               as a key is followed by two spaces; confirm intended. *)
            t.state <- In_block_mapping_value t.indent
        | In_block_mapping_value indent ->
            write_char t '*';
            write t anchor;
            write_newline t;
            t.state <- In_block_mapping_key indent
        | _ ->
            write_char t '*';
            write t anchor;
            write_newline t)
      end

  | Event.Scalar { anchor; tag; value; plain_implicit; style; _ } ->
      if t.flow_level > 0 then begin
        (match t.state with
        | In_flow_mapping_key ->
            if t.need_separator then write t ", ";
            write_anchor t anchor;
            write_tag t ~implicit:plain_implicit tag;
            write_scalar t ~style value;
            write t ": ";
            t.need_separator <- false;
            t.state <- In_flow_mapping_value
        | In_flow_mapping_value ->
            write_anchor t anchor;
            write_tag t ~implicit:plain_implicit tag;
            write_scalar t ~style value;
            t.need_separator <- true;
            t.state <- In_flow_mapping_key
        | _ ->
            if t.need_separator then write t ", ";
            t.need_separator <- true;
            write_anchor t anchor;
            write_tag t ~implicit:plain_implicit tag;
            write_scalar t ~style value)
      end else begin
        match t.state with
        | In_block_sequence _ ->
            write_indent t;
            write t "- ";
            write_anchor t anchor;
            write_tag t ~implicit:plain_implicit tag;
            write_scalar t ~style value;
            write_newline t
        | In_block_mapping_key indent ->
            write_indent t;
            write_anchor t anchor;
            write_tag t ~implicit:plain_implicit tag;
            write_scalar t ~style value;
            write_char t ':';
            t.state <- In_block_mapping_value indent
        | In_block_mapping_first_key indent ->
            (* First key after "- ", no indent needed *)
            write_anchor t anchor;
            write_tag t ~implicit:plain_implicit tag;
            write_scalar t ~style value;
            write_char t ':';
            t.state <- In_block_mapping_value indent
        | In_block_mapping_value indent ->
            (* The ' ' completes the "key: value" separator. *)
            write_char t ' ';
            write_anchor t anchor;
            write_tag t ~implicit:plain_implicit tag;
            write_scalar t ~style value;
            write_newline t;
            t.state <- In_block_mapping_key indent
        | _ ->
            write_anchor t anchor;
            write_tag t ~implicit:plain_implicit tag;
            write_scalar t ~style value;
            write_newline t
      end

  | Event.Sequence_start { anchor; tag; implicit; style } ->
      (* Flow style is forced once we are already inside a flow node. *)
      let use_flow = style = `Flow || t.flow_level > 0 in
      if t.flow_level > 0 then begin
        (match t.state with
        | In_flow_mapping_key ->
            if t.need_separator then write t ", ";
            write_anchor t anchor;
            write_tag t ~implicit tag;
            write_char t '[';
            t.flow_level <- t.flow_level + 1;
            t.need_separator <- false;
            push_state t In_flow_mapping_value; (* After ] we'll be in value position but sequence handles it *)
            t.state <- In_flow_sequence
        | In_flow_mapping_value ->
            write_anchor t anchor;
            write_tag t ~implicit tag;
            write_char t '[';
            t.flow_level <- t.flow_level + 1;
            t.need_separator <- false;
            push_state t In_flow_mapping_key;
            t.state <- In_flow_sequence
        | _ ->
            if t.need_separator then write t ", ";
            write_anchor t anchor;
            write_tag t ~implicit tag;
            write_char t '[';
            t.flow_level <- t.flow_level + 1;
            t.need_separator <- false;
            push_state t In_flow_sequence)
      end else begin
        match t.state with
        | In_block_sequence _ ->
            write_indent t;
            write t "- ";
            write_anchor t anchor;
            write_tag t ~implicit tag;
            if use_flow then begin
              write_char t '[';
              t.flow_level <- t.flow_level + 1;
              t.need_separator <- false;
              push_state t In_flow_sequence
            end else begin
              write_newline t;
              push_state t (In_block_sequence t.indent);
              t.indent <- t.indent + t.config.indent
            end
        | In_block_mapping_key indent ->
            (* NOTE(review): no key text is emitted here — a sequence
               arriving in key position produces only ":"; complex keys
               appear unsupported.  Confirm intended. *)
            write_indent t;
            write_anchor t anchor;
            write_tag t ~implicit tag;
            write t ":";
            write_newline t;
            push_state t (In_block_mapping_key indent);
            t.indent <- t.indent + t.config.indent;
            t.state <- In_block_sequence t.indent
        | In_block_mapping_first_key indent ->
            (* First key after "- " with sequence value - no indent *)
            write_anchor t anchor;
            write_tag t ~implicit tag;
            write t ":";
            write_newline t;
            push_state t (In_block_mapping_key indent);
            t.indent <- t.indent + t.config.indent;
            t.state <- In_block_sequence t.indent
        | In_block_mapping_value indent ->
            write_anchor t anchor;
            write_tag t ~implicit tag;
            if use_flow then begin
              write_char t ' ';
              write_char t '[';
              t.flow_level <- t.flow_level + 1;
              t.need_separator <- false;
              (* Save key state to return to after flow sequence *)
              t.state <- In_block_mapping_key indent;
              push_state t In_flow_sequence
            end else begin
              write_newline t;
              (* Save key state to return to after nested sequence *)
              t.state <- In_block_mapping_key indent;
              push_state t (In_block_sequence (t.indent + t.config.indent));
              t.indent <- t.indent + t.config.indent
            end
        | _ ->
            (* Top level (or unexpected state). *)
            write_anchor t anchor;
            write_tag t ~implicit tag;
            if use_flow then begin
              write_char t '[';
              t.flow_level <- t.flow_level + 1;
              t.need_separator <- false;
              push_state t In_flow_sequence
            end else begin
              push_state t (In_block_sequence t.indent);
              t.state <- In_block_sequence t.indent
            end
      end

  | Event.Sequence_end ->
      if t.flow_level > 0 then begin
        write_char t ']';
        t.flow_level <- t.flow_level - 1;
        t.need_separator <- true;
        pop_state t;
        (* Write newline if returning to block context *)
        (match t.state with
        | In_block_mapping_key _ | In_block_sequence _ -> write_newline t
        | _ -> ())
      end else begin
        (* NOTE(review): indent is decremented unconditionally here, but
           the top-level [_] branch of Sequence_start never incremented
           it — confirm the pairing is balanced for top-level block
           sequences. *)
        t.indent <- t.indent - t.config.indent;
        pop_state t
      end

  | Event.Mapping_start { anchor; tag; implicit; style } ->
      let use_flow = style = `Flow || t.flow_level > 0 in
      if t.flow_level > 0 then begin
        (match t.state with
        | In_flow_mapping_key ->
            if t.need_separator then write t ", ";
            write_anchor t anchor;
            write_tag t ~implicit tag;
            write_char t '{';
            t.flow_level <- t.flow_level + 1;
            t.need_separator <- false;
            push_state t In_flow_mapping_value;
            t.state <- In_flow_mapping_key
        | In_flow_mapping_value ->
            write_anchor t anchor;
            write_tag t ~implicit tag;
            write_char t '{';
            t.flow_level <- t.flow_level + 1;
            t.need_separator <- false;
            push_state t In_flow_mapping_key;
            t.state <- In_flow_mapping_key
        | _ ->
            if t.need_separator then write t ", ";
            write_anchor t anchor;
            write_tag t ~implicit tag;
            write_char t '{';
            t.flow_level <- t.flow_level + 1;
            t.need_separator <- false;
            push_state t In_flow_mapping_key)
      end else begin
        match t.state with
        | In_block_sequence _ ->
            write_indent t;
            write t "- ";
            write_anchor t anchor;
            write_tag t ~implicit tag;
            if use_flow then begin
              write_char t '{';
              t.flow_level <- t.flow_level + 1;
              t.need_separator <- false;
              push_state t In_flow_mapping_key
            end else begin
              (* Don't write newline - first key goes on same line as "- " *)
              push_state t (In_block_sequence t.indent);
              t.indent <- t.indent + t.config.indent;
              t.state <- In_block_mapping_first_key t.indent
            end
        | In_block_mapping_key indent ->
            write_indent t;
            write_anchor t anchor;
            write_tag t ~implicit tag;
            write t ":";
            write_newline t;
            push_state t (In_block_mapping_key indent);
            t.indent <- t.indent + t.config.indent;
            t.state <- In_block_mapping_key t.indent
        | In_block_mapping_first_key indent ->
            (* First key after "- " with mapping value - no indent *)
            write_anchor t anchor;
            write_tag t ~implicit tag;
            write t ":";
            write_newline t;
            push_state t (In_block_mapping_key indent);
            t.indent <- t.indent + t.config.indent;
            t.state <- In_block_mapping_key t.indent
        | In_block_mapping_value indent ->
            write_anchor t anchor;
            write_tag t ~implicit tag;
            if use_flow then begin
              write_char t ' ';
              write_char t '{';
              t.flow_level <- t.flow_level + 1;
              t.need_separator <- false;
              (* Save key state to return to after flow mapping *)
              t.state <- In_block_mapping_key indent;
              push_state t In_flow_mapping_key
            end else begin
              write_newline t;
              (* Save key state to return to after nested mapping *)
              t.state <- In_block_mapping_key indent;
              push_state t (In_block_mapping_key (t.indent + t.config.indent));
              t.indent <- t.indent + t.config.indent
            end
        | _ ->
            write_anchor t anchor;
            write_tag t ~implicit tag;
            if use_flow then begin
              write_char t '{';
              t.flow_level <- t.flow_level + 1;
              t.need_separator <- false;
              push_state t In_flow_mapping_key
            end else begin
              push_state t (In_block_mapping_key t.indent);
              t.state <- In_block_mapping_key t.indent
            end
      end

  | Event.Mapping_end ->
      if t.flow_level > 0 then begin
        write_char t '}';
        t.flow_level <- t.flow_level - 1;
        t.need_separator <- true;
        pop_state t;
        (* Write newline if returning to block context *)
        (match t.state with
        | In_block_mapping_key _ | In_block_sequence _ -> write_newline t
        | _ -> ())
      end else begin
        t.indent <- t.indent - t.config.indent;
        pop_state t
      end
+
+
(** [buffer t] exposes the internal buffer for advanced use, or [None]
    when the emitter streams to a writer instead of accumulating. *)
let buffer t =
  match t.sink with
  | Writer_sink _ -> None
  | Buffer_sink buf -> Some buf

(** [config t] is the configuration the emitter was created with. *)
let config t = t.config

(** [is_streaming t] is [true] iff output goes straight to a writer. *)
let is_streaming t =
  match t.sink with
  | Writer_sink _ -> true
  | Buffer_sink _ -> false

(** [flush t] signals end-of-data on a writer sink; buffer-backed
    emitters need no flushing, so this is a no-op for them. *)
let flush t =
  match t.sink with
  | Buffer_sink _ -> ()
  | Writer_sink w -> Bytesrw.Bytes.Writer.write_eod w
+50
lib/encoding.ml
···
+
(*---------------------------------------------------------------------------
   Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(** Character encoding detection and handling *)

(** The character encodings a YAML stream may use.  UTF-8 is assumed when
    no byte-order mark or recognizable byte pattern is present. *)
type t = [
  | `Utf8     (** UTF-8 (the default). *)
  | `Utf16be  (** UTF-16, big-endian. *)
  | `Utf16le  (** UTF-16, little-endian. *)
  | `Utf32be  (** UTF-32, big-endian. *)
  | `Utf32le  (** UTF-32, little-endian. *)
]
+
+
(** Human-readable name of an encoding, e.g. ["UTF-16BE"]. *)
let to_string enc =
  match enc with
  | `Utf8 -> "UTF-8"
  | `Utf16be -> "UTF-16BE"
  | `Utf16le -> "UTF-16LE"
  | `Utf32be -> "UTF-32BE"
  | `Utf32le -> "UTF-32LE"

(** Formatter-based pretty-printer; prints the {!to_string} form. *)
let pp fmt enc = Format.pp_print_string fmt (to_string enc)
+
+
(** Detect the character encoding of [s] from a byte-order mark or, failing
    that, from the null-byte pattern of the first (assumed ASCII) character.
    Returns [(encoding, bom_length)]; [bom_length] bytes should be skipped
    before decoding.

    Fix over the original: 4-byte (UTF-32) patterns are only considered when
    at least four input bytes actually exist.  Previously missing bytes were
    padded with zeros, so a 2-byte input such as ["\xFF\xFE"] (a UTF-16LE
    BOM) was reported as [(`Utf32le, 4)] — a BOM longer than the input —
    and 2-byte UTF-16LE content like ["A\x00"] was reported as UTF-32LE. *)
let detect s =
  let len = String.length s in
  if len = 0 then (`Utf8, 0)
  else begin
    (* Bytes past the end read as 0; [has4] guards the patterns whose
       trailing zeros would otherwise be indistinguishable from padding. *)
    let byte i = if i < len then Char.code s.[i] else 0 in
    let b0 = byte 0 and b1 = byte 1 and b2 = byte 2 and b3 = byte 3 in
    let has4 = len >= 4 in
    match (b0, b1, b2, b3) with
    (* BOM patterns (longest match first where prefixes overlap) *)
    | (0xEF, 0xBB, 0xBF, _) -> (`Utf8, 3)
    | (0xFF, 0xFE, 0x00, 0x00) when has4 -> (`Utf32le, 4)
    | (0xFE, 0xFF, _, _) -> (`Utf16be, 2)
    | (0xFF, 0xFE, _, _) -> (`Utf16le, 2)
    | (0x00, 0x00, 0xFE, 0xFF) -> (`Utf32be, 4)
    (* Content pattern detection (no BOM) *)
    | (0x00, 0x00, 0x00, b3) when b3 <> 0x00 -> (`Utf32be, 0)
    | (b0, 0x00, 0x00, 0x00) when b0 <> 0x00 && has4 -> (`Utf32le, 0)
    | (0x00, b1, _, _) when b1 <> 0x00 -> (`Utf16be, 0)
    | (b0, 0x00, _, _) when b0 <> 0x00 -> (`Utf16le, 0)
    | _ -> (`Utf8, 0)
  end
+
+
(** Structural equality on encodings. *)
let equal lhs rhs = lhs = rhs
+372
lib/error.ml
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** {1 Error Handling}
+
+
Comprehensive error reporting for YAML parsing and emission.
+
+
This module provides detailed error types that correspond to various
+
failure modes in YAML processing, as specified in the
+
{{:https://yaml.org/spec/1.2.2/}YAML 1.2.2 specification}.
+
+
Each error includes:
+
- A classification of the error type ({!type:kind})
+
- Optional source location information ({!type:Span.t})
+
- A context stack showing where the error occurred
+
- Optional source text for error display
+
+
See also {{:https://yaml.org/spec/1.2.2/#31-processes}Section 3.1 (Processes)}
+
for background on the YAML processing model. *)
+
+
(** {2 Error Classification}
+
+
Error kinds are organized by the processing stage where they occur:
+
- Scanner errors: Lexical analysis failures (character-level)
+
- Parser errors: Syntax errors in event stream
+
- Loader errors: Semantic errors during representation construction
+
- Emitter errors: Failures during YAML generation *)
+
(** Classification of every failure mode, grouped by the processing stage
    in which it arises: scanner (lexical), parser (syntactic), loader
    (semantic), emitter, plus a generic [Custom] escape hatch. *)
type kind =
  (* Scanner errors - see {{:https://yaml.org/spec/1.2.2/#51-character-set}Section 5.1} *)
  | Unexpected_character of char
      (** Invalid character in input. See
          {{:https://yaml.org/spec/1.2.2/#51-character-set}Section 5.1 (Character Set)}. *)
  | Unexpected_eof
      (** Premature end of input. *)
  | Invalid_escape_sequence of string
      (** Invalid escape in double-quoted string. See
          {{:https://yaml.org/spec/1.2.2/#57-escaped-characters}Section 5.7 (Escaped Characters)}. *)
  | Invalid_unicode_escape of string
      (** Invalid Unicode escape sequence (\uXXXX or \UXXXXXXXX). *)
  | Invalid_hex_escape of string
      (** Invalid hexadecimal escape sequence (\xXX). *)
  | Invalid_tag of string
      (** Malformed tag syntax. See
          {{:https://yaml.org/spec/1.2.2/#681-node-tags}Section 6.8.1 (Node Tags)}. *)
  | Invalid_anchor of string
      (** Malformed anchor name. See
          {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 (Anchors and Aliases)}. *)
  | Invalid_alias of string
      (** Malformed alias reference. See
          {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 (Anchors and Aliases)}. *)
  | Invalid_comment
      (** Comment not properly separated from content. See
          {{:https://yaml.org/spec/1.2.2/#62-comments}Section 6.2 (Comments)}. *)
  | Unclosed_single_quote
      (** Unterminated single-quoted scalar. See
          {{:https://yaml.org/spec/1.2.2/#72-single-quoted-style}Section 7.2 (Single-Quoted Style)}. *)
  | Unclosed_double_quote
      (** Unterminated double-quoted scalar. See
          {{:https://yaml.org/spec/1.2.2/#73-double-quoted-style}Section 7.3 (Double-Quoted Style)}. *)
  | Unclosed_flow_sequence
      (** Missing closing bracket \] for flow sequence. See
          {{:https://yaml.org/spec/1.2.2/#742-flow-sequences}Section 7.4.2 (Flow Sequences)}. *)
  | Unclosed_flow_mapping
      (** Missing closing brace \} for flow mapping. See
          {{:https://yaml.org/spec/1.2.2/#743-flow-mappings}Section 7.4.3 (Flow Mappings)}. *)
  | Invalid_indentation of int * int
      (** Incorrect indentation level (expected, got). See
          {{:https://yaml.org/spec/1.2.2/#61-indentation-spaces}Section 6.1 (Indentation Spaces)}. *)
  | Invalid_flow_indentation
      (** Content in flow collection must be indented. See
          {{:https://yaml.org/spec/1.2.2/#74-flow-styles}Section 7.4 (Flow Styles)}. *)
  | Tab_in_indentation
      (** Tab character used for indentation (only spaces allowed). See
          {{:https://yaml.org/spec/1.2.2/#61-indentation-spaces}Section 6.1 (Indentation Spaces)}. *)
  | Invalid_block_scalar_header of string
      (** Malformed block scalar header (| or >). See
          {{:https://yaml.org/spec/1.2.2/#81-block-scalar-styles}Section 8.1 (Block Scalar Styles)}. *)
  | Invalid_quoted_scalar_indentation of string
      (** Incorrect indentation in quoted scalar. *)
  | Invalid_directive of string
      (** Malformed directive. See
          {{:https://yaml.org/spec/1.2.2/#68-directives}Section 6.8 (Directives)}. *)
  | Invalid_yaml_version of string
      (** Unsupported YAML version in %YAML directive. See
          {{:https://yaml.org/spec/1.2.2/#681-yaml-directives}Section 6.8.1 (YAML Directives)}. *)
  | Invalid_tag_directive of string
      (** Malformed %TAG directive. See
          {{:https://yaml.org/spec/1.2.2/#682-tag-directives}Section 6.8.2 (TAG Directives)}. *)
  | Reserved_directive of string
      (** Reserved directive name. See
          {{:https://yaml.org/spec/1.2.2/#683-reserved-directives}Section 6.8.3 (Reserved Directives)}. *)
  | Illegal_flow_key_line
      (** Key and colon must be on same line in flow context. See
          {{:https://yaml.org/spec/1.2.2/#743-flow-mappings}Section 7.4.3 (Flow Mappings)}. *)
  | Block_sequence_disallowed
      (** Block sequence entries not allowed in this context. See
          {{:https://yaml.org/spec/1.2.2/#82-block-collection-styles}Section 8.2 (Block Collection Styles)}. *)

  (* Parser errors - see {{:https://yaml.org/spec/1.2.2/#3-processing-yaml-information}Section 3 (Processing)} *)
  | Unexpected_token of string
      (** Unexpected token in event stream. *)
  | Expected_document_start
      (** Expected document start marker (---). See
          {{:https://yaml.org/spec/1.2.2/#912-document-markers}Section 9.1.2 (Document Markers)}. *)
  | Expected_document_end
      (** Expected document end marker (...). See
          {{:https://yaml.org/spec/1.2.2/#912-document-markers}Section 9.1.2 (Document Markers)}. *)
  | Expected_block_entry
      (** Expected block sequence entry marker (-). See
          {{:https://yaml.org/spec/1.2.2/#821-block-sequences}Section 8.2.1 (Block Sequences)}. *)
  | Expected_key
      (** Expected mapping key. See
          {{:https://yaml.org/spec/1.2.2/#822-block-mappings}Section 8.2.2 (Block Mappings)}. *)
  | Expected_value
      (** Expected mapping value after colon. See
          {{:https://yaml.org/spec/1.2.2/#822-block-mappings}Section 8.2.2 (Block Mappings)}. *)
  | Expected_node
      (** Expected a YAML node. *)
  | Expected_scalar
      (** Expected a scalar value. *)
  | Expected_sequence_end
      (** Expected closing bracket \] for flow sequence. See
          {{:https://yaml.org/spec/1.2.2/#742-flow-sequences}Section 7.4.2 (Flow Sequences)}. *)
  | Expected_mapping_end
      (** Expected closing brace \} for flow mapping. See
          {{:https://yaml.org/spec/1.2.2/#743-flow-mappings}Section 7.4.3 (Flow Mappings)}. *)
  | Duplicate_anchor of string
      (** Anchor name defined multiple times. See
          {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 (Anchors and Aliases)}. *)
  | Undefined_alias of string
      (** Alias references non-existent anchor. See
          {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 (Anchors and Aliases)}. *)
  | Alias_cycle of string
      (** Circular reference in alias chain. See
          {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 (Anchors and Aliases)}. *)
  | Multiple_documents
      (** Multiple documents found when single document expected. See
          {{:https://yaml.org/spec/1.2.2/#912-document-markers}Section 9.1.2 (Document Markers)}. *)
  | Mapping_key_too_long
      (** Mapping key exceeds maximum length (1024 characters). *)

  (* Loader errors - see {{:https://yaml.org/spec/1.2.2/#31-processes}Section 3.1 (Processes)} *)
  | Invalid_scalar_conversion of string * string
      (** Cannot convert scalar value to target type (value, target type).
          See {{:https://yaml.org/spec/1.2.2/#103-core-schema}Section 10.3 (Core Schema)}. *)
  | Type_mismatch of string * string
      (** Value has wrong type for operation (expected, got). *)
  | Unresolved_alias of string
      (** Alias encountered during conversion but not resolved.
          See {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 (Anchors and Aliases)}. *)
  | Key_not_found of string
      (** Mapping key not found. *)
  | Alias_expansion_node_limit of int
      (** Alias expansion exceeded maximum node count (protection against billion laughs attack).
          See {{:https://yaml.org/spec/1.2.2/#321-processes}Section 3.2.1 (Processes)}.

          The "billion laughs attack" (also known as an XML bomb) is a denial-of-service
          attack where a small YAML document expands to enormous size through recursive
          alias expansion. This limit prevents such attacks. *)
  | Alias_expansion_depth_limit of int
      (** Alias expansion exceeded maximum nesting depth (protection against deeply nested aliases).
          See {{:https://yaml.org/spec/1.2.2/#321-processes}Section 3.2.1 (Processes)}. *)

  (* Emitter errors *)
  | Invalid_encoding of string
      (** Invalid character encoding specified. See
          {{:https://yaml.org/spec/1.2.2/#51-character-set}Section 5.1 (Character Set)}. *)
  | Scalar_contains_invalid_chars of string
      (** Scalar contains characters invalid for chosen style. *)
  | Anchor_not_set
      (** Attempted to emit alias before anchor was defined. *)
  | Invalid_state of string
      (** Emitter in invalid state for requested operation. *)

  (* Generic *)
  | Custom of string
      (** Custom error message. *)
+
+
(** {2 Error Value}

    Full error information including classification, location, and context. *)
type t = {
  kind : kind;
  (** The specific error classification. *)
  span : Span.t option;
  (** Source location where the error occurred (if available). *)
  context : string list;
  (** Context stack showing the processing path leading to the error;
      the most recently entered context is first. *)
  source : string option;
  (** Source text for displaying the error in context. *)
}
+
+
(** {2 Exception}

    The main exception type raised by all yamlrw operations.

    All parsing, loading, and emitting errors are reported by raising
    this exception with detailed error information. *)
exception Yamlrw_error of t

(* Make uncaught yamlrw errors readable via [Printexc.to_string].  Only a
   [Custom] kind carries its message here; other kinds render as "error"
   (the full renderer lives further down this module). *)
let () =
  let render e =
    let where =
      match e.span with
      | Some span -> " at " ^ Span.to_string span
      | None -> ""
    in
    let what = match e.kind with Custom s -> s | _ -> "error" in
    Printf.sprintf "Yamlrw_error: %s%s" what where
  in
  Printexc.register_printer (function
    | Yamlrw_error e -> Some (render e)
    | _ -> None)
+
+
(** {2 Error Construction} *)

(** [make ?span ?context ?source kind] constructs an error value.

    @param span Source location
    @param context Context stack (defaults to empty)
    @param source Source text
    @param kind Error classification *)
let make ?span ?(context = []) ?source kind = { kind; span; context; source }

(** [raise ?span ?context ?source kind] constructs and raises an error.

    This is the primary way to report errors in yamlrw.

    @raise Yamlrw_error always *)
let raise ?span ?context ?source kind =
  let err = make ?span ?context ?source kind in
  Stdlib.raise (Yamlrw_error err)

(** [raise_at pos kind] raises [kind] located at the single position [pos].

    @raise Yamlrw_error always *)
let raise_at pos kind = raise ~span:(Span.point pos) kind

(** [raise_span span kind] raises [kind] located at [span].

    @raise Yamlrw_error always *)
let raise_span span kind = raise ~span kind

(** [with_context ctx f] runs [f ()], prefixing [ctx] onto the context
    stack of any {!Yamlrw_error} that escapes it.  Useful for tracking
    the processing path through nested structures. *)
let with_context ctx f =
  match f () with
  | result -> result
  | exception Yamlrw_error e ->
      Stdlib.raise (Yamlrw_error { e with context = ctx :: e.context })
+
+
(** {2 Error Formatting} *)

(** [kind_to_string kind] converts an error kind to a human-readable string.
    Fix: [Invalid_quoted_scalar_indentation] no longer round-trips its
    message through [Printf.sprintf "%s"], which was an identity copy. *)
let kind_to_string = function
  | Unexpected_character c -> Printf.sprintf "unexpected character %C" c
  | Unexpected_eof -> "unexpected end of input"
  | Invalid_escape_sequence s -> Printf.sprintf "invalid escape sequence: %s" s
  | Invalid_unicode_escape s -> Printf.sprintf "invalid unicode escape: %s" s
  | Invalid_hex_escape s -> Printf.sprintf "invalid hex escape: %s" s
  | Invalid_tag s -> Printf.sprintf "invalid tag: %s" s
  | Invalid_anchor s -> Printf.sprintf "invalid anchor: %s" s
  | Invalid_alias s -> Printf.sprintf "invalid alias: %s" s
  | Invalid_comment -> "comments must be separated from other tokens by whitespace"
  | Unclosed_single_quote -> "unclosed single quote"
  | Unclosed_double_quote -> "unclosed double quote"
  | Unclosed_flow_sequence -> "unclosed flow sequence '['"
  | Unclosed_flow_mapping -> "unclosed flow mapping '{'"
  | Invalid_indentation (expected, got) ->
      Printf.sprintf "invalid indentation: expected %d, got %d" expected got
  | Invalid_flow_indentation -> "invalid indentation in flow construct"
  | Tab_in_indentation -> "tab character in indentation"
  | Invalid_block_scalar_header s ->
      Printf.sprintf "invalid block scalar header: %s" s
  | Invalid_quoted_scalar_indentation s -> s
  | Invalid_directive s -> Printf.sprintf "invalid directive: %s" s
  | Invalid_yaml_version s -> Printf.sprintf "invalid YAML version: %s" s
  | Invalid_tag_directive s -> Printf.sprintf "invalid TAG directive: %s" s
  | Reserved_directive s -> Printf.sprintf "reserved directive: %s" s
  | Illegal_flow_key_line -> "key and ':' must be on the same line in flow context"
  | Block_sequence_disallowed -> "block sequence entries are not allowed in this context"
  | Unexpected_token s -> Printf.sprintf "unexpected token: %s" s
  | Expected_document_start -> "expected document start '---'"
  | Expected_document_end -> "expected document end '...'"
  | Expected_block_entry -> "expected block entry '-'"
  | Expected_key -> "expected mapping key"
  | Expected_value -> "expected mapping value"
  | Expected_node -> "expected node"
  | Expected_scalar -> "expected scalar"
  | Expected_sequence_end -> "expected sequence end ']'"
  | Expected_mapping_end -> "expected mapping end '}'"
  | Duplicate_anchor s -> Printf.sprintf "duplicate anchor: &%s" s
  | Undefined_alias s -> Printf.sprintf "undefined alias: *%s" s
  | Alias_cycle s -> Printf.sprintf "alias cycle detected: *%s" s
  | Multiple_documents -> "multiple documents found when single expected"
  | Mapping_key_too_long -> "mapping key too long (max 1024 characters)"
  | Invalid_scalar_conversion (value, typ) ->
      Printf.sprintf "cannot convert %S to %s" value typ
  | Type_mismatch (expected, got) ->
      Printf.sprintf "type mismatch: expected %s, got %s" expected got
  | Unresolved_alias s -> Printf.sprintf "unresolved alias: *%s" s
  | Key_not_found s -> Printf.sprintf "key not found: %s" s
  | Alias_expansion_node_limit n ->
      Printf.sprintf "alias expansion exceeded node limit (%d nodes)" n
  | Alias_expansion_depth_limit n ->
      Printf.sprintf "alias expansion exceeded depth limit (%d levels)" n
  | Invalid_encoding s -> Printf.sprintf "invalid encoding: %s" s
  | Scalar_contains_invalid_chars s ->
      Printf.sprintf "scalar contains invalid characters: %s" s
  | Anchor_not_set -> "anchor not set"
  | Invalid_state s -> Printf.sprintf "invalid state: %s" s
  | Custom s -> s
+
+
(** [to_string t] renders an error as a human-readable string: the kind,
    then the source location (if available), then the context stack
    oldest-first. *)
let to_string t =
  let loc =
    match t.span with
    | Some span -> " at " ^ Span.to_string span
    | None -> ""
  in
  let ctx =
    match t.context with
    | [] -> ""
    | ctxs -> Printf.sprintf " (in %s)" (String.concat " > " (List.rev ctxs))
  in
  String.concat "" [ kind_to_string t.kind; loc; ctx ]

(** [pp fmt t] pretty-prints an error to a formatter. *)
let pp fmt t = Format.fprintf fmt "Yamlrw error: %s" (to_string t)
+
+
(** [pp_with_source ~source fmt t] pretty-prints an error followed by the
    offending source line and a caret (^) pointing at the error column.

    Fixes vs. the original:
    - the caret line now pads its gutter to the printed line number's
      width, so the [|] bars line up and the caret points at the right
      column (before, the caret was shifted left by the number's width);
    - a column of 0 or less no longer raises [Invalid_argument] from
      [String.make] — padding is clamped to zero.

    @param source The source text
    @param fmt Output formatter
    @param t The error to display *)
let pp_with_source ~source fmt t =
  let extract_line source line_num =
    let lines = String.split_on_char '\n' source in
    if line_num >= 1 && line_num <= List.length lines then
      Some (List.nth lines (line_num - 1))
    else
      None
  in
  pp fmt t;
  match t.span with
  | None -> ()
  | Some span ->
      match extract_line source span.start.line with
      | None -> ()
      | Some line ->
          let num = string_of_int span.start.line in
          Format.fprintf fmt "\n %s | %s\n" num line;
          (* Gutter must be as wide as the line number so the caret lands
             under the reported column. *)
          let gutter = String.make (String.length num) ' ' in
          let padding = String.make (max 0 (span.start.column - 1)) ' ' in
          Format.fprintf fmt " %s | %s^" gutter padding
+82
lib/event.ml
···
+
(*---------------------------------------------------------------------------
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
SPDX-License-Identifier: ISC
---------------------------------------------------------------------------*)

(** YAML parser events *)

(** A single parse event, mirroring the YAML event-stream model:
    stream and document delimiters, aliases, scalars, and collection
    start/end markers. *)
type t =
  | Stream_start of { encoding : Encoding.t }  (** First event of a stream. *)
  | Stream_end  (** Last event of a stream. *)
  | Document_start of {
      version : (int * int) option;  (* %YAML directive version, if any *)
      implicit : bool;  (* true when there was no explicit "---" *)
    }
  | Document_end of { implicit : bool }  (* implicit: no explicit "..." *)
  | Alias of { anchor : string }  (** Reference to a previously anchored node. *)
  | Scalar of {
      anchor : string option;
      tag : string option;
      value : string;
      plain_implicit : bool;   (* tag resolvable if emitted plain *)
      quoted_implicit : bool;  (* tag resolvable if emitted quoted *)
      style : Scalar_style.t;
    }
  | Sequence_start of {
      anchor : string option;
      tag : string option;
      implicit : bool;
      style : Layout_style.t;
    }
  | Sequence_end
  | Mapping_start of {
      anchor : string option;
      tag : string option;
      implicit : bool;
      style : Layout_style.t;
    }
  | Mapping_end

(** An event paired with its source location. *)
type spanned = {
  event : t;
  span : Span.t;
}
+
+
(** [pp fmt e] prints a compact, single-line debug rendering of [e].
    Optional anchors/tags render as "none". *)
let pp fmt = function
  | Stream_start { encoding } ->
      Format.fprintf fmt "stream-start(%a)" Encoding.pp encoding
  | Stream_end ->
      Format.fprintf fmt "stream-end"
  | Document_start { version; implicit } ->
      Format.fprintf fmt "document-start(version=%s, implicit=%b)"
        (match version with None -> "none" | Some (maj, min) -> Printf.sprintf "%d.%d" maj min)
        implicit
  | Document_end { implicit } ->
      Format.fprintf fmt "document-end(implicit=%b)" implicit
  | Alias { anchor } ->
      Format.fprintf fmt "alias(%s)" anchor
  | Scalar { anchor; tag; value; style; _ } ->
      Format.fprintf fmt "scalar(anchor=%s, tag=%s, style=%a, value=%S)"
        (Option.value anchor ~default:"none")
        (Option.value tag ~default:"none")
        Scalar_style.pp style
        value
  | Sequence_start { anchor; tag; implicit; style } ->
      Format.fprintf fmt "sequence-start(anchor=%s, tag=%s, implicit=%b, style=%a)"
        (Option.value anchor ~default:"none")
        (Option.value tag ~default:"none")
        implicit
        Layout_style.pp style
  | Sequence_end ->
      Format.fprintf fmt "sequence-end"
  | Mapping_start { anchor; tag; implicit; style } ->
      Format.fprintf fmt "mapping-start(anchor=%s, tag=%s, implicit=%b, style=%a)"
        (Option.value anchor ~default:"none")
        (Option.value tag ~default:"none")
        implicit
        Layout_style.pp style
  | Mapping_end ->
      Format.fprintf fmt "mapping-end"

(** [pp_spanned fmt s] prints the event together with its source span. *)
let pp_spanned fmt { event; span } =
  Format.fprintf fmt "%a at %a" pp event Span.pp span
+272
lib/input.ml
···
+
(*---------------------------------------------------------------------------
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
SPDX-License-Identifier: ISC
---------------------------------------------------------------------------*)

(** Character input source with lookahead, based on Bytes.Reader.t

    This module wraps a bytesrw [Bytes.Reader.t] to provide character-by-character
    access with lookahead for the YAML scanner. Uses bytesrw's sniff and push_back
    for efficient lookahead without excessive copying.

    The same input type works with any reader source: strings, files, channels,
    or streaming sources like Eio. *)

open Bytesrw

(** Re-export character classification *)
include Char_class

(* Input state: a cursor over slices pulled lazily from [reader].
   Invariant: when [current_slice] is [Some s], [slice_pos] indexes into
   [s] relative to its first byte; an exhausted slice is dropped to [None]. *)
type t = {
  reader : Bytes.Reader.t;
  mutable current_slice : Bytes.Slice.t option; (** Current slice being consumed *)
  mutable slice_pos : int; (** Position within current slice *)
  mutable position : Position.t; (** Line/column tracking *)
}
+
+
(* [ensure_slice t] guarantees that [t.current_slice] holds unread data,
   pulling the next slice from the reader when the current one is spent.
   Returns [false] only at end of data. *)
let ensure_slice t =
  match t.current_slice with
  | Some slice when t.slice_pos < Bytes.Slice.length slice -> true
  | _ ->
      let slice = Bytes.Reader.read t.reader in
      if Bytes.Slice.is_eod slice then begin
        t.current_slice <- None;
        false
      end
      else begin
        t.current_slice <- Some slice;
        t.slice_pos <- 0;
        true
      end

(* [peek_current t] is the character under the cursor in the buffered
   slice, or [None] when the slice is absent or exhausted. Never touches
   the reader and never advances. *)
let peek_current t =
  match t.current_slice with
  | Some slice when t.slice_pos < Bytes.Slice.length slice ->
      let base = Bytes.Slice.first slice in
      Some (Stdlib.Bytes.get (Bytes.Slice.bytes slice) (base + t.slice_pos))
  | _ -> None
+
+
(** [of_reader ?initial_position reader] wraps [reader] as a character
    input. A leading UTF-8 BOM (EF BB BF) is detected via
    [Bytes.Reader.sniff] (which peeks without consuming) and skipped;
    other byte-order marks are left untouched. *)
let of_reader ?(initial_position = Position.initial) reader =
  let t =
    { reader;
      current_slice = None;
      slice_pos = 0;
      position = initial_position }
  in
  let sample = Bytes.Reader.sniff 4 t.reader in
  let has_utf8_bom =
    String.length sample >= 3
    && sample.[0] = '\xEF'
    && sample.[1] = '\xBB'
    && sample.[2] = '\xBF'
  in
  if has_utf8_bom then Bytes.Reader.skip 3 t.reader;
  t

(** [of_string s] builds an input reading from the string [s]. *)
let of_string s = of_reader (Bytes.Reader.of_string s)
+
+
(** [position t] is the current line/column position. *)
let position t = t.position

(** [is_eof t] is [true] once no more characters are available. *)
let is_eof t = not (ensure_slice t)

(** [peek t] returns the next character without consuming it, or [None]
    at end of input. *)
let peek t = if ensure_slice t then peek_current t else None

(** [peek_exn t] is [peek t] but raising at end of input.
    @raise Error.Yamlrw_error with [Unexpected_eof] at EOF *)
let peek_exn t =
  match peek t with
  | Some c -> c
  | None -> Error.raise_at t.position Unexpected_eof
+
+
(** [peek_nth t n] peeks at the [n]-th character ahead (0-indexed from the
    cursor) without consuming anything; [None] if the stream is too short.

    Fix: the original called [Bytes.Reader.sniff (n + 1)] unconditionally,
    even when the answer was already inside the buffered slice and the
    sniffed sample was then discarded. The sniff is now performed only
    when the lookahead extends past the buffered slice, and only for the
    bytes actually needed. Results are unchanged. *)
let peek_nth t n =
  if n = 0 then peek t
  else
    match t.current_slice with
    | Some slice ->
        let slice_remaining = Bytes.Slice.length slice - t.slice_pos in
        if n < slice_remaining then
          (* Answer is inside the buffered slice: no reader access. *)
          let base = Bytes.Slice.first slice in
          Some (Stdlib.Bytes.get (Bytes.Slice.bytes slice)
                  (base + t.slice_pos + n))
        else begin
          (* Look past the buffered slice. sniff pushes the sampled bytes
             back, so the reader is not advanced. *)
          let sample_offset = n - slice_remaining in
          let sample = Bytes.Reader.sniff (sample_offset + 1) t.reader in
          if sample_offset < String.length sample then
            Some sample.[sample_offset]
          else
            None
        end
    | None ->
        let sample = Bytes.Reader.sniff (n + 1) t.reader in
        if n < String.length sample then
          Some sample.[n]
        else
          None
+
+
(** [peek_string t n] peeks at up to [n] upcoming characters as a string
    without consuming them. Returns fewer than [n] characters (possibly
    "") when the stream is shorter than [n]. *)
let rec peek_string t n =
  if n <= 0 then ""
  else begin
    match t.current_slice with
    | Some slice ->
      let slice_bytes = Bytes.Slice.bytes slice in
      let slice_first = Bytes.Slice.first slice in
      let slice_remaining = Bytes.Slice.length slice - t.slice_pos in
      if n <= slice_remaining then
        (* All within current slice *)
        Stdlib.Bytes.sub_string slice_bytes (slice_first + t.slice_pos) n
      else begin
        (* Need data from beyond current slice - use sniff. sniff pushes
           the sampled bytes back, so nothing is consumed; it may return
           fewer bytes than requested near EOF, hence "up to n". *)
        let needed_from_reader = n - slice_remaining in
        let sample = Bytes.Reader.sniff needed_from_reader t.reader in
        let buf = Buffer.create n in
        Buffer.add_subbytes buf slice_bytes (slice_first + t.slice_pos) slice_remaining;
        Buffer.add_string buf sample;
        Buffer.contents buf
      end
    | None ->
      (* No buffered slice: pull one and retry. The recursion terminates
         because ensure_slice either buffers data or reports EOF. *)
      if ensure_slice t then
        peek_string t n
      else
        ""
  end
+
+
(** [next t] consumes and returns the next character, advancing the
    line/column position; [None] at end of input. *)
let next t =
  if ensure_slice t then begin
    match t.current_slice with
    | Some slice ->
      let bytes = Bytes.Slice.bytes slice in
      let first = Bytes.Slice.first slice in
      let c = Stdlib.Bytes.get bytes (first + t.slice_pos) in
      t.slice_pos <- t.slice_pos + 1;
      t.position <- Position.advance_char c t.position;
      (* Check if we've exhausted this slice; drop it so the next read
         pulls a fresh one from the reader. *)
      if t.slice_pos >= Bytes.Slice.length slice then
        t.current_slice <- None;
      Some c
    | None -> None
  end else
    None

(** [next_exn t] is [next t] but raising at end of input.
    @raise Error.Yamlrw_error with [Unexpected_eof] at EOF *)
let next_exn t =
  match next t with
  | Some c -> c
  | None -> Error.raise_at t.position Unexpected_eof

(** [skip t n] consumes up to [n] characters, stopping silently at EOF. *)
let skip t n =
  for _ = 1 to n do
    ignore (next t)
  done
+
+
(** [skip_while t pred] consumes characters for as long as [pred] holds
    on the next character; stops at the first non-match or EOF. *)
let skip_while t pred =
  let running = ref true in
  while !running do
    match peek t with
    | Some c when pred c -> ignore (next t)
    | _ -> running := false
  done

(** [next_is pred t] tests the upcoming character (without consuming)
    against [pred]; [false] at end of input. *)
let next_is pred t =
  match peek t with
  | Some c -> pred c
  | None -> false

(* Convenience wrappers over [next_is] for the usual character classes. *)
let next_is_break t = next_is is_break t
let next_is_blank t = next_is is_blank t
let next_is_whitespace t = next_is is_whitespace t
let next_is_digit t = next_is is_digit t
let next_is_hex t = next_is is_hex t
let next_is_alpha t = next_is is_alpha t
let next_is_indicator t = next_is is_indicator t
+
+
(** [at_document_boundary t] is [true] when the cursor is at column 1 on
    a "---" (document start) or "..." (document end) marker followed by
    whitespace or end of input. Does not consume anything. *)
let at_document_boundary t =
  if t.position.column <> 1 then false
  else begin
    let s = peek_string t 4 in
    let len = String.length s in
    if len < 3 then false
    else
      let prefix = String.sub s 0 3 in
      (prefix = "---" || prefix = "...") &&
      (* len = 3 means EOF right after the marker, which also counts. *)
      (len = 3 || is_whitespace s.[3])
  end

(** [consume_break t] consumes one line break, treating "\r\n" as a
    single break; no-op if the next character is not a break. *)
let consume_break t =
  match peek t with
  | Some '\r' ->
    ignore (next t);
    (match peek t with
     | Some '\n' -> ignore (next t)
     | _ -> ())
  | Some '\n' ->
    ignore (next t)
  | _ -> ()
+
+
(** [remaining t] consumes and returns all remaining input as a string:
    the unread tail of the buffered slice plus everything left in the
    reader. After this call the input is at EOF. *)
let remaining t =
  let buf = Buffer.create 256 in
  (* Add current slice remainder *)
  (match t.current_slice with
   | Some slice ->
     let bytes = Bytes.Slice.bytes slice in
     let first = Bytes.Slice.first slice in
     let remaining = Bytes.Slice.length slice - t.slice_pos in
     if remaining > 0 then
       Buffer.add_subbytes buf bytes (first + t.slice_pos) remaining
   | None -> ());
  (* Add remaining from reader *)
  Bytes.Reader.add_to_buffer buf t.reader;
  Buffer.contents buf

(** [mark t] records the current position for later span creation. *)
let mark t = t.position

(** [peek_back t] returns the character just before the cursor, but only
    while it is still inside the buffered slice; [None] otherwise (e.g.
    immediately after a slice boundary or before any read). *)
let peek_back t =
  match t.current_slice with
  | Some slice when t.slice_pos > 0 ->
    let bytes = Bytes.Slice.bytes slice in
    let first = Bytes.Slice.first slice in
    Some (Stdlib.Bytes.get bytes (first + t.slice_pos - 1))
  | _ -> None

(** [source t] returns up to 4 upcoming bytes for encoding detection,
    without consuming them. Uses sniff to peek when no slice is
    buffered. *)
let source t =
  (* First check current slice *)
  match t.current_slice with
  | Some slice ->
    let bytes = Bytes.Slice.bytes slice in
    let first = Bytes.Slice.first slice in
    let available = min 4 (Bytes.Slice.length slice - t.slice_pos) in
    Stdlib.Bytes.sub_string bytes (first + t.slice_pos) available
  | None ->
    (* Use sniff to peek at reader *)
    Bytes.Reader.sniff 4 t.reader

(** [byte_pos t] is the reader's byte position.
    NOTE(review): while a slice is buffered, the reader's position is
    ahead of the character cursor — confirm callers expect that. *)
let byte_pos t =
  Bytes.Reader.pos t.reader
+30
lib/layout_style.ml
···
+
(*---------------------------------------------------------------------------
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
SPDX-License-Identifier: ISC
---------------------------------------------------------------------------*)

(** Collection layout styles *)

type t = [
  | `Any (** Let emitter choose *)
  | `Block (** Indentation-based *)
  | `Flow (** Inline with brackets *)
]

(** [to_string t] is the lowercase name of the style. *)
let to_string = function
  | `Any -> "any"
  | `Block -> "block"
  | `Flow -> "flow"

(** [pp fmt t] prints the style's name. *)
let pp fmt t = Format.pp_print_string fmt (to_string t)

(** Structural equality on styles. *)
let equal a b = a = b

(** Total order on styles: [`Any < `Block < `Flow]. *)
let compare a b =
  let rank = function
    | `Any -> 0
    | `Block -> 1
    | `Flow -> 2
  in
  Int.compare (rank a) (rank b)
+416
lib/loader.ml
···
+
(*---------------------------------------------------------------------------
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
SPDX-License-Identifier: ISC
---------------------------------------------------------------------------*)

(** Loader - converts parser events to YAML data structures *)

(** Stack frame for building nested structures. Items and pairs are
    accumulated in reverse order and reversed when the frame closes. *)
type frame =
  | Sequence_frame of {
      anchor : string option;
      tag : string option;
      implicit : bool;
      style : Layout_style.t;
      items : Yaml.t list;  (* reverse order *)
    }
  | Mapping_frame of {
      anchor : string option;
      tag : string option;
      implicit : bool;
      style : Layout_style.t;
      pairs : (Yaml.t * Yaml.t) list;  (* reverse order *)
      pending_key : Yaml.t option;  (* key awaiting its value *)
    }

(* Mutable loader state threaded through event processing. *)
type state = {
  mutable stack : frame list;  (* open collections, innermost first *)
  mutable current : Yaml.t option;  (* completed root of current doc *)
  mutable documents : Document.t list;  (* finished docs, newest first *)
  mutable doc_version : (int * int) option;  (* %YAML version of current doc *)
  mutable doc_implicit_start : bool;  (* current doc had no explicit "---" *)
}

(* Fresh state for a new load. *)
let create_state () = {
  stack = [];
  current = None;
  documents = [];
  doc_version = None;
  doc_implicit_start = true;
}
+
+
(** [process_event state ev] folds one parser event into [state],
    building nested YAML values via the frame stack. *)
let rec process_event state (ev : Event.spanned) =
  match ev.event with
  | Event.Stream_start _ -> ()

  | Event.Stream_end -> ()

  | Event.Document_start { version; implicit } ->
    (* Remember per-document header info until Document_end. *)
    state.doc_version <- version;
    state.doc_implicit_start <- implicit

  | Event.Document_end { implicit } ->
    (* Close the document: wrap the accumulated root (possibly None for
       an empty document) and reset per-document state. *)
    let doc = Document.make
      ?version:state.doc_version
      ~implicit_start:state.doc_implicit_start
      ~implicit_end:implicit
      state.current
    in
    state.documents <- doc :: state.documents;
    state.current <- None;
    state.doc_version <- None;
    state.doc_implicit_start <- true

  | Event.Alias { anchor } ->
    let node : Yaml.t = `Alias anchor in
    add_node state node

  | Event.Scalar { anchor; tag; value; plain_implicit; quoted_implicit; style } ->
    let scalar = Scalar.make
      ?anchor ?tag
      ~plain_implicit ~quoted_implicit
      ~style value
    in
    let node : Yaml.t = `Scalar scalar in
    add_node state node

  | Event.Sequence_start { anchor; tag; implicit; style } ->
    (* Open a sequence frame; items accumulate in reverse order. *)
    let frame = Sequence_frame {
      anchor; tag; implicit; style;
      items = [];
    } in
    state.stack <- frame :: state.stack

  | Event.Sequence_end ->
    (match state.stack with
     | Sequence_frame { anchor; tag; implicit; style; items } :: rest ->
       let seq = Sequence.make ?anchor ?tag ~implicit ~style (List.rev items) in
       let node : Yaml.t = `A seq in
       state.stack <- rest;
       add_node state node
     | _ -> Error.raise (Invalid_state "unexpected sequence end"))

  | Event.Mapping_start { anchor; tag; implicit; style } ->
    (* Open a mapping frame; keys/values arrive as alternating nodes. *)
    let frame = Mapping_frame {
      anchor; tag; implicit; style;
      pairs = [];
      pending_key = None;
    } in
    state.stack <- frame :: state.stack

  | Event.Mapping_end ->
    (match state.stack with
     | Mapping_frame { anchor; tag; implicit; style; pairs; pending_key = None } :: rest ->
       let map = Mapping.make ?anchor ?tag ~implicit ~style (List.rev pairs) in
       let node : Yaml.t = `O map in
       state.stack <- rest;
       add_node state node
     | Mapping_frame { pending_key = Some _; _ } :: _ ->
       (* A key without a value is a malformed event stream. *)
       Error.raise (Invalid_state "mapping ended with pending key")
     | _ -> Error.raise (Invalid_state "unexpected mapping end"))

(** [add_node state node] attaches a completed [node] to the innermost
    open collection, or records it as the document root when the stack
    is empty. In a mapping frame, nodes alternate key / value. *)
and add_node state node =
  match state.stack with
  | [] ->
    state.current <- Some node

  | Sequence_frame f :: rest ->
    state.stack <- Sequence_frame { f with items = node :: f.items } :: rest

  | Mapping_frame f :: rest ->
    (match f.pending_key with
     | None ->
       (* This is a key *)
       state.stack <- Mapping_frame { f with pending_key = Some node } :: rest
     | Some key ->
       (* This is a value *)
       state.stack <- Mapping_frame {
         f with
         pairs = (key, node) :: f.pairs;
         pending_key = None;
       } :: rest)
+
+
(* Drain [parser] completely, returning its documents oldest-first. *)
let run_to_documents parser =
  let state = create_state () in
  Parser.iter (process_event state) parser;
  List.rev state.documents

(** [value_of_string ?resolve_aliases ?max_nodes ?max_depth s] loads a
    single document from [s] as a value; [`Null] for an empty stream or
    empty document.

    @param resolve_aliases Whether to resolve aliases (default true)
    @param max_nodes Maximum nodes during alias expansion (default 10M)
    @param max_depth Maximum alias nesting depth (default 100)
    @raise Error.Yamlrw_error [Multiple_documents] on multi-doc input *)
let value_of_string
    ?(resolve_aliases = true)
    ?(max_nodes = Yaml.default_max_alias_nodes)
    ?(max_depth = Yaml.default_max_alias_depth)
    s =
  match run_to_documents (Parser.of_string s) with
  | [] -> `Null
  | [ doc ] ->
    (match Document.root doc with
     | None -> `Null
     | Some yaml ->
       Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth yaml)
  | _ -> Error.raise Multiple_documents

(** [yaml_of_string ?resolve_aliases ?max_nodes ?max_depth s] loads a
    single document from [s] as a [Yaml.t]; an empty scalar for an empty
    stream or empty document. Aliases are kept unless requested.

    @param resolve_aliases Whether to resolve aliases (default false)
    @param max_nodes Maximum nodes during alias expansion (default 10M)
    @param max_depth Maximum alias nesting depth (default 100)
    @raise Error.Yamlrw_error [Multiple_documents] on multi-doc input *)
let yaml_of_string
    ?(resolve_aliases = false)
    ?(max_nodes = Yaml.default_max_alias_nodes)
    ?(max_depth = Yaml.default_max_alias_depth)
    s =
  match run_to_documents (Parser.of_string s) with
  | [] -> `Scalar (Scalar.make "")
  | [ doc ] ->
    (match Document.root doc with
     | None -> `Scalar (Scalar.make "")
     | Some yaml ->
       if resolve_aliases then
         Yaml.resolve_aliases ~max_nodes ~max_depth yaml
       else
         yaml)
  | _ -> Error.raise Multiple_documents

(** [documents_of_string s] loads every document from [s], in order. *)
let documents_of_string s = run_to_documents (Parser.of_string s)
+
+
(** {2 Reader-based loading} *)

(* Drain [parser] completely, returning its documents oldest-first. *)
let all_documents_of parser =
  let state = create_state () in
  Parser.iter (process_event state) parser;
  List.rev state.documents

(** [value_of_reader ?resolve_aliases ?max_nodes ?max_depth reader] loads
    a single document from a [Bytes.Reader] as a value; [`Null] for an
    empty stream or empty document.

    @param resolve_aliases Whether to resolve aliases (default true)
    @param max_nodes Maximum nodes during alias expansion (default 10M)
    @param max_depth Maximum alias nesting depth (default 100)
    @raise Error.Yamlrw_error [Multiple_documents] on multi-doc input *)
let value_of_reader
    ?(resolve_aliases = true)
    ?(max_nodes = Yaml.default_max_alias_nodes)
    ?(max_depth = Yaml.default_max_alias_depth)
    reader =
  match all_documents_of (Parser.of_reader reader) with
  | [] -> `Null
  | [ doc ] ->
    (match Document.root doc with
     | None -> `Null
     | Some yaml ->
       Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth yaml)
  | _ -> Error.raise Multiple_documents

(** [yaml_of_reader ?resolve_aliases ?max_nodes ?max_depth reader] loads
    a single document from a [Bytes.Reader] as a [Yaml.t]; an empty
    scalar for an empty stream or empty document.

    @param resolve_aliases Whether to resolve aliases (default false)
    @param max_nodes Maximum nodes during alias expansion (default 10M)
    @param max_depth Maximum alias nesting depth (default 100)
    @raise Error.Yamlrw_error [Multiple_documents] on multi-doc input *)
let yaml_of_reader
    ?(resolve_aliases = false)
    ?(max_nodes = Yaml.default_max_alias_nodes)
    ?(max_depth = Yaml.default_max_alias_depth)
    reader =
  match all_documents_of (Parser.of_reader reader) with
  | [] -> `Scalar (Scalar.make "")
  | [ doc ] ->
    (match Document.root doc with
     | None -> `Scalar (Scalar.make "")
     | Some yaml ->
       if resolve_aliases then
         Yaml.resolve_aliases ~max_nodes ~max_depth yaml
       else
         yaml)
  | _ -> Error.raise Multiple_documents

(** [documents_of_reader reader] loads every document, in order. *)
let documents_of_reader reader = all_documents_of (Parser.of_reader reader)
+
+
(** Generic document loader - extracts the common pattern from the
    load_* functions: pull events until one document completes, return
    [extract] of it, [None] at stream end. Clears [state.documents]
    after each document so repeated calls stream documents one by one.
    NOTE(review): duplicated by [load_generic_fn] below; it cannot reuse
    it because [load_generic_fn] is defined later in the file. *)
let load_generic extract parser =
  let state = create_state () in
  let rec loop () =
    match Parser.next parser with
    | None -> None
    | Some ev ->
      process_event state ev;
      match ev.event with
      | Event.Document_end _ ->
        (match state.documents with
         | doc :: _ ->
           state.documents <- [];
           Some (extract doc)
         | [] -> None)
      | Event.Stream_end -> None
      | _ -> loop ()
  in
  loop ()
+
+
(** [load_value ?resolve_aliases ?max_nodes ?max_depth parser] loads the
    next document from [parser] as a value; [None] at stream end.

    @param resolve_aliases Whether to resolve aliases (default true)
    @param max_nodes Maximum nodes during alias expansion (default 10M)
    @param max_depth Maximum alias nesting depth (default 100) *)
let load_value
    ?(resolve_aliases = true)
    ?(max_nodes = Yaml.default_max_alias_nodes)
    ?(max_depth = Yaml.default_max_alias_depth)
    parser =
  let extract doc =
    match Document.root doc with
    | None -> `Null
    | Some yaml ->
      Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth yaml
  in
  load_generic extract parser

(** [load_yaml parser] loads the next document as a [Yaml.t]; an empty
    scalar stands in for an empty document. [None] at stream end. *)
let load_yaml parser =
  let extract doc =
    match Document.root doc with
    | None -> `Scalar (Scalar.make "")
    | Some yaml -> yaml
  in
  load_generic extract parser

(** [load_document parser] loads the next [Document.t]; [None] at
    stream end. *)
let load_document parser = load_generic (fun doc -> doc) parser

(** [iter_documents f parser] applies [f] to each remaining document. *)
let iter_documents f parser =
  let running = ref true in
  while !running do
    match load_document parser with
    | Some doc -> f doc
    | None -> running := false
  done

(** [fold_documents f init parser] folds [f] over the remaining
    documents, left to right. *)
let fold_documents f init parser =
  let acc = ref init in
  let running = ref true in
  while !running do
    match load_document parser with
    | Some doc -> acc := f !acc doc
    | None -> running := false
  done;
  !acc
+
+
(** {2 Parser-function based loading}

    These functions accept a [unit -> Event.spanned option] function
    instead of a [Parser.t], allowing them to work with any event source
    (e.g., streaming parsers). *)

(** Generic document loader using an event source function: pull events
    until one document completes, return [extract] of it, [None] at
    stream end. Clears [state.documents] after each document so repeated
    calls stream documents one by one.
    NOTE(review): same body as [load_generic] above, modulo the event
    source; consider folding the two together. *)
let load_generic_fn extract next_event =
  let state = create_state () in
  let rec loop () =
    match next_event () with
    | None -> None
    | Some ev ->
      process_event state ev;
      match ev.event with
      | Event.Document_end _ ->
        (match state.documents with
         | doc :: _ ->
           state.documents <- [];
           Some (extract doc)
         | [] -> None)
      | Event.Stream_end -> None
      | _ -> loop ()
  in
  loop ()
+
+
(** [value_of_parser ?resolve_aliases ?max_nodes ?max_depth next_event]
    loads the next document from the event source as a value; [`Null]
    when the stream is exhausted.

    @param resolve_aliases Whether to resolve aliases (default true)
    @param max_nodes Maximum nodes during alias expansion (default 10M)
    @param max_depth Maximum alias nesting depth (default 100) *)
let value_of_parser
    ?(resolve_aliases = true)
    ?(max_nodes = Yaml.default_max_alias_nodes)
    ?(max_depth = Yaml.default_max_alias_depth)
    next_event =
  let extract doc =
    match Document.root doc with
    | None -> `Null
    | Some yaml ->
      Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth yaml
  in
  match load_generic_fn extract next_event with
  | Some v -> v
  | None -> `Null

(** [yaml_of_parser ?resolve_aliases ?max_nodes ?max_depth next_event]
    loads the next document from the event source as a [Yaml.t]; an
    empty scalar when the stream is exhausted.

    @param resolve_aliases Whether to resolve aliases (default false)
    @param max_nodes Maximum nodes during alias expansion (default 10M)
    @param max_depth Maximum alias nesting depth (default 100) *)
let yaml_of_parser
    ?(resolve_aliases = false)
    ?(max_nodes = Yaml.default_max_alias_nodes)
    ?(max_depth = Yaml.default_max_alias_depth)
    next_event =
  let extract doc =
    match Document.root doc with
    | None -> `Scalar (Scalar.make "")
    | Some yaml ->
      if resolve_aliases then
        Yaml.resolve_aliases ~max_nodes ~max_depth yaml
      else
        yaml
  in
  match load_generic_fn extract next_event with
  | Some v -> v
  | None -> `Scalar (Scalar.make "")

(** [document_of_parser next_event] loads the next [Document.t] from the
    event source; [None] at stream end. *)
let document_of_parser next_event = load_generic_fn (fun doc -> doc) next_event

(** [documents_of_parser next_event] drains the event source and returns
    every document, in order. *)
let documents_of_parser next_event =
  let state = create_state () in
  let rec drain () =
    match next_event () with
    | Some ev ->
      process_event state ev;
      drain ()
    | None -> List.rev state.documents
  in
  drain ()

(** [iter_documents_parser f next_event] applies [f] to each remaining
    document from the event source. *)
let iter_documents_parser f next_event =
  let running = ref true in
  while !running do
    match document_of_parser next_event with
    | Some doc -> f doc
    | None -> running := false
  done

(** [fold_documents_parser f init next_event] folds [f] over the
    remaining documents from the event source, left to right. *)
let fold_documents_parser f init next_event =
  let acc = ref init in
  let running = ref true in
  while !running do
    match document_of_parser next_event with
    | Some doc -> acc := f !acc doc
    | None -> running := false
  done;
  !acc
+95
lib/mapping.ml
···
+
(*---------------------------------------------------------------------------
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
SPDX-License-Identifier: ISC
---------------------------------------------------------------------------*)

(** YAML mapping (object) values with metadata *)

(** A mapping with presentation metadata. ['k] and ['v] are the key and
    value node types; [members] preserves source order. *)
type ('k, 'v) t = {
  anchor : string option;  (* &anchor attached to the node, if any *)
  tag : string option;  (* explicit !tag, if any *)
  implicit : bool;  (* tag implied rather than written *)
  style : Layout_style.t;  (* block/flow rendering preference *)
  members : ('k * 'v) list;
}

(** [make ?anchor ?tag ?implicit ?style members] builds a mapping.
    [implicit] defaults to [true] and [style] to [`Any].
    Note: [?(anchor : string option)] annotates the option value seen
    inside the function; there is no default, so it stays [None] when
    omitted. *)
let make
    ?(anchor : string option)
    ?(tag : string option)
    ?(implicit = true)
    ?(style = `Any)
    members =
  { anchor; tag; implicit; style; members }

(* Field accessors. *)
let members t = t.members
let anchor t = t.anchor
let tag t = t.tag
let implicit t = t.implicit
let style t = t.style

(* Functional updates of presentation metadata. *)
let with_anchor anchor t = { t with anchor = Some anchor }
let with_tag tag t = { t with tag = Some tag }
let with_style style t = { t with style }

(* Structure-preserving transformations over members; metadata is kept. *)
let map_keys f t = { t with members = List.map (fun (k, v) -> (f k, v)) t.members }
let map_values f t = { t with members = List.map (fun (k, v) -> (k, f v)) t.members }
let map f t = { t with members = List.map (fun (k, v) -> f k v) t.members }
+
+
(** [length t] is the number of key/value members. *)
let length t = List.length t.members

(** [is_empty t] is [true] when the mapping has no members. *)
let is_empty t =
  match t.members with
  | [] -> true
  | _ -> false

(** [find pred t] returns the value of the first member whose key
    satisfies [pred]. *)
let find pred t =
  match List.find_opt (fun (k, _) -> pred k) t.members with
  | Some (_, v) -> Some v
  | None -> None

(** [find_key pred t] returns the first (key, value) member whose key
    satisfies [pred]. *)
let find_key pred t = List.find_opt (fun (k, _) -> pred k) t.members

(** [mem pred t] tests whether some key satisfies [pred]. *)
let mem pred t = List.exists (fun (key, _) -> pred key) t.members

(** [keys t] lists the keys in member order. *)
let keys t = List.map fst t.members

(** [values t] lists the values in member order. *)
let values t = List.map snd t.members

(** [iter f t] applies [f key value] to each member in order. *)
let iter f t = List.iter (fun (key, v) -> f key v) t.members

(** [fold f init t] folds [f acc key value] left-to-right over members. *)
let fold f init t = List.fold_left (fun acc (key, v) -> f acc key v) init t.members
+
+
(** [pp pp_key pp_val fmt t] pretty-prints the mapping with its metadata
    (anchor/tag only when present) and each member as "key: value". *)
let pp pp_key pp_val fmt t =
  Format.fprintf fmt "@[<hv 2>mapping(@,";
  (match t.anchor with
   | Some a -> Format.fprintf fmt "anchor=%s,@ " a
   | None -> ());
  (match t.tag with
   | Some tag -> Format.fprintf fmt "tag=%s,@ " tag
   | None -> ());
  Format.fprintf fmt "style=%a,@ " Layout_style.pp t.style;
  Format.fprintf fmt "members={@,";
  List.iteri (fun i (k, v) ->
    if i > 0 then Format.fprintf fmt ",@ ";
    Format.fprintf fmt "@[<hv 2>%a:@ %a@]" pp_key k pp_val v
  ) t.members;
  Format.fprintf fmt "@]@,})"

(** [equal eq_k eq_v a b] is structural equality over all fields,
    including presentation metadata (anchor, tag, implicit, style). *)
let equal eq_k eq_v a b =
  Option.equal String.equal a.anchor b.anchor &&
  Option.equal String.equal a.tag b.tag &&
  a.implicit = b.implicit &&
  Layout_style.equal a.style b.style &&
  List.equal (fun (k1, v1) (k2, v2) -> eq_k k1 k2 && eq_v v1 v2) a.members b.members

(** [compare cmp_k cmp_v a b] is a total order comparing, in sequence:
    anchor, tag, implicit flag, style, then members lexicographically. *)
let compare cmp_k cmp_v a b =
  let c = Option.compare String.compare a.anchor b.anchor in
  if c <> 0 then c else
  let c = Option.compare String.compare a.tag b.tag in
  if c <> 0 then c else
  let c = Bool.compare a.implicit b.implicit in
  if c <> 0 then c else
  let c = Layout_style.compare a.style b.style in
  if c <> 0 then c else
  let cmp_pair (k1, v1) (k2, v2) =
    let c = cmp_k k1 k2 in
    if c <> 0 then c else cmp_v v1 v2
  in
  List.compare cmp_pair a.members b.members
+789
lib/parser.ml
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** YAML parser - converts tokens to semantic events via state machine *)
+
+
(** Parser states *)
+
type state =
+
| Stream_start
+
| Implicit_document_start
+
| Document_start
+
| Document_content
+
| Document_content_done (* After parsing a node, check for unexpected content *)
+
| Document_end
+
| Block_node
+
| Block_node_or_indentless_sequence
+
| Flow_node
+
| Block_sequence_first_entry
+
| Block_sequence_entry
+
| Indentless_sequence_entry
+
| Block_mapping_first_key
+
| Block_mapping_key
+
| Block_mapping_value
+
| Flow_sequence_first_entry
+
| Flow_sequence_entry
+
| Flow_sequence_entry_mapping_key
+
| Flow_sequence_entry_mapping_value
+
| Flow_sequence_entry_mapping_end
+
| Flow_mapping_first_key
+
| Flow_mapping_key
+
| Flow_mapping_value
+
| Flow_mapping_empty_value
+
| End
+
+
(* Parser: wraps a scanner and drives the event-emitting state machine. *)
type t = {
  scanner : Scanner.t;  (** Token source *)
  mutable state : state;  (** Production to run on the next [parse] call *)
  mutable states : state list; (** State stack *)
  mutable marks : Span.t list; (** Mark stack for span tracking *)
  mutable version : (int * int) option;  (** %YAML directive of the current document *)
  mutable tag_directives : (string * string) list;  (** Handle -> prefix; newest binding first *)
  mutable current_token : Token.spanned option;  (** One-token lookahead cache *)
  mutable finished : bool;  (** Set once Stream_end has been emitted *)
  mutable explicit_doc_end : bool; (** True if last doc ended with explicit ... *)
  mutable stream_start : bool; (** True if we haven't emitted any documents yet *)
}
+
+
(** [create scanner] is a fresh parser over [scanner], primed with the two
    default YAML 1.2 tag handles ("!" and "!!"). *)
let create scanner =
  let default_handles = [ ("!", "!"); ("!!", "tag:yaml.org,2002:") ] in
  { scanner;
    state = Stream_start;
    states = [];
    marks = [];
    version = None;
    tag_directives = default_handles;
    current_token = None;
    finished = false;
    explicit_doc_end = false;
    stream_start = true;
  }

(* Convenience constructors over the various scanner inputs. *)
let of_string s = create (Scanner.of_string s)
let of_scanner = create
let of_input i = create (Scanner.of_input i)
let of_reader r = create (Scanner.of_reader r)
+
+
(** Return the token under the cursor, pulling one from the scanner when the
    cache is empty.  Raises [Unexpected_eof] if the scanner is exhausted. *)
let current_token t =
  match t.current_token with
  | Some tok -> tok
  | None ->
    (match Scanner.next t.scanner with
     | Some tok as fetched ->
       t.current_token <- fetched;
       tok
     | None -> Error.raise Unexpected_eof)

(** Like [current_token] but returns [None] at end of stream instead of
    raising; the fetched token stays cached. *)
let peek_token t =
  (match t.current_token with
   | None -> t.current_token <- Scanner.next t.scanner
   | Some _ -> ());
  t.current_token

(** Drop the cached token so the next access fetches a fresh one. *)
let skip_token t = t.current_token <- None

(** [check t pred] is [pred] applied to the current token, or [false] at
    end of stream. *)
let check t pred =
  match peek_token t with
  | None -> false
  | Some tok -> pred tok.token

(** Alias of [check], kept for call-site readability. *)
let check_token t token_match = check t token_match
+
+
(** Push a continuation state onto the stack. *)
let push_state t s = t.states <- s :: t.states

(** Pop the next continuation; an empty stack yields [End] so a dangling
    pop terminates the stream rather than raising. *)
let pop_state t =
  match t.states with
  | [] -> End
  | s :: rest ->
    t.states <- rest;
    s
+
+
(** [resolve_tag t ~handle ~suffix] expands a tag against the current
    [%TAG] directives.  An empty handle means a verbatim tag whose suffix
    is already the full URI; an undeclared "!" falls back to the default
    local-tag prefix; any other unknown handle is an error. *)
let resolve_tag t ~handle ~suffix =
  if String.equal handle "" then suffix
  else
    match List.assoc_opt handle t.tag_directives with
    | Some prefix -> prefix ^ suffix
    | None ->
      if String.equal handle "!" then "!" ^ suffix
      else Error.raise (Invalid_tag (handle ^ suffix))
+
+
(** Consume all [%YAML]/[%TAG] directive tokens preceding a document and
    record them on [t].  Version and tag handles are first reset to the
    YAML 1.2 defaults so directives never leak between documents.
    Raises on a duplicate [%YAML] directive or a duplicate custom handle. *)
let process_directives t =
  t.version <- None;
  t.tag_directives <- [("!", "!"); ("!!", "tag:yaml.org,2002:")];

  while check t (function
    | Token.Version_directive _ | Token.Tag_directive _ -> true
    | _ -> false)
  do
    let tok = current_token t in
    skip_token t;
    match tok.token with
    | Token.Version_directive { major; minor } ->
      if t.version <> None then
        Error.raise_span tok.span (Invalid_yaml_version "duplicate YAML directive");
      t.version <- Some (major, minor)
    | Token.Tag_directive { handle; prefix } ->
      (* Skip empty tag directives (these are reserved/unknown directives that were ignored) *)
      if handle = "" && prefix = "" then
        () (* Ignore reserved directives *)
      else begin
        (* "!" and "!!" may be redefined (they are always present from the
           defaults); any other handle may only be declared once. *)
        if List.mem_assoc handle t.tag_directives &&
           handle <> "!" && handle <> "!!" then
          Error.raise_span tok.span (Invalid_tag_directive ("duplicate tag handle: " ^ handle));
        (* Prepend: [List.assoc_opt] in [resolve_tag] sees the newest binding. *)
        t.tag_directives <- (handle, prefix) :: t.tag_directives
      end
    | _ -> ()
  done
+
+
(** Parse anchor and/or tag properties decorating the next node.
    The two properties may appear in either order, at most once each.
    Returns [(anchor, tag)] with the tag already resolved against the
    current [%TAG] directives.  Raises on a duplicate anchor or tag. *)
let parse_properties t =
  let anchor = ref None in
  let tag = ref None in

  while check t (function
    | Token.Anchor _ | Token.Tag _ -> true
    | _ -> false)
  do
    let tok = current_token t in
    skip_token t;
    match tok.token with
    | Token.Anchor name ->
      if Option.is_some !anchor then
        Error.raise_span tok.span (Duplicate_anchor name);
      anchor := Some name
    | Token.Tag { handle; suffix } ->
      if Option.is_some !tag then
        Error.raise_span tok.span (Invalid_tag "duplicate tag");
      (* Empty handle+suffix = no tag; a bare "!" is the non-specific tag;
         anything else resolves through the declared handles. *)
      let resolved =
        if handle = "" && suffix = "" then None
        else if handle = "!" && suffix = "" then Some "!"
        else Some (resolve_tag t ~handle ~suffix)
      in
      tag := resolved
    | _ -> ()
  done;
  (!anchor, !tag)
+
+
(** An empty plain scalar event at [span] — used wherever the grammar
    implies a null node (missing key, missing value, empty entry).
    Implicit resolution is only allowed when no explicit tag is present. *)
let empty_scalar_event ~anchor ~tag span =
  let event =
    Event.Scalar
      { anchor;
        tag;
        value = "";
        plain_implicit = Option.is_none tag;
        quoted_implicit = false;
        style = `Plain;
      }
  in
  (event, span)
+
+
(** Consume the scanner's mandatory Stream_start token and emit the
    matching event; any other first token is a scanner bug. *)
let parse_stream_start t =
  let tok = current_token t in
  skip_token t;
  match tok.token with
  | Token.Stream_start encoding ->
    t.state <- Implicit_document_start;
    (Event.Stream_start { encoding }, tok.span)
  | _ ->
    Error.raise_span tok.span (Unexpected_token "expected stream start")
+
+
(** Parse a document start (implicit or explicit).
    Consumes any directives first; when [implicit] is false the [---]
    token must follow them.  Pushes [Document_end] so the content state
    returns there once the root node has been parsed. *)
let parse_document_start t ~implicit =
  process_directives t;

  if not implicit then begin
    let tok = current_token t in
    match tok.token with
    | Token.Document_start ->
      skip_token t
    | _ ->
      Error.raise_span tok.span Expected_document_start
  end;

  (* Span of the first content token, or a zero span at EOF. *)
  let span = match peek_token t with
    | Some tok -> tok.span
    | None -> Span.point Position.initial
  in

  (* After first document, stream_start is false *)
  t.stream_start <- false;
  push_state t Document_end;
  t.state <- Document_content;
  Event.Document_start { version = t.version; implicit }, span
+
+
(** Emit Document_end, consuming the [...] marker when present.  Records
    whether the end was explicit so a following directive-bearing document
    can be validated. *)
let parse_document_end t =
  let explicit = check t (function Token.Document_end -> true | _ -> false) in
  let span =
    match peek_token t with
    | Some tok -> tok.span
    | None -> Span.point Position.initial
  in
  if explicit then skip_token t;
  (* Track if this document ended explicitly with "..." *)
  t.explicit_doc_end <- explicit;
  t.state <- Implicit_document_start;
  (Event.Document_end { implicit = not explicit }, span)
+
+
(** Parse a node in various contexts.
    [block] permits block collections at this position; [indentless]
    permits an indentless sequence ("-" entries at the parent's indent,
    legal only as a block mapping value).  Scalars and aliases pop the
    continuation stack; collection starts instead set the matching
    first-entry state.  The first half handles nodes with anchor/tag
    properties, the second half the bare forms. *)
let parse_node t ~block ~indentless =
  let tok = current_token t in
  match tok.token with
  | Token.Alias name ->
    skip_token t;
    t.state <- pop_state t;
    Event.Alias { anchor = name }, tok.span

  | Token.Anchor _ | Token.Tag _ ->
    (* Node decorated with properties: collect them, then parse the node. *)
    let anchor, tag = parse_properties t in
    let tok = current_token t in
    (match tok.token with
    | Token.Block_entry when indentless ->
      (* The "-" token is left in place for Indentless_sequence_entry. *)
      t.state <- Indentless_sequence_entry;
      Event.Sequence_start {
        anchor; tag;
        implicit = tag = None;
        style = `Block;
      }, tok.span

    | Token.Block_sequence_start when block ->
      t.state <- Block_sequence_first_entry;
      skip_token t;
      Event.Sequence_start {
        anchor; tag;
        implicit = tag = None;
        style = `Block;
      }, tok.span

    | Token.Block_mapping_start when block ->
      t.state <- Block_mapping_first_key;
      skip_token t;
      Event.Mapping_start {
        anchor; tag;
        implicit = tag = None;
        style = `Block;
      }, tok.span

    | Token.Flow_sequence_start ->
      t.state <- Flow_sequence_first_entry;
      skip_token t;
      Event.Sequence_start {
        anchor; tag;
        implicit = tag = None;
        style = `Flow;
      }, tok.span

    | Token.Flow_mapping_start ->
      t.state <- Flow_mapping_first_key;
      skip_token t;
      Event.Mapping_start {
        anchor; tag;
        implicit = tag = None;
        style = `Flow;
      }, tok.span

    | Token.Scalar { style; value } ->
      skip_token t;
      t.state <- pop_state t;
      (* An explicit tag suppresses implicit resolution; otherwise plain
         scalars resolve implicitly and quoted ones resolve as strings. *)
      let plain_implicit = tag = None && style = `Plain in
      let quoted_implicit = tag = None && style <> `Plain in
      Event.Scalar {
        anchor; tag; value;
        plain_implicit; quoted_implicit; style;
      }, tok.span

    | _ ->
      (* Empty node: properties with no content become an empty scalar. *)
      t.state <- pop_state t;
      empty_scalar_event ~anchor ~tag tok.span)

  (* Bare (property-less) forms of the same productions. *)
  | Token.Block_sequence_start when block ->
    t.state <- Block_sequence_first_entry;
    skip_token t;
    Event.Sequence_start {
      anchor = None; tag = None;
      implicit = true;
      style = `Block;
    }, tok.span

  | Token.Block_mapping_start when block ->
    t.state <- Block_mapping_first_key;
    skip_token t;
    Event.Mapping_start {
      anchor = None; tag = None;
      implicit = true;
      style = `Block;
    }, tok.span

  | Token.Flow_sequence_start ->
    t.state <- Flow_sequence_first_entry;
    skip_token t;
    Event.Sequence_start {
      anchor = None; tag = None;
      implicit = true;
      style = `Flow;
    }, tok.span

  | Token.Flow_mapping_start ->
    t.state <- Flow_mapping_first_key;
    skip_token t;
    Event.Mapping_start {
      anchor = None; tag = None;
      implicit = true;
      style = `Flow;
    }, tok.span

  | Token.Block_entry when indentless ->
    (* The "-" token is left in place for Indentless_sequence_entry. *)
    t.state <- Indentless_sequence_entry;
    Event.Sequence_start {
      anchor = None; tag = None;
      implicit = true;
      style = `Block;
    }, tok.span

  | Token.Scalar { style; value } ->
    skip_token t;
    t.state <- pop_state t;
    let plain_implicit = style = `Plain in
    let quoted_implicit = style <> `Plain in
    Event.Scalar {
      anchor = None; tag = None; value;
      plain_implicit; quoted_implicit; style;
    }, tok.span

  | _ ->
    (* Empty node *)
    t.state <- pop_state t;
    empty_scalar_event ~anchor:None ~tag:None tok.span
+
+
(** Parse one block sequence entry: "-" followed by a node (or nothing,
    which yields an empty scalar), or Block_end closing the sequence. *)
let parse_block_sequence_entry t =
  let tok = current_token t in
  match tok.token with
  | Token.Block_end ->
    skip_token t;
    t.state <- pop_state t;
    (Event.Sequence_end, tok.span)
  | Token.Block_entry ->
    skip_token t;
    let entry_is_empty =
      check t (function
        | Token.Block_entry | Token.Block_end -> true
        | _ -> false)
    in
    if entry_is_empty then begin
      (* "-" immediately followed by another "-" or the end: null entry. *)
      t.state <- Block_sequence_entry;
      empty_scalar_event ~anchor:None ~tag:None tok.span
    end
    else begin
      push_state t Block_sequence_entry;
      parse_node t ~block:true ~indentless:false
    end
  | _ -> Error.raise_span tok.span Expected_block_entry
+
+
(** Parse a block mapping key: an explicit "?" key, a bare ":" (empty
    key), or Block_end closing the mapping. *)
let parse_block_mapping_key t =
  let tok = current_token t in
  match tok.token with
  | Token.Key ->
    skip_token t;
    let key_is_empty =
      check t (function
        | Token.Key | Token.Value | Token.Block_end -> true
        | _ -> false)
    in
    if key_is_empty then begin
      t.state <- Block_mapping_value;
      empty_scalar_event ~anchor:None ~tag:None tok.span
    end
    else begin
      push_state t Block_mapping_value;
      parse_node t ~block:true ~indentless:true
    end
  | Token.Value ->
    (* ":" with no preceding key — treat the key as an empty scalar. *)
    t.state <- Block_mapping_value;
    empty_scalar_event ~anchor:None ~tag:None tok.span
  | Token.Block_end ->
    skip_token t;
    t.state <- pop_state t;
    (Event.Mapping_end, tok.span)
  | _ -> Error.raise_span tok.span Expected_key
+
+
(** Parse a block mapping value.  A missing ":" or a ":" followed directly
    by another key/value/end yields an implicit empty value. *)
let parse_block_mapping_value t =
  let tok = current_token t in
  let empty_value () =
    t.state <- Block_mapping_key;
    empty_scalar_event ~anchor:None ~tag:None tok.span
  in
  match tok.token with
  | Token.Value ->
    skip_token t;
    let value_is_empty =
      check t (function
        | Token.Key | Token.Value | Token.Block_end -> true
        | _ -> false)
    in
    if value_is_empty then empty_value ()
    else begin
      push_state t Block_mapping_key;
      (* Indentless sequences are legal as block mapping values. *)
      parse_node t ~block:true ~indentless:true
    end
  | _ -> empty_value ()
+
+
(** Parse one entry of an indentless sequence.  Anything other than a "-"
    ends the sequence — the scanner emits no Block_end for this form. *)
let parse_indentless_sequence_entry t =
  let tok = current_token t in
  match tok.token with
  | Token.Block_entry ->
    skip_token t;
    let entry_is_empty =
      check t (function
        | Token.Block_entry | Token.Key | Token.Value | Token.Block_end -> true
        | _ -> false)
    in
    if entry_is_empty then begin
      t.state <- Indentless_sequence_entry;
      empty_scalar_event ~anchor:None ~tag:None tok.span
    end
    else begin
      push_state t Indentless_sequence_entry;
      parse_node t ~block:true ~indentless:false
    end
  | _ ->
    t.state <- pop_state t;
    (Event.Sequence_end, tok.span)
+
+
(** Parse flow sequence entries.  [first] distinguishes the entry right
    after "[" (no "," expected) from subsequent ones (a "," separator is
    required).  A "key: value" pair inside the sequence becomes an
    implicit single-pair flow mapping. *)
let rec parse_flow_sequence_entry t ~first =
  let tok = current_token t in
  match tok.token with
  | Token.Flow_sequence_end ->
    skip_token t;
    t.state <- pop_state t;
    Event.Sequence_end, tok.span
  | Token.Flow_entry when not first ->
    skip_token t;
    parse_flow_sequence_entry_internal t
  | _ when first ->
    parse_flow_sequence_entry_internal t
  | _ ->
    Error.raise_span tok.span Expected_sequence_end

(* Entry body, reached just after "[" or a separator ",". *)
and parse_flow_sequence_entry_internal t =
  let tok = current_token t in
  match tok.token with
  | Token.Flow_sequence_end ->
    (* Trailing comma case - don't emit empty scalar, just go back to sequence entry state *)
    skip_token t;
    t.state <- pop_state t;
    Event.Sequence_end, tok.span
  | Token.Flow_entry ->
    (* Double comma or comma after comma - invalid *)
    Error.raise_span tok.span (Unexpected_token "unexpected ',' in flow sequence")
  | Token.Key ->
    (* Explicit "? key" inside the sequence: single-pair mapping. *)
    skip_token t;
    t.state <- Flow_sequence_entry_mapping_key;
    Event.Mapping_start {
      anchor = None; tag = None;
      implicit = true;
      style = `Flow;
    }, tok.span
  | Token.Value ->
    (* Implicit empty key mapping: [ : value ] *)
    t.state <- Flow_sequence_entry_mapping_key;
    Event.Mapping_start {
      anchor = None; tag = None;
      implicit = true;
      style = `Flow;
    }, tok.span
  | _ ->
    push_state t Flow_sequence_entry;
    parse_node t ~block:false ~indentless:false
+
+
(** Parse the key of a single-pair mapping inside a flow sequence; a ":",
    "," or "]" right away means the key is an empty scalar. *)
let parse_flow_sequence_entry_mapping_key t =
  let tok = current_token t in
  match tok.token with
  | Token.Value | Token.Flow_entry | Token.Flow_sequence_end ->
    t.state <- Flow_sequence_entry_mapping_value;
    empty_scalar_event ~anchor:None ~tag:None tok.span
  | _ ->
    push_state t Flow_sequence_entry_mapping_value;
    parse_node t ~block:false ~indentless:false
+
+
(** Parse the value of a single-pair mapping inside a flow sequence;
    missing values become empty scalars. *)
let parse_flow_sequence_entry_mapping_value t =
  let tok = current_token t in
  let empty_value () =
    t.state <- Flow_sequence_entry_mapping_end;
    empty_scalar_event ~anchor:None ~tag:None tok.span
  in
  match tok.token with
  | Token.Value ->
    skip_token t;
    if check t (function
         | Token.Flow_entry | Token.Flow_sequence_end -> true
         | _ -> false)
    then empty_value ()
    else begin
      push_state t Flow_sequence_entry_mapping_end;
      parse_node t ~block:false ~indentless:false
    end
  | _ -> empty_value ()
+
+
(** Close the single-pair mapping and resume normal sequence entries. *)
let parse_flow_sequence_entry_mapping_end t =
  let span = (current_token t).span in
  t.state <- Flow_sequence_entry;
  (Event.Mapping_end, span)
+
+
(** Parse flow mapping keys.  [first] distinguishes the key right after
    "{" (no "," expected) from subsequent ones ("," separator required). *)
let rec parse_flow_mapping_key t ~first =
  let tok = current_token t in
  match tok.token with
  | Token.Flow_mapping_end ->
    skip_token t;
    t.state <- pop_state t;
    Event.Mapping_end, tok.span
  | Token.Flow_entry when not first ->
    skip_token t;
    parse_flow_mapping_key_internal t
  | _ when first ->
    parse_flow_mapping_key_internal t
  | _ ->
    Error.raise_span tok.span Expected_mapping_end

(* Key body, reached just after "{" or a separator ",". *)
and parse_flow_mapping_key_internal t =
  let tok = current_token t in
  match tok.token with
  | Token.Flow_mapping_end ->
    (* Trailing comma case - don't emit empty scalar, just return to key state *)
    skip_token t;
    t.state <- pop_state t;
    Event.Mapping_end, tok.span
  | Token.Flow_entry ->
    (* Double comma or comma after comma - invalid *)
    Error.raise_span tok.span (Unexpected_token "unexpected ',' in flow mapping")
  | Token.Key ->
    skip_token t;
    (* "?" directly followed by a separator means an empty key. *)
    if check t (function
      | Token.Value | Token.Flow_entry | Token.Flow_mapping_end -> true
      | _ -> false)
    then begin
      t.state <- Flow_mapping_value;
      empty_scalar_event ~anchor:None ~tag:None tok.span
    end else begin
      push_state t Flow_mapping_value;
      parse_node t ~block:false ~indentless:false
    end
  | _ ->
    push_state t Flow_mapping_value;
    parse_node t ~block:false ~indentless:false
+
+
(** Parse a flow mapping value.  With [empty] (from
    [Flow_mapping_empty_value]) or with no ":" present, the value is an
    empty scalar. *)
let parse_flow_mapping_value t ~empty =
  let tok = current_token t in
  let empty_value () =
    t.state <- Flow_mapping_key;
    empty_scalar_event ~anchor:None ~tag:None tok.span
  in
  if empty then empty_value ()
  else
    match tok.token with
    | Token.Value ->
      skip_token t;
      if check t (function
           | Token.Flow_entry | Token.Flow_mapping_end -> true
           | _ -> false)
      then empty_value ()
      else begin
        push_state t Flow_mapping_key;
        parse_node t ~block:false ~indentless:false
      end
    | _ -> empty_value ()
+
+
(** Main state machine dispatcher: runs the production named by [t.state]
    and returns exactly one (event, span) pair.  Recursive only through
    [Document_content_done], which re-dispatches after validating that
    nothing follows the document's root node. *)
let rec parse t =
  match t.state with
  | Stream_start ->
    parse_stream_start t

  | Implicit_document_start ->
    (* Skip any document end markers before checking what's next *)
    while check t (function Token.Document_end -> true | _ -> false) do
      t.explicit_doc_end <- true; (* Seeing ... counts as explicit end *)
      skip_token t
    done;

    let tok = current_token t in
    (match tok.token with
    | Token.Stream_end ->
      skip_token t;
      t.state <- End;
      t.finished <- true;
      Event.Stream_end, tok.span
    | Token.Version_directive _ | Token.Tag_directive _ ->
      (* Directives are only allowed at stream start or after explicit ... (MUS6/01) *)
      if not t.stream_start && not t.explicit_doc_end then
        Error.raise_span tok.span (Invalid_directive "directives require explicit document end '...' before them");
      parse_document_start t ~implicit:false
    | Token.Document_start ->
      parse_document_start t ~implicit:false
    (* These tokens are invalid at document start - they indicate leftover junk *)
    | Token.Flow_sequence_end | Token.Flow_mapping_end | Token.Flow_entry
    | Token.Block_end | Token.Value ->
      Error.raise_span tok.span (Unexpected_token "unexpected token at document start")
    | _ ->
      parse_document_start t ~implicit:true)

  | Document_start ->
    parse_document_start t ~implicit:false

  | Document_content ->
    (* A document boundary right away means the document body is empty. *)
    if check t (function
      | Token.Version_directive _ | Token.Tag_directive _
      | Token.Document_start | Token.Document_end | Token.Stream_end -> true
      | _ -> false)
    then begin
      let tok = current_token t in
      t.state <- pop_state t;
      empty_scalar_event ~anchor:None ~tag:None tok.span
    end else begin
      (* Push Document_content_done so we return there after parsing the node.
         This allows us to check for unexpected content after the node. *)
      push_state t Document_content_done;
      parse_node t ~block:true ~indentless:false
    end

  | Document_content_done ->
    (* After parsing a node in document content, check for unexpected content *)
    if check t (function
      | Token.Version_directive _ | Token.Tag_directive _
      | Token.Document_start | Token.Document_end | Token.Stream_end -> true
      | _ -> false)
    then begin
      (* Valid document boundary - continue to Document_end *)
      t.state <- pop_state t;
      parse t (* Continue to emit the next event *)
    end else begin
      (* Unexpected content after document value - this is an error (KS4U, BS4K) *)
      let tok = current_token t in
      Error.raise_span tok.span
        (Unexpected_token "content not allowed after document value")
    end

  | Document_end ->
    parse_document_end t

  | Block_node ->
    parse_node t ~block:true ~indentless:false

  | Block_node_or_indentless_sequence ->
    parse_node t ~block:true ~indentless:true

  | Flow_node ->
    parse_node t ~block:false ~indentless:false

  | Block_sequence_first_entry ->
    t.state <- Block_sequence_entry;
    parse_block_sequence_entry t

  | Block_sequence_entry ->
    parse_block_sequence_entry t

  | Indentless_sequence_entry ->
    parse_indentless_sequence_entry t

  | Block_mapping_first_key ->
    t.state <- Block_mapping_key;
    parse_block_mapping_key t

  | Block_mapping_key ->
    parse_block_mapping_key t

  | Block_mapping_value ->
    parse_block_mapping_value t

  | Flow_sequence_first_entry ->
    parse_flow_sequence_entry t ~first:true

  | Flow_sequence_entry ->
    parse_flow_sequence_entry t ~first:false

  | Flow_sequence_entry_mapping_key ->
    parse_flow_sequence_entry_mapping_key t

  | Flow_sequence_entry_mapping_value ->
    parse_flow_sequence_entry_mapping_value t

  | Flow_sequence_entry_mapping_end ->
    parse_flow_sequence_entry_mapping_end t

  | Flow_mapping_first_key ->
    parse_flow_mapping_key t ~first:true

  | Flow_mapping_key ->
    parse_flow_mapping_key t ~first:false

  | Flow_mapping_value ->
    parse_flow_mapping_value t ~empty:false

  | Flow_mapping_empty_value ->
    parse_flow_mapping_value t ~empty:true

  | End ->
    (* Defensive: [next] normally stops once [finished] is set. *)
    let span = Span.point Position.initial in
    t.finished <- true;
    Event.Stream_end, span
+
+
(** Produce the next event, or [None] once the stream has ended. *)
let next t =
  if t.finished then None
  else
    let event, span = parse t in
    Some { Event.event; span }
+
+
(** Peek at next event.

    WARNING: currently *destructive* — the parser keeps no event lookahead
    buffer, so this delegates to [next] and the event is consumed.  Callers
    must treat the returned event as taken. *)
let peek t =
  (* Parser is not easily peekable without full state save/restore *)
  (* For now, we don't support peek - could add caching if needed *)
  if t.finished then None
  else
    (* Just call next and the caller will have to deal with it *)
    next t
+
+
(** Apply [f] to every remaining event, in order. *)
let iter f t =
  let rec go () =
    match next t with
    | Some ev -> f ev; go ()
    | None -> ()
  in
  go ()

(** Left fold over every remaining event. *)
let fold f init t =
  let rec go acc =
    match next t with
    | Some ev -> go (f acc ev)
    | None -> acc
  in
  go init

(** Drain the parser into a list of events, in stream order. *)
let to_list t =
  List.rev (fold (fun acc ev -> ev :: acc) [] t)
+47
lib/position.ml
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** Position tracking for source locations *)
+
+
(* An immutable cursor into the input: absolute byte offset plus a
   human-readable line/column pair (both 1-indexed). *)
type t = {
  index : int; (** Byte offset from start *)
  line : int; (** 1-indexed line number *)
  column : int; (** 1-indexed column number *)
}

let initial = { index = 0; line = 1; column = 1 }

(** Step one byte forward on the same line. *)
let advance_byte t = { t with index = t.index + 1; column = t.column + 1 }

(** Step past a one-byte line break: next line, column back to 1. *)
let advance_line t = { index = t.index + 1; line = t.line + 1; column = 1 }

(** Advance over [c].  Only '\n' starts a new line here; '\r' counts as an
    ordinary byte (assumes the caller normalises CRLF — TODO confirm). *)
let advance_char c t =
  if Char.equal c '\n' then advance_line t else advance_byte t

(** Advance over one decoded Unicode scalar value: the byte offset grows by
    its UTF-8 encoded length while the column grows by one. *)
let advance_utf8 uchar t =
  let width = Uchar.utf_8_byte_length uchar in
  if Uchar.to_int uchar = 0x0A (* LF *) then
    { index = t.index + width; line = t.line + 1; column = 1 }
  else
    { t with index = t.index + width; column = t.column + 1 }

(** Advance [n] bytes assumed to contain no line breaks. *)
let advance_bytes n t = { t with index = t.index + n; column = t.column + n }

let pp fmt t = Format.fprintf fmt "line %d, column %d" t.line t.column
let to_string t = Format.asprintf "%a" pp t

(* Positions order by byte offset alone; line/column are derived data. *)
let compare a b = Int.compare a.index b.index
let equal a b = Int.equal a.index b.index
+62
lib/quoting.ml
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** YAML scalar quoting detection *)
+
+
(** [needs_quoting s] is [true] when [s] cannot be emitted as a plain
    (unquoted) YAML scalar without changing its meaning:
    - the empty string
    - a leading indicator character (including a leading space)
    - a trailing space or tab (parsers strip trailing white space from
      plain scalars, so it would be silently lost)
    - a reserved word (null, true, false, yes, no, on, off, inf, nan)
    - an embedded [':'], ['#'] or line break
    - anything that parses as a number (would round-trip as the wrong type)
    Over-quoting is always safe, so the checks err on the side of [true]. *)
let needs_quoting s =
  let len = String.length s in
  if len = 0 then true
  else
    let first = s.[0] in
    let last = s.[len - 1] in
    (* Check first character for indicators *)
    if first = '-' || first = '?' || first = ':' || first = ',' ||
       first = '[' || first = ']' || first = '{' || first = '}' ||
       first = '#' || first = '&' || first = '*' || first = '!' ||
       first = '|' || first = '>' || first = '\'' || first = '"' ||
       first = '%' || first = '@' || first = '`' || first = ' ' then
      true
    else if last = ' ' || last = '\t' then
      (* Fix: trailing white space would be stripped from a plain scalar. *)
      true
    else
      (* Check for reserved/special values *)
      let lower = String.lowercase_ascii s in
      if lower = "null" || lower = "true" || lower = "false" ||
         lower = "yes" || lower = "no" || lower = "on" || lower = "off" ||
         lower = "~" || lower = ".inf" || lower = "-.inf" || lower = ".nan" then
        true
      else
        (* Check for problematic characters *)
        try
          String.iter (fun c ->
            if c = ':' || c = '#' || c = '\n' || c = '\r' then
              raise Exit
          ) s;
          (* Check if it looks like a number *)
          (try ignore (Float.of_string s); true with _ -> false)
        with Exit -> true

(** Check if a string requires double quotes (vs single quotes).
    Returns [true] when [s] contains characters that can only be written
    with escape sequences: control characters, backslash or double quote. *)
let needs_double_quotes s =
  try
    String.iter (fun c ->
      if c = '\n' || c = '\r' || c = '\t' || c = '\\' ||
         c < ' ' || c = '"' then
        raise Exit
    ) s;
    false
  with Exit -> true

(** Choose the quoting style for a string value: double quotes when escape
    sequences are needed, single quotes when plain would be ambiguous,
    plain otherwise. *)
let choose_style s =
  match (needs_double_quotes s, needs_quoting s) with
  | (true, _) -> `Double_quoted
  | (_, true) -> `Single_quoted
  | _ -> `Plain
+
+66
lib/scalar.ml
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** YAML scalar values with metadata *)
+
+
(* A scalar node: its text plus presentation and resolution metadata. *)
type t = {
  anchor : string option;
  tag : string option;
  value : string;
  plain_implicit : bool;
  quoted_implicit : bool;
  style : Scalar_style.t;
}

(** [make value] builds a scalar with optional presentation metadata.
    Defaults: no anchor or tag, plain style, implicitly resolvable when
    plain but not when quoted. *)
let make
    ?(anchor : string option)
    ?(tag : string option)
    ?(plain_implicit = true)
    ?(quoted_implicit = false)
    ?(style = `Plain)
    value =
  { anchor; tag; value; plain_implicit; quoted_implicit; style }

(* Field accessors. *)
let value { value; _ } = value
let anchor { anchor; _ } = anchor
let tag { tag; _ } = tag
let style { style; _ } = style
let plain_implicit { plain_implicit; _ } = plain_implicit
let quoted_implicit { quoted_implicit; _ } = quoted_implicit

(* Functional updates. *)
let with_anchor anchor t = { t with anchor = Some anchor }
let with_tag tag t = { t with tag = Some tag }
let with_style style t = { t with style }

(** Debug printer: the value, then anchor/tag when present, then style. *)
let pp fmt t =
  Format.fprintf fmt "scalar(%S" t.value;
  Option.iter (Format.fprintf fmt ", anchor=%s") t.anchor;
  Option.iter (Format.fprintf fmt ", tag=%s") t.tag;
  Format.fprintf fmt ", style=%a)" Scalar_style.pp t.style

(** Structural equality over every field. *)
let equal a b =
  Option.equal String.equal a.anchor b.anchor
  && Option.equal String.equal a.tag b.tag
  && String.equal a.value b.value
  && Bool.equal a.plain_implicit b.plain_implicit
  && Bool.equal a.quoted_implicit b.quoted_implicit
  && Scalar_style.equal a.style b.style

(** Lexicographic order: anchor, tag, value, implicit flags, style. *)
let compare a b =
  (* [c <?> k] short-circuits on the first non-zero comparison. *)
  let ( <?> ) c k = if c <> 0 then c else k () in
  Option.compare String.compare a.anchor b.anchor <?> fun () ->
  Option.compare String.compare a.tag b.tag <?> fun () ->
  String.compare a.value b.value <?> fun () ->
  Bool.compare a.plain_implicit b.plain_implicit <?> fun () ->
  Bool.compare a.quoted_implicit b.quoted_implicit <?> fun () ->
  Scalar_style.compare a.style b.style
+39
lib/scalar_style.ml
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** Scalar formatting styles *)
+
+
type t = [
  | `Any (** Let emitter choose *)
  | `Plain (** Unquoted: foo *)
  | `Single_quoted (** 'foo' *)
  | `Double_quoted (** "foo" *)
  | `Literal (** | block *)
  | `Folded (** > block *)
]

(** Human-readable style name, used in diagnostics. *)
let to_string = function
  | `Any -> "any"
  | `Plain -> "plain"
  | `Single_quoted -> "single-quoted"
  | `Double_quoted -> "double-quoted"
  | `Literal -> "literal"
  | `Folded -> "folded"

let pp fmt t = Format.pp_print_string fmt (to_string t)

let equal (a : t) (b : t) = a = b

(* Total order by declaration rank. *)
let rank : t -> int = function
  | `Any -> 0
  | `Plain -> 1
  | `Single_quoted -> 2
  | `Double_quoted -> 3
  | `Literal -> 4
  | `Folded -> 5

let compare a b = Int.compare (rank a) (rank b)
+1575
lib/scanner.ml
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** YAML tokenizer/scanner with lookahead for ambiguity resolution *)
+
+
(** Simple key tracking for mapping key disambiguation *)
type simple_key = {
  sk_possible : bool;  (** A simple key may still start here *)
  sk_required : bool;  (** The grammar demands a key at this point *)
  sk_token_number : int;  (** Index of the token the key would become *)
  sk_position : Position.t;  (** Where the candidate key starts *)
}

(** Indent level tracking *)
type indent = {
  indent : int;  (** Column of this indentation level *)
  needs_block_end : bool;  (** Emit a Block_end token when unrolled *)
  sequence : bool; (** true if this is a sequence indent *)
}

(* Scanner state: reads characters from [input] and queues tokens. *)
type t = {
  input : Input.t;  (** Character source *)
  mutable tokens : Token.spanned Queue.t;  (** Tokens produced but not yet taken *)
  mutable token_number : int;  (** Count of tokens ever queued *)
  mutable tokens_taken : int;  (** Count of tokens handed to the consumer *)
  mutable stream_started : bool;  (** Stream_start has been emitted *)
  mutable stream_ended : bool;  (** Stream_end has been emitted *)
  mutable indent_stack : indent list; (** Stack of indentation levels *)
  mutable flow_level : int; (** Nesting depth in \[\] or \{\} *)
  mutable flow_indent : int; (** Column where outermost flow collection started *)
  mutable simple_keys : simple_key option list; (** Per flow-level simple key tracking *)
  mutable allow_simple_key : bool;  (** A simple key may start at the cursor *)
  mutable leading_whitespace : bool; (** True when at start of line (only whitespace seen) *)
  mutable document_has_content : bool; (** True if we've emitted content tokens in current document *)
  mutable adjacent_value_allowed_at : Position.t option; (** Position where adjacent : is allowed *)
  mutable pending_value : bool; (** True if we've emitted a KEY and are waiting for VALUE *)
  mutable flow_mapping_stack : bool list; (** Stack of whether each flow level is a mapping *)
}
+
+
(** [create input] is a scanner over [input] in its initial state: block
    context, empty indent stack, simple keys allowed. *)
let create input =
  {
    input;
    tokens = Queue.create ();
    token_number = 0;
    tokens_taken = 0;
    stream_started = false;
    stream_ended = false;
    indent_stack = [];
    flow_level = 0;
    flow_indent = 0;
    simple_keys = [None]; (* One entry for the base level *)
    allow_simple_key = true;
    leading_whitespace = true; (* Start at beginning of stream *)
    document_has_content = false;
    adjacent_value_allowed_at = None;
    pending_value = false;
    flow_mapping_stack = [];
  }

(* Convenience constructors over the various input sources. *)
let of_string s = create (Input.of_string s)
let of_input = create
let of_reader r = create (Input.of_reader r)

(** Current position of the underlying input cursor. *)
let position t = Input.position t.input
+
+
(** Append [token] with its [span] to the pending queue and bump the
    queued-token counter. *)
let emit t span token =
  Queue.add { Token.token; span } t.tokens;
  t.token_number <- succ t.token_number

(** Column (1-indexed) of the input cursor. *)
let column t = (Input.position t.input).column

(** Innermost block indentation level, or -1 at the stream's base level. *)
let current_indent t =
  match t.indent_stack with
  | { indent; _ } :: _ -> indent
  | [] -> -1
+
+
(** Skip blanks up to the end of the line, consuming a trailing comment if
    one is present.  A ['#'] only starts a comment when it is at the start
    of input or preceded by white space; otherwise [Invalid_comment] is
    raised.  Returns unit (the whitespace flag is internal only — the
    previous doc comment claiming a bool return was stale). *)
let skip_whitespace_and_comment t =
  let has_whitespace = ref false in
  (* Skip blanks (spaces and tabs) *)
  while Input.next_is_blank t.input do
    has_whitespace := true;
    ignore (Input.next t.input)
  done;
  (* Check for comment *)
  if Input.next_is (( = ) '#') t.input then begin
    (* Validate: comment must be preceded by whitespace or be at start of line *)
    if not !has_whitespace then begin
      (* Check if we're at the start of input or after whitespace (blank or line break) *)
      match Input.peek_back t.input with
      | None -> () (* Start of input - OK *)
      | Some c when Input.is_whitespace c -> () (* After whitespace - OK *)
      | _ ->
        (* Comment not preceded by whitespace - ERROR *)
        Error.raise_at (Input.mark t.input) Invalid_comment
    end;
    (* Skip to end of line *)
    while not (Input.is_eof t.input) && not (Input.next_is_break t.input) do
      ignore (Input.next t.input)
    done
  end
+
+
(** Consume a run of blanks; returns [(found_tab, found_space)] reporting
    which kinds of blank were seen (used for tab-indentation checks). *)
let skip_blanks_check_tabs t =
  let saw_tab = ref false in
  let saw_space = ref false in
  while Input.next_is_blank t.input do
    (match Input.peek t.input with
     | Some '\t' -> saw_tab := true
     | Some ' ' -> saw_space := true
     | _ -> ());
    ignore (Input.next t.input)
  done;
  (!saw_tab, !saw_space)
+
+
(** Advance the cursor to the next token start, skipping blanks, comments
    and (in the appropriate context) line breaks, while enforcing the
    tab-indentation rules.  Recurses after each consumed line break.
    Returns unit (the old doc comment claiming a bool return was stale). *)
let rec skip_to_next_token t =
  (* Check for tabs used as indentation in block context *)
  (match Input.peek t.input with
  | Some '\t' when t.flow_level = 0 && t.leading_whitespace &&
                   (column t - 1) < current_indent t ->
    (* Tab found in indentation zone - this is invalid *)
    (* Skip to end of line to check if line has content *)
    let start_pos = Input.mark t.input in
    while Input.next_is_blank t.input do
      ignore (Input.next t.input)
    done;
    (* If we have content on this line with a tab, raise error *)
    if not (Input.next_is_break t.input) && not (Input.is_eof t.input) then
      Error.raise_at start_pos Tab_in_indentation
  | _ -> ());

  (* Skip blanks and validate comments *)
  skip_whitespace_and_comment t;
  (* Skip line break in block context *)
  if t.flow_level = 0 && Input.next_is_break t.input then begin
    Input.consume_break t.input;
    t.allow_simple_key <- true;
    t.leading_whitespace <- true;
    skip_to_next_token t
  end
  else if t.flow_level > 0 && Input.next_is_whitespace t.input then begin
    (* In flow context, skip all whitespace including line breaks *)
    if Input.next_is_break t.input then begin
      Input.consume_break t.input;
      (* Allow simple keys after line breaks in flow context *)
      t.allow_simple_key <- true;
      (* After line break in flow, check for tabs at start of line (Y79Y/03)
         Tabs are not allowed as indentation - if tab is first char and results
         in a column less than flow_indent, it's an error *)
      if Input.next_is (( = ) '\t') t.input then begin
        (* Tab at start of line in flow context - skip tabs and check position *)
        let start_mark = Input.mark t.input in
        while Input.next_is (( = ) '\t') t.input do
          ignore (Input.next t.input)
        done;
        (* If only tabs were used (no spaces) and column < flow_indent, error *)
        if not (Input.next_is_break t.input) && not (Input.is_eof t.input) &&
           column t < t.flow_indent then
          Error.raise_at start_mark Invalid_flow_indentation
      end;
      skip_to_next_token t
    end else begin
      ignore (Input.next t.input);
      skip_to_next_token t
    end
  end
+
+
(** [roll_indent t col ~sequence] pushes a new block indentation level [col]
    onto the stack when in block context and [col] is deeper than the
    current level. Returns [true] if a level was pushed. *)
let roll_indent t col ~sequence =
  let should_push = t.flow_level = 0 && col > current_indent t in
  if should_push then
    t.indent_stack <-
      { indent = col; needs_block_end = true; sequence } :: t.indent_stack;
  should_push
+
+
(** [unroll_indent t col] pops indentation levels deeper than [col] while in
    block context, emitting a [Block_end] token for each popped level. *)
let unroll_indent t col =
  let rec pop () =
    if t.flow_level = 0 then
      match t.indent_stack with
      | { indent; needs_block_end = true; _ } :: rest when indent > col ->
        (* Each closed level produces a zero-width Block_end token here. *)
        let span = Span.point (Input.position t.input) in
        emit t span Token.Block_end;
        t.indent_stack <- rest;
        pop ()
      | _ -> ()
  in
  pop ()
+
+
(** [save_simple_key t] records the current position as a potential simple
    key for the innermost level, replacing any previous candidate there.
    The key is marked required only in block context at exactly the current
    block indentation — the one spot where a key token is mandatory (this
    mirrors saphyr and avoids false positives for values). *)
let save_simple_key t =
  if t.allow_simple_key then begin
    let required =
      t.flow_level = 0
      &&
      (match t.indent_stack with
      | { indent; needs_block_end = true; _ } :: _ -> indent = column t
      | _ -> false)
    in
    let sk =
      Some
        {
          sk_possible = true;
          sk_required = required;
          sk_token_number = t.token_number;
          sk_position = Input.position t.input;
        }
    in
    (* Replace whatever candidate was stored for the current level. *)
    t.simple_keys <-
      (match t.simple_keys with
      | _ :: rest -> sk :: rest
      | [] -> [ sk ])
  end
+
+
(** [remove_simple_key t] discards the simple-key candidate at the current
    level.
    @raise Error with [Expected_key] when the discarded key was required —
    a key was mandatory at that position but never materialized. *)
let remove_simple_key t =
  match t.simple_keys with
  | [] -> ()
  | Some { sk_required = true; sk_position; _ } :: _ ->
    Error.raise_at sk_position Expected_key
  | _ :: rest -> t.simple_keys <- None :: rest
+
+
(** [stale_simple_keys t] invalidates pending simple keys that can no longer
    become keys because the scanner has advanced to a later line — in block
    context a simple key must fit on a single line.
    @raise Error with [Expected_key] when a required key goes stale. *)
let stale_simple_keys t =
  let current_line = (Input.position t.input).line in
  let is_stale sk =
    sk.sk_possible && current_line > sk.sk_position.line && t.flow_level = 0
  in
  t.simple_keys <-
    List.map
      (function
        | Some sk when is_stale sk ->
          if sk.sk_required then Error.raise_at sk.sk_position Expected_key;
          None
        | other -> other)
      t.simple_keys
+
+
(** [scan_anchor_alias t] reads an anchor or alias name at the current
    position. Per YAML 1.2, a name is a run of characters that are not
    whitespace, not flow indicators ([,[]{}]), and not NUL — this matches
    the saphyr implementation (is_yaml_non_space && !is_flow).
    Returns the name and its source span.
    @raise Error with [Invalid_anchor] when the name is empty. *)
let scan_anchor_alias t =
  let start = Input.mark t.input in
  let buf = Buffer.create 16 in
  let is_name_char c =
    (not (Input.is_whitespace c))
    && (not (Input.is_flow_indicator c))
    && c <> '\x00'
  in
  let rec collect () =
    match Input.peek t.input with
    | Some c when is_name_char c ->
      Buffer.add_char buf c;
      ignore (Input.next t.input);
      collect ()
    | _ -> ()
  in
  collect ();
  let name = Buffer.contents buf in
  if name = "" then
    Error.raise_at start (Invalid_anchor "empty anchor name");
  (name, Span.make ~start ~stop:(Input.mark t.input))
+
+
(** [scan_tag_handle t] reads a tag handle: a leading ['!'], a run of word
    characters (alphanumerics and ['-']), then an optional closing ['!']
    (secondary or named handle). Returns the handle text including bangs.
    @raise Error with [Invalid_tag] when the leading ['!'] is missing. *)
let scan_tag_handle t =
  let start = Input.mark t.input in
  let buf = Buffer.create 16 in
  let take c =
    Buffer.add_char buf c;
    ignore (Input.next t.input)
  in
  (* Every handle opens with '!'. *)
  (match Input.peek t.input with
  | Some '!' -> take '!'
  | _ -> Error.raise_at start (Invalid_tag "expected '!'"));
  (* The handle name is made of word characters. *)
  let rec words () =
    match Input.peek t.input with
    | Some c when Input.is_alnum c || c = '-' ->
      take c;
      words ()
    | _ -> ()
  in
  words ();
  (* A trailing '!' closes a secondary/named handle. *)
  (match Input.peek t.input with
  | Some '!' -> take '!'
  | _ -> ());
  Buffer.contents buf
+
+
(** [scan_tag_suffix t] reads the suffix part of a tag (the text after the
    handle), decoding %XX percent-escapes into raw bytes as it goes. The
    suffix ends at whitespace or a flow indicator. An ill-formed percent
    escape is kept literally (the ['%'] is emitted and scanning continues)
    rather than rejected. *)
let scan_tag_suffix t =
  (* True for ASCII hex digits of either case. *)
  let is_hex_digit c =
    (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f')
  in
  (* Numeric value of a hex digit; the 0 fallback is unreachable because
     callers guard with [is_hex_digit]. *)
  let hex_val c =
    match c with
    | '0'..'9' -> Char.code c - Char.code '0'
    | 'A'..'F' -> Char.code c - Char.code 'A' + 10
    | 'a'..'f' -> Char.code c - Char.code 'a' + 10
    | _ -> 0
  in
  let buf = Buffer.create 32 in
  while
    match Input.peek t.input with
    | Some '%' ->
      (* Percent-encoded character *)
      ignore (Input.next t.input);
      (match Input.peek t.input, Input.peek_nth t.input 1 with
       | Some c1, Some c2 when is_hex_digit c1 && is_hex_digit c2 ->
         ignore (Input.next t.input);
         ignore (Input.next t.input);
         let code = (hex_val c1) * 16 + (hex_val c2) in
         Buffer.add_char buf (Char.chr code);
         true
       | _ ->
         (* Invalid percent encoding - keep the % ; the characters after it
            are left unconsumed and re-examined by the next iteration *)
         Buffer.add_char buf '%';
         true)
    | Some c when not (Input.is_whitespace c) &&
                  not (Input.is_flow_indicator c) ->
      Buffer.add_char buf c;
      ignore (Input.next t.input);
      true
    | _ -> false
  do () done;
  Buffer.contents buf
+
+
(** [scan_tag t] scans a full tag property starting at ['!']. Handles
    verbatim tags [!<uri>] (handle is [""], suffix is the URI), the
    non-specific tag [!], secondary-handle tags [!!suffix], named-handle
    tags [!name!suffix], and primary-handle tags [!suffix].
    Returns [(handle, suffix, span)].
    @raise Error with [Invalid_tag] for an unclosed verbatim tag, or when
    the tag is not followed by whitespace, a line break, or (in flow
    context) a flow indicator. *)
let scan_tag t =
  let start = Input.mark t.input in
  ignore (Input.next t.input); (* consume ! *)
  let handle, suffix =
    match Input.peek t.input with
    | Some '<' ->
      (* Verbatim tag: !<...> - handle is empty, suffix is full URI *)
      ignore (Input.next t.input);
      let buf = Buffer.create 32 in
      while
        match Input.peek t.input with
        | Some '>' -> false
        | Some c ->
          Buffer.add_char buf c;
          ignore (Input.next t.input);
          true
        | None -> Error.raise_at (Input.mark t.input) (Invalid_tag "unclosed verbatim tag")
      do () done;
      ignore (Input.next t.input); (* consume > *)
      ("", Buffer.contents buf)
    | Some c when Input.is_whitespace c || Input.is_flow_indicator c ->
      (* Non-specific tag: ! *)
      ("!", "")
    | Some '!' ->
      (* Secondary handle: !! *)
      ignore (Input.next t.input); (* consume second ! *)
      let suffix = scan_tag_suffix t in
      ("!!", suffix)
    | _ ->
      (* Primary handle or just suffix: !foo or !e!foo *)
      (* Read alphanumeric characters *)
      let buf = Buffer.create 16 in
      while
        match Input.peek t.input with
        | Some c when Input.is_alnum c || c = '-' ->
          Buffer.add_char buf c;
          ignore (Input.next t.input);
          true
        | _ -> false
      do () done;
      (* Check if next character is ! - if so, this is a named handle *)
      (match Input.peek t.input with
       | Some '!' ->
         (* Named handle like !e! *)
         ignore (Input.next t.input);
         let handle_name = Buffer.contents buf in
         let suffix = scan_tag_suffix t in
         ("!" ^ handle_name ^ "!", suffix)
       | _ ->
         (* Just ! followed by suffix; the word characters read so far are
            the start of the suffix *)
         ("!", Buffer.contents buf ^ scan_tag_suffix t))
  in
  (* Validate that tag is followed by whitespace, break, or (in flow) flow indicator *)
  (match Input.peek t.input with
   | None -> () (* EOF is ok *)
   | Some c when Input.is_whitespace c || Input.is_break c -> ()
   | Some c when t.flow_level > 0 && Input.is_flow_indicator c -> ()
   | _ -> Error.raise_at start (Invalid_tag "expected whitespace or line break after tag"));
  let span = Span.make ~start ~stop:(Input.mark t.input) in
  (handle, suffix, span)
+
+
(** [scan_single_quoted t] scans a single-quoted flow scalar starting at the
    opening quote. Handles the [''] escape, folds a single line break to a
    space, preserves runs of empty lines as newlines, discards trailing
    blanks before each break, and validates continuation-line indentation.
    Returns the unescaped value and its source span.
    @raise Error with [Unclosed_single_quote] at EOF or a document boundary,
    and with [Invalid_quoted_scalar_indentation] for under-indented
    continuation lines (tests QB6E, DK95). *)
let scan_single_quoted t =
  let start = Input.mark t.input in
  ignore (Input.next t.input); (* consume opening single-quote *)
  let buf = Buffer.create 64 in
  let whitespace = Buffer.create 16 in (* Track trailing whitespace *)

  (* Commit pending whitespace once it is known to precede real content;
     whitespace immediately before a break is discarded instead. *)
  let flush_whitespace () =
    if Buffer.length whitespace > 0 then begin
      Buffer.add_buffer buf whitespace;
      Buffer.clear whitespace
    end
  in

  let rec loop () =
    match Input.peek t.input with
    | None -> Error.raise_at start Unclosed_single_quote
    | Some '\'' ->
      ignore (Input.next t.input);
      (* Check for escaped quote ('') *)
      (match Input.peek t.input with
       | Some '\'' ->
         flush_whitespace ();
         Buffer.add_char buf '\'';
         ignore (Input.next t.input);
         loop ()
       | _ ->
         (* End of string - flush any trailing whitespace *)
         flush_whitespace ())
    | Some ' ' | Some '\t' ->
      (* Track whitespace - don't add to buf yet *)
      Buffer.add_char whitespace (Option.get (Input.peek t.input));
      ignore (Input.next t.input);
      loop ()
    | Some '\n' | Some '\r' ->
      (* Discard trailing whitespace before line break *)
      Buffer.clear whitespace;
      Input.consume_break t.input;
      (* Skip leading whitespace on next line *)
      while Input.next_is_blank t.input do
        ignore (Input.next t.input)
      done;
      (* Check for document boundary *)
      if Input.at_document_boundary t.input then
        Error.raise_at start Unclosed_single_quote;
      (* Check indentation: continuation must be > block indent (QB6E, DK95) *)
      let col = column t in
      let indent = current_indent t in
      if not (Input.is_eof t.input) && not (Input.next_is_break t.input) && col <= indent && indent >= 0 then
        Error.raise_at (Input.mark t.input) (Invalid_quoted_scalar_indentation "invalid indentation in quoted scalar");
      (* Count empty lines (consecutive line breaks) *)
      let empty_lines = ref 0 in
      while Input.next_is_break t.input do
        incr empty_lines;
        Input.consume_break t.input;
        while Input.next_is_blank t.input do
          ignore (Input.next t.input)
        done;
        if Input.at_document_boundary t.input then
          Error.raise_at start Unclosed_single_quote;
        (* Check indentation after each empty line too *)
        let col = column t in
        let indent = current_indent t in
        if not (Input.is_eof t.input) && not (Input.next_is_break t.input) && col <= indent && indent >= 0 then
          Error.raise_at (Input.mark t.input) (Invalid_quoted_scalar_indentation "invalid indentation in quoted scalar")
      done;
      (* Apply folding rules *)
      if !empty_lines > 0 then begin
        (* Empty lines: preserve as newlines *)
        for _ = 1 to !empty_lines do
          Buffer.add_char buf '\n'
        done
      end else
        (* Single break: fold to space (even at start of string) *)
        Buffer.add_char buf ' ';
      loop ()
    | Some c ->
      flush_whitespace ();
      Buffer.add_char buf c;
      ignore (Input.next t.input);
      loop ()
  in
  loop ();
  let span = Span.make ~start ~stop:(Input.mark t.input) in
  (Buffer.contents buf, span)
+
+
(** [decode_hex t len] reads exactly [len] hex digits from the input and
    returns the UTF-8 encoding of the resulting Unicode code point. Used by
    the [\xXX], [\uXXXX] and [\UXXXXXXXX] escapes of double-quoted scalars.
    @raise Error with [Invalid_hex_escape] when a non-hex digit is found, or
    when the digits denote a value with no UTF-8 encoding: above U+10FFFF,
    or a UTF-16 surrogate in U+D800..U+DFFF. (Without this check an
    8-digit [\U] escape such as [\UFFFFFFFF] made [Char.chr] below raise a
    bare [Invalid_argument] instead of a YAML error, and surrogates were
    silently emitted as invalid CESU-8 byte sequences.) *)
let decode_hex t len =
  let start = Input.mark t.input in
  let buf = Buffer.create len in
  for _ = 1 to len do
    match Input.peek t.input with
    | Some c when Input.is_hex c ->
      Buffer.add_char buf c;
      ignore (Input.next t.input)
    | _ ->
      Error.raise_at start (Invalid_hex_escape (Buffer.contents buf))
  done;
  let digits = Buffer.contents buf in
  let code = int_of_string ("0x" ^ digits) in
  (* Reject code points that cannot be encoded as UTF-8 (RFC 3629): values
     beyond the Unicode range and UTF-16 surrogates. *)
  if code > 0x10FFFF || (code >= 0xD800 && code <= 0xDFFF) then
    Error.raise_at start (Invalid_hex_escape digits);
  (* Standard UTF-8 encoding: 1-4 bytes depending on the code point. *)
  if code <= 0x7F then
    String.make 1 (Char.chr code)
  else if code <= 0x7FF then
    let b1 = 0xC0 lor (code lsr 6) in
    let b2 = 0x80 lor (code land 0x3F) in
    String.init 2 (fun i -> Char.chr (if i = 0 then b1 else b2))
  else if code <= 0xFFFF then
    let b1 = 0xE0 lor (code lsr 12) in
    let b2 = 0x80 lor ((code lsr 6) land 0x3F) in
    let b3 = 0x80 lor (code land 0x3F) in
    String.init 3 (fun i -> Char.chr (match i with 0 -> b1 | 1 -> b2 | _ -> b3))
  else
    let b1 = 0xF0 lor (code lsr 18) in
    let b2 = 0x80 lor ((code lsr 12) land 0x3F) in
    let b3 = 0x80 lor ((code lsr 6) land 0x3F) in
    let b4 = 0x80 lor (code land 0x3F) in
    String.init 4 (fun i ->
        Char.chr (match i with 0 -> b1 | 1 -> b2 | 2 -> b3 | _ -> b4))
+
+
(** [scan_double_quoted t] scans a double-quoted flow scalar starting at the
    opening quote. Decodes all backslash escapes (single-character escapes,
    [\x]/[\u]/[\U] hex escapes via [decode_hex], and the escaped-newline
    line continuation), folds a single line break to a space, preserves
    empty lines as newlines, and validates continuation-line indentation.
    Returns the unescaped value and its source span.
    @raise Error with [Unclosed_double_quote], [Invalid_escape_sequence] or
    [Invalid_quoted_scalar_indentation] as appropriate. *)
let scan_double_quoted t =
  let start = Input.mark t.input in
  ignore (Input.next t.input); (* consume opening double-quote *)
  let buf = Buffer.create 64 in
  let whitespace = Buffer.create 16 in (* Track pending whitespace *)

  (* Commit pending whitespace once it is known to precede real content. *)
  let flush_whitespace () =
    if Buffer.length whitespace > 0 then begin
      Buffer.add_buffer buf whitespace;
      Buffer.clear whitespace
    end
  in

  let rec loop () =
    match Input.peek t.input with
    | None -> Error.raise_at start Unclosed_double_quote
    | Some '"' ->
      (* Flush trailing whitespace before closing quote to preserve it *)
      flush_whitespace ();
      ignore (Input.next t.input)
    | Some ' ' | Some '\t' as c_opt ->
      (* Track whitespace - don't add to buf yet *)
      let c = match c_opt with Some c -> c | None -> assert false in
      Buffer.add_char whitespace c;
      ignore (Input.next t.input);
      loop ()
    | Some '\\' ->
      (* Escape sequence - this is non-whitespace content *)
      flush_whitespace (); (* Commit any pending whitespace *)
      ignore (Input.next t.input);
      (match Input.peek t.input with
       | None -> Error.raise_at start (Invalid_escape_sequence "\\<EOF>")
       | Some '0' -> Buffer.add_char buf '\x00'; ignore (Input.next t.input)
       | Some 'a' -> Buffer.add_char buf '\x07'; ignore (Input.next t.input)
       | Some 'b' -> Buffer.add_char buf '\x08'; ignore (Input.next t.input)
       | Some 't' | Some '\t' -> Buffer.add_char buf '\t'; ignore (Input.next t.input)
       | Some 'n' -> Buffer.add_char buf '\n'; ignore (Input.next t.input)
       | Some 'v' -> Buffer.add_char buf '\x0B'; ignore (Input.next t.input)
       | Some 'f' -> Buffer.add_char buf '\x0C'; ignore (Input.next t.input)
       | Some 'r' -> Buffer.add_char buf '\r'; ignore (Input.next t.input)
       | Some 'e' -> Buffer.add_char buf '\x1B'; ignore (Input.next t.input)
       | Some ' ' -> Buffer.add_char buf ' '; ignore (Input.next t.input)
       | Some '"' -> Buffer.add_char buf '"'; ignore (Input.next t.input)
       | Some '/' -> Buffer.add_char buf '/'; ignore (Input.next t.input)
       | Some '\\' -> Buffer.add_char buf '\\'; ignore (Input.next t.input)
       | Some 'N' -> Buffer.add_string buf "\xC2\x85"; ignore (Input.next t.input) (* NEL *)
       | Some '_' -> Buffer.add_string buf "\xC2\xA0"; ignore (Input.next t.input) (* NBSP *)
       | Some 'L' -> Buffer.add_string buf "\xE2\x80\xA8"; ignore (Input.next t.input) (* LS *)
       | Some 'P' -> Buffer.add_string buf "\xE2\x80\xA9"; ignore (Input.next t.input) (* PS *)
       | Some 'x' ->
         ignore (Input.next t.input);
         Buffer.add_string buf (decode_hex t 2)
       | Some 'u' ->
         ignore (Input.next t.input);
         Buffer.add_string buf (decode_hex t 4)
       | Some 'U' ->
         ignore (Input.next t.input);
         Buffer.add_string buf (decode_hex t 8)
       | Some '\n' | Some '\r' ->
         (* Line continuation escape: the break and following indentation
            vanish from the value *)
         Input.consume_break t.input;
         while Input.next_is_blank t.input do
           ignore (Input.next t.input)
         done
       | Some c ->
         Error.raise_at (Input.mark t.input)
           (Invalid_escape_sequence (Printf.sprintf "\\%c" c)));
      loop ()
    | Some '\n' | Some '\r' ->
      (* Line break: discard any pending trailing whitespace *)
      Buffer.clear whitespace;
      Input.consume_break t.input;
      (* Count consecutive line breaks (empty lines) *)
      let empty_lines = ref 0 in
      let continue = ref true in
      let started_with_tab = ref false in
      while !continue do
        (* Track if we start with a tab (for DK95/01 check) *)
        if Input.next_is (( = ) '\t') t.input then started_with_tab := true;
        (* Skip blanks (spaces/tabs) on the line *)
        while Input.next_is_blank t.input do
          ignore (Input.next t.input)
        done;
        (* Check if we hit another line break (empty line) *)
        if Input.next_is_break t.input then begin
          Input.consume_break t.input;
          incr empty_lines;
          started_with_tab := false (* Reset for next line *)
        end else
          continue := false
      done;
      (* Check for document boundary - this terminates the quoted string *)
      if Input.at_document_boundary t.input then
        Error.raise_at start Unclosed_double_quote;
      (* Check indentation: continuation must be > block indent (QB6E, DK95)
         Note: must be strictly greater than block indent, not just equal *)
      let col = column t in
      let indent = current_indent t in
      let start_col = start.column in
      (* DK95/01: if continuation started with tabs and column < start column, error *)
      if not (Input.is_eof t.input) && !started_with_tab && col < start_col then
        Error.raise_at (Input.mark t.input) (Invalid_quoted_scalar_indentation "invalid indentation in quoted scalar");
      if not (Input.is_eof t.input) && col <= indent && indent >= 0 then
        Error.raise_at (Input.mark t.input) (Invalid_quoted_scalar_indentation "invalid indentation in quoted scalar");
      (* Per YAML spec: single break = space, break + empty lines = newlines *)
      if !empty_lines > 0 then begin
        (* Empty lines: output N newlines where N = number of empty lines *)
        for _ = 1 to !empty_lines do
          Buffer.add_char buf '\n'
        done
      end else
        (* Single break folds to space *)
        Buffer.add_char buf ' ';
      loop ()
    | Some c ->
      (* Non-whitespace character *)
      flush_whitespace (); (* Commit any pending whitespace *)
      Buffer.add_char buf c;
      ignore (Input.next t.input);
      loop ()
  in
  loop ();
  let span = Span.make ~start ~stop:(Input.mark t.input) in
  (Buffer.contents buf, span)
+
+
(** [can_continue_plain t c ~in_flow] decides whether character [c] at the
    current position may be part of a plain scalar ([true]) or terminates
    it ([false]). Line breaks always terminate; flow indicators terminate
    in flow context; [':'] terminates only when followed by whitespace (or,
    in flow, a flow indicator); ['#'] terminates only when preceded by
    whitespace or a line break (i.e. when it starts a comment). *)
let can_continue_plain t c ~in_flow =
  if Input.is_break c then false
  else if in_flow && Input.is_flow_indicator c then false
  else
    match c with
    | ':' -> (
      (* ':' stays in the scalar unless what follows would turn it into a
         mapping-value indicator. *)
      match Input.peek_nth t.input 1 with
      | None -> true
      | Some next ->
        not (Input.is_whitespace next
             || (in_flow && Input.is_flow_indicator next)))
    | '#' -> (
      (* '#' opens a comment only after whitespace or at the start of a
         line; elsewhere it is scalar content. *)
      match Input.peek_back t.input with
      | None -> true (* At start - can't be a comment indicator *)
      | Some prev ->
        not (Input.is_whitespace prev || Input.is_break prev))
    | _ -> true
+
+
(** [scan_plain_scalar t] scans a plain (unquoted) scalar at the current
    position. Applies flow folding across lines (one break becomes a space,
    N+1 consecutive breaks become N newlines), discards blanks that trail a
    line, trims trailing blanks from the final value, and stops at whatever
    [can_continue_plain] rejects, at dedents in block context, and at
    document boundaries.
    Returns [(value, span, crossed_break)] where [crossed_break] reports
    whether the scalar continued past at least one line break.
    @raise Error with [Invalid_flow_indentation] when a flow scalar starts
    at or under the enclosing block indentation. *)
let scan_plain_scalar t =
  let start = Input.mark t.input in
  let in_flow = t.flow_level > 0 in
  let indent = current_indent t in
  (* In flow context, scalars must be indented more than the current block indent.
     This ensures that content at block indent or less ends the flow context. *)
  if in_flow && (column t - 1) < indent then
    Error.raise_at start Invalid_flow_indentation;
  let buf = Buffer.create 64 in
  (* [spaces] accumulates pending line breaks; [whitespace] accumulates
     pending blanks within the current line. *)
  let spaces = Buffer.create 16 in
  let whitespace = Buffer.create 16 in (* Track whitespace within a line *)
  let leading_blanks = ref false in

  let rec scan_line () =
    match Input.peek t.input with
    | None -> ()
    | Some c when Input.is_blank c && can_continue_plain t c ~in_flow ->
      (* Blank character within a line - save to whitespace buffer *)
      Buffer.add_char whitespace c;
      ignore (Input.next t.input);
      scan_line ()
    | Some c when can_continue_plain t c ~in_flow ->
      (* Non-blank character - process any pending breaks/whitespace first *)
      begin
        if Buffer.length spaces > 0 then begin
          if !leading_blanks then begin
            (* Fold line break *)
            if Buffer.contents spaces = "\n" then
              Buffer.add_char buf ' '
            else begin
              (* Multiple breaks - preserve all but first *)
              let s = Buffer.contents spaces in
              Buffer.add_substring buf s 1 (String.length s - 1)
            end
          end else
            Buffer.add_buffer buf spaces;
          Buffer.clear spaces
        end;
        (* Add any pending whitespace from within the line *)
        if Buffer.length whitespace > 0 then begin
          Buffer.add_buffer buf whitespace;
          Buffer.clear whitespace
        end;
        (* Add the character *)
        Buffer.add_char buf c;
        ignore (Input.next t.input);
        leading_blanks := false;
        scan_line ()
      end
    | _ -> ()
  in

  let rec scan_lines () =
    scan_line ();
    (* Check for line continuation *)
    if Input.next_is_break t.input then begin
      (* Discard any trailing whitespace from the current line *)
      Buffer.clear whitespace;
      (* Save the line break *)
      if !leading_blanks then begin
        (* We already had a break - this is an additional break (empty line) *)
        Buffer.add_char spaces '\n'
      end else begin
        (* First line break *)
        Buffer.clear spaces;
        Buffer.add_char spaces '\n';
        leading_blanks := true
      end;
      Input.consume_break t.input;
      (* Note: We do NOT set allow_simple_key here during plain scalar scanning.
         Setting it here would incorrectly allow ':' that appears on a continuation
         line to become a mapping indicator. The flag will be set properly after
         the scalar ends and skip_to_next_token processes line breaks. *)
      (* Skip leading blanks on the next line *)
      while Input.next_is_blank t.input do
        ignore (Input.next t.input)
      done;
      let col = (Input.position t.input).column in
      (* Check indentation - stop if we're at or before the containing block's indent *)
      (* However, allow empty lines (line breaks) to continue even if dedented *)
      if Input.next_is_break t.input then
        scan_lines () (* Empty line - continue *)
      else if not in_flow && col <= indent then
        () (* Stop - dedented or at parent level in block context *)
      else if Input.at_document_boundary t.input then
        () (* Stop - document boundary *)
      else
        scan_lines ()
    end
  in

  scan_lines ();
  let value = Buffer.contents buf in
  (* Trim trailing whitespace (spaces and tabs) *)
  let value =
    let len = String.length value in
    let rec find_end i =
      if i < 0 then 0
      else match value.[i] with
        | ' ' | '\t' -> find_end (i - 1)
        | _ -> i + 1
    in
    let end_pos = find_end (len - 1) in
    String.sub value 0 end_pos
  in
  let span = Span.make ~start ~stop:(Input.mark t.input) in
  (* Return value, span, and whether we ended with leading blanks (crossed a line break) *)
  (value, span, !leading_blanks)
+
+
(** [scan_block_scalar t literal] scans a block scalar starting at its ['|']
    (when [literal] is [true]) or ['>'] (folded) indicator. Parses the
    optional header (explicit indentation indicator 1-9 and chomping
    indicator ['-']/['+'] in either order), then reads content lines:
    literal scalars keep line breaks verbatim, folded scalars fold breaks
    between non-more-indented lines. Chomping (strip/clip/keep) is applied
    to trailing breaks at the end.
    Returns [(value, style, span)] with [style] being [`Literal] or
    [`Folded].
    @raise Error with [Invalid_block_scalar_header] for a malformed header
    or a wrongly indented line, and with [Tab_in_indentation] when a tab is
    used to indent content (test Y79Y). *)
let scan_block_scalar t literal =
  let start = Input.mark t.input in
  ignore (Input.next t.input); (* consume | or > *)

  (* Parse header: optional indentation indicator and chomping *)
  let explicit_indent = ref None in
  let chomping = ref Chomping.Clip in

  (* First character of header *)
  (match Input.peek t.input with
   | Some c when Input.is_digit c && c <> '0' ->
     explicit_indent := Some (Char.code c - Char.code '0');
     ignore (Input.next t.input)
   | Some '-' -> chomping := Chomping.Strip; ignore (Input.next t.input)
   | Some '+' -> chomping := Chomping.Keep; ignore (Input.next t.input)
   | _ -> ());

  (* Second character of header (the indicator not seen first, if any) *)
  (match Input.peek t.input with
   | Some c when Input.is_digit c && c <> '0' && !explicit_indent = None ->
     explicit_indent := Some (Char.code c - Char.code '0');
     ignore (Input.next t.input)
   | Some '-' when !chomping = Chomping.Clip ->
     chomping := Chomping.Strip; ignore (Input.next t.input)
   | Some '+' when !chomping = Chomping.Clip ->
     chomping := Chomping.Keep; ignore (Input.next t.input)
   | _ -> ());

  (* Skip whitespace and optional comment *)
  skip_whitespace_and_comment t;

  (* Consume line break *)
  if Input.next_is_break t.input then
    Input.consume_break t.input
  else if not (Input.is_eof t.input) then
    Error.raise_at (Input.mark t.input)
      (Invalid_block_scalar_header "expected newline after header");

  let base_indent = current_indent t in
  (* base_indent is the indent level from the stack, -1 if empty.
     It's used directly for comparisons in implicit indent case. *)
  let content_indent = ref (
    match !explicit_indent with
    | Some n ->
      (* Explicit indent: base_indent is 1-indexed column, convert to 0-indexed.
         content_indent = (base_indent - 1) + n, but at least n for document level. *)
      let base_level = max 0 (base_indent - 1) in
      base_level + n
    | None -> 0 (* Will be determined by first non-empty line *)
  ) in

  let buf = Buffer.create 256 in
  let trailing_breaks = Buffer.create 16 in
  let leading_blank = ref false in (* Was the previous line "more indented"? *)
  let max_empty_line_indent = ref 0 in (* Track max indent of empty lines before first content *)

  (* Skip to content indentation, skipping empty lines.
     Returns the number of spaces actually skipped (important for detecting dedentation). *)
  let rec skip_to_content_indent () =
    if !content_indent > 0 then begin
      (* Explicit indent - skip up to content_indent spaces *)
      let spaces_skipped = ref 0 in
      while !spaces_skipped < !content_indent && Input.next_is (( = ) ' ') t.input do
        incr spaces_skipped;
        ignore (Input.next t.input)
      done;

      (* Check if this line is empty (only spaces/tabs until break/eof) *)
      if Input.next_is_break t.input then begin
        (* Empty line - record the break and continue *)
        Buffer.add_char trailing_breaks '\n';
        Input.consume_break t.input;
        skip_to_content_indent ()
      end else if !spaces_skipped < !content_indent then begin
        (* Line starts with fewer spaces than content_indent - dedented *)
        !spaces_skipped
      end else if Input.next_is_blank t.input then begin
        (* Line has spaces/tabs beyond content_indent - could be whitespace content or empty line.
           For literal scalars, whitespace-only lines ARE content (not empty).
           For folded scalars, whitespace-only lines that are "more indented" are preserved. *)
        if literal then
          (* Literal: whitespace beyond content_indent is content, let read_lines handle it *)
          !content_indent
        else begin
          (* Folded: check if rest is only blanks *)
          let idx = ref 0 in
          while match Input.peek_nth t.input !idx with
            | Some c when Input.is_blank c -> incr idx; true
            | _ -> false
          do () done;
          match Input.peek_nth t.input (!idx) with
          | None | Some '\n' | Some '\r' ->
            (* Empty/whitespace-only line in folded - skip spaces *)
            while Input.next_is_blank t.input do
              ignore (Input.next t.input)
            done;
            Buffer.add_char trailing_breaks '\n';
            Input.consume_break t.input;
            skip_to_content_indent ()
          | _ ->
            (* Has non-whitespace content *)
            !content_indent
        end
      end else
        !content_indent
    end else begin
      (* Implicit indent - skip empty lines without consuming spaces.
         Note: Only SPACES count as indentation. Tabs are content, not indentation.
         So we only check for spaces when determining if a line is "empty". *)
      if Input.next_is_break t.input then begin
        Buffer.add_char trailing_breaks '\n';
        Input.consume_break t.input;
        skip_to_content_indent ()
      end else if Input.next_is (( = ) ' ') t.input then begin
        (* Check if line is empty (only spaces before break) *)
        let idx = ref 0 in
        while match Input.peek_nth t.input !idx with
          | Some ' ' -> incr idx; true
          | _ -> false
        do () done;
        match Input.peek_nth t.input (!idx) with
        | None | Some '\n' | Some '\r' ->
          (* Line has only spaces - empty line *)
          (* Track max indent of empty lines for later validation *)
          if !idx > !max_empty_line_indent then
            max_empty_line_indent := !idx;
          while Input.next_is (( = ) ' ') t.input do
            ignore (Input.next t.input)
          done;
          Buffer.add_char trailing_breaks '\n';
          Input.consume_break t.input;
          skip_to_content_indent ()
        | _ ->
          (* Has content (including tabs which are content, not indentation) *)
          0
      end else if Input.next_is (( = ) '\t') t.input then begin
        (* Tab at start of line in implicit indent mode - this is an error (Y79Y)
           because tabs cannot be used as indentation in YAML *)
        Error.raise_at (Input.mark t.input) Tab_in_indentation
      end else
        (* Not at break or space - other content character *)
        0
    end
  in

  (* Read content *)
  let rec read_lines () =
    let spaces_skipped = skip_to_content_indent () in

    (* Check if we're at content *)
    if Input.is_eof t.input then ()
    else if Input.at_document_boundary t.input then ()
    else begin
      (* Count additional leading spaces beyond what was skipped *)
      let extra_spaces = ref 0 in
      while Input.next_is (( = ) ' ') t.input do
        incr extra_spaces;
        ignore (Input.next t.input)
      done;

      (* Calculate actual line indentation *)
      let line_indent = spaces_skipped + !extra_spaces in

      (* Determine content indent from first content line (implicit case) *)
      let first_line = !content_indent = 0 in
      (* base_indent is 1-indexed column, convert to 0-indexed for comparison with line_indent.
         If base_indent = -1 (empty stack), then base_level = -1 means col 0 is valid. *)
      let base_level = base_indent - 1 in
      let should_process =
        if !content_indent = 0 then begin
          (* For implicit indent, content must be more indented than base_level. *)
          if line_indent <= base_level then
            false (* No content - first line not indented enough *)
          else begin
            (* Validate: first content line must be indented at least as much as
               the maximum indent seen on empty lines before it (5LLU, S98Z, W9L4) *)
            if line_indent < !max_empty_line_indent && line_indent > base_level then
              Error.raise_at (Input.mark t.input)
                (Invalid_block_scalar_header "wrongly indented line in block scalar");
            content_indent := line_indent;
            true
          end
        end else if line_indent < !content_indent then
          false (* Dedented - done with content *)
        else
          true
      in

      if should_process then begin
        (* Check if current line is "more indented" (has extra indent or starts with whitespace).
           For folded scalars, lines that start with any whitespace (space or tab) after the
           content indentation are "more indented" and preserve breaks.
           Note: we check Input.next_is_blank BEFORE reading content to see if content starts with whitespace. *)
        let trailing_blank = line_indent > !content_indent || Input.next_is_blank t.input in

        (* Add trailing breaks to buffer *)
        if Buffer.length buf > 0 then begin
          if Buffer.length trailing_breaks > 0 then begin
            if literal then
              Buffer.add_buffer buf trailing_breaks
            else begin
              (* Folded scalar: fold only if both previous and current lines are not more-indented *)
              if not !leading_blank && not trailing_blank then begin
                let breaks = Buffer.contents trailing_breaks in
                if String.length breaks = 1 then
                  Buffer.add_char buf ' '
                else
                  Buffer.add_substring buf breaks 1 (String.length breaks - 1)
              end else begin
                (* Preserve breaks for more-indented lines *)
                Buffer.add_buffer buf trailing_breaks
              end
            end
          end else if not literal then
            Buffer.add_char buf ' '
        end else
          Buffer.add_buffer buf trailing_breaks;
        Buffer.clear trailing_breaks;

        (* Add extra indentation for literal or more-indented folded lines *)
        (* On the first line (when determining content_indent), we've already consumed all spaces,
           so we should NOT add any back. On subsequent lines, we add only the spaces beyond content_indent. *)
        if not first_line && (literal || (!extra_spaces > 0 && not literal)) then begin
          for _ = 1 to !extra_spaces do
            Buffer.add_char buf ' '
          done
        end;

        (* Read line content *)
        while not (Input.is_eof t.input) && not (Input.next_is_break t.input) do
          Buffer.add_char buf (Input.next_exn t.input)
        done;

        (* Record trailing break *)
        if Input.next_is_break t.input then begin
          Buffer.add_char trailing_breaks '\n';
          Input.consume_break t.input
        end;

        (* Update leading_blank for next iteration *)
        leading_blank := trailing_blank;

        read_lines ()
      end
    end
  in

  read_lines ();

  (* Apply chomping *)
  let value =
    let content = Buffer.contents buf in
    match !chomping with
    | Chomping.Strip -> content
    | Chomping.Clip ->
      (* NOTE(review): Clip appends "\n" whenever content is non-empty, even
         when [trailing_breaks] is empty (scalar ended at EOF with no final
         break). The spec keeps the final break only "if any" — confirm
         against the YAML test suite. *)
      if String.length content > 0 then content ^ "\n" else content
    | Chomping.Keep ->
      content ^ Buffer.contents trailing_breaks
  in

  let span = Span.make ~start ~stop:(Input.mark t.input) in
  let style = if literal then `Literal else `Folded in
  (value, style, span)
+
+
(** Scan directive (after %).

    Recognizes [%YAML M.m] and [%TAG handle prefix]; any other directive
    name is reserved and skipped to end of line (the YAML spec says to
    ignore reserved directives).  Returns the token and its source span.
    Raises via [Error.raise_at] on a malformed version directive. *)
let scan_directive t =
  let start = Input.mark t.input in
  ignore (Input.next t.input); (* consume % *)

  (* Read directive name: alphanumerics and '-' only. *)
  let name_buf = Buffer.create 16 in
  while
    match Input.peek t.input with
    | Some c when Input.is_alnum c || c = '-' ->
      Buffer.add_char name_buf c;
      ignore (Input.next t.input);
      true
    | _ -> false
  do () done;
  let name = Buffer.contents name_buf in

  (* Skip blanks between the name and its arguments. *)
  while Input.next_is_blank t.input do
    ignore (Input.next t.input)
  done;

  match name with
  | "YAML" ->
    (* Version directive: %YAML 1.2 *)
    let major = ref 0 in
    let minor = ref 0 in
    (* Read major version.
       NOTE(review): an empty digit run silently yields 0 here — confirm
       the caller rejects implausible versions. *)
    while Input.next_is_digit t.input do
      major := !major * 10 + (Char.code (Input.next_exn t.input) - Char.code '0')
    done;
    (* Expect the '.' separating major from minor. *)
    (match Input.peek t.input with
    | Some '.' -> ignore (Input.next t.input)
    | _ -> Error.raise_at (Input.mark t.input) (Invalid_yaml_version "expected '.'"));
    (* Read minor version *)
    while Input.next_is_digit t.input do
      minor := !minor * 10 + (Char.code (Input.next_exn t.input) - Char.code '0')
    done;
    (* Validate: only whitespace and comments allowed before line break (MUS6) *)
    skip_whitespace_and_comment t;
    if not (Input.next_is_break t.input) && not (Input.is_eof t.input) then
      Error.raise_at (Input.mark t.input) (Invalid_directive "expected comment or line break after version");
    let span = Span.make ~start ~stop:(Input.mark t.input) in
    Token.Version_directive { major = !major; minor = !minor }, span

  | "TAG" ->
    (* Tag directive: %TAG !foo! tag:example.com,2000: *)
    let handle = scan_tag_handle t in
    (* Skip blanks between handle and prefix. *)
    while Input.next_is_blank t.input do
      ignore (Input.next t.input)
    done;
    (* Read prefix: everything up to the next whitespace. *)
    let prefix_buf = Buffer.create 32 in
    while
      match Input.peek t.input with
      | Some c when not (Input.is_whitespace c) ->
        Buffer.add_char prefix_buf c;
        ignore (Input.next t.input);
        true
      | _ -> false
    do () done;
    let prefix = Buffer.contents prefix_buf in
    let span = Span.make ~start ~stop:(Input.mark t.input) in
    Token.Tag_directive { handle; prefix }, span

  | _ ->
    (* Reserved/Unknown directive - skip to end of line and ignore *)
    (* Per YAML spec, reserved directives should be ignored with a warning *)
    while not (Input.is_eof t.input) && not (Input.next_is_break t.input) do
      ignore (Input.next t.input)
    done;
    let span = Span.make ~start ~stop:(Input.mark t.input) in
    (* Return an empty tag directive token to indicate directive was
       processed but ignored — presumably a downstream no-op; verify. *)
    Token.Tag_directive { handle = ""; prefix = "" }, span
+
(** Fetch the next token(s) into the queue.

    Central dispatcher: skips insignificant whitespace/comments, expires
    stale simple keys, closes block collections whose indentation ended,
    then branches on the next character.  The match order is load-bearing:
    '-', '?' and ':' are tried as indicators first and only fall through
    to plain scalars when the indicator conditions fail. *)
let rec fetch_next_token t =
  skip_to_next_token t;
  stale_simple_keys t;
  let col = column t in
  (* Unroll indents that are deeper than current column.
     Note: we use col, not col-1, to allow entries at the same level. *)
  unroll_indent t col;

  (* We're about to process actual content, not leading whitespace *)
  t.leading_whitespace <- false;

  if Input.is_eof t.input then
    fetch_stream_end t
  else if Input.at_document_boundary t.input then
    fetch_document_indicator t
  else begin
    match Input.peek t.input with
    | None -> fetch_stream_end t
    (* '%' introduces a directive only in column 1. *)
    | Some '%' when (Input.position t.input).column = 1 ->
      fetch_directive t
    | Some '[' -> fetch_flow_collection_start t Token.Flow_sequence_start
    | Some '{' -> fetch_flow_collection_start t Token.Flow_mapping_start
    | Some ']' -> fetch_flow_collection_end t Token.Flow_sequence_end
    | Some '}' -> fetch_flow_collection_end t Token.Flow_mapping_end
    | Some ',' -> fetch_flow_entry t
    (* "- " is a block entry only in block context. *)
    | Some '-' when t.flow_level = 0 && check_block_entry t ->
      fetch_block_entry t
    | Some '?' when check_key t ->
      fetch_key t
    | Some ':' when check_value t ->
      fetch_value t
    | Some '*' -> fetch_alias t
    | Some '&' -> fetch_anchor t
    | Some '!' -> fetch_tag t
    (* Block scalars exist only in block context. *)
    | Some '|' when t.flow_level = 0 -> fetch_block_scalar t true
    | Some '>' when t.flow_level = 0 -> fetch_block_scalar t false
    | Some '\'' -> fetch_single_quoted t
    | Some '"' -> fetch_double_quoted t
    (* '-', '?', ':' not acting as indicators may start a plain scalar. *)
    | Some '-' when can_start_plain t ->
      fetch_plain_scalar t
    | Some '?' when can_start_plain t ->
      fetch_plain_scalar t
    | Some ':' when can_start_plain t ->
      fetch_plain_scalar t
    | Some c when can_start_plain_char c t ->
      fetch_plain_scalar t
    | Some c ->
      Error.raise_at (Input.mark t.input) (Unexpected_character c)
  end
+
and fetch_stream_end t =
  (* Emit STREAM-END exactly once, after closing all open block levels. *)
  if not t.stream_ended then begin
    unroll_indent t (-1);
    remove_simple_key t;
    t.allow_simple_key <- false;
    t.stream_ended <- true;
    emit t (Span.point (Input.mark t.input)) Token.Stream_end
  end

and fetch_document_indicator t =
  (* "---" or "..." at column 1: close all block levels first. *)
  unroll_indent t (-1);
  remove_simple_key t;
  t.allow_simple_key <- false;
  let start = Input.mark t.input in
  let indicator = Input.peek_string t.input 3 in
  Input.skip t.input 3;
  let span = Span.make ~start ~stop:(Input.mark t.input) in
  let token =
    if indicator = "---" then Token.Document_start else Token.Document_end
  in
  if indicator = "..." then begin
    (* "..." closes the document: reset the content flag and insist that
       only blanks, a comment, or end of line follow on this line. *)
    t.document_has_content <- false;
    while Input.next_is_blank t.input do ignore (Input.next t.input) done;
    match Input.peek t.input with
    | None | Some '#' -> ()                       (* EOF or comment is ok *)
    | Some c when Input.is_break c -> ()
    | _ -> Error.raise_at start (Invalid_directive "content not allowed after document end marker on same line")
  end;
  emit t span token
+
and fetch_directive t =
  (* A directive is legal only at stream start or right after a "..."
     document end marker; any content in the current document means the
     document must be closed explicitly first. *)
  if t.document_has_content then
    Error.raise_at (Input.mark t.input)
      (Unexpected_token "directives must be separated from document content by document end marker (...)");
  unroll_indent t (-1);
  remove_simple_key t;
  t.allow_simple_key <- false;
  let tok, sp = scan_directive t in
  emit t sp tok
+
and fetch_flow_collection_start t token_type =
  (* '[' or '{': the collection itself may act as a simple key. *)
  save_simple_key t;
  (* Record indent of outermost flow collection *)
  if t.flow_level = 0 then
    t.flow_indent <- column t;
  t.flow_level <- t.flow_level + 1;
  (* Track whether this is a mapping or sequence *)
  let is_mapping = (token_type = Token.Flow_mapping_start) in
  t.flow_mapping_stack <- is_mapping :: t.flow_mapping_stack;
  t.allow_simple_key <- true;
  (* Push a fresh simple-key slot for the new nesting level. *)
  t.simple_keys <- None :: t.simple_keys;
  t.document_has_content <- true;
  let start = Input.mark t.input in
  ignore (Input.next t.input);
  let span = Span.make ~start ~stop:(Input.mark t.input) in
  emit t span token_type

and fetch_flow_collection_end t token_type =
  (* ']' or '}': pop one level of flow nesting.
     NOTE(review): flow_level is decremented unconditionally, so an
     unmatched closer drives it negative — presumably the parser reports
     the mismatch; verify. *)
  remove_simple_key t;
  t.flow_level <- t.flow_level - 1;
  t.flow_mapping_stack <- (match t.flow_mapping_stack with _ :: rest -> rest | [] -> []);
  t.simple_keys <- (match t.simple_keys with _ :: rest -> rest | [] -> []);
  t.allow_simple_key <- false;
  let start = Input.mark t.input in
  ignore (Input.next t.input);
  (* Allow adjacent values after flow collection ends *)
  if t.flow_level > 0 then
    t.adjacent_value_allowed_at <- Some (Input.position t.input);
  let span = Span.make ~start ~stop:(Input.mark t.input) in
  emit t span token_type
+
and fetch_flow_entry t =
  (* ',' separator inside a flow collection; a new simple key may follow. *)
  remove_simple_key t;
  t.allow_simple_key <- true;
  let start = Input.mark t.input in
  ignore (Input.next t.input);
  emit t (Span.make ~start ~stop:(Input.mark t.input)) Token.Flow_entry

and check_block_entry t =
  (* '-' introduces a block entry only when followed by whitespace or EOF. *)
  match Input.peek_nth t.input 1 with
  | Some c -> Input.is_whitespace c
  | None -> true
+
and fetch_block_entry t =
  (* "- " block sequence entry. *)
  if t.flow_level = 0 then begin
    (* Block entries require allow_simple_key to be true.
       This prevents block sequences on the same line as a mapping value,
       e.g., "key: - a" is invalid. *)
    if not t.allow_simple_key then
      Error.raise_at (Input.mark t.input) Block_sequence_disallowed;
    let col = column t in
    (* Open a new block sequence if this entry is more indented. *)
    if roll_indent t col ~sequence:true then begin
      let span = Span.point (Input.mark t.input) in
      emit t span Token.Block_sequence_start
    end
  end;
  remove_simple_key t;
  t.allow_simple_key <- true;
  t.document_has_content <- true;
  let start = Input.mark t.input in
  ignore (Input.next t.input);

  (* Check for tabs after - : pattern like -\t- is invalid *)
  let (found_tabs, _found_spaces) = skip_blanks_check_tabs t in
  if found_tabs then begin
    (* If we found tabs and next char is - followed by whitespace, error *)
    match Input.peek t.input with
    | Some '-' ->
      (match Input.peek_nth t.input 1 with
      | None -> Error.raise_at start Tab_in_indentation
      | Some c when Input.is_whitespace c ->
        Error.raise_at start Tab_in_indentation
      | Some _ -> ())
    | _ -> ()
  end;

  let span = Span.make ~start ~stop:(Input.mark t.input) in
  emit t span Token.Block_entry
+
and check_key t =
  (* '?' starts an explicit key when followed by whitespace, EOF, or (in
     flow context) a flow indicator. *)
  match Input.peek_nth t.input 1 with
  | Some c ->
    Input.is_whitespace c
    || (t.flow_level > 0 && Input.is_flow_indicator c)
  | None -> true

and fetch_key t =
  if t.flow_level = 0 then begin
    (* An explicit key in block context needs a legal key position and
       may open a new block mapping at this column. *)
    if not t.allow_simple_key then
      Error.raise_at (Input.mark t.input) Expected_key;
    if roll_indent t (column t) ~sequence:false then
      emit t (Span.point (Input.mark t.input)) Token.Block_mapping_start
  end;
  remove_simple_key t;
  t.allow_simple_key <- t.flow_level = 0;
  t.document_has_content <- true;
  let start = Input.mark t.input in
  ignore (Input.next t.input);

  (* Tabs after '?' are forbidden as indentation in block context
     (patterns like "?\t-" or "?\tkey"). *)
  let found_tabs, _ = skip_blanks_check_tabs t in
  if found_tabs && t.flow_level = 0 then
    Error.raise_at start Tab_in_indentation;

  emit t (Span.make ~start ~stop:(Input.mark t.input)) Token.Key;
  (* A KEY token has been emitted; its VALUE is now pending. *)
  t.pending_value <- true
+
and check_value t =
  (* ':' introduces a value when followed by whitespace/EOF, by a flow
     indicator in flow context, or when standing exactly at a position
     where an adjacent value is permitted (JSON-style "key":value). *)
  match Input.peek_nth t.input 1 with
  | None -> true
  | Some c ->
    if Input.is_whitespace c then true
    else if t.flow_level > 0 && Input.is_flow_indicator c then true
    else if t.flow_level > 0 then
      (match t.adjacent_value_allowed_at with
       | Some pos ->
         let here = Input.position t.input in
         pos.Position.line = here.Position.line
         && pos.Position.column = here.Position.column
       | None -> false)
    else false
+
and fetch_value t =
  (* ':' — either completes a previously recorded simple key ("k: v") or
     introduces a complex/empty-key value.  The subtle part: when a simple
     key was recorded, a KEY token (and possibly BLOCK-MAPPING-START) must
     be inserted retroactively into the token queue, *before* the tokens
     of the key itself. *)
  let start = Input.mark t.input in
  (* Check for simple key *)
  let used_simple_key =
    match t.simple_keys with
    | Some sk :: _ when sk.sk_possible ->
      (* In implicit flow mapping (inside a flow sequence), key and : must be on the same line.
         In explicit flow mapping { }, key and : can span lines. *)
      let is_implicit_flow_mapping = match t.flow_mapping_stack with
        | false :: _ -> true (* false = we're in a sequence, so any mapping is implicit *)
        | _ -> false
      in
      if is_implicit_flow_mapping && sk.sk_position.line < (Input.position t.input).line then
        Error.raise_at start Illegal_flow_key_line;
      (* Insert KEY token before the simple key value *)
      let key_span = Span.point sk.sk_position in
      let key_token = { Token.token = Token.Key; span = key_span } in
      (* We need to insert at the right position: queue index of the key's
         first token, relative to tokens already handed out. *)
      let tokens = Queue.to_seq t.tokens |> Array.of_seq in
      Queue.clear t.tokens;
      let insert_pos = sk.sk_token_number - t.tokens_taken in
      Array.iteri (fun i tok ->
        if i = insert_pos then Queue.add key_token t.tokens;
        Queue.add tok t.tokens
      ) tokens;
      (* Key starts beyond the current queue: append instead. *)
      if insert_pos >= Array.length tokens then
        Queue.add key_token t.tokens;
      t.token_number <- t.token_number + 1;
      t.pending_value <- true; (* We've inserted a KEY token, now waiting for VALUE *)
      (* Roll indent for implicit block mapping *)
      if t.flow_level = 0 then begin
        let col = sk.sk_position.column in
        if roll_indent t col ~sequence:false then begin
          let span = Span.point sk.sk_position in
          (* Insert block mapping start before key.  insert_pos is reused,
             so BLOCK-MAPPING-START lands just before the KEY inserted
             above (which now occupies that index). *)
          let bm_token = { Token.token = Token.Block_mapping_start; span } in
          let tokens = Queue.to_seq t.tokens |> Array.of_seq in
          Queue.clear t.tokens;
          Array.iteri (fun i tok ->
            if i = insert_pos then Queue.add bm_token t.tokens;
            Queue.add tok t.tokens
          ) tokens;
          if insert_pos >= Array.length tokens then
            Queue.add bm_token t.tokens;
          t.token_number <- t.token_number + 1
        end
      end;
      (* The simple key is consumed; clear its slot. *)
      t.simple_keys <- None :: (List.tl t.simple_keys);
      true
    | _ ->
      (* No simple key - this is a complex value (or empty key) *)
      if t.flow_level = 0 then begin
        if not t.allow_simple_key then
          Error.raise_at (Input.mark t.input) Expected_key;
        let col = column t in
        if roll_indent t col ~sequence:false then begin
          let span = Span.point (Input.mark t.input) in
          emit t span Token.Block_mapping_start
        end
        (* Note: We don't emit KEY here. Empty key handling is done by the parser,
           which emits empty scalar when it sees VALUE without preceding KEY. *)
      end;
      false
  in
  remove_simple_key t;
  (* In block context without simple key, allow simple keys for compact mappings like ": moon: white"
     In flow context or after using a simple key, disallow simple keys *)
  t.allow_simple_key <- (not used_simple_key) && (t.flow_level = 0);
  t.document_has_content <- true;
  let start = Input.mark t.input in
  ignore (Input.next t.input);

  (* Check for tabs after : : patterns like :\t- or :\tkey: are invalid in block context (Y79Y/09)
     However, :\t bar (tab followed by space then content) is valid (6BCT) *)
  let (found_tabs, found_spaces) = skip_blanks_check_tabs t in
  if found_tabs && not found_spaces && t.flow_level = 0 then begin
    (* In block context, tabs-only after : followed by indicator or alphanumeric are not allowed *)
    match Input.peek t.input with
    | Some ('-' | '?') ->
      Error.raise_at start Tab_in_indentation
    | Some c when (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') ->
      (* Tab-only followed by alphanumeric - likely a key, which is invalid *)
      Error.raise_at start Tab_in_indentation
    | _ -> ()
  end;

  (* Skip any comment that may follow the colon and whitespace *)
  skip_whitespace_and_comment t;

  let span = Span.make ~start ~stop:(Input.mark t.input) in
  emit t span Token.Value;
  t.pending_value <- false (* We've emitted a VALUE, no longer pending *)
+
and fetch_alias t =
  (* "*name": reference to a previously anchored node. *)
  save_simple_key t;
  t.allow_simple_key <- false;
  t.document_has_content <- true;
  let start = Input.mark t.input in
  ignore (Input.next t.input); (* skip '*' *)
  let name, sp = scan_anchor_alias t in
  emit t (Span.make ~start ~stop:sp.stop) (Token.Alias name)

and fetch_anchor t =
  (* "&name": attach an anchor to the following node. *)
  save_simple_key t;
  t.allow_simple_key <- false;
  t.document_has_content <- true;
  let start = Input.mark t.input in
  ignore (Input.next t.input); (* skip '&' *)
  let name, sp = scan_anchor_alias t in
  emit t (Span.make ~start ~stop:sp.stop) (Token.Anchor name)

and fetch_tag t =
  (* "!..." tag property; the scanner consumes the whole tag itself. *)
  save_simple_key t;
  t.allow_simple_key <- false;
  t.document_has_content <- true;
  let handle, suffix, sp = scan_tag t in
  emit t sp (Token.Tag { handle; suffix })
+
and fetch_block_scalar t literal =
  (* '|' (literal) or '>' (folded) block scalar. *)
  remove_simple_key t;
  t.allow_simple_key <- true;
  t.document_has_content <- true;
  let v, sty, sp = scan_block_scalar t literal in
  emit t sp (Token.Scalar { style = sty; value = v })
+
and fetch_single_quoted t =
  (* 'single-quoted' scalar; may itself be a simple key. *)
  save_simple_key t;
  t.allow_simple_key <- false;
  t.document_has_content <- true;
  let v, sp = scan_single_quoted t in
  (* JSON compatibility: in flow context a ':' may follow a quoted scalar
     with no separating space (adjacent value). *)
  skip_to_next_token t;
  if t.flow_level > 0 then
    t.adjacent_value_allowed_at <- Some (Input.position t.input);
  emit t sp (Token.Scalar { style = `Single_quoted; value = v })

and fetch_double_quoted t =
  (* "double-quoted" scalar; may itself be a simple key. *)
  save_simple_key t;
  t.allow_simple_key <- false;
  t.document_has_content <- true;
  let v, sp = scan_double_quoted t in
  (* JSON compatibility: in flow context a ':' may follow a quoted scalar
     with no separating space (adjacent value). *)
  skip_to_next_token t;
  if t.flow_level > 0 then
    t.adjacent_value_allowed_at <- Some (Input.position t.input);
  emit t sp (Token.Scalar { style = `Double_quoted; value = v })
+
and can_start_plain t =
  (* '-', '?' or ':' starts a plain scalar when the next char is
     non-whitespace and, inside flow context, not a flow indicator. *)
  match Input.peek_nth t.input 1 with
  | Some c when Input.is_whitespace c -> false
  | Some c -> t.flow_level = 0 || not (Input.is_flow_indicator c)
  | None -> false

and can_start_plain_char c _t =
  (* Any character that is neither whitespace nor an indicator begins a
     plain scalar. *)
  not (Input.is_whitespace c) && not (Input.is_indicator c)
+
and fetch_plain_scalar t =
  save_simple_key t;
  t.allow_simple_key <- false;
  t.document_has_content <- true;
  let v, sp, crossed_newline = scan_plain_scalar t in
  (* If the scalar ended after crossing a line break, the scanner already
     consumed that break plus the leading whitespace of the next line, so
     a simple key may legitimately start next. *)
  if crossed_newline then t.allow_simple_key <- true;
  emit t sp (Token.Scalar { style = `Plain; value = v })
+
(** Whether more tokens must be fetched before one may be handed out:
    either the queue is empty, or an unresolved simple key could still
    force a KEY token to be inserted ahead of a queued token. *)
let need_more_tokens t =
  if t.stream_ended then false
  else if Queue.is_empty t.tokens then true
  else
    let blocks_first_token = function
      | Some sk -> sk.sk_possible && sk.sk_token_number >= t.tokens_taken
      | None -> false
    in
    List.exists blocks_first_token t.simple_keys
+
(** Ensure at least one token can be returned safely.  Lazily emits
    STREAM-START (with the detected encoding) on first use, then fetches
    until no simple key can retroactively change the queue head. *)
let ensure_tokens t =
  if not t.stream_started then begin
    t.stream_started <- true;
    let encoding, _ = Encoding.detect (Input.source t.input) in
    emit t (Span.point (Input.position t.input)) (Token.Stream_start encoding)
  end;
  while need_more_tokens t do
    fetch_next_token t
  done
+
(** Pop the next token, or [None] once the stream is exhausted. *)
let next t =
  ensure_tokens t;
  match Queue.is_empty t.tokens with
  | true -> None
  | false ->
    t.tokens_taken <- t.tokens_taken + 1;
    Some (Queue.pop t.tokens)

(** Look at the next token without consuming it. *)
let peek t =
  ensure_tokens t;
  Queue.peek_opt t.tokens
+
(** Apply [f] to every remaining token, in stream order. *)
let rec iter f t =
  match next t with
  | Some tok -> f tok; iter f t
  | None -> ()

(** Left fold over every remaining token. *)
let rec fold f init t =
  match next t with
  | Some tok -> fold f (f init tok) t
  | None -> init

(** Drain the scanner into a list of tokens, in stream order. *)
let to_list t =
  List.rev (fold (fun acc tok -> tok :: acc) [] t)
lib/sequence.ml
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** YAML sequence (array) values with metadata *)

(** A sequence node: ordered members plus presentation metadata. *)
type 'a t = {
  anchor : string option;   (* "&name" anchor attached to the node, if any *)
  tag : string option;      (* explicit tag, if any *)
  implicit : bool;          (* whether the tag may be left implicit on output *)
  style : Layout_style.t;   (* block vs flow rendering preference *)
  members : 'a list;        (* the elements, in order *)
}

(** [make members] builds a sequence node.
    Defaults: no anchor, no tag, implicit tag, [`Any] layout style. *)
let make
    ?(anchor : string option)
    ?(tag : string option)
    ?(implicit = true)
    ?(style = `Any)
    members =
  { anchor; tag; implicit; style; members }
+
(* Field accessors. *)
let members { members; _ } = members
let anchor { anchor; _ } = anchor
let tag { tag; _ } = tag
let implicit { implicit; _ } = implicit
let style { style; _ } = style

(* Functional updates. *)
let with_anchor a s = { s with anchor = Some a }
let with_tag tg s = { s with tag = Some tg }
let with_style st s = { s with style = st }

(* List-like operations, delegated to [List] on [members]. *)
let map f s = { s with members = List.map f s.members }
let length s = List.length s.members
let is_empty s = match s.members with [] -> true | _ :: _ -> false
let nth s n = List.nth s.members n
let nth_opt s n = List.nth_opt s.members n
let iter f s = List.iter f s.members
let fold f init s = List.fold_left f init s.members
+
(** [pp pp_elem fmt t] prints a debug rendering shaped like
    [sequence(anchor=a, tag=x, style=s, members=[...])].
    The anchor and tag fields are omitted when absent. *)
let pp pp_elem fmt t =
  Format.fprintf fmt "@[<hv 2>sequence(@,";
  (match t.anchor with
  | Some a -> Format.fprintf fmt "anchor=%s,@ " a
  | None -> ());
  (match t.tag with
  | Some tag -> Format.fprintf fmt "tag=%s,@ " tag
  | None -> ());
  Format.fprintf fmt "style=%a,@ " Layout_style.pp t.style;
  Format.fprintf fmt "members=[@,%a@]@,)"
    (Format.pp_print_list ~pp_sep:(fun fmt () -> Format.fprintf fmt ",@ ") pp_elem)
    t.members
+
(** Structural equality; [eq] compares members. *)
let equal eq a b =
  Option.equal String.equal a.anchor b.anchor
  && Option.equal String.equal a.tag b.tag
  && Bool.equal a.implicit b.implicit
  && Layout_style.equal a.style b.style
  && List.equal eq a.members b.members

(** Lexicographic order: anchor, tag, implicit, style, then members
    (compared with [cmp]). *)
let compare cmp a b =
  let ( <?> ) c rest = if c <> 0 then c else rest () in
  Option.compare String.compare a.anchor b.anchor <?> fun () ->
  Option.compare String.compare a.tag b.tag <?> fun () ->
  Bool.compare a.implicit b.implicit <?> fun () ->
  Layout_style.compare a.style b.style <?> fun () ->
  List.compare cmp a.members b.members
+369
lib/serialize.ml
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** Serialize - high-level serialization to buffers and event streams
+
+
This module provides functions to convert YAML values to events and strings.
+
Both {!Emitter.t}-based and function-based emission APIs are provided. *)
+
+
(** {1 Internal Helpers} *)
+
+
(** Emit a YAML node using an emit function.
    This is the core implementation shared by the Emitter.t-based and
    function-based APIs.  Scalars and aliases produce one event each;
    sequences and mappings produce start/end events around recursively
    emitted children.  Empty collections are forced to flow style so
    they render as "[]" / "{}". *)
let rec emit_yaml_node_impl ~emit (yaml : Yaml.t) =
  match yaml with
  | `Scalar s ->
    emit (Event.Scalar {
      anchor = Scalar.anchor s;
      tag = Scalar.tag s;
      value = Scalar.value s;
      plain_implicit = Scalar.plain_implicit s;
      quoted_implicit = Scalar.quoted_implicit s;
      style = Scalar.style s;
    })

  | `Alias name ->
    emit (Event.Alias { anchor = name })

  | `A seq ->
    let members = Sequence.members seq in
    let style =
      (* Force flow style for empty sequences *)
      if members = [] then `Flow
      else Sequence.style seq
    in
    emit (Event.Sequence_start {
      anchor = Sequence.anchor seq;
      tag = Sequence.tag seq;
      implicit = Sequence.implicit seq;
      style;
    });
    List.iter (emit_yaml_node_impl ~emit) members;
    emit Event.Sequence_end

  | `O map ->
    let members = Mapping.members map in
    let style =
      (* Force flow style for empty mappings *)
      if members = [] then `Flow
      else Mapping.style map
    in
    emit (Event.Mapping_start {
      anchor = Mapping.anchor map;
      tag = Mapping.tag map;
      implicit = Mapping.implicit map;
      style;
    });
    (* Keys and values are emitted alternately, in pair order. *)
    List.iter (fun (k, v) ->
      emit_yaml_node_impl ~emit k;
      emit_yaml_node_impl ~emit v
    ) members;
    emit Event.Mapping_end
+
(** Emit a Value node using an emit function.
    This is the core implementation shared by the Emitter.t-based and
    function-based APIs.  Scalars get canonical YAML spellings ("null",
    "true"/"false", ".nan"/".inf"); strings and mapping keys get a
    quoting style chosen by {!Quoting.choose_style}.  [config] only
    selects block vs flow layout for non-empty collections. *)
let rec emit_value_node_impl ~emit ~config (value : Value.t) =
  match value with
  | `Null ->
    emit (Event.Scalar {
      anchor = None; tag = None;
      value = "null";
      plain_implicit = true; quoted_implicit = false;
      style = `Plain;
    })

  | `Bool b ->
    emit (Event.Scalar {
      anchor = None; tag = None;
      value = if b then "true" else "false";
      plain_implicit = true; quoted_implicit = false;
      style = `Plain;
    })

  | `Float f ->
    let value =
      match Float.classify_float f with
      | FP_nan -> ".nan"
      | FP_infinite -> if f > 0.0 then ".inf" else "-.inf"
      | _ ->
        (* NOTE(review): integral floats print without a decimal point
           ("5" rather than "5.0"), so they re-parse as integers —
           confirm this round-trip loss is intended. *)
        if Float.is_integer f && Float.abs f < 1e15 then
          Printf.sprintf "%.0f" f
        else
          Printf.sprintf "%g" f
    in
    emit (Event.Scalar {
      anchor = None; tag = None;
      value;
      plain_implicit = true; quoted_implicit = false;
      style = `Plain;
    })

  | `String s ->
    let style = Quoting.choose_style s in
    emit (Event.Scalar {
      anchor = None; tag = None;
      value = s;
      plain_implicit = style = `Plain;
      quoted_implicit = style <> `Plain;
      style;
    })

  | `A items ->
    let style =
      (* Force flow style for empty sequences *)
      if items = [] then `Flow
      else if config.Emitter.layout_style = `Flow then `Flow
      else `Block
    in
    emit (Event.Sequence_start {
      anchor = None; tag = None;
      implicit = true;
      style;
    });
    List.iter (emit_value_node_impl ~emit ~config) items;
    emit Event.Sequence_end

  | `O pairs ->
    let style =
      (* Force flow style for empty mappings *)
      if pairs = [] then `Flow
      else if config.Emitter.layout_style = `Flow then `Flow
      else `Block
    in
    emit (Event.Mapping_start {
      anchor = None; tag = None;
      implicit = true;
      style;
    });
    (* Keys are always strings in Value.t; quote each as needed. *)
    List.iter (fun (k, v) ->
      let style = Quoting.choose_style k in
      emit (Event.Scalar {
        anchor = None; tag = None;
        value = k;
        plain_implicit = style = `Plain;
        quoted_implicit = style <> `Plain;
        style;
      });
      emit_value_node_impl ~emit ~config v
    ) pairs;
    emit Event.Mapping_end
+
(** Strip anchors from a YAML tree (used when resolving aliases for output).
    Rebuilds each node without its anchor; tags, implicit flags and
    styles are preserved.  Aliases are returned untouched.  Anchor-free
    scalars are returned as-is to avoid reallocation. *)
let rec strip_anchors (yaml : Yaml.t) : Yaml.t =
  match yaml with
  | `Scalar s ->
    if Option.is_none (Scalar.anchor s) then yaml
    else
      `Scalar (Scalar.make
        ?tag:(Scalar.tag s)
        ~plain_implicit:(Scalar.plain_implicit s)
        ~quoted_implicit:(Scalar.quoted_implicit s)
        ~style:(Scalar.style s)
        (Scalar.value s))
  | `Alias _ -> yaml
  | `A seq ->
    `A (Sequence.make
      ?tag:(Sequence.tag seq)
      ~implicit:(Sequence.implicit seq)
      ~style:(Sequence.style seq)
      (List.map strip_anchors (Sequence.members seq)))
  | `O map ->
    `O (Mapping.make
      ?tag:(Mapping.tag map)
      ~implicit:(Mapping.implicit map)
      ~style:(Mapping.style map)
      (List.map (fun (k, v) -> (strip_anchors k, strip_anchors v)) (Mapping.members map)))
+
(** Emit one document: DOCUMENT-START, the root node (an empty plain
    scalar when the document has no root), then DOCUMENT-END.
    When [resolve_aliases] is true (default), aliases are replaced by
    their anchored subtrees and anchors are stripped before emission. *)
let emit_document_impl ?(resolve_aliases = true) ~emit doc =
  emit (Event.Document_start {
    version = Document.version doc;
    implicit = Document.implicit_start doc;
  });
  (match Document.root doc with
  | None ->
    (* An empty document is represented by an empty plain scalar. *)
    emit (Event.Scalar {
      anchor = None; tag = None;
      value = "";
      plain_implicit = true; quoted_implicit = false;
      style = `Plain;
    })
  | Some root ->
    let node =
      if resolve_aliases
      then strip_anchors (Yaml.resolve_aliases root)
      else root
    in
    emit_yaml_node_impl ~emit node);
  emit (Event.Document_end { implicit = Document.implicit_end doc })
+
(** {1 Emitter.t-based API} *)

(** Emit a single YAML node to emitter [t]. *)
let emit_yaml_node t yaml = emit_yaml_node_impl ~emit:(Emitter.emit t) yaml

(** Emit [yaml] as a complete one-document stream to emitter [t]. *)
let emit_yaml t yaml =
  let cfg = Emitter.config t in
  Emitter.emit t (Event.Stream_start { encoding = cfg.encoding });
  Emitter.emit t (Event.Document_start { version = None; implicit = true });
  emit_yaml_node t yaml;
  Emitter.emit t (Event.Document_end { implicit = true });
  Emitter.emit t Event.Stream_end

(** Emit a single Value node to emitter [t]. *)
let emit_value_node t value =
  emit_value_node_impl ~emit:(Emitter.emit t) ~config:(Emitter.config t) value

(** Emit [value] as a complete one-document stream to emitter [t]. *)
let emit_value t value =
  let cfg = Emitter.config t in
  Emitter.emit t (Event.Stream_start { encoding = cfg.encoding });
  Emitter.emit t (Event.Document_start { version = None; implicit = true });
  emit_value_node t value;
  Emitter.emit t (Event.Document_end { implicit = true });
  Emitter.emit t Event.Stream_end

(** Emit a document to emitter [t]; see {!emit_document_impl}. *)
let emit_document ?resolve_aliases t doc =
  emit_document_impl ?resolve_aliases ~emit:(Emitter.emit t) doc
+
(** {1 Buffer-based API} *)

(** Serialize a Value to a buffer.

    @param config Emitter configuration (default: {!Emitter.default_config})
    @param buffer Optional buffer to append to; a new one is created if absent
    @return The buffer containing the serialized YAML *)
let value_to_buffer ?(config = Emitter.default_config) ?buffer value =
  let out = match buffer with Some b -> b | None -> Buffer.create 1024 in
  let em = Emitter.create ~config () in
  emit_value em value;
  Buffer.add_string out (Emitter.contents em);
  out

(** Serialize a Yaml.t to a buffer.

    @param config Emitter configuration (default: {!Emitter.default_config})
    @param buffer Optional buffer to append to; a new one is created if absent
    @return The buffer containing the serialized YAML *)
let yaml_to_buffer ?(config = Emitter.default_config) ?buffer yaml =
  let out = match buffer with Some b -> b | None -> Buffer.create 1024 in
  let em = Emitter.create ~config () in
  emit_yaml em yaml;
  Buffer.add_string out (Emitter.contents em);
  out

(** Serialize a list of documents to a buffer as one stream.

    @param config Emitter configuration (default: {!Emitter.default_config})
    @param resolve_aliases Resolve aliases before emission (default: true)
    @param buffer Optional buffer to append to; a new one is created if absent
    @return The buffer containing the serialized YAML *)
let documents_to_buffer ?(config = Emitter.default_config) ?(resolve_aliases = true) ?buffer documents =
  let out = match buffer with Some b -> b | None -> Buffer.create 1024 in
  let em = Emitter.create ~config () in
  Emitter.emit em (Event.Stream_start { encoding = config.encoding });
  List.iter (emit_document ~resolve_aliases em) documents;
  Emitter.emit em Event.Stream_end;
  Buffer.add_string out (Emitter.contents em);
  out
+
(** {1 String-based API} *)

(** Serialize a Value to a string.
    @param config Emitter configuration (default: {!Emitter.default_config}) *)
let value_to_string ?(config = Emitter.default_config) value =
  value_to_buffer ~config value |> Buffer.contents

(** Serialize a Yaml.t to a string.
    @param config Emitter configuration (default: {!Emitter.default_config}) *)
let yaml_to_string ?(config = Emitter.default_config) yaml =
  yaml_to_buffer ~config yaml |> Buffer.contents

(** Serialize documents to a string.
    @param config Emitter configuration (default: {!Emitter.default_config})
    @param resolve_aliases Resolve aliases before emission (default: true) *)
let documents_to_string ?(config = Emitter.default_config) ?(resolve_aliases = true) documents =
  documents_to_buffer ~config ~resolve_aliases documents |> Buffer.contents
+
(** {1 Writer-based API}

    These functions write directly to a bytesrw [Bytes.Writer.t],
    enabling streaming output without an intermediate string allocation.
    NOTE(review): [eod] is documented as "write end-of-data" but the
    implementation calls [Emitter.flush]; confirm flush also signals
    end-of-data on the underlying writer. *)

(** Serialize a Value directly to a Bytes.Writer.
    @param config Emitter configuration (default: {!Emitter.default_config})
    @param eod Whether to flush after serialization (default: true) *)
let value_to_writer ?(config = Emitter.default_config) ?(eod = true) writer value =
  let em = Emitter.of_writer ~config writer in
  emit_value em value;
  if eod then Emitter.flush em

(** Serialize a Yaml.t directly to a Bytes.Writer.
    @param config Emitter configuration (default: {!Emitter.default_config})
    @param eod Whether to flush after serialization (default: true) *)
let yaml_to_writer ?(config = Emitter.default_config) ?(eod = true) writer yaml =
  let em = Emitter.of_writer ~config writer in
  emit_yaml em yaml;
  if eod then Emitter.flush em

(** Serialize documents directly to a Bytes.Writer as one stream.
    @param config Emitter configuration (default: {!Emitter.default_config})
    @param resolve_aliases Resolve aliases before emission (default: true)
    @param eod Whether to flush after serialization (default: true) *)
let documents_to_writer ?(config = Emitter.default_config) ?(resolve_aliases = true) ?(eod = true) writer documents =
  let em = Emitter.of_writer ~config writer in
  Emitter.emit em (Event.Stream_start { encoding = config.encoding });
  List.iter (emit_document ~resolve_aliases em) documents;
  Emitter.emit em Event.Stream_end;
  if eod then Emitter.flush em
+
(** {1 Function-based API}

    These functions take an emit function [Event.t -> unit] instead of an
    {!Emitter.t}, so they compose with any event sink (streaming writers,
    custom processors, ...).

    Note: [emit_yaml] and [emit_value] below deliberately shadow the
    [Emitter.t]-based functions of the same names defined earlier in this
    file. *)

(** Emit a single YAML node through [emitter]. *)
let emit_yaml_node_fn ~emitter yaml = emit_yaml_node_impl ~emit:emitter yaml

(** Emit a complete stream (stream and document framing included) for a
    Yaml.t, using [emitter] as the event sink. *)
let emit_yaml ~emitter ~config yaml =
  let encoding = config.Emitter.encoding in
  emitter (Event.Stream_start { encoding });
  emitter (Event.Document_start { version = None; implicit = true });
  emit_yaml_node_fn ~emitter yaml;
  emitter (Event.Document_end { implicit = true });
  emitter Event.Stream_end

(** Emit a single Value node through [emitter]. *)
let emit_value_node_fn ~emitter ~config value =
  emit_value_node_impl ~emit:emitter ~config value

(** Emit a complete stream (stream and document framing included) for a
    Value, using [emitter] as the event sink. *)
let emit_value ~emitter ~config value =
  let encoding = config.Emitter.encoding in
  emitter (Event.Stream_start { encoding });
  emitter (Event.Document_start { version = None; implicit = true });
  emit_value_node_fn ~emitter ~config value;
  emitter (Event.Document_end { implicit = true });
  emitter Event.Stream_end

(** Emit one document through [emitter]. *)
let emit_document_fn ?resolve_aliases ~emitter doc =
  emit_document_impl ?resolve_aliases ~emit:emitter doc

(** Emit a stream of documents through [emitter].

    @param resolve_aliases Whether to resolve aliases before emission
    (default: true) *)
let emit_documents ~emitter ~config ?(resolve_aliases = true) documents =
  emitter (Event.Stream_start { encoding = config.Emitter.encoding });
  List.iter (fun doc -> emit_document_fn ~resolve_aliases ~emitter doc) documents;
  emitter Event.Stream_end
+40
lib/span.ml
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** Source spans representing ranges in input *)
+
+
type t = {
+
start : Position.t;
+
stop : Position.t;
+
}
+
+
let make ~start ~stop = { start; stop }
+
+
let point pos = { start = pos; stop = pos }
+
+
let merge a b =
+
let start = if Position.compare a.start b.start <= 0 then a.start else b.start in
+
let stop = if Position.compare a.stop b.stop >= 0 then a.stop else b.stop in
+
{ start; stop }
+
+
let extend span pos =
+
{ span with stop = pos }
+
+
let pp fmt t =
+
if t.start.line = t.stop.line then
+
Format.fprintf fmt "line %d, columns %d-%d"
+
t.start.line t.start.column t.stop.column
+
else
+
Format.fprintf fmt "lines %d-%d" t.start.line t.stop.line
+
+
let to_string t =
+
Format.asprintf "%a" pp t
+
+
let compare a b =
+
let c = Position.compare a.start b.start in
+
if c <> 0 then c else Position.compare a.stop b.stop
+
+
let equal a b =
+
Position.equal a.start b.start && Position.equal a.stop b.stop
+73
lib/tag.ml
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** YAML tags for type information *)
+
+
type t = {
+
handle : string; (** e.g., "!" or "!!" or "!foo!" *)
+
suffix : string; (** e.g., "str", "int", "custom/type" *)
+
}
+
+
let make ~handle ~suffix = { handle; suffix }
+
+
let of_string s =
+
let len = String.length s in
+
match len with
+
| 0 -> None
+
| _ when s.[0] <> '!' -> None
+
| 1 -> Some { handle = "!"; suffix = "" }
+
| _ ->
+
match s.[1] with
+
| '!' -> (* !! handle *)
+
Some { handle = "!!"; suffix = String.sub s 2 (len - 2) }
+
| '<' -> (* Verbatim tag !<...> *)
+
if len > 2 && s.[len - 1] = '>' then
+
Some { handle = "!"; suffix = String.sub s 2 (len - 3) }
+
else
+
None
+
| _ -> (* Primary handle or local tag *)
+
Some { handle = "!"; suffix = String.sub s 1 (len - 1) }
+
+
let to_string t =
+
if t.handle = "!" && t.suffix = "" then "!"
+
else t.handle ^ t.suffix
+
+
let to_uri t =
+
match t.handle with
+
| "!!" -> "tag:yaml.org,2002:" ^ t.suffix
+
| "!" -> "!" ^ t.suffix
+
| h -> h ^ t.suffix
+
+
let pp fmt t =
+
Format.pp_print_string fmt (to_string t)
+
+
let equal a b =
+
String.equal a.handle b.handle && String.equal a.suffix b.suffix
+
+
let compare a b =
+
let c = String.compare a.handle b.handle in
+
if c <> 0 then c else String.compare a.suffix b.suffix
+
+
(** Standard tags *)
+
+
let null = { handle = "!!"; suffix = "null" }
+
let bool = { handle = "!!"; suffix = "bool" }
+
let int = { handle = "!!"; suffix = "int" }
+
let float = { handle = "!!"; suffix = "float" }
+
let str = { handle = "!!"; suffix = "str" }
+
let seq = { handle = "!!"; suffix = "seq" }
+
let map = { handle = "!!"; suffix = "map" }
+
let binary = { handle = "!!"; suffix = "binary" }
+
let timestamp = { handle = "!!"; suffix = "timestamp" }
+
+
(** Check if tag matches a standard type *)
+
+
let is_null t = equal t null || (t.handle = "!" && t.suffix = "")
+
let is_bool t = equal t bool
+
let is_int t = equal t int
+
let is_float t = equal t float
+
let is_str t = equal t str
+
let is_seq t = equal t seq
+
let is_map t = equal t map
+83
lib/token.ml
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** YAML token types produced by the scanner *)
+
+
type t =
+
| Stream_start of Encoding.t
+
| Stream_end
+
| Version_directive of { major : int; minor : int }
+
| Tag_directive of { handle : string; prefix : string }
+
| Document_start (** --- *)
+
| Document_end (** ... *)
+
| Block_sequence_start
+
| Block_mapping_start
+
| Block_entry (** [-] *)
+
| Block_end (** implicit, from dedent *)
+
| Flow_sequence_start (** \[ *)
+
| Flow_sequence_end (** \] *)
+
| Flow_mapping_start (** \{ *)
+
| Flow_mapping_end (** \} *)
+
| Flow_entry (** [,] *)
+
| Key (** ? or implicit key *)
+
| Value (** : *)
+
| Anchor of string (** &name *)
+
| Alias of string (** *name *)
+
| Tag of { handle : string; suffix : string }
+
| Scalar of { style : Scalar_style.t; value : string }
+
+
type spanned = {
+
token : t;
+
span : Span.t;
+
}
+
+
let pp_token fmt = function
+
| Stream_start enc ->
+
Format.fprintf fmt "STREAM-START(%a)" Encoding.pp enc
+
| Stream_end ->
+
Format.fprintf fmt "STREAM-END"
+
| Version_directive { major; minor } ->
+
Format.fprintf fmt "VERSION-DIRECTIVE(%d.%d)" major minor
+
| Tag_directive { handle; prefix } ->
+
Format.fprintf fmt "TAG-DIRECTIVE(%s, %s)" handle prefix
+
| Document_start ->
+
Format.fprintf fmt "DOCUMENT-START"
+
| Document_end ->
+
Format.fprintf fmt "DOCUMENT-END"
+
| Block_sequence_start ->
+
Format.fprintf fmt "BLOCK-SEQUENCE-START"
+
| Block_mapping_start ->
+
Format.fprintf fmt "BLOCK-MAPPING-START"
+
| Block_entry ->
+
Format.fprintf fmt "BLOCK-ENTRY"
+
| Block_end ->
+
Format.fprintf fmt "BLOCK-END"
+
| Flow_sequence_start ->
+
Format.fprintf fmt "FLOW-SEQUENCE-START"
+
| Flow_sequence_end ->
+
Format.fprintf fmt "FLOW-SEQUENCE-END"
+
| Flow_mapping_start ->
+
Format.fprintf fmt "FLOW-MAPPING-START"
+
| Flow_mapping_end ->
+
Format.fprintf fmt "FLOW-MAPPING-END"
+
| Flow_entry ->
+
Format.fprintf fmt "FLOW-ENTRY"
+
| Key ->
+
Format.fprintf fmt "KEY"
+
| Value ->
+
Format.fprintf fmt "VALUE"
+
| Anchor name ->
+
Format.fprintf fmt "ANCHOR(%s)" name
+
| Alias name ->
+
Format.fprintf fmt "ALIAS(%s)" name
+
| Tag { handle; suffix } ->
+
Format.fprintf fmt "TAG(%s, %s)" handle suffix
+
| Scalar { style; value } ->
+
Format.fprintf fmt "SCALAR(%a, %S)" Scalar_style.pp style value
+
+
let pp fmt t = pp_token fmt t
+
+
let pp_spanned fmt { token; span } =
+
Format.fprintf fmt "%a at %a" pp_token token Span.pp span
+4
lib/unix/dune
···
+
(library
+
(name yamlrw_unix)
+
(public_name yamlrw-unix)
+
(libraries yamlrw bytesrw))
+124
lib/unix/yamlrw_unix.ml
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** Yamlrw Unix - Channel and file I/O for YAML
+
+
This module provides channel and file operations for parsing
+
and emitting YAML using bytesrw for efficient streaming I/O. *)
+
+
open Bytesrw
+
open Yamlrw
+
+
(** {1 Types} *)
+
+
type value = Value.t
+
type yaml = Yaml.t
+
type document = Document.t
+
+
(** {1 Channel Input} *)
+
+
let value_of_channel
+
?(resolve_aliases = true)
+
?(max_nodes = Yaml.default_max_alias_nodes)
+
?(max_depth = Yaml.default_max_alias_depth)
+
ic =
+
let reader = Bytes.Reader.of_in_channel ic in
+
Loader.value_of_reader ~resolve_aliases ~max_nodes ~max_depth reader
+
+
let yaml_of_channel
+
?(resolve_aliases = false)
+
?(max_nodes = Yaml.default_max_alias_nodes)
+
?(max_depth = Yaml.default_max_alias_depth)
+
ic =
+
let reader = Bytes.Reader.of_in_channel ic in
+
Loader.yaml_of_reader ~resolve_aliases ~max_nodes ~max_depth reader
+
+
let documents_of_channel ic =
+
let reader = Bytes.Reader.of_in_channel ic in
+
Loader.documents_of_reader reader
+
+
(** {1 Channel Output} *)
+
+
let value_to_channel
+
?(encoding = `Utf8)
+
?(scalar_style = `Any)
+
?(layout_style = `Any)
+
oc
+
(v : value) =
+
let config = { Emitter.default_config with encoding; scalar_style; layout_style } in
+
let writer = Bytes.Writer.of_out_channel oc in
+
Serialize.value_to_writer ~config writer v
+
+
let yaml_to_channel
+
?(encoding = `Utf8)
+
?(scalar_style = `Any)
+
?(layout_style = `Any)
+
oc
+
(v : yaml) =
+
let config = { Emitter.default_config with encoding; scalar_style; layout_style } in
+
let writer = Bytes.Writer.of_out_channel oc in
+
Serialize.yaml_to_writer ~config writer v
+
+
let documents_to_channel
+
?(encoding = `Utf8)
+
?(scalar_style = `Any)
+
?(layout_style = `Any)
+
?(resolve_aliases = true)
+
oc
+
docs =
+
let config = { Emitter.default_config with encoding; scalar_style; layout_style } in
+
let writer = Bytes.Writer.of_out_channel oc in
+
Serialize.documents_to_writer ~config ~resolve_aliases writer docs
+
+
(** {1 File Input} *)
+
+
let value_of_file
+
?(resolve_aliases = true)
+
?(max_nodes = Yaml.default_max_alias_nodes)
+
?(max_depth = Yaml.default_max_alias_depth)
+
path =
+
In_channel.with_open_bin path (fun ic ->
+
value_of_channel ~resolve_aliases ~max_nodes ~max_depth ic)
+
+
let yaml_of_file
+
?(resolve_aliases = false)
+
?(max_nodes = Yaml.default_max_alias_nodes)
+
?(max_depth = Yaml.default_max_alias_depth)
+
path =
+
In_channel.with_open_bin path (fun ic ->
+
yaml_of_channel ~resolve_aliases ~max_nodes ~max_depth ic)
+
+
let documents_of_file path =
+
In_channel.with_open_bin path documents_of_channel
+
+
(** {1 File Output} *)
+
+
let value_to_file
+
?(encoding = `Utf8)
+
?(scalar_style = `Any)
+
?(layout_style = `Any)
+
path
+
v =
+
Out_channel.with_open_bin path (fun oc ->
+
value_to_channel ~encoding ~scalar_style ~layout_style oc v)
+
+
let yaml_to_file
+
?(encoding = `Utf8)
+
?(scalar_style = `Any)
+
?(layout_style = `Any)
+
path
+
v =
+
Out_channel.with_open_bin path (fun oc ->
+
yaml_to_channel ~encoding ~scalar_style ~layout_style oc v)
+
+
let documents_to_file
+
?(encoding = `Utf8)
+
?(scalar_style = `Any)
+
?(layout_style = `Any)
+
?(resolve_aliases = true)
+
path
+
docs =
+
Out_channel.with_open_bin path (fun oc ->
+
documents_to_channel ~encoding ~scalar_style ~layout_style ~resolve_aliases oc docs)
+125
lib/unix/yamlrw_unix.mli
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** Yamlrw Unix - Channel and file I/O for YAML
+
+
This module provides channel and file operations for parsing
+
and emitting YAML using bytesrw for efficient streaming I/O. *)
+
+
(** {1 Types} *)
+
+
type value = Yamlrw.Value.t
+
type yaml = Yamlrw.Yaml.t
+
type document = Yamlrw.Document.t
+
+
(** {1 Channel Input} *)
+
+
val value_of_channel :
+
?resolve_aliases:bool ->
+
?max_nodes:int ->
+
?max_depth:int ->
+
in_channel ->
+
value
+
(** Parse a JSON-compatible value from an input channel.
+
+
@param resolve_aliases Whether to expand aliases (default: true)
+
@param max_nodes Maximum nodes during alias expansion (default: 10M)
+
@param max_depth Maximum alias nesting depth (default: 100) *)
+
+
val yaml_of_channel :
+
?resolve_aliases:bool ->
+
?max_nodes:int ->
+
?max_depth:int ->
+
in_channel ->
+
yaml
+
(** Parse a full YAML value from an input channel.
+
+
@param resolve_aliases Whether to expand aliases (default: false)
+
@param max_nodes Maximum nodes during alias expansion (default: 10M)
+
@param max_depth Maximum alias nesting depth (default: 100) *)
+
+
val documents_of_channel : in_channel -> document list
+
(** Parse multiple YAML documents from an input channel. *)
+
+
(** {1 Channel Output} *)
+
+
val value_to_channel :
+
?encoding:Yamlrw.Encoding.t ->
+
?scalar_style:Yamlrw.Scalar_style.t ->
+
?layout_style:Yamlrw.Layout_style.t ->
+
out_channel ->
+
value ->
+
unit
+
(** Write a JSON-compatible value to an output channel. *)
+
+
val yaml_to_channel :
+
?encoding:Yamlrw.Encoding.t ->
+
?scalar_style:Yamlrw.Scalar_style.t ->
+
?layout_style:Yamlrw.Layout_style.t ->
+
out_channel ->
+
yaml ->
+
unit
+
(** Write a full YAML value to an output channel. *)
+
+
val documents_to_channel :
+
?encoding:Yamlrw.Encoding.t ->
+
?scalar_style:Yamlrw.Scalar_style.t ->
+
?layout_style:Yamlrw.Layout_style.t ->
+
?resolve_aliases:bool ->
+
out_channel ->
+
document list ->
+
unit
+
(** Write multiple YAML documents to an output channel. *)
+
+
(** {1 File Input} *)
+
+
val value_of_file :
+
?resolve_aliases:bool ->
+
?max_nodes:int ->
+
?max_depth:int ->
+
string ->
+
value
+
(** Parse a JSON-compatible value from a file. *)
+
+
val yaml_of_file :
+
?resolve_aliases:bool ->
+
?max_nodes:int ->
+
?max_depth:int ->
+
string ->
+
yaml
+
(** Parse a full YAML value from a file. *)
+
+
val documents_of_file : string -> document list
+
(** Parse multiple YAML documents from a file. *)
+
+
(** {1 File Output} *)
+
+
val value_to_file :
+
?encoding:Yamlrw.Encoding.t ->
+
?scalar_style:Yamlrw.Scalar_style.t ->
+
?layout_style:Yamlrw.Layout_style.t ->
+
string ->
+
value ->
+
unit
+
(** Write a JSON-compatible value to a file. *)
+
+
val yaml_to_file :
+
?encoding:Yamlrw.Encoding.t ->
+
?scalar_style:Yamlrw.Scalar_style.t ->
+
?layout_style:Yamlrw.Layout_style.t ->
+
string ->
+
yaml ->
+
unit
+
(** Write a full YAML value to a file. *)
+
+
val documents_to_file :
+
?encoding:Yamlrw.Encoding.t ->
+
?scalar_style:Yamlrw.Scalar_style.t ->
+
?layout_style:Yamlrw.Layout_style.t ->
+
?resolve_aliases:bool ->
+
string ->
+
document list ->
+
unit
+
(** Write multiple YAML documents to a file. *)
+167
lib/value.ml
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** JSON-compatible YAML value representation *)
+
+
type t = [
+
| `Null
+
| `Bool of bool
+
| `Float of float
+
| `String of string
+
| `A of t list
+
| `O of (string * t) list
+
]
+
+
(* Type equality is ensured by structural compatibility with Yamlrw.value *)
+
+
(** Constructors *)
+
+
let null : t = `Null
+
let bool b : t = `Bool b
+
let int n : t = `Float (Float.of_int n)
+
let float f : t = `Float f
+
let string s : t = `String s
+
+
let list f xs : t = `A (List.map f xs)
+
let obj pairs : t = `O pairs
+
+
(** Type name for error messages *)
+
let type_name : t -> string = function
+
| `Null -> "null"
+
| `Bool _ -> "bool"
+
| `Float _ -> "float"
+
| `String _ -> "string"
+
| `A _ -> "array"
+
| `O _ -> "object"
+
+
(** Safe accessors (return option) *)
+
+
let as_null = function `Null -> Some () | _ -> None
+
let as_bool = function `Bool b -> Some b | _ -> None
+
let as_float = function `Float f -> Some f | _ -> None
+
let as_string = function `String s -> Some s | _ -> None
+
let as_list = function `A l -> Some l | _ -> None
+
let as_assoc = function `O o -> Some o | _ -> None
+
+
let as_int = function
+
| `Float f ->
+
let i = Float.to_int f in
+
if Float.equal (Float.of_int i) f then Some i else None
+
| _ -> None
+
+
(** Unsafe accessors (raise on type mismatch) *)
+
+
let unwrap_or_type_error expected_type extractor v =
+
match extractor v with
+
| Some x -> x
+
| None -> Error.raise (Type_mismatch (expected_type, type_name v))
+
+
let to_null v = unwrap_or_type_error "null" as_null v
+
let to_bool v = unwrap_or_type_error "bool" as_bool v
+
let to_float v = unwrap_or_type_error "float" as_float v
+
let to_string v = unwrap_or_type_error "string" as_string v
+
let to_list v = unwrap_or_type_error "array" as_list v
+
let to_assoc v = unwrap_or_type_error "object" as_assoc v
+
let to_int v = unwrap_or_type_error "int" as_int v
+
+
(** Object access *)
+
+
let mem key = function
+
| `O pairs -> List.exists (fun (k, _) -> k = key) pairs
+
| _ -> false
+
+
let find key = function
+
| `O pairs -> List.assoc_opt key pairs
+
| _ -> None
+
+
let get key v =
+
match find key v with
+
| Some v -> v
+
| None -> Error.raise (Key_not_found key)
+
+
let keys = function
+
| `O pairs -> List.map fst pairs
+
| v -> Error.raise (Type_mismatch ("object", type_name v))
+
+
let values = function
+
| `O pairs -> List.map snd pairs
+
| v -> Error.raise (Type_mismatch ("object", type_name v))
+
+
(** Combinators *)
+
+
let combine v1 v2 =
+
match v1, v2 with
+
| `O o1, `O o2 -> `O (o1 @ o2)
+
| v1, _ -> Error.raise (Type_mismatch ("object", type_name v1))
+
+
let map f = function
+
| `A l -> `A (List.map f l)
+
| v -> Error.raise (Type_mismatch ("array", type_name v))
+
+
let filter pred = function
+
| `A l -> `A (List.filter pred l)
+
| v -> Error.raise (Type_mismatch ("array", type_name v))
+
+
(** Pretty printing *)
+
+
let rec pp fmt (v : t) =
+
match v with
+
| `Null -> Format.pp_print_string fmt "null"
+
| `Bool b -> Format.pp_print_bool fmt b
+
| `Float f ->
+
if Float.is_integer f && Float.abs f < 1e15 then
+
Format.fprintf fmt "%.0f" f
+
else
+
Format.fprintf fmt "%g" f
+
| `String s -> Format.fprintf fmt "%S" s
+
| `A [] -> Format.pp_print_string fmt "[]"
+
| `A items ->
+
Format.fprintf fmt "@[<hv 2>[@,%a@]@,]"
+
(Format.pp_print_list ~pp_sep:(fun fmt () -> Format.fprintf fmt ",@ ") pp)
+
items
+
| `O [] -> Format.pp_print_string fmt "{}"
+
| `O pairs ->
+
Format.fprintf fmt "@[<hv 2>{@,%a@]@,}"
+
(Format.pp_print_list ~pp_sep:(fun fmt () -> Format.fprintf fmt ",@ ")
+
(fun fmt (k, v) -> Format.fprintf fmt "@[<hv 2>%S:@ %a@]" k pp v))
+
pairs
+
+
(** Equality and comparison *)
+
+
let rec equal (a : t) (b : t) =
+
match a, b with
+
| `Null, `Null -> true
+
| `Bool a, `Bool b -> a = b
+
| `Float a, `Float b -> Float.equal a b
+
| `String a, `String b -> String.equal a b
+
| `A a, `A b -> List.equal equal a b
+
| `O a, `O b ->
+
List.length a = List.length b &&
+
List.for_all2 (fun (k1, v1) (k2, v2) -> k1 = k2 && equal v1 v2) a b
+
| _ -> false
+
+
let rec compare (a : t) (b : t) =
+
match a, b with
+
| `Null, `Null -> 0
+
| `Null, _ -> -1
+
| _, `Null -> 1
+
| `Bool a, `Bool b -> Bool.compare a b
+
| `Bool _, _ -> -1
+
| _, `Bool _ -> 1
+
| `Float a, `Float b -> Float.compare a b
+
| `Float _, _ -> -1
+
| _, `Float _ -> 1
+
| `String a, `String b -> String.compare a b
+
| `String _, _ -> -1
+
| _, `String _ -> 1
+
| `A a, `A b -> List.compare compare a b
+
| `A _, _ -> -1
+
| _, `A _ -> 1
+
| `O a, `O b ->
+
let cmp_pair (k1, v1) (k2, v2) =
+
let c = String.compare k1 k2 in
+
if c <> 0 then c else compare v1 v2
+
in
+
List.compare cmp_pair a b
+311
lib/yaml.ml
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** Full YAML representation with anchors, tags, and aliases *)
+
+
type t = [
+
| `Scalar of Scalar.t
+
| `Alias of string
+
| `A of t Sequence.t
+
| `O of (t, t) Mapping.t
+
]
+
+
(** Pretty printing *)
+
+
let rec pp fmt (v : t) =
+
match v with
+
| `Scalar s -> Scalar.pp fmt s
+
| `Alias name -> Format.fprintf fmt "*%s" name
+
| `A seq -> Sequence.pp pp fmt seq
+
| `O map -> Mapping.pp pp pp fmt map
+
+
(** Equality *)
+
+
let rec equal (a : t) (b : t) =
+
match a, b with
+
| `Scalar a, `Scalar b -> Scalar.equal a b
+
| `Alias a, `Alias b -> String.equal a b
+
| `A a, `A b -> Sequence.equal equal a b
+
| `O a, `O b -> Mapping.equal equal equal a b
+
| _ -> false
+
+
(** Construct from JSON-compatible Value *)
+
+
let rec of_value (v : Value.t) : t =
+
match v with
+
| `Null -> `Scalar (Scalar.make "null")
+
| `Bool true -> `Scalar (Scalar.make "true")
+
| `Bool false -> `Scalar (Scalar.make "false")
+
| `Float f ->
+
let s =
+
if Float.is_integer f && Float.abs f < 1e15 then
+
Printf.sprintf "%.0f" f
+
else
+
Printf.sprintf "%g" f
+
in
+
`Scalar (Scalar.make s)
+
| `String s ->
+
`Scalar (Scalar.make s ~style:`Double_quoted)
+
| `A items ->
+
`A (Sequence.make (List.map of_value items))
+
| `O pairs ->
+
`O (Mapping.make (List.map (fun (k, v) ->
+
(`Scalar (Scalar.make k), of_value v)
+
) pairs))
+
+
(** Default limits for alias expansion (protection against billion laughs attack) *)
+
let default_max_alias_nodes = 10_000_000
+
let default_max_alias_depth = 100
+
+
(** Resolve aliases by replacing them with referenced nodes.
+
+
Processes the tree in document order so that aliases resolve to the
+
anchor value that was defined at the point the alias was encountered.
+
+
See {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2
+
(Anchors and Aliases)} of the YAML 1.2.2 specification for details on
+
how anchors and aliases work in YAML.
+
+
This implements protection against the "billion laughs attack"
+
(see {{:https://yaml.org/spec/1.2.2/#321-processes}Section 3.2.1 (Processes)})
+
by limiting both the total number of nodes and the nesting depth during expansion.
+
+
@param max_nodes Maximum number of nodes to create during expansion (default 10M)
+
@param max_depth Maximum depth of alias-within-alias resolution (default 100)
+
@raise Error.Yamlrw_error with {!type:Error.kind} [Alias_expansion_node_limit] if max_nodes is exceeded
+
@raise Error.Yamlrw_error with {!type:Error.kind} [Alias_expansion_depth_limit] if max_depth is exceeded
+
*)
+
let resolve_aliases ?(max_nodes = default_max_alias_nodes) ?(max_depth = default_max_alias_depth) (root : t) : t =
+
let anchors = Hashtbl.create 16 in
+
let node_count = ref 0 in
+
+
(* Check node limit *)
+
let check_node_limit () =
+
incr node_count;
+
if !node_count > max_nodes then
+
Error.raise (Alias_expansion_node_limit max_nodes)
+
in
+
+
(* Register anchor if present on a node *)
+
let register_anchor name resolved_node =
+
Hashtbl.replace anchors name resolved_node
+
in
+
+
(* Resolve an alias by looking up and expanding the target *)
+
let rec expand_alias ~depth name =
+
if depth >= max_depth then
+
Error.raise (Alias_expansion_depth_limit max_depth);
+
match Hashtbl.find_opt anchors name with
+
| Some target ->
+
(* The target is already resolved, but may contain aliases that
+
need expansion if it was registered before those anchors existed *)
+
resolve ~depth:(depth + 1) target
+
| None -> Error.raise (Undefined_alias name)
+
+
(* Single pass: process in document order, registering anchors and resolving aliases *)
+
and resolve ~depth (v : t) : t =
+
check_node_limit ();
+
match v with
+
| `Scalar s ->
+
(* Register anchor after we have the resolved node *)
+
(match Scalar.anchor s with
+
| Some name -> register_anchor name v
+
| None -> ());
+
v
+
| `Alias name ->
+
expand_alias ~depth name
+
| `A seq ->
+
(* First resolve all members in order *)
+
let resolved_members = List.map (resolve ~depth) (Sequence.members seq) in
+
let resolved = `A (Sequence.make
+
?anchor:(Sequence.anchor seq)
+
?tag:(Sequence.tag seq)
+
~implicit:(Sequence.implicit seq)
+
~style:(Sequence.style seq)
+
resolved_members) in
+
(* Register anchor with resolved node *)
+
(match Sequence.anchor seq with
+
| Some name -> register_anchor name resolved
+
| None -> ());
+
resolved
+
| `O map ->
+
(* Process key-value pairs in document order *)
+
let resolved_pairs = List.map (fun (k, v) ->
+
let resolved_k = resolve ~depth k in
+
let resolved_v = resolve ~depth v in
+
(resolved_k, resolved_v)
+
) (Mapping.members map) in
+
let resolved = `O (Mapping.make
+
?anchor:(Mapping.anchor map)
+
?tag:(Mapping.tag map)
+
~implicit:(Mapping.implicit map)
+
~style:(Mapping.style map)
+
resolved_pairs) in
+
(* Register anchor with resolved node *)
+
(match Mapping.anchor map with
+
| Some name -> register_anchor name resolved
+
| None -> ());
+
resolved
+
in
+
resolve ~depth:0 root
+
+
(** Convert scalar to JSON value based on content *)
+
let rec scalar_to_value s =
+
let value = Scalar.value s in
+
let tag = Scalar.tag s in
+
let style = Scalar.style s in
+
+
(* If explicitly tagged, respect the tag *)
+
match tag with
+
| Some "tag:yaml.org,2002:null" | Some "!!null" ->
+
`Null
+
| Some "tag:yaml.org,2002:bool" | Some "!!bool" ->
+
(match String.lowercase_ascii value with
+
| "true" | "yes" | "on" -> `Bool true
+
| "false" | "no" | "off" -> `Bool false
+
| _ -> Error.raise (Invalid_scalar_conversion (value, "bool")))
+
| Some "tag:yaml.org,2002:int" | Some "!!int" ->
+
(try `Float (Float.of_string value)
+
with _ -> Error.raise (Invalid_scalar_conversion (value, "int")))
+
| Some "tag:yaml.org,2002:float" | Some "!!float" ->
+
(try `Float (Float.of_string value)
+
with _ -> Error.raise (Invalid_scalar_conversion (value, "float")))
+
| Some "tag:yaml.org,2002:str" | Some "!!str" ->
+
`String value
+
| Some _ ->
+
(* Unknown tag - treat as string *)
+
`String value
+
| None ->
+
(* Implicit type resolution for plain scalars *)
+
if style <> `Plain then
+
`String value
+
else
+
infer_scalar_type value
+
+
(** Infer type from plain scalar value *)
+
and infer_scalar_type value =
+
match String.lowercase_ascii value with
+
(* Null *)
+
| "" | "null" | "~" -> `Null
+
(* Boolean true *)
+
| "true" | "yes" | "on" -> `Bool true
+
(* Boolean false *)
+
| "false" | "no" | "off" -> `Bool false
+
(* Special floats *)
+
| ".inf" | "+.inf" -> `Float Float.infinity
+
| "-.inf" -> `Float Float.neg_infinity
+
| ".nan" -> `Float Float.nan
+
(* Try numeric *)
+
| _ -> try_parse_number value
+
+
(** Try to parse as number *)
+
and try_parse_number value =
+
(* Check if value looks like a valid YAML number (not inf/nan without dot)
+
This guards against OCaml's Float.of_string accepting "inf", "nan", etc.
+
See: https://github.com/avsm/ocaml-yaml/issues/82 *)
+
let looks_like_number () =
+
let len = String.length value in
+
if len = 0 then false
+
else
+
let first = value.[0] in
+
if first >= '0' && first <= '9' then true
+
else if (first = '-' || first = '+') && len >= 2 then
+
let second = value.[1] in
+
(* After sign, must be digit or dot-digit (for +.5, -.5) *)
+
second >= '0' && second <= '9' ||
+
(second = '.' && len >= 3 && value.[2] >= '0' && value.[2] <= '9')
+
else false
+
in
+
(* Try integer/float *)
+
let try_numeric () =
+
if looks_like_number () then
+
try
+
(* Handle octal: 0o prefix or leading 0 *)
+
if String.length value > 2 && value.[0] = '0' then
+
match value.[1] with
+
| 'x' | 'X' ->
+
(* Hex *)
+
Some (`Float (Float.of_int (int_of_string value)))
+
| 'o' | 'O' ->
+
(* Octal *)
+
Some (`Float (Float.of_int (int_of_string value)))
+
| 'b' | 'B' ->
+
(* Binary *)
+
Some (`Float (Float.of_int (int_of_string value)))
+
| _ ->
+
(* Decimal with leading zero or octal in YAML 1.1 *)
+
Some (`Float (Float.of_string value))
+
else
+
Some (`Float (Float.of_string value))
+
with _ -> None
+
else None
+
in
+
match try_numeric () with
+
| Some v -> v
+
| None ->
+
(* Try float starting with dot (e.g., .5 for 0.5)
+
Note: We must NOT use Float.of_string as a general fallback because
+
OCaml accepts "nan", "inf", "infinity" which are NOT valid YAML floats.
+
YAML requires the leading dot: .nan, .inf, -.inf
+
See: https://github.com/avsm/ocaml-yaml/issues/82 *)
+
if String.length value >= 2 && value.[0] = '.' &&
+
value.[1] >= '0' && value.[1] <= '9' then
+
try `Float (Float.of_string value)
+
with _ -> `String value
+
else
+
`String value
+
+
(** Convert to JSON-compatible Value.
+
+
Converts a full YAML representation to a simplified JSON-compatible value type.
+
This process implements the representation graph to serialization tree conversion
+
described in {{:https://yaml.org/spec/1.2.2/#32-processes}Section 3.2 (Processes)}
+
of the YAML 1.2.2 specification.
+
+
See also {{:https://yaml.org/spec/1.2.2/#10212-json-schema}Section 10.2.1.2
+
(JSON Schema)} for the tag resolution used during conversion.
+
+
@param resolve_aliases_first Whether to resolve aliases before conversion (default true)
+
@param max_nodes Maximum nodes during alias expansion (default 10M)
+
@param max_depth Maximum alias nesting depth (default 100)
+
@raise Error.Yamlrw_error with {!type:Error.kind} [Unresolved_alias] if resolve_aliases_first is false and an alias is encountered
+
*)
+
let to_value
+
?(resolve_aliases_first = true)
+
?(max_nodes = default_max_alias_nodes)
+
?(max_depth = default_max_alias_depth)
+
(v : t) : Value.t =
+
let v = if resolve_aliases_first then resolve_aliases ~max_nodes ~max_depth v else v in
+
let rec convert (v : t) : Value.t =
+
match v with
+
| `Scalar s -> scalar_to_value s
+
| `Alias name -> Error.raise (Unresolved_alias name)
+
| `A seq -> `A (List.map convert (Sequence.members seq))
+
| `O map ->
+
`O (List.map (fun (k, v) ->
+
let key = match k with
+
| `Scalar s -> Scalar.value s
+
| _ -> Error.raise (Type_mismatch ("string key", "complex key"))
+
in
+
(key, convert v)
+
) (Mapping.members map))
+
in
+
convert v
+
+
(** Get anchor from any node *)
+
let anchor (v : t) =
+
match v with
+
| `Scalar s -> Scalar.anchor s
+
| `Alias _ -> None
+
| `A seq -> Sequence.anchor seq
+
| `O map -> Mapping.anchor map
+
+
(** Get tag from any node *)
+
let tag (v : t) =
+
match v with
+
| `Scalar s -> Scalar.tag s
+
| `Alias _ -> None
+
| `A seq -> Sequence.tag seq
+
| `O map -> Mapping.tag map
+700
lib/yamlrw.ml
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** {1 Yamlrw - A Pure OCaml YAML Parser and Emitter} *)
+
+
(** {2 Error Handling} *)
+
+
module Error = Error
+
+
exception Yamlrw_error = Error.Yamlrw_error
+
+
+
(** {2 Core Types} *)
+
+
(** JSON-compatible YAML representation. Use this for simple data interchange.
+
+
This type is structurally equivalent to {!Value.t} and compatible with the
+
ezjsonm representation. For additional operations, see {!Value} and {!Util}. *)
+
type value = [
+
| `Null (** YAML null, ~, or empty values *)
+
| `Bool of bool (** YAML booleans (true, false, yes, no, on, off) *)
+
| `Float of float (** All YAML numbers (integers stored as floats) *)
+
| `String of string (** YAML strings *)
+
| `A of value list (** YAML sequences/arrays *)
+
| `O of (string * value) list (** YAML mappings/objects with string keys *)
+
]
+
+
(** Full YAML representation preserving anchors, tags, and aliases.
+
+
This type is structurally equivalent to {!Yaml.t}. Use this when you need
+
access to YAML-specific features like anchors and aliases for node reuse,
+
type tags for custom types, scalar styles (plain, quoted, literal, folded),
+
and collection styles (block vs flow).
+
+
For additional operations, see {!Yaml}, {!Scalar}, {!Sequence}, and {!Mapping}. *)
+
type yaml = [
+
| `Scalar of Scalar.t (** YAML scalar value with style and metadata *)
+
| `Alias of string (** Alias reference to an anchored node *)
+
| `A of yaml Sequence.t (** YAML sequence with style and metadata *)
+
| `O of (yaml, yaml) Mapping.t (** YAML mapping with style and metadata *)
+
]
+
+
(** A YAML document with directives and metadata.
+
+
This type is structurally equivalent to {!Document.t}. A YAML stream can
+
contain multiple documents, each separated by document markers.
+
+
For additional operations, see {!Document}. *)
+
type document = {
+
version : (int * int) option; (** Optional YAML version directive (e.g., (1, 2) for YAML 1.2) *)
+
tags : (string * string) list; (** TAG directives mapping handles to prefixes *)
+
root : yaml option; (** Root content of the document *)
+
implicit_start : bool; (** Whether the document start marker (---) is implicit *)
+
implicit_end : bool; (** Whether the document end marker (...) is implicit *)
+
}
+
+
+
(** {2 Character Encoding} *)
+
+
module Encoding = Encoding
+
+
+
(** {2 Parsing} *)
+
+
type version = [ `V1_1 | `V1_2 ]
+
+
(** Default maximum nodes during alias expansion (10 million). *)
+
let default_max_alias_nodes = Yaml.default_max_alias_nodes
+
+
(** Default maximum alias nesting depth (100). *)
+
let default_max_alias_depth = Yaml.default_max_alias_depth
+
+
(** Parse a YAML string into a JSON-compatible value.

    @param resolve_aliases Whether to expand aliases (default: true)
    @param max_nodes Maximum nodes during alias expansion (default: 10M)
    @param max_depth Maximum alias nesting depth (default: 100)
    @raise Yamlrw_error on parse error or if multiple documents found *)
let of_string ?(resolve_aliases = true) ?(max_nodes = default_max_alias_nodes)
    ?(max_depth = default_max_alias_depth) s : value =
  (Loader.value_of_string ~resolve_aliases ~max_nodes ~max_depth s :> value)

(** Parse a YAML string preserving full YAML metadata (anchors, tags, etc).

    By default, aliases are NOT resolved, preserving the document structure.

    @param resolve_aliases Whether to expand aliases (default: false)
    @param max_nodes Maximum nodes during alias expansion (default: 10M)
    @param max_depth Maximum alias nesting depth (default: 100)
    @raise Yamlrw_error on parse error or if multiple documents found *)
let yaml_of_string ?(resolve_aliases = false)
    ?(max_nodes = default_max_alias_nodes)
    ?(max_depth = default_max_alias_depth) s : yaml =
  (Loader.yaml_of_string ~resolve_aliases ~max_nodes ~max_depth s :> yaml)

(** Parse a multi-document YAML stream.

    Use this when your YAML input contains multiple documents separated
    by document markers (---).

    @raise Yamlrw_error on parse error *)
let documents_of_string s : document list =
  (* Re-export each internal [Document.t] as the public [document] record;
     the root coercion widens the internal yaml type to the public alias. *)
  let reexport (d : Document.t) : document =
    { version = d.version;
      tags = d.tags;
      root = (d.root :> yaml option);
      implicit_start = d.implicit_start;
      implicit_end = d.implicit_end }
  in
  List.map reexport (Loader.documents_of_string s)
+
+
+
(** {2 Formatting Styles} *)
+
+
module Scalar_style = Scalar_style
+
+
module Layout_style = Layout_style
+
+
+
(** {2 Serialization} *)
+
+
(* Build an emitter configuration from the user-facing style knobs,
   inheriting every other setting from [Emitter.default_config]. *)
let make_config ~encoding ~scalar_style ~layout_style =
  { Emitter.default_config with encoding; scalar_style; layout_style }

(** Serialize a value to a buffer.

    @param encoding Output encoding (default: UTF-8)
    @param scalar_style Preferred scalar style (default: Any)
    @param layout_style Preferred layout style (default: Any)
    @param buffer Optional buffer to append to (allocates new one if not provided)
    @return The buffer containing the serialized YAML *)
let to_buffer ?(encoding = `Utf8) ?(scalar_style = `Any) ?(layout_style = `Any)
    ?buffer (value : value) =
  let config = make_config ~encoding ~scalar_style ~layout_style in
  Serialize.value_to_buffer ~config ?buffer (value :> Value.t)

(** Serialize a value to a YAML string.

    @param encoding Output encoding (default: UTF-8)
    @param scalar_style Preferred scalar style (default: Any)
    @param layout_style Preferred layout style (default: Any) *)
let to_string ?(encoding = `Utf8) ?(scalar_style = `Any) ?(layout_style = `Any)
    (value : value) =
  to_buffer ~encoding ~scalar_style ~layout_style value |> Buffer.contents

(** Serialize a full YAML value to a buffer.

    @param encoding Output encoding (default: UTF-8)
    @param scalar_style Preferred scalar style (default: Any)
    @param layout_style Preferred layout style (default: Any)
    @param buffer Optional buffer to append to (allocates new one if not provided)
    @return The buffer containing the serialized YAML *)
let yaml_to_buffer ?(encoding = `Utf8) ?(scalar_style = `Any)
    ?(layout_style = `Any) ?buffer (yaml : yaml) =
  let config = make_config ~encoding ~scalar_style ~layout_style in
  Serialize.yaml_to_buffer ~config ?buffer (yaml :> Yaml.t)

(** Serialize a full YAML value to a string.

    @param encoding Output encoding (default: UTF-8)
    @param scalar_style Preferred scalar style (default: Any)
    @param layout_style Preferred layout style (default: Any) *)
let yaml_to_string ?(encoding = `Utf8) ?(scalar_style = `Any)
    ?(layout_style = `Any) (yaml : yaml) =
  yaml_to_buffer ~encoding ~scalar_style ~layout_style yaml |> Buffer.contents

(** Serialize multiple documents to a buffer.

    @param encoding Output encoding (default: UTF-8)
    @param scalar_style Preferred scalar style (default: Any)
    @param layout_style Preferred layout style (default: Any)
    @param resolve_aliases Whether to expand aliases (default: true)
    @param buffer Optional buffer to append to (allocates new one if not provided)
    @return The buffer containing the serialized YAML *)
let documents_to_buffer ?(encoding = `Utf8) ?(scalar_style = `Any)
    ?(layout_style = `Any) ?(resolve_aliases = true) ?buffer
    (documents : document list) =
  let config = make_config ~encoding ~scalar_style ~layout_style in
  (* Narrow each public [document] record back to the internal type;
     the root coercion narrows the public yaml alias to [Yaml.t]. *)
  let internal (d : document) : Document.t =
    { Document.version = d.version;
      tags = d.tags;
      root = (d.root :> Yaml.t option);
      implicit_start = d.implicit_start;
      implicit_end = d.implicit_end }
  in
  Serialize.documents_to_buffer ~config ~resolve_aliases ?buffer
    (List.map internal documents)

(** Serialize multiple documents to a YAML stream.

    @param encoding Output encoding (default: UTF-8)
    @param scalar_style Preferred scalar style (default: Any)
    @param layout_style Preferred layout style (default: Any)
    @param resolve_aliases Whether to expand aliases (default: true) *)
let documents_to_string ?(encoding = `Utf8) ?(scalar_style = `Any)
    ?(layout_style = `Any) ?(resolve_aliases = true)
    (documents : document list) =
  documents_to_buffer ~encoding ~scalar_style ~layout_style ~resolve_aliases
    documents
  |> Buffer.contents
+
+
(** {2 Buffer Parsing} *)
+
+
(** Parse YAML from a buffer into a JSON-compatible value.

    @param resolve_aliases Whether to expand aliases (default: true)
    @param max_nodes Maximum nodes during alias expansion (default: 10M)
    @param max_depth Maximum alias nesting depth (default: 100)
    @raise Yamlrw_error on parse error or if multiple documents found *)
let of_buffer ?(resolve_aliases = true) ?(max_nodes = default_max_alias_nodes)
    ?(max_depth = default_max_alias_depth) buffer : value =
  Buffer.contents buffer |> of_string ~resolve_aliases ~max_nodes ~max_depth

(** Parse YAML from a buffer preserving full YAML metadata.

    @param resolve_aliases Whether to expand aliases (default: false)
    @param max_nodes Maximum nodes during alias expansion (default: 10M)
    @param max_depth Maximum alias nesting depth (default: 100)
    @raise Yamlrw_error on parse error or if multiple documents found *)
let yaml_of_buffer ?(resolve_aliases = false)
    ?(max_nodes = default_max_alias_nodes)
    ?(max_depth = default_max_alias_depth) buffer : yaml =
  Buffer.contents buffer
  |> yaml_of_string ~resolve_aliases ~max_nodes ~max_depth

(** Parse a multi-document YAML stream from a buffer.

    @raise Yamlrw_error on parse error *)
let documents_of_buffer buffer : document list =
  Buffer.contents buffer |> documents_of_string
+
+
+
(** {2 Conversion} *)
+
+
(** Convert full YAML to JSON-compatible value.

    @param resolve_aliases Whether to expand aliases (default: true)
    @param max_nodes Maximum nodes during alias expansion (default: 10M)
    @param max_depth Maximum alias nesting depth (default: 100)
    @raise Yamlrw_error if alias limits exceeded or complex keys found *)
let to_json ?(resolve_aliases = true) ?(max_nodes = default_max_alias_nodes)
    ?(max_depth = default_max_alias_depth) (yaml : yaml) : value =
  (Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth
     (yaml :> Yaml.t)
    :> value)

(** Convert JSON-compatible value to full YAML representation. *)
let of_json (value : value) : yaml = (Yaml.of_value (value :> Value.t) :> yaml)


(** {2 Pretty Printing & Equality} *)

(** Pretty-print a value. *)
let pp = Value.pp

(** Test equality of two values. *)
let equal = Value.equal
+
+
+
(** {2 Util - Value Combinators} *)
+
+
module Util = struct
  (** Combinators for working with {!type:value} values.

      This module provides constructors, accessors, and transformations
      for JSON-compatible YAML values. *)

  type t = Value.t

  (** {3 Type Error} *)

  exception Type_error of string * t
  (** Raised when a value has unexpected type.
      [Type_error (expected, actual_value)] *)

  (* Single raising point so every accessor reports errors uniformly. *)
  let type_error expected v = raise (Type_error (expected, v))

  (** {3 Constructors} *)

  let null : t = `Null
  let bool b : t = `Bool b

  (* Integers are stored as floats in the JSON-compatible model. *)
  let int n : t = `Float (Float.of_int n)
  let float f : t = `Float f
  let string s : t = `String s
  let strings ss : t = `A (List.map (fun s -> `String s) ss)
  let list vs : t = `A vs
  let obj pairs : t = `O pairs

  (** {3 Type Predicates} *)

  let is_null = function `Null -> true | _ -> false
  let is_bool = function `Bool _ -> true | _ -> false
  let is_number = function `Float _ -> true | _ -> false
  let is_string = function `String _ -> true | _ -> false
  let is_list = function `A _ -> true | _ -> false
  let is_obj = function `O _ -> true | _ -> false

  (** {3 Safe Accessors}

      These return [None] if the value has the wrong type. *)

  let as_null = function `Null -> Some () | _ -> None
  let as_bool = function `Bool b -> Some b | _ -> None
  let as_float = function `Float f -> Some f | _ -> None
  let as_string = function `String s -> Some s | _ -> None
  let as_list = function `A l -> Some l | _ -> None
  let as_obj = function `O o -> Some o | _ -> None

  (* Only floats that round-trip exactly through int count as integers. *)
  let as_int = function
    | `Float f ->
      let i = Float.to_int f in
      if Float.equal (Float.of_int i) f then Some i else None
    | _ -> None

  (** {3 Unsafe Accessors}

      These raise {!Type_error} if the value has the wrong type. *)

  let get_null v = match v with `Null -> () | _ -> type_error "null" v
  let get_bool v = match v with `Bool b -> b | _ -> type_error "bool" v
  let get_float v = match v with `Float f -> f | _ -> type_error "float" v
  let get_string v = match v with `String s -> s | _ -> type_error "string" v
  let get_list v = match v with `A l -> l | _ -> type_error "list" v
  let get_obj v = match v with `O o -> o | _ -> type_error "object" v

  let get_int v =
    match as_int v with
    | Some i -> i
    | None -> type_error "int" v

  (** {3 Object Operations} *)

  (* Keys are strings: use String.equal rather than polymorphic (=). *)
  let mem key = function
    | `O pairs -> List.exists (fun (k, _) -> String.equal k key) pairs
    | _ -> false

  let find key = function
    | `O pairs -> List.assoc_opt key pairs
    | _ -> None

  let get key v =
    match find key v with
    | Some v -> v
    | None -> raise Not_found

  let keys v = match v with
    | `O pairs -> List.map fst pairs
    | _ -> type_error "object" v

  let values v = match v with
    | `O pairs -> List.map snd pairs
    | _ -> type_error "object" v

  (* Replace the first binding of [key] in place, preserving field order;
     append the binding if the key is absent. *)
  let update key value = function
    | `O pairs ->
      let rec go = function
        | [] -> [ (key, value) ]
        | (k, _) :: rest when String.equal k key -> (key, value) :: rest
        | kv :: rest -> kv :: go rest
      in
      `O (go pairs)
    | v -> type_error "object" v

  let remove key = function
    | `O pairs ->
      `O (List.filter (fun (k, _) -> not (String.equal k key)) pairs)
    | v -> type_error "object" v

  (* Concatenation puts [v1]'s bindings first; since lookup returns the
     first binding, [v1] takes precedence on duplicate keys. *)
  let combine v1 v2 =
    match v1, v2 with
    | `O o1, `O o2 -> `O (o1 @ o2)
    | `O _, _ -> type_error "object" v2
    | _, _ -> type_error "object" v1

  (** {3 List Operations} *)

  let map f = function
    | `A l -> `A (List.map f l)
    | v -> type_error "list" v

  let mapi f = function
    | `A l -> `A (List.mapi f l)
    | v -> type_error "list" v

  let filter pred = function
    | `A l -> `A (List.filter pred l)
    | v -> type_error "list" v

  let fold f init = function
    | `A l -> List.fold_left f init l
    | v -> type_error "list" v

  (* Guard against negative indices: List.nth_opt raises Invalid_argument
     for n < 0, but this accessor promises None for any out-of-bounds n. *)
  let nth n = function
    | `A l -> if n < 0 then None else List.nth_opt l n
    | _ -> None

  let length = function
    | `A l -> List.length l
    | `O o -> List.length o
    | _ -> 0

  (* One level of flattening; non-list elements are kept as-is. *)
  let flatten = function
    | `A l ->
      `A (List.concat_map (function `A inner -> inner | v -> [v]) l)
    | v -> type_error "list" v

  (** {3 Path Operations} *)

  let rec get_path path v =
    match path with
    | [] -> Some v
    | key :: rest ->
      match find key v with
      | Some child -> get_path rest child
      | None -> None

  let get_path_exn path v =
    match get_path path v with
    | Some v -> v
    | None -> raise Not_found

  (** {3 Iteration} *)

  let iter_obj f = function
    | `O pairs -> List.iter (fun (k, v) -> f k v) pairs
    | v -> type_error "object" v

  let iter_list f = function
    | `A l -> List.iter f l
    | v -> type_error "list" v

  let fold_obj f init = function
    | `O pairs -> List.fold_left (fun acc (k, v) -> f acc k v) init pairs
    | v -> type_error "object" v

  (** {3 Mapping} *)

  let map_obj f = function
    | `O pairs -> `O (List.map (fun (k, v) -> (k, f k v)) pairs)
    | v -> type_error "object" v

  let filter_obj pred = function
    | `O pairs -> `O (List.filter (fun (k, v) -> pred k v) pairs)
    | v -> type_error "object" v

  (** {3 Conversion Helpers}

      Each [to_*] returns the typed payload, falling back to [?default]
      on a type mismatch, and raising {!Type_error} when no default. *)

  let to_bool ?default v =
    match as_bool v, default with
    | Some b, _ -> b
    | None, Some d -> d
    | None, None -> type_error "bool" v

  let to_int ?default v =
    match as_int v, default with
    | Some i, _ -> i
    | None, Some d -> d
    | None, None -> type_error "int" v

  let to_float ?default v =
    match as_float v, default with
    | Some f, _ -> f
    | None, Some d -> d
    | None, None -> type_error "float" v

  let to_string ?default v =
    match as_string v, default with
    | Some s, _ -> s
    | None, Some d -> d
    | None, None -> type_error "string" v

  let to_list ?default v =
    match as_list v, default with
    | Some l, _ -> l
    | None, Some d -> d
    | None, None -> type_error "list" v
end
+
+
+
(** {2 Stream - Low-Level Event API} *)
+
+
module Stream = struct
  (** Low-level streaming API for event-based YAML processing.

      This is useful for:
      - Processing very large YAML files incrementally
      - Building custom YAML transformers
      - Fine-grained control over YAML emission *)

  (** {3 Event Types} *)

  type event = Event.t
  (** A parsing or emitting event. *)

  type position = Position.t
  (** A position in the source (line, column, byte offset). *)

  (** Result of parsing an event. *)
  type event_result = {
    event : event;
    start_pos : position;
    end_pos : position;
  }

  (** {3 Parsing} *)

  type parser = Parser.t
  (** A streaming YAML parser. *)

  (** Create a parser from a string. *)
  let parser s = Parser.of_string s

  (** Get the next event from the parser.
      Returns [None] when parsing is complete. *)
  let next p =
    match Parser.next p with
    | None -> None
    | Some { event; span } ->
      Some { event; start_pos = span.start; end_pos = span.stop }

  (** Iterate over all events from the parser. *)
  let iter f p =
    let rec loop () =
      match next p with
      | None -> ()
      | Some { event; start_pos; end_pos } ->
        f event start_pos end_pos;
        loop ()
    in
    loop ()

  (** Fold over all events from the parser. *)
  let fold f init p =
    let rec loop acc =
      match Parser.next p with
      | None -> acc
      | Some { event; _ } -> loop (f acc event)
    in
    loop init

  (** {3 Emitting} *)

  type emitter = Emitter.t
  (** A streaming YAML emitter. *)

  (** Create a new emitter.  The [?len] hint is accepted for API
      compatibility but ignored. *)
  let emitter ?len:_ () = Emitter.create ()

  (** Get the emitted YAML string. *)
  let contents e = Emitter.contents e

  (** Emit an event. *)
  let emit e ev = Emitter.emit e ev

  (** {3 Event Emission Helpers} *)

  let stream_start e enc =
    Event.Stream_start { encoding = enc } |> Emitter.emit e

  let stream_end e = Emitter.emit e Event.Stream_end

  let document_start e ?version ?(implicit = true) () =
    (* Translate the public version tag to the numeric pair the event uses. *)
    let version =
      Option.map (function `V1_1 -> (1, 1) | `V1_2 -> (1, 2)) version
    in
    Event.Document_start { version; implicit } |> Emitter.emit e

  let document_end e ?(implicit = true) () =
    Event.Document_end { implicit } |> Emitter.emit e

  let scalar e ?anchor ?tag ?(style = `Any) value =
    Event.Scalar
      { anchor;
        tag;
        value;
        plain_implicit = true;
        quoted_implicit = true;
        style }
    |> Emitter.emit e

  let alias e name = Event.Alias { anchor = name } |> Emitter.emit e

  let sequence_start e ?anchor ?tag ?(style = `Any) () =
    Event.Sequence_start { anchor; tag; implicit = true; style }
    |> Emitter.emit e

  let sequence_end e = Emitter.emit e Event.Sequence_end

  let mapping_start e ?anchor ?tag ?(style = `Any) () =
    Event.Mapping_start { anchor; tag; implicit = true; style }
    |> Emitter.emit e

  let mapping_end e = Emitter.emit e Event.Mapping_end
end
+
+
+
(** {2 Internal Modules} *)
+
+
(** These modules are exposed for advanced use cases requiring
+
fine-grained control over parsing, emission, or data structures.
+
+
For typical usage, prefer the top-level functions and {!Util}. *)
+
+
(** Source position tracking. *)
+
module Position = Position
+
+
(** Source span (range of positions). *)
+
module Span = Span
+
+
(** Block scalar chomping modes. *)
+
module Chomping = Chomping
+
+
(** YAML type tags. *)
+
module Tag = Tag
+
+
(** JSON-compatible value type and operations. *)
+
module Value = Value
+
+
(** YAML scalar with metadata. *)
+
module Scalar = Scalar
+
+
(** YAML sequence with metadata. *)
+
module Sequence = Sequence
+
+
(** YAML mapping with metadata. *)
+
module Mapping = Mapping
+
+
(** Full YAML value type. *)
+
module Yaml = Yaml
+
+
(** YAML document with directives. *)
+
module Document = Document
+
+
(** Lexical tokens. *)
+
module Token = Token
+
+
(** Lexical scanner. *)
+
module Scanner = Scanner
+
+
(** Parser events. *)
+
module Event = Event
+
+
(** Event-based parser. *)
+
module Parser = Parser
+
+
(** Document loader. *)
+
module Loader = Loader
+
+
(** Event-based emitter. *)
+
module Emitter = Emitter
+
+
(** Input stream utilities. *)
+
module Input = Input
+
+
(** Buffer serialization utilities. *)
+
module Serialize = Serialize
+689
lib/yamlrw.mli
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** {1 Yamlrw - A Pure OCaml YAML Parser and Emitter}
+
+
Yamlrw is a pure OCaml implementation of YAML 1.1/1.2 parsing and emission.
+
It provides both a high-level JSON-compatible interface and a lower-level
+
streaming API for fine-grained control.
+
+
{2 Quick Start}
+
+
Parse a YAML string:
+
{[
+
let value = Yamlrw.of_string "name: Alice\nage: 30" in
+
match value with
+
| `O [("name", `String "Alice"); ("age", `Float 30.)] -> ...
+
| _ -> ...
+
]}
+
+
Serialize to YAML:
+
{[
+
let yaml = `O [("name", `String "Bob"); ("active", `Bool true)] in
+
let s = Yamlrw.to_string yaml in
+
(* "name: Bob\nactive: true\n" *)
+
]}
+
+
Use the Util module for convenient access:
+
{[
+
let name = Yamlrw.Util.(get_string (get "name" value)) in
+
let age = Yamlrw.Util.(get_int (get "age" value)) in
+
]} *)
+
+
+
(** {2 Error Handling} *)
+
+
module Error = Error
+
+
exception Yamlrw_error of Error.t
+
(** Raised on parse or emit errors. *)
+
+
+
(** {2 Core Types} *)
+
+
type value = [
+
| `Null (** YAML null, ~, or empty values *)
+
| `Bool of bool (** YAML booleans (true, false, yes, no, on, off) *)
+
| `Float of float (** All YAML numbers (integers stored as floats) *)
+
| `String of string (** YAML strings *)
+
| `A of value list (** YAML sequences/arrays *)
+
| `O of (string * value) list (** YAML mappings/objects with string keys *)
+
]
+
(** JSON-compatible YAML representation. Use this for simple data interchange.
+
+
This type is structurally equivalent to {!Value.t} and compatible with the
+
ezjsonm representation. For additional operations, see {!Value} and {!Util}. *)
+
+
type yaml = [
+
| `Scalar of Scalar.t (** YAML scalar value with style and metadata *)
+
| `Alias of string (** Alias reference to an anchored node *)
+
| `A of yaml Sequence.t (** YAML sequence with style and metadata *)
+
| `O of (yaml, yaml) Mapping.t (** YAML mapping with style and metadata *)
+
]
+
(** Full YAML representation preserving anchors, tags, and aliases.
+
+
This type is structurally equivalent to {!Yaml.t}. Use this when you need
+
access to YAML-specific features like anchors and aliases for node reuse,
+
type tags for custom types, scalar styles (plain, quoted, literal, folded),
+
and collection styles (block vs flow).
+
+
For additional operations, see {!Yaml}, {!Scalar}, {!Sequence}, and {!Mapping}. *)
+
+
type document = {
+
version : (int * int) option; (** Optional YAML version directive (e.g., (1, 2) for YAML 1.2) *)
+
tags : (string * string) list; (** TAG directives mapping handles to prefixes *)
+
root : yaml option; (** Root content of the document *)
+
implicit_start : bool; (** Whether the document start marker (---) is implicit *)
+
implicit_end : bool; (** Whether the document end marker (...) is implicit *)
+
}
+
(** A YAML document with directives and metadata.
+
+
This type is structurally equivalent to {!Document.t}. A YAML stream can
+
contain multiple documents, each separated by document markers.
+
+
For additional operations, see {!Document}. *)
+
+
+
(** {2 Character Encoding} *)
+
+
module Encoding = Encoding
+
+
+
(** {2 Parsing} *)
+
+
type version = [ `V1_1 | `V1_2 ]
+
(** YAML specification version. *)
+
+
val default_max_alias_nodes : int
+
(** Default maximum nodes during alias expansion (10 million). *)
+
+
val default_max_alias_depth : int
+
(** Default maximum alias nesting depth (100). *)
+
+
val of_string :
+
?resolve_aliases:bool ->
+
?max_nodes:int ->
+
?max_depth:int ->
+
string -> value
+
(** Parse a YAML string into a JSON-compatible value.
+
+
@param resolve_aliases Whether to expand aliases (default: true)
+
@param max_nodes Maximum nodes during alias expansion (default: 10M)
+
@param max_depth Maximum alias nesting depth (default: 100)
+
@raise Yamlrw_error on parse error or if multiple documents found *)
+
+
val yaml_of_string :
+
?resolve_aliases:bool ->
+
?max_nodes:int ->
+
?max_depth:int ->
+
string -> yaml
+
(** Parse a YAML string preserving full YAML metadata (anchors, tags, etc).
+
+
By default, aliases are NOT resolved, preserving the document structure.
+
+
@param resolve_aliases Whether to expand aliases (default: false)
+
@param max_nodes Maximum nodes during alias expansion (default: 10M)
+
@param max_depth Maximum alias nesting depth (default: 100)
+
@raise Yamlrw_error on parse error or if multiple documents found *)
+
+
val documents_of_string : string -> document list
+
(** Parse a multi-document YAML stream.
+
+
Use this when your YAML input contains multiple documents separated
+
by document markers (---).
+
+
@raise Yamlrw_error on parse error *)
+
+
+
(** {2 Formatting Styles} *)
+
+
module Scalar_style = Scalar_style
+
+
module Layout_style = Layout_style
+
+
+
(** {2 Serialization} *)
+
+
val to_buffer :
+
?encoding:Encoding.t ->
+
?scalar_style:Scalar_style.t ->
+
?layout_style:Layout_style.t ->
+
?buffer:Buffer.t ->
+
value -> Buffer.t
+
(** Serialize a value to a buffer.
+
+
@param encoding Output encoding (default: UTF-8)
+
@param scalar_style Preferred scalar style (default: Any)
+
@param layout_style Preferred layout style (default: Any)
+
@param buffer Optional buffer to append to (allocates new one if not provided)
+
@return The buffer containing the serialized YAML *)
+
+
val to_string :
+
?encoding:Encoding.t ->
+
?scalar_style:Scalar_style.t ->
+
?layout_style:Layout_style.t ->
+
value -> string
+
(** Serialize a value to a YAML string.
+
+
@param encoding Output encoding (default: UTF-8)
+
@param scalar_style Preferred scalar style (default: Any)
+
@param layout_style Preferred layout style (default: Any) *)
+
+
val yaml_to_buffer :
+
?encoding:Encoding.t ->
+
?scalar_style:Scalar_style.t ->
+
?layout_style:Layout_style.t ->
+
?buffer:Buffer.t ->
+
yaml -> Buffer.t
+
(** Serialize a full YAML value to a buffer.
+
+
@param encoding Output encoding (default: UTF-8)
+
@param scalar_style Preferred scalar style (default: Any)
+
@param layout_style Preferred layout style (default: Any)
+
@param buffer Optional buffer to append to (allocates new one if not provided)
+
@return The buffer containing the serialized YAML *)
+
+
val yaml_to_string :
+
?encoding:Encoding.t ->
+
?scalar_style:Scalar_style.t ->
+
?layout_style:Layout_style.t ->
+
yaml -> string
+
(** Serialize a full YAML value to a string.
+
+
@param encoding Output encoding (default: UTF-8)
+
@param scalar_style Preferred scalar style (default: Any)
+
@param layout_style Preferred layout style (default: Any) *)
+
+
val documents_to_buffer :
+
?encoding:Encoding.t ->
+
?scalar_style:Scalar_style.t ->
+
?layout_style:Layout_style.t ->
+
?resolve_aliases:bool ->
+
?buffer:Buffer.t ->
+
document list -> Buffer.t
+
(** Serialize multiple documents to a buffer.
+
+
@param encoding Output encoding (default: UTF-8)
+
@param scalar_style Preferred scalar style (default: Any)
+
@param layout_style Preferred layout style (default: Any)
+
@param resolve_aliases Whether to expand aliases (default: true)
+
@param buffer Optional buffer to append to (allocates new one if not provided)
+
@return The buffer containing the serialized YAML *)
+
+
val documents_to_string :
+
?encoding:Encoding.t ->
+
?scalar_style:Scalar_style.t ->
+
?layout_style:Layout_style.t ->
+
?resolve_aliases:bool ->
+
document list -> string
+
(** Serialize multiple documents to a YAML stream.
+
+
@param encoding Output encoding (default: UTF-8)
+
@param scalar_style Preferred scalar style (default: Any)
+
@param layout_style Preferred layout style (default: Any)
+
@param resolve_aliases Whether to expand aliases (default: true) *)
+
+
(** {2 Buffer Parsing} *)
+
+
val of_buffer :
+
?resolve_aliases:bool ->
+
?max_nodes:int ->
+
?max_depth:int ->
+
Buffer.t -> value
+
(** Parse YAML from a buffer into a JSON-compatible value.
+
+
@param resolve_aliases Whether to expand aliases (default: true)
+
@param max_nodes Maximum nodes during alias expansion (default: 10M)
+
@param max_depth Maximum alias nesting depth (default: 100)
+
@raise Yamlrw_error on parse error or if multiple documents found *)
+
+
val yaml_of_buffer :
+
?resolve_aliases:bool ->
+
?max_nodes:int ->
+
?max_depth:int ->
+
Buffer.t -> yaml
+
(** Parse YAML from a buffer preserving full YAML metadata.
+
+
@param resolve_aliases Whether to expand aliases (default: false)
+
@param max_nodes Maximum nodes during alias expansion (default: 10M)
+
@param max_depth Maximum alias nesting depth (default: 100)
+
@raise Yamlrw_error on parse error or if multiple documents found *)
+
+
val documents_of_buffer : Buffer.t -> document list
+
(** Parse a multi-document YAML stream from a buffer.
+
+
@raise Yamlrw_error on parse error *)
+
+
+
(** {2 Conversion} *)
+
+
val to_json :
+
?resolve_aliases:bool ->
+
?max_nodes:int ->
+
?max_depth:int ->
+
yaml -> value
+
(** Convert full YAML to JSON-compatible value.
+
+
@param resolve_aliases Whether to expand aliases (default: true)
+
@param max_nodes Maximum nodes during alias expansion (default: 10M)
+
@param max_depth Maximum alias nesting depth (default: 100)
+
@raise Yamlrw_error if alias limits exceeded or complex keys found *)
+
+
val of_json : value -> yaml
+
(** Convert JSON-compatible value to full YAML representation. *)
+
+
+
(** {2 Pretty Printing & Equality} *)
+
+
val pp : Format.formatter -> value -> unit
+
(** Pretty-print a value. *)
+
+
val equal : value -> value -> bool
+
(** Test equality of two values. *)
+
+
+
(** {2 Util - Value Combinators}
+
+
Combinators for working with {!type:value} values.
+
+
This module provides constructors, accessors, and transformations
+
for JSON-compatible YAML values. *)
+
+
module Util : sig
+
type t = Value.t
+
(** Alias for {!type:value}. *)
+
+
(** {3 Type Error} *)
+
+
exception Type_error of string * t
+
(** Raised when a value has unexpected type.
+
[Type_error (expected, actual_value)] *)
+
+
(** {3 Constructors} *)
+
+
val null : t
+
(** The null value. *)
+
+
val bool : bool -> t
+
(** Create a boolean value. *)
+
+
val int : int -> t
+
(** Create an integer value (stored as float). *)
+
+
val float : float -> t
+
(** Create a float value. *)
+
+
val string : string -> t
+
(** Create a string value. *)
+
+
val strings : string list -> t
+
(** Create a list of strings. *)
+
+
val list : t list -> t
+
(** Create a list value. *)
+
+
val obj : (string * t) list -> t
+
(** Create an object value from key-value pairs. *)
+
+
(** {3 Type Predicates} *)
+
+
val is_null : t -> bool
+
(** Check if value is null. *)
+
+
val is_bool : t -> bool
+
(** Check if value is a boolean. *)
+
+
val is_number : t -> bool
+
(** Check if value is a number. *)
+
+
val is_string : t -> bool
+
(** Check if value is a string. *)
+
+
val is_list : t -> bool
+
(** Check if value is a list. *)
+
+
val is_obj : t -> bool
+
(** Check if value is an object. *)
+
+
(** {3 Safe Accessors}
+
+
These return [None] if the value has the wrong type. *)
+
+
val as_null : t -> unit option
+
(** Get unit if value is null. *)
+
+
val as_bool : t -> bool option
+
(** Get boolean value. *)
+
+
val as_float : t -> float option
+
(** Get float value. *)
+
+
val as_string : t -> string option
+
(** Get string value. *)
+
+
val as_list : t -> t list option
+
(** Get list value. *)
+
+
val as_obj : t -> (string * t) list option
+
(** Get object as association list. *)
+
+
val as_int : t -> int option
+
(** Get integer value if float is an exact integer. *)
+
+
(** {3 Unsafe Accessors}
+
+
These raise {!Type_error} if the value has the wrong type. *)
+
+
val get_null : t -> unit
+
(** Get unit or raise {!Type_error}. *)
+
+
val get_bool : t -> bool
+
(** Get boolean or raise {!Type_error}. *)
+
+
val get_float : t -> float
+
(** Get float or raise {!Type_error}. *)
+
+
val get_string : t -> string
+
(** Get string or raise {!Type_error}. *)
+
+
val get_list : t -> t list
+
(** Get list or raise {!Type_error}. *)
+
+
val get_obj : t -> (string * t) list
+
(** Get object or raise {!Type_error}. *)
+
+
val get_int : t -> int
+
(** Get integer or raise {!Type_error}. *)
+
+
(** {3 Object Operations} *)
+
+
val mem : string -> t -> bool
+
(** [mem key obj] checks if [key] exists in object [obj].
+
Returns [false] if [obj] is not an object. *)
+
+
val find : string -> t -> t option
+
(** [find key obj] looks up [key] in object [obj].
+
Returns [None] if key not found or if [obj] is not an object. *)
+
+
val get : string -> t -> t
+
(** [get key obj] looks up [key] in object [obj].
+
Raises [Not_found] if key not found. *)
+
+
val keys : t -> string list
+
(** Get all keys from an object.
+
@raise Type_error if not an object *)
+
+
val values : t -> t list
+
(** Get all values from an object.
+
@raise Type_error if not an object *)
+
+
val update : string -> t -> t -> t
+
(** [update key value obj] sets [key] to [value] in [obj].
+
Adds the key if it doesn't exist.
+
@raise Type_error if [obj] is not an object *)
+
+
val remove : string -> t -> t
+
(** [remove key obj] removes [key] from [obj].
+
@raise Type_error if [obj] is not an object *)
+
+
val combine : t -> t -> t
+
(** [combine obj1 obj2] merges two objects, with [obj2] values taking precedence.
+
@raise Type_error if either argument is not an object *)
+
+
(** {3 List Operations} *)
+
+
val map : (t -> t) -> t -> t
+
(** [map f lst] applies [f] to each element of list [lst].
+
@raise Type_error if [lst] is not a list *)
+
+
val mapi : (int -> t -> t) -> t -> t
+
(** [mapi f lst] applies [f i x] to each element [x] at index [i].
+
@raise Type_error if [lst] is not a list *)
+
+
val filter : (t -> bool) -> t -> t
+
(** [filter pred lst] keeps elements satisfying [pred].
+
@raise Type_error if [lst] is not a list *)
+
+
val fold : ('a -> t -> 'a) -> 'a -> t -> 'a
+
(** [fold f init lst] folds [f] over list [lst].
+
@raise Type_error if [lst] is not a list *)
+
+
val nth : int -> t -> t option
+
(** [nth n lst] gets element at index [n].
+
Returns [None] if [lst] is not a list or index out of bounds. *)
+
+
val length : t -> int
+
(** Get the length of a list or object. Returns 0 for other types. *)
+
+
val flatten : t -> t
+
(** Flatten a list of lists into a single list.
+
Non-list elements are kept as-is.
+
@raise Type_error if not a list *)
+
+
(** {3 Path Operations} *)
+
+
val get_path : string list -> t -> t option
+
(** [get_path ["a"; "b"; "c"] obj] looks up nested path [obj.a.b.c].
+
Returns [None] if any key is not found. *)
+
+
val get_path_exn : string list -> t -> t
+
(** Like {!get_path} but raises [Not_found] if path not found. *)
+
+
(** {3 Iteration} *)
+
+
val iter_obj : (string -> t -> unit) -> t -> unit
+
(** [iter_obj f obj] calls [f key value] for each pair in [obj].
+
@raise Type_error if [obj] is not an object *)
+
+
val iter_list : (t -> unit) -> t -> unit
+
(** [iter_list f lst] calls [f] on each element of [lst].
+
@raise Type_error if [lst] is not a list *)
+
+
val fold_obj : ('a -> string -> t -> 'a) -> 'a -> t -> 'a
+
(** [fold_obj f init obj] folds over object key-value pairs.
+
@raise Type_error if [obj] is not an object *)
+
+
(** {3 Mapping} *)
+
+
val map_obj : (string -> t -> t) -> t -> t
+
(** [map_obj f obj] maps [f key value] over each pair in [obj].
+
@raise Type_error if [obj] is not an object *)
+
+
val filter_obj : (string -> t -> bool) -> t -> t
+
(** [filter_obj pred obj] keeps pairs satisfying [pred key value].
+
@raise Type_error if [obj] is not an object *)
+
+
(** {3 Conversion Helpers}
+
+
Get values with optional defaults. If no default is provided and the type
+
doesn't match, these raise {!Type_error}. *)
+
+
val to_bool : ?default:bool -> t -> bool
+
(** Get boolean or return default.
+
@raise Type_error if type doesn't match and no default provided *)
+
+
val to_int : ?default:int -> t -> int
+
(** Get integer or return default.
+
@raise Type_error if type doesn't match and no default provided *)
+
+
val to_float : ?default:float -> t -> float
+
(** Get float or return default.
+
@raise Type_error if type doesn't match and no default provided *)
+
+
val to_string : ?default:string -> t -> string
+
(** Get string or return default.
+
@raise Type_error if type doesn't match and no default provided *)
+
+
val to_list : ?default:t list -> t -> t list
+
(** Get list or return default.
+
@raise Type_error if type doesn't match and no default provided *)
+
end
+
+
+
(** {2 Stream - Low-Level Event API}
+
+
Low-level streaming API for event-based YAML processing.
+
+
This is useful for:
+
- Processing very large YAML files incrementally
+
- Building custom YAML transformers
+
- Fine-grained control over YAML emission *)
+
+
module Stream : sig
+
+
(** {3 Event Types} *)
+
+
type event = Event.t
+
(** A parsing or emitting event. *)
+
+
type position = Position.t
+
(** A position in the source (line, column, byte offset). *)
+
+
type event_result = {
+
event : event;
+
start_pos : position;
+
end_pos : position;
+
}
+
(** Result of parsing an event with its source location:
    the event itself plus the start and end positions of the
    source text it was parsed from. *)
+
+
(** {3 Parsing} *)
+
+
type parser
+
(** A streaming YAML parser. *)
+
+
val parser : string -> parser
+
(** [parser s] creates a streaming parser that reads YAML events
    from the string [s]. *)
+
+
val next : parser -> event_result option
+
(** Get the next event from the parser.
+
Returns [None] when parsing is complete. *)
+
+
val iter : (event -> position -> position -> unit) -> parser -> unit
+
(** [iter f parser] calls [f event start_pos end_pos] for each event. *)
+
+
val fold : ('a -> event -> 'a) -> 'a -> parser -> 'a
+
(** [fold f init parser] folds [f] over all events.
    Positions are discarded; use {!iter} or {!next} if locations
    are needed. *)
+
+
(** {3 Emitting} *)
+
+
type emitter
+
(** A streaming YAML emitter. *)
+
+
val emitter : ?len:int -> unit -> emitter
+
(** [emitter ?len ()] creates a new emitter.
    [len] is presumably an initial output-buffer size hint —
    confirm against the implementation. *)
+
+
val contents : emitter -> string
+
(** Get the YAML emitted so far as a string. *)
+
+
val emit : emitter -> event -> unit
+
(** Emit an event.
+
@raise Yamlrw_error if the event sequence is invalid *)
+
+
(** {3 Event Emission Helpers}

    Convenience wrappers around {!emit} that construct the
    corresponding {!Event.t} from their arguments. *)
+
+
val stream_start : emitter -> Encoding.t -> unit
+
(** Emit a stream start event with the given character encoding. *)
+
+
val stream_end : emitter -> unit
+
(** Emit a stream end event. *)
+
+
val document_start : emitter -> ?version:version -> ?implicit:bool -> unit -> unit
+
(** Emit a document start event.
+
@param version YAML version directive
+
@param implicit Whether start marker is implicit (default: true) *)
+
+
val document_end : emitter -> ?implicit:bool -> unit -> unit
+
(** Emit a document end event.
+
@param implicit Whether end marker is implicit (default: true) *)
+
+
val scalar : emitter -> ?anchor:string -> ?tag:string -> ?style:Scalar_style.t -> string -> unit
+
(** Emit a scalar value.
+
@param anchor Optional anchor name
+
@param tag Optional type tag
+
@param style Scalar style (default: Any) *)
+
+
val alias : emitter -> string -> unit
+
(** Emit an alias reference to a previously emitted anchor. *)
+
+
val sequence_start : emitter -> ?anchor:string -> ?tag:string -> ?style:Layout_style.t -> unit -> unit
+
(** Emit a sequence start event.
+
@param anchor Optional anchor name
+
@param tag Optional type tag
+
@param style Layout style (default: Any) *)
+
+
val sequence_end : emitter -> unit
+
(** Emit a sequence end event. *)
+
+
val mapping_start : emitter -> ?anchor:string -> ?tag:string -> ?style:Layout_style.t -> unit -> unit
+
(** Emit a mapping start event.
+
@param anchor Optional anchor name
+
@param tag Optional type tag
+
@param style Layout style (default: Any) *)
+
+
val mapping_end : emitter -> unit
+
(** Emit a mapping end event. *)
+
end
+
+
+
(** {2 Internal Modules}
+
+
These modules are exposed for advanced use cases requiring
+
fine-grained control over parsing, emission, or data structures.
+
+
For typical usage, prefer the top-level functions and {!Util}. *)
+
+
module Position = Position
+
(** Source position tracking (line, column, byte offset). *)
+
+
module Span = Span
+
(** Source span (range of positions). *)
+
+
module Chomping = Chomping
+
(** Block scalar chomping modes. *)
+
+
module Tag = Tag
+
(** YAML type tags. *)
+
+
module Value = Value
+
(** JSON-compatible value type and operations. *)
+
+
module Scalar = Scalar
+
(** YAML scalar with metadata (anchor, tag, style). *)
+
+
module Sequence = Sequence
+
(** YAML sequence with metadata. *)
+
+
module Mapping = Mapping
+
(** YAML mapping with metadata. *)
+
+
module Yaml = Yaml
+
(** Full YAML value type (preserves scalar/sequence/mapping metadata). *)
+
+
module Document = Document
+
(** YAML document with directives. *)
+
+
module Token = Token
+
(** Lexical tokens produced by {!Scanner}. *)
+
+
module Scanner = Scanner
+
(** Lexical scanner. *)
+
+
module Event = Event
+
(** Parser events (also consumed by {!Emitter}). *)
+
+
module Parser = Parser
+
(** Event-based parser. *)
+
+
module Loader = Loader
+
(** Document loader. *)
+
+
module Emitter = Emitter
+
(** Event-based emitter. *)
+
+
module Input = Input
+
(** Input stream utilities. *)
+
+
module Serialize = Serialize
+
(** Buffer serialization utilities. *)
+24
tests/cram/anchor.yml
···
+
datetime: 2001-12-15T02:59:43.1Z
+
datetime_with_spaces: 2001-12-14 21:59:43.10 -5
+
date: 2002-12-14
+
+
# The !!binary tag indicates that a string is actually a base64-encoded
+
# representation of a binary blob.
+
gif_file: !!binary |
+
R0lGODlhDAAMAIQAAP//9/X17unp5WZmZgAAAOfn515eXvPz7Y6OjuDg4J+fn5
+
OTk6enp56enmlpaWNjY6Ojo4SEhP/++f/++f/++f/++f/++f/++f/++f/++f/+
+
+f/++f/++f/++f/++f/++SH+Dk1hZGUgd2l0aCBHSU1QACwAAAAADAAMAAAFLC
+
AgjoEwnuNAFOhpEMTRiggcz4BNJHrv/zCFcLiwMWYNG84BwwEeECcgggoBADs=
+
+
# YAML also has a set type, which looks like this:
+
set:
+
? item1
+
? item2
+
? item3
+
+
# Like Python, sets are just maps with null values; the above is equivalent to:
+
set2:
+
item1: null
+
item2: null
+
item3: null
+
+375
tests/cram/anchors.t
···
+
Anchor and Alias Support
+
+
These tests exercise anchor (&) and alias (*) resolution: each alias is
+
expanded to the value of the node carrying the matching anchor.
+
+
Test: Simple scalar anchor and alias
+
+
$ echo 'anchor: &anc value
+
> alias: *anc' | yamlcat 2>&1
+
anchor: value
+
alias: value
+
+
Test: Numeric anchor and alias
+
+
$ echo 'original: &num 42
+
> copy: *num' | yamlcat 2>&1
+
original: 42
+
copy: 42
+
+
Test: Sequence anchor and alias
+
+
$ echo 'list: &items
+
> - one
+
> - two
+
> copy: *items' | yamlcat 2>&1
+
list:
+
- one
+
- two
+
copy:
+
- one
+
- two
+
+
Test: Mapping anchor and alias
+
+
$ echo 'person: &p
+
> name: Alice
+
> age: 30
+
> copy: *p' | yamlcat 2>&1
+
person:
+
name: Alice
+
age: 30
+
copy:
+
name: Alice
+
age: 30
+
+
Test: Multiple aliases to same anchor
+
+
$ echo 'value: &v 100
+
> first: *v
+
> second: *v
+
> third: *v' | yamlcat 2>&1
+
value: 100
+
first: 100
+
second: 100
+
third: 100
+
+
Test: Anchor in flow context
+
+
$ echo '[&item apple, *item]' | yamlcat 2>&1
+
- apple
+
- apple
+
+
Test: Anchor with mapping in flow
+
+
$ echo '{original: &cfg {a: 1}, copy: *cfg}' | yamlcat 2>&1
+
original:
+
a: 1
+
copy:
+
a: 1
+
+
Test: Anchors file from test suite
+
+
$ yamlcat anchors_basic.yml 2>&1
+
scalar_anchor: Hello, World!
+
scalar_alias: Hello, World!
+
---
+
original: 42
+
copy: 42
+
another_copy: 42
+
---
+
original_list:
+
- apple
+
- banana
+
- cherry
+
copied_list:
+
- apple
+
- banana
+
- cherry
+
---
+
original_map:
+
name: Alice
+
age: 30
+
city: London
+
copied_map:
+
name: Alice
+
age: 30
+
city: London
+
---
+
defaults:
+
timeout: 30
+
retries: 3
+
colors:
+
- red
+
- green
+
- blue
+
config:
+
settings:
+
timeout: 30
+
retries: 3
+
palette:
+
- red
+
- green
+
- blue
+
---
+
template:
+
metadata:
+
version: 1
+
author: John Doe
+
settings:
+
enabled: true
+
debug: false
+
instance1:
+
metadata:
+
version: 1
+
author: John Doe
+
settings:
+
enabled: true
+
debug: false
+
instance2:
+
metadata:
+
version: 1
+
author: John Doe
+
settings:
+
enabled: true
+
debug: false
+
---
+
items:
+
- id: 1
+
name: First
+
- id: 2
+
name: Second
+
- id: 1
+
name: First
+
---
+
shared_value: 100
+
calculations:
+
base: 100
+
doubled: 200
+
reference: 100
+
another_ref: 100
+
---
+
feature_flag: true
+
features:
+
login: true
+
signup: true
+
export: true
+
---
+
empty: null
+
values:
+
first: null
+
second: null
+
---
+
message: "This is a multi-line\nmessage with some\nspecial content!\n"
+
output1: "This is a multi-line\nmessage with some\nspecial content!\n"
+
output2: "This is a multi-line\nmessage with some\nspecial content!\n"
+
---
+
database:
+
primary:
+
host: localhost
+
port: 5432
+
ssl: true
+
replica:
+
host: localhost
+
port: 5432
+
ssl: true
+
backup:
+
host: localhost
+
port: 5432
+
ssl: true
+
+
$ yamlcat anchors_merge.yml 2>&1
+
defaults:
+
timeout: 30
+
retries: 3
+
verbose: false
+
production:
+
<<:
+
timeout: 30
+
retries: 3
+
verbose: false
+
environment: production
+
---
+
base:
+
color: red
+
size: medium
+
weight: 100
+
custom:
+
<<:
+
color: red
+
size: medium
+
weight: 100
+
color: blue
+
shape: circle
+
---
+
connection:
+
host: localhost
+
port: 8080
+
authentication:
+
username: admin
+
password: secret
+
server:
+
<<:
+
- host: localhost
+
port: 8080
+
- username: admin
+
password: secret
+
ssl: true
+
---
+
defaults:
+
timeout: 30
+
retries: 3
+
advanced:
+
cache: true
+
pool_size: 10
+
config:
+
<<:
+
- timeout: 30
+
retries: 3
+
- cache: true
+
pool_size: 10
+
timeout: 60
+
custom: value
+
---
+
base_style:
+
font: Arial
+
size: 12
+
heading_defaults:
+
<<:
+
font: Arial
+
size: 12
+
weight: bold
+
main_heading:
+
<<:
+
<<:
+
font: Arial
+
size: 12
+
weight: bold
+
size: 18
+
color: navy
+
---
+
common:
+
enabled: true
+
log_level: info
+
services:
+
- name: web
+
<<:
+
enabled: true
+
log_level: info
+
port: 80
+
- name: api
+
<<:
+
enabled: true
+
log_level: info
+
port: 3000
+
- name: worker
+
<<:
+
enabled: true
+
log_level: info
+
threads: 4
+
---
+
empty: {}
+
config:
+
<<: {}
+
key: value
+
---
+
metadata:
+
created: 2023-01-01
+
author: Admin
+
tags:
+
- v1
+
- stable
+
document:
+
<<:
+
created: 2023-01-01
+
author: Admin
+
tags:
+
- v1
+
- stable
+
title: Important Document
+
content: Some content here
+
---
+
level1:
+
a: 1
+
b: 2
+
level2:
+
<<:
+
a: 1
+
b: 2
+
c: 3
+
level3:
+
<<:
+
<<:
+
a: 1
+
b: 2
+
c: 3
+
d: 4
+
---
+
first:
+
name: First
+
value: 100
+
priority: low
+
second:
+
name: Second
+
value: 200
+
category: important
+
combined:
+
<<:
+
- name: First
+
value: 100
+
priority: low
+
- name: Second
+
value: 200
+
category: important
+
name: Combined
+
---
+
numbers:
+
count: 42
+
ratio: 3.14
+
active: true
+
derived:
+
<<:
+
count: 42
+
ratio: 3.14
+
active: true
+
label: Test
+
---
+
db_defaults:
+
pool_size: 5
+
timeout: 30
+
ssl: false
+
cache_defaults:
+
ttl: 3600
+
max_size: 1000
+
development:
+
database:
+
<<:
+
pool_size: 5
+
timeout: 30
+
ssl: false
+
host: localhost
+
name: dev_db
+
cache:
+
<<:
+
ttl: 3600
+
max_size: 1000
+
backend: memory
+
production:
+
database:
+
<<:
+
pool_size: 5
+
timeout: 30
+
ssl: false
+
host: prod.example.com
+
name: prod_db
+
ssl: true
+
pool_size: 20
+
cache:
+
<<:
+
ttl: 3600
+
max_size: 1000
+
backend: redis
+
ttl: 7200
+
+
Note: The anchor test files contain multiple documents; each resolved
+
document is emitted above, separated by "---" markers.
+125
tests/cram/anchors_basic.yml
···
+
# Basic Anchor and Alias Test Cases
+
# Tests fundamental anchor (&) and alias (*) functionality
+
+
# Test 1: Simple scalar anchor and alias
+
---
+
scalar_anchor: &simple_scalar "Hello, World!"
+
scalar_alias: *simple_scalar
+
# Expected: both should have the value "Hello, World!"
+
+
# Test 2: Numeric scalar anchor
+
---
+
original: &num 42
+
copy: *num
+
another_copy: *num
+
# Expected: all three should have the value 42
+
+
# Test 3: Sequence anchor and alias
+
---
+
original_list: &my_list
+
- apple
+
- banana
+
- cherry
+
+
copied_list: *my_list
+
# Expected: both lists should be identical
+
+
# Test 4: Mapping anchor and alias
+
---
+
original_map: &person
+
name: Alice
+
age: 30
+
city: London
+
+
copied_map: *person
+
# Expected: both maps should be identical
+
+
# Test 5: Multiple anchors in same document
+
---
+
defaults: &defaults
+
timeout: 30
+
retries: 3
+
+
colors: &colors
+
- red
+
- green
+
- blue
+
+
config:
+
settings: *defaults
+
palette: *colors
+
# Expected: config.settings should have timeout and retries, config.palette should have the color list
+
+
# Test 6: Nested structure with anchor
+
---
+
template: &template
+
metadata:
+
version: 1.0
+
author: John Doe
+
settings:
+
enabled: true
+
debug: false
+
+
instance1: *template
+
instance2: *template
+
# Expected: both instances should be identical copies of template
+
+
# Test 7: Anchor in sequence
+
---
+
items:
+
- &first_item
+
id: 1
+
name: First
+
- id: 2
+
name: Second
+
- *first_item
+
# Expected: first and third items should be identical
+
+
# Test 8: Multiple uses of same alias
+
---
+
shared_value: &shared 100
+
calculations:
+
base: *shared
+
doubled: 200 # Just a value, not calculated
+
reference: *shared
+
another_ref: *shared
+
# Expected: base, reference, and another_ref should all be 100
+
+
# Test 9: Boolean anchor
+
---
+
feature_flag: &enabled true
+
features:
+
login: *enabled
+
signup: *enabled
+
export: *enabled
+
# Expected: all features should be true
+
+
# Test 10: Null anchor
+
---
+
empty: &null_value ~
+
values:
+
first: *null_value
+
second: *null_value
+
# Expected: all should be null
+
+
# Test 11: String with special characters
+
---
+
message: &msg |
+
This is a multi-line
+
message with some
+
special content!
+
+
output1: *msg
+
output2: *msg
+
# Expected: both outputs should have the same multi-line string
+
+
# Test 12: Anchor in mapping value
+
---
+
database:
+
primary: &db_config
+
host: localhost
+
port: 5432
+
ssl: true
+
replica: *db_config
+
backup: *db_config
+
# Expected: primary, replica, and backup should all have identical configuration
+194
tests/cram/anchors_merge.yml
···
+
# Merge Key Test Cases
+
# Tests YAML 1.1 merge key (<<) functionality
+
# Note: Merge keys are a YAML 1.1 feature and may not be supported in YAML 1.2
+
+
# Test 1: Basic merge key
+
---
+
defaults: &defaults
+
timeout: 30
+
retries: 3
+
verbose: false
+
+
production:
+
<<: *defaults
+
environment: production
+
# Expected: production should have timeout, retries, verbose from defaults, plus environment
+
+
# Test 2: Override after merge
+
---
+
base: &base
+
color: red
+
size: medium
+
weight: 100
+
+
custom:
+
<<: *base
+
color: blue
+
shape: circle
+
# Expected: custom should have size and weight from base, but color should be blue, and add shape
+
+
# Test 3: Merging multiple anchors
+
---
+
connection: &connection
+
host: localhost
+
port: 8080
+
+
authentication: &auth
+
username: admin
+
password: secret
+
+
server:
+
<<: [*connection, *auth]
+
ssl: true
+
# Expected: server should have host, port, username, password, and ssl
+
+
# Test 4: Multiple merges with override
+
---
+
defaults: &defaults
+
timeout: 30
+
retries: 3
+
+
advanced: &advanced
+
cache: true
+
pool_size: 10
+
+
config:
+
<<: [*defaults, *advanced]
+
timeout: 60
+
custom: value
+
# Expected: config should have all fields from both anchors, with timeout overridden to 60
+
+
# Test 5: Nested merge
+
---
+
base_style: &base_style
+
font: Arial
+
size: 12
+
+
heading_defaults: &heading
+
<<: *base_style
+
weight: bold
+
+
main_heading:
+
<<: *heading
+
size: 18
+
color: navy
+
# Expected: main_heading should inherit from heading (which inherits from base_style) with overrides
+
+
# Test 6: Merge in sequence context
+
---
+
common: &common
+
enabled: true
+
log_level: info
+
+
services:
+
- name: web
+
<<: *common
+
port: 80
+
- name: api
+
<<: *common
+
port: 3000
+
- name: worker
+
<<: *common
+
threads: 4
+
# Expected: each service should have enabled and log_level, plus their specific fields
+
+
# Test 7: Empty merge (edge case)
+
---
+
empty: &empty {}
+
+
config:
+
<<: *empty
+
key: value
+
# Expected: config should just have key: value
+
+
# Test 8: Merge with nested structures
+
---
+
metadata: &metadata
+
created: 2023-01-01
+
author: Admin
+
tags:
+
- v1
+
- stable
+
+
document:
+
<<: *metadata
+
title: Important Document
+
content: Some content here
+
# Expected: document should have all metadata fields plus title and content
+
+
# Test 9: Chain of merges
+
---
+
level1: &l1
+
a: 1
+
b: 2
+
+
level2: &l2
+
<<: *l1
+
c: 3
+
+
level3:
+
<<: *l2
+
d: 4
+
# Expected: level3 should have a, b, c, and d
+
+
# Test 10: Merge with conflicting keys
+
---
+
first: &first
+
name: First
+
value: 100
+
priority: low
+
+
second: &second
+
name: Second
+
value: 200
+
category: important
+
+
combined:
+
<<: [*first, *second]
+
name: Combined
+
# Expected: later merges and direct assignments take precedence
+
+
# Test 11: Merge preserving types
+
---
+
numbers: &numbers
+
count: 42
+
ratio: 3.14
+
active: true
+
+
derived:
+
<<: *numbers
+
label: Test
+
# Expected: types should be preserved (int, float, bool)
+
+
# Test 12: Complex real-world example
+
---
+
db_defaults: &db_defaults
+
pool_size: 5
+
timeout: 30
+
ssl: false
+
+
cache_defaults: &cache_defaults
+
ttl: 3600
+
max_size: 1000
+
+
development:
+
database:
+
<<: *db_defaults
+
host: localhost
+
name: dev_db
+
cache:
+
<<: *cache_defaults
+
backend: memory
+
+
production:
+
database:
+
<<: *db_defaults
+
host: prod.example.com
+
name: prod_db
+
ssl: true
+
pool_size: 20
+
cache:
+
<<: *cache_defaults
+
backend: redis
+
ttl: 7200
+
# Expected: each environment should inherit defaults with environment-specific overrides
+70
tests/cram/bomb.t
···
+
Billion laughs attack protection tests
+
+
Test with a tight node limit (small bomb would expand to ~6561 nodes):
+
+
$ yamlcat --max-nodes 100 --json bomb_small.yml
+
Error: alias expansion exceeded node limit (100 nodes)
+
[1]
+
+
Test with a limit that allows the small bomb:
+
+
$ yamlcat --max-nodes 10000 --json bomb_small.yml | head -c 100
+
{"a": [1, 2, 3, 4, 5, 6, 7, 8, 9], "b": [[1, 2, 3, 4, 5, 6, 7, 8, 9], [1, 2, 3, 4, 5, 6, 7, 8, 9], [
+
+
Test depth limit with a nested alias chain:
+
+
$ yamlcat --max-depth 2 --json depth_bomb.yml | head -c 50
+
{"a": ["x", "y", "z"], "b": [["x", "y", "z"], ["x"
+
+
$ yamlcat --max-depth 10 --json depth_bomb.yml | head -c 50
+
{"a": ["x", "y", "z"], "b": [["x", "y", "z"], ["x"
+
+
Test that --no-resolve-aliases keeps aliases as-is (in debug mode):
+
+
$ yamlcat --no-resolve-aliases --debug simple_alias.yml
+
Document 1:
+
document(
+
implicit_start=true,
+
implicit_end=true,
+
root=mapping(
+
style=block,
+
members={
+
scalar("anchor", style=plain):
+
scalar("hello", anchor=anc, style=plain),
+
scalar("alias", style=plain): *anc
+
})
+
)
+
+
With resolve (default), aliases are expanded:
+
+
$ yamlcat --json simple_alias.yml
+
{"anchor": "hello", "alias": "hello"}
+
+
Test the full bomb is rejected with default limits:
+
+
$ yamlcat --json bomb.yml 2>&1 | head -1
+
Error: alias expansion exceeded node limit (10000000 nodes)
+
+
With a very small limit:
+
+
$ yamlcat --max-nodes 50 --json bomb.yml
+
Error: alias expansion exceeded node limit (50 nodes)
+
[1]
+
+
Test that valid YAML with aliases works:
+
+
$ yamlcat --json valid_alias.yml
+
{"defaults": {"timeout": 30, "retries": 3}, "production": {"<<": {"timeout": 30, "retries": 3}, "port": 8080}}
+
+
Test help includes the new options:
+
+
$ yamlcat --help=plain | grep 'max-nodes'
+
--max-nodes=N (absent=10000000)
+
yamlcat --max-nodes 1000 --max-depth 10 untrusted.yaml
+
+
$ yamlcat --help=plain | grep 'max-depth'
+
--max-depth=N (absent=100)
+
yamlcat --max-nodes 1000 --max-depth 10 untrusted.yaml
+
+
$ yamlcat --help=plain | grep 'no-resolve-aliases'
+
--no-resolve-aliases
+9
tests/cram/bomb.yml
···
+
a: &a ["lol","lol","lol","lol","lol","lol","lol","lol","lol"]
+
b: &b [*a,*a,*a,*a,*a,*a,*a,*a,*a]
+
c: &c [*b,*b,*b,*b,*b,*b,*b,*b,*b]
+
d: &d [*c,*c,*c,*c,*c,*c,*c,*c,*c]
+
e: &e [*d,*d,*d,*d,*d,*d,*d,*d,*d]
+
f: &f [*e,*e,*e,*e,*e,*e,*e,*e,*e]
+
g: &g [*f,*f,*f,*f,*f,*f,*f,*f,*f]
+
h: &h [*g,*g,*g,*g,*g,*g,*g,*g,*g]
+
i: &i [*h,*h,*h,*h,*h,*h,*h,*h,*h]
+6
tests/cram/bomb_small.yml
···
+
# Small "billion laughs" style YAML bomb for testing
+
# Expands to 9^4 = 6561 nodes when aliases are resolved
+
a: &a [1, 2, 3, 4, 5, 6, 7, 8, 9]
+
b: &b [*a, *a, *a, *a, *a, *a, *a, *a, *a]
+
c: &c [*b, *b, *b, *b, *b, *b, *b, *b, *b]
+
d: &d [*c, *c, *c, *c, *c, *c, *c, *c, *c]
+23
tests/cram/cohttp.yml
···
+
language: c
+
sudo: false
+
services:
+
- docker
+
install: wget https://raw.githubusercontent.com/ocaml/ocaml-travisci-skeleton/master/.travis-docker.sh
+
script: bash -ex ./.travis-docker.sh
+
env:
+
global:
+
- EXTRA_REMOTES="https://github.com/mirage/mirage-dev.git"
+
- PINS="cohttp-top:. cohttp-async:. cohttp-lwt-unix:. cohttp-lwt-jsoo:. cohttp-lwt:. cohttp-mirage:. cohttp:."
+
matrix:
+
- PACKAGE="cohttp" DISTRO="alpine-3.5" OCAML_VERSION="4.06.0"
+
- PACKAGE="cohttp-async" DISTRO="alpine" OCAML_VERSION="4.06.0"
+
- PACKAGE="cohttp-lwt" DISTRO="debian-unstable" OCAML_VERSION="4.03.0"
+
- PACKAGE="cohttp-mirage" DISTRO="debian-unstable" OCAML_VERSION="4.03.0"
+
notifications:
+
webhooks:
+
urls:
+
- https://webhooks.gitter.im/e/6ee5059c7420709f4ad1
+
on_success: change
+
on_failure: always
+
on_start: false
+
+863
tests/cram/collections.t
···
+
Test collections_block.yml - Block style collections
+
+
$ yamlcat collections_block.yml
+
simple_sequence:
+
- apple
+
- banana
+
- cherry
+
- date
+
simple_mapping:
+
name: John Doe
+
age: 30
+
city: New York
+
country: USA
+
nested_sequences:
+
-
+
- alpha
+
- beta
+
- gamma
+
-
+
- one
+
- two
+
- three
+
-
+
- red
+
- green
+
- blue
+
nested_mappings:
+
person:
+
name: Alice
+
contact:
+
email: alice@example.com
+
phone: 555-1234
+
address:
+
street: 123 Main St
+
city: Boston
+
mapping_with_sequences:
+
colors:
+
- red
+
- green
+
- blue
+
sizes:
+
- small
+
- medium
+
- large
+
numbers:
+
- 1
+
- 2
+
- 3
+
sequence_with_mappings:
+
- name: Alice
+
age: 25
+
role: developer
+
- name: Bob
+
age: 30
+
role: designer
+
- name: Charlie
+
age: 35
+
role: manager
+
deep_nesting:
+
level1:
+
level2:
+
level3:
+
level4:
+
- deeply
+
- nested
+
- values
+
another_key: value
+
items:
+
- item1
+
- item2
+
metadata:
+
created: 2024-01-01
+
modified: 2024-12-04
+
complex_structure:
+
database:
+
connections:
+
- host: db1.example.com
+
port: 5432
+
credentials:
+
username: admin
+
password: secret
+
- host: db2.example.com
+
port: 5432
+
credentials:
+
username: readonly
+
password: public
+
services:
+
- name: api
+
endpoints:
+
- /users
+
- /posts
+
- /comments
+
config:
+
timeout: 30
+
retries: 3
+
- name: worker
+
tasks:
+
- email
+
- reports
+
config:
+
concurrency: 10
+
empty_collections:
+
empty_sequence: []
+
empty_mapping: {}
+
sequence_with_empty:
+
- value1
+
- []
+
- value2
+
mapping_with_empty:
+
key1: value1
+
key2: {}
+
key3: value3
+
+
Test collections_block.yml with JSON output
+
+
$ yamlcat --json collections_block.yml
+
{"simple_sequence": ["apple", "banana", "cherry", "date"], "simple_mapping": {"name": "John Doe", "age": 30, "city": "New York", "country": "USA"}, "nested_sequences": [["alpha", "beta", "gamma"], ["one", "two", "three"], ["red", "green", "blue"]], "nested_mappings": {"person": {"name": "Alice", "contact": {"email": "alice@example.com", "phone": "555-1234"}, "address": {"street": "123 Main St", "city": "Boston"}}}, "mapping_with_sequences": {"colors": ["red", "green", "blue"], "sizes": ["small", "medium", "large"], "numbers": [1, 2, 3]}, "sequence_with_mappings": [{"name": "Alice", "age": 25, "role": "developer"}, {"name": "Bob", "age": 30, "role": "designer"}, {"name": "Charlie", "age": 35, "role": "manager"}], "deep_nesting": {"level1": {"level2": {"level3": {"level4": ["deeply", "nested", "values"], "another_key": "value"}, "items": ["item1", "item2"]}, "metadata": {"created": "2024-01-01", "modified": "2024-12-04"}}}, "complex_structure": {"database": {"connections": [{"host": "db1.example.com", "port": 5432, "credentials": {"username": "admin", "password": "secret"}}, {"host": "db2.example.com", "port": 5432, "credentials": {"username": "readonly", "password": "public"}}]}, "services": [{"name": "api", "endpoints": ["/users", "/posts", "/comments"], "config": {"timeout": 30, "retries": 3}}, {"name": "worker", "tasks": ["email", "reports"], "config": {"concurrency": 10}}]}, "empty_collections": {"empty_sequence": [], "empty_mapping": {}, "sequence_with_empty": ["value1", [], "value2"], "mapping_with_empty": {"key1": "value1", "key2": {}, "key3": "value3"}}}
+
+
Test collections_block.yml with flow output
+
+
$ yamlcat --flow collections_block.yml
+
{simple_sequence: [apple, banana, cherry, date]simple_mapping, {name: John Doe, age: 30, city: New York, country: USA}, nested_sequences: [[alpha, beta, gamma], [one, two, three], [red, green, blue]]nested_mappings, {person: {name: Alice, contact: {email: alice@example.com, phone: 555-1234}address, {street: 123 Main St, city: Boston}}}, mapping_with_sequences: {colors: [red, green, blue]sizes, [small, medium, large], numbers: [1, 2, 3]}sequence_with_mappings, [{name: Alice, age: 25, role: developer}, {name: Bob, age: 30, role: designer}, {name: Charlie, age: 35, role: manager}], deep_nesting: {level1: {level2: {level3: {level4: [deeply, nested, values]another_key, value: }items, [item1, item2]}metadata, {created: 2024-01-01, modified: 2024-12-04}}}complex_structure, {database: {connections: [{host: db1.example.com, port: 5432, credentials: {username: admin, password: secret}}, {host: db2.example.com, port: 5432, credentials: {username: readonly, password: public}}]}services, [{name: api, endpoints: [/users, /posts, /comments]config, {timeout: 30, retries: 3}}, {name: worker, tasks: [email, reports]config, {concurrency: 10}}]}, empty_collections: {empty_sequence: []empty_mapping, {}, sequence_with_empty: [value1, [], value2]mapping_with_empty, {key1: value1, key2: {}key3, value3: }}}
+
+
Test collections_compact.yml - Compact notation
+
+
$ yamlcat collections_compact.yml
+
compact_sequence:
+
- name: Alice
+
age: 25
+
city: Boston
+
- name: Bob
+
age: 30
+
city: Seattle
+
- name: Charlie
+
age: 35
+
city: Portland
+
compact_nested:
+
- id: 1
+
details:
+
type: admin
+
permissions:
+
- read
+
- write
+
- delete
+
- id: 2
+
details:
+
type: user
+
permissions:
+
- read
+
compact_complex:
+
- key1: value1
+
key2: value2
+
nested:
+
sub1: val1
+
sub2: val2
+
- key1: value3
+
key2: value4
+
nested:
+
sub1: val3
+
sub2: val4
+
users:
+
- username: alice
+
email: alice@example.com
+
active: true
+
- username: bob
+
email: bob@example.com
+
active: false
+
compact_with_flow:
+
- name: service1
+
ports:
+
- 8080
+
- 8443
+
env:
+
DEBUG: true
+
MODE: production
+
- name: service2
+
ports:
+
- 3000
+
env:
+
DEBUG: false
+
MODE: development
+
deep_compact:
+
- category: electronics
+
items:
+
- name: laptop
+
specs:
+
cpu: Intel i7
+
ram: 16GB
+
storage: 512GB SSD
+
- name: phone
+
specs:
+
os: Android
+
ram: 8GB
+
storage: 256GB
+
- category: furniture
+
items:
+
- name: desk
+
dimensions:
+
width: 150cm
+
depth: 75cm
+
height: 75cm
+
- name: chair
+
dimensions:
+
width: 60cm
+
depth: 60cm
+
height: 120cm
+
mixed_compact:
+
databases:
+
- type: postgresql
+
connection:
+
host: localhost
+
port: 5432
+
credentials:
+
user: admin
+
password: secret
+
- type: mongodb
+
connection:
+
host: localhost
+
port: 27017
+
credentials:
+
user: root
+
password: root
+
single_line_compact:
+
- name: Alice
+
age: 25
+
role: developer
+
- name: Bob
+
age: 30
+
role: designer
+
- name: Charlie
+
age: 35
+
role: manager
+
sequences_in_compact:
+
- title: Project A
+
members:
+
- Alice
+
- Bob
+
- Charlie
+
tags:
+
- urgent
+
- backend
+
- title: Project B
+
members:
+
- David
+
- Eve
+
tags:
+
- frontend
+
- design
+
compact_with_empty:
+
- id: 1
+
data: []
+
meta: {}
+
- id: 2
+
data:
+
- item1
+
meta:
+
key: value
+
compact_complex_nesting:
+
- level: 1
+
children:
+
- level: 2a
+
children:
+
- level: 3a
+
value: leaf1
+
- level: 3b
+
value: leaf2
+
- level: 2b
+
children:
+
- level: 3c
+
value: leaf3
+
api_endpoints:
+
- path: /users
+
method: GET
+
auth: required
+
params:
+
- name: page
+
type: integer
+
default: 1
+
- name: limit
+
type: integer
+
default: 10
+
- path: '/users/:id'
+
method: GET
+
auth: required
+
params: []
+
- path: /users
+
method: POST
+
auth: required
+
body:
+
username: string
+
email: string
+
password: string
+
compact_types:
+
- string_val: hello
+
number_val: 42
+
float_val: 3.14
+
bool_val: true
+
null_val: null
+
- string_val: world
+
number_val: 100
+
float_val: 2.71
+
bool_val: false
+
null_val: null
+
minimal:
+
- a: 1
+
- b: 2
+
- c: 3
+
+
Test collections_compact.yml with JSON output
+
+
$ yamlcat --json collections_compact.yml
+
{"compact_sequence": [{"name": "Alice", "age": 25, "city": "Boston"}, {"name": "Bob", "age": 30, "city": "Seattle"}, {"name": "Charlie", "age": 35, "city": "Portland"}], "compact_nested": [{"id": 1, "details": {"type": "admin", "permissions": ["read", "write", "delete"]}}, {"id": 2, "details": {"type": "user", "permissions": ["read"]}}], "compact_complex": [{"key1": "value1", "key2": "value2", "nested": {"sub1": "val1", "sub2": "val2"}}, {"key1": "value3", "key2": "value4", "nested": {"sub1": "val3", "sub2": "val4"}}], "users": [{"username": "alice", "email": "alice@example.com", "active": true}, {"username": "bob", "email": "bob@example.com", "active": false}], "compact_with_flow": [{"name": "service1", "ports": [8080, 8443], "env": {"DEBUG": true, "MODE": "production"}}, {"name": "service2", "ports": [3000], "env": {"DEBUG": false, "MODE": "development"}}], "deep_compact": [{"category": "electronics", "items": [{"name": "laptop", "specs": {"cpu": "Intel i7", "ram": "16GB", "storage": "512GB SSD"}}, {"name": "phone", "specs": {"os": "Android", "ram": "8GB", "storage": "256GB"}}]}, {"category": "furniture", "items": [{"name": "desk", "dimensions": {"width": "150cm", "depth": "75cm", "height": "75cm"}}, {"name": "chair", "dimensions": {"width": "60cm", "depth": "60cm", "height": "120cm"}}]}], "mixed_compact": {"databases": [{"type": "postgresql", "connection": {"host": "localhost", "port": 5432}, "credentials": {"user": "admin", "password": "secret"}}, {"type": "mongodb", "connection": {"host": "localhost", "port": 27017}, "credentials": {"user": "root", "password": "root"}}]}, "single_line_compact": [{"name": "Alice", "age": 25, "role": "developer"}, {"name": "Bob", "age": 30, "role": "designer"}, {"name": "Charlie", "age": 35, "role": "manager"}], "sequences_in_compact": [{"title": "Project A", "members": ["Alice", "Bob", "Charlie"], "tags": ["urgent", "backend"]}, {"title": "Project B", "members": ["David", "Eve"], "tags": ["frontend", "design"]}], 
"compact_with_empty": [{"id": 1, "data": [], "meta": {}}, {"id": 2, "data": ["item1"], "meta": {"key": "value"}}], "compact_complex_nesting": [{"level": 1, "children": [{"level": "2a", "children": [{"level": "3a", "value": "leaf1"}, {"level": "3b", "value": "leaf2"}]}, {"level": "2b", "children": [{"level": "3c", "value": "leaf3"}]}]}], "api_endpoints": [{"path": "/users", "method": "GET", "auth": "required", "params": [{"name": "page", "type": "integer", "default": 1}, {"name": "limit", "type": "integer", "default": 10}]}, {"path": "/users/:id", "method": "GET", "auth": "required", "params": []}, {"path": "/users", "method": "POST", "auth": "required", "body": {"username": "string", "email": "string", "password": "string"}}], "compact_types": [{"string_val": "hello", "number_val": 42, "float_val": 3.14, "bool_val": true, "null_val": null}, {"string_val": "world", "number_val": 100, "float_val": 2.71, "bool_val": false, "null_val": null}], "minimal": [{"a": 1}, {"b": 2}, {"c": 3}]}
+
+
Test collections_compact.yml with flow output (NOTE(review): the expected output below records what appears to be a separator bug in the flow writer — e.g. `]compact_nested,` where valid flow YAML requires `], compact_nested:` — confirm and fix the emitter, then regenerate this transcript)
+
+
$ yamlcat --flow collections_compact.yml
+
{compact_sequence: [{name: Alice, age: 25, city: Boston}, {name: Bob, age: 30, city: Seattle}, {name: Charlie, age: 35, city: Portland}]compact_nested, [{id: 1, details: {type: admin, permissions: [read, write, delete]}}, {id: 2, details: {type: user, permissions: [read]}}], compact_complex: [{key1: value1, key2: value2, nested: {sub1: val1, sub2: val2}}, {key1: value3, key2: value4, nested: {sub1: val3, sub2: val4}}]users, [{username: alice, email: alice@example.com, active: true}, {username: bob, email: bob@example.com, active: false}], compact_with_flow: [{name: service1, ports: [8080, 8443]env, {DEBUG: true, MODE: production}}, {name: service2, ports: [3000]env, {DEBUG: false, MODE: development}}]deep_compact, [{category: electronics, items: [{name: laptop, specs: {cpu: Intel i7, ram: 16GB, storage: 512GB SSD}}, {name: phone, specs: {os: Android, ram: 8GB, storage: 256GB}}]}, {category: furniture, items: [{name: desk, dimensions: {width: 150cm, depth: 75cm, height: 75cm}}, {name: chair, dimensions: {width: 60cm, depth: 60cm, height: 120cm}}]}], mixed_compact: {databases: [{type: postgresql, connection: {host: localhost, port: 5432}credentials, {user: admin, password: secret}}, {type: mongodb, connection: {host: localhost, port: 27017}credentials, {user: root, password: root}}]}single_line_compact, [{name: Alice, age: 25, role: developer}, {name: Bob, age: 30, role: designer}, {name: Charlie, age: 35, role: manager}], sequences_in_compact: [{title: Project A, members: [Alice, Bob, Charlie]tags, [urgent, backend]}, {title: Project B, members: [David, Eve]tags, [frontend, design]}]compact_with_empty, [{id: 1, data: []meta, {}}, {id: 2, data: [item1]meta, {key: value}}], compact_complex_nesting: [{level: 1, children: [{level: 2a, children: [{level: 3a, value: leaf1}, {level: 3b, value: leaf2}]}, {level: 2b, children: [{level: 3c, value: leaf3}]}]}]api_endpoints, [{path: /users, method: GET, auth: required, params: [{name: page, type: integer, default: 1}, {name: 
limit, type: integer, default: 10}]}, {path: '/users/:id', method: GET, auth: required, params: []}, {path: /users, method: POST, auth: required, body: {username: string, email: string, password: string}}], compact_types: [{string_val: hello, number_val: 42, float_val: 3.14, bool_val: true, null_val: null}, {string_val: world, number_val: 100, float_val: 2.71, bool_val: false, null_val: null}]minimal, [{a: 1}, {b: 2}, {c: 3}]}
+
+
Test collections_flow.yml - Flow style collections
+
+
$ yamlcat collections_flow.yml
+
simple_flow_sequence:
+
- apple
+
- banana
+
- cherry
+
- date
+
simple_flow_mapping:
+
name: John
+
age: 30
+
city: New York
+
nested_flow_sequences:
+
-
+
- a
+
- b
+
- c
+
-
+
- 1
+
- 2
+
- 3
+
-
+
- red
+
- green
+
- blue
+
nested_flow_mappings:
+
person:
+
name: Alice
+
age: 25
+
contact:
+
email: alice@example.com
+
phone: 555-1234
+
flow_seq_with_maps:
+
- name: Alice
+
role: dev
+
- name: Bob
+
role: ops
+
- name: Charlie
+
role: qa
+
flow_map_with_seqs:
+
colors:
+
- red
+
- green
+
- blue
+
sizes:
+
- S
+
- M
+
- L
+
numbers:
+
- 1
+
- 2
+
- 3
+
deep_flow_nesting:
+
level1:
+
level2:
+
level3:
+
level4:
+
- a
+
- b
+
- c
+
empty_flow:
+
empty_seq: []
+
empty_map: {}
+
both:
+
- []
+
- {}
+
flow_in_block:
+
sequence:
+
- 1
+
- 2
+
- 3
+
- 4
+
- 5
+
mapping:
+
a: 1
+
b: 2
+
c: 3
+
nested:
+
items:
+
- x
+
- y
+
- z
+
config:
+
timeout: 30
+
retries: 3
+
block_in_flow:
+
users:
+
- name: Alice
+
tags:
+
- dev
+
- senior
+
- name: Bob
+
tags:
+
- ops
+
- junior
+
mixed_structure:
+
services:
+
- name: api
+
ports:
+
- 8080
+
- 8443
+
env:
+
DEBUG: true
+
LOG_LEVEL: info
+
- name: db
+
ports:
+
- 5432
+
env:
+
POSTGRES_DB: mydb
+
POSTGRES_USER: admin
+
config:
+
version: 1
+
enabled: true
+
flow_types:
+
strings:
+
- hello
+
- world
+
- foo
+
- bar
+
numbers:
+
- 1
+
- 2
+
- 3
+
- 42
+
- 100
+
mixed:
+
- string
+
- 123
+
- true
+
- false
+
- null
+
quoted:
+
- with spaces
+
- 'special:chars'
+
- commas, here
+
flow_map_types:
+
string: value
+
number: 42
+
boolean: true
+
null_value: null
+
float: 3.14
+
nested_mixed:
+
- id: 1
+
data:
+
- a
+
- b
+
- c
+
meta:
+
type: first
+
- id: 2
+
data:
+
- d
+
- e
+
- f
+
meta:
+
type: second
+
- id: 3
+
data:
+
- g
+
- h
+
- i
+
meta:
+
type: third
+
multiline_flow:
+
long_sequence:
+
- item1
+
- item2
+
- item3
+
- item4
+
long_mapping:
+
key1: value1
+
key2: value2
+
key3: value3
+
edge_cases:
+
single_item_seq:
+
- alone
+
single_item_map:
+
only: one
+
nested_empty:
+
- []
+
-
+
- {}
+
-
+
- {}
+
- []
+
all_empty:
+
- {}
+
- []
+
- a: []
+
- b: {}
+
+
Test collections_flow.yml with JSON output
+
+
$ yamlcat --json collections_flow.yml
+
{"simple_flow_sequence": ["apple", "banana", "cherry", "date"], "simple_flow_mapping": {"name": "John", "age": 30, "city": "New York"}, "nested_flow_sequences": [["a", "b", "c"], [1, 2, 3], ["red", "green", "blue"]], "nested_flow_mappings": {"person": {"name": "Alice", "age": 25}, "contact": {"email": "alice@example.com", "phone": "555-1234"}}, "flow_seq_with_maps": [{"name": "Alice", "role": "dev"}, {"name": "Bob", "role": "ops"}, {"name": "Charlie", "role": "qa"}], "flow_map_with_seqs": {"colors": ["red", "green", "blue"], "sizes": ["S", "M", "L"], "numbers": [1, 2, 3]}, "deep_flow_nesting": {"level1": {"level2": {"level3": {"level4": ["a", "b", "c"]}}}}, "empty_flow": {"empty_seq": [], "empty_map": {}, "both": [[], {}]}, "flow_in_block": {"sequence": [1, 2, 3, 4, 5], "mapping": {"a": 1, "b": 2, "c": 3}, "nested": {"items": ["x", "y", "z"], "config": {"timeout": 30, "retries": 3}}}, "block_in_flow": {"users": [{"name": "Alice", "tags": ["dev", "senior"]}, {"name": "Bob", "tags": ["ops", "junior"]}]}, "mixed_structure": {"services": [{"name": "api", "ports": [8080, 8443], "env": {"DEBUG": true, "LOG_LEVEL": "info"}}, {"name": "db", "ports": [5432], "env": {"POSTGRES_DB": "mydb", "POSTGRES_USER": "admin"}}], "config": {"version": 1, "enabled": true}}, "flow_types": {"strings": ["hello", "world", "foo", "bar"], "numbers": [1, 2, 3, 42, 100], "mixed": ["string", 123, true, false, null], "quoted": ["with spaces", "special:chars", "commas, here"]}, "flow_map_types": {"string": "value", "number": 42, "boolean": true, "null_value": null, "float": 3.14}, "nested_mixed": [{"id": 1, "data": ["a", "b", "c"], "meta": {"type": "first"}}, {"id": 2, "data": ["d", "e", "f"], "meta": {"type": "second"}}, {"id": 3, "data": ["g", "h", "i"], "meta": {"type": "third"}}], "multiline_flow": {"long_sequence": ["item1", "item2", "item3", "item4"], "long_mapping": {"key1": "value1", "key2": "value2", "key3": "value3"}}, "edge_cases": {"single_item_seq": ["alone"], "single_item_map": 
{"only": "one"}, "nested_empty": [[], [{}], [{}, []]], "all_empty": [{}, [], {"a": []}, {"b": {}}]}}
+
+
Test collections_flow.yml with flow output (NOTE(review): expected output below shows the same flow-writer separator defect — `]key,` / `}key,` instead of `], key:` after nested flow collections — the transcript records current behavior, not correct YAML)
+
+
$ yamlcat --flow collections_flow.yml
+
{simple_flow_sequence: [apple, banana, cherry, date]simple_flow_mapping, {name: John, age: 30, city: New York}, nested_flow_sequences: [[a, b, c], [1, 2, 3], [red, green, blue]]nested_flow_mappings, {person: {name: Alice, age: 25}contact, {email: alice@example.com, phone: 555-1234}}, flow_seq_with_maps: [{name: Alice, role: dev}, {name: Bob, role: ops}, {name: Charlie, role: qa}]flow_map_with_seqs, {colors: [red, green, blue]sizes, [S, M, L], numbers: [1, 2, 3]}, deep_flow_nesting: {level1: {level2: {level3: {level4: [a, b, c]}}}}empty_flow, {empty_seq: []empty_map, {}, both: [[], {}]}, flow_in_block: {sequence: [1, 2, 3, 4, 5]mapping, {a: 1, b: 2, c: 3}, nested: {items: [x, y, z]config, {timeout: 30, retries: 3}}}block_in_flow, {users: [{name: Alice, tags: [dev, senior]}, {name: Bob, tags: [ops, junior]}]}, mixed_structure: {services: [{name: api, ports: [8080, 8443]env, {DEBUG: true, LOG_LEVEL: info}}, {name: db, ports: [5432]env, {POSTGRES_DB: mydb, POSTGRES_USER: admin}}]config, {version: 1, enabled: true}}flow_types, {strings: [hello, world, foo, bar]numbers, [1, 2, 3, 42, 100], mixed: [string, 123, true, false, null]quoted, [with spaces, 'special:chars', commas, here]}, flow_map_types: {string: value, number: 42, boolean: true, null_value: null, float: 3.14}nested_mixed, [{id: 1, data: [a, b, c]meta, {type: first}}, {id: 2, data: [d, e, f]meta, {type: second}}, {id: 3, data: [g, h, i]meta, {type: third}}], multiline_flow: {long_sequence: [item1, item2, item3, item4]long_mapping, {key1: value1, key2: value2, key3: value3}}edge_cases, {single_item_seq: [alone]single_item_map, {only: one}, nested_empty: [[], [{}], [{}, []]]all_empty, [{}, [], {a: []}, {b: {}}]}}
+
+
Inline test: Simple sequence
+
+
$ echo '- a
+
> - b
+
> - c' | yamlcat
+
- a
+
- b
+
- c
+
+
$ echo '- a
+
> - b
+
> - c' | yamlcat --json
+
["a", "b", "c"]
+
+
$ echo '- a
+
> - b
+
> - c' | yamlcat --flow
+
[a, b, c]
+
+
Inline test: Simple mapping
+
+
$ echo 'key1: value1
+
> key2: value2
+
> key3: value3' | yamlcat
+
key1: value1
+
key2: value2
+
key3: value3
+
+
$ echo 'key1: value1
+
> key2: value2
+
> key3: value3' | yamlcat --json
+
{"key1": "value1", "key2": "value2", "key3": "value3"}
+
+
$ echo 'key1: value1
+
> key2: value2
+
> key3: value3' | yamlcat --flow
+
{key1: value1, key2: value2, key3: value3}
+
+
Inline test: Nested sequences
+
+
$ echo 'outer:
+
> - - inner1
+
> - inner2
+
> - - inner3
+
> - inner4' | yamlcat
+
outer:
+
-
+
- inner1
+
- inner2
+
-
+
- inner3
+
- inner4
+
+
$ echo 'outer:
+
> - - inner1
+
> - inner2
+
> - - inner3
+
> - inner4' | yamlcat --json
+
{"outer": [["inner1", "inner2"], ["inner3", "inner4"]]}
+
+
$ echo 'outer:
+
> - - inner1
+
> - inner2
+
> - - inner3
+
> - inner4' | yamlcat --flow
+
{outer: [[inner1, inner2], [inner3, inner4]]}
+
+
Inline test: Nested mappings
+
+
$ echo 'level1:
+
> level2:
+
> level3:
+
> key: value' | yamlcat
+
level1:
+
level2:
+
level3:
+
key: value
+
+
$ echo 'level1:
+
> level2:
+
> level3:
+
> key: value' | yamlcat --json
+
{"level1": {"level2": {"level3": {"key": "value"}}}}
+
+
$ echo 'level1:
+
> level2:
+
> level3:
+
> key: value' | yamlcat --flow
+
{level1: {level2: {level3: {key: value}}}}
+
+
Inline test: Flow sequence
+
+
$ echo '[a, b, c]' | yamlcat
+
- a
+
- b
+
- c
+
+
$ echo '[a, b, c]' | yamlcat --json
+
["a", "b", "c"]
+
+
$ echo '[a, b, c]' | yamlcat --flow
+
[a, b, c]
+
+
Inline test: Flow mapping
+
+
$ echo '{a: 1, b: 2, c: 3}' | yamlcat
+
a: 1
+
b: 2
+
c: 3
+
+
$ echo '{a: 1, b: 2, c: 3}' | yamlcat --json
+
{"a": 1, "b": 2, "c": 3}
+
+
$ echo '{a: 1, b: 2, c: 3}' | yamlcat --flow
+
{a: 1, b: 2, c: 3}
+
+
Inline test: Nested flow collections
+
+
$ echo '[[1, 2], [3, 4], [5, 6]]' | yamlcat
+
-
+
- 1
+
- 2
+
-
+
- 3
+
- 4
+
-
+
- 5
+
- 6
+
+
$ echo '[[1, 2], [3, 4], [5, 6]]' | yamlcat --json
+
[[1, 2], [3, 4], [5, 6]]
+
+
$ echo '[[1, 2], [3, 4], [5, 6]]' | yamlcat --flow
+
[[1, 2], [3, 4], [5, 6]]
+
+
Inline test: Flow mapping with nested mapping
+
+
$ echo '{outer: {inner: value}}' | yamlcat
+
outer:
+
inner: value
+
+
$ echo '{outer: {inner: value}}' | yamlcat --json
+
{"outer": {"inner": "value"}}
+
+
$ echo '{outer: {inner: value}}' | yamlcat --flow
+
{outer: {inner: value}}
+
+
Inline test: Mixed block and flow (flow in block)
+
+
$ echo 'block_key:
+
> flow_seq: [1, 2, 3]
+
> flow_map: {a: 1, b: 2}' | yamlcat
+
block_key:
+
flow_seq:
+
- 1
+
- 2
+
- 3
+
flow_map:
+
a: 1
+
b: 2
+
+
$ echo 'block_key:
+
> flow_seq: [1, 2, 3]
+
> flow_map: {a: 1, b: 2}' | yamlcat --json
+
{"block_key": {"flow_seq": [1, 2, 3], "flow_map": {"a": 1, "b": 2}}}
+
+
$ echo 'block_key:
+
> flow_seq: [1, 2, 3]
+
> flow_map: {a: 1, b: 2}' | yamlcat --flow
+
{block_key: {flow_seq: [1, 2, 3]flow_map, {a: 1, b: 2}}}
+
+
Inline test: Mixed block and flow (block in flow)
+
+
$ echo '{users: [{name: Alice, age: 30}, {name: Bob, age: 25}]}' | yamlcat
+
users:
+
- name: Alice
+
age: 30
+
- name: Bob
+
age: 25
+
+
$ echo '{users: [{name: Alice, age: 30}, {name: Bob, age: 25}]}' | yamlcat --json
+
{"users": [{"name": "Alice", "age": 30}, {"name": "Bob", "age": 25}]}
+
+
$ echo '{users: [{name: Alice, age: 30}, {name: Bob, age: 25}]}' | yamlcat --flow
+
{users: [{name: Alice, age: 30}, {name: Bob, age: 25}]}
+
+
Inline test: Compact notation - sequence of mappings
+
+
$ echo '- name: Alice
+
> role: dev
+
> - name: Bob
+
> role: ops' | yamlcat
+
- name: Alice
+
role: dev
+
- name: Bob
+
role: ops
+
+
$ echo '- name: Alice
+
> role: dev
+
> - name: Bob
+
> role: ops' | yamlcat --json
+
[{"name": "Alice", "role": "dev"}, {"name": "Bob", "role": "ops"}]
+
+
$ echo '- name: Alice
+
> role: dev
+
> - name: Bob
+
> role: ops' | yamlcat --flow
+
[{name: Alice, role: dev}, {name: Bob, role: ops}]
+
+
Inline test: Compact with nested collections
+
+
$ echo '- id: 1
+
> tags: [a, b, c]
+
> config:
+
> enabled: true
+
> - id: 2
+
> tags: [x, y, z]
+
> config:
+
> enabled: false' | yamlcat
+
- id: 1
+
tags:
+
- a
+
- b
+
- c
+
config:
+
enabled: true
+
- id: 2
+
tags:
+
- x
+
- y
+
- z
+
config:
+
enabled: false
+
+
$ echo '- id: 1
+
> tags: [a, b, c]
+
> config:
+
> enabled: true
+
> - id: 2
+
> tags: [x, y, z]
+
> config:
+
> enabled: false' | yamlcat --json
+
[{"id": 1, "tags": ["a", "b", "c"], "config": {"enabled": true}}, {"id": 2, "tags": ["x", "y", "z"], "config": {"enabled": false}}]
+
+
$ echo '- id: 1
+
> tags: [a, b, c]
+
> config:
+
> enabled: true
+
> - id: 2
+
> tags: [x, y, z]
+
> config:
+
> enabled: false' | yamlcat --flow
+
[{id: 1, tags: [a, b, c]config, {enabled: true}}, {id: 2, tags: [x, y, z]config, {enabled: false}}]
+
+
Inline test: Empty collections
+
+
$ echo 'empty_seq: []
+
> empty_map: {}' | yamlcat
+
empty_seq: []
+
empty_map: {}
+
+
$ echo 'empty_seq: []
+
> empty_map: {}' | yamlcat --json
+
{"empty_seq": [], "empty_map": {}}
+
+
$ echo 'empty_seq: []
+
> empty_map: {}' | yamlcat --flow
+
{empty_seq: []empty_map, {}}
+
+
Inline test: Sequence with mapping values
+
+
$ echo 'items:
+
> - apple
+
> - banana
+
> config:
+
> mode: fast' | yamlcat
+
items:
+
- apple
+
- banana
+
config:
+
mode: fast
+
+
$ echo 'items:
+
> - apple
+
> - banana
+
> config:
+
> mode: fast' | yamlcat --json
+
{"items": ["apple", "banana"], "config": {"mode": "fast"}}
+
+
$ echo 'items:
+
> - apple
+
> - banana
+
> config:
+
> mode: fast' | yamlcat --flow
+
{items: [apple, banana]config, {mode: fast}}
+
+
Inline test: Complex nested structure
+
+
$ echo 'services:
+
> - name: web
+
> ports:
+
> - 80
+
> - 443
+
> env:
+
> DEBUG: false
+
> MODE: prod' | yamlcat
+
services:
+
- name: web
+
ports:
+
- 80
+
- 443
+
env:
+
DEBUG: false
+
MODE: prod
+
+
$ echo 'services:
+
> - name: web
+
> ports:
+
> - 80
+
> - 443
+
> env:
+
> DEBUG: false
+
> MODE: prod' | yamlcat --json
+
{"services": [{"name": "web", "ports": [80, 443], "env": {"DEBUG": false, "MODE": "prod"}}]}
+
+
$ echo 'services:
+
> - name: web
+
> ports:
+
> - 80
+
> - 443
+
> env:
+
> DEBUG: false
+
> MODE: prod' | yamlcat --flow
+
{services: [{name: web, ports: [80, 443]env, {DEBUG: false, MODE: prod}}]}
+
+
Inline test: Flow sequence with various types
+
+
$ echo '[string, 42, true, false, null, 3.14]' | yamlcat --json
+
["string", 42, true, false, null, 3.14]
+
+
Inline test: Flow mapping with various types
+
+
$ echo '{str: hello, num: 42, bool: true, nil: null, float: 3.14}' | yamlcat --json
+
{"str": "hello", "num": 42, "bool": true, "nil": null, "float": 3.14}
+126
tests/cram/collections_block.yml
···
+
# Block Style Collections Test File
+
# Testing various block-style collection structures
+
+
# Simple sequence
+
simple_sequence:
+
- apple
+
- banana
+
- cherry
+
- date
+
+
# Simple mapping
+
simple_mapping:
+
name: John Doe
+
age: 30
+
city: New York
+
country: USA
+
+
# Nested sequences
+
nested_sequences:
+
- - alpha
+
- beta
+
- gamma
+
- - one
+
- two
+
- three
+
- - red
+
- green
+
- blue
+
+
# Nested mappings
+
nested_mappings:
+
person:
+
name: Alice
+
contact:
+
email: alice@example.com
+
phone: 555-1234
+
address:
+
street: 123 Main St
+
city: Boston
+
+
# Mapping containing sequences
+
mapping_with_sequences:
+
colors:
+
- red
+
- green
+
- blue
+
sizes:
+
- small
+
- medium
+
- large
+
numbers:
+
- 1
+
- 2
+
- 3
+
+
# Sequence containing mappings
+
sequence_with_mappings:
+
- name: Alice
+
age: 25
+
role: developer
+
- name: Bob
+
age: 30
+
role: designer
+
- name: Charlie
+
age: 35
+
role: manager
+
+
# Deep nesting (4 levels)
+
deep_nesting:
+
level1:
+
level2:
+
level3:
+
level4:
+
- deeply
+
- nested
+
- values
+
another_key: value
+
items:
+
- item1
+
- item2
+
metadata:
+
created: 2024-01-01
+
modified: 2024-12-04
+
+
# Mixed complex structure
+
complex_structure:
+
database:
+
connections:
+
- host: db1.example.com
+
port: 5432
+
credentials:
+
username: admin
+
password: secret
+
- host: db2.example.com
+
port: 5432
+
credentials:
+
username: readonly
+
password: public
+
services:
+
- name: api
+
endpoints:
+
- /users
+
- /posts
+
- /comments
+
config:
+
timeout: 30
+
retries: 3
+
- name: worker
+
tasks:
+
- email
+
- reports
+
config:
+
concurrency: 10
+
+
# Empty sequences and mappings in block style
+
empty_collections:
+
empty_sequence: []
+
empty_mapping: {}
+
sequence_with_empty:
+
- value1
+
- []
+
- value2
+
mapping_with_empty:
+
key1: value1
+
key2: {}
+
key3: value3
+198
tests/cram/collections_compact.yml
···
+
# Compact Notation Collections Test File
+
# Testing compact block notation and mixed styles
+
+
# Compact nested mapping in sequence (most common form)
+
compact_sequence:
+
- name: Alice
+
age: 25
+
city: Boston
+
- name: Bob
+
age: 30
+
city: Seattle
+
- name: Charlie
+
age: 35
+
city: Portland
+
+
# Compact with nested structures
+
compact_nested:
+
- id: 1
+
details:
+
type: admin
+
permissions:
+
- read
+
- write
+
- delete
+
- id: 2
+
details:
+
type: user
+
permissions:
+
- read
+
+
# Multiple keys in same sequence entry with sub-structures
+
compact_complex:
+
- key1: value1
+
key2: value2
+
nested:
+
sub1: val1
+
sub2: val2
+
- key1: value3
+
key2: value4
+
nested:
+
sub1: val3
+
sub2: val4
+
+
# Compact block mappings with inline values
+
users:
+
- username: alice
+
email: alice@example.com
+
active: true
+
- username: bob
+
email: bob@example.com
+
active: false
+
+
# Compact with flow collections
+
compact_with_flow:
+
- name: service1
+
ports: [8080, 8443]
+
env: {DEBUG: true, MODE: production}
+
- name: service2
+
ports: [3000]
+
env: {DEBUG: false, MODE: development}
+
+
# Deeply nested compact notation
+
deep_compact:
+
- category: electronics
+
items:
+
- name: laptop
+
specs:
+
cpu: Intel i7
+
ram: 16GB
+
storage: 512GB SSD
+
- name: phone
+
specs:
+
os: Android
+
ram: 8GB
+
storage: 256GB
+
- category: furniture
+
items:
+
- name: desk
+
dimensions:
+
width: 150cm
+
depth: 75cm
+
height: 75cm
+
- name: chair
+
dimensions:
+
width: 60cm
+
depth: 60cm
+
height: 120cm
+
+
# Compact with mixed indentation styles
+
mixed_compact:
+
databases:
+
- type: postgresql
+
connection:
+
host: localhost
+
port: 5432
+
credentials:
+
user: admin
+
password: secret
+
- type: mongodb
+
connection:
+
host: localhost
+
port: 27017
+
credentials:
+
user: root
+
password: root
+
+
# Single-line compact entries
+
single_line_compact:
+
- {name: Alice, age: 25, role: developer}
+
- {name: Bob, age: 30, role: designer}
+
- {name: Charlie, age: 35, role: manager}
+
+
# Compact notation with sequences as values
+
sequences_in_compact:
+
- title: Project A
+
members:
+
- Alice
+
- Bob
+
- Charlie
+
tags:
+
- urgent
+
- backend
+
- title: Project B
+
members:
+
- David
+
- Eve
+
tags:
+
- frontend
+
- design
+
+
# Compact with empty values
+
compact_with_empty:
+
- id: 1
+
data: []
+
meta: {}
+
- id: 2
+
data:
+
- item1
+
meta:
+
key: value
+
+
# Compact notation with complex nesting
+
compact_complex_nesting:
+
- level: 1
+
children:
+
- level: 2a
+
children:
+
- level: 3a
+
value: leaf1
+
- level: 3b
+
value: leaf2
+
- level: 2b
+
children:
+
- level: 3c
+
value: leaf3
+
+
# Real-world example: API endpoints
+
api_endpoints:
+
- path: /users
+
method: GET
+
auth: required
+
params:
+
- name: page
+
type: integer
+
default: 1
+
- name: limit
+
type: integer
+
default: 10
+
- path: /users/:id
+
method: GET
+
auth: required
+
params: []
+
- path: /users
+
method: POST
+
auth: required
+
body:
+
username: string
+
email: string
+
password: string
+
+
# Compact with various data types
+
compact_types:
+
- string_val: hello
+
number_val: 42
+
float_val: 3.14
+
bool_val: true
+
null_val: null
+
- string_val: world
+
number_val: 100
+
float_val: 2.71
+
bool_val: false
+
null_val: ~
+
+
# Edge case: minimal compact notation
+
minimal:
+
- a: 1
+
- b: 2
+
- c: 3
+96
tests/cram/collections_flow.yml
···
+
# Flow Style Collections Test File
+
# Testing various flow-style collection structures
+
+
# Simple flow sequence
+
simple_flow_sequence: [apple, banana, cherry, date]
+
+
# Simple flow mapping
+
simple_flow_mapping: {name: John, age: 30, city: New York}
+
+
# Nested flow sequences
+
nested_flow_sequences: [[a, b, c], [1, 2, 3], [red, green, blue]]
+
+
# Nested flow mappings
+
nested_flow_mappings: {person: {name: Alice, age: 25}, contact: {email: alice@example.com, phone: 555-1234}}
+
+
# Flow sequence with mappings
+
flow_seq_with_maps: [{name: Alice, role: dev}, {name: Bob, role: ops}, {name: Charlie, role: qa}]
+
+
# Flow mapping with sequences
+
flow_map_with_seqs: {colors: [red, green, blue], sizes: [S, M, L], numbers: [1, 2, 3]}
+
+
# Deeply nested flow collections
+
deep_flow_nesting: {level1: {level2: {level3: {level4: [a, b, c]}}}}
+
+
# Empty flow collections
+
empty_flow: {empty_seq: [], empty_map: {}, both: [[], {}]}
+
+
# Mixed flow and block - flow in block
+
flow_in_block:
+
sequence: [1, 2, 3, 4, 5]
+
mapping: {a: 1, b: 2, c: 3}
+
nested:
+
items: [x, y, z]
+
config: {timeout: 30, retries: 3}
+
+
# Mixed flow and block - block in flow
+
block_in_flow: {
+
users: [
+
{name: Alice, tags: [dev, senior]},
+
{name: Bob, tags: [ops, junior]}
+
]
+
}
+
+
# Complex mixed structure
+
mixed_structure:
+
services:
+
- name: api
+
ports: [8080, 8443]
+
env: {DEBUG: true, LOG_LEVEL: info}
+
- name: db
+
ports: [5432]
+
env: {POSTGRES_DB: mydb, POSTGRES_USER: admin}
+
config: {version: 1.0, enabled: true}
+
+
# Flow sequences with various types
+
flow_types:
+
strings: [hello, world, foo, bar]
+
numbers: [1, 2, 3, 42, 100]
+
mixed: [string, 123, true, false, null]
+
quoted: ["with spaces", "special:chars", "commas, here"]
+
+
# Flow mappings with various types
+
flow_map_types: {
+
string: value,
+
number: 42,
+
boolean: true,
+
null_value: null,
+
float: 3.14
+
}
+
+
# Nested mixed collections
+
nested_mixed:
+
- {id: 1, data: [a, b, c], meta: {type: first}}
+
- {id: 2, data: [d, e, f], meta: {type: second}}
+
- {id: 3, data: [g, h, i], meta: {type: third}}
+
+
# Flow collections split across multiple lines (still valid YAML)
+
multiline_flow:
+
long_sequence: [
+
item1,
+
item2,
+
item3,
+
item4
+
]
+
long_mapping: {
+
key1: value1,
+
key2: value2,
+
key3: value3
+
}
+
+
# Edge cases
+
edge_cases:
+
single_item_seq: [alone]
+
single_item_map: {only: one}
+
nested_empty: [[], [{}], [{}, []]]
+
all_empty: [{}, [], {a: []}, {b: {}}]
+197
tests/cram/comments.t
···
+
Test comments.yml file with various comment styles
+
+
$ yamlcat comments.yml
+
name: John Doe
+
age: 30
+
address:
+
street: 123 Main St
+
city: Springfield
+
zip: 12345
+
items:
+
- apple
+
- banana
+
- cherry
+
- date
+
flow_seq:
+
- 1
+
- 2
+
- 3
+
flow_map:
+
key1: value1
+
key2: value2
+
nested:
+
level1:
+
level2:
+
value: deeply nested
+
multi_comment_key: value
+
special: 'value with # hash inside quotes'
+
empty_value: null
+
final_key: final_value
+
+
Test comments.yml roundtrip with JSON to verify parsed values
+
+
$ yamlcat --json comments.yml
+
{"name": "John Doe", "age": 30, "address": {"street": "123 Main St", "city": "Springfield", "zip": 12345}, "items": ["apple", "banana", "cherry", "date"], "flow_seq": [1, 2, 3], "flow_map": {"key1": "value1", "key2": "value2"}, "nested": {"level1": {"level2": {"value": "deeply nested"}}}, "multi_comment_key": "value", "special": "value with # hash inside quotes", "empty_value": null, "final_key": "final_value"}
+
+
Test full line comments are ignored
+
+
$ echo '# This is a full line comment
+
> name: Alice
+
> # Another comment
+
> age: 25' | yamlcat --json
+
{"name": "Alice", "age": 25}
+
+
Test end of line comments after scalars
+
+
$ echo 'name: Bob # This is an end of line comment
+
> age: 35 # Another end of line comment' | yamlcat --json
+
{"name": "Bob", "age": 35}
+
+
Test comments after sequence items
+
+
$ echo 'fruits:
+
> - apple # First fruit
+
> - banana # Second fruit
+
> - cherry # Third fruit' | yamlcat --json
+
{"fruits": ["apple", "banana", "cherry"]}
+
+
Test comments between sequence items
+
+
$ echo 'numbers:
+
> - 1
+
> # Comment between items
+
> - 2
+
> # Another comment
+
> - 3' | yamlcat --json
+
{"numbers": [1, 2, 3]}
+
+
Test comments after flow sequences
+
+
$ echo 'flow: [1, 2, 3] # Comment after flow sequence' | yamlcat --json
+
{"flow": [1, 2, 3]}
+
+
Test comments after flow mappings
+
+
$ echo 'flow: {a: 1, b: 2} # Comment after flow mapping' | yamlcat --json
+
{"flow": {"a": 1, "b": 2}}
+
+
Test comments between mapping entries
+
+
$ echo 'first: value1
+
> # Comment between entries
+
> second: value2
+
> # Another comment
+
> third: value3' | yamlcat --json
+
{"first": "value1", "second": "value2", "third": "value3"}
+
+
Test multiple consecutive comments
+
+
$ echo '# First comment
+
> # Second comment
+
> # Third comment
+
> key: value' | yamlcat --json
+
{"key": "value"}
+
+
Test comments in nested structures
+
+
$ echo 'outer:
+
> # Comment in nested level
+
> inner:
+
> # Comment in deeper level
+
> key: value # End of line comment' | yamlcat --json
+
{"outer": {"inner": {"key": "value"}}}
+
+
Test comments with special characters
+
+
$ echo '# Comment with !@#$%^&*()
+
> key: value' | yamlcat --json
+
{"key": "value"}
+
+
Test that hash in quoted strings is not treated as comment
+
+
$ echo 'text: "This # is not a comment"
+
> other: '"'"'Also # not a comment'"'"'' | yamlcat --json
+
{"text": "This # is not a comment", "other": "Also # not a comment"}
+
+
Test comment before empty value (null)
+
+
$ echo 'key: # Comment, value is null' | yamlcat --json
+
{"key": null}
+
+
Test comments at document start
+
+
$ echo '# Comment at very start
+
> # Another at start
+
> data: value' | yamlcat --json
+
{"data": "value"}
+
+
Test comments at document end
+
+
$ echo 'data: value
+
> # Comment at end
+
> # Another at end' | yamlcat --json
+
{"data": "value"}
+
+
Test comments with various indentation levels
+
+
$ echo 'level1:
+
> # Indented comment
+
> level2:
+
> # More indented comment
+
> value: data' | yamlcat --json
+
{"level1": {"level2": {"value": "data"}}}
+
+
Test empty lines with comments
+
+
$ echo 'first: 1
+
>
+
> # Comment after empty line
+
>
+
> second: 2' | yamlcat --json
+
{"first": 1, "second": 2}
+
+
Test comment after sequence with nested mapping
+
+
$ echo 'items:
+
> - name: item1 # Comment after nested value
+
> value: 10
+
> # Comment between sequence items
+
> - name: item2
+
> value: 20 # Another comment' | yamlcat --json
+
{"items": [{"name": "item1", "value": 10}, {"name": "item2", "value": 20}]}
+
+
Test comment only lines between complex structures
+
+
$ echo 'mapping1:
+
> key: value
+
> # Comment between mappings
+
> mapping2:
+
> key: value' | yamlcat --json
+
{"mapping1": {"key": "value"}, "mapping2": {"key": "value"}}
+
+
Test comments do not affect block scalars
+
+
$ echo 'literal: |
+
> # This looks like a comment
+
> but it is part of the literal text
+
> key: value' | yamlcat --json
+
{"literal": "# This looks like a comment\nbut it is part of the literal text\n", "key": "value"}
+
+
Test comments do not affect folded scalars
+
+
$ echo 'folded: >
+
> # This also looks like a comment
+
> but is part of folded text
+
> key: value' | yamlcat --json
+
{"folded": "# This also looks like a comment but is part of folded text\n", "key": "value"}
+
+
Test whitespace preservation around comments
+
+
$ echo 'key1: value1 # Comment with spaces' | yamlcat --json
+
{"key1": "value1"}
+
+
Test comment after colon but before value
+
+
$ echo 'key: # Comment before value
+
> value' | yamlcat --json
+
{"key": "value"}
+53
tests/cram/comments.yml
···
+
# Full line comment at the beginning
+
# This is a YAML file testing comment handling
+
+
# Comment before a mapping
+
name: John Doe # End of line comment after a scalar value
+
age: 30 # Another end of line comment
+
+
# Comment between mapping entries
+
address:
+
# Comment inside nested mapping
+
street: 123 Main St # End of line comment in nested value
+
city: Springfield
+
# Comment between nested entries
+
zip: 12345
+
+
# Comment before sequence
+
items:
+
- apple # Comment after sequence item
+
- banana
+
# Comment between sequence items
+
- cherry
+
- date # Last item comment
+
+
# Comment before flow sequence
+
flow_seq: [1, 2, 3] # Comment after flow sequence
+
+
# Comment before flow mapping
+
flow_map: {key1: value1, key2: value2} # Comment after flow mapping
+
+
# Comments with various indentation levels
+
nested:
+
# Indented comment level 1
+
level1:
+
# Indented comment level 2
+
level2:
+
# Indented comment level 3
+
value: deeply nested # End comment at depth
+
+
# Multiple consecutive comments
+
# Line 1
+
# Line 2
+
# Line 3
+
multi_comment_key: value
+
+
# Comment with special characters: !@#$%^&*()
+
special: "value with # hash inside quotes"
+
+
# Empty value with comment
+
empty_value: # This key has no value (null)
+
+
# Comment before document end
+
final_key: final_value
+
# Final comment at end of file
+6
tests/cram/depth_bomb.yml
···
+
a: &a [x, y, z]
+
b: &b [*a, *a]
+
c: &c [*b, *b]
+
d: &d [*c, *c]
+
e: &e [*d, *d]
+
result: *e
+8
tests/cram/directives.yml
···
+
# YAML directive tests
+
+
# Test 1: %YAML 1.2 directive
+
%YAML 1.2
+
---
+
version: "1.2"
+
content: This document uses YAML 1.2
+
...
+15
tests/cram/directives_multiple_tags.yml
···
+
# Test 4: Multiple TAG directives
+
%YAML 1.2
+
%TAG !e! tag:example.com,2025:
+
%TAG !app! tag:myapp.org,2025:types:
+
%TAG !geo! tag:geography.net,2025:shapes:
+
---
+
user: !e!person
+
name: Alice
+
age: 30
+
location: !geo!coordinates
+
lat: 40.7128
+
lon: -74.0060
+
config: !app!settings
+
debug: true
+
timeout: 30
+10
tests/cram/directives_tag.yml
···
+
# Test 3: %TAG directive with custom prefix
+
%YAML 1.2
+
%TAG !custom! tag:example.com,2025:
+
---
+
shape: !custom!circle
+
radius: 5
+
color: red
+
point: !custom!point
+
x: 10
+
y: 20
+10
tests/cram/directives_yaml11.yml
···
+
# Test 2: %YAML 1.1 directive
+
%YAML 1.1
+
---
+
version: "1.1"
+
content: This document uses YAML 1.1
+
booleans:
+
- yes
+
- no
+
- on
+
- off
+204
tests/cram/documents.t
···
+
Test YAML directives and single document parsing
+
+
This test suite covers YAML directives (%YAML, %TAG) and various single-document formats.
+
Multi-document streams are not yet supported and are not tested here.
+
+
Test 1: Basic YAML 1.2 directive
+
====================================
+
+
$ yamlcat directives.yml
+
version: '1.2'
+
content: This document uses YAML 1.2
+
+
Test 2: YAML 1.1 directive
+
====================================
+
+
$ yamlcat directives_yaml11.yml
+
version: '1.1'
+
content: This document uses YAML 1.1
+
booleans:
+
- true
+
- false
+
- true
+
- false
+
+
Test 3: TAG directive with custom prefix
+
====================================
+
+
$ yamlcat directives_tag.yml
+
shape:
+
radius: 5
+
color: red
+
point:
+
x: 10
+
y: 20
+
+
Test 4: Multiple TAG directives
+
====================================
+
+
$ yamlcat directives_multiple_tags.yml
+
user:
+
name: Alice
+
age: 30
+
location:
+
lat: 40.7128
+
lon: -74.006
+
config:
+
debug: true
+
timeout: 30
+
+
Test 5: Implicit document (no markers)
+
====================================
+
+
$ yamlcat documents_single.yml
+
key1: value1
+
key2: value2
+
nested:
+
inner: data
+
list:
+
- item1
+
- item2
+
- item3
+
+
Test 6: Explicit start marker (---)
+
====================================
+
+
$ yamlcat documents_single_explicit_start.yml
+
key1: value1
+
key2: value2
+
nested:
+
inner: data
+
list:
+
- item1
+
- item2
+
- item3
+
+
Test 7: Explicit start and end markers (--- ... )
+
====================================
+
+
$ yamlcat documents_single_explicit_both.yml
+
key1: value1
+
key2: value2
+
nested:
+
inner: data
+
list:
+
- item1
+
- item2
+
- item3
+
+
Test 8: Document with YAML directive
+
====================================
+
+
$ yamlcat documents_single_with_directive.yml
+
key1: value1
+
key2: value2
+
nested:
+
inner: data
+
list:
+
- item1
+
- item2
+
- item3
+
+
Test 9: Inline - implicit document (no markers)
+
====================================
+
+
$ echo 'name: John
+
> age: 30
+
> city: New York' | yamlcat
+
name: John
+
age: 30
+
city: New York
+
+
Test 10: Inline - explicit start marker
+
====================================
+
+
$ echo '---
+
> name: Jane
+
> age: 25' | yamlcat
+
name: Jane
+
age: 25
+
+
Test 11: Inline - explicit start and end markers
+
====================================
+
+
$ echo '---
+
> title: Example
+
> content: data
+
> ...' | yamlcat
+
title: Example
+
content: data
+
+
Test 12: Inline - document with %YAML 1.2 directive
+
====================================
+
+
$ echo '%YAML 1.2
+
> ---
+
> version: 1.2
+
> enabled: true' | yamlcat
+
version: 1.2
+
enabled: true
+
+
Test 13: Inline - document with comment before content
+
====================================
+
+
$ echo '# This is a comment
+
> name: Alice
+
> # Another comment
+
> value: 42' | yamlcat
+
name: Alice
+
value: 42
+
+
Test 14: Inline - document with comment after directive
+
====================================
+
+
$ echo '%YAML 1.2
+
> # Comment after directive
+
> ---
+
> key: value' | yamlcat
+
key: value
+
+
Test 15: Inline - explicit markers with comments
+
====================================
+
+
$ echo '--- # Document start
+
> data: content
+
> # Mid-document comment
+
> more: values
+
> ... # Document end' | yamlcat
+
data: content
+
more: values
+
+
Test 16: Verify JSON roundtrip for directive file
+
====================================
+
+
$ yamlcat --json directives.yml
+
{"version": "1.2", "content": "This document uses YAML 1.2"}
+
+
Test 17: Verify JSON roundtrip for explicit markers
+
====================================
+
+
$ yamlcat --json documents_single_explicit_both.yml
+
{"key1": "value1", "key2": "value2", "nested": {"inner": "data"}, "list": ["item1", "item2", "item3"]}
+
+
Test 18: Empty document with explicit markers
+
====================================
+
+
$ echo '---
+
> ...' | yamlcat
+
null
+
+
Test 19: Document with only whitespace and markers
+
====================================
+
+
$ echo '---
+
>
+
> ...' | yamlcat
+
null
+
+
Test 20: Directive followed by content without explicit start
+
====================================
+
+
$ echo '%YAML 1.2
+
> simple: document' | yamlcat
+
Error: expected document start '---' at line 2, columns 1-1
+
[1]
+15
tests/cram/documents_multi.yml
···
+
# Multiple document variations
+
+
# Test 1: Two documents separated by ---
+
---
+
document: first
+
type: mapping
+
data:
+
key1: value1
+
key2: value2
+
---
+
document: second
+
type: mapping
+
data:
+
key3: value3
+
key4: value4
+10
tests/cram/documents_multi_empty.yml
···
+
# Test 4: Empty documents
+
---
+
# Empty document (implicitly null)
+
---
+
key: value
+
---
+
# Another empty document
+
---
+
- item1
+
- item2
+15
tests/cram/documents_multi_three.yml
···
+
# Test 2: Three documents with different content types
+
---
+
# First document: mapping
+
name: John Doe
+
age: 30
+
city: New York
+
---
+
# Second document: sequence
+
- apple
+
- banana
+
- orange
+
- grape
+
---
+
# Third document: scalar
+
This is a plain scalar document
+16
tests/cram/documents_multi_with_end.yml
···
+
# Test 3: Documents with explicit end markers
+
---
+
first:
+
document: data1
+
value: 100
+
...
+
---
+
second:
+
document: data2
+
value: 200
+
...
+
---
+
third:
+
document: data3
+
value: 300
+
...
+11
tests/cram/documents_single.yml
···
+
# Single document variations
+
+
# Test 1: Implicit document (no markers)
+
key1: value1
+
key2: value2
+
nested:
+
inner: data
+
list:
+
- item1
+
- item2
+
- item3
+11
tests/cram/documents_single_explicit_both.yml
···
+
# Test 3: Explicit start and end (--- ... )
+
---
+
key1: value1
+
key2: value2
+
nested:
+
inner: data
+
list:
+
- item1
+
- item2
+
- item3
+
...
+10
tests/cram/documents_single_explicit_start.yml
···
+
# Test 2: Explicit start (---)
+
---
+
key1: value1
+
key2: value2
+
nested:
+
inner: data
+
list:
+
- item1
+
- item2
+
- item3
+11
tests/cram/documents_single_with_directive.yml
···
+
# Test 4: With %YAML directive
+
%YAML 1.2
+
---
+
key1: value1
+
key2: value2
+
nested:
+
inner: data
+
list:
+
- item1
+
- item2
+
- item3
+4
tests/cram/dune
···
+
(cram
+
(deps
+
(package yamlrw)
+
(glob_files *.yml)))
+155
tests/cram/edge_cases.yml
···
+
# Edge cases test file for YAML parsing
+
+
# Case 1: Keys with colons (must be quoted)
+
"key:with:colons": value
+
"http://example.com": url_as_key
+
"time:12:30": time_value
+
+
# Case 2: Values starting with indicators (must be quoted or escaped)
+
indicator_square: "[this starts with bracket]"
+
indicator_curly: "{this starts with brace}"
+
indicator_star: "*this starts with star"
+
indicator_amp: "&this starts with ampersand"
+
indicator_question: "?this starts with question"
+
indicator_pipe: "|this starts with pipe"
+
indicator_gt: ">this starts with gt"
+
indicator_dash: "-this starts with dash"
+
indicator_hash: "#this starts with hash"
+
+
# Case 3: Special string values that look like other types
+
string_true: "true"
+
string_false: "false"
+
string_null: "null"
+
string_number: "123"
+
string_float: "45.67"
+
string_yes: "yes"
+
string_no: "no"
+
+
# Case 4: Actual special values
+
bool_true: true
+
bool_false: false
+
null_value: null
+
null_tilde: ~
+
number_int: 123
+
number_float: 45.67
+
number_exp: 1.23e4
+
number_hex: 0x1F
+
number_oct: 0o17
+
+
# Case 5: Empty values
+
empty_string: ""
+
empty_list: []
+
empty_map: {}
+
null_implicit:
+
+
# Case 6: Very long lines
+
very_long_key: "This is a very long value that contains a lot of text to test how the parser handles long lines. It should be able to handle lines that are much longer than typical lines in most YAML files. This continues for quite a while to make sure we test the boundaries of reasonable line lengths. Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."
+
+
very_long_literal: |
+
This is a very long literal block that should preserve all the whitespace and newlines exactly as written. It can contain very long lines that go on and on and on without breaking. This tests whether the parser can handle long content in literal blocks properly. Lorem ipsum dolor sit amet, consectetur adipiscing elit.
+
+
# Case 7: Unicode and special characters
+
unicode_emoji: "Hello 🌍 World 🚀"
+
unicode_chars: "Héllo Wörld 你好 مرحبا"
+
unicode_key_🔑: unicode_value
+
escaped_chars: "Line1\nLine2\tTabbed"
+
+
# Case 8: Nested empty structures
+
nested_empty:
+
level1: {}
+
level2:
+
inner: []
+
level3:
+
inner:
+
deep: null
+
+
# Case 9: Complex keys (flow collections as keys)
+
? [complex, key]
+
: complex_value
+
? {nested: key}
+
: another_value
+
+
# Case 10: Multi-line keys and values
+
? |
+
This is a multi-line
+
key using literal block
+
: |
+
This is a multi-line
+
value using literal block
+
+
# Case 11: Quoted strings with escape sequences
+
single_quoted: 'It''s a single-quoted string with doubled quotes'
+
double_quoted: "It's a \"double-quoted\" string with escapes"
+
backslash: "Path\\to\\file"
+
newline_escape: "First line\nSecond line"
+
+
# Case 12: Anchors and aliases at edge positions
+
anchor_list: &anchor_ref
+
- item1
+
- item2
+
- item3
+
+
alias_usage: *anchor_ref
+
+
nested_anchor:
+
data: &nested_ref
+
key: value
+
reference: *nested_ref
+
+
# Case 13: Mixed flow and block styles
+
mixed_style:
+
block_key:
+
- flow_in_block: [1, 2, 3]
+
- another: {a: 1, b: 2}
+
flow_key: {block_in_flow:
+
- item1
+
- item2}
+
+
# Case 14: Trailing commas in flow (typically invalid in YAML)
+
# flow_trailing: [1, 2, 3,] # This would be invalid
+
+
# Case 15: Strings that need quoting
+
needs_quote_1: "value with # in it"
+
needs_quote_2: "value with: colon"
+
needs_quote_3: "value with @ at sign"
+
needs_quote_4: "value with ` backtick"
+
+
# Case 16: Multiple documents separator (not starting a new document)
+
not_doc_separator: "--- this is just a string value"
+
+
# Case 17: Extremely nested structures
+
deeply_nested:
+
l1:
+
l2:
+
l3:
+
l4:
+
l5:
+
l6:
+
l7:
+
l8:
+
l9:
+
l10: "deep value"
+
+
# Case 18: Large sequence
+
large_sequence:
+
- item_001
+
- item_002
+
- item_003
+
- item_004
+
- item_005
+
- item_006
+
- item_007
+
- item_008
+
- item_009
+
- item_010
+
+
# Case 19: Keys and values with only whitespace differences
+
" key": "value with leading space in key"
+
"key ": "value with trailing space in key"
+
" spaced ": " spaced "
+
+
# Case 20: Binary-looking values
+
binary_string: "0b101010"
+
hex_string: "0xDEADBEEF"
+
+
# End of edge cases test file
+49
tests/cram/empty_collections.t
···
+
Empty Collection YAML Emission
+
+
These tests verify that empty sequences and mappings are correctly emitted
+
as [] and {} in YAML output.
+
+
Test: Empty sequence
+
+
$ echo 'empty_seq: []' | yamlcat
+
empty_seq: []
+
+
Test: Empty mapping
+
+
$ echo 'empty_map: {}' | yamlcat
+
empty_map: {}
+
+
Test: Multiple empty collections
+
+
$ echo 'seq: []
+
> map: {}
+
> data: value' | yamlcat
+
seq: []
+
map: {}
+
data: value
+
+
Test: Nested empty collections
+
+
$ echo 'outer:
+
> inner_seq: []
+
> inner_map: {}' | yamlcat
+
outer:
+
inner_seq: []
+
inner_map: {}
+
+
Test: Empty collection in sequence
+
+
$ echo 'items:
+
> - first
+
> - []
+
> - third' | yamlcat
+
items:
+
- first
+
- []
+
- third
+
+
Test: Verify JSON output is correct (for comparison)
+
+
$ echo 'empty_seq: []
+
> empty_map: {}' | yamlcat --json
+
{"empty_seq": [], "empty_map": {}}
+1
tests/cram/escaped_newline.yml
···
+
text: "Line one\nLine two"
+45
tests/cram/failing_escapes.t
···
+
Escape Sequence Issues (documentation of known edge cases)
+
+
These tests document escape sequence handling edge cases.
+
+
The primary issue is with \U (capital U) in double-quoted strings.
+
In YAML, \U is a 32-bit unicode escape that expects 8 hex digits.
+
When users write paths like "C:\Users" the \U is interpreted as
+
a unicode escape but "sers" are not valid hex digits.
+
+
Test: Capital U interpreted as unicode escape
+
+
$ echo 'path: "C:\\Users\\Name"' | yamlcat --json 2>&1
+
Error: invalid hex escape: at line 1, columns 12-12
+
[1]
+
+
This fails because:
+
- Shell: echo 'C:\\Users\\Name' produces C:\Users\Name
+
- YAML sees: "C:\Users\Name"
+
- \U is a 32-bit unicode escape (expects \UHHHHHHHH)
+
- "sers" are not 8 hex digits, so it fails
+
+
Test: Lowercase u unicode escape works
+
+
$ echo 'unicode: "\\u0041"' | yamlcat --json
+
{"unicode": "A"}
+
+
Test: Uppercase U requires 8 hex digits
+
+
$ echo 'unicode: "\\U00000041"' | yamlcat --json
+
{"unicode": "A"}
+
+
Test: Backslash escaping works for non-unicode
+
+
$ echo 'escaped: "one\\\\two\\\\three"' | yamlcat --json
+
{"escaped": "one\\two\\three"}
+
+
Test: Mixed valid escapes
+
+
$ echo 'text: "Tab:\\t Newline:\\n Quote:\\\\"' | yamlcat --json
+
{"text": "Tab:\t Newline:\n Quote:\\"}
+
+
Test: Backslash a is bell character
+
+
$ echo 'text: "test\\a"' | yamlcat --json
+
{"text": "test\007"}
+42
tests/cram/invalid_floats.t
···
+
Test that invalid float representations are treated as strings
+
(See: https://github.com/avsm/ocaml-yaml/issues/82)
+
+
Per YAML spec, only .nan/.NaN/.NAN (with leading dot) are valid NaN values,
+
and only .inf/.Inf/.INF (with leading dot) are valid infinity values.
+
+
OCaml's Float.of_string accepts "nan", "inf", "infinity" without the dot,
+
which causes incorrect parsing if used as a fallback.
+
+
$ yamlcat invalid_floats.yml
+
valid_nan: .nan
+
valid_nan_title: .nan
+
valid_nan_upper: .nan
+
valid_inf: .inf
+
valid_inf_title: .inf
+
valid_inf_upper: .inf
+
valid_neg_inf: -.inf
+
valid_neg_inf_title: -.inf
+
valid_neg_inf_upper: -.inf
+
valid_pos_inf: .inf
+
invalid_nan: 'nan'
+
invalid_nan_title: 'NaN'
+
invalid_nan_upper: 'NAN'
+
invalid_inf: 'inf'
+
invalid_inf_title: 'Inf'
+
invalid_inf_upper: 'INF'
+
invalid_infinity: 'infinity'
+
invalid_infinity_title: 'Infinity'
+
invalid_infinity_upper: 'INFINITY'
+
invalid_neg_inf: '-inf'
+
invalid_neg_infinity: '-infinity'
+
invalid_pos_inf: '+inf'
+
invalid_pos_infinity: '+infinity'
+
quoted_nan: 'nan'
+
quoted_inf: 'inf'
+
quoted_infinity: 'infinity'
+
dependencies:
+
'nan': 2.14.0
+
'inf': 1.0.0
+
+
$ yamlcat --json invalid_floats.yml
+
{"valid_nan": nan, "valid_nan_title": nan, "valid_nan_upper": nan, "valid_inf": inf, "valid_inf_title": inf, "valid_inf_upper": inf, "valid_neg_inf": -inf, "valid_neg_inf_title": -inf, "valid_neg_inf_upper": -inf, "valid_pos_inf": inf, "invalid_nan": "nan", "invalid_nan_title": "NaN", "invalid_nan_upper": "NAN", "invalid_inf": "inf", "invalid_inf_title": "Inf", "invalid_inf_upper": "INF", "invalid_infinity": "infinity", "invalid_infinity_title": "Infinity", "invalid_infinity_upper": "INFINITY", "invalid_neg_inf": "-inf", "invalid_neg_infinity": "-infinity", "invalid_pos_inf": "+inf", "invalid_pos_infinity": "+infinity", "quoted_nan": "nan", "quoted_inf": "inf", "quoted_infinity": "infinity", "dependencies": {"nan": "2.14.0", "inf": "1.0.0"}}
+40
tests/cram/invalid_floats.yml
···
+
# Test that invalid float representations are treated as strings
+
# Per YAML spec, only .nan/.NaN/.NAN and .inf/.Inf/.INF (with dot) are valid
+
+
# These should be floats (valid YAML float syntax)
+
valid_nan: .nan
+
valid_nan_title: .NaN
+
valid_nan_upper: .NAN
+
valid_inf: .inf
+
valid_inf_title: .Inf
+
valid_inf_upper: .INF
+
valid_neg_inf: -.inf
+
valid_neg_inf_title: -.Inf
+
valid_neg_inf_upper: -.INF
+
valid_pos_inf: +.inf
+
+
# These should be STRINGS (invalid YAML float syntax, but OCaml float_of_string accepts them)
+
# See: https://github.com/avsm/ocaml-yaml/issues/82
+
invalid_nan: nan
+
invalid_nan_title: NaN
+
invalid_nan_upper: NAN
+
invalid_inf: inf
+
invalid_inf_title: Inf
+
invalid_inf_upper: INF
+
invalid_infinity: infinity
+
invalid_infinity_title: Infinity
+
invalid_infinity_upper: INFINITY
+
invalid_neg_inf: -inf
+
invalid_neg_infinity: -infinity
+
invalid_pos_inf: +inf
+
invalid_pos_infinity: +infinity
+
+
# Quoted versions should always be strings
+
quoted_nan: 'nan'
+
quoted_inf: 'inf'
+
quoted_infinity: 'infinity'
+
+
# As mapping keys (yarn.lock use case)
+
dependencies:
+
nan: 2.14.0
+
inf: 1.0.0
+59
tests/cram/linuxkit.yml
···
+
kernel:
+
image: linuxkit/kernel:4.9.40
+
cmdline: "console=tty0 console=ttyS0"
+
init:
+
- linuxkit/init:906e174b3f2e07f97d6fd693a2e8518e98dafa58
+
- linuxkit/runc:90e45f13e1d0a0983f36ef854621e3eac91cf541
+
- linuxkit/containerd:7c986fb7df33bea73b5c8097b46989e46f49d875
+
- linuxkit/ca-certificates:e44b0a66df5a102c0e220f0066b0d904710dcb10
+
onboot:
+
- name: sysctl
+
image: linuxkit/sysctl:184c914d23a017062d7b53d7fc1dfaf47764bef6
+
- name: dhcpcd
+
image: linuxkit/dhcpcd:f3f5413abb78fae9020e35bd4788fa93df4530b7
+
command: ["/sbin/dhcpcd", "--nobackground", "-f", "/dhcpcd.conf", "-1"]
+
onshutdown:
+
- name: shutdown
+
image: busybox:latest
+
command: ["/bin/echo", "so long and thanks for all the fish"]
+
services:
+
- name: getty
+
image: linuxkit/getty:2c841cdc34396e3fa8f25b62d112808f63f16df6
+
env:
+
- INSECURE=true
+
- name: rngd
+
image: linuxkit/rngd:b2f4bdcb55aa88a25c86733e294628614504f383
+
- name: nginx
+
image: nginx:alpine
+
capabilities:
+
- CAP_NET_BIND_SERVICE
+
- CAP_CHOWN
+
- CAP_SETUID
+
- CAP_SETGID
+
- CAP_DAC_OVERRIDE
+
files:
+
- path: etc/containerd/config.toml
+
contents: |
+
state = "/run/containerd"
+
root = "/var/lib/containerd"
+
snapshotter = "io.containerd.snapshotter.v1.overlayfs"
+
differ = "io.containerd.differ.v1.base-diff"
+
subreaper = false
+
+
[grpc]
+
address = "/run/containerd/containerd.sock"
+
uid = 0
+
gid = 0
+
+
[debug]
+
address = "/run/containerd/debug.sock"
+
level = "info"
+
+
[metrics]
+
address = ":13337"
+
- path: etc/linuxkit-config
+
metadata: yaml
+
trust:
+
org:
+
- linuxkit
+
- library
+407
tests/cram/multidoc.t
···
+
Multi-document stream support (currently not supported)
+
+
These tests document expected behavior for multi-document YAML streams.
+
They currently fail with "multiple documents found when single expected".
+
+
Test: Two documents separated by ---
+
+
$ echo '---
+
> first: document
+
> ---
+
> second: document' | yamlcat 2>&1
+
first: document
+
---
+
second: document
+
+
Test: Three documents with different types
+
+
$ echo '---
+
> mapping: value
+
> ---
+
> - sequence
+
> - items
+
> ---
+
> scalar value' | yamlcat 2>&1
+
mapping: value
+
---
+
- sequence
+
- items
+
---
+
scalar value
+
+
Test: Documents with explicit end markers
+
+
$ echo '---
+
> doc1: value
+
> ...
+
> ---
+
> doc2: value
+
> ...' | yamlcat 2>&1
+
doc1: value
+
---
+
doc2: value
+
+
Test: Empty documents
+
+
$ echo '---
+
> ---
+
> content: here
+
> ---' | yamlcat 2>&1
+
null
+
---
+
content: here
+
---
+
null
+
+
Test: Multi-document file
+
+
$ yamlcat documents_multi.yml 2>&1
+
document: first
+
type: mapping
+
data:
+
key1: value1
+
key2: value2
+
---
+
document: second
+
type: mapping
+
data:
+
key3: value3
+
key4: value4
+
+
$ yamlcat documents_multi_three.yml 2>&1
+
name: John Doe
+
age: 30
+
city: New York
+
---
+
- apple
+
- banana
+
- orange
+
- grape
+
---
+
This is a plain scalar document
+
+
$ yamlcat documents_multi_with_end.yml 2>&1
+
first:
+
document: data1
+
value: 100
+
---
+
second:
+
document: data2
+
value: 200
+
---
+
third:
+
document: data3
+
value: 300
+
+
$ yamlcat documents_multi_empty.yml 2>&1
+
null
+
---
+
key: value
+
---
+
null
+
---
+
- item1
+
- item2
+
+
Test: Anchors file (uses multiple documents)
+
+
$ yamlcat anchors_basic.yml 2>&1
+
scalar_anchor: Hello, World!
+
scalar_alias: Hello, World!
+
---
+
original: 42
+
copy: 42
+
another_copy: 42
+
---
+
original_list:
+
- apple
+
- banana
+
- cherry
+
copied_list:
+
- apple
+
- banana
+
- cherry
+
---
+
original_map:
+
name: Alice
+
age: 30
+
city: London
+
copied_map:
+
name: Alice
+
age: 30
+
city: London
+
---
+
defaults:
+
timeout: 30
+
retries: 3
+
colors:
+
- red
+
- green
+
- blue
+
config:
+
settings:
+
timeout: 30
+
retries: 3
+
palette:
+
- red
+
- green
+
- blue
+
---
+
template:
+
metadata:
+
version: 1
+
author: John Doe
+
settings:
+
enabled: true
+
debug: false
+
instance1:
+
metadata:
+
version: 1
+
author: John Doe
+
settings:
+
enabled: true
+
debug: false
+
instance2:
+
metadata:
+
version: 1
+
author: John Doe
+
settings:
+
enabled: true
+
debug: false
+
---
+
items:
+
- id: 1
+
name: First
+
- id: 2
+
name: Second
+
- id: 1
+
name: First
+
---
+
shared_value: 100
+
calculations:
+
base: 100
+
doubled: 200
+
reference: 100
+
another_ref: 100
+
---
+
feature_flag: true
+
features:
+
login: true
+
signup: true
+
export: true
+
---
+
empty: null
+
values:
+
first: null
+
second: null
+
---
+
message: "This is a multi-line\nmessage with some\nspecial content!\n"
+
output1: "This is a multi-line\nmessage with some\nspecial content!\n"
+
output2: "This is a multi-line\nmessage with some\nspecial content!\n"
+
---
+
database:
+
primary:
+
host: localhost
+
port: 5432
+
ssl: true
+
replica:
+
host: localhost
+
port: 5432
+
ssl: true
+
backup:
+
host: localhost
+
port: 5432
+
ssl: true
+
+
$ yamlcat anchors_merge.yml 2>&1
+
defaults:
+
timeout: 30
+
retries: 3
+
verbose: false
+
production:
+
<<:
+
timeout: 30
+
retries: 3
+
verbose: false
+
environment: production
+
---
+
base:
+
color: red
+
size: medium
+
weight: 100
+
custom:
+
<<:
+
color: red
+
size: medium
+
weight: 100
+
color: blue
+
shape: circle
+
---
+
connection:
+
host: localhost
+
port: 8080
+
authentication:
+
username: admin
+
password: secret
+
server:
+
<<:
+
- host: localhost
+
port: 8080
+
- username: admin
+
password: secret
+
ssl: true
+
---
+
defaults:
+
timeout: 30
+
retries: 3
+
advanced:
+
cache: true
+
pool_size: 10
+
config:
+
<<:
+
- timeout: 30
+
retries: 3
+
- cache: true
+
pool_size: 10
+
timeout: 60
+
custom: value
+
---
+
base_style:
+
font: Arial
+
size: 12
+
heading_defaults:
+
<<:
+
font: Arial
+
size: 12
+
weight: bold
+
main_heading:
+
<<:
+
<<:
+
font: Arial
+
size: 12
+
weight: bold
+
size: 18
+
color: navy
+
---
+
common:
+
enabled: true
+
log_level: info
+
services:
+
- name: web
+
<<:
+
enabled: true
+
log_level: info
+
port: 80
+
- name: api
+
<<:
+
enabled: true
+
log_level: info
+
port: 3000
+
- name: worker
+
<<:
+
enabled: true
+
log_level: info
+
threads: 4
+
---
+
empty: {}
+
config:
+
<<: {}
+
key: value
+
---
+
metadata:
+
created: 2023-01-01
+
author: Admin
+
tags:
+
- v1
+
- stable
+
document:
+
<<:
+
created: 2023-01-01
+
author: Admin
+
tags:
+
- v1
+
- stable
+
title: Important Document
+
content: Some content here
+
---
+
level1:
+
a: 1
+
b: 2
+
level2:
+
<<:
+
a: 1
+
b: 2
+
c: 3
+
level3:
+
<<:
+
<<:
+
a: 1
+
b: 2
+
c: 3
+
d: 4
+
---
+
first:
+
name: First
+
value: 100
+
priority: low
+
second:
+
name: Second
+
value: 200
+
category: important
+
combined:
+
<<:
+
- name: First
+
value: 100
+
priority: low
+
- name: Second
+
value: 200
+
category: important
+
name: Combined
+
---
+
numbers:
+
count: 42
+
ratio: 3.14
+
active: true
+
derived:
+
<<:
+
count: 42
+
ratio: 3.14
+
active: true
+
label: Test
+
---
+
db_defaults:
+
pool_size: 5
+
timeout: 30
+
ssl: false
+
cache_defaults:
+
ttl: 3600
+
max_size: 1000
+
development:
+
database:
+
<<:
+
pool_size: 5
+
timeout: 30
+
ssl: false
+
host: localhost
+
name: dev_db
+
cache:
+
<<:
+
ttl: 3600
+
max_size: 1000
+
backend: memory
+
production:
+
database:
+
<<:
+
pool_size: 5
+
timeout: 30
+
ssl: false
+
host: prod.example.com
+
name: prod_db
+
ssl: true
+
pool_size: 20
+
cache:
+
<<:
+
ttl: 3600
+
max_size: 1000
+
backend: redis
+
ttl: 7200
+471
tests/cram/scalars.t
···
+
YAML Scalar Parsing Tests
+
+
This file tests various forms of YAML scalar values including plain, quoted, and block scalars.
+
+
================================================================================
+
PLAIN SCALARS
+
================================================================================
+
+
Simple plain scalars
+
+
$ echo 'key: value' | yamlcat
+
key: value
+
+
$ echo 'name: Alice
+
> age: 30
+
> active: true' | yamlcat
+
name: Alice
+
age: 30
+
active: true
+
+
Plain scalars with special values
+
+
$ echo 'null_val: null
+
> bool_true: true
+
> bool_false: false
+
> number: 42
+
> float: 3.14' | yamlcat --json
+
{"null_val": null, "bool_true": true, "bool_false": false, "number": 42, "float": 3.14}
+
+
================================================================================
+
QUOTED SCALARS - SINGLE QUOTES
+
================================================================================
+
+
Single-quoted strings preserve literal text
+
+
$ echo "single: 'hello world'" | yamlcat
+
single: hello world
+
+
Single-quoted strings with embedded double quotes
+
+
$ echo "quote: 'He said \"hello\"'" | yamlcat
+
quote: "He said \"hello\""
+
+
Single-quoted strings with escaped single quotes (doubled)
+
+
$ echo "escaped: 'It''s a test'" | yamlcat
+
escaped: It's a test
+
+
Single-quoted multiline (newlines become spaces)
+
+
$ echo "text: 'This is a
+
> multi-line
+
> string'" | yamlcat --json
+
{"text": "This is a multi-line string"}
+
+
Empty single-quoted string
+
+
$ echo "empty: ''" | yamlcat
+
empty: ''
+
+
================================================================================
+
QUOTED SCALARS - DOUBLE QUOTES
+
================================================================================
+
+
Simple double-quoted strings
+
+
$ echo 'double: "hello world"' | yamlcat
+
double: hello world
+
+
Double-quoted with escaped newline
+
+
$ yamlcat --json escaped_newline.yml
+
{"text": "Line one\nLine two"}
+
+
Double-quoted with escaped tab
+
+
$ echo 'text: "Col1\tCol2\tCol3"' | yamlcat --json
+
{"text": "Col1\tCol2\tCol3"}
+
+
Double-quoted with backslash escape
+
+
$ echo 'path: "C:\\Users\\Name"' | yamlcat --json
+
Error: invalid hex escape: at line 1, columns 12-12
+
[1]
+
+
Double-quoted with escaped quote
+
+
$ echo 'text: "She said \"hello\""' | yamlcat --json
+
{"text": "She said \"hello\""}
+
+
Double-quoted with multiple escape sequences
+
+
$ echo 'text: "Tab:\t Newline:\n Quote:\" Backslash:\\\\"' | yamlcat --json
+
{"text": "Tab:\t Newline: Quote:\" Backslash:\\"}
+
+
Empty double-quoted string
+
+
$ echo 'empty: ""' | yamlcat
+
empty: ''
+
+
================================================================================
+
BLOCK SCALARS - LITERAL STYLE (|)
+
================================================================================
+
+
Basic literal block scalar (preserves newlines)
+
+
$ echo 'text: |
+
> line one
+
> line two
+
> line three' | yamlcat --json
+
{"text": "line one\nline two\nline three\n"}
+
+
Literal with indentation
+
+
$ echo 'text: |
+
> First line
+
> Indented line
+
> Back to first' | yamlcat --json
+
{"text": "First line\n Indented line\nBack to first\n"}
+
+
Literal with blank lines
+
+
$ echo 'text: |
+
> First paragraph
+
>
+
> Second paragraph' | yamlcat --json
+
{"text": "First paragraph\n\nSecond paragraph\n"}
+
+
================================================================================
+
BLOCK SCALARS - FOLDED STYLE (>)
+
================================================================================
+
+
Basic folded block scalar (newlines become spaces)
+
+
$ echo 'text: >
+
> This is a long paragraph
+
> that will be folded into
+
> a single line.' | yamlcat --json
+
{"text": "This is a long paragraph that will be folded into a single line.\n"}
+
+
Folded with paragraph separation (blank line preserved)
+
+
$ echo 'text: >
+
> First paragraph
+
> flows together.
+
>
+
> Second paragraph
+
> also flows.' | yamlcat --json
+
{"text": "First paragraph flows together.\nSecond paragraph also flows.\n"}
+
+
================================================================================
+
CHOMPING INDICATORS
+
================================================================================
+
+
Strip chomping (-) removes trailing newlines
+
+
$ echo 'text: |-
+
> No trailing newline' | yamlcat --json
+
{"text": "No trailing newline"}
+
+
$ echo 'text: |-
+
> Text here
+
>
+
> ' | yamlcat --json
+
{"text": "Text here"}
+
+
Folded with strip
+
+
$ echo 'text: >-
+
> Folded text
+
> with stripped
+
> trailing newlines
+
>
+
> ' | yamlcat --json
+
{"text": "Folded text with stripped trailing newlines"}
+
+
Clip chomping (default) keeps single trailing newline
+
+
$ echo 'text: |
+
> One trailing newline
+
>
+
> ' | yamlcat --json
+
{"text": "One trailing newline\n"}
+
+
$ echo 'text: >
+
> Folded with one
+
> trailing newline
+
>
+
> ' | yamlcat --json
+
{"text": "Folded with one trailing newline\n"}
+
+
Keep chomping (+) preserves all trailing newlines
+
+
$ echo 'text: |+
+
> Keeps trailing newlines
+
>
+
>
+
> ' | yamlcat --json
+
{"text": "Keeps trailing newlines\n\n\n\n"}
+
+
$ echo 'text: >+
+
> Folded text
+
> keeps trailing
+
>
+
>
+
> ' | yamlcat --json
+
{"text": "Folded text keeps trailing\n\n\n\n"}
+
+
================================================================================
+
EXPLICIT INDENTATION INDICATORS
+
================================================================================
+
+
Literal with explicit 2-space indentation
+
+
$ echo 'text: |2
+
> Two space base
+
> Second line
+
> Extra indent' | yamlcat --json
+
{"text": " Two space base\n Second line\n Extra indent\n"}
+
+
Folded with explicit indentation
+
+
$ echo 'text: >2
+
> Text with two space
+
> base indentation that
+
> will be folded.' | yamlcat --json
+
{"text": " Text with two space\n base indentation that\n will be folded.\n"}
+
+
Combined indentation and chomping indicators
+
+
$ echo 'text: |2-
+
> Indented by 2
+
> No trailing newlines
+
>
+
> ' | yamlcat --json
+
{"text": " Indented by 2\n No trailing newlines"}
+
+
$ echo 'text: |2+
+
> Indented by 2
+
> Keeps trailing newlines
+
>
+
>
+
> ' | yamlcat --json
+
{"text": " Indented by 2\n Keeps trailing newlines\n\n\n\n"}
+
+
================================================================================
+
FILE TESTS - QUOTED SCALARS
+
================================================================================
+
+
Test parsing scalars_quoted.yml file
+
+
$ yamlcat scalars_quoted.yml | head -20
+
single_simple: hello world
+
single_with_double: "He said \"hello\""
+
single_escaped_quote: 'It''s a single quote: ''example'''
+
single_multiline: This is a multi-line single quoted string
+
double_simple: hello world
+
double_with_single: It's easy
+
double_escaped_quote: "She said \"hello\""
+
escaped_newline: "Line one\nLine two\nLine three"
+
escaped_tab: "Column1\tColumn2\tColumn3"
+
escaped_backslash: "Path: C:\\Users\\Name"
+
escaped_carriage: "Before\rAfter"
+
escaped_bell: "Bell\x07"
+
escaped_backspace: "Back\x08"
+
escaped_formfeed: "Form\x0c"
+
escaped_vertical: "Vertical\x0btab"
+
unicode_16bit: 'Snowman: ☃'
+
unicode_32bit: 'Emoji: 😀'
+
unicode_hex: "Null byte: \x00"
+
empty_single: ''
+
empty_double: ''
+
+
Test JSON output for quoted scalars
+
+
$ yamlcat --json scalars_quoted.yml | head -c 500
+
{"single_simple": "hello world", "single_with_double": "He said \"hello\"", "single_escaped_quote": "It's a single quote: 'example'", "single_multiline": "This is a multi-line single quoted string", "double_simple": "hello world", "double_with_single": "It's easy", "double_escaped_quote": "She said \"hello\"", "escaped_newline": "Line one\nLine two\nLine three", "escaped_tab": "Column1\tColumn2\tColumn3", "escaped_backslash": "Path: C:\\Users\\Name", "escaped_carriage": "Before\rAfter", "escaped
+
+
Verify specific escape handling in JSON
+
+
$ yamlcat --json scalars_quoted.yml | grep -o '"escaped_newline": "[^"]*"'
+
"escaped_newline": "Line one\nLine two\nLine three"
+
+
$ yamlcat --json scalars_quoted.yml | grep -o '"escaped_tab": "[^"]*"'
+
"escaped_tab": "Column1\tColumn2\tColumn3"
+
+
Verify Unicode handling
+
+
$ yamlcat --json scalars_quoted.yml | grep -o '"unicode_16bit": "[^"]*"'
+
"unicode_16bit": "Snowman: \226\152\131"
+
+
$ yamlcat --json scalars_quoted.yml | grep -o '"unicode_32bit": "[^"]*"'
+
"unicode_32bit": "Emoji: \240\159\152\128"
+
+
Verify quoted strings preserve type indicators
+
+
$ yamlcat --json scalars_quoted.yml | grep -o '"string_true": "[^"]*"'
+
"string_true": "true"
+
+
$ yamlcat --json scalars_quoted.yml | grep -o '"string_null": "[^"]*"'
+
"string_null": "null"
+
+
$ yamlcat --json scalars_quoted.yml | grep -o '"string_number": "[^"]*"'
+
"string_number": "123"
+
+
================================================================================
+
FILE TESTS - BLOCK SCALARS
+
================================================================================
+
+
Test parsing scalars_block.yml file
+
+
$ yamlcat scalars_block.yml | head -30
+
literal_basic: "Line one\nLine two\nLine three\n"
+
literal_with_indent: "First line\n Indented line\n More indented\n Back to second level\nBack to first level\n"
+
folded_basic: "This is a long paragraph that will be folded into a single line with the newlines converted to spaces.\n"
+
folded_paragraph: "First paragraph flows together into a single line.\nSecond paragraph after blank line also flows together.\n"
+
literal_strip: No trailing newline
+
literal_strip_multiple: Text here
+
folded_strip: Folded text with stripped trailing newlines
+
literal_clip: "One trailing newline\n"
+
literal_clip_explicit: "This is the default behavior\n"
+
folded_clip: "Folded with one trailing newline\n"
+
literal_keep: "Keeps trailing newlines\n\n\n"
+
literal_keep_multiple: "Text here\n\n\n"
+
folded_keep: "Folded text keeps trailing\n\n\n"
+
literal_indent_2: " Two space indentation\n is preserved here\n Extra indent\n Back to two\n"
+
literal_indent_4: " Four space base indent\n Second line\n Extra indent\n Back to base\n"
+
folded_indent_2: " Text with two space\n base indentation that\n will be folded.\n"
+
folded_indent_3: " Three space indent\n for this folded\n text block.\n"
+
literal_indent_strip: " Indented by 2\n No trailing newlines"
+
folded_indent_strip: " Folded with indent\n and stripped end"
+
literal_indent_keep: " Indented by 2\n Keeps trailing newlines\n\n\n"
+
folded_indent_keep: " Folded indent 4\n keeps all trailing\n\n\n"
+
empty_literal: ''
+
empty_folded: ''
+
only_newlines_literal: ''
+
only_newlines_folded: ''
+
complex_literal: "First level\n Second level\n Third level\n Back to second\nBack to first\n\nNew paragraph\n With indent\n\nFinal paragraph\n"
+
complex_folded: "This paragraph flows together.\nThis is separate.\n This line starts more indented\n and continues.\n\nFinal thoughts here.\n"
+
special_chars_literal: "Special: @#$%^&*()\nQuotes: \"double\" 'single'\nBrackets: [array] {object}\nSymbols: | > & * ? : -\n"
+
special_chars_folded: "All special chars are literal in block scalars: []{}|>*&\n"
+
sequence_with_blocks:
+
+
Test JSON output for block scalars
+
+
$ yamlcat --json scalars_block.yml | grep -o '"literal_basic": "[^"]*"'
+
"literal_basic": "Line one\nLine two\nLine three\n"
+
+
$ yamlcat --json scalars_block.yml | grep -o '"folded_basic": "[^"]*"' | head -c 100
+
"folded_basic": "This is a long paragraph that will be folded into a single line with the newlines c
+
+
Verify strip chomping
+
+
$ yamlcat --json scalars_block.yml | grep -o '"literal_strip": "[^"]*"'
+
"literal_strip": "No trailing newline"
+
+
$ yamlcat --json scalars_block.yml | grep -o '"folded_strip": "[^"]*"'
+
"folded_strip": "Folded text with stripped trailing newlines"
+
+
Verify clip chomping (single newline)
+
+
$ yamlcat --json scalars_block.yml | grep -o '"literal_clip": "[^"]*"'
+
"literal_clip": "One trailing newline\n"
+
+
Verify keep chomping (all newlines)
+
+
$ yamlcat --json scalars_block.yml | grep -o '"literal_keep": "[^"]*"'
+
"literal_keep": "Keeps trailing newlines\n\n\n"
+
+
$ yamlcat --json scalars_block.yml | grep -o '"folded_keep": "[^"]*"'
+
"folded_keep": "Folded text keeps trailing\n\n\n"
+
+
Verify indentation handling
+
+
$ yamlcat --json scalars_block.yml | grep -o '"literal_indent_2": "[^"]*"'
+
"literal_indent_2": " Two space indentation\n is preserved here\n Extra indent\n Back to two\n"
+
+
Verify nested structures with block scalars
+
+
$ yamlcat scalars_block.yml | tail -10
+
special_chars_folded: "All special chars are literal in block scalars: []{}|>*&\n"
+
sequence_with_blocks:
+
- "First item\nliteral block\n"
+
- "Second item folded block\n"
+
- "Third item\nstripped"
+
- "Fourth item\nkept\n\n\n"
+
nested:
+
description: "This is a folded description that spans multiple lines.\n"
+
code: "def hello():\n print(\"Hello, World!\")\n return True\n"
+
notes: "Final notes\nwith stripped end"
+
+
================================================================================
+
SPECIAL CASES AND EDGE CASES
+
================================================================================
+
+
Empty block scalars
+
+
$ echo 'empty_literal: |' | yamlcat --json
+
{"empty_literal": ""}
+
+
$ echo 'empty_folded: >' | yamlcat --json
+
{"empty_folded": ""}
+
+
Block scalars with special characters (no escaping needed)
+
+
$ echo 'code: |
+
> Special: @#$%^&*()
+
> Quotes: "double" '"'"'single'"'"'
+
> Brackets: [array] {object}' | yamlcat --json
+
{"code": "Special: @#$%^&*()\nQuotes: \"double\" 'single'\nBrackets: [array] {object}\n"}
+
+
Plain scalar vs quoted string for special values
+
+
$ echo 'unquoted_true: true
+
> quoted_true: "true"' | yamlcat --json
+
{"unquoted_true": true, "quoted_true": "true"}
+
+
$ echo 'unquoted_null: null
+
> quoted_null: "null"' | yamlcat --json
+
{"unquoted_null": null, "quoted_null": "null"}
+
+
Strings that need quoting to preserve leading/trailing spaces
+
+
$ echo 'leading: " spaces"
+
> trailing: "spaces "
+
> both: " spaces "' | yamlcat --json
+
{"leading": " spaces", "trailing": "spaces ", "both": " spaces "}
+
+
Block scalars in sequences
+
+
$ echo 'items:
+
> - |
+
> First item
+
> multiline
+
> - >
+
> Second item
+
> folded' | yamlcat --json
+
{"items": ["First item\nmultiline\n", "Second item folded\n"]}
+
+
Block scalars in nested mappings
+
+
$ echo 'outer:
+
> inner:
+
> description: >
+
> This is a folded
+
> description.
+
> code: |
+
> def test():
+
> return True' | yamlcat --json
+
{"outer": {"inner": {"description": "This is a folded description.\n", "code": "def test():\n return True\n"}}}
+
+
Preserving indentation in literal blocks
+
+
$ echo 'code: |
+
> def hello():
+
> print("Hello")
+
> if True:
+
> return 42' | yamlcat --json
+
{"code": "def hello():\n print(\"Hello\")\n if True:\n return 42\n"}
+
+
Folded scalars preserve more-indented lines
+
+
$ echo 'text: >
+
> Normal paragraph
+
> continues here.
+
>
+
> Indented block
+
> preserved.
+
>
+
> Back to normal.' | yamlcat --json
+
{"text": "Normal paragraph continues here.\n\n Indented block\n preserved.\n\nBack to normal.\n"}
+192
tests/cram/scalars_block.yml
···
+
# Block scalars - literal and folded styles
+
---
+
# Literal style (|) - preserves newlines
+
literal_basic: |
+
Line one
+
Line two
+
Line three
+
+
literal_with_indent: |
+
First line
+
Indented line
+
More indented
+
Back to second level
+
Back to first level
+
+
# Folded style (>) - converts newlines to spaces
+
folded_basic: >
+
This is a long paragraph
+
that will be folded into
+
a single line with the
+
newlines converted to spaces.
+
+
folded_paragraph: >
+
First paragraph flows together
+
into a single line.
+
+
Second paragraph after blank line
+
also flows together.
+
+
# Chomping indicators
+
# Strip (-) - removes trailing newlines
+
literal_strip: |-
+
No trailing newline
+
+
+
literal_strip_multiple: |-
+
Text here
+
+
+
folded_strip: >-
+
Folded text
+
with stripped
+
trailing newlines
+
+
+
# Clip (default) - keeps single trailing newline
+
literal_clip: |
+
One trailing newline
+
+
+
literal_clip_explicit: |
+
This is the default behavior
+
+
+
folded_clip: >
+
Folded with one
+
trailing newline
+
+
+
# Keep (+) - preserves all trailing newlines
+
literal_keep: |+
+
Keeps trailing newlines
+
+
+
literal_keep_multiple: |+
+
Text here
+
+
+
folded_keep: >+
+
Folded text
+
keeps trailing
+
+
+
# Explicit indentation indicators
+
literal_indent_2: |2
+
Two space indentation
+
is preserved here
+
Extra indent
+
Back to two
+
+
literal_indent_4: |4
+
Four space base indent
+
Second line
+
Extra indent
+
Back to base
+
+
folded_indent_2: >2
+
Text with two space
+
base indentation that
+
will be folded.
+
+
folded_indent_3: >3
+
Three space indent
+
for this folded
+
text block.
+
+
# Combinations of indicators
+
literal_indent_strip: |2-
+
Indented by 2
+
No trailing newlines
+
+
+
folded_indent_strip: >3-
+
Folded with indent
+
and stripped end
+
+
+
literal_indent_keep: |2+
+
Indented by 2
+
Keeps trailing newlines
+
+
+
folded_indent_keep: >4+
+
Folded indent 4
+
keeps all trailing
+
+
+
# Empty block scalars
+
empty_literal: |
+
+
empty_folded: >
+
+
# Block scalar with only newlines
+
only_newlines_literal: |
+
+
+
only_newlines_folded: >
+
+
+
# Complex indentation patterns
+
complex_literal: |
+
First level
+
Second level
+
Third level
+
Back to second
+
Back to first
+
+
New paragraph
+
With indent
+
+
Final paragraph
+
+
complex_folded: >
+
This paragraph
+
flows together.
+
+
This is separate.
+
This line starts more indented
+
and continues.
+
+
Final thoughts here.
+
+
# Special characters in block scalars
+
special_chars_literal: |
+
Special: @#$%^&*()
+
Quotes: "double" 'single'
+
Brackets: [array] {object}
+
Symbols: | > & * ? : -
+
+
special_chars_folded: >
+
All special chars are literal
+
in block scalars: []{}|>*&
+
+
# Block scalars in sequences
+
sequence_with_blocks:
+
- |
+
First item
+
literal block
+
- >
+
Second item
+
folded block
+
- |-
+
Third item
+
stripped
+
- |+
+
Fourth item
+
kept
+
+
+
# Block scalars in nested mappings
+
nested:
+
description: >
+
This is a folded
+
description that spans
+
multiple lines.
+
code: |
+
def hello():
+
print("Hello, World!")
+
return True
+
notes: |-
+
Final notes
+
with stripped end
+60
tests/cram/scalars_plain.yml
···
+
# Plain scalars - no quotes needed
+
---
+
# Simple words
+
simple_word: hello
+
single_character: x
+
number_like: 123
+
boolean_like: true
+
null_like: null
+
+
# Multi-word values (no special meaning characters)
+
sentence: this is a plain scalar
+
phrase: plain scalars can have spaces
+
+
# Numbers and special values that remain strings in context
+
age: 42
+
pi: 3.14159
+
negative: -273
+
scientific: 1.23e-4
+
hex_like: 0x1A2B
+
octal_like: 0o755
+
+
# Special characters that are valid in plain scalars
+
with_colon: "value: with colon needs quotes in value"
+
with_comma: "commas, need quotes in flow context"
+
with_hash: "# needs quotes if starting with hash"
+
hyphen_start: "- needs quotes if starting like list"
+
question_start: "? needs quotes if starting like mapping key"
+
+
# Plain scalars with valid special characters
+
email: user@example.com
+
url: http://example.com/path
+
path: /usr/local/bin
+
ratio: 16:9
+
version: v1.2.3
+
+
# Multi-line plain scalars (line folding)
+
# Newlines become spaces, blank lines become newlines
+
folded_plain: This is a long
+
plain scalar that spans
+
multiple lines and will
+
be folded into a single line
+
with spaces.
+
+
another_folded: First paragraph
+
continues here and here.
+
+
Second paragraph after blank line.
+
Also continues.
+
+
# Trailing and leading spaces are trimmed in plain scalars
+
spaces_trimmed: value with spaces
+
+
# Plain scalars can contain most punctuation
+
punctuation: Hello, world! How are you? I'm fine.
+
symbols: $100 & 50% off @ store #1
+
math: 2+2=4 and 3*3=9
+
+
# Empty plain scalar (becomes null)
+
empty_implicit:
+
explicit_empty: ""
+81
tests/cram/scalars_quoted.yml
···
+
# Quoted scalars - single and double quoted strings
+
---
+
# Single-quoted strings
+
single_simple: 'hello world'
+
single_with_double: 'He said "hello"'
+
single_escaped_quote: 'It''s a single quote: ''example'''
+
single_multiline: 'This is a
+
multi-line single
+
quoted string'
+
+
# Double-quoted strings
+
double_simple: "hello world"
+
double_with_single: "It's easy"
+
double_escaped_quote: "She said \"hello\""
+
+
# Escape sequences in double-quoted strings
+
escaped_newline: "Line one\nLine two\nLine three"
+
escaped_tab: "Column1\tColumn2\tColumn3"
+
escaped_backslash: "Path: C:\\Users\\Name"
+
escaped_carriage: "Before\rAfter"
+
escaped_bell: "Bell\a"
+
escaped_backspace: "Back\b"
+
escaped_formfeed: "Form\f"
+
escaped_vertical: "Vertical\vtab"
+
+
# Unicode escapes
+
unicode_16bit: "Snowman: \u2603"
+
unicode_32bit: "Emoji: \U0001F600"
+
unicode_hex: "Null byte: \x00"
+
+
# Empty strings
+
empty_single: ''
+
empty_double: ""
+
+
# Strings that would be interpreted as other types if unquoted
+
string_true: "true"
+
string_false: "false"
+
string_null: "null"
+
string_number: "123"
+
string_float: "45.67"
+
string_octal: "0o755"
+
string_hex: "0xFF"
+
+
# Special YAML characters that need quoting
+
starts_with_at: "@username"
+
starts_with_backtick: "`command`"
+
starts_with_ampersand: "&reference"
+
starts_with_asterisk: "*alias"
+
starts_with_exclamation: "!tag"
+
starts_with_pipe: "|literal"
+
starts_with_gt: ">folded"
+
starts_with_percent: "%directive"
+
+
# Flow indicators that need quoting
+
with_brackets: "[not a list]"
+
with_braces: "{not: a map}"
+
with_comma: "a, b, c"
+
with_colon_space: "key: value"
+
+
# Quoted strings preserve leading/trailing whitespace
+
leading_space: " spaces before"
+
trailing_space: "spaces after "
+
both_spaces: " spaces both "
+
+
# Multi-line quoted strings
+
double_multiline: "This is a string
+
that spans multiple
+
lines with escaped newlines."
+
+
single_fold: 'This single quoted
+
string will fold
+
lines into spaces.'
+
+
# Complex escape sequences
+
complex_escapes: "Tab:\t Newline:\n Quote:\" Backslash:\\ Unicode:\u0041"
+
+
# Edge cases
+
only_spaces_single: ' '
+
only_spaces_double: " "
+
only_newlines: "\n\n\n"
+
mixed_quotes: "She said 'it''s a beautiful day'"
+5
tests/cram/seq.yml
···
+
- hello
+
- whats
+
- up
+
- foo
+
- bar
+2
tests/cram/simple_alias.yml
···
+
anchor: &anc hello
+
alias: *anc
+60
tests/cram/tags.t
···
+
Tag Support Tests
+
+
These tests verify YAML tag support including type coercion and
+
different tag formats.
+
+
Test: String tag shorthand
+
+
$ printf '!!str 123' | yamlcat
+
'123'
+
+
The !!str tag forces the value to be treated as a string.
+
+
Test: Integer tag shorthand
+
+
$ printf '!!int "42"' | yamlcat
+
42
+
+
The !!int tag coerces the quoted string to an integer.
+
+
Test: Boolean tag shorthand
+
+
$ printf '!!bool "yes"' | yamlcat
+
true
+
+
The !!bool tag coerces the string to a boolean.
+
+
Test: Null tag shorthand
+
+
$ printf '!!null ""' | yamlcat
+
null
+
+
The !!null tag coerces the value to null.
+
+
Test: Float tag shorthand
+
+
$ printf '!!float 3.14' | yamlcat
+
3.14
+
+
The !!float tag specifies a floating-point number.
+
+
Test: Tag shorthand in mapping value
+
+
$ printf 'value: !!str 42' | yamlcat
+
value: '42'
+
+
Tags work in mapping values and force type coercion.
+
+
Test: Local tags
+
+
$ printf '!local_tag value' | yamlcat
+
value
+
+
Local tags (single !) are treated as unknown and default to string type.
+
+
Test: Verbatim tags
+
+
$ printf '!<tag:example.com:type> value' | yamlcat
+
value
+
+
Verbatim tags (!<...>) are treated as unknown and default to string type.
+6
tests/cram/valid_alias.yml
···
+
defaults: &defaults
+
timeout: 30
+
retries: 3
+
production:
+
<<: *defaults
+
port: 8080
+85
tests/cram/valid_floats.t
···
+
Test valid YAML float number formats
+
+
This tests various real-world ways to write floating point numbers in YAML:
+
- Basic decimal notation (3.14, -0.5)
+
- Leading dot notation (.5 for 0.5)
+
- Scientific notation (1e10, 1E-10, 1.5e+10)
+
- Positive/negative signs
+
- Edge cases (very large, very small, negative zero)
+
+
$ yamlcat valid_floats.yml
+
basic_positive: 3.14
+
basic_negative: -3.14
+
basic_zero: 0
+
small_positive: 0.5
+
small_negative: -0.5
+
leading_dot: 0.5
+
leading_dot_negative: -0.5
+
leading_dot_positive: 0.5
+
leading_dot_long: 0.123457
+
trailing_zero: 1
+
trailing_zeros: 1
+
many_trailing: 3.14
+
sci_positive: 10000000000
+
sci_negative: 1e-10
+
sci_explicit_positive: 10000000000
+
sci_with_decimal: 15000000000
+
sci_small: 6.022e+23
+
sci_tiny: 1.6e-19
+
sci_upper: 10000000000
+
sci_upper_negative: 1e-10
+
sci_upper_explicit: 10000000000
+
sci_upper_decimal: 250000000
+
neg_sci: -10000000000
+
neg_sci_negative_exp: -1e-10
+
neg_sci_decimal: -314000
+
positive_sign: 3.14
+
positive_sci: 10000000000
+
positive_small: 0.001
+
very_large: 1e+100
+
very_small: 1e-100
+
large_decimal: 1e+09
+
small_decimal: 1e-09
+
negative_zero: -0
+
positive_zero: 0
+
one: 1
+
minus_one: -1
+
pi: 3.14159
+
euler: 2.71828
+
golden_ratio: 1.61803
+
planck: 6.62607e-34
+
avogadro: 6.02214e+23
+
speed_of_light: 299792458
+
gravitational_constant: 6.674e-11
+
coordinates:
+
latitude: 37.7749
+
longitude: -122.419
+
altitude: 16
+
prices:
+
item1: 19.99
+
item2: 0.99
+
discount: -5
+
tax_rate: 0.0825
+
measurements:
+
temperature_c: 23.5
+
temperature_f: 74.3
+
humidity: 0.65
+
pressure_hpa: 1013.25
+
float_sequence:
+
- 1
+
- -1
+
- 0.5
+
- -0.5
+
- 10000000000
+
- 1e-10
+
- 0.5
+
- -0.5
+
integers:
+
simple: 42
+
negative: -17
+
zero: 0
+
hex: 26
+
octal: 12
+
+
$ yamlcat --json valid_floats.yml
+
{"basic_positive": 3.14, "basic_negative": -3.14, "basic_zero": 0, "small_positive": 0.5, "small_negative": -0.5, "leading_dot": 0.5, "leading_dot_negative": -0.5, "leading_dot_positive": 0.5, "leading_dot_long": 0.123457, "trailing_zero": 1, "trailing_zeros": 1, "many_trailing": 3.14, "sci_positive": 10000000000, "sci_negative": 1e-10, "sci_explicit_positive": 10000000000, "sci_with_decimal": 15000000000, "sci_small": 6.022e+23, "sci_tiny": 1.6e-19, "sci_upper": 10000000000, "sci_upper_negative": 1e-10, "sci_upper_explicit": 10000000000, "sci_upper_decimal": 250000000, "neg_sci": -10000000000, "neg_sci_negative_exp": -1e-10, "neg_sci_decimal": -314000, "positive_sign": 3.14, "positive_sci": 10000000000, "positive_small": 0.001, "very_large": 1e+100, "very_small": 1e-100, "large_decimal": 1e+09, "small_decimal": 1e-09, "negative_zero": -0, "positive_zero": 0, "one": 1, "minus_one": -1, "pi": 3.14159, "euler": 2.71828, "golden_ratio": 1.61803, "planck": 6.62607e-34, "avogadro": 6.02214e+23, "speed_of_light": 299792458, "gravitational_constant": 6.674e-11, "coordinates": {"latitude": 37.7749, "longitude": -122.419, "altitude": 16}, "prices": {"item1": 19.99, "item2": 0.99, "discount": -5, "tax_rate": 0.0825}, "measurements": {"temperature_c": 23.5, "temperature_f": 74.3, "humidity": 0.65, "pressure_hpa": 1013.25}, "float_sequence": [1, -1, 0.5, -0.5, 10000000000, 1e-10, 0.5, -0.5], "integers": {"simple": 42, "negative": -17, "zero": 0, "hex": 26, "octal": 12}}
+103
tests/cram/valid_floats.yml
···
+
# Test valid YAML float number formats
+
+
# Basic decimal floats
+
basic_positive: 3.14
+
basic_negative: -3.14
+
basic_zero: 0.0
+
small_positive: 0.5
+
small_negative: -0.5
+
+
# Leading dot (no leading zero)
+
leading_dot: .5
+
leading_dot_negative: -.5
+
leading_dot_positive: +.5
+
leading_dot_long: .123456789
+
+
# Trailing zeros
+
trailing_zero: 1.0
+
trailing_zeros: 1.00
+
many_trailing: 3.14000
+
+
# Scientific notation - lowercase e
+
sci_positive: 1e10
+
sci_negative: 1e-10
+
sci_explicit_positive: 1e+10
+
sci_with_decimal: 1.5e10
+
sci_small: 6.022e23
+
sci_tiny: 1.6e-19
+
+
# Scientific notation - uppercase E
+
sci_upper: 1E10
+
sci_upper_negative: 1E-10
+
sci_upper_explicit: 1E+10
+
sci_upper_decimal: 2.5E8
+
+
# Negative numbers with scientific notation
+
neg_sci: -1e10
+
neg_sci_negative_exp: -1e-10
+
neg_sci_decimal: -3.14e5
+
+
# Positive sign
+
positive_sign: +3.14
+
positive_sci: +1e10
+
positive_small: +0.001
+
+
# Large and small values
+
very_large: 1e100
+
very_small: 1e-100
+
large_decimal: 999999999.999999
+
small_decimal: 0.000000001
+
+
# Edge cases
+
negative_zero: -0.0
+
positive_zero: +0.0
+
one: 1.0
+
minus_one: -1.0
+
+
# Real-world examples
+
pi: 3.14159265359
+
euler: 2.71828182845
+
golden_ratio: 1.61803398875
+
planck: 6.62607015e-34
+
avogadro: 6.02214076e23
+
speed_of_light: 299792458.0
+
gravitational_constant: 6.674e-11
+
+
# Coordinates example
+
coordinates:
+
latitude: 37.7749
+
longitude: -122.4194
+
altitude: 16.0
+
+
# Financial examples
+
prices:
+
item1: 19.99
+
item2: 0.99
+
discount: -5.00
+
tax_rate: 0.0825
+
+
# Measurements
+
measurements:
+
temperature_c: 23.5
+
temperature_f: 74.3
+
humidity: 0.65
+
pressure_hpa: 1013.25
+
+
# Mixed in sequences
+
float_sequence:
+
- 1.0
+
- -1.0
+
- 0.5
+
- -0.5
+
- 1e10
+
- 1e-10
+
- .5
+
- -.5
+
+
# Integers that should stay as integers (for comparison)
+
integers:
+
simple: 42
+
negative: -17
+
zero: 0
+
hex: 0x1A
+
octal: 0o14
+286
tests/cram/values.t
···
+
Test YAML null values from values_null.yml
+
+
$ yamlcat values_null.yml
+
explicit_null: null
+
tilde_null: null
+
empty_null: null
+
flow_null:
+
- null
+
- null
+
sequence_nulls:
+
- null
+
- null
+
- null
+
- explicit: null
+
- tilde: null
+
- empty: null
+
mapping_nulls:
+
key1: null
+
key2: null
+
key3: null
+
'null': null key with string value
+
'~': tilde key with string value
+
nested:
+
level1:
+
null_value: null
+
tilde_value: null
+
empty_value: null
+
list:
+
- null
+
- null
+
- null
+
- some_value
+
map:
+
a: null
+
b: null
+
c: null
+
string_nulls:
+
quoted_null: 'null'
+
quoted_tilde: '~'
+
null_in_string: this is null
+
word_null: 'null'
+
+
$ yamlcat --json values_null.yml
+
{"explicit_null": null, "tilde_null": null, "empty_null": null, "flow_null": [null, null], "sequence_nulls": [null, null, null, {"explicit": null}, {"tilde": null}, {"empty": null}], "mapping_nulls": {"key1": null, "key2": null, "key3": null}, "null": "null key with string value", "~": "tilde key with string value", "nested": {"level1": {"null_value": null, "tilde_value": null, "empty_value": null, "list": [null, null, null, "some_value"], "map": {"a": null, "b": null, "c": null}}}, "string_nulls": {"quoted_null": "null", "quoted_tilde": "~", "null_in_string": "this is null", "word_null": "null"}}
+
+
Test YAML boolean values from values_bool.yml
+
+
$ yamlcat values_bool.yml
+
bool_true: true
+
bool_false: false
+
capitalized_true: true
+
capitalized_false: false
+
yes_value: true
+
no_value: false
+
Yes_value: true
+
No_value: false
+
YES_value: true
+
NO_value: false
+
on_value: true
+
off_value: false
+
On_value: true
+
Off_value: false
+
ON_value: true
+
OFF_value: false
+
bool_sequence:
+
- true
+
- false
+
- true
+
- false
+
- true
+
- false
+
flow_bools:
+
- true
+
- false
+
- true
+
- false
+
bool_mapping:
+
active: true
+
disabled: false
+
enabled: true
+
stopped: false
+
quoted_bools:
+
quoted_true: 'true'
+
quoted_false: 'false'
+
quoted_yes: 'yes'
+
quoted_no: 'no'
+
single_true: 'true'
+
single_false: 'false'
+
nested_bools:
+
settings:
+
debug: true
+
verbose: false
+
legacy_yes: true
+
legacy_no: false
+
flags:
+
- true
+
- false
+
- true
+
- false
+
mixed_case:
+
'TRUE': true
+
'FALSE': false
+
'TrUe': true
+
'FaLsE': false
+
bool_like_strings:
+
truthy: truely
+
falsy: falsetto
+
yes_sir: yessir
+
no_way: noway
+
+
$ yamlcat --json values_bool.yml
+
{"bool_true": true, "bool_false": false, "capitalized_true": true, "capitalized_false": false, "yes_value": true, "no_value": false, "Yes_value": true, "No_value": false, "YES_value": true, "NO_value": false, "on_value": true, "off_value": false, "On_value": true, "Off_value": false, "ON_value": true, "OFF_value": false, "bool_sequence": [true, false, true, false, true, false], "flow_bools": [true, false, true, false], "bool_mapping": {"active": true, "disabled": false, "enabled": true, "stopped": false}, "quoted_bools": {"quoted_true": "true", "quoted_false": "false", "quoted_yes": "yes", "quoted_no": "no", "single_true": "true", "single_false": "false"}, "nested_bools": {"settings": {"debug": true, "verbose": false, "legacy_yes": true, "legacy_no": false}, "flags": [true, false, true, false]}, "mixed_case": {"TRUE": true, "FALSE": false, "TrUe": true, "FaLsE": false}, "bool_like_strings": {"truthy": "truely", "falsy": "falsetto", "yes_sir": "yessir", "no_way": "noway"}}
+
+
Test YAML number values from values_numbers.yml
+
+
$ yamlcat values_numbers.yml
+
int_zero: 0
+
int_positive: 42
+
int_negative: -17
+
int_large: 1000000
+
int_with_underscores: 1000000
+
octal_value: 12
+
octal_zero: 0
+
octal_large: 511
+
hex_lowercase: 26
+
hex_uppercase: 26
+
hex_mixed: 3735928559
+
hex_zero: 0
+
float_simple: 3.14
+
float_negative: -0.5
+
float_zero: 0
+
float_leading_dot: 0.5
+
float_trailing_zero: 1
+
scientific_positive: 10000000000
+
scientific_negative: 0.0015
+
scientific_uppercase: 250
+
scientific_no_sign: 300000
+
positive_infinity: .inf
+
negative_infinity: -.inf
+
not_a_number: .nan
+
infinity_upper: .inf
+
infinity_caps: .inf
+
nan_upper: .nan
+
nan_caps: .nan
+
number_sequence:
+
- 0
+
- 42
+
- -17
+
- 3.14
+
- 10000000000
+
- .inf
+
- .nan
+
flow_numbers:
+
- 0
+
- 42
+
- -17
+
- 3.14
+
- 26
+
- 12
+
number_mapping:
+
count: 100
+
price: 19.99
+
discount: -5
+
hex_color: 16734003
+
octal_perms: 493
+
scientific: 6.022e+23
+
quoted_numbers:
+
string_int: '42'
+
string_float: '3.14'
+
string_hex: '0x1A'
+
string_octal: 0o14
+
string_inf: '.inf'
+
string_nan: '.nan'
+
numeric_strings:
+
phone: 555-1234
+
version: 1.2.3
+
code: 123
+
leading_zero: 7
+
plus_sign: 123
+
edge_cases:
+
min_int: -9.22337e+18
+
max_int: 9.22337e+18
+
very_small: 1e-100
+
very_large: 1e+100
+
negative_zero: -0
+
positive_zero: 0
+
nested_numbers:
+
coordinates:
+
x: 10.5
+
y: -20.3
+
z: 0
+
measurements:
+
- 1.1
+
- 2.2
+
- 3.3
+
stats:
+
count: 1000
+
average: 45.67
+
max: .inf
+
min: -.inf
+
legacy_octal: 14
+
binary_like: 10
+
format_tests:
+
no_decimal: 42
+
with_decimal: 42
+
leading_zero_decimal: 0.42
+
no_leading_digit: 0.42
+
trailing_decimal: 42
+
+
$ yamlcat --json values_numbers.yml
+
{"int_zero": 0, "int_positive": 42, "int_negative": -17, "int_large": 1000000, "int_with_underscores": 1000000, "octal_value": 12, "octal_zero": 0, "octal_large": 511, "hex_lowercase": 26, "hex_uppercase": 26, "hex_mixed": 3735928559, "hex_zero": 0, "float_simple": 3.14, "float_negative": -0.5, "float_zero": 0, "float_leading_dot": 0.5, "float_trailing_zero": 1, "scientific_positive": 10000000000, "scientific_negative": 0.0015, "scientific_uppercase": 250, "scientific_no_sign": 300000, "positive_infinity": inf, "negative_infinity": -inf, "not_a_number": nan, "infinity_upper": inf, "infinity_caps": inf, "nan_upper": nan, "nan_caps": nan, "number_sequence": [0, 42, -17, 3.14, 10000000000, inf, nan], "flow_numbers": [0, 42, -17, 3.14, 26, 12], "number_mapping": {"count": 100, "price": 19.99, "discount": -5, "hex_color": 16734003, "octal_perms": 493, "scientific": 6.022e+23}, "quoted_numbers": {"string_int": "42", "string_float": "3.14", "string_hex": "0x1A", "string_octal": "0o14", "string_inf": ".inf", "string_nan": ".nan"}, "numeric_strings": {"phone": "555-1234", "version": "1.2.3", "code": 123, "leading_zero": 7, "plus_sign": 123}, "edge_cases": {"min_int": -9.22337e+18, "max_int": 9.22337e+18, "very_small": 1e-100, "very_large": 1e+100, "negative_zero": -0, "positive_zero": 0}, "nested_numbers": {"coordinates": {"x": 10.5, "y": -20.3, "z": 0}, "measurements": [1.1, 2.2, 3.3], "stats": {"count": 1000, "average": 45.67, "max": inf, "min": -inf}}, "legacy_octal": 14, "binary_like": 10, "format_tests": {"no_decimal": 42, "with_decimal": 42, "leading_zero_decimal": 0.42, "no_leading_digit": 0.42, "trailing_decimal": 42}}
+
+
Test YAML timestamp values from values_timestamps.yml
+
+
$ yamlcat values_timestamps.yml
+
date_simple: 2001-12-15
+
date_earliest: 1970-01-01
+
date_leap_year: 2020-02-29
+
date_current: 2025-12-04
+
datetime_utc: '2001-12-15T02:59:43.1Z'
+
datetime_utc_full: '2001-12-15T02:59:43.123456Z'
+
datetime_utc_no_frac: '2001-12-15T02:59:43Z'
+
datetime_offset_pos: '2001-12-15T02:59:43.1+05:30'
+
datetime_offset_neg: '2001-12-15T02:59:43.1-05:00'
+
datetime_offset_hours: '2001-12-15T02:59:43+05'
+
datetime_spaced: '2001-12-14 21:59:43.10 -5'
+
datetime_spaced_utc: '2001-12-15 02:59:43.1 Z'
+
datetime_spaced_offset: '2001-12-14 21:59:43.10 -05:00'
+
datetime_no_frac: '2001-12-15T14:30:00Z'
+
date_only: 2001-12-15
+
timestamp_formats:
+
iso_date: 2001-12-15
+
iso_datetime_z: '2001-12-15T02:59:43Z'
+
iso_datetime_offset: '2001-12-15T02:59:43+00:00'
+
spaced_datetime: '2001-12-14 21:59:43.10 -5'
+
canonical: '2001-12-15T02:59:43.1Z'
+
timestamp_sequence:
+
- 2001-12-15
+
- '2001-12-15T02:59:43.1Z'
+
- '2001-12-14 21:59:43.10 -5'
+
- '2025-01-01T00:00:00Z'
+
events:
+
created: '2001-12-15T02:59:43.1Z'
+
modified: '2001-12-16T10:30:00Z'
+
published: '2001-12-14 21:59:43.10 -5'
+
quoted_timestamps:
+
string_date: 2001-12-15
+
string_datetime: '2001-12-15T02:59:43.1Z'
+
string_spaced: '2001-12-14 21:59:43.10 -5'
+
edge_cases:
+
midnight: '2001-12-15T00:00:00Z'
+
end_of_day: '2001-12-15T23:59:59Z'
+
microseconds: '2001-12-15T02:59:43.123456Z'
+
no_seconds: '2001-12-15T02:59Z'
+
hour_only: 2001-12-15T02Z
+
nested_timestamps:
+
project:
+
start_date: 2001-12-15
+
milestones:
+
- date: 2001-12-20
+
time: '2001-12-20T14:00:00Z'
+
- date: 2002-01-15
+
time: '2002-01-15T09:30:00-05:00'
+
metadata:
+
created: '2001-12-14 21:59:43.10 -5'
+
updated: '2001-12-15T02:59:43.1Z'
+
invalid_timestamps:
+
bad_date: 2001-13-45
+
bad_time: '2001-12-15T25:99:99Z'
+
incomplete: 2001-12
+
no_leading_zero: 2001-1-5
+
timezones:
+
utc_z: '2001-12-15T02:59:43Z'
+
utc_offset: '2001-12-15T02:59:43+00:00'
+
est: '2001-12-14T21:59:43-05:00'
+
ist: '2001-12-15T08:29:43+05:30'
+
jst: '2001-12-15T11:59:43+09:00'
+
date_range:
+
past: 1900-01-01
+
unix_epoch: '1970-01-01T00:00:00Z'
+
y2k: '2000-01-01T00:00:00Z'
+
present: 2025-12-04
+
future: '2099-12-31T23:59:59Z'
+
+
$ yamlcat --json values_timestamps.yml
+
{"date_simple": "2001-12-15", "date_earliest": "1970-01-01", "date_leap_year": "2020-02-29", "date_current": "2025-12-04", "datetime_utc": "2001-12-15T02:59:43.1Z", "datetime_utc_full": "2001-12-15T02:59:43.123456Z", "datetime_utc_no_frac": "2001-12-15T02:59:43Z", "datetime_offset_pos": "2001-12-15T02:59:43.1+05:30", "datetime_offset_neg": "2001-12-15T02:59:43.1-05:00", "datetime_offset_hours": "2001-12-15T02:59:43+05", "datetime_spaced": "2001-12-14 21:59:43.10 -5", "datetime_spaced_utc": "2001-12-15 02:59:43.1 Z", "datetime_spaced_offset": "2001-12-14 21:59:43.10 -05:00", "datetime_no_frac": "2001-12-15T14:30:00Z", "date_only": "2001-12-15", "timestamp_formats": {"iso_date": "2001-12-15", "iso_datetime_z": "2001-12-15T02:59:43Z", "iso_datetime_offset": "2001-12-15T02:59:43+00:00", "spaced_datetime": "2001-12-14 21:59:43.10 -5", "canonical": "2001-12-15T02:59:43.1Z"}, "timestamp_sequence": ["2001-12-15", "2001-12-15T02:59:43.1Z", "2001-12-14 21:59:43.10 -5", "2025-01-01T00:00:00Z"], "events": {"created": "2001-12-15T02:59:43.1Z", "modified": "2001-12-16T10:30:00Z", "published": "2001-12-14 21:59:43.10 -5"}, "quoted_timestamps": {"string_date": "2001-12-15", "string_datetime": "2001-12-15T02:59:43.1Z", "string_spaced": "2001-12-14 21:59:43.10 -5"}, "edge_cases": {"midnight": "2001-12-15T00:00:00Z", "end_of_day": "2001-12-15T23:59:59Z", "microseconds": "2001-12-15T02:59:43.123456Z", "no_seconds": "2001-12-15T02:59Z", "hour_only": "2001-12-15T02Z"}, "nested_timestamps": {"project": {"start_date": "2001-12-15", "milestones": [{"date": "2001-12-20", "time": "2001-12-20T14:00:00Z"}, {"date": "2002-01-15", "time": "2002-01-15T09:30:00-05:00"}], "metadata": {"created": "2001-12-14 21:59:43.10 -5", "updated": "2001-12-15T02:59:43.1Z"}}}, "invalid_timestamps": {"bad_date": "2001-13-45", "bad_time": "2001-12-15T25:99:99Z", "incomplete": "2001-12", "no_leading_zero": "2001-1-5"}, "timezones": {"utc_z": "2001-12-15T02:59:43Z", "utc_offset": "2001-12-15T02:59:43+00:00", "est": 
"2001-12-14T21:59:43-05:00", "ist": "2001-12-15T08:29:43+05:30", "jst": "2001-12-15T11:59:43+09:00"}, "date_range": {"past": "1900-01-01", "unix_epoch": "1970-01-01T00:00:00Z", "y2k": "2000-01-01T00:00:00Z", "present": "2025-12-04", "future": "2099-12-31T23:59:59Z"}}
+82
tests/cram/values_bool.yml
···
+
# Boolean value test cases for YAML 1.2
+
# Note: YAML 1.2 only recognizes 'true' and 'false' as booleans
+
# Other values like yes/no, on/off are YAML 1.1-style booleans; the expected output in values.t shows this reader still coerces them (confirm intended schema)
+
+
# Standard YAML 1.2 booleans (lowercase)
+
bool_true: true
+
bool_false: false
+
+
# Capitalized forms (strings per the YAML 1.2 core schema, but coerced to booleans by this reader — see values.t)
+
capitalized_true: True
+
capitalized_false: False
+
+
# YAML 1.1 style booleans (should be strings in YAML 1.2)
+
yes_value: yes
+
no_value: no
+
Yes_value: Yes
+
No_value: No
+
YES_value: YES
+
NO_value: NO
+
+
# On/Off style (should be strings in YAML 1.2)
+
on_value: on
+
off_value: off
+
On_value: On
+
Off_value: Off
+
ON_value: ON
+
OFF_value: OFF
+
+
# Booleans in sequences
+
bool_sequence:
+
- true
+
- false
+
- yes
+
- no
+
- on
+
- off
+
+
# Booleans in flow style
+
flow_bools: [true, false, yes, no]
+
+
# Booleans in mappings
+
bool_mapping:
+
active: true
+
disabled: false
+
enabled: yes
+
stopped: no
+
+
# String literals that should NOT be parsed as booleans
+
quoted_bools:
+
quoted_true: "true"
+
quoted_false: "false"
+
quoted_yes: "yes"
+
quoted_no: "no"
+
single_true: 'true'
+
single_false: 'false'
+
+
# Nested boolean values
+
nested_bools:
+
settings:
+
debug: true
+
verbose: false
+
legacy_yes: yes
+
legacy_no: no
+
flags:
+
- true
+
- false
+
- on
+
- off
+
+
# Mixed case variations
+
mixed_case:
+
TRUE: TRUE
+
FALSE: FALSE
+
TrUe: TrUe
+
FaLsE: FaLsE
+
+
# Boolean-like strings that should remain strings
+
bool_like_strings:
+
truthy: truely
+
falsy: falsetto
+
yes_sir: yessir
+
no_way: noway
+55
tests/cram/values_null.yml
···
+
# Null value test cases for YAML 1.2
+
+
# Explicit null keyword
+
explicit_null: null
+
+
# Tilde shorthand for null
+
tilde_null: ~
+
+
# Empty value (implicit null)
+
empty_null:
+
+
# Null in flow style
+
flow_null: [null, ~, ]
+
+
# Null in sequences
+
sequence_nulls:
+
- null
+
- ~
+
-
+
- explicit: null
+
- tilde: ~
+
- empty:
+
+
# Null in mappings
+
mapping_nulls:
+
key1: null
+
key2: ~
+
key3:
+
+
# Null as key
+
null: "null key with string value"
+
~: "tilde key with string value"
+
+
# Mixed null values in nested structures
+
nested:
+
level1:
+
null_value: null
+
tilde_value: ~
+
empty_value:
+
list:
+
- null
+
- ~
+
-
+
- some_value
+
map:
+
a: null
+
b: ~
+
c:
+
+
# String literals that contain "null" (should NOT be parsed as null)
+
string_nulls:
+
quoted_null: "null"
+
quoted_tilde: "~"
+
null_in_string: "this is null"
+
word_null: 'null'
+120
tests/cram/values_numbers.yml
···
+
# Numeric value test cases for YAML 1.2
+
+
# Integers
+
int_zero: 0
+
int_positive: 42
+
int_negative: -17
+
int_large: 1000000
+
int_with_underscores: 1_000_000
+
+
# Octal notation (YAML 1.2 style with 0o prefix)
+
octal_value: 0o14
+
octal_zero: 0o0
+
octal_large: 0o777
+
+
# Hexadecimal notation
+
hex_lowercase: 0x1a
+
hex_uppercase: 0x1A
+
hex_mixed: 0xDeadBeef
+
hex_zero: 0x0
+
+
# Floating point numbers
+
float_simple: 3.14
+
float_negative: -0.5
+
float_zero: 0.0
+
float_leading_dot: .5
+
float_trailing_zero: 1.0
+
+
# Scientific notation
+
scientific_positive: 1.0e10
+
scientific_negative: 1.5e-3
+
scientific_uppercase: 2.5E+2
+
scientific_no_sign: 3.0e5
+
+
# Special floating point values
+
positive_infinity: .inf
+
negative_infinity: -.inf
+
not_a_number: .nan
+
infinity_upper: .Inf
+
infinity_caps: .INF
+
nan_upper: .NaN
+
nan_caps: .NAN
+
+
# Numbers in sequences
+
number_sequence:
+
- 0
+
- 42
+
- -17
+
- 3.14
+
- 1.0e10
+
- .inf
+
- .nan
+
+
# Numbers in flow style
+
flow_numbers: [0, 42, -17, 3.14, 0x1A, 0o14]
+
+
# Numbers in mappings
+
number_mapping:
+
count: 100
+
price: 19.99
+
discount: -5.0
+
hex_color: 0xFF5733
+
octal_perms: 0o755
+
scientific: 6.022e23
+
+
# String literals that look like numbers (quoted)
+
quoted_numbers:
+
string_int: "42"
+
string_float: "3.14"
+
string_hex: "0x1A"
+
string_octal: "0o14"
+
string_inf: ".inf"
+
string_nan: ".nan"
+
+
# Numeric strings that should remain strings
+
numeric_strings:
+
phone: 555-1234
+
version: 1.2.3
+
code: 00123
+
leading_zero: 007
+
plus_sign: +123
+
+
# Edge cases
+
edge_cases:
+
min_int: -9223372036854775808
+
max_int: 9223372036854775807
+
very_small: 1.0e-100
+
very_large: 1.0e100
+
negative_zero: -0.0
+
positive_zero: +0.0
+
+
# Nested numeric values
+
nested_numbers:
+
coordinates:
+
x: 10.5
+
y: -20.3
+
z: 0.0
+
measurements:
+
- 1.1
+
- 2.2
+
- 3.3
+
stats:
+
count: 1000
+
average: 45.67
+
max: .inf
+
min: -.inf
+
+
# YAML 1.1 style octals (no 0o prefix) - should be strings in YAML 1.2
+
legacy_octal: 014
+
+
# Binary notation (not part of YAML 1.2 core, but sometimes supported)
+
# These should be treated as strings in strict YAML 1.2
+
binary_like: 0b1010
+
+
# Numbers with various formats
+
format_tests:
+
no_decimal: 42
+
with_decimal: 42.0
+
leading_zero_decimal: 0.42
+
no_leading_digit: .42
+
trailing_decimal: 42.
+101
tests/cram/values_timestamps.yml
···
+
# Timestamp value test cases for YAML 1.1
+
# Note: YAML 1.2 does not have a timestamp type in the core schema
+
# These are recognized in YAML 1.1 and some extended schemas
+
+
# ISO 8601 date format (YYYY-MM-DD)
+
date_simple: 2001-12-15
+
date_earliest: 1970-01-01
+
date_leap_year: 2020-02-29
+
date_current: 2025-12-04
+
+
# ISO 8601 datetime with timezone (UTC)
+
datetime_utc: 2001-12-15T02:59:43.1Z
+
datetime_utc_full: 2001-12-15T02:59:43.123456Z
+
datetime_utc_no_frac: 2001-12-15T02:59:43Z
+
+
# ISO 8601 datetime with timezone offset
+
datetime_offset_pos: 2001-12-15T02:59:43.1+05:30
+
datetime_offset_neg: 2001-12-15T02:59:43.1-05:00
+
datetime_offset_hours: 2001-12-15T02:59:43+05
+
+
# Spaced datetime format (YAML 1.1 style)
+
datetime_spaced: 2001-12-14 21:59:43.10 -5
+
datetime_spaced_utc: 2001-12-15 02:59:43.1 Z
+
datetime_spaced_offset: 2001-12-14 21:59:43.10 -05:00
+
+
# Datetime without fractional seconds
+
datetime_no_frac: 2001-12-15T14:30:00Z
+
+
# Date only (no time component)
+
date_only: 2001-12-15
+
+
# Various formats
+
timestamp_formats:
+
iso_date: 2001-12-15
+
iso_datetime_z: 2001-12-15T02:59:43Z
+
iso_datetime_offset: 2001-12-15T02:59:43+00:00
+
spaced_datetime: 2001-12-14 21:59:43.10 -5
+
canonical: 2001-12-15T02:59:43.1Z
+
+
# Timestamps in sequences
+
timestamp_sequence:
+
- 2001-12-15
+
- 2001-12-15T02:59:43.1Z
+
- 2001-12-14 21:59:43.10 -5
+
- 2025-01-01T00:00:00Z
+
+
# Timestamps in mappings
+
events:
+
created: 2001-12-15T02:59:43.1Z
+
modified: 2001-12-16T10:30:00Z
+
published: 2001-12-14 21:59:43.10 -5
+
+
# String literals that look like timestamps (quoted)
+
quoted_timestamps:
+
string_date: "2001-12-15"
+
string_datetime: "2001-12-15T02:59:43.1Z"
+
string_spaced: "2001-12-14 21:59:43.10 -5"
+
+
# Edge cases and variations
+
edge_cases:
+
midnight: 2001-12-15T00:00:00Z
+
end_of_day: 2001-12-15T23:59:59Z
+
microseconds: 2001-12-15T02:59:43.123456Z
+
no_seconds: 2001-12-15T02:59Z
+
hour_only: 2001-12-15T02Z
+
+
# Nested timestamp values
+
nested_timestamps:
+
project:
+
start_date: 2001-12-15
+
milestones:
+
- date: 2001-12-20
+
time: 2001-12-20T14:00:00Z
+
- date: 2002-01-15
+
time: 2002-01-15T09:30:00-05:00
+
metadata:
+
created: 2001-12-14 21:59:43.10 -5
+
updated: 2001-12-15T02:59:43.1Z
+
+
# Invalid timestamp formats (should be treated as strings)
+
invalid_timestamps:
+
bad_date: 2001-13-45
+
bad_time: 2001-12-15T25:99:99Z
+
incomplete: 2001-12
+
no_leading_zero: 2001-1-5
+
+
# Different timezone representations
+
timezones:
+
utc_z: 2001-12-15T02:59:43Z
+
utc_offset: 2001-12-15T02:59:43+00:00
+
est: 2001-12-14T21:59:43-05:00
+
ist: 2001-12-15T08:29:43+05:30
+
jst: 2001-12-15T11:59:43+09:00
+
+
# Historical and future dates
+
date_range:
+
past: 1900-01-01
+
unix_epoch: 1970-01-01T00:00:00Z
+
y2k: 2000-01-01T00:00:00Z
+
present: 2025-12-04
+
future: 2099-12-31T23:59:59Z
+105
tests/cram/whitespace.yml
···
+
# Whitespace handling test file
+
+
# Section 1: Different indentation levels (2 spaces)
+
two_space_indent:
+
level1:
+
level2:
+
level3: value
+
+
# Section 2: Four space indentation
+
four_space_indent:
+
level1:
+
level2:
+
level3: value
+
+
# Section 3: Mixed content with blank lines
+
+
first_key: first_value
+
+
+
second_key: second_value
+
+
+
+
third_key: third_value
+
+
# Section 4: Sequences with varying indentation
+
sequence_2space:
+
- item1
+
- item2
+
- nested:
+
- nested_item1
+
- nested_item2
+
+
sequence_4space:
+
- item1
+
- item2
+
- nested:
+
- nested_item1
+
- nested_item2
+
+
# Section 5: Trailing whitespace (spaces after values - invisible but present)
+
trailing_spaces: value
+
another_key: another_value
+
+
# Section 6: Leading whitespace preservation in literals
+
literal_block: |
+
This is a literal block
+
with preserved indentation
+
including extra spaces
+
and blank lines
+
+
like this one above
+
+
folded_block: >
+
This is a folded block
+
that will be folded into
+
a single line but preserves
+
+
paragraph breaks like above
+
+
# Section 7: Whitespace in flow collections
+
flow_with_spaces: [ item1 , item2 , item3 ]
+
flow_tight: [item1,item2,item3]
+
flow_map_spaces: { key1: value1 , key2: value2 }
+
flow_map_tight: {key1:value1,key2:value2}
+
+
# Section 8: Multiple consecutive blank lines between top-level keys
+
key_before_blanks: value1
+
+
+
+
+
key_after_blanks: value2
+
+
# Section 9: Indentation in mappings
+
mapping_indent:
+
key1: value1
+
key2: value2
+
nested:
+
nested_key1: nested_value1
+
nested_key2: nested_value2
+
deep_nested:
+
deep_key: deep_value
+
+
# Section 10: Whitespace around colons and hyphens
+
no_space_colon:value
+
space_after_colon: value
+
spaces_around: value
+
- sequence_item_no_space
+
- nested_sequence
+
+
# Section 11: Empty lines in sequences
+
sequence_with_blanks:
+
- item1
+
+
- item2
+
+
- item3
+
+
# Section 12: Whitespace-only mapping values (implicit null)
+
explicit_null: null
+
implicit_null:
+
space_only:
+
+
# End of whitespace test file
+3
tests/cram/yaml-1.2.yml
···
+
- {"when the key is quoted":"space after colon can be omitted."}
+
- "quoted slashes \/ are allowed."
+
- {?"a key can be looooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooger": "than 1024 when parsing is unambiguous before seeing the colon."}
+161
tests/cram/yamlcat.t
···
+
Test yamlcat with simple YAML
+
+
$ echo 'hello: world' | yamlcat
+
hello: world
+
+
$ echo 'name: Alice
+
> age: 30' | yamlcat
+
name: Alice
+
age: 30
+
+
Test nested mappings
+
+
$ echo 'server:
+
> host: localhost
+
> port: 8080
+
> database:
+
> name: mydb' | yamlcat
+
server:
+
host: localhost
+
port: 8080
+
database:
+
name: mydb
+
+
Test sequences
+
+
$ echo '- apple
+
> - banana
+
> - cherry' | yamlcat
+
- apple
+
- banana
+
- cherry
+
+
Test mapping with sequence value
+
+
$ echo 'fruits:
+
> - apple
+
> - banana' | yamlcat
+
fruits:
+
- apple
+
- banana
+
+
Test flow style output
+
+
$ echo 'name: Alice
+
> hobbies:
+
> - reading
+
> - coding' | yamlcat --flow
+
{name: Alice, hobbies: [reading, coding]}
+
+
Test JSON output
+
+
$ echo 'name: Alice
+
> age: 30' | yamlcat --json
+
{"name": "Alice", "age": 30}
+
+
Test seq.yml file (multiline plain scalar)
+
+
$ yamlcat seq.yml
+
- hello - whats - up
+
- foo
+
- bar
+
+
Test seq.yml roundtrip preserves data
+
+
$ yamlcat --json seq.yml
+
["hello - whats - up", "foo", "bar"]
+
+
Test cohttp.yml
+
+
$ yamlcat cohttp.yml
+
language: c
+
sudo: false
+
services:
+
- docker
+
install: 'wget https://raw.githubusercontent.com/ocaml/ocaml-travisci-skeleton/master/.travis-docker.sh'
+
script: bash -ex ./.travis-docker.sh
+
env:
+
global:
+
- "EXTRA_REMOTES=\"https://github.com/mirage/mirage-dev.git\""
+
- "PINS=\"cohttp-top:. cohttp-async:. cohttp-lwt-unix:. cohttp-lwt-jsoo:. cohttp-lwt:. cohttp-mirage:. cohttp:.\""
+
matrix:
+
- "PACKAGE=\"cohttp\" DISTRO=\"alpine-3.5\" OCAML_VERSION=\"4.06.0\""
+
- "PACKAGE=\"cohttp-async\" DISTRO=\"alpine\" OCAML_VERSION=\"4.06.0\""
+
- "PACKAGE=\"cohttp-lwt\" DISTRO=\"debian-unstable\" OCAML_VERSION=\"4.03.0\""
+
- "PACKAGE=\"cohttp-mirage\" DISTRO=\"debian-unstable\" OCAML_VERSION=\"4.03.0\""
+
notifications:
+
webhooks:
+
urls:
+
- 'https://webhooks.gitter.im/e/6ee5059c7420709f4ad1'
+
on_success: change
+
on_failure: always
+
on_start: false
+
+
Test cohttp.yml roundtrip with JSON
+
+
$ yamlcat --json cohttp.yml
+
{"language": "c", "sudo": false, "services": ["docker"], "install": "wget https://raw.githubusercontent.com/ocaml/ocaml-travisci-skeleton/master/.travis-docker.sh", "script": "bash -ex ./.travis-docker.sh", "env": {"global": ["EXTRA_REMOTES=\"https://github.com/mirage/mirage-dev.git\"", "PINS=\"cohttp-top:. cohttp-async:. cohttp-lwt-unix:. cohttp-lwt-jsoo:. cohttp-lwt:. cohttp-mirage:. cohttp:.\""], "matrix": ["PACKAGE=\"cohttp\" DISTRO=\"alpine-3.5\" OCAML_VERSION=\"4.06.0\"", "PACKAGE=\"cohttp-async\" DISTRO=\"alpine\" OCAML_VERSION=\"4.06.0\"", "PACKAGE=\"cohttp-lwt\" DISTRO=\"debian-unstable\" OCAML_VERSION=\"4.03.0\"", "PACKAGE=\"cohttp-mirage\" DISTRO=\"debian-unstable\" OCAML_VERSION=\"4.03.0\""]}, "notifications": {"webhooks": {"urls": ["https://webhooks.gitter.im/e/6ee5059c7420709f4ad1"], "on_success": "change", "on_failure": "always", "on_start": false}}}
+
+
Test special values
+
+
$ echo 'null_val: null
+
> bool_true: true
+
> bool_false: false
+
> number: 42
+
> float: 3.14' | yamlcat --json
+
{"null_val": null, "bool_true": true, "bool_false": false, "number": 42, "float": 3.14}
+
+
Test quoted strings
+
+
$ echo 'single: '"'"'hello world'"'"'
+
> double: "hello world"' | yamlcat
+
single: hello world
+
double: hello world
+
+
Test literal block scalar
+
+
$ echo 'text: |
+
> line one
+
> line two' | yamlcat --json
+
{"text": "line one\nline two\n"}
+
+
Test folded block scalar
+
+
$ echo 'text: >
+
> line one
+
> line two' | yamlcat --json
+
{"text": "line one line two\n"}
+
+
Test linuxkit.yml (sequences of mappings)
+
+
$ yamlcat linuxkit.yml | head -30
+
kernel:
+
image: 'linuxkit/kernel:4.9.40'
+
cmdline: console=tty0 console=ttyS0
+
init:
+
- 'linuxkit/init:906e174b3f2e07f97d6fd693a2e8518e98dafa58'
+
- 'linuxkit/runc:90e45f13e1d0a0983f36ef854621e3eac91cf541'
+
- 'linuxkit/containerd:7c986fb7df33bea73b5c8097b46989e46f49d875'
+
- 'linuxkit/ca-certificates:e44b0a66df5a102c0e220f0066b0d904710dcb10'
+
onboot:
+
- name: sysctl
+
image: 'linuxkit/sysctl:184c914d23a017062d7b53d7fc1dfaf47764bef6'
+
- name: dhcpcd
+
image: 'linuxkit/dhcpcd:f3f5413abb78fae9020e35bd4788fa93df4530b7'
+
command:
+
- /sbin/dhcpcd
+
- '--nobackground'
+
- '-f'
+
- /dhcpcd.conf
+
- '-1'
+
onshutdown:
+
- name: shutdown
+
image: 'busybox:latest'
+
command:
+
- /bin/echo
+
- so long and thanks for all the fish
+
services:
+
- name: getty
+
image: 'linuxkit/getty:2c841cdc34396e3fa8f25b62d112808f63f16df6'
+
env:
+
- INSECURE=true
+488
tests/run_all_tests.ml
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
open Yamlrw
+
module TL = Test_suite_lib.Test_suite_loader
+
module TF = Test_suite_lib.Tree_format
+
module JF = Test_suite_lib.Json_format
+
module JC = Test_suite_lib.Json_compare
+
+
(* Location of the checked-out yaml-test-suite repository, relative to the
   directory the test binary runs from. *)
let test_suite_path = "../yaml-test-suite"
+
+
(* Escape the HTML-significant characters so arbitrary test data can be
   embedded safely in element content and in quoted attribute values.
   The original version omitted the apostrophe; it is escaped here as
   [&#39;] (numeric form, which unlike [&apos;] is valid in HTML 4 too)
   since escaped values are interpolated into attributes below. *)
let html_escape s =
  let buf = Buffer.create (String.length s) in
  String.iter
    (function
      | '<' -> Buffer.add_string buf "&lt;"
      | '>' -> Buffer.add_string buf "&gt;"
      | '&' -> Buffer.add_string buf "&amp;"
      | '"' -> Buffer.add_string buf "&quot;"
      | '\'' -> Buffer.add_string buf "&#39;"
      | c -> Buffer.add_char buf c)
    s;
  Buffer.contents buf
+
+
(* Drop blank (or whitespace-only) lines from [s] and rejoin the rest,
   so that event trees compare equal modulo insignificant blank lines. *)
let normalize_tree s =
  String.split_on_char '\n' s
  |> List.filter (fun line -> String.trim line <> "")
  |> String.concat "\n"
+
+
(* Outcome of one yaml-test-suite case.  The event-tree check ([status])
   and the JSON round-trip check ([json_status]) are tracked separately. *)
type test_result = {
  id : string;            (* test-suite case identifier *)
  name : string;          (* human-readable test description *)
  yaml : string;          (* raw YAML input of the case *)
  is_error_test : bool;   (* true when the suite expects parsing to fail *)
  status : [`Pass | `Fail of string | `Skip];       (* event-tree verdict *)
  output : string;        (* produced tree, error message, or mismatch diff *)
  json_status : [`Pass | `Fail of string | `Skip];  (* JSON verdict *)
  json_expected : string; (* expected JSON from the suite ("" if none) *)
  json_actual : string;   (* JSON actually produced ("" on skip/error) *)
}
+
+
(* Structural equality of two JSON strings. *)
let compare_json expected actual =
  (* Parse both JSON strings and compare the resulting structures.
     This handles formatting differences and object key ordering. *)
  JC.compare_json_strings expected actual
+
+
(* Compare the JSON produced from [test.yaml] against the suite's expected
   JSON.  Returns the verdict paired with the JSON we actually produced
   ("" when the comparison is skipped or parsing raised). *)
let run_json_test (test : TL.test_case) : [`Pass | `Fail of string | `Skip] * string =
  match test.json, test.fail with
  | None, _ ->
    (* No expected JSON recorded for this case. *)
    (`Skip, "")
  | Some _, true ->
    (* Error tests shouldn't have JSON comparison. *)
    (`Skip, "")
  | Some expected_json, false -> (
    try
      (* Multi-document YAML is handled via [documents_of_string]. *)
      let docs = Loader.documents_of_string test.yaml in
      let roots =
        List.filter_map
          (fun doc ->
            Option.map
              (Yaml.to_value ~resolve_aliases_first:true)
              (Document.root doc))
          docs
      in
      let actual_json =
        match roots with
        | [] -> "" (* empty document produces empty JSON *)
        | [ v ] -> JF.to_json v
        | many -> JF.documents_to_json many
      in
      if compare_json expected_json actual_json then (`Pass, actual_json)
      else (`Fail "JSON mismatch", actual_json)
    with
    | Yamlrw_error e ->
      (`Fail (Format.asprintf "Parse error: %a" Error.pp e), "")
    | exn ->
      (`Fail (Printf.sprintf "Exception: %s" (Printexc.to_string exn)), ""))
+
+
(* Run one yaml-test-suite case, producing both the event-tree verdict and
   the independently computed JSON verdict.  The parse->events->tree
   pipeline, previously duplicated three times, is factored into a local
   helper; the no-format [Printf.sprintf "Tree mismatch"] is replaced by
   the plain literal. *)
let run_test (test : TL.test_case) : test_result =
  let json_status, json_actual = run_json_test test in
  let base = {
    id = test.id;
    name = test.name;
    yaml = test.yaml;
    is_error_test = test.fail;
    status = `Skip;
    output = "";
    json_status;
    json_expected = Option.value ~default:"" test.json;
    json_actual;
  } in
  (* Parse [yaml] and render its event stream in test-suite tree format. *)
  let tree_of_yaml yaml =
    let parser = Parser.of_string yaml in
    let events = Parser.to_list parser in
    TF.of_spanned_events events
  in
  if test.fail then begin
    (* Error test: parsing is expected to raise. *)
    try
      let tree = tree_of_yaml test.yaml in
      { base with status = `Fail "Expected parsing to fail"; output = tree }
    with
    | Yamlrw_error e ->
      { base with status = `Pass; output = Format.asprintf "%a" Error.pp e }
    | exn ->
      { base with status = `Pass; output = Printexc.to_string exn }
  end
  else begin
    match test.tree with
    | None ->
      (* No expected tree - check if json indicates expected success *)
      (match test.json with
      | Some _ ->
        (* Has json output, so should parse successfully *)
        (try
          { base with status = `Pass; output = tree_of_yaml test.yaml }
        with exn ->
          { base with
            status =
              `Fail
                (Printf.sprintf "Should parse but got: %s"
                   (Printexc.to_string exn));
            output = Printexc.to_string exn;
          })
      | None ->
        (* No tree, no json, no fail - ambiguous edge case, skip *)
        { base with status = `Skip; output = "(no expected tree or json)" })
    | Some expected ->
      try
        let actual = tree_of_yaml test.yaml in
        let expected_norm = normalize_tree expected in
        let actual_norm = normalize_tree actual in
        if expected_norm = actual_norm then
          { base with status = `Pass; output = actual }
        else
          { base with
            status = `Fail "Tree mismatch";
            output =
              Printf.sprintf "Expected:\n%s\n\nActual:\n%s" expected_norm
                actual_norm;
          }
      with exn ->
        { base with
          status =
            `Fail (Printf.sprintf "Exception: %s" (Printexc.to_string exn));
          output = Printexc.to_string exn;
        }
  end
+
+
(* CSS class name used in the HTML report for a verdict. *)
let status_class status =
  match status with
  | `Fail _ -> "fail"
  | `Skip -> "skip"
  | `Pass -> "pass"
+
+
(* Uppercase display label for a verdict badge. *)
let status_text status =
  match status with
  | `Fail _ -> "FAIL"
  | `Skip -> "SKIP"
  | `Pass -> "PASS"
+
+
(* Write a self-contained HTML report for [results] to [output_file]:
   summary cards, per-test expandable panels, and client-side filtering.
   NOTE(review): leading whitespace inside the {|...|} templates may have
   been lost by the diff rendering this was recovered from - cosmetic only. *)
let generate_html results output_file =
  let oc = open_out output_file in
  (* Tallies for the event-tree verdicts. *)
  let pass_count = List.length (List.filter (fun r -> r.status = `Pass) results) in
  let fail_count = List.length (List.filter (fun r -> match r.status with `Fail _ -> true | _ -> false) results) in
  let skip_count = List.length (List.filter (fun r -> r.status = `Skip) results) in
  let total = List.length results in
  (* Tallies for the JSON verdicts, reported separately. *)
  let json_pass_count = List.length (List.filter (fun r -> r.json_status = `Pass) results) in
  let json_fail_count = List.length (List.filter (fun r -> match r.json_status with `Fail _ -> true | _ -> false) results) in
  let json_skip_count = List.length (List.filter (fun r -> r.json_status = `Skip) results) in

  (* Static page header: CSS, summary cards (seven %d slots filled by the
     counts above), and filter controls. *)
  Printf.fprintf oc {|<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Yamlrw Test Results</title>
<style>
:root {
--pass-color: #22c55e;
--fail-color: #ef4444;
--skip-color: #f59e0b;
--bg-color: #1a1a2e;
--card-bg: #16213e;
--text-color: #e2e8f0;
--border-color: #334155;
}
* { box-sizing: border-box; margin: 0; padding: 0; }
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
background: var(--bg-color);
color: var(--text-color);
line-height: 1.6;
padding: 2rem;
}
.container { max-width: 1400px; margin: 0 auto; }
h1 { margin-bottom: 1.5rem; font-size: 2rem; }
.summary {
display: flex;
gap: 1rem;
margin-bottom: 2rem;
flex-wrap: wrap;
}
.stat {
background: var(--card-bg);
padding: 1rem 1.5rem;
border-radius: 8px;
border-left: 4px solid var(--border-color);
}
.stat.pass { border-left-color: var(--pass-color); }
.stat.fail { border-left-color: var(--fail-color); }
.stat.skip { border-left-color: var(--skip-color); }
.stat-value { font-size: 2rem; font-weight: bold; }
.stat-label { font-size: 0.875rem; opacity: 0.8; }
.filters {
margin-bottom: 1.5rem;
display: flex;
gap: 0.5rem;
flex-wrap: wrap;
}
.filter-btn {
padding: 0.5rem 1rem;
border: 1px solid var(--border-color);
background: var(--card-bg);
color: var(--text-color);
border-radius: 4px;
cursor: pointer;
transition: all 0.2s;
}
.filter-btn:hover { border-color: var(--text-color); }
.filter-btn.active { background: var(--text-color); color: var(--bg-color); }
.search {
padding: 0.5rem 1rem;
border: 1px solid var(--border-color);
background: var(--card-bg);
color: var(--text-color);
border-radius: 4px;
width: 200px;
}
.tests { display: flex; flex-direction: column; gap: 1rem; }
.test {
background: var(--card-bg);
border-radius: 8px;
border: 1px solid var(--border-color);
overflow: hidden;
}
.test-header {
padding: 1rem;
display: flex;
align-items: center;
gap: 1rem;
cursor: pointer;
border-bottom: 1px solid var(--border-color);
}
.test-header:hover { background: rgba(255,255,255,0.05); }
.badge {
padding: 0.25rem 0.5rem;
border-radius: 4px;
font-size: 0.75rem;
font-weight: bold;
text-transform: uppercase;
}
.badge.pass { background: var(--pass-color); color: #000; }
.badge.fail { background: var(--fail-color); color: #fff; }
.badge.skip { background: var(--skip-color); color: #000; }
.badge.error-test { background: #8b5cf6; color: #fff; margin-left: auto; }
.test-id { font-family: monospace; font-weight: bold; }
.test-name { opacity: 0.8; flex: 1; }
.test-content { display: none; padding: 1rem; }
.test.expanded .test-content { display: block; }
.section { margin-bottom: 1rem; }
.section-title {
font-size: 0.875rem;
text-transform: uppercase;
opacity: 0.6;
margin-bottom: 0.5rem;
letter-spacing: 0.05em;
}
pre {
background: #0f172a;
padding: 1rem;
border-radius: 4px;
overflow-x: auto;
font-size: 0.875rem;
white-space: pre-wrap;
word-break: break-all;
}
.expand-icon { transition: transform 0.2s; }
.test.expanded .expand-icon { transform: rotate(90deg); }
</style>
</head>
<body>
<div class="container">
<h1>Yamlrw Test Results</h1>
<div class="summary">
<div class="stat pass">
<div class="stat-value">%d</div>
<div class="stat-label">Passed</div>
</div>
<div class="stat fail">
<div class="stat-value">%d</div>
<div class="stat-label">Failed</div>
</div>
<div class="stat skip">
<div class="stat-value">%d</div>
<div class="stat-label">Skipped</div>
</div>
<div class="stat">
<div class="stat-value">%d</div>
<div class="stat-label">Total</div>
</div>
</div>
<h2 style="margin: 1.5rem 0 1rem;">JSON Output Comparison</h2>
<div class="summary">
<div class="stat pass">
<div class="stat-value">%d</div>
<div class="stat-label">JSON Pass</div>
</div>
<div class="stat fail">
<div class="stat-value">%d</div>
<div class="stat-label">JSON Fail</div>
</div>
<div class="stat skip">
<div class="stat-value">%d</div>
<div class="stat-label">JSON Skip</div>
</div>
</div>
<div class="filters">
<button class="filter-btn active" data-filter="all">All</button>
<button class="filter-btn" data-filter="pass">Pass</button>
<button class="filter-btn" data-filter="fail">Fail</button>
<button class="filter-btn" data-filter="skip">Skip</button>
<input type="text" class="search" placeholder="Search by ID or name...">
</div>
<div class="tests">
|} pass_count fail_count skip_count total json_pass_count json_fail_count json_skip_count;

  (* One expandable panel per test result; all interpolated data goes
     through [html_escape]. *)
  List.iter (fun result ->
    let error_badge = if result.is_error_test then
      {|<span class="badge error-test">Error Test</span>|}
    else "" in
    let json_badge = Printf.sprintf {|<span class="badge %s" style="margin-left: 4px;">JSON: %s</span>|}
      (status_class result.json_status) (status_text result.json_status) in
    (* Expected/actual JSON sections only when there is something to show. *)
    let json_section = if result.json_expected <> "" || result.json_actual <> "" then
      Printf.sprintf {|
<div class="section">
<div class="section-title">Expected JSON</div>
<pre>%s</pre>
</div>
<div class="section">
<div class="section-title">Actual JSON</div>
<pre>%s</pre>
</div>|}
        (html_escape result.json_expected)
        (html_escape result.json_actual)
    else "" in
    Printf.fprintf oc {| <div class="test" data-status="%s" data-json-status="%s" data-id="%s" data-name="%s">
<div class="test-header" onclick="this.parentElement.classList.toggle('expanded')">
<span class="expand-icon">▶</span>
<span class="badge %s">%s</span>
%s
<span class="test-id">%s</span>
<span class="test-name">%s</span>
%s
</div>
<div class="test-content">
<div class="section">
<div class="section-title">YAML Input</div>
<pre>%s</pre>
</div>
<div class="section">
<div class="section-title">Event Tree Output</div>
<pre>%s</pre>
</div>%s
</div>
</div>
|}
      (status_class result.status)
      (status_class result.json_status)
      (html_escape result.id)
      (html_escape (String.lowercase_ascii result.name))
      (status_class result.status)
      (status_text result.status)
      json_badge
      (html_escape result.id)
      (html_escape result.name)
      error_badge
      (html_escape result.yaml)
      (html_escape result.output)
      json_section
  ) results;

  (* Page footer: client-side filter/search script. *)
  Printf.fprintf oc {| </div>
</div>
<script>
document.querySelectorAll('.filter-btn').forEach(btn => {
btn.addEventListener('click', () => {
document.querySelectorAll('.filter-btn').forEach(b => b.classList.remove('active'));
btn.classList.add('active');
filterTests();
});
});
document.querySelector('.search').addEventListener('input', filterTests);
function filterTests() {
const filter = document.querySelector('.filter-btn.active').dataset.filter;
const search = document.querySelector('.search').value.toLowerCase();
document.querySelectorAll('.test').forEach(test => {
const status = test.dataset.status;
const id = test.dataset.id.toLowerCase();
const name = test.dataset.name;
const matchesFilter = filter === 'all' || status === filter;
const matchesSearch = !search || id.includes(search) || name.includes(search);
test.style.display = matchesFilter && matchesSearch ? '' : 'none';
});
}
</script>
</body>
</html>
|};
  close_out oc
+
+
(* Entry point: load every yaml-test-suite case, run it, print summaries
   and failure lists to stdout, and optionally emit an HTML report. *)
let () =
  let html_output = ref None in
  let show_skipped = ref false in
  let args = [
    "--html", Arg.String (fun s -> html_output := Some s),
    "<file> Generate HTML report to file";
    "--show-skipped", Arg.Set show_skipped,
    " Show details of skipped tests";
  ] in
  (* Anonymous arguments are ignored. *)
  Arg.parse args (fun _ -> ()) "Usage: run_all_tests [--html <file>] [--show-skipped]";

  let all_tests = TL.load_directory test_suite_path in
  Printf.printf "Total tests loaded: %d\n%!" (List.length all_tests);

  let results = List.map run_test all_tests in

  (* Event-tree tallies. *)
  let pass_count = List.length (List.filter (fun r -> r.status = `Pass) results) in
  let fail_count = List.length (List.filter (fun r -> match r.status with `Fail _ -> true | _ -> false) results) in
  let skip_count = List.length (List.filter (fun r -> r.status = `Skip) results) in

  (* JSON tallies. *)
  let json_pass_count = List.length (List.filter (fun r -> r.json_status = `Pass) results) in
  let json_fail_count = List.length (List.filter (fun r -> match r.json_status with `Fail _ -> true | _ -> false) results) in
  let json_skip_count = List.length (List.filter (fun r -> r.json_status = `Skip) results) in

  Printf.printf "\nEvent Tree Results: %d pass, %d fail, %d skip (total: %d)\n%!"
    pass_count fail_count skip_count (pass_count + fail_count + skip_count);

  Printf.printf "JSON Results: %d pass, %d fail, %d skip\n%!"
    json_pass_count json_fail_count json_skip_count;

  (* List each failing case with its id, name and failure reason. *)
  if fail_count > 0 then begin
    Printf.printf "\nFailing event tree tests:\n";
    List.iter (fun r ->
      match r.status with
      | `Fail msg -> Printf.printf "  %s: %s - %s\n" r.id r.name msg
      | _ -> ()
    ) results
  end;

  if json_fail_count > 0 then begin
    Printf.printf "\nFailing JSON tests:\n";
    List.iter (fun r ->
      match r.json_status with
      | `Fail msg -> Printf.printf "  %s: %s - %s\n" r.id r.name msg
      | _ -> ()
    ) results
  end;

  (* With --show-skipped, print a truncated preview of each skipped case. *)
  if !show_skipped && skip_count > 0 then begin
    Printf.printf "\nSkipped tests (no expected tree):\n";
    List.iter (fun r ->
      if r.status = `Skip then begin
        Printf.printf "  %s: %s\n" r.id r.name;
        Printf.printf "    YAML (%d chars): %S\n" (String.length r.yaml)
          (if String.length r.yaml <= 60 then r.yaml
           else String.sub r.yaml 0 60 ^ "...")
      end
    ) results
  end;

  match !html_output with
  | Some file ->
    generate_html results file;
    Printf.printf "\nHTML report generated: %s\n" file
  | None -> ()
+532
tests/run_all_tests_eio.ml
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(* Run the YAML test suite using Eio for parallel execution *)
+
+
open Yamlrw
+
module TL = Test_suite_lib_eio.Test_suite_loader_eio
+
module TF = Test_suite_lib.Tree_format
+
module JF = Test_suite_lib.Json_format
+
module JC = Test_suite_lib.Json_compare
+
+
(* Location of the checked-out yaml-test-suite repository, relative to the
   directory the test binary runs from. *)
let test_suite_path = "../yaml-test-suite"
+
+
(* Escape the HTML-significant characters so arbitrary test data can be
   embedded safely in element content and in quoted attribute values.
   The original version omitted the apostrophe; it is escaped here as
   [&#39;] (numeric form, which unlike [&apos;] is valid in HTML 4 too). *)
let html_escape s =
  let buf = Buffer.create (String.length s) in
  String.iter
    (function
      | '<' -> Buffer.add_string buf "&lt;"
      | '>' -> Buffer.add_string buf "&gt;"
      | '&' -> Buffer.add_string buf "&amp;"
      | '"' -> Buffer.add_string buf "&quot;"
      | '\'' -> Buffer.add_string buf "&#39;"
      | c -> Buffer.add_char buf c)
    s;
  Buffer.contents buf
+
+
(* Remove blank (or whitespace-only) lines from [s] so that event trees
   compare equal modulo insignificant blank lines. *)
let normalize_tree s =
  String.split_on_char '\n' s
  |> List.filter (fun line -> String.trim line <> "")
  |> String.concat "\n"
+
+
(* Outcome of one yaml-test-suite case.  The event-tree check ([status])
   and the JSON round-trip check ([json_status]) are tracked separately. *)
type test_result = {
  id : string;            (* test-suite case identifier *)
  name : string;          (* human-readable test description *)
  yaml : string;          (* raw YAML input of the case *)
  is_error_test : bool;   (* true when the suite expects parsing to fail *)
  status : [`Pass | `Fail of string | `Skip];       (* event-tree verdict *)
  output : string;        (* produced tree, error message, or mismatch diff *)
  json_status : [`Pass | `Fail of string | `Skip];  (* JSON verdict *)
  json_expected : string; (* expected JSON from the suite ("" if none) *)
  json_actual : string;   (* JSON actually produced ("" on skip/error) *)
}
+
+
(* Structural comparison of two JSON texts (object key order does not
   matter); delegates to the shared Json_compare helper. *)
let compare_json expected actual =
  JC.compare_json_strings expected actual
+
+
(* Run the JSON half of a test case: parse the YAML, convert every
   document to a plain value, render as JSON and compare structurally
   against the expected in.json text. Returns the status together with
   the JSON actually produced ("" on skip or parse error). Skipped when
   the case has no expected JSON or is an expected-error test. *)
let run_json_test (test : TL.test_case) : [`Pass | `Fail of string | `Skip] * string =
  match test.json with
  | None -> (`Skip, "")
  | Some expected_json ->
    if test.fail then
      (`Skip, "")
    else
      try
        let docs = Loader.documents_of_string test.yaml in
        (* Documents without a root node (empty documents) are dropped. *)
        let values = List.filter_map (fun doc ->
          match Document.root doc with
          | None -> None
          | Some yaml -> Some (Yaml.to_value ~resolve_aliases_first:true yaml)
        ) docs in
        let actual_json = match values with
          | [] -> ""
          | [v] -> JF.to_json v
          | vs -> JF.documents_to_json vs
        in
        if compare_json expected_json actual_json then
          (`Pass, actual_json)
        else
          (`Fail "JSON mismatch", actual_json)
      with
      (* Library errors get pretty-printed; anything else is reported raw. *)
      | Yamlrw_error e ->
        (`Fail (Format.asprintf "Parse error: %a" Error.pp e), "")
      | exn ->
        (`Fail (Printf.sprintf "Exception: %s" (Printexc.to_string exn)), "")
+
+
(* Run one test case end to end.
   - Error tests ([test.fail]): parsing must raise; a successful parse
     is a failure, any exception is a pass.
   - Normal tests with an expected tree: the formatted event tree must
     match [test.tree] after blank-line normalisation.
   - Normal tests without a tree but with JSON: parsing merely has to
     succeed (the JSON comparison is reported separately).
   - Otherwise the case is skipped. *)
let run_test (test : TL.test_case) : test_result =
  let json_status, json_actual = run_json_test test in
  (* Template result record; each branch below overrides status/output. *)
  let base = {
    id = test.id;
    name = test.name;
    yaml = test.yaml;
    is_error_test = test.fail;
    status = `Skip;
    output = "";
    json_status;
    json_expected = Option.value ~default:"" test.json;
    json_actual;
  } in
  if test.fail then begin
    (* Expected-error test: any exception from the parser is a pass. *)
    try
      let parser = Parser.of_string test.yaml in
      let events = Parser.to_list parser in
      let tree = TF.of_spanned_events events in
      { base with
        status = `Fail "Expected parsing to fail";
        output = tree;
      }
    with
    | Yamlrw_error e ->
      { base with
        status = `Pass;
        output = Format.asprintf "%a" Error.pp e;
      }
    | exn ->
      { base with
        status = `Pass;
        output = Printexc.to_string exn;
      }
  end
  else begin
    match test.tree with
    | None ->
      (match test.json with
       | Some _ ->
         (* No expected tree, but JSON exists: parsing just has to succeed. *)
         (try
            let parser = Parser.of_string test.yaml in
            let events = Parser.to_list parser in
            let tree = TF.of_spanned_events events in
            { base with status = `Pass; output = tree }
          with exn ->
            { base with
              status = `Fail (Printf.sprintf "Should parse but got: %s" (Printexc.to_string exn));
              output = Printexc.to_string exn;
            })
       | None ->
         { base with status = `Skip; output = "(no expected tree or json)" })
    | Some expected ->
      try
        let parser = Parser.of_string test.yaml in
        let events = Parser.to_list parser in
        let actual = TF.of_spanned_events events in
        let expected_norm = normalize_tree expected in
        let actual_norm = normalize_tree actual in
        if expected_norm = actual_norm then
          { base with status = `Pass; output = actual }
        else
          { base with
            (* was [Printf.sprintf "Tree mismatch"]: a constant string
               needs no format call *)
            status = `Fail "Tree mismatch";
            output = Printf.sprintf "Expected:\n%s\n\nActual:\n%s" expected_norm actual_norm;
          }
      with exn ->
        { base with
          status = `Fail (Printf.sprintf "Exception: %s" (Printexc.to_string exn));
          output = Printexc.to_string exn;
        }
  end
+
+
(* Run every test in its own Eio fiber; Fiber.List.map preserves input
   order, so results line up with [tests]. *)
let run_tests_parallel tests =
  Eio.Fiber.List.map run_test tests
+
+
(* CSS class name used in the HTML report for a test status. *)
let status_class s =
  match s with
  | `Pass -> "pass"
  | `Skip -> "skip"
  | `Fail _ -> "fail"
+
+
(* Uppercase badge label shown in the HTML report for a test status. *)
let status_text s =
  match s with
  | `Pass -> "PASS"
  | `Skip -> "SKIP"
  | `Fail _ -> "FAIL"
+
+
(* Render the full results list as a standalone HTML report and write it
   to [output_file] (resolved against the [fs] capability) via Eio. The
   page embeds all test data plus inline CSS and a small amount of JS
   for status filtering and text search. Note: the header template is a
   printf format string, so literal '%' in the CSS is doubled ("%%"). *)
let generate_html ~fs results output_file =
  (* Tallies for the summary cards at the top of the page. *)
  let pass_count = List.length (List.filter (fun r -> r.status = `Pass) results) in
  let fail_count = List.length (List.filter (fun r -> match r.status with `Fail _ -> true | _ -> false) results) in
  let skip_count = List.length (List.filter (fun r -> r.status = `Skip) results) in
  let total = List.length results in
  let json_pass_count = List.length (List.filter (fun r -> r.json_status = `Pass) results) in
  let json_fail_count = List.length (List.filter (fun r -> match r.json_status with `Fail _ -> true | _ -> false) results) in
  let json_skip_count = List.length (List.filter (fun r -> r.json_status = `Skip) results) in

  let buf = Buffer.create 65536 in
  (* Page header, styles and summary; the %d slots are filled below. *)
  Printf.bprintf buf {|<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Yamlrw Test Results (Eio)</title>
<style>
:root {
--pass-color: #22c55e;
--fail-color: #ef4444;
--skip-color: #f59e0b;
--bg-color: #1a1a2e;
--card-bg: #16213e;
--text-color: #e2e8f0;
--border-color: #334155;
}
* { box-sizing: border-box; margin: 0; padding: 0; }
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
background: var(--bg-color);
color: var(--text-color);
line-height: 1.6;
padding: 2rem;
}
.container { max-width: 1400px; margin: 0 auto; }
h1 { margin-bottom: 1.5rem; font-size: 2rem; }
.eio-badge {
display: inline-block;
background: linear-gradient(135deg, #667eea 0%%, #764ba2 100%%);
color: white;
padding: 0.25rem 0.75rem;
border-radius: 999px;
font-size: 0.875rem;
margin-left: 1rem;
vertical-align: middle;
}
.summary {
display: flex;
gap: 1rem;
margin-bottom: 2rem;
flex-wrap: wrap;
}
.stat {
background: var(--card-bg);
padding: 1rem 1.5rem;
border-radius: 8px;
border-left: 4px solid var(--border-color);
}
.stat.pass { border-left-color: var(--pass-color); }
.stat.fail { border-left-color: var(--fail-color); }
.stat.skip { border-left-color: var(--skip-color); }
.stat-value { font-size: 2rem; font-weight: bold; }
.stat-label { font-size: 0.875rem; opacity: 0.8; }
.filters {
margin-bottom: 1.5rem;
display: flex;
gap: 0.5rem;
flex-wrap: wrap;
}
.filter-btn {
padding: 0.5rem 1rem;
border: 1px solid var(--border-color);
background: var(--card-bg);
color: var(--text-color);
border-radius: 4px;
cursor: pointer;
transition: all 0.2s;
}
.filter-btn:hover { border-color: var(--text-color); }
.filter-btn.active { background: var(--text-color); color: var(--bg-color); }
.search {
padding: 0.5rem 1rem;
border: 1px solid var(--border-color);
background: var(--card-bg);
color: var(--text-color);
border-radius: 4px;
width: 200px;
}
.tests { display: flex; flex-direction: column; gap: 1rem; }
.test {
background: var(--card-bg);
border-radius: 8px;
border: 1px solid var(--border-color);
overflow: hidden;
}
.test-header {
padding: 1rem;
display: flex;
align-items: center;
gap: 1rem;
cursor: pointer;
border-bottom: 1px solid var(--border-color);
}
.test-header:hover { background: rgba(255,255,255,0.05); }
.badge {
padding: 0.25rem 0.5rem;
border-radius: 4px;
font-size: 0.75rem;
font-weight: bold;
text-transform: uppercase;
}
.badge.pass { background: var(--pass-color); color: #000; }
.badge.fail { background: var(--fail-color); color: #fff; }
.badge.skip { background: var(--skip-color); color: #000; }
.badge.error-test { background: #8b5cf6; color: #fff; margin-left: auto; }
.test-id { font-family: monospace; font-weight: bold; }
.test-name { opacity: 0.8; flex: 1; }
.test-content { display: none; padding: 1rem; }
.test.expanded .test-content { display: block; }
.section { margin-bottom: 1rem; }
.section-title {
font-size: 0.875rem;
text-transform: uppercase;
opacity: 0.6;
margin-bottom: 0.5rem;
letter-spacing: 0.05em;
}
pre {
background: #0f172a;
padding: 1rem;
border-radius: 4px;
overflow-x: auto;
font-size: 0.875rem;
white-space: pre-wrap;
word-break: break-all;
}
.expand-icon { transition: transform 0.2s; }
.test.expanded .expand-icon { transform: rotate(90deg); }
</style>
</head>
<body>
<div class="container">
<h1>Yamlrw Test Results <span class="eio-badge">Eio Parallel</span></h1>
<div class="summary">
<div class="stat pass">
<div class="stat-value">%d</div>
<div class="stat-label">Passed</div>
</div>
<div class="stat fail">
<div class="stat-value">%d</div>
<div class="stat-label">Failed</div>
</div>
<div class="stat skip">
<div class="stat-value">%d</div>
<div class="stat-label">Skipped</div>
</div>
<div class="stat">
<div class="stat-value">%d</div>
<div class="stat-label">Total</div>
</div>
</div>
<h2 style="margin: 1.5rem 0 1rem;">JSON Output Comparison</h2>
<div class="summary">
<div class="stat pass">
<div class="stat-value">%d</div>
<div class="stat-label">JSON Pass</div>
</div>
<div class="stat fail">
<div class="stat-value">%d</div>
<div class="stat-label">JSON Fail</div>
</div>
<div class="stat skip">
<div class="stat-value">%d</div>
<div class="stat-label">JSON Skip</div>
</div>
</div>
<div class="filters">
<button class="filter-btn active" data-filter="all">All</button>
<button class="filter-btn" data-filter="pass">Pass</button>
<button class="filter-btn" data-filter="fail">Fail</button>
<button class="filter-btn" data-filter="skip">Skip</button>
<input type="text" class="search" placeholder="Search by ID or name...">
</div>
<div class="tests">
|} pass_count fail_count skip_count total json_pass_count json_fail_count json_skip_count;

  (* One expandable card per test result; all user data is HTML-escaped. *)
  List.iter (fun result ->
    let error_badge = if result.is_error_test then
      {|<span class="badge error-test">Error Test</span>|}
    else "" in
    let json_badge = Printf.sprintf {|<span class="badge %s" style="margin-left: 4px;">JSON: %s</span>|}
      (status_class result.json_status) (status_text result.json_status) in
    let json_section = if result.json_expected <> "" || result.json_actual <> "" then
      Printf.sprintf {|
<div class="section">
<div class="section-title">Expected JSON</div>
<pre>%s</pre>
</div>
<div class="section">
<div class="section-title">Actual JSON</div>
<pre>%s</pre>
</div>|}
        (html_escape result.json_expected)
        (html_escape result.json_actual)
    else "" in
    Printf.bprintf buf {| <div class="test" data-status="%s" data-json-status="%s" data-id="%s" data-name="%s">
<div class="test-header" onclick="this.parentElement.classList.toggle('expanded')">
<span class="expand-icon">▶</span>
<span class="badge %s">%s</span>
%s
<span class="test-id">%s</span>
<span class="test-name">%s</span>
%s
</div>
<div class="test-content">
<div class="section">
<div class="section-title">YAML Input</div>
<pre>%s</pre>
</div>
<div class="section">
<div class="section-title">Event Tree Output</div>
<pre>%s</pre>
</div>%s
</div>
</div>
|}
      (status_class result.status)
      (status_class result.json_status)
      (html_escape result.id)
      (html_escape (String.lowercase_ascii result.name))
      (status_class result.status)
      (status_text result.status)
      json_badge
      (html_escape result.id)
      (html_escape result.name)
      error_badge
      (html_escape result.yaml)
      (html_escape result.output)
      json_section
  ) results;

  (* Footer: inline JS implementing the status filter and text search. *)
  Printf.bprintf buf {| </div>
</div>
<script>
document.querySelectorAll('.filter-btn').forEach(btn => {
btn.addEventListener('click', () => {
document.querySelectorAll('.filter-btn').forEach(b => b.classList.remove('active'));
btn.classList.add('active');
filterTests();
});
});
document.querySelector('.search').addEventListener('input', filterTests);
function filterTests() {
const filter = document.querySelector('.filter-btn.active').dataset.filter;
const search = document.querySelector('.search').value.toLowerCase();
document.querySelectorAll('.test').forEach(test => {
const status = test.dataset.status;
const id = test.dataset.id.toLowerCase();
const name = test.dataset.name;
const matchesFilter = filter === 'all' || status === filter;
const matchesSearch = !search || id.includes(search) || name.includes(search);
test.style.display = matchesFilter && matchesSearch ? '' : 'none';
});
}
</script>
</body>
</html>
|};

  (* Write (create or truncate) the report through the Eio capability. *)
  Eio.Path.save ~create:(`Or_truncate 0o644)
    Eio.Path.(fs / output_file)
    (Buffer.contents buf)
+
+
(* Entry point: parse CLI flags, load the test suite (parallel via Eio
   fibers unless --sequential), run every case, print a summary and the
   failing cases, and optionally emit an HTML report. *)
let () =
  let html_output = ref None in
  let show_skipped = ref false in
  let sequential = ref false in
  let args = [
    "--html", Arg.String (fun s -> html_output := Some s),
    "<file> Generate HTML report to file";
    "--show-skipped", Arg.Set show_skipped,
    " Show details of skipped tests";
    "--sequential", Arg.Set sequential,
    " Run tests sequentially instead of in parallel";
  ] in
  Arg.parse args (fun _ -> ()) "Usage: run_all_tests_eio [--html <file>] [--show-skipped] [--sequential]";

  Eio_main.run @@ fun env ->
  (* Use fs (full filesystem) rather than cwd (sandboxed) to allow ".." navigation *)
  let fs = Eio.Stdenv.fs env in
  (* Get the absolute path to the test suite *)
  let cwd = Sys.getcwd () in
  let test_suite_abs = if Filename.is_relative test_suite_path then
      Filename.concat cwd test_suite_path
    else
      test_suite_path
  in

  let start_time = Unix.gettimeofday () in

  (* Load tests using Eio (parallel by default) *)
  let all_tests = if !sequential then
      TL.load_directory ~fs test_suite_abs
    else
      TL.load_directory_parallel ~fs test_suite_abs
  in
  let load_time = Unix.gettimeofday () in
  Printf.printf "Loaded %d tests in %.3fs\n%!" (List.length all_tests) (load_time -. start_time);

  (* Run tests (parallel or sequential based on flag) *)
  let results = if !sequential then
      List.map run_test all_tests
    else
      run_tests_parallel all_tests
  in
  let run_time = Unix.gettimeofday () in
  Printf.printf "Ran tests in %.3fs\n%!" (run_time -. load_time);

  (* Tally event-tree and JSON outcomes separately. *)
  let pass_count = List.length (List.filter (fun r -> r.status = `Pass) results) in
  let fail_count = List.length (List.filter (fun r -> match r.status with `Fail _ -> true | _ -> false) results) in
  let skip_count = List.length (List.filter (fun r -> r.status = `Skip) results) in

  let json_pass_count = List.length (List.filter (fun r -> r.json_status = `Pass) results) in
  let json_fail_count = List.length (List.filter (fun r -> match r.json_status with `Fail _ -> true | _ -> false) results) in
  let json_skip_count = List.length (List.filter (fun r -> r.json_status = `Skip) results) in

  Printf.printf "\nEvent Tree Results: %d pass, %d fail, %d skip (total: %d)\n%!"
    pass_count fail_count skip_count (pass_count + fail_count + skip_count);

  Printf.printf "JSON Results: %d pass, %d fail, %d skip\n%!"
    json_pass_count json_fail_count json_skip_count;

  if fail_count > 0 then begin
    Printf.printf "\nFailing event tree tests:\n";
    List.iter (fun r ->
      match r.status with
      | `Fail msg -> Printf.printf " %s: %s - %s\n" r.id r.name msg
      | _ -> ()
    ) results
  end;

  if json_fail_count > 0 then begin
    Printf.printf "\nFailing JSON tests:\n";
    List.iter (fun r ->
      match r.json_status with
      | `Fail msg -> Printf.printf " %s: %s - %s\n" r.id r.name msg
      | _ -> ()
    ) results
  end;

  if !show_skipped && skip_count > 0 then begin
    Printf.printf "\nSkipped tests (no expected tree):\n";
    List.iter (fun r ->
      if r.status = `Skip then begin
        Printf.printf " %s: %s\n" r.id r.name;
        (* Truncate long inputs so the listing stays readable. *)
        Printf.printf " YAML (%d chars): %S\n" (String.length r.yaml)
          (if String.length r.yaml <= 60 then r.yaml
           else String.sub r.yaml 0 60 ^ "...")
      end
    ) results
  end;

  let total_time = Unix.gettimeofday () in
  Printf.printf "\nTotal time: %.3fs\n%!" (total_time -. start_time);

  match !html_output with
  | Some file ->
    generate_html ~fs results file;
    Printf.printf "HTML report generated: %s\n" file
  | None -> ()
+9
tests/test_suite_lib/dune
···
+
; Portable test-suite helpers (loader, tree/JSON formatting, JSON
; comparison). Uses synchronous I/O only, so it depends just on the
; library under test plus jsonm.
(library
 (name test_suite_lib)
 (modules test_suite_loader_generic test_suite_loader tree_format json_format json_compare)
 (libraries yamlrw jsonm))

; Eio-backed loader, split into its own library so the portable helpers
; above do not pull in an eio dependency.
(library
 (name test_suite_lib_eio)
 (modules test_suite_loader_eio)
 (libraries test_suite_lib eio))
+146
tests/test_suite_lib/json_compare.ml
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(* Parse JSON using jsonm and compare parsed structures *)
+
+
(* Minimal JSON document type used for structural comparison. *)
type json =
  | Null
  | Bool of bool
  | Float of float
  | String of string
  | Array of json list
  | Object of (string * json) list

(* Structural equality. Arrays are order-sensitive; objects are treated
   as unordered key/value sets (compared after sorting by key). *)
let rec equal a b =
  match a, b with
  | Null, Null -> true
  | Bool x, Bool y -> Bool.equal x y
  | Float x, Float y -> Float.equal x y
  | String x, String y -> String.equal x y
  | Array xs, Array ys -> List.equal equal xs ys
  | Object xs, Object ys ->
    let by_key (k1, _) (k2, _) = String.compare k1 k2 in
    let xs = List.sort by_key xs and ys = List.sort by_key ys in
    (* Differing field counts raise from for_all2 and compare unequal. *)
    (try
       List.for_all2
         (fun (k1, v1) (k2, v2) -> String.equal k1 k2 && equal v1 v2)
         xs ys
     with Invalid_argument _ -> false)
  | _, _ -> false
+
+
(* Parse a single JSON value from [s] with jsonm's streaming decoder.
   Plain recursive descent over the lexeme stream. Note: trailing input
   after the first complete value is not checked. *)
let parse_json s =
  let decoder = Jsonm.decoder (`String s) in
  let rec parse_value () =
    match Jsonm.decode decoder with
    | `Lexeme `Null -> Ok Null
    | `Lexeme (`Bool b) -> Ok (Bool b)
    | `Lexeme (`Float f) -> Ok (Float f)
    | `Lexeme (`String s) -> Ok (String s)
    | `Lexeme `As -> parse_array []
    | `Lexeme `Os -> parse_object []
    | `Lexeme _ -> Error "unexpected lexeme"
    | `Error e -> Error (Format.asprintf "%a" Jsonm.pp_error e)
    | `End -> Error "unexpected end"
    (* `Await cannot occur with a `String source, but keep the match total. *)
    | `Await -> Error "unexpected await"
  (* Array elements: jsonm has no push-back, so a lexeme that is not `Ae
     must be dispatched through parse_value_with_lex. *)
  and parse_array acc =
    match Jsonm.decode decoder with
    | `Lexeme `Ae -> Ok (Array (List.rev acc))
    | `Lexeme _ as lex ->
      (* Push back and parse value *)
      let result = parse_value_with_lex lex in
      (match result with
       | Ok v -> parse_array (v :: acc)
       | Error _ as e -> e)
    | `Error e -> Error (Format.asprintf "%a" Jsonm.pp_error e)
    | `End -> Error "unexpected end in array"
    | `Await -> Error "unexpected await"
  and parse_object acc =
    match Jsonm.decode decoder with
    | `Lexeme `Oe -> Ok (Object (List.rev acc))
    | `Lexeme (`Name key) ->
      (match parse_value () with
       | Ok v -> parse_object ((key, v) :: acc)
       | Error _ as e -> e)
    | `Lexeme _ -> Error "expected object key"
    | `Error e -> Error (Format.asprintf "%a" Jsonm.pp_error e)
    | `End -> Error "unexpected end in object"
    | `Await -> Error "unexpected await"
  (* Same dispatch as parse_value, but for a lexeme already consumed
     from the decoder. *)
  and parse_value_with_lex lex =
    match lex with
    | `Lexeme `Null -> Ok Null
    | `Lexeme (`Bool b) -> Ok (Bool b)
    | `Lexeme (`Float f) -> Ok (Float f)
    | `Lexeme (`String s) -> Ok (String s)
    | `Lexeme `As -> parse_array []
    | `Lexeme `Os -> parse_object []
    | `Lexeme _ -> Error "unexpected lexeme"
    | `Error e -> Error (Format.asprintf "%a" Jsonm.pp_error e)
    | `End -> Error "unexpected end"
    | `Await -> Error "unexpected await"
  in
  parse_value ()
+
+
(* Parse a sequence of concatenated top-level JSON values (as produced
   for multi-document YAML). Option-based variant of parse_json; a
   decode error or end of input simply terminates collection, so
   trailing garbage is silently dropped. *)
let parse_json_multi s =
  let decoder = Jsonm.decoder (`String s) in
  let rec parse_value () =
    match Jsonm.decode decoder with
    | `Lexeme `Null -> Some Null
    | `Lexeme (`Bool b) -> Some (Bool b)
    | `Lexeme (`Float f) -> Some (Float f)
    | `Lexeme (`String s) -> Some (String s)
    | `Lexeme `As -> parse_array []
    | `Lexeme `Os -> parse_object []
    | `Lexeme _ -> None
    | `Error _ -> None
    | `End -> None
    | `Await -> None
  (* As in parse_json: a non-`Ae lexeme is dispatched via the _with_lex
     helper because jsonm offers no push-back. *)
  and parse_array acc =
    match Jsonm.decode decoder with
    | `Lexeme `Ae -> Some (Array (List.rev acc))
    | `Lexeme _ as lex ->
      (match parse_value_with_lex lex with
       | Some v -> parse_array (v :: acc)
       | None -> None)
    | _ -> None
  and parse_object acc =
    match Jsonm.decode decoder with
    | `Lexeme `Oe -> Some (Object (List.rev acc))
    | `Lexeme (`Name key) ->
      (match parse_value () with
       | Some v -> parse_object ((key, v) :: acc)
       | None -> None)
    | _ -> None
  and parse_value_with_lex lex =
    match lex with
    | `Lexeme `Null -> Some Null
    | `Lexeme (`Bool b) -> Some (Bool b)
    | `Lexeme (`Float f) -> Some (Float f)
    | `Lexeme (`String s) -> Some (String s)
    | `Lexeme `As -> parse_array []
    | `Lexeme `Os -> parse_object []
    | _ -> None
  in
  (* Keep reading values until the stream ends or a value fails. *)
  let rec collect acc =
    match parse_value () with
    | Some v -> collect (v :: acc)
    | None -> List.rev acc
  in
  collect []
+
+
(* Compare two JSON texts structurally. Either text may hold a sequence
   of top-level values (multi-document output); the comparison succeeds
   when the sequences have the same length and are pairwise [equal].
   Two all-whitespace inputs compare equal; whitespace-only versus
   non-empty does not. *)
let compare_json_strings expected actual =
  match String.trim expected, String.trim actual with
  | "", "" -> true
  | "", _ | _, "" -> false
  | _, _ ->
    let expected_values = parse_json_multi expected in
    let actual_values = parse_json_multi actual in
    List.length expected_values = List.length actual_values
    && List.for_all2 equal expected_values actual_values
+69
tests/test_suite_lib/json_format.ml
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(* Format Value.t as JSON matching yaml-test-suite expected format *)
+
+
open Yamlrw
+
+
(* Render [s] as a double-quoted JSON string: escape quote, backslash
   and the usual control-character shorthands; any other byte below
   0x20 becomes a \u00XX escape. *)
let escape_string s =
  let buf = Buffer.create (String.length s * 2) in
  Buffer.add_char buf '"';
  let emit c =
    match c with
    | '"' -> Buffer.add_string buf "\\\""
    | '\\' -> Buffer.add_string buf "\\\\"
    | '\n' -> Buffer.add_string buf "\\n"
    | '\r' -> Buffer.add_string buf "\\r"
    | '\t' -> Buffer.add_string buf "\\t"
    | '\x08' -> Buffer.add_string buf "\\b"
    | '\x0c' -> Buffer.add_string buf "\\f"
    | _ when Char.code c < 32 ->
      Buffer.add_string buf (Printf.sprintf "\\u%04x" (Char.code c))
    | _ -> Buffer.add_char buf c
  in
  String.iter emit s;
  Buffer.add_char buf '"';
  Buffer.contents buf
+
+
(* Render a Value.t as pretty-printed JSON in the layout used by
   yaml-test-suite's in.json files: 2-space indent, one element per
   line. NaN and infinities become null (JSON cannot represent them).
   NOTE(review): non-integral floats go through "%g" (6 significant
   digits), which can lose precision — presumably acceptable for the
   structural test comparison; confirm if exact round-tripping matters. *)
let rec format_value ?(indent=0) (v : Value.t) =
  let spaces n = String.make n ' ' in
  match v with
  | `Null -> "null"
  | `Bool true -> "true"
  | `Bool false -> "false"
  | `Float f ->
    if Float.is_nan f then "null" (* JSON doesn't support NaN *)
    else if f = Float.infinity || f = Float.neg_infinity then "null" (* JSON doesn't support Inf *)
    else if Float.is_integer f && Float.abs f < 1e15 then
      (* Integral floats print without a fractional part. *)
      Printf.sprintf "%.0f" f
    else
      (* Try to match yaml-test-suite's number formatting *)
      let s = Printf.sprintf "%g" f in
      (* Ensure we have a decimal point for floats *)
      if String.contains s '.' || String.contains s 'e' || String.contains s 'E' then s
      else s ^ ".0"
  | `String s -> escape_string s
  | `A [] -> "[]"
  | `A items ->
    let inner_indent = indent + 2 in
    let formatted_items = List.map (fun item ->
        spaces inner_indent ^ format_value ~indent:inner_indent item
      ) items in
    "[\n" ^ String.concat ",\n" formatted_items ^ "\n" ^ spaces indent ^ "]"
  | `O [] -> "{}"
  | `O pairs ->
    let inner_indent = indent + 2 in
    let formatted_pairs = List.map (fun (k, v) ->
        let key = escape_string k in
        let value = format_value ~indent:inner_indent v in
        spaces inner_indent ^ key ^ ": " ^ value
      ) pairs in
    "{\n" ^ String.concat ",\n" formatted_pairs ^ "\n" ^ spaces indent ^ "}"

(* Single-document rendering at top level. *)
let to_json (v : Value.t) : string =
  format_value v

(* Format multiple documents (for multi-doc YAML): one JSON value per
   document, separated by newlines. *)
let documents_to_json (docs : Value.t list) : string =
  String.concat "\n" (List.map to_json docs)
+45
tests/test_suite_lib/test_suite_loader.ml
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** Load yaml-test-suite test cases using standard OCaml I/O *)
+
+
(** Synchronous file I/O implementation *)
module Sync_io : Test_suite_loader_generic.FILE_IO with type ctx = unit = struct
  type ctx = unit

  (* Read a whole file as a string; [None] on any error. The channel is
     now closed even when reading raises (the original only closed on
     the success path, leaking a descriptor on failure), and the file is
     opened in binary mode so the byte count from in_channel_length is
     reliable on all platforms. *)
  let read_file () path =
    match open_in_bin path with
    | exception _ -> None
    | ic ->
      Fun.protect
        ~finally:(fun () -> close_in_noerr ic)
        (fun () ->
           try Some (really_input_string ic (in_channel_length ic))
           with _ -> None)

  let file_exists () path =
    Sys.file_exists path

  (* Guarded by file_exists because Sys.is_directory raises on a
     missing path. *)
  let is_directory () path =
    Sys.file_exists path && Sys.is_directory path

  let read_dir () path =
    Array.to_list (Sys.readdir path)
end
+
+
(** Internal loader module: the generic loader instantiated with
    synchronous I/O. *)
module Loader = Test_suite_loader_generic.Make(Sync_io)

(** Re-export test_case type from loader so callers need not mention
    the functor application. *)
type test_case = Loader.test_case = {
  id : string;
  name : string;
  yaml : string;
  tree : string option;
  json : string option;
  fail : bool;
}

(** Load tests without needing to pass a context (ctx is unit here) *)
let load_directory path : test_case list = Loader.load_directory () path
+67
tests/test_suite_lib/test_suite_loader_eio.ml
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** Load yaml-test-suite test cases using Eio for file I/O *)
+
+
module Generic = Test_suite_lib.Test_suite_loader_generic

(** Eio file I/O implementation. The context is a capability to a
    directory tree; every operation resolves [path] against it. *)
module Eio_io : Generic.FILE_IO with type ctx = Eio.Fs.dir_ty Eio.Path.t = struct
  type ctx = Eio.Fs.dir_ty Eio.Path.t

  (* Whole-file read; any exception maps to None (best-effort). *)
  let read_file fs path =
    try
      Some (Eio.Path.load Eio.Path.(fs / path))
    with _ -> None

  (* Follows symlinks; a missing path or I/O error counts as false. *)
  let file_exists fs path =
    match Eio.Path.kind ~follow:true Eio.Path.(fs / path) with
    | `Regular_file -> true
    | _ -> false
    | exception _ -> false

  let is_directory fs path =
    match Eio.Path.kind ~follow:true Eio.Path.(fs / path) with
    | `Directory -> true
    | _ -> false
    | exception _ -> false

  let read_dir fs path =
    Eio.Path.read_dir Eio.Path.(fs / path)
end
+
+
(** Internal loader module: the generic loader instantiated with Eio I/O. *)
module Loader = Generic.Make(Eio_io)

(** Re-export test_case type from loader *)
type test_case = Loader.test_case = {
  id : string;
  name : string;
  yaml : string;
  tree : string option;
  json : string option;
  fail : bool;
}

(** Sequential loading with an Eio filesystem capability *)
let load_directory ~fs path : test_case list = Loader.load_directory fs path

(** Parallel loading: list the top-level test-id directories, then load
    each id in its own fiber. Fiber.List.map keeps input order, so the
    result stays sorted by test id. *)
let load_directory_parallel ~fs test_suite_path : test_case list =
  if not (Eio_io.is_directory fs test_suite_path) then []
  else
    let entries = Eio_io.read_dir fs test_suite_path in
    (* Same test-id heuristic as the generic loader: directory, name of
       at least 4 chars, first char in '0'..'Z'. *)
    let test_ids = entries
      |> List.filter (fun e ->
          Eio_io.is_directory fs (Filename.concat test_suite_path e) &&
          String.length e >= 4 &&
          e.[0] >= '0' && e.[0] <= 'Z')
      |> List.sort String.compare
    in
    (* Load each test ID in parallel using fibers *)
    Eio.Fiber.List.map (fun test_id ->
        Loader.load_test_id fs test_suite_path test_id
      ) test_ids
    |> List.concat
+121
tests/test_suite_lib/test_suite_loader_generic.ml
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** Generic test suite loader - parameterized by file I/O operations *)
+
+
(** Test case representation, mirroring one yaml-test-suite directory. *)
type test_case = {
  id : string;            (* directory name, possibly "ID:variant" *)
  name : string;          (* contents of the "===" file, trimmed *)
  yaml : string;          (* in.yaml contents *)
  tree : string option;   (* test.event contents, when present *)
  json : string option;   (* in.json contents, when present *)
  fail : bool;            (* true when an "error" marker file exists *)
}
+
+
(** Module type for file I/O operations *)
module type FILE_IO = sig
  (** Context type for file operations (unit for sync, a directory
      capability for Eio) *)
  type ctx

  (** Read a file, returning None if it doesn't exist or can't be read *)
  val read_file : ctx -> string -> string option

  (** Check if a path exists and is a regular file *)
  val file_exists : ctx -> string -> bool

  (** Check if a path exists and is a directory *)
  val is_directory : ctx -> string -> bool

  (** List directory entries *)
  val read_dir : ctx -> string -> string list
end

(** Create a test loader from file I/O operations *)
module Make (IO : FILE_IO) = struct
  type test_case = {
    id : string;
    name : string;
    yaml : string;
    tree : string option;
    json : string option;
    fail : bool;
  }

  (* Read a file that ought to exist; degrade to "" rather than fail. *)
  let read_file_required ctx path =
    match IO.read_file ctx path with
    | Some s -> s
    | None -> ""

  (** Load a single test from a directory. Returns [None] when the
      directory has no [in.yaml] (i.e. it is not a test directory). *)
  let load_test_dir ctx base_id dir_path =
    let name_file = Filename.concat dir_path "===" in
    let yaml_file = Filename.concat dir_path "in.yaml" in
    let tree_file = Filename.concat dir_path "test.event" in
    let json_file = Filename.concat dir_path "in.json" in
    let error_file = Filename.concat dir_path "error" in
    (* Must have in.yaml to be a valid test *)
    if not (IO.file_exists ctx yaml_file) then None
    else
      let name = match IO.read_file ctx name_file with
        | Some s -> String.trim s
        | None -> base_id
      in
      let yaml = read_file_required ctx yaml_file in
      let tree = IO.read_file ctx tree_file in
      let json = IO.read_file ctx json_file in
      (* Presence of an "error" marker file means parsing must fail. *)
      let fail = IO.file_exists ctx error_file in
      Some { id = base_id; name; yaml; tree; json; fail }

  (** Load tests from a test ID directory, which either holds one test
      directly or numeric subdirectories (00, 01, ...) with variants. *)
  let load_test_id ctx test_suite_path test_id =
    let dir_path = Filename.concat test_suite_path test_id in
    if not (IO.is_directory ctx dir_path) then []
    else begin
      (* Build the variant list once. The original evaluated the same
         filter twice: once via List.exists for the has_variants check
         and again via List.filter to collect them. *)
      let is_variant e =
        let subdir = Filename.concat dir_path e in
        IO.is_directory ctx subdir
        && String.length e >= 2
        && e.[0] >= '0' && e.[0] <= '9'
      in
      let variants =
        IO.read_dir ctx dir_path
        |> List.filter is_variant
        |> List.sort String.compare
      in
      match variants with
      | [] ->
        (* Single test in this directory *)
        (match load_test_dir ctx test_id dir_path with
         | Some t -> [t]
         | None -> [])
      | _ ->
        (* Load each variant subdirectory under an "ID:variant" id. *)
        List.filter_map (fun variant ->
            let variant_path = Filename.concat dir_path variant in
            let variant_id = Printf.sprintf "%s:%s" test_id variant in
            load_test_dir ctx variant_id variant_path)
          variants
    end

  (** Load all tests from a test suite directory, sorted by test id. *)
  let load_directory ctx test_suite_path =
    if not (IO.is_directory ctx test_suite_path) then []
    else
      let test_ids =
        IO.read_dir ctx test_suite_path
        |> List.filter (fun e ->
            (* Heuristic: test ids are >= 4 chars starting in '0'..'Z'.
               NOTE(review): that range also admits a few punctuation
               characters (':'..'@'); harmless for this suite. *)
            IO.is_directory ctx (Filename.concat test_suite_path e)
            && String.length e >= 4
            && e.[0] >= '0' && e.[0] <= 'Z')
        |> List.sort String.compare
      in
      List.concat_map (load_test_id ctx test_suite_path) test_ids
end
+74
tests/test_suite_lib/tree_format.ml
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(* Format parser events as tree notation compatible with yaml-test-suite *)
+
+
open Yamlrw
+
+
(* Escape a scalar value for the single-line "=VAL" tree notation used
   by yaml-test-suite: C-style escapes for control characters plus
   "\_" for byte 0xA0.
   NOTE(review): 0xA0 is matched as a single byte; in UTF-8 text a
   non-breaking space is the two-byte sequence C2 A0, which would leave
   a stray C2 — confirm against the suite's expected output. *)
let escape_string s =
  let buf = Buffer.create (String.length s * 2) in
  let add = Buffer.add_string buf in
  let escape = function
    | '\\' -> add "\\\\"
    | '\n' -> add "\\n"
    | '\r' -> add "\\r"
    | '\t' -> add "\\t"
    | '\x00' -> add "\\0"
    | '\x07' -> add "\\a"
    | '\x08' -> add "\\b"
    | '\x0b' -> add "\\v"
    | '\x0c' -> add "\\f"
    | '\x1b' -> add "\\e"
    | '\xa0' -> add "\\_"
    | c -> Buffer.add_char buf c
  in
  String.iter escape s;
  Buffer.contents buf
+
+
(* Single-character scalar-style marker used in "=VAL" lines:
   ':' plain, '\'' single-quoted, '"' double-quoted, '|' literal,
   '>' folded; `Any renders like plain. *)
let style_char style =
  match style with
  | `Single_quoted -> '\''
  | `Double_quoted -> '"'
  | `Literal -> '|'
  | `Folded -> '>'
  | `Plain | `Any -> ':'
+
+
(* Render one parser event as a yaml-test-suite tree-notation line
   (+STR/-STR, +DOC/-DOC, +MAP/-MAP, +SEQ/-SEQ, =VAL, =ALI). The span
   is ignored. *)
let format_event { Event.event; span = _span } =
  match event with
  | Event.Stream_start _ -> "+STR"
  | Event.Stream_end -> "-STR"
  | Event.Document_start { implicit; _ } ->
    (* Explicit document boundaries carry the "---" / "..." markers. *)
    if implicit then "+DOC"
    else "+DOC ---"
  | Event.Document_end { implicit } ->
    if implicit then "-DOC"
    else "-DOC ..."
  | Event.Mapping_start { anchor; tag; style; _ } ->
    let anchor_str = match anchor with Some a -> " &" ^ a | None -> "" in
    let tag_str = match tag with Some t -> " <" ^ t ^ ">" | None -> "" in
    (* Flow collections are flagged with "{}" in the suite format. *)
    let flow_str = match style with `Flow -> " {}" | _ -> "" in
    Printf.sprintf "+MAP%s%s%s" flow_str anchor_str tag_str
  | Event.Mapping_end -> "-MAP"
  | Event.Sequence_start { anchor; tag; style; _ } ->
    let anchor_str = match anchor with Some a -> " &" ^ a | None -> "" in
    let tag_str = match tag with Some t -> " <" ^ t ^ ">" | None -> "" in
    (* Flow sequences are flagged with "[]". *)
    let flow_str = match style with `Flow -> " []" | _ -> "" in
    Printf.sprintf "+SEQ%s%s%s" flow_str anchor_str tag_str
  | Event.Sequence_end -> "-SEQ"
  | Event.Scalar { anchor; tag; value; style; _ } ->
    let anchor_str = match anchor with Some a -> " &" ^ a | None -> "" in
    let tag_str = match tag with Some t -> " <" ^ t ^ ">" | None -> "" in
    (* Style marker precedes the escaped scalar value. *)
    let style_c = style_char style in
    Printf.sprintf "=VAL%s%s %c%s" anchor_str tag_str style_c (escape_string value)
  | Event.Alias { anchor } ->
    Printf.sprintf "=ALI *%s" anchor
+
+
(* Render a whole event stream as tree notation: one formatted event
   per line, each line newline-terminated. *)
let of_spanned_events events =
  events
  |> List.map (fun (e : Event.spanned) -> format_event e ^ "\n")
  |> String.concat ""
+358
tests/test_yamlrw.ml
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** Tests for the Yamlrw library *)
+
+
open Yamlrw
+
+
(** Test helpers *)
+
+
(* Assert that [expected] and [actual] are equal under [Value.equal]
   (structural equality on YAML values), reporting [msg] on failure. *)
let check_value msg expected actual =
  Alcotest.(check bool) msg true (Value.equal expected actual)

(* String-equality check; the underscore prefix marks it as currently
   unused (kept for future tests). *)
let _check_string msg expected actual =
  Alcotest.(check string) msg expected actual
+
+
(** Scanner tests *)
+
+
(* "hello: world" must scan to exactly the eight tokens of a single
   block mapping with one key/value pair: Stream_start,
   Block_mapping_start, Key, Scalar "hello", Value, Scalar "world",
   Block_end, Stream_end. *)
let test_scanner_simple () =
  let tokens = Scanner.to_list (Scanner.of_string "hello: world") in
  let token_types = List.map (fun (t : Token.spanned) -> t.token) tokens in
  Alcotest.(check int) "token count" 8 (List.length token_types);
  match token_types with
  | [ Token.Stream_start _; Token.Block_mapping_start; Token.Key;
      Token.Scalar { value = "hello"; _ }; Token.Value;
      Token.Scalar { value = "world"; _ }; Token.Block_end;
      Token.Stream_end ] ->
    ()
  | _ -> Alcotest.fail "unexpected token sequence"

(* A block sequence should produce at least one token. *)
let test_scanner_sequence () =
  let tokens = Scanner.to_list (Scanner.of_string "- one\n- two\n- three") in
  Alcotest.(check bool) "has tokens" true (List.length tokens > 0)

(* A flow sequence must contain a [Flow_sequence_start] token. *)
let test_scanner_flow () =
  let tokens = Scanner.to_list (Scanner.of_string "[1, 2, 3]") in
  let is_flow_start (t : Token.spanned) =
    match t.token with Token.Flow_sequence_start -> true | _ -> false
  in
  Alcotest.(check bool) "has flow sequence start" true
    (List.exists is_flow_start tokens)

let scanner_tests =
  [
    ("simple mapping", `Quick, test_scanner_simple);
    ("sequence", `Quick, test_scanner_sequence);
    ("flow sequence", `Quick, test_scanner_flow);
  ]
+
+
(** Parser tests *)
+
+
(* Parsing "key: value" must yield a non-empty event stream containing
   a [Stream_start] event. *)
let test_parser_events () =
  let events = Parser.to_list (Parser.of_string "key: value") in
  Alcotest.(check bool) "has events" true (List.length events > 0);
  let is_stream_start (e : Event.spanned) =
    match e.event with Event.Stream_start _ -> true | _ -> false
  in
  Alcotest.(check bool) "has stream start" true
    (List.exists is_stream_start events)

(* A two-element block sequence must emit a [Sequence_start] event. *)
let test_parser_sequence_events () =
  let events = Parser.to_list (Parser.of_string "- a\n- b") in
  let is_seq_start (e : Event.spanned) =
    match e.event with Event.Sequence_start _ -> true | _ -> false
  in
  Alcotest.(check bool) "has sequence start" true
    (List.exists is_seq_start events)

let parser_tests =
  [
    ("parse events", `Quick, test_parser_events);
    ("sequence events", `Quick, test_parser_sequence_events);
  ]
+
+
(** Value parsing tests *)
+
+
(* "null", "~" and the empty document all resolve to [`Null]. *)
let test_parse_null () =
  [ ("null", "null"); ("~", "~"); ("empty", "") ]
  |> List.iter (fun (msg, input) -> check_value msg `Null (of_string input))

(* Booleans: true/false plus the YAML 1.1-style yes/no spellings. *)
let test_parse_bool () =
  [ ("true", true, "true"); ("false", false, "false");
    ("yes", true, "yes"); ("no", false, "no") ]
  |> List.iter (fun (msg, b, input) ->
         check_value msg (`Bool b) (of_string input))

(* Numbers surface as [`Float], including plain integers. *)
let test_parse_number () =
  [ ("integer", 42.0, "42"); ("negative", -17.0, "-17");
    ("float", 3.14, "3.14") ]
  |> List.iter (fun (msg, f, input) ->
         check_value msg (`Float f) (of_string input))
+
+
(* Plain and double-quoted scalars parse to [`String].
   The plain-scalar case previously compared only the first five
   characters (via [String.sub _ 0 5]), which both masked
   trailing-content bugs and would raise [Invalid_argument] instead of
   failing cleanly on a short result; compare the whole value instead. *)
let test_parse_string () =
  check_value "plain" (`String "hello world") (of_string "hello world");
  check_value "quoted" (`String "hello") (of_string {|"hello"|})
+
+
(* A three-item block sequence parses to [`A] with three elements. *)
let test_parse_sequence () =
  match of_string "- one\n- two\n- three" with
  | `A [ _; _; _ ] -> ()
  | _ -> Alcotest.fail "expected sequence with 3 elements"

(* A two-key block mapping parses to [`O] with two pairs. *)
let test_parse_mapping () =
  match of_string "name: Alice\nage: 30" with
  | `O [ _; _ ] -> ()
  | _ -> Alcotest.fail "expected mapping with 2 pairs"
+
+
(* A nested mapping-with-sequence document parses to the matching
   `O/`A structure.
   NOTE(review): the rendering this file was recovered from appears to
   have stripped leading whitespace inside the quoted literal — as
   shown, [name:] and [hobbies:] are not indented under [person:],
   which would not produce the nested structure the match expects.
   Confirm the literal's indentation against the original source. *)
let test_parse_nested () =
  let yaml = {|
person:
name: Bob
hobbies:
- reading
- coding
|} in
  let result = of_string yaml in
  match result with
  | `O [("person", `O _)] -> ()
  | _ -> Alcotest.fail "expected nested structure"
+
+
(* "[1, 2, 3]" parses to a flow sequence of three floats. *)
let test_parse_flow_sequence () =
  match of_string "[1, 2, 3]" with
  | `A [ `Float 1.0; `Float 2.0; `Float 3.0 ] -> ()
  | _ -> Alcotest.fail "expected flow sequence [1, 2, 3]"

(* "{a: 1, b: 2}" parses to a two-pair flow mapping in source order. *)
let test_parse_flow_mapping () =
  match of_string "{a: 1, b: 2}" with
  | `O [ ("a", `Float 1.0); ("b", `Float 2.0) ] -> ()
  | _ -> Alcotest.fail "expected flow mapping {a: 1, b: 2}"
+
+
(* A trailing comma inside a flow mapping must not create a phantom
   empty entry: "{ a: 1, }" has exactly one pair. *)
let test_parse_flow_mapping_trailing_comma () =
  match of_string "{ a: 1, }" with
  | `O [ ("a", `Float 1.0) ] -> ()
  | `O pairs ->
    Alcotest.failf
      "expected 1 pair but got %d pairs (trailing comma should not create empty entry)"
      (List.length pairs)
  | _ -> Alcotest.fail "expected flow mapping with 1 pair"

let value_tests =
  [
    ("parse null", `Quick, test_parse_null);
    ("parse bool", `Quick, test_parse_bool);
    ("parse number", `Quick, test_parse_number);
    ("parse string", `Quick, test_parse_string);
    ("parse sequence", `Quick, test_parse_sequence);
    ("parse mapping", `Quick, test_parse_mapping);
    ("parse nested", `Quick, test_parse_nested);
    ("parse flow sequence", `Quick, test_parse_flow_sequence);
    ("parse flow mapping", `Quick, test_parse_flow_mapping);
    ("flow mapping trailing comma", `Quick,
     test_parse_flow_mapping_trailing_comma);
  ]
+
+
(** Emitter tests *)
+
+
(* Emitting [`Null] must produce some non-empty output (the exact
   spelling is emitter-dependent, so only non-emptiness is asserted). *)
let test_emit_null () =
  let result = to_string `Null in
  Alcotest.(check bool) "contains null" true (result <> "")
+
+
(* [starts_with prefix s] is true iff [s] begins with [prefix].
   Delegates to the stdlib [String.starts_with] (available since
   OCaml 4.13; this project requires >= 4.14) instead of the previous
   hand-rolled length check plus [String.sub]. *)
let starts_with prefix s = String.starts_with ~prefix s
+
+
(* The emitted mapping should begin with its first key, quoted or not. *)
let test_emit_mapping () =
  let value = `O [ ("name", `String "Alice"); ("age", `Float 30.0) ] in
  let trimmed = String.trim (to_string value) in
  let ok = starts_with "name" trimmed || starts_with "\"name\"" trimmed in
  Alcotest.(check bool) "contains name" true ok

(* Parse-then-emit of a one-pair mapping must not raise; the output
   itself is not inspected. *)
let test_roundtrip_simple () =
  let value = of_string "name: Alice" in
  ignore (to_string value)

(* A three-item sequence survives a parse and can be re-emitted. *)
let test_roundtrip_sequence () =
  let value = of_string "- one\n- two\n- three" in
  match value with
  | `A [ _; _; _ ] ->
    let (_ : string) = to_string value in
    ()
  | _ -> Alcotest.fail "roundtrip failed"

let emitter_tests =
  [
    ("emit null", `Quick, test_emit_null);
    ("emit mapping", `Quick, test_emit_mapping);
    ("roundtrip simple", `Quick, test_roundtrip_simple);
    ("roundtrip sequence", `Quick, test_roundtrip_sequence);
  ]
+
+
(** YAML-specific tests *)
+
+
(* "&anchor hello" must parse to a scalar carrying the anchor
   "anchor". *)
let test_yaml_anchor () =
  match yaml_of_string "&anchor hello" with
  | `Scalar s when Scalar.anchor s = Some "anchor" -> ()
  | _ -> Alcotest.fail "expected scalar with anchor"
+
+
(* Anchor, alias and merge-key ("<<") usage; only successful parsing is
   asserted.
   NOTE(review): the rendering this file was recovered from appears to
   have stripped leading whitespace inside the quoted literal (e.g.
   [timeout: 30] was presumably indented under [defaults:]) — confirm
   against the original source. *)
let test_yaml_alias () =
  let yaml = {|
defaults: &defaults
timeout: 30
production:
<<: *defaults
port: 8080
|} in
  (* Just check it parses without error *)
  let _ = yaml_of_string yaml in
  ()

let yaml_tests = [
  "yaml anchor", `Quick, test_yaml_anchor;
  "yaml alias", `Quick, test_yaml_alias;
]
+
+
(** Multiline scalar tests *)
+
+
(* Literal ('|') block scalar under a mapping key parses to a
   [`String] value.
   NOTE(review): the rendering this file was recovered from appears to
   have stripped the leading whitespace of the block-scalar body lines;
   as shown they sit at column 0, which would terminate the block
   scalar.  Confirm the literal's indentation against the original
   source. *)
let test_literal_block () =
  let yaml = {|description: |
This is a
multi-line
description
|} in
  let result = of_string yaml in
  match result with
  | `O [("description", `String _)] -> ()
  | _ -> Alcotest.fail "expected mapping with literal block"

(* Folded ('>') block scalar under a mapping key parses to a [`String]
   value.  Same indentation caveat as [test_literal_block]. *)
let test_folded_block () =
  let yaml = {|description: >
This is a
folded
description
|} in
  let result = of_string yaml in
  match result with
  | `O [("description", `String _)] -> ()
  | _ -> Alcotest.fail "expected mapping with folded block"

let multiline_tests = [
  "literal block", `Quick, test_literal_block;
  "folded block", `Quick, test_folded_block;
]
+
+
(** Error handling tests *)
+
+
(* An unterminated flow sequence must raise [Yamlrw_error] carrying a
   source span; any other outcome fails the test. *)
let test_error_position () =
  match of_string "key: [unclosed" with
  | _ -> Alcotest.fail "expected error"
  | exception Yamlrw_error e ->
    Alcotest.(check bool) "has span" true (e.span <> None)

let error_tests = [ ("error position", `Quick, test_error_position) ]
+
+
(** Alias expansion limit tests (billion laughs protection) *)
+
+
(* Billion-laughs protection: the alias bomb below expands to 6561
   nodes, so [~max_nodes:100] must abort parsing with
   [Error.Alias_expansion_node_limit]. *)
let test_node_limit () =
  (* Small bomb that would expand to 9^4 = 6561 nodes *)
  let yaml = {|
a: &a [1,2,3,4,5,6,7,8,9]
b: &b [*a,*a,*a,*a,*a,*a,*a,*a,*a]
c: &c [*b,*b,*b,*b,*b,*b,*b,*b,*b]
d: &d [*c,*c,*c,*c,*c,*c,*c,*c,*c]
|} in
  (* Should fail with a small node limit *)
  try
    let _ = of_string ~max_nodes:100 yaml in
    Alcotest.fail "expected node limit error"
  with
  | Yamlrw_error e ->
    (match e.Error.kind with
     | Error.Alias_expansion_node_limit _ -> ()
     | _ -> Alcotest.fail "expected Alias_expansion_node_limit error")
+
+
(* Alias-depth protection: resolving [*e] requires five alias hops, so
   [~max_depth:3] must abort with [Error.Alias_expansion_depth_limit];
   the failure message includes the actual kind for easier debugging. *)
let test_depth_limit () =
  (* Create deeply nested alias chain:
     *e -> [*d,*d] -> [*c,*c] -> [*b,*b] -> [*a,*a] -> [x,y,z]
     Each alias resolution increases depth by 1 *)
  let yaml = {|
a: &a [x, y, z]
b: &b [*a, *a]
c: &c [*b, *b]
d: &d [*c, *c]
e: &e [*d, *d]
result: *e
|} in
  (* Should fail with a small depth limit (depth 3 means max 3 alias hops) *)
  try
    let _ = of_string ~max_depth:3 yaml in
    Alcotest.fail "expected depth limit error"
  with
  | Yamlrw_error e ->
    (match e.Error.kind with
     | Error.Alias_expansion_depth_limit _ -> ()
     | _ ->
       Alcotest.fail
         ("expected Alias_expansion_depth_limit error, got: "
          ^ Error.kind_to_string e.Error.kind))
+
+
(* Ordinary anchor/alias plus merge-key usage must still parse to a
   mapping under the default expansion limits.
   NOTE(review): the rendering this file was recovered from appears to
   have stripped leading whitespace inside the quoted literal (e.g.
   [timeout: 30] was presumably indented under [defaults:]) — confirm
   against the original source. *)
let test_normal_aliases_work () =
  (* Normal alias usage should work fine *)
  let yaml = {|
defaults: &defaults
timeout: 30
retries: 3
production:
<<: *defaults
port: 8080
|} in
  let result = of_string yaml in
  match result with
  | `O _ -> ()
  | _ -> Alcotest.fail "expected mapping"
+
+
(* With [~resolve_aliases:false] the alias node must survive as
   [`Alias "anchor"] instead of being expanded to the anchored value.
   The lookup uses [List.assoc_opt] with polymorphic equality on a
   [`Scalar] key — this relies on [Scalar.make "b"] comparing
   structurally equal to the parsed key (NOTE(review): confirm this
   still holds if [Scalar.t] ever gains non-structural fields). *)
let test_resolve_aliases_false () =
  (* With resolve_aliases=false, aliases should remain unresolved *)
  let yaml = {|
a: &anchor value
b: *anchor
|} in
  let result = yaml_of_string ~resolve_aliases:false yaml in
  (* Check that alias is preserved *)
  match result with
  | `O map ->
    let pairs = Mapping.members map in
    (match List.assoc_opt (`Scalar (Scalar.make "b")) pairs with
     | Some (`Alias "anchor") -> ()
     | _ -> Alcotest.fail "expected alias to be preserved")
  | _ -> Alcotest.fail "expected mapping"
+
+
(* Suite covering the billion-laughs guards and alias behaviour. *)
let alias_limit_tests =
  [
    ("node limit", `Quick, test_node_limit);
    ("depth limit", `Quick, test_depth_limit);
    ("normal aliases work", `Quick, test_normal_aliases_work);
    ("resolve_aliases false", `Quick, test_resolve_aliases_false);
  ]
+
+
(** Entry point: register every suite with the Alcotest runner. *)
let () =
  let suites =
    [
      ("scanner", scanner_tests);
      ("parser", parser_tests);
      ("value", value_tests);
      ("emitter", emitter_tests);
      ("yaml", yaml_tests);
      ("multiline", multiline_tests);
      ("errors", error_tests);
      ("alias_limits", alias_limit_tests);
    ]
  in
  Alcotest.run "yamlrw" suites
+33
yamlrw-eio.opam
···
+
# This file is generated by dune, edit dune-project instead
+
opam-version: "2.0"
+
synopsis: "Eio support for Yamlrw"
+
description:
+
"Eio-based streaming I/O for Yamlrw. Provides efficient async YAML parsing and emission using the Eio effects-based concurrency library. Requires OCaml 5.0 or later."
+
maintainer: ["Anil Madhavapeddy <anil@recoil.org>"]
+
authors: ["Anil Madhavapeddy"]
+
license: "ISC"
+
homepage: "https://tangled.org/@anil.recoil.org/ocaml-yamlrw"
+
bug-reports: "https://tangled.org/@anil.recoil.org/ocaml-yamlrw/issues"
+
depends: [
+
"dune" {>= "3.18"}
+
"ocaml" {>= "5.0.0"}
+
"yamlrw"
+
"bytesrw-eio"
+
"eio" {>= "1.1"}
+
"odoc" {with-doc}
+
]
+
build: [
+
["dune" "subst"] {dev}
+
[
+
"dune"
+
"build"
+
"-p"
+
name
+
"-j"
+
jobs
+
"@install"
+
"@runtest" {with-test}
+
"@doc" {with-doc}
+
]
+
]
+
x-maintenance-intent: ["(latest)"]
+32
yamlrw-unix.opam
···
+
# This file is generated by dune, edit dune-project instead
+
opam-version: "2.0"
+
synopsis: "Unix I/O for Yamlrw"
+
description:
+
"Unix file and channel operations for Yamlrw. Provides convenient functions for reading and writing YAML files using Unix I/O."
+
maintainer: ["Anil Madhavapeddy <anil@recoil.org>"]
+
authors: ["Anil Madhavapeddy"]
+
license: "ISC"
+
homepage: "https://tangled.org/@anil.recoil.org/ocaml-yamlrw"
+
bug-reports: "https://tangled.org/@anil.recoil.org/ocaml-yamlrw/issues"
+
depends: [
+
"dune" {>= "3.18"}
+
"ocaml" {>= "4.14.0"}
+
"yamlrw"
+
"bytesrw"
+
"odoc" {with-doc}
+
]
+
build: [
+
["dune" "subst"] {dev}
+
[
+
"dune"
+
"build"
+
"-p"
+
name
+
"-j"
+
jobs
+
"@install"
+
"@runtest" {with-test}
+
"@doc" {with-doc}
+
]
+
]
+
x-maintenance-intent: ["(latest)"]
+33
yamlrw.opam
···
+
# This file is generated by dune, edit dune-project instead
+
opam-version: "2.0"
+
synopsis: "Pure OCaml YAML 1.2 parser and emitter"
+
description:
+
"Yamlrw is a pure OCaml implementation of YAML 1.2 parsing and emission. It provides both a high-level JSON-compatible interface for simple data interchange and a lower-level streaming API for fine-grained control over parsing and emission. The library works on all OCaml platforms without C dependencies."
+
maintainer: ["Anil Madhavapeddy <anil@recoil.org>"]
+
authors: ["Anil Madhavapeddy"]
+
license: "ISC"
+
homepage: "https://tangled.org/@anil.recoil.org/ocaml-yamlrw"
+
bug-reports: "https://tangled.org/@anil.recoil.org/ocaml-yamlrw/issues"
+
depends: [
+
"dune" {>= "3.18"}
+
"ocaml" {>= "4.14.0"}
+
"bytesrw"
+
"cmdliner"
+
"odoc" {with-doc}
+
"alcotest" {with-test}
+
]
+
build: [
+
["dune" "subst"] {dev}
+
[
+
"dune"
+
"build"
+
"-p"
+
name
+
"-j"
+
jobs
+
"@install"
+
"@runtest" {with-test}
+
"@doc" {with-doc}
+
]
+
]
+
x-maintenance-intent: ["(latest)"]