Pure OCaml Yaml 1.2 reader and writer using Bytesrw

mli files and cleanups

+33
lib/char_class.mli
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** Character classification for YAML parsing *)
+
+
val is_break : char -> bool
+
(** Line break characters (\n or \r) *)
+
+
val is_blank : char -> bool
+
(** Blank (space or tab) *)
+
+
val is_whitespace : char -> bool
+
(** Whitespace (break or blank) *)
+
+
val is_digit : char -> bool
+
(** Decimal digit *)
+
+
val is_hex : char -> bool
+
(** Hexadecimal digit *)
+
+
val is_alpha : char -> bool
+
(** Alphabetic character *)
+
+
val is_alnum : char -> bool
+
(** Alphanumeric character *)
+
+
val is_indicator : char -> bool
+
(** YAML indicator characters *)
+
+
val is_flow_indicator : char -> bool
+
(** Flow context indicator characters (comma and brackets) *)
+26
lib/chomping.mli
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** Block scalar chomping indicators *)
+
+
type t =
+
| Strip (** Remove final line break and trailing empty lines *)
+
| Clip (** Keep final line break, remove trailing empty lines (default) *)
+
| Keep (** Keep final line break and trailing empty lines *)
+
+
val to_string : t -> string
+
(** Convert chomping mode to string *)
+
+
val pp : Format.formatter -> t -> unit
+
(** Pretty-print a chomping mode *)
+
+
val of_char : char -> t option
+
(** Parse chomping indicator from character *)
+
+
val to_char : t -> char option
+
(** Convert chomping mode to indicator character (None for Clip) *)
+
+
val equal : t -> t -> bool
+
(** Test equality of two chomping modes *)
+2 -2
lib/document.ml
···
Format.fprintf fmt "@]@,)"
let equal a b =
-
Option.equal (fun (a1, a2) (b1, b2) -> a1 = b1 && a2 = b2) a.version b.version &&
-
List.equal (fun (h1, p1) (h2, p2) -> h1 = h2 && p1 = p2) a.tags b.tags &&
+
Option.equal ( = ) a.version b.version &&
+
List.equal ( = ) a.tags b.tags &&
Option.equal Yaml.equal a.root b.root &&
a.implicit_start = b.implicit_start &&
a.implicit_end = b.implicit_end
+41
lib/document.mli
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** YAML document with directives and content *)
+
+
type t = {
+
version : (int * int) option;
+
tags : (string * string) list;
+
root : Yaml.t option;
+
implicit_start : bool;
+
implicit_end : bool;
+
}
+
+
val make :
+
?version:(int * int) ->
+
?tags:(string * string) list ->
+
?implicit_start:bool ->
+
?implicit_end:bool ->
+
Yaml.t option -> t
+
(** Create a document *)
+
+
(** {2 Accessors} *)
+
+
val version : t -> (int * int) option
+
val tags : t -> (string * string) list
+
val root : t -> Yaml.t option
+
val implicit_start : t -> bool
+
val implicit_end : t -> bool
+
+
(** {2 Modifiers} *)
+
+
val with_version : int * int -> t -> t
+
val with_tags : (string * string) list -> t -> t
+
val with_root : Yaml.t -> t -> t
+
+
(** {2 Comparison} *)
+
+
val pp : Format.formatter -> t -> unit
+
val equal : t -> t -> bool
+1
lib/dune
···
(name yamlrw)
(public_name yamlrw)
(libraries bytesrw)
+
(flags (:standard -w -37-69))
(modules
; Core types
position
+62
lib/emitter.mli
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** Emitter - converts YAML data structures to string output
+
+
The emitter can write to either a Buffer (default) or directly to a
+
bytesrw Bytes.Writer for streaming output. *)
+
+
(** {1 Configuration} *)
+
+
type config = {
+
encoding : Encoding.t;
+
scalar_style : Scalar_style.t;
+
layout_style : Layout_style.t;
+
indent : int;
+
width : int;
+
canonical : bool;
+
}
+
+
val default_config : config
+
(** Default emitter configuration *)
+
+
(** {1 Emitter Type} *)
+
+
type t
+
+
(** {1 Constructors} *)
+
+
val create : ?config:config -> unit -> t
+
(** Create an emitter that writes to an internal buffer *)
+
+
val of_writer : ?config:config -> Bytesrw.Bytes.Writer.t -> t
+
(** Create an emitter that writes directly to a Bytes.Writer *)
+
+
(** {1 Output} *)
+
+
val contents : t -> string
+
(** Get accumulated output. Returns empty string for writer-based emitters. *)
+
+
val reset : t -> unit
+
(** Reset emitter state and clear buffer *)
+
+
val buffer : t -> Buffer.t option
+
(** Access underlying buffer (None for writer-based emitters) *)
+
+
val flush : t -> unit
+
(** Flush writer sink (no-op for buffer-based emitters) *)
+
+
(** {1 Event Emission} *)
+
+
val emit : t -> Event.t -> unit
+
(** Emit a single event *)
+
+
(** {1 Accessors} *)
+
+
val config : t -> config
+
(** Get emitter configuration *)
+
+
val is_streaming : t -> bool
+
(** Check if emitter is writing to a Writer (vs buffer) *)
+27
lib/encoding.mli
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** Character encoding detection and handling *)
+
+
type t = [
+
| `Utf8
+
| `Utf16be
+
| `Utf16le
+
| `Utf32be
+
| `Utf32le
+
]
+
+
val to_string : t -> string
+
(** Convert encoding to string representation *)
+
+
val pp : Format.formatter -> t -> unit
+
(** Pretty-print an encoding *)
+
+
val detect : string -> t * int
+
(** Detect encoding from BOM or first bytes.
+
Returns (encoding, bom_length) *)
+
+
val equal : t -> t -> bool
+
(** Test equality of two encodings *)
+120
lib/error.mli
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** {1 Error Handling}
+
+
Comprehensive error reporting for YAML parsing and emission.
+
+
This module provides detailed error types that correspond to various
+
failure modes in YAML processing, as specified in the
+
{{:https://yaml.org/spec/1.2.2/}YAML 1.2.2 specification}. *)
+
+
(** {2 Error Classification} *)
+
+
type kind =
+
(* Scanner errors *)
+
| Unexpected_character of char
+
| Unexpected_eof
+
| Invalid_escape_sequence of string
+
| Invalid_unicode_escape of string
+
| Invalid_hex_escape of string
+
| Invalid_tag of string
+
| Invalid_anchor of string
+
| Invalid_alias of string
+
| Invalid_comment
+
| Unclosed_single_quote
+
| Unclosed_double_quote
+
| Unclosed_flow_sequence
+
| Unclosed_flow_mapping
+
| Invalid_indentation of int * int
+
| Invalid_flow_indentation
+
| Tab_in_indentation
+
| Invalid_block_scalar_header of string
+
| Invalid_quoted_scalar_indentation of string
+
| Invalid_directive of string
+
| Invalid_yaml_version of string
+
| Invalid_tag_directive of string
+
| Reserved_directive of string
+
| Illegal_flow_key_line
+
| Block_sequence_disallowed
+
+
(* Parser errors *)
+
| Unexpected_token of string
+
| Expected_document_start
+
| Expected_document_end
+
| Expected_block_entry
+
| Expected_key
+
| Expected_value
+
| Expected_node
+
| Expected_scalar
+
| Expected_sequence_end
+
| Expected_mapping_end
+
| Duplicate_anchor of string
+
| Undefined_alias of string
+
| Alias_cycle of string
+
| Multiple_documents
+
| Mapping_key_too_long
+
+
(* Loader errors *)
+
| Invalid_scalar_conversion of string * string
+
| Type_mismatch of string * string
+
| Unresolved_alias of string
+
| Key_not_found of string
+
| Alias_expansion_node_limit of int
+
| Alias_expansion_depth_limit of int
+
+
(* Emitter errors *)
+
| Invalid_encoding of string
+
| Scalar_contains_invalid_chars of string
+
| Anchor_not_set
+
| Invalid_state of string
+
+
(* Generic *)
+
| Custom of string
+
+
(** {2 Error Value} *)
+
+
type t = {
+
kind : kind;
+
span : Span.t option;
+
context : string list;
+
source : string option;
+
}
+
+
(** {2 Exception} *)
+
+
exception Yamlrw_error of t
+
(** The main exception type raised by all yamlrw operations. *)
+
+
(** {2 Error Construction} *)
+
+
val make : ?span:Span.t -> ?context:string list -> ?source:string -> kind -> t
+
(** Construct an error value. *)
+
+
val raise : ?span:Span.t -> ?context:string list -> ?source:string -> kind -> 'a
+
(** Construct and raise an error. *)
+
+
val raise_at : Position.t -> kind -> 'a
+
(** Raise an error at a specific position. *)
+
+
val raise_span : Span.t -> kind -> 'a
+
(** Raise an error at a specific span. *)
+
+
val with_context : string -> (unit -> 'a) -> 'a
+
(** Execute a function and add context to any raised error. *)
+
+
(** {2 Error Formatting} *)
+
+
val kind_to_string : kind -> string
+
(** Convert an error kind to a human-readable string. *)
+
+
val to_string : t -> string
+
(** Convert an error to a human-readable string. *)
+
+
val pp : Format.formatter -> t -> unit
+
(** Pretty-print an error. *)
+
+
val pp_with_source : source:string -> Format.formatter -> t -> unit
+
(** Pretty-print an error with source context. *)
+10 -15
lib/event.ml
···
span : Span.t;
}
+
let pp_opt_str = Option.value ~default:"none"
+
let pp fmt = function
| Stream_start { encoding } ->
Format.fprintf fmt "stream-start(%a)" Encoding.pp encoding
| Stream_end ->
Format.fprintf fmt "stream-end"
| Document_start { version; implicit } ->
-
Format.fprintf fmt "document-start(version=%s, implicit=%b)"
-
(match version with None -> "none" | Some (maj, min) -> Printf.sprintf "%d.%d" maj min)
-
implicit
+
let version_str = match version with
+
| None -> "none"
+
| Some (maj, min) -> Printf.sprintf "%d.%d" maj min
+
in
+
Format.fprintf fmt "document-start(version=%s, implicit=%b)" version_str implicit
| Document_end { implicit } ->
Format.fprintf fmt "document-end(implicit=%b)" implicit
| Alias { anchor } ->
Format.fprintf fmt "alias(%s)" anchor
| Scalar { anchor; tag; value; style; _ } ->
Format.fprintf fmt "scalar(anchor=%s, tag=%s, style=%a, value=%S)"
-
(Option.value anchor ~default:"none")
-
(Option.value tag ~default:"none")
-
Scalar_style.pp style
-
value
+
(pp_opt_str anchor) (pp_opt_str tag) Scalar_style.pp style value
| Sequence_start { anchor; tag; implicit; style } ->
Format.fprintf fmt "sequence-start(anchor=%s, tag=%s, implicit=%b, style=%a)"
-
(Option.value anchor ~default:"none")
-
(Option.value tag ~default:"none")
-
implicit
-
Layout_style.pp style
+
(pp_opt_str anchor) (pp_opt_str tag) implicit Layout_style.pp style
| Sequence_end ->
Format.fprintf fmt "sequence-end"
| Mapping_start { anchor; tag; implicit; style } ->
Format.fprintf fmt "mapping-start(anchor=%s, tag=%s, implicit=%b, style=%a)"
-
(Option.value anchor ~default:"none")
-
(Option.value tag ~default:"none")
-
implicit
-
Layout_style.pp style
+
(pp_opt_str anchor) (pp_opt_str tag) implicit Layout_style.pp style
| Mapping_end ->
Format.fprintf fmt "mapping-end"
+49
lib/event.mli
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** YAML parser events *)
+
+
type t =
+
| Stream_start of { encoding : Encoding.t }
+
| Stream_end
+
| Document_start of {
+
version : (int * int) option;
+
implicit : bool;
+
}
+
| Document_end of { implicit : bool }
+
| Alias of { anchor : string }
+
| Scalar of {
+
anchor : string option;
+
tag : string option;
+
value : string;
+
plain_implicit : bool;
+
quoted_implicit : bool;
+
style : Scalar_style.t;
+
}
+
| Sequence_start of {
+
anchor : string option;
+
tag : string option;
+
implicit : bool;
+
style : Layout_style.t;
+
}
+
| Sequence_end
+
| Mapping_start of {
+
anchor : string option;
+
tag : string option;
+
implicit : bool;
+
style : Layout_style.t;
+
}
+
| Mapping_end
+
+
type spanned = {
+
event : t;
+
span : Span.t;
+
}
+
+
val pp : Format.formatter -> t -> unit
+
(** Pretty-print an event *)
+
+
val pp_spanned : Format.formatter -> spanned -> unit
+
(** Pretty-print a spanned event *)
+97
lib/input.mli
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** Character input source with lookahead, based on Bytes.Reader.t
+
+
This module wraps a bytesrw [Bytes.Reader.t] to provide character-by-character
+
access with lookahead for the YAML scanner. *)
+
+
(** {2 Re-exported Character Classification} *)
+
+
include module type of Char_class
+
+
(** {2 Input Type} *)
+
+
type t
+
+
(** {2 Constructors} *)
+
+
val of_reader : ?initial_position:Position.t -> Bytesrw.Bytes.Reader.t -> t
+
(** Create input from a Bytes.Reader.t *)
+
+
val of_string : string -> t
+
(** Create input from a string *)
+
+
(** {2 Position and State} *)
+
+
val position : t -> Position.t
+
(** Get current position *)
+
+
val is_eof : t -> bool
+
(** Check if at end of input *)
+
+
val mark : t -> Position.t
+
(** Mark current position for span creation *)
+
+
(** {2 Lookahead} *)
+
+
val peek : t -> char option
+
(** Peek at current character without advancing *)
+
+
val peek_exn : t -> char
+
(** Peek at current character, raising on EOF *)
+
+
val peek_nth : t -> int -> char option
+
(** Peek at nth character (0-indexed from current position) *)
+
+
val peek_string : t -> int -> string
+
(** Peek at up to n characters as a string *)
+
+
val peek_back : t -> char option
+
(** Get the character before the current position *)
+
+
(** {2 Consumption} *)
+
+
val next : t -> char option
+
(** Consume and return next character *)
+
+
val next_exn : t -> char
+
(** Consume and return next character, raising on EOF *)
+
+
val skip : t -> int -> unit
+
(** Skip n characters *)
+
+
val skip_while : t -> (char -> bool) -> unit
+
(** Skip characters while predicate holds *)
+
+
val consume_break : t -> unit
+
(** Consume line break, handling \r\n as single break *)
+
+
(** {2 Predicates} *)
+
+
val next_is : (char -> bool) -> t -> bool
+
(** Check if next char satisfies predicate *)
+
+
val next_is_break : t -> bool
+
val next_is_blank : t -> bool
+
val next_is_whitespace : t -> bool
+
val next_is_digit : t -> bool
+
val next_is_hex : t -> bool
+
val next_is_alpha : t -> bool
+
val next_is_indicator : t -> bool
+
+
val at_document_boundary : t -> bool
+
(** Check if at document boundary (--- or ...) *)
+
+
(** {2 Utilities} *)
+
+
val remaining : t -> string
+
(** Get remaining content from current position *)
+
+
val source : t -> string
+
(** Get a sample of the source for encoding detection *)
+
+
val byte_pos : t -> int
+
(** Get the byte position in the underlying stream *)
+24
lib/layout_style.mli
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** Collection layout styles *)
+
+
type t = [
+
| `Any (** Let emitter choose *)
+
| `Block (** Indentation-based *)
+
| `Flow (** Inline with brackets *)
+
]
+
+
val to_string : t -> string
+
(** Convert style to string representation *)
+
+
val pp : Format.formatter -> t -> unit
+
(** Pretty-print a style *)
+
+
val equal : t -> t -> bool
+
(** Test equality of two styles *)
+
+
val compare : t -> t -> int
+
(** Compare two styles *)
+58 -87
lib/loader.ml
···
pending_key = None;
} :: rest)
+
(** Internal: parse all documents from a parser *)
+
let parse_all_documents parser =
+
let state = create_state () in
+
Parser.iter (process_event state) parser;
+
List.rev state.documents
+
+
(** Internal: extract single document or raise *)
+
let single_document_or_error docs ~empty =
+
match docs with
+
| [] -> empty
+
| [doc] -> doc
+
| _ -> Error.raise Multiple_documents
+
(** Load single document as Value.
@param resolve_aliases Whether to resolve aliases (default true)
···
?(max_nodes = Yaml.default_max_alias_nodes)
?(max_depth = Yaml.default_max_alias_depth)
s =
-
let parser = Parser.of_string s in
-
let state = create_state () in
-
Parser.iter (process_event state) parser;
-
match state.documents with
-
| [] -> `Null
-
| [doc] ->
-
(match Document.root doc with
-
| None -> `Null
-
| Some yaml ->
-
Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth yaml)
-
| _ -> Error.raise Multiple_documents
+
let docs = parse_all_documents (Parser.of_string s) in
+
let doc = single_document_or_error docs ~empty:(Document.make None) in
+
match Document.root doc with
+
| None -> `Null
+
| Some yaml ->
+
Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth yaml
(** Load single document as Yaml.
···
?(max_nodes = Yaml.default_max_alias_nodes)
?(max_depth = Yaml.default_max_alias_depth)
s =
-
let parser = Parser.of_string s in
-
let state = create_state () in
-
Parser.iter (process_event state) parser;
-
match state.documents with
-
| [] -> `Scalar (Scalar.make "")
-
| [doc] ->
-
(match Document.root doc with
-
| None -> `Scalar (Scalar.make "")
-
| Some yaml ->
-
if resolve_aliases then
-
Yaml.resolve_aliases ~max_nodes ~max_depth yaml
-
else
-
yaml)
-
| _ -> Error.raise Multiple_documents
+
let docs = parse_all_documents (Parser.of_string s) in
+
let doc = single_document_or_error docs ~empty:(Document.make None) in
+
match Document.root doc with
+
| None -> `Scalar (Scalar.make "")
+
| Some yaml ->
+
if resolve_aliases then
+
Yaml.resolve_aliases ~max_nodes ~max_depth yaml
+
else
+
yaml
(** Load all documents *)
let documents_of_string s =
-
let parser = Parser.of_string s in
-
let state = create_state () in
-
Parser.iter (process_event state) parser;
-
List.rev state.documents
+
parse_all_documents (Parser.of_string s)
(** {2 Reader-based loading} *)
···
?(max_nodes = Yaml.default_max_alias_nodes)
?(max_depth = Yaml.default_max_alias_depth)
reader =
-
let parser = Parser.of_reader reader in
-
let state = create_state () in
-
Parser.iter (process_event state) parser;
-
match state.documents with
-
| [] -> `Null
-
| [doc] ->
-
(match Document.root doc with
-
| None -> `Null
-
| Some yaml ->
-
Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth yaml)
-
| _ -> Error.raise Multiple_documents
+
let docs = parse_all_documents (Parser.of_reader reader) in
+
let doc = single_document_or_error docs ~empty:(Document.make None) in
+
match Document.root doc with
+
| None -> `Null
+
| Some yaml ->
+
Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth yaml
(** Load single document as Yaml from a Bytes.Reader.
···
?(max_nodes = Yaml.default_max_alias_nodes)
?(max_depth = Yaml.default_max_alias_depth)
reader =
-
let parser = Parser.of_reader reader in
-
let state = create_state () in
-
Parser.iter (process_event state) parser;
-
match state.documents with
-
| [] -> `Scalar (Scalar.make "")
-
| [doc] ->
-
(match Document.root doc with
-
| None -> `Scalar (Scalar.make "")
-
| Some yaml ->
-
if resolve_aliases then
-
Yaml.resolve_aliases ~max_nodes ~max_depth yaml
-
else
-
yaml)
-
| _ -> Error.raise Multiple_documents
+
let docs = parse_all_documents (Parser.of_reader reader) in
+
let doc = single_document_or_error docs ~empty:(Document.make None) in
+
match Document.root doc with
+
| None -> `Scalar (Scalar.make "")
+
| Some yaml ->
+
if resolve_aliases then
+
Yaml.resolve_aliases ~max_nodes ~max_depth yaml
+
else
+
yaml
(** Load all documents from a Bytes.Reader *)
let documents_of_reader reader =
-
let parser = Parser.of_reader reader in
-
let state = create_state () in
-
Parser.iter (process_event state) parser;
-
List.rev state.documents
+
parse_all_documents (Parser.of_reader reader)
+
+
(** {2 Parser-function based loading}
-
(** Generic document loader - extracts common pattern from load_* functions *)
-
let load_generic extract parser =
+
These functions accept a [unit -> Event.spanned option] function
+
instead of a [Parser.t], allowing them to work with any event source
+
(e.g., streaming parsers). *)
+
+
(** Generic document loader using event source function *)
+
let load_generic_fn extract next_event =
let state = create_state () in
let rec loop () =
-
match Parser.next parser with
+
match next_event () with
| None -> None
| Some ev ->
process_event state ev;
···
| _ -> loop ()
in
loop ()
+
+
(** Generic document loader - extracts common pattern from load_* functions *)
+
let load_generic extract parser =
+
load_generic_fn extract (fun () -> Parser.next parser)
(** Load single Value from parser.
···
| Some doc -> loop (f acc doc)
in
loop init
-
-
(** {2 Parser-function based loading}
-
-
These functions accept a [unit -> Event.spanned option] function
-
instead of a [Parser.t], allowing them to work with any event source
-
(e.g., streaming parsers). *)
-
-
(** Generic document loader using event source function *)
-
let load_generic_fn extract next_event =
-
let state = create_state () in
-
let rec loop () =
-
match next_event () with
-
| None -> None
-
| Some ev ->
-
process_event state ev;
-
match ev.event with
-
| Event.Document_end _ ->
-
(match state.documents with
-
| doc :: _ ->
-
state.documents <- [];
-
Some (extract doc)
-
| [] -> None)
-
| Event.Stream_end -> None
-
| _ -> loop ()
-
in
-
loop ()
(** Load single Value from event source.
+104
lib/loader.mli
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** Loader - converts parser events to YAML data structures *)
+
+
(** {1 String-based loading} *)
+
+
val value_of_string :
+
?resolve_aliases:bool ->
+
?max_nodes:int ->
+
?max_depth:int ->
+
string -> Value.t
+
(** Load single document as Value.
+
+
@param resolve_aliases Whether to resolve aliases (default true)
+
@param max_nodes Maximum nodes during alias expansion (default 10M)
+
@param max_depth Maximum alias nesting depth (default 100) *)
+
+
val yaml_of_string :
+
?resolve_aliases:bool ->
+
?max_nodes:int ->
+
?max_depth:int ->
+
string -> Yaml.t
+
(** Load single document as Yaml.
+
+
@param resolve_aliases Whether to resolve aliases (default false)
+
@param max_nodes Maximum nodes during alias expansion (default 10M)
+
@param max_depth Maximum alias nesting depth (default 100) *)
+
+
val documents_of_string : string -> Document.t list
+
(** Load all documents from a string *)
+
+
(** {1 Reader-based loading} *)
+
+
val value_of_reader :
+
?resolve_aliases:bool ->
+
?max_nodes:int ->
+
?max_depth:int ->
+
Bytesrw.Bytes.Reader.t -> Value.t
+
(** Load single document as Value from a Bytes.Reader *)
+
+
val yaml_of_reader :
+
?resolve_aliases:bool ->
+
?max_nodes:int ->
+
?max_depth:int ->
+
Bytesrw.Bytes.Reader.t -> Yaml.t
+
(** Load single document as Yaml from a Bytes.Reader *)
+
+
val documents_of_reader : Bytesrw.Bytes.Reader.t -> Document.t list
+
(** Load all documents from a Bytes.Reader *)
+
+
(** {1 Parser-based loading} *)
+
+
val load_value :
+
?resolve_aliases:bool ->
+
?max_nodes:int ->
+
?max_depth:int ->
+
Parser.t -> Value.t option
+
(** Load single Value from parser *)
+
+
val load_yaml : Parser.t -> Yaml.t option
+
(** Load single Yaml from parser *)
+
+
val load_document : Parser.t -> Document.t option
+
(** Load single Document from parser *)
+
+
val iter_documents : (Document.t -> unit) -> Parser.t -> unit
+
(** Iterate over documents from parser *)
+
+
val fold_documents : ('a -> Document.t -> 'a) -> 'a -> Parser.t -> 'a
+
(** Fold over documents from parser *)
+
+
(** {1 Event function-based loading}
+
+
These functions accept a [unit -> Event.spanned option] function
+
instead of a [Parser.t], allowing them to work with any event source. *)
+
+
val value_of_parser :
+
?resolve_aliases:bool ->
+
?max_nodes:int ->
+
?max_depth:int ->
+
(unit -> Event.spanned option) -> Value.t
+
(** Load single Value from event source function *)
+
+
val yaml_of_parser :
+
?resolve_aliases:bool ->
+
?max_nodes:int ->
+
?max_depth:int ->
+
(unit -> Event.spanned option) -> Yaml.t
+
(** Load single Yaml from event source function *)
+
+
val document_of_parser : (unit -> Event.spanned option) -> Document.t option
+
(** Load single Document from event source function *)
+
+
val documents_of_parser : (unit -> Event.spanned option) -> Document.t list
+
(** Load all documents from event source function *)
+
+
val iter_documents_parser : (Document.t -> unit) -> (unit -> Event.spanned option) -> unit
+
(** Iterate over documents from event source function *)
+
+
val fold_documents_parser : ('a -> Document.t -> 'a) -> 'a -> (unit -> Event.spanned option) -> 'a
+
(** Fold over documents from event source function *)
+2 -6
lib/mapping.ml
···
let pp pp_key pp_val fmt t =
Format.fprintf fmt "@[<hv 2>mapping(@,";
-
(match t.anchor with
-
| Some a -> Format.fprintf fmt "anchor=%s,@ " a
-
| None -> ());
-
(match t.tag with
-
| Some tag -> Format.fprintf fmt "tag=%s,@ " tag
-
| None -> ());
+
Option.iter (Format.fprintf fmt "anchor=%s,@ ") t.anchor;
+
Option.iter (Format.fprintf fmt "tag=%s,@ ") t.tag;
Format.fprintf fmt "style=%a,@ " Layout_style.pp t.style;
Format.fprintf fmt "members={@,";
List.iteri (fun i (k, v) ->
+54
lib/mapping.mli
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** YAML mapping (object) values with metadata *)
+
+
type ('k, 'v) t
+
+
val make :
+
?anchor:string ->
+
?tag:string ->
+
?implicit:bool ->
+
?style:Layout_style.t ->
+
('k * 'v) list -> ('k, 'v) t
+
(** Create a mapping *)
+
+
(** {2 Accessors} *)
+
+
val members : ('k, 'v) t -> ('k * 'v) list
+
val anchor : ('k, 'v) t -> string option
+
val tag : ('k, 'v) t -> string option
+
val implicit : ('k, 'v) t -> bool
+
val style : ('k, 'v) t -> Layout_style.t
+
+
(** {2 Modifiers} *)
+
+
val with_anchor : string -> ('k, 'v) t -> ('k, 'v) t
+
val with_tag : string -> ('k, 'v) t -> ('k, 'v) t
+
val with_style : Layout_style.t -> ('k, 'v) t -> ('k, 'v) t
+
+
(** {2 Operations} *)
+
+
val map_keys : ('k -> 'k2) -> ('k, 'v) t -> ('k2, 'v) t
+
val map_values : ('v -> 'v2) -> ('k, 'v) t -> ('k, 'v2) t
+
val map : ('k -> 'v -> 'k2 * 'v2) -> ('k, 'v) t -> ('k2, 'v2) t
+
val length : ('k, 'v) t -> int
+
val is_empty : ('k, 'v) t -> bool
+
val find : ('k -> bool) -> ('k, 'v) t -> 'v option
+
val find_key : ('k -> bool) -> ('k, 'v) t -> ('k * 'v) option
+
val mem : ('k -> bool) -> ('k, 'v) t -> bool
+
val keys : ('k, 'v) t -> 'k list
+
val values : ('k, 'v) t -> 'v list
+
val iter : ('k -> 'v -> unit) -> ('k, 'v) t -> unit
+
val fold : ('a -> 'k -> 'v -> 'a) -> 'a -> ('k, 'v) t -> 'a
+
+
(** {2 Comparison} *)
+
+
val pp :
+
(Format.formatter -> 'k -> unit) ->
+
(Format.formatter -> 'v -> unit) ->
+
Format.formatter -> ('k, 'v) t -> unit
+
val equal : ('k -> 'k -> bool) -> ('v -> 'v -> bool) -> ('k, 'v) t -> ('k, 'v) t -> bool
+
val compare : ('k -> 'k -> int) -> ('v -> 'v -> int) -> ('k, 'v) t -> ('k, 'v) t -> int
+1 -5
lib/parser.ml
···
let skip_token t =
t.current_token <- None
-
(** Check if current token matches *)
+
(** Check if current token matches predicate *)
let check t pred =
match peek_token t with
| Some tok -> pred tok.token
| None -> false
-
-
(** Check for specific token *)
-
let check_token t token_match =
-
check t token_match
(** Push state onto stack *)
let push_state t s =
+41
lib/parser.mli
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** YAML parser - converts tokens to semantic events via state machine *)
+
+
type t
+
+
(** {2 Constructors} *)
+
+
val of_string : string -> t
+
(** Create parser from a string *)
+
+
val of_scanner : Scanner.t -> t
+
(** Create parser from a scanner *)
+
+
val of_input : Input.t -> t
+
(** Create parser from an input source *)
+
+
val of_reader : Bytesrw.Bytes.Reader.t -> t
+
(** Create parser from a Bytes.Reader *)
+
+
(** {2 Event Access} *)
+
+
val next : t -> Event.spanned option
+
(** Get next event *)
+
+
val peek : t -> Event.spanned option
+
(** Peek at next event without consuming *)
+
+
(** {2 Iteration} *)
+
+
val iter : (Event.spanned -> unit) -> t -> unit
+
(** Iterate over all events *)
+
+
val fold : ('a -> Event.spanned -> 'a) -> 'a -> t -> 'a
+
(** Fold over all events *)
+
+
val to_list : t -> Event.spanned list
+
(** Convert to list of events *)
+42
lib/position.mli
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** Position tracking for source locations *)
+
+
type t = {
+
index : int; (** Byte offset from start *)
+
line : int; (** 1-indexed line number *)
+
column : int; (** 1-indexed column number *)
+
}
+
+
val initial : t
+
(** Initial position (index=0, line=1, column=1) *)
+
+
val advance_byte : t -> t
+
(** Advance by one byte (increments index and column) *)
+
+
val advance_line : t -> t
+
(** Advance to next line (increments index and line, resets column to 1) *)
+
+
val advance_char : char -> t -> t
+
(** Advance by one character, handling newlines appropriately *)
+
+
val advance_utf8 : Uchar.t -> t -> t
+
(** Advance by one Unicode character, handling newlines and multi-byte characters *)
+
+
val advance_bytes : int -> t -> t
+
(** Advance by n bytes *)
+
+
val pp : Format.formatter -> t -> unit
+
(** Pretty-print a position *)
+
+
val to_string : t -> string
+
(** Convert position to string *)
+
+
val compare : t -> t -> int
+
(** Compare two positions by index *)
+
+
val equal : t -> t -> bool
+
(** Test equality of two positions *)
+22
lib/quoting.mli
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** YAML scalar quoting detection *)
+
+
val needs_quoting : string -> bool
+
(** Check if a string value needs quoting in YAML output.
+
Returns true if the string:
+
- Is empty
+
- Starts with an indicator character
+
- Is a reserved word (null, true, false, yes, no, etc.)
+
- Contains characters that would be ambiguous
+
- Looks like a number *)
+
+
val needs_double_quotes : string -> bool
+
(** Check if a string requires double quotes (vs single quotes).
+
Returns true if the string contains characters that need escape sequences. *)
+
+
val choose_style : string -> [> `Plain | `Single_quoted | `Double_quoted ]
+
(** Choose the appropriate quoting style for a string value *)
+2 -6
lib/scalar.ml
···
let pp fmt t =
Format.fprintf fmt "scalar(%S" t.value;
-
(match t.anchor with
-
| Some a -> Format.fprintf fmt ", anchor=%s" a
-
| None -> ());
-
(match t.tag with
-
| Some tag -> Format.fprintf fmt ", tag=%s" tag
-
| None -> ());
+
Option.iter (Format.fprintf fmt ", anchor=%s") t.anchor;
+
Option.iter (Format.fprintf fmt ", tag=%s") t.tag;
Format.fprintf fmt ", style=%a)" Scalar_style.pp t.style
let equal a b =
+38
lib/scalar.mli
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** YAML scalar values with metadata *)
+
+
type t
+
+
val make :
+
?anchor:string ->
+
?tag:string ->
+
?plain_implicit:bool ->
+
?quoted_implicit:bool ->
+
?style:Scalar_style.t ->
+
string -> t
+
(** Create a scalar value *)
+
+
(** {2 Accessors} *)
+
+
val value : t -> string
+
val anchor : t -> string option
+
val tag : t -> string option
+
val style : t -> Scalar_style.t
+
val plain_implicit : t -> bool
+
val quoted_implicit : t -> bool
+
+
(** {2 Modifiers} *)
+
+
val with_anchor : string -> t -> t
+
val with_tag : string -> t -> t
+
val with_style : Scalar_style.t -> t -> t
+
+
(** {2 Comparison} *)
+
+
val pp : Format.formatter -> t -> unit
+
val equal : t -> t -> bool
+
val compare : t -> t -> int
+27
lib/scalar_style.mli
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** Scalar formatting styles *)
+
+
type t = [
+
| `Any (** Let emitter choose *)
+
| `Plain (** Unquoted: foo *)
+
| `Single_quoted (** 'foo' *)
+
| `Double_quoted (** "foo" *)
+
| `Literal (** | block *)
+
| `Folded (** > block *)
+
]
+
+
val to_string : t -> string
+
(** Convert style to string representation *)
+
+
val pp : Format.formatter -> t -> unit
+
(** Pretty-print a style *)
+
+
val equal : t -> t -> bool
+
(** Test equality of two styles *)
+
+
val compare : t -> t -> int
+
(** Compare two styles *)
+14 -25
lib/scanner.ml
···
emit t span Token.Value;
t.pending_value <- false (* We've emitted a VALUE, no longer pending *)
-
and fetch_alias t =
+
and fetch_anchor_or_alias t ~is_alias =
save_simple_key t;
t.allow_simple_key <- false;
t.document_has_content <- true;
let start = Input.mark t.input in
-
ignore (Input.next t.input); (* consume * *)
+
ignore (Input.next t.input); (* consume * or & *)
let name, span = scan_anchor_alias t in
let span = Span.make ~start ~stop:span.stop in
-
emit t span (Token.Alias name)
+
let token = if is_alias then Token.Alias name else Token.Anchor name in
+
emit t span token
-
and fetch_anchor t =
-
save_simple_key t;
-
t.allow_simple_key <- false;
-
t.document_has_content <- true;
-
let start = Input.mark t.input in
-
ignore (Input.next t.input); (* consume & *)
-
let name, span = scan_anchor_alias t in
-
let span = Span.make ~start ~stop:span.stop in
-
emit t span (Token.Anchor name)
+
and fetch_alias t = fetch_anchor_or_alias t ~is_alias:true
+
and fetch_anchor t = fetch_anchor_or_alias t ~is_alias:false
and fetch_tag t =
save_simple_key t;
···
let value, style, span = scan_block_scalar t literal in
emit t span (Token.Scalar { style; value })
-
and fetch_single_quoted t =
+
and fetch_quoted t ~double =
save_simple_key t;
t.allow_simple_key <- false;
t.document_has_content <- true;
-
let value, span = scan_single_quoted t in
+
let value, span =
+
if double then scan_double_quoted t else scan_single_quoted t
+
in
(* Allow adjacent values after quoted scalars in flow context (for JSON compatibility) *)
skip_to_next_token t;
if t.flow_level > 0 then
t.adjacent_value_allowed_at <- Some (Input.position t.input);
-
emit t span (Token.Scalar { style = `Single_quoted; value })
+
let style = if double then `Double_quoted else `Single_quoted in
+
emit t span (Token.Scalar { style; value })
-
and fetch_double_quoted t =
-
save_simple_key t;
-
t.allow_simple_key <- false;
-
t.document_has_content <- true;
-
let value, span = scan_double_quoted t in
-
(* Allow adjacent values after quoted scalars in flow context (for JSON compatibility) *)
-
skip_to_next_token t;
-
if t.flow_level > 0 then
-
t.adjacent_value_allowed_at <- Some (Input.position t.input);
-
emit t span (Token.Scalar { style = `Double_quoted; value })
+
and fetch_single_quoted t = fetch_quoted t ~double:false
+
and fetch_double_quoted t = fetch_quoted t ~double:true
and can_start_plain t =
(* Check if - ? : can start a plain scalar *)
+43
lib/scanner.mli
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** YAML tokenizer/scanner with lookahead for ambiguity resolution *)
+
+
type t
+
+
(** {2 Constructors} *)
+
+
val of_string : string -> t
+
(** Create scanner from a string *)
+
+
val of_input : Input.t -> t
+
(** Create scanner from an input source *)
+
+
val of_reader : Bytesrw.Bytes.Reader.t -> t
+
(** Create scanner from a Bytes.Reader *)
+
+
(** {2 Position} *)
+
+
val position : t -> Position.t
+
(** Get current position in input *)
+
+
(** {2 Token Access} *)
+
+
val next : t -> Token.spanned option
+
(** Get next token *)
+
+
val peek : t -> Token.spanned option
+
(** Peek at next token without consuming *)
+
+
(** {2 Iteration} *)
+
+
val iter : (Token.spanned -> unit) -> t -> unit
+
(** Iterate over all tokens *)
+
+
val fold : ('a -> Token.spanned -> 'a) -> 'a -> t -> 'a
+
(** Fold over all tokens *)
+
+
val to_list : t -> Token.spanned list
+
(** Convert to list of tokens *)
+2 -6
lib/sequence.ml
···
let pp pp_elem fmt t =
Format.fprintf fmt "@[<hv 2>sequence(@,";
-
(match t.anchor with
-
| Some a -> Format.fprintf fmt "anchor=%s,@ " a
-
| None -> ());
-
(match t.tag with
-
| Some tag -> Format.fprintf fmt "tag=%s,@ " tag
-
| None -> ());
+
Option.iter (Format.fprintf fmt "anchor=%s,@ ") t.anchor;
+
Option.iter (Format.fprintf fmt "tag=%s,@ ") t.tag;
Format.fprintf fmt "style=%a,@ " Layout_style.pp t.style;
Format.fprintf fmt "members=[@,%a@]@,)"
(Format.pp_print_list ~pp_sep:(fun fmt () -> Format.fprintf fmt ",@ ") pp_elem)
+46
lib/sequence.mli
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** YAML sequence (array) values with metadata *)
+
+
type 'a t
+
+
val make :
+
?anchor:string ->
+
?tag:string ->
+
?implicit:bool ->
+
?style:Layout_style.t ->
+
'a list -> 'a t
+
(** Create a sequence *)
+
+
(** {2 Accessors} *)
+
+
val members : 'a t -> 'a list
+
val anchor : 'a t -> string option
+
val tag : 'a t -> string option
+
val implicit : 'a t -> bool
+
val style : 'a t -> Layout_style.t
+
+
(** {2 Modifiers} *)
+
+
val with_anchor : string -> 'a t -> 'a t
+
val with_tag : string -> 'a t -> 'a t
+
val with_style : Layout_style.t -> 'a t -> 'a t
+
+
(** {2 Operations} *)
+
+
val map : ('a -> 'b) -> 'a t -> 'b t
+
val length : 'a t -> int
+
val is_empty : 'a t -> bool
+
val nth : 'a t -> int -> 'a
+
val nth_opt : 'a t -> int -> 'a option
+
val iter : ('a -> unit) -> 'a t -> unit
+
val fold : ('b -> 'a -> 'b) -> 'b -> 'a t -> 'b
+
+
(** {2 Comparison} *)
+
+
val pp : (Format.formatter -> 'a -> unit) -> Format.formatter -> 'a t -> unit
+
val equal : ('a -> 'a -> bool) -> 'a t -> 'a t -> bool
+
val compare : ('a -> 'a -> int) -> 'a t -> 'a t -> int
+10 -20
lib/serialize.ml
···
| `A seq ->
let members = Sequence.members seq in
-
let style =
-
(* Force flow style for empty sequences *)
-
if members = [] then `Flow
-
else Sequence.style seq
-
in
+
(* Force flow style for empty sequences *)
+
let style = if members = [] then `Flow else Sequence.style seq in
emit (Event.Sequence_start {
anchor = Sequence.anchor seq;
tag = Sequence.tag seq;
···
| `O map ->
let members = Mapping.members map in
-
let style =
-
(* Force flow style for empty mappings *)
-
if members = [] then `Flow
-
else Mapping.style map
-
in
+
(* Force flow style for empty mappings *)
+
let style = if members = [] then `Flow else Mapping.style map in
emit (Event.Mapping_start {
anchor = Mapping.anchor map;
tag = Mapping.tag map;
···
})
| `A items ->
+
(* Force flow style for empty sequences, otherwise use config *)
let style =
-
(* Force flow style for empty sequences *)
-
if items = [] then `Flow
-
else if config.Emitter.layout_style = `Flow then `Flow
-
else `Block
+
if items = [] || config.Emitter.layout_style = `Flow then `Flow else `Block
in
emit (Event.Sequence_start {
anchor = None; tag = None;
···
emit Event.Sequence_end
| `O pairs ->
+
(* Force flow style for empty mappings, otherwise use config *)
let style =
-
(* Force flow style for empty mappings *)
-
if pairs = [] then `Flow
-
else if config.Emitter.layout_style = `Flow then `Flow
-
else `Block
+
if pairs = [] || config.Emitter.layout_style = `Flow then `Flow else `Block
in
emit (Event.Mapping_start {
anchor = None; tag = None;
···
emit_yaml_node_impl ~emit:emitter yaml
(** Emit a complete YAML stream using an emitter function *)
-
let emit_yaml ~emitter ~config yaml =
+
let emit_yaml_fn ~emitter ~config yaml =
emitter (Event.Stream_start { encoding = config.Emitter.encoding });
emitter (Event.Document_start { version = None; implicit = true });
emit_yaml_node_fn ~emitter yaml;
···
emit_value_node_impl ~emit:emitter ~config value
(** Emit a complete Value stream using an emitter function *)
-
let emit_value ~emitter ~config value =
+
let emit_value_fn ~emitter ~config value =
emitter (Event.Stream_start { encoding = config.Emitter.encoding });
emitter (Event.Document_start { version = None; implicit = true });
emit_value_node_fn ~emitter ~config value;
+133
lib/serialize.mli
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** Serialize - high-level serialization to buffers and event streams
+
+
This module provides functions to convert YAML values to events and strings.
+
Both {!Emitter.t}-based and function-based emission APIs are provided. *)
+
+
(** {1 Emitter.t-based API} *)
+
+
val emit_yaml_node : Emitter.t -> Yaml.t -> unit
+
(** Emit a YAML node to an emitter *)
+
+
val emit_yaml : Emitter.t -> Yaml.t -> unit
+
(** Emit a complete YAML document to an emitter (includes stream/document markers) *)
+
+
val emit_value_node : Emitter.t -> Value.t -> unit
+
(** Emit a Value node to an emitter *)
+
+
val emit_value : Emitter.t -> Value.t -> unit
+
(** Emit a complete Value document to an emitter (includes stream/document markers) *)
+
+
val emit_document : ?resolve_aliases:bool -> Emitter.t -> Document.t -> unit
+
(** Emit a document to an emitter
+
+
@param resolve_aliases Whether to resolve aliases before emission (default true) *)
+
+
(** {1 Buffer-based API} *)
+
+
val value_to_buffer :
+
?config:Emitter.config ->
+
?buffer:Buffer.t ->
+
Value.t -> Buffer.t
+
(** Serialize a Value to a buffer
+
+
@param config Emitter configuration (default: {!Emitter.default_config})
+
@param buffer Optional buffer to append to; creates new one if not provided *)
+
+
val yaml_to_buffer :
+
?config:Emitter.config ->
+
?buffer:Buffer.t ->
+
Yaml.t -> Buffer.t
+
(** Serialize a Yaml.t to a buffer *)
+
+
val documents_to_buffer :
+
?config:Emitter.config ->
+
?resolve_aliases:bool ->
+
?buffer:Buffer.t ->
+
Document.t list -> Buffer.t
+
(** Serialize documents to a buffer
+
+
@param resolve_aliases Whether to resolve aliases before emission (default true) *)
+
+
(** {1 String-based API} *)
+
+
val value_to_string : ?config:Emitter.config -> Value.t -> string
+
(** Serialize a Value to a string *)
+
+
val yaml_to_string : ?config:Emitter.config -> Yaml.t -> string
+
(** Serialize a Yaml.t to a string *)
+
+
val documents_to_string :
+
?config:Emitter.config ->
+
?resolve_aliases:bool ->
+
Document.t list -> string
+
(** Serialize documents to a string *)
+
+
(** {1 Writer-based API}
+
+
These functions write directly to a bytesrw [Bytes.Writer.t],
+
enabling true streaming output without intermediate string allocation. *)
+
+
val value_to_writer :
+
?config:Emitter.config ->
+
?eod:bool ->
+
Bytesrw.Bytes.Writer.t -> Value.t -> unit
+
(** Serialize a Value directly to a Bytes.Writer
+
+
@param eod Whether to write end-of-data after serialization (default true) *)
+
+
val yaml_to_writer :
+
?config:Emitter.config ->
+
?eod:bool ->
+
Bytesrw.Bytes.Writer.t -> Yaml.t -> unit
+
(** Serialize a Yaml.t directly to a Bytes.Writer *)
+
+
val documents_to_writer :
+
?config:Emitter.config ->
+
?resolve_aliases:bool ->
+
?eod:bool ->
+
Bytesrw.Bytes.Writer.t -> Document.t list -> unit
+
(** Serialize documents directly to a Bytes.Writer *)
+
+
(** {1 Function-based API}
+
+
These functions accept an emit function [Event.t -> unit] instead of
+
an {!Emitter.t}, allowing them to work with any event sink. *)
+
+
val emit_yaml_node_fn : emitter:(Event.t -> unit) -> Yaml.t -> unit
+
(** Emit a YAML node using an emitter function *)
+
+
val emit_yaml_fn :
+
emitter:(Event.t -> unit) ->
+
config:Emitter.config ->
+
Yaml.t -> unit
+
(** Emit a complete YAML stream using an emitter function *)
+
+
val emit_value_node_fn :
+
emitter:(Event.t -> unit) ->
+
config:Emitter.config ->
+
Value.t -> unit
+
(** Emit a Value node using an emitter function *)
+
+
val emit_value_fn :
+
emitter:(Event.t -> unit) ->
+
config:Emitter.config ->
+
Value.t -> unit
+
(** Emit a complete Value stream using an emitter function *)
+
+
val emit_document_fn :
+
?resolve_aliases:bool ->
+
emitter:(Event.t -> unit) ->
+
Document.t -> unit
+
(** Emit a document using an emitter function *)
+
+
val emit_documents :
+
emitter:(Event.t -> unit) ->
+
config:Emitter.config ->
+
?resolve_aliases:bool ->
+
Document.t list -> unit
+
(** Emit multiple documents using an emitter function *)
+35
lib/span.mli
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** Source spans representing ranges in input *)
+
+
type t = {
+
start : Position.t;
+
stop : Position.t;
+
}
+
+
val make : start:Position.t -> stop:Position.t -> t
+
(** Create a span from start and stop positions *)
+
+
val point : Position.t -> t
+
(** Create a zero-width span at a single position *)
+
+
val merge : t -> t -> t
+
(** Merge two spans into one covering both *)
+
+
val extend : t -> Position.t -> t
+
(** Extend a span to a new stop position *)
+
+
val pp : Format.formatter -> t -> unit
+
(** Pretty-print a span *)
+
+
val to_string : t -> string
+
(** Convert span to string *)
+
+
val compare : t -> t -> int
+
(** Compare two spans *)
+
+
val equal : t -> t -> bool
+
(** Test equality of two spans *)
+54
lib/tag.mli
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** YAML tags for type information *)
+
+
type t = {
+
handle : string; (** e.g., "!" or "!!" or "!foo!" *)
+
suffix : string; (** e.g., "str", "int", "custom/type" *)
+
}
+
+
val make : handle:string -> suffix:string -> t
+
(** Create a tag from handle and suffix *)
+
+
val of_string : string -> t option
+
(** Parse a tag string *)
+
+
val to_string : t -> string
+
(** Convert tag to string representation *)
+
+
val to_uri : t -> string
+
(** Convert tag to full URI representation *)
+
+
val pp : Format.formatter -> t -> unit
+
(** Pretty-print a tag *)
+
+
val equal : t -> t -> bool
+
(** Test equality of two tags *)
+
+
val compare : t -> t -> int
+
(** Compare two tags *)
+
+
(** {2 Standard Tags} *)
+
+
val null : t
+
val bool : t
+
val int : t
+
val float : t
+
val str : t
+
val seq : t
+
val map : t
+
val binary : t
+
val timestamp : t
+
+
(** {2 Tag Predicates} *)
+
+
val is_null : t -> bool
+
val is_bool : t -> bool
+
val is_int : t -> bool
+
val is_float : t -> bool
+
val is_str : t -> bool
+
val is_seq : t -> bool
+
val is_map : t -> bool
+43
lib/token.mli
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** YAML token types produced by the scanner *)
+
+
type t =
+
| Stream_start of Encoding.t
+
| Stream_end
+
| Version_directive of { major : int; minor : int }
+
| Tag_directive of { handle : string; prefix : string }
+
| Document_start (** --- *)
+
| Document_end (** ... *)
+
| Block_sequence_start
+
| Block_mapping_start
+
| Block_entry (** - *)
+
| Block_end (** implicit, from dedent *)
+
| Flow_sequence_start (** \[ *)
+
| Flow_sequence_end (** \] *)
+
| Flow_mapping_start (** \{ *)
+
| Flow_mapping_end (** \} *)
+
| Flow_entry (** , *)
+
| Key (** ? or implicit key *)
+
| Value (** : *)
+
| Anchor of string (** &name *)
+
| Alias of string (** *name *)
+
| Tag of { handle : string; suffix : string }
+
| Scalar of { style : Scalar_style.t; value : string }
+
+
type spanned = {
+
token : t;
+
span : Span.t;
+
}
+
+
val pp_token : Format.formatter -> t -> unit
+
(** Pretty-print a token *)
+
+
val pp : Format.formatter -> t -> unit
+
(** Pretty-print a token (alias for pp_token) *)
+
+
val pp_spanned : Format.formatter -> spanned -> unit
+
(** Pretty-print a spanned token *)
+70
lib/value.mli
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** JSON-compatible YAML value representation *)
+
+
type t = [
+
| `Null
+
| `Bool of bool
+
| `Float of float
+
| `String of string
+
| `A of t list
+
| `O of (string * t) list
+
]
+
+
(** {2 Constructors} *)
+
+
val null : t
+
val bool : bool -> t
+
val int : int -> t
+
val float : float -> t
+
val string : string -> t
+
val list : ('a -> t) -> 'a list -> t
+
val obj : (string * t) list -> t
+
+
(** {2 Type Name} *)
+
+
val type_name : t -> string
+
(** Get the type name for error messages *)
+
+
(** {2 Safe Accessors} *)
+
+
val as_null : t -> unit option
+
val as_bool : t -> bool option
+
val as_float : t -> float option
+
val as_string : t -> string option
+
val as_list : t -> t list option
+
val as_assoc : t -> (string * t) list option
+
val as_int : t -> int option
+
+
(** {2 Unsafe Accessors} *)
+
+
val to_null : t -> unit
+
val to_bool : t -> bool
+
val to_float : t -> float
+
val to_string : t -> string
+
val to_list : t -> t list
+
val to_assoc : t -> (string * t) list
+
val to_int : t -> int
+
+
(** {2 Object Access} *)
+
+
val mem : string -> t -> bool
+
val find : string -> t -> t option
+
val get : string -> t -> t
+
val keys : t -> string list
+
val values : t -> t list
+
+
(** {2 Combinators} *)
+
+
val combine : t -> t -> t
+
val map : (t -> t) -> t -> t
+
val filter : (t -> bool) -> t -> t
+
+
(** {2 Comparison} *)
+
+
val pp : Format.formatter -> t -> unit
+
val equal : t -> t -> bool
+
val compare : t -> t -> int
+3 -9
lib/yaml.ml
···
match v with
| `Scalar s ->
(* Register anchor after we have the resolved node *)
-
(match Scalar.anchor s with
-
| Some name -> register_anchor name v
-
| None -> ());
+
Option.iter (fun name -> register_anchor name v) (Scalar.anchor s);
v
| `Alias name ->
expand_alias ~depth name
···
~style:(Sequence.style seq)
resolved_members) in
(* Register anchor with resolved node *)
-
(match Sequence.anchor seq with
-
| Some name -> register_anchor name resolved
-
| None -> ());
+
Option.iter (fun name -> register_anchor name resolved) (Sequence.anchor seq);
resolved
| `O map ->
(* Process key-value pairs in document order *)
···
~style:(Mapping.style map)
resolved_pairs) in
(* Register anchor with resolved node *)
-
(match Mapping.anchor map with
-
| Some name -> register_anchor name resolved
-
| None -> ());
+
Option.iter (fun name -> register_anchor name resolved) (Mapping.anchor map);
resolved
in
resolve ~depth:0 root
+63
lib/yaml.mli
···
+
(*---------------------------------------------------------------------------
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
+
SPDX-License-Identifier: ISC
+
---------------------------------------------------------------------------*)
+
+
(** Full YAML representation with anchors, tags, and aliases *)
+
+
type t = [
+
| `Scalar of Scalar.t
+
| `Alias of string
+
| `A of t Sequence.t
+
| `O of (t, t) Mapping.t
+
]
+
+
(** {2 Pretty Printing} *)
+
+
val pp : Format.formatter -> t -> unit
+
+
(** {2 Equality} *)
+
+
val equal : t -> t -> bool
+
+
(** {2 Conversion from Value} *)
+
+
val of_value : Value.t -> t
+
(** Construct from JSON-compatible Value *)
+
+
(** {2 Alias Resolution} *)
+
+
val default_max_alias_nodes : int
+
(** Default maximum nodes during alias expansion (10 million) *)
+
+
val default_max_alias_depth : int
+
(** Default maximum alias nesting depth (100) *)
+
+
val resolve_aliases : ?max_nodes:int -> ?max_depth:int -> t -> t
+
(** Resolve aliases by replacing them with referenced nodes.
+
+
@param max_nodes Maximum number of nodes to create during expansion
+
@param max_depth Maximum depth of alias-within-alias resolution
+
@raise Error.Yamlrw_error if limits exceeded or undefined alias found *)
+
+
(** {2 Conversion to Value} *)
+
+
val to_value :
+
?resolve_aliases_first:bool ->
+
?max_nodes:int ->
+
?max_depth:int ->
+
t -> Value.t
+
(** Convert to JSON-compatible Value.
+
+
@param resolve_aliases_first Whether to resolve aliases before conversion (default true)
+
@param max_nodes Maximum nodes during alias expansion
+
@param max_depth Maximum alias nesting depth
+
@raise Error.Yamlrw_error if unresolved aliases encountered *)
+
+
(** {2 Node Accessors} *)
+
+
val anchor : t -> string option
+
(** Get anchor from any node *)
+
+
val tag : t -> string option
+
(** Get tag from any node *)
+140
tests/test_yamlrw.ml
···
"resolve_aliases false", `Quick, test_resolve_aliases_false;
]
+
(** Bug fix regression tests
+
These tests verify that issues fixed in ocaml-yaml don't occur in ocaml-yamlrw *)
+
+
(* Test for roundtrip of special string values (ocaml-yaml fix 225387d)
+
Strings like "true", "1.0", "null" etc. must be quoted on output so that
+
they round-trip correctly as strings, not as booleans/numbers/null *)
+
let test_roundtrip_string_true () =
+
let original = `String "true" in
+
let emitted = to_string original in
+
let parsed = of_string emitted in
+
check_value "String 'true' roundtrips" original parsed
+
+
let test_roundtrip_string_false () =
+
let original = `String "false" in
+
let emitted = to_string original in
+
let parsed = of_string emitted in
+
check_value "String 'false' roundtrips" original parsed
+
+
let test_roundtrip_string_null () =
+
let original = `String "null" in
+
let emitted = to_string original in
+
let parsed = of_string emitted in
+
check_value "String 'null' roundtrips" original parsed
+
+
let test_roundtrip_string_number () =
+
let original = `String "1.0" in
+
let emitted = to_string original in
+
let parsed = of_string emitted in
+
check_value "String '1.0' roundtrips" original parsed
+
+
let test_roundtrip_string_integer () =
+
let original = `String "42" in
+
let emitted = to_string original in
+
let parsed = of_string emitted in
+
check_value "String '42' roundtrips" original parsed
+
+
let test_roundtrip_string_yes () =
+
let original = `String "yes" in
+
let emitted = to_string original in
+
let parsed = of_string emitted in
+
check_value "String 'yes' roundtrips" original parsed
+
+
let test_roundtrip_string_no () =
+
let original = `String "no" in
+
let emitted = to_string original in
+
let parsed = of_string emitted in
+
check_value "String 'no' roundtrips" original parsed
+
+
(* Test for integer display without decimal point (ocaml-yaml fix 999b1aa)
+
Float values that are integers should be emitted as "42" not "42." or "42.0" *)
+
let test_emit_integer_float () =
+
let value = `Float 42.0 in
+
let result = to_string value in
+
(* Check the result doesn't contain "42." or "42.0" *)
+
Alcotest.(check bool) "no trailing dot"
+
true (not (String.length result >= 3 &&
+
result.[0] = '4' && result.[1] = '2' && result.[2] = '.'))
+
+
let test_emit_negative_integer_float () =
+
let value = `Float (-17.0) in
+
let result = to_string value in
+
let parsed = of_string result in
+
check_value "negative integer float roundtrips" value parsed
+
+
(* Test for special YAML floats: .nan, .inf, -.inf *)
+
let test_parse_special_floats () =
+
let inf_result = of_string ".inf" in
+
(match inf_result with
+
| `Float f when Float.is_inf f && f > 0.0 -> ()
+
| _ -> Alcotest.fail "expected positive infinity");
+
let neg_inf_result = of_string "-.inf" in
+
(match neg_inf_result with
+
| `Float f when Float.is_inf f && f < 0.0 -> ()
+
| _ -> Alcotest.fail "expected negative infinity");
+
let nan_result = of_string ".nan" in
+
(match nan_result with
+
| `Float f when Float.is_nan f -> ()
+
| _ -> Alcotest.fail "expected NaN")
+
+
(* Test that bare "inf", "nan", "infinity" are NOT parsed as floats
+
(ocaml-yaml issue - OCaml's Float.of_string accepts these but YAML doesn't) *)
+
let test_bare_inf_nan_are_strings () =
+
let inf_result = of_string "inf" in
+
(match inf_result with
+
| `String "inf" -> ()
+
| `Float _ -> Alcotest.fail "'inf' should be string, not float"
+
| _ -> Alcotest.fail "expected string 'inf'");
+
let nan_result = of_string "nan" in
+
(match nan_result with
+
| `String "nan" -> ()
+
| `Float _ -> Alcotest.fail "'nan' should be string, not float"
+
| _ -> Alcotest.fail "expected string 'nan'");
+
let infinity_result = of_string "infinity" in
+
(match infinity_result with
+
| `String "infinity" -> ()
+
| `Float _ -> Alcotest.fail "'infinity' should be string, not float"
+
| _ -> Alcotest.fail "expected string 'infinity'")
+
+
(* Test for quoted scalar preservation *)
+
let test_quoted_scalar_preserved () =
+
(* When a scalar is quoted, it should be preserved as a string even if
+
it looks like a number/boolean *)
+
check_value "double-quoted true is string"
+
(`String "true") (of_string {|"true"|});
+
check_value "single-quoted 42 is string"
+
(`String "42") (of_string "'42'");
+
check_value "double-quoted null is string"
+
(`String "null") (of_string {|"null"|})
+
+
(* Test complex roundtrip with mixed types *)
+
let test_complex_roundtrip () =
+
let original = `O [
+
("string_true", `String "true");
+
("bool_true", `Bool true);
+
("string_42", `String "42");
+
("int_42", `Float 42.0);
+
("string_null", `String "null");
+
("actual_null", `Null);
+
] in
+
let emitted = to_string original in
+
let parsed = of_string emitted in
+
check_value "complex roundtrip preserves types" original parsed
+
+
let bugfix_regression_tests = [
+
"roundtrip string 'true'", `Quick, test_roundtrip_string_true;
+
"roundtrip string 'false'", `Quick, test_roundtrip_string_false;
+
"roundtrip string 'null'", `Quick, test_roundtrip_string_null;
+
"roundtrip string '1.0'", `Quick, test_roundtrip_string_number;
+
"roundtrip string '42'", `Quick, test_roundtrip_string_integer;
+
"roundtrip string 'yes'", `Quick, test_roundtrip_string_yes;
+
"roundtrip string 'no'", `Quick, test_roundtrip_string_no;
+
"emit integer float without decimal", `Quick, test_emit_integer_float;
+
"emit negative integer float", `Quick, test_emit_negative_integer_float;
+
"parse special floats (.inf, -.inf, .nan)", `Quick, test_parse_special_floats;
+
"bare inf/nan/infinity are strings", `Quick, test_bare_inf_nan_are_strings;
+
"quoted scalars preserved as strings", `Quick, test_quoted_scalar_preserved;
+
"complex roundtrip preserves types", `Quick, test_complex_roundtrip;
+
]
+
(** Run all tests *)
let () =
···
"multiline", multiline_tests;
"errors", error_tests;
"alias_limits", alias_limit_tests;
+
"bugfix_regression", bugfix_regression_tests;
]