Pure OCaml Yaml 1.2 reader and writer using Bytesrw
(*---------------------------------------------------------------------------
  Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
  SPDX-License-Identifier: ISC
 ---------------------------------------------------------------------------*)

(** {1 Yamlrw - A Pure OCaml YAML Parser and Emitter}

    Yamlrw is a pure OCaml implementation of YAML 1.1/1.2 parsing and emission.
    It provides both a high-level JSON-compatible interface and a lower-level
    streaming API for fine-grained control.

    {2 Quick Start}

    Parse a YAML string:
    {[
      let value = Yamlrw.of_string "name: Alice\nage: 30" in
      match value with
      | `O [("name", `String "Alice"); ("age", `Float 30.)] -> ...
      | _ -> ...
    ]}

    Serialize to YAML:
    {[
      let yaml = `O [("name", `String "Bob"); ("active", `Bool true)] in
      let s = Yamlrw.to_string yaml in
      (* "name: Bob\nactive: true\n" *)
    ]}

    Use the Util module for convenient access:
    {[
      let name = Yamlrw.Util.(get_string (get "name" value)) in
      let age = Yamlrw.Util.(get_int (get "age" value)) in
      ...
    ]} *)


(** {2 Error Handling} *)

module Error = Error
(** Error descriptions carried by {!Yamlrw_error}. *)

exception Yamlrw_error of Error.t
(** Raised on parse or emit errors. *)


(** {2 Core Types} *)

type value = [
  | `Null  (** YAML null, ~, or empty values *)
  | `Bool of bool  (** YAML booleans (true, false, yes, no, on, off) *)
  | `Float of float  (** All YAML numbers (integers stored as floats) *)
  | `String of string  (** YAML strings *)
  | `A of value list  (** YAML sequences/arrays *)
  | `O of (string * value) list
    (** YAML mappings/objects with string keys *)
]
(** JSON-compatible YAML representation. Use this for simple data interchange.

    This type is structurally equivalent to {!Value.t} and compatible with the
    ezjsonm representation. For additional operations, see {!Value} and
    {!Util}. *)

type yaml = [
  | `Scalar of Scalar.t  (** YAML scalar value with style and metadata *)
  | `Alias of string  (** Alias reference to an anchored node *)
  | `A of yaml Sequence.t  (** YAML sequence with style and metadata *)
  | `O of (yaml, yaml) Mapping.t
    (** YAML mapping with style and metadata *)
]
(** Full YAML representation preserving anchors, tags, and aliases.

    This type is structurally equivalent to {!Yaml.t}. Use this when you need
    access to YAML-specific features like anchors and aliases for node reuse,
    type tags for custom types, scalar styles (plain, quoted, literal, folded),
    and collection styles (block vs flow).

    For additional operations, see {!Yaml}, {!Scalar}, {!Sequence}, and
    {!Mapping}. *)

type document = {
  version : (int * int) option;
      (** Optional YAML version directive (e.g., (1, 2) for YAML 1.2) *)
  tags : (string * string) list;
      (** TAG directives mapping handles to prefixes *)
  root : yaml option;
      (** Root content of the document *)
  implicit_start : bool;
      (** Whether the document start marker (---) is implicit *)
  implicit_end : bool;
      (** Whether the document end marker (...) is implicit *)
}
(** A YAML document with directives and metadata.

    This type is structurally equivalent to {!Document.t}. A YAML stream can
    contain multiple documents, each separated by document markers.

    For additional operations, see {!Document}. *)


(** {2 Character Encoding} *)

module Encoding = Encoding
(** Character encodings, as accepted by the [?encoding] argument of the
    serializers below and by {!Stream.stream_start}. *)


(** {2 Parsing} *)

type version = [ `V1_1 | `V1_2 ]
(** YAML specification version. *)

val default_max_alias_nodes : int
(** Default maximum nodes during alias expansion (10 million). *)

val default_max_alias_depth : int
(** Default maximum alias nesting depth (100). *)

val of_string :
  ?resolve_aliases:bool ->
  ?max_nodes:int ->
  ?max_depth:int ->
  string -> value
(** Parse a YAML string into a JSON-compatible value.

    @param resolve_aliases Whether to expand aliases (default: true)
    @param max_nodes Maximum nodes during alias expansion (default: 10M)
    @param max_depth Maximum alias nesting depth (default: 100)
    @raise Yamlrw_error on parse error or if multiple documents found *)

val yaml_of_string :
  ?resolve_aliases:bool ->
  ?max_nodes:int ->
  ?max_depth:int ->
  string -> yaml
(** Parse a YAML string preserving full YAML metadata (anchors, tags, etc).

    By default, aliases are NOT resolved, preserving the document structure.

    @param resolve_aliases Whether to expand aliases (default: false)
    @param max_nodes Maximum nodes during alias expansion (default: 10M)
    @param max_depth Maximum alias nesting depth (default: 100)
    @raise Yamlrw_error on parse error or if multiple documents found *)

val documents_of_string : string -> document list
(** Parse a multi-document YAML stream.

    Use this when your YAML input contains multiple documents separated
    by document markers (---).

    @raise Yamlrw_error on parse error *)


(** {2 Formatting Styles} *)

module Scalar_style = Scalar_style
(** Scalar presentation styles, as accepted by the [?scalar_style] argument
    of the serializers below and by {!Stream.scalar}. *)

module Layout_style = Layout_style
(** Collection layout styles, as accepted by the [?layout_style] argument
    of the serializers below and by {!Stream.sequence_start} and
    {!Stream.mapping_start}. *)


(** {2 Serialization} *)

val to_buffer :
  ?encoding:Encoding.t ->
  ?scalar_style:Scalar_style.t ->
  ?layout_style:Layout_style.t ->
  ?buffer:Buffer.t ->
  value -> Buffer.t
(** Serialize a value to a buffer.

    @param encoding Output encoding (default: UTF-8)
    @param scalar_style Preferred scalar style (default: Any)
    @param layout_style Preferred layout style (default: Any)
    @param buffer Optional buffer to append to (allocates new one if not
      provided)
    @return The buffer containing the serialized YAML *)

val to_string :
  ?encoding:Encoding.t ->
  ?scalar_style:Scalar_style.t ->
  ?layout_style:Layout_style.t ->
  value -> string
(** Serialize a value to a YAML string.

    @param encoding Output encoding (default: UTF-8)
    @param scalar_style Preferred scalar style (default: Any)
    @param layout_style Preferred layout style (default: Any) *)

val yaml_to_buffer :
  ?encoding:Encoding.t ->
  ?scalar_style:Scalar_style.t ->
  ?layout_style:Layout_style.t ->
  ?buffer:Buffer.t ->
  yaml -> Buffer.t
(** Serialize a full YAML value to a buffer.

    @param encoding Output encoding (default: UTF-8)
    @param scalar_style Preferred scalar style (default: Any)
    @param layout_style Preferred layout style (default: Any)
    @param buffer Optional buffer to append to (allocates new one if not
      provided)
    @return The buffer containing the serialized YAML *)

val yaml_to_string :
  ?encoding:Encoding.t ->
  ?scalar_style:Scalar_style.t ->
  ?layout_style:Layout_style.t ->
  yaml -> string
(** Serialize a full YAML value to a string.

    @param encoding Output encoding (default: UTF-8)
    @param scalar_style Preferred scalar style (default: Any)
    @param layout_style Preferred layout style (default: Any) *)

val documents_to_buffer :
  ?encoding:Encoding.t ->
  ?scalar_style:Scalar_style.t ->
  ?layout_style:Layout_style.t ->
  ?resolve_aliases:bool ->
  ?buffer:Buffer.t ->
  document list -> Buffer.t
(** Serialize multiple documents to a buffer.

    @param encoding Output encoding (default: UTF-8)
    @param scalar_style Preferred scalar style (default: Any)
    @param layout_style Preferred layout style (default: Any)
    @param resolve_aliases Whether to expand aliases (default: true)
    @param buffer Optional buffer to append to (allocates new one if not
      provided)
    @return The buffer containing the serialized YAML *)

val documents_to_string :
  ?encoding:Encoding.t ->
  ?scalar_style:Scalar_style.t ->
  ?layout_style:Layout_style.t ->
  ?resolve_aliases:bool ->
  document list -> string
(** Serialize multiple documents to a YAML stream.

    @param encoding Output encoding (default: UTF-8)
    @param scalar_style Preferred scalar style (default: Any)
    @param layout_style Preferred layout style (default: Any)
    @param resolve_aliases Whether to expand aliases (default: true) *)


(** {2 Buffer Parsing} *)

val of_buffer :
  ?resolve_aliases:bool ->
  ?max_nodes:int ->
  ?max_depth:int ->
  Buffer.t -> value
(** Parse YAML from a buffer into a JSON-compatible value.

    @param resolve_aliases Whether to expand aliases (default: true)
    @param max_nodes Maximum nodes during alias expansion (default: 10M)
    @param max_depth Maximum alias nesting depth (default: 100)
    @raise Yamlrw_error on parse error or if multiple documents found *)

val yaml_of_buffer :
  ?resolve_aliases:bool ->
  ?max_nodes:int ->
  ?max_depth:int ->
  Buffer.t -> yaml
(** Parse YAML from a buffer preserving full YAML metadata.

    @param resolve_aliases Whether to expand aliases (default: false)
    @param max_nodes Maximum nodes during alias expansion (default: 10M)
    @param max_depth Maximum alias nesting depth (default: 100)
    @raise Yamlrw_error on parse error or if multiple documents found *)

val documents_of_buffer : Buffer.t -> document list
(** Parse a multi-document YAML stream from a buffer.

    @raise Yamlrw_error on parse error *)


(** {2 Conversion} *)

val to_json :
  ?resolve_aliases:bool ->
  ?max_nodes:int ->
  ?max_depth:int ->
  yaml -> value
(** Convert full YAML to JSON-compatible value.

    @param resolve_aliases Whether to expand aliases (default: true)
    @param max_nodes Maximum nodes during alias expansion (default: 10M)
    @param max_depth Maximum alias nesting depth (default: 100)
    @raise Yamlrw_error if alias limits exceeded or complex keys found *)

val of_json : value -> yaml
(** Convert JSON-compatible value to full YAML representation. *)


(** {2 Pretty Printing & Equality} *)

val pp : Format.formatter -> value -> unit
(** Pretty-print a value. *)

val equal : value -> value -> bool
(** Test equality of two values. *)


(** {2 Util - Value Combinators}

    Combinators for working with {!type:value} values.

    This module provides constructors, accessors, and transformations
    for JSON-compatible YAML values. *)

module Util : sig
  type t = Value.t
  (** Alias for {!type:value}. *)

  (** {3 Type Error} *)

  exception Type_error of string * t
  (** Raised when a value has unexpected type.
      [Type_error (expected, actual_value)] *)

  (** {3 Constructors} *)

  val null : t
  (** The null value. *)

  val bool : bool -> t
  (** Create a boolean value. *)

  val int : int -> t
  (** Create an integer value (stored as float). *)

  val float : float -> t
  (** Create a float value. *)

  val string : string -> t
  (** Create a string value. *)

  val strings : string list -> t
  (** Create a list of strings. *)

  val list : t list -> t
  (** Create a list value. *)

  val obj : (string * t) list -> t
  (** Create an object value from key-value pairs. *)

  (** {3 Type Predicates} *)

  val is_null : t -> bool
  (** Check if value is null. *)

  val is_bool : t -> bool
  (** Check if value is a boolean. *)

  val is_number : t -> bool
  (** Check if value is a number. *)

  val is_string : t -> bool
  (** Check if value is a string. *)

  val is_list : t -> bool
  (** Check if value is a list. *)

  val is_obj : t -> bool
  (** Check if value is an object. *)

  (** {3 Safe Accessors}

      These return [None] if the value has the wrong type. *)

  val as_null : t -> unit option
  (** Get unit if value is null. *)

  val as_bool : t -> bool option
  (** Get boolean value. *)

  val as_float : t -> float option
  (** Get float value. *)

  val as_string : t -> string option
  (** Get string value. *)

  val as_list : t -> t list option
  (** Get list value. *)

  val as_obj : t -> (string * t) list option
  (** Get object as association list. *)

  val as_int : t -> int option
  (** Get integer value if float is an exact integer. *)

  (** {3 Unsafe Accessors}

      These raise {!Type_error} if the value has the wrong type. *)

  val get_null : t -> unit
  (** Get unit.
      @raise Type_error if the value has the wrong type *)

  val get_bool : t -> bool
  (** Get boolean.
      @raise Type_error if the value has the wrong type *)

  val get_float : t -> float
  (** Get float.
      @raise Type_error if the value has the wrong type *)

  val get_string : t -> string
  (** Get string.
      @raise Type_error if the value has the wrong type *)

  val get_list : t -> t list
  (** Get list.
      @raise Type_error if the value has the wrong type *)

  val get_obj : t -> (string * t) list
  (** Get object as association list.
      @raise Type_error if the value has the wrong type *)

  val get_int : t -> int
  (** Get integer.
      @raise Type_error if the value has the wrong type *)

  (** {3 Object Operations} *)

  val mem : string -> t -> bool
  (** [mem key obj] checks if [key] exists in object [obj].
      Returns [false] if [obj] is not an object. *)

  val find : string -> t -> t option
  (** [find key obj] looks up [key] in object [obj].
      Returns [None] if key not found or if [obj] is not an object. *)

  val get : string -> t -> t
  (** [get key obj] looks up [key] in object [obj].
      @raise Not_found if [key] is not found *)

  val keys : t -> string list
  (** Get all keys from an object.
      @raise Type_error if not an object *)

  val values : t -> t list
  (** Get all values from an object.
      @raise Type_error if not an object *)

  val update : string -> t -> t -> t
  (** [update key value obj] sets [key] to [value] in [obj].
      Adds the key if it doesn't exist.
      @raise Type_error if [obj] is not an object *)

  val remove : string -> t -> t
  (** [remove key obj] removes [key] from [obj].
      @raise Type_error if [obj] is not an object *)

  val combine : t -> t -> t
  (** [combine obj1 obj2] merges two objects, with [obj2] values taking
      precedence.
      @raise Type_error if either argument is not an object *)

  (** {3 List Operations} *)

  val map : (t -> t) -> t -> t
  (** [map f lst] applies [f] to each element of list [lst].
      @raise Type_error if [lst] is not a list *)

  val mapi : (int -> t -> t) -> t -> t
  (** [mapi f lst] applies [f i x] to each element [x] at index [i].
      @raise Type_error if [lst] is not a list *)

  val filter : (t -> bool) -> t -> t
  (** [filter pred lst] keeps elements satisfying [pred].
      @raise Type_error if [lst] is not a list *)

  val fold : ('a -> t -> 'a) -> 'a -> t -> 'a
  (** [fold f init lst] folds [f] over list [lst].
      @raise Type_error if [lst] is not a list *)

  val nth : int -> t -> t option
  (** [nth n lst] gets element at index [n].
      Returns [None] if [lst] is not a list or index out of bounds. *)

  val length : t -> int
  (** Get the length of a list or object. Returns 0 for other types. *)

  val flatten : t -> t
  (** Flatten a list of lists into a single list.
      Non-list elements are kept as-is.
      @raise Type_error if not a list *)

  (** {3 Path Operations} *)

  val get_path : string list -> t -> t option
  (** [get_path ["a"; "b"; "c"] obj] looks up nested path [obj.a.b.c].
      Returns [None] if any key is not found. *)

  val get_path_exn : string list -> t -> t
  (** Like {!get_path} but raises instead of returning [None].
      @raise Not_found if the path is not found *)

  (** {3 Iteration} *)

  val iter_obj : (string -> t -> unit) -> t -> unit
  (** [iter_obj f obj] calls [f key value] for each pair in [obj].
      @raise Type_error if [obj] is not an object *)

  val iter_list : (t -> unit) -> t -> unit
  (** [iter_list f lst] calls [f] on each element of [lst].
      @raise Type_error if [lst] is not a list *)

  val fold_obj : ('a -> string -> t -> 'a) -> 'a -> t -> 'a
  (** [fold_obj f init obj] folds over object key-value pairs.
      @raise Type_error if [obj] is not an object *)

  (** {3 Mapping} *)

  val map_obj : (string -> t -> t) -> t -> t
  (** [map_obj f obj] maps [f key value] over each pair in [obj].
      @raise Type_error if [obj] is not an object *)

  val filter_obj : (string -> t -> bool) -> t -> t
  (** [filter_obj pred obj] keeps pairs satisfying [pred key value].
      @raise Type_error if [obj] is not an object *)

  (** {3 Conversion Helpers}

      Get values with optional defaults. If no default is provided and the type
      doesn't match, these raise {!Type_error}. *)

  val to_bool : ?default:bool -> t -> bool
  (** Get boolean or return default.
      @raise Type_error if type doesn't match and no default provided *)

  val to_int : ?default:int -> t -> int
  (** Get integer or return default.
      @raise Type_error if type doesn't match and no default provided *)

  val to_float : ?default:float -> t -> float
  (** Get float or return default.
      @raise Type_error if type doesn't match and no default provided *)

  val to_string : ?default:string -> t -> string
  (** Get string or return default.
      @raise Type_error if type doesn't match and no default provided *)

  val to_list : ?default:t list -> t -> t list
  (** Get list or return default.
      @raise Type_error if type doesn't match and no default provided *)
end


(** {2 Stream - Low-Level Event API}

    Low-level streaming API for event-based YAML processing.

    This is useful for:
    - Processing very large YAML files incrementally
    - Building custom YAML transformers
    - Fine-grained control over YAML emission *)

module Stream : sig

  (** {3 Event Types} *)

  type event = Event.t
  (** A parsing or emitting event. *)

  type position = Position.t
  (** A position in the source (line, column, byte offset). *)

  type event_result = {
    event : event;
    start_pos : position;
    end_pos : position;
  }
  (** Result of parsing an event with its source location. *)

  (** {3 Parsing} *)

  type parser
  (** A streaming YAML parser. *)

  val parser : string -> parser
  (** Create a parser from a string. *)

  val next : parser -> event_result option
  (** Get the next event from the parser.
      Returns [None] when parsing is complete. *)

  val iter : (event -> position -> position -> unit) -> parser -> unit
  (** [iter f parser] calls [f event start_pos end_pos] for each event. *)

  val fold : ('a -> event -> 'a) -> 'a -> parser -> 'a
  (** [fold f init parser] folds [f] over all events. *)

  (** {3 Emitting} *)

  type emitter
  (** A streaming YAML emitter. *)

  (* NOTE(review): [?len] is not documented in this interface; presumably an
     initial size hint for the emitter's output buffer -- confirm against the
     implementation before documenting it publicly. *)

  val emitter : ?len:int -> unit -> emitter
  (** Create a new emitter. *)

  val contents : emitter -> string
  (** Get the emitted YAML string. *)

  val emit : emitter -> event -> unit
  (** Emit an event.
      @raise Yamlrw_error if the event sequence is invalid *)

  (** {3 Event Emission Helpers} *)

  val stream_start : emitter -> Encoding.t -> unit
  (** Emit a stream start event. *)

  val stream_end : emitter -> unit
  (** Emit a stream end event. *)

  val document_start :
    emitter -> ?version:version -> ?implicit:bool -> unit -> unit
  (** Emit a document start event.
      @param version YAML version directive
      @param implicit Whether start marker is implicit (default: true) *)

  val document_end : emitter -> ?implicit:bool -> unit -> unit
  (** Emit a document end event.
      @param implicit Whether end marker is implicit (default: true) *)

  val scalar :
    emitter ->
    ?anchor:string ->
    ?tag:string ->
    ?style:Scalar_style.t ->
    string -> unit
  (** Emit a scalar value.
      @param anchor Optional anchor name
      @param tag Optional type tag
      @param style Scalar style (default: Any) *)

  val alias : emitter -> string -> unit
  (** Emit an alias reference. *)

  val sequence_start :
    emitter ->
    ?anchor:string ->
    ?tag:string ->
    ?style:Layout_style.t ->
    unit -> unit
  (** Emit a sequence start event.
      @param anchor Optional anchor name
      @param tag Optional type tag
      @param style Layout style (default: Any) *)

  val sequence_end : emitter -> unit
  (** Emit a sequence end event. *)

  val mapping_start :
    emitter ->
    ?anchor:string ->
    ?tag:string ->
    ?style:Layout_style.t ->
    unit -> unit
  (** Emit a mapping start event.
      @param anchor Optional anchor name
      @param tag Optional type tag
      @param style Layout style (default: Any) *)

  val mapping_end : emitter -> unit
  (** Emit a mapping end event. *)
end


(** {2 Internal Modules}

    These modules are exposed for advanced use cases requiring
    fine-grained control over parsing, emission, or data structures.

    For typical usage, prefer the top-level functions and {!Util}. *)

module Position = Position
(** Source position tracking. *)

module Span = Span
(** Source span (range of positions). *)

module Chomping = Chomping
(** Block scalar chomping modes. *)

module Tag = Tag
(** YAML type tags. *)

module Value = Value
(** JSON-compatible value type and operations. *)

module Scalar = Scalar
(** YAML scalar with metadata. *)

module Sequence = Sequence
(** YAML sequence with metadata. *)

module Mapping = Mapping
(** YAML mapping with metadata. *)

module Yaml = Yaml
(** Full YAML value type. *)

module Document = Document
(** YAML document with directives. *)

module Token = Token
(** Lexical tokens. *)

module Scanner = Scanner
(** Lexical scanner. *)

module Event = Event
(** Parser events. *)

module Parser = Parser
(** Event-based parser. *)

module Loader = Loader
(** Document loader. *)

module Emitter = Emitter
(** Event-based emitter. *)

module Input = Input
(** Input stream utilities. *)

module Serialize = Serialize
(** Buffer serialization utilities. *)