Pure OCaml Yaml 1.2 reader and writer using Bytesrw
1(*--------------------------------------------------------------------------- 2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 SPDX-License-Identifier: ISC 4 ---------------------------------------------------------------------------*) 5 6(** {1 Yamlrw - A Pure OCaml YAML Parser and Emitter} *) 7 8(** {2 Error Handling} *) 9 10module Error = Error 11 12exception Yamlrw_error = Error.Yamlrw_error 13 14 15(** {2 Core Types} *) 16 17(** JSON-compatible YAML representation. Use this for simple data interchange. 18 19 This type is structurally equivalent to {!Value.t} and compatible with the 20 ezjsonm representation. For additional operations, see {!Value} and {!Util}. *) 21type value = [ 22 | `Null (** YAML null, ~, or empty values *) 23 | `Bool of bool (** YAML booleans (true, false, yes, no, on, off) *) 24 | `Float of float (** All YAML numbers (integers stored as floats) *) 25 | `String of string (** YAML strings *) 26 | `A of value list (** YAML sequences/arrays *) 27 | `O of (string * value) list (** YAML mappings/objects with string keys *) 28] 29 30(** Full YAML representation preserving anchors, tags, and aliases. 31 32 This type is structurally equivalent to {!Yaml.t}. Use this when you need 33 access to YAML-specific features like anchors and aliases for node reuse, 34 type tags for custom types, scalar styles (plain, quoted, literal, folded), 35 and collection styles (block vs flow). 36 37 For additional operations, see {!Yaml}, {!Scalar}, {!Sequence}, and {!Mapping}. *) 38type yaml = [ 39 | `Scalar of Scalar.t (** YAML scalar value with style and metadata *) 40 | `Alias of string (** Alias reference to an anchored node *) 41 | `A of yaml Sequence.t (** YAML sequence with style and metadata *) 42 | `O of (yaml, yaml) Mapping.t (** YAML mapping with style and metadata *) 43] 44 45(** A YAML document with directives and metadata. 46 47 This type is structurally equivalent to {!Document.t}. A YAML stream can 48 contain multiple documents, each separated by document markers. 49 50 For additional operations, see {!Document}. *) 51type document = { 52 version : (int * int) option; (** Optional YAML version directive (e.g., (1, 2) for YAML 1.2) *) 53 tags : (string * string) list; (** TAG directives mapping handles to prefixes *) 54 root : yaml option; (** Root content of the document *) 55 implicit_start : bool; (** Whether the document start marker (---) is implicit *) 56 implicit_end : bool; (** Whether the document end marker (...) is implicit *) 57} 58 59 60(** {2 Character Encoding} *) 61 62module Encoding = Encoding 63 64 65(** {2 Parsing} *) 66 67type version = [ `V1_1 | `V1_2 ] 68 69(** Default maximum nodes during alias expansion (10 million). *) 70let default_max_alias_nodes = Yaml.default_max_alias_nodes 71 72(** Default maximum alias nesting depth (100). *) 73let default_max_alias_depth = Yaml.default_max_alias_depth 74 75let of_string 76 ?(resolve_aliases = true) 77 ?(max_nodes = default_max_alias_nodes) 78 ?(max_depth = default_max_alias_depth) 79 s : value = 80 (Loader.value_of_string ~resolve_aliases ~max_nodes ~max_depth s :> value) 81(** Parse a YAML string into a JSON-compatible value. 82 83 @param resolve_aliases Whether to expand aliases (default: true) 84 @param max_nodes Maximum nodes during alias expansion (default: 10M) 85 @param max_depth Maximum alias nesting depth (default: 100) 86 @raise Yamlrw_error on parse error or if multiple documents found *) 87 88let yaml_of_string 89 ?(resolve_aliases = false) 90 ?(max_nodes = default_max_alias_nodes) 91 ?(max_depth = default_max_alias_depth) 92 s : yaml = 93 (Loader.yaml_of_string ~resolve_aliases ~max_nodes ~max_depth s :> yaml) 94(** Parse a YAML string preserving full YAML metadata (anchors, tags, etc). 95 96 By default, aliases are NOT resolved, preserving the document structure. 97 98 @param resolve_aliases Whether to expand aliases (default: false) 99 @param max_nodes Maximum nodes during alias expansion (default: 10M) 100 @param max_depth Maximum alias nesting depth (default: 100) 101 @raise Yamlrw_error on parse error or if multiple documents found *) 102 103let documents_of_string s : document list = 104 let docs = Loader.documents_of_string s in 105 List.map (fun (d : Document.t) : document -> { 106 version = d.version; 107 tags = d.tags; 108 root = (d.root :> yaml option); 109 implicit_start = d.implicit_start; 110 implicit_end = d.implicit_end; 111 }) docs 112(** Parse a multi-document YAML stream. 113 114 Use this when your YAML input contains multiple documents separated 115 by document markers (---). 116 117 @raise Yamlrw_error on parse error *) 118 119 120(** {2 Formatting Styles} *) 121 122module Scalar_style = Scalar_style 123 124module Layout_style = Layout_style 125 126 127(** {2 Serialization} *) 128 129let make_config ~encoding ~scalar_style ~layout_style = 130 { Emitter.default_config with encoding; scalar_style; layout_style } 131 132let to_buffer 133 ?(encoding = `Utf8) 134 ?(scalar_style = `Any) 135 ?(layout_style = `Any) 136 ?buffer 137 (value : value) = 138 let config = make_config ~encoding ~scalar_style ~layout_style in 139 Serialize.value_to_buffer ~config ?buffer (value :> Value.t) 140(** Serialize a value to a buffer. 141 142 @param encoding Output encoding (default: UTF-8) 143 @param scalar_style Preferred scalar style (default: Any) 144 @param layout_style Preferred layout style (default: Any) 145 @param buffer Optional buffer to append to (allocates new one if not provided) 146 @return The buffer containing the serialized YAML *) 147 148let to_string 149 ?(encoding = `Utf8) 150 ?(scalar_style = `Any) 151 ?(layout_style = `Any) 152 (value : value) = 153 Buffer.contents (to_buffer ~encoding ~scalar_style ~layout_style value) 154(** Serialize a value to a YAML string. 155 156 @param encoding Output encoding (default: UTF-8) 157 @param scalar_style Preferred scalar style (default: Any) 158 @param layout_style Preferred layout style (default: Any) *) 159 160let yaml_to_buffer 161 ?(encoding = `Utf8) 162 ?(scalar_style = `Any) 163 ?(layout_style = `Any) 164 ?buffer 165 (yaml : yaml) = 166 let config = make_config ~encoding ~scalar_style ~layout_style in 167 Serialize.yaml_to_buffer ~config ?buffer (yaml :> Yaml.t) 168(** Serialize a full YAML value to a buffer. 169 170 @param encoding Output encoding (default: UTF-8) 171 @param scalar_style Preferred scalar style (default: Any) 172 @param layout_style Preferred layout style (default: Any) 173 @param buffer Optional buffer to append to (allocates new one if not provided) 174 @return The buffer containing the serialized YAML *) 175 176let yaml_to_string 177 ?(encoding = `Utf8) 178 ?(scalar_style = `Any) 179 ?(layout_style = `Any) 180 (yaml : yaml) = 181 Buffer.contents (yaml_to_buffer ~encoding ~scalar_style ~layout_style yaml) 182(** Serialize a full YAML value to a string. 183 184 @param encoding Output encoding (default: UTF-8) 185 @param scalar_style Preferred scalar style (default: Any) 186 @param layout_style Preferred layout style (default: Any) *) 187 188let documents_to_buffer 189 ?(encoding = `Utf8) 190 ?(scalar_style = `Any) 191 ?(layout_style = `Any) 192 ?(resolve_aliases = true) 193 ?buffer 194 (documents : document list) = 195 let config = make_config ~encoding ~scalar_style ~layout_style in 196 let docs' = List.map (fun (d : document) : Document.t -> { 197 Document.version = d.version; 198 Document.tags = d.tags; 199 Document.root = (d.root :> Yaml.t option); 200 Document.implicit_start = d.implicit_start; 201 Document.implicit_end = d.implicit_end; 202 }) documents in 203 Serialize.documents_to_buffer ~config ~resolve_aliases ?buffer docs' 204(** Serialize multiple documents to a buffer. 205 206 @param encoding Output encoding (default: UTF-8) 207 @param scalar_style Preferred scalar style (default: Any) 208 @param layout_style Preferred layout style (default: Any) 209 @param resolve_aliases Whether to expand aliases (default: true) 210 @param buffer Optional buffer to append to (allocates new one if not provided) 211 @return The buffer containing the serialized YAML *) 212 213let documents_to_string 214 ?(encoding = `Utf8) 215 ?(scalar_style = `Any) 216 ?(layout_style = `Any) 217 ?(resolve_aliases = true) 218 (documents : document list) = 219 Buffer.contents (documents_to_buffer ~encoding ~scalar_style ~layout_style ~resolve_aliases documents) 220(** Serialize multiple documents to a YAML stream. 221 222 @param encoding Output encoding (default: UTF-8) 223 @param scalar_style Preferred scalar style (default: Any) 224 @param layout_style Preferred layout style (default: Any) 225 @param resolve_aliases Whether to expand aliases (default: true) *) 226 227(** {2 Buffer Parsing} *) 228 229let of_buffer 230 ?(resolve_aliases = true) 231 ?(max_nodes = default_max_alias_nodes) 232 ?(max_depth = default_max_alias_depth) 233 buffer : value = 234 of_string ~resolve_aliases ~max_nodes ~max_depth (Buffer.contents buffer) 235(** Parse YAML from a buffer into a JSON-compatible value. 236 237 @param resolve_aliases Whether to expand aliases (default: true) 238 @param max_nodes Maximum nodes during alias expansion (default: 10M) 239 @param max_depth Maximum alias nesting depth (default: 100) 240 @raise Yamlrw_error on parse error or if multiple documents found *) 241 242let yaml_of_buffer 243 ?(resolve_aliases = false) 244 ?(max_nodes = default_max_alias_nodes) 245 ?(max_depth = default_max_alias_depth) 246 buffer : yaml = 247 yaml_of_string ~resolve_aliases ~max_nodes ~max_depth (Buffer.contents buffer) 248(** Parse YAML from a buffer preserving full YAML metadata. 249 250 @param resolve_aliases Whether to expand aliases (default: false) 251 @param max_nodes Maximum nodes during alias expansion (default: 10M) 252 @param max_depth Maximum alias nesting depth (default: 100) 253 @raise Yamlrw_error on parse error or if multiple documents found *) 254 255let documents_of_buffer buffer : document list = 256 documents_of_string (Buffer.contents buffer) 257(** Parse a multi-document YAML stream from a buffer. 258 259 @raise Yamlrw_error on parse error *) 260 261 262(** {2 Conversion} *) 263 264let to_json 265 ?(resolve_aliases = true) 266 ?(max_nodes = default_max_alias_nodes) 267 ?(max_depth = default_max_alias_depth) 268 (yaml : yaml) : value = 269 (Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth (yaml :> Yaml.t) :> value) 270(** Convert full YAML to JSON-compatible value. 271 272 @param resolve_aliases Whether to expand aliases (default: true) 273 @param max_nodes Maximum nodes during alias expansion (default: 10M) 274 @param max_depth Maximum alias nesting depth (default: 100) 275 @raise Yamlrw_error if alias limits exceeded or complex keys found *) 276 277let of_json (value : value) : yaml = 278 (Yaml.of_value (value :> Value.t) :> yaml) 279(** Convert JSON-compatible value to full YAML representation. *) 280 281 282(** {2 Pretty Printing & Equality} *) 283 284let pp = Value.pp 285(** Pretty-print a value. *) 286 287let equal = Value.equal 288(** Test equality of two values. *) 289 290 291(** {2 Util - Value Combinators} *) 292 293module Util = struct 294 (** Combinators for working with {!type:value} values. 295 296 This module provides constructors, accessors, and transformations 297 for JSON-compatible YAML values. *) 298 299 type t = Value.t 300 301 (** {3 Type Error} *) 302 303 exception Type_error of string * t 304 (** Raised when a value has unexpected type. 305 [Type_error (expected, actual_value)] *) 306 307 let type_error expected v = raise (Type_error (expected, v)) 308 309 (** {3 Constructors} *) 310 311 let null : t = `Null 312 let bool b : t = `Bool b 313 let int n : t = `Float (Float.of_int n) 314 let float f : t = `Float f 315 let string s : t = `String s 316 let strings ss : t = `A (List.map (fun s -> `String s) ss) 317 let list vs : t = `A vs 318 let obj pairs : t = `O pairs 319 320 (** {3 Type Predicates} *) 321 322 let is_null = function `Null -> true | _ -> false 323 let is_bool = function `Bool _ -> true | _ -> false 324 let is_number = function `Float _ -> true | _ -> false 325 let is_string = function `String _ -> true | _ -> false 326 let is_list = function `A _ -> true | _ -> false 327 let is_obj = function `O _ -> true | _ -> false 328 329 (** {3 Safe Accessors} *) 330 331 let as_null = function `Null -> Some () | _ -> None 332 let as_bool = function `Bool b -> Some b | _ -> None 333 let as_float = function `Float f -> Some f | _ -> None 334 let as_string = function `String s -> Some s | _ -> None 335 let as_list = function `A l -> Some l | _ -> None 336 let as_obj = function `O o -> Some o | _ -> None 337 338 let as_int = function 339 | `Float f -> 340 let i = Float.to_int f in 341 if Float.equal (Float.of_int i) f then Some i else None 342 | _ -> None 343 344 (** {3 Unsafe Accessors} *) 345 346 let get_null v = match v with `Null -> () | _ -> type_error "null" v 347 let get_bool v = match v with `Bool b -> b | _ -> type_error "bool" v 348 let get_float v = match v with `Float f -> f | _ -> type_error "float" v 349 let get_string v = match v with `String s -> s | _ -> type_error "string" v 350 let get_list v = match v with `A l -> l | _ -> type_error "list" v 351 let get_obj v = match v with `O o -> o | _ -> type_error "object" v 352 353 let get_int v = 354 match as_int v with 355 | Some i -> i 356 | None -> type_error "int" v 357 358 (** {3 Object Operations} *) 359 360 let mem key = function 361 | `O pairs -> List.exists (fun (k, _) -> k = key) pairs 362 | _ -> false 363 364 let find key = function 365 | `O pairs -> List.assoc_opt key pairs 366 | _ -> None 367 368 let get key v = 369 match find key v with 370 | Some v -> v 371 | None -> raise Not_found 372 373 let keys v = match v with 374 | `O pairs -> List.map fst pairs 375 | _ -> type_error "object" v 376 377 let values v = match v with 378 | `O pairs -> List.map snd pairs 379 | _ -> type_error "object" v 380 381 let update key value = function 382 | `O pairs -> 383 let rec go = function 384 | [] -> [(key, value)] 385 | (k, _) :: rest when k = key -> (key, value) :: rest 386 | kv :: rest -> kv :: go rest 387 in 388 `O (go pairs) 389 | v -> type_error "object" v 390 391 let remove key = function 392 | `O pairs -> `O (List.filter (fun (k, _) -> k <> key) pairs) 393 | v -> type_error "object" v 394 395 let combine v1 v2 = 396 match v1, v2 with 397 | `O o1, `O o2 -> `O (o1 @ o2) 398 | `O _, _ -> type_error "object" v2 399 | _, _ -> type_error "object" v1 400 401 (** {3 List Operations} *) 402 403 let map f = function 404 | `A l -> `A (List.map f l) 405 | v -> type_error "list" v 406 407 let mapi f = function 408 | `A l -> `A (List.mapi f l) 409 | v -> type_error "list" v 410 411 let filter pred = function 412 | `A l -> `A (List.filter pred l) 413 | v -> type_error "list" v 414 415 let fold f init = function 416 | `A l -> List.fold_left f init l 417 | v -> type_error "list" v 418 419 let nth n = function 420 | `A l -> List.nth_opt l n 421 | _ -> None 422 423 let length = function 424 | `A l -> List.length l 425 | `O o -> List.length o 426 | _ -> 0 427 428 let flatten = function 429 | `A l -> 430 `A (List.concat_map (function `A inner -> inner | v -> [v]) l) 431 | v -> type_error "list" v 432 433 (** {3 Path Operations} *) 434 435 let rec get_path path v = 436 match path with 437 | [] -> Some v 438 | key :: rest -> 439 match find key v with 440 | Some child -> get_path rest child 441 | None -> None 442 443 let get_path_exn path v = 444 match get_path path v with 445 | Some v -> v 446 | None -> raise Not_found 447 448 (** {3 Iteration} *) 449 450 let iter_obj f = function 451 | `O pairs -> List.iter (fun (k, v) -> f k v) pairs 452 | v -> type_error "object" v 453 454 let iter_list f = function 455 | `A l -> List.iter f l 456 | v -> type_error "list" v 457 458 let fold_obj f init = function 459 | `O pairs -> List.fold_left (fun acc (k, v) -> f acc k v) init pairs 460 | v -> type_error "object" v 461 462 (** {3 Mapping} *) 463 464 let map_obj f = function 465 | `O pairs -> `O (List.map (fun (k, v) -> (k, f k v)) pairs) 466 | v -> type_error "object" v 467 468 let filter_obj pred = function 469 | `O pairs -> `O (List.filter (fun (k, v) -> pred k v) pairs) 470 | v -> type_error "object" v 471 472 (** {3 Conversion Helpers} *) 473 474 let to_bool ?default v = 475 match as_bool v, default with 476 | Some b, _ -> b 477 | None, Some d -> d 478 | None, None -> type_error "bool" v 479 480 let to_int ?default v = 481 match as_int v, default with 482 | Some i, _ -> i 483 | None, Some d -> d 484 | None, None -> type_error "int" v 485 486 let to_float ?default v = 487 match as_float v, default with 488 | Some f, _ -> f 489 | None, Some d -> d 490 | None, None -> type_error "float" v 491 492 let to_string ?default v = 493 match as_string v, default with 494 | Some s, _ -> s 495 | None, Some d -> d 496 | None, None -> type_error "string" v 497 498 let to_list ?default v = 499 match as_list v, default with 500 | Some l, _ -> l 501 | None, Some d -> d 502 | None, None -> type_error "list" v 503end 504 505 506(** {2 Stream - Low-Level Event API} *) 507 508module Stream = struct 509 (** Low-level streaming API for event-based YAML processing. 510 511 This is useful for: 512 - Processing very large YAML files incrementally 513 - Building custom YAML transformers 514 - Fine-grained control over YAML emission *) 515 516 (** {3 Event Types} *) 517 518 type event = Event.t 519 (** A parsing or emitting event. *) 520 521 type position = Position.t 522 (** A position in the source (line, column, byte offset). *) 523 524 (** Result of parsing an event. *) 525 type event_result = { 526 event : event; 527 start_pos : position; 528 end_pos : position; 529 } 530 531 (** {3 Parsing} *) 532 533 type parser = Parser.t 534 (** A streaming YAML parser. *) 535 536 let parser s = Parser.of_string s 537 (** Create a parser from a string. *) 538 539 let next p = 540 match Parser.next p with 541 | Some { event; span } -> 542 Some { 543 event; 544 start_pos = span.start; 545 end_pos = span.stop; 546 } 547 | None -> None 548 (** Get the next event from the parser. 549 Returns [None] when parsing is complete. *) 550 551 let iter f p = 552 let rec go () = 553 match next p with 554 | Some { event; start_pos; end_pos } -> 555 f event start_pos end_pos; 556 go () 557 | None -> () 558 in 559 go () 560 (** Iterate over all events from the parser. *) 561 562 let fold f init p = 563 let rec go acc = 564 match Parser.next p with 565 | Some { event; _ } -> go (f acc event) 566 | None -> acc 567 in 568 go init 569 (** Fold over all events from the parser. *) 570 571 (** {3 Emitting} *) 572 573 type emitter = Emitter.t 574 (** A streaming YAML emitter. *) 575 576 let emitter ?len:_ () = Emitter.create () 577 (** Create a new emitter. *) 578 579 let contents e = Emitter.contents e 580 (** Get the emitted YAML string. *) 581 582 let emit e ev = Emitter.emit e ev 583 (** Emit an event. *) 584 585 (** {3 Event Emission Helpers} *) 586 587 let stream_start e enc = 588 Emitter.emit e (Event.Stream_start { encoding = enc }) 589 590 let stream_end e = 591 Emitter.emit e Event.Stream_end 592 593 let document_start e ?version ?(implicit = true) () = 594 let version = match version with 595 | Some `V1_1 -> Some (1, 1) 596 | Some `V1_2 -> Some (1, 2) 597 | None -> None 598 in 599 Emitter.emit e (Event.Document_start { version; implicit }) 600 601 let document_end e ?(implicit = true) () = 602 Emitter.emit e (Event.Document_end { implicit }) 603 604 let scalar e ?anchor ?tag ?(style = `Any) value = 605 Emitter.emit e (Event.Scalar { 606 anchor; 607 tag; 608 value; 609 plain_implicit = true; 610 quoted_implicit = true; 611 style; 612 }) 613 614 let alias e name = 615 Emitter.emit e (Event.Alias { anchor = name }) 616 617 let sequence_start e ?anchor ?tag ?(style = `Any) () = 618 Emitter.emit e (Event.Sequence_start { 619 anchor; 620 tag; 621 implicit = true; 622 style; 623 }) 624 625 let sequence_end e = 626 Emitter.emit e Event.Sequence_end 627 628 let mapping_start e ?anchor ?tag ?(style = `Any) () = 629 Emitter.emit e (Event.Mapping_start { 630 anchor; 631 tag; 632 implicit = true; 633 style; 634 }) 635 636 let mapping_end e = 637 Emitter.emit e Event.Mapping_end 638end 639 640 641(** {2 Internal Modules} *) 642 643(** These modules are exposed for advanced use cases requiring 644 fine-grained control over parsing, emission, or data structures. 645 646 For typical usage, prefer the top-level functions and {!Util}. *) 647 648(** Source position tracking. *) 649module Position = Position 650 651(** Source span (range of positions). *) 652module Span = Span 653 654(** Block scalar chomping modes. *) 655module Chomping = Chomping 656 657(** YAML type tags. *) 658module Tag = Tag 659 660(** JSON-compatible value type and operations. *) 661module Value = Value 662 663(** YAML scalar with metadata. *) 664module Scalar = Scalar 665 666(** YAML sequence with metadata. *) 667module Sequence = Sequence 668 669(** YAML mapping with metadata. *) 670module Mapping = Mapping 671 672(** Full YAML value type. *) 673module Yaml = Yaml 674 675(** YAML document with directives. *) 676module Document = Document 677 678(** Lexical tokens. *) 679module Token = Token 680 681(** Lexical scanner. *) 682module Scanner = Scanner 683 684(** Parser events. *) 685module Event = Event 686 687(** Event-based parser. *) 688module Parser = Parser 689 690(** Document loader. *) 691module Loader = Loader 692 693(** Event-based emitter. *) 694module Emitter = Emitter 695 696(** Input stream utilities. *) 697module Input = Input 698 699(** Buffer serialization utilities. *) 700module Serialize = Serialize