Pure OCaml Yaml 1.2 reader and writer using Bytesrw
1(*--------------------------------------------------------------------------- 2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 SPDX-License-Identifier: ISC 4 ---------------------------------------------------------------------------*) 5 6(** Emitter - converts YAML data structures to string output 7 8 The emitter can write to either a Buffer (default) or directly to a 9 bytesrw Bytes.Writer for streaming output. *) 10 11type config = { 12 encoding : Encoding.t; 13 scalar_style : Scalar_style.t; 14 layout_style : Layout_style.t; 15 indent : int; 16 width : int; 17 canonical : bool; 18} 19 20let default_config = { 21 encoding = `Utf8; 22 scalar_style = `Any; 23 layout_style = `Any; 24 indent = 2; 25 width = 80; 26 canonical = false; 27} 28 29type state = 30 | Initial 31 | Stream_started 32 | Document_started 33 | In_block_sequence of int (* indent level *) 34 | In_block_mapping_key of int 35 | In_block_mapping_value of int 36 | In_block_mapping_first_key of int (* first key after "- ", no indent needed *) 37 | In_flow_sequence 38 | In_flow_mapping_key 39 | In_flow_mapping_value 40 | Document_ended 41 | Stream_ended 42 43(** Output sink - either a Buffer or a bytesrw Writer *) 44type sink = 45 | Buffer_sink of Buffer.t 46 | Writer_sink of Bytesrw.Bytes.Writer.t 47 48type t = { 49 config : config; 50 sink : sink; 51 mutable state : state; 52 mutable states : state list; 53 mutable indent : int; 54 mutable flow_level : int; 55 mutable need_separator : bool; 56} 57 58let create ?(config = default_config) () = { 59 config; 60 sink = Buffer_sink (Buffer.create 1024); 61 state = Initial; 62 states = []; 63 indent = 0; 64 flow_level = 0; 65 need_separator = false; 66} 67 68(** Create an emitter that writes directly to a Bytes.Writer *) 69let of_writer ?(config = default_config) writer = { 70 config; 71 sink = Writer_sink writer; 72 state = Initial; 73 states = []; 74 indent = 0; 75 flow_level = 0; 76 need_separator = false; 77} 78 79let contents t = 80 match t.sink with 81 | Buffer_sink buf -> Buffer.contents buf 82 | Writer_sink _ -> "" (* No accumulated content for writer sink *) 83 84let reset t = 85 (match t.sink with 86 | Buffer_sink buf -> Buffer.clear buf 87 | Writer_sink _ -> ()); 88 t.state <- Initial; 89 t.states <- []; 90 t.indent <- 0; 91 t.flow_level <- 0; 92 t.need_separator <- false 93 94(** Output helpers - write to appropriate sink *) 95 96let write t s = 97 match t.sink with 98 | Buffer_sink buf -> Buffer.add_string buf s 99 | Writer_sink w -> Bytesrw.Bytes.Writer.write_string w s 100 101let write_char t c = 102 match t.sink with 103 | Buffer_sink buf -> Buffer.add_char buf c 104 | Writer_sink w -> 105 let b = Bytes.make 1 c in 106 Bytesrw.Bytes.Writer.write_bytes w b 107 108let write_indent t = 109 if t.indent <= 8 then 110 for _ = 1 to t.indent do write_char t ' ' done 111 else 112 write t (String.make t.indent ' ') 113 114let write_newline t = 115 write_char t '\n' 116 117let push_state t s = 118 t.states <- t.state :: t.states; 119 t.state <- s 120 121let pop_state t = 122 match t.states with 123 | s :: rest -> 124 t.state <- s; 125 t.states <- rest 126 | [] -> 127 t.state <- Stream_ended 128 129(** Escape a string for double-quoted output. 130 Uses a buffer to batch writes instead of character-by-character. *) 131let escape_double_quoted value = 132 let len = String.length value in 133 (* Check if any escaping is needed *) 134 let needs_escape = ref false in 135 for i = 0 to len - 1 do 136 match value.[i] with 137 | '"' | '\\' | '\n' | '\r' | '\t' -> needs_escape := true 138 | c when c < ' ' -> needs_escape := true 139 | _ -> () 140 done; 141 if not !needs_escape then value 142 else begin 143 let buf = Buffer.create (len + len / 4) in 144 for i = 0 to len - 1 do 145 match value.[i] with 146 | '"' -> Buffer.add_string buf "\\\"" 147 | '\\' -> Buffer.add_string buf "\\\\" 148 | '\n' -> Buffer.add_string buf "\\n" 149 | '\r' -> Buffer.add_string buf "\\r" 150 | '\t' -> Buffer.add_string buf "\\t" 151 | c when c < ' ' -> Buffer.add_string buf (Printf.sprintf "\\x%02x" (Char.code c)) 152 | c -> Buffer.add_char buf c 153 done; 154 Buffer.contents buf 155 end 156 157(** Escape a string for single-quoted output. *) 158let escape_single_quoted value = 159 if not (String.contains value '\'') then value 160 else begin 161 let len = String.length value in 162 let buf = Buffer.create (len + len / 8) in 163 for i = 0 to len - 1 do 164 let c = value.[i] in 165 if c = '\'' then Buffer.add_string buf "''" 166 else Buffer.add_char buf c 167 done; 168 Buffer.contents buf 169 end 170 171(** Write scalar with appropriate quoting *) 172let write_scalar t ?(style = `Any) value = 173 match (match style with `Any -> Quoting.choose_style value | s -> s) with 174 | `Plain | `Any -> 175 write t value 176 | `Single_quoted -> 177 write_char t '\''; 178 write t (escape_single_quoted value); 179 write_char t '\'' 180 | `Double_quoted -> 181 write_char t '"'; 182 write t (escape_double_quoted value); 183 write_char t '"' 184 | `Literal -> 185 write t "|"; 186 write_newline t; 187 String.split_on_char '\n' value |> List.iter (fun line -> 188 write_indent t; 189 write t line; 190 write_newline t 191 ) 192 | `Folded -> 193 write t ">"; 194 write_newline t; 195 String.split_on_char '\n' value |> List.iter (fun line -> 196 write_indent t; 197 write t line; 198 write_newline t 199 ) 200 201(** Write anchor if present *) 202let write_anchor t anchor = 203 match anchor with 204 | Some name -> 205 write_char t '&'; 206 write t name; 207 write_char t ' ' 208 | None -> () 209 210(** Write tag if present and not implicit *) 211let write_tag t ~implicit tag = 212 if not implicit then 213 match tag with 214 | Some tag_str -> 215 write_char t '!'; 216 write t tag_str; 217 write_char t ' ' 218 | None -> () 219 220(** Emit events *) 221 222let emit t (ev : Event.t) = 223 match ev with 224 | Event.Stream_start _ -> 225 t.state <- Stream_started 226 227 | Event.Stream_end -> 228 t.state <- Stream_ended 229 230 | Event.Document_start { version; implicit } -> 231 if not implicit then begin 232 (match version with 233 | Some (maj, min) -> 234 write t (Printf.sprintf "%%YAML %d.%d\n" maj min) 235 | None -> ()); 236 write t "---"; 237 write_newline t 238 end; 239 t.state <- Document_started 240 241 | Event.Document_end { implicit } -> 242 if not implicit then begin 243 write t "..."; 244 write_newline t 245 end; 246 t.state <- Document_ended 247 248 | Event.Alias { anchor } -> 249 if t.flow_level > 0 then begin 250 if t.need_separator then write t ", "; 251 t.need_separator <- true; 252 write_char t '*'; 253 write t anchor 254 end else begin 255 (match t.state with 256 | In_block_sequence _ -> 257 write_indent t; 258 write t "- *"; 259 write t anchor; 260 write_newline t 261 | In_block_mapping_key _ -> 262 write_indent t; 263 write_char t '*'; 264 write t anchor; 265 write t ": "; 266 t.state <- In_block_mapping_value t.indent 267 | In_block_mapping_value indent -> 268 write_char t '*'; 269 write t anchor; 270 write_newline t; 271 t.state <- In_block_mapping_key indent 272 | _ -> 273 write_char t '*'; 274 write t anchor; 275 write_newline t) 276 end 277 278 | Event.Scalar { anchor; tag; value; plain_implicit; style; _ } -> 279 if t.flow_level > 0 then begin 280 (match t.state with 281 | In_flow_mapping_key -> 282 if t.need_separator then write t ", "; 283 write_anchor t anchor; 284 write_tag t ~implicit:plain_implicit tag; 285 write_scalar t ~style value; 286 write t ": "; 287 t.need_separator <- false; 288 t.state <- In_flow_mapping_value 289 | In_flow_mapping_value -> 290 write_anchor t anchor; 291 write_tag t ~implicit:plain_implicit tag; 292 write_scalar t ~style value; 293 t.need_separator <- true; 294 t.state <- In_flow_mapping_key 295 | _ -> 296 if t.need_separator then write t ", "; 297 t.need_separator <- true; 298 write_anchor t anchor; 299 write_tag t ~implicit:plain_implicit tag; 300 write_scalar t ~style value) 301 end else begin 302 match t.state with 303 | In_block_sequence _ -> 304 write_indent t; 305 write t "- "; 306 write_anchor t anchor; 307 write_tag t ~implicit:plain_implicit tag; 308 write_scalar t ~style value; 309 write_newline t 310 | In_block_mapping_key indent -> 311 write_indent t; 312 write_anchor t anchor; 313 write_tag t ~implicit:plain_implicit tag; 314 write_scalar t ~style value; 315 write_char t ':'; 316 t.state <- In_block_mapping_value indent 317 | In_block_mapping_first_key indent -> 318 (* First key after "- ", no indent needed *) 319 write_anchor t anchor; 320 write_tag t ~implicit:plain_implicit tag; 321 write_scalar t ~style value; 322 write_char t ':'; 323 t.state <- In_block_mapping_value indent 324 | In_block_mapping_value indent -> 325 write_char t ' '; 326 write_anchor t anchor; 327 write_tag t ~implicit:plain_implicit tag; 328 write_scalar t ~style value; 329 write_newline t; 330 t.state <- In_block_mapping_key indent 331 | _ -> 332 write_anchor t anchor; 333 write_tag t ~implicit:plain_implicit tag; 334 write_scalar t ~style value; 335 write_newline t 336 end 337 338 | Event.Sequence_start { anchor; tag; implicit; style } -> 339 let use_flow = style = `Flow || t.flow_level > 0 in 340 if t.flow_level > 0 then begin 341 (match t.state with 342 | In_flow_mapping_key -> 343 if t.need_separator then write t ", "; 344 write_anchor t anchor; 345 write_tag t ~implicit tag; 346 write_char t '['; 347 t.flow_level <- t.flow_level + 1; 348 t.need_separator <- false; 349 push_state t In_flow_mapping_value; (* After ] we'll be in value position but sequence handles it *) 350 t.state <- In_flow_sequence 351 | In_flow_mapping_value -> 352 write_anchor t anchor; 353 write_tag t ~implicit tag; 354 write_char t '['; 355 t.flow_level <- t.flow_level + 1; 356 t.need_separator <- false; 357 push_state t In_flow_mapping_key; 358 t.state <- In_flow_sequence 359 | _ -> 360 if t.need_separator then write t ", "; 361 write_anchor t anchor; 362 write_tag t ~implicit tag; 363 write_char t '['; 364 t.flow_level <- t.flow_level + 1; 365 t.need_separator <- false; 366 push_state t In_flow_sequence) 367 end else begin 368 match t.state with 369 | In_block_sequence _ -> 370 write_indent t; 371 write t "- "; 372 write_anchor t anchor; 373 write_tag t ~implicit tag; 374 if use_flow then begin 375 write_char t '['; 376 t.flow_level <- t.flow_level + 1; 377 t.need_separator <- false; 378 push_state t In_flow_sequence 379 end else begin 380 write_newline t; 381 push_state t (In_block_sequence t.indent); 382 t.indent <- t.indent + t.config.indent 383 end 384 | In_block_mapping_key indent -> 385 write_indent t; 386 write_anchor t anchor; 387 write_tag t ~implicit tag; 388 write t ":"; 389 write_newline t; 390 push_state t (In_block_mapping_key indent); 391 t.indent <- t.indent + t.config.indent; 392 t.state <- In_block_sequence t.indent 393 | In_block_mapping_first_key indent -> 394 (* First key after "- " with sequence value - no indent *) 395 write_anchor t anchor; 396 write_tag t ~implicit tag; 397 write t ":"; 398 write_newline t; 399 push_state t (In_block_mapping_key indent); 400 t.indent <- t.indent + t.config.indent; 401 t.state <- In_block_sequence t.indent 402 | In_block_mapping_value indent -> 403 write_anchor t anchor; 404 write_tag t ~implicit tag; 405 if use_flow then begin 406 write_char t ' '; 407 write_char t '['; 408 t.flow_level <- t.flow_level + 1; 409 t.need_separator <- false; 410 (* Save key state to return to after flow sequence *) 411 t.state <- In_block_mapping_key indent; 412 push_state t In_flow_sequence 413 end else begin 414 write_newline t; 415 (* Save key state to return to after nested sequence *) 416 t.state <- In_block_mapping_key indent; 417 push_state t (In_block_sequence (t.indent + t.config.indent)); 418 t.indent <- t.indent + t.config.indent 419 end 420 | _ -> 421 write_anchor t anchor; 422 write_tag t ~implicit tag; 423 if use_flow then begin 424 write_char t '['; 425 t.flow_level <- t.flow_level + 1; 426 t.need_separator <- false; 427 push_state t In_flow_sequence 428 end else begin 429 push_state t (In_block_sequence t.indent); 430 t.state <- In_block_sequence t.indent 431 end 432 end 433 434 | Event.Sequence_end -> 435 if t.flow_level > 0 then begin 436 write_char t ']'; 437 t.flow_level <- t.flow_level - 1; 438 t.need_separator <- true; 439 pop_state t; 440 (* Write newline if returning to block context *) 441 (match t.state with 442 | In_block_mapping_key _ | In_block_sequence _ -> write_newline t 443 | _ -> ()) 444 end else begin 445 t.indent <- t.indent - t.config.indent; 446 pop_state t 447 end 448 449 | Event.Mapping_start { anchor; tag; implicit; style } -> 450 let use_flow = style = `Flow || t.flow_level > 0 in 451 if t.flow_level > 0 then begin 452 (match t.state with 453 | In_flow_mapping_key -> 454 if t.need_separator then write t ", "; 455 write_anchor t anchor; 456 write_tag t ~implicit tag; 457 write_char t '{'; 458 t.flow_level <- t.flow_level + 1; 459 t.need_separator <- false; 460 push_state t In_flow_mapping_value; 461 t.state <- In_flow_mapping_key 462 | In_flow_mapping_value -> 463 write_anchor t anchor; 464 write_tag t ~implicit tag; 465 write_char t '{'; 466 t.flow_level <- t.flow_level + 1; 467 t.need_separator <- false; 468 push_state t In_flow_mapping_key; 469 t.state <- In_flow_mapping_key 470 | _ -> 471 if t.need_separator then write t ", "; 472 write_anchor t anchor; 473 write_tag t ~implicit tag; 474 write_char t '{'; 475 t.flow_level <- t.flow_level + 1; 476 t.need_separator <- false; 477 push_state t In_flow_mapping_key) 478 end else begin 479 match t.state with 480 | In_block_sequence _ -> 481 write_indent t; 482 write t "- "; 483 write_anchor t anchor; 484 write_tag t ~implicit tag; 485 if use_flow then begin 486 write_char t '{'; 487 t.flow_level <- t.flow_level + 1; 488 t.need_separator <- false; 489 push_state t In_flow_mapping_key 490 end else begin 491 (* Don't write newline - first key goes on same line as "- " *) 492 push_state t (In_block_sequence t.indent); 493 t.indent <- t.indent + t.config.indent; 494 t.state <- In_block_mapping_first_key t.indent 495 end 496 | In_block_mapping_key indent -> 497 write_indent t; 498 write_anchor t anchor; 499 write_tag t ~implicit tag; 500 write t ":"; 501 write_newline t; 502 push_state t (In_block_mapping_key indent); 503 t.indent <- t.indent + t.config.indent; 504 t.state <- In_block_mapping_key t.indent 505 | In_block_mapping_first_key indent -> 506 (* First key after "- " with mapping value - no indent *) 507 write_anchor t anchor; 508 write_tag t ~implicit tag; 509 write t ":"; 510 write_newline t; 511 push_state t (In_block_mapping_key indent); 512 t.indent <- t.indent + t.config.indent; 513 t.state <- In_block_mapping_key t.indent 514 | In_block_mapping_value indent -> 515 write_anchor t anchor; 516 write_tag t ~implicit tag; 517 if use_flow then begin 518 write_char t ' '; 519 write_char t '{'; 520 t.flow_level <- t.flow_level + 1; 521 t.need_separator <- false; 522 (* Save key state to return to after flow mapping *) 523 t.state <- In_block_mapping_key indent; 524 push_state t In_flow_mapping_key 525 end else begin 526 write_newline t; 527 (* Save key state to return to after nested mapping *) 528 t.state <- In_block_mapping_key indent; 529 push_state t (In_block_mapping_key (t.indent + t.config.indent)); 530 t.indent <- t.indent + t.config.indent 531 end 532 | _ -> 533 write_anchor t anchor; 534 write_tag t ~implicit tag; 535 if use_flow then begin 536 write_char t '{'; 537 t.flow_level <- t.flow_level + 1; 538 t.need_separator <- false; 539 push_state t In_flow_mapping_key 540 end else begin 541 push_state t (In_block_mapping_key t.indent); 542 t.state <- In_block_mapping_key t.indent 543 end 544 end 545 546 | Event.Mapping_end -> 547 if t.flow_level > 0 then begin 548 write_char t '}'; 549 t.flow_level <- t.flow_level - 1; 550 t.need_separator <- true; 551 pop_state t; 552 (* Write newline if returning to block context *) 553 (match t.state with 554 | In_block_mapping_key _ | In_block_sequence _ -> write_newline t 555 | _ -> ()) 556 end else begin 557 t.indent <- t.indent - t.config.indent; 558 pop_state t 559 end 560 561(** Access to the underlying buffer for advanced use. 562 Returns None if emitter is writing to a Writer instead of Buffer. *) 563let buffer t = 564 match t.sink with 565 | Buffer_sink buf -> Some buf 566 | Writer_sink _ -> None 567 568(** Get config *) 569let config t = t.config 570 571(** Check if emitter is writing to a Writer *) 572let is_streaming t = 573 match t.sink with 574 | Writer_sink _ -> true 575 | Buffer_sink _ -> false 576 577(** Flush the writer sink (no-op for buffer sink) *) 578let flush t = 579 match t.sink with 580 | Writer_sink w -> Bytesrw.Bytes.Writer.write_eod w 581 | Buffer_sink _ -> ()