Pure OCaml Yaml 1.2 reader and writer using Bytesrw
at main 19 kB view raw
1(*--------------------------------------------------------------------------- 2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 SPDX-License-Identifier: ISC 4 ---------------------------------------------------------------------------*) 5 6(** Emitter - converts YAML data structures to string output 7 8 The emitter can write to either a Buffer (default) or directly to a bytesrw 9 Bytes.Writer for streaming output. *) 10 11type config = { 12 encoding : Encoding.t; 13 scalar_style : Scalar_style.t; 14 layout_style : Layout_style.t; 15 indent : int; 16 width : int; 17 canonical : bool; 18} 19 20let default_config = 21 { 22 encoding = `Utf8; 23 scalar_style = `Any; 24 layout_style = `Any; 25 indent = 2; 26 width = 80; 27 canonical = false; 28 } 29 30type state = 31 | Initial 32 | Stream_started 33 | Document_started 34 | In_block_sequence of int (* indent level *) 35 | In_block_mapping_key of int 36 | In_block_mapping_value of int 37 | In_block_mapping_first_key of 38 int (* first key after "- ", no indent needed *) 39 | In_flow_sequence 40 | In_flow_mapping_key 41 | In_flow_mapping_value 42 | Document_ended 43 | Stream_ended 44 45(** Output sink - either a Buffer or a bytesrw Writer *) 46type sink = Buffer_sink of Buffer.t | Writer_sink of Bytesrw.Bytes.Writer.t 47 48type t = { 49 config : config; 50 sink : sink; 51 mutable state : state; 52 mutable states : state list; 53 mutable indent : int; 54 mutable flow_level : int; 55 mutable need_separator : bool; 56} 57 58let create ?(config = default_config) () = 59 { 60 config; 61 sink = Buffer_sink (Buffer.create 1024); 62 state = Initial; 63 states = []; 64 indent = 0; 65 flow_level = 0; 66 need_separator = false; 67 } 68 69(** Create an emitter that writes directly to a Bytes.Writer *) 70let of_writer ?(config = default_config) writer = 71 { 72 config; 73 sink = Writer_sink writer; 74 state = Initial; 75 states = []; 76 indent = 0; 77 flow_level = 0; 78 need_separator = false; 79 } 80 81let contents t = 82 match t.sink with 83 | Buffer_sink buf -> Buffer.contents buf 84 | Writer_sink _ -> "" (* No accumulated content for writer sink *) 85 86let reset t = 87 (match t.sink with 88 | Buffer_sink buf -> Buffer.clear buf 89 | Writer_sink _ -> ()); 90 t.state <- Initial; 91 t.states <- []; 92 t.indent <- 0; 93 t.flow_level <- 0; 94 t.need_separator <- false 95 96(** Output helpers - write to appropriate sink *) 97 98let write t s = 99 match t.sink with 100 | Buffer_sink buf -> Buffer.add_string buf s 101 | Writer_sink w -> Bytesrw.Bytes.Writer.write_string w s 102 103let write_char t c = 104 match t.sink with 105 | Buffer_sink buf -> Buffer.add_char buf c 106 | Writer_sink w -> 107 let b = Bytes.make 1 c in 108 Bytesrw.Bytes.Writer.write_bytes w b 109 110let write_indent t = 111 if t.indent <= 8 then 112 for _ = 1 to t.indent do 113 write_char t ' ' 114 done 115 else write t (String.make t.indent ' ') 116 117let write_newline t = write_char t '\n' 118 119let push_state t s = 120 t.states <- t.state :: t.states; 121 t.state <- s 122 123let pop_state t = 124 match t.states with 125 | s :: rest -> 126 t.state <- s; 127 t.states <- rest 128 | [] -> t.state <- Stream_ended 129 130(** Escape a string for double-quoted output. Uses a buffer to batch writes 131 instead of character-by-character. *) 132let escape_double_quoted value = 133 let len = String.length value in 134 (* Check if any escaping is needed *) 135 let needs_escape = ref false in 136 for i = 0 to len - 1 do 137 match value.[i] with 138 | '"' | '\\' | '\n' | '\r' | '\t' -> needs_escape := true 139 | c when c < ' ' -> needs_escape := true 140 | _ -> () 141 done; 142 if not !needs_escape then value 143 else begin 144 let buf = Buffer.create (len + (len / 4)) in 145 for i = 0 to len - 1 do 146 match value.[i] with 147 | '"' -> Buffer.add_string buf "\\\"" 148 | '\\' -> Buffer.add_string buf "\\\\" 149 | '\n' -> Buffer.add_string buf "\\n" 150 | '\r' -> Buffer.add_string buf "\\r" 151 | '\t' -> Buffer.add_string buf "\\t" 152 | c when c < ' ' -> 153 Buffer.add_string buf (Printf.sprintf "\\x%02x" (Char.code c)) 154 | c -> Buffer.add_char buf c 155 done; 156 Buffer.contents buf 157 end 158 159(** Escape a string for single-quoted output. *) 160let escape_single_quoted value = 161 if not (String.contains value '\'') then value 162 else begin 163 let len = String.length value in 164 let buf = Buffer.create (len + (len / 8)) in 165 for i = 0 to len - 1 do 166 let c = value.[i] in 167 if c = '\'' then Buffer.add_string buf "''" else Buffer.add_char buf c 168 done; 169 Buffer.contents buf 170 end 171 172(** Write scalar with appropriate quoting *) 173let write_scalar t ?(style = `Any) value = 174 match match style with `Any -> Quoting.choose_style value | s -> s with 175 | `Plain | `Any -> write t value 176 | `Single_quoted -> 177 write_char t '\''; 178 write t (escape_single_quoted value); 179 write_char t '\'' 180 | `Double_quoted -> 181 write_char t '"'; 182 write t (escape_double_quoted value); 183 write_char t '"' 184 | `Literal -> 185 write t "|"; 186 write_newline t; 187 String.split_on_char '\n' value 188 |> List.iter (fun line -> 189 write_indent t; 190 write t line; 191 write_newline t) 192 | `Folded -> 193 write t ">"; 194 write_newline t; 195 String.split_on_char '\n' value 196 |> List.iter (fun line -> 197 write_indent t; 198 write t line; 199 write_newline t) 200 201(** Write anchor if present *) 202let write_anchor t anchor = 203 match anchor with 204 | Some name -> 205 write_char t '&'; 206 write t name; 207 write_char t ' ' 208 | None -> () 209 210(** Write tag if present and not implicit *) 211let write_tag t ~implicit tag = 212 if not implicit then 213 match tag with 214 | Some tag_str -> 215 write_char t '!'; 216 write t tag_str; 217 write_char t ' ' 218 | None -> () 219 220(** Emit events *) 221 222let emit t (ev : Event.t) = 223 match ev with 224 | Event.Stream_start _ -> t.state <- Stream_started 225 | Event.Stream_end -> t.state <- Stream_ended 226 | Event.Document_start { version; implicit } -> 227 if not implicit then begin 228 (match version with 229 | Some (maj, min) -> write t (Printf.sprintf "%%YAML %d.%d\n" maj min) 230 | None -> ()); 231 write t "---"; 232 write_newline t 233 end; 234 t.state <- Document_started 235 | Event.Document_end { implicit } -> 236 if not implicit then begin 237 write t "..."; 238 write_newline t 239 end; 240 t.state <- Document_ended 241 | Event.Alias { anchor } -> 242 if t.flow_level > 0 then begin 243 if t.need_separator then write t ", "; 244 t.need_separator <- true; 245 write_char t '*'; 246 write t anchor 247 end 248 else begin 249 match t.state with 250 | In_block_sequence _ -> 251 write_indent t; 252 write t "- *"; 253 write t anchor; 254 write_newline t 255 | In_block_mapping_key _ -> 256 write_indent t; 257 write_char t '*'; 258 write t anchor; 259 write t ": "; 260 t.state <- In_block_mapping_value t.indent 261 | In_block_mapping_value indent -> 262 write_char t '*'; 263 write t anchor; 264 write_newline t; 265 t.state <- In_block_mapping_key indent 266 | _ -> 267 write_char t '*'; 268 write t anchor; 269 write_newline t 270 end 271 | Event.Scalar { anchor; tag; value; plain_implicit; style; _ } -> 272 if t.flow_level > 0 then begin 273 match t.state with 274 | In_flow_mapping_key -> 275 if t.need_separator then write t ", "; 276 write_anchor t anchor; 277 write_tag t ~implicit:plain_implicit tag; 278 write_scalar t ~style value; 279 write t ": "; 280 t.need_separator <- false; 281 t.state <- In_flow_mapping_value 282 | In_flow_mapping_value -> 283 if t.need_separator then begin 284 (* We just finished a nested structure (array/mapping), 285 so this scalar is the next key, not a value *) 286 write t ", "; 287 write_anchor t anchor; 288 write_tag t ~implicit:plain_implicit tag; 289 write_scalar t ~style value; 290 write t ": "; 291 t.need_separator <- false; 292 t.state <- In_flow_mapping_value 293 end 294 else begin 295 (* Normal value scalar *) 296 write_anchor t anchor; 297 write_tag t ~implicit:plain_implicit tag; 298 write_scalar t ~style value; 299 t.need_separator <- true; 300 t.state <- In_flow_mapping_key 301 end 302 | _ -> 303 if t.need_separator then write t ", "; 304 t.need_separator <- true; 305 write_anchor t anchor; 306 write_tag t ~implicit:plain_implicit tag; 307 write_scalar t ~style value 308 end 309 else begin 310 match t.state with 311 | In_block_sequence _ -> 312 write_indent t; 313 write t "- "; 314 write_anchor t anchor; 315 write_tag t ~implicit:plain_implicit tag; 316 write_scalar t ~style value; 317 write_newline t 318 | In_block_mapping_key indent -> 319 write_indent t; 320 write_anchor t anchor; 321 write_tag t ~implicit:plain_implicit tag; 322 write_scalar t ~style value; 323 write_char t ':'; 324 t.state <- In_block_mapping_value indent 325 | In_block_mapping_first_key indent -> 326 (* First key after "- ", no indent needed *) 327 write_anchor t anchor; 328 write_tag t ~implicit:plain_implicit tag; 329 write_scalar t ~style value; 330 write_char t ':'; 331 t.state <- In_block_mapping_value indent 332 | In_block_mapping_value indent -> 333 write_char t ' '; 334 write_anchor t anchor; 335 write_tag t ~implicit:plain_implicit tag; 336 write_scalar t ~style value; 337 write_newline t; 338 t.state <- In_block_mapping_key indent 339 | _ -> 340 write_anchor t anchor; 341 write_tag t ~implicit:plain_implicit tag; 342 write_scalar t ~style value; 343 write_newline t 344 end 345 | Event.Sequence_start { anchor; tag; implicit; style } -> 346 let use_flow = style = `Flow || t.flow_level > 0 in 347 if t.flow_level > 0 then begin 348 match t.state with 349 | In_flow_mapping_key -> 350 if t.need_separator then write t ", "; 351 write_anchor t anchor; 352 write_tag t ~implicit tag; 353 write_char t '['; 354 t.flow_level <- t.flow_level + 1; 355 t.need_separator <- false; 356 push_state t In_flow_mapping_value; 357 (* After ] we'll be in value position but sequence handles it *) 358 t.state <- In_flow_sequence 359 | In_flow_mapping_value -> 360 write_anchor t anchor; 361 write_tag t ~implicit tag; 362 write_char t '['; 363 t.flow_level <- t.flow_level + 1; 364 t.need_separator <- false; 365 push_state t In_flow_mapping_key; 366 t.state <- In_flow_sequence 367 | _ -> 368 if t.need_separator then write t ", "; 369 write_anchor t anchor; 370 write_tag t ~implicit tag; 371 write_char t '['; 372 t.flow_level <- t.flow_level + 1; 373 t.need_separator <- false; 374 push_state t In_flow_sequence 375 end 376 else begin 377 match t.state with 378 | In_block_sequence _ -> 379 write_indent t; 380 write t "- "; 381 write_anchor t anchor; 382 write_tag t ~implicit tag; 383 if use_flow then begin 384 write_char t '['; 385 t.flow_level <- t.flow_level + 1; 386 t.need_separator <- false; 387 push_state t In_flow_sequence 388 end 389 else begin 390 write_newline t; 391 push_state t (In_block_sequence t.indent); 392 t.indent <- t.indent + t.config.indent 393 end 394 | In_block_mapping_key indent -> 395 write_indent t; 396 write_anchor t anchor; 397 write_tag t ~implicit tag; 398 write t ":"; 399 write_newline t; 400 push_state t (In_block_mapping_key indent); 401 t.indent <- t.indent + t.config.indent; 402 t.state <- In_block_sequence t.indent 403 | In_block_mapping_first_key indent -> 404 (* First key after "- " with sequence value - no indent *) 405 write_anchor t anchor; 406 write_tag t ~implicit tag; 407 write t ":"; 408 write_newline t; 409 push_state t (In_block_mapping_key indent); 410 t.indent <- t.indent + t.config.indent; 411 t.state <- In_block_sequence t.indent 412 | In_block_mapping_value indent -> 413 write_anchor t anchor; 414 write_tag t ~implicit tag; 415 if use_flow then begin 416 write_char t ' '; 417 write_char t '['; 418 t.flow_level <- t.flow_level + 1; 419 t.need_separator <- false; 420 (* Save key state to return to after flow sequence *) 421 t.state <- In_block_mapping_key indent; 422 push_state t In_flow_sequence 423 end 424 else begin 425 write_newline t; 426 (* Save key state to return to after nested sequence *) 427 t.state <- In_block_mapping_key indent; 428 push_state t (In_block_sequence (t.indent + t.config.indent)); 429 t.indent <- t.indent + t.config.indent 430 end 431 | _ -> 432 write_anchor t anchor; 433 write_tag t ~implicit tag; 434 if use_flow then begin 435 write_char t '['; 436 t.flow_level <- t.flow_level + 1; 437 t.need_separator <- false; 438 push_state t In_flow_sequence 439 end 440 else begin 441 push_state t (In_block_sequence t.indent); 442 t.state <- In_block_sequence t.indent 443 end 444 end 445 | Event.Sequence_end -> 446 if t.flow_level > 0 then begin 447 write_char t ']'; 448 t.flow_level <- t.flow_level - 1; 449 t.need_separator <- true; 450 pop_state t; 451 (* Write newline if returning to block context *) 452 match t.state with 453 | In_block_mapping_key _ | In_block_sequence _ -> write_newline t 454 | _ -> () 455 end 456 else begin 457 t.indent <- t.indent - t.config.indent; 458 pop_state t 459 end 460 | Event.Mapping_start { anchor; tag; implicit; style } -> 461 let use_flow = style = `Flow || t.flow_level > 0 in 462 if t.flow_level > 0 then begin 463 match t.state with 464 | In_flow_mapping_key -> 465 if t.need_separator then write t ", "; 466 write_anchor t anchor; 467 write_tag t ~implicit tag; 468 write_char t '{'; 469 t.flow_level <- t.flow_level + 1; 470 t.need_separator <- false; 471 push_state t In_flow_mapping_value; 472 t.state <- In_flow_mapping_key 473 | In_flow_mapping_value -> 474 write_anchor t anchor; 475 write_tag t ~implicit tag; 476 write_char t '{'; 477 t.flow_level <- t.flow_level + 1; 478 t.need_separator <- false; 479 push_state t In_flow_mapping_key; 480 t.state <- In_flow_mapping_key 481 | _ -> 482 if t.need_separator then write t ", "; 483 write_anchor t anchor; 484 write_tag t ~implicit tag; 485 write_char t '{'; 486 t.flow_level <- t.flow_level + 1; 487 t.need_separator <- false; 488 push_state t In_flow_mapping_key 489 end 490 else begin 491 match t.state with 492 | In_block_sequence _ -> 493 write_indent t; 494 write t "- "; 495 write_anchor t anchor; 496 write_tag t ~implicit tag; 497 if use_flow then begin 498 write_char t '{'; 499 t.flow_level <- t.flow_level + 1; 500 t.need_separator <- false; 501 push_state t In_flow_mapping_key 502 end 503 else begin 504 (* Don't write newline - first key goes on same line as "- " *) 505 push_state t (In_block_sequence t.indent); 506 t.indent <- t.indent + t.config.indent; 507 t.state <- In_block_mapping_first_key t.indent 508 end 509 | In_block_mapping_key indent -> 510 write_indent t; 511 write_anchor t anchor; 512 write_tag t ~implicit tag; 513 write t ":"; 514 write_newline t; 515 push_state t (In_block_mapping_key indent); 516 t.indent <- t.indent + t.config.indent; 517 t.state <- In_block_mapping_key t.indent 518 | In_block_mapping_first_key indent -> 519 (* First key after "- " with mapping value - no indent *) 520 write_anchor t anchor; 521 write_tag t ~implicit tag; 522 write t ":"; 523 write_newline t; 524 push_state t (In_block_mapping_key indent); 525 t.indent <- t.indent + t.config.indent; 526 t.state <- In_block_mapping_key t.indent 527 | In_block_mapping_value indent -> 528 write_anchor t anchor; 529 write_tag t ~implicit tag; 530 if use_flow then begin 531 write_char t ' '; 532 write_char t '{'; 533 t.flow_level <- t.flow_level + 1; 534 t.need_separator <- false; 535 (* Save key state to return to after flow mapping *) 536 t.state <- In_block_mapping_key indent; 537 push_state t In_flow_mapping_key 538 end 539 else begin 540 write_newline t; 541 (* Save key state to return to after nested mapping *) 542 t.state <- In_block_mapping_key indent; 543 push_state t (In_block_mapping_key (t.indent + t.config.indent)); 544 t.indent <- t.indent + t.config.indent 545 end 546 | _ -> 547 write_anchor t anchor; 548 write_tag t ~implicit tag; 549 if use_flow then begin 550 write_char t '{'; 551 t.flow_level <- t.flow_level + 1; 552 t.need_separator <- false; 553 push_state t In_flow_mapping_key 554 end 555 else begin 556 push_state t (In_block_mapping_key t.indent); 557 t.state <- In_block_mapping_key t.indent 558 end 559 end 560 | Event.Mapping_end -> 561 if t.flow_level > 0 then begin 562 write_char t '}'; 563 t.flow_level <- t.flow_level - 1; 564 t.need_separator <- true; 565 pop_state t; 566 (* Write newline if returning to block context *) 567 match t.state with 568 | In_block_mapping_key _ | In_block_sequence _ -> write_newline t 569 | _ -> () 570 end 571 else begin 572 t.indent <- t.indent - t.config.indent; 573 pop_state t 574 end 575 576(** Access to the underlying buffer for advanced use. Returns None if emitter is 577 writing to a Writer instead of Buffer. *) 578let buffer t = 579 match t.sink with Buffer_sink buf -> Some buf | Writer_sink _ -> None 580 581(** Get config *) 582let config t = t.config 583 584(** Check if emitter is writing to a Writer *) 585let is_streaming t = 586 match t.sink with Writer_sink _ -> true | Buffer_sink _ -> false 587 588(** Flush the writer sink (no-op for buffer sink) *) 589let flush t = 590 match t.sink with 591 | Writer_sink w -> Bytesrw.Bytes.Writer.write_eod w 592 | Buffer_sink _ -> ()