Pure OCaml Yaml 1.2 reader and writer using Bytesrw
1(*---------------------------------------------------------------------------
2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3 SPDX-License-Identifier: ISC
4 ---------------------------------------------------------------------------*)
5
6(** Emitter - converts YAML data structures to string output
7
8 The emitter can write to either a Buffer (default) or directly to a bytesrw
9 Bytes.Writer for streaming output. *)
10
11type config = {
12 encoding : Encoding.t;
13 scalar_style : Scalar_style.t;
14 layout_style : Layout_style.t;
15 indent : int;
16 width : int;
17 canonical : bool;
18}
19
20let default_config =
21 {
22 encoding = `Utf8;
23 scalar_style = `Any;
24 layout_style = `Any;
25 indent = 2;
26 width = 80;
27 canonical = false;
28 }
29
30type state =
31 | Initial
32 | Stream_started
33 | Document_started
34 | In_block_sequence of int (* indent level *)
35 | In_block_mapping_key of int
36 | In_block_mapping_value of int
37 | In_block_mapping_first_key of
38 int (* first key after "- ", no indent needed *)
39 | In_flow_sequence
40 | In_flow_mapping_key
41 | In_flow_mapping_value
42 | Document_ended
43 | Stream_ended
44
45(** Output sink - either a Buffer or a bytesrw Writer *)
46type sink = Buffer_sink of Buffer.t | Writer_sink of Bytesrw.Bytes.Writer.t
47
48type t = {
49 config : config;
50 sink : sink;
51 mutable state : state;
52 mutable states : state list;
53 mutable indent : int;
54 mutable flow_level : int;
55 mutable need_separator : bool;
56}
57
58let create ?(config = default_config) () =
59 {
60 config;
61 sink = Buffer_sink (Buffer.create 1024);
62 state = Initial;
63 states = [];
64 indent = 0;
65 flow_level = 0;
66 need_separator = false;
67 }
68
69(** Create an emitter that writes directly to a Bytes.Writer *)
70let of_writer ?(config = default_config) writer =
71 {
72 config;
73 sink = Writer_sink writer;
74 state = Initial;
75 states = [];
76 indent = 0;
77 flow_level = 0;
78 need_separator = false;
79 }
80
81let contents t =
82 match t.sink with
83 | Buffer_sink buf -> Buffer.contents buf
84 | Writer_sink _ -> "" (* No accumulated content for writer sink *)
85
86let reset t =
87 (match t.sink with
88 | Buffer_sink buf -> Buffer.clear buf
89 | Writer_sink _ -> ());
90 t.state <- Initial;
91 t.states <- [];
92 t.indent <- 0;
93 t.flow_level <- 0;
94 t.need_separator <- false
95
96(** Output helpers - write to appropriate sink *)
97
98let write t s =
99 match t.sink with
100 | Buffer_sink buf -> Buffer.add_string buf s
101 | Writer_sink w -> Bytesrw.Bytes.Writer.write_string w s
102
103let write_char t c =
104 match t.sink with
105 | Buffer_sink buf -> Buffer.add_char buf c
106 | Writer_sink w ->
107 let b = Bytes.make 1 c in
108 Bytesrw.Bytes.Writer.write_bytes w b
109
110let write_indent t =
111 if t.indent <= 8 then
112 for _ = 1 to t.indent do
113 write_char t ' '
114 done
115 else write t (String.make t.indent ' ')
116
117let write_newline t = write_char t '\n'
118
119let push_state t s =
120 t.states <- t.state :: t.states;
121 t.state <- s
122
123let pop_state t =
124 match t.states with
125 | s :: rest ->
126 t.state <- s;
127 t.states <- rest
128 | [] -> t.state <- Stream_ended
129
130(** Escape a string for double-quoted output. Uses a buffer to batch writes
131 instead of character-by-character. *)
132let escape_double_quoted value =
133 let len = String.length value in
134 (* Check if any escaping is needed *)
135 let needs_escape = ref false in
136 for i = 0 to len - 1 do
137 match value.[i] with
138 | '"' | '\\' | '\n' | '\r' | '\t' -> needs_escape := true
139 | c when c < ' ' -> needs_escape := true
140 | _ -> ()
141 done;
142 if not !needs_escape then value
143 else begin
144 let buf = Buffer.create (len + (len / 4)) in
145 for i = 0 to len - 1 do
146 match value.[i] with
147 | '"' -> Buffer.add_string buf "\\\""
148 | '\\' -> Buffer.add_string buf "\\\\"
149 | '\n' -> Buffer.add_string buf "\\n"
150 | '\r' -> Buffer.add_string buf "\\r"
151 | '\t' -> Buffer.add_string buf "\\t"
152 | c when c < ' ' ->
153 Buffer.add_string buf (Printf.sprintf "\\x%02x" (Char.code c))
154 | c -> Buffer.add_char buf c
155 done;
156 Buffer.contents buf
157 end
158
159(** Escape a string for single-quoted output. *)
160let escape_single_quoted value =
161 if not (String.contains value '\'') then value
162 else begin
163 let len = String.length value in
164 let buf = Buffer.create (len + (len / 8)) in
165 for i = 0 to len - 1 do
166 let c = value.[i] in
167 if c = '\'' then Buffer.add_string buf "''" else Buffer.add_char buf c
168 done;
169 Buffer.contents buf
170 end
171
172(** Write scalar with appropriate quoting *)
173let write_scalar t ?(style = `Any) value =
174 match match style with `Any -> Quoting.choose_style value | s -> s with
175 | `Plain | `Any -> write t value
176 | `Single_quoted ->
177 write_char t '\'';
178 write t (escape_single_quoted value);
179 write_char t '\''
180 | `Double_quoted ->
181 write_char t '"';
182 write t (escape_double_quoted value);
183 write_char t '"'
184 | `Literal ->
185 write t "|";
186 write_newline t;
187 String.split_on_char '\n' value
188 |> List.iter (fun line ->
189 write_indent t;
190 write t line;
191 write_newline t)
192 | `Folded ->
193 write t ">";
194 write_newline t;
195 String.split_on_char '\n' value
196 |> List.iter (fun line ->
197 write_indent t;
198 write t line;
199 write_newline t)
200
201(** Write anchor if present *)
202let write_anchor t anchor =
203 match anchor with
204 | Some name ->
205 write_char t '&';
206 write t name;
207 write_char t ' '
208 | None -> ()
209
210(** Write tag if present and not implicit *)
211let write_tag t ~implicit tag =
212 if not implicit then
213 match tag with
214 | Some tag_str ->
215 write_char t '!';
216 write t tag_str;
217 write_char t ' '
218 | None -> ()
219
220(** Emit events *)
221
222let emit t (ev : Event.t) =
223 match ev with
224 | Event.Stream_start _ -> t.state <- Stream_started
225 | Event.Stream_end -> t.state <- Stream_ended
226 | Event.Document_start { version; implicit } ->
227 if not implicit then begin
228 (match version with
229 | Some (maj, min) -> write t (Printf.sprintf "%%YAML %d.%d\n" maj min)
230 | None -> ());
231 write t "---";
232 write_newline t
233 end;
234 t.state <- Document_started
235 | Event.Document_end { implicit } ->
236 if not implicit then begin
237 write t "...";
238 write_newline t
239 end;
240 t.state <- Document_ended
241 | Event.Alias { anchor } ->
242 if t.flow_level > 0 then begin
243 if t.need_separator then write t ", ";
244 t.need_separator <- true;
245 write_char t '*';
246 write t anchor
247 end
248 else begin
249 match t.state with
250 | In_block_sequence _ ->
251 write_indent t;
252 write t "- *";
253 write t anchor;
254 write_newline t
255 | In_block_mapping_key _ ->
256 write_indent t;
257 write_char t '*';
258 write t anchor;
259 write t ": ";
260 t.state <- In_block_mapping_value t.indent
261 | In_block_mapping_value indent ->
262 write_char t '*';
263 write t anchor;
264 write_newline t;
265 t.state <- In_block_mapping_key indent
266 | _ ->
267 write_char t '*';
268 write t anchor;
269 write_newline t
270 end
271 | Event.Scalar { anchor; tag; value; plain_implicit; style; _ } ->
272 if t.flow_level > 0 then begin
273 match t.state with
274 | In_flow_mapping_key ->
275 if t.need_separator then write t ", ";
276 write_anchor t anchor;
277 write_tag t ~implicit:plain_implicit tag;
278 write_scalar t ~style value;
279 write t ": ";
280 t.need_separator <- false;
281 t.state <- In_flow_mapping_value
282 | In_flow_mapping_value ->
283 if t.need_separator then begin
284 (* We just finished a nested structure (array/mapping),
285 so this scalar is the next key, not a value *)
286 write t ", ";
287 write_anchor t anchor;
288 write_tag t ~implicit:plain_implicit tag;
289 write_scalar t ~style value;
290 write t ": ";
291 t.need_separator <- false;
292 t.state <- In_flow_mapping_value
293 end
294 else begin
295 (* Normal value scalar *)
296 write_anchor t anchor;
297 write_tag t ~implicit:plain_implicit tag;
298 write_scalar t ~style value;
299 t.need_separator <- true;
300 t.state <- In_flow_mapping_key
301 end
302 | _ ->
303 if t.need_separator then write t ", ";
304 t.need_separator <- true;
305 write_anchor t anchor;
306 write_tag t ~implicit:plain_implicit tag;
307 write_scalar t ~style value
308 end
309 else begin
310 match t.state with
311 | In_block_sequence _ ->
312 write_indent t;
313 write t "- ";
314 write_anchor t anchor;
315 write_tag t ~implicit:plain_implicit tag;
316 write_scalar t ~style value;
317 write_newline t
318 | In_block_mapping_key indent ->
319 write_indent t;
320 write_anchor t anchor;
321 write_tag t ~implicit:plain_implicit tag;
322 write_scalar t ~style value;
323 write_char t ':';
324 t.state <- In_block_mapping_value indent
325 | In_block_mapping_first_key indent ->
326 (* First key after "- ", no indent needed *)
327 write_anchor t anchor;
328 write_tag t ~implicit:plain_implicit tag;
329 write_scalar t ~style value;
330 write_char t ':';
331 t.state <- In_block_mapping_value indent
332 | In_block_mapping_value indent ->
333 write_char t ' ';
334 write_anchor t anchor;
335 write_tag t ~implicit:plain_implicit tag;
336 write_scalar t ~style value;
337 write_newline t;
338 t.state <- In_block_mapping_key indent
339 | _ ->
340 write_anchor t anchor;
341 write_tag t ~implicit:plain_implicit tag;
342 write_scalar t ~style value;
343 write_newline t
344 end
345 | Event.Sequence_start { anchor; tag; implicit; style } ->
346 let use_flow = style = `Flow || t.flow_level > 0 in
347 if t.flow_level > 0 then begin
348 match t.state with
349 | In_flow_mapping_key ->
350 if t.need_separator then write t ", ";
351 write_anchor t anchor;
352 write_tag t ~implicit tag;
353 write_char t '[';
354 t.flow_level <- t.flow_level + 1;
355 t.need_separator <- false;
356 push_state t In_flow_mapping_value;
357 (* After ] we'll be in value position but sequence handles it *)
358 t.state <- In_flow_sequence
359 | In_flow_mapping_value ->
360 write_anchor t anchor;
361 write_tag t ~implicit tag;
362 write_char t '[';
363 t.flow_level <- t.flow_level + 1;
364 t.need_separator <- false;
365 push_state t In_flow_mapping_key;
366 t.state <- In_flow_sequence
367 | _ ->
368 if t.need_separator then write t ", ";
369 write_anchor t anchor;
370 write_tag t ~implicit tag;
371 write_char t '[';
372 t.flow_level <- t.flow_level + 1;
373 t.need_separator <- false;
374 push_state t In_flow_sequence
375 end
376 else begin
377 match t.state with
378 | In_block_sequence _ ->
379 write_indent t;
380 write t "- ";
381 write_anchor t anchor;
382 write_tag t ~implicit tag;
383 if use_flow then begin
384 write_char t '[';
385 t.flow_level <- t.flow_level + 1;
386 t.need_separator <- false;
387 push_state t In_flow_sequence
388 end
389 else begin
390 write_newline t;
391 push_state t (In_block_sequence t.indent);
392 t.indent <- t.indent + t.config.indent
393 end
394 | In_block_mapping_key indent ->
395 write_indent t;
396 write_anchor t anchor;
397 write_tag t ~implicit tag;
398 write t ":";
399 write_newline t;
400 push_state t (In_block_mapping_key indent);
401 t.indent <- t.indent + t.config.indent;
402 t.state <- In_block_sequence t.indent
403 | In_block_mapping_first_key indent ->
404 (* First key after "- " with sequence value - no indent *)
405 write_anchor t anchor;
406 write_tag t ~implicit tag;
407 write t ":";
408 write_newline t;
409 push_state t (In_block_mapping_key indent);
410 t.indent <- t.indent + t.config.indent;
411 t.state <- In_block_sequence t.indent
412 | In_block_mapping_value indent ->
413 write_anchor t anchor;
414 write_tag t ~implicit tag;
415 if use_flow then begin
416 write_char t ' ';
417 write_char t '[';
418 t.flow_level <- t.flow_level + 1;
419 t.need_separator <- false;
420 (* Save key state to return to after flow sequence *)
421 t.state <- In_block_mapping_key indent;
422 push_state t In_flow_sequence
423 end
424 else begin
425 write_newline t;
426 (* Save key state to return to after nested sequence *)
427 t.state <- In_block_mapping_key indent;
428 push_state t (In_block_sequence (t.indent + t.config.indent));
429 t.indent <- t.indent + t.config.indent
430 end
431 | _ ->
432 write_anchor t anchor;
433 write_tag t ~implicit tag;
434 if use_flow then begin
435 write_char t '[';
436 t.flow_level <- t.flow_level + 1;
437 t.need_separator <- false;
438 push_state t In_flow_sequence
439 end
440 else begin
441 push_state t (In_block_sequence t.indent);
442 t.state <- In_block_sequence t.indent
443 end
444 end
445 | Event.Sequence_end ->
446 if t.flow_level > 0 then begin
447 write_char t ']';
448 t.flow_level <- t.flow_level - 1;
449 t.need_separator <- true;
450 pop_state t;
451 (* Write newline if returning to block context *)
452 match t.state with
453 | In_block_mapping_key _ | In_block_sequence _ -> write_newline t
454 | _ -> ()
455 end
456 else begin
457 t.indent <- t.indent - t.config.indent;
458 pop_state t
459 end
460 | Event.Mapping_start { anchor; tag; implicit; style } ->
461 let use_flow = style = `Flow || t.flow_level > 0 in
462 if t.flow_level > 0 then begin
463 match t.state with
464 | In_flow_mapping_key ->
465 if t.need_separator then write t ", ";
466 write_anchor t anchor;
467 write_tag t ~implicit tag;
468 write_char t '{';
469 t.flow_level <- t.flow_level + 1;
470 t.need_separator <- false;
471 push_state t In_flow_mapping_value;
472 t.state <- In_flow_mapping_key
473 | In_flow_mapping_value ->
474 write_anchor t anchor;
475 write_tag t ~implicit tag;
476 write_char t '{';
477 t.flow_level <- t.flow_level + 1;
478 t.need_separator <- false;
479 push_state t In_flow_mapping_key;
480 t.state <- In_flow_mapping_key
481 | _ ->
482 if t.need_separator then write t ", ";
483 write_anchor t anchor;
484 write_tag t ~implicit tag;
485 write_char t '{';
486 t.flow_level <- t.flow_level + 1;
487 t.need_separator <- false;
488 push_state t In_flow_mapping_key
489 end
490 else begin
491 match t.state with
492 | In_block_sequence _ ->
493 write_indent t;
494 write t "- ";
495 write_anchor t anchor;
496 write_tag t ~implicit tag;
497 if use_flow then begin
498 write_char t '{';
499 t.flow_level <- t.flow_level + 1;
500 t.need_separator <- false;
501 push_state t In_flow_mapping_key
502 end
503 else begin
504 (* Don't write newline - first key goes on same line as "- " *)
505 push_state t (In_block_sequence t.indent);
506 t.indent <- t.indent + t.config.indent;
507 t.state <- In_block_mapping_first_key t.indent
508 end
509 | In_block_mapping_key indent ->
510 write_indent t;
511 write_anchor t anchor;
512 write_tag t ~implicit tag;
513 write t ":";
514 write_newline t;
515 push_state t (In_block_mapping_key indent);
516 t.indent <- t.indent + t.config.indent;
517 t.state <- In_block_mapping_key t.indent
518 | In_block_mapping_first_key indent ->
519 (* First key after "- " with mapping value - no indent *)
520 write_anchor t anchor;
521 write_tag t ~implicit tag;
522 write t ":";
523 write_newline t;
524 push_state t (In_block_mapping_key indent);
525 t.indent <- t.indent + t.config.indent;
526 t.state <- In_block_mapping_key t.indent
527 | In_block_mapping_value indent ->
528 write_anchor t anchor;
529 write_tag t ~implicit tag;
530 if use_flow then begin
531 write_char t ' ';
532 write_char t '{';
533 t.flow_level <- t.flow_level + 1;
534 t.need_separator <- false;
535 (* Save key state to return to after flow mapping *)
536 t.state <- In_block_mapping_key indent;
537 push_state t In_flow_mapping_key
538 end
539 else begin
540 write_newline t;
541 (* Save key state to return to after nested mapping *)
542 t.state <- In_block_mapping_key indent;
543 push_state t (In_block_mapping_key (t.indent + t.config.indent));
544 t.indent <- t.indent + t.config.indent
545 end
546 | _ ->
547 write_anchor t anchor;
548 write_tag t ~implicit tag;
549 if use_flow then begin
550 write_char t '{';
551 t.flow_level <- t.flow_level + 1;
552 t.need_separator <- false;
553 push_state t In_flow_mapping_key
554 end
555 else begin
556 push_state t (In_block_mapping_key t.indent);
557 t.state <- In_block_mapping_key t.indent
558 end
559 end
560 | Event.Mapping_end ->
561 if t.flow_level > 0 then begin
562 write_char t '}';
563 t.flow_level <- t.flow_level - 1;
564 t.need_separator <- true;
565 pop_state t;
566 (* Write newline if returning to block context *)
567 match t.state with
568 | In_block_mapping_key _ | In_block_sequence _ -> write_newline t
569 | _ -> ()
570 end
571 else begin
572 t.indent <- t.indent - t.config.indent;
573 pop_state t
574 end
575
576(** Access to the underlying buffer for advanced use. Returns None if emitter is
577 writing to a Writer instead of Buffer. *)
578let buffer t =
579 match t.sink with Buffer_sink buf -> Some buf | Writer_sink _ -> None
580
581(** Get config *)
582let config t = t.config
583
584(** Check if emitter is writing to a Writer *)
585let is_streaming t =
586 match t.sink with Writer_sink _ -> true | Buffer_sink _ -> false
587
588(** Flush the writer sink (no-op for buffer sink) *)
589let flush t =
590 match t.sink with
591 | Writer_sink w -> Bytesrw.Bytes.Writer.write_eod w
592 | Buffer_sink _ -> ()