Pure OCaml Yaml 1.2 reader and writer using Bytesrw
1(*---------------------------------------------------------------------------
2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3 SPDX-License-Identifier: ISC
4 ---------------------------------------------------------------------------*)
5
6(** Emitter - converts YAML data structures to string output
7
8 The emitter can write to either a Buffer (default) or directly to a
9 bytesrw Bytes.Writer for streaming output. *)
10
11type config = {
12 encoding : Encoding.t;
13 scalar_style : Scalar_style.t;
14 layout_style : Layout_style.t;
15 indent : int;
16 width : int;
17 canonical : bool;
18}
19
20let default_config = {
21 encoding = `Utf8;
22 scalar_style = `Any;
23 layout_style = `Any;
24 indent = 2;
25 width = 80;
26 canonical = false;
27}
28
29type state =
30 | Initial
31 | Stream_started
32 | Document_started
33 | In_block_sequence of int (* indent level *)
34 | In_block_mapping_key of int
35 | In_block_mapping_value of int
36 | In_block_mapping_first_key of int (* first key after "- ", no indent needed *)
37 | In_flow_sequence
38 | In_flow_mapping_key
39 | In_flow_mapping_value
40 | Document_ended
41 | Stream_ended
42
43(** Output sink - either a Buffer or a bytesrw Writer *)
44type sink =
45 | Buffer_sink of Buffer.t
46 | Writer_sink of Bytesrw.Bytes.Writer.t
47
48type t = {
49 config : config;
50 sink : sink;
51 mutable state : state;
52 mutable states : state list;
53 mutable indent : int;
54 mutable flow_level : int;
55 mutable need_separator : bool;
56}
57
58let create ?(config = default_config) () = {
59 config;
60 sink = Buffer_sink (Buffer.create 1024);
61 state = Initial;
62 states = [];
63 indent = 0;
64 flow_level = 0;
65 need_separator = false;
66}
67
68(** Create an emitter that writes directly to a Bytes.Writer *)
69let of_writer ?(config = default_config) writer = {
70 config;
71 sink = Writer_sink writer;
72 state = Initial;
73 states = [];
74 indent = 0;
75 flow_level = 0;
76 need_separator = false;
77}
78
79let contents t =
80 match t.sink with
81 | Buffer_sink buf -> Buffer.contents buf
82 | Writer_sink _ -> "" (* No accumulated content for writer sink *)
83
84let reset t =
85 (match t.sink with
86 | Buffer_sink buf -> Buffer.clear buf
87 | Writer_sink _ -> ());
88 t.state <- Initial;
89 t.states <- [];
90 t.indent <- 0;
91 t.flow_level <- 0;
92 t.need_separator <- false
93
94(** Output helpers - write to appropriate sink *)
95
96let write t s =
97 match t.sink with
98 | Buffer_sink buf -> Buffer.add_string buf s
99 | Writer_sink w -> Bytesrw.Bytes.Writer.write_string w s
100
101let write_char t c =
102 match t.sink with
103 | Buffer_sink buf -> Buffer.add_char buf c
104 | Writer_sink w ->
105 let b = Bytes.make 1 c in
106 Bytesrw.Bytes.Writer.write_bytes w b
107
108let write_indent t =
109 if t.indent <= 8 then
110 for _ = 1 to t.indent do write_char t ' ' done
111 else
112 write t (String.make t.indent ' ')
113
114let write_newline t =
115 write_char t '\n'
116
117let push_state t s =
118 t.states <- t.state :: t.states;
119 t.state <- s
120
121let pop_state t =
122 match t.states with
123 | s :: rest ->
124 t.state <- s;
125 t.states <- rest
126 | [] ->
127 t.state <- Stream_ended
128
129(** Escape a string for double-quoted output.
130 Uses a buffer to batch writes instead of character-by-character. *)
131let escape_double_quoted value =
132 let len = String.length value in
133 (* Check if any escaping is needed *)
134 let needs_escape = ref false in
135 for i = 0 to len - 1 do
136 match value.[i] with
137 | '"' | '\\' | '\n' | '\r' | '\t' -> needs_escape := true
138 | c when c < ' ' -> needs_escape := true
139 | _ -> ()
140 done;
141 if not !needs_escape then value
142 else begin
143 let buf = Buffer.create (len + len / 4) in
144 for i = 0 to len - 1 do
145 match value.[i] with
146 | '"' -> Buffer.add_string buf "\\\""
147 | '\\' -> Buffer.add_string buf "\\\\"
148 | '\n' -> Buffer.add_string buf "\\n"
149 | '\r' -> Buffer.add_string buf "\\r"
150 | '\t' -> Buffer.add_string buf "\\t"
151 | c when c < ' ' -> Buffer.add_string buf (Printf.sprintf "\\x%02x" (Char.code c))
152 | c -> Buffer.add_char buf c
153 done;
154 Buffer.contents buf
155 end
156
157(** Escape a string for single-quoted output. *)
158let escape_single_quoted value =
159 if not (String.contains value '\'') then value
160 else begin
161 let len = String.length value in
162 let buf = Buffer.create (len + len / 8) in
163 for i = 0 to len - 1 do
164 let c = value.[i] in
165 if c = '\'' then Buffer.add_string buf "''"
166 else Buffer.add_char buf c
167 done;
168 Buffer.contents buf
169 end
170
171(** Write scalar with appropriate quoting *)
172let write_scalar t ?(style = `Any) value =
173 match (match style with `Any -> Quoting.choose_style value | s -> s) with
174 | `Plain | `Any ->
175 write t value
176 | `Single_quoted ->
177 write_char t '\'';
178 write t (escape_single_quoted value);
179 write_char t '\''
180 | `Double_quoted ->
181 write_char t '"';
182 write t (escape_double_quoted value);
183 write_char t '"'
184 | `Literal ->
185 write t "|";
186 write_newline t;
187 String.split_on_char '\n' value |> List.iter (fun line ->
188 write_indent t;
189 write t line;
190 write_newline t
191 )
192 | `Folded ->
193 write t ">";
194 write_newline t;
195 String.split_on_char '\n' value |> List.iter (fun line ->
196 write_indent t;
197 write t line;
198 write_newline t
199 )
200
201(** Write anchor if present *)
202let write_anchor t anchor =
203 match anchor with
204 | Some name ->
205 write_char t '&';
206 write t name;
207 write_char t ' '
208 | None -> ()
209
210(** Write tag if present and not implicit *)
211let write_tag t ~implicit tag =
212 if not implicit then
213 match tag with
214 | Some tag_str ->
215 write_char t '!';
216 write t tag_str;
217 write_char t ' '
218 | None -> ()
219
220(** Emit events *)
221
222let emit t (ev : Event.t) =
223 match ev with
224 | Event.Stream_start _ ->
225 t.state <- Stream_started
226
227 | Event.Stream_end ->
228 t.state <- Stream_ended
229
230 | Event.Document_start { version; implicit } ->
231 if not implicit then begin
232 (match version with
233 | Some (maj, min) ->
234 write t (Printf.sprintf "%%YAML %d.%d\n" maj min)
235 | None -> ());
236 write t "---";
237 write_newline t
238 end;
239 t.state <- Document_started
240
241 | Event.Document_end { implicit } ->
242 if not implicit then begin
243 write t "...";
244 write_newline t
245 end;
246 t.state <- Document_ended
247
248 | Event.Alias { anchor } ->
249 if t.flow_level > 0 then begin
250 if t.need_separator then write t ", ";
251 t.need_separator <- true;
252 write_char t '*';
253 write t anchor
254 end else begin
255 (match t.state with
256 | In_block_sequence _ ->
257 write_indent t;
258 write t "- *";
259 write t anchor;
260 write_newline t
261 | In_block_mapping_key _ ->
262 write_indent t;
263 write_char t '*';
264 write t anchor;
265 write t ": ";
266 t.state <- In_block_mapping_value t.indent
267 | In_block_mapping_value indent ->
268 write_char t '*';
269 write t anchor;
270 write_newline t;
271 t.state <- In_block_mapping_key indent
272 | _ ->
273 write_char t '*';
274 write t anchor;
275 write_newline t)
276 end
277
278 | Event.Scalar { anchor; tag; value; plain_implicit; style; _ } ->
279 if t.flow_level > 0 then begin
280 (match t.state with
281 | In_flow_mapping_key ->
282 if t.need_separator then write t ", ";
283 write_anchor t anchor;
284 write_tag t ~implicit:plain_implicit tag;
285 write_scalar t ~style value;
286 write t ": ";
287 t.need_separator <- false;
288 t.state <- In_flow_mapping_value
289 | In_flow_mapping_value ->
290 write_anchor t anchor;
291 write_tag t ~implicit:plain_implicit tag;
292 write_scalar t ~style value;
293 t.need_separator <- true;
294 t.state <- In_flow_mapping_key
295 | _ ->
296 if t.need_separator then write t ", ";
297 t.need_separator <- true;
298 write_anchor t anchor;
299 write_tag t ~implicit:plain_implicit tag;
300 write_scalar t ~style value)
301 end else begin
302 match t.state with
303 | In_block_sequence _ ->
304 write_indent t;
305 write t "- ";
306 write_anchor t anchor;
307 write_tag t ~implicit:plain_implicit tag;
308 write_scalar t ~style value;
309 write_newline t
310 | In_block_mapping_key indent ->
311 write_indent t;
312 write_anchor t anchor;
313 write_tag t ~implicit:plain_implicit tag;
314 write_scalar t ~style value;
315 write_char t ':';
316 t.state <- In_block_mapping_value indent
317 | In_block_mapping_first_key indent ->
318 (* First key after "- ", no indent needed *)
319 write_anchor t anchor;
320 write_tag t ~implicit:plain_implicit tag;
321 write_scalar t ~style value;
322 write_char t ':';
323 t.state <- In_block_mapping_value indent
324 | In_block_mapping_value indent ->
325 write_char t ' ';
326 write_anchor t anchor;
327 write_tag t ~implicit:plain_implicit tag;
328 write_scalar t ~style value;
329 write_newline t;
330 t.state <- In_block_mapping_key indent
331 | _ ->
332 write_anchor t anchor;
333 write_tag t ~implicit:plain_implicit tag;
334 write_scalar t ~style value;
335 write_newline t
336 end
337
338 | Event.Sequence_start { anchor; tag; implicit; style } ->
339 let use_flow = style = `Flow || t.flow_level > 0 in
340 if t.flow_level > 0 then begin
341 (match t.state with
342 | In_flow_mapping_key ->
343 if t.need_separator then write t ", ";
344 write_anchor t anchor;
345 write_tag t ~implicit tag;
346 write_char t '[';
347 t.flow_level <- t.flow_level + 1;
348 t.need_separator <- false;
349 push_state t In_flow_mapping_value; (* After ] we'll be in value position but sequence handles it *)
350 t.state <- In_flow_sequence
351 | In_flow_mapping_value ->
352 write_anchor t anchor;
353 write_tag t ~implicit tag;
354 write_char t '[';
355 t.flow_level <- t.flow_level + 1;
356 t.need_separator <- false;
357 push_state t In_flow_mapping_key;
358 t.state <- In_flow_sequence
359 | _ ->
360 if t.need_separator then write t ", ";
361 write_anchor t anchor;
362 write_tag t ~implicit tag;
363 write_char t '[';
364 t.flow_level <- t.flow_level + 1;
365 t.need_separator <- false;
366 push_state t In_flow_sequence)
367 end else begin
368 match t.state with
369 | In_block_sequence _ ->
370 write_indent t;
371 write t "- ";
372 write_anchor t anchor;
373 write_tag t ~implicit tag;
374 if use_flow then begin
375 write_char t '[';
376 t.flow_level <- t.flow_level + 1;
377 t.need_separator <- false;
378 push_state t In_flow_sequence
379 end else begin
380 write_newline t;
381 push_state t (In_block_sequence t.indent);
382 t.indent <- t.indent + t.config.indent
383 end
384 | In_block_mapping_key indent ->
385 write_indent t;
386 write_anchor t anchor;
387 write_tag t ~implicit tag;
388 write t ":";
389 write_newline t;
390 push_state t (In_block_mapping_key indent);
391 t.indent <- t.indent + t.config.indent;
392 t.state <- In_block_sequence t.indent
393 | In_block_mapping_first_key indent ->
394 (* First key after "- " with sequence value - no indent *)
395 write_anchor t anchor;
396 write_tag t ~implicit tag;
397 write t ":";
398 write_newline t;
399 push_state t (In_block_mapping_key indent);
400 t.indent <- t.indent + t.config.indent;
401 t.state <- In_block_sequence t.indent
402 | In_block_mapping_value indent ->
403 write_anchor t anchor;
404 write_tag t ~implicit tag;
405 if use_flow then begin
406 write_char t ' ';
407 write_char t '[';
408 t.flow_level <- t.flow_level + 1;
409 t.need_separator <- false;
410 (* Save key state to return to after flow sequence *)
411 t.state <- In_block_mapping_key indent;
412 push_state t In_flow_sequence
413 end else begin
414 write_newline t;
415 (* Save key state to return to after nested sequence *)
416 t.state <- In_block_mapping_key indent;
417 push_state t (In_block_sequence (t.indent + t.config.indent));
418 t.indent <- t.indent + t.config.indent
419 end
420 | _ ->
421 write_anchor t anchor;
422 write_tag t ~implicit tag;
423 if use_flow then begin
424 write_char t '[';
425 t.flow_level <- t.flow_level + 1;
426 t.need_separator <- false;
427 push_state t In_flow_sequence
428 end else begin
429 push_state t (In_block_sequence t.indent);
430 t.state <- In_block_sequence t.indent
431 end
432 end
433
434 | Event.Sequence_end ->
435 if t.flow_level > 0 then begin
436 write_char t ']';
437 t.flow_level <- t.flow_level - 1;
438 t.need_separator <- true;
439 pop_state t;
440 (* Write newline if returning to block context *)
441 (match t.state with
442 | In_block_mapping_key _ | In_block_sequence _ -> write_newline t
443 | _ -> ())
444 end else begin
445 t.indent <- t.indent - t.config.indent;
446 pop_state t
447 end
448
449 | Event.Mapping_start { anchor; tag; implicit; style } ->
450 let use_flow = style = `Flow || t.flow_level > 0 in
451 if t.flow_level > 0 then begin
452 (match t.state with
453 | In_flow_mapping_key ->
454 if t.need_separator then write t ", ";
455 write_anchor t anchor;
456 write_tag t ~implicit tag;
457 write_char t '{';
458 t.flow_level <- t.flow_level + 1;
459 t.need_separator <- false;
460 push_state t In_flow_mapping_value;
461 t.state <- In_flow_mapping_key
462 | In_flow_mapping_value ->
463 write_anchor t anchor;
464 write_tag t ~implicit tag;
465 write_char t '{';
466 t.flow_level <- t.flow_level + 1;
467 t.need_separator <- false;
468 push_state t In_flow_mapping_key;
469 t.state <- In_flow_mapping_key
470 | _ ->
471 if t.need_separator then write t ", ";
472 write_anchor t anchor;
473 write_tag t ~implicit tag;
474 write_char t '{';
475 t.flow_level <- t.flow_level + 1;
476 t.need_separator <- false;
477 push_state t In_flow_mapping_key)
478 end else begin
479 match t.state with
480 | In_block_sequence _ ->
481 write_indent t;
482 write t "- ";
483 write_anchor t anchor;
484 write_tag t ~implicit tag;
485 if use_flow then begin
486 write_char t '{';
487 t.flow_level <- t.flow_level + 1;
488 t.need_separator <- false;
489 push_state t In_flow_mapping_key
490 end else begin
491 (* Don't write newline - first key goes on same line as "- " *)
492 push_state t (In_block_sequence t.indent);
493 t.indent <- t.indent + t.config.indent;
494 t.state <- In_block_mapping_first_key t.indent
495 end
496 | In_block_mapping_key indent ->
497 write_indent t;
498 write_anchor t anchor;
499 write_tag t ~implicit tag;
500 write t ":";
501 write_newline t;
502 push_state t (In_block_mapping_key indent);
503 t.indent <- t.indent + t.config.indent;
504 t.state <- In_block_mapping_key t.indent
505 | In_block_mapping_first_key indent ->
506 (* First key after "- " with mapping value - no indent *)
507 write_anchor t anchor;
508 write_tag t ~implicit tag;
509 write t ":";
510 write_newline t;
511 push_state t (In_block_mapping_key indent);
512 t.indent <- t.indent + t.config.indent;
513 t.state <- In_block_mapping_key t.indent
514 | In_block_mapping_value indent ->
515 write_anchor t anchor;
516 write_tag t ~implicit tag;
517 if use_flow then begin
518 write_char t ' ';
519 write_char t '{';
520 t.flow_level <- t.flow_level + 1;
521 t.need_separator <- false;
522 (* Save key state to return to after flow mapping *)
523 t.state <- In_block_mapping_key indent;
524 push_state t In_flow_mapping_key
525 end else begin
526 write_newline t;
527 (* Save key state to return to after nested mapping *)
528 t.state <- In_block_mapping_key indent;
529 push_state t (In_block_mapping_key (t.indent + t.config.indent));
530 t.indent <- t.indent + t.config.indent
531 end
532 | _ ->
533 write_anchor t anchor;
534 write_tag t ~implicit tag;
535 if use_flow then begin
536 write_char t '{';
537 t.flow_level <- t.flow_level + 1;
538 t.need_separator <- false;
539 push_state t In_flow_mapping_key
540 end else begin
541 push_state t (In_block_mapping_key t.indent);
542 t.state <- In_block_mapping_key t.indent
543 end
544 end
545
546 | Event.Mapping_end ->
547 if t.flow_level > 0 then begin
548 write_char t '}';
549 t.flow_level <- t.flow_level - 1;
550 t.need_separator <- true;
551 pop_state t;
552 (* Write newline if returning to block context *)
553 (match t.state with
554 | In_block_mapping_key _ | In_block_sequence _ -> write_newline t
555 | _ -> ())
556 end else begin
557 t.indent <- t.indent - t.config.indent;
558 pop_state t
559 end
560
561(** Access to the underlying buffer for advanced use.
562 Returns None if emitter is writing to a Writer instead of Buffer. *)
563let buffer t =
564 match t.sink with
565 | Buffer_sink buf -> Some buf
566 | Writer_sink _ -> None
567
568(** Get config *)
569let config t = t.config
570
571(** Check if emitter is writing to a Writer *)
572let is_streaming t =
573 match t.sink with
574 | Writer_sink _ -> true
575 | Buffer_sink _ -> false
576
577(** Flush the writer sink (no-op for buffer sink) *)
578let flush t =
579 match t.sink with
580 | Writer_sink w -> Bytesrw.Bytes.Writer.write_eod w
581 | Buffer_sink _ -> ()