Pure OCaml Yaml 1.2 reader and writer using Bytesrw
1(*---------------------------------------------------------------------------
2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3 SPDX-License-Identifier: ISC
4 ---------------------------------------------------------------------------*)
5
6(** Loader - converts parser events to YAML data structures *)
7
8(** Stack frame for building nested structures *)
9type frame =
10 | Sequence_frame of {
11 anchor : string option;
12 tag : string option;
13 implicit : bool;
14 style : Layout_style.t;
15 items : Yaml.t list;
16 }
17 | Mapping_frame of {
18 anchor : string option;
19 tag : string option;
20 implicit : bool;
21 style : Layout_style.t;
22 pairs : (Yaml.t * Yaml.t) list;
23 pending_key : Yaml.t option;
24 }
25
26type state = {
27 mutable stack : frame list;
28 mutable current : Yaml.t option;
29 mutable documents : Document.t list;
30 mutable doc_version : (int * int) option;
31 mutable doc_implicit_start : bool;
32}
33
34let create_state () = {
35 stack = [];
36 current = None;
37 documents = [];
38 doc_version = None;
39 doc_implicit_start = true;
40}
41
42(** Process a single event *)
43let rec process_event state (ev : Event.spanned) =
44 match ev.event with
45 | Event.Stream_start _ -> ()
46
47 | Event.Stream_end -> ()
48
49 | Event.Document_start { version; implicit } ->
50 state.doc_version <- version;
51 state.doc_implicit_start <- implicit
52
53 | Event.Document_end { implicit } ->
54 let doc = Document.make
55 ?version:state.doc_version
56 ~implicit_start:state.doc_implicit_start
57 ~implicit_end:implicit
58 state.current
59 in
60 state.documents <- doc :: state.documents;
61 state.current <- None;
62 state.doc_version <- None;
63 state.doc_implicit_start <- true
64
65 | Event.Alias { anchor } ->
66 let node : Yaml.t = `Alias anchor in
67 add_node state node
68
69 | Event.Scalar { anchor; tag; value; plain_implicit; quoted_implicit; style } ->
70 let scalar = Scalar.make
71 ?anchor ?tag
72 ~plain_implicit ~quoted_implicit
73 ~style value
74 in
75 let node : Yaml.t = `Scalar scalar in
76 add_node state node
77
78 | Event.Sequence_start { anchor; tag; implicit; style } ->
79 let frame = Sequence_frame {
80 anchor; tag; implicit; style;
81 items = [];
82 } in
83 state.stack <- frame :: state.stack
84
85 | Event.Sequence_end ->
86 (match state.stack with
87 | Sequence_frame { anchor; tag; implicit; style; items } :: rest ->
88 let seq = Sequence.make ?anchor ?tag ~implicit ~style (List.rev items) in
89 let node : Yaml.t = `A seq in
90 state.stack <- rest;
91 add_node state node
92 | _ -> Error.raise (Invalid_state "unexpected sequence end"))
93
94 | Event.Mapping_start { anchor; tag; implicit; style } ->
95 let frame = Mapping_frame {
96 anchor; tag; implicit; style;
97 pairs = [];
98 pending_key = None;
99 } in
100 state.stack <- frame :: state.stack
101
102 | Event.Mapping_end ->
103 (match state.stack with
104 | Mapping_frame { anchor; tag; implicit; style; pairs; pending_key = None } :: rest ->
105 let map = Mapping.make ?anchor ?tag ~implicit ~style (List.rev pairs) in
106 let node : Yaml.t = `O map in
107 state.stack <- rest;
108 add_node state node
109 | Mapping_frame { pending_key = Some _; _ } :: _ ->
110 Error.raise (Invalid_state "mapping ended with pending key")
111 | _ -> Error.raise (Invalid_state "unexpected mapping end"))
112
113(** Add a node to current context *)
114and add_node state node =
115 match state.stack with
116 | [] ->
117 state.current <- Some node
118
119 | Sequence_frame f :: rest ->
120 state.stack <- Sequence_frame { f with items = node :: f.items } :: rest
121
122 | Mapping_frame f :: rest ->
123 (match f.pending_key with
124 | None ->
125 (* This is a key *)
126 state.stack <- Mapping_frame { f with pending_key = Some node } :: rest
127 | Some key ->
128 (* This is a value *)
129 state.stack <- Mapping_frame {
130 f with
131 pairs = (key, node) :: f.pairs;
132 pending_key = None;
133 } :: rest)
134
135(** Load single document as Value.
136
137 @param resolve_aliases Whether to resolve aliases (default true)
138 @param max_nodes Maximum nodes during alias expansion (default 10M)
139 @param max_depth Maximum alias nesting depth (default 100)
140*)
141let value_of_string
142 ?(resolve_aliases = true)
143 ?(max_nodes = Yaml.default_max_alias_nodes)
144 ?(max_depth = Yaml.default_max_alias_depth)
145 s =
146 let parser = Parser.of_string s in
147 let state = create_state () in
148 Parser.iter (process_event state) parser;
149 match state.documents with
150 | [] -> `Null
151 | [doc] ->
152 (match Document.root doc with
153 | None -> `Null
154 | Some yaml ->
155 Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth yaml)
156 | _ -> Error.raise Multiple_documents
157
158(** Load single document as Yaml.
159
160 @param resolve_aliases Whether to resolve aliases (default false for Yaml.t)
161 @param max_nodes Maximum nodes during alias expansion (default 10M)
162 @param max_depth Maximum alias nesting depth (default 100)
163*)
164let yaml_of_string
165 ?(resolve_aliases = false)
166 ?(max_nodes = Yaml.default_max_alias_nodes)
167 ?(max_depth = Yaml.default_max_alias_depth)
168 s =
169 let parser = Parser.of_string s in
170 let state = create_state () in
171 Parser.iter (process_event state) parser;
172 match state.documents with
173 | [] -> `Scalar (Scalar.make "")
174 | [doc] ->
175 (match Document.root doc with
176 | None -> `Scalar (Scalar.make "")
177 | Some yaml ->
178 if resolve_aliases then
179 Yaml.resolve_aliases ~max_nodes ~max_depth yaml
180 else
181 yaml)
182 | _ -> Error.raise Multiple_documents
183
184(** Load all documents *)
185let documents_of_string s =
186 let parser = Parser.of_string s in
187 let state = create_state () in
188 Parser.iter (process_event state) parser;
189 List.rev state.documents
190
191(** {2 Reader-based loading} *)
192
193(** Load single document as Value from a Bytes.Reader.
194
195 @param resolve_aliases Whether to resolve aliases (default true)
196 @param max_nodes Maximum nodes during alias expansion (default 10M)
197 @param max_depth Maximum alias nesting depth (default 100)
198*)
199let value_of_reader
200 ?(resolve_aliases = true)
201 ?(max_nodes = Yaml.default_max_alias_nodes)
202 ?(max_depth = Yaml.default_max_alias_depth)
203 reader =
204 let parser = Parser.of_reader reader in
205 let state = create_state () in
206 Parser.iter (process_event state) parser;
207 match state.documents with
208 | [] -> `Null
209 | [doc] ->
210 (match Document.root doc with
211 | None -> `Null
212 | Some yaml ->
213 Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth yaml)
214 | _ -> Error.raise Multiple_documents
215
216(** Load single document as Yaml from a Bytes.Reader.
217
218 @param resolve_aliases Whether to resolve aliases (default false for Yaml.t)
219 @param max_nodes Maximum nodes during alias expansion (default 10M)
220 @param max_depth Maximum alias nesting depth (default 100)
221*)
222let yaml_of_reader
223 ?(resolve_aliases = false)
224 ?(max_nodes = Yaml.default_max_alias_nodes)
225 ?(max_depth = Yaml.default_max_alias_depth)
226 reader =
227 let parser = Parser.of_reader reader in
228 let state = create_state () in
229 Parser.iter (process_event state) parser;
230 match state.documents with
231 | [] -> `Scalar (Scalar.make "")
232 | [doc] ->
233 (match Document.root doc with
234 | None -> `Scalar (Scalar.make "")
235 | Some yaml ->
236 if resolve_aliases then
237 Yaml.resolve_aliases ~max_nodes ~max_depth yaml
238 else
239 yaml)
240 | _ -> Error.raise Multiple_documents
241
242(** Load all documents from a Bytes.Reader *)
243let documents_of_reader reader =
244 let parser = Parser.of_reader reader in
245 let state = create_state () in
246 Parser.iter (process_event state) parser;
247 List.rev state.documents
248
249(** Generic document loader - extracts common pattern from load_* functions *)
250let load_generic extract parser =
251 let state = create_state () in
252 let rec loop () =
253 match Parser.next parser with
254 | None -> None
255 | Some ev ->
256 process_event state ev;
257 match ev.event with
258 | Event.Document_end _ ->
259 (match state.documents with
260 | doc :: _ ->
261 state.documents <- [];
262 Some (extract doc)
263 | [] -> None)
264 | Event.Stream_end -> None
265 | _ -> loop ()
266 in
267 loop ()
268
269(** Load single Value from parser.
270
271 @param resolve_aliases Whether to resolve aliases (default true)
272 @param max_nodes Maximum nodes during alias expansion (default 10M)
273 @param max_depth Maximum alias nesting depth (default 100)
274*)
275let load_value
276 ?(resolve_aliases = true)
277 ?(max_nodes = Yaml.default_max_alias_nodes)
278 ?(max_depth = Yaml.default_max_alias_depth)
279 parser =
280 load_generic (fun doc ->
281 match Document.root doc with
282 | None -> `Null
283 | Some yaml ->
284 Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth yaml
285 ) parser
286
287(** Load single Yaml from parser *)
288let load_yaml parser =
289 load_generic (fun doc ->
290 Document.root doc |> Option.value ~default:(`Scalar (Scalar.make ""))
291 ) parser
292
293(** Load single Document from parser *)
294let load_document parser =
295 load_generic Fun.id parser
296
297(** Iterate over documents *)
298let iter_documents f parser =
299 let rec loop () =
300 match load_document parser with
301 | None -> ()
302 | Some doc -> f doc; loop ()
303 in
304 loop ()
305
306(** Fold over documents *)
307let fold_documents f init parser =
308 let rec loop acc =
309 match load_document parser with
310 | None -> acc
311 | Some doc -> loop (f acc doc)
312 in
313 loop init
314
315(** {2 Parser-function based loading}
316
317 These functions accept a [unit -> Event.spanned option] function
318 instead of a [Parser.t], allowing them to work with any event source
319 (e.g., streaming parsers). *)
320
321(** Generic document loader using event source function *)
322let load_generic_fn extract next_event =
323 let state = create_state () in
324 let rec loop () =
325 match next_event () with
326 | None -> None
327 | Some ev ->
328 process_event state ev;
329 match ev.event with
330 | Event.Document_end _ ->
331 (match state.documents with
332 | doc :: _ ->
333 state.documents <- [];
334 Some (extract doc)
335 | [] -> None)
336 | Event.Stream_end -> None
337 | _ -> loop ()
338 in
339 loop ()
340
341(** Load single Value from event source.
342
343 @param resolve_aliases Whether to resolve aliases (default true)
344 @param max_nodes Maximum nodes during alias expansion (default 10M)
345 @param max_depth Maximum alias nesting depth (default 100)
346*)
347let value_of_parser
348 ?(resolve_aliases = true)
349 ?(max_nodes = Yaml.default_max_alias_nodes)
350 ?(max_depth = Yaml.default_max_alias_depth)
351 next_event =
352 match load_generic_fn (fun doc ->
353 match Document.root doc with
354 | None -> `Null
355 | Some yaml ->
356 Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth yaml
357 ) next_event with
358 | Some v -> v
359 | None -> `Null
360
361(** Load single Yaml from event source.
362
363 @param resolve_aliases Whether to resolve aliases (default false)
364 @param max_nodes Maximum nodes during alias expansion (default 10M)
365 @param max_depth Maximum alias nesting depth (default 100)
366*)
367let yaml_of_parser
368 ?(resolve_aliases = false)
369 ?(max_nodes = Yaml.default_max_alias_nodes)
370 ?(max_depth = Yaml.default_max_alias_depth)
371 next_event =
372 match load_generic_fn (fun doc ->
373 match Document.root doc with
374 | None -> `Scalar (Scalar.make "")
375 | Some yaml ->
376 if resolve_aliases then
377 Yaml.resolve_aliases ~max_nodes ~max_depth yaml
378 else
379 yaml
380 ) next_event with
381 | Some v -> v
382 | None -> `Scalar (Scalar.make "")
383
384(** Load single Document from event source *)
385let document_of_parser next_event =
386 load_generic_fn Fun.id next_event
387
388(** Load all documents from event source *)
389let documents_of_parser next_event =
390 let state = create_state () in
391 let rec loop () =
392 match next_event () with
393 | None -> List.rev state.documents
394 | Some ev ->
395 process_event state ev;
396 loop ()
397 in
398 loop ()
399
400(** Iterate over documents from event source *)
401let iter_documents_parser f next_event =
402 let rec loop () =
403 match document_of_parser next_event with
404 | None -> ()
405 | Some doc -> f doc; loop ()
406 in
407 loop ()
408
409(** Fold over documents from event source *)
410let fold_documents_parser f init next_event =
411 let rec loop acc =
412 match document_of_parser next_event with
413 | None -> acc
414 | Some doc -> loop (f acc doc)
415 in
416 loop init