Pure OCaml Yaml 1.2 reader and writer using Bytesrw
1(*---------------------------------------------------------------------------
2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3 SPDX-License-Identifier: ISC
4 ---------------------------------------------------------------------------*)
5
6(** Loader - converts parser events to YAML data structures *)
7
8(** Stack frame for building nested structures *)
9type frame =
10 | Sequence_frame of {
11 anchor : string option;
12 tag : string option;
13 implicit : bool;
14 style : Layout_style.t;
15 items : Yaml.t list;
16 }
17 | Mapping_frame of {
18 anchor : string option;
19 tag : string option;
20 implicit : bool;
21 style : Layout_style.t;
22 pairs : (Yaml.t * Yaml.t) list;
23 pending_key : Yaml.t option;
24 }
25
26type state = {
27 mutable stack : frame list;
28 mutable current : Yaml.t option;
29 mutable documents : Document.t list;
30 mutable doc_version : (int * int) option;
31 mutable doc_implicit_start : bool;
32}
33
34let create_state () =
35 {
36 stack = [];
37 current = None;
38 documents = [];
39 doc_version = None;
40 doc_implicit_start = true;
41 }
42
43(** Process a single event *)
44let rec process_event state (ev : Event.spanned) =
45 match ev.event with
46 | Event.Stream_start _ -> ()
47 | Event.Stream_end -> ()
48 | Event.Document_start { version; implicit } ->
49 state.doc_version <- version;
50 state.doc_implicit_start <- implicit
51 | Event.Document_end { implicit } ->
52 let doc =
53 Document.make ?version:state.doc_version
54 ~implicit_start:state.doc_implicit_start ~implicit_end:implicit
55 state.current
56 in
57 state.documents <- doc :: state.documents;
58 state.current <- None;
59 state.doc_version <- None;
60 state.doc_implicit_start <- true
61 | Event.Alias { anchor } ->
62 let node : Yaml.t = `Alias anchor in
63 add_node state node
64 | Event.Scalar { anchor; tag; value; plain_implicit; quoted_implicit; style }
65 ->
66 let scalar =
67 Scalar.make ?anchor ?tag ~plain_implicit ~quoted_implicit ~style value
68 in
69 let node : Yaml.t = `Scalar scalar in
70 add_node state node
71 | Event.Sequence_start { anchor; tag; implicit; style } ->
72 let frame = Sequence_frame { anchor; tag; implicit; style; items = [] } in
73 state.stack <- frame :: state.stack
74 | Event.Sequence_end -> (
75 match state.stack with
76 | Sequence_frame { anchor; tag; implicit; style; items } :: rest ->
77 let seq =
78 Sequence.make ?anchor ?tag ~implicit ~style (List.rev items)
79 in
80 let node : Yaml.t = `A seq in
81 state.stack <- rest;
82 add_node state node
83 | _ -> Error.raise (Invalid_state "unexpected sequence end"))
84 | Event.Mapping_start { anchor; tag; implicit; style } ->
85 let frame =
86 Mapping_frame
87 { anchor; tag; implicit; style; pairs = []; pending_key = None }
88 in
89 state.stack <- frame :: state.stack
90 | Event.Mapping_end -> (
91 match state.stack with
92 | Mapping_frame
93 { anchor; tag; implicit; style; pairs; pending_key = None }
94 :: rest ->
95 let map =
96 Mapping.make ?anchor ?tag ~implicit ~style (List.rev pairs)
97 in
98 let node : Yaml.t = `O map in
99 state.stack <- rest;
100 add_node state node
101 | Mapping_frame { pending_key = Some _; _ } :: _ ->
102 Error.raise (Invalid_state "mapping ended with pending key")
103 | _ -> Error.raise (Invalid_state "unexpected mapping end"))
104
105(** Add a node to current context *)
106and add_node state node =
107 match state.stack with
108 | [] -> state.current <- Some node
109 | Sequence_frame f :: rest ->
110 state.stack <- Sequence_frame { f with items = node :: f.items } :: rest
111 | Mapping_frame f :: rest -> (
112 match f.pending_key with
113 | None ->
114 (* This is a key *)
115 state.stack <-
116 Mapping_frame { f with pending_key = Some node } :: rest
117 | Some key ->
118 (* This is a value *)
119 state.stack <-
120 Mapping_frame
121 { f with pairs = (key, node) :: f.pairs; pending_key = None }
122 :: rest)
123
124(** Internal: parse all documents from a parser *)
125let parse_all_documents parser =
126 let state = create_state () in
127 Parser.iter (process_event state) parser;
128 List.rev state.documents
129
130(** Internal: extract single document or raise *)
131let single_document_or_error docs ~empty =
132 match docs with
133 | [] -> empty
134 | [doc] -> doc
135 | _ -> Error.raise Multiple_documents
136
137(** Load single document as Value.
138
139 @param resolve_aliases Whether to resolve aliases (default true)
140 @param max_nodes Maximum nodes during alias expansion (default 10M)
141 @param max_depth Maximum alias nesting depth (default 100) *)
142let value_of_string ?(resolve_aliases = true)
143 ?(max_nodes = Yaml.default_max_alias_nodes)
144 ?(max_depth = Yaml.default_max_alias_depth) s =
145 let docs = parse_all_documents (Parser.of_string s) in
146 let doc = single_document_or_error docs ~empty:(Document.make None) in
147 match Document.root doc with
148 | None -> `Null
149 | Some yaml ->
150 Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth
151 yaml
152
153(** Load single document as Yaml.
154
155 @param resolve_aliases Whether to resolve aliases (default false for Yaml.t)
156 @param max_nodes Maximum nodes during alias expansion (default 10M)
157 @param max_depth Maximum alias nesting depth (default 100) *)
158let yaml_of_string ?(resolve_aliases = false)
159 ?(max_nodes = Yaml.default_max_alias_nodes)
160 ?(max_depth = Yaml.default_max_alias_depth) s =
161 let docs = parse_all_documents (Parser.of_string s) in
162 let doc = single_document_or_error docs ~empty:(Document.make None) in
163 match Document.root doc with
164 | None -> `Scalar (Scalar.make "")
165 | Some yaml ->
166 if resolve_aliases then Yaml.resolve_aliases ~max_nodes ~max_depth yaml
167 else yaml
168
169(** Load all documents *)
170let documents_of_string s =
171 parse_all_documents (Parser.of_string s)
172
173(** {2 Reader-based loading} *)
174
175(** Load single document as Value from a Bytes.Reader.
176
177 @param resolve_aliases Whether to resolve aliases (default true)
178 @param max_nodes Maximum nodes during alias expansion (default 10M)
179 @param max_depth Maximum alias nesting depth (default 100) *)
180let value_of_reader ?(resolve_aliases = true)
181 ?(max_nodes = Yaml.default_max_alias_nodes)
182 ?(max_depth = Yaml.default_max_alias_depth) reader =
183 let docs = parse_all_documents (Parser.of_reader reader) in
184 let doc = single_document_or_error docs ~empty:(Document.make None) in
185 match Document.root doc with
186 | None -> `Null
187 | Some yaml ->
188 Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes ~max_depth
189 yaml
190
191(** Load single document as Yaml from a Bytes.Reader.
192
193 @param resolve_aliases Whether to resolve aliases (default false for Yaml.t)
194 @param max_nodes Maximum nodes during alias expansion (default 10M)
195 @param max_depth Maximum alias nesting depth (default 100) *)
196let yaml_of_reader ?(resolve_aliases = false)
197 ?(max_nodes = Yaml.default_max_alias_nodes)
198 ?(max_depth = Yaml.default_max_alias_depth) reader =
199 let docs = parse_all_documents (Parser.of_reader reader) in
200 let doc = single_document_or_error docs ~empty:(Document.make None) in
201 match Document.root doc with
202 | None -> `Scalar (Scalar.make "")
203 | Some yaml ->
204 if resolve_aliases then Yaml.resolve_aliases ~max_nodes ~max_depth yaml
205 else yaml
206
207(** Load all documents from a Bytes.Reader *)
208let documents_of_reader reader =
209 parse_all_documents (Parser.of_reader reader)
210
211(** {2 Parser-function based loading}
212
213 These functions accept a [unit -> Event.spanned option] function
214 instead of a [Parser.t], allowing them to work with any event source
215 (e.g., streaming parsers). *)
216
217(** Generic document loader using event source function *)
218let load_generic_fn extract next_event =
219 let state = create_state () in
220 let rec loop () =
221 match next_event () with
222 | None -> None
223 | Some ev -> (
224 process_event state ev;
225 match ev.event with
226 | Event.Document_end _ -> (
227 match state.documents with
228 | doc :: _ ->
229 state.documents <- [];
230 Some (extract doc)
231 | [] -> None)
232 | Event.Stream_end -> None
233 | _ -> loop ())
234 in
235 loop ()
236
237(** Generic document loader - extracts common pattern from load_* functions *)
238let load_generic extract parser =
239 load_generic_fn extract (fun () -> Parser.next parser)
240
241(** Load single Value from parser.
242
243 @param resolve_aliases Whether to resolve aliases (default true)
244 @param max_nodes Maximum nodes during alias expansion (default 10M)
245 @param max_depth Maximum alias nesting depth (default 100) *)
246let load_value ?(resolve_aliases = true)
247 ?(max_nodes = Yaml.default_max_alias_nodes)
248 ?(max_depth = Yaml.default_max_alias_depth) parser =
249 load_generic
250 (fun doc ->
251 match Document.root doc with
252 | None -> `Null
253 | Some yaml ->
254 Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes
255 ~max_depth yaml)
256 parser
257
258(** Load single Yaml from parser *)
259let load_yaml parser =
260 load_generic
261 (fun doc ->
262 Document.root doc |> Option.value ~default:(`Scalar (Scalar.make "")))
263 parser
264
265(** Load single Document from parser *)
266let load_document parser = load_generic Fun.id parser
267
268(** Iterate over documents *)
269let iter_documents f parser =
270 let rec loop () =
271 match load_document parser with
272 | None -> ()
273 | Some doc ->
274 f doc;
275 loop ()
276 in
277 loop ()
278
279(** Fold over documents *)
280let fold_documents f init parser =
281 let rec loop acc =
282 match load_document parser with None -> acc | Some doc -> loop (f acc doc)
283 in
284 loop init
285
286
287(** Load single Value from event source.
288
289 @param resolve_aliases Whether to resolve aliases (default true)
290 @param max_nodes Maximum nodes during alias expansion (default 10M)
291 @param max_depth Maximum alias nesting depth (default 100) *)
292let value_of_parser ?(resolve_aliases = true)
293 ?(max_nodes = Yaml.default_max_alias_nodes)
294 ?(max_depth = Yaml.default_max_alias_depth) next_event =
295 match
296 load_generic_fn
297 (fun doc ->
298 match Document.root doc with
299 | None -> `Null
300 | Some yaml ->
301 Yaml.to_value ~resolve_aliases_first:resolve_aliases ~max_nodes
302 ~max_depth yaml)
303 next_event
304 with
305 | Some v -> v
306 | None -> `Null
307
308(** Load single Yaml from event source.
309
310 @param resolve_aliases Whether to resolve aliases (default false)
311 @param max_nodes Maximum nodes during alias expansion (default 10M)
312 @param max_depth Maximum alias nesting depth (default 100) *)
313let yaml_of_parser ?(resolve_aliases = false)
314 ?(max_nodes = Yaml.default_max_alias_nodes)
315 ?(max_depth = Yaml.default_max_alias_depth) next_event =
316 match
317 load_generic_fn
318 (fun doc ->
319 match Document.root doc with
320 | None -> `Scalar (Scalar.make "")
321 | Some yaml ->
322 if resolve_aliases then
323 Yaml.resolve_aliases ~max_nodes ~max_depth yaml
324 else yaml)
325 next_event
326 with
327 | Some v -> v
328 | None -> `Scalar (Scalar.make "")
329
330(** Load single Document from event source *)
331let document_of_parser next_event = load_generic_fn Fun.id next_event
332
333(** Load all documents from event source *)
334let documents_of_parser next_event =
335 let state = create_state () in
336 let rec loop () =
337 match next_event () with
338 | None -> List.rev state.documents
339 | Some ev ->
340 process_event state ev;
341 loop ()
342 in
343 loop ()
344
345(** Iterate over documents from event source *)
346let iter_documents_parser f next_event =
347 let rec loop () =
348 match document_of_parser next_event with
349 | None -> ()
350 | Some doc ->
351 f doc;
352 loop ()
353 in
354 loop ()
355
356(** Fold over documents from event source *)
357let fold_documents_parser f init next_event =
358 let rec loop acc =
359 match document_of_parser next_event with
360 | None -> acc
361 | Some doc -> loop (f acc doc)
362 in
363 loop init