Pure OCaml Yaml 1.2 reader and writer using Bytesrw
1(*---------------------------------------------------------------------------
2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3 SPDX-License-Identifier: ISC
4 ---------------------------------------------------------------------------*)
5
6(** Full YAML representation with anchors, tags, and aliases *)
7
8type t =
9 [ `Scalar of Scalar.t
10 | `Alias of string
11 | `A of t Sequence.t
12 | `O of (t, t) Mapping.t ]
13
14(** Pretty printing *)
15
16let rec pp fmt (v : t) =
17 match v with
18 | `Scalar s -> Scalar.pp fmt s
19 | `Alias name -> Format.fprintf fmt "*%s" name
20 | `A seq -> Sequence.pp pp fmt seq
21 | `O map -> Mapping.pp pp pp fmt map
22
23(** Equality *)
24
25let rec equal (a : t) (b : t) =
26 match (a, b) with
27 | `Scalar a, `Scalar b -> Scalar.equal a b
28 | `Alias a, `Alias b -> String.equal a b
29 | `A a, `A b -> Sequence.equal equal a b
30 | `O a, `O b -> Mapping.equal equal equal a b
31 | _ -> false
32
33(** Construct from JSON-compatible Value *)
34
35let rec of_value (v : Value.t) : t =
36 match v with
37 | `Null -> `Scalar (Scalar.make "null")
38 | `Bool true -> `Scalar (Scalar.make "true")
39 | `Bool false -> `Scalar (Scalar.make "false")
40 | `Float f ->
41 let s =
42 if Float.is_integer f && Float.abs f < 1e15 then Printf.sprintf "%.0f" f
43 else Printf.sprintf "%g" f
44 in
45 `Scalar (Scalar.make s)
46 | `String s -> `Scalar (Scalar.make s ~style:`Double_quoted)
47 | `A items -> `A (Sequence.make (List.map of_value items))
48 | `O pairs ->
49 `O
50 (Mapping.make
51 (List.map
52 (fun (k, v) -> (`Scalar (Scalar.make k), of_value v))
53 pairs))
54
55(** Default limits for alias expansion (protection against billion laughs
56 attack) *)
57let default_max_alias_nodes = 10_000_000
58
59let default_max_alias_depth = 100
60
61(** Resolve aliases by replacing them with referenced nodes.
62
63 Processes the tree in document order so that aliases resolve to the anchor
64 value that was defined at the point the alias was encountered.
65
66 See
67 {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2
68 (Anchors and Aliases)} of the YAML 1.2.2 specification for details on how
69 anchors and aliases work in YAML.
70
71 This implements protection against the "billion laughs attack" (see
72 {{:https://yaml.org/spec/1.2.2/#321-processes}Section 3.2.1 (Processes)}) by
73 limiting both the total number of nodes and the nesting depth during
74 expansion.
75
76 @param max_nodes
77 Maximum number of nodes to create during expansion (default 10M)
78 @param max_depth
79 Maximum depth of alias-within-alias resolution (default 100)
80 @raise Error.Yamlrw_error
81 with {!type:Error.kind} [Alias_expansion_node_limit] if max_nodes is
82 exceeded
83 @raise Error.Yamlrw_error
84 with {!type:Error.kind} [Alias_expansion_depth_limit] if max_depth is
85 exceeded *)
86let resolve_aliases ?(max_nodes = default_max_alias_nodes)
87 ?(max_depth = default_max_alias_depth) (root : t) : t =
88 let anchors = Hashtbl.create 16 in
89 let node_count = ref 0 in
90
91 (* Check node limit *)
92 let check_node_limit () =
93 incr node_count;
94 if !node_count > max_nodes then
95 Error.raise (Alias_expansion_node_limit max_nodes)
96 in
97
98 (* Register anchor if present on a node *)
99 let register_anchor name resolved_node =
100 Hashtbl.replace anchors name resolved_node
101 in
102
103 (* Resolve an alias by looking up and expanding the target *)
104 let rec expand_alias ~depth name =
105 if depth >= max_depth then
106 Error.raise (Alias_expansion_depth_limit max_depth);
107 match Hashtbl.find_opt anchors name with
108 | Some target ->
109 (* The target is already resolved, but may contain aliases that
110 need expansion if it was registered before those anchors existed *)
111 resolve ~depth:(depth + 1) target
112 | None -> Error.raise (Undefined_alias name)
113 (* Single pass: process in document order, registering anchors and resolving aliases *)
114 and resolve ~depth (v : t) : t =
115 check_node_limit ();
116 match v with
117 | `Scalar s ->
118 (* Register anchor after we have the resolved node *)
119 Option.iter (fun name -> register_anchor name v) (Scalar.anchor s);
120 v
121 | `Alias name -> expand_alias ~depth name
122 | `A seq ->
123 (* First resolve all members in order *)
124 let resolved_members =
125 List.map (resolve ~depth) (Sequence.members seq)
126 in
127 let resolved =
128 `A
129 (Sequence.make ?anchor:(Sequence.anchor seq) ?tag:(Sequence.tag seq)
130 ~implicit:(Sequence.implicit seq) ~style:(Sequence.style seq)
131 resolved_members)
132 in
133 (* Register anchor with resolved node *)
134 Option.iter (fun name -> register_anchor name resolved) (Sequence.anchor seq);
135 resolved
136 | `O map ->
137 (* Process key-value pairs in document order *)
138 let resolved_pairs =
139 List.map
140 (fun (k, v) ->
141 let resolved_k = resolve ~depth k in
142 let resolved_v = resolve ~depth v in
143 (resolved_k, resolved_v))
144 (Mapping.members map)
145 in
146 let resolved =
147 `O
148 (Mapping.make ?anchor:(Mapping.anchor map) ?tag:(Mapping.tag map)
149 ~implicit:(Mapping.implicit map) ~style:(Mapping.style map)
150 resolved_pairs)
151 in
152 (* Register anchor with resolved node *)
153 Option.iter (fun name -> register_anchor name resolved) (Mapping.anchor map);
154 resolved
155 in
156 resolve ~depth:0 root
157
158(** Convert scalar to JSON value based on content *)
159let rec scalar_to_value s =
160 let value = Scalar.value s in
161 let tag = Scalar.tag s in
162 let style = Scalar.style s in
163
164 (* If explicitly tagged, respect the tag *)
165 match tag with
166 | Some "tag:yaml.org,2002:null" | Some "!!null" -> `Null
167 | Some "tag:yaml.org,2002:bool" | Some "!!bool" -> (
168 match String.lowercase_ascii value with
169 | "true" | "yes" | "on" -> `Bool true
170 | "false" | "no" | "off" -> `Bool false
171 | _ -> Error.raise (Invalid_scalar_conversion (value, "bool")))
172 | Some "tag:yaml.org,2002:int" | Some "!!int" -> (
173 try `Float (Float.of_string value)
174 with _ -> Error.raise (Invalid_scalar_conversion (value, "int")))
175 | Some "tag:yaml.org,2002:float" | Some "!!float" -> (
176 try `Float (Float.of_string value)
177 with _ -> Error.raise (Invalid_scalar_conversion (value, "float")))
178 | Some "tag:yaml.org,2002:str" | Some "!!str" -> `String value
179 | Some _ ->
180 (* Unknown tag - treat as string *)
181 `String value
182 | None ->
183 (* Implicit type resolution for plain scalars *)
184 if style <> `Plain then `String value else infer_scalar_type value
185
186(** Infer type from plain scalar value *)
187and infer_scalar_type value =
188 match String.lowercase_ascii value with
189 (* Null *)
190 | "" | "null" | "~" -> `Null
191 (* Boolean true *)
192 | "true" | "yes" | "on" -> `Bool true
193 (* Boolean false *)
194 | "false" | "no" | "off" -> `Bool false
195 (* Special floats *)
196 | ".inf" | "+.inf" -> `Float Float.infinity
197 | "-.inf" -> `Float Float.neg_infinity
198 | ".nan" -> `Float Float.nan
199 (* Try numeric *)
200 | _ -> try_parse_number value
201
202(** Try to parse as number *)
203and try_parse_number value =
204 (* Check if value looks like a valid YAML number (not inf/nan without dot)
205 This guards against OCaml's Float.of_string accepting "inf", "nan", etc.
206 See: https://github.com/avsm/ocaml-yaml/issues/82 *)
207 let looks_like_number () =
208 let len = String.length value in
209 if len = 0 then false
210 else
211 let first = value.[0] in
212 if first >= '0' && first <= '9' then true
213 else if (first = '-' || first = '+') && len >= 2 then
214 let second = value.[1] in
215 (* After sign, must be digit or dot-digit (for +.5, -.5) *)
216 (second >= '0' && second <= '9')
217 || (second = '.' && len >= 3 && value.[2] >= '0' && value.[2] <= '9')
218 else false
219 in
220 (* Try integer/float *)
221 let try_numeric () =
222 if looks_like_number () then
223 try
224 (* Handle octal: 0o prefix or leading 0 *)
225 if String.length value > 2 && value.[0] = '0' then
226 match value.[1] with
227 | 'x' | 'X' ->
228 (* Hex *)
229 Some (`Float (Float.of_int (int_of_string value)))
230 | 'o' | 'O' ->
231 (* Octal *)
232 Some (`Float (Float.of_int (int_of_string value)))
233 | 'b' | 'B' ->
234 (* Binary *)
235 Some (`Float (Float.of_int (int_of_string value)))
236 | _ ->
237 (* Decimal with leading zero or octal in YAML 1.1 *)
238 Some (`Float (Float.of_string value))
239 else Some (`Float (Float.of_string value))
240 with _ -> None
241 else None
242 in
243 match try_numeric () with
244 | Some v -> v
245 | None ->
246 (* Try float starting with dot (e.g., .5 for 0.5)
247 Note: We must NOT use Float.of_string as a general fallback because
248 OCaml accepts "nan", "inf", "infinity" which are NOT valid YAML floats.
249 YAML requires the leading dot: .nan, .inf, -.inf
250 See: https://github.com/avsm/ocaml-yaml/issues/82 *)
251 if
252 String.length value >= 2
253 && value.[0] = '.'
254 && value.[1] >= '0'
255 && value.[1] <= '9'
256 then try `Float (Float.of_string value) with _ -> `String value
257 else `String value
258
259(** Convert to JSON-compatible Value.
260
261 Converts a full YAML representation to a simplified JSON-compatible value
262 type. This process implements the representation graph to serialization tree
263 conversion described in
264 {{:https://yaml.org/spec/1.2.2/#32-processes}Section 3.2 (Processes)} of the
265 YAML 1.2.2 specification.
266
267 See also
268 {{:https://yaml.org/spec/1.2.2/#10212-json-schema}Section 10.2.1.2 (JSON
269 Schema)} for the tag resolution used during conversion.
270
271 @param resolve_aliases_first
272 Whether to resolve aliases before conversion (default true)
273 @param max_nodes Maximum nodes during alias expansion (default 10M)
274 @param max_depth Maximum alias nesting depth (default 100)
275 @raise Error.Yamlrw_error
276 with {!type:Error.kind} [Unresolved_alias] if resolve_aliases_first is
277 false and an alias is encountered *)
278let to_value ?(resolve_aliases_first = true)
279 ?(max_nodes = default_max_alias_nodes)
280 ?(max_depth = default_max_alias_depth) (v : t) : Value.t =
281 let v =
282 if resolve_aliases_first then resolve_aliases ~max_nodes ~max_depth v else v
283 in
284 let rec convert (v : t) : Value.t =
285 match v with
286 | `Scalar s -> scalar_to_value s
287 | `Alias name -> Error.raise (Unresolved_alias name)
288 | `A seq -> `A (List.map convert (Sequence.members seq))
289 | `O map ->
290 `O
291 (List.map
292 (fun (k, v) ->
293 let key =
294 match k with
295 | `Scalar s -> Scalar.value s
296 | _ ->
297 Error.raise (Type_mismatch ("string key", "complex key"))
298 in
299 (key, convert v))
300 (Mapping.members map))
301 in
302 convert v
303
304(** Get anchor from any node *)
305let anchor (v : t) =
306 match v with
307 | `Scalar s -> Scalar.anchor s
308 | `Alias _ -> None
309 | `A seq -> Sequence.anchor seq
310 | `O map -> Mapping.anchor map
311
312(** Get tag from any node *)
313let tag (v : t) =
314 match v with
315 | `Scalar s -> Scalar.tag s
316 | `Alias _ -> None
317 | `A seq -> Sequence.tag seq
318 | `O map -> Mapping.tag map