Pure OCaml Yaml 1.2 reader and writer using Bytesrw
1(*---------------------------------------------------------------------------
2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3 SPDX-License-Identifier: ISC
4 ---------------------------------------------------------------------------*)
5
6(** Full YAML representation with anchors, tags, and aliases *)
7
8type t = [
9 | `Scalar of Scalar.t
10 | `Alias of string
11 | `A of t Sequence.t
12 | `O of (t, t) Mapping.t
13]
14
15(** Pretty printing *)
16
17let rec pp fmt (v : t) =
18 match v with
19 | `Scalar s -> Scalar.pp fmt s
20 | `Alias name -> Format.fprintf fmt "*%s" name
21 | `A seq -> Sequence.pp pp fmt seq
22 | `O map -> Mapping.pp pp pp fmt map
23
24(** Equality *)
25
26let rec equal (a : t) (b : t) =
27 match a, b with
28 | `Scalar a, `Scalar b -> Scalar.equal a b
29 | `Alias a, `Alias b -> String.equal a b
30 | `A a, `A b -> Sequence.equal equal a b
31 | `O a, `O b -> Mapping.equal equal equal a b
32 | _ -> false
33
34(** Construct from JSON-compatible Value *)
35
36let rec of_value (v : Value.t) : t =
37 match v with
38 | `Null -> `Scalar (Scalar.make "null")
39 | `Bool true -> `Scalar (Scalar.make "true")
40 | `Bool false -> `Scalar (Scalar.make "false")
41 | `Float f ->
42 let s =
43 if Float.is_integer f && Float.abs f < 1e15 then
44 Printf.sprintf "%.0f" f
45 else
46 Printf.sprintf "%g" f
47 in
48 `Scalar (Scalar.make s)
49 | `String s ->
50 `Scalar (Scalar.make s ~style:`Double_quoted)
51 | `A items ->
52 `A (Sequence.make (List.map of_value items))
53 | `O pairs ->
54 `O (Mapping.make (List.map (fun (k, v) ->
55 (`Scalar (Scalar.make k), of_value v)
56 ) pairs))
57
58(** Default limits for alias expansion (protection against billion laughs attack) *)
59let default_max_alias_nodes = 10_000_000
60let default_max_alias_depth = 100
61
62(** Resolve aliases by replacing them with referenced nodes.
63
64 Processes the tree in document order so that aliases resolve to the
65 anchor value that was defined at the point the alias was encountered.
66
67 See {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2
68 (Anchors and Aliases)} of the YAML 1.2.2 specification for details on
69 how anchors and aliases work in YAML.
70
71 This implements protection against the "billion laughs attack"
72 (see {{:https://yaml.org/spec/1.2.2/#321-processes}Section 3.2.1 (Processes)})
73 by limiting both the total number of nodes and the nesting depth during expansion.
74
75 @param max_nodes Maximum number of nodes to create during expansion (default 10M)
76 @param max_depth Maximum depth of alias-within-alias resolution (default 100)
77 @raise Error.Yamlrw_error with {!type:Error.kind} [Alias_expansion_node_limit] if max_nodes is exceeded
78 @raise Error.Yamlrw_error with {!type:Error.kind} [Alias_expansion_depth_limit] if max_depth is exceeded
79*)
80let resolve_aliases ?(max_nodes = default_max_alias_nodes) ?(max_depth = default_max_alias_depth) (root : t) : t =
81 let anchors = Hashtbl.create 16 in
82 let node_count = ref 0 in
83
84 (* Check node limit *)
85 let check_node_limit () =
86 incr node_count;
87 if !node_count > max_nodes then
88 Error.raise (Alias_expansion_node_limit max_nodes)
89 in
90
91 (* Register anchor if present on a node *)
92 let register_anchor name resolved_node =
93 Hashtbl.replace anchors name resolved_node
94 in
95
96 (* Resolve an alias by looking up and expanding the target *)
97 let rec expand_alias ~depth name =
98 if depth >= max_depth then
99 Error.raise (Alias_expansion_depth_limit max_depth);
100 match Hashtbl.find_opt anchors name with
101 | Some target ->
102 (* The target is already resolved, but may contain aliases that
103 need expansion if it was registered before those anchors existed *)
104 resolve ~depth:(depth + 1) target
105 | None -> Error.raise (Undefined_alias name)
106
107 (* Single pass: process in document order, registering anchors and resolving aliases *)
108 and resolve ~depth (v : t) : t =
109 check_node_limit ();
110 match v with
111 | `Scalar s ->
112 (* Register anchor after we have the resolved node *)
113 (match Scalar.anchor s with
114 | Some name -> register_anchor name v
115 | None -> ());
116 v
117 | `Alias name ->
118 expand_alias ~depth name
119 | `A seq ->
120 (* First resolve all members in order *)
121 let resolved_members = List.map (resolve ~depth) (Sequence.members seq) in
122 let resolved = `A (Sequence.make
123 ?anchor:(Sequence.anchor seq)
124 ?tag:(Sequence.tag seq)
125 ~implicit:(Sequence.implicit seq)
126 ~style:(Sequence.style seq)
127 resolved_members) in
128 (* Register anchor with resolved node *)
129 (match Sequence.anchor seq with
130 | Some name -> register_anchor name resolved
131 | None -> ());
132 resolved
133 | `O map ->
134 (* Process key-value pairs in document order *)
135 let resolved_pairs = List.map (fun (k, v) ->
136 let resolved_k = resolve ~depth k in
137 let resolved_v = resolve ~depth v in
138 (resolved_k, resolved_v)
139 ) (Mapping.members map) in
140 let resolved = `O (Mapping.make
141 ?anchor:(Mapping.anchor map)
142 ?tag:(Mapping.tag map)
143 ~implicit:(Mapping.implicit map)
144 ~style:(Mapping.style map)
145 resolved_pairs) in
146 (* Register anchor with resolved node *)
147 (match Mapping.anchor map with
148 | Some name -> register_anchor name resolved
149 | None -> ());
150 resolved
151 in
152 resolve ~depth:0 root
153
154(** Convert scalar to JSON value based on content *)
155let rec scalar_to_value s =
156 let value = Scalar.value s in
157 let tag = Scalar.tag s in
158 let style = Scalar.style s in
159
160 (* If explicitly tagged, respect the tag *)
161 match tag with
162 | Some "tag:yaml.org,2002:null" | Some "!!null" ->
163 `Null
164 | Some "tag:yaml.org,2002:bool" | Some "!!bool" ->
165 (match String.lowercase_ascii value with
166 | "true" | "yes" | "on" -> `Bool true
167 | "false" | "no" | "off" -> `Bool false
168 | _ -> Error.raise (Invalid_scalar_conversion (value, "bool")))
169 | Some "tag:yaml.org,2002:int" | Some "!!int" ->
170 (try `Float (Float.of_string value)
171 with _ -> Error.raise (Invalid_scalar_conversion (value, "int")))
172 | Some "tag:yaml.org,2002:float" | Some "!!float" ->
173 (try `Float (Float.of_string value)
174 with _ -> Error.raise (Invalid_scalar_conversion (value, "float")))
175 | Some "tag:yaml.org,2002:str" | Some "!!str" ->
176 `String value
177 | Some _ ->
178 (* Unknown tag - treat as string *)
179 `String value
180 | None ->
181 (* Implicit type resolution for plain scalars *)
182 if style <> `Plain then
183 `String value
184 else
185 infer_scalar_type value
186
187(** Infer type from plain scalar value *)
188and infer_scalar_type value =
189 match String.lowercase_ascii value with
190 (* Null *)
191 | "" | "null" | "~" -> `Null
192 (* Boolean true *)
193 | "true" | "yes" | "on" -> `Bool true
194 (* Boolean false *)
195 | "false" | "no" | "off" -> `Bool false
196 (* Special floats *)
197 | ".inf" | "+.inf" -> `Float Float.infinity
198 | "-.inf" -> `Float Float.neg_infinity
199 | ".nan" -> `Float Float.nan
200 (* Try numeric *)
201 | _ -> try_parse_number value
202
203(** Try to parse as number *)
204and try_parse_number value =
205 (* Check if value looks like a valid YAML number (not inf/nan without dot)
206 This guards against OCaml's Float.of_string accepting "inf", "nan", etc.
207 See: https://github.com/avsm/ocaml-yaml/issues/82 *)
208 let looks_like_number () =
209 let len = String.length value in
210 if len = 0 then false
211 else
212 let first = value.[0] in
213 if first >= '0' && first <= '9' then true
214 else if (first = '-' || first = '+') && len >= 2 then
215 let second = value.[1] in
216 (* After sign, must be digit or dot-digit (for +.5, -.5) *)
217 second >= '0' && second <= '9' ||
218 (second = '.' && len >= 3 && value.[2] >= '0' && value.[2] <= '9')
219 else false
220 in
221 (* Try integer/float *)
222 let try_numeric () =
223 if looks_like_number () then
224 try
225 (* Handle octal: 0o prefix or leading 0 *)
226 if String.length value > 2 && value.[0] = '0' then
227 match value.[1] with
228 | 'x' | 'X' ->
229 (* Hex *)
230 Some (`Float (Float.of_int (int_of_string value)))
231 | 'o' | 'O' ->
232 (* Octal *)
233 Some (`Float (Float.of_int (int_of_string value)))
234 | 'b' | 'B' ->
235 (* Binary *)
236 Some (`Float (Float.of_int (int_of_string value)))
237 | _ ->
238 (* Decimal with leading zero or octal in YAML 1.1 *)
239 Some (`Float (Float.of_string value))
240 else
241 Some (`Float (Float.of_string value))
242 with _ -> None
243 else None
244 in
245 match try_numeric () with
246 | Some v -> v
247 | None ->
248 (* Try float starting with dot (e.g., .5 for 0.5)
249 Note: We must NOT use Float.of_string as a general fallback because
250 OCaml accepts "nan", "inf", "infinity" which are NOT valid YAML floats.
251 YAML requires the leading dot: .nan, .inf, -.inf
252 See: https://github.com/avsm/ocaml-yaml/issues/82 *)
253 if String.length value >= 2 && value.[0] = '.' &&
254 value.[1] >= '0' && value.[1] <= '9' then
255 try `Float (Float.of_string value)
256 with _ -> `String value
257 else
258 `String value
259
260(** Convert to JSON-compatible Value.
261
262 Converts a full YAML representation to a simplified JSON-compatible value type.
263 This process implements the representation graph to serialization tree conversion
264 described in {{:https://yaml.org/spec/1.2.2/#32-processes}Section 3.2 (Processes)}
265 of the YAML 1.2.2 specification.
266
267 See also {{:https://yaml.org/spec/1.2.2/#10212-json-schema}Section 10.2.1.2
268 (JSON Schema)} for the tag resolution used during conversion.
269
270 @param resolve_aliases_first Whether to resolve aliases before conversion (default true)
271 @param max_nodes Maximum nodes during alias expansion (default 10M)
272 @param max_depth Maximum alias nesting depth (default 100)
273 @raise Error.Yamlrw_error with {!type:Error.kind} [Unresolved_alias] if resolve_aliases_first is false and an alias is encountered
274*)
275let to_value
276 ?(resolve_aliases_first = true)
277 ?(max_nodes = default_max_alias_nodes)
278 ?(max_depth = default_max_alias_depth)
279 (v : t) : Value.t =
280 let v = if resolve_aliases_first then resolve_aliases ~max_nodes ~max_depth v else v in
281 let rec convert (v : t) : Value.t =
282 match v with
283 | `Scalar s -> scalar_to_value s
284 | `Alias name -> Error.raise (Unresolved_alias name)
285 | `A seq -> `A (List.map convert (Sequence.members seq))
286 | `O map ->
287 `O (List.map (fun (k, v) ->
288 let key = match k with
289 | `Scalar s -> Scalar.value s
290 | _ -> Error.raise (Type_mismatch ("string key", "complex key"))
291 in
292 (key, convert v)
293 ) (Mapping.members map))
294 in
295 convert v
296
297(** Get anchor from any node *)
298let anchor (v : t) =
299 match v with
300 | `Scalar s -> Scalar.anchor s
301 | `Alias _ -> None
302 | `A seq -> Sequence.anchor seq
303 | `O map -> Mapping.anchor map
304
305(** Get tag from any node *)
306let tag (v : t) =
307 match v with
308 | `Scalar s -> Scalar.tag s
309 | `Alias _ -> None
310 | `A seq -> Sequence.tag seq
311 | `O map -> Mapping.tag map