Pure OCaml Yaml 1.2 reader and writer using Bytesrw
1(*---------------------------------------------------------------------------
2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3 SPDX-License-Identifier: ISC
4 ---------------------------------------------------------------------------*)
5
6(** Full YAML representation with anchors, tags, and aliases *)
7
8type t = [
9 | `Scalar of Scalar.t
10 | `Alias of string
11 | `A of t Sequence.t
12 | `O of (t, t) Mapping.t
13]
14
15(** Pretty printing *)
16
17let rec pp fmt (v : t) =
18 match v with
19 | `Scalar s -> Scalar.pp fmt s
20 | `Alias name -> Format.fprintf fmt "*%s" name
21 | `A seq -> Sequence.pp pp fmt seq
22 | `O map -> Mapping.pp pp pp fmt map
23
24(** Equality *)
25
26let rec equal (a : t) (b : t) =
27 match a, b with
28 | `Scalar a, `Scalar b -> Scalar.equal a b
29 | `Alias a, `Alias b -> String.equal a b
30 | `A a, `A b -> Sequence.equal equal a b
31 | `O a, `O b -> Mapping.equal equal equal a b
32 | _ -> false
33
34(** Construct from JSON-compatible Value *)
35
36let rec of_value (v : Value.t) : t =
37 match v with
38 | `Null -> `Scalar (Scalar.make "null")
39 | `Bool true -> `Scalar (Scalar.make "true")
40 | `Bool false -> `Scalar (Scalar.make "false")
41 | `Float f ->
42 let s =
43 if Float.is_integer f && Float.abs f < 1e15 then
44 Printf.sprintf "%.0f" f
45 else
46 Printf.sprintf "%g" f
47 in
48 `Scalar (Scalar.make s)
49 | `String s ->
50 `Scalar (Scalar.make s ~style:`Double_quoted)
51 | `A items ->
52 `A (Sequence.make (List.map of_value items))
53 | `O pairs ->
54 `O (Mapping.make (List.map (fun (k, v) ->
55 (`Scalar (Scalar.make k), of_value v)
56 ) pairs))
57
58(** Default limits for alias expansion (protection against billion laughs attack) *)
59let default_max_alias_nodes = 10_000_000
60let default_max_alias_depth = 100
61
62(** Resolve aliases by replacing them with referenced nodes.
63
64 Processes the tree in document order so that aliases resolve to the
65 anchor value that was defined at the point the alias was encountered.
66
67 See {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2
68 (Anchors and Aliases)} of the YAML 1.2.2 specification for details on
69 how anchors and aliases work in YAML.
70
71 This implements protection against the "billion laughs attack"
72 (see {{:https://yaml.org/spec/1.2.2/#321-processes}Section 3.2.1 (Processes)})
73 by limiting both the total number of nodes and the nesting depth during expansion.
74
75 @param max_nodes Maximum number of nodes to create during expansion (default 10M)
76 @param max_depth Maximum depth of alias-within-alias resolution (default 100)
77 @raise Error.Yamlrw_error with {!type:Error.kind} [Alias_expansion_node_limit] if max_nodes is exceeded
78 @raise Error.Yamlrw_error with {!type:Error.kind} [Alias_expansion_depth_limit] if max_depth is exceeded
79*)
80let resolve_aliases ?(max_nodes = default_max_alias_nodes) ?(max_depth = default_max_alias_depth) (root : t) : t =
81 let anchors = Hashtbl.create 16 in
82 let node_count = ref 0 in
83
84 (* Check node limit *)
85 let check_node_limit () =
86 incr node_count;
87 if !node_count > max_nodes then
88 Error.raise (Alias_expansion_node_limit max_nodes)
89 in
90
91 (* Register anchor if present on a node *)
92 let register_anchor name resolved_node =
93 Hashtbl.replace anchors name resolved_node
94 in
95
96 (* Resolve an alias by looking up and expanding the target *)
97 let rec expand_alias ~depth name =
98 if depth >= max_depth then
99 Error.raise (Alias_expansion_depth_limit max_depth);
100 match Hashtbl.find_opt anchors name with
101 | Some target ->
102 (* The target is already resolved, but may contain aliases that
103 need expansion if it was registered before those anchors existed *)
104 resolve ~depth:(depth + 1) target
105 | None -> Error.raise (Undefined_alias name)
106
107 (* Single pass: process in document order, registering anchors and resolving aliases *)
108 and resolve ~depth (v : t) : t =
109 check_node_limit ();
110 match v with
111 | `Scalar s ->
112 (* Register anchor after we have the resolved node *)
113 Option.iter (fun name -> register_anchor name v) (Scalar.anchor s);
114 v
115 | `Alias name ->
116 expand_alias ~depth name
117 | `A seq ->
118 (* First resolve all members in order *)
119 let resolved_members = List.map (resolve ~depth) (Sequence.members seq) in
120 let resolved = `A (Sequence.make
121 ?anchor:(Sequence.anchor seq)
122 ?tag:(Sequence.tag seq)
123 ~implicit:(Sequence.implicit seq)
124 ~style:(Sequence.style seq)
125 resolved_members) in
126 (* Register anchor with resolved node *)
127 Option.iter (fun name -> register_anchor name resolved) (Sequence.anchor seq);
128 resolved
129 | `O map ->
130 (* Process key-value pairs in document order *)
131 let resolved_pairs = List.map (fun (k, v) ->
132 let resolved_k = resolve ~depth k in
133 let resolved_v = resolve ~depth v in
134 (resolved_k, resolved_v)
135 ) (Mapping.members map) in
136 let resolved = `O (Mapping.make
137 ?anchor:(Mapping.anchor map)
138 ?tag:(Mapping.tag map)
139 ~implicit:(Mapping.implicit map)
140 ~style:(Mapping.style map)
141 resolved_pairs) in
142 (* Register anchor with resolved node *)
143 Option.iter (fun name -> register_anchor name resolved) (Mapping.anchor map);
144 resolved
145 in
146 resolve ~depth:0 root
147
148(** Convert scalar to JSON value based on content *)
149let rec scalar_to_value s =
150 let value = Scalar.value s in
151 let tag = Scalar.tag s in
152 let style = Scalar.style s in
153
154 (* If explicitly tagged, respect the tag *)
155 match tag with
156 | Some "tag:yaml.org,2002:null" | Some "!!null" ->
157 `Null
158 | Some "tag:yaml.org,2002:bool" | Some "!!bool" ->
159 (match String.lowercase_ascii value with
160 | "true" | "yes" | "on" -> `Bool true
161 | "false" | "no" | "off" -> `Bool false
162 | _ -> Error.raise (Invalid_scalar_conversion (value, "bool")))
163 | Some "tag:yaml.org,2002:int" | Some "!!int" ->
164 (try `Float (Float.of_string value)
165 with _ -> Error.raise (Invalid_scalar_conversion (value, "int")))
166 | Some "tag:yaml.org,2002:float" | Some "!!float" ->
167 (try `Float (Float.of_string value)
168 with _ -> Error.raise (Invalid_scalar_conversion (value, "float")))
169 | Some "tag:yaml.org,2002:str" | Some "!!str" ->
170 `String value
171 | Some _ ->
172 (* Unknown tag - treat as string *)
173 `String value
174 | None ->
175 (* Implicit type resolution for plain scalars *)
176 if style <> `Plain then
177 `String value
178 else
179 infer_scalar_type value
180
181(** Infer type from plain scalar value *)
182and infer_scalar_type value =
183 match String.lowercase_ascii value with
184 (* Null *)
185 | "" | "null" | "~" -> `Null
186 (* Boolean true *)
187 | "true" | "yes" | "on" -> `Bool true
188 (* Boolean false *)
189 | "false" | "no" | "off" -> `Bool false
190 (* Special floats *)
191 | ".inf" | "+.inf" -> `Float Float.infinity
192 | "-.inf" -> `Float Float.neg_infinity
193 | ".nan" -> `Float Float.nan
194 (* Try numeric *)
195 | _ -> try_parse_number value
196
197(** Try to parse as number *)
198and try_parse_number value =
199 (* Check if value looks like a valid YAML number (not inf/nan without dot)
200 This guards against OCaml's Float.of_string accepting "inf", "nan", etc.
201 See: https://github.com/avsm/ocaml-yaml/issues/82 *)
202 let looks_like_number () =
203 let len = String.length value in
204 if len = 0 then false
205 else
206 let first = value.[0] in
207 if first >= '0' && first <= '9' then true
208 else if (first = '-' || first = '+') && len >= 2 then
209 let second = value.[1] in
210 (* After sign, must be digit or dot-digit (for +.5, -.5) *)
211 second >= '0' && second <= '9' ||
212 (second = '.' && len >= 3 && value.[2] >= '0' && value.[2] <= '9')
213 else false
214 in
215 (* Try integer/float *)
216 let try_numeric () =
217 if looks_like_number () then
218 try
219 (* Handle octal: 0o prefix or leading 0 *)
220 if String.length value > 2 && value.[0] = '0' then
221 match value.[1] with
222 | 'x' | 'X' ->
223 (* Hex *)
224 Some (`Float (Float.of_int (int_of_string value)))
225 | 'o' | 'O' ->
226 (* Octal *)
227 Some (`Float (Float.of_int (int_of_string value)))
228 | 'b' | 'B' ->
229 (* Binary *)
230 Some (`Float (Float.of_int (int_of_string value)))
231 | _ ->
232 (* Decimal with leading zero or octal in YAML 1.1 *)
233 Some (`Float (Float.of_string value))
234 else
235 Some (`Float (Float.of_string value))
236 with _ -> None
237 else None
238 in
239 match try_numeric () with
240 | Some v -> v
241 | None ->
242 (* Try float starting with dot (e.g., .5 for 0.5)
243 Note: We must NOT use Float.of_string as a general fallback because
244 OCaml accepts "nan", "inf", "infinity" which are NOT valid YAML floats.
245 YAML requires the leading dot: .nan, .inf, -.inf
246 See: https://github.com/avsm/ocaml-yaml/issues/82 *)
247 if String.length value >= 2 && value.[0] = '.' &&
248 value.[1] >= '0' && value.[1] <= '9' then
249 try `Float (Float.of_string value)
250 with _ -> `String value
251 else
252 `String value
253
254(** Convert to JSON-compatible Value.
255
256 Converts a full YAML representation to a simplified JSON-compatible value type.
257 This process implements the representation graph to serialization tree conversion
258 described in {{:https://yaml.org/spec/1.2.2/#32-processes}Section 3.2 (Processes)}
259 of the YAML 1.2.2 specification.
260
261 See also {{:https://yaml.org/spec/1.2.2/#10212-json-schema}Section 10.2.1.2
262 (JSON Schema)} for the tag resolution used during conversion.
263
264 @param resolve_aliases_first Whether to resolve aliases before conversion (default true)
265 @param max_nodes Maximum nodes during alias expansion (default 10M)
266 @param max_depth Maximum alias nesting depth (default 100)
267 @raise Error.Yamlrw_error with {!type:Error.kind} [Unresolved_alias] if resolve_aliases_first is false and an alias is encountered
268*)
269let to_value
270 ?(resolve_aliases_first = true)
271 ?(max_nodes = default_max_alias_nodes)
272 ?(max_depth = default_max_alias_depth)
273 (v : t) : Value.t =
274 let v = if resolve_aliases_first then resolve_aliases ~max_nodes ~max_depth v else v in
275 let rec convert (v : t) : Value.t =
276 match v with
277 | `Scalar s -> scalar_to_value s
278 | `Alias name -> Error.raise (Unresolved_alias name)
279 | `A seq -> `A (List.map convert (Sequence.members seq))
280 | `O map ->
281 `O (List.map (fun (k, v) ->
282 let key = match k with
283 | `Scalar s -> Scalar.value s
284 | _ -> Error.raise (Type_mismatch ("string key", "complex key"))
285 in
286 (key, convert v)
287 ) (Mapping.members map))
288 in
289 convert v
290
291(** Get anchor from any node *)
292let anchor (v : t) =
293 match v with
294 | `Scalar s -> Scalar.anchor s
295 | `Alias _ -> None
296 | `A seq -> Sequence.anchor seq
297 | `O map -> Mapping.anchor map
298
299(** Get tag from any node *)
300let tag (v : t) =
301 match v with
302 | `Scalar s -> Scalar.tag s
303 | `Alias _ -> None
304 | `A seq -> Sequence.tag seq
305 | `O map -> Mapping.tag map