Pure OCaml Yaml 1.2 reader and writer using Bytesrw
1(*--------------------------------------------------------------------------- 2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 SPDX-License-Identifier: ISC 4 ---------------------------------------------------------------------------*) 5 6(** Full YAML representation with anchors, tags, and aliases *) 7 8type t = [ 9 | `Scalar of Scalar.t 10 | `Alias of string 11 | `A of t Sequence.t 12 | `O of (t, t) Mapping.t 13] 14 15(** Pretty printing *) 16 17let rec pp fmt (v : t) = 18 match v with 19 | `Scalar s -> Scalar.pp fmt s 20 | `Alias name -> Format.fprintf fmt "*%s" name 21 | `A seq -> Sequence.pp pp fmt seq 22 | `O map -> Mapping.pp pp pp fmt map 23 24(** Equality *) 25 26let rec equal (a : t) (b : t) = 27 match a, b with 28 | `Scalar a, `Scalar b -> Scalar.equal a b 29 | `Alias a, `Alias b -> String.equal a b 30 | `A a, `A b -> Sequence.equal equal a b 31 | `O a, `O b -> Mapping.equal equal equal a b 32 | _ -> false 33 34(** Construct from JSON-compatible Value *) 35 36let rec of_value (v : Value.t) : t = 37 match v with 38 | `Null -> `Scalar (Scalar.make "null") 39 | `Bool true -> `Scalar (Scalar.make "true") 40 | `Bool false -> `Scalar (Scalar.make "false") 41 | `Float f -> 42 let s = 43 if Float.is_integer f && Float.abs f < 1e15 then 44 Printf.sprintf "%.0f" f 45 else 46 Printf.sprintf "%g" f 47 in 48 `Scalar (Scalar.make s) 49 | `String s -> 50 `Scalar (Scalar.make s ~style:`Double_quoted) 51 | `A items -> 52 `A (Sequence.make (List.map of_value items)) 53 | `O pairs -> 54 `O (Mapping.make (List.map (fun (k, v) -> 55 (`Scalar (Scalar.make k), of_value v) 56 ) pairs)) 57 58(** Default limits for alias expansion (protection against billion laughs attack) *) 59let default_max_alias_nodes = 10_000_000 60let default_max_alias_depth = 100 61 62(** Resolve aliases by replacing them with referenced nodes. 63 64 Processes the tree in document order so that aliases resolve to the 65 anchor value that was defined at the point the alias was encountered. 66 67 See {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 68 (Anchors and Aliases)} of the YAML 1.2.2 specification for details on 69 how anchors and aliases work in YAML. 70 71 This implements protection against the "billion laughs attack" 72 (see {{:https://yaml.org/spec/1.2.2/#321-processes}Section 3.2.1 (Processes)}) 73 by limiting both the total number of nodes and the nesting depth during expansion. 74 75 @param max_nodes Maximum number of nodes to create during expansion (default 10M) 76 @param max_depth Maximum depth of alias-within-alias resolution (default 100) 77 @raise Error.Yamlrw_error with {!type:Error.kind} [Alias_expansion_node_limit] if max_nodes is exceeded 78 @raise Error.Yamlrw_error with {!type:Error.kind} [Alias_expansion_depth_limit] if max_depth is exceeded 79*) 80let resolve_aliases ?(max_nodes = default_max_alias_nodes) ?(max_depth = default_max_alias_depth) (root : t) : t = 81 let anchors = Hashtbl.create 16 in 82 let node_count = ref 0 in 83 84 (* Check node limit *) 85 let check_node_limit () = 86 incr node_count; 87 if !node_count > max_nodes then 88 Error.raise (Alias_expansion_node_limit max_nodes) 89 in 90 91 (* Register anchor if present on a node *) 92 let register_anchor name resolved_node = 93 Hashtbl.replace anchors name resolved_node 94 in 95 96 (* Resolve an alias by looking up and expanding the target *) 97 let rec expand_alias ~depth name = 98 if depth >= max_depth then 99 Error.raise (Alias_expansion_depth_limit max_depth); 100 match Hashtbl.find_opt anchors name with 101 | Some target -> 102 (* The target is already resolved, but may contain aliases that 103 need expansion if it was registered before those anchors existed *) 104 resolve ~depth:(depth + 1) target 105 | None -> Error.raise (Undefined_alias name) 106 107 (* Single pass: process in document order, registering anchors and resolving aliases *) 108 and resolve ~depth (v : t) : t = 109 check_node_limit (); 110 match v with 111 | `Scalar s -> 112 (* Register anchor after we have the resolved node *) 113 (match Scalar.anchor s with 114 | Some name -> register_anchor name v 115 | None -> ()); 116 v 117 | `Alias name -> 118 expand_alias ~depth name 119 | `A seq -> 120 (* First resolve all members in order *) 121 let resolved_members = List.map (resolve ~depth) (Sequence.members seq) in 122 let resolved = `A (Sequence.make 123 ?anchor:(Sequence.anchor seq) 124 ?tag:(Sequence.tag seq) 125 ~implicit:(Sequence.implicit seq) 126 ~style:(Sequence.style seq) 127 resolved_members) in 128 (* Register anchor with resolved node *) 129 (match Sequence.anchor seq with 130 | Some name -> register_anchor name resolved 131 | None -> ()); 132 resolved 133 | `O map -> 134 (* Process key-value pairs in document order *) 135 let resolved_pairs = List.map (fun (k, v) -> 136 let resolved_k = resolve ~depth k in 137 let resolved_v = resolve ~depth v in 138 (resolved_k, resolved_v) 139 ) (Mapping.members map) in 140 let resolved = `O (Mapping.make 141 ?anchor:(Mapping.anchor map) 142 ?tag:(Mapping.tag map) 143 ~implicit:(Mapping.implicit map) 144 ~style:(Mapping.style map) 145 resolved_pairs) in 146 (* Register anchor with resolved node *) 147 (match Mapping.anchor map with 148 | Some name -> register_anchor name resolved 149 | None -> ()); 150 resolved 151 in 152 resolve ~depth:0 root 153 154(** Convert scalar to JSON value based on content *) 155let rec scalar_to_value s = 156 let value = Scalar.value s in 157 let tag = Scalar.tag s in 158 let style = Scalar.style s in 159 160 (* If explicitly tagged, respect the tag *) 161 match tag with 162 | Some "tag:yaml.org,2002:null" | Some "!!null" -> 163 `Null 164 | Some "tag:yaml.org,2002:bool" | Some "!!bool" -> 165 (match String.lowercase_ascii value with 166 | "true" | "yes" | "on" -> `Bool true 167 | "false" | "no" | "off" -> `Bool false 168 | _ -> Error.raise (Invalid_scalar_conversion (value, "bool"))) 169 | Some "tag:yaml.org,2002:int" | Some "!!int" -> 170 (try `Float (Float.of_string value) 171 with _ -> Error.raise (Invalid_scalar_conversion (value, "int"))) 172 | Some "tag:yaml.org,2002:float" | Some "!!float" -> 173 (try `Float (Float.of_string value) 174 with _ -> Error.raise (Invalid_scalar_conversion (value, "float"))) 175 | Some "tag:yaml.org,2002:str" | Some "!!str" -> 176 `String value 177 | Some _ -> 178 (* Unknown tag - treat as string *) 179 `String value 180 | None -> 181 (* Implicit type resolution for plain scalars *) 182 if style <> `Plain then 183 `String value 184 else 185 infer_scalar_type value 186 187(** Infer type from plain scalar value *) 188and infer_scalar_type value = 189 match String.lowercase_ascii value with 190 (* Null *) 191 | "" | "null" | "~" -> `Null 192 (* Boolean true *) 193 | "true" | "yes" | "on" -> `Bool true 194 (* Boolean false *) 195 | "false" | "no" | "off" -> `Bool false 196 (* Special floats *) 197 | ".inf" | "+.inf" -> `Float Float.infinity 198 | "-.inf" -> `Float Float.neg_infinity 199 | ".nan" -> `Float Float.nan 200 (* Try numeric *) 201 | _ -> try_parse_number value 202 203(** Try to parse as number *) 204and try_parse_number value = 205 (* Check if value looks like a valid YAML number (not inf/nan without dot) 206 This guards against OCaml's Float.of_string accepting "inf", "nan", etc. 207 See: https://github.com/avsm/ocaml-yaml/issues/82 *) 208 let looks_like_number () = 209 let len = String.length value in 210 if len = 0 then false 211 else 212 let first = value.[0] in 213 if first >= '0' && first <= '9' then true 214 else if (first = '-' || first = '+') && len >= 2 then 215 let second = value.[1] in 216 (* After sign, must be digit or dot-digit (for +.5, -.5) *) 217 second >= '0' && second <= '9' || 218 (second = '.' && len >= 3 && value.[2] >= '0' && value.[2] <= '9') 219 else false 220 in 221 (* Try integer/float *) 222 let try_numeric () = 223 if looks_like_number () then 224 try 225 (* Handle octal: 0o prefix or leading 0 *) 226 if String.length value > 2 && value.[0] = '0' then 227 match value.[1] with 228 | 'x' | 'X' -> 229 (* Hex *) 230 Some (`Float (Float.of_int (int_of_string value))) 231 | 'o' | 'O' -> 232 (* Octal *) 233 Some (`Float (Float.of_int (int_of_string value))) 234 | 'b' | 'B' -> 235 (* Binary *) 236 Some (`Float (Float.of_int (int_of_string value))) 237 | _ -> 238 (* Decimal with leading zero or octal in YAML 1.1 *) 239 Some (`Float (Float.of_string value)) 240 else 241 Some (`Float (Float.of_string value)) 242 with _ -> None 243 else None 244 in 245 match try_numeric () with 246 | Some v -> v 247 | None -> 248 (* Try float starting with dot (e.g., .5 for 0.5) 249 Note: We must NOT use Float.of_string as a general fallback because 250 OCaml accepts "nan", "inf", "infinity" which are NOT valid YAML floats. 251 YAML requires the leading dot: .nan, .inf, -.inf 252 See: https://github.com/avsm/ocaml-yaml/issues/82 *) 253 if String.length value >= 2 && value.[0] = '.' && 254 value.[1] >= '0' && value.[1] <= '9' then 255 try `Float (Float.of_string value) 256 with _ -> `String value 257 else 258 `String value 259 260(** Convert to JSON-compatible Value. 261 262 Converts a full YAML representation to a simplified JSON-compatible value type. 263 This process implements the representation graph to serialization tree conversion 264 described in {{:https://yaml.org/spec/1.2.2/#32-processes}Section 3.2 (Processes)} 265 of the YAML 1.2.2 specification. 266 267 See also {{:https://yaml.org/spec/1.2.2/#10212-json-schema}Section 10.2.1.2 268 (JSON Schema)} for the tag resolution used during conversion. 269 270 @param resolve_aliases_first Whether to resolve aliases before conversion (default true) 271 @param max_nodes Maximum nodes during alias expansion (default 10M) 272 @param max_depth Maximum alias nesting depth (default 100) 273 @raise Error.Yamlrw_error with {!type:Error.kind} [Unresolved_alias] if resolve_aliases_first is false and an alias is encountered 274*) 275let to_value 276 ?(resolve_aliases_first = true) 277 ?(max_nodes = default_max_alias_nodes) 278 ?(max_depth = default_max_alias_depth) 279 (v : t) : Value.t = 280 let v = if resolve_aliases_first then resolve_aliases ~max_nodes ~max_depth v else v in 281 let rec convert (v : t) : Value.t = 282 match v with 283 | `Scalar s -> scalar_to_value s 284 | `Alias name -> Error.raise (Unresolved_alias name) 285 | `A seq -> `A (List.map convert (Sequence.members seq)) 286 | `O map -> 287 `O (List.map (fun (k, v) -> 288 let key = match k with 289 | `Scalar s -> Scalar.value s 290 | _ -> Error.raise (Type_mismatch ("string key", "complex key")) 291 in 292 (key, convert v) 293 ) (Mapping.members map)) 294 in 295 convert v 296 297(** Get anchor from any node *) 298let anchor (v : t) = 299 match v with 300 | `Scalar s -> Scalar.anchor s 301 | `Alias _ -> None 302 | `A seq -> Sequence.anchor seq 303 | `O map -> Mapping.anchor map 304 305(** Get tag from any node *) 306let tag (v : t) = 307 match v with 308 | `Scalar s -> Scalar.tag s 309 | `Alias _ -> None 310 | `A seq -> Sequence.tag seq 311 | `O map -> Mapping.tag map