Pure OCaml Yaml 1.2 reader and writer using Bytesrw
at main 12 kB view raw
1(*--------------------------------------------------------------------------- 2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 SPDX-License-Identifier: ISC 4 ---------------------------------------------------------------------------*) 5 6(** Full YAML representation with anchors, tags, and aliases *) 7 8type t = 9 [ `Scalar of Scalar.t 10 | `Alias of string 11 | `A of t Sequence.t 12 | `O of (t, t) Mapping.t ] 13 14(** Pretty printing *) 15 16let rec pp fmt (v : t) = 17 match v with 18 | `Scalar s -> Scalar.pp fmt s 19 | `Alias name -> Format.fprintf fmt "*%s" name 20 | `A seq -> Sequence.pp pp fmt seq 21 | `O map -> Mapping.pp pp pp fmt map 22 23(** Equality *) 24 25let rec equal (a : t) (b : t) = 26 match (a, b) with 27 | `Scalar a, `Scalar b -> Scalar.equal a b 28 | `Alias a, `Alias b -> String.equal a b 29 | `A a, `A b -> Sequence.equal equal a b 30 | `O a, `O b -> Mapping.equal equal equal a b 31 | _ -> false 32 33(** Construct from JSON-compatible Value *) 34 35let rec of_value (v : Value.t) : t = 36 match v with 37 | `Null -> `Scalar (Scalar.make "null") 38 | `Bool true -> `Scalar (Scalar.make "true") 39 | `Bool false -> `Scalar (Scalar.make "false") 40 | `Float f -> 41 let s = 42 if Float.is_integer f && Float.abs f < 1e15 then Printf.sprintf "%.0f" f 43 else Printf.sprintf "%g" f 44 in 45 `Scalar (Scalar.make s) 46 | `String s -> `Scalar (Scalar.make s ~style:`Double_quoted) 47 | `A items -> `A (Sequence.make (List.map of_value items)) 48 | `O pairs -> 49 `O 50 (Mapping.make 51 (List.map 52 (fun (k, v) -> (`Scalar (Scalar.make k), of_value v)) 53 pairs)) 54 55(** Default limits for alias expansion (protection against billion laughs 56 attack) *) 57let default_max_alias_nodes = 10_000_000 58 59let default_max_alias_depth = 100 60 61(** Resolve aliases by replacing them with referenced nodes. 62 63 Processes the tree in document order so that aliases resolve to the anchor 64 value that was defined at the point the alias was encountered. 65 66 See 67 {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 68 (Anchors and Aliases)} of the YAML 1.2.2 specification for details on how 69 anchors and aliases work in YAML. 70 71 This implements protection against the "billion laughs attack" (see 72 {{:https://yaml.org/spec/1.2.2/#321-processes}Section 3.2.1 (Processes)}) by 73 limiting both the total number of nodes and the nesting depth during 74 expansion. 75 76 @param max_nodes 77 Maximum number of nodes to create during expansion (default 10M) 78 @param max_depth 79 Maximum depth of alias-within-alias resolution (default 100) 80 @raise Error.Yamlrw_error 81 with {!type:Error.kind} [Alias_expansion_node_limit] if max_nodes is 82 exceeded 83 @raise Error.Yamlrw_error 84 with {!type:Error.kind} [Alias_expansion_depth_limit] if max_depth is 85 exceeded *) 86let resolve_aliases ?(max_nodes = default_max_alias_nodes) 87 ?(max_depth = default_max_alias_depth) (root : t) : t = 88 let anchors = Hashtbl.create 16 in 89 let node_count = ref 0 in 90 91 (* Check node limit *) 92 let check_node_limit () = 93 incr node_count; 94 if !node_count > max_nodes then 95 Error.raise (Alias_expansion_node_limit max_nodes) 96 in 97 98 (* Register anchor if present on a node *) 99 let register_anchor name resolved_node = 100 Hashtbl.replace anchors name resolved_node 101 in 102 103 (* Resolve an alias by looking up and expanding the target *) 104 let rec expand_alias ~depth name = 105 if depth >= max_depth then 106 Error.raise (Alias_expansion_depth_limit max_depth); 107 match Hashtbl.find_opt anchors name with 108 | Some target -> 109 (* The target is already resolved, but may contain aliases that 110 need expansion if it was registered before those anchors existed *) 111 resolve ~depth:(depth + 1) target 112 | None -> Error.raise (Undefined_alias name) 113 (* Single pass: process in document order, registering anchors and resolving aliases *) 114 and resolve ~depth (v : t) : t = 115 check_node_limit (); 116 match v with 117 | `Scalar s -> 118 (* Register anchor after we have the resolved node *) 119 Option.iter (fun name -> register_anchor name v) (Scalar.anchor s); 120 v 121 | `Alias name -> expand_alias ~depth name 122 | `A seq -> 123 (* First resolve all members in order *) 124 let resolved_members = 125 List.map (resolve ~depth) (Sequence.members seq) 126 in 127 let resolved = 128 `A 129 (Sequence.make ?anchor:(Sequence.anchor seq) ?tag:(Sequence.tag seq) 130 ~implicit:(Sequence.implicit seq) ~style:(Sequence.style seq) 131 resolved_members) 132 in 133 (* Register anchor with resolved node *) 134 Option.iter (fun name -> register_anchor name resolved) (Sequence.anchor seq); 135 resolved 136 | `O map -> 137 (* Process key-value pairs in document order *) 138 let resolved_pairs = 139 List.map 140 (fun (k, v) -> 141 let resolved_k = resolve ~depth k in 142 let resolved_v = resolve ~depth v in 143 (resolved_k, resolved_v)) 144 (Mapping.members map) 145 in 146 let resolved = 147 `O 148 (Mapping.make ?anchor:(Mapping.anchor map) ?tag:(Mapping.tag map) 149 ~implicit:(Mapping.implicit map) ~style:(Mapping.style map) 150 resolved_pairs) 151 in 152 (* Register anchor with resolved node *) 153 Option.iter (fun name -> register_anchor name resolved) (Mapping.anchor map); 154 resolved 155 in 156 resolve ~depth:0 root 157 158(** Convert scalar to JSON value based on content *) 159let rec scalar_to_value s = 160 let value = Scalar.value s in 161 let tag = Scalar.tag s in 162 let style = Scalar.style s in 163 164 (* If explicitly tagged, respect the tag *) 165 match tag with 166 | Some "tag:yaml.org,2002:null" | Some "!!null" -> `Null 167 | Some "tag:yaml.org,2002:bool" | Some "!!bool" -> ( 168 match String.lowercase_ascii value with 169 | "true" | "yes" | "on" -> `Bool true 170 | "false" | "no" | "off" -> `Bool false 171 | _ -> Error.raise (Invalid_scalar_conversion (value, "bool"))) 172 | Some "tag:yaml.org,2002:int" | Some "!!int" -> ( 173 try `Float (Float.of_string value) 174 with _ -> Error.raise (Invalid_scalar_conversion (value, "int"))) 175 | Some "tag:yaml.org,2002:float" | Some "!!float" -> ( 176 try `Float (Float.of_string value) 177 with _ -> Error.raise (Invalid_scalar_conversion (value, "float"))) 178 | Some "tag:yaml.org,2002:str" | Some "!!str" -> `String value 179 | Some _ -> 180 (* Unknown tag - treat as string *) 181 `String value 182 | None -> 183 (* Implicit type resolution for plain scalars *) 184 if style <> `Plain then `String value else infer_scalar_type value 185 186(** Infer type from plain scalar value *) 187and infer_scalar_type value = 188 match String.lowercase_ascii value with 189 (* Null *) 190 | "" | "null" | "~" -> `Null 191 (* Boolean true *) 192 | "true" | "yes" | "on" -> `Bool true 193 (* Boolean false *) 194 | "false" | "no" | "off" -> `Bool false 195 (* Special floats *) 196 | ".inf" | "+.inf" -> `Float Float.infinity 197 | "-.inf" -> `Float Float.neg_infinity 198 | ".nan" -> `Float Float.nan 199 (* Try numeric *) 200 | _ -> try_parse_number value 201 202(** Try to parse as number *) 203and try_parse_number value = 204 (* Check if value looks like a valid YAML number (not inf/nan without dot) 205 This guards against OCaml's Float.of_string accepting "inf", "nan", etc. 206 See: https://github.com/avsm/ocaml-yaml/issues/82 *) 207 let looks_like_number () = 208 let len = String.length value in 209 if len = 0 then false 210 else 211 let first = value.[0] in 212 if first >= '0' && first <= '9' then true 213 else if (first = '-' || first = '+') && len >= 2 then 214 let second = value.[1] in 215 (* After sign, must be digit or dot-digit (for +.5, -.5) *) 216 (second >= '0' && second <= '9') 217 || (second = '.' && len >= 3 && value.[2] >= '0' && value.[2] <= '9') 218 else false 219 in 220 (* Try integer/float *) 221 let try_numeric () = 222 if looks_like_number () then 223 try 224 (* Handle octal: 0o prefix or leading 0 *) 225 if String.length value > 2 && value.[0] = '0' then 226 match value.[1] with 227 | 'x' | 'X' -> 228 (* Hex *) 229 Some (`Float (Float.of_int (int_of_string value))) 230 | 'o' | 'O' -> 231 (* Octal *) 232 Some (`Float (Float.of_int (int_of_string value))) 233 | 'b' | 'B' -> 234 (* Binary *) 235 Some (`Float (Float.of_int (int_of_string value))) 236 | _ -> 237 (* Decimal with leading zero or octal in YAML 1.1 *) 238 Some (`Float (Float.of_string value)) 239 else Some (`Float (Float.of_string value)) 240 with _ -> None 241 else None 242 in 243 match try_numeric () with 244 | Some v -> v 245 | None -> 246 (* Try float starting with dot (e.g., .5 for 0.5) 247 Note: We must NOT use Float.of_string as a general fallback because 248 OCaml accepts "nan", "inf", "infinity" which are NOT valid YAML floats. 249 YAML requires the leading dot: .nan, .inf, -.inf 250 See: https://github.com/avsm/ocaml-yaml/issues/82 *) 251 if 252 String.length value >= 2 253 && value.[0] = '.' 254 && value.[1] >= '0' 255 && value.[1] <= '9' 256 then try `Float (Float.of_string value) with _ -> `String value 257 else `String value 258 259(** Convert to JSON-compatible Value. 260 261 Converts a full YAML representation to a simplified JSON-compatible value 262 type. This process implements the representation graph to serialization tree 263 conversion described in 264 {{:https://yaml.org/spec/1.2.2/#32-processes}Section 3.2 (Processes)} of the 265 YAML 1.2.2 specification. 266 267 See also 268 {{:https://yaml.org/spec/1.2.2/#10212-json-schema}Section 10.2.1.2 (JSON 269 Schema)} for the tag resolution used during conversion. 270 271 @param resolve_aliases_first 272 Whether to resolve aliases before conversion (default true) 273 @param max_nodes Maximum nodes during alias expansion (default 10M) 274 @param max_depth Maximum alias nesting depth (default 100) 275 @raise Error.Yamlrw_error 276 with {!type:Error.kind} [Unresolved_alias] if resolve_aliases_first is 277 false and an alias is encountered *) 278let to_value ?(resolve_aliases_first = true) 279 ?(max_nodes = default_max_alias_nodes) 280 ?(max_depth = default_max_alias_depth) (v : t) : Value.t = 281 let v = 282 if resolve_aliases_first then resolve_aliases ~max_nodes ~max_depth v else v 283 in 284 let rec convert (v : t) : Value.t = 285 match v with 286 | `Scalar s -> scalar_to_value s 287 | `Alias name -> Error.raise (Unresolved_alias name) 288 | `A seq -> `A (List.map convert (Sequence.members seq)) 289 | `O map -> 290 `O 291 (List.map 292 (fun (k, v) -> 293 let key = 294 match k with 295 | `Scalar s -> Scalar.value s 296 | _ -> 297 Error.raise (Type_mismatch ("string key", "complex key")) 298 in 299 (key, convert v)) 300 (Mapping.members map)) 301 in 302 convert v 303 304(** Get anchor from any node *) 305let anchor (v : t) = 306 match v with 307 | `Scalar s -> Scalar.anchor s 308 | `Alias _ -> None 309 | `A seq -> Sequence.anchor seq 310 | `O map -> Mapping.anchor map 311 312(** Get tag from any node *) 313let tag (v : t) = 314 match v with 315 | `Scalar s -> Scalar.tag s 316 | `Alias _ -> None 317 | `A seq -> Sequence.tag seq 318 | `O map -> Mapping.tag map