Pure OCaml Yaml 1.2 reader and writer using Bytesrw
1(*--------------------------------------------------------------------------- 2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 SPDX-License-Identifier: ISC 4 ---------------------------------------------------------------------------*) 5 6(** Full YAML representation with anchors, tags, and aliases *) 7 8type t = [ 9 | `Scalar of Scalar.t 10 | `Alias of string 11 | `A of t Sequence.t 12 | `O of (t, t) Mapping.t 13] 14 15(** Pretty printing *) 16 17let rec pp fmt (v : t) = 18 match v with 19 | `Scalar s -> Scalar.pp fmt s 20 | `Alias name -> Format.fprintf fmt "*%s" name 21 | `A seq -> Sequence.pp pp fmt seq 22 | `O map -> Mapping.pp pp pp fmt map 23 24(** Equality *) 25 26let rec equal (a : t) (b : t) = 27 match a, b with 28 | `Scalar a, `Scalar b -> Scalar.equal a b 29 | `Alias a, `Alias b -> String.equal a b 30 | `A a, `A b -> Sequence.equal equal a b 31 | `O a, `O b -> Mapping.equal equal equal a b 32 | _ -> false 33 34(** Construct from JSON-compatible Value *) 35 36let rec of_value (v : Value.t) : t = 37 match v with 38 | `Null -> `Scalar (Scalar.make "null") 39 | `Bool true -> `Scalar (Scalar.make "true") 40 | `Bool false -> `Scalar (Scalar.make "false") 41 | `Float f -> 42 let s = 43 if Float.is_integer f && Float.abs f < 1e15 then 44 Printf.sprintf "%.0f" f 45 else 46 Printf.sprintf "%g" f 47 in 48 `Scalar (Scalar.make s) 49 | `String s -> 50 `Scalar (Scalar.make s ~style:`Double_quoted) 51 | `A items -> 52 `A (Sequence.make (List.map of_value items)) 53 | `O pairs -> 54 `O (Mapping.make (List.map (fun (k, v) -> 55 (`Scalar (Scalar.make k), of_value v) 56 ) pairs)) 57 58(** Default limits for alias expansion (protection against billion laughs attack) *) 59let default_max_alias_nodes = 10_000_000 60let default_max_alias_depth = 100 61 62(** Resolve aliases by replacing them with referenced nodes. 63 64 Processes the tree in document order so that aliases resolve to the 65 anchor value that was defined at the point the alias was encountered. 66 67 See {{:https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases}Section 3.2.2.2 68 (Anchors and Aliases)} of the YAML 1.2.2 specification for details on 69 how anchors and aliases work in YAML. 70 71 This implements protection against the "billion laughs attack" 72 (see {{:https://yaml.org/spec/1.2.2/#321-processes}Section 3.2.1 (Processes)}) 73 by limiting both the total number of nodes and the nesting depth during expansion. 74 75 @param max_nodes Maximum number of nodes to create during expansion (default 10M) 76 @param max_depth Maximum depth of alias-within-alias resolution (default 100) 77 @raise Error.Yamlrw_error with {!type:Error.kind} [Alias_expansion_node_limit] if max_nodes is exceeded 78 @raise Error.Yamlrw_error with {!type:Error.kind} [Alias_expansion_depth_limit] if max_depth is exceeded 79*) 80let resolve_aliases ?(max_nodes = default_max_alias_nodes) ?(max_depth = default_max_alias_depth) (root : t) : t = 81 let anchors = Hashtbl.create 16 in 82 let node_count = ref 0 in 83 84 (* Check node limit *) 85 let check_node_limit () = 86 incr node_count; 87 if !node_count > max_nodes then 88 Error.raise (Alias_expansion_node_limit max_nodes) 89 in 90 91 (* Register anchor if present on a node *) 92 let register_anchor name resolved_node = 93 Hashtbl.replace anchors name resolved_node 94 in 95 96 (* Resolve an alias by looking up and expanding the target *) 97 let rec expand_alias ~depth name = 98 if depth >= max_depth then 99 Error.raise (Alias_expansion_depth_limit max_depth); 100 match Hashtbl.find_opt anchors name with 101 | Some target -> 102 (* The target is already resolved, but may contain aliases that 103 need expansion if it was registered before those anchors existed *) 104 resolve ~depth:(depth + 1) target 105 | None -> Error.raise (Undefined_alias name) 106 107 (* Single pass: process in document order, registering anchors and resolving aliases *) 108 and resolve ~depth (v : t) : t = 109 check_node_limit (); 110 match v with 111 | `Scalar s -> 112 (* Register anchor after we have the resolved node *) 113 Option.iter (fun name -> register_anchor name v) (Scalar.anchor s); 114 v 115 | `Alias name -> 116 expand_alias ~depth name 117 | `A seq -> 118 (* First resolve all members in order *) 119 let resolved_members = List.map (resolve ~depth) (Sequence.members seq) in 120 let resolved = `A (Sequence.make 121 ?anchor:(Sequence.anchor seq) 122 ?tag:(Sequence.tag seq) 123 ~implicit:(Sequence.implicit seq) 124 ~style:(Sequence.style seq) 125 resolved_members) in 126 (* Register anchor with resolved node *) 127 Option.iter (fun name -> register_anchor name resolved) (Sequence.anchor seq); 128 resolved 129 | `O map -> 130 (* Process key-value pairs in document order *) 131 let resolved_pairs = List.map (fun (k, v) -> 132 let resolved_k = resolve ~depth k in 133 let resolved_v = resolve ~depth v in 134 (resolved_k, resolved_v) 135 ) (Mapping.members map) in 136 let resolved = `O (Mapping.make 137 ?anchor:(Mapping.anchor map) 138 ?tag:(Mapping.tag map) 139 ~implicit:(Mapping.implicit map) 140 ~style:(Mapping.style map) 141 resolved_pairs) in 142 (* Register anchor with resolved node *) 143 Option.iter (fun name -> register_anchor name resolved) (Mapping.anchor map); 144 resolved 145 in 146 resolve ~depth:0 root 147 148(** Convert scalar to JSON value based on content *) 149let rec scalar_to_value s = 150 let value = Scalar.value s in 151 let tag = Scalar.tag s in 152 let style = Scalar.style s in 153 154 (* If explicitly tagged, respect the tag *) 155 match tag with 156 | Some "tag:yaml.org,2002:null" | Some "!!null" -> 157 `Null 158 | Some "tag:yaml.org,2002:bool" | Some "!!bool" -> 159 (match String.lowercase_ascii value with 160 | "true" | "yes" | "on" -> `Bool true 161 | "false" | "no" | "off" -> `Bool false 162 | _ -> Error.raise (Invalid_scalar_conversion (value, "bool"))) 163 | Some "tag:yaml.org,2002:int" | Some "!!int" -> 164 (try `Float (Float.of_string value) 165 with _ -> Error.raise (Invalid_scalar_conversion (value, "int"))) 166 | Some "tag:yaml.org,2002:float" | Some "!!float" -> 167 (try `Float (Float.of_string value) 168 with _ -> Error.raise (Invalid_scalar_conversion (value, "float"))) 169 | Some "tag:yaml.org,2002:str" | Some "!!str" -> 170 `String value 171 | Some _ -> 172 (* Unknown tag - treat as string *) 173 `String value 174 | None -> 175 (* Implicit type resolution for plain scalars *) 176 if style <> `Plain then 177 `String value 178 else 179 infer_scalar_type value 180 181(** Infer type from plain scalar value *) 182and infer_scalar_type value = 183 match String.lowercase_ascii value with 184 (* Null *) 185 | "" | "null" | "~" -> `Null 186 (* Boolean true *) 187 | "true" | "yes" | "on" -> `Bool true 188 (* Boolean false *) 189 | "false" | "no" | "off" -> `Bool false 190 (* Special floats *) 191 | ".inf" | "+.inf" -> `Float Float.infinity 192 | "-.inf" -> `Float Float.neg_infinity 193 | ".nan" -> `Float Float.nan 194 (* Try numeric *) 195 | _ -> try_parse_number value 196 197(** Try to parse as number *) 198and try_parse_number value = 199 (* Check if value looks like a valid YAML number (not inf/nan without dot) 200 This guards against OCaml's Float.of_string accepting "inf", "nan", etc. 201 See: https://github.com/avsm/ocaml-yaml/issues/82 *) 202 let looks_like_number () = 203 let len = String.length value in 204 if len = 0 then false 205 else 206 let first = value.[0] in 207 if first >= '0' && first <= '9' then true 208 else if (first = '-' || first = '+') && len >= 2 then 209 let second = value.[1] in 210 (* After sign, must be digit or dot-digit (for +.5, -.5) *) 211 second >= '0' && second <= '9' || 212 (second = '.' && len >= 3 && value.[2] >= '0' && value.[2] <= '9') 213 else false 214 in 215 (* Try integer/float *) 216 let try_numeric () = 217 if looks_like_number () then 218 try 219 (* Handle octal: 0o prefix or leading 0 *) 220 if String.length value > 2 && value.[0] = '0' then 221 match value.[1] with 222 | 'x' | 'X' -> 223 (* Hex *) 224 Some (`Float (Float.of_int (int_of_string value))) 225 | 'o' | 'O' -> 226 (* Octal *) 227 Some (`Float (Float.of_int (int_of_string value))) 228 | 'b' | 'B' -> 229 (* Binary *) 230 Some (`Float (Float.of_int (int_of_string value))) 231 | _ -> 232 (* Decimal with leading zero or octal in YAML 1.1 *) 233 Some (`Float (Float.of_string value)) 234 else 235 Some (`Float (Float.of_string value)) 236 with _ -> None 237 else None 238 in 239 match try_numeric () with 240 | Some v -> v 241 | None -> 242 (* Try float starting with dot (e.g., .5 for 0.5) 243 Note: We must NOT use Float.of_string as a general fallback because 244 OCaml accepts "nan", "inf", "infinity" which are NOT valid YAML floats. 245 YAML requires the leading dot: .nan, .inf, -.inf 246 See: https://github.com/avsm/ocaml-yaml/issues/82 *) 247 if String.length value >= 2 && value.[0] = '.' && 248 value.[1] >= '0' && value.[1] <= '9' then 249 try `Float (Float.of_string value) 250 with _ -> `String value 251 else 252 `String value 253 254(** Convert to JSON-compatible Value. 255 256 Converts a full YAML representation to a simplified JSON-compatible value type. 257 This process implements the representation graph to serialization tree conversion 258 described in {{:https://yaml.org/spec/1.2.2/#32-processes}Section 3.2 (Processes)} 259 of the YAML 1.2.2 specification. 260 261 See also {{:https://yaml.org/spec/1.2.2/#10212-json-schema}Section 10.2.1.2 262 (JSON Schema)} for the tag resolution used during conversion. 263 264 @param resolve_aliases_first Whether to resolve aliases before conversion (default true) 265 @param max_nodes Maximum nodes during alias expansion (default 10M) 266 @param max_depth Maximum alias nesting depth (default 100) 267 @raise Error.Yamlrw_error with {!type:Error.kind} [Unresolved_alias] if resolve_aliases_first is false and an alias is encountered 268*) 269let to_value 270 ?(resolve_aliases_first = true) 271 ?(max_nodes = default_max_alias_nodes) 272 ?(max_depth = default_max_alias_depth) 273 (v : t) : Value.t = 274 let v = if resolve_aliases_first then resolve_aliases ~max_nodes ~max_depth v else v in 275 let rec convert (v : t) : Value.t = 276 match v with 277 | `Scalar s -> scalar_to_value s 278 | `Alias name -> Error.raise (Unresolved_alias name) 279 | `A seq -> `A (List.map convert (Sequence.members seq)) 280 | `O map -> 281 `O (List.map (fun (k, v) -> 282 let key = match k with 283 | `Scalar s -> Scalar.value s 284 | _ -> Error.raise (Type_mismatch ("string key", "complex key")) 285 in 286 (key, convert v) 287 ) (Mapping.members map)) 288 in 289 convert v 290 291(** Get anchor from any node *) 292let anchor (v : t) = 293 match v with 294 | `Scalar s -> Scalar.anchor s 295 | `Alias _ -> None 296 | `A seq -> Sequence.anchor seq 297 | `O map -> Mapping.anchor map 298 299(** Get tag from any node *) 300let tag (v : t) = 301 match v with 302 | `Scalar s -> Scalar.tag s 303 | `Alias _ -> None 304 | `A seq -> Sequence.tag seq 305 | `O map -> Mapping.tag map