Pure OCaml Yaml 1.2 reader and writer using Bytesrw
1(*---------------------------------------------------------------------------
2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3 SPDX-License-Identifier: ISC
4 ---------------------------------------------------------------------------*)
5
6(** yamlcat - parse and reprint YAML files *)
7
8open Cmdliner
9
10type output_format = Yaml | Json | Flow | Debug
11
12let rec json_to_string buf (v : Yamlrw.value) =
13 match v with
14 | `Null -> Buffer.add_string buf "null"
15 | `Bool b -> Buffer.add_string buf (if b then "true" else "false")
16 | `Float f ->
17 if Float.is_integer f && Float.abs f < 1e15 then
18 Buffer.add_string buf (Printf.sprintf "%.0f" f)
19 else Buffer.add_string buf (Printf.sprintf "%g" f)
20 | `String s -> Buffer.add_string buf (Printf.sprintf "%S" s)
21 | `A items ->
22 Buffer.add_char buf '[';
23 List.iteri
24 (fun i item ->
25 if i > 0 then Buffer.add_string buf ", ";
26 json_to_string buf item)
27 items;
28 Buffer.add_char buf ']'
29 | `O pairs ->
30 Buffer.add_char buf '{';
31 List.iteri
32 (fun i (k, v) ->
33 if i > 0 then Buffer.add_string buf ", ";
34 Buffer.add_string buf (Printf.sprintf "%S: " k);
35 json_to_string buf v)
36 pairs;
37 Buffer.add_char buf '}'
38
39let value_to_json v =
40 let buf = Buffer.create 256 in
41 json_to_string buf v;
42 Buffer.contents buf
43
44let process_string ~format ~resolve_aliases ~max_nodes ~max_depth content =
45 try
46 (* Always parse as multi-document stream *)
47 let documents = Yamlrw.documents_of_string content in
48
49 match format with
50 | Yaml ->
51 (* Convert through Value to apply tag-based type coercion *)
52 let first = ref true in
53 List.iter
54 (fun (doc : Yamlrw.document) ->
55 if not !first then print_string "---\n";
56 first := false;
57 match doc.root with
58 | None -> print_endline ""
59 | Some yaml ->
60 let value =
61 Yamlrw.to_json ~resolve_aliases ~max_nodes ~max_depth yaml
62 in
63 print_string (Yamlrw.to_string value))
64 documents
65 | Flow ->
66 (* Convert through Value to apply tag-based type coercion *)
67 let first = ref true in
68 List.iter
69 (fun (doc : Yamlrw.document) ->
70 if not !first then print_string "---\n";
71 first := false;
72 match doc.root with
73 | None -> print_endline ""
74 | Some yaml ->
75 let value =
76 Yamlrw.to_json ~resolve_aliases ~max_nodes ~max_depth yaml
77 in
78 print_string (Yamlrw.to_string ~layout_style:`Flow value))
79 documents
80 | Json ->
81 let first = ref true in
82 List.iter
83 (fun (doc : Yamlrw.document) ->
84 match doc.root with
85 | None -> ()
86 | Some yaml ->
87 if not !first then print_endline "---";
88 first := false;
89 let value =
90 Yamlrw.to_json ~resolve_aliases ~max_nodes ~max_depth yaml
91 in
92 print_endline (value_to_json value))
93 documents
94 | Debug ->
95 List.iteri
96 (fun i (doc : Yamlrw.document) ->
97 Format.printf "Document %d:@." (i + 1);
98 (* Convert back to Document.t for printing *)
99 let doc' : Yamlrw.Document.t =
100 {
101 Yamlrw.Document.version = doc.version;
102 Yamlrw.Document.tags = doc.tags;
103 Yamlrw.Document.root = (doc.root :> Yamlrw.Yaml.t option);
104 Yamlrw.Document.implicit_start = doc.implicit_start;
105 Yamlrw.Document.implicit_end = doc.implicit_end;
106 }
107 in
108 Format.printf "%a@." Yamlrw.Document.pp doc')
109 documents
110 with Yamlrw.Yamlrw_error e ->
111 Printf.eprintf "Error: %s\n" (Yamlrw.Error.to_string e);
112 exit 1
113
114let process_file ~format ~resolve_aliases ~max_nodes ~max_depth filename =
115 let content =
116 if filename = "-" then In_channel.input_all In_channel.stdin
117 else In_channel.with_open_text filename In_channel.input_all
118 in
119 process_string ~format ~resolve_aliases ~max_nodes ~max_depth content
120
121let run format _all resolve_aliases max_nodes max_depth files =
122 let files = if files = [] then [ "-" ] else files in
123 List.iter (process_file ~format ~resolve_aliases ~max_nodes ~max_depth) files;
124 `Ok ()
125
126(* Command-line arguments *)
127
128let format_arg =
129 let doc = "Output format: yaml (default), json, flow, or debug." in
130 let formats =
131 [ ("yaml", Yaml); ("json", Json); ("flow", Flow); ("debug", Debug) ]
132 in
133 Arg.(
134 value & opt (enum formats) Yaml & info [ "format"; "f" ] ~docv:"FORMAT" ~doc)
135
136let json_arg =
137 let doc = "Output as JSON (shorthand for --format=json)." in
138 Arg.(value & flag & info [ "json" ] ~doc)
139
140let flow_arg =
141 let doc = "Output in flow style (shorthand for --format=flow)." in
142 Arg.(value & flag & info [ "flow" ] ~doc)
143
144let debug_arg =
145 let doc = "Output internal representation (shorthand for --format=debug)." in
146 Arg.(value & flag & info [ "debug" ] ~doc)
147
148let all_arg =
149 let doc = "Output all documents (for multi-document YAML)." in
150 Arg.(value & flag & info [ "all"; "a" ] ~doc)
151
152let no_resolve_aliases_arg =
153 let doc = "Don't resolve aliases (keep them as references)." in
154 Arg.(value & flag & info [ "no-resolve-aliases" ] ~doc)
155
156let max_nodes_arg =
157 let doc =
158 "Maximum number of nodes during alias expansion (default: 10000000). \
159 Protection against billion laughs attack."
160 in
161 Arg.(
162 value
163 & opt int Yamlrw.default_max_alias_nodes
164 & info [ "max-nodes" ] ~docv:"N" ~doc)
165
166let max_depth_arg =
167 let doc =
168 "Maximum alias nesting depth (default: 100). Protection against deeply \
169 nested alias chains."
170 in
171 Arg.(
172 value
173 & opt int Yamlrw.default_max_alias_depth
174 & info [ "max-depth" ] ~docv:"N" ~doc)
175
176let files_arg =
177 let doc = "YAML file(s) to process. Use '-' for stdin." in
178 Arg.(value & pos_all file [] & info [] ~docv:"FILE" ~doc)
179
180let combined_format format json flow debug =
181 if json then Json else if flow then Flow else if debug then Debug else format
182
183let term =
184 let combine format json flow debug all no_resolve max_nodes max_depth files =
185 let format = combined_format format json flow debug in
186 let resolve_aliases = not no_resolve in
187 run format all resolve_aliases max_nodes max_depth files
188 in
189 Term.(
190 ret
191 (const combine $ format_arg $ json_arg $ flow_arg $ debug_arg $ all_arg
192 $ no_resolve_aliases_arg $ max_nodes_arg $ max_depth_arg $ files_arg))
193
194let info =
195 let doc = "Parse and reprint YAML files" in
196 let man =
197 [
198 `S Manpage.s_description;
199 `P
200 "$(tname) parses YAML files and reprints them in various formats. It \
201 can be used to validate YAML, convert between styles, or convert to \
202 JSON.";
203 `S Manpage.s_examples;
204 `P "Parse and reprint a YAML file:";
205 `Pre " $(tname) config.yaml";
206 `P "Convert YAML to JSON:";
207 `Pre " $(tname) --json config.yaml";
208 `P "Process multi-document YAML:";
209 `Pre " $(tname) --all multi.yaml";
210 `P "Limit alias expansion (protection against malicious YAML):";
211 `Pre " $(tname) --max-nodes 1000 --max-depth 10 untrusted.yaml";
212 `S Manpage.s_bugs;
213 `P "Report bugs at https://github.com/avsm/ocaml-yaml/issues";
214 ]
215 in
216 Cmd.info "yamlcat" ~version:"0.1.0" ~doc ~man
217
218let () = exit (Cmd.eval (Cmd.v info term))