My agentic slop goes here. Not intended for anyone else!
1(** CLI tool for generating Pooch-compatible registry files from directories *)
2
3open Cmdliner
4open Eio.Std
5
6(* CLI argument types *)
7type output_format = Pooch | JSON
8type path_format = Relative | Absolute
9
10(* CLI arguments *)
11let directory_arg =
12 let doc = "Directory to scan for files" in
13 Arg.(required & pos 0 (some dir) None & info [] ~docv:"DIRECTORY" ~doc)
14
15let output_arg =
16 let doc = "Output file for registry (default: stdout)" in
17 Arg.(value & pos 1 (some string) None & info [] ~docv:"OUTPUT" ~doc)
18
19let recursive_arg =
20 let doc = "Scan directories recursively" in
21 Arg.(value & flag & info ["r"; "recursive"] ~doc)
22
23let follow_symlinks_arg =
24 let doc = "Follow symbolic links during traversal" in
25 Arg.(value & flag & info ["L"; "follow-symlinks"] ~doc)
26
27let algorithm_arg =
28 let algorithms = [("sha256", Toru.Hash.SHA256); ("sha1", Toru.Hash.SHA1); ("md5", Toru.Hash.MD5)] in
29 let doc = "Hash algorithm to use: sha256, sha1, or md5" in
30 Arg.(value & opt (enum algorithms) Toru.Hash.SHA256 & info ["a"; "algorithm"] ~docv:"ALGO" ~doc)
31
32let exclude_arg =
33 let doc = "Exclude files matching glob pattern (can be repeated)" in
34 Arg.(value & opt_all string [] & info ["e"; "exclude"] ~docv:"PATTERN" ~doc)
35
36let include_hidden_arg =
37 let doc = "Include hidden files (starting with .)" in
38 Arg.(value & flag & info ["H"; "include-hidden"] ~doc)
39
40let update_arg =
41 let doc = "Update existing registry file instead of creating new one" in
42 Arg.(value & opt (some file) None & info ["u"; "update"] ~docv:"FILE" ~doc)
43
44let progress_arg =
45 let doc = "Show progress during scanning" in
46 Arg.(value & flag & info ["p"; "progress"] ~doc)
47
48let format_arg =
49 let formats = [("pooch", Pooch); ("json", JSON)] in
50 let doc = "Output format: pooch or json" in
51 Arg.(value & opt (enum formats) Pooch & info ["f"; "format"] ~docv:"FORMAT" ~doc)
52
53let path_format_arg =
54 let formats = [("relative", Relative); ("absolute", Absolute)] in
55 let doc = "Path format in output: relative or absolute" in
56 Arg.(value & opt (enum formats) Relative & info ["path-format"] ~docv:"FORMAT" ~doc)
57
58(* Progress reporting *)
59let create_progress_reporter show_progress =
60 if show_progress then (
61 let last_update = ref (Unix.gettimeofday ()) in
62 fun filename current total ->
63 let now = Unix.gettimeofday () in
64 if now -. !last_update > 0.1 || current = total then (
65 last_update := now;
66 let percentage = if total > 0 then (current * 100) / total else 0 in
67 Printf.eprintf "\r\027[K[%3d%%] %s (%d/%d)" percentage filename current total;
68 if current = total then Printf.eprintf "\n";
69 flush stderr
70 )
71 ) else (
72 fun _ _ _ -> ()
73 )
74
75(* Output functions *)
76let output_registry format registry output_file =
77 let content = match format with
78 | Pooch ->
79 let header = Printf.sprintf "# Generated by toru-make-registry on %s\n# Algorithm: %s\n"
80 (Ptime.to_rfc3339 (Ptime_clock.now ()))
81 (Toru.Hash.algorithm_to_string Toru.Hash.SHA256)
82 in
83 header ^ Toru.Registry.to_string registry
84 | JSON ->
85 (* For JSON, we need enhanced entries *)
86 failwith "JSON output requires enhanced entries (not yet implemented in this path)"
87 in
88 match output_file with
89 | Some filename ->
90 let oc = open_out filename in
91 output_string oc content;
92 close_out oc;
93 Printf.printf "Registry written to %s\n" filename
94 | None ->
95 print_string content
96
97let output_enhanced_entries format enhanced_entries algorithm output_file =
98 let content = match format with
99 | Pooch ->
100 let header = Printf.sprintf "# Generated by toru-make-registry on %s\n# Algorithm: %s\n"
101 (Ptime.to_rfc3339 (Ptime_clock.now ()))
102 (Toru.Hash.algorithm_to_string algorithm)
103 in
104 let entries_str = String.concat "\n" (List.map (fun enhanced_entry ->
105 let entry = Toru.Make_registry.get_entry enhanced_entry in
106 let filename = Toru.Registry.filename entry in
107 let hash = Toru.Registry.hash entry in
108 Printf.sprintf "%s %s" filename (Toru.Hash.value hash)
109 ) enhanced_entries) in
110 header ^ entries_str ^ "\n"
111 | JSON ->
112 let json = Toru.Make_registry.enhanced_entries_to_json
113 ~algorithm ~generated:(Ptime_clock.now ()) enhanced_entries in
114 Yojson.Safe.pretty_to_string json
115 in
116 match output_file with
117 | Some filename ->
118 let oc = open_out filename in
119 output_string oc content;
120 close_out oc;
121 Printf.printf "Registry written to %s\n" filename
122 | None ->
123 print_string content
124
125(* Main function *)
126let make_registry_main directory output recursive follow_symlinks algorithm
127 excludes include_hidden update_file show_progress format path_format =
128
129 Eio_main.run @@ fun env ->
130 Eio.Switch.run @@ fun sw ->
131 try
132 let dir_path = env#fs |> Eio.Path.(fun fs -> fs / directory) in
133
134 let options = {
135 Toru.Make_registry.recursive;
136 follow_symlinks;
137 hash_algorithm = algorithm;
138 exclude_patterns = excludes;
139 include_hidden;
140 } in
141
142 let progress_fn = create_progress_reporter show_progress in
143
144 let result = match update_file with
145 | Some update_filename ->
146 (* Update existing registry *)
147 let existing_registry =
148 let update_path = env#fs |> Eio.Path.(fun fs -> fs / update_filename) in
149 Toru.Registry.load update_path
150 in
151 if show_progress then Printf.eprintf "Updating registry from %s...\n" update_filename;
152 let updated_registry = Toru.Make_registry.update_registry ~sw ~env ~options
153 existing_registry dir_path in
154 output_registry format updated_registry output;
155 Ok ()
156
157 | None ->
158 (* Create new registry *)
159 if show_progress then Printf.eprintf "Scanning directory %s...\n" directory;
160 let enhanced_entries = Toru.Make_registry.scan_directory_enhanced ~sw ~env ~options dir_path in
161
162 (* Apply path format conversion if needed *)
163 let processed_entries = match path_format with
164 | Relative -> enhanced_entries
165 | Absolute ->
166 List.map (fun enhanced_entry ->
167 let metadata = Toru.Make_registry.get_metadata enhanced_entry in
168 let entry = Toru.Make_registry.get_entry enhanced_entry in
169 let abs_filename = metadata.absolute_path in
170 let abs_entry = Toru.Registry.create_entry
171 ~filename:abs_filename
172 ~hash:(Toru.Registry.hash entry) () in
173 Toru.Make_registry.update_entry enhanced_entry abs_entry
174 ) enhanced_entries
175 in
176
177 output_enhanced_entries format processed_entries algorithm output;
178 Ok ()
179 in
180
181 match result with
182 | Ok () -> 0
183 | Error msg ->
184 Printf.eprintf "Error: %s\n" msg;
185 1
186
187 with
188 | exn ->
189 Printf.eprintf "Error: %s\n" (Printexc.to_string exn);
190 1
191
192(* Command definition *)
193let cmd =
194 let doc = "Generate Pooch-compatible registry files from directories" in
195 let man = [
196 `S Manpage.s_description;
197 `P "$(tname) scans directories and generates registry files compatible with Python Pooch library.";
198 `P "The registry format is: 'filename hash' per line, with optional comments starting with #.";
199 `S Manpage.s_examples;
200 `P "Generate registry for data directory:";
201 `P "$(tname) data/ registry.txt";
202 `P "";
203 `P "Recursive scan with SHA256 and exclude patterns:";
204 `P "$(tname) -r -a sha256 -e '*.tmp' -e '*.log' ./dataset/";
205 `P "";
206 `P "Update existing registry with progress:";
207 `P "$(tname) --update existing.txt --progress data/";
208 `P "";
209 `P "Generate JSON format with absolute paths:";
210 `P "$(tname) --format json --path-format absolute data/";
211 ] in
212
213 let info = Cmd.info "toru-make-registry" ~version:"1.0" ~doc ~man in
214
215 Cmd.v info Term.(const make_registry_main
216 $ directory_arg $ output_arg $ recursive_arg $ follow_symlinks_arg
217 $ algorithm_arg $ exclude_arg $ include_hidden_arg $ update_arg
218 $ progress_arg $ format_arg $ path_format_arg)
219
220let () = exit (Cmd.eval cmd)