···
1
+
(** Cmdliner interface for Toru CLI tools
3
+
This module provides reusable Cmdliner terms and converters
4
+
for building Toru-based CLI applications.
9
+
(** {1 Common Arguments} *)
12
+
let doc = "Base URL for downloading files not in registry" in
13
+
Arg.(value & opt string "https://example.com/data/"
14
+
& info ["b"; "base-url"] ~docv:"URL" ~doc)
17
+
let default = Toru.default_cache_path ~app_name:"toru" () in
18
+
let doc = "Directory for caching downloaded files" in
19
+
Arg.(value & opt string default
20
+
& info ["c"; "cache"] ~docv:"DIR" ~doc)
23
+
let doc = "Version subdirectory in cache" in
24
+
Arg.(value & opt (some string) None
25
+
& info ["v"; "version"] ~docv:"VERSION" ~doc)
27
+
let registry_file_arg =
28
+
let doc = "Path to local registry file" in
29
+
Arg.(value & opt (some string) None
30
+
& info ["r"; "registry"] ~docv:"FILE" ~doc)
32
+
let registry_url_arg =
33
+
let doc = "URL of remote registry file" in
34
+
Arg.(value & opt (some string) None
35
+
& info ["R"; "registry-url"] ~docv:"URL" ~doc)
37
+
let concurrency_arg =
38
+
let doc = "Number of concurrent downloads" in
39
+
Arg.(value & opt int 4
40
+
& info ["j"; "concurrency"] ~docv:"N" ~doc)
43
+
let doc = "Verbose output" in
44
+
Arg.(value & flag & info ["v"; "verbose"] ~doc)
47
+
let doc = "Quiet mode (errors only)" in
48
+
Arg.(value & flag & info ["q"; "quiet"] ~doc)
50
+
(** {1 Downloader Configuration} *)
53
+
let doc = "Download timeout in seconds" in
54
+
Arg.(value & opt float 300.0
55
+
& info ["timeout"] ~docv:"SECONDS" ~doc)
57
+
let max_redirects_arg =
58
+
let doc = "Maximum number of redirects to follow" in
59
+
Arg.(value & opt int 10
60
+
& info ["max-redirects"] ~docv:"N" ~doc)
63
+
let doc = "Number of retry attempts" in
64
+
Arg.(value & opt int 3
65
+
& info ["retry"] ~docv:"N" ~doc)
67
+
let no_verify_tls_arg =
68
+
let doc = "Disable TLS certificate verification (insecure)" in
69
+
Arg.(value & flag & info ["no-verify-tls"] ~doc)
72
+
let doc = "Basic authentication (username:password)" in
74
+
match String.split_on_char ':' s with
75
+
| [user; pass] -> Ok (user, pass)
76
+
| _ -> Error (`Msg "Auth must be username:password")
78
+
let pp fmt (u, p) = Format.fprintf fmt "%s:%s" u p in
79
+
let auth_conv = Arg.conv (parse, pp) in
80
+
Arg.(value & opt (some auth_conv) None
81
+
& info ["auth"] ~docv:"USER:PASS" ~doc)
85
+
(** Fetch command arguments *)
87
+
let doc = "File to fetch from registry" in
88
+
Arg.(required & pos 0 (some string) None & info [] ~docv:"FILE" ~doc)
91
+
let doc = "Files to fetch from registry" in
92
+
Arg.(non_empty & pos_all string [] & info [] ~docv:"FILE" ~doc)
95
+
let doc = "Post-processor to apply (auto, untar-gz, unzip, gunzip, none)" in
96
+
let parse = function
97
+
| "auto" -> Ok `Auto
98
+
| "untar-gz" -> Ok `UntarGz
99
+
| "untar-xz" -> Ok `UntarXz
100
+
| "unzip" -> Ok `Unzip
101
+
| "gunzip" -> Ok `Gunzip
102
+
| "bunzip2" -> Ok `Bunzip2
103
+
| "none" -> Ok `None
104
+
| s -> Error (`Msg ("Unknown processor: " ^ s))
106
+
let pp fmt = function
107
+
| `Auto -> Format.pp_print_string fmt "auto"
108
+
| `UntarGz -> Format.pp_print_string fmt "untar-gz"
109
+
| `UntarXz -> Format.pp_print_string fmt "untar-xz"
110
+
| `Unzip -> Format.pp_print_string fmt "unzip"
111
+
| `Gunzip -> Format.pp_print_string fmt "gunzip"
112
+
| `Bunzip2 -> Format.pp_print_string fmt "bunzip2"
113
+
| `None -> Format.pp_print_string fmt "none"
115
+
let proc_conv = Arg.conv (parse, pp) in
116
+
Arg.(value & opt proc_conv `Auto
117
+
& info ["p"; "processor"] ~docv:"PROC" ~doc)
119
+
let output_dir_arg =
120
+
let doc = "Output directory for extracted files" in
121
+
Arg.(value & opt string "."
122
+
& info ["o"; "output"] ~docv:"DIR" ~doc)
124
+
(** Cache management commands *)
125
+
let clear_cmd_arg =
126
+
let doc = "Clear all cached files" in
127
+
Arg.(value & flag & info ["clear"] ~doc)
129
+
let stats_cmd_arg =
130
+
let doc = "Show cache statistics" in
131
+
Arg.(value & flag & info ["stats"] ~doc)
133
+
let expire_cmd_arg =
134
+
let doc = "Expire old cache entries" in
135
+
Arg.(value & flag & info ["expire"] ~doc)
137
+
(** Registry management *)
139
+
let doc = "List files in registry" in
140
+
Arg.(value & flag & info ["list"; "ls"] ~doc)
142
+
let verify_cmd_arg =
143
+
let doc = "Verify cached files against registry" in
144
+
Arg.(value & flag & info ["verify"] ~doc)
146
+
(** {1 Logging Setup} *)
148
+
let setup_logging verbose quiet =
150
+
if quiet then Logs.Error
151
+
else if verbose then Logs.Debug
154
+
Logs.set_level (Some level);
155
+
Logs.set_reporter (Logs_fmt.reporter ())
157
+
(** {1 Downloader Configuration Builder} *)
159
+
let create_downloader_config ~timeout ~max_redirects ~retry ~verify_tls ~auth =
162
+
Some (Requests.Retry.create_config ~max_retries:retry ~backoff_factor:1.0 ())
165
+
let auth_config = Option.map (fun (user, pass) ->
166
+
Requests.Auth.basic ~username:user ~password:pass
168
+
Toru.Downloader.create_config
171
+
~retry:retry_config
176
+
(** {1 Main Entry Points} *)
178
+
let fetch_main base_url cache_dir version registry_file registry_url
179
+
filename processor output_dir
180
+
timeout max_redirects retry verify_tls auth
182
+
setup_logging verbose quiet;
184
+
Eio_main.run @@ fun env ->
185
+
Eio.Switch.run @@ fun sw ->
187
+
(* Create downloader config *)
188
+
let downloader_config = create_downloader_config
189
+
~timeout ~max_redirects ~retry ~verify_tls:(not verify_tls) ~auth
192
+
(* Create Toru instance *)
193
+
let toru = Toru.create ~sw ~env
194
+
~base_url ~cache_path:cache_dir ?version
195
+
?registry_file ?registry_url ~downloader_config
199
+
(* Determine processor *)
200
+
let proc = match processor with
201
+
| `Auto -> Toru.Processors.detect_processor filename
202
+
| `UntarGz -> Some (Toru.Processors.untar_gz output_dir)
203
+
| `UntarXz -> Some (Toru.Processors.untar_xz output_dir)
204
+
| `Unzip -> Some (Toru.Processors.unzip output_dir)
205
+
| `Gunzip -> Some Toru.Processors.gunzip
206
+
| `Bunzip2 -> Some Toru.Processors.bunzip2
210
+
(* Fetch the file *)
211
+
match Toru.fetch toru ~filename ?processor:proc () with
213
+
Printf.printf "File available at: %s\n" (Eio.Path.native_exn path);
216
+
Printf.eprintf "Error: %s\n" msg;
219
+
let fetch_all_main base_url cache_dir version registry_file registry_url
220
+
concurrency timeout max_redirects retry verify_tls auth
222
+
setup_logging verbose quiet;
224
+
Eio_main.run @@ fun env ->
225
+
Eio.Switch.run @@ fun sw ->
227
+
(* Create downloader config *)
228
+
let downloader_config = create_downloader_config
229
+
~timeout ~max_redirects ~retry ~verify_tls:(not verify_tls) ~auth
232
+
(* Create Toru instance *)
233
+
let toru = Toru.create ~sw ~env
234
+
~base_url ~cache_path:cache_dir ?version
235
+
?registry_file ?registry_url ~downloader_config
239
+
(* Fetch all files *)
240
+
match Toru.fetch_all toru ~concurrency () with
242
+
Printf.printf "All files downloaded successfully\n";
245
+
Printf.eprintf "Error: %s\n" msg;
248
+
let cache_main cache_dir version clear stats expire verbose quiet =
249
+
setup_logging verbose quiet;
251
+
if not (clear || stats || expire) then begin
252
+
Printf.eprintf "Error: Must specify --clear, --stats, or --expire\n";
255
+
Eio_main.run @@ fun env ->
256
+
Eio.Switch.run @@ fun sw ->
258
+
(* Create cache instance *)
259
+
let cache = Toru.Cache.create ~sw ~fs:env#fs ?version cache_dir in
261
+
if clear then begin
262
+
Toru.Cache.clear cache;
263
+
Printf.printf "Cache cleared\n"
266
+
if stats then begin
267
+
let stats = Toru.Cache.stats cache in
268
+
Printf.printf "Cache statistics:\n";
269
+
Printf.printf " Total files: %d\n" (Cacheio.Stats.entry_count stats);
270
+
Printf.printf " Total size: %Ld bytes\n" (Cacheio.Stats.total_size stats);
271
+
Printf.printf " Expired files: %d\n" (Cacheio.Stats.expired_count stats)
274
+
if expire then begin
275
+
let expired = Toru.Cache.expire cache in
276
+
Printf.printf "Expired %d entries\n" expired
281
+
let registry_main base_url cache_dir version registry_file registry_url
282
+
list verify verbose quiet =
283
+
setup_logging verbose quiet;
285
+
if not (list || verify) then begin
286
+
Printf.eprintf "Error: Must specify --list or --verify\n";
289
+
Eio_main.run @@ fun env ->
290
+
Eio.Switch.run @@ fun sw ->
292
+
(* Create Toru instance *)
293
+
let toru = Toru.create ~sw ~env
294
+
~base_url ~cache_path:cache_dir ?version
295
+
?registry_file ?registry_url
300
+
let registry = Toru.registry toru in
301
+
let entries = Toru.Registry.entries registry in
302
+
Printf.printf "Registry contains %d files:\n" (List.length entries);
303
+
List.iter (fun entry ->
304
+
let filename = Toru.Registry.filename entry in
305
+
let hash = Toru.Registry.hash entry in
306
+
Printf.printf " %s (%s:%s)\n"
308
+
(Toru.Hash.algorithm_to_string (Toru.Hash.algorithm hash))
309
+
(String.sub (Toru.Hash.value hash) 0 8)
313
+
if verify then begin
314
+
let registry = Toru.registry toru in
315
+
let cache = Toru.cache toru in
316
+
let entries = Toru.Registry.entries registry in
317
+
let errors = ref 0 in
319
+
List.iter (fun entry ->
320
+
let filename = Toru.Registry.filename entry in
321
+
if Toru.Cache.exists cache filename then
322
+
let path = Toru.Cache.file_path cache filename in
323
+
let hash = Toru.Registry.hash entry in
324
+
if Toru.Hash.verify path hash then
325
+
Printf.printf " ✓ %s\n" filename
327
+
Printf.printf " ✗ %s (hash mismatch)\n" filename;
331
+
Printf.printf " - %s (not cached)\n" filename
334
+
if !errors > 0 then begin
335
+
Printf.eprintf "\n%d verification failures\n" !errors;
338
+
Printf.printf "\nAll cached files verified\n";
344
+
(** {1 Command Terms} *)
347
+
let doc = "Fetch a file from the registry" in
349
+
`S Manpage.s_description;
350
+
`P "Downloads and caches a file from the registry, verifying its hash.";
351
+
`P "Files are cached locally and reused on subsequent fetches.";
353
+
let info = Cmd.info "fetch" ~doc ~man in
354
+
let term = Term.(const fetch_main
355
+
$ base_url_arg $ cache_dir_arg $ version_arg
356
+
$ registry_file_arg $ registry_url_arg
357
+
$ filename_arg $ processor_arg $ output_dir_arg
358
+
$ timeout_arg $ max_redirects_arg $ retry_arg
359
+
$ no_verify_tls_arg $ auth_arg
360
+
$ verbose_arg $ quiet_arg) in
363
+
let fetch_all_cmd =
364
+
let doc = "Fetch all files from the registry" in
366
+
`S Manpage.s_description;
367
+
`P "Downloads all files listed in the registry concurrently.";
369
+
let info = Cmd.info "fetch-all" ~doc ~man in
370
+
let term = Term.(const fetch_all_main
371
+
$ base_url_arg $ cache_dir_arg $ version_arg
372
+
$ registry_file_arg $ registry_url_arg
374
+
$ timeout_arg $ max_redirects_arg $ retry_arg
375
+
$ no_verify_tls_arg $ auth_arg
376
+
$ verbose_arg $ quiet_arg) in
380
+
let doc = "Manage the local cache" in
382
+
`S Manpage.s_description;
383
+
`P "Commands for managing the local file cache.";
385
+
let info = Cmd.info "cache" ~doc ~man in
386
+
let term = Term.(const cache_main
387
+
$ cache_dir_arg $ version_arg
388
+
$ clear_cmd_arg $ stats_cmd_arg $ expire_cmd_arg
389
+
$ verbose_arg $ quiet_arg) in
393
+
let doc = "Manage and inspect the registry" in
395
+
`S Manpage.s_description;
396
+
`P "Commands for working with registry files.";
398
+
let info = Cmd.info "registry" ~doc ~man in
399
+
let term = Term.(const registry_main
400
+
$ base_url_arg $ cache_dir_arg $ version_arg
401
+
$ registry_file_arg $ registry_url_arg
402
+
$ list_cmd_arg $ verify_cmd_arg
403
+
$ verbose_arg $ quiet_arg) in
407
+
let doc = "Toru - Data repository management" in
409
+
`S Manpage.s_description;
410
+
`P "Toru is a data repository management tool compatible with Python Pooch.";
411
+
`P "It downloads, caches, and verifies data files from remote repositories.";
413
+
let info = Cmd.info "toru" ~version:"%%VERSION%%" ~doc ~man in
414
+
let default = Term.(ret (const (`Help (`Pager, None)))) in
415
+
Cmd.group info ~default [