My agentic slop goes here. Not intended for anyone else!

more

Changed files
+420
stack
toru
lib
toru
+420
stack/toru/lib/toru/cmd.ml
···
···
+
(** Cmdliner interface for Toru CLI tools
+
+
This module provides reusable Cmdliner terms and converters
+
for building Toru-based CLI applications.
+
*)
+
+
open Cmdliner
+
+
(** {1 Common Arguments} *)
+
+
let base_url_arg =
+
let doc = "Base URL for downloading files not in registry" in
+
Arg.(value & opt string "https://example.com/data/"
+
& info ["b"; "base-url"] ~docv:"URL" ~doc)
+
+
let cache_dir_arg =
+
let default = Toru.default_cache_path ~app_name:"toru" () in
+
let doc = "Directory for caching downloaded files" in
+
Arg.(value & opt string default
+
& info ["c"; "cache"] ~docv:"DIR" ~doc)
+
+
let version_arg =
+
let doc = "Version subdirectory in cache" in
+
Arg.(value & opt (some string) None
+
& info ["v"; "version"] ~docv:"VERSION" ~doc)
+
+
let registry_file_arg =
+
let doc = "Path to local registry file" in
+
Arg.(value & opt (some string) None
+
& info ["r"; "registry"] ~docv:"FILE" ~doc)
+
+
let registry_url_arg =
+
let doc = "URL of remote registry file" in
+
Arg.(value & opt (some string) None
+
& info ["R"; "registry-url"] ~docv:"URL" ~doc)
+
+
let concurrency_arg =
+
let doc = "Number of concurrent downloads" in
+
Arg.(value & opt int 4
+
& info ["j"; "concurrency"] ~docv:"N" ~doc)
+
+
let verbose_arg =
+
let doc = "Verbose output" in
+
Arg.(value & flag & info ["v"; "verbose"] ~doc)
+
+
let quiet_arg =
+
let doc = "Quiet mode (errors only)" in
+
Arg.(value & flag & info ["q"; "quiet"] ~doc)
+
+
(** {1 Downloader Configuration} *)
+
+
let timeout_arg =
+
let doc = "Download timeout in seconds" in
+
Arg.(value & opt float 300.0
+
& info ["timeout"] ~docv:"SECONDS" ~doc)
+
+
let max_redirects_arg =
+
let doc = "Maximum number of redirects to follow" in
+
Arg.(value & opt int 10
+
& info ["max-redirects"] ~docv:"N" ~doc)
+
+
let retry_arg =
+
let doc = "Number of retry attempts" in
+
Arg.(value & opt int 3
+
& info ["retry"] ~docv:"N" ~doc)
+
+
let no_verify_tls_arg =
+
let doc = "Disable TLS certificate verification (insecure)" in
+
Arg.(value & flag & info ["no-verify-tls"] ~doc)
+
+
let auth_arg =
+
let doc = "Basic authentication (username:password)" in
+
let parse s =
+
match String.split_on_char ':' s with
+
| [user; pass] -> Ok (user, pass)
+
| _ -> Error (`Msg "Auth must be username:password")
+
in
+
let pp fmt (u, p) = Format.fprintf fmt "%s:%s" u p in
+
let auth_conv = Arg.conv (parse, pp) in
+
Arg.(value & opt (some auth_conv) None
+
& info ["auth"] ~docv:"USER:PASS" ~doc)
+
+
(** {1 Commands} *)
+
+
(** Fetch command arguments *)
+
let filename_arg =
+
let doc = "File to fetch from registry" in
+
Arg.(required & pos 0 (some string) None & info [] ~docv:"FILE" ~doc)
+
+
let files_arg =
+
let doc = "Files to fetch from registry" in
+
Arg.(non_empty & pos_all string [] & info [] ~docv:"FILE" ~doc)
+
+
let processor_arg =
+
let doc = "Post-processor to apply (auto, untar-gz, unzip, gunzip, none)" in
+
let parse = function
+
| "auto" -> Ok `Auto
+
| "untar-gz" -> Ok `UntarGz
+
| "untar-xz" -> Ok `UntarXz
+
| "unzip" -> Ok `Unzip
+
| "gunzip" -> Ok `Gunzip
+
| "bunzip2" -> Ok `Bunzip2
+
| "none" -> Ok `None
+
| s -> Error (`Msg ("Unknown processor: " ^ s))
+
in
+
let pp fmt = function
+
| `Auto -> Format.pp_print_string fmt "auto"
+
| `UntarGz -> Format.pp_print_string fmt "untar-gz"
+
| `UntarXz -> Format.pp_print_string fmt "untar-xz"
+
| `Unzip -> Format.pp_print_string fmt "unzip"
+
| `Gunzip -> Format.pp_print_string fmt "gunzip"
+
| `Bunzip2 -> Format.pp_print_string fmt "bunzip2"
+
| `None -> Format.pp_print_string fmt "none"
+
in
+
let proc_conv = Arg.conv (parse, pp) in
+
Arg.(value & opt proc_conv `Auto
+
& info ["p"; "processor"] ~docv:"PROC" ~doc)
+
+
let output_dir_arg =
+
let doc = "Output directory for extracted files" in
+
Arg.(value & opt string "."
+
& info ["o"; "output"] ~docv:"DIR" ~doc)
+
+
(** Cache management commands *)
+
let clear_cmd_arg =
+
let doc = "Clear all cached files" in
+
Arg.(value & flag & info ["clear"] ~doc)
+
+
let stats_cmd_arg =
+
let doc = "Show cache statistics" in
+
Arg.(value & flag & info ["stats"] ~doc)
+
+
let expire_cmd_arg =
+
let doc = "Expire old cache entries" in
+
Arg.(value & flag & info ["expire"] ~doc)
+
+
(** Registry management *)
+
let list_cmd_arg =
+
let doc = "List files in registry" in
+
Arg.(value & flag & info ["list"; "ls"] ~doc)
+
+
let verify_cmd_arg =
+
let doc = "Verify cached files against registry" in
+
Arg.(value & flag & info ["verify"] ~doc)
+
+
(** {1 Logging Setup} *)
+
+
let setup_logging verbose quiet =
+
let level =
+
if quiet then Logs.Error
+
else if verbose then Logs.Debug
+
else Logs.Info
+
in
+
Logs.set_level (Some level);
+
Logs.set_reporter (Logs_fmt.reporter ())
+
+
(** {1 Downloader Configuration Builder} *)
+
+
let create_downloader_config ~timeout ~max_redirects ~retry ~verify_tls ~auth =
+
let retry_config =
+
if retry > 0 then
+
Some (Requests.Retry.create_config ~max_retries:retry ~backoff_factor:1.0 ())
+
else None
+
in
+
let auth_config = Option.map (fun (user, pass) ->
+
Requests.Auth.basic ~username:user ~password:pass
+
) auth in
+
Toru.Downloader.create_config
+
~timeout
+
~max_redirects
+
~retry:retry_config
+
~auth:auth_config
+
~verify_tls
+
()
+
+
(** {1 Main Entry Points} *)
+
+
let fetch_main base_url cache_dir version registry_file registry_url
+
filename processor output_dir
+
timeout max_redirects retry verify_tls auth
+
verbose quiet =
+
setup_logging verbose quiet;
+
+
Eio_main.run @@ fun env ->
+
Eio.Switch.run @@ fun sw ->
+
+
(* Create downloader config *)
+
let downloader_config = create_downloader_config
+
~timeout ~max_redirects ~retry ~verify_tls:(not verify_tls) ~auth
+
in
+
+
(* Create Toru instance *)
+
let toru = Toru.create ~sw ~env
+
~base_url ~cache_path:cache_dir ?version
+
?registry_file ?registry_url ~downloader_config
+
()
+
in
+
+
(* Determine processor *)
+
let proc = match processor with
+
| `Auto -> Toru.Processors.detect_processor filename
+
| `UntarGz -> Some (Toru.Processors.untar_gz output_dir)
+
| `UntarXz -> Some (Toru.Processors.untar_xz output_dir)
+
| `Unzip -> Some (Toru.Processors.unzip output_dir)
+
| `Gunzip -> Some Toru.Processors.gunzip
+
| `Bunzip2 -> Some Toru.Processors.bunzip2
+
| `None -> None
+
in
+
+
(* Fetch the file *)
+
match Toru.fetch toru ~filename ?processor:proc () with
+
| Ok path ->
+
Printf.printf "File available at: %s\n" (Eio.Path.native_exn path);
+
0
+
| Error msg ->
+
Printf.eprintf "Error: %s\n" msg;
+
1
+
+
let fetch_all_main base_url cache_dir version registry_file registry_url
+
concurrency timeout max_redirects retry verify_tls auth
+
verbose quiet =
+
setup_logging verbose quiet;
+
+
Eio_main.run @@ fun env ->
+
Eio.Switch.run @@ fun sw ->
+
+
(* Create downloader config *)
+
let downloader_config = create_downloader_config
+
~timeout ~max_redirects ~retry ~verify_tls:(not verify_tls) ~auth
+
in
+
+
(* Create Toru instance *)
+
let toru = Toru.create ~sw ~env
+
~base_url ~cache_path:cache_dir ?version
+
?registry_file ?registry_url ~downloader_config
+
()
+
in
+
+
(* Fetch all files *)
+
match Toru.fetch_all toru ~concurrency () with
+
| Ok () ->
+
Printf.printf "All files downloaded successfully\n";
+
0
+
| Error msg ->
+
Printf.eprintf "Error: %s\n" msg;
+
1
+
+
let cache_main cache_dir version clear stats expire verbose quiet =
+
setup_logging verbose quiet;
+
+
if not (clear || stats || expire) then begin
+
Printf.eprintf "Error: Must specify --clear, --stats, or --expire\n";
+
1
+
end else
+
Eio_main.run @@ fun env ->
+
Eio.Switch.run @@ fun sw ->
+
+
(* Create cache instance *)
+
let cache = Toru.Cache.create ~sw ~fs:env#fs ?version cache_dir in
+
+
if clear then begin
+
Toru.Cache.clear cache;
+
Printf.printf "Cache cleared\n"
+
end;
+
+
if stats then begin
+
let stats = Toru.Cache.stats cache in
+
Printf.printf "Cache statistics:\n";
+
Printf.printf " Total files: %d\n" (Cacheio.Stats.entry_count stats);
+
Printf.printf " Total size: %Ld bytes\n" (Cacheio.Stats.total_size stats);
+
Printf.printf " Expired files: %d\n" (Cacheio.Stats.expired_count stats)
+
end;
+
+
if expire then begin
+
let expired = Toru.Cache.expire cache in
+
Printf.printf "Expired %d entries\n" expired
+
end;
+
+
0
+
+
let registry_main base_url cache_dir version registry_file registry_url
+
list verify verbose quiet =
+
setup_logging verbose quiet;
+
+
if not (list || verify) then begin
+
Printf.eprintf "Error: Must specify --list or --verify\n";
+
1
+
end else
+
Eio_main.run @@ fun env ->
+
Eio.Switch.run @@ fun sw ->
+
+
(* Create Toru instance *)
+
let toru = Toru.create ~sw ~env
+
~base_url ~cache_path:cache_dir ?version
+
?registry_file ?registry_url
+
()
+
in
+
+
if list then begin
+
let registry = Toru.registry toru in
+
let entries = Toru.Registry.entries registry in
+
Printf.printf "Registry contains %d files:\n" (List.length entries);
+
List.iter (fun entry ->
+
let filename = Toru.Registry.filename entry in
+
let hash = Toru.Registry.hash entry in
+
Printf.printf " %s (%s:%s)\n"
+
filename
+
(Toru.Hash.algorithm_to_string (Toru.Hash.algorithm hash))
+
(String.sub (Toru.Hash.value hash) 0 8)
+
) entries
+
end;
+
+
if verify then begin
+
let registry = Toru.registry toru in
+
let cache = Toru.cache toru in
+
let entries = Toru.Registry.entries registry in
+
let errors = ref 0 in
+
+
List.iter (fun entry ->
+
let filename = Toru.Registry.filename entry in
+
if Toru.Cache.exists cache filename then
+
let path = Toru.Cache.file_path cache filename in
+
let hash = Toru.Registry.hash entry in
+
if Toru.Hash.verify path hash then
+
Printf.printf " ✓ %s\n" filename
+
else begin
+
Printf.printf " ✗ %s (hash mismatch)\n" filename;
+
incr errors
+
end
+
else
+
Printf.printf " - %s (not cached)\n" filename
+
) entries;
+
+
if !errors > 0 then begin
+
Printf.eprintf "\n%d verification failures\n" !errors;
+
1
+
end else begin
+
Printf.printf "\nAll cached files verified\n";
+
0
+
end
+
end else
+
0
+
+
(** {1 Command Terms} *)
+
+
let fetch_cmd =
+
let doc = "Fetch a file from the registry" in
+
let man = [
+
`S Manpage.s_description;
+
`P "Downloads and caches a file from the registry, verifying its hash.";
+
`P "Files are cached locally and reused on subsequent fetches.";
+
] in
+
let info = Cmd.info "fetch" ~doc ~man in
+
let term = Term.(const fetch_main
+
$ base_url_arg $ cache_dir_arg $ version_arg
+
$ registry_file_arg $ registry_url_arg
+
$ filename_arg $ processor_arg $ output_dir_arg
+
$ timeout_arg $ max_redirects_arg $ retry_arg
+
$ no_verify_tls_arg $ auth_arg
+
$ verbose_arg $ quiet_arg) in
+
Cmd.v info term
+
+
let fetch_all_cmd =
+
let doc = "Fetch all files from the registry" in
+
let man = [
+
`S Manpage.s_description;
+
`P "Downloads all files listed in the registry concurrently.";
+
] in
+
let info = Cmd.info "fetch-all" ~doc ~man in
+
let term = Term.(const fetch_all_main
+
$ base_url_arg $ cache_dir_arg $ version_arg
+
$ registry_file_arg $ registry_url_arg
+
$ concurrency_arg
+
$ timeout_arg $ max_redirects_arg $ retry_arg
+
$ no_verify_tls_arg $ auth_arg
+
$ verbose_arg $ quiet_arg) in
+
Cmd.v info term
+
+
let cache_cmd =
+
let doc = "Manage the local cache" in
+
let man = [
+
`S Manpage.s_description;
+
`P "Commands for managing the local file cache.";
+
] in
+
let info = Cmd.info "cache" ~doc ~man in
+
let term = Term.(const cache_main
+
$ cache_dir_arg $ version_arg
+
$ clear_cmd_arg $ stats_cmd_arg $ expire_cmd_arg
+
$ verbose_arg $ quiet_arg) in
+
Cmd.v info term
+
+
let registry_cmd =
+
let doc = "Manage and inspect the registry" in
+
let man = [
+
`S Manpage.s_description;
+
`P "Commands for working with registry files.";
+
] in
+
let info = Cmd.info "registry" ~doc ~man in
+
let term = Term.(const registry_main
+
$ base_url_arg $ cache_dir_arg $ version_arg
+
$ registry_file_arg $ registry_url_arg
+
$ list_cmd_arg $ verify_cmd_arg
+
$ verbose_arg $ quiet_arg) in
+
Cmd.v info term
+
+
let main_cmd =
+
let doc = "Toru - Data repository management" in
+
let man = [
+
`S Manpage.s_description;
+
`P "Toru is a data repository management tool compatible with Python Pooch.";
+
`P "It downloads, caches, and verifies data files from remote repositories.";
+
] in
+
let info = Cmd.info "toru" ~version:"%%VERSION%%" ~doc ~man in
+
let default = Term.(ret (const (`Help (`Pager, None)))) in
+
Cmd.group info ~default [
+
fetch_cmd;
+
fetch_all_cmd;
+
cache_cmd;
+
registry_cmd;
+
]