My agentic slop goes here. Not intended for anyone else!
at main 13 kB view raw
1(** Main Toru CLI tool for registry inspection and manipulation *) 2 3open Cmdliner 4open Toru 5 6(** Global options shared across commands *) 7type global_opts = { 8 app_name : string; 9 cache_dir : string option; 10 version : string option; 11 verbose_level : int; 12 env : Eio_unix.Stdenv.base; 13} 14 15(** Resolve cache directory using XDG if none specified *) 16let resolve_cache_dir app_name cache_dir = 17 match cache_dir with 18 | Some dir -> dir 19 | None -> 20 (* Try application-specific environment variable first *) 21 let app_env_var = String.uppercase_ascii app_name ^ "_CACHE_DIR" in 22 (match Sys.getenv_opt app_env_var with 23 | Some dir -> dir 24 | None -> 25 (* Use XDG cache directory as default *) 26 (match Sys.getenv_opt "XDG_CACHE_HOME" with 27 | Some dir -> Filename.concat dir app_name 28 | None -> 29 (match Sys.getenv_opt "HOME" with 30 | Some home -> Filename.concat (Filename.concat home ".cache") app_name 31 | None -> Filename.concat "/tmp" ("cache-" ^ app_name)))) 32 33let create_global_opts app_name cache_dir version verbose_level env = 34 (* Setup logging based on verbosity level *) 35 Toru.Logging.setup_logging verbose_level; 36 (* Resolve cache directory using XDG *) 37 let resolved_cache_dir = resolve_cache_dir app_name cache_dir in 38 { app_name; cache_dir = Some resolved_cache_dir; version; verbose_level; env } 39 40let global_opts_t eio_env = 41 let app_name = 42 let doc = "Application name for cache and config directories" in 43 Arg.(value & opt string "toru" & info ["app-name"; "a"] ~doc) 44 in 45 let cache_dir = 46 let doc = "Override default cache directory path (respects XDG_CACHE_HOME)" in 47 Arg.(value & opt (some string) None & info ["cache-dir"; "c"] ~doc) 48 in 49 let version = 50 let doc = "Version string for cache subdirectory organization" in 51 Arg.(value & opt (some string) None & info ["data-version"] ~doc) 52 in 53 let verbose_count = 54 let doc = "Verbose output (repeat for more verbosity: -v=info, -vv=debug)" in 55 Arg.(value & flag_all & info ["verbose"; "v"] ~doc) 56 in 57 Term.(const (fun app_name cache_dir version verbose_flags -> 58 create_global_opts app_name cache_dir version (List.length verbose_flags) eio_env 59 ) $ app_name $ cache_dir $ version $ verbose_count) 60 61(** Registry inspect command *) 62let inspect_cmd eio_env = 63 let registry_source = 64 let doc = "Registry file path or URL to inspect" in 65 Arg.(required & pos 0 (some string) None & info [] ~docv:"REGISTRY" ~doc) 66 in 67 68 let show_stats = 69 let doc = "Show detailed statistics about the registry" in 70 Arg.(value & flag & info ["stats"; "s"] ~doc) 71 in 72 73 let list_files = 74 let doc = "List all files in the registry" in 75 Arg.(value & flag & info ["list"; "l"] ~doc) 76 in 77 78 let search_pattern = 79 let doc = "Search for files matching this pattern" in 80 Arg.(value & opt (some string) None & info ["search"] ~docv:"PATTERN" ~doc) 81 in 82 83 let inspect global_opts registry_source show_stats list_files search_pattern = 84 try 85 Toru.Logging.Cli.debug (fun m -> m "Starting inspect function"); 86 87 Toru.Logging.Cli.info (fun m -> m "Loading registry from: %s" registry_source); 88 89 Toru.Logging.Cli.debug (fun m -> m "Determining if source is URL or file path"); 90 91 (* Load the registry *) 92 let registry = 93 if (String.starts_with ~prefix:"http://" registry_source || 94 String.starts_with ~prefix:"https://" registry_source) then ( 95 (* Definitely a URL *) 96 Toru.Logging.Cli.debug (fun m -> m "Detected URL, loading from network"); 97 Registry.load_from_url registry_source 98 ) else ( 99 (* Treat as file path *) 100 Toru.Logging.Cli.debug (fun m -> m "Detected file path, loading with Eio"); 101 (* Use Eio for file loading - this is the correct approach *) 102 Registry.load (Eio.Path.(global_opts.env#fs / registry_source)) 103 ) 104 in 105 106 Toru.Logging.Cli.debug (fun m -> m "Registry loaded successfully"); 107 108 let total_entries = Registry.size registry in 109 Printf.printf "Registry contains %d entries\n" total_entries; 110 111 if show_stats then ( 112 Printf.printf "\nRegistry Statistics:\n"; 113 Printf.printf "===================\n"; 114 let entries = Registry.entries registry in 115 116 (* Count by hash algorithm *) 117 let hash_counts = Hashtbl.create 8 in 118 List.iter (fun entry -> 119 let hash = Registry.hash entry in 120 let algo = Hash.algorithm hash in 121 let algo_str = Hash.algorithm_to_string algo in 122 let count = match Hashtbl.find_opt hash_counts algo_str with 123 | Some n -> n + 1 124 | None -> 1 125 in 126 Hashtbl.replace hash_counts algo_str count 127 ) entries; 128 129 Printf.printf "Hash algorithms:\n"; 130 Hashtbl.iter (fun algo count -> 131 Printf.printf " %s: %d entries\n" algo count 132 ) hash_counts; 133 134 (* File extension analysis *) 135 let ext_counts = Hashtbl.create 16 in 136 List.iter (fun entry -> 137 let filename = Registry.filename entry in 138 let ext = 139 match String.rindex_opt filename '.' with 140 | Some idx -> String.sub filename (idx + 1) (String.length filename - idx - 1) 141 | None -> "(no extension)" 142 in 143 let count = match Hashtbl.find_opt ext_counts ext with 144 | Some n -> n + 1 145 | None -> 1 146 in 147 Hashtbl.replace ext_counts ext count 148 ) entries; 149 150 Printf.printf "\nFile extensions:\n"; 151 Hashtbl.iter (fun ext count -> 152 Printf.printf " .%s: %d files\n" ext count 153 ) ext_counts 154 ); 155 156 if list_files then ( 157 Printf.printf "\nFiles in registry:\n"; 158 Printf.printf "==================\n"; 159 let entries = Registry.entries registry in 160 List.iter (fun entry -> 161 let filename = Registry.filename entry in 162 let hash = Registry.hash entry in 163 Printf.printf "%s %s\n" (Hash.value hash) filename 164 ) entries 165 ); 166 167 (match search_pattern with 168 | Some pattern -> 169 Printf.printf "\nFiles matching '%s':\n" pattern; 170 Printf.printf "=======================\n"; 171 let entries = Registry.entries registry in 172 let matches = List.filter (fun entry -> 173 let filename = Registry.filename entry in 174 String.contains filename (String.get pattern 0) || 175 String.length filename >= String.length pattern && 176 String.sub filename 0 (String.length pattern) = pattern 177 ) entries in 178 List.iter (fun entry -> 179 let filename = Registry.filename entry in 180 let hash = Registry.hash entry in 181 Printf.printf "%s %s\n" (Hash.value hash) filename 182 ) matches 183 | None -> ()); 184 185 `Ok () 186 with 187 | exn -> `Error (false, "Failed to inspect registry: " ^ (Printexc.to_string exn)) 188 in 189 190 let term eio_env = Term.(ret (const inspect $ global_opts_t eio_env $ registry_source $ show_stats $ list_files $ search_pattern)) in 191 let info = Cmd.info "inspect" ~doc:"Inspect a registry file or URL" in 192 Cmd.v info (term eio_env) 193 194(** Registry validate command *) 195let validate_cmd eio_env = 196 let registry_source = 197 let doc = "Registry file path or URL to validate" in 198 Arg.(required & pos 0 (some string) None & info [] ~docv:"REGISTRY" ~doc) 199 in 200 201 let check_hashes = 202 let doc = "Check if all hash formats are valid" in 203 Arg.(value & flag & info ["check-hashes"] ~doc) 204 in 205 206 let validate global_opts registry_source check_hashes = 207 try 208 Toru.Logging.Cli.info (fun m -> m "Validating registry: %s" registry_source); 209 210 let registry = 211 if (String.starts_with ~prefix:"http://" registry_source || 212 String.starts_with ~prefix:"https://" registry_source) then ( 213 Toru.Logging.Cli.debug (fun m -> m "Detected URL, using load_from_url"); 214 Registry.load_from_url registry_source 215 ) else ( 216 Toru.Logging.Cli.debug (fun m -> m "Detected file path, using Eio"); 217 (* Use Eio for file loading *) 218 Registry.load (Eio.Path.(global_opts.env#fs / registry_source)) 219 ) 220 in 221 222 let entries = Registry.entries registry in 223 let total = List.length entries in 224 Printf.printf "✓ Registry loaded successfully with %d entries\n" total; 225 226 if check_hashes then ( 227 Printf.printf "Validating hash formats...\n"; 228 let valid_count = ref 0 in 229 let invalid_count = ref 0 in 230 List.iter (fun entry -> 231 let filename = Registry.filename entry in 232 let hash = Registry.hash entry in 233 let hash_str = Hash.value hash in 234 let algo = Hash.algorithm hash in 235 let expected_len = match algo with 236 | SHA256 -> 64 237 | SHA1 -> 40 238 | MD5 -> 32 239 in 240 if String.length hash_str = expected_len then 241 incr valid_count 242 else ( 243 Printf.printf "✗ Invalid hash length for %s: expected %d chars, got %d\n" 244 filename expected_len (String.length hash_str); 245 incr invalid_count 246 ) 247 ) entries; 248 249 if !invalid_count = 0 then 250 Printf.printf "✓ All %d hash formats are valid\n" !valid_count 251 else 252 Printf.printf "✗ Found %d invalid hashes out of %d total\n" !invalid_count total 253 ); 254 255 `Ok () 256 with 257 | exn -> `Error (false, "Registry validation failed: " ^ (Printexc.to_string exn)) 258 in 259 260 let term eio_env = Term.(ret (const validate $ global_opts_t eio_env $ registry_source $ check_hashes)) in 261 let info = Cmd.info "validate" ~doc:"Validate a registry file format and integrity" in 262 Cmd.v info (term eio_env) 263 264(** Registry convert command *) 265let convert_cmd eio_env = 266 let input_registry = 267 let doc = "Input registry file path or URL" in 268 Arg.(required & pos 0 (some string) None & info [] ~docv:"INPUT" ~doc) 269 in 270 271 let output_file = 272 let doc = "Output registry file path" in 273 Arg.(required & pos 1 (some string) None & info [] ~docv:"OUTPUT" ~doc) 274 in 275 276 let convert global_opts input_registry output_file = 277 try 278 Toru.Logging.Cli.info (fun m -> m "Converting %s -> %s" input_registry output_file); 279 280 (* Load input registry *) 281 let registry = 282 if (String.starts_with ~prefix:"http://" input_registry || 283 String.starts_with ~prefix:"https://" input_registry) then ( 284 Toru.Logging.Cli.debug (fun m -> m "Detected URL, using load_from_url"); 285 Registry.load_from_url input_registry 286 ) else ( 287 Toru.Logging.Cli.debug (fun m -> m "Detected file path, using Eio"); 288 (* Use Eio for file loading *) 289 Registry.load (Eio.Path.(global_opts.env#fs / input_registry)) 290 ) 291 in 292 293 (* Save to output file using Eio *) 294 Registry.save (Eio.Path.(global_opts.env#fs / output_file)) registry; 295 296 let count = Registry.size registry in 297 Printf.printf "✓ Converted %d entries from %s to %s\n" count input_registry output_file; 298 299 `Ok () 300 with 301 | exn -> `Error (false, "Conversion failed: " ^ (Printexc.to_string exn)) 302 in 303 304 let term eio_env = Term.(ret (const convert $ global_opts_t eio_env $ input_registry $ output_file)) in 305 let info = Cmd.info "convert" ~doc:"Convert registry between different formats or sources" in 306 Cmd.v info (term eio_env) 307 308(** Main command *) 309let main_cmd env = 310 let doc = "Toru data repository management tool" in 311 let man = [ 312 `S Cmdliner.Manpage.s_description; 313 `P "Toru is an OCaml data repository manager compatible with Python Pooch registry files."; 314 `P "It provides automatic downloading, caching, and hash verification of data files from remote repositories."; 315 `S "ENVIRONMENT VARIABLES"; 316 `P "Toru respects the XDG Base Directory Specification:"; 317 `P "XDG_CACHE_HOME - Override default cache directory location"; 318 `P "TORU_CACHE_DIR - Application-specific cache directory override"; 319 `S Cmdliner.Manpage.s_examples; 320 `P "Inspect a registry file:"; 321 `P "$(b,toru inspect registry.txt)"; 322 `P "Show detailed statistics:"; 323 `P "$(b,toru inspect --stats registry.txt)"; 324 `P "Validate registry with hash checking:"; 325 `P "$(b,toru validate --check-hashes registry.txt)"; 326 ] in 327 let info = Cmd.info "toru" ~version:"0.1.0" ~doc ~man in 328 let default_term = Term.(ret (const (`Help (`Pager, None)))) in 329 Cmd.group info ~default:default_term [ 330 inspect_cmd env; 331 validate_cmd env; 332 convert_cmd env; 333 ] 334 335let () = 336 (* Run the entire CLI inside Eio_main.run *) 337 print_endline "initialising eio"; 338 Eio_main.run @@ fun env -> 339 print_endline "starting"; 340 exit (Cmd.eval (main_cmd env))