My agentic slop goes here. Not intended for anyone else!
at main 8.5 kB view raw
1(** CLI tool for generating Pooch-compatible registry files from directories *) 2 3open Cmdliner 4open Eio.Std 5 6(* CLI argument types *) 7type output_format = Pooch | JSON 8type path_format = Relative | Absolute 9 10(* CLI arguments *) 11let directory_arg = 12 let doc = "Directory to scan for files" in 13 Arg.(required & pos 0 (some dir) None & info [] ~docv:"DIRECTORY" ~doc) 14 15let output_arg = 16 let doc = "Output file for registry (default: stdout)" in 17 Arg.(value & pos 1 (some string) None & info [] ~docv:"OUTPUT" ~doc) 18 19let recursive_arg = 20 let doc = "Scan directories recursively" in 21 Arg.(value & flag & info ["r"; "recursive"] ~doc) 22 23let follow_symlinks_arg = 24 let doc = "Follow symbolic links during traversal" in 25 Arg.(value & flag & info ["L"; "follow-symlinks"] ~doc) 26 27let algorithm_arg = 28 let algorithms = [("sha256", Toru.Hash.SHA256); ("sha1", Toru.Hash.SHA1); ("md5", Toru.Hash.MD5)] in 29 let doc = "Hash algorithm to use: sha256, sha1, or md5" in 30 Arg.(value & opt (enum algorithms) Toru.Hash.SHA256 & info ["a"; "algorithm"] ~docv:"ALGO" ~doc) 31 32let exclude_arg = 33 let doc = "Exclude files matching glob pattern (can be repeated)" in 34 Arg.(value & opt_all string [] & info ["e"; "exclude"] ~docv:"PATTERN" ~doc) 35 36let include_hidden_arg = 37 let doc = "Include hidden files (starting with .)" in 38 Arg.(value & flag & info ["H"; "include-hidden"] ~doc) 39 40let update_arg = 41 let doc = "Update existing registry file instead of creating new one" in 42 Arg.(value & opt (some file) None & info ["u"; "update"] ~docv:"FILE" ~doc) 43 44let progress_arg = 45 let doc = "Show progress during scanning" in 46 Arg.(value & flag & info ["p"; "progress"] ~doc) 47 48let format_arg = 49 let formats = [("pooch", Pooch); ("json", JSON)] in 50 let doc = "Output format: pooch or json" in 51 Arg.(value & opt (enum formats) Pooch & info ["f"; "format"] ~docv:"FORMAT" ~doc) 52 53let path_format_arg = 54 let formats = [("relative", Relative); ("absolute", Absolute)] in 55 let doc = "Path format in output: relative or absolute" in 56 Arg.(value & opt (enum formats) Relative & info ["path-format"] ~docv:"FORMAT" ~doc) 57 58(* Progress reporting *) 59let create_progress_reporter show_progress = 60 if show_progress then ( 61 let last_update = ref (Unix.gettimeofday ()) in 62 fun filename current total -> 63 let now = Unix.gettimeofday () in 64 if now -. !last_update > 0.1 || current = total then ( 65 last_update := now; 66 let percentage = if total > 0 then (current * 100) / total else 0 in 67 Printf.eprintf "\r\027[K[%3d%%] %s (%d/%d)" percentage filename current total; 68 if current = total then Printf.eprintf "\n"; 69 flush stderr 70 ) 71 ) else ( 72 fun _ _ _ -> () 73 ) 74 75(* Output functions *) 76let output_registry format registry output_file = 77 let content = match format with 78 | Pooch -> 79 let header = Printf.sprintf "# Generated by toru-make-registry on %s\n# Algorithm: %s\n" 80 (Ptime.to_rfc3339 (Ptime_clock.now ())) 81 (Toru.Hash.algorithm_to_string Toru.Hash.SHA256) 82 in 83 header ^ Toru.Registry.to_string registry 84 | JSON -> 85 (* For JSON, we need enhanced entries *) 86 failwith "JSON output requires enhanced entries (not yet implemented in this path)" 87 in 88 match output_file with 89 | Some filename -> 90 let oc = open_out filename in 91 output_string oc content; 92 close_out oc; 93 Printf.printf "Registry written to %s\n" filename 94 | None -> 95 print_string content 96 97let output_enhanced_entries format enhanced_entries algorithm output_file = 98 let content = match format with 99 | Pooch -> 100 let header = Printf.sprintf "# Generated by toru-make-registry on %s\n# Algorithm: %s\n" 101 (Ptime.to_rfc3339 (Ptime_clock.now ())) 102 (Toru.Hash.algorithm_to_string algorithm) 103 in 104 let entries_str = String.concat "\n" (List.map (fun enhanced_entry -> 105 let entry = Toru.Make_registry.get_entry enhanced_entry in 106 let filename = Toru.Registry.filename entry in 107 let hash = Toru.Registry.hash entry in 108 Printf.sprintf "%s %s" filename (Toru.Hash.value hash) 109 ) enhanced_entries) in 110 header ^ entries_str ^ "\n" 111 | JSON -> 112 let json = Toru.Make_registry.enhanced_entries_to_json 113 ~algorithm ~generated:(Ptime_clock.now ()) enhanced_entries in 114 Yojson.Safe.pretty_to_string json 115 in 116 match output_file with 117 | Some filename -> 118 let oc = open_out filename in 119 output_string oc content; 120 close_out oc; 121 Printf.printf "Registry written to %s\n" filename 122 | None -> 123 print_string content 124 125(* Main function *) 126let make_registry_main directory output recursive follow_symlinks algorithm 127 excludes include_hidden update_file show_progress format path_format = 128 129 Eio_main.run @@ fun env -> 130 Eio.Switch.run @@ fun sw -> 131 try 132 let dir_path = env#fs |> Eio.Path.(fun fs -> fs / directory) in 133 134 let options = { 135 Toru.Make_registry.recursive; 136 follow_symlinks; 137 hash_algorithm = algorithm; 138 exclude_patterns = excludes; 139 include_hidden; 140 } in 141 142 let progress_fn = create_progress_reporter show_progress in 143 144 let result = match update_file with 145 | Some update_filename -> 146 (* Update existing registry *) 147 let existing_registry = 148 let update_path = env#fs |> Eio.Path.(fun fs -> fs / update_filename) in 149 Toru.Registry.load update_path 150 in 151 if show_progress then Printf.eprintf "Updating registry from %s...\n" update_filename; 152 let updated_registry = Toru.Make_registry.update_registry ~sw ~env ~options 153 existing_registry dir_path in 154 output_registry format updated_registry output; 155 Ok () 156 157 | None -> 158 (* Create new registry *) 159 if show_progress then Printf.eprintf "Scanning directory %s...\n" directory; 160 let enhanced_entries = Toru.Make_registry.scan_directory_enhanced ~sw ~env ~options dir_path in 161 162 (* Apply path format conversion if needed *) 163 let processed_entries = match path_format with 164 | Relative -> enhanced_entries 165 | Absolute -> 166 List.map (fun enhanced_entry -> 167 let metadata = Toru.Make_registry.get_metadata enhanced_entry in 168 let entry = Toru.Make_registry.get_entry enhanced_entry in 169 let abs_filename = metadata.absolute_path in 170 let abs_entry = Toru.Registry.create_entry 171 ~filename:abs_filename 172 ~hash:(Toru.Registry.hash entry) () in 173 Toru.Make_registry.update_entry enhanced_entry abs_entry 174 ) enhanced_entries 175 in 176 177 output_enhanced_entries format processed_entries algorithm output; 178 Ok () 179 in 180 181 match result with 182 | Ok () -> 0 183 | Error msg -> 184 Printf.eprintf "Error: %s\n" msg; 185 1 186 187 with 188 | exn -> 189 Printf.eprintf "Error: %s\n" (Printexc.to_string exn); 190 1 191 192(* Command definition *) 193let cmd = 194 let doc = "Generate Pooch-compatible registry files from directories" in 195 let man = [ 196 `S Manpage.s_description; 197 `P "$(tname) scans directories and generates registry files compatible with Python Pooch library."; 198 `P "The registry format is: 'filename hash' per line, with optional comments starting with #."; 199 `S Manpage.s_examples; 200 `P "Generate registry for data directory:"; 201 `P "$(tname) data/ registry.txt"; 202 `P ""; 203 `P "Recursive scan with SHA256 and exclude patterns:"; 204 `P "$(tname) -r -a sha256 -e '*.tmp' -e '*.log' ./dataset/"; 205 `P ""; 206 `P "Update existing registry with progress:"; 207 `P "$(tname) --update existing.txt --progress data/"; 208 `P ""; 209 `P "Generate JSON format with absolute paths:"; 210 `P "$(tname) --format json --path-format absolute data/"; 211 ] in 212 213 let info = Cmd.info "toru-make-registry" ~version:"1.0" ~doc ~man in 214 215 Cmd.v info Term.(const make_registry_main 216 $ directory_arg $ output_arg $ recursive_arg $ follow_symlinks_arg 217 $ algorithm_arg $ exclude_arg $ include_hidden_arg $ update_arg 218 $ progress_arg $ format_arg $ path_format_arg) 219 220let () = exit (Cmd.eval cmd)