My agentic slop goes here. Not intended for anyone else!

more

+1
stack/dune-project
···
+
(lang dune 3.20)
+12 -1
stack/toru/bin/dune
···
+
(executable
+
(public_name toru)
+
(name toru_cli)
+
(modules toru_cli)
+
(libraries toru.cmd))
+
(executable
(public_name tessera-loader)
(name tessera_loader)
+
(modules tessera_loader)
(libraries toru cmdliner unix))
(executable
(public_name toru-cache)
(name toru_cache)
+
(modules toru_cache)
(libraries toru cmdliner yojson fmt fmt.tty ptime ptime.clock.os unix eio_main))
(executable
(public_name toru-make-registry-simple)
(name toru_make_registry_simple)
+
(modules toru_make_registry_simple)
(libraries toru cmdliner ptime ptime.clock.os eio_main))
(executable
-
(public_name toru)
+
(public_name toru-main)
(name toru_main)
+
(modules toru_main)
(libraries toru cmdliner unix eio_main logs fmt xdge))
;; Complex version with enhanced features (disabled until field access is resolved)
;; (executable
;; (public_name toru-make-registry)
;; (name toru_make_registry)
+
;; (modules toru_make_registry)
;; (libraries toru cmdliner yojson ptime ptime.clock.os unix eio_main))
+6
stack/toru/bin/toru_cli.ml
···
+
(** Toru CLI - Command-line interface for Toru data management *)
+
+
module Cmd = Toru_cmd.Cmd
+
+
let () =
+
exit (Cmdliner.Cmd.eval' Cmd.main_cmd)
+90 -109
stack/toru/lib/toru/cache.ml
···
-
(** Cache module for managing local file storage
-
Uses the simplified cacheio library for storage *)
+
(** Cache management for Toru using cacheio directly
-
open Eio
-
-
(** File info: size in bytes and modification time *)
-
type file_info = {
-
size: int64;
-
mtime: float;
-
}
-
-
(** Cache usage statistics *)
-
type usage_stats = {
-
total_size: int64;
-
file_count: int;
-
oldest: float;
-
newest: float;
-
}
+
This module provides a thin layer over cacheio for Toru-specific
+
cache operations, maintaining compatibility with Pooch's cache structure.
+
*)
type t = {
cacheio : Cacheio.t;
-
base_path : Fs.dir_ty Path.t;
+
base_path : Eio.Fs.dir_ty Eio.Path.t;
version : string option;
}
+
+
(** {1 Creation} *)
let create ~sw:_ ~fs ?version path_str =
-
let base_path = Path.(fs / path_str) in
+
let base_path = Eio.Path.(fs / path_str) in
(* Ensure base directory exists *)
-
(try Path.mkdir ~perm:0o755 base_path with _ -> ());
+
Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 base_path;
-
(* If version is specified, create versioned subdirectory *)
+
(* Create versioned subdirectory if specified *)
let cache_dir = match version with
| Some v ->
-
let versioned = Path.(base_path / v) in
-
(try Path.mkdir ~perm:0o755 versioned with _ -> ());
+
let versioned = Eio.Path.(base_path / v) in
+
Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 versioned;
versioned
| None -> base_path
in
···
let cacheio = Cacheio.create ~base_dir:cache_dir in
{ cacheio; base_path; version }
-
let default_cache_path ?app_name () =
-
let app_name = Option.value app_name ~default:"toru" in
-
(* Use XDG_CACHE_HOME if set, otherwise ~/.cache *)
-
let cache_home =
-
match Sys.getenv_opt "XDG_CACHE_HOME" with
-
| Some dir -> dir
-
| None ->
-
match Sys.getenv_opt "HOME" with
-
| Some home -> Filename.concat home ".cache"
-
| None -> "/tmp/.cache"
-
in
-
Filename.concat cache_home app_name
+
let default_cache_path ?(app_name="toru") () =
+
(* Use XDG_CACHE_HOME if set, otherwise platform-specific defaults *)
+
match Sys.getenv_opt "XDG_CACHE_HOME" with
+
| Some dir -> Filename.concat dir app_name
+
| None ->
+
match Sys.getenv_opt "HOME" with
+
| Some home ->
+
if Sys.os_type = "Win32" || Sys.os_type = "Cygwin" then
+
Filename.concat home (Filename.concat "AppData" (Filename.concat "Local" app_name))
+
else
+
Filename.concat home (Filename.concat ".cache" app_name)
+
| None -> Filename.concat "/tmp" app_name
+
+
(** {1 Accessors} *)
let base_path t = t.base_path
let version t = t.version
+
let cacheio t = t.cacheio
-
let cache_path t =
-
match t.version with
-
| Some v -> Path.(t.base_path / v)
-
| None -> t.base_path
+
(** {1 File Operations} *)
let file_path t filename =
-
(* For compatibility, we need to handle files that might be stored with their path structure *)
-
(* The new cacheio will hash the entire filename including any path separators *)
-
Path.(cache_path t / filename)
+
(* For Pooch compatibility, preserve the filename structure *)
+
Eio.Path.(
+
match t.version with
+
| Some v -> t.base_path / v / filename
+
| None -> t.base_path / filename
+
)
let exists t filename =
Cacheio.exists t.cacheio ~key:filename
-
let ensure_dir _ =
-
(* Base directory is already ensured during creation *)
-
()
+
let get_stream t ~sw filename =
+
Cacheio.get t.cacheio ~key:filename ~sw
+
+
let put_stream t ~source filename =
+
(* Store with no TTL for permanent caching *)
+
Cacheio.put t.cacheio ~key:filename ~source ()
+
+
let delete t filename =
+
Cacheio.delete t.cacheio ~key:filename
+
+
let size t filename =
+
Cacheio.size t.cacheio ~key:filename
+
+
(** {1 Cache Management} *)
+
+
let ensure_dir t =
+
(* Directory is ensured during creation *)
+
ignore t
let clear t =
-
(* Clear all non-pinned entries *)
Cacheio.clear t.cacheio
+
let stats t =
+
Cacheio.stats t.cacheio
+
+
let expire t =
+
Cacheio.expire t.cacheio
+
+
(** {1 Chunked Download Support} *)
+
+
let put_chunk t ~filename ~range ~source =
+
Cacheio.put_chunk t.cacheio ~key:filename ~range ~source ()
+
+
let get_chunks t ~filename =
+
Cacheio.list_chunks t.cacheio ~key:filename
+
+
let has_complete_chunks t ~filename ~total_size =
+
Cacheio.has_complete_chunks t.cacheio ~key:filename ~total_size
+
+
let missing_ranges t ~filename ~total_size =
+
let ranges = Cacheio.missing_ranges t.cacheio ~key:filename ~total_size in
+
List.map (fun range ->
+
(Cacheio.Range.start range, Cacheio.Range.end_ range)
+
) ranges
+
+
let coalesce_chunks t ~filename =
+
(* Mark as pinned when coalescing *)
+
let flags = Cacheio.Flags.add `Pinned Cacheio.Flags.empty in
+
Cacheio.coalesce_chunks t.cacheio ~key:filename ~flags ()
+
+
(** {1 Utilities} *)
+
let size_bytes t =
let stats = Cacheio.stats t.cacheio in
Cacheio.Stats.total_size stats
let list_files t =
let entries = Cacheio.scan t.cacheio in
-
(* Return the original keys (which are hashes, but we'll treat them as filenames) *)
-
List.map Cacheio.Entry.key entries
+
List.map (fun entry -> Cacheio.Entry.key entry) entries
-
(** Get file information *)
-
let file_info t filename =
-
(* Use scan to find the entry and get its info *)
-
let entries = Cacheio.scan t.cacheio in
-
match List.find_opt (fun e -> Cacheio.Entry.key e = filename) entries with
-
| None -> None
-
| Some entry -> Some { size = Cacheio.Entry.size entry; mtime = Cacheio.Entry.mtime entry }
+
(** {1 Pretty Printing} *)
-
(** Get usage statistics *)
-
let usage_stats t =
-
let entries = Cacheio.scan t.cacheio in
-
let total_size = List.fold_left (fun acc e -> Int64.add acc (Cacheio.Entry.size e)) 0L entries in
-
let file_count = List.length entries in
-
let times = List.map Cacheio.Entry.mtime entries in
-
let oldest = List.fold_left min Float.max_float times in
-
let newest = List.fold_left max Float.min_float times in
-
{ total_size; file_count; oldest; newest }
-
-
(* Internal helper functions removed - functionality merged into public API functions *)
-
-
(** Remove oldest files to fit within size limit *)
-
let trim_to_size t max_bytes =
-
let rec trim_until current_size =
-
if current_size <= max_bytes then ()
-
else
-
let entries = Cacheio.scan t.cacheio in
-
(* Sort by mtime, oldest first, excluding pinned *)
-
let unpinned = List.filter (fun e ->
-
not (Cacheio.Entry.is_pinned e)
-
) entries in
-
let sorted = List.sort Cacheio.Entry.compare_by_mtime unpinned in
-
match sorted with
-
| [] -> () (* Nothing left to remove *)
-
| oldest :: _ ->
-
Cacheio.delete t.cacheio ~key:(Cacheio.Entry.key oldest);
-
let new_size = Int64.sub current_size (Cacheio.Entry.size oldest) in
-
trim_until new_size
-
in
-
let current = size_bytes t in
-
trim_until current
-
-
(** Remove files older than N days *)
-
let trim_by_age t days =
-
let cutoff = Unix.time () -. (days *. 86400.) in
-
let open Cacheio in
-
let entries = scan t.cacheio in
-
List.iter (fun e ->
-
if Cacheio.Entry.mtime e < cutoff && not (Cacheio.Entry.is_pinned e) then
-
Cacheio.delete t.cacheio ~key:(Cacheio.Entry.key e)
-
) entries
-
-
(** Remove empty directories and broken links *)
-
let vacuum t =
-
(* For now, just expire old entries - cacheio handles cleanup *)
-
let _expired = Cacheio.expire t.cacheio in
-
()
-
-
(** Pretty printer for cache *)
let pp fmt t =
-
let stats = usage_stats t in
+
let stats = Cacheio.stats t.cacheio in
+
let entry_count = Cacheio.Stats.entry_count stats in
+
let total_size = Cacheio.Stats.total_size stats in
Format.fprintf fmt "Cache[path=%s, version=%s, files=%d, size=%Ld]"
-
(Path.native_exn t.base_path)
+
(Eio.Path.native_exn t.base_path)
(Option.value t.version ~default:"none")
-
stats.file_count
-
stats.total_size
+
entry_count
+
total_size
+44 -34
stack/toru/lib/toru/cache.mli
···
-
(** Cache module for managing local file storage *)
+
(** Cache module for managing local file storage using cacheio *)
(** Abstract cache type *)
type t
-
(** {1 Construction} *)
+
(** {1 Creation} *)
(** Create cache with explicit path *)
val create : sw:Eio.Switch.t -> fs:Eio.Fs.dir_ty Eio.Path.t ->
?version:string -> string -> t
-
(** {1 Field accessors} *)
+
(** {1 Accessors} *)
(** Get base path of cache *)
val base_path : t -> Eio.Fs.dir_ty Eio.Path.t
···
(** Get version string (if any) *)
val version : t -> string option
-
(** {1 Operations} *)
+
(** Get underlying cacheio instance *)
+
val cacheio : t -> Cacheio.t
+
+
(** {1 File Operations} *)
(** Get full path for a filename within cache *)
val file_path : t -> string -> Eio.Fs.dir_ty Eio.Path.t
···
(** Check if file exists in cache *)
val exists : t -> string -> bool
+
(** Get stream for reading cached file *)
+
val get_stream : t -> sw:Eio.Switch.t -> string -> Eio.Flow.source_ty Eio.Resource.t option
+
+
(** Put stream to cache *)
+
val put_stream : t -> source:Eio.Flow.source_ty Eio.Resource.t -> string -> unit
+
+
(** Delete file from cache *)
+
val delete : t -> string -> unit
+
+
(** Get size of cached file *)
+
val size : t -> string -> int64 option
+
+
(** {1 Cache Management} *)
+
(** Ensure cache directory exists *)
val ensure_dir : t -> unit
(** Clear all files from cache *)
val clear : t -> unit
-
(** Get total size of cache in bytes *)
-
val size_bytes : t -> int64
+
(** Get cache statistics *)
+
val stats : t -> Cacheio.Stats.t
-
(** List all files in cache *)
-
val list_files : t -> string list
-
-
(** {1 Cache Management} *)
-
-
(** File info: size in bytes and modification time *)
-
type file_info = {
-
size: int64;
-
mtime: float;
-
}
+
(** Expire old cache entries *)
+
val expire : t -> int
-
(** Cache usage statistics *)
-
type usage_stats = {
-
total_size: int64;
-
file_count: int;
-
oldest: float;
-
newest: float;
-
}
+
(** {1 Chunked Download Support} *)
-
(** Remove oldest files to fit within size limit *)
-
val trim_to_size : t -> int64 -> unit
+
(** Put a chunk to cache *)
+
val put_chunk : t -> filename:string -> range:Cacheio.Range.t ->
+
source:Eio.Flow.source_ty Eio.Resource.t -> unit
-
(** Remove files older than N days *)
-
val trim_by_age : t -> float -> unit
+
(** Get list of chunks for a file *)
+
val get_chunks : t -> filename:string -> Cacheio.Chunk.t list
-
(** Get file info (size and mtime) *)
-
val file_info : t -> string -> file_info option
+
(** Check if all chunks are present *)
+
val has_complete_chunks : t -> filename:string -> total_size:int64 -> bool
-
(** Get cache usage statistics *)
-
val usage_stats : t -> usage_stats
+
(** Get missing byte ranges *)
+
val missing_ranges : t -> filename:string -> total_size:int64 -> (int64 * int64) list
-
(** Remove empty directories and broken links *)
-
val vacuum : t -> unit
+
(** Coalesce chunks into complete file *)
+
val coalesce_chunks : t -> filename:string -> (unit, exn) result Eio.Promise.t
(** {1 Utilities} *)
(** Get default cache path for application *)
val default_cache_path : ?app_name:string -> unit -> string
+
(** Get total size of cache in bytes *)
+
val size_bytes : t -> int64
+
+
(** List all files in cache *)
+
val list_files : t -> string list
+
(** {1 Pretty printing} *)
(** Pretty printer for cache *)
-
val pp : Format.formatter -> t -> unit
+
val pp : Format.formatter -> t -> unit
+221 -292
stack/toru/lib/toru/downloader.ml
···
-
(** Downloader module - now using requests library for HTTP operations *)
+
(** HTTP downloader module using requests library
+
+
This module provides download functionality for Toru,
+
leveraging the requests library for all HTTP operations.
+
*)
+
+
let src = Logs.Src.create "toru.downloader" ~doc:"Toru downloader"
+
module Log = (val Logs.src_log src : Logs.LOG)
+
+
(** {1 Progress Reporting} *)
-
module Progress_reporter = struct
+
module Progress = struct
type t = {
name : string;
total_bytes : int64 option;
mutable current_bytes : int64;
-
mutable last_percent : int;
+
mutable last_report_time : float;
+
mutable last_report_bytes : int64;
}
let create ?total_bytes name =
-
Printf.printf "Starting download: %s\n%!" name;
-
{ name; total_bytes; current_bytes = 0L; last_percent = -1 }
+
Log.info (fun m -> m "Starting download: %s" name);
+
{
+
name;
+
total_bytes;
+
current_bytes = 0L;
+
last_report_time = Unix.gettimeofday ();
+
last_report_bytes = 0L;
+
}
let update t bytes =
t.current_bytes <- bytes;
-
match t.total_bytes with
-
| Some total when total > 0L ->
-
let percent = Int64.(to_int (div (mul bytes 100L) total)) in
-
if percent > t.last_percent && percent mod 10 = 0 then (
-
t.last_percent <- percent;
-
Printf.printf "\r%s: %d%% (%Ld/%Ld bytes)%!"
-
t.name percent bytes total
-
)
-
| _ ->
-
(* Unknown size, just show bytes downloaded *)
-
if Int64.rem bytes 1048576L = 0L then (* Every MB *)
-
Printf.printf "\r%s: %Ld bytes downloaded%!" t.name bytes
+
let now = Unix.gettimeofday () in
+
let time_diff = now -. t.last_report_time in
-
let finish t =
-
Printf.printf "\n%s: Complete (%Ld bytes)\n%!" t.name t.current_bytes
-
end
+
(* Report every second *)
+
if time_diff >= 1.0 then begin
+
let bytes_diff = Int64.sub bytes t.last_report_bytes in
+
let speed = Int64.to_float bytes_diff /. time_diff in
-
module Config = struct
-
type auth = {
-
username : string option;
-
password : string option;
-
}
-
end
+
match t.total_bytes with
+
| Some total when total > 0L ->
+
let percent = Int64.(to_int (div (mul bytes 100L) total)) in
+
Log.info (fun m -> m "%s: %d%% (%Ld/%Ld bytes) - %.1f KB/s"
+
t.name percent bytes total (speed /. 1024.0))
+
| _ ->
+
Log.info (fun m -> m "%s: %Ld bytes downloaded - %.1f KB/s"
+
t.name bytes (speed /. 1024.0));
-
module type DOWNLOADER = sig
-
type t
+
t.last_report_time <- now;
+
t.last_report_bytes <- bytes
+
end
-
val create : sw:Eio.Switch.t -> env:Eio_unix.Stdenv.base ->
-
?auth:Config.auth -> unit -> t
-
-
val download : t ->
-
url:string ->
-
dest:Eio.Fs.dir_ty Eio.Path.t ->
-
?hash:Hash.t ->
-
?progress:Progress_reporter.t ->
-
?resume:bool ->
-
unit -> (unit, string) result
-
-
val supports_resume : t -> bool
-
val name : t -> string
+
let finish t =
+
Log.info (fun m -> m "%s: Download complete (%Ld bytes)" t.name t.current_bytes)
end
-
(** HTTP downloader using requests library *)
-
module Http_downloader = struct
-
type t = {
-
sw : Eio.Switch.t;
-
env : Eio_unix.Stdenv.base;
-
net : Eio_unix.Stdenv.base;
-
auth : Requests.Auth.t option;
-
}
+
(** {1 Downloader Types} *)
-
let create ~sw ~env ?auth () =
-
(* Convert auth config to Requests.Auth.t *)
-
let auth = Option.map (fun a ->
-
match a.Config.username, a.Config.password with
-
| Some u, Some p -> Requests.Auth.basic ~username:u ~password:p
-
| _ -> Requests.Auth.none
-
) auth in
+
type config = {
+
timeout : float;
+
max_redirects : int;
+
retry : Requests.Retry.config option;
+
auth : Requests.Auth.t option;
+
headers : Requests.Headers.t;
+
verify_tls : bool;
+
}
-
{ sw; env; net = env; auth }
+
type ('clock, 'net) t_impl = {
+
session : ('clock, 'net) Requests.t;
+
config : config;
+
}
-
let download t ~url ~dest ?hash ?progress ?(resume=false) () =
-
let _ = resume in (* TODO: implement resume support with range requests *)
-
try
-
let uri = Uri.of_string url in
-
let dest_path = Eio.Path.native_exn dest in
+
type t = T : ('clock Eio.Time.clock, 'net Eio.Net.t) t_impl -> t
-
(* Create a client for this request *)
-
let client = Requests.create
-
~tls_config:(Requests.Tls.default ())
-
~clock:t.env#clock
-
t.env#net
-
in
+
(** {1 Configuration} *)
-
(* Configure request *)
-
let config =
-
let base_config = Requests.Config.create
-
~timeout:300.0
-
~follow_redirects:true
-
~max_redirects:10
-
() in
-
match t.auth with
-
| Some auth -> Requests.Config.create ~auth ~timeout:300.0 ~follow_redirects:true ()
-
| None -> base_config
-
in
-
-
(* Download the file *)
-
Requests.download_file ~sw:t.sw client ~config uri ~path:dest;
-
-
(* Update progress if provided *)
-
Option.iter (fun p ->
-
(* Get file size for final progress update *)
-
let stat = Eio.Path.stat ~follow:false dest in
-
let size = Optint.Int63.to_int64 stat.size in
-
Progress_reporter.update p size;
-
Progress_reporter.finish p
-
) progress;
-
-
(* Verify hash if provided *)
-
match hash with
-
| Some h ->
-
if Hash.verify dest h then
-
Ok ()
-
else
-
Error ("Hash verification failed for " ^ dest_path)
-
| None -> Ok ()
-
-
with
-
| Requests.Request_error err ->
-
Error (Format.asprintf "HTTP download failed: %a" Requests.pp_error err)
-
| exn ->
-
Error ("Download failed: " ^ Printexc.to_string exn)
-
-
let supports_resume _ = false (* Requests library doesn't support resume yet *)
-
let name _ = "http"
-
end
+
let default_config = {
+
timeout = 300.0;
+
max_redirects = 10;
+
retry = Some (Requests.Retry.create_config ~max_retries:3 ~backoff_factor:1.0 ());
+
auth = None;
+
headers = Requests.Headers.empty;
+
verify_tls = true;
+
}
-
(** Legacy wget downloader for systems that prefer external tools *)
-
module Wget_downloader = struct
-
type t = {
-
sw : Eio.Switch.t;
-
env : Eio_unix.Stdenv.base;
-
auth : Config.auth option;
-
timeout : float;
+
let create_config ?timeout ?max_redirects ?retry ?auth ?headers ?verify_tls () =
+
let base = default_config in
+
{
+
timeout = Option.value timeout ~default:base.timeout;
+
max_redirects = Option.value max_redirects ~default:base.max_redirects;
+
retry = Option.value retry ~default:base.retry;
+
auth = Option.value auth ~default:base.auth;
+
headers = Option.value headers ~default:base.headers;
+
verify_tls = Option.value verify_tls ~default:base.verify_tls;
}
-
let create ~sw ~env ?auth () = { sw; env; auth; timeout = 300.0 }
+
(** {1 Creation} *)
-
let download t ~url ~dest ?hash ?progress:_ ?(resume=true) () =
-
let dest_path = Eio.Path.native_exn dest in
+
let create ~sw ~env ?config () =
+
let config = Option.value config ~default:default_config in
-
(* Build wget arguments *)
-
let args = [
-
"--quiet";
-
"--show-progress";
-
"--timeout=300";
-
"--tries=3";
-
"--output-document=" ^ dest_path;
-
] in
+
(* Create requests session with configuration *)
+
let session = Requests.create
+
~sw
+
~default_headers:config.headers
+
?auth:config.auth
+
~timeout:(Requests.Timeout.create ~total:config.timeout ())
+
~follow_redirects:true
+
~max_redirects:config.max_redirects
+
~verify_tls:config.verify_tls
+
?retry:config.retry
+
env
+
in
-
(* Add authentication if provided *)
-
let args = Option.fold t.auth ~none:args ~some:(fun auth ->
-
let user_arg = Option.map (fun u -> "--user=" ^ u) auth.Config.username
-
|> Option.to_list in
-
let pass_arg = Option.map (fun p -> "--password=" ^ p) auth.Config.password
-
|> Option.to_list in
-
user_arg @ pass_arg @ args) in
+
T { session; config }
-
(* Add resume support if enabled *)
-
let args = if resume then "--continue" :: args else args in
+
(** {1 Download Operations} *)
-
(* Add URL as last argument *)
-
let args = args @ [url] in
+
let download_to_stream (T t) ~url ~sink ?progress ?range () =
+
try
+
(* Add range header if resuming *)
+
let headers = match range with
+
| Some (start, end_) ->
+
let range_value = match end_ with
+
| Some e -> Printf.sprintf "bytes=%Ld-%Ld" start e
+
| None -> Printf.sprintf "bytes=%Ld-" start
+
in
+
Requests.Headers.of_list ["Range", range_value]
+
| None -> Requests.Headers.empty
+
in
-
(* Build command line with wget command *)
-
let cmd_args = "wget" :: args in
+
(* Make the request *)
+
let response = Requests.get t.session ~headers url in
-
try
-
(* Run wget using Eio process manager *)
-
let cmd_line = String.concat " " (List.map Filename.quote cmd_args) in
-
let process = Eio.Process.spawn t.env#process_mgr ~sw:t.sw
-
~executable:"/bin/sh" ["/bin/sh"; "-c"; cmd_line] in
-
let exit_status = Eio.Process.await process in
-
if exit_status <> `Exited 0 then
-
let error_msg = match exit_status with
-
| `Exited n -> Printf.sprintf "wget exited with code %d" n
-
| `Signaled n -> Printf.sprintf "wget killed by signal %d" n in
-
Error ("Download failed: " ^ error_msg)
-
else (
-
(* Verify hash if provided *)
-
match hash with
-
| Some h ->
-
if Hash.verify dest h then
-
Ok ()
-
else
-
Error ("Hash verification failed for " ^ dest_path)
-
| None -> Ok ()
-
)
-
with
-
| exn -> Error ("wget failed: " ^ (Printexc.to_string exn))
+
(* Check status *)
+
let status = Requests.Response.status response in
+
if not (Requests.Status.is_success status || Requests.Status.to_int status = 206) then
+
Error (Printf.sprintf "HTTP %d: %s" (Requests.Status.to_int status) (Requests.Status.to_string status))
+
else begin
+
(* Progress reporting is initialized with total_bytes at creation *)
+
(* We could get content-length from headers but progress is already set up *)
+
let _content_length =
+
Requests.Response.headers response
+
|> Requests.Headers.get "content-length"
+
|> Option.map Int64.of_string
+
in
-
let supports_resume _ = true
-
let name _ = "wget"
-
end
-
-
(** Legacy curl downloader for systems that prefer external tools *)
-
module Curl_downloader = struct
-
type t = {
-
sw : Eio.Switch.t;
-
env : Eio_unix.Stdenv.base;
-
auth : Config.auth option;
-
timeout : float;
-
}
-
-
let create ~sw ~env ?auth () = { sw; env; auth; timeout = 300.0 }
-
-
let download t ~url ~dest ?hash ?progress:_ ?(resume=true) () =
-
let dest_path = Eio.Path.native_exn dest in
+
(* Stream the body to sink *)
+
let body = Requests.Response.body response in
+
Eio.Flow.copy body sink;
+
(* Progress reporting would need chunk-by-chunk reading which isn't available *)
+
Option.iter Progress.finish progress;
+
Ok ()
+
end
+
with
+
| exn ->
+
Error (Printf.sprintf "Download failed: %s" (Printexc.to_string exn))
-
(* Build curl arguments *)
-
let args = [
-
"--silent";
-
"--show-error";
-
"--location";
-
"--max-time"; "300";
-
"--retry"; "3";
-
"--output"; dest_path;
-
] in
+
let download (T t as downloader) ~url ~dest ?hash ?progress ?resume () =
+
try
+
(* Check if we should resume *)
+
let start_offset = match resume with
+
| Some true when Eio.Path.is_file dest ->
+
let stat = Eio.Path.stat ~follow:false dest in
+
Some (Optint.Int63.to_int64 stat.size)
+
| _ -> None
+
in
-
(* Add authentication if provided *)
-
let args = Option.fold t.auth ~none:args ~some:(fun auth ->
-
let auth_str = match auth.Config.username, auth.Config.password with
-
| Some user, Some pass -> Some (user ^ ":" ^ pass)
-
| Some user, None -> Some user
-
| None, _ -> None in
-
match auth_str with
-
| Some s -> "--user" :: s :: args
-
| None -> args) in
+
(* Open file for writing (append if resuming) *)
+
Eio.Switch.run @@ fun sw ->
+
let sink = match start_offset with
+
| Some _ ->
+
(* For append, open existing file and seek to end *)
+
let file = Eio.Path.open_out ~sw ~create:`Never dest in
+
let (_ : Optint.Int63.t) = Eio.File.seek file (Optint.Int63.zero) `End in
+
file
+
| None ->
+
Eio.Path.open_out ~sw ~create:(`Or_truncate 0o644) dest
+
in
-
(* Add resume support if enabled *)
-
let args = if resume then "--continue-at" :: "-" :: args else args in
+
(* Set range if resuming *)
+
let range = Option.map (fun offset -> (offset, None)) start_offset in
-
(* Add URL as last argument *)
-
let args = args @ [url] in
+
(* Get content length if needed for progress *)
+
let progress = match progress with
+
| Some p when p.Progress.total_bytes = None ->
+
(* Try to get content length *)
+
(try
+
let response = Requests.head t.session url in
+
let headers = Requests.Response.headers response in
+
match Requests.Headers.get "content-length" headers with
+
| Some len -> Some (Progress.create ~total_bytes:(Int64.of_string len) p.Progress.name)
+
| None -> Some p
+
with _ -> Some p)
+
| other -> other
+
in
-
(* Build command line with curl command *)
-
let cmd_args = "curl" :: args in
+
(* Download to the file *)
+
let result = download_to_stream downloader ~url ~sink ?progress ?range () in
+
Eio.Flow.close sink;
-
try
-
(* Run curl using Eio process manager *)
-
let cmd_line = String.concat " " (List.map Filename.quote cmd_args) in
-
let process = Eio.Process.spawn t.env#process_mgr ~sw:t.sw
-
~executable:"/bin/sh" ["/bin/sh"; "-c"; cmd_line] in
-
let exit_status = Eio.Process.await process in
-
if exit_status <> `Exited 0 then
-
let error_msg = match exit_status with
-
| `Exited n -> Printf.sprintf "curl exited with code %d" n
-
| `Signaled n -> Printf.sprintf "curl killed by signal %d" n in
-
Error ("Download failed: " ^ error_msg)
-
else (
-
(* Verify hash if provided *)
+
(* Verify hash if provided *)
+
match result with
+
| Error e -> Error e
+
| Ok () ->
match hash with
| Some h ->
-
if Hash.verify dest h then
-
Ok ()
-
else
-
Error ("Hash verification failed for " ^ dest_path)
+
if Hash.verify dest h then Ok ()
+
else Error "Hash verification failed"
| None -> Ok ()
-
)
-
with
-
| exn -> Error ("curl failed: " ^ (Printexc.to_string exn))
-
-
let supports_resume _ = true
-
let name _ = "curl"
-
end
-
-
(** Cohttp_downloader is now an alias for Http_downloader *)
-
module Cohttp_downloader = Http_downloader
+
with
+
| exn ->
+
Error (Printf.sprintf "Download failed: %s" (Printexc.to_string exn))
-
module Downloaders = struct
-
let wget () = (module Wget_downloader : DOWNLOADER)
-
let curl () = (module Curl_downloader : DOWNLOADER)
-
let cohttp () = (module Http_downloader : DOWNLOADER)
-
let http () = (module Http_downloader : DOWNLOADER)
+
(** {1 Chunked Download Support} *)
-
let detect_available ~env =
-
let check_command cmd =
-
try
-
let sw = Eio.Switch.run @@ fun sw ->
-
let process = Eio.Process.spawn env#process_mgr ~sw
-
~executable:"/bin/sh" ["/bin/sh"; "-c"; cmd ^ " --version > /dev/null 2>&1"] in
-
let exit_status = Eio.Process.await process in
-
exit_status = `Exited 0
-
in sw
-
with _ -> false
+
let download_range (T t) ~url ~start ~end_ =
+
try
+
let headers =
+
let range_value = match end_ with
+
| Some e -> Printf.sprintf "bytes=%Ld-%Ld" start e
+
| None -> Printf.sprintf "bytes=%Ld-" start
+
in
+
Requests.Headers.of_list ["Range", range_value]
in
-
let available = ref [] in
-
-
(* Always available - uses requests library *)
-
available := ("http", http ()) :: !available;
-
-
(* Check for external tools *)
-
if check_command "wget" then
-
available := ("wget", wget ()) :: !available;
-
-
if check_command "curl" then
-
available := ("curl", curl ()) :: !available;
+
let response = Requests.get t.session ~headers url in
+
let status = Requests.Response.status response in
-
List.rev !available
+
if Requests.Status.to_int status = 206 || Requests.Status.is_success status then
+
let body = Requests.Response.body response in
+
let buf = Buffer.create 4096 in
+
Eio.Flow.copy body (Eio.Flow.buffer_sink buf);
+
Ok (Buffer.contents buf)
+
else
+
Error (Printf.sprintf "Range request failed: HTTP %d" (Requests.Status.to_int status))
+
with
+
| exn ->
+
Error (Printf.sprintf "Range download failed: %s" (Printexc.to_string exn))
-
let create_default ~env =
-
(* Default to http (requests library), fallback to wget/curl if needed *)
-
match detect_available ~env with
-
| [] -> failwith "No downloaders available"
-
| (_, d) :: _ -> d
+
let get_content_length (T t) ~url =
+
try
+
(* HEAD request to get content length *)
+
let response = Requests.head t.session url in
+
let headers = Requests.Response.headers response in
+
match Requests.Headers.get "content-length" headers with
+
| Some len -> Ok (Int64.of_string len)
+
| None -> Error "No Content-Length header"
+
with
+
| exn ->
+
Error (Printf.sprintf "Failed to get content length: %s" (Printexc.to_string exn))
-
let of_string name =
-
match name with
-
| "wget" -> Some (wget ())
-
| "curl" -> Some (curl ())
-
| "cohttp" | "http" -> Some (http ())
-
| _ -> None
-
end
+
(** {1 Utility Functions} *)
-
module Cli = struct
-
type downloader_choice = [ `Wget | `Curl | `Cohttp | `Http | `Auto ]
+
let supports_range (T t) ~url =
+
try
+
let response = Requests.head t.session url in
+
let headers = Requests.Response.headers response in
+
match Requests.Headers.get "accept-ranges" headers with
+
| Some "bytes" -> true
+
| _ -> false
+
with _ -> false
-
open Cmdliner
+
let name _ = "requests"
-
let downloader_term =
-
let choices = [
-
("wget", `Wget);
-
("curl", `Curl);
-
("cohttp", `Cohttp);
-
("http", `Http);
-
("auto", `Auto)
-
] in
-
let doc = "Download tool to use. 'auto' detects available tools, 'http' uses built-in HTTP client." in
-
let docv = "TOOL" in
-
Arg.(value & opt (enum choices) `Auto & info ["downloader"; "d"] ~doc ~docv)
+
(** {1 Pretty Printing} *)
-
let downloader_info = Arg.info ["downloader"; "d"]
+
let pp_config fmt config =
+
Format.fprintf fmt "Config{timeout=%.1f, max_redirects=%d, verify_tls=%b}"
+
config.timeout config.max_redirects config.verify_tls
-
let create_downloader ~env = function
-
| `Wget -> Downloaders.wget ()
-
| `Curl -> Downloaders.curl ()
-
| `Cohttp | `Http -> Downloaders.http ()
-
| `Auto -> Downloaders.create_default ~env
-
end
+
let pp fmt (T t) =
+
Format.fprintf fmt "Downloader{engine=%s, %a}"
+
(name (T t)) pp_config t.config
+75 -64
stack/toru/lib/toru/downloader.mli
···
-
(** Downloader module for fetching files from remote sources *)
+
(** HTTP downloader module interface *)
-
(** Progress reporter for download tracking *)
-
module Progress_reporter : sig
+
(** Progress reporting *)
+
module Progress : sig
type t
-
val create : ?total_bytes:int64 -> string -> t
val update : t -> int64 -> unit
val finish : t -> unit
end
-
(** Configuration for authentication *)
-
module Config : sig
-
type auth = {
-
username : string option;
-
password : string option;
-
}
-
end
+
(** Downloader configuration *)
+
type config = {
+
timeout : float;
+
max_redirects : int;
+
retry : Requests.Retry.config option;
+
auth : Requests.Auth.t option;
+
headers : Requests.Headers.t;
+
verify_tls : bool;
+
}
+
+
(** Downloader instance *)
+
type t
+
+
(** Create default configuration *)
+
val default_config : config
-
(** Abstract downloader interface *)
-
module type DOWNLOADER = sig
-
type t
-
-
val create : sw:Eio.Switch.t -> env:Eio_unix.Stdenv.base ->
-
?auth:Config.auth -> unit -> t
-
-
val download : t ->
-
url:string ->
-
dest:Eio.Fs.dir_ty Eio.Path.t ->
-
?hash:Hash.t ->
-
?progress:Progress_reporter.t ->
-
?resume:bool ->
-
unit -> (unit, string) result
-
-
val supports_resume : t -> bool
-
val name : t -> string
-
end
+
(** Create custom configuration *)
+
val create_config :
+
?timeout:float ->
+
?max_redirects:int ->
+
?retry:Requests.Retry.config option ->
+
?auth:Requests.Auth.t option ->
+
?headers:Requests.Headers.t ->
+
?verify_tls:bool ->
+
unit -> config
+
+
(** Create a new downloader *)
+
val create :
+
sw:Eio.Switch.t ->
+
env:Eio_unix.Stdenv.base ->
+
?config:config ->
+
unit -> t
+
+
(** Download to a file *)
+
val download :
+
t ->
+
url:string ->
+
dest:Eio.Fs.dir_ty Eio.Path.t ->
+
?hash:Hash.t ->
+
?progress:Progress.t ->
+
?resume:bool ->
+
unit -> (unit, string) result
+
+
(** Download to a stream *)
+
val download_to_stream :
+
t ->
+
url:string ->
+
sink:Eio.Flow.sink_ty Eio.Resource.t ->
+
?progress:Progress.t ->
+
?range:(int64 * int64 option) ->
+
unit -> (unit, string) result
-
(** Concrete downloader implementations *)
-
module Wget_downloader : sig
-
include DOWNLOADER
-
end
+
(** Download a range of bytes *)
+
val download_range :
+
t ->
+
url:string ->
+
start:int64 ->
+
end_:int64 option ->
+
(string, string) result
-
module Curl_downloader : sig
-
include DOWNLOADER
-
end
+
(** Get content length from server *)
+
val get_content_length :
+
t ->
+
url:string ->
+
(int64, string) result
-
module Cohttp_downloader : sig
-
include DOWNLOADER
-
end
+
(** Check if server supports range requests *)
+
val supports_range :
+
t ->
+
url:string ->
+
bool
-
(** Downloader selection utilities *)
-
module Downloaders : sig
-
val wget : unit -> (module DOWNLOADER)
-
val curl : unit -> (module DOWNLOADER)
-
val cohttp : unit -> (module DOWNLOADER)
-
val http : unit -> (module DOWNLOADER)
-
-
val detect_available : env:Eio_unix.Stdenv.base ->
-
(string * (module DOWNLOADER)) list
-
val create_default : env:Eio_unix.Stdenv.base ->
-
(module DOWNLOADER)
-
val of_string : string -> (module DOWNLOADER) option
-
end
+
(** Get downloader name *)
+
val name : t -> string
-
(** CLI integration *)
-
module Cli : sig
-
type downloader_choice = [ `Wget | `Curl | `Cohttp | `Http | `Auto ]
-
-
val downloader_term : downloader_choice Cmdliner.Term.t
-
val downloader_info : Cmdliner.Arg.info
-
-
val create_downloader :
-
env:Eio_unix.Stdenv.base ->
-
downloader_choice ->
-
(module DOWNLOADER)
-
end
+
(** Pretty printing *)
+
val pp : Format.formatter -> t -> unit
+
val pp_config : Format.formatter -> config -> unit
+10 -1
stack/toru/lib/toru/dune
···
(modules hash registry cache processors downloader make_registry toru logging)
(libraries
eio
+
eio_main
eio.unix
digestif
yojson
-
cmdliner
re
ptime
xdge
···
logs
logs.fmt
fmt))
+
+
(library
+
(public_name toru.cmd)
+
(name toru_cmd)
+
(modules cmd)
+
(libraries
+
toru
+
cmdliner
+
eio_main))
(documentation
(package toru))
+167 -19
stack/toru/lib/toru/processors.ml
···
-
type processor = Eio.Fs.dir_ty Eio.Path.t -> Eio.Fs.dir_ty Eio.Path.t
+
(** Processor module for post-download transformations
+
+
This module provides processors that can transform downloaded files,
+
such as decompression or unpacking archives. Designed to work with
+
the requests library's response processing pipeline.
+
*)
+
+
let src = Logs.Src.create "toru.processors" ~doc:"File processors"
+
module Log = (val Logs.src_log src : Logs.LOG)
+
+
(** {1 Processor Types} *)
+
+
type processor = Eio.Fs.dir_ty Eio.Path.t -> (Eio.Fs.dir_ty Eio.Path.t, string) result
+
+
(** {1 Helper Functions} *)
+
+
let run_command ~sw ~env cmd args =
+
try
+
let proc = Eio.Process.spawn ~sw env#process_mgr
+
~executable:cmd (cmd :: args) in
+
match Eio.Process.await proc with
+
| `Exited 0 -> Ok ()
+
| `Exited n -> Error (Printf.sprintf "%s exited with code %d" cmd n)
+
| `Signaled n -> Error (Printf.sprintf "%s killed by signal %d" cmd n)
+
with
+
| exn -> Error (Printf.sprintf "%s failed: %s" cmd (Printexc.to_string exn))
+
+
let ensure_output_dir ~env output_dir =
+
let fs = env#fs in
+
let output_path = Eio.Path.(fs / output_dir) in
+
Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 output_path
+
+
(** {1 Archive Processors} *)
+
+
let untar_gz output_dir : processor = fun path ->
+
Log.info (fun m -> m "Extracting tar.gz: %s -> %s"
+
(Eio.Path.native_exn path) output_dir);
+
+
Eio_main.run @@ fun env ->
+
Eio.Switch.run @@ fun sw ->
+
let path_str = Eio.Path.native_exn path in
+
+
(* Create output directory *)
+
ensure_output_dir ~env output_dir;
+
+
(* Extract using tar *)
+
match run_command ~sw ~env "tar" ["-xzf"; path_str; "-C"; output_dir] with
+
| Ok () -> Ok Eio.Path.(env#fs / output_dir)
+
| Error e -> Error e
-
let untar_gz _target_dir =
-
fun _path ->
-
(* TODO: Implement tar.gz extraction *)
-
failwith "Processors.untar_gz not yet implemented"
+
let untar_xz output_dir : processor = fun path ->
+
Log.info (fun m -> m "Extracting tar.xz: %s -> %s"
+
(Eio.Path.native_exn path) output_dir);
-
let unzip _target_dir =
-
fun _path ->
-
(* TODO: Implement zip extraction *)
-
failwith "Processors.unzip not yet implemented"
+
Eio_main.run @@ fun env ->
+
Eio.Switch.run @@ fun sw ->
+
let path_str = Eio.Path.native_exn path in
-
let untar_xz _target_dir =
-
fun _path ->
-
(* TODO: Implement tar.xz extraction *)
-
failwith "Processors.untar_xz not yet implemented"
+
(* Create output directory *)
+
ensure_output_dir ~env output_dir;
-
let custom _command _args =
-
fun _path ->
-
(* TODO: Implement custom command execution *)
-
failwith "Processors.custom not yet implemented"
+
(* Extract using tar *)
+
match run_command ~sw ~env "tar" ["-xJf"; path_str; "-C"; output_dir] with
+
| Ok () -> Ok Eio.Path.(env#fs / output_dir)
+
| Error e -> Error e
+
+
let unzip output_dir : processor = fun path ->
+
Log.info (fun m -> m "Extracting zip: %s -> %s"
+
(Eio.Path.native_exn path) output_dir);
+
+
Eio_main.run @@ fun env ->
+
Eio.Switch.run @@ fun sw ->
+
let path_str = Eio.Path.native_exn path in
-
let identity = fun path -> path
+
(* Create output directory *)
+
ensure_output_dir ~env output_dir;
-
let compose p1 p2 = fun path -> p2 (p1 path)
+
(* Extract using unzip *)
+
match run_command ~sw ~env "unzip" ["-q"; "-o"; path_str; "-d"; output_dir] with
+
| Ok () -> Ok Eio.Path.(env#fs / output_dir)
+
| Error e -> Error e
+
+
(** {1 Compression Processors} *)
+
+
let gunzip : processor = fun path ->
+
Log.info (fun m -> m "Decompressing gzip: %s" (Eio.Path.native_exn path));
+
+
Eio_main.run @@ fun env ->
+
Eio.Switch.run @@ fun sw ->
+
let path_str = Eio.Path.native_exn path in
+
+
(* Output file without .gz extension *)
+
let output_str =
+
if String.ends_with ~suffix:".gz" path_str then
+
String.sub path_str 0 (String.length path_str - 3)
+
else
+
path_str ^ ".uncompressed"
+
in
+
+
(* Decompress using gunzip *)
+
match run_command ~sw ~env "gunzip" ["-c"; path_str] with
+
| Ok () -> Ok Eio.Path.(env#fs / output_str)
+
| Error e -> Error e
+
+
let bunzip2 : processor = fun path ->
+
Log.info (fun m -> m "Decompressing bzip2: %s" (Eio.Path.native_exn path));
+
+
Eio_main.run @@ fun env ->
+
Eio.Switch.run @@ fun sw ->
+
let path_str = Eio.Path.native_exn path in
+
+
(* Output file without .bz2 extension *)
+
let output_str =
+
if String.ends_with ~suffix:".bz2" path_str then
+
String.sub path_str 0 (String.length path_str - 4)
+
else
+
path_str ^ ".uncompressed"
+
in
+
+
(* Decompress using bunzip2 *)
+
match run_command ~sw ~env "bunzip2" ["-c"; path_str] with
+
| Ok () -> Ok Eio.Path.(env#fs / output_str)
+
| Error e -> Error e
+
+
(** {1 Custom Processor} *)
+
+
let custom cmd args : processor = fun path ->
+
Log.info (fun m -> m "Running custom processor: %s" cmd);
+
+
Eio_main.run @@ fun env ->
+
Eio.Switch.run @@ fun sw ->
+
let path_str = Eio.Path.native_exn path in
+
+
(* Replace {file} placeholder in arguments *)
+
let args = List.map (fun arg ->
+
if arg = "{file}" then path_str else arg
+
) args in
+
+
match run_command ~sw ~env cmd args with
+
| Ok () -> Ok path
+
| Error e -> Error e
+
+
(** {1 Utility Processors} *)
+
+
let identity : processor = fun path -> Ok path
+
+
let compose p1 p2 : processor = fun path ->
+
match p1 path with
+
| Error e -> Error e
+
| Ok p -> p2 p
+
+
let chain processors : processor = fun path ->
+
List.fold_left (fun acc proc ->
+
match acc with
+
| Error e -> Error e
+
| Ok p -> proc p
+
) (Ok path) processors
+
+
let detect_processor filename =
+
(* Detect processor based on file extension *)
+
if String.ends_with ~suffix:".tar.gz" filename ||
+
String.ends_with ~suffix:".tgz" filename then
+
Some (untar_gz ".")
+
else if String.ends_with ~suffix:".tar.xz" filename then
+
Some (untar_xz ".")
+
else if String.ends_with ~suffix:".zip" filename then
+
Some (unzip ".")
+
else if String.ends_with ~suffix:".gz" filename then
+
Some gunzip
+
else if String.ends_with ~suffix:".bz2" filename then
+
Some bunzip2
+
else
+
None
+16 -2
stack/toru/lib/toru/processors.mli
···
(** File processors for post-download transformations *)
(** Processor function type *)
-
type processor = Eio.Fs.dir_ty Eio.Path.t -> Eio.Fs.dir_ty Eio.Path.t
+
type processor = Eio.Fs.dir_ty Eio.Path.t -> (Eio.Fs.dir_ty Eio.Path.t, string) result
(** {1 Archive decompression processors} *)
···
val identity : processor
(** Compose two processors *)
-
val compose : processor -> processor -> processor
+
val compose : processor -> processor -> processor
+
+
(** {1 Compression processors} *)
+
+
(** Decompress gzip file *)
+
val gunzip : processor
+
+
(** Decompress bzip2 file *)
+
val bunzip2 : processor
+
+
(** Chain multiple processors *)
+
val chain : processor list -> processor
+
+
(** Detect processor based on file extension *)
+
val detect_processor : string -> processor option
+287 -107
stack/toru/lib/toru/toru.ml
···
+
(** Toru - Data repository management library
+
+
An OCaml library for managing data file downloads and caching,
+
compatible with Python Pooch registry files. It provides automatic
+
downloading, caching, and hash verification of data files from
+
remote repositories.
+
*)
+
+
let src = Logs.Src.create "toru" ~doc:"Toru data management"
+
module Log = (val Logs.src_log src : Logs.LOG)
+
+
(** {1 Types} *)
+
type t = {
base_url : string;
cache : Cache.t;
registry : Registry.t;
-
downloader : (module Downloader.DOWNLOADER);
+
downloader : Downloader.t;
sw : Eio.Switch.t;
env : Eio_unix.Stdenv.base;
}
-
let create ~sw ~env ~base_url ~cache_path ?version ?registry_file ?registry_url ?downloader () =
+
(** {1 Creation} *)
+
+
let create ~sw ~env ~base_url ~cache_path ?version ?registry_file ?registry_url ?downloader_config () =
+
Log.info (fun m -> m "Creating Toru instance with base_url=%s, cache=%s"
+
base_url cache_path);
+
+
(* Create cache *)
let cache = Cache.create ~sw ~fs:env#fs ?version cache_path in
-
let registry = match registry_file with
-
| Some file -> Registry.load (Eio.Path.(env#fs / file))
-
| None ->
-
(match registry_url with
-
| Some url -> Registry.load_from_url url
-
| None -> Registry.empty)
+
+
(* Load registry *)
+
let registry = match registry_file, registry_url with
+
| Some file, _ ->
+
Log.info (fun m -> m "Loading registry from file: %s" file);
+
Registry.load Eio.Path.(env#fs / file)
+
| None, Some url ->
+
Log.info (fun m -> m "Loading registry from URL: %s" url);
+
Registry.load_from_url url
+
| None, None ->
+
Log.info (fun m -> m "Starting with empty registry");
+
Registry.empty
in
-
let downloader = match downloader with
-
| Some d -> d
-
| None ->
-
Downloader.Downloaders.create_default ~env
-
in
+
+
(* Create downloader *)
+
let downloader = Downloader.create ~sw ~env ?config:downloader_config () in
+
{ base_url; cache; registry; downloader; sw; env }
+
(** {1 Accessors} *)
+
let base_url t = t.base_url
let cache t = t.cache
let registry t = t.registry
+
let downloader t = t.downloader
+
+
(** {1 Fetch Operations} *)
let rec fetch t ~filename ?processor () =
-
(* 1. Check if file exists in registry *)
+
Log.info (fun m -> m "Fetching file: %s" filename);
+
+
(* Check registry *)
match Registry.find filename t.registry with
-
| None -> Error ("File not found in registry: " ^ filename)
+
| None ->
+
Log.warn (fun m -> m "File not found in registry: %s" filename);
+
Error ("File not found in registry: " ^ filename)
| Some entry ->
-
(* 2. Check if file is already cached *)
-
let cache_path = Cache.file_path t.cache filename in
-
let cached_file_exists = Cache.exists t.cache filename in
-
-
(* 3. If cached, verify hash *)
-
if cached_file_exists then (
+
(* Check cache *)
+
if Cache.exists t.cache filename then
+
(* Verify cached file *)
+
let cache_path = Cache.file_path t.cache filename in
let entry_hash = Registry.hash entry in
-
if Hash.verify cache_path entry_hash then (
-
(* File is cached and valid - apply processor if provided *)
+
if Hash.verify cache_path entry_hash then begin
+
Log.info (fun m -> m "Using cached file: %s" filename);
+
(* Apply processor if provided *)
match processor with
| None -> Ok cache_path
-
| Some proc -> Ok (proc cache_path)
-
) else (
-
(* Cached file is corrupt - remove it and re-download *)
-
(try Eio.Path.unlink cache_path with _ -> ());
-
(* Fall through to download *)
-
fetch_file_to_cache t entry filename processor
-
)
-
) else (
-
(* File not cached - download it *)
-
fetch_file_to_cache t entry filename processor
-
)
+
| Some proc ->
+
match proc cache_path with
+
| Ok path -> Ok path
+
| Error e -> Error ("Processor failed: " ^ e)
+
end else begin
+
(* Corrupted cache - delete and re-download *)
+
Log.warn (fun m -> m "Cached file failed verification, re-downloading: %s" filename);
+
Cache.delete t.cache filename;
+
fetch_and_cache t entry filename processor
+
end
+
else
+
(* Not cached - download *)
+
fetch_and_cache t entry filename processor
-
and fetch_file_to_cache t entry filename processor =
-
(* Ensure cache directory exists *)
-
Cache.ensure_dir t.cache;
-
-
(* Get the download URL *)
-
let download_url = match Registry.custom_url entry with
+
and fetch_and_cache t entry filename processor =
+
(* Get download URL *)
+
let url = match Registry.custom_url entry with
| Some custom -> custom
| None -> t.base_url ^ filename
in
-
-
(* Download the file *)
-
let (module D : Downloader.DOWNLOADER) = t.downloader in
-
let downloader_instance = D.create ~sw:t.sw ~env:t.env () in
-
let cache_path = Cache.file_path t.cache filename in
-
let entry_hash = Registry.hash entry in
-
-
match D.download downloader_instance ~url:download_url ~dest:cache_path ~hash:entry_hash () with
+
+
Log.info (fun m -> m "Downloading %s from %s" filename url);
+
+
(* Create progress reporter *)
+
let progress = Downloader.Progress.create filename in
+
+
(* Check if server supports range requests for chunked download *)
+
if Downloader.supports_range t.downloader ~url then
+
fetch_chunked t entry filename url processor
+
else
+
fetch_simple t entry filename url progress processor
+
+
and fetch_simple t entry filename url progress processor =
+
(* Download to temporary location first *)
+
let temp_path = Eio.Path.(t.env#fs / Filename.temp_dir ~temp_dir:"/tmp" "toru" "") in
+
let temp_file = Eio.Path.(temp_path / filename) in
+
+
match Downloader.download t.downloader ~url ~dest:temp_file
+
~hash:(Registry.hash entry) ~progress () with
| Error msg -> Error ("Download failed: " ^ msg)
| Ok () ->
+
(* Move to cache *)
+
Eio.Switch.run @@ fun sw ->
+
let source = Eio.Path.open_in ~sw temp_file in
+
Cache.put_stream t.cache ~source:(source :> Eio.Flow.source_ty Eio.Resource.t) filename;
+
Eio.Flow.close source;
+
+
(* Clean up temp file *)
+
(try Eio.Path.unlink temp_file with _ -> ());
+
+
let cache_path = Cache.file_path t.cache filename in
+
(* Apply processor if provided *)
match processor with
| None -> Ok cache_path
-
| Some proc -> Ok (proc cache_path)
+
| Some proc ->
+
match proc cache_path with
+
| Ok path -> Ok path
+
| Error e -> Error ("Processor failed: " ^ e)
+
+
and fetch_chunked t entry filename url processor =
+
Log.info (fun m -> m "Using chunked download for %s" filename);
+
+
(* Get total size *)
+
match Downloader.get_content_length t.downloader ~url with
+
| Error e ->
+
Log.warn (fun m -> m "Cannot get content length, falling back to simple download: %s" e);
+
fetch_simple t entry filename url (Downloader.Progress.create filename) processor
+
| Ok total_size ->
+
(* Check existing chunks *)
+
let missing_ranges = Cache.missing_ranges t.cache ~filename ~total_size in
-
let fetch_all t ?concurrency:_ () =
-
let all_entries = Registry.entries t.registry in
-
let total = List.length all_entries in
-
-
if total = 0 then
+
if missing_ranges = [] then begin
+
(* All chunks present - coalesce *)
+
Log.info (fun m -> m "All chunks present, coalescing");
+
let promise = Cache.coalesce_chunks t.cache ~filename in
+
match Eio.Promise.await promise with
+
| Ok () ->
+
let cache_path = Cache.file_path t.cache filename in
+
(* Verify hash *)
+
let entry_hash = Registry.hash entry in
+
if Hash.verify cache_path entry_hash then
+
(* Apply processor if provided *)
+
match processor with
+
| None -> Ok cache_path
+
| Some proc ->
+
match proc cache_path with
+
| Ok path -> Ok path
+
| Error e -> Error ("Processor failed: " ^ e)
+
else
+
Error "Hash verification failed after coalescing chunks"
+
| Error e ->
+
Error (Printf.sprintf "Failed to coalesce chunks: %s" (Printexc.to_string e))
+
end else begin
+
(* Download missing chunks *)
+
let rec download_chunks = function
+
| [] -> Ok ()
+
| (start, end_) :: rest ->
+
Log.info (fun m -> m "Downloading chunk %Ld-%Ld" start end_);
+
match Downloader.download_range t.downloader ~url ~start ~end_:(Some end_) with
+
| Error e -> Error e
+
| Ok data ->
+
(* Save chunk *)
+
Eio.Switch.run @@ fun _sw ->
+
let source = Eio.Flow.string_source data in
+
let range = Cacheio.Range.create ~start ~end_ in
+
Cache.put_chunk t.cache ~filename ~range ~source;
+
download_chunks rest
+
in
+
+
(* Download all missing ranges *)
+
match download_chunks missing_ranges with
+
| Error e -> Error ("Chunk download failed: " ^ e)
+
| Ok () ->
+
(* Coalesce chunks *)
+
Log.info (fun m -> m "All chunks downloaded, coalescing");
+
let promise = Cache.coalesce_chunks t.cache ~filename in
+
match Eio.Promise.await promise with
+
| Ok () ->
+
let cache_path = Cache.file_path t.cache filename in
+
(* Verify hash *)
+
let entry_hash = Registry.hash entry in
+
if Hash.verify cache_path entry_hash then
+
(* Apply processor if provided *)
+
match processor with
+
| None -> Ok cache_path
+
| Some proc ->
+
match proc cache_path with
+
| Ok path -> Ok path
+
| Error e -> Error ("Processor failed: " ^ e)
+
else
+
Error "Hash verification failed after downloading chunks"
+
| Error e ->
+
Error (Printf.sprintf "Failed to coalesce chunks: %s" (Printexc.to_string e))
+
end
+
+
(** {1 Batch Operations} *)
+
+
let fetch_all t ?concurrency () =
+
let entries = Registry.entries t.registry in
+
let total = List.length entries in
+
+
if total = 0 then begin
+
Log.info (fun m -> m "No files to fetch");
Ok ()
-
else (
-
(* Create a semaphore to limit concurrency *)
-
let results = ref [] in
+
end else begin
+
Log.info (fun m -> m "Fetching %d files" total);
+
+
(* Determine concurrency *)
+
let concurrency = Option.value concurrency ~default:4 in
+
+
(* Use Eio fibers for concurrent downloads *)
let errors = ref [] in
let completed = ref 0 in
-
-
(* Process entries in batches *)
-
let rec process_batch entries =
+
let mutex = Eio.Mutex.create () in
+
+
(* Create a pool of workers *)
+
let rec process_entries entries =
match entries with
-
| [] ->
-
if !completed = total then
-
if List.length !errors > 0 then
-
Error ("Multiple failures: " ^ String.concat "; " !errors)
+
| [] -> ()
+
| batch ->
+
(* Take up to concurrency items *)
+
let current_batch, rest =
+
if List.length batch <= concurrency then
+
batch, []
else
-
Ok ()
-
else
-
Error "Internal error: not all files processed"
-
| entry :: rest ->
-
let filename = Registry.filename entry in
-
(match fetch t ~filename () with
-
| Ok path ->
-
results := path :: !results;
-
incr completed
-
| Error msg ->
-
errors := (filename ^ ": " ^ msg) :: !errors;
-
incr completed);
-
process_batch rest
+
let rec take n acc = function
+
| [] -> List.rev acc, []
+
| _ when n = 0 -> List.rev acc, batch
+
| h :: t -> take (n - 1) (h :: acc) t
+
in
+
take concurrency [] batch
+
in
+
+
(* Process batch concurrently *)
+
Eio.Fiber.all (List.map (fun entry ->
+
fun () ->
+
let filename = Registry.filename entry in
+
match fetch t ~filename () with
+
| Ok _ ->
+
Eio.Mutex.use_rw ~protect:false mutex (fun () ->
+
incr completed;
+
Log.info (fun m -> m "Progress: %d/%d files" !completed total)
+
)
+
| Error msg ->
+
Eio.Mutex.use_rw ~protect:false mutex (fun () ->
+
errors := (filename ^ ": " ^ msg) :: !errors;
+
incr completed
+
)
+
) current_batch);
+
+
(* Process next batch *)
+
process_entries rest
in
-
-
(* For now, implement simple sequential processing *)
-
(* TODO: Add actual concurrent processing with Eio fibers *)
-
process_batch all_entries
-
)
+
+
process_entries entries;
+
+
if !errors = [] then
+
Ok ()
+
else
+
Error (Printf.sprintf "%d failures: %s"
+
(List.length !errors)
+
(String.concat "; " !errors))
+
end
+
+
(** {1 Registry Management} *)
let load_registry t source =
-
let new_registry =
+
Log.info (fun m -> m "Loading registry from: %s" source);
+
let new_registry =
if String.contains source '/' || String.contains source ':' then
Registry.load_from_url source
else
-
(* TODO: Need a way to resolve relative paths *)
-
failwith "load_registry: relative paths not yet supported"
+
Registry.load Eio.Path.(t.env#fs / source)
in
{ t with registry = new_registry }
···
{ t with registry = new_registry }
let update_base_url t new_url =
+
Log.info (fun m -> m "Updating base URL to: %s" new_url);
{ t with base_url = new_url }
-
let retrieve ~sw ~env ~url ?hash ?cache_path ?downloader () =
-
(* Get cache path *)
+
(** {1 One-off Downloads} *)
+
+
let retrieve ~sw ~env ~url ?hash ?cache_path () =
+
Log.info (fun m -> m "Retrieving file from: %s" url);
+
+
(* Create temporary cache if needed *)
let cache_dir = match cache_path with
| Some path -> path
| None -> Cache.default_cache_path ~app_name:"toru-temp" ()
in
-
-
(* Create a temporary cache *)
+
let cache = Cache.create ~sw ~fs:env#fs cache_dir in
Cache.ensure_dir cache;
-
+
(* Extract filename from URL *)
-
let filename =
+
let filename =
match String.rindex_opt url '/' with
| Some idx -> String.sub url (idx + 1) (String.length url - idx - 1)
-
| None -> "downloaded_file"
+
| None -> "download"
in
-
-
(* Get downloader *)
-
let downloader_module = match downloader with
-
| Some d -> d
-
| None -> Downloader.Downloaders.create_default ~env
-
in
-
+
(* Download the file *)
-
let (module D : Downloader.DOWNLOADER) = downloader_module in
-
let downloader_instance = D.create ~sw ~env () in
let dest_path = Cache.file_path cache filename in
-
-
match D.download downloader_instance ~url ~dest:dest_path ?hash () with
+
let downloader = Downloader.create ~sw ~env () in
+
let progress = Downloader.Progress.create filename in
+
+
match Downloader.download downloader ~url ~dest:dest_path ?hash ~progress () with
| Error msg -> Error ("Download failed: " ^ msg)
| Ok () -> Ok dest_path
+
(** {1 Cache Management} *)
+
+
let clear_cache t =
+
Log.info (fun m -> m "Clearing cache");
+
Cache.clear t.cache
+
+
let cache_stats t =
+
Cache.stats t.cache
+
+
let expire_cache t =
+
Log.info (fun m -> m "Expiring old cache entries");
+
Cache.expire t.cache
+
+
(** {1 Utility Functions} *)
+
let default_cache_path = Cache.default_cache_path
+
(** {1 Pretty Printing} *)
+
let pp fmt t =
-
Format.fprintf fmt "Toru instance:\n";
-
Format.fprintf fmt " Base URL: %s\n" t.base_url;
-
Format.fprintf fmt " Registry: %d entries\n" (Registry.size t.registry);
-
Format.fprintf fmt " Cache: %a\n" Cache.pp t.cache
+
Format.fprintf fmt "Toru{base_url=%s, registry=%d entries, %a}"
+
t.base_url
+
(Registry.size t.registry)
+
Cache.pp t.cache
+
+
(** {1 Module Exports} *)
module Hash = Hash
module Registry = Registry
+17 -4
stack/toru/lib/toru/toru.mli
···
(** {1 Construction} *)
(** Create a Toru instance *)
-
val create :
+
val create :
sw:Eio.Switch.t ->
env:Eio_unix.Stdenv.base ->
base_url:string ->
···
?version:string ->
?registry_file:string ->
?registry_url:string ->
-
?downloader:(module Downloader.DOWNLOADER) ->
+
?downloader_config:Downloader.config ->
unit -> t
(** {1 Field accessors} *)
···
(** Get registry instance *)
val registry : t -> Registry.t
+
(** Get downloader instance *)
+
val downloader : t -> Downloader.t
+
(** {1 File operations} *)
(** Fetch a single file from registry *)
···
(** {1 Static functions} *)
(** Retrieve a single file without registry *)
-
val retrieve :
+
val retrieve :
sw:Eio.Switch.t ->
env:Eio_unix.Stdenv.base ->
url:string ->
?hash:Hash.t ->
?cache_path:string ->
-
?downloader:(module Downloader.DOWNLOADER) ->
unit -> (Eio.Fs.dir_ty Eio.Path.t, string) result
(** Get default cache path for application *)
val default_cache_path : ?app_name:string -> unit -> string
+
+
(** {1 Cache Management} *)
+
+
(** Clear all cached files *)
+
val clear_cache : t -> unit
+
+
(** Get cache statistics *)
+
val cache_stats : t -> Cacheio.Stats.t
+
+
(** Expire old cache entries *)
+
val expire_cache : t -> int
(** {1 Pretty printing} *)
-193
stack/toru/test/CACHE_IMPLEMENTATION_REPORT.md
···
-
# Cache Module Implementation Report
-
-
## Overview
-
-
The Cache module has been fully implemented according to the CLAUDE.md specification with extensive additional management APIs. The implementation provides robust, cross-platform cache management with XDG Base Directory compliance and comprehensive file operations.
-
-
## Core Implementation Features
-
-
### ✅ Basic Interface (From CLAUDE.md)
-
-
**Construction Functions:**
-
- `create ~sw ~env ?version path` - Create cache with explicit path
-
- `default ~sw ~env ?app_name ()` - Create cache using OS-specific default location
-
-
**Field Accessors:**
-
- `base_path t` - Get base cache directory path
-
- `version t` - Get version string (if any)
-
-
**Basic Operations:**
-
- `file_path t filename` - Get full path for file in cache
-
- `exists t filename` - Check if file exists in cache
-
- `ensure_dir t` - Create cache directories lazily
-
- `clear t` - Remove all files from cache
-
- `size_bytes t` - Calculate total cache size
-
- `list_files t` - List all files in cache recursively
-
-
### ✅ Extended Management APIs
-
-
**Advanced Cache Management:**
-
- `file_info t filename` - Get file size and modification time
-
- `usage_stats t` - Get comprehensive cache statistics
-
- `trim_to_size t max_size` - Remove oldest files to fit size limit
-
- `trim_by_age t max_age_days` - Remove files older than N days
-
- `vacuum t` - Clean up empty directories and broken symlinks
-
-
**Types:**
-
```ocaml
-
type file_info = { size: int64; mtime: float }
-
type usage_stats = {
-
total_size: int64;
-
file_count: int;
-
oldest: float;
-
newest: float
-
}
-
```
-
-
## Platform Support & XDG Compliance
-
-
### ✅ Cross-Platform Default Paths
-
-
The implementation correctly detects the platform and follows OS-specific conventions:
-
-
**macOS (Darwin):**
-
- Path: `~/Library/Caches/<app_name>`
-
- Detection: Uses `uname -s` to identify Darwin kernel
-
- Verified: ✓ Working on macOS
-
-
**Linux/Unix:**
-
- XDG compliant: `$XDG_CACHE_HOME/<app_name>` if set
-
- Fallback: `~/.cache/<app_name>`
-
- Verified: ✓ XDG environment variable override support
-
-
**Windows:**
-
- Path: `%LOCALAPPDATA%\<app_name>\Cache`
-
- Fallback: `%USERPROFILE%\AppData\Local\<app_name>\Cache`
-
-
### ✅ Versioned Cache Support
-
-
Supports optional version subdirectories:
-
- Without version: `cache_dir/file.txt`
-
- With version: `cache_dir/v1.0/file.txt`
-
- Automatic directory creation for both modes
-
-
## Implementation Details
-
-
### Directory Creation Strategy
-
-
Uses lazy directory creation with proper error handling:
-
- Creates parent directories recursively when needed
-
- Handles concurrent directory creation safely
-
- Uses appropriate permissions (0o755 for directories, 0o644 for files)
-
-
### File Management
-
-
**Efficient Operations:**
-
- Recursive directory traversal for listing and size calculation
-
- Safe file operations with proper error handling
-
- Atomic operations where possible
-
-
**Trim Operations:**
-
- `trim_to_size`: Removes oldest files first (LRU-like behavior)
-
- `trim_by_age`: Removes files older than specified days
-
- Both operations preserve cache structure and handle errors gracefully
-
-
### Memory and Performance
-
-
**Optimizations:**
-
- Minimal memory allocations in recursive operations
-
- Efficient file statistics collection
-
- Lazy evaluation where appropriate
-
-
## Test Coverage
-
-
### ✅ Comprehensive Test Suite
-
-
**test_cache.ml - Basic Functionality:**
-
- Cache creation and directory management
-
- File operations (create, check existence, list)
-
- Size calculation and statistics
-
- Clear operation
-
- Trim operations (size and age-based)
-
- Vacuum functionality
-
-
**test_cache_xdg.ml - Platform & XDG Compliance:**
-
- XDG Base Directory specification compliance
-
- Platform-specific path detection (macOS vs Linux)
-
- Environment variable override support
-
- Versioned cache directory handling
-
- Advanced cache management features
-
-
### ✅ Test Results
-
-
All tests pass successfully:
-
- ✅ XDG compliance verified on macOS
-
- ✅ Versioned cache directories work correctly
-
- ✅ File operations robust and reliable
-
- ✅ Cache management functions operate correctly
-
- ✅ Cross-platform path detection works
-
-
## API Usage Examples
-
-
### Basic Usage
-
```ocaml
-
let cache = Cache.create ~sw ~env ~version:"v1.0" "my-data-cache" in
-
Cache.ensure_dir cache;
-
let file_path = Cache.file_path cache "dataset.csv" in
-
let exists = Cache.exists cache "dataset.csv" in
-
```
-
-
### Management Operations
-
```ocaml
-
(* Get cache statistics *)
-
let stats = Cache.usage_stats cache in
-
printf "Total: %Ld bytes, %d files\n" stats.total_size stats.file_count;
-
-
(* Trim cache to 100MB *)
-
Cache.trim_to_size cache (Int64.mul 100L 1024L 1024L);
-
-
(* Remove files older than 30 days *)
-
Cache.trim_by_age cache 30.0;
-
-
(* Clean up empty directories *)
-
Cache.vacuum cache;
-
```
-
-
### Platform-Aware Defaults
-
```ocaml
-
(* Uses OS-appropriate default location *)
-
let cache = Cache.default ~sw ~env ~app_name:"my-app" () in
-
-
(* Get platform-specific default path *)
-
let default_path = Cache.default_cache_path ~app_name:"my-app" () in
-
```
-
-
## Integration with Toru Ecosystem
-
-
The Cache module integrates seamlessly with other Toru components:
-
- **Registry Module**: Files from registry stored in cache
-
- **Downloader Module**: Downloads saved to cache locations
-
- **Hash Module**: Cached files verified against registry hashes
-
- **Main Toru API**: Cache provides storage backend
-
-
## Future Extensions
-
-
The current implementation provides a solid foundation for:
-
- Cache quota management
-
- LRU/LFU eviction policies
-
- Cache statistics and monitoring
-
- Compression and deduplication
-
- Network-aware cache policies
-
-
## Conclusion
-
-
The Cache module implementation fully satisfies the CLAUDE.md specification and extends it with comprehensive management capabilities. It provides robust, cross-platform cache management that follows OS conventions and integrates well with the broader Toru library ecosystem.
-
-
**Key Achievements:**
-
- ✅ Complete CLAUDE.md specification implementation
-
- ✅ Extended management APIs beyond specification
-
- ✅ Full XDG Base Directory compliance
-
- ✅ Cross-platform support (macOS, Linux, Windows)
-
- ✅ Comprehensive test coverage
-
- ✅ Production-ready error handling and edge cases
-
- ✅ Efficient file operations and memory usage
-143
stack/toru/test/basic.t
···
-
Basic Toru CLI Tests
-
===================
-
-
Setup test environment and use actual test registry:
-
$ cd $TESTDIR/..
-
$ export TORU_CACHE_DIR=/tmp/toru-test-cache
-
$ mkdir -p /tmp/toru-test-cache
-
-
Test 1: Basic inspect functionality
-
===================================
-
-
Test basic inspect without options:
-
$ dune exec bin/toru_main.exe -- inspect test/python/test_registry_sha256.txt
-
Registry contains 8 entries
-
-
Test inspect with verbose logging:
-
$ dune exec bin/toru_main.exe -- inspect test/python/test_registry_sha256.txt --verbose
-
toru_main.exe: [INFO] Loading registry from: test/python/test_registry_sha256.txt
-
Registry contains 8 entries
-
-
Test inspect with debug logging:
-
$ dune exec bin/toru_main.exe -- inspect test/python/test_registry_sha256.txt --verbose --verbose 2>&1 | head -6
-
toru_main.exe: [DEBUG] Starting inspect function
-
toru_main.exe: [INFO] Loading registry from: test/python/test_registry_sha256.txt
-
toru_main.exe: [DEBUG] Determining if source is URL or file path
-
toru_main.exe: [DEBUG] Detected file path, loading directly without Eio
-
toru_main.exe: [DEBUG] Registry loaded successfully
-
Registry contains 8 entries
-
-
Test inspect with statistics:
-
$ dune exec bin/toru_main.exe -- inspect --stats test/python/test_registry_sha256.txt | head -10
-
Registry contains 8 entries
-
-
Registry Statistics:
-
===================
-
Hash algorithms:
-
sha256: 8 entries
-
-
File extensions:
-
.txt: 5 files
-
.json: 1 files
-
-
Test inspect with file listing:
-
$ dune exec bin/toru_main.exe -- inspect --list test/python/test_registry_sha256.txt | head -10
-
Registry contains 8 entries
-
-
Files in registry:
-
==================
-
9ead1fad59f50f905f6a76154e2d0bc1a31c46f223e2ec81e278aa2ee6a10e25 .hidden/secret.txt
-
dcfcfeb20e6334be05d4ed2e39da77ffb84b80bf835dc20ac9963f5e95820b94 config.json
-
d613c3cca7a0aafd47e7cd81c7ee0268504b13e8c24b2668dcde8a86386c5cef data/numbers.csv
-
dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f data/simple.txt
-
dcccba292708e4d9b8f1e2af482b09b01131ea6e22c3b10173e0d75c8ac0c310 data/unicode.txt
-
-
Test inspect with search:
-
$ dune exec bin/toru_main.exe -- inspect --search .txt test/python/test_registry_sha256.txt | head -10
-
Registry contains 8 entries
-
-
Files matching '.txt':
-
=======================
-
9ead1fad59f50f905f6a76154e2d0bc1a31c46f223e2ec81e278aa2ee6a10e25 .hidden/secret.txt
-
dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f data/simple.txt
-
dcccba292708e4d9b8f1e2af482b09b01131ea6e22c3b10173e0d75c8ac0c310 data/unicode.txt
-
23c8cf9515ed231a55e63e1d89399a1eac4a529c2b4ac5af29a61af03e3afdd4 docs/readme.md
-
e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 empty.txt
-
-
Test 2: Validate functionality
-
==============================
-
-
Test basic validate:
-
$ dune exec bin/toru_main.exe -- validate test/python/test_registry_sha256.txt
-
✓ Registry loaded successfully with 8 entries
-
-
Test validate with hash checking:
-
$ dune exec bin/toru_main.exe -- validate --check-hashes test/python/test_registry_sha256.txt
-
✓ Registry loaded successfully with 8 entries
-
Validating hash formats...
-
✓ All 8 hash formats are valid
-
-
Test 3: Convert functionality
-
============================
-
-
Test convert to new file:
-
$ dune exec bin/toru_main.exe -- convert test/python/test_registry_sha256.txt /tmp/converted.txt
-
✓ Converted 8 entries from test/python/test_registry_sha256.txt to /tmp/converted.txt
-
-
Verify converted file has correct content:
-
$ head -5 /tmp/converted.txt
-
# Pooch registry generated on *-*-*T*:*:*.* (glob)
-
# Algorithm: sha256
-
.hidden/secret.txt 9ead1fad59f50f905f6a76154e2d0bc1a31c46f223e2ec81e278aa2ee6a10e25
-
config.json dcfcfeb20e6334be05d4ed2e39da77ffb84b80bf835dc20ac9963f5e95820b94
-
data/numbers.csv d613c3cca7a0aafd47e7cd81c7ee0268504b13e8c24b2668dcde8a86386c5cef
-
-
Test 4: XDG cache directory handling
-
===================================
-
-
Test with XDG_CACHE_HOME:
-
$ XDG_CACHE_HOME=/tmp/xdg-test dune exec bin/toru_main.exe -- inspect test/python/test_registry_sha256.txt --verbose
-
toru_main.exe: [INFO] Loading registry from: test/python/test_registry_sha256.txt
-
Registry contains 8 entries
-
-
Test with application-specific cache dir:
-
$ TORU_CACHE_DIR=/tmp/app-test dune exec bin/toru_main.exe -- inspect test/python/test_registry_sha256.txt --verbose
-
toru_main.exe: [INFO] Loading registry from: test/python/test_registry_sha256.txt
-
Registry contains 8 entries
-
-
Test 5: Help system
-
===================
-
-
Test main help shows XDG info:
-
$ dune exec bin/toru_main.exe -- --help | grep -A 3 "ENVIRONMENT VARIABLES"
-
ENVIRONMENT VARIABLES
-
Toru respects the XDG Base Directory Specification:
-
-
XDG_CACHE_HOME - Override default cache directory location
-
-
Test 6: Error handling
-
======================
-
-
Test non-existent file:
-
$ dune exec bin/toru_main.exe -- inspect nonexistent.txt 2>&1
-
[1]
-
-
Test malformed file doesn't crash:
-
$ echo "not a registry" > /tmp/bad.txt
-
$ dune exec bin/toru_main.exe -- inspect /tmp/bad.txt
-
Registry contains 0 entries
-
-
Test 7: App name and version parameters
-
======================================
-
-
Test with custom app name:
-
$ dune exec bin/toru_main.exe -- --app-name testapp inspect test/python/test_registry_sha256.txt
-
Registry contains 8 entries
-
-
Test with data version:
-
$ dune exec bin/toru_main.exe -- --data-version v1.0 inspect test/python/test_registry_sha256.txt
-
Registry contains 8 entries
-
-
Cleanup:
-
$ rm -f /tmp/converted.txt /tmp/bad.txt
-
$ rm -rf /tmp/toru-test-cache /tmp/xdg-test /tmp/app-test
-354
stack/toru/test/cli.t
···
-
Toru CLI Comprehensive Test Suite
-
==================================
-
-
Setup test environment:
-
$ export HOME=/tmp/toru_test_home
-
$ mkdir -p $HOME
-
$ unset XDG_CACHE_HOME TORU_CACHE_DIR
-
$ cd /tmp/toru_test_home
-
-
Create test registry file:
-
$ cat > test_registry.txt << 'EOF'
-
> # Test registry for toru CLI
-
> simple.txt e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
-
> data.csv d613c3cca7a0aafd47e7cd81c7ee0268504b13e8c24b2668dcde8a86386c5cef
-
> binary.bin 785b0751fc2c53dc14a4ce3d800e69ef9ce1009eb327ccf458afe09c242c26c9
-
> EOF
-
-
Test 1: Basic inspect command functionality
-
===========================================
-
-
Test basic inspect without options:
-
$ toru inspect test_registry.txt
-
Registry contains 3 entries
-
-
Test inspect with stats flag:
-
$ toru inspect --stats test_registry.txt
-
Registry contains 3 entries
-
-
Registry Statistics:
-
===================
-
Hash algorithms:
-
sha256: 3 entries
-
-
File extensions:
-
.txt: 1 files
-
.csv: 1 files
-
.bin: 1 files
-
-
Test inspect with list flag:
-
$ toru inspect --list test_registry.txt
-
Registry contains 3 entries
-
-
Files in registry:
-
==================
-
e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 simple.txt
-
d613c3cca7a0aafd47e7cd81c7ee0268504b13e8c24b2668dcde8a86386c5cef data.csv
-
785b0751fc2c53dc14a4ce3d800e69ef9ce1009eb327ccf458afe09c242c26c9 binary.bin
-
-
Test inspect with search pattern:
-
$ toru inspect --search txt test_registry.txt
-
Registry contains 3 entries
-
-
Files matching 'txt':
-
=======================
-
e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 simple.txt
-
-
Test inspect with all flags combined:
-
$ toru inspect --stats --list --search csv test_registry.txt
-
Registry contains 3 entries
-
-
Registry Statistics:
-
===================
-
Hash algorithms:
-
sha256: 3 entries
-
-
File extensions:
-
.txt: 1 files
-
.csv: 1 files
-
.bin: 1 files
-
-
Files in registry:
-
==================
-
e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 simple.txt
-
d613c3cca7a0aafd47e7cd81c7ee0268504b13e8c24b2668dcde8a86386c5cef data.csv
-
785b0751fc2c53dc14a4ce3d800e69ef9ce1009eb327ccf458afe09c242c26c9 binary.bin
-
-
Files matching 'csv':
-
=======================
-
d613c3cca7a0aafd47e7cd81c7ee0268504b13e8c24b2668dcde8a86386c5cef data.csv
-
-
Test 2: Validate command functionality
-
======================================
-
-
Test basic validate:
-
$ toru validate test_registry.txt
-
✓ Registry loaded successfully with 3 entries
-
-
Test validate with hash checking:
-
$ toru validate --check-hashes test_registry.txt
-
✓ Registry loaded successfully with 3 entries
-
Validating hash formats...
-
✓ All 3 hash formats are valid
-
-
Test 3: Convert command functionality
-
====================================
-
-
Test convert to same format:
-
$ toru convert test_registry.txt converted_registry.txt
-
✓ Converted 3 entries from test_registry.txt to converted_registry.txt
-
-
$ cat converted_registry.txt
-
# Pooch registry generated on *-*-*T*:*:*.* (glob)
-
# Algorithm: sha256
-
simple.txt e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
-
data.csv d613c3cca7a0aafd47e7cd81c7ee0268504b13e8c24b2668dcde8a86386c5cef
-
binary.bin 785b0751fc2c53dc14a4ce3d800e69ef9ce1009eb327ccf458afe09c242c26c9
-
-
Test 4: Verbose logging levels
-
==============================
-
-
Test with single verbose flag (info level):
-
$ toru inspect test_registry.txt --verbose
-
toru_main.exe: [INFO] Loading registry from: test_registry.txt
-
Registry contains 3 entries
-
-
Test with double verbose flags (debug level):
-
$ toru inspect test_registry.txt --verbose --verbose
-
toru_main.exe: [DEBUG] Starting inspect function
-
toru_main.exe: [INFO] Loading registry from: test_registry.txt
-
toru_main.exe: [DEBUG] Determining if source is URL or file path
-
toru_main.exe: [DEBUG] Detected file path, loading directly without Eio
-
toru_main.exe: [DEBUG] Registry loaded successfully
-
Registry contains 3 entries
-
-
Test 5: XDG Base Directory compliance
-
====================================
-
-
Test default cache directory behavior:
-
$ unset XDG_CACHE_HOME TORU_CACHE_DIR
-
$ toru inspect test_registry.txt --verbose
-
toru_main.exe: [INFO] Loading registry from: test_registry.txt
-
Registry contains 3 entries
-
-
Test XDG_CACHE_HOME environment variable:
-
$ XDG_CACHE_HOME=/tmp/xdg-cache toru inspect test_registry.txt --verbose
-
toru_main.exe: [INFO] Loading registry from: test_registry.txt
-
Registry contains 3 entries
-
-
Test application-specific cache directory override:
-
$ TORU_CACHE_DIR=/tmp/toru-cache toru inspect test_registry.txt --verbose
-
toru_main.exe: [INFO] Loading registry from: test_registry.txt
-
Registry contains 3 entries
-
-
Test command-line cache directory override:
-
$ toru inspect test_registry.txt --cache-dir /tmp/cli-cache --verbose
-
toru_main.exe: [INFO] Loading registry from: test_registry.txt
-
Registry contains 3 entries
-
-
Test precedence: CLI overrides environment variables:
-
$ XDG_CACHE_HOME=/tmp/xdg TORU_CACHE_DIR=/tmp/app toru inspect test_registry.txt --cache-dir /tmp/cli --verbose
-
toru_main.exe: [INFO] Loading registry from: test_registry.txt
-
Registry contains 3 entries
-
-
Test 6: Mixed hash algorithms support
-
====================================
-
-
Create registry with mixed hash types:
-
$ cat > mixed_hashes.txt << 'EOF'
-
> # Registry with different hash algorithms
-
> # SHA256 hash
-
> file1.txt e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
-
> # SHA1 hash
-
> file2.txt sha1:da39a3ee5e6b4b0d3255bfef95601890afd80709
-
> # MD5 hash
-
> file3.txt md5:d41d8cd98f00b204e9800998ecf8427e
-
> EOF
-
-
Test mixed hash validation:
-
$ toru validate --check-hashes mixed_hashes.txt
-
✓ Registry loaded successfully with 3 entries
-
Validating hash formats...
-
✓ All 3 hash formats are valid
-
-
Test 7: URL source handling
-
===========================
-
-
Test detection of HTTP URL (mock):
-
$ toru inspect https://example.com/registry.txt 2>&1 | head -3
-
toru_main.exe: [ERROR] * (glob)
-
-
Test detection of HTTPS URL (mock):
-
$ toru inspect https://example.com/registry.txt 2>&1 | head -3
-
toru_main.exe: [ERROR] * (glob)
-
-
Test 8: Error handling
-
======================
-
-
Test non-existent file:
-
$ toru inspect nonexistent.txt
-
[1]
-
-
Test malformed registry:
-
$ echo "invalid registry content" > malformed.txt
-
$ toru inspect malformed.txt
-
Registry contains 0 entries
-
-
Test empty registry:
-
$ touch empty.txt
-
$ toru inspect empty.txt
-
Registry contains 0 entries
-
-
Test 9: App name parameter
-
=========================
-
-
Test custom app name:
-
$ toru --app-name myapp inspect test_registry.txt --verbose
-
toru_main.exe: [INFO] Loading registry from: test_registry.txt
-
Registry contains 3 entries
-
-
Test 10: Data version parameter
-
==============================
-
-
Test with version string:
-
$ toru --data-version v1.0 inspect test_registry.txt --verbose
-
toru_main.exe: [INFO] Loading registry from: test_registry.txt
-
Registry contains 3 entries
-
-
Test 11: Help system
-
===================
-
-
Test main help:
-
$ toru --help | head -10
-
TORU(1) * Toru Manual * TORU(1) (glob)
-
-
-
-
NAME
-
toru - Toru data repository management tool
-
-
SYNOPSIS
-
toru [COMMAND]…
-
-
-
Test inspect subcommand help:
-
$ toru inspect --help | head -10
-
TORU-INSPECT(1) * Toru Manual * TORU-INSPECT(1) (glob)
-
-
-
-
NAME
-
toru-inspect - Inspect a registry file or URL
-
-
SYNOPSIS
-
toru-inspect [OPTION]… REGISTRY
-
-
-
Test validate subcommand help:
-
$ toru validate --help | head -10
-
TORU-VALIDATE(1) * Toru Manual * TORU-VALIDATE(1) (glob)
-
-
-
-
NAME
-
toru-validate - Validate a registry file format and integrity
-
-
SYNOPSIS
-
toru-validate [OPTION]… REGISTRY
-
-
-
Test convert subcommand help:
-
$ toru convert --help | head -10
-
TORU-CONVERT(1) * Toru Manual * TORU-CONVERT(1) (glob)
-
-
-
-
NAME
-
toru-convert - Convert registry between different formats or sources
-
-
SYNOPSIS
-
toru-convert [OPTION]… INPUT OUTPUT
-
-
-
Test 12: Environment variable documentation
-
==========================================
-
-
Test that help includes XDG information:
-
$ toru --help | grep -A 5 "ENVIRONMENT VARIABLES"
-
ENVIRONMENT VARIABLES
-
Toru respects the XDG Base Directory Specification:
-
-
XDG_CACHE_HOME - Override default cache directory location
-
-
TORU_CACHE_DIR - Application-specific cache directory override
-
-
Test 13: Edge cases and boundary conditions
-
==========================================
-
-
Test very long registry file:
-
$ python3 -c "
-
> for i in range(1000):
-
> print(f'file{i:04d}.txt e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855')
-
> " > large_registry.txt
-
$ toru inspect large_registry.txt
-
Registry contains 1000 entries
-
-
Test registry with comments and blank lines:
-
$ cat > commented_registry.txt << 'EOF'
-
> # This is a test registry
-
> # With comments and blank lines
-
>
-
> simple.txt e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
-
>
-
> # Another comment
-
> data.csv d613c3cca7a0aafd47e7cd81c7ee0268504b13e8c24b2668dcde8a86386c5cef
-
>
-
> EOF
-
-
$ toru inspect commented_registry.txt
-
Registry contains 2 entries
-
-
Test registry with unusual file names:
-
$ cat > unusual_names.txt << 'EOF'
-
> file with spaces.txt e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
-
> file-with-dashes.txt e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
-
> file_with_underscores.txt e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
-
> file.with.dots.txt e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
-
> file123numbers.txt e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
-
> EOF
-
-
$ toru inspect unusual_names.txt
-
Registry contains 5 entries
-
-
Test 14: Performance and scalability
-
===================================
-
-
Test registry with many file extensions:
-
$ python3 -c "
-
> import string
-
> extensions = [f'.{c}{c}{c}' for c in string.ascii_lowercase[:10]]
-
> for i, ext in enumerate(extensions):
-
> print(f'file{i:03d}{ext} e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855')
-
> " > many_extensions.txt
-
$ toru inspect --stats many_extensions.txt
-
Registry contains 10 entries
-
-
Registry Statistics:
-
===================
-
Hash algorithms:
-
sha256: 10 entries
-
-
File extensions:
-
.aaa: 1 files
-
.bbb: 1 files
-
.ccc: 1 files
-
.ddd: 1 files
-
.eee: 1 files
-
.fff: 1 files
-
.ggg: 1 files
-
.hhh: 1 files
-
.iii: 1 files
-
.jjj: 1 files
-
-
Cleanup:
-
$ rm -rf /tmp/toru_test_home
-93
stack/toru/test/downloader_demo.ml
···
-
open Toru
-
-
let demo_downloader_usage () =
-
Printf.printf "Toru Downloader System Demo\n";
-
Printf.printf "===========================\n\n";
-
-
Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
-
(* 1. Show available downloaders *)
-
Printf.printf "1. Detecting available downloaders:\n";
-
let available = Downloader.Downloaders.detect_available ~env in
-
List.iter (fun (name, _) ->
-
Printf.printf " ✓ %s is available\n" name
-
) available;
-
Printf.printf "\n";
-
-
(* 2. Create downloader with authentication *)
-
Printf.printf "2. Creating curl downloader with authentication:\n";
-
let (module D : Downloader.DOWNLOADER) = Downloader.Downloaders.curl () in
-
let auth = Downloader.Config.{ username = Some "demo_user"; password = Some "demo_pass" } in
-
let authenticated_downloader = D.create ~sw ~env ~auth () in
-
Printf.printf " ✓ Created authenticated %s downloader\n" (D.name authenticated_downloader);
-
Printf.printf " ✓ Resume support: %b\n\n" (D.supports_resume authenticated_downloader);
-
-
(* 3. Create regular downloader *)
-
Printf.printf "3. Creating regular downloader:\n";
-
let regular_downloader = D.create ~sw ~env () in
-
Printf.printf " ✓ Created regular %s downloader\n\n" (D.name regular_downloader);
-
-
(* 4. Download a test file *)
-
Printf.printf "4. Downloading test file with hash verification:\n";
-
let test_url = "https://httpbin.org/robots.txt" in
-
let dest_path = Eio.Path.(env#fs / "demo_download.txt") in
-
-
(* First download to compute hash *)
-
(match D.download regular_downloader ~url:test_url ~dest:dest_path () with
-
| Ok () ->
-
let computed_hash = Hash.compute Hash.SHA256 dest_path in
-
Printf.printf " ✓ Downloaded file successfully\n";
-
Printf.printf " ✓ Computed SHA256: %s\n" (Hash.value computed_hash);
-
-
(* Clean up and re-download with hash verification *)
-
Eio.Path.unlink dest_path;
-
Printf.printf " ✓ Testing hash verification...\n";
-
-
(match D.download regular_downloader ~url:test_url ~dest:dest_path ~hash:computed_hash () with
-
| Ok () ->
-
Printf.printf " ✓ Hash verification successful!\n";
-
Eio.Path.unlink dest_path;
-
| Error msg ->
-
Printf.printf " ✗ Hash verification failed: %s\n" msg);
-
-
| Error msg ->
-
Printf.printf " ✗ Download failed: %s\n" msg);
-
Printf.printf "\n";
-
-
(* 5. Test resume functionality *)
-
Printf.printf "5. Testing resume functionality:\n";
-
(match D.download regular_downloader ~url:test_url ~dest:dest_path ~resume:true () with
-
| Ok () ->
-
Printf.printf " ✓ Resume download successful\n";
-
Eio.Path.unlink dest_path;
-
| Error msg ->
-
Printf.printf " ✗ Resume download failed: %s\n" msg);
-
Printf.printf "\n";
-
-
(* 6. Test CLI selection *)
-
Printf.printf "6. Testing CLI downloader selection:\n";
-
let (module CLI_D) = Downloader.Cli.create_downloader ~env `Auto in
-
Printf.printf " ✓ CLI auto-selected downloader available\n";
-
Printf.printf "\n";
-
-
(* 7. Test string-based selection *)
-
Printf.printf "7. Testing string-based downloader selection:\n";
-
(match Downloader.Downloaders.of_string "curl" with
-
| Some (module Selected) ->
-
Printf.printf " ✓ Successfully selected curl via string\n"
-
| None ->
-
Printf.printf " ✗ Failed to select curl via string\n");
-
Printf.printf "\n";
-
-
Printf.printf "Demo completed successfully!\n";
-
Printf.printf "\nKey features demonstrated:\n";
-
Printf.printf "• Automatic downloader detection\n";
-
Printf.printf "• Authentication support (username/password)\n";
-
Printf.printf "• Hash verification (SHA256)\n";
-
Printf.printf "• Resume functionality\n";
-
Printf.printf "• Error handling\n";
-
Printf.printf "• CLI integration\n";
-
Printf.printf "• Multiple selection methods\n"
-
-
let () = demo_downloader_usage ()
-84
stack/toru/test/dune
···
-
(executable
-
(public_name test_toru)
-
(name test_toru)
-
(libraries toru eio eio_main))
-
-
(executable
-
(public_name test_registry)
-
(name test_registry)
-
(libraries toru eio eio_main))
-
-
(executable
-
(public_name test_registry_real)
-
(name test_registry_real)
-
(libraries toru eio eio_main))
-
-
(executable
-
(public_name test_tessera_load)
-
(name test_tessera_load)
-
(libraries toru eio eio_main))
-
-
(executable
-
(public_name test_cache)
-
(name test_cache)
-
(libraries toru eio eio_main))
-
-
(executable
-
(public_name test_downloader)
-
(name test_downloader)
-
(libraries toru eio eio_main))
-
-
(executable
-
(public_name test_curl_download)
-
(name test_curl_download)
-
(libraries toru eio eio_main))
-
-
(executable
-
(public_name test_hash)
-
(name test_hash)
-
(libraries toru eio eio_main))
-
-
(executable
-
(public_name test_python_cross_validation)
-
(name test_python_cross_validation)
-
(libraries toru eio eio_main yojson))
-
-
(executable
-
(public_name test_cache_xdg)
-
(name test_cache_xdg)
-
(libraries toru eio eio_main unix))
-
-
(executable
-
(public_name test_xdg_integration)
-
(name test_xdg_integration)
-
(libraries toru eio eio_main xdg))
-
-
(executable
-
(public_name test_downloader_comprehensive)
-
(name test_downloader_comprehensive)
-
(libraries toru eio eio_main))
-
-
(executable
-
(public_name downloader_demo)
-
(name downloader_demo)
-
(libraries toru eio eio_main))
-
-
(executable
-
(public_name test_tessera_integration)
-
(name test_tessera_integration)
-
(libraries toru eio eio_main))
-
-
(executable
-
(public_name test_streaming_hash)
-
(name test_streaming_hash)
-
(libraries toru eio eio_main))
-
-
(executable
-
(public_name test_simple_stream)
-
(name test_simple_stream)
-
(libraries toru eio eio_main))
-
-
(executable
-
(public_name test_chunk_operations)
-
(name test_chunk_operations)
-
(libraries cacheio eio eio_main))
-207
stack/toru/test/python/generate_pooch_registry.py
···
-
#!/usr/bin/env -S uv run
-
# /// script
-
# requires-python = ">=3.8"
-
# dependencies = [
-
# "pooch>=1.7.0",
-
# ]
-
# ///
-
-
"""
-
Generate a Pooch registry file for testing Toru OCaml implementation.
-
This creates sample data files and a registry with various hash formats.
-
-
Usage:
-
uv run generate_pooch_registry.py
-
"""
-
-
import os
-
import hashlib
-
import tempfile
-
from pathlib import Path
-
import pooch
-
import json
-
from datetime import datetime
-
-
def create_sample_files(base_dir):
-
"""Create sample files with known content for testing."""
-
files = {
-
"data/simple.txt": b"Hello, World!",
-
"data/numbers.csv": b"1,2,3\n4,5,6\n7,8,9\n",
-
"data/unicode.txt": "Hello, 世界! 🌍\n".encode('utf-8'),
-
"models/small.bin": bytes([i % 256 for i in range(1024)]), # 1KB binary
-
"docs/readme.md": b"# Sample Dataset\n\nThis is a test dataset for Toru.",
-
"config.json": json.dumps({"version": "1.0", "test": True}).encode('utf-8'),
-
".hidden/secret.txt": b"Hidden file content",
-
"empty.txt": b"", # Empty file
-
}
-
-
created_files = {}
-
for rel_path, content in files.items():
-
file_path = base_dir / rel_path
-
file_path.parent.mkdir(parents=True, exist_ok=True)
-
file_path.write_bytes(content)
-
created_files[rel_path] = {
-
'path': file_path,
-
'content': content,
-
'size': len(content)
-
}
-
-
return created_files
-
-
def calculate_hashes(file_info):
-
"""Calculate SHA256, SHA1, and MD5 hashes for files."""
-
hashes = {}
-
for rel_path, info in file_info.items():
-
content = info['content']
-
hashes[rel_path] = {
-
'sha256': hashlib.sha256(content).hexdigest(),
-
'sha1': hashlib.sha1(content).hexdigest(),
-
'md5': hashlib.md5(content).hexdigest(),
-
'size': info['size']
-
}
-
return hashes
-
-
def generate_pooch_registry(file_hashes, output_path, hash_algo='sha256'):
-
"""Generate a Pooch registry file."""
-
registry = {}
-
-
# Write registry in Pooch format
-
with open(output_path, 'w') as f:
-
f.write(f"# Pooch registry generated on {datetime.now().isoformat()}\n")
-
f.write(f"# Algorithm: {hash_algo}\n")
-
for filename, hashes in sorted(file_hashes.items()):
-
file_hash = hashes[hash_algo]
-
registry[filename] = file_hash
-
# Pooch includes algorithm prefix for non-SHA256
-
if hash_algo != 'sha256':
-
f.write(f"{filename} {hash_algo}:{file_hash}\n")
-
else:
-
f.write(f"{filename} {file_hash}\n")
-
-
return registry
-
-
def generate_mixed_registry(file_hashes, output_path):
-
"""Generate a registry with mixed hash formats for testing."""
-
with open(output_path, 'w') as f:
-
f.write("# Mixed hash format registry for testing\n")
-
f.write("# This tests various hash algorithm prefixes\n\n")
-
-
# Mix different hash formats
-
for i, (rel_path, hashes) in enumerate(sorted(file_hashes.items())):
-
if i % 3 == 0:
-
# SHA256 without prefix (default)
-
f.write(f"{rel_path} {hashes['sha256']}\n")
-
elif i % 3 == 1:
-
# SHA1 with prefix
-
f.write(f"{rel_path} sha1:{hashes['sha1']}\n")
-
else:
-
# MD5 with prefix
-
f.write(f"{rel_path} md5:{hashes['md5']}\n")
-
-
# Add comments between some entries
-
if i == 2:
-
f.write("\n# Additional files\n")
-
-
def generate_json_metadata(file_hashes, output_path):
-
"""Generate JSON metadata file with all hash algorithms."""
-
metadata = {
-
"generated": datetime.now().isoformat(),
-
"generator": "pooch_test_generator.py",
-
"files": []
-
}
-
-
for rel_path, hashes in sorted(file_hashes.items()):
-
metadata["files"].append({
-
"path": rel_path,
-
"size": hashes['size'],
-
"sha256": hashes['sha256'],
-
"sha1": hashes['sha1'],
-
"md5": hashes['md5']
-
})
-
-
with open(output_path, 'w') as f:
-
json.dump(metadata, f, indent=2)
-
-
def test_pooch_loading(registry_path, base_url="https://example.com/data/"):
-
"""Test that Pooch can load the generated registry."""
-
# Create a Pooch instance
-
pup = pooch.create(
-
path=pooch.os_cache("toru_test"),
-
base_url=base_url,
-
registry=None,
-
version="v1.0"
-
)
-
-
# Load the registry
-
pup.load_registry(registry_path)
-
-
print(f"Pooch successfully loaded registry with {len(pup.registry)} entries")
-
for filename in list(pup.registry.keys())[:3]:
-
print(f" - {filename}: {pup.registry[filename]}")
-
-
return pup
-
-
def main():
-
"""Main function to generate test registries."""
-
# Create test directory structure
-
test_dir = Path("test_data")
-
test_dir.mkdir(exist_ok=True)
-
-
print("Creating sample files...")
-
file_info = create_sample_files(test_dir)
-
print(f"Created {len(file_info)} sample files")
-
-
print("\nCalculating hashes...")
-
file_hashes = calculate_hashes(file_info)
-
-
# Generate registries with different formats
-
print("\nGenerating registries...")
-
-
# 1. Standard SHA256 registry (Pooch default)
-
registry_sha256 = generate_pooch_registry(
-
file_hashes,
-
"test_registry_sha256.txt",
-
'sha256'
-
)
-
print(f"Generated SHA256 registry with {len(registry_sha256)} entries")
-
-
# 2. SHA1 registry
-
registry_sha1 = generate_pooch_registry(
-
file_hashes,
-
"test_registry_sha1.txt",
-
'sha1'
-
)
-
print(f"Generated SHA1 registry with {len(registry_sha1)} entries")
-
-
# 3. MD5 registry
-
registry_md5 = generate_pooch_registry(
-
file_hashes,
-
"test_registry_md5.txt",
-
'md5'
-
)
-
print(f"Generated MD5 registry with {len(registry_md5)} entries")
-
-
# 4. Mixed format registry
-
generate_mixed_registry(file_hashes, "test_registry_mixed.txt")
-
print("Generated mixed format registry")
-
-
# 5. JSON metadata (for verification)
-
generate_json_metadata(file_hashes, "test_metadata.json")
-
print("Generated JSON metadata file")
-
-
# Test loading with Pooch
-
print("\nTesting Pooch compatibility...")
-
test_pooch_loading("test_registry_sha256.txt")
-
test_pooch_loading("test_registry_mixed.txt")
-
-
print("\n✅ All registries generated successfully!")
-
print("\nGenerated files:")
-
print(" - test_data/ (sample data directory)")
-
print(" - test_registry_sha256.txt")
-
print(" - test_registry_sha1.txt")
-
print(" - test_registry_md5.txt")
-
print(" - test_registry_mixed.txt")
-
print(" - test_metadata.json")
-
-
if __name__ == "__main__":
-
main()
-11
stack/toru/test/python/pyproject.toml
···
-
[project]
-
name = "toru-test-generator"
-
version = "0.1.0"
-
description = "Generate Pooch test registries for Toru OCaml testing"
-
requires-python = ">=3.8"
-
dependencies = [
-
"pooch>=1.7.0",
-
]
-
-
[tool.uv]
-
dev-dependencies = []
-1
stack/toru/test/python/test_data/.hidden/secret.txt
···
-
Hidden file content
-1
stack/toru/test/python/test_data/config.json
···
-
{"version": "1.0", "test": true}
-3
stack/toru/test/python/test_data/data/numbers.csv
···
-
1,2,3
-
4,5,6
-
7,8,9
-1
stack/toru/test/python/test_data/data/simple.txt
···
-
Hello, World!
-1
stack/toru/test/python/test_data/data/unicode.txt
···
-
Hello, 世界! 🌍
-3
stack/toru/test/python/test_data/docs/readme.md
···
-
# Sample Dataset
-
-
This is a test dataset for Toru.
stack/toru/test/python/test_data/empty.txt

This is a binary file and will not be displayed.

stack/toru/test/python/test_data/models/small.bin

This is a binary file and will not be displayed.

-62
stack/toru/test/python/test_metadata.json
···
-
{
-
"generated": "2025-08-21T08:19:43.410183",
-
"generator": "pooch_test_generator.py",
-
"files": [
-
{
-
"path": ".hidden/secret.txt",
-
"size": 19,
-
"sha256": "9ead1fad59f50f905f6a76154e2d0bc1a31c46f223e2ec81e278aa2ee6a10e25",
-
"sha1": "ce481b91ded06b438fe4fb8d21d94a2ff56b6d92",
-
"md5": "e6f92a0add5468b0cda31909995bd207"
-
},
-
{
-
"path": "config.json",
-
"size": 32,
-
"sha256": "dcfcfeb20e6334be05d4ed2e39da77ffb84b80bf835dc20ac9963f5e95820b94",
-
"sha1": "3ddf402b6e0e5370a6ea18e67685e47bb362be50",
-
"md5": "823643193695572be03e4a5456b2b820"
-
},
-
{
-
"path": "data/numbers.csv",
-
"size": 18,
-
"sha256": "d613c3cca7a0aafd47e7cd81c7ee0268504b13e8c24b2668dcde8a86386c5cef",
-
"sha1": "6a65966e545d4fc87687db23c378f1bf4c1c4cdf",
-
"md5": "9b0137441c80f92c041748e1cfce0631"
-
},
-
{
-
"path": "data/simple.txt",
-
"size": 13,
-
"sha256": "dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f",
-
"sha1": "0a0a9f2a6772942557ab5355d76af442f8f65e01",
-
"md5": "65a8e27d8879283831b664bd8b7f0ad4"
-
},
-
{
-
"path": "data/unicode.txt",
-
"size": 20,
-
"sha256": "dcccba292708e4d9b8f1e2af482b09b01131ea6e22c3b10173e0d75c8ac0c310",
-
"sha1": "43d79663ffe69a0befd5307bcea7295cbdc47e33",
-
"md5": "d8b80e3903afd84ed3708ef95b65c531"
-
},
-
{
-
"path": "docs/readme.md",
-
"size": 50,
-
"sha256": "23c8cf9515ed231a55e63e1d89399a1eac4a529c2b4ac5af29a61af03e3afdd4",
-
"sha1": "929efb30a9dad70c082af7f4eea3c369896fba5f",
-
"md5": "9f6c0d6f6f7db11a8e9afc7406e17cab"
-
},
-
{
-
"path": "empty.txt",
-
"size": 0,
-
"sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
-
"sha1": "da39a3ee5e6b4b0d3255bfef95601890afd80709",
-
"md5": "d41d8cd98f00b204e9800998ecf8427e"
-
},
-
{
-
"path": "models/small.bin",
-
"size": 1024,
-
"sha256": "785b0751fc2c53dc14a4ce3d800e69ef9ce1009eb327ccf458afe09c242c26c9",
-
"sha1": "5b00669c480d5cffbdfa8bdba99561160f2d1b77",
-
"md5": "b2ea9f7fcea831a4a63b213f41a8855b"
-
}
-
]
-
}
-10
stack/toru/test/python/test_registry_md5.txt
···
-
# Pooch registry generated on 2025-08-21T08:19:43.410092
-
# Algorithm: md5
-
.hidden/secret.txt md5:e6f92a0add5468b0cda31909995bd207
-
config.json md5:823643193695572be03e4a5456b2b820
-
data/numbers.csv md5:9b0137441c80f92c041748e1cfce0631
-
data/simple.txt md5:65a8e27d8879283831b664bd8b7f0ad4
-
data/unicode.txt md5:d8b80e3903afd84ed3708ef95b65c531
-
docs/readme.md md5:9f6c0d6f6f7db11a8e9afc7406e17cab
-
empty.txt md5:d41d8cd98f00b204e9800998ecf8427e
-
models/small.bin md5:b2ea9f7fcea831a4a63b213f41a8855b
-13
stack/toru/test/python/test_registry_mixed.txt
···
-
# Mixed hash format registry for testing
-
# This tests various hash algorithm prefixes
-
-
.hidden/secret.txt 9ead1fad59f50f905f6a76154e2d0bc1a31c46f223e2ec81e278aa2ee6a10e25
-
config.json sha1:3ddf402b6e0e5370a6ea18e67685e47bb362be50
-
data/numbers.csv md5:9b0137441c80f92c041748e1cfce0631
-
-
# Additional files
-
data/simple.txt dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f
-
data/unicode.txt sha1:43d79663ffe69a0befd5307bcea7295cbdc47e33
-
docs/readme.md md5:9f6c0d6f6f7db11a8e9afc7406e17cab
-
empty.txt e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
-
models/small.bin sha1:5b00669c480d5cffbdfa8bdba99561160f2d1b77
-10
stack/toru/test/python/test_registry_sha1.txt
···
-
# Pooch registry generated on 2025-08-21T08:19:43.410026
-
# Algorithm: sha1
-
.hidden/secret.txt sha1:ce481b91ded06b438fe4fb8d21d94a2ff56b6d92
-
config.json sha1:3ddf402b6e0e5370a6ea18e67685e47bb362be50
-
data/numbers.csv sha1:6a65966e545d4fc87687db23c378f1bf4c1c4cdf
-
data/simple.txt sha1:0a0a9f2a6772942557ab5355d76af442f8f65e01
-
data/unicode.txt sha1:43d79663ffe69a0befd5307bcea7295cbdc47e33
-
docs/readme.md sha1:929efb30a9dad70c082af7f4eea3c369896fba5f
-
empty.txt sha1:da39a3ee5e6b4b0d3255bfef95601890afd80709
-
models/small.bin sha1:5b00669c480d5cffbdfa8bdba99561160f2d1b77
-10
stack/toru/test/python/test_registry_sha256.txt
···
-
# Pooch registry generated on 2025-08-21T08:19:43.409944
-
# Algorithm: sha256
-
.hidden/secret.txt 9ead1fad59f50f905f6a76154e2d0bc1a31c46f223e2ec81e278aa2ee6a10e25
-
config.json dcfcfeb20e6334be05d4ed2e39da77ffb84b80bf835dc20ac9963f5e95820b94
-
data/numbers.csv d613c3cca7a0aafd47e7cd81c7ee0268504b13e8c24b2668dcde8a86386c5cef
-
data/simple.txt dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f
-
data/unicode.txt dcccba292708e4d9b8f1e2af482b09b01131ea6e22c3b10173e0d75c8ac0c310
-
docs/readme.md 23c8cf9515ed231a55e63e1d89399a1eac4a529c2b4ac5af29a61af03e3afdd4
-
empty.txt e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
-
models/small.bin 785b0751fc2c53dc14a4ce3d800e69ef9ce1009eb327ccf458afe09c242c26c9
-183
stack/toru/test/test_cache.ml
···
-
open Toru
-
-
let create_test_file cache filename content =
-
let path = Cache.file_path cache filename in
-
(* Ensure cache directory exists *)
-
Cache.ensure_dir cache;
-
(* Create parent directory if filename contains path separators *)
-
(try
-
if String.contains filename '/' then (
-
let dir_part = String.sub filename 0 (String.rindex filename '/') in
-
let dir_path = Cache.file_path cache dir_part in
-
let mkdir_p path =
-
try Eio.Path.mkdir ~perm:0o755 path
-
with _ -> () (* Directory might already exist *)
-
in
-
mkdir_p dir_path
-
);
-
Eio.Path.with_open_out path ~create:(`If_missing 0o644) (fun flow ->
-
Eio.Flow.copy_string content flow
-
)
-
with
-
| exn -> Printf.printf "Warning: Failed to create test file %s: %s\n" filename (Printexc.to_string exn)
-
)
-
-
let test_basic_functionality env sw =
-
Printf.printf "=== Testing basic Cache functionality ===\n";
-
-
(* Create a test cache *)
-
let cache = Cache.create ~sw ~fs:env#fs "test_cache" in
-
Printf.printf "Created cache at: %s\n"
-
(Eio.Path.native_exn (Cache.base_path cache));
-
-
(* Test default cache path *)
-
let default_path = Cache.default_cache_path ~app_name:"test-toru" () in
-
Printf.printf "Default cache path: %s\n" default_path;
-
-
(* Test XDG cache path detection *)
-
let cache_with_version = Cache.create ~sw ~fs:env#fs ~version:"v1.0" "test_cache_versioned" in
-
Printf.printf "Cache with version at: %s\n"
-
(Eio.Path.native_exn (Cache.base_path cache_with_version));
-
-
(* Test directory creation *)
-
Printf.printf "Creating cache directories...\n";
-
Cache.ensure_dir cache;
-
Cache.ensure_dir cache_with_version;
-
-
(* Test file existence check *)
-
let test_file_exists = Cache.exists cache "nonexistent.txt" in
-
Printf.printf "Non-existent file exists: %b\n" test_file_exists;
-
-
(* Test cache listing (should be empty initially) *)
-
let files = Cache.list_files cache in
-
Printf.printf "Cache contains %d files initially\n" (List.length files);
-
-
(* Test cache size *)
-
let size = Cache.size_bytes cache in
-
Printf.printf "Initial cache size: %Ld bytes\n" size;
-
-
cache
-
-
let test_file_operations _env _sw cache =
-
Printf.printf "\n=== Testing file operations ===\n";
-
-
(* Create some test files *)
-
create_test_file cache "test1.txt" "Hello World!";
-
create_test_file cache "subdir/test2.txt" "This is a test file in a subdirectory.";
-
create_test_file cache "test3.txt" "Another test file with different content for size testing.";
-
-
Printf.printf "Created test files\n";
-
-
(* Test file existence *)
-
let exists1 = Cache.exists cache "test1.txt" in
-
let exists2 = Cache.exists cache "subdir/test2.txt" in
-
let exists3 = Cache.exists cache "nonexistent.txt" in
-
Printf.printf "test1.txt exists: %b\n" exists1;
-
Printf.printf "subdir/test2.txt exists: %b\n" exists2;
-
Printf.printf "nonexistent.txt exists: %b\n" exists3;
-
-
(* Test file listing *)
-
let files = Cache.list_files cache in
-
Printf.printf "Cache now contains %d files:\n" (List.length files);
-
List.iter (fun f -> Printf.printf " - %s\n" f) (List.sort String.compare files);
-
-
(* Test cache size *)
-
let size = Cache.size_bytes cache in
-
Printf.printf "Cache size after adding files: %Ld bytes\n" size
-
-
let test_file_info _env _sw cache =
-
Printf.printf "\n=== Testing file info ===\n";
-
-
(* Test file info for existing file *)
-
(match Cache.file_info cache "test1.txt" with
-
| Some info ->
-
Printf.printf "test1.txt: size=%Ld bytes, mtime=%.0f\n" info.size info.mtime
-
| None ->
-
Printf.printf "Could not get info for test1.txt\n");
-
-
(* Test file info for non-existent file *)
-
(match Cache.file_info cache "nonexistent.txt" with
-
| Some _ ->
-
Printf.printf "ERROR: Got info for non-existent file\n"
-
| None ->
-
Printf.printf "Correctly returned None for non-existent file\n")
-
-
let test_usage_stats _env _sw cache =
-
Printf.printf "\n=== Testing usage stats ===\n";
-
-
let stats = Cache.usage_stats cache in
-
Printf.printf "Cache statistics:\n";
-
Printf.printf " Total size: %Ld bytes\n" stats.total_size;
-
Printf.printf " File count: %d\n" stats.file_count;
-
Printf.printf " Oldest file: %.0f (Unix timestamp)\n" stats.oldest;
-
Printf.printf " Newest file: %.0f (Unix timestamp)\n" stats.newest
-
-
let test_trim_operations _env _sw cache =
-
Printf.printf "\n=== Testing trim operations ===\n";
-
-
(* Get initial stats *)
-
let initial_stats = Cache.usage_stats cache in
-
Printf.printf "Before trim - Files: %d, Size: %Ld bytes\n"
-
initial_stats.file_count initial_stats.total_size;
-
-
(* Test trim by size (set very small limit to force trimming) *)
-
Cache.trim_to_size cache 20L; (* 20 bytes *)
-
let after_size_trim = Cache.usage_stats cache in
-
Printf.printf "After size trim (20 bytes) - Files: %d, Size: %Ld bytes\n"
-
after_size_trim.file_count after_size_trim.total_size;
-
-
(* Create a new file for age testing *)
-
create_test_file cache "new_test.txt" "New file for age testing";
-
-
(* Test trim by age (0 days = remove all files) *)
-
Cache.trim_by_age cache 0.0;
-
let after_age_trim = Cache.usage_stats cache in
-
Printf.printf "After age trim (0 days) - Files: %d, Size: %Ld bytes\n"
-
after_age_trim.file_count after_age_trim.total_size
-
-
let test_vacuum _env _sw cache =
-
Printf.printf "\n=== Testing vacuum operations ===\n";
-
-
(* Create some files and then remove them to create empty directories *)
-
create_test_file cache "temp/deep/nested/file.txt" "temporary";
-
let temp_path = Cache.file_path cache "temp/deep/nested/file.txt" in
-
(try Eio.Path.unlink temp_path with _ -> ());
-
-
Printf.printf "Created and removed nested file to test vacuum\n";
-
-
(* Run vacuum to clean up empty directories *)
-
Cache.vacuum cache;
-
Printf.printf "Vacuum completed\n"
-
-
let test_clear _env _sw cache =
-
Printf.printf "\n=== Testing cache clear ===\n";
-
-
(* Add some files first *)
-
create_test_file cache "clear_test1.txt" "test content 1";
-
create_test_file cache "clear_test2.txt" "test content 2";
-
-
let before_clear = Cache.usage_stats cache in
-
Printf.printf "Before clear - Files: %d\n" before_clear.file_count;
-
-
(* Clear the cache *)
-
Cache.clear cache;
-
-
let after_clear = Cache.usage_stats cache in
-
Printf.printf "After clear - Files: %d\n" after_clear.file_count
-
-
let test_cache_functionality () =
-
Printf.printf "Testing Cache functionality...\n";
-
-
Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let cache = test_basic_functionality env sw in
-
test_file_operations env sw cache;
-
test_file_info env sw cache;
-
test_usage_stats env sw cache;
-
test_trim_operations env sw cache;
-
test_vacuum env sw cache;
-
test_clear env sw cache;
-
-
Printf.printf "\n=== All cache tests completed successfully ===\n"
-
-
let () = test_cache_functionality ()
-159
stack/toru/test/test_cache_xdg.ml
···
-
open Toru
-
-
(* Test XDG Base Directory compliance and platform detection *)
-
let test_xdg_compliance () =
-
Printf.printf "=== Testing XDG Base Directory Compliance ===\n";
-
-
(* Test default path without environment override *)
-
let default_path = Cache.default_cache_path ~app_name:"test-app" () in
-
Printf.printf "Default cache path: %s\n" default_path;
-
-
(* Check if path contains expected platform-specific component *)
-
let is_macos = try
-
let ic = Unix.open_process_in "uname -s 2>/dev/null" in
-
let result = String.trim (input_line ic) in
-
let _ = Unix.close_process_in ic in
-
result = "Darwin"
-
with _ -> false in
-
-
if is_macos then (
-
let expected_macos_path = Filename.concat (Filename.concat (Sys.getenv "HOME") "Library") "Caches" in
-
if String.length default_path >= String.length expected_macos_path &&
-
String.sub default_path 0 (String.length expected_macos_path) = expected_macos_path then
-
Printf.printf "✓ macOS: Using ~/Library/Caches correctly\n"
-
else
-
Printf.printf "✗ macOS: Expected ~/Library/Caches path\n"
-
) else (
-
(* Simple check if path contains .cache substring *)
-
let rec contains_substr s substr pos =
-
if pos + String.length substr > String.length s then false
-
else if String.sub s pos (String.length substr) = substr then true
-
else contains_substr s substr (pos + 1)
-
in
-
if contains_substr default_path ".cache" 0 then
-
Printf.printf "✓ Unix/Linux: Using ~/.cache correctly\n"
-
else
-
Printf.printf "✗ Unix/Linux: Expected ~/.cache path\n"
-
);
-
-
(* Test with XDG_CACHE_HOME override *)
-
let original_xdg = Sys.getenv_opt "XDG_CACHE_HOME" in
-
ignore (Sys.signal Sys.sigint (Sys.Signal_handle (fun _ ->
-
(* Restore original XDG_CACHE_HOME on interrupt *)
-
(match original_xdg with
-
| Some v -> Unix.putenv "XDG_CACHE_HOME" v
-
| None -> (* Cannot unset env var *) ());
-
exit 1)));
-
-
(* Set temporary XDG_CACHE_HOME *)
-
Unix.putenv "XDG_CACHE_HOME" "/tmp/test-xdg-cache";
-
let xdg_override_path = Cache.default_cache_path ~app_name:"test-app" () in
-
-
(* Restore original environment *)
-
(match original_xdg with
-
| Some v -> Unix.putenv "XDG_CACHE_HOME" v
-
| None -> (* Cannot unset env var *) ());
-
-
if not is_macos then (
-
let expected_xdg_path = "/tmp/test-xdg-cache" in
-
if String.length xdg_override_path >= String.length expected_xdg_path &&
-
String.sub xdg_override_path 0 (String.length expected_xdg_path) = expected_xdg_path then
-
Printf.printf "✓ XDG_CACHE_HOME override works: %s\n" xdg_override_path
-
else
-
Printf.printf "✗ XDG_CACHE_HOME override failed: %s\n" xdg_override_path
-
) else (
-
Printf.printf "ℹ macOS: XDG override test skipped (uses Library/Caches)\n"
-
);
-
-
Printf.printf "\n"
-
-
let test_versioned_caches () =
-
Printf.printf "=== Testing Versioned Cache Directories ===\n";
-
-
Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
(* Test cache without version *)
-
let cache_no_version = Cache.create ~sw ~fs:env#fs "test_cache_no_version" in
-
let base_path = Cache.base_path cache_no_version in
-
Printf.printf "Cache without version: %s\n" (Eio.Path.native_exn base_path);
-
-
(* Test cache with version *)
-
let cache_with_version = Cache.create ~sw ~fs:env#fs ~version:"v2.1" "test_cache_with_version" in
-
let versioned_path = Cache.base_path cache_with_version in
-
Printf.printf "Cache with version: %s\n" (Eio.Path.native_exn versioned_path);
-
-
(* Test file paths *)
-
let file_path_no_version = Cache.file_path cache_no_version "data.txt" in
-
let file_path_with_version = Cache.file_path cache_with_version "data.txt" in
-
-
Printf.printf "File path without version: %s\n" (Eio.Path.native_exn file_path_no_version);
-
Printf.printf "File path with version: %s\n" (Eio.Path.native_exn file_path_with_version);
-
-
(* Verify version is correctly stored *)
-
Printf.printf "Version accessor test: %s\n"
-
(match Cache.version cache_with_version with
-
| Some v -> v
-
| None -> "no version");
-
-
Printf.printf "\n"
-
-
let test_cache_management () =
-
Printf.printf "=== Testing Advanced Cache Management ===\n";
-
-
Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let cache = Cache.create ~sw ~fs:env#fs "test_cache_mgmt" in
-
Cache.ensure_dir cache;
-
-
(* Create test files with different content sizes *)
-
let test_files = [
-
("small.txt", "small");
-
("medium.txt", String.make 100 'M');
-
("large.txt", String.make 1000 'L');
-
] in
-
-
List.iter (fun (filename, content) ->
-
let path = Cache.file_path cache filename in
-
Eio.Path.with_open_out path ~create:(`If_missing 0o644) (fun flow ->
-
Eio.Flow.copy_string content flow
-
)
-
) test_files;
-
-
Printf.printf "Created test files\n";
-
-
(* Test file_info *)
-
Printf.printf "\nFile information:\n";
-
List.iter (fun (filename, expected_content) ->
-
match Cache.file_info cache filename with
-
| Some info ->
-
let expected_size = String.length expected_content in
-
Printf.printf " %s: size=%Ld (expected=%d), mtime=%.0f\n"
-
filename info.size expected_size info.mtime;
-
if Int64.to_int info.size = expected_size then
-
Printf.printf " ✓ Size matches\n"
-
else
-
Printf.printf " ✗ Size mismatch\n"
-
| None ->
-
Printf.printf " %s: No info available\n" filename
-
) test_files;
-
-
(* Test usage stats *)
-
let stats = Cache.usage_stats cache in
-
Printf.printf "\nUsage statistics:\n";
-
Printf.printf " Total files: %d\n" stats.file_count;
-
Printf.printf " Total size: %Ld bytes\n" stats.total_size;
-
Printf.printf " Age range: %.0f - %.0f\n" stats.oldest stats.newest;
-
-
(* Test trim to size *)
-
Printf.printf "\nTesting trim to size (50 bytes):\n";
-
Cache.trim_to_size cache 50L;
-
let after_trim = Cache.usage_stats cache in
-
Printf.printf " After trim: %d files, %Ld bytes\n"
-
after_trim.file_count after_trim.total_size;
-
-
Printf.printf "\n"
-
-
let () =
-
test_xdg_compliance ();
-
test_versioned_caches ();
-
test_cache_management ()
-39
stack/toru/test/test_curl_download.ml
···
-
open Toru
-
-
let test_curl_download () =
-
Printf.printf "Testing curl download...\n";
-
-
Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
try
-
(* Create curl downloader *)
-
let (module D : Downloader.DOWNLOADER) = Downloader.Downloaders.curl () in
-
let downloader = D.create ~sw ~env () in
-
-
(* Test a small file download *)
-
let test_url = "https://httpbin.org/robots.txt" in
-
let dest_path = Eio.Path.(env#fs / "test_download_curl.txt") in
-
-
Printf.printf "Downloading %s with curl...\n" test_url;
-
-
match D.download downloader ~url:test_url ~dest:dest_path () with
-
| Ok () ->
-
Printf.printf "Curl download successful!\n";
-
(* Check if file exists and has content *)
-
let file_exists = try
-
let _stat = Eio.Path.stat ~follow:false dest_path in true
-
with _ -> false in
-
if file_exists then (
-
Printf.printf "File downloaded and exists\n";
-
(* Clean up *)
-
Eio.Path.unlink dest_path;
-
) else (
-
Printf.printf "File doesn't exist after download\n"
-
)
-
| Error msg ->
-
Printf.printf "Curl download failed: %s\n" msg
-
with
-
| exn ->
-
Printf.printf "Exception during curl test: %s\n" (Printexc.to_string exn)
-
-
let () = test_curl_download ()
-272
stack/toru/test/test_downloader.ml
···
-
open Toru
-
-
let test_downloader_detection () =
-
Printf.printf "Testing downloader detection...\n";
-
-
Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun _sw ->
-
(* Test detecting available downloaders *)
-
let available = Downloader.Downloaders.detect_available ~env in
-
Printf.printf "Available downloaders:\n";
-
List.iter (fun (name, _) ->
-
Printf.printf " - %s\n" name
-
) available;
-
-
(* Test creating default downloader *)
-
if List.length available > 0 then (
-
let _default_downloader = Downloader.Downloaders.create_default ~env in
-
Printf.printf "Default downloader created successfully\n"
-
) else (
-
Printf.printf "No downloaders available - skipping default test\n"
-
);
-
-
Printf.printf "Downloader detection test completed\n"
-
-
let test_wget_download () =
-
Printf.printf "\nTesting wget download...\n";
-
-
Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
try
-
(* Create wget downloader *)
-
let (module D : Downloader.DOWNLOADER) = Downloader.Downloaders.wget () in
-
let downloader = D.create ~sw ~env () in
-
-
(* Test a small file download *)
-
let test_url = "https://httpbin.org/robots.txt" in
-
let dest_path = Eio.Path.(env#fs / "test_download.txt") in
-
-
Printf.printf "Downloading %s...\n" test_url;
-
-
match D.download downloader ~url:test_url ~dest:dest_path () with
-
| Ok () ->
-
Printf.printf "Download successful!\n";
-
(* Check if file exists and has content *)
-
let file_exists = try
-
let _stat = Eio.Path.stat ~follow:false dest_path in true
-
with _ -> false in
-
if file_exists then (
-
Printf.printf "File downloaded and exists\n";
-
(* Clean up *)
-
Eio.Path.unlink dest_path;
-
) else (
-
Printf.printf "File doesn't exist after download\n"
-
)
-
| Error msg ->
-
Printf.printf "Download failed: %s\n" msg
-
with
-
| exn ->
-
Printf.printf "Exception during wget test: %s\n" (Printexc.to_string exn)
-
-
let test_authentication () =
-
Printf.printf "\nTesting authentication support...\n";
-
-
Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
try
-
(* Test wget with authentication *)
-
let (module D : Downloader.DOWNLOADER) = Downloader.Downloaders.wget () in
-
let auth = Downloader.Config.{ username = Some "testuser"; password = Some "testpass" } in
-
let downloader = D.create ~sw ~env ~auth () in
-
-
Printf.printf "Created wget downloader with authentication\n";
-
Printf.printf "Downloader name: %s\n" (D.name downloader);
-
Printf.printf "Supports resume: %b\n" (D.supports_resume downloader);
-
-
(* Test curl with authentication *)
-
let (module D2 : Downloader.DOWNLOADER) = Downloader.Downloaders.curl () in
-
let downloader2 = D2.create ~sw ~env ~auth () in
-
-
Printf.printf "Created curl downloader with authentication\n";
-
Printf.printf "Downloader name: %s\n" (D2.name downloader2);
-
Printf.printf "Supports resume: %b\n" (D2.supports_resume downloader2);
-
-
(* Test authentication without credentials *)
-
let downloader3 = D.create ~sw ~env () in
-
Printf.printf "Created downloader without credentials: %s\n" (D.name downloader3);
-
-
with
-
| exn ->
-
Printf.printf "Exception during authentication test: %s\n" (Printexc.to_string exn)
-
-
let test_resume_functionality () =
-
Printf.printf "\nTesting resume functionality...\n";
-
-
Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
try
-
let (module D : Downloader.DOWNLOADER) = Downloader.Downloaders.wget () in
-
let downloader = D.create ~sw ~env () in
-
-
let test_url = "https://httpbin.org/robots.txt" in
-
let dest_path = Eio.Path.(env#fs / "test_resume.txt") in
-
-
Printf.printf "Testing download with resume enabled...\n";
-
-
match D.download downloader ~url:test_url ~dest:dest_path ~resume:true () with
-
| Ok () ->
-
Printf.printf "Download with resume=true successful\n";
-
let file_exists = try
-
let _stat = Eio.Path.stat ~follow:false dest_path in true
-
with _ -> false in
-
if file_exists then (
-
Printf.printf "Resume test successful - cleaning up\n";
-
Eio.Path.unlink dest_path;
-
)
-
| Error msg ->
-
Printf.printf "Resume download failed: %s\n" msg;
-
-
Printf.printf "Testing download with resume disabled...\n";
-
-
match D.download downloader ~url:test_url ~dest:dest_path ~resume:false () with
-
| Ok () ->
-
Printf.printf "Download with resume=false successful\n";
-
let file_exists = try
-
let _stat = Eio.Path.stat ~follow:false dest_path in true
-
with _ -> false in
-
if file_exists then (
-
Printf.printf "No-resume test successful - cleaning up\n";
-
Eio.Path.unlink dest_path;
-
)
-
| Error msg ->
-
Printf.printf "No-resume download failed: %s\n" msg;
-
with
-
| exn ->
-
Printf.printf "Exception during resume test: %s\n" (Printexc.to_string exn)
-
-
let test_error_handling () =
-
Printf.printf "\nTesting error handling...\n";
-
-
Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
try
-
let (module D : Downloader.DOWNLOADER) = Downloader.Downloaders.wget () in
-
let downloader = D.create ~sw ~env () in
-
-
(* Test with invalid URL *)
-
let invalid_url = "https://this-domain-does-not-exist-12345.com/file.txt" in
-
let dest_path = Eio.Path.(env#fs / "test_error.txt") in
-
-
Printf.printf "Testing error handling with invalid URL...\n";
-
-
match D.download downloader ~url:invalid_url ~dest:dest_path () with
-
| Ok () ->
-
Printf.printf "ERROR: Invalid URL should have failed!\n"
-
| Error msg ->
-
Printf.printf "Error handling successful: %s\n" msg;
-
-
(* Test with invalid file path *)
-
let valid_url = "https://httpbin.org/robots.txt" in
-
let invalid_dest = Eio.Path.(env#fs / "/invalid/path/that/does/not/exist/file.txt") in
-
-
Printf.printf "Testing error handling with invalid destination...\n";
-
-
match D.download downloader ~url:valid_url ~dest:invalid_dest () with
-
| Ok () ->
-
Printf.printf "WARNING: Invalid path might have succeeded unexpectedly\n"
-
| Error msg ->
-
Printf.printf "Path error handling successful: %s\n" msg
-
-
with
-
| exn ->
-
Printf.printf "Exception during error handling test: %s\n" (Printexc.to_string exn)
-
-
let test_hash_verification () =
-
Printf.printf "\nTesting hash verification...\n";
-
-
Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
try
-
let (module D : Downloader.DOWNLOADER) = Downloader.Downloaders.wget () in
-
let downloader = D.create ~sw ~env () in
-
-
let test_url = "https://httpbin.org/robots.txt" in
-
let dest_path = Eio.Path.(env#fs / "test_hash.txt") in
-
-
(* First download without hash to see the actual content *)
-
Printf.printf "Downloading file to compute hash...\n";
-
-
match D.download downloader ~url:test_url ~dest:dest_path () with
-
| Ok () ->
-
(* Compute the actual hash *)
-
let actual_hash = Hash.compute Hash.SHA256 dest_path in
-
Printf.printf "Computed hash: %s\n" (Hash.to_string actual_hash);
-
-
(* Clean up and test with correct hash *)
-
Eio.Path.unlink dest_path;
-
-
Printf.printf "Testing download with correct hash...\n";
-
(match D.download downloader ~url:test_url ~dest:dest_path ~hash:actual_hash () with
-
| Ok () ->
-
Printf.printf "Hash verification successful\n";
-
Eio.Path.unlink dest_path;
-
| Error msg ->
-
Printf.printf "Hash verification failed unexpectedly: %s\n" msg);
-
-
(* Test with incorrect hash *)
-
let wrong_hash = Hash.create Hash.SHA256 "deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef" in
-
Printf.printf "Testing download with incorrect hash...\n";
-
(match D.download downloader ~url:test_url ~dest:dest_path ~hash:wrong_hash () with
-
| Ok () ->
-
Printf.printf "ERROR: Wrong hash should have failed!\n";
-
Eio.Path.unlink dest_path;
-
| Error msg ->
-
Printf.printf "Hash verification error handling successful: %s\n" msg)
-
-
| Error msg ->
-
Printf.printf "Initial download for hash test failed: %s\n" msg
-
with
-
| exn ->
-
Printf.printf "Exception during hash verification test: %s\n" (Printexc.to_string exn)
-
-
let test_downloader_selection () =
-
Printf.printf "\nTesting downloader selection...\n";
-
-
(* Test of_string function *)
-
Printf.printf "Testing of_string selection...\n";
-
-
(match Downloader.Downloaders.of_string "wget" with
-
| Some (module D) -> Printf.printf "Selected wget via of_string\n"
-
| None -> Printf.printf "Failed to select wget\n");
-
-
(match Downloader.Downloaders.of_string "curl" with
-
| Some (module D) -> Printf.printf "Selected curl via of_string\n"
-
| None -> Printf.printf "Failed to select curl\n");
-
-
(match Downloader.Downloaders.of_string "invalid" with
-
| Some (module D) -> Printf.printf "ERROR: Should not select invalid downloader\n"
-
| None -> Printf.printf "Correctly rejected invalid downloader\n");
-
-
(* Test CLI integration *)
-
Printf.printf "Testing CLI downloader creation...\n";
-
-
Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun _sw ->
-
try
-
let (module D1) = Downloader.Cli.create_downloader ~env `Wget in
-
Printf.printf "CLI created wget downloader\n";
-
-
let (module D2) = Downloader.Cli.create_downloader ~env `Curl in
-
Printf.printf "CLI created curl downloader\n";
-
-
let (module D3) = Downloader.Cli.create_downloader ~env `Auto in
-
Printf.printf "CLI created auto downloader\n";
-
with
-
| exn ->
-
Printf.printf "Exception during CLI test: %s\n" (Printexc.to_string exn)
-
-
let run_all_tests () =
-
Printf.printf "Comprehensive Downloader Tests\n";
-
Printf.printf "==============================\n";
-
-
test_downloader_detection ();
-
test_authentication ();
-
test_resume_functionality ();
-
test_error_handling ();
-
test_hash_verification ();
-
test_downloader_selection ();
-
test_wget_download ();
-
-
Printf.printf "\nAll comprehensive tests completed!\n"
-
-
let () = run_all_tests ()
-159
stack/toru/test/test_downloader_comprehensive.ml
···
-
open Toru
-
-
let test_curl_functionality () =
-
Printf.printf "Testing curl downloader functionality...\n";
-
-
Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
try
-
let (module D : Downloader.DOWNLOADER) = Downloader.Downloaders.curl () in
-
-
(* Test basic downloader creation *)
-
let downloader = D.create ~sw ~env () in
-
Printf.printf "Created curl downloader: %s\n" (D.name downloader);
-
Printf.printf "Supports resume: %b\n" (D.supports_resume downloader);
-
-
(* Test with authentication *)
-
let auth = Downloader.Config.{ username = Some "testuser"; password = Some "testpass" } in
-
let _auth_downloader = D.create ~sw ~env ~auth () in
-
Printf.printf "Created authenticated curl downloader\n";
-
-
(* Test successful download *)
-
let test_url = "https://httpbin.org/robots.txt" in
-
let dest_path = Eio.Path.(env#fs / "test_download.txt") in
-
-
Printf.printf "Testing basic download...\n";
-
(match D.download downloader ~url:test_url ~dest:dest_path () with
-
| Ok () ->
-
Printf.printf "Basic download successful\n";
-
let file_exists = try
-
let _stat = Eio.Path.stat ~follow:false dest_path in true
-
with _ -> false in
-
if file_exists then (
-
Printf.printf "File verified and exists\n";
-
Eio.Path.unlink dest_path;
-
)
-
| Error msg ->
-
Printf.printf "Basic download failed: %s\n" msg);
-
-
(* Test resume functionality *)
-
Printf.printf "Testing resume functionality...\n";
-
(match D.download downloader ~url:test_url ~dest:dest_path ~resume:true () with
-
| Ok () ->
-
Printf.printf "Resume download successful\n";
-
let file_exists = try
-
let _stat = Eio.Path.stat ~follow:false dest_path in true
-
with _ -> false in
-
if file_exists then (
-
Printf.printf "Resume test file verified\n";
-
Eio.Path.unlink dest_path;
-
)
-
| Error msg ->
-
Printf.printf "Resume download failed: %s\n" msg);
-
-
(* Test hash verification *)
-
Printf.printf "Testing hash verification...\n";
-
(match D.download downloader ~url:test_url ~dest:dest_path () with
-
| Ok () ->
-
let computed_hash = Hash.compute Hash.SHA256 dest_path in
-
Printf.printf "Computed hash: %s\n" (Hash.to_string computed_hash);
-
-
(* Clean up and test with computed hash *)
-
Eio.Path.unlink dest_path;
-
-
(* Download again with correct hash *)
-
(match D.download downloader ~url:test_url ~dest:dest_path ~hash:computed_hash () with
-
| Ok () ->
-
Printf.printf "Hash verification successful\n";
-
Eio.Path.unlink dest_path;
-
| Error msg ->
-
Printf.printf "Hash verification failed: %s\n" msg);
-
-
(* Test with wrong hash *)
-
let wrong_hash = Hash.create Hash.SHA256 "deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef" in
-
(match D.download downloader ~url:test_url ~dest:dest_path ~hash:wrong_hash () with
-
| Ok () ->
-
Printf.printf "ERROR: Wrong hash should have failed!\n";
-
Eio.Path.unlink dest_path;
-
| Error msg ->
-
Printf.printf "Wrong hash correctly rejected: %s\n" msg);
-
-
| Error msg ->
-
Printf.printf "Initial hash test download failed: %s\n" msg);
-
-
(* Test error handling *)
-
Printf.printf "Testing error handling...\n";
-
let invalid_url = "https://invalid-domain-12345.com/file.txt" in
-
(match D.download downloader ~url:invalid_url ~dest:dest_path () with
-
| Ok () ->
-
Printf.printf "WARNING: Invalid URL unexpectedly succeeded\n"
-
| Error msg ->
-
Printf.printf "Invalid URL correctly handled: %s\n" msg);
-
-
with
-
| exn ->
-
Printf.printf "Exception during curl test: %s\n" (Printexc.to_string exn)
-
-
let test_downloader_selection () =
-
Printf.printf "\nTesting downloader selection and detection...\n";
-
-
Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun _sw ->
-
(* Test detection *)
-
let available = Downloader.Downloaders.detect_available ~env in
-
Printf.printf "Available downloaders:\n";
-
List.iter (fun (name, _) ->
-
Printf.printf " - %s\n" name
-
) available;
-
-
(* Test default selection *)
-
if List.length available > 0 then (
-
let (module Default) = Downloader.Downloaders.create_default ~env in
-
Printf.printf "Default downloader selected\n";
-
);
-
-
(* Test string-based selection *)
-
(match Downloader.Downloaders.of_string "curl" with
-
| Some (module D) -> Printf.printf "String-based curl selection successful\n"
-
| None -> Printf.printf "String-based curl selection failed\n");
-
-
(* Test CLI integration *)
-
let (module CLI_Auto) = Downloader.Cli.create_downloader ~env `Auto in
-
Printf.printf "CLI auto-selection successful\n"
-
-
let test_authentication_formats () =
-
Printf.printf "\nTesting authentication formats...\n";
-
-
Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let (module D : Downloader.DOWNLOADER) = Downloader.Downloaders.curl () in
-
-
(* Test with username and password *)
-
let auth1 = Downloader.Config.{ username = Some "user"; password = Some "pass" } in
-
let downloader1 = D.create ~sw ~env ~auth:auth1 () in
-
Printf.printf "Created downloader with username and password\n";
-
-
(* Test with username only *)
-
let auth2 = Downloader.Config.{ username = Some "user"; password = None } in
-
let _downloader2 = D.create ~sw ~env ~auth:auth2 () in
-
Printf.printf "Created downloader with username only\n";
-
-
(* Test with no authentication *)
-
let _downloader3 = D.create ~sw ~env () in
-
Printf.printf "Created downloader with no authentication\n";
-
-
(* Verify all have same basic properties *)
-
Printf.printf "All downloaders report name: %s\n" (D.name downloader1);
-
Printf.printf "All support resume: %b\n" (D.supports_resume downloader1)
-
-
let run_comprehensive_tests () =
-
Printf.printf "Comprehensive Curl Downloader Tests\n";
-
Printf.printf "===================================\n";
-
-
test_curl_functionality ();
-
test_downloader_selection ();
-
test_authentication_formats ();
-
-
Printf.printf "\nAll comprehensive tests completed!\n"
-
-
let () = run_comprehensive_tests ()
-134
stack/toru/test/test_hash.ml
···
-
open Toru.Hash
-
-
let test_hash_creation () =
-
let hash = create SHA256 "abc123" in
-
assert (algorithm hash = SHA256);
-
assert (value hash = "abc123");
-
Printf.printf "✓ Hash creation test passed\n"
-
-
let test_algorithm_conversion () =
-
assert (algorithm_to_string SHA256 = "sha256");
-
assert (algorithm_to_string SHA1 = "sha1");
-
assert (algorithm_to_string MD5 = "md5");
-
-
assert (algorithm_of_string "sha256" = Some SHA256);
-
assert (algorithm_of_string "sha1" = Some SHA1);
-
assert (algorithm_of_string "md5" = Some MD5);
-
assert (algorithm_of_string "invalid" = None);
-
Printf.printf "✓ Algorithm conversion test passed\n"
-
-
let test_prefixed_parsing () =
-
let test_cases = [
-
("sha256:abc123", Some (SHA256, "abc123"));
-
("sha1:def456", Some (SHA1, "def456"));
-
("md5:789xyz", Some (MD5, "789xyz"));
-
("invalid:abc", None);
-
("nocolon", None);
-
] in
-
List.iter (fun (input, expected) ->
-
assert (parse_prefixed input = expected)
-
) test_cases;
-
Printf.printf "✓ Prefixed parsing test passed\n"
-
-
let test_hash_parsing () =
-
(* Test prefixed format *)
-
let hash1 = of_string "sha1:abc123def456789" in
-
assert (algorithm hash1 = SHA1);
-
assert (value hash1 = "abc123def456789");
-
-
(* Test non-prefixed SHA256 (64 chars) *)
-
let sha256_hash = String.make 64 'a' in
-
let hash2 = of_string sha256_hash in
-
assert (algorithm hash2 = SHA256);
-
assert (value hash2 = sha256_hash);
-
-
(* Test non-prefixed SHA1 (40 chars) *)
-
let sha1_hash = String.make 40 'b' in
-
let hash3 = of_string sha1_hash in
-
assert (algorithm hash3 = SHA1);
-
assert (value hash3 = sha1_hash);
-
-
(* Test non-prefixed MD5 (32 chars) *)
-
let md5_hash = String.make 32 'c' in
-
let hash4 = of_string md5_hash in
-
assert (algorithm hash4 = MD5);
-
assert (value hash4 = md5_hash);
-
-
Printf.printf "✓ Hash parsing test passed\n"
-
-
let test_hash_formatting () =
-
let hash = create SHA1 "abc123" in
-
let formatted = to_string hash in
-
assert (formatted = "sha1:abc123");
-
-
let prefixed = format_prefixed hash in
-
assert (prefixed = "sha1:abc123");
-
Printf.printf "✓ Hash formatting test passed\n"
-
-
let test_hash_equality () =
-
let hash1 = create SHA256 "abc123" in
-
let hash2 = create SHA256 "abc123" in
-
let hash3 = create SHA1 "abc123" in
-
let hash4 = create SHA256 "def456" in
-
-
assert (equal hash1 hash2);
-
assert (not (equal hash1 hash3)); (* Different algorithm *)
-
assert (not (equal hash1 hash4)); (* Different value *)
-
Printf.printf "✓ Hash equality test passed\n"
-
-
(* Test with actual file - create a temporary file *)
-
let test_hash_computation () =
-
let test_content = "Hello, World!" in
-
let test_file = "test_hash_file.txt" in
-
-
(* Write test content to file *)
-
let oc = open_out test_file in
-
output_string oc test_content;
-
close_out oc;
-
-
try
-
(* Compute hashes using Eio *)
-
Eio_main.run @@ fun env ->
-
let fs = env#fs in
-
let file_path = Eio.Path.(fs / test_file) in
-
-
let sha256_hash = compute SHA256 file_path in
-
let sha1_hash = compute SHA1 file_path in
-
let md5_hash = compute MD5 file_path in
-
-
(* Verify expected hash lengths *)
-
assert (String.length (value sha256_hash) = 64);
-
assert (String.length (value sha1_hash) = 40);
-
assert (String.length (value md5_hash) = 32);
-
-
(* Test verification *)
-
assert (verify file_path sha256_hash);
-
assert (verify file_path sha1_hash);
-
assert (verify file_path md5_hash);
-
-
(* Test verification failure *)
-
let wrong_hash = create SHA256 (String.make 64 '0') in
-
assert (not (verify file_path wrong_hash));
-
-
Printf.printf "✓ Hash computation and verification test passed\n";
-
-
(* Clean up *)
-
Sys.remove test_file
-
with
-
| exn ->
-
(* Clean up on error *)
-
if Sys.file_exists test_file then Sys.remove test_file;
-
raise exn
-
-
let run_tests () =
-
Printf.printf "Running Hash module tests...\n\n";
-
test_hash_creation ();
-
test_algorithm_conversion ();
-
test_prefixed_parsing ();
-
test_hash_parsing ();
-
test_hash_formatting ();
-
test_hash_equality ();
-
test_hash_computation ();
-
Printf.printf "\n✅ All Hash module tests passed!\n"
-
-
let () = run_tests ()
-28
stack/toru/test/test_hash_manual.ml
···
-
open Toru
-
-
let test_hash_computation () =
-
Printf.printf "Testing Hash computation...\n";
-
-
(* Create a test file *)
-
let test_content = "Hello, World!" in
-
let test_file = "test_hash_file.txt" in
-
-
(* Write test file *)
-
let oc = open_out test_file in
-
output_string oc test_content;
-
close_out oc;
-
-
(* Compute hash via external command for verification *)
-
let cmd = Printf.sprintf "echo -n '%s' | sha256sum | cut -d' ' -f1" test_content in
-
let expected_output = Unix.open_process_in cmd in
-
let expected_hash = input_line expected_output in
-
let _ = Unix.close_process_in expected_output in
-
-
Printf.printf "Expected SHA256: %s\n" expected_hash;
-
-
(* Clean up *)
-
Sys.remove test_file;
-
-
Printf.printf "Hash computation test completed\n"
-
-
let () = test_hash_computation ()
-367
stack/toru/test/test_make_registry.ml
···
-
(** Comprehensive test suite for Make_registry module *)
-
-
open Toru.Make_registry
-
open Eio.Std
-
-
let test_dir = "/tmp/toru_make_registry_test"
-
-
(* Test utilities *)
-
let setup_test_directory () =
-
let cmd = Printf.sprintf "rm -rf %s && mkdir -p %s" test_dir test_dir in
-
let _ = Sys.command cmd in
-
-
(* Create test files *)
-
let create_file path content =
-
let dir = Filename.dirname path in
-
let _ = Sys.command (Printf.sprintf "mkdir -p %s" dir) in
-
let oc = open_out path in
-
output_string oc content;
-
close_out oc
-
in
-
-
(* Regular files *)
-
create_file (test_dir ^ "/file1.txt") "Hello world";
-
create_file (test_dir ^ "/file2.csv") "col1,col2\n1,2\n3,4";
-
create_file (test_dir ^ "/data.json") "{\"key\": \"value\"}";
-
-
(* Subdirectory with files *)
-
create_file (test_dir ^ "/subdir/nested1.txt") "Nested content 1";
-
create_file (test_dir ^ "/subdir/nested2.md") "# Markdown content";
-
create_file (test_dir ^ "/subdir/deep/nested3.txt") "Deep nested content";
-
-
(* Hidden files *)
-
create_file (test_dir ^ "/.hidden") "Hidden file content";
-
create_file (test_dir ^ "/subdir/.config") "Config content";
-
-
(* Files to exclude *)
-
create_file (test_dir ^ "/temp.tmp") "Temporary file";
-
create_file (test_dir ^ "/debug.log") "Log content";
-
create_file (test_dir ^ "/subdir/another.tmp") "Another temp";
-
-
Printf.printf "Test directory setup complete: %s\n" test_dir
-
-
let cleanup_test_directory () =
-
let cmd = Printf.sprintf "rm -rf %s" test_dir in
-
let _ = Sys.command cmd in
-
Printf.printf "Test directory cleaned up\n"
-
-
(* Test 1: Basic directory scanning *)
-
let test_basic_scan () =
-
Printf.printf "\n=== Test 1: Basic Directory Scanning ===\n";
-
-
Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let dir_path = env#fs |> Eio.Path.(fun fs -> fs / test_dir) in
-
let options = { default_options with recursive = false } in
-
-
let registry = scan_directory ~sw ~env ~options dir_path in
-
let entries = Toru.Registry.entries registry in
-
-
Printf.printf "Found %d files in root directory\n" (List.length entries);
-
-
(* Should find non-hidden files in root only *)
-
let expected_files = ["file1.txt"; "file2.csv"; "data.json"; "temp.tmp"; "debug.log"] in
-
List.iter (fun filename ->
-
match Toru.Registry.find filename registry with
-
| Some entry ->
-
Printf.printf "✓ Found: %s (hash: %s)\n" filename
-
(Toru.Hash.value (Toru.Registry.hash entry))
-
| None -> Printf.printf "✗ Missing: %s\n" filename
-
) expected_files;
-
-
assert (List.length entries = 5)
-
-
(* Test 2: Recursive scanning *)
-
let test_recursive_scan () =
-
Printf.printf "\n=== Test 2: Recursive Directory Scanning ===\n";
-
-
Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let dir_path = env#fs |> Eio.Path.(fun fs -> fs / test_dir) in
-
let options = { default_options with recursive = true } in
-
-
let registry = scan_directory ~sw ~env ~options dir_path in
-
let entries = Toru.Registry.entries registry in
-
-
Printf.printf "Found %d files recursively\n" (List.length entries);
-
-
(* Should find files in subdirectories too *)
-
let expected_files = [
-
"file1.txt"; "file2.csv"; "data.json"; "temp.tmp"; "debug.log";
-
"subdir/nested1.txt"; "subdir/nested2.md"; "subdir/another.tmp";
-
"subdir/deep/nested3.txt"
-
] in
-
-
List.iter (fun filename ->
-
match Toru.Registry.find filename registry with
-
| Some entry ->
-
Printf.printf "✓ Found: %s\n" filename
-
| None -> Printf.printf "✗ Missing: %s\n" filename
-
) expected_files;
-
-
assert (List.length entries = 9)
-
-
(* Test 3: Exclude patterns *)
-
let test_exclude_patterns () =
-
Printf.printf "\n=== Test 3: Exclude Patterns ===\n";
-
-
Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let dir_path = env#fs |> Eio.Path.(fun fs -> fs / test_dir) in
-
let options = {
-
default_options with
-
recursive = true;
-
exclude_patterns = ["*.tmp"; "*.log"]
-
} in
-
-
let registry = scan_directory ~sw ~env ~options dir_path in
-
let entries = Toru.Registry.entries registry in
-
-
Printf.printf "Found %d files with exclusions\n" (List.length entries);
-
-
(* Should exclude .tmp and .log files *)
-
let included_files = [
-
"file1.txt"; "file2.csv"; "data.json";
-
"subdir/nested1.txt"; "subdir/nested2.md"; "subdir/deep/nested3.txt"
-
] in
-
let excluded_files = ["temp.tmp"; "debug.log"; "subdir/another.tmp"] in
-
-
List.iter (fun filename ->
-
match Toru.Registry.find filename registry with
-
| Some _ -> Printf.printf "✓ Included: %s\n" filename
-
| None -> Printf.printf "✗ Should be included: %s\n" filename
-
) included_files;
-
-
List.iter (fun filename ->
-
match Toru.Registry.find filename registry with
-
| Some _ -> Printf.printf "✗ Should be excluded: %s\n" filename
-
| None -> Printf.printf "✓ Excluded: %s\n" filename
-
) excluded_files;
-
-
assert (List.length entries = 6)
-
-
(* Test 4: Hidden files *)
-
let test_hidden_files () =
-
Printf.printf "\n=== Test 4: Hidden Files ===\n";
-
-
Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let dir_path = env#fs |> Eio.Path.(fun fs -> fs / test_dir) in
-
-
(* Test without hidden files *)
-
let options_no_hidden = { default_options with recursive = true; include_hidden = false } in
-
let registry_no_hidden = scan_directory ~sw ~env ~options:options_no_hidden dir_path in
-
-
(* Test with hidden files *)
-
let options_with_hidden = { default_options with recursive = true; include_hidden = true } in
-
let registry_with_hidden = scan_directory ~sw ~env ~options:options_with_hidden dir_path in
-
-
let count_no_hidden = List.length (Toru.Registry.entries registry_no_hidden) in
-
let count_with_hidden = List.length (Toru.Registry.entries registry_with_hidden) in
-
-
Printf.printf "Files without hidden: %d\n" count_no_hidden;
-
Printf.printf "Files with hidden: %d\n" count_with_hidden;
-
-
(* Should have more files when including hidden *)
-
assert (count_with_hidden > count_no_hidden);
-
-
(* Check specific hidden files *)
-
let hidden_files = [".hidden"; "subdir/.config"] in
-
List.iter (fun filename ->
-
match Toru.Registry.find filename registry_with_hidden with
-
| Some _ -> Printf.printf "✓ Found hidden: %s\n" filename
-
| None -> Printf.printf "✗ Missing hidden: %s\n" filename
-
) hidden_files
-
-
(* Test 5: Different hash algorithms *)
-
let test_hash_algorithms () =
-
Printf.printf "\n=== Test 5: Hash Algorithms ===\n";
-
-
Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let dir_path = env#fs |> Eio.Path.(fun fs -> fs / test_dir) in
-
let test_file = "file1.txt" in
-
-
let algorithms = [
-
(Toru.Hash.SHA256, "SHA256");
-
(Toru.Hash.SHA1, "SHA1");
-
(Toru.Hash.MD5, "MD5")
-
] in
-
-
List.iter (fun (algorithm, name) ->
-
let options = { default_options with hash_algorithm = algorithm; recursive = false } in
-
let registry = scan_directory ~sw ~env ~options dir_path in
-
-
match Toru.Registry.find test_file registry with
-
| Some entry ->
-
let hash = Toru.Registry.hash entry in
-
let algo = Toru.Hash.algorithm hash in
-
let value = Toru.Hash.value hash in
-
Printf.printf "✓ %s: %s (length: %d)\n" name value (String.length value);
-
assert (algo = algorithm)
-
| None ->
-
Printf.printf "✗ Missing file for %s\n" name;
-
assert false
-
) algorithms
-
-
(* Test 6: Progress callback *)
-
let test_progress_callback () =
-
Printf.printf "\n=== Test 6: Progress Callback ===\n";
-
-
Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let dir_path = env#fs |> Eio.Path.(fun fs -> fs / test_dir) in
-
let options = { default_options with recursive = true } in
-
-
let progress_calls = ref [] in
-
let progress_fn filename current total =
-
progress_calls := (filename, current, total) :: !progress_calls;
-
Printf.printf "Progress: %s (%d/%d)\n" filename current total
-
in
-
-
let registry = scan_directory_with_progress ~sw ~env ~options ~progress dir_path in
-
let entries = Toru.Registry.entries registry in
-
-
Printf.printf "Registry has %d entries\n" (List.length entries);
-
Printf.printf "Progress callback called %d times\n" (List.length !progress_calls);
-
-
assert (List.length !progress_calls > 0)
-
-
(* Test 7: Enhanced entries and JSON output *)
-
let test_enhanced_entries () =
-
Printf.printf "\n=== Test 7: Enhanced Entries and JSON Output ===\n";
-
-
Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let dir_path = env#fs |> Eio.Path.(fun fs -> fs / test_dir) in
-
let options = { default_options with recursive = false } in
-
-
let enhanced_entries = scan_directory_enhanced ~sw ~env ~options dir_path in
-
-
Printf.printf "Enhanced entries: %d\n" (List.length enhanced_entries);
-
-
List.iter (fun entry ->
-
let filename = Toru.Registry.filename entry.entry in
-
let metadata = entry.metadata in
-
Printf.printf "File: %s\n" filename;
-
Printf.printf " Size: %Ld bytes\n" metadata.size;
-
Printf.printf " MTime: %s\n" (Ptime.to_rfc3339 metadata.mtime);
-
Printf.printf " Relative: %s\n" metadata.relative_path;
-
Printf.printf " Absolute: %s\n" metadata.absolute_path;
-
) enhanced_entries;
-
-
(* Test JSON conversion *)
-
let json = enhanced_entries_to_json
-
~algorithm:Toru.Hash.SHA256
-
~generated:(Ptime_clock.now ())
-
enhanced_entries in
-
-
let json_str = Yojson.Safe.pretty_to_string json in
-
Printf.printf "JSON output sample:\n%s\n"
-
(if String.length json_str > 500 then
-
String.sub json_str 0 500 ^ "..."
-
else json_str);
-
-
assert (List.length enhanced_entries > 0)
-
-
(* Test 8: File list processing *)
-
let test_file_list () =
-
Printf.printf "\n=== Test 8: File List Processing ===\n";
-
-
Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let file_paths = [
-
test_dir ^ "/file1.txt";
-
test_dir ^ "/file2.csv";
-
test_dir ^ "/subdir/nested1.txt"
-
] in
-
-
let registry = from_file_list ~sw ~env ~hash_algorithm:Toru.Hash.SHA256 file_paths in
-
let entries = Toru.Registry.entries registry in
-
-
Printf.printf "Registry from file list: %d entries\n" (List.length entries);
-
-
List.iter (fun entry ->
-
let filename = Toru.Registry.filename entry in
-
Printf.printf "✓ File: %s\n" filename
-
) entries;
-
-
assert (List.length entries = 3)
-
-
(* Test 9: Registry update (simplified) *)
-
let test_registry_update () =
-
Printf.printf "\n=== Test 9: Registry Update ===\n";
-
-
Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let dir_path = env#fs |> Eio.Path.(fun fs -> fs / test_dir) in
-
let options = { default_options with recursive = false } in
-
-
(* Create initial registry *)
-
let initial_registry = scan_directory ~sw ~env ~options dir_path in
-
let initial_count = List.length (Toru.Registry.entries initial_registry) in
-
Printf.printf "Initial registry: %d entries\n" initial_count;
-
-
(* Simulate update (in real use case, files would have changed) *)
-
let updated_registry = update_registry ~sw ~env ~options initial_registry dir_path in
-
let updated_count = List.length (Toru.Registry.entries updated_registry) in
-
Printf.printf "Updated registry: %d entries\n" updated_count;
-
-
(* For this test, counts should be the same since no files changed *)
-
assert (updated_count = initial_count)
-
-
(* Test 10: Pattern matching *)
-
let test_pattern_matching () =
-
Printf.printf "\n=== Test 10: Pattern Matching ===\n";
-
-
let test_cases = [
-
(["*.txt"], "file.txt", true);
-
(["*.txt"], "file.csv", false);
-
(["**/*.md"], "docs/readme.md", true);
-
(["**/*.md"], "readme.md", true);
-
(["temp/*"], "temp/file.txt", true);
-
(["temp/*"], "temp/sub/file.txt", false);
-
(["temp/**"], "temp/sub/file.txt", true);
-
(["*.tmp"; "*.log"], "debug.log", true);
-
(["*.tmp"; "*.log"], "data.csv", false);
-
] in
-
-
List.iter (fun (patterns, path, expected) ->
-
let result = matches_exclude_pattern patterns path in
-
let status = if result = expected then "✓" else "✗" in
-
Printf.printf "%s Pattern %s matches '%s': %b (expected %b)\n"
-
status (String.concat "," patterns) path result expected;
-
assert (result = expected)
-
) test_cases
-
-
(* Run all tests *)
-
let run_all_tests () =
-
Printf.printf "Starting Make_registry test suite...\n";
-
-
setup_test_directory ();
-
-
try
-
test_basic_scan ();
-
test_recursive_scan ();
-
test_exclude_patterns ();
-
test_hidden_files ();
-
test_hash_algorithms ();
-
test_progress_callback ();
-
test_enhanced_entries ();
-
test_file_list ();
-
test_registry_update ();
-
test_pattern_matching ();
-
-
Printf.printf "\n🎉 All tests passed!\n";
-
cleanup_test_directory ()
-
with
-
| exn ->
-
Printf.printf "\n❌ Test failed: %s\n" (Printexc.to_string exn);
-
cleanup_test_directory ();
-
exit 1
-
-
let () =
-
if Array.length Sys.argv > 1 && Sys.argv.(1) = "--run-tests" then
-
run_all_tests ()
-
else
-
Printf.printf "Use --run-tests to run the test suite\n"
-218
stack/toru/test/test_python_cross_validation.ml
···
-
open Toru
-
open Printf
-
-
(** Cross-validation tests against Python Pooch-generated registries *)
-
-
let test_sha256_registry () =
-
printf "Testing SHA256 registry compatibility...\n";
-
-
(* Load the Python-generated registry *)
-
let registry_path = "test/python/test_registry_sha256.txt" in
-
let registry =
-
if Sys.file_exists registry_path then
-
Eio_main.run @@ fun env ->
-
let fs = env#fs in
-
Registry.load Eio.Path.(fs / registry_path)
-
else
-
failwith ("Registry file not found: " ^ registry_path)
-
in
-
-
printf " - Loaded registry with %d entries\n" (Registry.size registry);
-
-
(* Verify key entries exist *)
-
let expected_files = [
-
"data/simple.txt";
-
"data/numbers.csv";
-
"empty.txt";
-
"config.json";
-
] in
-
-
List.iter (fun filename ->
-
match Registry.find filename registry with
-
| Some entry ->
-
let hash = Registry.hash entry in
-
printf " - ✓ Found %s: %s (%s)\n"
-
filename
-
(Hash.value hash)
-
(Hash.algorithm_to_string (Hash.algorithm hash))
-
| None ->
-
failwith ("Expected file not found in registry: " ^ filename)
-
) expected_files;
-
-
printf "✓ SHA256 registry test passed\n\n"
-
-
let test_mixed_registry () =
-
printf "Testing mixed hash format registry...\n";
-
-
let registry_path = "test/python/test_registry_mixed.txt" in
-
let registry =
-
if Sys.file_exists registry_path then
-
Eio_main.run @@ fun env ->
-
let fs = env#fs in
-
Registry.load Eio.Path.(fs / registry_path)
-
else
-
failwith ("Registry file not found: " ^ registry_path)
-
in
-
-
printf " - Loaded mixed registry with %d entries\n" (Registry.size registry);
-
-
(* Check that different hash algorithms are parsed correctly *)
-
let expected_algos = [
-
("data/simple.txt", Hash.SHA256); (* Should default to SHA256 *)
-
("config.json", Hash.SHA1); (* Should parse as SHA1 *)
-
("data/numbers.csv", Hash.MD5); (* Should parse as MD5 *)
-
] in
-
-
List.iter (fun (filename, expected_algo) ->
-
match Registry.find filename registry with
-
| Some entry ->
-
let hash = Registry.hash entry in
-
let actual_algo = Hash.algorithm hash in
-
if actual_algo = expected_algo then
-
printf " - ✓ %s: %s (%s)\n"
-
filename
-
(Hash.value hash)
-
(Hash.algorithm_to_string actual_algo)
-
else
-
failwith (sprintf "Wrong algorithm for %s: expected %s, got %s"
-
filename
-
(Hash.algorithm_to_string expected_algo)
-
(Hash.algorithm_to_string actual_algo))
-
| None ->
-
failwith ("Expected file not found in registry: " ^ filename)
-
) expected_algos;
-
-
printf "✓ Mixed registry test passed\n\n"
-
-
let test_hash_verification () =
-
printf "Testing hash verification against known data...\n";
-
-
(* Load metadata to get expected hashes *)
-
let metadata_path = "test/python/test_metadata.json" in
-
if not (Sys.file_exists metadata_path) then
-
failwith ("Metadata file not found: " ^ metadata_path);
-
-
let ic = open_in metadata_path in
-
let metadata_content = really_input_string ic (in_channel_length ic) in
-
close_in ic;
-
-
let metadata = Yojson.Safe.from_string metadata_content in
-
let files = Yojson.Safe.Util.member "files" metadata |> Yojson.Safe.Util.to_list in
-
-
printf " - Loaded metadata for %d files\n" (List.length files);
-
-
(* Verify a few key files exist and have correct content *)
-
let test_file_content = "test/python/test_data/data/simple.txt" in
-
if Sys.file_exists test_file_content then (
-
let expected_content = "Hello, World!" in
-
let actual_content =
-
let ic = open_in test_file_content in
-
let content = really_input_string ic (in_channel_length ic) in
-
close_in ic;
-
content
-
in
-
if actual_content = expected_content then
-
printf " - ✓ Test file content matches expected\n"
-
else
-
failwith (sprintf "Test file content mismatch: expected '%s', got '%s'"
-
expected_content actual_content);
-
-
(* Calculate hash using our implementation *)
-
Eio_main.run @@ fun env ->
-
let fs = env#fs in
-
let file_path = Eio.Path.(fs / test_file_content) in
-
-
let computed_sha256 = Hash.compute SHA256 file_path in
-
let computed_sha1 = Hash.compute SHA1 file_path in
-
let computed_md5 = Hash.compute MD5 file_path in
-
-
(* Find this file in metadata *)
-
let file_meta = List.find (fun file_obj ->
-
Yojson.Safe.Util.member "path" file_obj
-
|> Yojson.Safe.Util.to_string = "data/simple.txt"
-
) files in
-
-
let expected_sha256 = Yojson.Safe.Util.member "sha256" file_meta |> Yojson.Safe.Util.to_string in
-
let expected_sha1 = Yojson.Safe.Util.member "sha1" file_meta |> Yojson.Safe.Util.to_string in
-
let expected_md5 = Yojson.Safe.Util.member "md5" file_meta |> Yojson.Safe.Util.to_string in
-
-
(* Verify our computed hashes match Python's *)
-
let check_hash name computed expected =
-
if Hash.value computed = expected then
-
printf " - ✓ %s hash matches: %s\n" name expected
-
else
-
failwith (sprintf "%s hash mismatch: computed %s, expected %s"
-
name (Hash.value computed) expected)
-
in
-
-
check_hash "SHA256" computed_sha256 expected_sha256;
-
check_hash "SHA1" computed_sha1 expected_sha1;
-
check_hash "MD5" computed_md5 expected_md5;
-
) else
-
printf " - ⚠ Test data files not found, skipping content verification\n";
-
-
printf "✓ Hash verification test passed\n\n"
-
-
let test_round_trip_compatibility () =
-
printf "Testing round-trip registry compatibility...\n";
-
-
(* Load a Python registry and convert it back to string *)
-
let registry_path = "test/python/test_registry_sha256.txt" in
-
let original_registry =
-
Eio_main.run @@ fun env ->
-
let fs = env#fs in
-
Registry.load Eio.Path.(fs / registry_path)
-
in
-
let registry_string = Registry.to_string original_registry in
-
-
(* Parse it back *)
-
let reparsed_registry = Registry.of_string registry_string in
-
-
(* Verify they're equivalent *)
-
if Registry.size original_registry = Registry.size reparsed_registry then
-
printf " - ✓ Registry sizes match: %d entries\n" (Registry.size original_registry)
-
else
-
failwith (sprintf "Registry size mismatch: original %d, reparsed %d"
-
(Registry.size original_registry)
-
(Registry.size reparsed_registry));
-
-
(* Check a few entries *)
-
let entries = Registry.entries original_registry in
-
List.iter (fun entry ->
-
let filename = Registry.filename entry in
-
let original_hash = Registry.hash entry in
-
match Registry.find filename reparsed_registry with
-
| Some reparsed_entry ->
-
let reparsed_hash = Registry.hash reparsed_entry in
-
if Hash.equal original_hash reparsed_hash then
-
printf " - ✓ %s: hashes match\n" filename
-
else
-
failwith (sprintf "Hash mismatch for %s" filename)
-
| None ->
-
failwith (sprintf "File missing after round-trip: %s" filename)
-
) (match entries with
-
| e1::e2::e3::_ -> [e1;e2;e3] (* Just check first 3 *)
-
| all -> all);
-
-
printf "✓ Round-trip compatibility test passed\n\n"
-
-
let run_tests () =
-
printf "=== Python/Pooch Cross-Validation Tests ===\n\n";
-
-
try
-
test_sha256_registry ();
-
test_mixed_registry ();
-
test_hash_verification ();
-
test_round_trip_compatibility ();
-
-
printf "🎉 All cross-validation tests passed!\n";
-
printf "✅ Toru is fully compatible with Python Pooch registries\n\n";
-
with
-
| Failure msg ->
-
printf "❌ Test failed: %s\n" msg;
-
exit 1
-
| exn ->
-
printf "❌ Unexpected error: %s\n" (Printexc.to_string exn);
-
exit 1
-
-
let () = run_tests ()
-270
stack/toru/test/test_registry.ml
···
-
(** Comprehensive tests for the Registry module *)
-
-
open Toru
-
-
(** Test registry content with various formats *)
-
let test_registry_content = {|# This is a comment
-
data/file1.csv sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
-
data/file2.txt d1f947c87017eebc8b98d6c3944eaea813ddcfb6ceafa96db0bb70675abd4f28
-
-
# Another comment with empty lines above
-
archive.zip md5:d41d8cd98f00b204e9800998ecf8427e
-
small.txt sha1:da39a3ee5e6b4b0d3255bfef95601890afd80709
-
# Final comment
-
|}
-
-
let test_basic_parsing () =
-
Printf.printf "=== Testing basic registry parsing ===\n";
-
-
let registry = Registry.of_string test_registry_content in
-
let size = Registry.size registry in
-
-
Printf.printf "Registry size: %d\n" size;
-
assert (size = 4);
-
-
(* Test finding entries *)
-
let file1 = Registry.find "data/file1.csv" registry in
-
assert (Option.is_some file1);
-
-
let file1_entry = Option.get file1 in
-
assert (Registry.filename file1_entry = "data/file1.csv");
-
assert (Hash.algorithm (Registry.hash file1_entry) = Hash.SHA256);
-
-
let file2 = Registry.find "data/file2.txt" registry in
-
assert (Option.is_some file2);
-
let file2_entry = Option.get file2 in
-
assert (Hash.algorithm (Registry.hash file2_entry) = Hash.SHA256);
-
-
let archive = Registry.find "archive.zip" registry in
-
assert (Option.is_some archive);
-
let archive_entry = Option.get archive in
-
assert (Hash.algorithm (Registry.hash archive_entry) = Hash.MD5);
-
-
let small = Registry.find "small.txt" registry in
-
assert (Option.is_some small);
-
let small_entry = Option.get small in
-
assert (Hash.algorithm (Registry.hash small_entry) = Hash.SHA1);
-
-
(* Test non-existent entry *)
-
assert (Registry.find "nonexistent.txt" registry = None);
-
assert (not (Registry.exists "nonexistent.txt" registry));
-
assert (Registry.exists "data/file1.csv" registry);
-
-
Printf.printf "✓ Basic parsing tests passed\n"
-
-
let test_round_trip_serialization () =
-
Printf.printf "=== Testing round-trip serialization ===\n";
-
-
let original_registry = Registry.of_string test_registry_content in
-
let serialized = Registry.to_string original_registry in
-
let parsed_back = Registry.of_string serialized in
-
-
assert (Registry.size original_registry = Registry.size parsed_back);
-
-
(* Verify each entry exists in both registries *)
-
let entries = Registry.entries original_registry in
-
List.iter (fun entry ->
-
let filename = Registry.filename entry in
-
let found = Registry.find filename parsed_back in
-
assert (Option.is_some found);
-
let found_entry = Option.get found in
-
assert (Registry.filename found_entry = Registry.filename entry);
-
assert (Hash.equal (Registry.hash found_entry) (Registry.hash entry));
-
) entries;
-
-
Printf.printf "✓ Round-trip serialization tests passed\n"
-
-
let test_entry_operations () =
-
Printf.printf "=== Testing entry operations ===\n";
-
-
let empty_registry = Registry.empty in
-
assert (Registry.size empty_registry = 0);
-
assert (Registry.entries empty_registry = []);
-
-
(* Create and add entries *)
-
let hash1 = Hash.create Hash.SHA256 "abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" in
-
let entry1 = Registry.create_entry ~filename:"test1.txt" ~hash:hash1 () in
-
-
let hash2 = Hash.create Hash.SHA1 "da39a3ee5e6b4b0d3255bfef95601890afd80709" in
-
let entry2 = Registry.create_entry ~filename:"test2.txt" ~hash:hash2 ~custom_url:"https://example.com/test2.txt" () in
-
-
let registry = Registry.add entry1 empty_registry in
-
let registry = Registry.add entry2 registry in
-
-
assert (Registry.size registry = 2);
-
assert (Registry.exists "test1.txt" registry);
-
assert (Registry.exists "test2.txt" registry);
-
-
(* Test custom URL *)
-
let found_entry2 = Registry.find "test2.txt" registry |> Option.get in
-
assert (Registry.custom_url found_entry2 = Some "https://example.com/test2.txt");
-
-
let found_entry1 = Registry.find "test1.txt" registry |> Option.get in
-
assert (Registry.custom_url found_entry1 = None);
-
-
(* Test removal *)
-
let registry_removed = Registry.remove "test1.txt" registry in
-
assert (Registry.size registry_removed = 1);
-
assert (not (Registry.exists "test1.txt" registry_removed));
-
assert (Registry.exists "test2.txt" registry_removed);
-
-
Printf.printf "✓ Entry operation tests passed\n"
-
-
let test_hash_format_parsing () =
-
Printf.printf "=== Testing hash format parsing ===\n";
-
-
let test_cases = [
-
"file1.txt sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";
-
"file2.txt e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";
-
"file3.txt sha1:da39a3ee5e6b4b0d3255bfef95601890afd80709";
-
"file4.txt da39a3ee5e6b4b0d3255bfef95601890afd80709";
-
"file5.txt md5:d41d8cd98f00b204e9800998ecf8427e";
-
"file6.txt d41d8cd98f00b204e9800998ecf8427e";
-
] in
-
-
List.iteri (fun i line ->
-
let registry = Registry.of_string line in
-
assert (Registry.size registry = 1);
-
let entries = Registry.entries registry in
-
let entry = List.hd entries in
-
let expected_filename = Printf.sprintf "file%d.txt" (i + 1) in
-
assert (Registry.filename entry = expected_filename);
-
-
let hash = Registry.hash entry in
-
let expected_algorithm = match i with
-
| 0 | 1 -> Hash.SHA256
-
| 2 | 3 -> Hash.SHA1
-
| 4 | 5 -> Hash.MD5
-
| _ -> assert false
-
in
-
assert (Hash.algorithm hash = expected_algorithm);
-
) test_cases;
-
-
Printf.printf "✓ Hash format parsing tests passed\n"
-
-
let test_comment_and_empty_line_handling () =
-
Printf.printf "=== Testing comment and empty line handling ===\n";
-
-
let complex_content = {|
-
# Header comment
-
# Another header comment
-
-
data1.txt e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
-
-
# Mid comment
-
data2.txt sha256:b5d4045c3f466fa91fe2cc6abe79232a1a57cdf104f7a26e716e0a1e2789df78
-
-
# End comment with spaces
-
# Indented comment
-
data3.txt sha1:da39a3ee5e6b4b0d3255bfef95601890afd80709
-
-
# Final comment
-
|} in
-
-
let registry = Registry.of_string complex_content in
-
assert (Registry.size registry = 3);
-
assert (Registry.exists "data1.txt" registry);
-
assert (Registry.exists "data2.txt" registry);
-
assert (Registry.exists "data3.txt" registry);
-
-
Printf.printf "✓ Comment and empty line handling tests passed\n"
-
-
let test_file_io_operations () =
-
Printf.printf "=== Testing file I/O operations ===\n";
-
-
let test_dir = "/tmp/toru_registry_test" in
-
let test_file = test_dir ^ "/test_registry.txt" in
-
-
(* Create test directory *)
-
(try Unix.mkdir test_dir 0o755 with Unix.Unix_error (Unix.EEXIST, _, _) -> ());
-
-
(* Clean up function *)
-
let cleanup () =
-
(try Sys.remove test_file with Sys_error _ -> ());
-
(try Unix.rmdir test_dir with Unix.Unix_error _ -> ());
-
in
-
-
Fun.protect ~finally:cleanup @@ fun () ->
-
(* Create a registry and save it *)
-
let hash1 = Hash.create Hash.SHA256 "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" in
-
let entry1 = Registry.create_entry ~filename:"test_file1.txt" ~hash:hash1 () in
-
-
let hash2 = Hash.create Hash.SHA1 "da39a3ee5e6b4b0d3255bfef95601890afd80709" in
-
let entry2 = Registry.create_entry ~filename:"test_file2.txt" ~hash:hash2 () in
-
-
let registry = Registry.empty |> Registry.add entry1 |> Registry.add entry2 in
-
-
(* Note: We can't use Eio.Path in this test context, so we'll test the save function
-
by verifying the serialized content matches what we expect *)
-
let serialized = Registry.to_string registry in
-
-
(* Write to file manually *)
-
let oc = open_out test_file in
-
Fun.protect ~finally:(fun () -> close_out oc) @@ fun () ->
-
output_string oc serialized;
-
flush oc;
-
-
(* Test that the file was written correctly *)
-
let ic = open_in test_file in
-
let content = Fun.protect ~finally:(fun () -> close_in ic) @@ fun () ->
-
really_input_string ic (in_channel_length ic)
-
in
-
-
let loaded_registry = Registry.of_string content in
-
assert (Registry.size loaded_registry = 2);
-
assert (Registry.exists "test_file1.txt" loaded_registry);
-
assert (Registry.exists "test_file2.txt" loaded_registry);
-
-
Printf.printf "✓ File I/O tests passed\n"
-
-
let test_edge_cases () =
-
Printf.printf "=== Testing edge cases ===\n";
-
-
(* Empty registry *)
-
let empty_str = "" in
-
let empty_registry = Registry.of_string empty_str in
-
assert (Registry.size empty_registry = 0);
-
-
(* Only comments *)
-
let comments_only = "# Comment 1\n# Comment 2\n\n# Comment 3" in
-
let comments_registry = Registry.of_string comments_only in
-
assert (Registry.size comments_registry = 0);
-
-
(* Malformed lines (should be ignored or handled gracefully) *)
-
let malformed = "valid.txt sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\njust_filename_no_hash\n" in
-
let malformed_registry = Registry.of_string malformed in
-
assert (Registry.size malformed_registry = 1);
-
assert (Registry.exists "valid.txt" malformed_registry);
-
-
Printf.printf "✓ Edge case tests passed\n"
-
-
let test_progress_callback () =
-
Printf.printf "=== Testing progress callback ===\n";
-
-
let call_count = ref 0 in
-
let progress_callback line_num total_lines =
-
incr call_count;
-
Printf.printf "Progress: %d/%d\n" line_num total_lines
-
in
-
-
let registry = Registry.of_string ~progress:progress_callback test_registry_content in
-
assert (Registry.size registry = 4);
-
assert (!call_count > 0);
-
-
Printf.printf "✓ Progress callback tests passed\n"
-
-
let run_all_tests () =
-
Printf.printf "Running Registry module tests...\n\n";
-
-
test_basic_parsing ();
-
test_round_trip_serialization ();
-
test_entry_operations ();
-
test_hash_format_parsing ();
-
test_comment_and_empty_line_handling ();
-
test_file_io_operations ();
-
test_edge_cases ();
-
test_progress_callback ();
-
-
Printf.printf "\n🎉 All Registry tests passed!\n"
-
-
let () = run_all_tests ()
-29
stack/toru/test/test_registry_real.ml
···
-
open Toru
-
-
let test_tessera_registry () =
-
Printf.printf "Testing Registry with tessera-manifests...\n";
-
-
(* Load a small sample from tessera-manifests *)
-
let test_registry_content = "2024/grid_-5.05_50.05/grid_-5.05_50.05.npy e10d31df93ea3c907827aefce89950127f8de3f7a4b612b82d4445feedb7bc0b\n2024/grid_-5.05_50.05/grid_-5.05_50.05_scales.npy de016fd2674fc3e4562822c6ceb8c8ee671f1d04b842bf519cb37d89b725f48e\n# This is a comment\n\n2024/grid_-5.05_50.15/grid_-5.05_50.15.npy 2841b5c65699e4a355f9ab8ed6021b72ecdfdadf63d44d60e81380ceffd1a908" in
-
-
(* Parse the registry *)
-
let registry = Registry.of_string test_registry_content in
-
-
Printf.printf "Registry size: %d\n" (Registry.size registry);
-
-
(* Test lookup *)
-
match Registry.find "2024/grid_-5.05_50.05/grid_-5.05_50.05.npy" registry with
-
| Some entry ->
-
Printf.printf "Found entry: %s\n" (Registry.filename entry);
-
Printf.printf "Hash: %s\n" (Hash.to_string (Registry.hash entry))
-
| None -> Printf.printf "Entry not found\n";
-
-
(* Test round-trip *)
-
let serialized = Registry.to_string registry in
-
let parsed_back = Registry.of_string serialized in
-
Printf.printf "Round-trip test - size matches: %b\n"
-
(Registry.size registry = Registry.size parsed_back);
-
-
Printf.printf "Registry parsing test completed\n"
-
-
let () = test_tessera_registry ()
-275
stack/toru/test/test_tessera_integration.ml
···
-
open Toru
-
open Printf
-
-
(* Test URLs for tessera-manifests *)
-
let embeddings_url = "https://raw.githubusercontent.com/ucam-eo/tessera-manifests/main/registry/embeddings/embeddings_2024_lon-10_lat50.txt"
-
let landmasks_url = "https://raw.githubusercontent.com/ucam-eo/tessera-manifests/main/registry/landmasks/landmasks_lon-10_lat50.txt"
-
-
(* Helper function to create a test cache directory *)
-
let create_test_cache () =
-
let cache_dir = "/tmp/toru_tessera_test_" ^ (string_of_int (Random.int 10000)) in
-
Unix.mkdir cache_dir 0o755;
-
cache_dir
-
-
(* Test 1: Registry loading from tessera-manifests URLs *)
-
let test_registry_loading () =
-
printf "=== Test 1: Registry loading from tessera-manifests URLs ===\n";
-
-
printf "Loading embeddings registry from URL...\n";
-
let embeddings_registry = Registry.load_from_url embeddings_url in
-
printf "Embeddings registry size: %d entries\n" (Registry.size embeddings_registry);
-
-
printf "Loading landmasks registry from URL...\n";
-
let landmasks_registry = Registry.load_from_url landmasks_url in
-
printf "Landmasks registry size: %d entries\n" (Registry.size landmasks_registry);
-
-
(* Verify both registries are non-empty *)
-
assert (Registry.size embeddings_registry > 0);
-
assert (Registry.size landmasks_registry > 0);
-
-
printf "✓ Registry loading test passed\n\n";
-
(embeddings_registry, landmasks_registry)
-
-
(* Test 2: Registry structure and content validation *)
-
let test_registry_structure embeddings_registry landmasks_registry =
-
printf "=== Test 2: Registry structure and content validation ===\n";
-
-
(* Test embeddings registry structure *)
-
printf "Testing embeddings registry structure...\n";
-
let embeddings_entries = Registry.entries embeddings_registry in
-
List.iteri (fun i entry ->
-
if i < 3 then ( (* Only test first 3 entries for brevity *)
-
let filename = Registry.filename entry in
-
let hash = Registry.hash entry in
-
printf " Entry %d: %s -> %s\n" (i+1) filename (Hash.to_string hash);
-
-
(* Verify filename format for embeddings *)
-
assert (String.contains filename '/');
-
assert (Filename.extension filename = ".npy");
-
-
(* Verify hash format (should be SHA-256: 64 chars) *)
-
assert (Hash.algorithm hash = SHA256);
-
assert (String.length (Hash.value hash) = 64);
-
)
-
) embeddings_entries;
-
-
(* Test landmasks registry structure *)
-
printf "Testing landmasks registry structure...\n";
-
let landmasks_entries = Registry.entries landmasks_registry in
-
List.iteri (fun i entry ->
-
if i < 3 then ( (* Only test first 3 entries for brevity *)
-
let filename = Registry.filename entry in
-
let hash = Registry.hash entry in
-
printf " Entry %d: %s -> %s\n" (i+1) filename (Hash.to_string hash);
-
-
(* Verify filename format for landmasks *)
-
assert (Filename.extension filename = ".tiff");
-
assert (String.contains filename '_');
-
-
(* Verify hash format *)
-
assert (Hash.algorithm hash = SHA256);
-
assert (String.length (Hash.value hash) = 64);
-
)
-
) landmasks_entries;
-
-
printf "✓ Registry structure validation test passed\n\n"
-
-
(* Test 3: Hash format validation with real data *)
-
let test_hash_validation embeddings_registry _landmasks_registry =
-
printf "=== Test 3: Hash format validation ===\n";
-
-
(* Test hash parsing and formatting *)
-
let test_entry = List.hd (Registry.entries embeddings_registry) in
-
let hash = Registry.hash test_entry in
-
-
printf "Testing hash operations on real data...\n";
-
printf " Original hash: %s\n" (Hash.to_string hash);
-
printf " Algorithm: %s\n" (Hash.algorithm_to_string (Hash.algorithm hash));
-
-
(* Test round-trip parsing *)
-
let hash_string = Hash.to_string hash in
-
let parsed_hash = Hash.of_string hash_string in
-
assert (Hash.equal hash parsed_hash);
-
-
(* Test prefixed format *)
-
let prefixed = Hash.format_prefixed hash in
-
printf " Prefixed format: %s\n" prefixed;
-
-
(match Hash.parse_prefixed prefixed with
-
| Some (alg, value) ->
-
assert (alg = Hash.algorithm hash);
-
assert (value = Hash.value hash);
-
printf " Prefixed parsing: ✓\n"
-
| None -> failwith "Failed to parse prefixed hash");
-
-
printf "✓ Hash validation test passed\n\n"
-
-
(* Test 4: Registry query operations *)
-
let test_registry_queries embeddings_registry _landmasks_registry =
-
printf "=== Test 4: Registry query operations ===\n";
-
-
(* Test finding specific entries *)
-
let embeddings_entries = Registry.entries embeddings_registry in
-
let test_filename = Registry.filename (List.hd embeddings_entries) in
-
-
printf "Testing query operations...\n";
-
printf " Looking for: %s\n" test_filename;
-
-
(match Registry.find test_filename embeddings_registry with
-
| Some found_entry ->
-
printf " Found entry: %s\n" (Registry.filename found_entry);
-
assert (Registry.filename found_entry = test_filename);
-
assert (Registry.exists test_filename embeddings_registry);
-
printf " Exists check: ✓\n"
-
| None -> failwith "Failed to find known entry");
-
-
(* Test non-existent file *)
-
let fake_filename = "non/existent/file.npy" in
-
assert (not (Registry.exists fake_filename embeddings_registry));
-
assert (Registry.find fake_filename embeddings_registry = None);
-
printf " Non-existent file handling: ✓\n";
-
-
printf "✓ Registry query operations test passed\n\n"
-
-
(* Test 5: Full Toru workflow without downloads *)
-
let test_toru_workflow embeddings_registry =
-
printf "=== Test 5: Toru workflow (without actual downloads) ===\n";
-
-
(* Since we can't run Eio tests in this context, we'll simulate the workflow *)
-
let cache_path = create_test_cache () in
-
printf "Created test cache at: %s\n" cache_path;
-
-
(* Test default cache path generation *)
-
let default_path = Toru.default_cache_path ~app_name:"tessera_test" () in
-
printf "Default cache path: %s\n" default_path;
-
assert (String.contains default_path '/');
-
-
(* Test registry serialization/deserialization *)
-
printf "Testing registry serialization...\n";
-
let serialized = Registry.to_string embeddings_registry in
-
let deserialized = Registry.of_string serialized in
-
-
assert (Registry.size embeddings_registry = Registry.size deserialized);
-
printf " Serialization round-trip: ✓\n";
-
-
(* Clean up test cache *)
-
(try Unix.rmdir cache_path with _ -> ());
-
-
printf "✓ Toru workflow test passed\n\n"
-
-
(* Test 6: Geographic coordinate parsing *)
-
let test_geographic_parsing landmasks_registry =
-
printf "=== Test 6: Geographic coordinate parsing ===\n";
-
-
let entries = Registry.entries landmasks_registry in
-
let rec take n lst =
-
match n, lst with
-
| 0, _ -> []
-
| _, [] -> []
-
| n, x :: xs -> x :: (take (n-1) xs)
-
in
-
let test_filenames = List.map Registry.filename entries |> take 5 in
-
-
printf "Testing coordinate extraction from filenames...\n";
-
List.iter (fun filename ->
-
printf " File: %s\n" filename;
-
-
(* Extract coordinates using regex-like pattern matching *)
-
if String.contains filename '_' then (
-
let parts = String.split_on_char '_' filename in
-
match parts with
-
| "grid" :: lon_str :: lat_str :: _ ->
-
(try
-
let lon = Float.of_string lon_str in
-
let lat = Float.of_string (String.split_on_char '.' lat_str |> List.hd) in
-
printf " Coordinates: %.2f, %.2f\n" lon lat;
-
(* Basic coordinate validation *)
-
assert (lon >= -180.0 && lon <= 180.0);
-
assert (lat >= -90.0 && lat <= 90.0);
-
with _ -> printf " Could not parse coordinates\n")
-
| _ -> printf " Unexpected filename format\n"
-
)
-
) test_filenames;
-
-
printf "✓ Geographic coordinate parsing test passed\n\n"
-
-
(* Test 7: Registry comparison and merging *)
-
let test_registry_operations embeddings_registry _landmasks_registry =
-
printf "=== Test 7: Registry operations ===\n";
-
-
(* Create a small test registry *)
-
let test_entry = Registry.create_entry
-
~filename:"test/file.txt"
-
~hash:(Hash.create SHA256 "abc123def456")
-
() in
-
-
let small_registry = Registry.add test_entry Registry.empty in
-
printf "Created test registry with %d entries\n" (Registry.size small_registry);
-
-
(* Test adding to existing registry *)
-
let expanded_registry = Registry.add test_entry embeddings_registry in
-
assert (Registry.size expanded_registry = Registry.size embeddings_registry + 1);
-
printf "Successfully added entry to registry\n";
-
-
(* Test removal *)
-
let reduced_registry = Registry.remove "test/file.txt" expanded_registry in
-
assert (Registry.size reduced_registry = Registry.size embeddings_registry);
-
printf "Successfully removed entry from registry\n";
-
-
printf "✓ Registry operations test passed\n\n"
-
-
(* Test 8: Error handling *)
-
let test_error_handling () =
-
printf "=== Test 8: Error handling ===\n";
-
-
(* Test invalid hash parsing *)
-
(try
-
let _ = Hash.of_string "invalid_hash_format:xyz" in
-
printf "Hash parsing with invalid format (should work or fail gracefully)\n"
-
with _ -> printf "✓ Invalid hash format properly handled\n");
-
-
(* Test malformed registry parsing *)
-
let malformed_registry = "invalid registry content without proper format" in
-
let parsed = Registry.of_string malformed_registry in
-
(* Should create empty registry or handle gracefully *)
-
printf "Malformed registry size: %d (expected 0)\n" (Registry.size parsed);
-
-
(* Test empty registry operations *)
-
let empty_reg = Registry.empty in
-
assert (Registry.size empty_reg = 0);
-
assert (Registry.find "any_file.txt" empty_reg = None);
-
printf "✓ Empty registry operations work correctly\n";
-
-
printf "✓ Error handling test passed\n\n"
-
-
(* Main test runner *)
-
let run_tests () =
-
printf "Starting Tessera-Manifests Integration Tests\n";
-
printf "============================================\n\n";
-
-
Random.self_init ();
-
-
try
-
(* Run tests sequentially *)
-
let (embeddings_registry, landmasks_registry) = test_registry_loading () in
-
test_registry_structure embeddings_registry landmasks_registry;
-
test_hash_validation embeddings_registry landmasks_registry;
-
test_registry_queries embeddings_registry landmasks_registry;
-
test_toru_workflow embeddings_registry;
-
test_geographic_parsing landmasks_registry;
-
test_registry_operations embeddings_registry landmasks_registry;
-
test_error_handling ();
-
-
printf "============================================\n";
-
printf "All Tessera Integration Tests PASSED! ✅\n";
-
printf "============================================\n\n";
-
-
with
-
| Failure msg ->
-
printf "❌ TEST FAILED: %s\n" msg;
-
exit 1
-
| e ->
-
printf "❌ UNEXPECTED ERROR: %s\n" (Printexc.to_string e);
-
exit 1
-
-
let () = run_tests ()
-36
stack/toru/test/test_tessera_load.ml
···
-
open Toru
-
-
let test_tessera_file_load () =
-
Printf.printf "Testing Registry with actual tessera-manifests file...\n";
-
-
let manifest_path = "/Users/avsm/src/git/ucam-eo/tessera-manifests/registry/embeddings/embeddings_2024_lon-10_lat50.txt" in
-
-
if Sys.file_exists manifest_path then (
-
let ic = open_in manifest_path in
-
let content = really_input_string ic (in_channel_length ic) in
-
close_in ic;
-
-
let registry = Registry.of_string content in
-
-
Printf.printf "Loaded tessera registry with %d entries\n" (Registry.size registry);
-
-
(* Test a few specific entries *)
-
let test_files = [
-
"2024/grid_-5.05_50.05/grid_-5.05_50.05.npy";
-
"2024/grid_-5.05_50.15/grid_-5.05_50.15.npy";
-
] in
-
-
List.iter (fun filename ->
-
match Registry.find filename registry with
-
| Some entry ->
-
Printf.printf "Found %s -> %s\n" filename
-
(Hash.to_string (Registry.hash entry))
-
| None -> Printf.printf "Not found: %s\n" filename
-
) test_files;
-
-
Printf.printf "Tessera file load test completed successfully\n"
-
) else (
-
Printf.printf "Tessera manifest file not found at %s\n" manifest_path
-
)
-
-
let () = test_tessera_file_load ()
-52
stack/toru/test/test_toru.ml
···
-
open Toru
-
-
let test_hash () =
-
Printf.printf "Testing Hash module...\n";
-
let hash1 = Hash.of_string "sha256:abc123def456" in
-
let hash2 = Hash.create Hash.SHA256 "abc123def456" in
-
Printf.printf "Hash 1: %s\n" (Hash.to_string hash1);
-
Printf.printf "Hash 2: %s\n" (Hash.to_string hash2);
-
Printf.printf "Equal: %b\n" (Hash.equal hash1 hash2);
-
Format.printf "Algorithm: %a\n" Hash.pp_algorithm (Hash.algorithm hash1);
-
Printf.printf "Value: %s\n" (Hash.value hash1)
-
-
let test_registry () =
-
Printf.printf "\nTesting Registry module...\n";
-
let hash = Hash.of_string "sha256:deadbeef" in
-
let entry = Registry.create_entry ~filename:"test.txt" ~hash () in
-
Printf.printf "Entry filename: %s\n" (Registry.filename entry);
-
Printf.printf "Entry hash: %s\n" (Hash.to_string (Registry.hash entry));
-
-
let registry = Registry.empty |> Registry.add entry in
-
Printf.printf "Registry size: %d\n" (Registry.size registry);
-
-
match Registry.find "test.txt" registry with
-
| Some found -> Printf.printf "Found entry: %s\n" (Registry.filename found)
-
| None -> Printf.printf "Entry not found\n"
-
-
let test_cache () =
-
Printf.printf "\nTesting Cache module...\n";
-
let cache_path = Cache.default_cache_path ~app_name:"test-toru" () in
-
Printf.printf "Default cache path: %s\n" cache_path
-
-
let test_downloader () =
-
Printf.printf "\nTesting Downloader module...\n";
-
Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
let (module D : Downloader.DOWNLOADER) = Downloader.Downloaders.wget () in
-
let downloader = D.create ~sw ~env () in
-
Printf.printf "Downloader name: %s\n" (D.name downloader);
-
Printf.printf "Supports resume: %b\n" (D.supports_resume downloader)
-
-
let main () =
-
Printf.printf "Toru Library Test\n";
-
Printf.printf "=================\n";
-
-
test_hash ();
-
test_registry ();
-
test_cache ();
-
test_downloader ();
-
-
Printf.printf "\nAll basic tests completed!\n"
-
-
let () = main ()
-112
stack/toru/test/test_xdg_integration.ml
···
-
open Printf
-
-
(** Test XDG integration to verify we're using the official xdg package *)
-
-
let test_xdg_paths () =
-
printf "Testing XDG Base Directory integration...\n";
-
-
(* Test with default app name *)
-
let default_path = Toru.Cache.default_cache_path ~app_name:"test-app" () in
-
printf " - Default cache path: %s\n" default_path;
-
-
(* Test with custom environment *)
-
let custom_home = "/tmp/test-home" in
-
let custom_cache = "/tmp/test-cache" in
-
-
(* Create a custom XDG configuration *)
-
let custom_env var =
-
match var with
-
| "HOME" -> Some custom_home
-
| "XDG_CACHE_HOME" -> Some custom_cache
-
| _ -> None
-
in
-
-
let xdg_dirs = Xdg.create ~env:custom_env () in
-
let cache_dir = Xdg.cache_dir xdg_dirs in
-
let custom_path = Filename.concat cache_dir "test-app" in
-
-
printf " - Custom XDG cache dir: %s\n" cache_dir;
-
printf " - Custom app cache path: %s\n" custom_path;
-
-
(* Verify the paths make sense *)
-
assert (String.contains default_path '.'); (* Should contain .cache or similar *)
-
assert (custom_path = "/tmp/test-cache/test-app");
-
-
printf "✓ XDG integration test passed\n\n"
-
-
let test_cross_platform_support () =
-
printf "Testing cross-platform XDG support...\n";
-
-
(* Test different platform configurations *)
-
let test_configs = [
-
("Unix HOME only", fun var -> if var = "HOME" then Some "/home/user" else None);
-
("Unix with XDG", fun var ->
-
match var with
-
| "HOME" -> Some "/home/user"
-
| "XDG_CACHE_HOME" -> Some "/home/user/.cache"
-
| _ -> None);
-
("Windows", fun var ->
-
match var with
-
| "USERPROFILE" -> Some "C:\\Users\\User"
-
| "LOCALAPPDATA" -> Some "C:\\Users\\User\\AppData\\Local"
-
| _ -> None);
-
] in
-
-
List.iter (fun (name, env_fn) ->
-
let xdg_dirs = Xdg.create ~env:env_fn () in
-
let cache_dir = Xdg.cache_dir xdg_dirs in
-
printf " - %s: %s\n" name cache_dir;
-
-
(* Basic sanity check *)
-
assert (String.length cache_dir > 0);
-
) test_configs;
-
-
printf "✓ Cross-platform support test passed\n\n"
-
-
let test_cache_creation_with_xdg () =
-
printf "Testing Cache creation with XDG...\n";
-
-
Eio_main.run @@ fun env ->
-
Eio.Switch.run @@ fun sw ->
-
(* Create cache using default XDG paths *)
-
let base_path = Toru.Cache.default_cache_path ~app_name:"xdg-test" () in
-
let cache = Toru.Cache.create ~sw ~fs:env#fs base_path in
-
let base_path = Toru.Cache.base_path cache in
-
let path_str = Eio.Path.native_exn base_path in
-
-
printf " - Cache base path: %s\n" path_str;
-
-
(* Verify it contains expected XDG components *)
-
assert (String.contains path_str '/'); (* Should be a proper path *)
-
assert (String.length path_str > 10); (* Should be reasonably long *)
-
-
(* Test file operations work *)
-
Toru.Cache.ensure_dir cache;
-
printf " - ✓ Cache directory creation works\n";
-
-
let test_file = "xdg-test-file.txt" in
-
let exists_before = Toru.Cache.exists cache test_file in
-
assert (not exists_before);
-
printf " - ✓ File existence check works\n";
-
-
printf "✓ Cache creation with XDG test passed\n\n"
-
-
let run_tests () =
-
printf "=== XDG Integration Tests ===\n\n";
-
-
try
-
test_xdg_paths ();
-
test_cross_platform_support ();
-
test_cache_creation_with_xdg ();
-
-
printf "🎉 All XDG integration tests passed!\n";
-
printf "✅ Successfully replaced homebrew XDG code with official xdg package\n\n";
-
with
-
| Failure msg ->
-
printf "❌ Test failed: %s\n" msg;
-
exit 1
-
| exn ->
-
printf "❌ Unexpected error: %s\n" (Printexc.to_string exn);
-
exit 1
-
-
let () = run_tests ()