My agentic slop goes here. Not intended for anyone else!

more

Changed files
+677 -595
stack
toru
cacheio/.gitignore stack/cacheio/.gitignore
cacheio/README.md stack/cacheio/README.md
cacheio/bin/dune stack/cacheio/bin/dune
+3 -3
cacheio/bin/example.ml stack/cacheio/bin/example.ml
···
] in
(* Cache for 1 hour *)
-
Cacheio.put_json cache ~key:url ~data:response_data ~ttl:3600.0;
Logs.info (fun m -> m "Cached response for %s" url)
) urls;
···
List.iter (fun components ->
let key = Cacheio.make_key components in
let data = Printf.sprintf "Data for %s" key in
-
Cacheio.put cache ~key ~data ~ttl:1800.0;
Logs.info (fun m -> m "Stored data with key: %s" key)
) repos;
···
for i = 1 to 15 do
let key = Printf.sprintf "key_%d" i in
let data = Printf.sprintf "value_%d" i in
-
Cacheio.put mem_cache ~key ~data ~ttl:60.0
done;
(* Check that eviction worked (max 10 entries) *)
···
] in
(* Cache for 1 hour *)
+
Cacheio.put_json cache ~key:url ~data:response_data ~ttl:(Some 3600.0);
Logs.info (fun m -> m "Cached response for %s" url)
) urls;
···
List.iter (fun components ->
let key = Cacheio.make_key components in
let data = Printf.sprintf "Data for %s" key in
+
Cacheio.put cache ~key ~data ~ttl:(Some 1800.0);
Logs.info (fun m -> m "Stored data with key: %s" key)
) repos;
···
for i = 1 to 15 do
let key = Printf.sprintf "key_%d" i in
let data = Printf.sprintf "value_%d" i in
+
Cacheio.put mem_cache ~key ~data ~ttl:(Some 60.0)
done;
(* Check that eviction worked (max 10 entries) *)
+1
cacheio/cacheio.opam stack/cacheio/cacheio.opam
···
"ptime"
"logs"
"fmt"
"alcotest" {with-test}
"odoc" {with-doc}
]
···
"ptime"
"logs"
"fmt"
+
"xdge"
"alcotest" {with-test}
"odoc" {with-doc}
]
+1
cacheio/dune-project stack/cacheio/dune-project
···
ptime
logs
fmt
(alcotest :with-test))
(tags
(cache filesystem xdg cmdliner eio)))
···
ptime
logs
fmt
+
xdge
(alcotest :with-test))
(tags
(cache filesystem xdg cmdliner eio)))
+42 -24
cacheio/lib/cacheio.ml stack/cacheio/lib/cacheio.ml
···
module type STORAGE = sig
type t
val get : t -> key:string -> string option
-
val put : t -> key:string -> data:string -> ttl:float -> unit
val delete : t -> key:string -> unit
val clear : t -> unit
val exists : t -> key:string -> bool
···
module FileStorage = struct
type t = {
base_dir : Fs.dir_ty Path.t;
-
max_size : int64 option;
app_name : string;
mutable hits : int;
mutable misses : int;
mutex : Eio.Mutex.t;
}
-
let create ~base_dir ?max_size ~app_name () =
-
{ base_dir; max_size; app_name; hits = 0; misses = 0;
mutex = Eio.Mutex.create () }
(** Ensure directory structure exists. TODO simplify with create_dirs? *)
···
(** Cache entry metadata *)
type entry_metadata = {
created_at : Ptime.t;
-
ttl : float;
size : int;
}
let metadata_to_json metadata =
-
`Assoc [
("created_at", `String (Ptime.to_rfc3339 metadata.created_at));
-
("ttl", `Float metadata.ttl);
("size", `Int metadata.size);
-
]
let metadata_from_json json =
let open Yojson.Safe.Util in
···
| Ok (t, _, _) -> t
| Error _ -> Ptime.epoch
in
{
created_at;
-
ttl = json |> member "ttl" |> to_float;
size = json |> member "size" |> to_int;
}
let is_expired metadata =
-
let now = Ptime_clock.now () in
-
let age_span = Ptime.diff now metadata.created_at in
-
let age_seconds = Ptime.Span.to_float_s age_span in
-
age_seconds > metadata.ttl
let get t ~key =
Mutex.use_rw ~protect:true t.mutex @@ fun () ->
···
type entry = {
data : string;
created_at : Ptime.t;
-
ttl : float;
}
type t = {
···
mutex = Eio.Mutex.create () }
let is_expired entry =
-
let now = Ptime_clock.now () in
-
let age_span = Ptime.diff now entry.created_at in
-
let age_seconds = Ptime.Span.to_float_s age_span in
-
age_seconds > entry.ttl
let get t ~key =
Mutex.use_rw ~protect:true t.mutex @@ fun () ->
···
let cache_dir_path = match config.cache_dir with
| Some dir -> dir
| None ->
-
let xdg = Xdg.create ~env:Sys.getenv_opt () in
-
Filename.concat (Xdg.cache_dir xdg) app_name
in
let base_dir = Eio.Path.(Eio.Stdenv.fs env / cache_dir_path) in
···
float_of_int (hours * 3600)
let is_expired ~created_at ~ttl =
-
let now = Ptime_clock.now () in
-
let age_span = Ptime.diff now created_at in
-
let age_seconds = Ptime.Span.to_float_s age_span in
-
age_seconds > ttl
let format_size bytes =
let open Int64 in
···
module type STORAGE = sig
type t
val get : t -> key:string -> string option
+
val put : t -> key:string -> data:string -> ttl:float option -> unit
val delete : t -> key:string -> unit
val clear : t -> unit
val exists : t -> key:string -> bool
···
module FileStorage = struct
type t = {
base_dir : Fs.dir_ty Path.t;
+
(* max_size : int64 option; TODO: implement max size enforcement *)
app_name : string;
mutable hits : int;
mutable misses : int;
mutex : Eio.Mutex.t;
}
+
let create ~base_dir ?max_size:_ ~app_name () =
+
{ base_dir; app_name; hits = 0; misses = 0;
mutex = Eio.Mutex.create () }
(** Ensure directory structure exists. TODO simplify with create_dirs? *)
···
(** Cache entry metadata *)
type entry_metadata = {
created_at : Ptime.t;
+
ttl : float option; (* None means no expiration *)
size : int;
}
let metadata_to_json metadata =
+
let fields = [
("created_at", `String (Ptime.to_rfc3339 metadata.created_at));
("size", `Int metadata.size);
+
] in
+
let fields = match metadata.ttl with
+
| Some ttl -> ("ttl", `Float ttl) :: fields
+
| None -> fields
+
in
+
`Assoc fields
let metadata_from_json json =
let open Yojson.Safe.Util in
···
| Ok (t, _, _) -> t
| Error _ -> Ptime.epoch
in
+
let ttl =
+
try Some (json |> member "ttl" |> to_float)
+
with _ -> None
+
in
{
created_at;
+
ttl;
size = json |> member "size" |> to_int;
}
let is_expired metadata =
+
match metadata.ttl with
+
| None -> false (* No TTL means never expires *)
+
| Some ttl ->
+
let now = Ptime_clock.now () in
+
let age_span = Ptime.diff now metadata.created_at in
+
let age_seconds = Ptime.Span.to_float_s age_span in
+
age_seconds > ttl
let get t ~key =
Mutex.use_rw ~protect:true t.mutex @@ fun () ->
···
type entry = {
data : string;
created_at : Ptime.t;
+
ttl : float option; (* None means no expiration *)
}
type t = {
···
mutex = Eio.Mutex.create () }
let is_expired entry =
+
match entry.ttl with
+
| None -> false (* No TTL means never expires *)
+
| Some ttl ->
+
let now = Ptime_clock.now () in
+
let age_span = Ptime.diff now entry.created_at in
+
let age_seconds = Ptime.Span.to_float_s age_span in
+
age_seconds > ttl
let get t ~key =
Mutex.use_rw ~protect:true t.mutex @@ fun () ->
···
let cache_dir_path = match config.cache_dir with
| Some dir -> dir
| None ->
+
(* Use xdge library for XDG paths *)
+
let xdg = Xdge.create (Eio.Stdenv.fs env) app_name in
+
Eio.Path.native_exn (Xdge.cache_dir xdg)
in
let base_dir = Eio.Path.(Eio.Stdenv.fs env / cache_dir_path) in
···
float_of_int (hours * 3600)
let is_expired ~created_at ~ttl =
+
match ttl with
+
| None -> false (* No TTL means never expires *)
+
| Some ttl_val ->
+
let now = Ptime_clock.now () in
+
let age_span = Ptime.diff now created_at in
+
let age_seconds = Ptime.Span.to_float_s age_span in
+
age_seconds > ttl_val
let format_size bytes =
let open Int64 in
+9 -7
cacheio/lib/cacheio.mli stack/cacheio/lib/cacheio.mli
···
type t
val get : t -> key:string -> string option
-
val put : t -> key:string -> data:string -> ttl:float -> unit
val delete : t -> key:string -> unit
val clear : t -> unit
val exists : t -> key:string -> bool
···
(** Get a value from the cache *)
val get : t -> key:string -> string option
-
(** Put a value into the cache with TTL in seconds *)
-
val put : t -> key:string -> data:string -> ttl:float -> unit
(** Delete a key from the cache *)
val delete : t -> key:string -> unit
···
(** Get and parse JSON from cache *)
val get_json : t -> key:string -> Yojson.Safe.t option
-
(** Store JSON in cache *)
-
val put_json : t -> key:string -> data:Yojson.Safe.t -> ttl:float -> unit
(** {2 Hierarchical Keys} *)
···
(** Convert TTL in hours to seconds *)
val hours_to_ttl : int -> float
-
(** Check if a timestamp is expired given a TTL *)
-
val is_expired : created_at:Ptime.t -> ttl:float -> bool
(** Format cache size for display *)
val format_size : int64 -> string
···
type t
val get : t -> key:string -> string option
+
val put : t -> key:string -> data:string -> ttl:float option -> unit
val delete : t -> key:string -> unit
val clear : t -> unit
val exists : t -> key:string -> bool
···
(** Get a value from the cache *)
val get : t -> key:string -> string option
+
(** Put a value into the cache with optional TTL in seconds.
+
If TTL is None, the entry never expires. *)
+
val put : t -> key:string -> data:string -> ttl:float option -> unit
(** Delete a key from the cache *)
val delete : t -> key:string -> unit
···
(** Get and parse JSON from cache *)
val get_json : t -> key:string -> Yojson.Safe.t option
+
(** Store JSON in cache with optional TTL *)
+
val put_json : t -> key:string -> data:Yojson.Safe.t -> ttl:float option -> unit
(** {2 Hierarchical Keys} *)
···
(** Convert TTL in hours to seconds *)
val hours_to_ttl : int -> float
+
(** Check if a timestamp is expired given an optional TTL.
+
Returns false if TTL is None (never expires). *)
+
val is_expired : created_at:Ptime.t -> ttl:float option -> bool
(** Format cache size for display *)
val format_size : int64 -> string
+1 -1
cacheio/lib/dune stack/cacheio/lib/dune
···
(library
(public_name cacheio)
(name cacheio)
-
(libraries eio cmdliner yojson ptime ptime.clock.os logs fmt xdg))
···
(library
(public_name cacheio)
(name cacheio)
+
(libraries eio cmdliner yojson ptime ptime.clock.os logs fmt xdge))
cacheio/xdg-eio stack/cacheio/xdg-eio
requests/.gitignore stack/requests/.gitignore
requests/CLAUDE.md stack/requests/CLAUDE.md
requests/DESIGN.md stack/requests/DESIGN.md
requests/bin/dune stack/requests/bin/dune
requests/bin/ocurl.ml stack/requests/bin/ocurl.ml
requests/dune-project stack/requests/dune-project
requests/lib/dune stack/requests/lib/dune
requests/lib/requests.ml stack/requests/lib/requests.ml
requests/lib/requests.mli stack/requests/lib/requests.mli
requests/requests.opam stack/requests/requests.opam
requests/test/dune stack/requests/test/dune
requests/test/test_requests.ml stack/requests/test/test_requests.ml
+266
stack/toru/lib/toru/cache.ml
···
···
+
(** Cache module for managing local file storage
+
Now uses cacheio for the underlying cache implementation *)
+
+
(** File info: size in bytes and modification time *)
+
type file_info = {
+
size: int64;
+
mtime: float;
+
}
+
+
(** Cache usage statistics *)
+
type usage_stats = {
+
total_size: int64;
+
file_count: int;
+
oldest: float;
+
newest: float;
+
}
+
+
type t = {
+
cacheio : Cacheio.t;
+
base_path : Eio.Fs.dir_ty Eio.Path.t;
+
version : string option;
+
sw : Eio.Switch.t;
+
app_name : string;
+
}
+
+
let create ~sw ~fs ?version path_str =
+
let base_path = Eio.Path.(fs / path_str) in
+
let app_name = match version with
+
| Some v -> Printf.sprintf "toru-%s" v
+
| None -> "toru"
+
in
+
+
(* Create cacheio FileStorage backend *)
+
let storage = Cacheio.FileStorage.create ~base_dir:base_path ~app_name () in
+
let cacheio = Cacheio.create ~storage:(`File storage) in
+
+
{ cacheio; base_path; version; sw; app_name }
+
+
let default_cache_path ?app_name () =
+
let app_name = Option.value app_name ~default:"toru" in
+
(* Using xdge library for XDG paths - but this needs env at runtime *)
+
(* For now, use a simple default *)
+
Filename.concat (Filename.concat (Sys.getenv "HOME") ".cache") app_name
+
+
let base_path t = t.base_path
+
let version t = t.version
+
+
let cache_path t =
+
Option.fold t.version ~none:t.base_path
+
~some:(fun v -> Eio.Path.(t.base_path / v))
+
+
let file_path t filename =
+
Option.fold t.version ~none:Eio.Path.(t.base_path / filename)
+
~some:(fun v -> Eio.Path.(t.base_path / v / filename))
+
+
let exists t filename =
+
(* Check if the file exists in cacheio *)
+
Cacheio.exists t.cacheio ~key:filename
+
+
let exists_path path =
+
try
+
let _stat = Eio.Path.stat ~follow:false path in
+
true
+
with
+
| _ -> false
+
+
let ensure_dir t =
+
let create_dir_recursive path =
+
if not (exists_path path) then
+
try
+
Eio.Path.mkdir path ~perm:0o755
+
with
+
| _ -> () (* Directory may already exist or creation failed *)
+
in
+
(* Create base directory first *)
+
create_dir_recursive t.base_path;
+
(* If version is specified, create version subdirectory *)
+
Option.iter (fun v ->
+
let version_path = Eio.Path.(t.base_path / v) in
+
create_dir_recursive version_path) t.version
+
+
let clear t =
+
(* Clear all entries from cacheio *)
+
Cacheio.clear t.cacheio
+
+
let size_bytes t =
+
let stats = Cacheio.stats t.cacheio in
+
stats.size
+
+
let list_files t =
+
Cacheio.list_keys t.cacheio
+
+
(** Get file info (size and mtime) *)
+
let file_info t filename =
+
let path = file_path t filename in
+
try
+
let stat = Eio.Path.stat ~follow:false path in
+
match stat.kind with
+
| `Regular_file ->
+
let size = Optint.Int63.to_int64 stat.size in
+
let mtime = stat.mtime in
+
Some { size; mtime }
+
| _ -> None
+
with
+
| _ -> None
+
+
(** Get cache usage statistics *)
+
let usage_stats t =
+
let stats = Cacheio.stats t.cacheio in
+
let cache_dir = cache_path t in
+
+
(* For oldest/newest, we need to scan the actual files *)
+
let rec collect_timestamps path acc_oldest acc_newest =
+
if not (exists_path path) then (acc_oldest, acc_newest)
+
else
+
match Eio.Path.read_dir path with
+
| [] -> (acc_oldest, acc_newest)
+
| entries ->
+
List.fold_left (fun (oldest, newest) entry ->
+
let entry_path = Eio.Path.(path / entry) in
+
let stat = Eio.Path.stat ~follow:false entry_path in
+
match stat.kind with
+
| `Regular_file ->
+
let mtime = stat.mtime in
+
let new_oldest = if oldest = 0.0 || mtime < oldest then mtime else oldest in
+
let new_newest = if newest = 0.0 || mtime > newest then mtime else newest in
+
(new_oldest, new_newest)
+
| `Directory ->
+
collect_timestamps entry_path oldest newest
+
| _ -> (oldest, newest)
+
) (acc_oldest, acc_newest) entries
+
in
+
+
let (oldest, newest) = collect_timestamps cache_dir 0.0 0.0 in
+
+
{
+
total_size = stats.size;
+
file_count = stats.entries;
+
oldest;
+
newest;
+
}
+
+
(** Remove oldest files to fit within size limit *)
+
let trim_to_size t max_size =
+
let cache_dir = cache_path t in
+
let rec collect_files_with_stats path prefix acc =
+
if not (exists_path path) then acc
+
else
+
match Eio.Path.read_dir path with
+
| [] -> acc
+
| entries ->
+
List.fold_left (fun files entry ->
+
let entry_path = Eio.Path.(path / entry) in
+
let stat = Eio.Path.stat ~follow:false entry_path in
+
match stat.kind with
+
| `Regular_file ->
+
let full_name = if prefix = "" then entry else prefix ^ "/" ^ entry in
+
let size = Optint.Int63.to_int64 stat.size in
+
let mtime = stat.mtime in
+
(full_name, entry_path, size, mtime) :: files
+
| `Directory ->
+
let new_prefix = if prefix = "" then entry else prefix ^ "/" ^ entry in
+
collect_files_with_stats entry_path new_prefix files
+
| _ -> files
+
) acc entries
+
in
+
+
let files = collect_files_with_stats cache_dir "" [] in
+
+
(* Sort by mtime (oldest first) *)
+
let sorted_files = List.sort (fun (_, _, _, a) (_, _, _, b) -> Float.compare a b) files in
+
+
(* Calculate total size and remove oldest until under limit *)
+
let rec remove_until_fit current_size files_to_check =
+
if current_size <= max_size then ()
+
else
+
match files_to_check with
+
| [] -> ()
+
| (name, path, size, _) :: rest ->
+
Eio.Path.unlink path;
+
(* Also remove from cacheio *)
+
Cacheio.delete t.cacheio ~key:name;
+
remove_until_fit (Int64.sub current_size size) rest
+
in
+
+
let total = List.fold_left (fun acc (_, _, size, _) -> Int64.add acc size) 0L files in
+
remove_until_fit total sorted_files
+
+
(** Remove files older than N days *)
+
let trim_by_age t max_age_days =
+
let max_age_seconds = max_age_days *. 86400.0 in
+
let now = Unix.time () in
+
+
let cache_dir = cache_path t in
+
let rec remove_old_files path prefix =
+
if not (exists_path path) then ()
+
else
+
match Eio.Path.read_dir path with
+
| [] -> ()
+
| entries ->
+
List.iter (fun entry ->
+
let entry_path = Eio.Path.(path / entry) in
+
let stat = Eio.Path.stat ~follow:false entry_path in
+
match stat.kind with
+
| `Regular_file ->
+
let age = now -. stat.mtime in
+
if age > max_age_seconds then begin
+
Eio.Path.unlink entry_path;
+
let full_name = if prefix = "" then entry else prefix ^ "/" ^ entry in
+
Cacheio.delete t.cacheio ~key:full_name
+
end
+
| `Directory ->
+
let new_prefix = if prefix = "" then entry else prefix ^ "/" ^ entry in
+
remove_old_files entry_path new_prefix
+
| _ -> ()
+
) entries
+
in
+
+
remove_old_files cache_dir ""
+
+
(** Remove empty directories and broken links *)
+
let vacuum t =
+
let cache_dir = cache_path t in
+
let rec clean_dir path =
+
if not (exists_path path) then false
+
else
+
match Eio.Path.read_dir path with
+
| [] ->
+
(* Empty directory, can be removed *)
+
(try Eio.Path.rmdir path; true with _ -> false)
+
| entries ->
+
let all_removed = List.fold_left (fun acc entry ->
+
let entry_path = Eio.Path.(path / entry) in
+
let stat = Eio.Path.stat ~follow:false entry_path in
+
match stat.kind with
+
| `Directory -> clean_dir entry_path && acc
+
| `Symbolic_link ->
+
(* Check if link is broken *)
+
(try
+
let _ = Eio.Path.stat ~follow:true entry_path in
+
false
+
with _ ->
+
(* Broken link, remove it *)
+
Eio.Path.unlink entry_path;
+
acc)
+
| _ -> false
+
) true entries in
+
+
if all_removed && path <> cache_dir then
+
(* All entries removed and not the root cache dir *)
+
(try Eio.Path.rmdir path; true with _ -> false)
+
else
+
false
+
in
+
+
let _ = clean_dir cache_dir in
+
()
+
+
(** Pretty printer for cache *)
+
let pp fmt t =
+
let stats = usage_stats t in
+
Format.fprintf fmt "Cache(path=%s, version=%s, files=%d, size=%s)"
+
(Eio.Path.native_exn t.base_path)
+
(Option.value t.version ~default:"none")
+
stats.file_count
+
(Cacheio.format_size stats.total_size)
+346
stack/toru/lib/toru/downloader.ml
···
···
+
(** Downloader module - now using requests library for HTTP operations *)
+
+
module Progress_reporter = struct
+
type t = {
+
name : string;
+
total_bytes : int64 option;
+
mutable current_bytes : int64;
+
mutable last_percent : int;
+
}
+
+
let create ?total_bytes name =
+
Printf.printf "Starting download: %s\n%!" name;
+
{ name; total_bytes; current_bytes = 0L; last_percent = -1 }
+
+
let update t bytes =
+
t.current_bytes <- bytes;
+
match t.total_bytes with
+
| Some total when total > 0L ->
+
let percent = Int64.(to_int (div (mul bytes 100L) total)) in
+
if percent > t.last_percent && percent mod 10 = 0 then (
+
t.last_percent <- percent;
+
Printf.printf "\r%s: %d%% (%Ld/%Ld bytes)%!"
+
t.name percent bytes total
+
)
+
| _ ->
+
(* Unknown size, just show bytes downloaded *)
+
if Int64.rem bytes 1048576L = 0L then (* Every MB *)
+
Printf.printf "\r%s: %Ld bytes downloaded%!" t.name bytes
+
+
let finish t =
+
Printf.printf "\n%s: Complete (%Ld bytes)\n%!" t.name t.current_bytes
+
end
+
+
module Config = struct
+
type auth = {
+
username : string option;
+
password : string option;
+
}
+
end
+
+
module type DOWNLOADER = sig
+
type t
+
+
val create : sw:Eio.Switch.t -> env:Eio_unix.Stdenv.base ->
+
?auth:Config.auth -> unit -> t
+
+
val download : t ->
+
url:string ->
+
dest:Eio.Fs.dir_ty Eio.Path.t ->
+
?hash:Hash.t ->
+
?progress:Progress_reporter.t ->
+
?resume:bool ->
+
unit -> (unit, string) result
+
+
val supports_resume : t -> bool
+
val name : t -> string
+
end
+
+
(** HTTP downloader using requests library *)
+
module Http_downloader = struct
+
type t = {
+
sw : Eio.Switch.t;
+
env : Eio_unix.Stdenv.base;
+
net : Eio_unix.Stdenv.base;
+
auth : Requests.Auth.t option;
+
}
+
+
let create ~sw ~env ?auth () =
+
(* Convert auth config to Requests.Auth.t *)
+
let auth = Option.map (fun a ->
+
match a.Config.username, a.Config.password with
+
| Some u, Some p -> Requests.Auth.basic ~username:u ~password:p
+
| _ -> Requests.Auth.none
+
) auth in
+
+
{ sw; env; net = env; auth }
+
+
let download t ~url ~dest ?hash ?progress ?(resume=false) () =
+
let _ = resume in (* TODO: implement resume support with range requests *)
+
try
+
let uri = Uri.of_string url in
+
let dest_path = Eio.Path.native_exn dest in
+
+
(* Create a client for this request *)
+
let client = Requests.create
+
~tls_config:(Requests.Tls.default ())
+
~clock:t.env#clock
+
t.env#net
+
in
+
+
(* Configure request *)
+
let config =
+
let base_config = Requests.Config.create
+
~timeout:300.0
+
~follow_redirects:true
+
~max_redirects:10
+
() in
+
match t.auth with
+
| Some auth -> Requests.Config.create ~auth ~timeout:300.0 ~follow_redirects:true ()
+
| None -> base_config
+
in
+
+
(* Download the file *)
+
Requests.download_file ~sw:t.sw client ~config uri ~path:dest_path;
+
+
(* Update progress if provided *)
+
Option.iter (fun p ->
+
(* Get file size for final progress update *)
+
let stat = Eio.Path.stat ~follow:false dest in
+
let size = Optint.Int63.to_int64 stat.size in
+
Progress_reporter.update p size;
+
Progress_reporter.finish p
+
) progress;
+
+
(* Verify hash if provided *)
+
match hash with
+
| Some h ->
+
if Hash.verify dest h then
+
Ok ()
+
else
+
Error ("Hash verification failed for " ^ dest_path)
+
| None -> Ok ()
+
+
with
+
| Requests.Request_error err ->
+
Error (Format.asprintf "HTTP download failed: %a" Requests.pp_error err)
+
| exn ->
+
Error ("Download failed: " ^ Printexc.to_string exn)
+
+
let supports_resume _ = false (* Requests library doesn't support resume yet *)
+
let name _ = "http"
+
end
+
+
(** Legacy wget downloader for systems that prefer external tools *)
+
module Wget_downloader = struct
+
type t = {
+
sw : Eio.Switch.t;
+
env : Eio_unix.Stdenv.base;
+
auth : Config.auth option;
+
timeout : float;
+
}
+
+
let create ~sw ~env ?auth () = { sw; env; auth; timeout = 300.0 }
+
+
let download t ~url ~dest ?hash ?progress:_ ?(resume=true) () =
+
let dest_path = Eio.Path.native_exn dest in
+
+
(* Build wget arguments *)
+
let args = [
+
"--quiet";
+
"--show-progress";
+
"--timeout=300";
+
"--tries=3";
+
"--output-document=" ^ dest_path;
+
] in
+
+
(* Add authentication if provided *)
+
let args = Option.fold t.auth ~none:args ~some:(fun auth ->
+
let user_arg = Option.map (fun u -> "--user=" ^ u) auth.Config.username
+
|> Option.to_list in
+
let pass_arg = Option.map (fun p -> "--password=" ^ p) auth.Config.password
+
|> Option.to_list in
+
user_arg @ pass_arg @ args) in
+
+
(* Add resume support if enabled *)
+
let args = if resume then "--continue" :: args else args in
+
+
(* Add URL as last argument *)
+
let args = args @ [url] in
+
+
(* Build command line with wget command *)
+
let cmd_args = "wget" :: args in
+
+
try
+
(* Run wget using Eio process manager *)
+
let cmd_line = String.concat " " (List.map Filename.quote cmd_args) in
+
let process = Eio.Process.spawn t.env#process_mgr ~sw:t.sw
+
~executable:"/bin/sh" ["/bin/sh"; "-c"; cmd_line] in
+
let exit_status = Eio.Process.await process in
+
if exit_status <> `Exited 0 then
+
let error_msg = match exit_status with
+
| `Exited n -> Printf.sprintf "wget exited with code %d" n
+
| `Signaled n -> Printf.sprintf "wget killed by signal %d" n in
+
Error ("Download failed: " ^ error_msg)
+
else (
+
(* Verify hash if provided *)
+
match hash with
+
| Some h ->
+
if Hash.verify dest h then
+
Ok ()
+
else
+
Error ("Hash verification failed for " ^ dest_path)
+
| None -> Ok ()
+
)
+
with
+
| exn -> Error ("wget failed: " ^ (Printexc.to_string exn))
+
+
let supports_resume _ = true
+
let name _ = "wget"
+
end
+
+
(** Legacy curl downloader for systems that prefer external tools *)
+
module Curl_downloader = struct
+
type t = {
+
sw : Eio.Switch.t;
+
env : Eio_unix.Stdenv.base;
+
auth : Config.auth option;
+
timeout : float;
+
}
+
+
let create ~sw ~env ?auth () = { sw; env; auth; timeout = 300.0 }
+
+
let download t ~url ~dest ?hash ?progress:_ ?(resume=true) () =
+
let dest_path = Eio.Path.native_exn dest in
+
+
(* Build curl arguments *)
+
let args = [
+
"--silent";
+
"--show-error";
+
"--location";
+
"--max-time"; "300";
+
"--retry"; "3";
+
"--output"; dest_path;
+
] in
+
+
(* Add authentication if provided *)
+
let args = Option.fold t.auth ~none:args ~some:(fun auth ->
+
let auth_str = match auth.Config.username, auth.Config.password with
+
| Some user, Some pass -> Some (user ^ ":" ^ pass)
+
| Some user, None -> Some user
+
| None, _ -> None in
+
match auth_str with
+
| Some s -> "--user" :: s :: args
+
| None -> args) in
+
+
(* Add resume support if enabled *)
+
let args = if resume then "--continue-at" :: "-" :: args else args in
+
+
(* Add URL as last argument *)
+
let args = args @ [url] in
+
+
(* Build command line with curl command *)
+
let cmd_args = "curl" :: args in
+
+
try
+
(* Run curl using Eio process manager *)
+
let cmd_line = String.concat " " (List.map Filename.quote cmd_args) in
+
let process = Eio.Process.spawn t.env#process_mgr ~sw:t.sw
+
~executable:"/bin/sh" ["/bin/sh"; "-c"; cmd_line] in
+
let exit_status = Eio.Process.await process in
+
if exit_status <> `Exited 0 then
+
let error_msg = match exit_status with
+
| `Exited n -> Printf.sprintf "curl exited with code %d" n
+
| `Signaled n -> Printf.sprintf "curl killed by signal %d" n in
+
Error ("Download failed: " ^ error_msg)
+
else (
+
(* Verify hash if provided *)
+
match hash with
+
| Some h ->
+
if Hash.verify dest h then
+
Ok ()
+
else
+
Error ("Hash verification failed for " ^ dest_path)
+
| None -> Ok ()
+
)
+
with
+
| exn -> Error ("curl failed: " ^ (Printexc.to_string exn))
+
+
let supports_resume _ = true
+
let name _ = "curl"
+
end
+
+
(** Cohttp_downloader is now an alias for Http_downloader *)
+
module Cohttp_downloader = Http_downloader
+
+
module Downloaders = struct
+
let wget () = (module Wget_downloader : DOWNLOADER)
+
let curl () = (module Curl_downloader : DOWNLOADER)
+
let cohttp () = (module Http_downloader : DOWNLOADER)
+
let http () = (module Http_downloader : DOWNLOADER)
+
+
let detect_available ~env =
+
let check_command cmd =
+
try
+
let sw = Eio.Switch.run @@ fun sw ->
+
let process = Eio.Process.spawn env#process_mgr ~sw
+
~executable:"/bin/sh" ["/bin/sh"; "-c"; cmd ^ " --version > /dev/null 2>&1"] in
+
let exit_status = Eio.Process.await process in
+
exit_status = `Exited 0
+
in sw
+
with _ -> false
+
in
+
+
let available = ref [] in
+
+
(* Always available - uses requests library *)
+
available := ("http", http ()) :: !available;
+
+
(* Check for external tools *)
+
if check_command "wget" then
+
available := ("wget", wget ()) :: !available;
+
+
if check_command "curl" then
+
available := ("curl", curl ()) :: !available;
+
+
List.rev !available
+
+
let create_default ~env =
+
(* Default to http (requests library), fallback to wget/curl if needed *)
+
match detect_available ~env with
+
| [] -> failwith "No downloaders available"
+
| (_, d) :: _ -> d
+
+
let of_string name =
+
match name with
+
| "wget" -> Some (wget ())
+
| "curl" -> Some (curl ())
+
| "cohttp" | "http" -> Some (http ())
+
| _ -> None
+
end
+
+
module Cli = struct
+
type downloader_choice = [ `Wget | `Curl | `Cohttp | `Http | `Auto ]
+
+
open Cmdliner
+
+
let downloader_term =
+
let choices = [
+
("wget", `Wget);
+
("curl", `Curl);
+
("cohttp", `Cohttp);
+
("http", `Http);
+
("auto", `Auto)
+
] in
+
let doc = "Download tool to use. 'auto' detects available tools, 'http' uses built-in HTTP client." in
+
let docv = "TOOL" in
+
Arg.(value & opt (enum choices) `Auto & info ["downloader"; "d"] ~doc ~docv)
+
+
let downloader_info = Arg.info ["downloader"; "d"]
+
+
let create_downloader ~env = function
+
| `Wget -> Downloaders.wget ()
+
| `Curl -> Downloaders.curl ()
+
| `Cohttp | `Http -> Downloaders.http ()
+
| `Auto -> Downloaders.create_default ~env
+
end
toru/.gitignore stack/toru/.gitignore
toru/CLAUDE.md stack/toru/CLAUDE.md
toru/README.md stack/toru/README.md
toru/TODO.md stack/toru/TODO.md
toru/bin/dune stack/toru/bin/dune
toru/bin/tessera_loader.ml stack/toru/bin/tessera_loader.ml
toru/bin/toru_cache.ml stack/toru/bin/toru_cache.ml
toru/bin/toru_main.ml stack/toru/bin/toru_main.ml
toru/bin/toru_make_registry.ml stack/toru/bin/toru_make_registry.ml
toru/bin/toru_make_registry_simple.ml stack/toru/bin/toru_make_registry_simple.ml
+2
toru/dune-project stack/toru/dune-project
···
fmt
ptime
xdge
(logs (>= 0.7.0))
unix)
(authors "Toru Development Team")
···
fmt
ptime
xdge
+
cacheio
+
requests
(logs (>= 0.7.0))
unix)
(authors "Toru Development Team")
-289
toru/lib/toru/cache.ml
···
-
(** File info: size in bytes and modification time *)
-
type file_info = {
-
size: int64;
-
mtime: float;
-
}
-
-
(** Cache usage statistics *)
-
type usage_stats = {
-
total_size: int64;
-
file_count: int;
-
oldest: float;
-
newest: float;
-
}
-
-
type t = {
-
base_path : Eio.Fs.dir_ty Eio.Path.t;
-
version : string option;
-
sw : Eio.Switch.t;
-
}
-
-
let create ~sw ~fs ?version path_str =
-
let base_path = Eio.Path.(fs / path_str) in
-
{ base_path; version; sw }
-
-
let default_cache_path ?app_name () =
-
let app_name = Option.value app_name ~default:"toru" in
-
let xdg_dirs = Xdg.create ~env:Sys.getenv_opt () in
-
let cache_dir = Xdg.cache_dir xdg_dirs in
-
Filename.concat cache_dir app_name
-
-
let base_path t = t.base_path
-
let version t = t.version
-
-
let cache_path t =
-
Option.fold t.version ~none:t.base_path
-
~some:(fun v -> Eio.Path.(t.base_path / v))
-
-
let file_path t filename =
-
Option.fold t.version ~none:Eio.Path.(t.base_path / filename)
-
~some:(fun v -> Eio.Path.(t.base_path / v / filename))
-
-
let exists t filename =
-
let path = file_path t filename in
-
(* TODO: Use Eio.Path.exists when available *)
-
try
-
let _stat = Eio.Path.stat ~follow:false path in
-
true
-
with
-
| _ -> false
-
-
let exists_path path =
-
try
-
let _stat = Eio.Path.stat ~follow:false path in
-
true
-
with
-
| _ -> false
-
-
let ensure_dir t =
-
let create_dir_recursive path =
-
if not (exists_path path) then
-
try
-
(* Try to create parent directory first *)
-
(* Skip parent creation for now, rely on mkdir -p behavior if available *)
-
Eio.Path.mkdir path ~perm:0o755
-
with
-
| _ -> () (* Directory may already exist or creation failed *)
-
in
-
(* Create base directory first *)
-
create_dir_recursive t.base_path;
-
(* If version is specified, create version subdirectory *)
-
Option.iter (fun v ->
-
let version_path = Eio.Path.(t.base_path / v) in
-
create_dir_recursive version_path) t.version
-
-
let clear t =
-
let cache_dir = cache_path t in
-
let rec remove_contents path =
-
match Eio.Path.read_dir path with
-
| [] -> ()
-
| entries ->
-
List.iter (fun entry ->
-
let entry_path = Eio.Path.(path / entry) in
-
let stat = Eio.Path.stat ~follow:false entry_path in
-
match stat.kind with
-
| `Directory ->
-
remove_contents entry_path;
-
Eio.Path.rmdir entry_path
-
| `Regular_file | `Symbolic_link ->
-
Eio.Path.unlink entry_path
-
| _ -> () (* Skip other file types *)
-
) entries
-
in
-
if exists_path cache_dir then
-
remove_contents cache_dir
-
-
let size_bytes t =
-
let cache_dir = cache_path t in
-
let rec calculate_size path acc =
-
if not (exists_path path) then acc
-
else
-
match Eio.Path.read_dir path with
-
| [] -> acc
-
| entries ->
-
List.fold_left (fun total entry ->
-
let entry_path = Eio.Path.(path / entry) in
-
let stat = Eio.Path.stat ~follow:false entry_path in
-
match stat.kind with
-
| `Regular_file -> Int64.add total (Optint.Int63.to_int64 stat.size)
-
| `Directory -> calculate_size entry_path total
-
| _ -> total
-
) acc entries
-
in
-
calculate_size cache_dir 0L
-
-
let list_files t =
-
let cache_dir = cache_path t in
-
let rec collect_files path prefix acc =
-
if not (exists_path path) then acc
-
else
-
match Eio.Path.read_dir path with
-
| [] -> acc
-
| entries ->
-
List.fold_left (fun files entry ->
-
let entry_path = Eio.Path.(path / entry) in
-
let stat = Eio.Path.stat ~follow:false entry_path in
-
match stat.kind with
-
| `Regular_file ->
-
let full_name = if prefix = "" then entry else prefix ^ "/" ^ entry in
-
full_name :: files
-
| `Directory ->
-
let new_prefix = if prefix = "" then entry else prefix ^ "/" ^ entry in
-
collect_files entry_path new_prefix files
-
| _ -> files
-
) acc entries
-
in
-
List.rev (collect_files cache_dir "" [])
-
-
(** Get file info (size and mtime) *)
-
let file_info t filename =
-
let path = file_path t filename in
-
try
-
let stat = Eio.Path.stat ~follow:false path in
-
match stat.kind with
-
| `Regular_file ->
-
let size = Optint.Int63.to_int64 stat.size in
-
let mtime = stat.mtime in
-
Some { size; mtime }
-
| _ -> None
-
with
-
| _ -> None
-
-
(** Get cache usage statistics *)
-
let usage_stats t =
-
let cache_dir = cache_path t in
-
let rec collect_stats path acc_size acc_count acc_oldest acc_newest =
-
if not (exists_path path) then (acc_size, acc_count, acc_oldest, acc_newest)
-
else
-
match Eio.Path.read_dir path with
-
| [] -> (acc_size, acc_count, acc_oldest, acc_newest)
-
| entries ->
-
List.fold_left (fun (total_size, file_count, oldest, newest) entry ->
-
let entry_path = Eio.Path.(path / entry) in
-
let stat = Eio.Path.stat ~follow:false entry_path in
-
match stat.kind with
-
| `Regular_file ->
-
let size = Optint.Int63.to_int64 stat.size in
-
let mtime = stat.mtime in
-
let new_oldest = if oldest = 0.0 || mtime < oldest then mtime else oldest in
-
let new_newest = if newest = 0.0 || mtime > newest then mtime else newest in
-
(Int64.add total_size size, file_count + 1, new_oldest, new_newest)
-
| `Directory ->
-
collect_stats entry_path total_size file_count oldest newest
-
| _ -> (total_size, file_count, oldest, newest)
-
) (acc_size, acc_count, acc_oldest, acc_newest) entries
-
in
-
let (total_size, file_count, oldest, newest) = collect_stats cache_dir 0L 0 0.0 0.0 in
-
{ total_size; file_count; oldest; newest }
-
-
(** Remove oldest files to fit within size limit *)
-
let trim_to_size t max_size =
-
let cache_dir = cache_path t in
-
let rec collect_files_with_stats path prefix acc =
-
if not (exists_path path) then acc
-
else
-
match Eio.Path.read_dir path with
-
| [] -> acc
-
| entries ->
-
List.fold_left (fun files entry ->
-
let entry_path = Eio.Path.(path / entry) in
-
let stat = Eio.Path.stat ~follow:false entry_path in
-
match stat.kind with
-
| `Regular_file ->
-
let full_name = if prefix = "" then entry else prefix ^ "/" ^ entry in
-
let size = Optint.Int63.to_int64 stat.size in
-
let mtime = stat.mtime in
-
(full_name, entry_path, size, mtime) :: files
-
| `Directory ->
-
let new_prefix = if prefix = "" then entry else prefix ^ "/" ^ entry in
-
collect_files_with_stats entry_path new_prefix files
-
| _ -> files
-
) acc entries
-
in
-
let files = collect_files_with_stats cache_dir "" [] in
-
let total_size = List.fold_left (fun acc (_, _, size, _) -> Int64.add acc size) 0L files in
-
if Int64.compare total_size max_size > 0 then (
-
(* Sort by modification time (oldest first) *)
-
let sorted_files = List.sort (fun (_, _, _, mtime1) (_, _, _, mtime2) ->
-
Float.compare mtime1 mtime2) files in
-
let rec remove_files remaining_files current_size =
-
if Int64.compare current_size max_size <= 0 then ()
-
else
-
match remaining_files with
-
| [] -> ()
-
| (_, path, size, _) :: rest ->
-
(try Eio.Path.unlink path with _ -> ());
-
remove_files rest (Int64.sub current_size size)
-
in
-
remove_files sorted_files total_size
-
)
-
-
(** Remove files older than N days *)
-
let trim_by_age t max_age_days =
-
let cache_dir = cache_path t in
-
let current_time = Unix.time () in
-
let max_age_seconds = max_age_days *. 86400.0 in (* days to seconds *)
-
let rec remove_old_files path =
-
if exists_path path then
-
match Eio.Path.read_dir path with
-
| [] -> ()
-
| entries ->
-
List.iter (fun entry ->
-
let entry_path = Eio.Path.(path / entry) in
-
let stat = Eio.Path.stat ~follow:false entry_path in
-
match stat.kind with
-
| `Regular_file ->
-
let file_age = current_time -. stat.mtime in
-
if file_age > max_age_seconds then (
-
try Eio.Path.unlink entry_path with _ -> ()
-
)
-
| `Directory ->
-
remove_old_files entry_path
-
| _ -> ()
-
) entries
-
in
-
remove_old_files cache_dir
-
-
(** Remove empty directories and broken links *)
-
let vacuum t =
-
let cache_dir = cache_path t in
-
let rec vacuum_directory path =
-
if exists_path path then
-
match Eio.Path.read_dir path with
-
| [] ->
-
(* Try to remove empty directory if it's not the base cache path *)
-
if path <> cache_dir then (
-
try Eio.Path.rmdir path with _ -> ()
-
)
-
| entries ->
-
List.iter (fun entry ->
-
let entry_path = Eio.Path.(path / entry) in
-
try
-
let stat = Eio.Path.stat ~follow:false entry_path in
-
match stat.kind with
-
| `Directory -> vacuum_directory entry_path
-
| `Symbolic_link ->
-
(* Check if symlink is broken *)
-
(try
-
let _ = Eio.Path.stat ~follow:true entry_path in ()
-
with
-
| _ -> Eio.Path.unlink entry_path)
-
| _ -> ()
-
with
-
| _ ->
-
(* If we can't stat it, it might be broken - try to remove *)
-
(try Eio.Path.unlink entry_path with _ -> ())
-
) entries;
-
(* Check again if directory is now empty *)
-
(match Eio.Path.read_dir path with
-
| [] when path <> cache_dir ->
-
(try Eio.Path.rmdir path with _ -> ())
-
| _ -> ())
-
in
-
vacuum_directory cache_dir
-
-
let pp fmt t =
-
let version_str = Option.fold t.version ~none:"no version"
-
~some:(fun v -> "version " ^ v) in
-
Format.fprintf fmt "Cache at %s (%s)"
-
(Eio.Path.native_exn t.base_path) version_str
···
toru/lib/toru/cache.mli stack/toru/lib/toru/cache.mli
-268
toru/lib/toru/downloader.ml
···
-
module Progress_reporter = struct
-
type t = {
-
name : string;
-
total_bytes : int64 option;
-
mutable current_bytes : int64;
-
mutable last_percent : int;
-
}
-
-
let create ?total_bytes name =
-
Printf.printf "Starting download: %s\n%!" name;
-
{ name; total_bytes; current_bytes = 0L; last_percent = -1 }
-
-
let update t bytes =
-
t.current_bytes <- bytes;
-
match t.total_bytes with
-
| Some total when total > 0L ->
-
let percent = Int64.(to_int (div (mul bytes 100L) total)) in
-
if percent > t.last_percent && percent mod 10 = 0 then (
-
t.last_percent <- percent;
-
Printf.printf "\r%s: %d%% (%Ld/%Ld bytes)%!"
-
t.name percent bytes total
-
)
-
| _ ->
-
(* Unknown size, just show bytes downloaded *)
-
if Int64.rem bytes 1048576L = 0L then (* Every MB *)
-
Printf.printf "\r%s: %Ld bytes downloaded%!" t.name bytes
-
-
let finish t =
-
Printf.printf "\n%s: Complete (%Ld bytes)\n%!" t.name t.current_bytes
-
end
-
-
module Config = struct
-
type auth = {
-
username : string option;
-
password : string option;
-
}
-
end
-
-
module type DOWNLOADER = sig
-
type t
-
-
val create : sw:Eio.Switch.t -> env:Eio_unix.Stdenv.base ->
-
?auth:Config.auth -> unit -> t
-
-
val download : t ->
-
url:string ->
-
dest:Eio.Fs.dir_ty Eio.Path.t ->
-
?hash:Hash.t ->
-
?progress:Progress_reporter.t ->
-
?resume:bool ->
-
unit -> (unit, string) result
-
-
val supports_resume : t -> bool
-
val name : t -> string
-
end
-
-
module Wget_downloader = struct
-
type t = {
-
sw : Eio.Switch.t;
-
env : Eio_unix.Stdenv.base;
-
auth : Config.auth option;
-
timeout : float;
-
}
-
-
let create ~sw ~env ?auth () = { sw; env; auth; timeout = 300.0 }
-
-
let download t ~url ~dest ?hash ?progress:_ ?(resume=true) () =
-
let dest_path = Eio.Path.native_exn dest in
-
-
(* Build wget arguments (excluding command name) *)
-
let args = [
-
"--quiet"; (* Reduce output noise *)
-
"--show-progress"; (* Show progress bar *)
-
"--timeout=300"; (* 5 minute timeout *)
-
"--tries=3"; (* Retry 3 times *)
-
"--output-document=" ^ dest_path; (* Output file *)
-
] in
-
-
(* Add authentication if provided *)
-
let args = Option.fold t.auth ~none:args ~some:(fun auth ->
-
let user_arg = Option.map (fun u -> "--user=" ^ u) auth.Config.username
-
|> Option.to_list in
-
let pass_arg = Option.map (fun p -> "--password=" ^ p) auth.Config.password
-
|> Option.to_list in
-
user_arg @ pass_arg @ args) in
-
-
(* Add resume support if enabled *)
-
let args = if resume then "--continue" :: args else args in
-
-
(* Add URL as last argument *)
-
let args = args @ [url] in
-
-
(* Build command line with wget command *)
-
let cmd_args = "wget" :: args in
-
-
try
-
(* Run wget using Eio process manager - use shell to handle PATH *)
-
let cmd_line = String.concat " " (List.map Filename.quote cmd_args) in
-
let process = Eio.Process.spawn t.env#process_mgr ~sw:t.sw
-
~executable:"/bin/sh" ["/bin/sh"; "-c"; cmd_line] in
-
let exit_status = Eio.Process.await process in
-
if exit_status <> `Exited 0 then
-
let error_msg = match exit_status with
-
| `Exited n -> Printf.sprintf "wget exited with code %d" n
-
| `Signaled n -> Printf.sprintf "wget killed by signal %d" n in
-
Error ("Download failed: " ^ error_msg)
-
else (
-
(* Verify hash if provided *)
-
match hash with
-
| Some h ->
-
if Hash.verify dest h then
-
Ok ()
-
else
-
Error ("Hash verification failed for " ^ dest_path)
-
| None -> Ok ()
-
)
-
with
-
| exn -> Error ("wget failed: " ^ (Printexc.to_string exn))
-
-
let supports_resume _ = true
-
let name _ = "wget"
-
end
-
-
module Curl_downloader = struct
-
type t = {
-
sw : Eio.Switch.t;
-
env : Eio_unix.Stdenv.base;
-
auth : Config.auth option;
-
timeout : float;
-
}
-
-
let create ~sw ~env ?auth () = { sw; env; auth; timeout = 300.0 }
-
-
let download t ~url ~dest ?hash ?progress:_ ?(resume=true) () =
-
let dest_path = Eio.Path.native_exn dest in
-
-
(* Build curl arguments (excluding command name) *)
-
let args = [
-
"--silent"; (* Reduce output noise *)
-
"--show-error"; (* Show error messages *)
-
"--location"; (* Follow redirects *)
-
"--max-time"; "300"; (* 5 minute timeout *)
-
"--retry"; "3"; (* Retry 3 times *)
-
"--output"; dest_path; (* Output file *)
-
] in
-
-
(* Add authentication if provided *)
-
let args = Option.fold t.auth ~none:args ~some:(fun auth ->
-
let auth_str = match auth.Config.username, auth.Config.password with
-
| Some user, Some pass -> Some (user ^ ":" ^ pass)
-
| Some user, None -> Some user
-
| None, _ -> None in
-
Option.fold auth_str ~none:args ~some:(fun str -> "--user" :: str :: args)) in
-
-
(* Add resume support if enabled *)
-
let args = if resume then args @ ["--continue-at"; "-"] else args in
-
-
(* Add URL as last argument *)
-
let args = args @ [url] in
-
-
(* Build command line with curl command *)
-
let cmd_args = "curl" :: args in
-
-
try
-
(* Run curl using Eio process manager - use shell to handle PATH *)
-
let cmd_line = String.concat " " (List.map Filename.quote cmd_args) in
-
let process = Eio.Process.spawn t.env#process_mgr ~sw:t.sw
-
~executable:"/bin/sh" ["/bin/sh"; "-c"; cmd_line] in
-
let exit_status = Eio.Process.await process in
-
if exit_status <> `Exited 0 then
-
let error_msg = match exit_status with
-
| `Exited n -> Printf.sprintf "curl exited with code %d" n
-
| `Signaled n -> Printf.sprintf "curl killed by signal %d" n in
-
Error ("Download failed: " ^ error_msg)
-
else (
-
(* Verify hash if provided *)
-
match hash with
-
| Some h ->
-
if Hash.verify dest h then
-
Ok ()
-
else
-
Error ("Hash verification failed for " ^ dest_path)
-
| None -> Ok ()
-
)
-
with
-
| exn -> Error ("curl failed: " ^ (Printexc.to_string exn))
-
-
let supports_resume _ = true
-
let name _ = "curl"
-
end
-
-
module Cohttp_downloader = struct
-
type t = {
-
sw : Eio.Switch.t;
-
env : Eio_unix.Stdenv.base;
-
auth : Config.auth option;
-
timeout : float;
-
}
-
-
let create ~sw ~env ?auth () = { sw; env; auth; timeout = 300.0 }
-
-
let download _t ~url:_ ~dest:_ ?hash:_ ?progress:_ ?resume:_ () =
-
Error "Cohttp_downloader.download not yet implemented"
-
-
let supports_resume _ = false
-
let name _ = "cohttp-eio"
-
end
-
-
module Downloaders = struct
-
let wget () = (module Wget_downloader : DOWNLOADER)
-
-
let curl () = (module Curl_downloader : DOWNLOADER)
-
-
let cohttp () = (module Cohttp_downloader : DOWNLOADER)
-
-
let detect_available ~env =
-
let test_command cmd =
-
try
-
Eio.Switch.run @@ fun sw ->
-
(* Use 'which' command to check if command exists on PATH *)
-
let process = Eio.Process.spawn env#process_mgr ~sw
-
~executable:"/bin/sh" ["/bin/sh"; "-c"; "which " ^ cmd ^ " >/dev/null 2>&1"] in
-
let exit_status = Eio.Process.await process in
-
exit_status = `Exited 0
-
with
-
| _ -> false
-
in
-
[("wget", (module Wget_downloader : DOWNLOADER));
-
("curl", (module Curl_downloader : DOWNLOADER))]
-
|> List.filter (fun (cmd, _) -> test_command cmd)
-
-
let create_default ~env =
-
let available = detect_available ~env in
-
match available with
-
| (name, downloader) :: _ ->
-
Printf.eprintf "Using %s downloader\n" name;
-
downloader
-
| [] ->
-
failwith "No downloaders available (wget or curl required)"
-
-
let of_string name =
-
[("wget", (module Wget_downloader : DOWNLOADER));
-
("curl", (module Curl_downloader : DOWNLOADER))]
-
|> List.assoc_opt name
-
end
-
-
module Cli = struct
-
type downloader_choice = [ `Wget | `Curl | `Cohttp | `Auto ]
-
-
let downloader_term =
-
let open Cmdliner in
-
let doc = "Download tool to use. 'auto' detects available tools." in
-
let docv = "TOOL" in
-
Arg.(value & opt (enum [
-
("wget", `Wget); ("curl", `Curl);
-
("cohttp", `Cohttp); ("auto", `Auto)
-
]) `Auto & info ["downloader"; "d"] ~doc ~docv)
-
-
let downloader_info =
-
Cmdliner.Arg.info ["downloader"; "d"]
-
~doc:"Download tool to use"
-
-
let create_downloader ~env = function
-
| `Wget -> Downloaders.wget ()
-
| `Curl -> Downloaders.curl ()
-
| `Cohttp -> Downloaders.cohttp ()
-
| `Auto -> Downloaders.create_default ~env
-
end
···
+2 -1
toru/lib/toru/downloader.mli stack/toru/lib/toru/downloader.mli
···
val wget : unit -> (module DOWNLOADER)
val curl : unit -> (module DOWNLOADER)
val cohttp : unit -> (module DOWNLOADER)
val detect_available : env:Eio_unix.Stdenv.base ->
(string * (module DOWNLOADER)) list
···
(** CLI integration *)
module Cli : sig
-
type downloader_choice = [ `Wget | `Curl | `Cohttp | `Auto ]
val downloader_term : downloader_choice Cmdliner.Term.t
val downloader_info : Cmdliner.Arg.info
···
val wget : unit -> (module DOWNLOADER)
val curl : unit -> (module DOWNLOADER)
val cohttp : unit -> (module DOWNLOADER)
+
val http : unit -> (module DOWNLOADER)
val detect_available : env:Eio_unix.Stdenv.base ->
(string * (module DOWNLOADER)) list
···
(** CLI integration *)
module Cli : sig
+
type downloader_choice = [ `Wget | `Curl | `Cohttp | `Http | `Auto ]
val downloader_term : downloader_choice Cmdliner.Term.t
val downloader_info : Cmdliner.Arg.info
+4 -2
toru/lib/toru/dune stack/toru/lib/toru/dune
···
(public_name toru)
(name toru)
(modules hash registry cache processors downloader make_registry toru logging)
-
(libraries
eio
eio.unix
digestif
···
cmdliner
re
ptime
-
xdg
unix
logs
logs.fmt
···
(public_name toru)
(name toru)
(modules hash registry cache processors downloader make_registry toru logging)
+
(libraries
eio
eio.unix
digestif
···
cmdliner
re
ptime
+
xdge
+
cacheio
+
requests
unix
logs
logs.fmt
toru/lib/toru/hash.ml stack/toru/lib/toru/hash.ml
toru/lib/toru/hash.mli stack/toru/lib/toru/hash.mli
toru/lib/toru/logging.ml stack/toru/lib/toru/logging.ml
toru/lib/toru/logging.mli stack/toru/lib/toru/logging.mli
toru/lib/toru/make_registry.ml stack/toru/lib/toru/make_registry.ml
toru/lib/toru/make_registry.mli stack/toru/lib/toru/make_registry.mli
toru/lib/toru/processors.ml stack/toru/lib/toru/processors.ml
toru/lib/toru/processors.mli stack/toru/lib/toru/processors.mli
toru/lib/toru/registry.ml stack/toru/lib/toru/registry.ml
toru/lib/toru/registry.mli stack/toru/lib/toru/registry.mli
toru/lib/toru/toru.ml stack/toru/lib/toru/toru.ml
toru/lib/toru/toru.mli stack/toru/lib/toru/toru.mli
toru/test/CACHE_IMPLEMENTATION_REPORT.md stack/toru/test/CACHE_IMPLEMENTATION_REPORT.md
toru/test/basic.t stack/toru/test/basic.t
toru/test/cli.t stack/toru/test/cli.t
toru/test/downloader_demo.ml stack/toru/test/downloader_demo.ml
toru/test/dune stack/toru/test/dune
toru/test/python/generate_pooch_registry.py stack/toru/test/python/generate_pooch_registry.py
toru/test/python/pyproject.toml stack/toru/test/python/pyproject.toml
toru/test/python/test_data/.hidden/secret.txt stack/toru/test/python/test_data/.hidden/secret.txt
toru/test/python/test_data/config.json stack/toru/test/python/test_data/config.json
toru/test/python/test_data/data/numbers.csv stack/toru/test/python/test_data/data/numbers.csv
toru/test/python/test_data/data/simple.txt stack/toru/test/python/test_data/data/simple.txt
toru/test/python/test_data/data/unicode.txt stack/toru/test/python/test_data/data/unicode.txt
toru/test/python/test_data/docs/readme.md stack/toru/test/python/test_data/docs/readme.md
toru/test/python/test_data/empty.txt stack/toru/test/python/test_data/empty.txt
toru/test/python/test_data/models/small.bin stack/toru/test/python/test_data/models/small.bin
toru/test/python/test_metadata.json stack/toru/test/python/test_metadata.json
toru/test/python/test_registry_md5.txt stack/toru/test/python/test_registry_md5.txt
toru/test/python/test_registry_mixed.txt stack/toru/test/python/test_registry_mixed.txt
toru/test/python/test_registry_sha1.txt stack/toru/test/python/test_registry_sha1.txt
toru/test/python/test_registry_sha256.txt stack/toru/test/python/test_registry_sha256.txt
toru/test/test_cache.ml stack/toru/test/test_cache.ml
toru/test/test_cache_xdg.ml stack/toru/test/test_cache_xdg.ml
toru/test/test_curl_download.ml stack/toru/test/test_curl_download.ml
toru/test/test_downloader.ml stack/toru/test/test_downloader.ml
toru/test/test_downloader_comprehensive.ml stack/toru/test/test_downloader_comprehensive.ml
toru/test/test_hash.ml stack/toru/test/test_hash.ml
toru/test/test_hash_manual.ml stack/toru/test/test_hash_manual.ml
toru/test/test_make_registry.ml stack/toru/test/test_make_registry.ml
toru/test/test_python_cross_validation.ml stack/toru/test/test_python_cross_validation.ml
toru/test/test_registry.ml stack/toru/test/test_registry.ml
toru/test/test_registry_real.ml stack/toru/test/test_registry_real.ml
toru/test/test_tessera_integration.ml stack/toru/test/test_tessera_integration.ml
toru/test/test_tessera_load.ml stack/toru/test/test_tessera_load.ml
toru/test/test_toru.ml stack/toru/test/test_toru.ml
toru/test/test_xdg_integration.ml stack/toru/test/test_xdg_integration.ml
xdg-eio/.gitignore stack/xdge/.gitignore
xdg-eio/.ocamlformat stack/xdge/.ocamlformat
xdg-eio/CLAUDE.md stack/xdge/CLAUDE.md
xdg-eio/dune-project stack/xdge/dune-project
xdg-eio/example/dune stack/xdge/example/dune
xdg-eio/example/minimal_test.cmi stack/xdge/example/minimal_test.cmi
xdg-eio/example/minimal_test.cmo stack/xdge/example/minimal_test.cmo
xdg-eio/example/xdg_example.ml stack/xdge/example/xdg_example.ml
xdg-eio/lib/dune stack/xdge/lib/dune
xdg-eio/lib/xdge.ml stack/xdge/lib/xdge.ml
xdg-eio/lib/xdge.mli stack/xdge/lib/xdge.mli
xdg-eio/test/dune stack/xdge/test/dune
xdg-eio/test/test_paths.ml stack/xdge/test/test_paths.ml
xdg-eio/test/xdg.t stack/xdge/test/xdg.t
xdg-eio/xdge.opam stack/xdge/xdge.opam