My agentic slop goes here. Not intended for anyone else!
1let src = Logs.Src.create "requests.cache" ~doc:"HTTP cache with cacheio" 2module Log = (val Logs.src_log src : Logs.LOG) 3 4type cached_response = { 5 status : Cohttp.Code.status_code; 6 headers : Cohttp.Header.t; 7 body : string; 8} 9 10type t = { 11 sw : Eio.Switch.t; 12 enabled : bool; 13 cache_get_requests : bool; 14 cache_range_requests : bool; 15 cacheio : Cacheio.t option; 16 memory_cache : (string, cached_response * float) Hashtbl.t; 17} 18 19let create ~sw ~enabled ?(cache_get_requests=true) ?(cache_range_requests=true) ~cache_dir () = 20 let cacheio = 21 match cache_dir with 22 | Some dir when enabled -> 23 (try 24 Some (Cacheio.create ~base_dir:dir) 25 with e -> 26 Log.warn (fun m -> m "Failed to create cacheio backend: %s. Using memory cache only." 27 (Printexc.to_string e)); 28 None) 29 | _ -> None 30 in 31 { sw; enabled; cache_get_requests; cache_range_requests; cacheio; 32 memory_cache = Hashtbl.create 100 } 33 34let make_cache_key ~method_ ~url ~headers = 35 let method_str = match method_ with 36 | `GET -> "GET" | `HEAD -> "HEAD" 37 | _ -> "OTHER" 38 in 39 let url_str = Uri.to_string url in 40 let range_str = match Cohttp.Header.get headers "range" with 41 | Some r -> "_range:" ^ r 42 | None -> "" 43 in 44 Printf.sprintf "%s_%s%s" method_str url_str range_str 45 46let is_cacheable ~method_ ~status ~headers = 47 match method_ with 48 | `GET | `HEAD -> 49 let code = Cohttp.Code.code_of_status status in 50 if code >= 200 && code < 300 then 51 match Cohttp.Header.get headers "cache-control" with 52 | Some cc -> 53 let cc_lower = String.lowercase_ascii cc in 54 let rec contains s sub pos = 55 if pos + String.length sub > String.length s then false 56 else if String.sub s pos (String.length sub) = sub then true 57 else contains s sub (pos + 1) 58 in 59 not (contains cc_lower "no-store" 0 || 60 contains cc_lower "no-cache" 0 || 61 contains cc_lower "private" 0) 62 | None -> true 63 else 64 code = 301 || code = 308 65 | _ -> false 66 67let parse_max_age headers = 68 match Cohttp.Header.get headers "cache-control" with 69 | Some cc -> 70 let parts = String.split_on_char ',' cc |> List.map String.trim in 71 List.find_map (fun part -> 72 let prefix = "max-age=" in 73 if String.starts_with ~prefix part then 74 let value = String.sub part (String.length prefix) 75 (String.length part - String.length prefix) in 76 try Some (float_of_string value) with _ -> None 77 else None 78 ) parts 79 | None -> None 80 81let serialize_metadata ~status ~headers = 82 let status_code = Cohttp.Code.code_of_status status in 83 let headers_assoc = Cohttp.Header.to_list headers in 84 let json = `Assoc [ 85 ("status_code", `Int status_code); 86 ("headers", `Assoc (List.map (fun (k, v) -> (k, `String v)) headers_assoc)); 87 ] in 88 Yojson.Basic.to_string json 89 90let deserialize_metadata json_str = 91 try 92 let open Yojson.Basic.Util in 93 let json = Yojson.Basic.from_string json_str in 94 let status_code = json |> member "status_code" |> to_int in 95 let status = Cohttp.Code.status_of_code status_code in 96 let headers_json = json |> member "headers" |> to_assoc in 97 let headers = headers_json 98 |> List.map (fun (k, v) -> (k, to_string v)) 99 |> Cohttp.Header.of_list in 100 Some (status, headers) 101 with _ -> None 102 103let get t ~method_ ~url ~headers = 104 if not t.enabled then None 105 else if method_ = `GET && not t.cache_get_requests then None 106 else 107 let key = make_cache_key ~method_ ~url ~headers in 108 109 (* Try cacheio first *) 110 match t.cacheio with 111 | Some cache -> 112 (* Check for metadata entry *) 113 let metadata_key = key ^ ".meta" in 114 let body_key = key ^ ".body" in 115 116 if Cacheio.exists cache ~key:metadata_key && Cacheio.exists cache ~key:body_key then 117 Eio.Switch.run @@ fun sw -> 118 (* Read metadata *) 119 let metadata_opt = match Cacheio.get cache ~key:metadata_key ~sw with 120 | Some source -> 121 let buf = Buffer.create 256 in 122 Eio.Flow.copy source (Eio.Flow.buffer_sink buf); 123 deserialize_metadata (Buffer.contents buf) 124 | None -> None 125 in 126 127 (match metadata_opt with 128 | Some (status, resp_headers) -> 129 (* Read body *) 130 (match Cacheio.get cache ~key:body_key ~sw with 131 | Some source -> 132 let buf = Buffer.create 4096 in 133 Eio.Flow.copy source (Eio.Flow.buffer_sink buf); 134 let body = Buffer.contents buf in 135 Log.debug (fun m -> m "Cache hit for %s" (Uri.to_string url)); 136 Some { status; headers = resp_headers; body } 137 | None -> 138 Log.debug (fun m -> m "Cache body missing for %s" (Uri.to_string url)); 139 None) 140 | None -> 141 Log.debug (fun m -> m "Cache metadata missing for %s" (Uri.to_string url)); 142 None) 143 else 144 (Log.debug (fun m -> m "Cache miss for %s" (Uri.to_string url)); 145 None) 146 | None -> 147 (* Fall back to memory cache *) 148 match Hashtbl.find_opt t.memory_cache key with 149 | Some (response, expiry) when expiry > Unix.gettimeofday () -> 150 Log.debug (fun m -> m "Memory cache hit for %s" (Uri.to_string url)); 151 Some response 152 | _ -> 153 Log.debug (fun m -> m "Cache miss for %s" (Uri.to_string url)); 154 None 155 156let get_stream t ~method_ ~url ~headers ~sw = 157 if not t.enabled then None 158 else if method_ = `GET && not t.cache_get_requests then None 159 else 160 let key = make_cache_key ~method_ ~url ~headers in 161 162 match t.cacheio with 163 | Some cache -> 164 let metadata_key = key ^ ".meta" in 165 let body_key = key ^ ".body" in 166 167 if Cacheio.exists cache ~key:metadata_key && Cacheio.exists cache ~key:body_key then 168 (* Read metadata first *) 169 let metadata_opt = 170 match Cacheio.get cache ~key:metadata_key ~sw with 171 | Some source -> 172 let buf = Buffer.create 256 in 173 Eio.Flow.copy source (Eio.Flow.buffer_sink buf); 174 deserialize_metadata (Buffer.contents buf) 175 | None -> None 176 in 177 178 (match metadata_opt with 179 | Some (status, resp_headers) -> 180 (* Return body stream directly *) 181 (match Cacheio.get cache ~key:body_key ~sw with 182 | Some source -> 183 Log.debug (fun m -> m "Streaming cache hit for %s" (Uri.to_string url)); 184 Some (status, resp_headers, source) 185 | None -> None) 186 | None -> None) 187 else None 188 | None -> None 189 190let put t ~method_ ~url ~request_headers ~status ~headers ~body = 191 if not t.enabled then () 192 else if is_cacheable ~method_ ~status ~headers then 193 let key = make_cache_key ~method_ ~url ~headers:request_headers in 194 let ttl = parse_max_age headers in 195 196 Log.debug (fun m -> m "Caching response for %s (ttl: %s)" 197 (Uri.to_string url) 198 (match ttl with Some t -> Printf.sprintf "%.0fs" t | None -> "3600s")); 199 200 (match t.cacheio with 201 | Some cache -> 202 Eio.Switch.run @@ fun _sw -> 203 let metadata_key = key ^ ".meta" in 204 let metadata = serialize_metadata ~status ~headers in 205 let metadata_source = Eio.Flow.string_source metadata in 206 Cacheio.put cache ~key:metadata_key ~source:metadata_source ~ttl (); 207 208 let body_key = key ^ ".body" in 209 let body_source = Eio.Flow.string_source body in 210 Cacheio.put cache ~key:body_key ~source:body_source ~ttl () 211 | None -> ()); 212 213 let cached_resp = { status; headers; body } in 214 let expiry = Unix.gettimeofday () +. Option.value ttl ~default:3600.0 in 215 Hashtbl.replace t.memory_cache key (cached_resp, expiry) 216 217let put_stream t ~method_ ~url ~request_headers ~status ~headers ~body_source ~ttl = 218 if not t.enabled then () 219 else if is_cacheable ~method_ ~status ~headers then 220 let key = make_cache_key ~method_ ~url ~headers:request_headers in 221 222 Log.debug (fun m -> m "Caching streamed response for %s (ttl: %s)" 223 (Uri.to_string url) 224 (match ttl with Some t -> Printf.sprintf "%.0fs" t | None -> "3600s")); 225 226 match t.cacheio with 227 | Some cache -> 228 Eio.Switch.run @@ fun _sw -> 229 230 (* Store metadata *) 231 let metadata_key = key ^ ".meta" in 232 let metadata = serialize_metadata ~status ~headers in 233 let metadata_source = Eio.Flow.string_source metadata in 234 Cacheio.put cache ~key:metadata_key ~source:metadata_source ~ttl (); 235 236 (* Store body directly from source *) 237 let body_key = key ^ ".body" in 238 Cacheio.put cache ~key:body_key ~source:body_source ~ttl () 239 | None -> () 240 241module Range = struct 242 type t = { 243 start : int64; 244 end_ : int64 option; (* None means to end of file *) 245 } 246 247 let of_header header = 248 (* Parse Range: bytes=start-end *) 249 let prefix = "bytes=" in 250 let prefix_len = String.length prefix in 251 if String.length header >= prefix_len && 252 String.sub header 0 prefix_len = prefix then 253 let range_str = String.sub header prefix_len (String.length header - prefix_len) in 254 match String.split_on_char '-' range_str with 255 | [start; ""] -> 256 (* bytes=N- means from N to end *) 257 (try Some { start = Int64.of_string start; end_ = None } 258 with _ -> None) 259 | [start; end_] -> 260 (* bytes=N-M *) 261 (try Some { 262 start = Int64.of_string start; 263 end_ = Some (Int64.of_string end_) 264 } 265 with _ -> None) 266 | _ -> None 267 else None 268 269 let to_header t = 270 match t.end_ with 271 | None -> Printf.sprintf "bytes=%Ld-" t.start 272 | Some e -> Printf.sprintf "bytes=%Ld-%Ld" t.start e 273 274 let to_cacheio_range t ~total_size = 275 let end_ = match t.end_ with 276 | None -> Int64.pred total_size 277 | Some e -> min e (Int64.pred total_size) 278 in 279 (* Convert to Cacheio.Range.t *) 280 Cacheio.Range.create ~start:t.start ~end_ 281end 282 283let download_range t ~sw ~url ~range ~on_chunk = 284 let range_header = Range.to_header range in 285 Log.debug (fun m -> m "Range request for %s: %s" 286 (Uri.to_string url) range_header); 287 288 match t.cacheio with 289 | Some cache -> 290 let key = Uri.to_string url in 291 let cacheio_range = Range.to_cacheio_range range ~total_size:Int64.max_int in 292 293 (match Cacheio.get_range cache ~key ~range:cacheio_range ~sw with 294 | `Complete source -> 295 let rec read_chunks () = 296 let chunk = Cstruct.create 8192 in 297 try 298 let n = Eio.Flow.single_read source chunk in 299 if n > 0 then begin 300 on_chunk (Cstruct.to_string ~off:0 ~len:n chunk); 301 read_chunks () 302 end 303 with End_of_file -> () 304 in 305 read_chunks (); 306 Some true 307 | `Chunks chunk_sources -> 308 List.iter (fun (_range, source) -> 309 let rec read_chunk () = 310 let chunk = Cstruct.create 8192 in 311 try 312 let n = Eio.Flow.single_read source chunk in 313 if n > 0 then begin 314 on_chunk (Cstruct.to_string ~off:0 ~len:n chunk); 315 read_chunk () 316 end 317 with End_of_file -> () 318 in 319 read_chunk () 320 ) chunk_sources; 321 Some true 322 | `Not_found -> None) 323 | None -> None 324 325let put_chunk t ~url ~range ~data = 326 if not t.enabled || not t.cache_range_requests then () 327 else 328 match t.cacheio with 329 | Some cache -> 330 let key = Uri.to_string url in 331 let cacheio_range = Range.to_cacheio_range range ~total_size:Int64.max_int in 332 Eio.Switch.run @@ fun _sw -> 333 let source = Eio.Flow.string_source data in 334 Cacheio.put_chunk cache ~key ~range:cacheio_range ~source () 335 | None -> 336 Log.debug (fun m -> m "Cannot cache chunk for %s: no cacheio backend" 337 (Uri.to_string url)) 338 339let has_complete t ~url ~total_size = 340 if not t.enabled then false 341 else 342 match t.cacheio with 343 | Some cache -> 344 let key = Uri.to_string url in 345 Cacheio.has_complete_chunks cache ~key ~total_size 346 | None -> false 347 348let missing_ranges t ~url ~total_size = 349 if not t.enabled then 350 [{ Range.start = 0L; end_ = Some (Int64.pred total_size) }] 351 else 352 match t.cacheio with 353 | Some cache -> 354 let key = Uri.to_string url in 355 let cacheio_ranges = Cacheio.missing_ranges cache ~key ~total_size in 356 List.map (fun r -> 357 { Range.start = Cacheio.Range.start r; 358 end_ = Some (Cacheio.Range.end_ r) } 359 ) cacheio_ranges 360 | None -> 361 [{ Range.start = 0L; end_ = Some (Int64.pred total_size) }] 362 363let coalesce_chunks t ~url = 364 if not t.enabled then false 365 else 366 match t.cacheio with 367 | Some cache -> 368 let key = Uri.to_string url in 369 let promise = Cacheio.coalesce_chunks cache ~key ~verify:true () in 370 (match Eio.Promise.await promise with 371 | Ok () -> 372 Log.info (fun m -> m "Successfully coalesced chunks for %s" key); 373 true 374 | Error exn -> 375 Log.warn (fun m -> m "Failed to coalesce chunks for %s: %s" 376 key (Printexc.to_string exn)); 377 false) 378 | None -> false 379 380let evict t ~url = 381 if not t.enabled then () 382 else 383 let key = make_cache_key ~method_:`GET ~url ~headers:(Cohttp.Header.init ()) in 384 (match t.cacheio with 385 | Some cache -> 386 Cacheio.delete cache ~key:(key ^ ".meta"); 387 Cacheio.delete cache ~key:(key ^ ".body") 388 | None -> ()); 389 Log.debug (fun m -> m "Evicting cache for %s" (Uri.to_string url)); 390 Hashtbl.remove t.memory_cache key 391 392let clear t = 393 Log.info (fun m -> m "Clearing entire cache"); 394 (match t.cacheio with 395 | Some cache -> Cacheio.clear cache 396 | None -> ()); 397 Hashtbl.clear t.memory_cache 398 399let stats t = 400 let cacheio_stats = 401 match t.cacheio with 402 | Some cache -> 403 let stats = Cacheio.stats cache in 404 `Assoc [ 405 ("total_entries", `Int (Cacheio.Stats.entry_count stats)); 406 ("total_bytes", `Int (Int64.to_int (Cacheio.Stats.total_size stats))); 407 ("expired_entries", `Int (Cacheio.Stats.expired_count stats)); 408 ("pinned_entries", `Int (Cacheio.Stats.pinned_count stats)); 409 ("temporary_entries", `Int (Cacheio.Stats.temporary_count stats)); 410 ] 411 | None -> `Assoc [] 412 in 413 `Assoc [ 414 ("memory_cache_entries", `Int (Hashtbl.length t.memory_cache)); 415 ("cache_backend", `String (if Option.is_some t.cacheio then "cacheio" else "memory")); 416 ("enabled", `Bool t.enabled); 417 ("cache_get_requests", `Bool t.cache_get_requests); 418 ("cache_range_requests", `Bool t.cache_range_requests); 419 ("cacheio_stats", cacheio_stats); 420 ]