My agentic slop goes here. Not intended for anyone else!
1let src = Logs.Src.create "requests.cache" ~doc:"HTTP cache with cacheio"
2module Log = (val Logs.src_log src : Logs.LOG)
3
4type cached_response = {
5 status : Cohttp.Code.status_code;
6 headers : Cohttp.Header.t;
7 body : string;
8}
9
10type t = {
11 sw : Eio.Switch.t;
12 enabled : bool;
13 cache_get_requests : bool;
14 cache_range_requests : bool;
15 cacheio : Cacheio.t option;
16 memory_cache : (string, cached_response * float) Hashtbl.t;
17}
18
19let create ~sw ~enabled ?(cache_get_requests=true) ?(cache_range_requests=true) ~cache_dir () =
20 let cacheio =
21 match cache_dir with
22 | Some dir when enabled ->
23 (try
24 Some (Cacheio.create ~base_dir:dir)
25 with e ->
26 Log.warn (fun m -> m "Failed to create cacheio backend: %s. Using memory cache only."
27 (Printexc.to_string e));
28 None)
29 | _ -> None
30 in
31 { sw; enabled; cache_get_requests; cache_range_requests; cacheio;
32 memory_cache = Hashtbl.create 100 }
33
34let make_cache_key ~method_ ~url ~headers =
35 let method_str = match method_ with
36 | `GET -> "GET" | `HEAD -> "HEAD"
37 | _ -> "OTHER"
38 in
39 let url_str = Uri.to_string url in
40 let range_str = match Cohttp.Header.get headers "range" with
41 | Some r -> "_range:" ^ r
42 | None -> ""
43 in
44 Printf.sprintf "%s_%s%s" method_str url_str range_str
45
46let is_cacheable ~method_ ~status ~headers =
47 match method_ with
48 | `GET | `HEAD ->
49 let code = Cohttp.Code.code_of_status status in
50 if code >= 200 && code < 300 then
51 match Cohttp.Header.get headers "cache-control" with
52 | Some cc ->
53 let cc_lower = String.lowercase_ascii cc in
54 let rec contains s sub pos =
55 if pos + String.length sub > String.length s then false
56 else if String.sub s pos (String.length sub) = sub then true
57 else contains s sub (pos + 1)
58 in
59 not (contains cc_lower "no-store" 0 ||
60 contains cc_lower "no-cache" 0 ||
61 contains cc_lower "private" 0)
62 | None -> true
63 else
64 code = 301 || code = 308
65 | _ -> false
66
67let parse_max_age headers =
68 match Cohttp.Header.get headers "cache-control" with
69 | Some cc ->
70 let parts = String.split_on_char ',' cc |> List.map String.trim in
71 List.find_map (fun part ->
72 let prefix = "max-age=" in
73 if String.starts_with ~prefix part then
74 let value = String.sub part (String.length prefix)
75 (String.length part - String.length prefix) in
76 try Some (float_of_string value) with _ -> None
77 else None
78 ) parts
79 | None -> None
80
81let serialize_metadata ~status ~headers =
82 let status_code = Cohttp.Code.code_of_status status in
83 let headers_assoc = Cohttp.Header.to_list headers in
84 let json = `Assoc [
85 ("status_code", `Int status_code);
86 ("headers", `Assoc (List.map (fun (k, v) -> (k, `String v)) headers_assoc));
87 ] in
88 Yojson.Basic.to_string json
89
90let deserialize_metadata json_str =
91 try
92 let open Yojson.Basic.Util in
93 let json = Yojson.Basic.from_string json_str in
94 let status_code = json |> member "status_code" |> to_int in
95 let status = Cohttp.Code.status_of_code status_code in
96 let headers_json = json |> member "headers" |> to_assoc in
97 let headers = headers_json
98 |> List.map (fun (k, v) -> (k, to_string v))
99 |> Cohttp.Header.of_list in
100 Some (status, headers)
101 with _ -> None
102
103let get t ~method_ ~url ~headers =
104 if not t.enabled then None
105 else if method_ = `GET && not t.cache_get_requests then None
106 else
107 let key = make_cache_key ~method_ ~url ~headers in
108
109 (* Try cacheio first *)
110 match t.cacheio with
111 | Some cache ->
112 (* Check for metadata entry *)
113 let metadata_key = key ^ ".meta" in
114 let body_key = key ^ ".body" in
115
116 if Cacheio.exists cache ~key:metadata_key && Cacheio.exists cache ~key:body_key then
117 Eio.Switch.run @@ fun sw ->
118 (* Read metadata *)
119 let metadata_opt = match Cacheio.get cache ~key:metadata_key ~sw with
120 | Some source ->
121 let buf = Buffer.create 256 in
122 Eio.Flow.copy source (Eio.Flow.buffer_sink buf);
123 deserialize_metadata (Buffer.contents buf)
124 | None -> None
125 in
126
127 (match metadata_opt with
128 | Some (status, resp_headers) ->
129 (* Read body *)
130 (match Cacheio.get cache ~key:body_key ~sw with
131 | Some source ->
132 let buf = Buffer.create 4096 in
133 Eio.Flow.copy source (Eio.Flow.buffer_sink buf);
134 let body = Buffer.contents buf in
135 Log.debug (fun m -> m "Cache hit for %s" (Uri.to_string url));
136 Some { status; headers = resp_headers; body }
137 | None ->
138 Log.debug (fun m -> m "Cache body missing for %s" (Uri.to_string url));
139 None)
140 | None ->
141 Log.debug (fun m -> m "Cache metadata missing for %s" (Uri.to_string url));
142 None)
143 else
144 (Log.debug (fun m -> m "Cache miss for %s" (Uri.to_string url));
145 None)
146 | None ->
147 (* Fall back to memory cache *)
148 match Hashtbl.find_opt t.memory_cache key with
149 | Some (response, expiry) when expiry > Unix.gettimeofday () ->
150 Log.debug (fun m -> m "Memory cache hit for %s" (Uri.to_string url));
151 Some response
152 | _ ->
153 Log.debug (fun m -> m "Cache miss for %s" (Uri.to_string url));
154 None
155
156let get_stream t ~method_ ~url ~headers ~sw =
157 if not t.enabled then None
158 else if method_ = `GET && not t.cache_get_requests then None
159 else
160 let key = make_cache_key ~method_ ~url ~headers in
161
162 match t.cacheio with
163 | Some cache ->
164 let metadata_key = key ^ ".meta" in
165 let body_key = key ^ ".body" in
166
167 if Cacheio.exists cache ~key:metadata_key && Cacheio.exists cache ~key:body_key then
168 (* Read metadata first *)
169 let metadata_opt =
170 match Cacheio.get cache ~key:metadata_key ~sw with
171 | Some source ->
172 let buf = Buffer.create 256 in
173 Eio.Flow.copy source (Eio.Flow.buffer_sink buf);
174 deserialize_metadata (Buffer.contents buf)
175 | None -> None
176 in
177
178 (match metadata_opt with
179 | Some (status, resp_headers) ->
180 (* Return body stream directly *)
181 (match Cacheio.get cache ~key:body_key ~sw with
182 | Some source ->
183 Log.debug (fun m -> m "Streaming cache hit for %s" (Uri.to_string url));
184 Some (status, resp_headers, source)
185 | None -> None)
186 | None -> None)
187 else None
188 | None -> None
189
190let put t ~method_ ~url ~request_headers ~status ~headers ~body =
191 if not t.enabled then ()
192 else if is_cacheable ~method_ ~status ~headers then
193 let key = make_cache_key ~method_ ~url ~headers:request_headers in
194 let ttl = parse_max_age headers in
195
196 Log.debug (fun m -> m "Caching response for %s (ttl: %s)"
197 (Uri.to_string url)
198 (match ttl with Some t -> Printf.sprintf "%.0fs" t | None -> "3600s"));
199
200 (match t.cacheio with
201 | Some cache ->
202 Eio.Switch.run @@ fun _sw ->
203 let metadata_key = key ^ ".meta" in
204 let metadata = serialize_metadata ~status ~headers in
205 let metadata_source = Eio.Flow.string_source metadata in
206 Cacheio.put cache ~key:metadata_key ~source:metadata_source ~ttl ();
207
208 let body_key = key ^ ".body" in
209 let body_source = Eio.Flow.string_source body in
210 Cacheio.put cache ~key:body_key ~source:body_source ~ttl ()
211 | None -> ());
212
213 let cached_resp = { status; headers; body } in
214 let expiry = Unix.gettimeofday () +. Option.value ttl ~default:3600.0 in
215 Hashtbl.replace t.memory_cache key (cached_resp, expiry)
216
217let put_stream t ~method_ ~url ~request_headers ~status ~headers ~body_source ~ttl =
218 if not t.enabled then ()
219 else if is_cacheable ~method_ ~status ~headers then
220 let key = make_cache_key ~method_ ~url ~headers:request_headers in
221
222 Log.debug (fun m -> m "Caching streamed response for %s (ttl: %s)"
223 (Uri.to_string url)
224 (match ttl with Some t -> Printf.sprintf "%.0fs" t | None -> "3600s"));
225
226 match t.cacheio with
227 | Some cache ->
228 Eio.Switch.run @@ fun _sw ->
229
230 (* Store metadata *)
231 let metadata_key = key ^ ".meta" in
232 let metadata = serialize_metadata ~status ~headers in
233 let metadata_source = Eio.Flow.string_source metadata in
234 Cacheio.put cache ~key:metadata_key ~source:metadata_source ~ttl ();
235
236 (* Store body directly from source *)
237 let body_key = key ^ ".body" in
238 Cacheio.put cache ~key:body_key ~source:body_source ~ttl ()
239 | None -> ()
240
241module Range = struct
242 type t = {
243 start : int64;
244 end_ : int64 option; (* None means to end of file *)
245 }
246
247 let of_header header =
248 (* Parse Range: bytes=start-end *)
249 let prefix = "bytes=" in
250 let prefix_len = String.length prefix in
251 if String.length header >= prefix_len &&
252 String.sub header 0 prefix_len = prefix then
253 let range_str = String.sub header prefix_len (String.length header - prefix_len) in
254 match String.split_on_char '-' range_str with
255 | [start; ""] ->
256 (* bytes=N- means from N to end *)
257 (try Some { start = Int64.of_string start; end_ = None }
258 with _ -> None)
259 | [start; end_] ->
260 (* bytes=N-M *)
261 (try Some {
262 start = Int64.of_string start;
263 end_ = Some (Int64.of_string end_)
264 }
265 with _ -> None)
266 | _ -> None
267 else None
268
269 let to_header t =
270 match t.end_ with
271 | None -> Printf.sprintf "bytes=%Ld-" t.start
272 | Some e -> Printf.sprintf "bytes=%Ld-%Ld" t.start e
273
274 let to_cacheio_range t ~total_size =
275 let end_ = match t.end_ with
276 | None -> Int64.pred total_size
277 | Some e -> min e (Int64.pred total_size)
278 in
279 (* Convert to Cacheio.Range.t *)
280 Cacheio.Range.create ~start:t.start ~end_
281end
282
283let download_range t ~sw ~url ~range ~on_chunk =
284 let range_header = Range.to_header range in
285 Log.debug (fun m -> m "Range request for %s: %s"
286 (Uri.to_string url) range_header);
287
288 match t.cacheio with
289 | Some cache ->
290 let key = Uri.to_string url in
291 let cacheio_range = Range.to_cacheio_range range ~total_size:Int64.max_int in
292
293 (match Cacheio.get_range cache ~key ~range:cacheio_range ~sw with
294 | `Complete source ->
295 let rec read_chunks () =
296 let chunk = Cstruct.create 8192 in
297 try
298 let n = Eio.Flow.single_read source chunk in
299 if n > 0 then begin
300 on_chunk (Cstruct.to_string ~off:0 ~len:n chunk);
301 read_chunks ()
302 end
303 with End_of_file -> ()
304 in
305 read_chunks ();
306 Some true
307 | `Chunks chunk_sources ->
308 List.iter (fun (_range, source) ->
309 let rec read_chunk () =
310 let chunk = Cstruct.create 8192 in
311 try
312 let n = Eio.Flow.single_read source chunk in
313 if n > 0 then begin
314 on_chunk (Cstruct.to_string ~off:0 ~len:n chunk);
315 read_chunk ()
316 end
317 with End_of_file -> ()
318 in
319 read_chunk ()
320 ) chunk_sources;
321 Some true
322 | `Not_found -> None)
323 | None -> None
324
325let put_chunk t ~url ~range ~data =
326 if not t.enabled || not t.cache_range_requests then ()
327 else
328 match t.cacheio with
329 | Some cache ->
330 let key = Uri.to_string url in
331 let cacheio_range = Range.to_cacheio_range range ~total_size:Int64.max_int in
332 Eio.Switch.run @@ fun _sw ->
333 let source = Eio.Flow.string_source data in
334 Cacheio.put_chunk cache ~key ~range:cacheio_range ~source ()
335 | None ->
336 Log.debug (fun m -> m "Cannot cache chunk for %s: no cacheio backend"
337 (Uri.to_string url))
338
339let has_complete t ~url ~total_size =
340 if not t.enabled then false
341 else
342 match t.cacheio with
343 | Some cache ->
344 let key = Uri.to_string url in
345 Cacheio.has_complete_chunks cache ~key ~total_size
346 | None -> false
347
348let missing_ranges t ~url ~total_size =
349 if not t.enabled then
350 [{ Range.start = 0L; end_ = Some (Int64.pred total_size) }]
351 else
352 match t.cacheio with
353 | Some cache ->
354 let key = Uri.to_string url in
355 let cacheio_ranges = Cacheio.missing_ranges cache ~key ~total_size in
356 List.map (fun r ->
357 { Range.start = Cacheio.Range.start r;
358 end_ = Some (Cacheio.Range.end_ r) }
359 ) cacheio_ranges
360 | None ->
361 [{ Range.start = 0L; end_ = Some (Int64.pred total_size) }]
362
363let coalesce_chunks t ~url =
364 if not t.enabled then false
365 else
366 match t.cacheio with
367 | Some cache ->
368 let key = Uri.to_string url in
369 let promise = Cacheio.coalesce_chunks cache ~key ~verify:true () in
370 (match Eio.Promise.await promise with
371 | Ok () ->
372 Log.info (fun m -> m "Successfully coalesced chunks for %s" key);
373 true
374 | Error exn ->
375 Log.warn (fun m -> m "Failed to coalesce chunks for %s: %s"
376 key (Printexc.to_string exn));
377 false)
378 | None -> false
379
380let evict t ~url =
381 if not t.enabled then ()
382 else
383 let key = make_cache_key ~method_:`GET ~url ~headers:(Cohttp.Header.init ()) in
384 (match t.cacheio with
385 | Some cache ->
386 Cacheio.delete cache ~key:(key ^ ".meta");
387 Cacheio.delete cache ~key:(key ^ ".body")
388 | None -> ());
389 Log.debug (fun m -> m "Evicting cache for %s" (Uri.to_string url));
390 Hashtbl.remove t.memory_cache key
391
392let clear t =
393 Log.info (fun m -> m "Clearing entire cache");
394 (match t.cacheio with
395 | Some cache -> Cacheio.clear cache
396 | None -> ());
397 Hashtbl.clear t.memory_cache
398
399let stats t =
400 let cacheio_stats =
401 match t.cacheio with
402 | Some cache ->
403 let stats = Cacheio.stats cache in
404 `Assoc [
405 ("total_entries", `Int (Cacheio.Stats.entry_count stats));
406 ("total_bytes", `Int (Int64.to_int (Cacheio.Stats.total_size stats)));
407 ("expired_entries", `Int (Cacheio.Stats.expired_count stats));
408 ("pinned_entries", `Int (Cacheio.Stats.pinned_count stats));
409 ("temporary_entries", `Int (Cacheio.Stats.temporary_count stats));
410 ]
411 | None -> `Assoc []
412 in
413 `Assoc [
414 ("memory_cache_entries", `Int (Hashtbl.length t.memory_cache));
415 ("cache_backend", `String (if Option.is_some t.cacheio then "cacheio" else "memory"));
416 ("enabled", `Bool t.enabled);
417 ("cache_get_requests", `Bool t.cache_get_requests);
418 ("cache_range_requests", `Bool t.cache_range_requests);
419 ("cacheio_stats", cacheio_stats);
420 ]