···
1
-
let src = Logs.Src.create "requests.cache" ~doc:"HTTP cache with cacheio"
2
-
module Log = (val Logs.src_log src : Logs.LOG)
4
-
type cached_response = {
5
-
status : Cohttp.Code.status_code;
6
-
headers : Cohttp.Header.t;
13
-
cache_get_requests : bool;
14
-
cache_range_requests : bool;
15
-
cacheio : Cacheio.t option;
16
-
memory_cache : (string, cached_response * float) Hashtbl.t;
19
-
let create ~sw ~enabled ?(cache_get_requests=true) ?(cache_range_requests=true) ~cache_dir () =
21
-
match cache_dir with
22
-
| Some dir when enabled ->
24
-
Some (Cacheio.create ~base_dir:dir)
26
-
Log.warn (fun m -> m "Failed to create cacheio backend: %s. Using memory cache only."
27
-
(Printexc.to_string e));
31
-
{ sw; enabled; cache_get_requests; cache_range_requests; cacheio;
32
-
memory_cache = Hashtbl.create 100 }
34
-
let make_cache_key ~method_ ~url ~headers =
35
-
let method_str = match method_ with
36
-
| `GET -> "GET" | `HEAD -> "HEAD"
39
-
let url_str = Uri.to_string url in
40
-
let range_str = match Cohttp.Header.get headers "range" with
41
-
| Some r -> "_range:" ^ r
44
-
Printf.sprintf "%s_%s%s" method_str url_str range_str
46
-
let is_cacheable ~method_ ~status ~headers =
49
-
let code = Cohttp.Code.code_of_status status in
50
-
if code >= 200 && code < 300 then
51
-
match Cohttp.Header.get headers "cache-control" with
53
-
let cc_lower = String.lowercase_ascii cc in
54
-
let rec contains s sub pos =
55
-
if pos + String.length sub > String.length s then false
56
-
else if String.sub s pos (String.length sub) = sub then true
57
-
else contains s sub (pos + 1)
59
-
not (contains cc_lower "no-store" 0 ||
60
-
contains cc_lower "no-cache" 0 ||
61
-
contains cc_lower "private" 0)
64
-
code = 301 || code = 308
67
-
let parse_max_age headers =
68
-
match Cohttp.Header.get headers "cache-control" with
70
-
let parts = String.split_on_char ',' cc |> List.map String.trim in
71
-
List.find_map (fun part ->
72
-
let prefix = "max-age=" in
73
-
if String.starts_with ~prefix part then
74
-
let value = String.sub part (String.length prefix)
75
-
(String.length part - String.length prefix) in
76
-
try Some (float_of_string value) with _ -> None
81
-
(* JSON codec for cache metadata *)
82
-
module Metadata = struct
85
-
headers : (string * string) list;
88
-
let make status_code headers = { status_code; headers }
89
-
let status_code t = t.status_code
90
-
let headers t = t.headers
93
-
let header_pair_jsont =
94
-
let dec x y = (x, y) in
95
-
let enc (x, y) i = if i = 0 then x else y in
96
-
Jsont.t2 ~dec ~enc Jsont.string
98
-
Jsont.Object.map ~kind:"CacheMetadata" make
99
-
|> Jsont.Object.mem "status_code" Jsont.int ~enc:status_code
100
-
|> Jsont.Object.mem "headers" (Jsont.list header_pair_jsont) ~enc:headers
101
-
|> Jsont.Object.finish
104
-
let serialize_metadata ~status ~headers =
105
-
let status_code = Cohttp.Code.code_of_status status in
106
-
let headers_assoc = Cohttp.Header.to_list headers in
107
-
let metadata = Metadata.make status_code headers_assoc in
108
-
match Jsont_bytesrw.encode_string' Metadata.t_jsont metadata with
110
-
| Error e -> failwith (Fmt.str "Failed to serialize metadata: %s" (Jsont.Error.to_string e))
112
-
let deserialize_metadata json_str =
114
-
match Jsont_bytesrw.decode_string' Metadata.t_jsont json_str with
116
-
let status = Cohttp.Code.status_of_code (Metadata.status_code metadata) in
117
-
let headers = Cohttp.Header.of_list (Metadata.headers metadata) in
118
-
Some (status, headers)
122
-
let get t ~method_ ~url ~headers =
123
-
if not t.enabled then None
124
-
else if method_ = `GET && not t.cache_get_requests then None
126
-
let key = make_cache_key ~method_ ~url ~headers in
128
-
(* Try cacheio first *)
129
-
match t.cacheio with
131
-
(* Check for metadata entry *)
132
-
let metadata_key = key ^ ".meta" in
133
-
let body_key = key ^ ".body" in
135
-
if Cacheio.exists cache ~key:metadata_key && Cacheio.exists cache ~key:body_key then
136
-
Eio.Switch.run @@ fun sw ->
137
-
(* Read metadata *)
138
-
let metadata_opt = match Cacheio.get cache ~key:metadata_key ~sw with
140
-
let buf = Buffer.create 256 in
141
-
Eio.Flow.copy source (Eio.Flow.buffer_sink buf);
142
-
deserialize_metadata (Buffer.contents buf)
146
-
(match metadata_opt with
147
-
| Some (status, resp_headers) ->
149
-
(match Cacheio.get cache ~key:body_key ~sw with
151
-
let buf = Buffer.create 4096 in
152
-
Eio.Flow.copy source (Eio.Flow.buffer_sink buf);
153
-
let body = Buffer.contents buf in
154
-
Log.debug (fun m -> m "Cache hit for %s" (Uri.to_string url));
155
-
Some { status; headers = resp_headers; body }
157
-
Log.debug (fun m -> m "Cache body missing for %s" (Uri.to_string url));
160
-
Log.debug (fun m -> m "Cache metadata missing for %s" (Uri.to_string url));
163
-
(Log.debug (fun m -> m "Cache miss for %s" (Uri.to_string url));
166
-
(* Fall back to memory cache *)
167
-
match Hashtbl.find_opt t.memory_cache key with
168
-
| Some (response, expiry) when expiry > Unix.gettimeofday () ->
169
-
Log.debug (fun m -> m "Memory cache hit for %s" (Uri.to_string url));
172
-
Log.debug (fun m -> m "Cache miss for %s" (Uri.to_string url));
175
-
let get_stream t ~method_ ~url ~headers ~sw =
176
-
if not t.enabled then None
177
-
else if method_ = `GET && not t.cache_get_requests then None
179
-
let key = make_cache_key ~method_ ~url ~headers in
181
-
match t.cacheio with
183
-
let metadata_key = key ^ ".meta" in
184
-
let body_key = key ^ ".body" in
186
-
if Cacheio.exists cache ~key:metadata_key && Cacheio.exists cache ~key:body_key then
187
-
(* Read metadata first *)
189
-
match Cacheio.get cache ~key:metadata_key ~sw with
191
-
let buf = Buffer.create 256 in
192
-
Eio.Flow.copy source (Eio.Flow.buffer_sink buf);
193
-
deserialize_metadata (Buffer.contents buf)
197
-
(match metadata_opt with
198
-
| Some (status, resp_headers) ->
199
-
(* Return body stream directly *)
200
-
(match Cacheio.get cache ~key:body_key ~sw with
202
-
Log.debug (fun m -> m "Streaming cache hit for %s" (Uri.to_string url));
203
-
Some (status, resp_headers, source)
209
-
let put t ~method_ ~url ~request_headers ~status ~headers ~body =
210
-
if not t.enabled then ()
211
-
else if is_cacheable ~method_ ~status ~headers then
212
-
let key = make_cache_key ~method_ ~url ~headers:request_headers in
213
-
let ttl = parse_max_age headers in
215
-
Log.debug (fun m -> m "Caching response for %s (ttl: %s)"
216
-
(Uri.to_string url)
217
-
(match ttl with Some t -> Printf.sprintf "%.0fs" t | None -> "3600s"));
219
-
(match t.cacheio with
221
-
Eio.Switch.run @@ fun _sw ->
222
-
let metadata_key = key ^ ".meta" in
223
-
let metadata = serialize_metadata ~status ~headers in
224
-
let metadata_source = Eio.Flow.string_source metadata in
225
-
Cacheio.put cache ~key:metadata_key ~source:metadata_source ~ttl ();
227
-
let body_key = key ^ ".body" in
228
-
let body_source = Eio.Flow.string_source body in
229
-
Cacheio.put cache ~key:body_key ~source:body_source ~ttl ()
232
-
let cached_resp = { status; headers; body } in
233
-
let expiry = Unix.gettimeofday () +. Option.value ttl ~default:3600.0 in
234
-
Hashtbl.replace t.memory_cache key (cached_resp, expiry)
236
-
let put_stream t ~method_ ~url ~request_headers ~status ~headers ~body_source ~ttl =
237
-
if not t.enabled then ()
238
-
else if is_cacheable ~method_ ~status ~headers then
239
-
let key = make_cache_key ~method_ ~url ~headers:request_headers in
241
-
Log.debug (fun m -> m "Caching streamed response for %s (ttl: %s)"
242
-
(Uri.to_string url)
243
-
(match ttl with Some t -> Printf.sprintf "%.0fs" t | None -> "3600s"));
245
-
match t.cacheio with
247
-
Eio.Switch.run @@ fun _sw ->
249
-
(* Store metadata *)
250
-
let metadata_key = key ^ ".meta" in
251
-
let metadata = serialize_metadata ~status ~headers in
252
-
let metadata_source = Eio.Flow.string_source metadata in
253
-
Cacheio.put cache ~key:metadata_key ~source:metadata_source ~ttl ();
255
-
(* Store body directly from source *)
256
-
let body_key = key ^ ".body" in
257
-
Cacheio.put cache ~key:body_key ~source:body_source ~ttl ()
260
-
module Range = struct
263
-
end_ : int64 option; (* None means to end of file *)
266
-
let of_header header =
267
-
(* Parse Range: bytes=start-end *)
268
-
let prefix = "bytes=" in
269
-
let prefix_len = String.length prefix in
270
-
if String.length header >= prefix_len &&
271
-
String.sub header 0 prefix_len = prefix then
272
-
let range_str = String.sub header prefix_len (String.length header - prefix_len) in
273
-
match String.split_on_char '-' range_str with
275
-
(* bytes=N- means from N to end *)
276
-
(try Some { start = Int64.of_string start; end_ = None }
281
-
start = Int64.of_string start;
282
-
end_ = Some (Int64.of_string end_)
290
-
| None -> Printf.sprintf "bytes=%Ld-" t.start
291
-
| Some e -> Printf.sprintf "bytes=%Ld-%Ld" t.start e
293
-
let to_cacheio_range t ~total_size =
294
-
let end_ = match t.end_ with
295
-
| None -> Int64.pred total_size
296
-
| Some e -> min e (Int64.pred total_size)
298
-
(* Convert to Cacheio.Range.t *)
299
-
Cacheio.Range.create ~start:t.start ~end_
302
-
let download_range t ~sw ~url ~range ~on_chunk =
303
-
let range_header = Range.to_header range in
304
-
Log.debug (fun m -> m "Range request for %s: %s"
305
-
(Uri.to_string url) range_header);
307
-
match t.cacheio with
309
-
let key = Uri.to_string url in
310
-
let cacheio_range = Range.to_cacheio_range range ~total_size:Int64.max_int in
312
-
(match Cacheio.get_range cache ~key ~range:cacheio_range ~sw with
313
-
| `Complete source ->
314
-
let rec read_chunks () =
315
-
let chunk = Cstruct.create 8192 in
317
-
let n = Eio.Flow.single_read source chunk in
318
-
if n > 0 then begin
319
-
on_chunk (Cstruct.to_string ~off:0 ~len:n chunk);
322
-
with End_of_file -> ()
326
-
| `Chunks chunk_sources ->
327
-
List.iter (fun (_range, source) ->
328
-
let rec read_chunk () =
329
-
let chunk = Cstruct.create 8192 in
331
-
let n = Eio.Flow.single_read source chunk in
332
-
if n > 0 then begin
333
-
on_chunk (Cstruct.to_string ~off:0 ~len:n chunk);
336
-
with End_of_file -> ()
341
-
| `Not_found -> None)
344
-
let put_chunk t ~url ~range ~data =
345
-
if not t.enabled || not t.cache_range_requests then ()
347
-
match t.cacheio with
349
-
let key = Uri.to_string url in
350
-
let cacheio_range = Range.to_cacheio_range range ~total_size:Int64.max_int in
351
-
Eio.Switch.run @@ fun _sw ->
352
-
let source = Eio.Flow.string_source data in
353
-
Cacheio.put_chunk cache ~key ~range:cacheio_range ~source ()
355
-
Log.debug (fun m -> m "Cannot cache chunk for %s: no cacheio backend"
356
-
(Uri.to_string url))
358
-
let has_complete t ~url ~total_size =
359
-
if not t.enabled then false
361
-
match t.cacheio with
363
-
let key = Uri.to_string url in
364
-
Cacheio.has_complete_chunks cache ~key ~total_size
367
-
let missing_ranges t ~url ~total_size =
368
-
if not t.enabled then
369
-
[{ Range.start = 0L; end_ = Some (Int64.pred total_size) }]
371
-
match t.cacheio with
373
-
let key = Uri.to_string url in
374
-
let cacheio_ranges = Cacheio.missing_ranges cache ~key ~total_size in
376
-
{ Range.start = Cacheio.Range.start r;
377
-
end_ = Some (Cacheio.Range.end_ r) }
380
-
[{ Range.start = 0L; end_ = Some (Int64.pred total_size) }]
382
-
let coalesce_chunks t ~url =
383
-
if not t.enabled then false
385
-
match t.cacheio with
387
-
let key = Uri.to_string url in
388
-
let promise = Cacheio.coalesce_chunks cache ~key ~verify:true () in
389
-
(match Eio.Promise.await promise with
391
-
Log.info (fun m -> m "Successfully coalesced chunks for %s" key);
394
-
Log.warn (fun m -> m "Failed to coalesce chunks for %s: %s"
395
-
key (Printexc.to_string exn));
400
-
if not t.enabled then ()
402
-
let key = make_cache_key ~method_:`GET ~url ~headers:(Cohttp.Header.init ()) in
403
-
(match t.cacheio with
405
-
Cacheio.delete cache ~key:(key ^ ".meta");
406
-
Cacheio.delete cache ~key:(key ^ ".body")
408
-
Log.debug (fun m -> m "Evicting cache for %s" (Uri.to_string url));
409
-
Hashtbl.remove t.memory_cache key
412
-
Log.info (fun m -> m "Clearing entire cache");
413
-
(match t.cacheio with
414
-
| Some cache -> Cacheio.clear cache
416
-
Hashtbl.clear t.memory_cache
418
-
module Stats = struct
419
-
type cacheio_stats = {
420
-
total_entries : int;
422
-
expired_entries : int;
423
-
pinned_entries : int;
424
-
temporary_entries : int;
428
-
memory_cache_entries : int;
429
-
cache_backend : string;
431
-
cache_get_requests : bool;
432
-
cache_range_requests : bool;
433
-
cacheio_stats : cacheio_stats option;
436
-
let make_cacheio_stats total_entries total_bytes expired_entries pinned_entries temporary_entries =
437
-
{ total_entries; total_bytes; expired_entries; pinned_entries; temporary_entries }
439
-
let make memory_cache_entries cache_backend enabled cache_get_requests cache_range_requests cacheio_stats =
440
-
{ memory_cache_entries; cache_backend; enabled; cache_get_requests; cache_range_requests; cacheio_stats }
442
-
let cacheio_stats_jsont =
443
-
Jsont.Object.map ~kind:"CacheioStats" make_cacheio_stats
444
-
|> Jsont.Object.mem "total_entries" Jsont.int ~enc:(fun t -> t.total_entries)
445
-
|> Jsont.Object.mem "total_bytes" Jsont.int ~enc:(fun t -> t.total_bytes)
446
-
|> Jsont.Object.mem "expired_entries" Jsont.int ~enc:(fun t -> t.expired_entries)
447
-
|> Jsont.Object.mem "pinned_entries" Jsont.int ~enc:(fun t -> t.pinned_entries)
448
-
|> Jsont.Object.mem "temporary_entries" Jsont.int ~enc:(fun t -> t.temporary_entries)
449
-
|> Jsont.Object.finish
452
-
Jsont.Object.map ~kind:"CacheStats" make
453
-
|> Jsont.Object.mem "memory_cache_entries" Jsont.int ~enc:(fun t -> t.memory_cache_entries)
454
-
|> Jsont.Object.mem "cache_backend" Jsont.string ~enc:(fun t -> t.cache_backend)
455
-
|> Jsont.Object.mem "enabled" Jsont.bool ~enc:(fun t -> t.enabled)
456
-
|> Jsont.Object.mem "cache_get_requests" Jsont.bool ~enc:(fun t -> t.cache_get_requests)
457
-
|> Jsont.Object.mem "cache_range_requests" Jsont.bool ~enc:(fun t -> t.cache_range_requests)
458
-
|> Jsont.Object.opt_mem "cacheio_stats" cacheio_stats_jsont ~enc:(fun t -> t.cacheio_stats)
459
-
|> Jsont.Object.finish
462
-
match Jsont_bytesrw.encode_string' ~format:Jsont.Indent t_jsont t with
465
-
let msg = Jsont.Error.to_string e in
466
-
failwith (Printf.sprintf "Failed to encode stats: %s" msg)
470
-
let cacheio_stats =
471
-
match t.cacheio with
473
-
let stats = Cacheio.stats cache in
474
-
Some (Stats.make_cacheio_stats
475
-
(Cacheio.Stats.entry_count stats)
476
-
(Int64.to_int (Cacheio.Stats.total_size stats))
477
-
(Cacheio.Stats.expired_count stats)
478
-
(Cacheio.Stats.pinned_count stats)
479
-
(Cacheio.Stats.temporary_count stats))
483
-
(Hashtbl.length t.memory_cache)
484
-
(if Option.is_some t.cacheio then "cacheio" else "memory")
486
-
t.cache_get_requests
487
-
t.cache_range_requests