OCaml HTTP cookie handling library with support for Eio-based storage jars
1(*---------------------------------------------------------------------------
2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3 SPDX-License-Identifier: ISC
4 ---------------------------------------------------------------------------*)
5
6(** Cookie jar for storing and managing HTTP cookies.
7
8 This module provides a complete cookie jar implementation following
9 {{:https://datatracker.ietf.org/doc/html/rfc6265} RFC 6265} while
10 integrating Eio for efficient asynchronous operations.
11
12 A cookie jar maintains a collection of cookies with automatic cleanup of
13 expired entries. It implements the standard browser behavior for cookie
14 storage, including:
15 - Automatic removal of expired cookies
16 - Domain and path-based cookie retrieval per
17 {{:https://datatracker.ietf.org/doc/html/rfc6265#section-5.4} Section 5.4}
18 - Delta tracking for Set-Cookie headers
19 - Mozilla format persistence for cross-tool compatibility
20
21 @see <https://datatracker.ietf.org/doc/html/rfc6265> RFC 6265 - HTTP State Management Mechanism
22
23 {2 Standards and References}
24
25 This cookie jar implements the storage model from:
26
27 {ul
28 {- {{:https://datatracker.ietf.org/doc/html/rfc6265#section-5.3}RFC 6265 Section 5.3} -
29 Storage Model - Cookie insertion, replacement, and expiration}
30 {- {{:https://datatracker.ietf.org/doc/html/rfc6265#section-5.4}RFC 6265 Section 5.4} -
31 The Cookie Header - Cookie retrieval and ordering}}
32
33 Key RFC 6265 requirements implemented:
34 {ul
35 {- Domain matching per {{:https://datatracker.ietf.org/doc/html/rfc6265#section-5.1.3}Section 5.1.3}}
36 {- Path matching per {{:https://datatracker.ietf.org/doc/html/rfc6265#section-5.1.4}Section 5.1.4}}
37 {- Cookie ordering per {{:https://datatracker.ietf.org/doc/html/rfc6265#section-5.4}Section 5.4 Step 2}}
38 {- Creation time preservation per {{:https://datatracker.ietf.org/doc/html/rfc6265#section-5.3}Section 5.3 Step 11.3}}}
39
40 {2 Related Libraries}
41
42 {ul
43 {- {!Cookeio} - HTTP cookie parsing, validation, and serialization}
44 {- [Requests] - HTTP client that uses this jar for cookie persistence}
45 {- [Xdge] - XDG Base Directory support for cookie file paths}} *)
46
47type t
48(** An abstract cookie jar holding {!Cookeio.t} cookies.
49
50 A jar is updated in place: the management functions below take a jar and
51 return [unit]. It maintains a collection of cookies with automatic cleanup of
52 expired entries and enforcement of storage limits, implementing the
53 standard browser behavior for cookie storage per
54 {{:https://datatracker.ietf.org/doc/html/rfc6265#section-5.3} RFC 6265 Section 5.3}. *)
54
55(** {1 Cookie Jar Creation and Loading} *)
56
57val create : unit -> t
58(** [create ()] returns a new cookie jar that contains no cookies. *)
59
60val load : clock:_ Eio.Time.clock -> Eio.Fs.dir_ty Eio.Path.t -> t
61(** [load ~clock path] reads a cookie jar from the Mozilla format file at [path].
62
63 [clock] is used to set the creation and last access times of the loaded
64 cookies. Returns an empty jar if the file doesn't exist or cannot be
65 loaded, so the result alone does not reveal whether loading failed. *)
66
67val save : Eio.Fs.dir_ty Eio.Path.t -> t -> unit
68(** [save path t] writes the cookies of the jar [t] to the file [path] in Mozilla format. *)
69
70(** {1 Cookie Jar Management} *)
71
72val add_cookie : t -> Cookeio.t -> unit
73(** [add_cookie t cookie] adds [cookie] to the jar [t].
74
75 The cookie is added to the delta, meaning it will appear in Set-Cookie
76 headers when calling {!delta}. If a cookie with the same name/domain/path
77 exists, it will be replaced per
78 {{:https://datatracker.ietf.org/doc/html/rfc6265#section-5.3} RFC 6265 Section 5.3}.
79
80 Per RFC 6265 Section 5.3, Step 11.3, when replacing an existing cookie, the
81 original creation-time is preserved. This ensures stable cookie ordering per
82 RFC 6265 Section 5.4, Step 2.
83
84 @see <https://datatracker.ietf.org/doc/html/rfc6265#section-5.3> RFC 6265 Section 5.3 - Storage Model *)
85
86val add_original : t -> Cookeio.t -> unit
87(** [add_original t cookie] adds [cookie] to the jar [t] as an original cookie.
88
89 Original cookies are those received from the client (via Cookie header).
90 They do not appear in the delta. This function should be used when loading
91 cookies from incoming HTTP requests.
92
93 Per RFC 6265 Section 5.3, Step 11.3, when replacing an existing cookie, the
94 original creation-time is preserved.
95
96 @see <https://datatracker.ietf.org/doc/html/rfc6265#section-5.3> RFC 6265 Section 5.3 - Storage Model *)
97
98val delta : t -> Cookeio.t list
99(** [delta t] returns the cookies of [t] that need to be sent in Set-Cookie headers.
100
101 Returns cookies that have been added via {!add_cookie} and removal cookies
102 for original cookies that have been removed. Does not include original
103 cookies that were added via {!add_original}.
104
105 @see <https://datatracker.ietf.org/doc/html/rfc6265#section-4.1> RFC 6265 Section 4.1 - Set-Cookie *)
106
107val remove : t -> clock:_ Eio.Time.clock -> Cookeio.t -> unit
108(** [remove t ~clock cookie] removes from the jar [t] the cookie with the same name/domain/path as [cookie].
109
110 If an original cookie with the same name/domain/path exists, creates a
111 removal cookie (empty value, Max-Age=0, past expiration) that appears in the
112 delta. If only a delta cookie exists, simply removes it from the delta. NOTE(review): [clock] is presumably the time source for the removal cookie's timestamps - confirm against the implementation.
113
114 Per {{:https://datatracker.ietf.org/doc/html/rfc6265#section-5.3} RFC 6265 Section 5.3},
115 cookies are removed by sending a Set-Cookie with an expiry date in the past.
116
117 @see <https://datatracker.ietf.org/doc/html/rfc6265#section-5.3> RFC 6265 Section 5.3 - Storage Model *)
118
119val get_cookies :
120 t ->
121 clock:_ Eio.Time.clock ->
122 domain:string ->
123 path:string ->
124 is_secure:bool ->
125 Cookeio.t list
126(** [get_cookies t ~clock ~domain ~path ~is_secure] returns the cookies of [t] applicable for a URL.
127
128 Implements the cookie retrieval algorithm from
129 {{:https://datatracker.ietf.org/doc/html/rfc6265#section-5.4}RFC 6265 Section 5.4}
130 for generating the Cookie header.
131
132 {3 Algorithm}
133
134 Per RFC 6265 Section 5.4, the user agent should:
135 + Filter cookies by domain matching (Section 5.1.3)
136 + Filter cookies by path matching (Section 5.1.4)
137 + Filter out cookies with Secure attribute when request is non-secure
138 + Filter out expired cookies
139 + Sort remaining cookies (longer paths first, then by creation time)
140 + Update last-access-time for retrieved cookies
141
142 This function implements all these steps, combining original and delta cookies
143 with delta taking precedence. Excludes:
144 - Removal cookies (empty value)
145 - Expired cookies (expiry-time in the past per Section 5.3)
146 - Secure cookies when [is_secure = false]
147
148 {3 Cookie Ordering}
149
150 Cookies are sorted per Section 5.4, Step 2:
151 - Cookies with longer paths are listed before cookies with shorter paths
152 - Among cookies with equal-length paths, cookies with earlier creation-times
153 are listed first
154
155 This ordering ensures more specific cookies take precedence.
156
157 {3 Matching Rules}
158
159 Domain matching follows {{:https://datatracker.ietf.org/doc/html/rfc6265#section-5.1.3} Section 5.1.3}:
160 - IP addresses require exact match only
161 - Hostnames support subdomain matching unless host-only flag is set
162
163 Path matching follows {{:https://datatracker.ietf.org/doc/html/rfc6265#section-5.1.4} Section 5.1.4}.
164
165 @param t Cookie jar to query; the last-access-time of retrieved cookies is updated
166 @param clock Clock for updating last-access-time
167 @param domain Request domain
168 @param path Request path
169 @param is_secure Whether the request is over a secure channel (HTTPS)
170 @return List of matching cookies, sorted per RFC 6265 (longer paths first, then earlier creation-time)
171
172 @see <https://datatracker.ietf.org/doc/html/rfc6265#section-5.3> RFC 6265 Section 5.3 - Storage Model (expiry)
173 @see <https://datatracker.ietf.org/doc/html/rfc6265#section-5.4> RFC 6265 Section 5.4 - The Cookie Header *)
174
175val clear : t -> unit
176(** [clear t] removes all cookies from the jar [t]. *)
177
178val clear_expired : t -> clock:_ Eio.Time.clock -> unit
179(** [clear_expired t ~clock] removes the expired cookies from the jar [t].
180
181 Removes cookies whose expiry-time is in the past (presumably relative to the current time of [clock] - confirm) per
182 {{:https://datatracker.ietf.org/doc/html/rfc6265#section-5.3} RFC 6265 Section 5.3}. *)
183
184val clear_session_cookies : t -> unit
185(** [clear_session_cookies t] removes the session cookies from the jar [t].
186
187 Removes cookies that have no Expires or Max-Age attribute (session cookies).
188 Per {{:https://datatracker.ietf.org/doc/html/rfc6265#section-5.3} RFC 6265 Section 5.3},
189 these cookies are normally removed when the user agent "session" ends. *)
190
191val count : t -> int
192(** [count t] is the number of unique cookies in the jar [t]. *)
193
194val get_all_cookies : t -> Cookeio.t list
195(** [get_all_cookies t] returns all cookies in the jar [t].
196
197 Returns all cookies including expired ones (for inspection/debugging).
198 Use {!get_cookies} with appropriate domain/path for filtered results that
199 exclude expired cookies, or call {!clear_expired} first. *)
200
201val is_empty : t -> bool
202(** [is_empty t] is [true] if the jar [t] is empty, and [false] otherwise. *)
203
204(** {1 Pretty Printing} *)
205
206val pp : Format.formatter -> t -> unit
207(** [pp ppf t] pretty-prints the cookie jar [t] on the formatter [ppf]. *)
208
209(** {1 Mozilla Format} *)
210
211val to_mozilla_format : t -> string
212(** [to_mozilla_format t] serializes the cookies of [t] in Mozilla/Netscape cookie format.
213
214 The Mozilla format uses tab-separated fields:
215 {[domain \t include_subdomains \t path \t secure \t expires \t name \t value]}
216
217 The [include_subdomains] field corresponds to the inverse of the
218 {{:https://datatracker.ietf.org/doc/html/rfc6265#section-5.3} host-only-flag}
219 in RFC 6265. *)
220
221val from_mozilla_format : clock:_ Eio.Time.clock -> string -> t
222(** [from_mozilla_format ~clock s] parses the string [s] as Mozilla format cookies.
223
224 Creates a cookie jar from a string in Mozilla cookie format, using the
225 provided clock to set creation and last access times. The [include_subdomains]
226 field is mapped to the inverse of the host-only-flag (the correspondence described for {!to_mozilla_format}) per
227 {{:https://datatracker.ietf.org/doc/html/rfc6265#section-5.3} RFC 6265 Section 5.3}. *)