My agentic slop goes here. Not intended for anyone else!
1(** Email header field implementation with structured parsing.
2
3 This module implements email header field types and operations as specified in
4 RFC 8621 Section 4.1.2 and 4.1.3. It provides parsing, validation, and conversion
5 functions for header fields with support for multiple access patterns including
6 Raw, Text, Addresses, GroupedAddresses, MessageIds, Date, and URLs.
7
8 @see <https://www.rfc-editor.org/rfc/rfc8621.html#section-4.1.2> RFC 8621, Section 4.1.2 - Header Field Forms
9 @see <https://www.rfc-editor.org/rfc/rfc8621.html#section-4.1.3> RFC 8621, Section 4.1.3 - Header Field Properties
10*)
11
12type t = {
13 name : string;
14 value : string;
15}
16
17let name t = t.name
18let value t = t.value
19
20let validate_name name =
21 if name = "" then
22 Error "Header field name cannot be empty"
23 else if String.contains name ':' then
24 Error "Header field name cannot contain colon character"
25 else if String.contains name ' ' then
26 Error "Header field name cannot contain spaces"
27 else if String.contains name '\t' then
28 Error "Header field name cannot contain tab characters"
29 else
30 let is_valid_char c =
31 let code = Char.code c in
32 code >= 33 && code <= 126 && c <> ':'
33 in
34 let rec check_chars i =
35 if i >= String.length name then
36 Ok ()
37 else if is_valid_char name.[i] then
38 check_chars (i + 1)
39 else
40 Error (Printf.sprintf "Header field name contains invalid character: %C" name.[i])
41 in
42 check_chars 0
43
44let create ~name ~value () =
45 match validate_name name with
46 | Ok () -> Ok { name; value }
47 | Error msg -> Error msg
48
49let create_unsafe ~name ~value () =
50 { name; value }
51
52let to_json t =
53 `Assoc [
54 ("name", `String t.name);
55 ("value", `String t.value)
56 ]
57
58let of_json = function
59 | `Assoc fields ->
60 (match List.assoc_opt "name" fields, List.assoc_opt "value" fields with
61 | Some (`String name), Some (`String value) ->
62 Ok { name; value }
63 | Some (`String _), _ ->
64 Error "Header field JSON missing or invalid 'value' field"
65 | _, Some (`String _) ->
66 Error "Header field JSON missing or invalid 'name' field"
67 | _ ->
68 Error "Header field JSON missing both 'name' and 'value' fields")
69 | _ ->
70 Error "Header field JSON must be an object"
71
72let list_to_json headers =
73 `List (List.map to_json headers)
74
75let list_of_json = function
76 | `List items ->
77 let rec parse_items acc errors = function
78 | [] when errors = [] -> Ok (List.rev acc)
79 | [] -> Error (String.concat "; " (List.rev errors))
80 | item :: rest ->
81 (match of_json item with
82 | Ok header -> parse_items (header :: acc) errors rest
83 | Error msg -> parse_items acc (msg :: errors) rest)
84 in
85 parse_items [] [] items
86 | _ ->
87 Error "Header field list JSON must be an array"
88
89let normalize_name name =
90 String.lowercase_ascii name
91
92let find_by_name headers name =
93 let target = normalize_name name in
94 List.find_opt (fun h -> normalize_name h.name = target) headers
95
96let find_all_by_name headers name =
97 let target = normalize_name name in
98 List.filter (fun h -> normalize_name h.name = target) headers
99
100let pp fmt t =
101 Format.fprintf fmt "%s: %s" t.name t.value
102
103let pp_hum fmt t = pp fmt t
104
105(** Structured header value types for different access patterns *)
106module Value = struct
107 (** Header value access patterns as defined in RFC 8621 Section 4.1.2 *)
108 type access_form =
109 | Raw (** Raw octets as they appear in the message *)
110 | Text (** Decoded and unfolded text *)
111 | Addresses (** Parsed email addresses *)
112 | GroupedAddresses (** Parsed addresses preserving group information *)
113 | MessageIds (** Parsed message ID list *)
114 | Date (** Parsed date value *)
115 | URLs (** Parsed URL list *)
116
117 (** Structured header value types *)
118 type parsed_value =
119 | Raw_value of string
120 | Text_value of string
121 | Addresses_value of Address.t list
122 | GroupedAddresses_value of Address.Group.t list
123 | MessageIds_value of string list
124 | Date_value of Jmap.Date.t
125 | URLs_value of string list
126
127 (** Parse error types *)
128 type parse_error =
129 | Invalid_encoding of string
130 | Malformed_header of string
131 | Unsupported_form of string * access_form
132 | Parse_failure of string
133end
134
135(** RFC 2047 encoded-word decoder *)
136module RFC2047 = struct
137 (** Decode RFC 2047 encoded words in header values *)
138 let decode_encoded_words (text : string) : string =
139 let re = Str.regexp "=\\?\\([^?]+\\)\\?\\([QB]\\)\\?\\([^?]*\\)\\?=" in
140 let decode_word _charset encoding encoded =
141 try
142 let decoded = match String.uppercase_ascii encoding with
143 | "Q" -> (* Quoted-printable decoding simplified *)
144 let s = Str.global_replace (Str.regexp "_") " " encoded in
145 let s = Str.global_replace (Str.regexp "=") "" s in (* Simplified *)
146 s
147 | "B" -> (* Base64 decoding - simplified implementation *)
148 (match Base64.decode encoded with
149 | Ok decoded -> decoded
150 | Error _ -> encoded)
151 | _ -> encoded
152 in
153 (* For now, just return decoded text - proper charset conversion would need external library *)
154 decoded
155 with _ -> encoded
156 in
157 Str.global_substitute re (fun s ->
158 let charset = Str.matched_group 1 s in
159 let encoding = Str.matched_group 2 s in
160 let encoded = Str.matched_group 3 s in
161 decode_word charset encoding encoded
162 ) text
163
164 (** Unfold header field lines according to RFC 5322 *)
165 let unfold (text : string) : string =
166 (* Replace CRLF followed by whitespace with single space *)
167 let text = Str.global_replace (Str.regexp "\r?\n[ \t]+") " " text in
168 (* Trim leading and trailing whitespace *)
169 String.trim text
170end
171
172(** Header field parsers for different access patterns *)
173module Parser = struct
174 open Value
175
176 (** Parse header as Raw form (RFC 8621 Section 4.1.2.1) *)
177 let as_raw (header : t) : (parsed_value, parse_error) result =
178 Ok (Raw_value (value header))
179
180 (** Parse header as Text form (RFC 8621 Section 4.1.2.2) *)
181 let as_text (header : t) : (parsed_value, parse_error) result =
182 try
183 let raw_value = value header in
184 let unfolded = RFC2047.unfold raw_value in
185 let decoded = RFC2047.decode_encoded_words unfolded in
186 let trimmed = String.trim decoded in
187 Ok (Text_value trimmed)
188 with exn ->
189 Error (Parse_failure ("Text parsing failed: " ^ Printexc.to_string exn))
190
191 (** Valid header fields for Text form according to RFC 8621 *)
192 let text_form_valid_headers = [
193 "subject"; "comments"; "keywords"; "list-id"
194 ]
195
196 (** Check if header can be parsed as Text form *)
197 let can_parse_as_text (header : t) : bool =
198 let header_name = String.lowercase_ascii (name header) in
199 List.mem header_name text_form_valid_headers ||
200 not (List.mem header_name ["from"; "to"; "cc"; "bcc"; "sender"; "reply-to"])
201
202 (** Parse email address from RFC 5322 mailbox syntax *)
203 let parse_mailbox (mailbox_str : string) : Address.t option =
204 let trimmed = String.trim mailbox_str in
205 (* Simple regex for basic email address parsing *)
206 let email_re = Str.regexp ".*<\\(.*@.*\\)>" in
207 let name_email_re = Str.regexp "\\(.*\\)[ \t]*<\\(.*@.*\\)>" in
208 let simple_email_re = Str.regexp "\\([^@ \t]+@[^@ \t]+\\)" in
209
210 if Str.string_match name_email_re trimmed 0 then
211 let name_part = String.trim (Str.matched_group 1 trimmed) in
212 let email_part = String.trim (Str.matched_group 2 trimmed) in
213 let clean_name = if name_part = "" then None else Some name_part in
214 Some (Address.create_unsafe ?name:clean_name ~email:email_part ())
215 else if Str.string_match email_re trimmed 0 then
216 let email_part = String.trim (Str.matched_group 1 trimmed) in
217 Some (Address.create_unsafe ~email:email_part ())
218 else if Str.string_match simple_email_re trimmed 0 then
219 let email_part = Str.matched_group 1 trimmed in
220 Some (Address.create_unsafe ~email:email_part ())
221 else
222 None
223
224 (** Parse header as Addresses form (RFC 8621 Section 4.1.2.3) *)
225 let as_addresses (header : t) : (parsed_value, parse_error) result =
226 try
227 let raw_value = value header in
228 let unfolded = RFC2047.unfold raw_value in
229 let decoded = RFC2047.decode_encoded_words unfolded in
230
231 (* Split by comma to get individual addresses *)
232 let address_parts = Str.split (Str.regexp "[ \t]*,[ \t]*") decoded in
233 let addresses = List.filter_map parse_mailbox address_parts in
234
235 Ok (Addresses_value addresses)
236 with exn ->
237 Error (Parse_failure ("Address parsing failed: " ^ Printexc.to_string exn))
238
239 (** Valid header fields for Addresses form according to RFC 8621 *)
240 let addresses_form_valid_headers = [
241 "from"; "sender"; "reply-to"; "to"; "cc"; "bcc";
242 "resent-from"; "resent-sender"; "resent-reply-to"; "resent-to"; "resent-cc"; "resent-bcc"
243 ]
244
245 (** Check if header can be parsed as Addresses form *)
246 let can_parse_as_addresses (header : t) : bool =
247 let header_name = String.lowercase_ascii (name header) in
248 List.mem header_name addresses_form_valid_headers
249
250 (** Parse header as GroupedAddresses form (RFC 8621 Section 4.1.2.4) *)
251 let as_grouped_addresses (header : t) : (parsed_value, parse_error) result =
252 try
253 let raw_value = value header in
254 let unfolded = RFC2047.unfold raw_value in
255 let decoded = RFC2047.decode_encoded_words unfolded in
256
257 (* For now, create a single group with all addresses - proper group parsing is complex *)
258 let address_parts = Str.split (Str.regexp "[ \t]*,[ \t]*") decoded in
259 let addresses = List.filter_map parse_mailbox address_parts in
260 let group = Address.Group.create ~addresses () in
261
262 Ok (GroupedAddresses_value [group])
263 with exn ->
264 Error (Parse_failure ("Grouped address parsing failed: " ^ Printexc.to_string exn))
265
266 (** Parse message ID from angle brackets *)
267 let parse_message_id (msg_id_str : string) : string option =
268 let trimmed = String.trim msg_id_str in
269 let msg_id_re = Str.regexp "<\\([^>]+\\)>" in
270 if Str.string_match msg_id_re trimmed 0 then
271 Some (Str.matched_group 1 trimmed)
272 else if not (String.contains trimmed '<') && not (String.contains trimmed '>') then
273 Some trimmed (* Message ID without brackets *)
274 else
275 None
276
277 (** Parse header as MessageIds form (RFC 8621 Section 4.1.2.5) *)
278 let as_message_ids (header : t) : (parsed_value, parse_error) result =
279 try
280 let raw_value = value header in
281 let unfolded = RFC2047.unfold raw_value in
282
283 (* Split by whitespace to get individual message IDs *)
284 let id_parts = Str.split (Str.regexp "[ \t\r\n]+") unfolded in
285 let message_ids = List.filter_map parse_message_id id_parts in
286
287 Ok (MessageIds_value message_ids)
288 with exn ->
289 Error (Parse_failure ("Message ID parsing failed: " ^ Printexc.to_string exn))
290
291 (** Valid header fields for MessageIds form according to RFC 8621 *)
292 let message_ids_form_valid_headers = [
293 "message-id"; "in-reply-to"; "references"
294 ]
295
296 (** Check if header can be parsed as MessageIds form *)
297 let can_parse_as_message_ids (header : t) : bool =
298 let header_name = String.lowercase_ascii (name header) in
299 List.mem header_name message_ids_form_valid_headers
300
301 (** Parse RFC 5322 date-time *)
302 let parse_date_time (date_str : string) : float option =
303 let trimmed = String.trim date_str in
304 (* Simple ISO 8601 parsing - more complex RFC 5322 parsing would need external library *)
305 try
306 (* Try ISO format first *)
307 if Str.string_match (Str.regexp "[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]T[0-9][0-9]:[0-9][0-9]:[0-9][0-9]Z") trimmed 0 then
308 let tm = Scanf.sscanf trimmed "%04d-%02d-%02dT%02d:%02d:%02dZ"
309 (fun y m d h min sec ->
310 {Unix.tm_year = y - 1900; tm_mon = m - 1; tm_mday = d;
311 tm_hour = h; tm_min = min; tm_sec = sec; tm_wday = 0;
312 tm_yday = 0; tm_isdst = false}) in
313 Some (fst (Unix.mktime tm))
314 else
315 (* Fall back to Unix.strptime if available, or return None *)
316 None
317 with _ -> None
318
319 (** Parse header as Date form (RFC 8621 Section 4.1.2.6) *)
320 let as_date (header : t) : (parsed_value, parse_error) result =
321 try
322 let raw_value = value header in
323 let unfolded = RFC2047.unfold raw_value in
324
325 match parse_date_time unfolded with
326 | Some timestamp -> Ok (Date_value (Jmap.Date.of_timestamp timestamp))
327 | None -> Error (Parse_failure "Date parsing failed")
328 with exn ->
329 Error (Parse_failure ("Date parsing failed: " ^ Printexc.to_string exn))
330
331 (** Valid header fields for Date form according to RFC 8621 *)
332 let date_form_valid_headers = [
333 "date"; "resent-date"; "delivery-date"
334 ]
335
336 (** Check if header can be parsed as Date form *)
337 let can_parse_as_date (header : t) : bool =
338 let header_name = String.lowercase_ascii (name header) in
339 List.mem header_name date_form_valid_headers
340
341 (** Parse URL from angle brackets *)
342 let parse_url (url_str : string) : string option =
343 let trimmed = String.trim url_str in
344 let url_re = Str.regexp "<\\([^>]+\\)>" in
345 if Str.string_match url_re trimmed 0 then
346 Some (Str.matched_group 1 trimmed)
347 else if String.contains trimmed ':' then
348 Some trimmed (* URL without brackets *)
349 else
350 None
351
352 (** Parse header as URLs form (RFC 8621 Section 4.1.2.7) *)
353 let as_urls (header : t) : (parsed_value, parse_error) result =
354 try
355 let raw_value = value header in
356 let unfolded = RFC2047.unfold raw_value in
357
358 (* Split by comma to get individual URLs *)
359 let url_parts = Str.split (Str.regexp "[ \t]*,[ \t]*") unfolded in
360 let urls = List.filter_map parse_url url_parts in
361
362 Ok (URLs_value urls)
363 with exn ->
364 Error (Parse_failure ("URL parsing failed: " ^ Printexc.to_string exn))
365
366 (** Valid header fields for URLs form according to RFC 8621 *)
367 let urls_form_valid_headers = [
368 "list-archive"; "list-help"; "list-id"; "list-post"; "list-subscribe"; "list-unsubscribe"
369 ]
370
371 (** Check if header can be parsed as URLs form *)
372 let can_parse_as_urls (header : t) : bool =
373 let header_name = String.lowercase_ascii (name header) in
374 List.mem header_name urls_form_valid_headers
375end
376
377(** High-level header access pattern functions *)
378
379(** Get header value as Raw form - always succeeds *)
380let as_raw (header : t) : string =
381 value header
382
383(** Get header value as Text form with RFC 2047 decoding and unfolding *)
384let as_text (header : t) : (string, Value.parse_error) result =
385 if not (Parser.can_parse_as_text header) then
386 Error (Value.Unsupported_form (name header, Value.Text))
387 else
388 match Parser.as_text header with
389 | Ok (Value.Text_value text) -> Ok text
390 | Ok _ -> Error (Value.Parse_failure "Invalid parsed value type")
391 | Error err -> Error err
392
393(** Get header value as list of parsed email addresses *)
394let as_addresses (header : t) : (Address.t list, Value.parse_error) result =
395 if not (Parser.can_parse_as_addresses header) then
396 Error (Value.Unsupported_form (name header, Value.Addresses))
397 else
398 match Parser.as_addresses header with
399 | Ok (Value.Addresses_value addrs) -> Ok addrs
400 | Ok _ -> Error (Value.Parse_failure "Invalid parsed value type")
401 | Error err -> Error err
402
403(** Get header value as list of grouped addresses *)
404let as_grouped_addresses (header : t) : (Address.Group.t list, Value.parse_error) result =
405 if not (Parser.can_parse_as_addresses header) then
406 Error (Value.Unsupported_form (name header, Value.GroupedAddresses))
407 else
408 match Parser.as_grouped_addresses header with
409 | Ok (Value.GroupedAddresses_value groups) -> Ok groups
410 | Ok _ -> Error (Value.Parse_failure "Invalid parsed value type")
411 | Error err -> Error err
412
413(** Get header value as list of message IDs *)
414let as_message_ids (header : t) : (string list, Value.parse_error) result =
415 if not (Parser.can_parse_as_message_ids header) then
416 Error (Value.Unsupported_form (name header, Value.MessageIds))
417 else
418 match Parser.as_message_ids header with
419 | Ok (Value.MessageIds_value ids) -> Ok ids
420 | Ok _ -> Error (Value.Parse_failure "Invalid parsed value type")
421 | Error err -> Error err
422
423(** Get header value as parsed date *)
424let as_date (header : t) : (Jmap.Date.t, Value.parse_error) result =
425 if not (Parser.can_parse_as_date header) then
426 Error (Value.Unsupported_form (name header, Value.Date))
427 else
428 match Parser.as_date header with
429 | Ok (Value.Date_value date) -> Ok date
430 | Ok _ -> Error (Value.Parse_failure "Invalid parsed value type")
431 | Error err -> Error err
432
433(** Get header value as list of URLs *)
434let as_urls (header : t) : (string list, Value.parse_error) result =
435 if not (Parser.can_parse_as_urls header) then
436 Error (Value.Unsupported_form (name header, Value.URLs))
437 else
438 match Parser.as_urls header with
439 | Ok (Value.URLs_value urls) -> Ok urls
440 | Ok _ -> Error (Value.Parse_failure "Invalid parsed value type")
441 | Error err -> Error err
442
443(** Get header value in the specified access form *)
444let parse_as (header : t) (form : Value.access_form) : (Value.parsed_value, Value.parse_error) result =
445 match form with
446 | Value.Raw -> Parser.as_raw header
447 | Value.Text -> Parser.as_text header
448 | Value.Addresses -> Parser.as_addresses header
449 | Value.GroupedAddresses -> Parser.as_grouped_addresses header
450 | Value.MessageIds -> Parser.as_message_ids header
451 | Value.Date -> Parser.as_date header
452 | Value.URLs -> Parser.as_urls header
453
454(** Utility functions for working with header lists *)
455
456(** Find header and parse as Text form *)
457let find_and_parse_as_text (headers : t list) (header_name : string) : string option =
458 match find_by_name headers header_name with
459 | Some header ->
460 (match as_text header with
461 | Ok text -> Some text
462 | Error _ -> None)
463 | None -> None
464
465(** Find header and parse as addresses *)
466let find_and_parse_as_addresses (headers : t list) (header_name : string) : Address.t list option =
467 match find_by_name headers header_name with
468 | Some header ->
469 (match as_addresses header with
470 | Ok addrs -> Some addrs
471 | Error _ -> None)
472 | None -> None
473
474(** Find header and parse as message IDs *)
475let find_and_parse_as_message_ids (headers : t list) (header_name : string) : string list option =
476 match find_by_name headers header_name with
477 | Some header ->
478 (match as_message_ids header with
479 | Ok ids -> Some ids
480 | Error _ -> None)
481 | None -> None
482
483(** Find header and parse as date *)
484let find_and_parse_as_date (headers : t list) (header_name : string) : Jmap.Date.t option =
485 match find_by_name headers header_name with
486 | Some header ->
487 (match as_date header with
488 | Ok date -> Some date
489 | Error _ -> None)
490 | None -> None