My agentic slop goes here. Not intended for anyone else!
1(** Email header field implementation with structured parsing. 2 3 This module implements email header field types and operations as specified in 4 RFC 8621 Section 4.1.2 and 4.1.3. It provides parsing, validation, and conversion 5 functions for header fields with support for multiple access patterns including 6 Raw, Text, Addresses, GroupedAddresses, MessageIds, Date, and URLs. 7 8 @see <https://www.rfc-editor.org/rfc/rfc8621.html#section-4.1.2> RFC 8621, Section 4.1.2 - Header Field Forms 9 @see <https://www.rfc-editor.org/rfc/rfc8621.html#section-4.1.3> RFC 8621, Section 4.1.3 - Header Field Properties 10*) 11 12type t = { 13 name : string; 14 value : string; 15} 16 17let name t = t.name 18let value t = t.value 19 20let validate_name name = 21 if name = "" then 22 Error "Header field name cannot be empty" 23 else if String.contains name ':' then 24 Error "Header field name cannot contain colon character" 25 else if String.contains name ' ' then 26 Error "Header field name cannot contain spaces" 27 else if String.contains name '\t' then 28 Error "Header field name cannot contain tab characters" 29 else 30 let is_valid_char c = 31 let code = Char.code c in 32 code >= 33 && code <= 126 && c <> ':' 33 in 34 let rec check_chars i = 35 if i >= String.length name then 36 Ok () 37 else if is_valid_char name.[i] then 38 check_chars (i + 1) 39 else 40 Error (Printf.sprintf "Header field name contains invalid character: %C" name.[i]) 41 in 42 check_chars 0 43 44let create ~name ~value () = 45 match validate_name name with 46 | Ok () -> Ok { name; value } 47 | Error msg -> Error msg 48 49let create_unsafe ~name ~value () = 50 { name; value } 51 52let to_json t = 53 `Assoc [ 54 ("name", `String t.name); 55 ("value", `String t.value) 56 ] 57 58let of_json = function 59 | `Assoc fields -> 60 (match List.assoc_opt "name" fields, List.assoc_opt "value" fields with 61 | Some (`String name), Some (`String value) -> 62 Ok { name; value } 63 | Some (`String _), _ -> 64 Error "Header field JSON missing or invalid 'value' field" 65 | _, Some (`String _) -> 66 Error "Header field JSON missing or invalid 'name' field" 67 | _ -> 68 Error "Header field JSON missing both 'name' and 'value' fields") 69 | _ -> 70 Error "Header field JSON must be an object" 71 72let list_to_json headers = 73 `List (List.map to_json headers) 74 75let list_of_json = function 76 | `List items -> 77 let rec parse_items acc errors = function 78 | [] when errors = [] -> Ok (List.rev acc) 79 | [] -> Error (String.concat "; " (List.rev errors)) 80 | item :: rest -> 81 (match of_json item with 82 | Ok header -> parse_items (header :: acc) errors rest 83 | Error msg -> parse_items acc (msg :: errors) rest) 84 in 85 parse_items [] [] items 86 | _ -> 87 Error "Header field list JSON must be an array" 88 89let normalize_name name = 90 String.lowercase_ascii name 91 92let find_by_name headers name = 93 let target = normalize_name name in 94 List.find_opt (fun h -> normalize_name h.name = target) headers 95 96let find_all_by_name headers name = 97 let target = normalize_name name in 98 List.filter (fun h -> normalize_name h.name = target) headers 99 100let pp fmt t = 101 Format.fprintf fmt "%s: %s" t.name t.value 102 103let pp_hum fmt t = pp fmt t 104 105(** Structured header value types for different access patterns *) 106module Value = struct 107 (** Header value access patterns as defined in RFC 8621 Section 4.1.2 *) 108 type access_form = 109 | Raw (** Raw octets as they appear in the message *) 110 | Text (** Decoded and unfolded text *) 111 | Addresses (** Parsed email addresses *) 112 | GroupedAddresses (** Parsed addresses preserving group information *) 113 | MessageIds (** Parsed message ID list *) 114 | Date (** Parsed date value *) 115 | URLs (** Parsed URL list *) 116 117 (** Structured header value types *) 118 type parsed_value = 119 | Raw_value of string 120 | Text_value of string 121 | Addresses_value of Address.t list 122 | GroupedAddresses_value of Address.Group.t list 123 | MessageIds_value of string list 124 | Date_value of Jmap.Date.t 125 | URLs_value of string list 126 127 (** Parse error types *) 128 type parse_error = 129 | Invalid_encoding of string 130 | Malformed_header of string 131 | Unsupported_form of string * access_form 132 | Parse_failure of string 133end 134 135(** RFC 2047 encoded-word decoder *) 136module RFC2047 = struct 137 (** Decode RFC 2047 encoded words in header values *) 138 let decode_encoded_words (text : string) : string = 139 let re = Str.regexp "=\\?\\([^?]+\\)\\?\\([QB]\\)\\?\\([^?]*\\)\\?=" in 140 let decode_word _charset encoding encoded = 141 try 142 let decoded = match String.uppercase_ascii encoding with 143 | "Q" -> (* Quoted-printable decoding simplified *) 144 let s = Str.global_replace (Str.regexp "_") " " encoded in 145 let s = Str.global_replace (Str.regexp "=") "" s in (* Simplified *) 146 s 147 | "B" -> (* Base64 decoding - simplified implementation *) 148 (match Base64.decode encoded with 149 | Ok decoded -> decoded 150 | Error _ -> encoded) 151 | _ -> encoded 152 in 153 (* For now, just return decoded text - proper charset conversion would need external library *) 154 decoded 155 with _ -> encoded 156 in 157 Str.global_substitute re (fun s -> 158 let charset = Str.matched_group 1 s in 159 let encoding = Str.matched_group 2 s in 160 let encoded = Str.matched_group 3 s in 161 decode_word charset encoding encoded 162 ) text 163 164 (** Unfold header field lines according to RFC 5322 *) 165 let unfold (text : string) : string = 166 (* Replace CRLF followed by whitespace with single space *) 167 let text = Str.global_replace (Str.regexp "\r?\n[ \t]+") " " text in 168 (* Trim leading and trailing whitespace *) 169 String.trim text 170end 171 172(** Header field parsers for different access patterns *) 173module Parser = struct 174 open Value 175 176 (** Parse header as Raw form (RFC 8621 Section 4.1.2.1) *) 177 let as_raw (header : t) : (parsed_value, parse_error) result = 178 Ok (Raw_value (value header)) 179 180 (** Parse header as Text form (RFC 8621 Section 4.1.2.2) *) 181 let as_text (header : t) : (parsed_value, parse_error) result = 182 try 183 let raw_value = value header in 184 let unfolded = RFC2047.unfold raw_value in 185 let decoded = RFC2047.decode_encoded_words unfolded in 186 let trimmed = String.trim decoded in 187 Ok (Text_value trimmed) 188 with exn -> 189 Error (Parse_failure ("Text parsing failed: " ^ Printexc.to_string exn)) 190 191 (** Valid header fields for Text form according to RFC 8621 *) 192 let text_form_valid_headers = [ 193 "subject"; "comments"; "keywords"; "list-id" 194 ] 195 196 (** Check if header can be parsed as Text form *) 197 let can_parse_as_text (header : t) : bool = 198 let header_name = String.lowercase_ascii (name header) in 199 List.mem header_name text_form_valid_headers || 200 not (List.mem header_name ["from"; "to"; "cc"; "bcc"; "sender"; "reply-to"]) 201 202 (** Parse email address from RFC 5322 mailbox syntax *) 203 let parse_mailbox (mailbox_str : string) : Address.t option = 204 let trimmed = String.trim mailbox_str in 205 (* Simple regex for basic email address parsing *) 206 let email_re = Str.regexp ".*<\\(.*@.*\\)>" in 207 let name_email_re = Str.regexp "\\(.*\\)[ \t]*<\\(.*@.*\\)>" in 208 let simple_email_re = Str.regexp "\\([^@ \t]+@[^@ \t]+\\)" in 209 210 if Str.string_match name_email_re trimmed 0 then 211 let name_part = String.trim (Str.matched_group 1 trimmed) in 212 let email_part = String.trim (Str.matched_group 2 trimmed) in 213 let clean_name = if name_part = "" then None else Some name_part in 214 Some (Address.create_unsafe ?name:clean_name ~email:email_part ()) 215 else if Str.string_match email_re trimmed 0 then 216 let email_part = String.trim (Str.matched_group 1 trimmed) in 217 Some (Address.create_unsafe ~email:email_part ()) 218 else if Str.string_match simple_email_re trimmed 0 then 219 let email_part = Str.matched_group 1 trimmed in 220 Some (Address.create_unsafe ~email:email_part ()) 221 else 222 None 223 224 (** Parse header as Addresses form (RFC 8621 Section 4.1.2.3) *) 225 let as_addresses (header : t) : (parsed_value, parse_error) result = 226 try 227 let raw_value = value header in 228 let unfolded = RFC2047.unfold raw_value in 229 let decoded = RFC2047.decode_encoded_words unfolded in 230 231 (* Split by comma to get individual addresses *) 232 let address_parts = Str.split (Str.regexp "[ \t]*,[ \t]*") decoded in 233 let addresses = List.filter_map parse_mailbox address_parts in 234 235 Ok (Addresses_value addresses) 236 with exn -> 237 Error (Parse_failure ("Address parsing failed: " ^ Printexc.to_string exn)) 238 239 (** Valid header fields for Addresses form according to RFC 8621 *) 240 let addresses_form_valid_headers = [ 241 "from"; "sender"; "reply-to"; "to"; "cc"; "bcc"; 242 "resent-from"; "resent-sender"; "resent-reply-to"; "resent-to"; "resent-cc"; "resent-bcc" 243 ] 244 245 (** Check if header can be parsed as Addresses form *) 246 let can_parse_as_addresses (header : t) : bool = 247 let header_name = String.lowercase_ascii (name header) in 248 List.mem header_name addresses_form_valid_headers 249 250 (** Parse header as GroupedAddresses form (RFC 8621 Section 4.1.2.4) *) 251 let as_grouped_addresses (header : t) : (parsed_value, parse_error) result = 252 try 253 let raw_value = value header in 254 let unfolded = RFC2047.unfold raw_value in 255 let decoded = RFC2047.decode_encoded_words unfolded in 256 257 (* For now, create a single group with all addresses - proper group parsing is complex *) 258 let address_parts = Str.split (Str.regexp "[ \t]*,[ \t]*") decoded in 259 let addresses = List.filter_map parse_mailbox address_parts in 260 let group = Address.Group.create ~addresses () in 261 262 Ok (GroupedAddresses_value [group]) 263 with exn -> 264 Error (Parse_failure ("Grouped address parsing failed: " ^ Printexc.to_string exn)) 265 266 (** Parse message ID from angle brackets *) 267 let parse_message_id (msg_id_str : string) : string option = 268 let trimmed = String.trim msg_id_str in 269 let msg_id_re = Str.regexp "<\\([^>]+\\)>" in 270 if Str.string_match msg_id_re trimmed 0 then 271 Some (Str.matched_group 1 trimmed) 272 else if not (String.contains trimmed '<') && not (String.contains trimmed '>') then 273 Some trimmed (* Message ID without brackets *) 274 else 275 None 276 277 (** Parse header as MessageIds form (RFC 8621 Section 4.1.2.5) *) 278 let as_message_ids (header : t) : (parsed_value, parse_error) result = 279 try 280 let raw_value = value header in 281 let unfolded = RFC2047.unfold raw_value in 282 283 (* Split by whitespace to get individual message IDs *) 284 let id_parts = Str.split (Str.regexp "[ \t\r\n]+") unfolded in 285 let message_ids = List.filter_map parse_message_id id_parts in 286 287 Ok (MessageIds_value message_ids) 288 with exn -> 289 Error (Parse_failure ("Message ID parsing failed: " ^ Printexc.to_string exn)) 290 291 (** Valid header fields for MessageIds form according to RFC 8621 *) 292 let message_ids_form_valid_headers = [ 293 "message-id"; "in-reply-to"; "references" 294 ] 295 296 (** Check if header can be parsed as MessageIds form *) 297 let can_parse_as_message_ids (header : t) : bool = 298 let header_name = String.lowercase_ascii (name header) in 299 List.mem header_name message_ids_form_valid_headers 300 301 (** Parse RFC 5322 date-time *) 302 let parse_date_time (date_str : string) : float option = 303 let trimmed = String.trim date_str in 304 (* Simple ISO 8601 parsing - more complex RFC 5322 parsing would need external library *) 305 try 306 (* Try ISO format first *) 307 if Str.string_match (Str.regexp "[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]T[0-9][0-9]:[0-9][0-9]:[0-9][0-9]Z") trimmed 0 then 308 let tm = Scanf.sscanf trimmed "%04d-%02d-%02dT%02d:%02d:%02dZ" 309 (fun y m d h min sec -> 310 {Unix.tm_year = y - 1900; tm_mon = m - 1; tm_mday = d; 311 tm_hour = h; tm_min = min; tm_sec = sec; tm_wday = 0; 312 tm_yday = 0; tm_isdst = false}) in 313 Some (fst (Unix.mktime tm)) 314 else 315 (* Fall back to Unix.strptime if available, or return None *) 316 None 317 with _ -> None 318 319 (** Parse header as Date form (RFC 8621 Section 4.1.2.6) *) 320 let as_date (header : t) : (parsed_value, parse_error) result = 321 try 322 let raw_value = value header in 323 let unfolded = RFC2047.unfold raw_value in 324 325 match parse_date_time unfolded with 326 | Some timestamp -> Ok (Date_value (Jmap.Date.of_timestamp timestamp)) 327 | None -> Error (Parse_failure "Date parsing failed") 328 with exn -> 329 Error (Parse_failure ("Date parsing failed: " ^ Printexc.to_string exn)) 330 331 (** Valid header fields for Date form according to RFC 8621 *) 332 let date_form_valid_headers = [ 333 "date"; "resent-date"; "delivery-date" 334 ] 335 336 (** Check if header can be parsed as Date form *) 337 let can_parse_as_date (header : t) : bool = 338 let header_name = String.lowercase_ascii (name header) in 339 List.mem header_name date_form_valid_headers 340 341 (** Parse URL from angle brackets *) 342 let parse_url (url_str : string) : string option = 343 let trimmed = String.trim url_str in 344 let url_re = Str.regexp "<\\([^>]+\\)>" in 345 if Str.string_match url_re trimmed 0 then 346 Some (Str.matched_group 1 trimmed) 347 else if String.contains trimmed ':' then 348 Some trimmed (* URL without brackets *) 349 else 350 None 351 352 (** Parse header as URLs form (RFC 8621 Section 4.1.2.7) *) 353 let as_urls (header : t) : (parsed_value, parse_error) result = 354 try 355 let raw_value = value header in 356 let unfolded = RFC2047.unfold raw_value in 357 358 (* Split by comma to get individual URLs *) 359 let url_parts = Str.split (Str.regexp "[ \t]*,[ \t]*") unfolded in 360 let urls = List.filter_map parse_url url_parts in 361 362 Ok (URLs_value urls) 363 with exn -> 364 Error (Parse_failure ("URL parsing failed: " ^ Printexc.to_string exn)) 365 366 (** Valid header fields for URLs form according to RFC 8621 *) 367 let urls_form_valid_headers = [ 368 "list-archive"; "list-help"; "list-id"; "list-post"; "list-subscribe"; "list-unsubscribe" 369 ] 370 371 (** Check if header can be parsed as URLs form *) 372 let can_parse_as_urls (header : t) : bool = 373 let header_name = String.lowercase_ascii (name header) in 374 List.mem header_name urls_form_valid_headers 375end 376 377(** High-level header access pattern functions *) 378 379(** Get header value as Raw form - always succeeds *) 380let as_raw (header : t) : string = 381 value header 382 383(** Get header value as Text form with RFC 2047 decoding and unfolding *) 384let as_text (header : t) : (string, Value.parse_error) result = 385 if not (Parser.can_parse_as_text header) then 386 Error (Value.Unsupported_form (name header, Value.Text)) 387 else 388 match Parser.as_text header with 389 | Ok (Value.Text_value text) -> Ok text 390 | Ok _ -> Error (Value.Parse_failure "Invalid parsed value type") 391 | Error err -> Error err 392 393(** Get header value as list of parsed email addresses *) 394let as_addresses (header : t) : (Address.t list, Value.parse_error) result = 395 if not (Parser.can_parse_as_addresses header) then 396 Error (Value.Unsupported_form (name header, Value.Addresses)) 397 else 398 match Parser.as_addresses header with 399 | Ok (Value.Addresses_value addrs) -> Ok addrs 400 | Ok _ -> Error (Value.Parse_failure "Invalid parsed value type") 401 | Error err -> Error err 402 403(** Get header value as list of grouped addresses *) 404let as_grouped_addresses (header : t) : (Address.Group.t list, Value.parse_error) result = 405 if not (Parser.can_parse_as_addresses header) then 406 Error (Value.Unsupported_form (name header, Value.GroupedAddresses)) 407 else 408 match Parser.as_grouped_addresses header with 409 | Ok (Value.GroupedAddresses_value groups) -> Ok groups 410 | Ok _ -> Error (Value.Parse_failure "Invalid parsed value type") 411 | Error err -> Error err 412 413(** Get header value as list of message IDs *) 414let as_message_ids (header : t) : (string list, Value.parse_error) result = 415 if not (Parser.can_parse_as_message_ids header) then 416 Error (Value.Unsupported_form (name header, Value.MessageIds)) 417 else 418 match Parser.as_message_ids header with 419 | Ok (Value.MessageIds_value ids) -> Ok ids 420 | Ok _ -> Error (Value.Parse_failure "Invalid parsed value type") 421 | Error err -> Error err 422 423(** Get header value as parsed date *) 424let as_date (header : t) : (Jmap.Date.t, Value.parse_error) result = 425 if not (Parser.can_parse_as_date header) then 426 Error (Value.Unsupported_form (name header, Value.Date)) 427 else 428 match Parser.as_date header with 429 | Ok (Value.Date_value date) -> Ok date 430 | Ok _ -> Error (Value.Parse_failure "Invalid parsed value type") 431 | Error err -> Error err 432 433(** Get header value as list of URLs *) 434let as_urls (header : t) : (string list, Value.parse_error) result = 435 if not (Parser.can_parse_as_urls header) then 436 Error (Value.Unsupported_form (name header, Value.URLs)) 437 else 438 match Parser.as_urls header with 439 | Ok (Value.URLs_value urls) -> Ok urls 440 | Ok _ -> Error (Value.Parse_failure "Invalid parsed value type") 441 | Error err -> Error err 442 443(** Get header value in the specified access form *) 444let parse_as (header : t) (form : Value.access_form) : (Value.parsed_value, Value.parse_error) result = 445 match form with 446 | Value.Raw -> Parser.as_raw header 447 | Value.Text -> Parser.as_text header 448 | Value.Addresses -> Parser.as_addresses header 449 | Value.GroupedAddresses -> Parser.as_grouped_addresses header 450 | Value.MessageIds -> Parser.as_message_ids header 451 | Value.Date -> Parser.as_date header 452 | Value.URLs -> Parser.as_urls header 453 454(** Utility functions for working with header lists *) 455 456(** Find header and parse as Text form *) 457let find_and_parse_as_text (headers : t list) (header_name : string) : string option = 458 match find_by_name headers header_name with 459 | Some header -> 460 (match as_text header with 461 | Ok text -> Some text 462 | Error _ -> None) 463 | None -> None 464 465(** Find header and parse as addresses *) 466let find_and_parse_as_addresses (headers : t list) (header_name : string) : Address.t list option = 467 match find_by_name headers header_name with 468 | Some header -> 469 (match as_addresses header with 470 | Ok addrs -> Some addrs 471 | Error _ -> None) 472 | None -> None 473 474(** Find header and parse as message IDs *) 475let find_and_parse_as_message_ids (headers : t list) (header_name : string) : string list option = 476 match find_by_name headers header_name with 477 | Some header -> 478 (match as_message_ids header with 479 | Ok ids -> Some ids 480 | Error _ -> None) 481 | None -> None 482 483(** Find header and parse as date *) 484let find_and_parse_as_date (headers : t list) (header_name : string) : Jmap.Date.t option = 485 match find_by_name headers header_name with 486 | Some header -> 487 (match as_date header with 488 | Ok date -> Some date 489 | Error _ -> None) 490 | None -> None