(* My agentic slop goes here. Not intended for anyone else! *)
(** Thread Reconstruction Algorithms for JMAP.

    This module implements various email threading algorithms used to group
    related emails into conversations. It supports both standard threading
    (RFC 5256) and custom algorithms for reconstructing thread relationships
    from email headers.

    Threading algorithms analyze the Message-ID, References, and In-Reply-To
    headers to determine which emails belong in the same conversation thread.

    @see <https://www.rfc-editor.org/rfc/rfc5256.html> RFC 5256: Threading algorithms
    @see <https://www.rfc-editor.org/rfc/rfc8621.html#section-3> RFC 8621 Section 3: Threads
*)
13
(* No [open] statement here: it was removed to avoid a circular dependency,
   so external modules are referenced by their fully qualified paths. *)
15
(** Result of thread reconstruction: one conversation together with the
    emails that were grouped into it. *)
type thread_group = {
  thread_id : Jmap.Id.t;
  (** Unique identifier for this thread. *)

  email_ids : Jmap.Id.t list;
  (** IDs of the emails in this thread, ordered by relationship. *)

  root_email_id : Jmap.Id.t option;
  (** ID of the root email that started this thread.
      NOTE(review): presumably [None] when no root could be determined;
      confirm against the implementation. *)

  last_updated : Jmap.Date.t;
  (** Timestamp of the most recent email in the thread. *)
}
30
(** Threading-relevant information about a single email, as returned by
    {!extract_relationships}. *)
type email_relationship = {
  email_id : Jmap.Id.t;
  (** The email's unique identifier. *)

  message_id : string option;
  (** The email's Message-ID header value, if any. *)

  in_reply_to : string option;
  (** The In-Reply-To header value, indicating the parent message. *)

  references : string list;
  (** Message-IDs listed in the References header. *)

  subject : string;
  (** Normalized subject, used for subject-based threading.
      NOTE(review): presumably normalized as by {!normalize_subject};
      confirm against the implementation. *)

  date : Jmap.Date.t;
  (** The email's date, used for chronological ordering. *)
}
51
(** Threading algorithm selector, accepted by {!apply_algorithm} and
    {!ThreadGraph.recalculate}. *)
type algorithm = [
  | `RFC5256_REFERENCES
  (** Standard REFERENCES algorithm from RFC 5256. *)

  | `RFC5256_ORDEREDSUBJECT
  (** Standard ORDEREDSUBJECT algorithm from RFC 5256. *)

  | `HYBRID
  (** Hybrid algorithm combining references and subject matching. *)

  | `CONVERSATION
  (** Gmail-style conversation threading. *)
]
66
(** {1 Core Threading Functions} *)
68
(** [extract_relationships email] gathers the relationship information of
    [email] into an {!email_relationship}.

    Parses the email's headers to extract Message-ID, In-Reply-To, References,
    and the other fields needed by the threading algorithms.

    @param email The email to analyze
    @return Relationship information for threading *)
val extract_relationships : Jmap_email.Email.Email.t -> email_relationship
77
(** [build_thread_group emails] builds a {!thread_group} from a list of
    related emails.

    Takes emails that have already been determined to belong to the same
    thread and organizes them into a thread group with proper ordering.

    NOTE(review): the behavior for an empty [emails] list is not specified
    by this interface; confirm against the implementation.

    @param emails List of related emails
    @return Thread group containing the emails in conversation order *)
val build_thread_group : Jmap_email.Email.Email.t list -> thread_group
86
(** {1 Threading Algorithms} *)
88
(** [thread_by_references emails] reconstructs threads using the REFERENCES
    algorithm (RFC 5256).

    This is the standard threading algorithm: it uses the Message-ID,
    In-Reply-To, and References headers to build a tree of related messages.

    @param emails List of emails to thread
    @return List of thread groups *)
val thread_by_references : Jmap_email.Email.Email.t list -> thread_group list
97
(** [thread_by_ordered_subject emails] reconstructs threads using the
    ORDEREDSUBJECT algorithm (RFC 5256).

    Groups emails by normalized subject line, then orders them
    chronologically. Less accurate than REFERENCES, but works when the
    threading headers are missing.

    @param emails List of emails to thread
    @return List of thread groups *)
val thread_by_ordered_subject :
  Jmap_email.Email.Email.t list -> thread_group list
106
(** [thread_hybrid emails] reconstructs threads using a hybrid algorithm.

    Combines REFERENCES and subject-based threading: it first attempts to
    thread by references, then groups the orphaned messages by subject
    similarity.

    @param emails List of emails to thread
    @return List of thread groups *)
val thread_hybrid : Jmap_email.Email.Email.t list -> thread_group list
115
(** [thread_conversations emails] reconstructs threads using
    conversation-style grouping.

    Similar to Gmail's conversation view: aggressively groups emails that
    appear to be part of the same discussion, even when the threading
    headers are broken.

    @param emails List of emails to thread
    @return List of thread groups *)
val thread_conversations : Jmap_email.Email.Email.t list -> thread_group list
124
(** [apply_algorithm algorithm emails] threads [emails] with the selected
    {!algorithm}.

    NOTE(review): presumably dispatches to the matching [thread_*] function
    of this module; confirm against the implementation.

    @param algorithm The threading algorithm to use
    @param emails List of emails to thread
    @return List of thread groups *)
val apply_algorithm :
  algorithm -> Jmap_email.Email.Email.t list -> thread_group list
131
(** {1 Thread Relationship Management} *)
133
(** Thread relationship graph for managing conversation structure.

    The graph type is abstract; every operation below takes a graph and
    returns a graph or a query result.
    NOTE(review): this interface does not show whether updates are
    persistent or performed in place; callers should use the returned
    value. *)
module ThreadGraph : sig
  (** Thread graph maintaining email relationships. *)
  type t

  (** [create ()] returns a new, empty thread graph.
      @return New empty graph *)
  val create : unit -> t

  (** [add_email t email] adds [email] to the thread graph.

      Analyzes the email's headers and adds it at the appropriate position
      in the conversation tree, based on its relationships.

      @param t The thread graph
      @param email The email to add
      @return Updated thread graph *)
  val add_email : t -> Jmap_email.Email.Email.t -> t

  (** [remove_email t email_id] removes an email from the thread graph.

      NOTE(review): the behavior when [email_id] is not present is not
      specified by this interface; confirm against the implementation.

      @param t The thread graph
      @param email_id The ID of the email to remove
      @return Updated thread graph *)
  val remove_email : t -> Jmap.Id.t -> t

  (** [find_thread t email_id] finds the thread containing a specific email.

      @param t The thread graph
      @param email_id The email ID to search for
      @return [Some thread_id] if the email is found, [None] otherwise *)
  val find_thread : t -> Jmap.Id.t -> Jmap.Id.t option

  (** [get_thread_emails t thread_id] returns all emails in a specific
      thread.

      NOTE(review): presumably the empty list for an unknown [thread_id];
      confirm against the implementation.

      @param t The thread graph
      @param thread_id The thread ID
      @return List of email IDs in conversation order *)
  val get_thread_emails : t -> Jmap.Id.t -> Jmap.Id.t list

  (** [get_all_threads t] returns all threads in the graph.

      @param t The thread graph
      @return List of all thread groups *)
  val get_all_threads : t -> thread_group list

  (** [merge_threads t thread1 thread2] merges two threads into one.

      Used when discovering that two apparently separate threads are
      actually part of the same conversation.

      NOTE(review): which of the two thread IDs identifies the merged
      thread is not specified by this interface; confirm against the
      implementation.

      @param t The thread graph
      @param thread1 First thread ID
      @param thread2 Second thread ID
      @return Updated graph with merged threads *)
  val merge_threads : t -> Jmap.Id.t -> Jmap.Id.t -> t

  (** [split_thread t thread_id split_point] splits a thread into two
      separate threads.

      Used when determining that emails were incorrectly grouped together.

      NOTE(review): which of the resulting threads receives the
      [split_point] email is not specified by this interface; confirm
      against the implementation.

      @param t The thread graph
      @param thread_id Thread to split
      @param split_point Email ID where the split should occur
      @return Updated graph with split threads *)
  val split_thread : t -> Jmap.Id.t -> Jmap.Id.t -> t

  (** [recalculate t algorithm] recalculates thread relationships.

      Re-runs the threading algorithm on all emails in the graph; useful
      after bulk operations or when threading rules change.

      @param t The thread graph
      @param algorithm Algorithm to use for recalculation
      @return Updated graph with recalculated threads *)
  val recalculate : t -> algorithm -> t
end
211
(** {1 Utility Functions} *)
213
(** [normalize_subject subject] normalizes a subject line for threading
    comparison.

    Removes "Re:", "Fwd:", and other prefixes, normalizes whitespace, and
    converts the result to a canonical form for comparison.

    @param subject The subject line to normalize
    @return Normalized subject string *)
val normalize_subject : string -> string
222
(** [are_related email1 email2] checks whether two emails appear to be
    related, based on their headers.

    Examines the Message-ID, References, and In-Reply-To headers to determine
    whether the emails are part of the same conversation.

    @param email1 First email to compare
    @param email2 Second email to compare
    @return [true] if the emails appear related *)
val are_related : Jmap_email.Email.Email.t -> Jmap_email.Email.Email.t -> bool
232
(** [sort_thread_emails emails] sorts the emails of one thread into
    conversation order.

    Orders emails based on their relationships and timestamps to create a
    natural reading order for the conversation.

    @param emails List of emails in the same thread
    @return Emails sorted in conversation order *)
val sort_thread_emails :
  Jmap_email.Email.Email.t list -> Jmap_email.Email.Email.t list
241
(** [calculate_stats threads] calculates threading statistics for a set of
    thread groups.

    The result is a list of tagged values; the tags are those listed in the
    return type below.
    NOTE(review): presumably one entry per tag, with the counts and average
    their names suggest; confirm against the implementation.

    @param threads List of thread groups
    @return Statistics including thread count, average thread size, etc. *)
val calculate_stats : thread_group list -> [
  | `ThreadCount of int
  | `AverageThreadSize of float
  | `LargestThread of int
  | `SingletonThreads of int
  | `MultiEmailThreads of int
] list