this repo has no description
// Rule-based email classifier; the rules were learned from labeled data.
2
3import type { EmailInput, ClassificationResult } from "./types.ts";
4
/**
 * Rule-based email classifier.
 *
 * `classify` lowercases the subject and body, joins them into one string and
 * runs an ordered series of regex rule groups over it. The first group that
 * reaches a verdict decides the result; when no group does, the email is
 * reported as not relevant with low confidence.
 *
 * The regex tables are static so they are built once rather than on every
 * call. None of them use the `g` or `y` flag, so `RegExp.test` keeps no state
 * between calls and sharing them is safe.
 */
export class EmailClassifier {
  /** Phrases that mark a security or account-access notice. */
  private static readonly SECURITY_PATTERNS: readonly RegExp[] = [
    /\bpassword\s+(reset|change|update|expired)\b/,
    /\breset\s+your\s+password\b/,
    /\baccount\s+security\b/,
    /\bsecurity\s+alert\b/,
    /\bunusual\s+(sign[- ]?in|activity)\b/,
    /\bverification\s+code\b/,
    /\b(2fa|mfa|two[- ]factor)\b/,
    /\bcompromised\s+account\b/,
    /\baccount\s+(locked|suspended)\b/,
    /\bsuspicious\s+activity\b/,
  ];

  /** Marketing copy about tuition savings; vetoes a security match. */
  private static readonly TUITION_SAVINGS = /\bsaving.*\bon\s+tuition\b|\btuition.*\bsaving\b/;

  /** Confirmations of something the student already did (applied, enrolled). */
  private static readonly STUDENT_ACTION_PATTERNS: readonly RegExp[] = [
    /\bapplication\s+(received|complete|submitted|confirmation)\b/,
    /\breceived\s+your\s+application\b/,
    /\bthank\s+you\s+for\s+(applying|submitting)\b/,
    /\benrollment\s+confirmation\b/,
    /\bconfirmation\s+(of|for)\s+(your\s+)?(application|enrollment)\b/,
    /\byour\s+application\s+(has\s+been|is)\s+(received|complete)\b/,
  ];

  /** "How to apply"-style marketing; vetoes a student-action match. */
  private static readonly APPLY_MARKETING = /\bhow\s+to\s+apply\b|\bapply\s+now\b|\bstart\s+(your\s+)?application\b/;

  /** Accepted-student portal, deposit and admission-decision wording. */
  private static readonly ACCEPTED_PATTERNS: readonly RegExp[] = [
    /\baccepted\s+(student\s+)?portal\b/,
    /\byour\s+(personalized\s+)?accepted\s+portal\b/,
    /\bdeposit\s+(today|now|by|to\s+reserve)\b/,
    /\breserve\s+your\s+(place|spot)\b/,
    /\bcongratulations.*\baccepted\b/,
    /\byou\s+(have\s+been|are|were)\s+accepted\b/,
    /\badmission\s+(decision|offer)\b/,
    /\benroll(ment)?\s+deposit\b/,
  ];

  /** Pre-admission marketing and promises of a future decision; veto an accepted match. */
  private static readonly ACCEPTED_EXCLUSIONS: readonly RegExp[] = [
    /\bacceptance\s+rate\b|\bhigh\s+acceptance\b|\bpre[- ]admit(ted)?\b|\bautomatic\s+admission\b/,
    /\byou\s+will\s+(also\s+)?receive\s+(an?\s+)?(accelerated\s+)?admission\s+decision\b/,
    /\breceive\s+an\s+admission\s+decision\s+within\b/,
  ];

  /** Dual-enrollment and course-registration wording. */
  private static readonly DUAL_ENROLLMENT_PATTERNS: readonly RegExp[] = [
    /\bdual\s+enrollment\b/,
    /\bcourse\s+(registration|deletion|added|dropped)\b/,
    /\bspring\s+\d{4}\s+(course|on[- ]campus)\b/,
    /\bhow\s+to\s+register\b.*\b(course|class)/,
    /\bcedarville\s+university\).*\b(course|registration)\b/,
  ];

  /** Recruiting language; vetoes a dual-enrollment match. */
  private static readonly DUAL_ENROLLMENT_MARKETING = /\blearn\s+more\s+about\b|\binterested\s+in\b|\bconsider\s+joining\b/;

  /** Subject-line invitation to apply for a scholarship. */
  private static readonly SCHOLARSHIP_APPLY_SUBJECT = /\bapply\s+for\s+(the\s+)?.*\bscholarship\b/;

  /** Names that make a scholarship invitation specific rather than generic. */
  private static readonly NAMED_SCHOLARSHIP = /\bpresident'?s\b|\bministry\b|\bimpact\b/;

  /** Any mention of a scholarship. */
  private static readonly SCHOLARSHIP_MENTION = /\bscholarship\b/;

  /** Wording showing a scholarship is only held, possible or under consideration. */
  private static readonly SCHOLARSHIP_NOT_AWARDED_PATTERNS: readonly RegExp[] = [
    /\bscholarship\b.*\b(held|reserved)\s+for\s+you\b/,
    /\b(held|reserved)\s+for\s+you\b/,
    /\bconsider(ed|ation)\b.*\bscholarship\b/,
    /\bscholarship\b.*\bconsider(ed|ation)\b/,
    /\beligible\s+for\b.*\bscholarship\b/,
    /\bscholarship\b.*\beligible\b/,
    /\bmay\s+qualify\b.*\bscholarship\b/,
    /\bguaranteed\s+admission\b/,
    /\bpriority\s+consideration\b/,
  ];

  /** Wording showing a scholarship was actually awarded. */
  private static readonly SCHOLARSHIP_AWARDED_PATTERNS: readonly RegExp[] = [
    /\bcongratulations\b.*\bscholarship\b/,
    /\byou\s+(have|received|are\s+awarded|won)\b.*\bscholarship\b/,
    /\bwe\s+(are\s+)?(pleased\s+to\s+)?award(ing)?\b.*\bscholarship\b/,
    /\bscholarship\s+(offer|award)\b/,
    /\breceived\s+a\s+scholarship\b/,
  ];

  /** Wording showing a financial-aid offer or award letter is ready to view. */
  private static readonly AID_READY_PATTERNS: readonly RegExp[] = [
    /\bfinancial\s+aid\b.*\boffer\b.*\b(ready|available)\b/,
    /\b(ready|available)\b.*\bfinancial\s+aid\b.*\boffer\b/,
    /\baward\s+letter\b.*\b(ready|available|posted|view)\b/,
    /\b(view|review)\s+(your\s+)?award\s+letter\b/,
    /\bfinancial\s+aid\s+package\b.*\b(ready|available|posted)\b/,
    /\byour\s+aid\s+is\s+ready\b/,
  ];

  /** Aid applications and FAFSA reminders; veto an aid-ready match. */
  private static readonly AID_NOT_READY_PATTERNS: readonly RegExp[] = [
    /\blearn\s+more\s+about\b.*\bfinancial\s+aid\b/,
    /\bapply\b.*\b(for\s+)?financial\s+aid\b/,
    /\bfinancial\s+aid\b.*\bapplication\b/,
    /\bcomplete\s+(your\s+)?fafsa\b/,
    /\bconsidered\s+for\b.*\baid\b/,
    /\bpriority\s+(deadline|consideration)\b.*\bfinancial\s+aid\b/,
  ];

  /** Strong indicators of marketing, newsletters and unsolicited outreach. */
  private static readonly IRRELEVANT_PATTERNS: readonly RegExp[] = [
    // Newsletter/blog content
    /\bstudent\s+life\s+blog\b/,
    /\b(student\s+life\s+)?blog\s+(post|update)\b/,
    /\bnew\s+student\s+life\s+blog\b/,
    /\bnewsletter\b/,
    /\bweekly\s+(digest|update)\b/,

    // Marketing events
    /\bupcoming\s+events\b/,
    /\bjoin\s+us\s+(for|at)\b/,
    /\bopen\s+house\b/,
    /\bvirtual\s+tour\b/,
    /\bcampus\s+(visit|tour|event)\b/,
    /\bmeet\s+(the|our)\s+(students|faculty)\b/,

    // Generic outreach (not applied yet)
    /\bhaven'?t\s+applied.*yet\b/,
    /\bstill\s+time\s+to\s+apply\b/,
    /\bhow\s+is\s+your\s+college\s+search\b/,
    /\bstart\s+(your\s+)?college\s+search\b/,
    /\bexplore\s+(our\s+)?(programs|campus)\b/,

    // Unsolicited outreach patterns
    /\bi\s+hope\s+you\s+have\s+been\s+receiving\s+my\s+emails\b/,
    /\bam\s+i\s+reaching\b/,
    /\byou\s+are\s+on\s+.*\s+(radar|list)\b/,
    /\bi\s+want\s+to\s+make\s+sure\s+you\s+know\b/,
    /\byou'?re\s+invited\s+to\s+submit\b/,
    /\bi'?m\s+eager\s+to\s+consider\s+you\b/,
    /\bsubmit\s+your\s+.*\s+application\b/,
    /\bpriority\s+status\b.*\bsubmit.*application\b/,

    // Priority deadline extensions (spam)
    /\bextended.*\bpriority\s+deadline\b/,
    /\bpriority\s+deadline.*\bextended\b/,

    // Summer camps/programs
    /\bsummer\s+(academy|camp|program)\b/,
    /\bsave\s+the\s+date\b/,

    // Ugly sweaters and other fluff
    /\bugly\s+sweater\b/,
    /\bit'?s\s+.+\s+season\b/,
  ];

  /** Outreach stating the student has not applied. */
  private static readonly NOT_APPLIED = /\bhaven'?t\s+applied\b/;

  /** Typographic apostrophe variants folded to ASCII `'` before matching. */
  private static readonly TYPOGRAPHIC_APOSTROPHES = /[\u2018\u2019\u201B\u02BC\uFF07]/g;

  /**
   * Classifies one email.
   *
   * @param email Email to classify; `subject`, `body` and `from` are read.
   *   A missing (null/undefined) field is treated as empty text.
   * @returns The verdict of the first rule group that matches, or a
   *   low-confidence "not relevant" result when none does.
   */
  classify(email: EmailInput): ClassificationResult {
    const subject = EmailClassifier.normalize(email.subject);
    const body = EmailClassifier.normalize(email.body);
    const from = EmailClassifier.normalize(email.from);
    const combined = `${subject} ${body}`;

    // Order matters: `??` evaluates lazily, so later groups run only when
    // every earlier group returned null.
    const verdict =
      // CRITICAL RULES: always relevant (security, passwords, account issues)
      this.checkSecurity(subject, body, combined) ??
      // RESPONSE TO STUDENT ACTION: application/enrollment confirmations
      this.checkStudentAction(subject, body, combined) ??
      // ACCEPTED STUDENT: portal access, deposit reminders
      this.checkAccepted(subject, body, combined) ??
      // DUAL ENROLLMENT: course registration, schedules
      this.checkDualEnrollment(subject, body, combined, from) ??
      // SCHOLARSHIP: actually awarded (not eligible/apply/consideration)
      this.checkScholarship(subject, body, combined) ??
      // FINANCIAL AID READY: explicit offers ready to review
      this.checkFinancialAid(subject, body, combined) ??
      // DEFINITELY NOT RELEVANT: marketing, newsletters, unsolicited outreach
      this.checkIrrelevant(subject, body, combined, from);

    // DEFAULT: if uncertain, mark as not relevant (fail-safe for spam)
    return (
      verdict ?? {
        pertains: false,
        reason: "No clear relevance indicators found",
        confidence: 0.3,
        matched_rules: ["default_not_relevant"],
      }
    );
  }

  /**
   * Prepares a header or body field for matching: null/undefined becomes "",
   * text is lowercased, and typographic apostrophes are replaced with ASCII
   * `'` so rules such as `haven'?t` also match "haven’t".
   */
  private static normalize(text: string | null | undefined): string {
    return (text ?? "").toLowerCase().replace(EmailClassifier.TYPOGRAPHIC_APOSTROPHES, "'");
  }

  /** Returns true when at least one pattern matches `text`. */
  private static matchesAny(patterns: readonly RegExp[], text: string): boolean {
    return patterns.some((pattern) => pattern.test(text));
  }

  /**
   * Security rule group.
   *
   * @returns A relevant verdict (confidence 1.0) when a security phrase is
   *   present and the text does not also talk about tuition savings; null
   *   otherwise.
   */
  private checkSecurity(_subject: string, _body: string, combined: string): ClassificationResult | null {
    if (!EmailClassifier.matchesAny(EmailClassifier.SECURITY_PATTERNS, combined)) {
      return null;
    }
    // Real security alerts won't talk about tuition savings; treat as marketing.
    if (EmailClassifier.TUITION_SAVINGS.test(combined)) {
      return null;
    }
    return {
      pertains: true,
      reason: "Security/password alert - always important",
      confidence: 1.0,
      matched_rules: ["security_alert"],
    };
  }

  /**
   * Student-action rule group (application/enrollment confirmations).
   *
   * @returns A relevant verdict when a confirmation phrase is present and no
   *   "how to apply"-style marketing phrase is; null otherwise.
   */
  private checkStudentAction(_subject: string, _body: string, combined: string): ClassificationResult | null {
    if (!EmailClassifier.matchesAny(EmailClassifier.STUDENT_ACTION_PATTERNS, combined)) {
      return null;
    }
    // Exclude marketing that merely explains how to apply.
    if (EmailClassifier.APPLY_MARKETING.test(combined)) {
      return null;
    }
    return {
      pertains: true,
      reason: "Confirmation of student action (application/enrollment)",
      confidence: 0.95,
      matched_rules: ["student_action_confirmation"],
    };
  }

  /**
   * Accepted-student rule group (portal, deposit, admission decision).
   *
   * @returns A relevant verdict when an accepted-student phrase is present
   *   and no pre-admission or future-decision marketing phrase is; null
   *   otherwise.
   */
  private checkAccepted(_subject: string, _body: string, combined: string): ClassificationResult | null {
    if (!EmailClassifier.matchesAny(EmailClassifier.ACCEPTED_PATTERNS, combined)) {
      return null;
    }
    // Exclude pre-admission marketing and promises of a future decision.
    if (EmailClassifier.matchesAny(EmailClassifier.ACCEPTED_EXCLUSIONS, combined)) {
      return null;
    }
    return {
      pertains: true,
      reason: "Accepted student portal/deposit information",
      confidence: 0.95,
      matched_rules: ["accepted_student"],
    };
  }

  /**
   * Dual-enrollment rule group. `_from` is accepted but not consulted.
   *
   * @returns A relevant verdict when a dual-enrollment or course phrase is
   *   present and no recruiting phrase is; null otherwise.
   */
  private checkDualEnrollment(
    _subject: string,
    _body: string,
    combined: string,
    _from: string,
  ): ClassificationResult | null {
    if (!EmailClassifier.matchesAny(EmailClassifier.DUAL_ENROLLMENT_PATTERNS, combined)) {
      return null;
    }
    // Relevant only when about actual courses, not marketing.
    if (EmailClassifier.DUAL_ENROLLMENT_MARKETING.test(combined)) {
      return null;
    }
    return {
      pertains: true,
      reason: "Dual enrollment course information",
      confidence: 0.9,
      matched_rules: ["dual_enrollment"],
    };
  }

  /**
   * Scholarship rule group. Checks run in this order:
   *  1. subject invites applying for a scholarship and the text names a
   *     specific one -> relevant (0.75);
   *  2. a scholarship is mentioned together with held/eligible/consideration
   *     wording -> not relevant (0.9);
   *  3. awarded wording -> relevant (0.95).
   *
   * @returns The first verdict reached, or null when no check applies.
   */
  private checkScholarship(subject: string, _body: string, combined: string): ClassificationResult | null {
    // Specific, named application opportunities come first; generic
    // "apply for scholarships" marketing does not qualify.
    if (
      EmailClassifier.SCHOLARSHIP_APPLY_SUBJECT.test(subject) &&
      EmailClassifier.NAMED_SCHOLARSHIP.test(combined)
    ) {
      return {
        pertains: true,
        reason: "Scholarship application opportunity for accepted student",
        confidence: 0.75,
        matched_rules: ["scholarship_application_opportunity"],
      };
    }

    // Negative indicators are checked before the awarded patterns, and only
    // when a scholarship is mentioned at all.
    if (
      EmailClassifier.SCHOLARSHIP_MENTION.test(combined) &&
      EmailClassifier.matchesAny(EmailClassifier.SCHOLARSHIP_NOT_AWARDED_PATTERNS, combined)
    ) {
      return {
        pertains: false,
        reason: "Scholarship mentioned but not actually awarded (held/eligible/apply)",
        confidence: 0.9,
        matched_rules: ["scholarship_not_awarded"],
      };
    }

    if (EmailClassifier.matchesAny(EmailClassifier.SCHOLARSHIP_AWARDED_PATTERNS, combined)) {
      return {
        pertains: true,
        reason: "Scholarship actually awarded",
        confidence: 0.95,
        matched_rules: ["scholarship_awarded"],
      };
    }

    return null;
  }

  /**
   * Financial-aid rule group.
   *
   * @returns A relevant verdict when an "offer is ready" phrase is present
   *   and no application/FAFSA-reminder phrase is; null otherwise.
   */
  private checkFinancialAid(_subject: string, _body: string, combined: string): ClassificationResult | null {
    if (!EmailClassifier.matchesAny(EmailClassifier.AID_READY_PATTERNS, combined)) {
      return null;
    }
    // Application info and FAFSA reminders override a "ready" match.
    if (EmailClassifier.matchesAny(EmailClassifier.AID_NOT_READY_PATTERNS, combined)) {
      return null;
    }
    return {
      pertains: true,
      reason: "Financial aid offer ready to review",
      confidence: 0.95,
      matched_rules: ["financial_aid_ready"],
    };
  }

  /**
   * Marketing/outreach rule group. `_from` is accepted but not consulted.
   *
   * @returns A not-relevant verdict when a marketing phrase matches, or when
   *   the text says the student has not applied; null otherwise.
   */
  private checkIrrelevant(
    _subject: string,
    _body: string,
    combined: string,
    _from: string,
  ): ClassificationResult | null {
    if (EmailClassifier.matchesAny(EmailClassifier.IRRELEVANT_PATTERNS, combined)) {
      return {
        pertains: false,
        reason: "Marketing/newsletter/unsolicited outreach",
        confidence: 0.95,
        matched_rules: ["irrelevant_marketing"],
      };
    }

    // Haven't applied yet = not relevant
    if (EmailClassifier.NOT_APPLIED.test(combined)) {
      return {
        pertains: false,
        reason: "Unsolicited email where student has not applied",
        confidence: 0.95,
        matched_rules: ["not_applied"],
      };
    }

    return null;
  }
}
354
/**
 * One-shot convenience wrapper: builds a fresh `EmailClassifier` and returns
 * its verdict for a single email.
 *
 * @param email Email to classify.
 * @returns The result of `EmailClassifier.classify` for `email`.
 */
export function classifyEmail(email: EmailInput): ClassificationResult {
  return new EmailClassifier().classify(email);
}
359}