this repo has no description
1// Email classifier using rule-based approach learned from labeled data
2
3import type { EmailInput, ClassificationResult } from "./types.ts";
4
/**
 * Rule-based relevance classifier for college-related email.
 *
 * `classify` normalises the subject and body (lower-case, typographic
 * apostrophes folded to `'`), joins them with a single space, and runs the rule
 * groups below in a fixed priority order. The first group that produces a
 * result wins; if none does, the email is reported as not relevant with a low
 * confidence.
 *
 * All pattern tables are non-global regular expressions, so `test()` keeps no
 * state between calls and the tables can be shared across instances.
 *
 * NOTE(review): none of the patterns use the `s` flag, so `.` does not match
 * line breaks. A `.*` rule therefore cannot span two lines of a multi-line
 * body. Confirm whether callers flatten bodies before changing this.
 */
export class EmailClassifier {
  // ---------------------------------------------------------------- security
  /** Phrases that mark a password / account-security notice. */
  private static readonly SECURITY_PATTERNS: readonly RegExp[] = [
    /\bpassword\s+(reset|change|update|expired)\b/,
    /\breset\s+your\s+password\b/,
    /\baccount\s+security\b/,
    /\bsecurity\s+alert\b/,
    /\bunusual\s+(sign[- ]?in|activity)\b/,
    /\bverification\s+code\b/,
    /\b(2fa|mfa|two[- ]factor)\b/,
    /\bcompromised\s+account\b/,
    /\baccount\s+(locked|suspended)\b/,
    /\bsuspicious\s+activity\b/,
  ];

  /** Tuition-savings wording; its presence vetoes a security match as marketing. */
  private static readonly SECURITY_MARKETING =
    /\bsaving.*\bon\s+tuition\b|\btuition.*\bsaving\b/;

  // ---------------------------------------------------------- student action
  /** Confirmations of something the student already did (applied, enrolled). */
  private static readonly STUDENT_ACTION_PATTERNS: readonly RegExp[] = [
    /\bapplication\s+(received|complete|submitted|confirmation)\b/,
    /\breceived\s+your\s+application\b/,
    /\bthank\s+you\s+for\s+(applying|submitting)\b/,
    /\benrollment\s+confirmation\b/,
    /\bconfirmation\s+(of|for)\s+(your\s+)?(application|enrollment)\b/,
    /\byour\s+application\s+(has\s+been|is)\s+(received|complete)\b/,
  ];

  /** "How to apply"-style wording that vetoes a student-action match. */
  private static readonly STUDENT_ACTION_MARKETING =
    /\bhow\s+to\s+apply\b|\bapply\s+now\b|\bstart\s+(your\s+)?application\b/;

  // ---------------------------------------------------------------- accepted
  /** Accepted-student portal, deposit and admission-decision wording. */
  private static readonly ACCEPTED_PATTERNS: readonly RegExp[] = [
    /\baccepted\s+(student\s+)?portal\b/,
    /\byour\s+(personalized\s+)?accepted\s+portal\b/,
    /\bdeposit\s+(today|now|by|to\s+reserve)\b/,
    /\breserve\s+your\s+(place|spot)\b/,
    /\bcongratulations.*\baccepted\b/,
    /\byou\s+(have\s+been|are|were)\s+accepted\b/,
    /\badmission\s+(decision|offer)\b/,
    /\benroll(ment)?\s+deposit\b/,
  ];

  /**
   * Vetoes for the accepted-student rule. Each entry is a list of patterns that
   * must ALL match for the veto to apply; any single veto suppresses the rule.
   */
  private static readonly ACCEPTED_EXCLUSIONS: ReadonlyArray<readonly RegExp[]> = [
    // Pre-admission and acceptance-rate marketing
    [/\bacceptance\s+rate\b|\bhigh\s+acceptance\b|\bpre[- ]admit(ted)?\b|\bautomatic\s+admission\b/],
    // "Direct admit/admission" marketing that asks to complete a profile
    [/\bdirect\s+(admit(ted)?|admission)\b.*\b(complete|submit).*\bprofile\b|\b(complete|submit).*\bprofile\b.*\bdirect\s+(admit(ted)?|admission)\b/],
    // Marketing about a future admission decision
    [/\byou\s+will\s+(also\s+)?receive\s+(an?\s+)?(accelerated\s+)?admission\s+decision\b/],
    [/\breceive\s+an\s+admission\s+decision\s+within\b/],
    // "Priority Student" mail that asks to submit an application
    [/\bpriority\s+student\b.*\bsubmit.*application\b|\bsubmit.*\bpriority\s+student\s+application\b/],
    // Asking to submit any application (not accepted yet)
    [/\bsubmit\s+(your\s+)?(the\s+)?application\b/],
    // "Once you are accepted" means not accepted yet
    [/\bonce\s+you\s+(are|have\s+been)\s+accepted\b/],
    // "Reserve your spot" for an event/webinar rather than enrollment
    [/\breserve\s+your\s+spot\b/, /\b(virtual|webinar|event|program|zoom|session)\b/],
    // "Top candidate" mail asking to apply / start an application
    [/\btop\s+candidate\b.*\b(apply|start.*application|submit.*application)\b/],
    [/\binvite\s+you\s+to\s+apply\b/],
    // Application-deadline marketing (Early Decision/Action, priority deadlines)
    [/\b(early\s+(decision|action)|priority)\b.*\b(deadline|apply|application)\b.*\b(approaching|by|extended)\b/],
    [/\bapply\s+(by|now|right\s+away|today)\b|\bdeadline.*\b(december|january|february|march)\b/],
    // "Panther Priority Application" and similar
    [/\bpanther\s+priority\s+application\b|\bpriority\s+application\b/],
    // "Deadline details" marketing
    [/\bdeadline\s+details\b|\byour\s+deadline\b/],
    // Future deadline announcements
    [/\bapplication\s+deadline\s+will\s+be\b/],
    // Exploratory marketing ("flip these pages")
    [/\bflip\s+these\s+pages\b|\blearn\s+more\s+about\s+being\b/],
    // "Want to make sure you're ready" deadline pressure
    [/\b(want|wanted)\s+to\s+make\s+sure\s+you'?re\s+ready\b/],
    // "We're interested in you" combined with apply language
    [/\bwe'?re\s+interested\s+in\s+you\b/, /\bapply\b/],
    // "You have until midnight/tonight to apply" deadline pressure
    [/\byou\s+have\s+until\b.*\b(midnight|tonight|today)\b.*\bto\s+apply\b/],
  ];

  // --------------------------------------------------------- dual enrollment
  /** Course-registration / dual-enrollment wording. */
  private static readonly DUAL_ENROLLMENT_PATTERNS: readonly RegExp[] = [
    /\bdual\s+enrollment\b/,
    /\bcourse\s+(registration|deletion|added|dropped)\b/,
    /\bspring\s+\d{4}\s+(course|on[- ]campus)\b/,
    /\bhow\s+to\s+register\b.*\b(course|class)/,
    /\bcedarville\s+university\).*\b(course|registration)\b/,
  ];

  /** Promotional wording that vetoes a dual-enrollment match. */
  private static readonly DUAL_ENROLLMENT_MARKETING =
    /\blearn\s+more\s+about\b|\binterested\s+in\b|\bconsider\s+joining\b/;

  // ------------------------------------------------------------- scholarship
  /** Subject-line wording inviting the student to apply for a scholarship. */
  private static readonly SCHOLARSHIP_APPLY_SUBJECT = /\bapply\s+for\s+(the\s+)?.*\bscholarship\b/;

  /** Names that mark the invitation above as a specific, named scholarship. */
  private static readonly SCHOLARSHIP_NAMED = /\bpresident'?s\b|\bministry\b|\bimpact\b/;

  /** Any mention of the word "scholarship". */
  private static readonly SCHOLARSHIP_MENTION = /\bscholarship\b/;

  /** Wording showing a scholarship is merely possible, not awarded. */
  private static readonly SCHOLARSHIP_NOT_AWARDED_PATTERNS: readonly RegExp[] = [
    /\bscholarship\b.*\b(held|reserved)\s+for\s+you\b/,
    /\b(held|reserved)\s+for\s+you\b/,
    /\bconsider(ed|ation)\b.*\bscholarship\b/,
    /\bscholarship\b.*\bconsider(ed|ation)\b/,
    /\beligible\s+for\b.*\bscholarship\b/,
    /\bscholarship\b.*\beligible\b/,
    /\bmay\s+qualify\b.*\bscholarship\b/,
    /\bguaranteed\s+admission\b/,
    /\bpriority\s+consideration\b/,
    // Scholarship events/days (attend to get a scholarship = not awarded)
    /\b(attend|register\s+for).*\bscholarship\s+(day|event|award\s+event)\b/,
    /\bscholarship\s+(day|event).*\b(attend|register)\b/,
    /\bsoar\s+(scholarship\s+award\s+)?event\b/,
    // Direct admission / scholarship forms still to be submitted
    /\bdirect\s+admission\b.*\bscholarship\s+form\b/,
    /\bscholarship\s+form\b.*\bdirect\s+admission\b/,
    /\bsubmit\s+(your\s+)?.*\bscholarship\s+form\b/,
    // Deadline pressure with a scholarship mention
    /\b(want|wanted)\s+to\s+make\s+sure\s+you'?re\s+ready\b.*\bscholarship\b/,
    // Scholarship estimate (not an actual award)
    /\bscholarship\s+estimate\b/,
    /\byou\s+have\s+not\s+(yet\s+)?seen\s+your.*\bscholarship\b/,
    /\bacademic\s+scholarship\s+estimate\b/,
  ];

  /** Wording showing a scholarship has actually been awarded. */
  private static readonly SCHOLARSHIP_AWARDED_PATTERNS: readonly RegExp[] = [
    /\bcongratulations\b.*\bscholarship\b/,
    /\byou\s+(have|received|are\s+awarded|won)\b.*\bscholarship\b/,
    /\bwe\s+(are\s+)?(pleased\s+to\s+)?award(ing)?\b.*\bscholarship\b/,
    /\bscholarship\s+(offer|award)\b/,
    /\breceived\s+a\s+scholarship\b/,
  ];

  // ----------------------------------------------------------- financial aid
  /** Wording showing an aid offer / award letter is ready to view. */
  private static readonly AID_READY_PATTERNS: readonly RegExp[] = [
    /\bfinancial\s+aid\b.*\boffer\b.*\b(ready|available)\b/,
    /\b(ready|available)\b.*\bfinancial\s+aid\b.*\boffer\b/,
    /\baward\s+letter\b.*\b(ready|available|posted|view)\b/,
    /\b(view|review)\s+(your\s+)?award\s+letter\b/,
    /\bfinancial\s+aid\s+package\b.*\b(ready|available|posted)\b/,
    /\byour\s+aid\s+is\s+ready\b/,
  ];

  /** Aid-application / FAFSA-reminder wording that vetoes an aid-ready match. */
  private static readonly AID_NOT_READY_PATTERNS: readonly RegExp[] = [
    /\blearn\s+more\s+about\b.*\bfinancial\s+aid\b/,
    /\bapply\b.*\b(for\s+)?financial\s+aid\b/,
    /\bfinancial\s+aid\b.*\bapplication\b/,
    /\bcomplete\s+(your\s+)?fafsa\b/,
    /\bconsidered\s+for\b.*\baid\b/,
    /\bpriority\s+(deadline|consideration)\b.*\bfinancial\s+aid\b/,
  ];

  // -------------------------------------------------------------- irrelevant
  /** Strong indicators of marketing, newsletters and unsolicited outreach. */
  private static readonly IRRELEVANT_PATTERNS: readonly RegExp[] = [
    // Newsletter/blog content
    /\bstudent\s+life\s+blog\b/,
    /\b(student\s+life\s+)?blog\s+(post|update)\b/,
    /\bnew\s+student\s+life\s+blog\b/,
    /\bnewsletter\b/,
    /\bweekly\s+(digest|update)\b/,

    // Marketing events
    /\bupcoming\s+events\b/,
    /\bjoin\s+us\s+(for|at|on\s+zoom)\b/,
    /\bopen\s+house\b/,
    /\bvirtual\s+tour\b/,
    /\bcampus\s+(visit|tour|event)\b/,
    /\bmeet\s+(the|our)\s+(students|faculty)\b/,

    // Generic outreach (not applied yet)
    /\bhaven'?t\s+applied.*yet\b/,
    /\bstill\s+time\s+to\s+apply\b/,
    /\bhow\s+is\s+your\s+college\s+search\b/,
    /\bstart\s+(your\s+)?college\s+search\b/,
    /\bexplore\s+(our\s+)?(programs|campus)\b/,

    // Unsolicited outreach patterns
    /\bi\s+hope\s+you\s+have\s+been\s+receiving\s+my\s+emails\b/,
    /\bam\s+i\s+reaching\b/,
    /\byou\s+are\s+on\s+.*\s+(radar|list)\b/,
    /\byou'?re\s+on\s+(our|my)\s+radar\b/,
    /\bi\s+want\s+to\s+make\s+sure\s+you\s+know\b/,
    /\byou'?re\s+invited\s+to\s+submit\b/,
    /\bi'?m\s+eager\s+to\s+consider\s+you\b/,
    /\bsubmit\s+your\s+.*\s+application\b/,
    /\bpriority\s+status\b.*\bsubmit.*application\b/,
    /\btop\s+candidate\b.*\binvite\s+you\s+to\s+apply\b/,
    /\binvite\s+you\s+to\s+apply\b/,

    // Priority deadline extensions
    /\bextended.*\bpriority\s+deadline\b/,
    /\bpriority\s+deadline.*\bextended\b/,

    // Summer camps/programs
    /\bsummer\s+(academy|camp|program)\b/,
    /\bsave\s+the\s+date\b/,

    // Seasonal fluff
    /\bugly\s+sweater\b/,
    /\bit'?s\s+.+\s+season\b/,

    // FAFSA/scholarship info sessions (not actual aid offers)
    /\bjoin\s+us.*\b(virtual\s+program|zoom)\b.*\b(scholarship|financial\s+aid)\b/,
    /\blearn\s+more\b.*\b(scholarship|financial\s+aid)\s+(opportunities|options)\b/,
    /\b(scholarship|financial\s+aid)\s+(opportunities|options)\b.*\blearn\s+more\b/,
  ];

  /** Bare "haven't applied" wording, checked after the table above. */
  private static readonly NOT_APPLIED = /\bhaven'?t\s+applied\b/;

  // ----------------------------------------------------------------- helpers
  /**
   * Turns one email field into matchable text.
   *
   * Non-string values (missing fields, numbers, objects from untyped callers)
   * become the empty string instead of throwing on `.toLowerCase()`.
   * Typographic apostrophes are folded to `'` so patterns such as `haven'?t`
   * also match "haven’t".
   */
  private static normalize(value: unknown): string {
    if (typeof value !== "string") {
      return "";
    }
    return value.toLowerCase().replace(/[\u2018\u2019\u02BC]/g, "'");
  }

  /** True when at least one pattern in the table matches the text. */
  private static matchesAny(patterns: readonly RegExp[], text: string): boolean {
    return patterns.some((pattern) => pattern.test(text));
  }

  /** Builds a fresh result object carrying a single matched rule name. */
  private static verdict(
    pertains: boolean,
    reason: string,
    confidence: number,
    rule: string,
  ): ClassificationResult {
    return { pertains, reason, confidence, matched_rules: [rule] };
  }

  // -------------------------------------------------------------- public API
  /**
   * Classifies one email.
   *
   * @param email Email to classify; `subject`, `body` and `from` are read.
   * @returns The result of the first rule group that fires, in this order:
   *   security, student action, accepted student, dual enrollment,
   *   scholarship, financial aid, irrelevant marketing. A falsy or non-object
   *   input yields the `invalid_input` result; if no group fires the
   *   `default_not_relevant` result (confidence 0.3) is returned.
   */
  classify(email: EmailInput): ClassificationResult {
    // Defensive check: callers may hand over untyped values.
    if (!email || typeof email !== "object") {
      return EmailClassifier.verdict(false, "Invalid email object", 0.0, "invalid_input");
    }

    const subject = EmailClassifier.normalize(email.subject);
    const body = EmailClassifier.normalize(email.body);
    const from = EmailClassifier.normalize(email.from);
    const combined = `${subject} ${body}`;

    // Each check returns null when it has no opinion, so `??` falls through to
    // the next group and stops at the first non-null result.
    return (
      this.checkSecurity(subject, body, combined) ??
      this.checkStudentAction(subject, body, combined) ??
      this.checkAccepted(subject, body, combined) ??
      this.checkDualEnrollment(subject, body, combined, from) ??
      this.checkScholarship(subject, body, combined) ??
      this.checkFinancialAid(subject, body, combined) ??
      this.checkIrrelevant(subject, body, combined, from) ??
      // Fail-safe default: uncertain mail is treated as not relevant.
      EmailClassifier.verdict(false, "No clear relevance indicators found", 0.3, "default_not_relevant")
    );
  }

  // ------------------------------------------------------------- rule groups
  /**
   * Security / password / account alerts.
   * Only `combined` is consulted; `subject` and `body` are accepted but unused.
   *
   * @returns A relevant result (confidence 1.0) on a match, or null when there
   *   is no match or the text also contains tuition-savings marketing wording.
   */
  private checkSecurity(subject: string, body: string, combined: string): ClassificationResult | null {
    if (!EmailClassifier.matchesAny(EmailClassifier.SECURITY_PATTERNS, combined)) {
      return null;
    }
    if (EmailClassifier.SECURITY_MARKETING.test(combined)) {
      return null; // Just marketing
    }
    return EmailClassifier.verdict(true, "Security/password alert - always important", 1.0, "security_alert");
  }

  /**
   * Confirmations of an application or enrollment the student submitted.
   * Only `combined` is consulted.
   *
   * @returns A relevant result (confidence 0.95), or null when nothing matches
   *   or the text contains "how to apply"-style marketing wording.
   */
  private checkStudentAction(subject: string, body: string, combined: string): ClassificationResult | null {
    if (!EmailClassifier.matchesAny(EmailClassifier.STUDENT_ACTION_PATTERNS, combined)) {
      return null;
    }
    if (EmailClassifier.STUDENT_ACTION_MARKETING.test(combined)) {
      return null;
    }
    return EmailClassifier.verdict(
      true,
      "Confirmation of student action (application/enrollment)",
      0.95,
      "student_action_confirmation",
    );
  }

  /**
   * Accepted-student portal, deposit and admission-decision mail.
   * Only `combined` is consulted.
   *
   * @returns A relevant result (confidence 0.95), or null when nothing matches
   *   or any entry of `ACCEPTED_EXCLUSIONS` applies.
   */
  private checkAccepted(subject: string, body: string, combined: string): ClassificationResult | null {
    if (!EmailClassifier.matchesAny(EmailClassifier.ACCEPTED_PATTERNS, combined)) {
      return null;
    }
    const vetoed = EmailClassifier.ACCEPTED_EXCLUSIONS.some((allOf) =>
      allOf.every((pattern) => pattern.test(combined)),
    );
    if (vetoed) {
      return null;
    }
    return EmailClassifier.verdict(true, "Accepted student portal/deposit information", 0.95, "accepted_student");
  }

  /**
   * Dual-enrollment / course-registration mail.
   * Only `combined` is consulted; `from` is accepted but unused.
   *
   * @returns A relevant result (confidence 0.9), or null when nothing matches
   *   or the text contains promotional wording.
   */
  private checkDualEnrollment(subject: string, body: string, combined: string, from: string): ClassificationResult | null {
    if (!EmailClassifier.matchesAny(EmailClassifier.DUAL_ENROLLMENT_PATTERNS, combined)) {
      return null;
    }
    if (EmailClassifier.DUAL_ENROLLMENT_MARKETING.test(combined)) {
      return null; // Just marketing
    }
    return EmailClassifier.verdict(true, "Dual enrollment course information", 0.9, "dual_enrollment");
  }

  /**
   * Scholarship mail. Evaluated in three steps:
   *  1. a subject inviting the student to apply for a named scholarship is
   *     relevant (confidence 0.75);
   *  2. otherwise, if "scholarship" is mentioned together with not-awarded
   *     wording, the mail is explicitly not relevant (confidence 0.9);
   *  3. otherwise, awarded wording makes it relevant (confidence 0.95).
   *
   * @returns One of the results above, or null when no step applies.
   */
  private checkScholarship(subject: string, body: string, combined: string): ClassificationResult | null {
    // Step 1: the invitation is looked for in the subject only, the
    // scholarship name anywhere in subject + body.
    if (
      EmailClassifier.SCHOLARSHIP_APPLY_SUBJECT.test(subject) &&
      EmailClassifier.SCHOLARSHIP_NAMED.test(combined)
    ) {
      return EmailClassifier.verdict(
        true,
        "Scholarship application opportunity for accepted student",
        0.75,
        "scholarship_application_opportunity",
      );
    }

    // Step 2: several not-awarded patterns do not contain the word
    // "scholarship" themselves, hence the separate mention guard.
    if (
      EmailClassifier.SCHOLARSHIP_MENTION.test(combined) &&
      EmailClassifier.matchesAny(EmailClassifier.SCHOLARSHIP_NOT_AWARDED_PATTERNS, combined)
    ) {
      return EmailClassifier.verdict(
        false,
        "Scholarship mentioned but not actually awarded (held/eligible/apply)",
        0.9,
        "scholarship_not_awarded",
      );
    }

    // Step 3: actually awarded.
    if (EmailClassifier.matchesAny(EmailClassifier.SCHOLARSHIP_AWARDED_PATTERNS, combined)) {
      return EmailClassifier.verdict(true, "Scholarship actually awarded", 0.95, "scholarship_awarded");
    }

    return null;
  }

  /**
   * Financial-aid offers that are ready to review.
   * Only `combined` is consulted.
   *
   * @returns A relevant result (confidence 0.95), or null when no "ready"
   *   pattern matches or any application/FAFSA-reminder pattern matches.
   */
  private checkFinancialAid(subject: string, body: string, combined: string): ClassificationResult | null {
    if (!EmailClassifier.matchesAny(EmailClassifier.AID_READY_PATTERNS, combined)) {
      return null;
    }
    if (EmailClassifier.matchesAny(EmailClassifier.AID_NOT_READY_PATTERNS, combined)) {
      return null; // Just application info
    }
    return EmailClassifier.verdict(true, "Financial aid offer ready to review", 0.95, "financial_aid_ready");
  }

  /**
   * Marketing, newsletters and unsolicited outreach.
   * Only `combined` is consulted; `from` is accepted but unused.
   *
   * @returns A not-relevant result (confidence 0.95) tagged
   *   `irrelevant_marketing` or `not_applied`, or null when nothing matches.
   */
  private checkIrrelevant(subject: string, body: string, combined: string, from: string): ClassificationResult | null {
    if (EmailClassifier.matchesAny(EmailClassifier.IRRELEVANT_PATTERNS, combined)) {
      return EmailClassifier.verdict(false, "Marketing/newsletter/unsolicited outreach", 0.95, "irrelevant_marketing");
    }

    // Haven't applied yet = not relevant
    if (EmailClassifier.NOT_APPLIED.test(combined)) {
      return EmailClassifier.verdict(false, "Unsolicited email where student has not applied", 0.95, "not_applied");
    }

    return null;
  }
}
448
/**
 * Convenience wrapper: classifies a single email with a throwaway
 * `EmailClassifier` instance.
 *
 * @param email Email to classify.
 * @returns Whatever `EmailClassifier.classify` returns for `email`.
 */
export function classifyEmail(email: EmailInput): ClassificationResult {
  return new EmailClassifier().classify(email);
}