this repo has no description
at main 18 kB view raw
// Email classifier using rule-based approach learned from labeled data
//
// Matching notes that apply to every rule table below:
//  - All patterns run against lower-cased text, so they are written in lower case.
//  - Typographic apostrophes are folded to "'" before matching, so patterns
//    such as /haven'?t/ also match "haven’t".
//  - "." does not match line terminators (no `s` flag is used), so a pattern
//    like /congratulations.*accepted/ only matches within a single line.
//  - No pattern uses the `g`/`y` flag, so the shared RegExp objects are
//    stateless and safe to reuse across calls.

import type { EmailInput, ClassificationResult } from "./types.ts";

/** Apostrophe look-alikes that mail clients commonly substitute for "'". */
const TYPOGRAPHIC_APOSTROPHES = /[\u2018\u2019\u201B\u02BC\uFF07]/g;

/** Security / account alerts: always relevant. */
const SECURITY_PATTERNS: readonly RegExp[] = [
  /\bpassword\s+(reset|change|update|expired)\b/,
  /\breset\s+your\s+password\b/,
  /\baccount\s+security\b/,
  /\bsecurity\s+alert\b/,
  /\bunusual\s+(sign[- ]?in|activity)\b/,
  /\bverification\s+code\b/,
  /\b(2fa|mfa|two[- ]factor)\b/,
  /\bcompromised\s+account\b/,
  /\baccount\s+(locked|suspended)\b/,
  /\bsuspicious\s+activity\b/,
];

/** Marketing that talks about tuition savings; real security alerts do not. */
const TUITION_SAVINGS_MARKETING = /\bsaving.*\bon\s+tuition\b|\btuition.*\bsaving\b/;

/** Confirmations of something the student actually did. */
const STUDENT_ACTION_PATTERNS: readonly RegExp[] = [
  /\bapplication\s+(received|complete|submitted|confirmation)\b/,
  /\breceived\s+your\s+application\b/,
  /\bthank\s+you\s+for\s+(applying|submitting)\b/,
  /\benrollment\s+confirmation\b/,
  /\bconfirmation\s+(of|for)\s+(your\s+)?(application|enrollment)\b/,
  /\byour\s+application\s+(has\s+been|is)\s+(received|complete)\b/,
];

/** "How to apply" style marketing that disqualifies a student-action match. */
const APPLY_MARKETING = /\bhow\s+to\s+apply\b|\bapply\s+now\b|\bstart\s+(your\s+)?application\b/;

/** Accepted-student portal / deposit / admission-decision language. */
const ACCEPTED_PATTERNS: readonly RegExp[] = [
  /\baccepted\s+(student\s+)?portal\b/,
  /\byour\s+(personalized\s+)?accepted\s+portal\b/,
  /\bdeposit\s+(today|now|by|to\s+reserve)\b/,
  /\breserve\s+your\s+(place|spot)\b/,
  /\bcongratulations.*\baccepted\b/,
  /\byou\s+(have\s+been|are|were)\s+accepted\b/,
  /\badmission\s+(decision|offer)\b/,
  /\benroll(ment)?\s+deposit\b/,
];

/** Any single match here means the mail is pre-admission marketing, not an acceptance. */
const ACCEPTED_EXCLUSIONS: readonly RegExp[] = [
  // Pre-admission and acceptance-rate marketing
  /\bacceptance\s+rate\b|\bhigh\s+acceptance\b|\bpre[- ]admit(ted)?\b|\bautomatic\s+admission\b/,
  // "Direct admit/admission" marketing that asks to complete a profile
  /\bdirect\s+(admit(ted)?|admission)\b.*\b(complete|submit).*\bprofile\b|\b(complete|submit).*\bprofile\b.*\bdirect\s+(admit(ted)?|admission)\b/,
  // Promises of a future admission decision
  /\byou\s+will\s+(also\s+)?receive\s+(an?\s+)?(accelerated\s+)?admission\s+decision\b/,
  /\breceive\s+an\s+admission\s+decision\s+within\b/,
  // "Priority Student" spam that asks to submit an application
  /\bpriority\s+student\b.*\bsubmit.*application\b|\bsubmit.*\bpriority\s+student\s+application\b/,
  // Asking to submit ANY application (not accepted yet)
  /\bsubmit\s+(your\s+)?(the\s+)?application\b/,
  // "Once you are accepted" means they are not accepted yet
  /\bonce\s+you\s+(are|have\s+been)\s+accepted\b/,
  // "Top candidate" spam asking to apply / start an application
  /\btop\s+candidate\b.*\b(apply|start.*application|submit.*application)\b/,
  /\binvite\s+you\s+to\s+apply\b/,
  // Application deadline marketing (Early Decision/Action, priority deadlines, ...)
  /\b(early\s+(decision|action)|priority)\b.*\b(deadline|apply|application)\b.*\b(approaching|by|extended)\b/,
  /\bapply\s+(by|now|right\s+away|today)\b|\bdeadline.*\b(december|january|february|march)\b/,
  // "Panther Priority Application" and similar marketing
  /\bpanther\s+priority\s+application\b|\bpriority\s+application\b/,
  // "Deadline details" marketing spam
  /\bdeadline\s+details\b|\byour\s+deadline\b/,
  // Future deadline announcements
  /\bapplication\s+deadline\s+will\s+be\b/,
  // "Flip these pages" and similar exploratory marketing
  /\bflip\s+these\s+pages\b|\blearn\s+more\s+about\s+being\b/,
  // "Want to make sure you're ready" deadline pressure
  /\b(want|wanted)\s+to\s+make\s+sure\s+you'?re\s+ready\b/,
  // "You have until midnight/tonight to apply" deadline pressure
  /\byou\s+have\s+until\b.*\b(midnight|tonight|today)\b.*\bto\s+apply\b/,
];

/** "Reserve your spot" only counts as marketing when paired with an event word. */
const RESERVE_SPOT = /\breserve\s+your\s+spot\b/;
const EVENT_WORDS = /\b(virtual|webinar|event|program|zoom|session)\b/;

/** "We're interested in you" only counts as marketing when paired with "apply". */
const INTERESTED_IN_YOU = /\bwe'?re\s+interested\s+in\s+you\b/;
const APPLY_WORD = /\bapply\b/;

/** Course registration / schedule language for dual-enrolled students. */
const DUAL_ENROLLMENT_PATTERNS: readonly RegExp[] = [
  /\bdual\s+enrollment\b/,
  /\bcourse\s+(registration|deletion|added|dropped)\b/,
  /\bspring\s+\d{4}\s+(course|on[- ]campus)\b/,
  /\bhow\s+to\s+register\b.*\b(course|class)/,
  /\bcedarville\s+university\).*\b(course|registration)\b/,
];

/** Recruiting language that disqualifies a dual-enrollment match. */
const DUAL_ENROLLMENT_MARKETING = /\blearn\s+more\s+about\b|\binterested\s+in\b|\bconsider\s+joining\b/;

/** Subject line inviting the student to apply for a scholarship. */
const SCHOLARSHIP_APPLY_SUBJECT = /\bapply\s+for\s+(the\s+)?.*\bscholarship\b/;

/** Markers of a specific, named scholarship (President's, Ministry, Impact). */
const NAMED_SCHOLARSHIP = /\bpresident'?s\b|\bministry\b|\bimpact\b/;

/** Any mention of the word "scholarship". */
const SCHOLARSHIP_MENTION = /\bscholarship\b/;

/** Scholarship is mentioned but has not actually been awarded. */
const SCHOLARSHIP_NOT_AWARDED_PATTERNS: readonly RegExp[] = [
  /\bscholarship\b.*\b(held|reserved)\s+for\s+you\b/,
  /\b(held|reserved)\s+for\s+you\b/,
  /\bconsider(ed|ation)\b.*\bscholarship\b/,
  /\bscholarship\b.*\bconsider(ed|ation)\b/,
  /\beligible\s+for\b.*\bscholarship\b/,
  /\bscholarship\b.*\beligible\b/,
  /\bmay\s+qualify\b.*\bscholarship\b/,
  /\bguaranteed\s+admission\b/,
  /\bpriority\s+consideration\b/,
  // Scholarship events/days (attend to get scholarship = not awarded)
  /\b(attend|register\s+for).*\bscholarship\s+(day|event|award\s+event)\b/,
  /\bscholarship\s+(day|event).*\b(attend|register)\b/,
  /\bsoar\s+(scholarship\s+award\s+)?event\b/,
  // Direct admission/scholarship forms to submit (not awarded yet)
  /\bdirect\s+admission\b.*\bscholarship\s+form\b/,
  /\bscholarship\s+form\b.*\bdirect\s+admission\b/,
  /\bsubmit\s+(your\s+)?.*\bscholarship\s+form\b/,
  // "Want to make sure you're ready" deadline pressure with scholarship mention
  /\b(want|wanted)\s+to\s+make\s+sure\s+you'?re\s+ready\b.*\bscholarship\b/,
  // Scholarship estimate (not actual award)
  /\bscholarship\s+estimate\b/,
  /\byou\s+have\s+not\s+(yet\s+)?seen\s+your.*\bscholarship\b/,
  /\bacademic\s+scholarship\s+estimate\b/,
];

/** Scholarship has actually been awarded. */
const SCHOLARSHIP_AWARDED_PATTERNS: readonly RegExp[] = [
  /\bcongratulations\b.*\bscholarship\b/,
  /\byou\s+(have|received|are\s+awarded|won)\b.*\bscholarship\b/,
  /\bwe\s+(are\s+)?(pleased\s+to\s+)?award(ing)?\b.*\bscholarship\b/,
  /\bscholarship\s+(offer|award)\b/,
  /\breceived\s+a\s+scholarship\b/,
];

/** Financial aid offer / award letter is ready to review. */
const AID_READY_PATTERNS: readonly RegExp[] = [
  /\bfinancial\s+aid\b.*\boffer\b.*\b(ready|available)\b/,
  /\b(ready|available)\b.*\bfinancial\s+aid\b.*\boffer\b/,
  /\baward\s+letter\b.*\b(ready|available|posted|view)\b/,
  /\b(view|review)\s+(your\s+)?award\s+letter\b/,
  /\bfinancial\s+aid\s+package\b.*\b(ready|available|posted)\b/,
  /\byour\s+aid\s+is\s+ready\b/,
];

/** Aid applications and FAFSA reminders: these veto an "aid ready" match. */
const AID_NOT_READY_PATTERNS: readonly RegExp[] = [
  /\blearn\s+more\s+about\b.*\bfinancial\s+aid\b/,
  /\bapply\b.*\b(for\s+)?financial\s+aid\b/,
  /\bfinancial\s+aid\b.*\bapplication\b/,
  /\bcomplete\s+(your\s+)?fafsa\b/,
  /\bconsidered\s+for\b.*\baid\b/,
  /\bpriority\s+(deadline|consideration)\b.*\bfinancial\s+aid\b/,
];

/** Strong indicators of marketing, newsletters and unsolicited outreach. */
const IRRELEVANT_PATTERNS: readonly RegExp[] = [
  // Newsletter/blog content
  /\bstudent\s+life\s+blog\b/,
  /\b(student\s+life\s+)?blog\s+(post|update)\b/,
  /\bnew\s+student\s+life\s+blog\b/,
  /\bnewsletter\b/,
  /\bweekly\s+(digest|update)\b/,

  // Marketing events
  /\bupcoming\s+events\b/,
  /\bjoin\s+us\s+(for|at|on\s+zoom)\b/,
  /\bopen\s+house\b/,
  /\bvirtual\s+tour\b/,
  /\bcampus\s+(visit|tour|event)\b/,
  /\bmeet\s+(the|our)\s+(students|faculty)\b/,

  // Generic outreach (not applied yet)
  /\bhaven'?t\s+applied.*yet\b/,
  /\bstill\s+time\s+to\s+apply\b/,
  /\bhow\s+is\s+your\s+college\s+search\b/,
  /\bstart\s+(your\s+)?college\s+search\b/,
  /\bexplore\s+(our\s+)?(programs|campus)\b/,

  // Unsolicited outreach patterns
  /\bi\s+hope\s+you\s+have\s+been\s+receiving\s+my\s+emails\b/,
  /\bam\s+i\s+reaching\b/,
  /\byou\s+are\s+on\s+.*\s+(radar|list)\b/,
  /\byou'?re\s+on\s+(our|my)\s+radar\b/,
  /\bi\s+want\s+to\s+make\s+sure\s+you\s+know\b/,
  /\byou'?re\s+invited\s+to\s+submit\b/,
  /\bi'?m\s+eager\s+to\s+consider\s+you\b/,
  /\bsubmit\s+your\s+.*\s+application\b/,
  /\bpriority\s+status\b.*\bsubmit.*application\b/,
  /\btop\s+candidate\b.*\binvite\s+you\s+to\s+apply\b/,
  /\binvite\s+you\s+to\s+apply\b/,

  // Priority deadline extensions (spam)
  /\bextended.*\bpriority\s+deadline\b/,
  /\bpriority\s+deadline.*\bextended\b/,

  // Summer camps/programs
  /\bsummer\s+(academy|camp|program)\b/,
  /\bsave\s+the\s+date\b/,

  // Ugly sweaters and other fluff
  /\bugly\s+sweater\b/,
  /\bit'?s\s+.+\s+season\b/,

  // FAFSA/scholarship info sessions (not actual aid offers)
  /\bjoin\s+us.*\b(virtual\s+program|zoom)\b.*\b(scholarship|financial\s+aid)\b/,
  /\blearn\s+more\b.*\b(scholarship|financial\s+aid)\s+(opportunities|options)\b/,
  /\b(scholarship|financial\s+aid)\s+(opportunities|options)\b.*\blearn\s+more\b/,
];

/** Sender states the student has not applied. */
const NOT_APPLIED = /\bhaven'?t\s+applied\b/;

/**
 * Turns one email field into the text the rules run against.
 *
 * Missing, empty or non-string values become "" (instead of throwing on
 * `.toLowerCase()`); strings are lower-cased and typographic apostrophes
 * are folded to "'".
 */
function normalizeField(value: unknown): string {
  if (typeof value !== "string") {
    return "";
  }
  return value.toLowerCase().replace(TYPOGRAPHIC_APOSTROPHES, "'");
}

/** True when at least one pattern in the table matches the text. */
function matchesAny(patterns: readonly RegExp[], text: string): boolean {
  return patterns.some((pattern) => pattern.test(text));
}

/** True when accepted-sounding text is really pre-admission marketing. */
function isPreAdmissionMarketing(text: string): boolean {
  if (matchesAny(ACCEPTED_EXCLUSIONS, text)) {
    return true;
  }
  // "Reserve your spot" for events/webinars (not enrollment)
  if (RESERVE_SPOT.test(text) && EVENT_WORDS.test(text)) {
    return true;
  }
  // "We're interested in you" combined with apply language
  return INTERESTED_IN_YOU.test(text) && APPLY_WORD.test(text);
}

/** Builds a fresh result object so callers never share a `matched_rules` array. */
function verdict(pertains: boolean, reason: string, confidence: number, rule: string): ClassificationResult {
  return { pertains, reason, confidence, matched_rules: [rule] };
}

/**
 * Rule-based classifier that decides whether an email pertains to the student.
 *
 * Rules are evaluated in a fixed priority order and the first rule that
 * produces a result wins; when none fires the email is marked not relevant.
 */
export class EmailClassifier {
  /**
   * Classifies a single email.
   *
   * @param email Email to classify; `subject`, `body` and `from` are read.
   *   Missing or non-string fields are treated as empty text.
   * @returns The first rule result in priority order, an "invalid_input"
   *   result when `email` is not an object, or the low-confidence
   *   "default_not_relevant" result when nothing matches.
   */
  classify(email: EmailInput): ClassificationResult {
    // Defensive checks for Apps Script environment
    if (!email || typeof email !== "object") {
      return verdict(false, "Invalid email object", 0, "invalid_input");
    }

    const subject = normalizeField(email.subject);
    const body = normalizeField(email.body);
    const from = normalizeField(email.from);
    const combined = `${subject} ${body}`;

    return (
      // CRITICAL RULES: Always relevant (security, passwords, account issues)
      this.checkSecurity(subject, body, combined) ??
      // RESPONSE TO STUDENT ACTION: Application/enrollment confirmations
      this.checkStudentAction(subject, body, combined) ??
      // ACCEPTED STUDENT: Portal access, deposit reminders, accepted student info
      this.checkAccepted(subject, body, combined) ??
      // DUAL ENROLLMENT: Course registration, schedules for enrolled students
      this.checkDualEnrollment(subject, body, combined, from) ??
      // SCHOLARSHIP AWARDED: Actually awarded (not eligible/apply/consideration)
      this.checkScholarship(subject, body, combined) ??
      // FINANCIAL AID READY: Explicit offers ready to review (not applications)
      this.checkFinancialAid(subject, body, combined) ??
      // DEFINITELY NOT RELEVANT: Marketing, newsletters, unsolicited outreach
      this.checkIrrelevant(subject, body, combined, from) ??
      // DEFAULT: If uncertain, mark as not relevant (fail-safe for spam)
      verdict(false, "No clear relevance indicators found", 0.3, "default_not_relevant")
    );
  }

  /**
   * Security / password / account alerts.
   *
   * Only `combined` is consulted; `subject` and `body` are kept for signature
   * compatibility. Returns null when no alert wording is present or when the
   * text also contains tuition-savings marketing.
   */
  private checkSecurity(subject: string, body: string, combined: string): ClassificationResult | null {
    if (!matchesAny(SECURITY_PATTERNS, combined)) {
      return null;
    }
    // Real security alerts won't talk about tuition savings (e.g. "$36,645 on tuition")
    if (TUITION_SAVINGS_MARKETING.test(combined)) {
      return null; // Just marketing
    }
    return verdict(true, "Security/password alert - always important", 1, "security_alert");
  }

  /**
   * Confirmations of an application or enrollment the student submitted.
   *
   * Only `combined` is consulted. Returns null when no confirmation wording
   * is present or when the text also contains "how to apply" marketing.
   */
  private checkStudentAction(subject: string, body: string, combined: string): ClassificationResult | null {
    if (!matchesAny(STUDENT_ACTION_PATTERNS, combined)) {
      return null;
    }
    // Exclude marketing about "how to apply"
    if (APPLY_MARKETING.test(combined)) {
      return null;
    }
    return verdict(
      true,
      "Confirmation of student action (application/enrollment)",
      0.95,
      "student_action_confirmation",
    );
  }

  /**
   * Accepted-student portal, deposit and admission-decision mail.
   *
   * Only `combined` is consulted. Returns null when no acceptance wording is
   * present or when any pre-admission marketing exclusion applies.
   */
  private checkAccepted(subject: string, body: string, combined: string): ClassificationResult | null {
    if (!matchesAny(ACCEPTED_PATTERNS, combined)) {
      return null;
    }
    if (isPreAdmissionMarketing(combined)) {
      return null;
    }
    return verdict(true, "Accepted student portal/deposit information", 0.95, "accepted_student");
  }

  /**
   * Dual-enrollment course information.
   *
   * Only `combined` is consulted; `from` is currently unused. Returns null
   * when no course wording is present or when recruiting language appears.
   */
  private checkDualEnrollment(subject: string, body: string, combined: string, from: string): ClassificationResult | null {
    if (!matchesAny(DUAL_ENROLLMENT_PATTERNS, combined)) {
      return null;
    }
    // Dual enrollment is relevant if it's about actual courses, not marketing
    if (DUAL_ENROLLMENT_MARKETING.test(combined)) {
      return null; // Just marketing
    }
    return verdict(true, "Dual enrollment course information", 0.9, "dual_enrollment");
  }

  /**
   * Scholarship mail, checked in three ordered steps:
   *  1. a subject inviting an application for a named scholarship -> relevant;
   *  2. scholarship mentioned with "not awarded" wording -> NOT relevant;
   *  3. wording that a scholarship was awarded -> relevant.
   * Returns null when none of the steps applies.
   */
  private checkScholarship(subject: string, body: string, combined: string): ClassificationResult | null {
    // Specific scholarship application opportunities come FIRST; this differs
    // from general "apply for scholarships" marketing.
    if (SCHOLARSHIP_APPLY_SUBJECT.test(subject) && NAMED_SCHOLARSHIP.test(combined)) {
      return verdict(
        true,
        "Scholarship application opportunity for accepted student",
        0.75,
        "scholarship_application_opportunity",
      );
    }

    // Negative indicators are checked before the awarded patterns, and only
    // when the word "scholarship" appears at all.
    if (SCHOLARSHIP_MENTION.test(combined) && matchesAny(SCHOLARSHIP_NOT_AWARDED_PATTERNS, combined)) {
      return verdict(
        false,
        "Scholarship mentioned but not actually awarded (held/eligible/apply)",
        0.9,
        "scholarship_not_awarded",
      );
    }

    if (matchesAny(SCHOLARSHIP_AWARDED_PATTERNS, combined)) {
      return verdict(true, "Scholarship actually awarded", 0.95, "scholarship_awarded");
    }

    return null;
  }

  /**
   * Financial aid offers that are ready to review.
   *
   * Only `combined` is consulted. Returns null when no "ready" wording is
   * present or when application / FAFSA reminder wording is also present.
   */
  private checkFinancialAid(subject: string, body: string, combined: string): ClassificationResult | null {
    if (!matchesAny(AID_READY_PATTERNS, combined)) {
      return null;
    }
    if (matchesAny(AID_NOT_READY_PATTERNS, combined)) {
      return null; // Just application info
    }
    return verdict(true, "Financial aid offer ready to review", 0.95, "financial_aid_ready");
  }

  /**
   * Marketing, newsletters and unsolicited outreach.
   *
   * Only `combined` is consulted; `from` is currently unused. Returns a
   * not-relevant result on a match, otherwise null.
   */
  private checkIrrelevant(subject: string, body: string, combined: string, from: string): ClassificationResult | null {
    if (matchesAny(IRRELEVANT_PATTERNS, combined)) {
      return verdict(false, "Marketing/newsletter/unsolicited outreach", 0.95, "irrelevant_marketing");
    }

    // Haven't applied yet = not relevant
    if (NOT_APPLIED.test(combined)) {
      return verdict(false, "Unsolicited email where student has not applied", 0.95, "not_applied");
    }

    return null;
  }
}

/**
 * Convenience wrapper that classifies one email with a fresh {@link EmailClassifier}.
 *
 * @param email Email to classify.
 * @returns The classification produced by {@link EmailClassifier.classify}.
 */
export function classifyEmail(email: EmailInput): ClassificationResult {
  const classifier = new EmailClassifier();
  return classifier.classify(email);
}