this repo has no description
// Email classifier using rule-based approach learned from labeled data.
//
// All rules are plain regular expressions evaluated against lower-cased text.
// Note: the patterns use `.` without the dotAll flag, so a `.*` gap never
// spans a line break inside the subject/body text.

import type { EmailInput, ClassificationResult } from "./types.ts";

/** Security, password and account-access alerts. */
const SECURITY_PATTERNS: readonly RegExp[] = [
  /\bpassword\s+(reset|change|update|expired)\b/,
  /\breset\s+your\s+password\b/,
  /\baccount\s+security\b/,
  /\bsecurity\s+alert\b/,
  /\bunusual\s+(sign[- ]?in|activity)\b/,
  /\bverification\s+code\b/,
  /\b(2fa|mfa|two[- ]factor)\b/,
  /\bcompromised\s+account\b/,
  /\baccount\s+(locked|suspended)\b/,
  /\bsuspicious\s+activity\b/,
];

/** Marketing copy about tuition savings; real security alerts won't talk about it. */
const TUITION_SAVINGS_MARKETING = /\bsaving.*\bon\s+tuition\b|\btuition.*\bsaving\b/;

/** Confirmations of something the student already did (applied, enrolled). */
const STUDENT_ACTION_PATTERNS: readonly RegExp[] = [
  /\bapplication\s+(received|complete|submitted|confirmation)\b/,
  /\breceived\s+your\s+application\b/,
  /\bthank\s+you\s+for\s+(applying|submitting)\b/,
  /\benrollment\s+confirmation\b/,
  /\bconfirmation\s+(of|for)\s+(your\s+)?(application|enrollment)\b/,
  /\byour\s+application\s+(has\s+been|is)\s+(received|complete)\b/,
];

/** "How to apply" style marketing that disqualifies a student-action match. */
const APPLY_MARKETING = /\bhow\s+to\s+apply\b|\bapply\s+now\b|\bstart\s+(your\s+)?application\b/;

/** Accepted-student portal, deposit and admission-decision wording. */
const ACCEPTED_PATTERNS: readonly RegExp[] = [
  /\baccepted\s+(student\s+)?portal\b/,
  /\byour\s+(personalized\s+)?accepted\s+portal\b/,
  /\bdeposit\s+(today|now|by|to\s+reserve)\b/,
  /\breserve\s+your\s+(place|spot)\b/,
  /\bcongratulations.*\baccepted\b/,
  /\byou\s+(have\s+been|are|were)\s+accepted\b/,
  /\badmission\s+(decision|offer)\b/,
  /\benroll(ment)?\s+deposit\b/,
];

/** Pre-admission marketing and promises of a *future* admission decision. */
const ACCEPTED_EXCLUSIONS: readonly RegExp[] = [
  /\bacceptance\s+rate\b|\bhigh\s+acceptance\b|\bpre[- ]admit(ted)?\b|\bautomatic\s+admission\b/,
  /\byou\s+will\s+(also\s+)?receive\s+(an?\s+)?(accelerated\s+)?admission\s+decision\b/,
  /\breceive\s+an\s+admission\s+decision\s+within\b/,
];

/** Dual-enrollment course registration and schedule wording. */
const DUAL_ENROLLMENT_PATTERNS: readonly RegExp[] = [
  /\bdual\s+enrollment\b/,
  /\bcourse\s+(registration|deletion|added|dropped)\b/,
  /\bspring\s+\d{4}\s+(course|on[- ]campus)\b/,
  /\bhow\s+to\s+register\b.*\b(course|class)/,
  /\bcedarville\s+university\).*\b(course|registration)\b/,
];

/** Recruiting language that disqualifies a dual-enrollment match. */
const DUAL_ENROLLMENT_MARKETING = /\blearn\s+more\s+about\b|\binterested\s+in\b|\bconsider\s+joining\b/;

/** Subject line inviting the student to apply for a scholarship. */
const SCHOLARSHIP_APPLY_SUBJECT = /\bapply\s+for\s+(the\s+)?.*\bscholarship\b/;

/** Specific scholarships (President's, Ministry, Impact) treated as real opportunities. */
const SPECIFIC_SCHOLARSHIP = /\bpresident'?s\b|\bministry\b|\bimpact\b/;

const SCHOLARSHIP_MENTION = /\bscholarship\b/;

/** Scholarship is mentioned but only held / eligible / under consideration. */
const SCHOLARSHIP_NOT_AWARDED_PATTERNS: readonly RegExp[] = [
  /\bscholarship\b.*\b(held|reserved)\s+for\s+you\b/,
  /\b(held|reserved)\s+for\s+you\b/,
  /\bconsider(ed|ation)\b.*\bscholarship\b/,
  /\bscholarship\b.*\bconsider(ed|ation)\b/,
  /\beligible\s+for\b.*\bscholarship\b/,
  /\bscholarship\b.*\beligible\b/,
  /\bmay\s+qualify\b.*\bscholarship\b/,
  /\bguaranteed\s+admission\b/,
  /\bpriority\s+consideration\b/,
];

/** Scholarship has actually been awarded. */
const SCHOLARSHIP_AWARDED_PATTERNS: readonly RegExp[] = [
  /\bcongratulations\b.*\bscholarship\b/,
  /\byou\s+(have|received|are\s+awarded|won)\b.*\bscholarship\b/,
  /\bwe\s+(are\s+)?(pleased\s+to\s+)?award(ing)?\b.*\bscholarship\b/,
  /\bscholarship\s+(offer|award)\b/,
  /\breceived\s+a\s+scholarship\b/,
];

/** Financial aid offer / award letter is ready to review. */
const AID_READY_PATTERNS: readonly RegExp[] = [
  /\bfinancial\s+aid\b.*\boffer\b.*\b(ready|available)\b/,
  /\b(ready|available)\b.*\bfinancial\s+aid\b.*\boffer\b/,
  /\baward\s+letter\b.*\b(ready|available|posted|view)\b/,
  /\b(view|review)\s+(your\s+)?award\s+letter\b/,
  /\bfinancial\s+aid\s+package\b.*\b(ready|available|posted)\b/,
  /\byour\s+aid\s+is\s+ready\b/,
];

/** Aid applications and FAFSA reminders; these veto an "aid ready" match. */
const AID_NOT_READY_PATTERNS: readonly RegExp[] = [
  /\blearn\s+more\s+about\b.*\bfinancial\s+aid\b/,
  /\bapply\b.*\b(for\s+)?financial\s+aid\b/,
  /\bfinancial\s+aid\b.*\bapplication\b/,
  /\bcomplete\s+(your\s+)?fafsa\b/,
  /\bconsidered\s+for\b.*\baid\b/,
  /\bpriority\s+(deadline|consideration)\b.*\bfinancial\s+aid\b/,
];

/** Strong indicators of marketing, newsletters and unsolicited outreach. */
const IRRELEVANT_PATTERNS: readonly RegExp[] = [
  // Newsletter/blog content
  /\bstudent\s+life\s+blog\b/,
  /\b(student\s+life\s+)?blog\s+(post|update)\b/,
  /\bnew\s+student\s+life\s+blog\b/,
  /\bnewsletter\b/,
  /\bweekly\s+(digest|update)\b/,

  // Marketing events
  /\bupcoming\s+events\b/,
  /\bjoin\s+us\s+(for|at)\b/,
  /\bopen\s+house\b/,
  /\bvirtual\s+tour\b/,
  /\bcampus\s+(visit|tour|event)\b/,
  /\bmeet\s+(the|our)\s+(students|faculty)\b/,

  // Generic outreach (not applied yet)
  /\bhaven'?t\s+applied.*yet\b/,
  /\bstill\s+time\s+to\s+apply\b/,
  /\bhow\s+is\s+your\s+college\s+search\b/,
  /\bstart\s+(your\s+)?college\s+search\b/,
  /\bexplore\s+(our\s+)?(programs|campus)\b/,

  // Unsolicited outreach patterns
  /\bi\s+hope\s+you\s+have\s+been\s+receiving\s+my\s+emails\b/,
  /\bam\s+i\s+reaching\b/,
  /\byou\s+are\s+on\s+.*\s+(radar|list)\b/,
  /\bi\s+want\s+to\s+make\s+sure\s+you\s+know\b/,
  /\byou'?re\s+invited\s+to\s+submit\b/,
  /\bi'?m\s+eager\s+to\s+consider\s+you\b/,
  /\bsubmit\s+your\s+.*\s+application\b/,
  /\bpriority\s+status\b.*\bsubmit.*application\b/,

  // Priority deadline extensions (spam)
  /\bextended.*\bpriority\s+deadline\b/,
  /\bpriority\s+deadline.*\bextended\b/,

  // Summer camps/programs
  /\bsummer\s+(academy|camp|program)\b/,
  /\bsave\s+the\s+date\b/,

  // Ugly sweaters and other fluff
  /\bugly\s+sweater\b/,
  /\bit'?s\s+.+\s+season\b/,
];

/** Outreach that explicitly says the student has not applied. */
const NOT_APPLIED = /\bhaven'?t\s+applied\b/;

/**
 * Prepares one email field for matching: a missing value becomes the empty
 * string, text is lower-cased, and typographic single quotes (U+2018/U+2019)
 * are folded to ASCII `'` so patterns such as `haven'?t` also match "haven’t".
 */
function normalizeField(text: string | null | undefined): string {
  return (text ?? "").toLowerCase().replace(/[\u2018\u2019]/g, "'");
}

/** Returns true when at least one pattern matches `text`. */
function matchesAny(patterns: readonly RegExp[], text: string): boolean {
  return patterns.some((pattern) => pattern.test(text));
}

/**
 * Rule-based classifier deciding whether an email pertains to the student.
 *
 * Rule groups are evaluated in a fixed order and the first group that reaches
 * a verdict wins: security, student-action confirmations, accepted-student
 * information, dual enrollment, scholarships, financial aid, then known
 * marketing. If no group decides, the email is reported as not relevant with
 * low confidence.
 */
export class EmailClassifier {
  /**
   * Classifies a single email.
   *
   * @param email - Email whose `subject` and `body` are inspected. Missing
   *   (null/undefined) fields are treated as empty text.
   * @returns The verdict of the first rule group that matched, or the
   *   `default_not_relevant` result when none did.
   */
  classify(email: EmailInput): ClassificationResult {
    const subject = normalizeField(email.subject);
    const body = normalizeField(email.body);
    const combined = `${subject} ${body}`;

    const verdict =
      // CRITICAL RULES: Always relevant (security, passwords, account issues)
      this.checkSecurity(combined) ??
      // RESPONSE TO STUDENT ACTION: Application/enrollment confirmations
      this.checkStudentAction(combined) ??
      // ACCEPTED STUDENT: Portal access, deposit reminders, accepted student info
      this.checkAccepted(combined) ??
      // DUAL ENROLLMENT: Course registration, schedules for enrolled students
      this.checkDualEnrollment(combined) ??
      // SCHOLARSHIP AWARDED: Actually awarded (not eligible/apply/consideration)
      this.checkScholarship(subject, combined) ??
      // FINANCIAL AID READY: Explicit offers ready to review (not applications)
      this.checkFinancialAid(combined) ??
      // DEFINITELY NOT RELEVANT: Marketing, newsletters, unsolicited outreach
      this.checkIrrelevant(combined);

    if (verdict) return verdict;

    // DEFAULT: If uncertain, mark as not relevant (fail-safe for spam)
    return {
      pertains: false,
      reason: "No clear relevance indicators found",
      confidence: 0.3,
      matched_rules: ["default_not_relevant"]
    };
  }

  /** Security alerts are relevant unless the text is tuition-savings marketing. */
  private checkSecurity(combined: string): ClassificationResult | null {
    if (!matchesAny(SECURITY_PATTERNS, combined)) return null;
    if (TUITION_SAVINGS_MARKETING.test(combined)) return null; // Just marketing

    return {
      pertains: true,
      reason: "Security/password alert - always important",
      confidence: 1.0,
      matched_rules: ["security_alert"]
    };
  }

  /** Application/enrollment confirmations, excluding "how to apply" marketing. */
  private checkStudentAction(combined: string): ClassificationResult | null {
    if (!matchesAny(STUDENT_ACTION_PATTERNS, combined)) return null;
    if (APPLY_MARKETING.test(combined)) return null;

    return {
      pertains: true,
      reason: "Confirmation of student action (application/enrollment)",
      confidence: 0.95,
      matched_rules: ["student_action_confirmation"]
    };
  }

  /** Accepted-student portal/deposit mail, excluding pre-admission marketing. */
  private checkAccepted(combined: string): ClassificationResult | null {
    if (!matchesAny(ACCEPTED_PATTERNS, combined)) return null;
    if (matchesAny(ACCEPTED_EXCLUSIONS, combined)) return null;

    return {
      pertains: true,
      reason: "Accepted student portal/deposit information",
      confidence: 0.95,
      matched_rules: ["accepted_student"]
    };
  }

  /** Dual enrollment is relevant if it's about actual courses, not marketing. */
  private checkDualEnrollment(combined: string): ClassificationResult | null {
    if (!matchesAny(DUAL_ENROLLMENT_PATTERNS, combined)) return null;
    if (DUAL_ENROLLMENT_MARKETING.test(combined)) return null; // Just marketing

    return {
      pertains: true,
      reason: "Dual enrollment course information",
      confidence: 0.9,
      matched_rules: ["dual_enrollment"]
    };
  }

  /**
   * Scholarship rules, in order: a specific application opportunity named in
   * the subject, then "mentioned but not awarded" (a negative verdict), then
   * "actually awarded".
   */
  private checkScholarship(subject: string, combined: string): ClassificationResult | null {
    // Specific scholarship application opportunities come FIRST; this is
    // different from general "apply for scholarships" marketing.
    if (SCHOLARSHIP_APPLY_SUBJECT.test(subject) && SPECIFIC_SCHOLARSHIP.test(combined)) {
      return {
        pertains: true,
        reason: "Scholarship application opportunity for accepted student",
        confidence: 0.75,
        matched_rules: ["scholarship_application_opportunity"]
      };
    }

    // Negative indicators are checked before the awarded patterns.
    if (SCHOLARSHIP_MENTION.test(combined) && matchesAny(SCHOLARSHIP_NOT_AWARDED_PATTERNS, combined)) {
      return {
        pertains: false,
        reason: "Scholarship mentioned but not actually awarded (held/eligible/apply)",
        confidence: 0.9,
        matched_rules: ["scholarship_not_awarded"]
      };
    }

    if (matchesAny(SCHOLARSHIP_AWARDED_PATTERNS, combined)) {
      return {
        pertains: true,
        reason: "Scholarship actually awarded",
        confidence: 0.95,
        matched_rules: ["scholarship_awarded"]
      };
    }

    return null;
  }

  /** Aid offers ready to review; any application/FAFSA wording vetoes the match. */
  private checkFinancialAid(combined: string): ClassificationResult | null {
    if (!matchesAny(AID_READY_PATTERNS, combined)) return null;
    if (matchesAny(AID_NOT_READY_PATTERNS, combined)) return null; // Just application info

    return {
      pertains: true,
      reason: "Financial aid offer ready to review",
      confidence: 0.95,
      matched_rules: ["financial_aid_ready"]
    };
  }

  /** Known marketing/newsletter wording, then the "haven't applied" fallback. */
  private checkIrrelevant(combined: string): ClassificationResult | null {
    if (matchesAny(IRRELEVANT_PATTERNS, combined)) {
      return {
        pertains: false,
        reason: "Marketing/newsletter/unsolicited outreach",
        confidence: 0.95,
        matched_rules: ["irrelevant_marketing"]
      };
    }

    // Haven't applied yet = not relevant
    if (NOT_APPLIED.test(combined)) {
      return {
        pertains: false,
        reason: "Unsolicited email where student has not applied",
        confidence: 0.95,
        matched_rules: ["not_applied"]
      };
    }

    return null;
  }
}

/**
 * Convenience function: classifies `email` with a fresh {@link EmailClassifier}.
 *
 * @param email - Email to classify.
 * @returns The classification produced by {@link EmailClassifier.classify}.
 */
export function classifyEmail(email: EmailInput): ClassificationResult {
  const classifier = new EmailClassifier();
  return classifier.classify(email);
}