this repo has no description
at main 5.0 kB view raw
#!/usr/bin/env bun
// Import labeled emails into the main dataset and evaluate the classifier
// on the newly imported ones.
//
// Usage: bun import-labeled.ts <labeled-file.json> [dataset-file.json]

import { readFileSync, writeFileSync } from "fs";
import { classifyEmail } from "./classifier.ts";
import type { EmailInput } from "./types.ts";

/** One human-labeled email as stored in a labeled export file. */
interface LabeledEmail {
  /** Identifier used for duplicate detection during import. */
  thread_id: string;
  subject: string;
  from: string;
  to?: string;
  cc?: string;
  date: string;
  body: string;
  labels?: string[];
  is_in_inbox?: boolean;
  /** Human label; compared against the classifier's `pertains` result. */
  pertains: boolean;
  /** Human-provided justification, printed next to the expected label. */
  reason: string;
  labeled_at: string;
}

/** Shape of the labeled input file passed as the first CLI argument. */
interface LabeledData {
  exported_at: string;
  labeled_at?: string;
  total_count: number;
  label?: string;
  emails: LabeledEmail[];
}

/** A labeled email as stored in the main dataset. */
interface DatasetEmail extends LabeledEmail {
  confidence?: string;
}

/** Shape of the main dataset file that imports are merged into. */
interface Dataset {
  exported_at: string;
  total_count: number;
  label: string;
  emails: DatasetEmail[];
}

/** A single disagreement between the human label and the classifier. */
interface ClassificationFailure {
  email: LabeledEmail;
  expected: boolean;
  got: boolean;
  /** Reason reported by the classifier for its decision. */
  reason: string;
}

/** Dataset used when no second CLI argument is given. */
const DEFAULT_DATASET_FILE = "data/labeled-emails.json";

/** Maximum number of body characters shown in a failure report. */
const BODY_PREVIEW_LENGTH = 200;

/**
 * Reads `path`, parses it as JSON and checks that the top-level value is an
 * object carrying an `emails` array. Individual emails are NOT validated.
 *
 * On any failure (unreadable file, invalid JSON, missing `emails` array) the
 * error is logged and the process exits with status 1.
 */
function loadEmailFile<T extends { emails: unknown[] }>(path: string): T {
  try {
    const parsed: unknown = JSON.parse(readFileSync(path, "utf-8"));
    if (
      typeof parsed !== "object" ||
      parsed === null ||
      !Array.isArray((parsed as { emails?: unknown }).emails)
    ) {
      throw new Error('expected a JSON object with an "emails" array');
    }
    // Only the presence of the `emails` array is verified at runtime.
    return parsed as T;
  } catch (e) {
    console.error(`Failed to read ${path}:`, e);
    process.exit(1);
  }
}

/**
 * Returns the emails from `incoming` whose thread_id is neither present in
 * `existing` nor already seen earlier in `incoming` itself. Input order is
 * preserved and the first occurrence of a thread_id wins.
 */
function selectNewEmails(
  existing: readonly { thread_id: string }[],
  incoming: readonly LabeledEmail[],
): LabeledEmail[] {
  const seenThreadIds = new Set(existing.map((e) => e.thread_id));
  const fresh: LabeledEmail[] = [];

  for (const email of incoming) {
    if (seenThreadIds.has(email.thread_id)) {
      continue;
    }
    // Record the id so a repeat later in the same batch is skipped as well.
    seenThreadIds.add(email.thread_id);
    fresh.push(email);
  }

  return fresh;
}

/**
 * Runs the classifier over `emails` and compares each result with the human
 * label. Returns the number of matches and the list of mismatches.
 */
function evaluateClassifier(emails: readonly LabeledEmail[]): {
  correct: number;
  failures: ClassificationFailure[];
} {
  let correct = 0;
  const failures: ClassificationFailure[] = [];

  for (const email of emails) {
    const input: EmailInput = {
      subject: email.subject,
      from: email.from,
      body: email.body,
    };

    const result = classifyEmail(input);

    if (result.pertains === email.pertains) {
      correct++;
    } else {
      failures.push({
        email,
        expected: email.pertains,
        got: result.pertains,
        reason: result.reason,
      });
    }
  }

  return { correct, failures };
}

/** Prints every misclassified email followed by the suggested next steps. */
function reportFailures(failures: readonly ClassificationFailure[]): void {
  const rule = "=".repeat(80);

  console.log("\n" + rule);
  console.log("❌ FAILURES - Update classifier to fix these:");
  console.log(rule);

  failures.forEach((f, i) => {
    const body = f.email.body;
    // Only add the ellipsis when the body was actually cut short.
    const preview =
      body.length > BODY_PREVIEW_LENGTH
        ? `${body.slice(0, BODY_PREVIEW_LENGTH)}...`
        : body;

    console.log(`\n${i + 1}. ${f.expected ? "FALSE NEGATIVE" : "FALSE POSITIVE"}`);
    console.log(` Subject: ${f.email.subject}`);
    console.log(` From: ${f.email.from}`);
    console.log(` Expected: ${f.expected ? "RELEVANT" : "NOT RELEVANT"} (${f.email.reason})`);
    console.log(` Got: ${f.got ? "RELEVANT" : "NOT RELEVANT"} (${f.reason})`);
    console.log(` Body preview: ${preview}`);
  });

  console.log("\n" + rule);
  console.log("Next steps:");
  console.log(" 1. Review failures above");
  console.log(" 2. Update classifier.ts with new patterns");
  console.log(" 3. Run: bun test");
  console.log(" 4. Run: bun run evaluate.ts");
  console.log(" 5. Run: bun run generate-gscript.ts");
  console.log(rule);
}

/**
 * CLI entry point.
 *
 * 1. Loads the labeled file (argv[2]) and the dataset (argv[3], or the default).
 * 2. Appends every email whose thread_id is not already known, tagged with
 *    `confidence: "high"`, and rewrites the dataset file.
 * 3. Runs the classifier on the newly imported emails and prints a report.
 *
 * Exits with status 1 on missing arguments or on read/write failures.
 */
function main(): void {
  const [inputFile, datasetArg] = process.argv.slice(2);

  if (inputFile === undefined) {
    console.error("Usage: bun import-labeled.ts <labeled-file.json> [dataset-file.json]");
    process.exit(1);
  }

  // `||` on purpose: an empty-string argument also falls back to the default.
  const datasetFile = datasetArg || DEFAULT_DATASET_FILE;

  console.log(`📥 Importing labeled emails from ${inputFile}...`);
  const newData = loadEmailFile<LabeledData>(inputFile);

  console.log(`📊 Loading existing dataset from ${datasetFile}...`);
  const dataset = loadEmailFile<Dataset>(datasetFile);

  // Drop anything already in the dataset or repeated within the input file.
  const newEmails = selectNewEmails(dataset.emails, newData.emails);
  const skipped = newData.emails.length - newEmails.length;

  if (skipped > 0) {
    console.log(`⚠️ Skipped ${skipped} duplicate emails`);
  }

  if (newEmails.length === 0) {
    console.log("❌ No new emails to import");
    return;
  }

  console.log(`✅ Importing ${newEmails.length} new labeled emails`);

  // Add confidence field for consistency (human labels are high confidence)
  const emailsWithConfidence: DatasetEmail[] = newEmails.map((e) => ({
    ...e,
    confidence: "high",
  }));

  // Merge into dataset
  dataset.emails.push(...emailsWithConfidence);
  dataset.total_count = dataset.emails.length;
  dataset.exported_at = new Date().toISOString();

  // Save updated dataset
  try {
    writeFileSync(datasetFile, JSON.stringify(dataset, null, 2));
  } catch (e) {
    console.error(`Failed to write ${datasetFile}:`, e);
    process.exit(1);
  }
  console.log(`💾 Saved ${dataset.total_count} total emails to ${datasetFile}`);

  // Evaluate the classifier on new emails
  const rule = "=".repeat(80);
  console.log("\n" + rule);
  console.log("🧪 Evaluating classifier on newly labeled emails...");
  console.log(rule);

  const { correct, failures } = evaluateClassifier(newEmails);
  const incorrect = failures.length;
  // newEmails is non-empty here (early return above), so no division by zero.
  const accuracy = ((correct / newEmails.length) * 100).toFixed(1);

  console.log(`\nResults for ${newEmails.length} new emails:`);
  console.log(` ✅ Correct: ${correct}`);
  console.log(` ❌ Incorrect: ${incorrect}`);
  console.log(` 📊 Accuracy: ${accuracy}%`);

  if (failures.length > 0) {
    reportFailures(failures);
  } else {
    console.log("\n" + rule);
    console.log("🎉 All new emails classified correctly!");
    console.log(rule);
  }
}

main();