this repo has no description
1#!/usr/bin/env bun
2// Import labeled emails into the main dataset and evaluate
3
4import { readFileSync, writeFileSync } from "fs";
5import { classifyEmail } from "./classifier.ts";
6import type { EmailInput } from "./types.ts";
7
/**
 * One human-labeled email as it appears in a labeled-export file.
 *
 * Fields not mentioned as "used" below are not inspected by this script;
 * they are copied through unchanged when the email is merged into the dataset.
 */
interface LabeledEmail {
  /** Thread identifier; used as the de-duplication key when importing. */
  thread_id: string;
  /** Subject line; passed to the classifier and shown in failure reports. */
  subject: string;
  /** Sender; passed to the classifier and shown in failure reports. */
  from: string;
  /** Recipient(s), if recorded — presumably the raw header value; TODO confirm. */
  to?: string;
  /** CC recipient(s), if recorded — presumably the raw header value; TODO confirm. */
  cc?: string;
  /** Email date as a string; the format is not checked here. */
  date: string;
  /** Message body; passed to the classifier and previewed in failure reports. */
  body: string;
  /** Optional list of label names — presumably mailbox labels; verify against the exporter. */
  labels?: string[];
  /** Optional inbox flag; not read by this script. */
  is_in_inbox?: boolean;
  /** The human label: the expected value of the classifier's `pertains` result. */
  pertains: boolean;
  /** Labeler's explanation; printed next to the expected label in failure reports. */
  reason: string;
  /** When the label was assigned — presumably a timestamp string; format not checked here. */
  labeled_at: string;
}
22
/**
 * Shape expected of the labeled-export file given as the first CLI argument.
 *
 * Only `emails` is read by this script; the remaining fields are metadata
 * that is parsed but not otherwise used here.
 */
interface LabeledData {
  /** Export time of the file, as a string. */
  exported_at: string;
  /** Optional labeling time for the file as a whole. */
  labeled_at?: string;
  /** Email count recorded in the file (not cross-checked against `emails.length` here). */
  total_count: number;
  /** Optional label name associated with the export. */
  label?: string;
  /** The labeled emails to import and evaluate. */
  emails: LabeledEmail[];
}
30
/**
 * A labeled email as stored in the main dataset file.
 */
interface DatasetEmail extends LabeledEmail {
  /** Label confidence; the importer writes "high" for every newly imported email. */
  confidence?: string;
}
34
/**
 * Shape of the main dataset file that imported emails are merged into.
 */
interface Dataset {
  /** Overwritten with the current ISO timestamp each time the importer saves. */
  exported_at: string;
  /** Overwritten with `emails.length` each time the importer saves. */
  total_count: number;
  /** Label name stored with the dataset; not modified by the importer. */
  label: string;
  /** All emails in the dataset; newly imported emails are appended. */
  emails: DatasetEmail[];
}
41
/**
 * Reads and parses a JSON file that must contain a top-level `emails` array.
 *
 * Prints an error and terminates the process with exit code 1 when the file
 * cannot be read, is not valid JSON, or lacks an `emails` array. Only the
 * presence of the array is verified; individual entries are not validated.
 */
function loadEmailFile<T extends { emails: unknown[] }>(path: string): T {
  let parsed: unknown;
  try {
    parsed = JSON.parse(readFileSync(path, "utf-8"));
  } catch (e) {
    console.error(`Failed to read ${path}:`, e);
    return process.exit(1);
  }

  // Without this check a malformed file surfaces later as an opaque
  // "cannot read properties of undefined" TypeError.
  const emails = (parsed as { emails?: unknown } | null)?.emails;
  if (!Array.isArray(emails)) {
    console.error(`Failed to read ${path}: expected a JSON object with an "emails" array`);
    return process.exit(1);
  }
  return parsed as T;
}

/**
 * Returns the incoming emails whose `thread_id` is neither already known nor
 * repeated earlier in `incoming` (the first occurrence wins). Order is preserved.
 */
function selectNewEmails(incoming: LabeledEmail[], knownThreadIds: Iterable<string>): LabeledEmail[] {
  const seen = new Set(knownThreadIds);
  const fresh: LabeledEmail[] = [];
  for (const email of incoming) {
    if (seen.has(email.thread_id)) continue;
    // Record the id so a later copy inside the same input file is skipped too.
    seen.add(email.thread_id);
    fresh.push(email);
  }
  return fresh;
}

/**
 * CLI entry point: merges newly labeled emails into the dataset file, then
 * runs the classifier on the imported emails and prints an accuracy report.
 *
 * Arguments (from `process.argv`):
 *   1. path to the labeled-export JSON file (required);
 *   2. path to the dataset JSON file (defaults to "data/labeled-emails.json").
 *
 * Emails whose `thread_id` already exists in the dataset, or is repeated
 * within the input file, are skipped. The dataset file is rewritten before
 * evaluation, so the import is saved regardless of the evaluation outcome.
 * Exits with code 1 on missing arguments or unreadable/malformed files.
 */
function main() {
  const [inputFile, datasetArg] = process.argv.slice(2);

  if (inputFile === undefined) {
    console.error("Usage: bun import-labeled.ts <labeled-file.json> [dataset-file.json]");
    process.exit(1);
  }

  const datasetFile = datasetArg || "data/labeled-emails.json";

  console.log(`📥 Importing labeled emails from ${inputFile}...`);
  const newData = loadEmailFile<LabeledData>(inputFile);

  console.log(`📊 Loading existing dataset from ${datasetFile}...`);
  const dataset = loadEmailFile<Dataset>(datasetFile);

  // Check for duplicates by thread_id (against the dataset and within the input)
  const newEmails = selectNewEmails(
    newData.emails,
    dataset.emails.map(e => e.thread_id),
  );
  const skipped = newData.emails.length - newEmails.length;

  if (skipped > 0) {
    console.log(`⚠️ Skipped ${skipped} duplicate emails`);
  }

  if (newEmails.length === 0) {
    console.log("❌ No new emails to import");
    return;
  }

  console.log(`✅ Importing ${newEmails.length} new labeled emails`);

  // Add confidence field for consistency (human labels are high confidence)
  const emailsWithConfidence: DatasetEmail[] = newEmails.map(e => ({
    ...e,
    confidence: "high",
  }));

  // Merge into dataset
  dataset.emails.push(...emailsWithConfidence);
  dataset.total_count = dataset.emails.length;
  dataset.exported_at = new Date().toISOString();

  // Save updated dataset
  writeFileSync(datasetFile, JSON.stringify(dataset, null, 2));
  console.log(`💾 Saved ${dataset.total_count} total emails to ${datasetFile}`);

  // Evaluate the classifier on new emails
  console.log("\n" + "=".repeat(80));
  console.log("🧪 Evaluating classifier on newly labeled emails...");
  console.log("=".repeat(80));

  let correct = 0;
  let incorrect = 0;
  const failures: Array<{
    email: LabeledEmail;
    expected: boolean;
    got: boolean;
    reason: string;
  }> = [];

  for (const email of newEmails) {
    const input: EmailInput = {
      subject: email.subject,
      from: email.from,
      body: email.body,
    };

    const result = classifyEmail(input);

    if (result.pertains === email.pertains) {
      correct++;
    } else {
      incorrect++;
      failures.push({
        email,
        expected: email.pertains,
        got: result.pertains,
        reason: result.reason,
      });
    }
  }

  // newEmails is non-empty here (early return above), so no division by zero.
  const accuracy = ((correct / newEmails.length) * 100).toFixed(1);

  console.log(`\nResults for ${newEmails.length} new emails:`);
  console.log(` ✅ Correct: ${correct}`);
  console.log(` ❌ Incorrect: ${incorrect}`);
  console.log(` 📊 Accuracy: ${accuracy}%`);

  if (failures.length > 0) {
    console.log("\n" + "=".repeat(80));
    console.log("❌ FAILURES - Update classifier to fix these:");
    console.log("=".repeat(80));

    failures.forEach((f, i) => {
      // The human label said "relevant" but the classifier missed it => false negative.
      console.log(`\n${i + 1}. ${f.expected ? "FALSE NEGATIVE" : "FALSE POSITIVE"}`);
      console.log(` Subject: ${f.email.subject}`);
      console.log(` From: ${f.email.from}`);
      console.log(` Expected: ${f.expected ? "RELEVANT" : "NOT RELEVANT"} (${f.email.reason})`);
      console.log(` Got: ${f.got ? "RELEVANT" : "NOT RELEVANT"} (${f.reason})`);
      console.log(` Body preview: ${f.email.body.slice(0, 200)}...`);
    });

    console.log("\n" + "=".repeat(80));
    console.log("Next steps:");
    console.log(" 1. Review failures above");
    console.log(" 2. Update classifier.ts with new patterns");
    console.log(" 3. Run: bun test");
    console.log(" 4. Run: bun run evaluate.ts");
    console.log(" 5. Run: bun run generate-gscript.ts");
    console.log("=".repeat(80));
  } else {
    console.log("\n" + "=".repeat(80));
    console.log("🎉 All new emails classified correctly!");
    console.log("=".repeat(80));
  }
}
176
// Script entry point: run the import/evaluation immediately when this file is executed.
main();