馃 distributed transcription service thistle.dunkirk.sh
import { expect, test } from "bun:test";
import { cleanVTT, parseVTT } from "./vtt-cleaner";

// Fixture: a short raw transcript in WebVTT form. The first two cues carry
// model control tokens (<|startoftranscript|>, <|endoftext|>) and a bracketed
// "[SIDE CONVERSATION]" marker, which the AI integration test below expects
// to be absent from the cleaned output.
const sampleVTT = `WEBVTT

00:00:00.000 --> 00:00:03.480
<|startoftranscript|> [SIDE CONVERSATION]<|endoftext|>

00:00:03.480 --> 00:00:05.000
<|startoftranscript|> Yes?

00:00:05.000 --> 00:00:08.000
So with this course packet, what quiz is and exams, and if I can study through here, what you talk about?

00:00:08.000 --> 00:00:10.000
And I give you a good review every time.

00:00:10.000 --> 00:00:12.000
Yeah, so I'd be good to just study that and then we can do it.`;

// parseVTT should yield at least one segment, and the first segment should
// expose a timestamp string plus text and non-negative start/end values.
test("parseVTT extracts segments correctly", () => {
  const segments = parseVTT(sampleVTT);

  expect(segments.length).toBeGreaterThan(0);
  expect(segments[0]?.timestamp).toContain("-->");
  expect(segments[0]?.text).toBeDefined();
  expect(segments[0]?.start).toBeGreaterThanOrEqual(0);
  expect(segments[0]?.end).toBeGreaterThanOrEqual(0);
});

// A header-only document contains no cues, so no segments are expected.
test("parseVTT handles empty VTT", () => {
  const emptyVTT = "WEBVTT\n\n";
  const segments = parseVTT(emptyVTT);

  expect(segments.length).toBe(0);
});

// With LLM_API_KEY removed from the environment, the output of cleanVTT is
// still expected to look like a VTT document (header and cue arrows present).
test("cleanVTT preserves VTT format when AI key not available", async () => {
  // Save original env var
  const originalKey = process.env.LLM_API_KEY;

  // Remove key to test fallback
  delete process.env.LLM_API_KEY;

  try {
    const result = await cleanVTT("test-vtt", sampleVTT);

    expect(result).toContain("WEBVTT");
    expect(result).toContain("-->");
  } finally {
    // Restore in `finally` so a failed assertion or a rejected cleanVTT call
    // cannot leak the mutated environment into later tests. Compare against
    // undefined (not truthiness) so an originally empty-string key is put
    // back as well.
    if (originalKey !== undefined) {
      process.env.LLM_API_KEY = originalKey;
    }
  }
});

// A header-only document is expected to come back unchanged.
test("cleanVTT preserves empty VTT", async () => {
  const emptyVTT = "WEBVTT\n\n";
  const result = await cleanVTT("test-empty", emptyVTT);

  expect(result).toBe(emptyVTT);
});

// AI integration test - skip by default to avoid burning credits
// Run with: bun test src/lib/vtt-cleaner.test.ts --test-name-pattern "AI"
test.skip("cleanVTT uses AI when available", async () => {
  // Guard for manual runs: without a key there is nothing to exercise.
  if (!process.env.LLM_API_KEY) {
    console.log("Skipping AI test - no LLM_API_KEY set");
    return;
  }

  const result = await cleanVTT("test-ai", sampleVTT);

  expect(result).toContain("WEBVTT");
  expect(result).toContain("-->");

  // AI should clean up tags
  expect(result).not.toContain("<|startoftranscript|>");
  expect(result).not.toContain("[SIDE CONVERSATION]");

  // Should have paragraph formatting
  expect(result).toContain("Paragraph");

  console.log("AI-cleaned VTT preview:", result.substring(0, 300));
}, 30000); // third argument: 30 s timeout for this test