// distributed transcription service
// thistle.dunkirk.sh
import { expect, test } from "bun:test";
import { cleanVTT, parseVTT } from "./vtt-cleaner";
3
/**
 * WebVTT fixture used by the tests in this file: a `WEBVTT` header line
 * followed by five timestamped cues separated by blank lines.
 *
 * The first two cues deliberately contain raw `<|startoftranscript|>` /
 * `<|endoftext|>` tokens and a `[SIDE CONVERSATION]` marker so that cleaning
 * behaviour has something to remove.
 */
const sampleVTT = `WEBVTT

00:00:00.000 --> 00:00:03.480
<|startoftranscript|> [SIDE CONVERSATION]<|endoftext|>

00:00:03.480 --> 00:00:05.000
<|startoftranscript|> Yes?

00:00:05.000 --> 00:00:08.000
So with this course packet, what quiz is and exams, and if I can study through here, what you talk about?

00:00:08.000 --> 00:00:10.000
And I give you a good review every time.

00:00:10.000 --> 00:00:12.000
Yeah, so I'd be good to just study that and then we can do it.`;
20
// Parses the shared fixture and checks the shape of the first segment:
// it must expose a timestamp string containing "-->", a defined text value,
// and non-negative numeric start/end values.
test("parseVTT extracts segments correctly", () => {
  const segments = parseVTT(sampleVTT);

  expect(segments.length).toBeGreaterThan(0);

  const first = segments[0];
  expect(first?.timestamp).toContain("-->");
  expect(first?.text).toBeDefined();
  expect(first?.start).toBeGreaterThanOrEqual(0);
  expect(first?.end).toBeGreaterThanOrEqual(0);
});
30
// A document holding only the "WEBVTT" header and blank lines has no cues,
// so parsing it must yield zero segments.
test("parseVTT handles empty VTT", () => {
  const headerOnlyVTT = "WEBVTT\n\n";

  const segments = parseVTT(headerOnlyVTT);

  expect(segments.length).toBe(0);
});
37
// With LLM_API_KEY removed from the environment, cleanVTT must still return
// output that contains the "WEBVTT" header and at least one "-->" cue arrow.
test("cleanVTT preserves VTT format when AI key not available", async () => {
  // Remember the current value so the environment can be put back afterwards.
  const originalKey = process.env.LLM_API_KEY;

  // Remove the key to test the fallback.
  delete process.env.LLM_API_KEY;

  try {
    const result = await cleanVTT("test-vtt", sampleVTT);

    expect(result).toContain("WEBVTT");
    expect(result).toContain("-->");
  } finally {
    // Restore in `finally` so a rejected cleanVTT call or a failed assertion
    // cannot leak the modified environment into later tests. Compare against
    // undefined (not truthiness) so an empty-string value is restored too.
    if (originalKey !== undefined) {
      process.env.LLM_API_KEY = originalKey;
    }
  }
});
55
// A header-only document must come back from cleanVTT exactly as it went in.
test("cleanVTT preserves empty VTT", async () => {
  const headerOnlyVTT = "WEBVTT\n\n";

  const result = await cleanVTT("test-empty", headerOnlyVTT);

  expect(result).toBe(headerOnlyVTT);
});
62
// AI integration test - skipped by default to avoid burning credits.
// Run with: bun test src/lib/vtt-cleaner.test.ts --test-name-pattern "AI"
// NOTE(review): `test.skip` presumably keeps this test skipped even when the
// name pattern matches; it likely has to be switched to `test` temporarily to
// actually run - confirm against the Bun test runner docs.
test.skip("cleanVTT uses AI when available", async () => {
  // Without a key there is nothing to exercise; bail out instead of failing.
  if (!process.env.LLM_API_KEY) {
    console.log("Skipping AI test - no LLM_API_KEY set");
    return;
  }

  const result = await cleanVTT("test-ai", sampleVTT);

  // Output must still look like a VTT document.
  expect(result).toContain("WEBVTT");
  expect(result).toContain("-->");

  // AI should clean up tags
  expect(result).not.toContain("<|startoftranscript|>");
  expect(result).not.toContain("[SIDE CONVERSATION]");

  // Should have paragraph formatting
  expect(result).toContain("Paragraph");

  console.log("AI-cleaned VTT preview:", result.substring(0, 300));
}, 30000); // 30000 ms timeout passed to the test runner