distributed transcription service
thistle.dunkirk.sh
1import { expect, test } from "bun:test";
2import { cleanVTT, parseVTT } from "./vtt-cleaner";
3
// Shared fixture: a WEBVTT header followed by five timed cues. The first two
// cues still contain raw `<|...|>` tokens and a bracketed annotation, which
// the AI-cleanup test below expects to be stripped.
const sampleVTT = [
  "WEBVTT",
  "",
  "00:00:00.000 --> 00:00:03.480",
  "<|startoftranscript|> [SIDE CONVERSATION]<|endoftext|>",
  "",
  "00:00:03.480 --> 00:00:05.000",
  "<|startoftranscript|> Yes?",
  "",
  "00:00:05.000 --> 00:00:08.000",
  "So with this course packet, what quiz is and exams, and if I can study through here, what you talk about?",
  "",
  "00:00:08.000 --> 00:00:10.000",
  "And I give you a good review every time.",
  "",
  "00:00:10.000 --> 00:00:12.000",
  "Yeah, so I'd be good to just study that and then we can do it.",
].join("\n");
20
// Smoke test: the shared fixture yields at least one segment, and the first
// segment exposes the timestamp, text, start and end fields.
test("parseVTT extracts segments correctly", () => {
  const parsed = parseVTT(sampleVTT);

  expect(parsed.length).toBeGreaterThan(0);

  // Only the first segment is inspected; `?.` keeps a missing segment from
  // throwing so the matcher reports the failure instead.
  const firstSegment = parsed[0];
  expect(firstSegment?.timestamp).toContain("-->");
  expect(firstSegment?.text).toBeDefined();
  expect(firstSegment?.start).toBeGreaterThanOrEqual(0);
  expect(firstSegment?.end).toBeGreaterThanOrEqual(0);
});
30
// A document holding only the WEBVTT header must parse to zero segments.
test("parseVTT handles empty VTT", () => {
  const parsed = parseVTT("WEBVTT\n\n");

  expect(parsed.length).toBe(0);
});
37
// With LLM_API_KEY removed from the environment, cleanVTT must still return
// output that looks like VTT (header plus at least one cue arrow).
test("cleanVTT preserves VTT format when AI key not available", async () => {
  // Save original env var
  const originalKey = process.env.LLM_API_KEY;

  // Remove key to test fallback
  delete process.env.LLM_API_KEY;

  try {
    const result = await cleanVTT("test-vtt", sampleVTT);

    expect(result).toContain("WEBVTT");
    expect(result).toContain("-->");
  } finally {
    // Restore in `finally` so a failed assertion or a rejected cleanVTT call
    // cannot leave the key deleted for later tests. Compare against
    // `undefined` so an originally empty-string value is restored as well.
    if (originalKey !== undefined) {
      process.env.LLM_API_KEY = originalKey;
    }
  }
});
55
// A header-only VTT document must come back from cleanVTT unchanged.
test("cleanVTT preserves empty VTT", async () => {
  const emptyVTT = "WEBVTT\n\n";

  // Save and remove API key to avoid burning tokens
  const originalKey = process.env.LLM_API_KEY;
  delete process.env.LLM_API_KEY;

  try {
    const result = await cleanVTT("test-empty", emptyVTT);

    expect(result).toBe(emptyVTT);
  } finally {
    // Restore in `finally` so a failed assertion or a rejected cleanVTT call
    // cannot leave the key deleted for later tests. Compare against
    // `undefined` so an originally empty-string value is restored as well.
    if (originalKey !== undefined) {
      process.env.LLM_API_KEY = originalKey;
    }
  }
});
72
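// AI integration test - skipped by default to avoid burning credits.
// `test.skip` stays skipped regardless of the name filter, so to run it,
// temporarily change `test.skip` to `test` and then run:
//   bun test src/lib/vtt-cleaner.test.ts --test-name-pattern "AI"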
// Integration check against the real LLM backend (30 s timeout). Registered
// with `test.skip`, and additionally bails out early when no key is set.
test.skip("cleanVTT uses AI when available", async () => {
  const hasApiKey = Boolean(process.env.LLM_API_KEY);
  if (!hasApiKey) {
    console.log("Skipping AI test - no LLM_API_KEY set");
    return;
  }

  const cleaned = await cleanVTT("test-ai", sampleVTT);

  // Output must still look like a VTT document.
  for (const marker of ["WEBVTT", "-->"]) {
    expect(cleaned).toContain(marker);
  }

  // AI should clean up tags
  for (const leftover of ["<|startoftranscript|>", "[SIDE CONVERSATION]"]) {
    expect(cleaned).not.toContain(leftover);
  }

  // Should have paragraph formatting
  expect(cleaned).toContain("Paragraph");

  console.log("AI-cleaned VTT preview:", cleaned.slice(0, 300));
}, 30000);