distributed transcription service
thistle.dunkirk.sh
1import { test, expect } from "bun:test";
2import { cleanVTT, parseVTT } from "./vtt-cleaner";
3
// Shared fixture: a short WebVTT document with five cues. The first two cues
// carry raw model tags (<|startoftranscript|>, <|endoftext|>) and a bracketed
// annotation, which the AI test below expects to be stripped.
const sampleVTT = [
  "WEBVTT",
  "",
  "00:00:00.000 --> 00:00:03.480",
  "<|startoftranscript|> [SIDE CONVERSATION]<|endoftext|>",
  "",
  "00:00:03.480 --> 00:00:05.000",
  "<|startoftranscript|> Yes?",
  "",
  "00:00:05.000 --> 00:00:08.000",
  "So with this course packet, what quiz is and exams, and if I can study through here, what you talk about?",
  "",
  "00:00:08.000 --> 00:00:10.000",
  "And I give you a good review every time.",
  "",
  "00:00:10.000 --> 00:00:12.000",
  "Yeah, so I'd be good to just study that and then we can do it.",
].join("\n");
20
// Smoke test: the sample parses into at least one segment, and the first
// segment exposes timestamp, text, start and end.
test("parseVTT extracts segments correctly", () => {
  const parsed = parseVTT(sampleVTT);
  const firstSegment = parsed[0];

  expect(parsed.length).toBeGreaterThan(0);

  // Optional chaining keeps a missing first segment an assertion failure
  // rather than a TypeError.
  expect(firstSegment?.timestamp).toContain("-->");
  expect(firstSegment?.text).toBeDefined();
  expect(firstSegment?.start).toBeGreaterThanOrEqual(0);
  expect(firstSegment?.end).toBeGreaterThanOrEqual(0);
});
30
// A document that is only the WEBVTT header plus blank lines must parse to
// zero segments.
test("parseVTT handles empty VTT", () => {
  const { length: segmentCount } = parseVTT("WEBVTT\n\n");

  expect(segmentCount).toBe(0);
});
37
// With LLM_API_KEY removed from the environment, cleanVTT must still return
// VTT-shaped output (a WEBVTT header and at least one "-->" cue separator).
test("cleanVTT preserves VTT format when AI key not available", async () => {
  // Capture the current value so the environment can be put back afterwards.
  const originalKey = process.env.LLM_API_KEY;

  // Remove the key to exercise the no-key path.
  delete process.env.LLM_API_KEY;

  try {
    const result = await cleanVTT("test-vtt", sampleVTT);

    expect(result).toContain("WEBVTT");
    expect(result).toContain("-->");
  } finally {
    // Restore in `finally` so a failing assertion above cannot leave the key
    // deleted for later tests in this process. Compare against undefined
    // (not truthiness) so an originally empty-string value is restored too.
    if (originalKey !== undefined) {
      process.env.LLM_API_KEY = originalKey;
    }
  }
});
55
// An input with no cues must come back from cleanVTT exactly as given.
test("cleanVTT preserves empty VTT", async () => {
  const input = "WEBVTT\n\n";

  expect(await cleanVTT("test-empty", input)).toBe(input);
});
62
// Integration test: only runs if LLM_API_KEY is set. Using test.skipIf makes
// the runner report it as skipped instead of as a pass when the key is absent.
// The condition is evaluated once, when this file is loaded.
test.skipIf(!process.env.LLM_API_KEY)(
  "cleanVTT uses AI when available",
  async () => {
    const result = await cleanVTT("test-ai", sampleVTT);

    // Output must still be VTT-shaped.
    expect(result).toContain("WEBVTT");
    expect(result).toContain("-->");

    // AI should clean up tags
    expect(result).not.toContain("<|startoftranscript|>");
    expect(result).not.toContain("[SIDE CONVERSATION]");

    // Should have paragraph formatting
    expect(result).toContain("Paragraph");

    console.log("AI-cleaned VTT preview:", result.substring(0, 300));
  },
  // 30 s timeout for this test.
  30000,
);