馃 distributed transcription service thistle.dunkirk.sh
import { test, expect } from "bun:test";
import { cleanAndGetParagraphBoundaries } from "./transcript-cleaner";

/**
 * Tests for `cleanAndGetParagraphBoundaries`.
 *
 * Three scenarios are covered:
 *  1. a realistic multi-paragraph transcript is cleaned and split into paragraphs,
 *  2. an empty transcript yields an empty paragraph list,
 *  3. a missing OPENROUTER_API_KEY is reported through `result.error`.
 */

test("cleanAndGetParagraphBoundaries cleans transcript and returns paragraph boundaries", async () => {
	// Use a longer, more realistic transcript sample with natural paragraph breaks.
	// The template literal is kept at column 0 so no indentation leaks into the text.
	const rawTranscript = `[SIDE CONVERSATION] Today in chapel we are talking about the fact that we believe in having gospel conversations. I'm gonna run my own PowerPoint. I'm gonna jump around. It's gonna be a little more conversational than normal. It's not gonna be like one of the normal sermons, although I know me and my tendency it'll turn into a sermon at some point just because that's the way God made me, so I can't help it.

Alright, so when it starts just have fun with it. We'll go on. Here's what it says in our doctrinal statement. It says, "Due to the commission of Christ and the urgency of the Gospel, all believers are to engage in Gospel conversations." How many of you believe that? That's pretty weak. How many of you believe that?

To live God-honoring lives and to work continuously for the spread of the Gospel to their neighbors and the nations. Now, let's be honest, as we start off this morning, all of us could do a better job with personal evangelism, and all of us could do a better job with a heart for missions.

So I'm not up here talking to you about something I have conquered or mastered. I'm not the expert on this. In fact, when it comes to personal evangelism in my own strength, I'm often a complete failure. But I have found that even in my weakness, God can use me in powerful ways when I make myself available to Him.`;

	// Create mock segments from the raw transcript (simulating whisper output):
	// one segment per sentence, with back-to-back synthetic timestamps.
	const sentences = rawTranscript.split(/\.\s+/);
	const mockSegments: { index?: number; start?: number; end?: number; text: string }[] = [];
	let timeOffset = 0;
	for (let i = 0; i < sentences.length; i++) {
		const sentence = sentences[i]?.trim();
		if (!sentence) continue;
		const duration = sentence.split(/\s+/).length * 0.3; // ~0.3s per word
		mockSegments.push({
			index: i,
			start: timeOffset,
			end: timeOffset + duration,
			text: sentence,
		});
		timeOffset += duration;
	}

	const result = await cleanAndGetParagraphBoundaries({
		transcriptId: "test-123",
		rawTranscript,
		segments: mockSegments,
		maxWordsMove: 3,
	});

	// Check that we got a result. The assertion below fails first when
	// `paragraphs` is undefined; the `?? []` fallback only exists so the rest
	// of the test can use a narrowed array instead of repeated `!` assertions.
	expect(result.paragraphs).toBeDefined();
	const paragraphs = result.paragraphs ?? [];
	expect(paragraphs.length).toBeGreaterThan(1); // Should have multiple paragraphs

	// Check that paragraphs have the expected structure
	for (const para of paragraphs) {
		expect(para).toHaveProperty("startSegmentIndex");
		expect(para).toHaveProperty("endSegmentIndex");
		expect(para).toHaveProperty("text");
		expect(para.text.length).toBeGreaterThan(0);
	}

	// The cleaned text should have tags removed while keeping the real content
	const cleanedText = paragraphs.map((p) => p.text).join(" ");

	expect(cleanedText).not.toContain("[SIDE CONVERSATION]");
	expect(cleanedText.toLowerCase()).toContain("gospel");
	expect(cleanedText.toLowerCase()).toContain("evangelism");

	console.log(`Detected ${paragraphs.length} paragraphs from ${mockSegments.length} segments`);
	console.log("First paragraph:", paragraphs[0]?.text.substring(0, 100) + "...");
	console.log("Last paragraph:", paragraphs[paragraphs.length - 1]?.text.substring(0, 100) + "...");
}, 30000); // 30s timeout for API call

test("cleanAndGetParagraphBoundaries handles empty transcript", async () => {
	const result = await cleanAndGetParagraphBoundaries({
		transcriptId: "test-empty",
		rawTranscript: "",
		segments: [],
		maxWordsMove: 3,
	});

	expect(result.paragraphs).toEqual([]);
});

test("cleanAndGetParagraphBoundaries returns error on missing API key", async () => {
	const rawTranscript = "Test transcript";

	// Temporarily remove the API key from the environment.
	// NOTE(review): this assumes the function reads OPENROUTER_API_KEY at call
	// time rather than caching it at import — confirm against the implementation.
	const originalKey = process.env.OPENROUTER_API_KEY;
	delete process.env.OPENROUTER_API_KEY;

	try {
		const result = await cleanAndGetParagraphBoundaries({
			transcriptId: "test-fallback",
			rawTranscript,
			segments: [{ text: rawTranscript }],
			maxWordsMove: 3,
		});

		expect(result.paragraphs).toBeUndefined();
		expect(result.error).toBe("OPENROUTER_API_KEY not set");
	} finally {
		// Restore the key even when an assertion above throws, so a failure here
		// cannot leak a missing key into other tests. Compare against undefined
		// (not truthiness) so an originally empty value is restored as well.
		if (originalKey !== undefined) {
			process.env.OPENROUTER_API_KEY = originalKey;
		}
	}
});