+72
-17
src/components/transcription.ts
+72
-17
src/components/transcription.ts
························
························+const endOffset = si < sentences.length - 1 ? startOffset + sentenceDuration - 0.001 : paraEnd - paraStart;+return html`<span class="segment" data-start="${spanStart}" data-end="${spanEnd}">${sent}</span>${si < sentences.length - 1 ? ' ' : ''}`;
+78
-37
src/lib/transcript-cleaner.test.ts
+78
-37
src/lib/transcript-cleaner.test.ts
···-const rawTranscript = `[SIDE CONVERSATION] Yes? So with this course packet, what quiz is and exams, and if I can study through here, what you talk about? And I give you a good review every time. Yeah, so I'd be good to just study that and then we can do it. Yeah, and all the examples and stuff that we get from class especially. And then I, like your first quiz, I give you a mock quiz exactly like the quiz. Oh, okay. so you can kind of get a feel for how I do things. [inaudible] Okay? [inaudible] Yeah. [background chatter]`;
···+test("cleanAndGetParagraphBoundaries cleans transcript and returns paragraph boundaries", async () => {+const rawTranscript = `[SIDE CONVERSATION] Today in chapel we are talking about the fact that we believe in having gospel conversations. I'm gonna run my own PowerPoint. I'm gonna jump around. It's gonna be a little more conversational than normal. It's not gonna be like one of the normal sermons, although I know me and my tendency it'll turn into a sermon at some point just because that's the way God made me, so I can't help it.+Alright, so when it starts just have fun with it. We'll go on. Here's what it says in our doctrinal statement. It says, "Due to the commission of Christ and the urgency of the Gospel, all believers are to engage in Gospel conversations." How many of you believe that? That's pretty weak. How many of you believe that?+To live God-honoring lives and to work continuously for the spread of the Gospel to their neighbors and the nations. Now, let's be honest, as we start off this morning, all of us could do a better job with personal evangelism, and all of us could do a better job with a heart for missions.+So I'm not up here talking to you about something I have conquered or mastered. I'm not the expert on this. In fact, when it comes to personal evangelism in my own strength, I'm often a complete failure. But I have found that even in my weakness, God can use me in powerful ways when I make myself available to Him.`;+console.log(`Detected ${result.paragraphs!.length} paragraphs from ${mockSegments.length} segments`);+console.log("Last paragraph:", result.paragraphs![result.paragraphs!.length - 1]?.text.substring(0, 100) + "...");
+91
-84
src/lib/transcript-cleaner.ts
+91
-84
src/lib/transcript-cleaner.ts
······6. If there are obvious speaking mistakes then you can fix those (e.g. "we are going no wait sorry you should be doing")-"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent",-`[TranscriptCleaner] Completed for ${transcriptId}: ${rawTranscript.length} → ${cleanedText.length} chars (${reduction}% reduction)`,
···+const prompt = `You are a transcript editor and paragrapher. Input: a list of original transcript segments with their index, start time (seconds), end time (seconds), and the RAW transcript text.···6. If there are obvious speaking mistakes then you can fix those (e.g. "we are going no wait sorry you should be doing")+Then, determine paragraph boundaries by grouping the cleaned segments into logical paragraphs. A paragraph represents a complete thought, topic, or idea. Create MULTIPLE paragraphs based on:+CRITICAL: Each paragraph MUST end with a complete sentence. DO NOT break paragraphs mid-sentence.+- ALWAYS end paragraphs at sentence boundaries (after periods, question marks, or exclamation points)+- Return the paragraphs in order and cover the entire cleaned transcript text without overlap or omission
+13
-8
src/lib/transcription.ts
+13
-8
src/lib/transcription.ts
···············
···············
+44
src/lib/vtt-cleaner.test.ts
+44
src/lib/vtt-cleaner.test.ts
······
+147
-29
src/lib/vtt-cleaner.ts
+147
-29
src/lib/vtt-cleaner.ts
··················
··················+// Attempt LLM-driven cleaning and paragraphing in one request, fallback to deterministic rules+// Use paragraph-based ID: "Paragraph N-M" where N is paragraph number, M is segment within paragraph+`[VTTCleaner] Completed for ${transcriptionId}: ${cleanedSegments.length} segments in ${paragraphBoundaries.length} paragraphs`,