馃 distributed transcription service thistle.dunkirk.sh
at main 10 kB view raw
import { css, html, LitElement } from "lit";
import { customElement, property } from "lit/decorators.js";

/** One parsed WebVTT cue. `start` and `end` are in seconds. */
interface VTTSegment {
	start: number;
	end: number;
	text: string;
	/** Cue identifier (the line directly above the timing line), if any. */
	index?: string;
}

/** Resting label of the copy button; must match the text in `render()`. */
const COPY_LABEL = "Copy";

/** How long the "Copied!" confirmation stays on the button, in ms. */
const COPY_FEEDBACK_MS = 1500;

/**
 * Parse WebVTT text into cues.
 *
 * The parser is deliberately lenient:
 * - a UTF-8 BOM and CRLF / CR / LF line endings are all accepted;
 * - the `WEBVTT` header may carry trailing text (`WEBVTT - title`) and is
 *   skipped when found; when no header exists, parsing starts at the first
 *   line instead of discarding the whole input;
 * - any non-blank line that is not a timing line is treated as a candidate
 *   cue identifier for the timing line that immediately follows it.
 *
 * @param vttContent Raw WebVTT document.
 * @returns Cues in document order; an empty array when none are found.
 */
function parseVTT(vttContent: string): VTTSegment[] {
	const segments: VTTSegment[] = [];
	const lines = vttContent.replace(/^\uFEFF/, "").split(/\r\n|\r|\n/);

	// Skip everything up to and including the header line, if there is one.
	const headerIndex = lines.findIndex((l) => /^WEBVTT(?:\s|$)/.test(l.trim()));
	let i = headerIndex === -1 ? 0 : headerIndex + 1;

	while (i < lines.length) {
		let index: string | undefined;
		let line = lines[i] ?? "";

		// A non-blank line without "-->" may be the cue identifier.
		if (line.trim() && !line.includes("-->")) {
			index = line.trim();
			i++;
			line = lines[i] ?? "";
		}

		if (!line.includes("-->")) {
			// Not a cue. If an identifier candidate was consumed above, `i`
			// already moved forward, so re-examine the current line on the next
			// pass (it may itself be the real identifier). Otherwise step over
			// the blank line.
			if (index === undefined) i++;
			continue;
		}

		const [rawStart, rawEnd] = line.split("-->");
		const start = parseVTTTimestamp(rawStart);
		const end = parseVTTTimestamp(rawEnd);

		// The cue payload runs until the next blank line.
		const textLines: string[] = [];
		i++;
		while (i < lines.length && (lines[i] ?? "").trim()) {
			textLines.push(lines[i] ?? "");
			i++;
		}

		segments.push({
			start,
			end,
			text: textLines.join(" ").trim(),
			index,
		});
	}

	return segments;
}

/**
 * Convert a WebVTT timestamp to seconds.
 *
 * Accepts both `hh:mm:ss.ttt` and the short `mm:ss.ttt` form. Only the first
 * whitespace-delimited token is read, so cue settings that follow the end
 * timestamp on a timing line (`00:00:04.000 align:start`) are ignored.
 *
 * @param timestamp Timestamp text, possibly padded or followed by settings.
 * @returns Seconds, or `0` when the text is not a recognisable timestamp.
 */
function parseVTTTimestamp(timestamp?: string): number {
	const token = (timestamp ?? "").trim().split(/\s+/)[0] ?? "";
	const parts = token.split(":");
	if (parts.length < 2 || parts.length > 3) return 0;

	let seconds = 0;
	for (const part of parts) {
		const value = Number.parseFloat(part || "0");
		if (!Number.isFinite(value)) return 0;
		seconds = seconds * 60 + value;
	}
	return seconds;
}

/**
 * Paragraph bucket for a cue, taken from identifiers shaped like
 * `Paragraph <n>-<m>`. Cues without such an identifier share bucket `"0"`.
 */
function paragraphKey(segment: VTTSegment): string {
	const match = (segment.index ?? "").trim().match(/^Paragraph\s+(\d+)-/);
	return match?.[1] ?? "0";
}

/**
 * `<vtt-viewer>` renders a WebVTT transcript as clickable sentences and keeps
 * the sentence under the playhead highlighted.
 *
 * - `vttContent`: the raw WebVTT text to display.
 * - `audioId`: `id` of the `<audio>` element to follow; it is looked up in the
 *   enclosing shadow roots first and then in the document.
 */
@customElement("vtt-viewer")
export class VTTViewer extends LitElement {
	@property({ type: String }) vttContent = "";
	@property({ type: String }) audioId = "";

	static override styles = css`
		.viewer-container {
			position: relative;
		}

		.copy-btn {
			position: absolute;
			top: 0.5rem;
			right: 0.5rem;
			background: var(--primary);
			color: var(--background);
			border: none;
			padding: 0.25rem 0.5rem;
			border-radius: 4px;
			font-size: 0.875rem;
			cursor: pointer;
			opacity: 0;
			transition: opacity 0.15s ease;
		}

		/* Reveal on keyboard focus too, so the button is never focused while invisible. */
		.viewer-container:hover .copy-btn,
		.copy-btn:focus-visible {
			opacity: 1;
		}

		.transcript {
			background: color-mix(in srgb, var(--primary) 5%, transparent);
			border-radius: 6px;
			padding: 1rem;
			font-family: monospace;
			font-size: 0.875rem;
			color: var(--text);
			line-height: 1.6;
			word-wrap: break-word;
		}

		.segment {
			cursor: pointer;
			transition: background 0.1s;
			display: inline;
		}

		.segment:hover {
			background: color-mix(in srgb, var(--primary) 15%, transparent);
			border-radius: 2px;
		}

		.current-segment {
			background: color-mix(in srgb, var(--accent) 30%, transparent);
			border-radius: 2px;
		}

		.paragraph {
			display: block;
			margin: 0 0 1rem 0;
			line-height: 1.6;
		}
	`;

	private audioElement: HTMLAudioElement | null = null;
	private boundTimeUpdate: EventListener | null = null;
	private boundTranscriptClick: ((e: Event) => void) | null = null;
	private copyResetTimer: ReturnType<typeof setTimeout> | undefined;

	/**
	 * Locate the audio element by id, walking outwards through up to ten
	 * nested shadow roots and finishing at the document.
	 *
	 * @returns The element, or `null` when `id` is empty or nothing matches.
	 */
	private findAudioElementById(id: string): HTMLAudioElement | null {
		// An empty id would produce the invalid selector "#", which throws.
		if (!id) return null;
		// Escape so ids with leading digits or punctuation stay valid selectors.
		const selector = `#${CSS.escape(id)}`;

		let root: Node | undefined = this.getRootNode();
		for (let depth = 0; root && depth < 10; depth++) {
			if (root instanceof ShadowRoot) {
				const el = root.querySelector<HTMLAudioElement>(selector);
				if (el) return el;
				root = root.host.getRootNode();
			} else if (root instanceof Document) {
				return root.getElementById(id) as HTMLAudioElement | null;
			} else {
				// Detached subtree: there is nowhere further to search.
				return null;
			}
		}
		return null;
	}

	/**
	 * (Re)wire the viewer to its audio element: highlight and scroll to the
	 * sentence under the playhead on `timeupdate`, and seek + play when a
	 * sentence is clicked. Does nothing when either the audio element or the
	 * transcript container cannot be found.
	 */
	private setupHighlighting() {
		// Detach previous listeners if any
		this.detachHighlighting();

		const audioElement = this.findAudioElementById(this.audioId);
		const transcriptDiv =
			this.shadowRoot?.querySelector<HTMLDivElement>(".transcript") ?? null;
		if (!audioElement || !transcriptDiv) return;

		// Clear any lingering highlights from prior instances
		transcriptDiv.querySelectorAll(".current-segment").forEach((el) => {
			el.classList.remove("current-segment");
		});

		this.audioElement = audioElement;
		let currentSegmentElement: HTMLElement | null = null;

		this.boundTimeUpdate = () => {
			const currentTime = this.audioElement?.currentTime ?? 0;
			const segmentElements =
				transcriptDiv.querySelectorAll<HTMLElement>("[data-start]");
			let found = false;

			for (const el of Array.from(segmentElements)) {
				const start = Number.parseFloat(el.dataset.start || "0");
				const end = Number.parseFloat(el.dataset.end || "0");

				if (currentTime >= start && currentTime <= end) {
					found = true;
					// Only touch the DOM (and scroll) when the active sentence changes.
					if (currentSegmentElement !== el) {
						currentSegmentElement?.classList.remove("current-segment");
						el.classList.add("current-segment");
						currentSegmentElement = el;
						el.scrollIntoView({
							behavior: "smooth",
							block: "center",
						});
					}
					break;
				}
			}

			// If no segment matched, clear any existing highlight
			if (!found && currentSegmentElement) {
				currentSegmentElement.classList.remove("current-segment");
				currentSegmentElement = null;
			}
		};

		audioElement.addEventListener("timeupdate", this.boundTimeUpdate);

		this.boundTranscriptClick = (e: Event) => {
			const target = e.target as HTMLElement;
			if (target.dataset.start && this.audioElement) {
				this.audioElement.currentTime = Number.parseFloat(target.dataset.start);
				this.audioElement.play();
			}
		};

		transcriptDiv.addEventListener("click", this.boundTranscriptClick);
	}

	/**
	 * Pause the tracked audio element, remove the listeners installed by
	 * `setupHighlighting`, and forget all references.
	 *
	 * NOTE(review): this also runs at the start of every `setupHighlighting`,
	 * so changing `vttContent` or `audioId` during playback pauses the audio —
	 * confirm that is intended and not only meant for removal.
	 */
	private detachHighlighting() {
		try {
			const transcriptDiv =
				this.shadowRoot?.querySelector<HTMLDivElement>(".transcript") ?? null;
			if (this.audioElement) {
				// Pause playback to avoid audio continuing after the viewer is removed
				try {
					this.audioElement.pause();
				} catch (_e) {
					// ignore
				}
				if (this.boundTimeUpdate) {
					this.audioElement.removeEventListener(
						"timeupdate",
						this.boundTimeUpdate,
					);
				}
			}
			if (transcriptDiv && this.boundTranscriptClick) {
				transcriptDiv.removeEventListener("click", this.boundTranscriptClick);
			}
		} finally {
			this.audioElement = null;
			this.boundTimeUpdate = null;
			this.boundTranscriptClick = null;
		}
	}

	override disconnectedCallback() {
		this.detachHighlighting();
		// Do not let a pending label reset fire against a removed element.
		if (this.copyResetTimer !== undefined) {
			clearTimeout(this.copyResetTimer);
			this.copyResetTimer = undefined;
		}
		super.disconnectedCallback?.();
	}

	override updated(changed: Map<string, unknown>) {
		super.updated(changed);
		// The rendered spans or the target audio changed, so rewire listeners.
		if (changed.has("vttContent") || changed.has("audioId")) {
			this.setupHighlighting();
		}
	}

	/**
	 * Render the transcript as one `<div class="paragraph">` per paragraph
	 * bucket, each split into sentence `<span>`s.
	 *
	 * Cue timing is per paragraph, not per sentence, so each sentence gets a
	 * share of the paragraph's time span proportional to its word count; the
	 * result is written to `data-start` / `data-end` for highlighting and
	 * click-to-seek.
	 */
	private renderFromVTT() {
		if (!this.vttContent) return html``;
		const segments = parseVTT(this.vttContent);
		const paragraphGroups = new Map<string, VTTSegment[]>();

		for (const segment of segments) {
			const key = paragraphKey(segment);
			const group = paragraphGroups.get(key);
			if (group) group.push(segment);
			else paragraphGroups.set(key, [segment]);
		}

		const paragraphs = Array.from(paragraphGroups.values()).map(
			(groupSegments) => {
				const fullText = groupSegments.map((s) => s.text || "").join(" ");
				const sentences = fullText.split(/(?<=[.!?])\s+/g).filter(Boolean);
				const wordCounts = sentences.map(
					(s) => s.split(/\s+/).filter(Boolean).length,
				);
				// Floor at 1 to avoid dividing by zero for empty paragraphs.
				const totalWords = Math.max(
					1,
					wordCounts.reduce((a, b) => a + b, 0),
				);
				const paraStart = Math.min(...groupSegments.map((s) => s.start));
				const paraEnd = Math.max(...groupSegments.map((s) => s.end));
				const paraDuration = paraEnd - paraStart;
				let acc = 0;

				return html`<div class="paragraph">${sentences.map((sent, si) => {
					const wordCount = wordCounts[si];
					if (wordCount === undefined) return "";

					const startOffset = (acc / totalWords) * paraDuration;
					acc += wordCount;
					const sentenceDuration = (wordCount / totalWords) * paraDuration;
					// End 1 ms early so adjacent sentences never both match the
					// playhead; the last sentence runs to the paragraph's end.
					const endOffset =
						si < sentences.length - 1
							? startOffset + sentenceDuration - 0.001
							: paraDuration;
					const spanStart = paraStart + startOffset;
					const spanEnd = paraStart + endOffset;
					return html`<span class="segment" data-start="${spanStart}" data-end="${spanEnd}">${sent}</span>${si < sentences.length - 1 ? " " : ""}`;
				})}</div>`;
			},
		);

		return html`${paragraphs}`;
	}

	/**
	 * Transcript as plain text: cues grouped into the same paragraph buckets
	 * as the rendered view, whitespace collapsed, paragraphs separated by a
	 * blank line.
	 */
	private extractPlainText(): string {
		if (!this.vttContent) return "";
		const paragraphGroups = new Map<string, string[]>();
		for (const segment of parseVTT(this.vttContent)) {
			const key = paragraphKey(segment);
			const group = paragraphGroups.get(key);
			if (group) group.push(segment.text || "");
			else paragraphGroups.set(key, [segment.text || ""]);
		}
		const paragraphs = Array.from(paragraphGroups.values()).map((group) =>
			group.join(" ").replace(/\s+/g, " ").trim(),
		);
		return paragraphs.join("\n\n").trim();
	}

	/**
	 * Copy the plain-text transcript to the clipboard and flash "Copied!" on
	 * the button. Falls back to a hidden `<textarea>` + `execCommand("copy")`
	 * when the async Clipboard API is unavailable. Copy failures are ignored
	 * on purpose (best effort); no confirmation is shown in that case.
	 */
	private async copyTranscript(e?: Event) {
		e?.stopPropagation();
		const text = this.extractPlainText();
		if (!text) return;

		try {
			if (navigator?.clipboard?.writeText) {
				await navigator.clipboard.writeText(text);
			} else {
				// Fallback
				const ta = document.createElement("textarea");
				ta.value = text;
				ta.style.position = "fixed";
				ta.style.opacity = "0";
				document.body.appendChild(ta);
				try {
					ta.select();
					document.execCommand("copy");
				} finally {
					// Always remove the helper element, even if copying throws.
					document.body.removeChild(ta);
				}
			}
		} catch {
			// ignore
			return;
		}

		const btn = this.shadowRoot?.querySelector<HTMLButtonElement>(".copy-btn");
		if (!btn) return;

		// Restart the countdown on repeated clicks and always restore the fixed
		// resting label; capturing the current text would capture "Copied!" on
		// a quick second click and leave the button stuck on it.
		if (this.copyResetTimer !== undefined) clearTimeout(this.copyResetTimer);
		btn.innerText = "Copied!";
		this.copyResetTimer = setTimeout(() => {
			btn.innerText = COPY_LABEL;
			this.copyResetTimer = undefined;
		}, COPY_FEEDBACK_MS);
	}

	override render() {
		return html`<div class="viewer-container"><button class="copy-btn" @click=${this.copyTranscript} aria-label="Copy transcript">Copy</button><div class="transcript">${this.renderFromVTT()}</div></div>`;
	}
}