🪻 distributed transcription service
thistle.dunkirk.sh
1import { css, html, LitElement } from "lit";
2import { customElement, state } from "lit/decorators.js";
3import "./vtt-viewer.ts";
4
/** One transcription job as this component receives and tracks it. */
interface TranscriptionJob {
  /** Job identifier; interpolated into the `/api/transcriptions/<id>` URLs. */
  id: string;
  filename: string;
  /** Optional class name the job is grouped under. */
  class_name?: string;
  status:
    | "uploading"
    | "processing"
    | "transcribing"
    | "completed"
    | "failed";
  progress: number;
  /** Transcript text, when the server has provided any. */
  transcript?: string;
  created_at: number; // NOTE(review): presumably a timestamp — confirm units with the API
  /** Filled in by the component once the VTT for the job has been fetched. */
  audioUrl?: string;
  vttSegments?: VTTSegment[];
  /** Raw VTT document text fetched for the job. */
  vttContent?: string;
}
17
/** A single cue of a VTT transcript. */
interface VTTSegment {
  /** Cue start. NOTE(review): presumably seconds — confirm against the VTT parser. */
  start: number;
  /** Cue end, in the same unit as `start`. */
  end: number;
  /** Text of the cue. */
  text: string;
  /** Optional cue identifier. */
  index?: string;
}
24
/**
 * Hands text to a callback one token at a time, pausing between tokens.
 *
 * Each chunk is split into alternating word and whitespace tokens; every
 * token (whitespace runs included) is passed to `onWord` in arrival order, so
 * concatenating the callback arguments reproduces the text that was added.
 */
class WordStreamer {
  private queue: string[] = [];
  private isProcessing = false;
  /**
   * Identifies the drain loop that currently owns the queue. `showAll()` and
   * `clear()` bump it so a loop that is asleep in its delay stops when it
   * wakes up instead of running alongside a newer loop.
   */
  private generation = 0;
  private wordDelay: number;
  private onWord: (word: string) => void;

  /**
   * @param wordDelay Pause between two tokens, in milliseconds.
   * @param onWord Callback invoked once per token.
   */
  constructor(wordDelay: number = 50, onWord: (word: string) => void) {
    this.wordDelay = wordDelay;
    this.onWord = onWord;
  }

  /**
   * Queues `text` for streaming and starts draining if no loop is active.
   * The first queued token is delivered synchronously when a loop is started.
   */
  addChunk(text: string) {
    // The capture group keeps whitespace runs as tokens of their own.
    // Tokens are pushed one by one: spreading a very large array into push()
    // can exceed the engine's argument limit.
    for (const token of text.split(/(\s+)/)) {
      if (token.length > 0) {
        this.queue.push(token);
      }
    }

    if (!this.isProcessing) {
      this.processQueue().catch((error: unknown) => {
        // A throwing callback must not surface as an unhandled rejection.
        console.error("WordStreamer callback failed:", error);
      });
    }
  }

  /** Delivers queued tokens one per `wordDelay` until the queue is empty or the loop is retired. */
  private async processQueue() {
    const run = ++this.generation;
    this.isProcessing = true;

    try {
      while (run === this.generation && this.queue.length > 0) {
        const word = this.queue.shift();
        if (word === undefined) break;
        this.onWord(word);
        await new Promise<void>((resolve) =>
          setTimeout(resolve, this.wordDelay),
        );
      }
    } finally {
      // Only the loop that still owns the queue may mark the streamer idle;
      // a retired loop must not clobber the state of its successor.
      if (run === this.generation) {
        this.isProcessing = false;
      }
    }
  }

  /** Delivers everything still queued immediately, without delays. */
  showAll() {
    // Retire any sleeping loop before draining.
    this.generation++;
    while (this.queue.length > 0) {
      const word = this.queue.shift();
      if (word === undefined) break;
      this.onWord(word);
    }
    this.isProcessing = false;
  }

  /** Drops everything still queued without delivering it. */
  clear() {
    // Retire any sleeping loop so it cannot pick up tokens added later.
    this.generation++;
    this.queue = [];
    this.isProcessing = false;
  }
}
75
76@customElement("transcription-component")
77export class TranscriptionComponent extends LitElement {
/** Jobs most recently loaded from the API; stream updates mutate them in place. */
@state() jobs: TranscriptionJob[] = [];
/** True while an upload request is in flight. */
@state() isUploading = false;
/** True while a drag is hovering over the upload area. */
@state() dragOver = false;
/** Result of the last health check; the upload UI is disabled when false. */
@state() serviceAvailable = true;
/** Sorted, de-duplicated class names offered in the class dropdown. */
@state() existingClasses: string[] = [];
/** True when "+ Add new class" is selected and the text input is shown. */
@state() showNewClassInput = false;

/** Per-job word streamer that paces incoming transcript text. */
private wordStreamers: Map<string, WordStreamer> = new Map();
/** Per-job transcript text that has been released by the streamer so far. */
private displayedTranscripts: Map<string, string> = new Map();
/** Per-job transcript last received from the stream, used to work out what is new. */
private lastTranscripts: Map<string, string> = new Map();
90
/** Stylesheet for the upload area, job cards, transcript view and class controls. */
static override styles = css`
  :host {
    display: block;
  }

  /* ---- Upload drop zone ---- */

  .upload-area {
    border: 2px dashed var(--secondary);
    border-radius: 8px;
    padding: 3rem 2rem;
    text-align: center;
    transition: all 0.2s;
    cursor: pointer;
    background: var(--background);
  }

  .upload-area:hover,
  .upload-area.drag-over {
    border-color: var(--primary);
    background: color-mix(in srgb, var(--primary) 5%, transparent);
  }

  .upload-area.disabled {
    border-color: var(--secondary);
    opacity: 0.6;
    cursor: not-allowed;
  }

  .upload-area.disabled:hover {
    border-color: var(--secondary);
    background: transparent;
  }

  .upload-icon {
    font-size: 3rem;
    color: var(--secondary);
    margin-bottom: 1rem;
  }

  .upload-text {
    color: var(--text);
    font-size: 1.125rem;
    font-weight: 500;
    margin-bottom: 0.5rem;
  }

  .upload-hint {
    color: var(--text);
    opacity: 0.7;
    font-size: 0.875rem;
  }

  /* ---- Job list ---- */

  .jobs-section {
    margin-top: 2rem;
  }

  .jobs-title {
    font-size: 1.25rem;
    font-weight: 600;
    color: var(--text);
    margin-bottom: 1rem;
  }

  .job-card {
    background: var(--background);
    border: 1px solid var(--secondary);
    border-radius: 8px;
    padding: 1.5rem;
    margin-bottom: 1rem;
  }

  .job-header {
    display: flex;
    align-items: center;
    justify-content: space-between;
    margin-bottom: 1rem;
  }

  .job-filename {
    font-weight: 500;
    color: var(--text);
  }

  /* ---- Status badges ---- */

  .job-status {
    padding: 0.25rem 0.75rem;
    border-radius: 4px;
    font-size: 0.75rem;
    font-weight: 600;
    text-transform: uppercase;
  }

  .status-uploading,
  .status-processing {
    background: color-mix(in srgb, var(--primary) 10%, transparent);
    color: var(--primary);
  }

  .status-transcribing {
    background: color-mix(in srgb, var(--accent) 10%, transparent);
    color: var(--accent);
  }

  .status-completed {
    background: color-mix(in srgb, var(--success) 10%, transparent);
    color: var(--success);
  }

  .status-failed {
    background: color-mix(in srgb, var(--text) 10%, transparent);
    color: var(--text);
  }

  /* ---- Progress bar ---- */

  .progress-bar {
    width: 100%;
    height: 4px;
    background: var(--secondary);
    border-radius: 2px;
    margin-bottom: 1rem;
    overflow: hidden;
    position: relative;
  }

  .progress-fill {
    height: 100%;
    background: var(--primary);
    border-radius: 2px;
    transition: width 0.3s;
  }

  .progress-fill.indeterminate {
    width: 30%;
    background: var(--primary);
    animation: progress-slide 1.5s ease-in-out infinite;
  }

  @keyframes progress-slide {
    from {
      transform: translateX(-100%);
    }
    to {
      transform: translateX(333%);
    }
  }

  /* ---- Transcript and audio ---- */

  .job-transcript {
    background: color-mix(in srgb, var(--primary) 5%, transparent);
    border-radius: 6px;
    padding: 1rem;
    margin-top: 1rem;
    font-family: monospace;
    font-size: 0.875rem;
    color: var(--text);
    line-height: 1.6;
    word-wrap: break-word;
  }

  .segment {
    cursor: pointer;
    transition: background 0.1s;
    display: inline;
  }

  .segment:hover {
    background: color-mix(in srgb, var(--primary) 15%, transparent);
    border-radius: 2px;
  }

  .current-segment {
    background: color-mix(in srgb, var(--accent) 30%, transparent);
    border-radius: 2px;
  }

  .paragraph {
    display: block;
    margin: 0 0 1rem;
    line-height: 1.6;
  }

  .audio-player {
    margin-top: 1rem;
    width: 100%;
  }

  .audio-player audio {
    width: 100%;
    height: 2.5rem;
  }

  /* ---- Helpers ---- */

  .hidden,
  .file-input {
    display: none;
  }

  /* ---- Class picker ---- */

  .upload-form {
    margin-top: 1rem;
    display: flex;
    flex-direction: column;
    gap: 0.75rem;
  }

  .class-input,
  .class-select {
    padding: 0.5rem 0.75rem;
    border: 1px solid var(--secondary);
    border-radius: 4px;
    font-size: 0.875rem;
    color: var(--text);
    background: var(--background);
  }

  .class-select {
    width: 100%;
    cursor: pointer;
  }

  .class-input:focus,
  .class-select:focus {
    outline: none;
    border-color: var(--primary);
  }

  .class-input::placeholder {
    color: var(--paynes-gray);
    opacity: 0.6;
  }

  .class-select option {
    padding: 0.5rem;
  }

  /* ---- Class grouping ---- */

  .class-group {
    margin-bottom: 2rem;
  }

  .class-header {
    font-size: 1.25rem;
    font-weight: 600;
    color: var(--text);
    margin-bottom: 1rem;
    padding-bottom: 0.5rem;
    border-bottom: 2px solid var(--accent);
  }

  .no-class-header {
    border-bottom-color: var(--secondary);
  }
`;
353
/** Open server-sent-event connections, keyed by job id. */
private eventSources = new Map<string, EventSource>();

/**
 * Listener for the window "auth-changed" event: re-checks service health,
 * reloads the jobs and class names, then connects streams for active jobs.
 * Declared as an arrow function so the same reference can be passed to both
 * addEventListener and removeEventListener.
 */
private handleAuthChange = async (): Promise<void> => {
  await this.checkHealth();
  await this.loadJobs();
  await this.loadExistingClasses();
  this.connectToJobStreams();
};
361
/**
 * Rebuilds `existingClasses` from the class names of the jobs returned by
 * `GET /api/transcriptions`: unique names, sorted. A non-OK response or any
 * error leaves the list empty.
 */
private async loadExistingClasses() {
  let classNames: string[] = [];

  try {
    const res = await fetch("/api/transcriptions");
    if (res.ok) {
      const payload = await res.json();

      // Collect each distinct, non-empty class name once.
      const unique = new Set<string>();
      for (const entry of payload.jobs || []) {
        if (entry.class_name) {
          unique.add(entry.class_name);
        }
      }

      classNames = [...unique].sort();
    }
  } catch (error) {
    console.error("Failed to load classes:", error);
  }

  this.existingClasses = classNames;
}
387
/**
 * Runs when the element is attached: subscribes to "auth-changed", checks
 * service health, loads jobs and class names, then opens streams for the
 * jobs that are still in progress.
 */
override async connectedCallback() {
  super.connectedCallback();

  // Subscribe before the first await. If the element is removed while the
  // requests below are in flight, disconnectedCallback has already run; a
  // listener added afterwards would never be removed.
  window.addEventListener("auth-changed", this.handleAuthChange);

  await this.checkHealth();
  await this.loadJobs();
  await this.loadExistingClasses();

  // Same reasoning for the streams: do not open connections for an element
  // that is no longer in the document, since nothing would close them.
  if (this.isConnected) {
    this.connectToJobStreams();
  }
}
398
/**
 * Runs when the element is detached: stops listening for auth changes,
 * closes every open stream, and discards all streaming state.
 */
override disconnectedCallback() {
  super.disconnectedCallback();

  window.removeEventListener("auth-changed", this.handleAuthChange);

  // Close and forget every SSE connection.
  this.eventSources.forEach((source) => source.close());
  this.eventSources.clear();

  // Drop queued words and the bookkeeping that goes with them.
  this.wordStreamers.forEach((streamer) => streamer.clear());
  this.wordStreamers.clear();
  this.displayedTranscripts.clear();
  this.lastTranscripts.clear();
}
416
/** Opens a stream for every loaded job that is uploading, processing or transcribing. */
private connectToJobStreams() {
  const inFlight = this.jobs.filter(
    ({ status }) =>
      status === "uploading" ||
      status === "processing" ||
      status === "transcribing",
  );

  for (const { id } of inFlight) {
    this.connectToJobStream(id);
  }
}
429
/**
 * Opens the server-sent-event stream for one job and applies the "update"
 * events it delivers to the matching entry in `jobs`.
 *
 * Transcript text is fed through a per-job WordStreamer. When an update
 * reports "completed" or "failed" the stream is closed; on "completed" any
 * words still queued are flushed and the job's VTT is loaded. Connection
 * errors are retried up to three times with 1s/2s/4s backoff, as long as the
 * job is still listed, not finished, and the element is still attached.
 *
 * @param jobId Job whose stream to open; a no-op if one is already open.
 * @param retryCount Number of reconnect attempts already made for this job.
 */
private connectToJobStream(jobId: string, retryCount = 0) {
  if (this.eventSources.has(jobId)) {
    return; // Already connected
  }

  const eventSource = new EventSource(`/api/transcriptions/${jobId}/stream`);

  // Handle named "update" events from SSE stream
  eventSource.addEventListener("update", async (event) => {
    let update: {
      status?: TranscriptionJob["status"];
      progress?: number;
      transcript?: unknown;
    } | null;
    try {
      update = JSON.parse(event.data);
    } catch (error) {
      // One malformed event must not take the handler down.
      console.warn(`Ignoring malformed update for job ${jobId}:`, error);
      return;
    }
    if (typeof update !== "object" || update === null) {
      return;
    }

    // Jobs are mutated in place; the array is re-created below for Lit.
    const job = this.jobs.find((j) => j.id === jobId);
    if (!job) {
      return;
    }

    if (update.status !== undefined) job.status = update.status;
    if (update.progress !== undefined) job.progress = update.progress;
    if (typeof update.transcript === "string") {
      job.transcript = update.transcript;
      this.queueTranscriptDelta(jobId, update.transcript);
    }

    // Trigger Lit re-render by creating new array reference
    this.jobs = [...this.jobs];

    const finished =
      update.status === "completed" || update.status === "failed";
    if (!finished) {
      return;
    }

    eventSource.close();
    this.eventSources.delete(jobId);

    const streamer = this.wordStreamers.get(jobId);
    if (streamer) {
      if (update.status === "completed") {
        // Flush rather than drop: the final update may carry no transcript,
        // in which case words from earlier updates are still queued.
        streamer.showAll();
      } else {
        streamer.clear();
      }
      this.wordStreamers.delete(jobId);
    }
    this.lastTranscripts.delete(jobId);

    // Load VTT for completed jobs
    if (update.status === "completed") {
      await this.loadVTTForJob(jobId);
    }
  });

  eventSource.onerror = (error) => {
    console.warn(`SSE connection error for job ${jobId}:`, error);
    eventSource.close();
    this.eventSources.delete(jobId);

    // Check if the job still exists before retrying
    const job = this.jobs.find((j) => j.id === jobId);
    if (!job) {
      console.log(`Job ${jobId} no longer exists, skipping retry`);
      return;
    }

    // Don't retry if job is already in a terminal state
    if (job.status === "completed" || job.status === "failed") {
      console.log(`Job ${jobId} is ${job.status}, skipping retry`);
      return;
    }

    if (retryCount >= 3) {
      console.error(`Failed to connect to job ${jobId} after 3 attempts`);
      return;
    }

    const backoff = 2 ** retryCount * 1000; // 1s, 2s, 4s
    console.log(
      `Retrying connection in ${backoff}ms (attempt ${retryCount + 1}/3)`,
    );
    setTimeout(() => {
      // The element may have been detached during the backoff; reconnecting
      // then would open a stream that nothing ever closes.
      if (this.isConnected) {
        this.connectToJobStream(jobId, retryCount + 1);
      }
    }, backoff);
  };

  this.eventSources.set(jobId, eventSource);
}

/**
 * Queues the part of `newTranscript` that has not been streamed yet for the
 * given job, creating the job's WordStreamer on first use.
 */
private queueTranscriptDelta(jobId: string, newTranscript: string) {
  let streamer = this.wordStreamers.get(jobId);
  if (!streamer) {
    streamer = new WordStreamer(50, (word) => {
      const current = this.displayedTranscripts.get(jobId) || "";
      this.displayedTranscripts.set(jobId, current + word);
      this.requestUpdate();
    });
    this.wordStreamers.set(jobId, streamer);
  }

  const lastTranscript = this.lastTranscripts.get(jobId) || "";
  if (newTranscript === lastTranscript) {
    return;
  }

  if (newTranscript.startsWith(lastTranscript)) {
    // Cumulative update: stream only the new tail. Whitespace-only tails are
    // streamed too, because the next update is diffed against the full text
    // and a skipped separator would glue two words together.
    streamer.addChunk(newTranscript.slice(lastTranscript.length));
  } else {
    // Unrelated text: separate it from what was shown before.
    if (lastTranscript) {
      streamer.addChunk(" ");
    }
    streamer.addChunk(newTranscript);
  }
  this.lastTranscripts.set(jobId, newTranscript);
}
549
/**
 * Queries `/api/transcriptions/health` and stores the reported `available`
 * value in `serviceAvailable`. A non-OK response or any error counts as
 * unavailable.
 */
async checkHealth() {
  let available = false;

  try {
    const res = await fetch("/api/transcriptions/health");
    if (res.ok) {
      const body = await res.json();
      available = body.available;
    }
  } catch {
    available = false;
  }

  this.serviceAvailable = available;
}
563
/**
 * Loads the job list from `GET /api/transcriptions` into `jobs`.
 *
 * Finished jobs that have a transcript get it shown in full straight away,
 * and the VTT of every completed job is fetched. A 404 or a network error
 * results in an empty list; other error statuses are logged and leave the
 * current list untouched. `serviceAvailable` is not modified here.
 */
async loadJobs() {
  try {
    const response = await fetch("/api/transcriptions");
    if (response.ok) {
      const data = await response.json();
      // A body without a `jobs` array means "no jobs", not a failure.
      const jobs: TranscriptionJob[] = Array.isArray(data?.jobs)
        ? data.jobs
        : [];
      this.jobs = jobs;

      // Initialize displayedTranscripts for completed/failed jobs
      for (const job of jobs) {
        if (
          (job.status === "completed" || job.status === "failed") &&
          job.transcript
        ) {
          this.displayedTranscripts.set(job.id, job.transcript);
        }
      }

      // The VTT requests are independent of each other, so issue them
      // together instead of waiting for each one in turn.
      await Promise.all(
        jobs
          .filter((job) => job.status === "completed")
          .map((job) => this.loadVTTForJob(job.id)),
      );
    } else if (response.status === 404) {
      // Transcription service not available - show empty state
      this.jobs = [];
    } else {
      console.error("Failed to load jobs:", response.status);
    }
  } catch (error) {
    // Network error or service unavailable - don't break the page
    console.warn("Transcription service unavailable:", error);
    this.jobs = [];
  }
}
598
/**
 * Fetches the VTT rendering of a job and, if the job is still listed, stores
 * it on the job together with the job's audio URL. Failures are logged and
 * otherwise ignored.
 */
private async loadVTTForJob(jobId: string) {
  try {
    const res = await fetch(`/api/transcriptions/${jobId}?format=vtt`);
    if (!res.ok) {
      return;
    }
    const vtt = await res.text();

    const target = this.jobs.find((candidate) => candidate.id === jobId);
    if (!target) {
      return;
    }

    target.vttContent = vtt;
    target.audioUrl = `/api/transcriptions/${jobId}/audio`;
    // New array reference so Lit notices the in-place change.
    this.jobs = [...this.jobs];
  } catch (error) {
    console.warn(`Failed to load VTT for job ${jobId}:`, error);
  }
}
617
/** dragover handler: flags the hover state and suppresses the browser's default handling. */
private handleDragOver(e: DragEvent) {
  this.dragOver = true;
  e.preventDefault();
}
622
/** dragleave handler: clears the hover state and suppresses the browser's default handling. */
private handleDragLeave(e: DragEvent) {
  this.dragOver = false;
  e.preventDefault();
}
627
/** drop handler: clears the hover state and uploads the first dropped file, if there is one. */
private async handleDrop(e: DragEvent) {
  e.preventDefault();
  this.dragOver = false;

  const dropped = e.dataTransfer?.files?.[0];
  if (!dropped) {
    return;
  }
  await this.uploadFile(dropped);
}
638
/** change handler of the hidden file input: uploads the first chosen file, if there is one. */
private async handleFileSelect(e: Event) {
  const input = e.target as HTMLInputElement;
  const file = input.files?.[0];
  if (!file) {
    return;
  }

  // Reset the input so that picking the same file again (for example after a
  // rejected or failed upload) fires another change event. The File object
  // already read above stays usable.
  input.value = "";

  await this.uploadFile(file);
}
646
/** change handler of the class dropdown: shows the text input only for the "__new__" option. */
private handleClassSelectChange(e: Event) {
  const { value } = e.target as HTMLSelectElement;
  this.showNewClassInput = value === "__new__";
}
651
/**
 * Validates an audio file and uploads it to `POST /api/transcriptions`,
 * attaching the chosen class name when there is one.
 *
 * Unsupported types and files over 100MB are rejected with an alert before
 * any request is made. On success the page navigates to the class page (or
 * to `/class/uncategorized` when no class was chosen); on failure an alert
 * is shown. `isUploading` is true for the duration of the request.
 */
private async uploadFile(file: File) {
  const allowedTypes = [
    "audio/mpeg", // MP3
    "audio/wav", // WAV
    "audio/x-wav", // WAV (alternative)
    "audio/m4a", // M4A
    "audio/x-m4a", // M4A (alternative)
    "audio/mp4", // MP4 audio
    "audio/aac", // AAC
    "audio/ogg", // OGG
    "audio/webm", // WebM audio
    "audio/flac", // FLAC
  ];

  // Also check file extension for M4A files (sometimes MIME type isn't set correctly)
  const isM4A = file.name.toLowerCase().endsWith(".m4a");
  const isAllowedType =
    allowedTypes.includes(file.type) || (isM4A && file.type === "");

  if (!isAllowedType) {
    alert(
      "Please select a supported audio file (MP3, WAV, M4A, AAC, OGG, WebM, or FLAC)",
    );
    return;
  }

  if (file.size > 100 * 1024 * 1024) {
    // 100MB limit
    alert("File size must be less than 100MB");
    return;
  }

  this.isUploading = true;

  try {
    // Read once and reuse for both the request and the redirect; the class
    // controls are disabled while uploading, so the value cannot change.
    const className = this.selectedClassName();

    const formData = new FormData();
    formData.append("audio", file);
    if (className) {
      formData.append("class_name", className);
    }

    const response = await fetch("/api/transcriptions", {
      method: "POST",
      body: formData,
    });

    if (!response.ok) {
      const data = await response.json();
      alert(
        data.error ||
          "Upload failed - transcription service may be unavailable",
      );
      return;
    }

    // The success body is not used, so it is not parsed: an unparsable body
    // must not turn an accepted upload into a failure alert.
    window.location.href = className
      ? `/class/${encodeURIComponent(className)}`
      : "/class/uncategorized";
  } catch {
    alert("Upload failed - transcription service may be unavailable");
  } finally {
    this.isUploading = false;
  }
}

/**
 * Returns the class name currently chosen in the upload form: the trimmed
 * text of the new-class input when it is shown, otherwise the dropdown value.
 * Returns "" when nothing (or the "__new__" placeholder) is selected.
 */
private selectedClassName(): string {
  if (this.showNewClassInput) {
    const classInput =
      this.shadowRoot?.querySelector<HTMLInputElement>("#class-name-input");
    return classInput?.value?.trim() || "";
  }

  const selectedValue =
    this.shadowRoot?.querySelector<HTMLSelectElement>("#class-select")?.value;
  return selectedValue && selectedValue !== "__new__" ? selectedValue : "";
}
762
/**
 * Renders the upload drop zone with its hidden file input and, while the
 * service is available, the class dropdown plus the optional new-class input.
 * When the service is unavailable the drop zone is shown disabled, without
 * drag, drop or click handlers.
 */
override render() {
  // Note on the file input below: `disabled` is bound with `?disabled=`.
  // A plain string interpolated where an attribute name would go is not
  // applied by Lit, so the input would never actually be disabled.
  return html`
    <div
      class="upload-area ${this.dragOver ? "drag-over" : ""} ${!this.serviceAvailable ? "disabled" : ""}"
      @dragover=${this.serviceAvailable ? this.handleDragOver : null}
      @dragleave=${this.serviceAvailable ? this.handleDragLeave : null}
      @drop=${this.serviceAvailable ? this.handleDrop : null}
      @click=${
        this.serviceAvailable
          ? () =>
              this.shadowRoot
                ?.querySelector<HTMLInputElement>(".file-input")
                ?.click()
          : null
      }
    >
      <div class="upload-icon">🎵</div>
      <div class="upload-text">
        ${
          !this.serviceAvailable
            ? "Transcription service unavailable"
            : this.isUploading
              ? "Uploading..."
              : "Drop audio file here or click to browse"
        }
      </div>
      <div class="upload-hint">
        ${
          this.serviceAvailable
            ? "Supports MP3, WAV, M4A, AAC, OGG, WebM, FLAC up to 100MB"
            : "Transcription is currently unavailable"
        }
      </div>
      <input
        type="file"
        class="file-input"
        accept="audio/mpeg,audio/wav,audio/m4a,audio/mp4,audio/aac,audio/ogg,audio/webm,audio/flac,.m4a"
        ?disabled=${!this.serviceAvailable}
        @change=${this.handleFileSelect}
      />
    </div>

    ${
      this.serviceAvailable
        ? html`
            <div class="upload-form">
              <select
                id="class-select"
                class="class-select"
                ?disabled=${this.isUploading}
                @change=${this.handleClassSelectChange}
              >
                <option value="">Select a class (optional)</option>
                ${this.existingClasses.map(
                  (className) => html`
                    <option value=${className}>${className}</option>
                  `,
                )}
                <option value="__new__">+ Add new class</option>
              </select>

              ${
                this.showNewClassInput
                  ? html`
                      <input
                        type="text"
                        id="class-name-input"
                        class="class-input"
                        placeholder="Enter new class name"
                        ?disabled=${this.isUploading}
                      />
                    `
                  : ""
              }
            </div>
          `
        : ""
    }
  `;
}
824}