🪻 distributed transcription service · thistle.dunkirk.sh

feat: move to vtt transcripts with audio files

dunkirk.sh 6e888ef1 3f14cd65

Verified
Changed files: +287 -21

src/components/transcription.ts (+206 -5)
···
  progress: number;
  transcript?: string;
  created_at: number;
+ audioUrl?: string;
+ vttSegments?: VTTSegment[];
}
+
+ interface VTTSegment {
+   start: number;
+   end: number;
+   text: string;
+ }
+
+ function parseVTT(vttContent: string): VTTSegment[] {
+   const segments: VTTSegment[] = [];
+   const lines = vttContent.split("\n");
+
+   let i = 0;
+   // Skip WEBVTT header
+   while (i < lines.length && !lines[i]?.includes("-->")) {
+     i++;
+   }
+
+   while (i < lines.length) {
+     const line = lines[i];
+     if (line?.includes("-->")) {
+       const [startStr, endStr] = line.split("-->").map((s) => s.trim());
+       const start = parseVTTTimestamp(startStr || "");
+       const end = parseVTTTimestamp(endStr || "");
+
+       // Collect text lines until empty line
+       const textLines: string[] = [];
+       i++;
+       while (i < lines.length && lines[i]?.trim()) {
+         textLines.push(lines[i] || "");
+         i++;
+       }
+
+       segments.push({
+         start,
+         end,
+         text: textLines.join(" ").trim(),
+       });
+     }
+     i++;
+   }
+
+   return segments;
+ }
+
+ function parseVTTTimestamp(timestamp: string): number {
+   const parts = timestamp.split(":");
+   if (parts.length === 3) {
+     const hours = Number.parseFloat(parts[0] || "0");
+     const minutes = Number.parseFloat(parts[1] || "0");
+     const seconds = Number.parseFloat(parts[2] || "0");
+     return hours * 3600 + minutes * 60 + seconds;
+   }
+   return 0;
+ }
class WordStreamer {
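A quick sanity check of the parser added above (a sketch; `parseVTT` and `VTTSegment` are the function and type from this diff):

```ts
// Minimal WEBVTT input: the header line, then one cue.
const vtt = ["WEBVTT", "", "00:00:01.000 --> 00:00:04.500", "hello world"].join(
  "\n",
);

const segments = parseVTT(vtt);
// => [{ start: 1, end: 4.5, text: "hello world" }]
```

Note that `parseVTTTimestamp` only handles the three-part `HH:MM:SS.mmm` form; the shorter `MM:SS.mmm` form that WebVTT also permits falls through and yields 0.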
···
word-wrap: break-word;
}
+ .segment {
+   cursor: pointer;
+   transition: background 0.1s;
+   display: inline;
+ }
+
+ .segment:hover {
+   background: color-mix(in srgb, var(--primary) 15%, transparent);
+   border-radius: 2px;
+ }
+
+ .current-segment {
+   background: color-mix(in srgb, var(--accent) 30%, transparent);
+   border-radius: 2px;
+ }
+
+ .audio-player {
+   margin-top: 1rem;
+   width: 100%;
+ }
+
+ .audio-player audio {
+   width: 100%;
+   height: 2.5rem;
+ }
+
.hidden {
display: none;
}
···
const eventSource = new EventSource(`/api/transcriptions/${jobId}/stream`);
// Handle named "update" events from SSE stream
- eventSource.addEventListener("update", (event) => {
+ eventSource.addEventListener("update", async (event) => {
const update = JSON.parse(event.data);
// Update the job in our list efficiently (mutate in place for Lit)
···
this.wordStreamers.delete(jobId);
}
this.lastTranscripts.delete(jobId);
+
+ // Load VTT for completed jobs
+ if (update.status === "completed") {
+   await this.loadVTTForJob(jobId);
+   this.setupWordHighlighting(jobId);
+ }
}
}
});
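Named SSE events like these are delivered only to matching `addEventListener` calls; a bare `onmessage` handler would never see them. For reference, one `update` frame as the browser receives it looks like this (payload fields illustrative):

```ts
// The `event:` field routes the frame to addEventListener("update", ...);
// `data:` carries the JSON that the handler parses.
const frame = 'event: update\ndata: {"status":"completed","progress":100}\n\n';
```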
···
if ((job.status === "completed" || job.status === "failed") && job.transcript) {
this.displayedTranscripts.set(job.id, job.transcript);
}
+
+ // Fetch VTT for completed jobs
+ if (job.status === "completed") {
+   await this.loadVTTForJob(job.id);
+   await this.updateComplete;
+   this.setupWordHighlighting(job.id);
+ }
}
// Don't override serviceAvailable - it's set by checkHealth()
} else if (response.status === 404) {
···
}
}
+ private async loadVTTForJob(jobId: string) {
+   try {
+     const response = await fetch(`/api/transcriptions/${jobId}?format=vtt`);
+     if (response.ok) {
+       const vttContent = await response.text();
+       const segments = parseVTT(vttContent);
+
+       // Update job with VTT segments
+       const job = this.jobs.find((j) => j.id === jobId);
+       if (job) {
+         job.vttSegments = segments;
+         job.audioUrl = `/api/transcriptions/${jobId}/audio`;
+         this.jobs = [...this.jobs];
+       }
+     }
+   } catch (error) {
+     console.warn(`Failed to load VTT for job ${jobId}:`, error);
+   }
+ }
+
+ private setupWordHighlighting(jobId: string) {
+   const job = this.jobs.find((j) => j.id === jobId);
+   if (!job?.audioUrl || !job.vttSegments) return;
+
+   // Wait for next frame to ensure DOM is updated
+   requestAnimationFrame(() => {
+     const audioElement = this.shadowRoot?.querySelector(
+       `#audio-${jobId}`,
+     ) as HTMLAudioElement;
+     const transcriptDiv = this.shadowRoot?.querySelector(
+       `#transcript-${jobId}`,
+     ) as HTMLDivElement;
+
+     if (!audioElement || !transcriptDiv) {
+       console.warn("Could not find audio or transcript elements");
+       return;
+     }
+
+     // Track current segment
+     let currentSegmentElement: HTMLElement | null = null;
+
+     // Update highlighting on timeupdate
+     audioElement.addEventListener("timeupdate", () => {
+       const currentTime = audioElement.currentTime;
+       const segmentElements = transcriptDiv.querySelectorAll("[data-start]");
+
+       for (const el of segmentElements) {
+         const start = Number.parseFloat(
+           (el as HTMLElement).dataset.start || "0",
+         );
+         const end = Number.parseFloat((el as HTMLElement).dataset.end || "0");
+
+         if (currentTime >= start && currentTime <= end) {
+           if (currentSegmentElement !== el) {
+             currentSegmentElement?.classList.remove("current-segment");
+             (el as HTMLElement).classList.add("current-segment");
+             currentSegmentElement = el as HTMLElement;
+
+             // Auto-scroll to current segment
+             el.scrollIntoView({
+               behavior: "smooth",
+               block: "center",
+             });
+           }
+           break;
+         }
+       }
+     });
+
+     // Handle segment clicks
+     transcriptDiv.addEventListener("click", (e) => {
+       const target = e.target as HTMLElement;
+       if (target.dataset.start) {
+         const start = Number.parseFloat(target.dataset.start);
+         audioElement.currentTime = start;
+         audioElement.play();
+       }
+     });
+   });
+ }
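The `timeupdate` listener above walks every segment span on each tick. Since `timeupdate` fires only a few times per second, that linear scan is fine for typical transcripts; for very long ones, a binary search over the parsed segments would avoid the per-tick DOM walk. A sketch, assuming the sorted, non-overlapping cues that `parseVTT` yields:

```ts
// Find the index of the segment containing `time`, or -1 if the time
// falls in a gap. Assumes segments are sorted by start and non-overlapping.
function findSegment(segments: VTTSegment[], time: number): number {
  let lo = 0;
  let hi = segments.length - 1;
  while (lo <= hi) {
    const mid = (lo + hi) >> 1;
    const seg = segments[mid];
    if (!seg) return -1;
    if (time < seg.start) {
      hi = mid - 1;
    } else if (time > seg.end) {
      lo = mid + 1;
    } else {
      return mid;
    }
  }
  return -1;
}
```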
private handleDragOver(e: DragEvent) {
e.preventDefault();
this.dragOver = true;
···
}
private renderTranscript(job: TranscriptionJob) {
-   const displayed = this.displayedTranscripts.get(job.id) || "";
-   return displayed;
+   if (!job.vttSegments) {
+     const displayed = this.displayedTranscripts.get(job.id) || "";
+     return displayed;
+   }
+
+   const segments = job.vttSegments;
+   // Render segments as clickable spans
+   return html`${segments.map(
+     (segment, idx) => html`<span
+       class="segment"
+       data-start="${segment.start}"
+       data-end="${segment.end}"
+     >${segment.text}</span>${idx < segments.length - 1 ? " " : ""}`,
+   )}`;
}
+
override render() {
return html`
···
}
${
- this.displayedTranscripts.has(job.id) && this.displayedTranscripts.get(job.id)
+ job.status === "completed" && job.audioUrl && job.vttSegments
    ? html`
+     <div class="audio-player">
+       <audio id="audio-${job.id}" preload="metadata" controls src="${job.audioUrl}"></audio>
+     </div>
+     <div class="job-transcript" id="transcript-${job.id}">
+       ${this.renderTranscript(job)}
+     </div>
+   `
+   : this.displayedTranscripts.has(job.id) && this.displayedTranscripts.get(job.id)
+     ? html`
        <div class="job-transcript">${this.renderTranscript(job)}</div>
      `
-   : ""
+     : ""
}
</div>
`,
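For a completed job, the template above yields markup along these lines (job id and cue values illustrative); the `data-start`/`data-end` attributes are exactly what `setupWordHighlighting` reads for highlighting and click-to-seek:

```ts
// Sketch of the rendered DOM for one completed job with a single segment:
const rendered = `
  <div class="audio-player">
    <audio id="audio-abc" preload="metadata" controls
           src="/api/transcriptions/abc/audio"></audio>
  </div>
  <div class="job-transcript" id="transcript-abc">
    <span class="segment" data-start="1" data-end="4.5">hello world</span>
  </div>
`;
```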
src/index.ts (+77)
···
    }
  },
},
+ "/api/transcriptions/:id/audio": {
+   GET: async (req) => {
+     try {
+       const user = requireAuth(req);
+       const transcriptionId = req.params.id;
+
+       // Verify ownership and get filename
+       const transcription = db
+         .query<
+           {
+             id: string;
+             user_id: number;
+             filename: string;
+             status: string;
+           },
+           [string]
+         >("SELECT id, user_id, filename, status FROM transcriptions WHERE id = ?")
+         .get(transcriptionId);
+
+       if (!transcription || transcription.user_id !== user.id) {
+         return Response.json(
+           { error: "Transcription not found" },
+           { status: 404 },
+         );
+       }
+
+       if (transcription.status !== "completed") {
+         return Response.json(
+           { error: "Transcription not completed yet" },
+           { status: 400 },
+         );
+       }
+
+       // Serve the audio file with range request support
+       const filePath = `./uploads/${transcription.filename}`;
+       const file = Bun.file(filePath);
+
+       if (!(await file.exists())) {
+         return Response.json({ error: "Audio file not found" }, { status: 404 });
+       }
+
+       const fileSize = file.size;
+       const range = req.headers.get("range");
+
+       // Handle range requests for seeking
+       if (range) {
+         const parts = range.replace(/bytes=/, "").split("-");
+         const start = Number.parseInt(parts[0] || "0", 10);
+         const end = parts[1] ? Number.parseInt(parts[1], 10) : fileSize - 1;
+         const chunkSize = end - start + 1;
+
+         const fileSlice = file.slice(start, end + 1);
+
+         return new Response(fileSlice, {
+           status: 206,
+           headers: {
+             "Content-Range": `bytes ${start}-${end}/${fileSize}`,
+             "Accept-Ranges": "bytes",
+             "Content-Length": chunkSize.toString(),
+             "Content-Type": file.type || "audio/mpeg",
+           },
+         });
+       }
+
+       // No range request, send entire file
+       return new Response(file, {
+         headers: {
+           "Content-Type": file.type || "audio/mpeg",
+           "Accept-Ranges": "bytes",
+           "Content-Length": fileSize.toString(),
+         },
+       });
+     } catch (error) {
+       return handleError(error);
+     }
+   },
+ },
"/api/transcriptions": {
GET: async (req) => {
try {
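The handler above honors single-range `Range` headers, which is what browsers send when seeking within an `<audio>` element. A quick way to exercise it (a sketch; the job id and byte range are placeholders):

```ts
// Request the first 1 KiB of a completed job's audio. A compliant reply
// is 206 Partial Content with a matching Content-Range header.
const res = await fetch("/api/transcriptions/abc/audio", {
  headers: { Range: "bytes=0-1023" },
});
console.log(res.status); // 206
console.log(res.headers.get("Content-Range")); // e.g. "bytes 0-1023/4194304"
```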
src/lib/transcription.ts (+4 -16)
···
]);
// Connect to SSE stream from Murmur (use the job_id returned by Murmur)
- this.streamWhisperJob(transcriptionId, job_id, filePath);
+ this.streamWhisperJob(transcriptionId, job_id);
} catch (error) {
  console.error(
    `[Transcription] Failed to start ${transcriptionId}:`,
···
private streamWhisperJob(
  transcriptionId: string,
  jobId: string,
- filePath: string,
) {
  // Prevent duplicate streams using locks
  if (this.streamLocks.has(transcriptionId)) {
···
}
const update = JSON.parse(data) as WhisperJob;
- await this.handleWhisperUpdate(transcriptionId, filePath, update);
+ await this.handleWhisperUpdate(transcriptionId, update);
} catch (err) {
  console.error(
    `[Stream] Error processing update for ${transcriptionId}:`,
···
private async handleWhisperUpdate(
  transcriptionId: string,
- filePath: string,
  update: WhisperJob,
) {
  if (update.status === "pending") {
···
  transcript,
});
- // Only close stream and delete local file - keep Whisper job for potential replay/debugging
+ // Close stream - keep audio file for playback
this.closeStream(transcriptionId);
- this.deleteLocalFile(filePath);
} else if (update.status === "failed") {
  const errorMessage = (
    update.error_message ?? "Transcription failed"
···
  this.streamLocks.delete(transcriptionId);
}
- private deleteLocalFile(filePath: string) {
-   // Delete uploaded file from disk
-   Bun.file(filePath)
-     .text()
-     .then(() => Bun.write(filePath, ""))
-     .catch(() => {});
- }
-
private updateTranscription(
  transcriptionId: string,
  data: {
···
console.log(
  `[Sync] Reconnecting to active job ${localJob.id} (Murmur job ${whisperJob.id})`,
);
- const filePath = `./uploads/${localJob.filename}`;
- this.streamWhisperJob(localJob.id, whisperJob.id, filePath);
+ this.streamWhisperJob(localJob.id, whisperJob.id);
}
} else if (
  whisperJob.status === "completed" ||