🪻 distributed transcription service thistle.dunkirk.sh

feat: add handling for vtt transcripts

dunkirk.sh 5d3390bb 54239ae6

verified
+78 -1
src/index.ts
···
type TranscriptionUpdate,
WhisperServiceManager,
} from "./lib/transcription";
-
import { getTranscript } from "./lib/transcript-storage";
import indexHTML from "./pages/index.html";
import settingsHTML from "./pages/settings.html";
import transcribeHTML from "./pages/transcribe.html";
···
GET: async () => {
const isHealthy = await whisperService.checkHealth();
return Response.json({ available: isHealthy });
},
},
"/api/transcriptions": {
···
type TranscriptionUpdate,
WhisperServiceManager,
} from "./lib/transcription";
+
import { getTranscript, getTranscriptVTT } from "./lib/transcript-storage";
import indexHTML from "./pages/index.html";
import settingsHTML from "./pages/settings.html";
import transcribeHTML from "./pages/transcribe.html";
···
GET: async () => {
const isHealthy = await whisperService.checkHealth();
return Response.json({ available: isHealthy });
+
},
+
},
+
"/api/transcriptions/:id": {
+
/**
 * Serve the stored transcript for a single transcription.
 *
 * The caller must pass `requireAuth` and own the transcription row; a missing
 * row and a row owned by another user both produce the same 404. A row whose
 * status is not "completed" produces a 400.
 *
 * Query parameters:
 *   - `format=vtt` returns the WebVTT file as a `text/vtt` attachment
 *     (404 when no VTT content is stored).
 *   - any other or absent value returns the plain-text transcript as
 *     `text/plain` (404 when none is stored).
 *
 * Anything thrown along the way is turned into a response by `handleError`.
 */
GET: async (req) => {
	try {
		const user = requireAuth(req);
		const transcriptionId = req.params.id;

		// Verify ownership
		const transcription = db
			.query<
				{
					id: string;
					user_id: number;
					status: string;
					original_filename: string;
				},
				[string]
			>(
				"SELECT id, user_id, status, original_filename FROM transcriptions WHERE id = ?",
			)
			.get(transcriptionId);

		if (!transcription || transcription.user_id !== user.id) {
			return Response.json(
				{ error: "Transcription not found" },
				{ status: 404 },
			);
		}

		if (transcription.status !== "completed") {
			return Response.json(
				{ error: "Transcription not completed yet" },
				{ status: 400 },
			);
		}

		// Get format from query parameter
		const url = new URL(req.url);
		const format = url.searchParams.get("format");

		// Return WebVTT format if requested
		if (format === "vtt") {
			const vttContent = await getTranscriptVTT(transcriptionId);

			if (!vttContent) {
				return Response.json(
					{ error: "VTT transcript not available" },
					{ status: 404 },
				);
			}

			// original_filename comes from the upload, so it must not be placed
			// into a header verbatim. Lone surrogates are replaced first because
			// encodeURIComponent throws URIError on them.
			const downloadName = `${transcription.original_filename}.vtt`.replace(
				/[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g,
				"\uFFFD",
			);

			// Plain `filename=` fallback: printable ASCII only, with the two
			// characters that would terminate or escape the quoted-string removed.
			const asciiName = downloadName.replace(/[^\x20-\x7E]|["\\]/g, "_");

			// RFC 5987 `filename*` carries the real (possibly non-ASCII) name.
			// encodeURIComponent leaves ' ( ) * unescaped, which are not valid
			// attr-chars, so they are percent-encoded by hand.
			const encodedName = encodeURIComponent(downloadName).replace(
				/['()*]/g,
				(ch) => `%${ch.charCodeAt(0).toString(16).toUpperCase()}`,
			);

			return new Response(vttContent, {
				headers: {
					"Content-Type": "text/vtt",
					"Content-Disposition": `attachment; filename="${asciiName}"; filename*=UTF-8''${encodedName}`,
				},
			});
		}

		// Default: return plain text transcript from file
		const transcript = await getTranscript(transcriptionId);
		if (!transcript) {
			return Response.json(
				{ error: "Transcript not available" },
				{ status: 404 },
			);
		}

		return new Response(transcript, {
			headers: {
				"Content-Type": "text/plain",
			},
		});
	} catch (error) {
		return handleError(error);
	}
},
},
"/api/transcriptions": {
+17
src/lib/transcript-storage.test.ts
···
import {
deleteTranscript,
getTranscript,
hasTranscript,
saveTranscript,
} from "./transcript-storage";
test("transcript storage", async () => {
···
}
});
···
import {
deleteTranscript,
getTranscript,
+
getTranscriptVTT,
hasTranscript,
saveTranscript,
+
saveTranscriptVTT,
} from "./transcript-storage";
test("transcript storage", async () => {
···
}
});
+
// Round-trip check: text written with saveTranscriptVTT must come back
// unchanged from getTranscriptVTT for the same id.
test("VTT transcript storage", async () => {
	const id = "test-vtt-123";
	const payload = "WEBVTT\n\n00:00:00.000 --> 00:00:02.500\nHello world\n\n";

	// Store the cue text, then read it straight back and compare.
	await saveTranscriptVTT(id, payload);
	expect(await getTranscriptVTT(id)).toBe(payload);

	// Clean up via the shared delete helper.
	await deleteTranscript(id);
});
+
+27
src/lib/transcript-storage.ts
···
const filePath = `${TRANSCRIPTS_DIR}/${safeId}.txt`;
return await Bun.file(filePath).exists();
}
···
const filePath = `${TRANSCRIPTS_DIR}/${safeId}.txt`;
return await Bun.file(filePath).exists();
}
+
+
/**
 * Store a transcription's WebVTT text on disk.
 *
 * The id is passed through `validateTranscriptionId` and the result is used
 * as the file stem of `<TRANSCRIPTS_DIR>/<id>.vtt`.
 *
 * @param transcriptionId - Identifier of the transcription the VTT belongs to.
 * @param vttContent - Complete WebVTT document to write.
 */
export async function saveTranscriptVTT(
	transcriptionId: string,
	vttContent: string,
): Promise<void> {
	const target = `${TRANSCRIPTS_DIR}/${validateTranscriptionId(transcriptionId)}.vtt`;
	await Bun.write(target, vttContent);
}
+
+
/**
 * Load a transcription's WebVTT text from disk.
 *
 * The id is passed through `validateTranscriptionId` and the file
 * `<TRANSCRIPTS_DIR>/<id>.vtt` is read.
 *
 * @param transcriptionId - Identifier of the transcription to look up.
 * @returns The file's text, or `null` when reading it throws for any reason.
 */
export async function getTranscriptVTT(
	transcriptionId: string,
): Promise<string | null> {
	const vttPath = `${TRANSCRIPTS_DIR}/${validateTranscriptionId(transcriptionId)}.vtt`;

	let contents: string | null = null;
	try {
		contents = await Bun.file(vttPath).text();
	} catch {
		// Any read failure is reported to the caller as "no VTT available".
	}
	return contents;
}
+41 -1
src/lib/transcription.ts
···
import type { Database } from "bun:sqlite";
import { createEventSource } from "eventsource-client";
import { ErrorCode } from "./errors";
-
import { saveTranscript } from "./transcript-storage";
// Constants
export const MAX_FILE_SIZE = 100 * 1024 * 1024; // 100MB
···
await saveTranscript(transcriptionId, transcript);
}
this.updateTranscription(transcriptionId, {
status: "completed",
progress: 100,
···
// Save transcript to file
if (transcript) {
await saveTranscript(transcriptionId, transcript);
}
this.updateTranscription(transcriptionId, {
···
import type { Database } from "bun:sqlite";
import { createEventSource } from "eventsource-client";
import { ErrorCode } from "./errors";
+
import { saveTranscript, saveTranscriptVTT } from "./transcript-storage";
// Constants
export const MAX_FILE_SIZE = 100 * 1024 * 1024; // 100MB
···
await saveTranscript(transcriptionId, transcript);
}
+
// Fetch and save VTT file from Murmur
+
const whisperJobId = this.db
+
.query<{ whisper_job_id: string }, [string]>(
+
"SELECT whisper_job_id FROM transcriptions WHERE id = ?",
+
)
+
.get(transcriptionId)?.whisper_job_id;
+
+
if (whisperJobId) {
+
try {
+
const vttResponse = await fetch(
+
`${this.serviceUrl}/transcribe/${whisperJobId}?format=vtt`,
+
);
+
if (vttResponse.ok) {
+
const vttContent = await vttResponse.text();
+
await saveTranscriptVTT(transcriptionId, vttContent);
+
}
+
} catch (error) {
+
console.warn(
+
`[Transcription] Failed to fetch VTT for ${transcriptionId}:`,
+
error,
+
);
+
}
+
}
+
this.updateTranscription(transcriptionId, {
status: "completed",
progress: 100,
···
// Save transcript to file
if (transcript) {
await saveTranscript(transcriptionId, transcript);
+
}
+
+
// Fetch and save VTT file
+
try {
+
const vttResponse = await fetch(
+
`${this.serviceUrl}/transcribe/${whisperJob.id}?format=vtt`,
+
);
+
if (vttResponse.ok) {
+
const vttContent = await vttResponse.text();
+
await saveTranscriptVTT(transcriptionId, vttContent);
+
}
+
} catch (error) {
+
console.warn(
+
`[Sync] Failed to fetch VTT for ${transcriptionId}:`,
+
error,
+
);
}
this.updateTranscription(transcriptionId, {