···
import { ErrorCode } from "./errors";
6
-
export const MAX_FILE_SIZE = 25 * 1024 * 1024; // 25MB
6
+
// Size limit for files, expressed in bytes (100 * 1024 * 1024).
// NOTE(review): the place where this limit is enforced is not visible here — confirm against callers.
export const MAX_FILE_SIZE = 100 * 1024 * 1024; // 100MB
// Cap applied via `substring(0, MAX_TRANSCRIPT_LENGTH)` to transcript text,
// so it counts UTF-16 code units rather than bytes.
export const MAX_TRANSCRIPT_LENGTH = 50000;
// Cap applied via `substring(0, MAX_ERROR_LENGTH)` to error messages,
// so it counts UTF-16 code units rather than bytes.
export const MAX_ERROR_LENGTH = 255;
···
export type TranscriptionStatus =
···
private events: TranscriptionEventEmitter,
111
+
async checkHealth(): Promise<boolean> {
113
+
const response = await fetch(`${this.serviceUrl}/jobs`, {
116
+
return response.ok;
async startTranscription(
···
const filePath = `./uploads/${filename}`;
const fileBuffer = await Bun.file(filePath).arrayBuffer();
125
-
// Create form data for the faster-whisper server
137
+
// Create form data for the Murmur server
const formData = new FormData();
const file = new File([fileBuffer], filename, { type: "audio/mpeg" });
formData.append("file", file);
130
-
// Call the faster-whisper server to start transcription
142
+
// Call the Murmur server to start transcription
const response = await fetch(`${this.serviceUrl}/transcribe`, {
···
const { job_id } = await response.json();
145
-
// Connect to SSE stream from Whisper
157
+
// Store Murmur's job_id in our database for tracking
158
+
this.db.run("UPDATE transcriptions SET whisper_job_id = ? WHERE id = ?", [
163
+
// Connect to SSE stream from Murmur (use the job_id returned by Murmur)
this.streamWhisperJob(transcriptionId, job_id, filePath);
···
const es = createEventSource({
url: `${this.serviceUrl}/transcribe/${jobId}/stream`,
187
-
onMessage: ({ data }) => {
205
+
onMessage: ({ event, data }) => {
207
+
// Handle "error" events from SSE (e.g., "Job not found")
208
+
if (event === "error") {
209
+
const errorData = JSON.parse(data) as { error: string };
211
+
`[Stream] Whisper service error for ${transcriptionId}:`,
215
+
// Mark the job as failed in our database
216
+
this.updateTranscription(transcriptionId, {
218
+
error_message: errorData.error,
221
+
this.events.emit(transcriptionId, {
224
+
error_message: errorData.error,
225
+
error_code: ErrorCode.TRANSCRIPTION_FAILED,
228
+
this.closeStream(transcriptionId);
const update = JSON.parse(data) as WhisperJob;
190
-
this.handleWhisperUpdate(transcriptionId, jobId, filePath, update);
233
+
this.handleWhisperUpdate(transcriptionId, filePath, update);
`[Stream] Error processing update for ${transcriptionId}:`,
···
private handleWhisperUpdate(
251
+
if (update.status === "pending") {
252
+
// Initial status, no action needed
if (update.status === "processing") {
210
-
const progress = Math.max(10, Math.min(95, update.progress ?? 0));
211
-
this.updateTranscription(transcriptionId, { progress });
257
+
// Murmur is initializing (file I/O, WhisperKit setup) - no transcript yet
258
+
const progress = Math.min(100, update.progress ?? 0);
260
+
this.updateTranscription(transcriptionId, {
261
+
status: "processing",
this.events.emit(transcriptionId, {
269
+
} else if (update.status === "transcribing") {
270
+
// Active transcription with progress callbacks
271
+
const progress = Math.min(100, update.progress ?? 0);
273
+
// If progress is still 0, keep status as "processing" until real progress starts
274
+
const status = progress === 0 ? "processing" : "transcribing";
276
+
// Strip WhisperKit special tokens from intermediate transcript
277
+
let transcript = update.transcript ?? "";
278
+
transcript = transcript.replace(/<\|[^|]+\|>/g, "").trim();
280
+
this.updateTranscription(transcriptionId, {
286
+
this.events.emit(transcriptionId, {
289
+
transcript: transcript || undefined,
} else if (update.status === "completed") {
292
+
// Final transcript should already have tokens stripped by Murmur
const transcript = (update.transcript ?? "").substring(
···
236
-
this.cleanupJob(transcriptionId, jobId, filePath);
310
+
// Only close stream and delete local file - keep Whisper job for potential replay/debugging
311
+
this.closeStream(transcriptionId);
312
+
this.deleteLocalFile(filePath);
} else if (update.status === "failed") {
update.error_message ?? "Transcription failed"
···
error_code: ErrorCode.TRANSCRIPTION_FAILED,
330
+
// Only close stream - keep failed jobs in Whisper for debugging
this.closeStream(transcriptionId);
255
-
this.deleteWhisperJob(jobId);
259
-
private cleanupJob(transcriptionId: string, jobId: string, filePath: string) {
260
-
// Delete uploaded file
263
-
.then(() => Bun.write(filePath, ""))
266
-
this.closeStream(transcriptionId);
267
-
this.deleteWhisperJob(jobId);
private closeStream(transcriptionId: string) {
const es = this.activeStreams.get(transcriptionId);
···
this.streamLocks.delete(transcriptionId);
279
-
private async deleteWhisperJob(jobId: string) {
281
-
await fetch(`${this.serviceUrl}/transcribe/${jobId}`, {
285
-
// Silent fail - job may already be deleted
344
+
private deleteLocalFile(filePath: string) {
345
+
// Delete uploaded file from disk
348
+
.then(() => Bun.write(filePath, ""))
private updateTranscription(
···
async syncWithWhisper(): Promise<void> {
331
-
const whisperJobs = await this.fetchWhisperJobs();
332
-
if (!whisperJobs) return;
393
+
const whisperJobs = await this.fetchWhisperJobs();
394
+
if (!whisperJobs) {
395
+
throw new Error("Murmur service unavailable");
334
-
const activeDbJobs = this.getActiveDbJobs();
335
-
const activeJobsMap = new Map(activeDbJobs.map((j) => [j.id, j]));
398
+
const activeDbJobs = this.getActiveDbJobs();
399
+
const activeJobsMap = new Map(activeDbJobs.map((j) => [j.id, j]));
337
-
await this.syncWhisperJobsToDb(whisperJobs, activeJobsMap);
338
-
await this.syncDbJobsToWhisper(activeDbJobs, whisperJobs);
342
-
error instanceof Error ? error.message : "Unknown error",
401
+
await this.syncWhisperJobsToDb(whisperJobs, activeJobsMap);
402
+
await this.syncDbJobsToWhisper(activeDbJobs, whisperJobs);
private async fetchWhisperJobs(): Promise<WhisperJob[] | null> {
···
private getActiveDbJobs(): Array<{
421
+
whisper_job_id: string | null;
367
-
.query<{ id: string; filename: string; status: string }, []>(
368
-
"SELECT id, filename, status FROM transcriptions WHERE status IN ('uploading', 'processing')",
429
+
whisper_job_id: string | null;
435
+
"SELECT id, whisper_job_id, filename, status FROM transcriptions WHERE status IN ('uploading', 'processing', 'transcribing')",
···
whisperJobs: WhisperJob[],
377
-
{ id: string; filename: string; status: string }
446
+
whisper_job_id: string | null;
for (const whisperJob of whisperJobs) {
381
-
const localJob = activeJobsMap.get(whisperJob.id);
453
+
// Try to find by whisper_job_id first, then fall back to id
454
+
let localJob = Array.from(activeJobsMap.values()).find(
455
+
(j) => j.whisper_job_id === whisperJob.id,
459
+
// Legacy: try matching by our transcriptionId === whisperJob.id
460
+
localJob = activeJobsMap.get(whisperJob.id);
await this.handleOrphanedWhisperJob(whisperJob.id);
388
-
if (whisperJob.status === "completed" || whisperJob.status === "failed") {
389
-
await this.syncCompletedJob(whisperJob);
468
+
// Reconnect to active jobs on startup
470
+
whisperJob.status === "processing" ||
471
+
whisperJob.status === "transcribing"
473
+
// Check if we're already streaming this job
474
+
if (!this.activeStreams.has(localJob.id)) {
476
+
`[Sync] Reconnecting to active job ${localJob.id} (Murmur job ${whisperJob.id})`,
478
+
const filePath = `./uploads/${localJob.filename}`;
479
+
this.streamWhisperJob(localJob.id, whisperJob.id, filePath);
482
+
whisperJob.status === "completed" ||
483
+
whisperJob.status === "failed"
485
+
// Use our transcription ID, not Murmur's job ID
486
+
await this.syncCompletedJob(whisperJob, localJob.id);
private async handleOrphanedWhisperJob(jobId: string) {
492
+
// Check if this Murmur job_id exists in our DB (either as id or whisper_job_id)
const jobExists = this.db
396
-
.query<{ id: string }, [string]>(
397
-
"SELECT id FROM transcriptions WHERE id = ?",
494
+
.query<{ id: string }, [string, string]>(
495
+
"SELECT id FROM transcriptions WHERE id = ? OR whisper_job_id = ?",
497
+
.get(jobId, jobId);
402
-
// Not our job, delete it from Whisper
403
-
await this.deleteWhisperJob(jobId);
500
+
// Not our job - Murmur will keep it until explicitly deleted
502
+
`[Sync] Found orphaned job ${jobId} in Murmur (not in our DB)`,
407
-
private async syncCompletedJob(whisperJob: WhisperJob) {
507
+
private async syncCompletedJob(
508
+
whisperJob: WhisperJob,
509
+
transcriptionId: string,
const details = await this.fetchJobDetails(whisperJob.id);
···
details.transcript?.substring(0, MAX_TRANSCRIPT_LENGTH) ?? "";
416
-
this.updateTranscription(whisperJob.id, {
519
+
this.updateTranscription(transcriptionId, {
422
-
this.events.emit(whisperJob.id, {
525
+
this.events.emit(transcriptionId, {
···
details.error_message ?? "Transcription failed"
).substring(0, MAX_ERROR_LENGTH);
432
-
this.updateTranscription(whisperJob.id, {
535
+
this.updateTranscription(transcriptionId, {
error_message: errorMessage,
437
-
this.events.emit(whisperJob.id, {
540
+
this.events.emit(transcriptionId, {
error_message: errorMessage,
444
-
await this.deleteWhisperJob(whisperJob.id);
547
+
// Job persists in Murmur until explicitly deleted - we just sync state
`[Sync] Failed to retrieve details for job ${whisperJob.id}`,
···
private async syncDbJobsToWhisper(
459
-
activeDbJobs: Array<{ id: string; filename: string; status: string }>,
562
+
activeDbJobs: Array<{
564
+
whisper_job_id: string | null;
whisperJobs: WhisperJob[],
for (const localJob of activeDbJobs) {
463
-
const whisperHasJob = whisperJobs.some((wj) => wj.id === localJob.id);
571
+
// Check if Murmur has this job (by whisper_job_id or legacy id match)
572
+
const whisperHasJob = whisperJobs.some(
573
+
(wj) => wj.id === localJob.whisper_job_id || wj.id === localJob.id,
465
-
if (!whisperHasJob) {
466
-
// Job was lost, mark as failed
576
+
if (!whisperHasJob && localJob.whisper_job_id) {
577
+
// Job was lost from Murmur, mark as failed
const errorMessage = "Job lost - whisper service may have restarted";
this.updateTranscription(localJob.id, {