···
1
+
import type { Database } from "bun:sqlite";
2
+
import { createEventSource } from "eventsource-client";
3
+
import { ErrorCode } from "./errors";
6
+
/** File-size ceiling in bytes: 25 * 1024 * 1024. Not enforced in this part of the file. */
export const MAX_FILE_SIZE = 25 * 1024 * 1024; // 25MB

/** Transcripts are cut to this many characters before they are persisted or emitted. */
export const MAX_TRANSCRIPT_LENGTH = 50000;

/** Error messages are cut to this many characters before they are persisted or emitted. */
export const MAX_ERROR_LENGTH = 255;
11
+
export type TranscriptionStatus =
17
+
export interface TranscriptionUpdate {
18
+
status: TranscriptionStatus;
20
+
transcript?: string;
21
+
error_message?: string;
22
+
error_code?: string;
25
+
export interface WhisperJob {
29
+
transcript?: string;
30
+
error_message?: string;
33
+
// Event emitter for real-time transcription updates with automatic cleanup
34
+
/**
 * Per-transcription publish/subscribe hub.
 *
 * Callbacks are grouped by transcription id. A group is dropped by a delayed
 * cleanup that is scheduled when its last callback is removed, or after a
 * terminal ("completed" / "failed") update has been emitted. Subscribing again
 * cancels the pending cleanup.
 */
export class TranscriptionEventEmitter {
  /** Delay before an idle id's listeners are discarded (5 minutes). */
  private static readonly CLEANUP_DELAY_MS = 5 * 60 * 1000;

  private listeners = new Map<
    string,
    Set<(data: TranscriptionUpdate) => void>
  >();
  private cleanupTimers = new Map<string, ReturnType<typeof setTimeout>>();

  /**
   * Subscribes `callback` to updates for `transcriptionId` and cancels any
   * cleanup that is pending for that id.
   */
  on(transcriptionId: string, callback: (data: TranscriptionUpdate) => void) {
    let callbacks = this.listeners.get(transcriptionId);
    if (callbacks === undefined) {
      callbacks = new Set();
      this.listeners.set(transcriptionId, callbacks);
    }
    callbacks.add(callback);

    this.cancelCleanup(transcriptionId);
  }

  /**
   * Unsubscribes `callback`. When that empties the id's callback set, a
   * delayed cleanup is scheduled. Unknown ids are ignored.
   */
  off(transcriptionId: string, callback: (data: TranscriptionUpdate) => void) {
    const callbacks = this.listeners.get(transcriptionId);
    if (callbacks === undefined) return;

    callbacks.delete(callback);
    if (callbacks.size === 0) {
      this.scheduleCleanup(transcriptionId);
    }
  }

  /**
   * Delivers `data` synchronously to every callback registered for
   * `transcriptionId`. A "completed" or "failed" status also schedules the
   * delayed cleanup, whether or not anyone is listening.
   */
  emit(transcriptionId: string, data: TranscriptionUpdate) {
    const callbacks = this.listeners.get(transcriptionId);
    if (callbacks !== undefined) {
      for (const callback of callbacks) {
        callback(data);
      }
    }

    if (data.status === "completed" || data.status === "failed") {
      this.scheduleCleanup(transcriptionId);
    }
  }

  /** Returns true when at least one callback is registered for the id. */
  hasListeners(transcriptionId: string): boolean {
    return (this.listeners.get(transcriptionId)?.size ?? 0) > 0;
  }

  /** Clears and forgets the pending cleanup timer for the id, if there is one. */
  private cancelCleanup(transcriptionId: string) {
    const timer = this.cleanupTimers.get(transcriptionId);
    if (timer !== undefined) {
      clearTimeout(timer);
      this.cleanupTimers.delete(transcriptionId);
    }
  }

  /**
   * (Re)starts the cleanup countdown for the id.
   *
   * Any timer already pending is cleared first. Without that, a second call
   * would overwrite the map entry while the first timer kept running, and
   * that orphaned timer could later delete listeners added in the meantime.
   */
  private scheduleCleanup(transcriptionId: string) {
    this.cancelCleanup(transcriptionId);

    const timer = setTimeout(() => {
      this.listeners.delete(transcriptionId);
      this.cleanupTimers.delete(transcriptionId);
    }, TranscriptionEventEmitter.CLEANUP_DELAY_MS);

    this.cleanupTimers.set(transcriptionId, timer);
  }
}
96
+
// Whisper service manager
97
+
export class WhisperServiceManager {
98
+
private activeStreams = new Map<
100
+
ReturnType<typeof createEventSource>
102
+
private streamLocks = new Set<string>();
105
+
private serviceUrl: string,
106
+
private db: Database,
107
+
private events: TranscriptionEventEmitter,
110
+
async startTranscription(
111
+
transcriptionId: string,
115
+
// Update status to processing
116
+
this.updateTranscription(transcriptionId, {
117
+
status: "processing",
121
+
// Read file from disk
122
+
const filePath = `./uploads/${filename}`;
123
+
const fileBuffer = await Bun.file(filePath).arrayBuffer();
125
+
// Create form data for the faster-whisper server
126
+
const formData = new FormData();
127
+
const file = new File([fileBuffer], filename, { type: "audio/mpeg" });
128
+
formData.append("file", file);
130
+
// Call the faster-whisper server to start transcription
131
+
const response = await fetch(`${this.serviceUrl}/transcribe`, {
136
+
if (!response.ok) {
137
+
const errorText = await response.text().catch(() => "Unknown error");
139
+
`Whisper service returned ${response.status}: ${errorText}`,
143
+
const { job_id } = await response.json();
145
+
`[Transcription] Created Whisper job ${job_id} for ${transcriptionId}`,
148
+
// Connect to SSE stream from Whisper
149
+
this.streamWhisperJob(transcriptionId, job_id, filePath);
152
+
`[Transcription] Failed to start ${transcriptionId}:`,
155
+
const errorMessage =
156
+
error instanceof Error ? error.message : "Unknown error";
158
+
error instanceof Error && error.message.includes("Whisper service")
159
+
? ErrorCode.WHISPER_SERVICE_ERROR
160
+
: ErrorCode.TRANSCRIPTION_FAILED;
162
+
this.updateTranscription(transcriptionId, {
164
+
error_message: errorMessage,
167
+
this.events.emit(transcriptionId, {
170
+
error_message: errorMessage,
171
+
error_code: errorCode,
176
+
private streamWhisperJob(
177
+
transcriptionId: string,
181
+
// Prevent duplicate streams using locks
182
+
if (this.streamLocks.has(transcriptionId)) {
183
+
console.log(`[Stream] Already streaming ${transcriptionId}, skipping`);
187
+
this.streamLocks.add(transcriptionId);
189
+
const es = createEventSource({
190
+
url: `${this.serviceUrl}/transcribe/${jobId}/stream`,
191
+
onMessage: ({ data }) => {
193
+
const update = JSON.parse(data) as WhisperJob;
194
+
this.handleWhisperUpdate(transcriptionId, jobId, filePath, update);
197
+
`[Stream] Error processing update for ${transcriptionId}:`,
204
+
this.activeStreams.set(transcriptionId, es);
207
+
private handleWhisperUpdate(
208
+
transcriptionId: string,
211
+
update: WhisperJob,
213
+
if (update.status === "processing") {
214
+
const progress = Math.max(10, Math.min(95, update.progress ?? 0));
215
+
this.updateTranscription(transcriptionId, { progress });
217
+
this.events.emit(transcriptionId, {
218
+
status: "processing",
221
+
} else if (update.status === "completed") {
222
+
const transcript = (update.transcript ?? "").substring(
224
+
MAX_TRANSCRIPT_LENGTH,
227
+
this.updateTranscription(transcriptionId, {
228
+
status: "completed",
233
+
this.events.emit(transcriptionId, {
234
+
status: "completed",
240
+
this.cleanupJob(transcriptionId, jobId, filePath);
241
+
} else if (update.status === "failed") {
242
+
const errorMessage = (
243
+
update.error_message ?? "Transcription failed"
244
+
).substring(0, MAX_ERROR_LENGTH);
246
+
this.updateTranscription(transcriptionId, {
248
+
error_message: errorMessage,
251
+
this.events.emit(transcriptionId, {
254
+
error_message: errorMessage,
255
+
error_code: ErrorCode.TRANSCRIPTION_FAILED,
258
+
this.closeStream(transcriptionId);
259
+
this.deleteWhisperJob(jobId);
263
+
/**
 * Releases everything held for a finished job: removes the uploaded file,
 * closes the SSE stream, and asks the Whisper service to drop the job.
 *
 * All three steps are fire-and-forget; none of them can reject into the caller.
 */
private cleanupJob(transcriptionId: string, jobId: string, filePath: string) {
  // Delete uploaded file. Unlinking removes it; writing "" would only
  // truncate it and leave an empty file behind. Best-effort: a missing file
  // or any other failure is ignored.
  void import("node:fs/promises")
    .then(({ unlink }) => unlink(filePath))
    .catch(() => {});

  this.closeStream(transcriptionId);
  this.deleteWhisperJob(jobId);
}
274
+
/**
 * Closes and forgets the event source registered for `transcriptionId`, if
 * any, and always releases the id's stream lock so a new stream can start.
 */
private closeStream(transcriptionId: string) {
  const stream = this.activeStreams.get(transcriptionId);
  if (stream !== undefined) {
    stream.close();
    this.activeStreams.delete(transcriptionId);
  }
  this.streamLocks.delete(transcriptionId);
}
283
+
/**
 * Sends a DELETE for `${serviceUrl}/transcribe/${jobId}`.
 *
 * Network failures are swallowed on purpose, and the response status is not
 * inspected.
 */
private async deleteWhisperJob(jobId: string) {
  try {
    await fetch(`${this.serviceUrl}/transcribe/${jobId}`, {
      method: "DELETE",
    });
  } catch {
    // Silent fail - job may already be deleted
  }
}
293
+
/**
 * Writes the supplied fields to the `transcriptions` row with the given id.
 *
 * Only fields that are not `undefined` are included in the SET clause.
 * `updated_at` is always set to the current Unix time in seconds. Values are
 * bound as parameters; only fixed column names are interpolated into the SQL.
 */
private updateTranscription(
  transcriptionId: string,
  data: {
    status?: TranscriptionStatus;
    progress?: number;
    transcript?: string;
    error_message?: string;
  },
) {
  const assignments: string[] = [];
  const params: (string | number)[] = [];

  // Fixed column order keeps the generated statement stable.
  const columns = ["status", "progress", "transcript", "error_message"] as const;
  for (const column of columns) {
    const value = data[column];
    if (value !== undefined) {
      assignments.push(`${column} = ?`);
      params.push(value);
    }
  }

  assignments.push("updated_at = ?");
  params.push(Math.floor(Date.now() / 1000));

  // The final placeholder belongs to the WHERE clause.
  params.push(transcriptionId);

  this.db.run(
    `UPDATE transcriptions SET ${assignments.join(", ")} WHERE id = ?`,
    params,
  );
}
333
+
async syncWithWhisper(): Promise<void> {
335
+
const whisperJobs = await this.fetchWhisperJobs();
336
+
if (!whisperJobs) return;
338
+
const activeDbJobs = this.getActiveDbJobs();
339
+
const activeJobsMap = new Map(activeDbJobs.map((j) => [j.id, j]));
341
+
await this.syncWhisperJobsToDb(whisperJobs, activeJobsMap);
342
+
await this.syncDbJobsToWhisper(activeDbJobs, whisperJobs);
346
+
error instanceof Error ? error.message : "Unknown error",
351
+
private async fetchWhisperJobs(): Promise<WhisperJob[] | null> {
353
+
const response = await fetch(`${this.serviceUrl}/jobs`);
354
+
if (!response.ok) {
355
+
console.warn("[Sync] Whisper service unavailable");
358
+
const { jobs } = await response.json();
365
+
/**
 * Returns the id, filename and status of every `transcriptions` row whose
 * status is 'uploading' or 'processing'.
 */
private getActiveDbJobs(): Array<{
  id: string;
  filename: string;
  status: string;
}> {
  const activeJobsQuery = this.db.query<
    { id: string; filename: string; status: string },
    []
  >(
    "SELECT id, filename, status FROM transcriptions WHERE status IN ('uploading', 'processing')",
  );
  return activeJobsQuery.all();
}
377
+
private async syncWhisperJobsToDb(
378
+
whisperJobs: WhisperJob[],
379
+
activeJobsMap: Map<
381
+
{ id: string; filename: string; status: string }
384
+
for (const whisperJob of whisperJobs) {
385
+
const localJob = activeJobsMap.get(whisperJob.id);
388
+
await this.handleOrphanedWhisperJob(whisperJob.id);
392
+
if (whisperJob.status === "completed" || whisperJob.status === "failed") {
393
+
await this.syncCompletedJob(whisperJob);
398
+
private async handleOrphanedWhisperJob(jobId: string) {
399
+
const jobExists = this.db
400
+
.query<{ id: string }, [string]>(
401
+
"SELECT id FROM transcriptions WHERE id = ?",
406
+
// Not our job, delete it from Whisper
407
+
await this.deleteWhisperJob(jobId);
411
+
private async syncCompletedJob(whisperJob: WhisperJob) {
413
+
const details = await this.fetchJobDetails(whisperJob.id);
414
+
if (!details) return;
416
+
if (details.status === "completed") {
418
+
details.transcript?.substring(0, MAX_TRANSCRIPT_LENGTH) ?? "";
420
+
this.updateTranscription(whisperJob.id, {
421
+
status: "completed",
426
+
this.events.emit(whisperJob.id, {
427
+
status: "completed",
431
+
} else if (details.status === "failed") {
432
+
const errorMessage = (
433
+
details.error_message ?? "Transcription failed"
434
+
).substring(0, MAX_ERROR_LENGTH);
436
+
this.updateTranscription(whisperJob.id, {
438
+
error_message: errorMessage,
441
+
this.events.emit(whisperJob.id, {
444
+
error_message: errorMessage,
448
+
await this.deleteWhisperJob(whisperJob.id);
451
+
`[Sync] Failed to retrieve details for job ${whisperJob.id}`,
456
+
/**
 * Fetches `${serviceUrl}/transcribe/${jobId}`.
 *
 * @returns the parsed JSON body, or `null` when the response status is not OK.
 * The body is not validated against `WhisperJob`. Network and JSON-parse
 * errors propagate to the caller.
 */
private async fetchJobDetails(jobId: string): Promise<WhisperJob | null> {
  const response = await fetch(`${this.serviceUrl}/transcribe/${jobId}`);
  if (!response.ok) return null;
  return response.json();
}
462
+
private async syncDbJobsToWhisper(
463
+
activeDbJobs: Array<{ id: string; filename: string; status: string }>,
464
+
whisperJobs: WhisperJob[],
466
+
for (const localJob of activeDbJobs) {
467
+
const whisperHasJob = whisperJobs.some((wj) => wj.id === localJob.id);
469
+
if (!whisperHasJob) {
470
+
// Job was lost, mark as failed
471
+
const errorMessage = "Job lost - whisper service may have restarted";
473
+
this.updateTranscription(localJob.id, {
475
+
error_message: errorMessage,
478
+
this.events.emit(localJob.id, {
481
+
error_message: errorMessage,
487
+
/**
 * Removes the uploaded files of transcriptions that finished ('completed' or
 * 'failed') and were last updated more than 24 hours ago.
 *
 * Never throws: per-file failures are ignored, and anything else is logged.
 */
async cleanupStaleFiles(): Promise<void> {
  try {
    // Find transcriptions older than 24 hours that are completed or failed
    const cutoff = Math.floor(Date.now() / 1000) - 24 * 60 * 60;
    const staleTranscriptions = this.db
      .query<{ filename: string }, [number]>(
        `SELECT filename FROM transcriptions
         WHERE status IN ('completed', 'failed')
         AND updated_at < ?`,
      )
      .all(cutoff);

    const { unlink } = await import("node:fs/promises");

    for (const { filename } of staleTranscriptions) {
      const filePath = `./uploads/${filename}`;
      // Unlink instead of writing "": a write only truncates the file, and
      // creates an empty one if the upload was already gone. A file that is
      // already missing is not an error here.
      await unlink(filePath).catch(() => {});
    }

    // The count is the number of matching rows, not the number of files that
    // were actually unlinked.
    console.log(
      `[Cleanup] Removed ${staleTranscriptions.length} stale files`,
    );
  } catch (error) {
    console.error("[Cleanup] Failed:", error);
  }
}