馃 distributed transcription service thistle.dunkirk.sh
// File-based transcript storage to avoid SQLite size limits

import { rm } from "node:fs/promises";
import { basename } from "node:path";

const TRANSCRIPTS_DIR = "./transcripts";

/** Characters permitted in a transcription ID: alphanumerics, hyphens, underscores. */
const SAFE_ID_PATTERN = /^[a-zA-Z0-9_-]+$/;

/**
 * Validate a transcription ID so it can be embedded in a file path without
 * allowing directory traversal.
 *
 * @param id - Transcription ID received from the caller.
 * @returns The same ID, once it has passed every check.
 * @throws Error if the ID is empty, contains characters outside
 *   `[a-zA-Z0-9_-]`, or differs from its own basename.
 */
function validateTranscriptionId(id: string): string {
  // Reject empty strings (also covers null/undefined slipping in at runtime)
  if (!id) {
    throw new Error("Invalid transcription ID: empty");
  }

  // Only allow safe characters (alphanumeric, hyphens, underscores)
  if (!SAFE_ID_PATTERN.test(id)) {
    throw new Error("Invalid transcription ID format");
  }

  // Defence in depth: the pattern above already excludes path separators,
  // but keep the basename comparison in case the pattern is ever loosened.
  const safeName = basename(id);
  if (safeName !== id) {
    throw new Error("Invalid transcription ID: path traversal detected");
  }

  return safeName;
}

/**
 * Build the on-disk path of the VTT file for a transcription.
 *
 * @param transcriptionId - ID to validate and embed in the path.
 * @returns `${TRANSCRIPTS_DIR}/<id>.vtt`.
 * @throws Error if the ID fails validation.
 */
function transcriptPath(transcriptionId: string): string {
  return `${TRANSCRIPTS_DIR}/${validateTranscriptionId(transcriptionId)}.vtt`;
}

/**
 * Write WebVTT transcript to file system
 *
 * @param transcriptionId - ID of the transcription; must pass validation.
 * @param vttContent - WebVTT text to store.
 * @throws Error if the ID is invalid; write failures from `Bun.write` propagate.
 */
export async function saveTranscriptVTT(
  transcriptionId: string,
  vttContent: string,
): Promise<void> {
  await Bun.write(transcriptPath(transcriptionId), vttContent);
}

/**
 * Read WebVTT transcript from file system
 *
 * @param transcriptionId - ID of the transcription; must pass validation.
 * @returns The file's text, or `null` if it could not be read for any reason.
 * @throws Error if the ID is invalid.
 */
export async function getTranscriptVTT(
  transcriptionId: string,
): Promise<string | null> {
  // Validate outside the try block so an invalid ID is reported, not mapped to null.
  const filePath = transcriptPath(transcriptionId);
  try {
    return await Bun.file(filePath).text();
  } catch {
    return null;
  }
}

/**
 * Compatibility wrappers using VTT as the canonical format
 */

/**
 * Report whether a transcript file exists for the given transcription.
 *
 * Uses an existence check rather than reading the whole file, so large
 * transcripts are not loaded into memory just to answer yes/no.
 *
 * @param transcriptionId - ID of the transcription; must pass validation.
 * @returns `true` if the VTT file exists, `false` otherwise (including when
 *   the existence check itself fails).
 * @throws Error if the ID is invalid.
 */
export async function hasTranscript(transcriptionId: string): Promise<boolean> {
  const filePath = transcriptPath(transcriptionId);
  try {
    return await Bun.file(filePath).exists();
  } catch {
    return false;
  }
}

/**
 * Store a transcript. Delegates to {@link saveTranscriptVTT}.
 *
 * @param transcriptionId - ID of the transcription; must pass validation.
 * @param content - Transcript text, stored as a VTT file.
 */
export async function saveTranscript(
  transcriptionId: string,
  content: string,
): Promise<void> {
  // Store transcripts as VTT files to keep a single canonical format
  return saveTranscriptVTT(transcriptionId, content);
}

/**
 * Fetch a transcript. Delegates to {@link getTranscriptVTT}.
 *
 * @param transcriptionId - ID of the transcription; must pass validation.
 * @returns The stored VTT text, or `null` if it could not be read.
 */
export async function getTranscript(
  transcriptionId: string,
): Promise<string | null> {
  // Read VTT content as the transcript text
  return getTranscriptVTT(transcriptionId);
}

/**
 * Delete the transcript file for a transcription, if present.
 *
 * Deletion is best-effort: a missing file is not an error, and any other
 * filesystem failure is logged rather than thrown.
 *
 * @param transcriptionId - ID of the transcription; must pass validation.
 * @throws Error if the ID is invalid.
 */
export async function deleteTranscript(transcriptionId: string): Promise<void> {
  const filePath = transcriptPath(transcriptionId);
  try {
    // `force: true` makes a missing file a no-op, so there is no separate
    // exists-then-unlink step that could race with another deleter.
    await rm(filePath, { force: true });
  } catch (error: unknown) {
    // Stay best-effort, but leave a trace instead of failing silently.
    console.warn(`Failed to delete transcript file ${filePath}:`, error);
  }
}