distributed transcription service
thistle.dunkirk.sh
1// File-based transcript storage to avoid SQLite size limits
2
3import { basename } from "node:path";
4
/** Relative directory path under which transcript `.vtt` files are stored. */
const TRANSCRIPTS_DIR = "./transcripts";
6
/**
 * Check that a transcription ID is safe to embed in a file name.
 *
 * @param id - Candidate transcription ID.
 * @returns The ID itself once it has passed every check.
 * @throws Error when the ID is empty, contains any character other than
 *   ASCII letters, digits, `_` or `-`, or is not equal to its own basename.
 */
function validateTranscriptionId(id: string): string {
  const isEmpty = !id || id.length === 0;
  if (isEmpty) {
    throw new Error("Invalid transcription ID: empty");
  }

  // Whitelist: ASCII alphanumerics, hyphen and underscore only.
  const allowedPattern = /^[a-zA-Z0-9_-]+$/;
  const hasOnlySafeChars = allowedPattern.test(id);
  if (!hasOnlySafeChars) {
    throw new Error("Invalid transcription ID format");
  }

  // Second guard: the ID must already be a bare file name (no directories).
  const leafName = basename(id);
  if (leafName !== id) {
    throw new Error("Invalid transcription ID: path traversal detected");
  }

  return leafName;
}
26
/**
 * Persist a WebVTT transcript as `<TRANSCRIPTS_DIR>/<id>.vtt`.
 *
 * @param transcriptionId - ID of the transcription; validated before use.
 * @param vttContent - WebVTT text to write to the file.
 * @throws Error when `transcriptionId` fails validation; write failures from
 *   `Bun.write` also propagate to the caller.
 */
export async function saveTranscriptVTT(
  transcriptionId: string,
  vttContent: string,
): Promise<void> {
  const fileName = `${validateTranscriptionId(transcriptionId)}.vtt`;
  const target = [TRANSCRIPTS_DIR, fileName].join("/");
  await Bun.write(target, vttContent);
}
38
/**
 * Load the WebVTT transcript stored at `<TRANSCRIPTS_DIR>/<id>.vtt`.
 *
 * @param transcriptionId - ID of the transcription; validated before use.
 * @returns The file's text, or `null` when reading it fails for any reason.
 * @throws Error when `transcriptionId` fails validation (this happens before
 *   the read is attempted, so it is not converted to `null`).
 */
export async function getTranscriptVTT(
  transcriptionId: string,
): Promise<string | null> {
  const fileName = `${validateTranscriptionId(transcriptionId)}.vtt`;
  const source = [TRANSCRIPTS_DIR, fileName].join("/");

  let vtt: string | null;
  try {
    vtt = await Bun.file(source).text();
  } catch {
    // Any read failure is reported to the caller as "no transcript".
    vtt = null;
  }
  return vtt;
}
53
54/**
55 * Compatibility wrappers using VTT as the canonical format
56 */
/**
 * Report whether a transcript file exists for the given transcription.
 *
 * Uses `BunFile.exists()` so the transcript body is never loaded into memory
 * just to answer a yes/no question.
 *
 * @param transcriptionId - ID of the transcription; validated before use.
 * @returns `true` when `<TRANSCRIPTS_DIR>/<id>.vtt` exists, `false` when it
 *   does not or when the existence check itself fails.
 * @throws Error when `transcriptionId` fails validation.
 */
export async function hasTranscript(transcriptionId: string): Promise<boolean> {
  const safeId = validateTranscriptionId(transcriptionId);
  const filePath = `${TRANSCRIPTS_DIR}/${safeId}.vtt`;
  try {
    return await Bun.file(filePath).exists();
  } catch {
    // Preserve the original contract: any failure means "no transcript".
    return false;
  }
}
68
/**
 * Store a transcript; delegates to `saveTranscriptVTT`, so the content is
 * written to the `.vtt` file for this transcription.
 *
 * @param transcriptionId - ID of the transcription.
 * @param content - Transcript text to persist.
 */
export async function saveTranscript(
  transcriptionId: string,
  content: string,
): Promise<void> {
  // VTT is the single on-disk format, so this is a straight pass-through.
  await saveTranscriptVTT(transcriptionId, content);
}
76
/**
 * Fetch a transcript; delegates to `getTranscriptVTT` and returns whatever
 * it yields.
 *
 * @param transcriptionId - ID of the transcription.
 * @returns The stored VTT text, or `null` when `getTranscriptVTT` returns `null`.
 */
export async function getTranscript(
  transcriptionId: string,
): Promise<string | null> {
  // The VTT text doubles as the transcript text.
  const vtt = await getTranscriptVTT(transcriptionId);
  return vtt;
}
83
/**
 * Remove the transcript file for the given transcription, if there is one.
 *
 * Deletion is best-effort: a missing file is not an error, and any other
 * filesystem failure is swallowed so callers never see a rejection from the
 * removal itself.
 *
 * @param transcriptionId - ID of the transcription; validated before use.
 * @throws Error when `transcriptionId` fails validation.
 */
export async function deleteTranscript(transcriptionId: string): Promise<void> {
  const safeId = validateTranscriptionId(transcriptionId);
  const filePath = `${TRANSCRIPTS_DIR}/${safeId}.vtt`;
  try {
    // Async removal keeps the event loop free; `force: true` makes a missing
    // file a no-op, which also removes the exists-then-unlink race.
    const { rm } = await import("node:fs/promises");
    await rm(filePath, { force: true });
  } catch {
    // Deliberately ignored: deletion is best-effort.
  }
}