distributed transcription service
thistle.dunkirk.sh
1// File-based transcript storage to avoid SQLite size limits
2
3import { unlinkSync } from "node:fs";
4import { basename } from "node:path";
5
// Directory that holds every transcript file written or read by this module.
// The path is relative, so it is resolved against the process's working directory.
const TRANSCRIPTS_DIR = "./transcripts";
7
/**
 * Checks that a transcription ID is safe to embed in a file name.
 *
 * An ID is accepted only when it is non-empty, consists solely of ASCII
 * letters, digits, hyphens and underscores, and is identical to its own
 * path basename.
 *
 * @param id - Candidate transcription ID.
 * @returns The ID, unchanged, once it has passed every check.
 * @throws Error if the ID is empty, contains a disallowed character, or
 *   differs from its basename.
 */
function validateTranscriptionId(id: string): string {
  const isEmpty = !id || id.length === 0;
  if (isEmpty) {
    throw new Error("Invalid transcription ID: empty");
  }

  // Allow-list: anything outside [a-zA-Z0-9_-] (dots, slashes, spaces, ...) is refused.
  const allowedPattern = /^[a-zA-Z0-9_-]+$/;
  if (!allowedPattern.test(id)) {
    throw new Error("Invalid transcription ID format");
  }

  // Second line of defence: the ID must already be a bare file name.
  const bareName = basename(id);
  if (bareName !== id) {
    throw new Error("Invalid transcription ID: path traversal detected");
  }
  return bareName;
}
27
/**
 * Persists the plain-text transcript for a transcription.
 *
 * The ID is validated first; the text is then written with `Bun.write` to
 * `<TRANSCRIPTS_DIR>/<id>.txt`.
 *
 * @param transcriptionId - ID used to derive the file name.
 * @param transcript - Text to store.
 * @returns A promise that resolves once the write has completed; it rejects
 *   if the ID fails validation or the write fails.
 */
export async function saveTranscript(
  transcriptionId: string,
  transcript: string,
): Promise<void> {
  const id = validateTranscriptionId(transcriptionId);
  const target = TRANSCRIPTS_DIR + "/" + id + ".txt";
  await Bun.write(target, transcript);
}
39
/**
 * Loads the plain-text transcript stored for a transcription.
 *
 * @param transcriptionId - ID used to derive the file name.
 * @returns The contents of `<TRANSCRIPTS_DIR>/<id>.txt`, or `null` when the
 *   read throws for any reason. An ID that fails validation is not mapped to
 *   `null`: validation runs outside the `try`, so the promise rejects.
 */
export async function getTranscript(
  transcriptionId: string,
): Promise<string | null> {
  const id = validateTranscriptionId(transcriptionId);
  const source = TRANSCRIPTS_DIR + "/" + id + ".txt";

  let contents: string | null;
  try {
    contents = await Bun.file(source).text();
  } catch {
    // Any read failure is reported to the caller as "no transcript".
    contents = null;
  }
  return contents;
}
54
/**
 * Delete the plain-text transcript file for a transcription.
 *
 * Removes `<TRANSCRIPTS_DIR>/<id>.txt`. A file that is already absent counts
 * as success, so the call can be repeated safely. Any other filesystem
 * failure (permissions, path is a directory, ...) is rethrown rather than
 * silently ignored, so callers never believe a transcript was removed when
 * it is still on disk.
 *
 * Only the `.txt` file is touched; a `.vtt` file with the same ID is left
 * in place.
 *
 * @param transcriptionId - ID used to derive the file name.
 * @returns A promise that resolves once the file is gone (or was never there).
 *   It rejects if the ID fails validation or the unlink fails with an error
 *   other than `ENOENT`.
 */
export async function deleteTranscript(transcriptionId: string): Promise<void> {
  const safeId = validateTranscriptionId(transcriptionId);
  const filePath = `${TRANSCRIPTS_DIR}/${safeId}.txt`;
  try {
    unlinkSync(filePath);
  } catch (error: unknown) {
    // ENOENT means the file doesn't exist or was already deleted: nothing to do.
    const alreadyGone =
      typeof error === "object" &&
      error !== null &&
      "code" in error &&
      error.code === "ENOENT";
    if (!alreadyGone) {
      throw error;
    }
  }
}
67
/**
 * Reports whether a plain-text transcript file is present for a transcription.
 *
 * @param transcriptionId - ID used to derive the file name.
 * @returns The result of `Bun.file(...).exists()` for
 *   `<TRANSCRIPTS_DIR>/<id>.txt`. The promise rejects if the ID fails
 *   validation.
 */
export async function hasTranscript(transcriptionId: string): Promise<boolean> {
  const id = validateTranscriptionId(transcriptionId);
  const candidate = Bun.file(TRANSCRIPTS_DIR + "/" + id + ".txt");
  const found = await candidate.exists();
  return found;
}
76
/**
 * Persists the WebVTT rendition of a transcript.
 *
 * The ID is validated first; the content is then written with `Bun.write` to
 * `<TRANSCRIPTS_DIR>/<id>.vtt`.
 *
 * @param transcriptionId - ID used to derive the file name.
 * @param vttContent - WebVTT text to store.
 * @returns A promise that resolves once the write has completed; it rejects
 *   if the ID fails validation or the write fails.
 */
export async function saveTranscriptVTT(
  transcriptionId: string,
  vttContent: string,
): Promise<void> {
  const id = validateTranscriptionId(transcriptionId);
  const target = TRANSCRIPTS_DIR + "/" + id + ".vtt";
  await Bun.write(target, vttContent);
}
88
/**
 * Loads the WebVTT rendition stored for a transcription.
 *
 * @param transcriptionId - ID used to derive the file name.
 * @returns The contents of `<TRANSCRIPTS_DIR>/<id>.vtt`, or `null` when the
 *   read throws for any reason. An ID that fails validation is not mapped to
 *   `null`: validation runs outside the `try`, so the promise rejects.
 */
export async function getTranscriptVTT(
  transcriptionId: string,
): Promise<string | null> {
  const id = validateTranscriptionId(transcriptionId);
  const source = TRANSCRIPTS_DIR + "/" + id + ".vtt";

  let contents: string | null;
  try {
    contents = await Bun.file(source).text();
  } catch {
    // Any read failure is reported to the caller as "no VTT transcript".
    contents = null;
  }
  return contents;
}