🪻 distributed transcription service thistle.dunkirk.sh

feat: auto detect file time

dunkirk.sh c0f6c375 4d7be0af

verified
+147 -25
src/components/upload-recording-modal.ts
···
@state() private selectedSectionId: string | null = null;
@state() private uploading = false;
@state() private error: string | null = null;
+
@state() private detectedMeetingTime: string | null = null;
+
@state() private detectingMeetingTime = false;
static override styles = css`
:host {
···
align-items: center;
gap: 0.5rem;
}
+
+
.meeting-time-selector {
+
display: flex;
+
flex-direction: column;
+
gap: 0.5rem;
+
}
+
+
.meeting-time-button {
+
padding: 0.75rem 1rem;
+
background: var(--background);
+
border: 2px solid var(--secondary);
+
border-radius: 6px;
+
font-size: 0.875rem;
+
font-weight: 500;
+
cursor: pointer;
+
transition: all 0.2s;
+
font-family: inherit;
+
color: var(--text);
+
text-align: left;
+
display: flex;
+
align-items: center;
+
gap: 0.5rem;
+
}
+
+
.meeting-time-button:hover {
+
border-color: var(--primary);
+
background: color-mix(in srgb, var(--primary) 5%, transparent);
+
}
+
+
.meeting-time-button.selected {
+
background: var(--primary);
+
border-color: var(--primary);
+
color: white;
+
}
+
+
.meeting-time-button.detected {
+
border-color: var(--accent);
+
}
+
+
.meeting-time-button.detected::after {
+
content: "✨ Auto-detected";
+
margin-left: auto;
+
font-size: 0.75rem;
+
opacity: 0.8;
+
}
+
+
.detecting-text {
+
font-size: 0.875rem;
+
color: var(--paynes-gray);
+
padding: 0.5rem;
+
text-align: center;
+
font-style: italic;
+
}
`;
-
private handleFileSelect(e: Event) {
+
/**
 * Handles a change on the file input.
 *
 * When at least one file was chosen, stores the first one, clears the
 * current error and any previous meeting-time detection/selection, and then
 * (if a class id is set) waits for meeting-time auto-detection to finish.
 * Does nothing when the input holds no files.
 */
private async handleFileSelect(e: Event) {
  const { files } = e.target as HTMLInputElement;
  if (!files || files.length === 0) return;

  const chosen = files[0] ?? null;
  this.selectedFile = chosen;
  this.error = null;

  // A new file invalidates whatever was detected or picked for the old one.
  this.detectedMeetingTime = null;
  this.selectedMeetingTimeId = null;

  // Auto-detect meeting time from file metadata
  if (!this.selectedFile || !this.classId) return;
  await this.detectMeetingTime();
}
-
private handleMeetingTimeChange(e: Event) {
-
const select = e.target as HTMLSelectElement;
-
this.selectedMeetingTimeId = select.value || null;
+
/**
 * Asks the server which meeting time the currently selected file belongs to.
 *
 * Posts the file, the class id and (when non-zero) the file's `lastModified`
 * timestamp to `/api/transcriptions/detect-meeting-time`. When the response
 * reports a match, `detectedMeetingTime` and `selectedMeetingTimeId` are both
 * set to the returned id. Any failure is logged with `console.warn` and
 * otherwise ignored, leaving the selection untouched.
 *
 * The file is captured at call time: if the user selects a different file
 * while the request is in flight, the late response is discarded instead of
 * overwriting the state that belongs to the newer file.
 */
private async detectMeetingTime() {
  const file = this.selectedFile;
  const classId = this.classId;
  if (!file || !classId) return;

  this.detectingMeetingTime = true;

  try {
    const formData = new FormData();
    formData.append("audio", file);
    formData.append("class_id", classId);

    // Send the file's original lastModified timestamp (preserved by browser)
    // This is more accurate than server-side file timestamps
    if (file.lastModified) {
      formData.append("file_timestamp", file.lastModified.toString());
    }

    const response = await fetch("/api/transcriptions/detect-meeting-time", {
      method: "POST",
      body: formData,
    });

    if (!response.ok) {
      console.warn("Failed to detect meeting time");
      return;
    }

    const data = await response.json();

    // Stale response: another file was selected (or the selection was
    // cleared) while this request was running.
    if (this.selectedFile !== file) return;

    if (data.detected && data.meeting_time_id) {
      this.detectedMeetingTime = data.meeting_time_id;
      this.selectedMeetingTimeId = data.meeting_time_id;
    }
  } catch (error) {
    console.warn("Error detecting meeting time:", error);
  } finally {
    // Only clear the flag when no newer detection has taken over; a newer
    // call for a different file clears it in its own `finally`.
    if (this.selectedFile === file || !this.selectedFile) {
      this.detectingMeetingTime = false;
    }
  }
}
+
+
/**
 * Records the meeting time the user picked.
 * Stores `meetingTimeId` in `selectedMeetingTimeId`; `detectedMeetingTime`
 * is left as it was.
 */
private handleMeetingTimeSelect(meetingTimeId: string) {
+
this.selectedMeetingTimeId = meetingTimeId;
}
private handleSectionChange(e: Event) {
···
this.selectedMeetingTimeId = null;
this.selectedSectionId = null;
this.error = null;
+
this.detectedMeetingTime = null;
+
this.detectingMeetingTime = false;
this.dispatchEvent(new CustomEvent("close"));
}
···
throw new Error(data.error || "Upload failed");
}
-
// Success - close modal and notify parent
+
// Success
this.dispatchEvent(new CustomEvent("upload-success"));
this.handleClose();
} catch (error) {
···
<div class="help-text">Maximum file size: 100MB</div>
</div>
-
<div class="form-group">
-
<label for="meeting-time">Meeting Time</label>
-
<select
-
id="meeting-time"
-
@change=${this.handleMeetingTimeChange}
-
?disabled=${this.uploading}
-
required
-
>
-
<option value="">Select a meeting time...</option>
-
${this.meetingTimes.map(
-
(meeting) => html`
-
<option value=${meeting.id}>${meeting.label}</option>
-
`,
-
)}
-
</select>
-
<div class="help-text">
-
Select which meeting this recording is for
-
</div>
-
</div>
+
${
+
this.selectedFile
+
? html`
+
<div class="form-group">
+
<label>Meeting Time</label>
+
${
+
this.detectingMeetingTime
+
? html`<div class="detecting-text">Detecting meeting time from audio metadata...</div>`
+
: html`
+
<div class="meeting-time-selector">
+
${this.meetingTimes.map(
+
(meeting) => html`
+
<button
+
type="button"
+
class="meeting-time-button ${this.selectedMeetingTimeId === meeting.id ? "selected" : ""} ${this.detectedMeetingTime === meeting.id ? "detected" : ""}"
+
@click=${() => this.handleMeetingTimeSelect(meeting.id)}
+
?disabled=${this.uploading}
+
>
+
${meeting.label}
+
</button>
+
`,
+
)}
+
</div>
+
`
+
}
+
<div class="help-text">
+
${
+
this.detectedMeetingTime
+
? "Auto-detected based on recording date. You can change if needed."
+
: "Select which meeting this recording is for"
+
}
+
</div>
+
</div>
+
`
+
: ""
+
}
${
-
this.sections.length > 1
+
this.sections.length > 1 && this.selectedFile
? html`
<div class="form-group">
<label for="section">Section (optional)</label>
+157 -1
src/index.ts
···
WhisperServiceManager,
} from "./lib/transcription";
import {
+
extractAudioCreationDate,
+
findMatchingMeetingTime,
+
getDayName,
+
} from "./lib/audio-metadata";
+
import {
validateClassId,
validateCourseCode,
validateCourseName,
···
},
},
+
"/api/transcriptions/detect-meeting-time": {
+
/**
 * Suggests which meeting time an audio file belongs to.
 *
 * Expects multipart form data with `audio` (file), `class_id` (string) and
 * an optional `file_timestamp` (milliseconds since the epoch, as sent by
 * the client from `File.lastModified`). The caller must be enrolled in the
 * class or have the `admin` role, otherwise a 403 is returned.
 *
 * The creation date comes from `file_timestamp` when it is a usable value;
 * otherwise the upload is written to a temporary file and handed to
 * `extractAudioCreationDate`. The date is then matched against the class's
 * meeting times. The JSON response always carries `detected` and
 * `meeting_time_id`; `day`/`date` are included once a date is known and
 * `message` explains a non-match. Thrown errors go through `handleError`.
 */
POST: async (req) => {
  try {
    const user = requireAuth(req);

    const formData = await req.formData();
    const file = formData.get("audio");
    const classId = formData.get("class_id");
    const fileTimestampStr = formData.get("file_timestamp");

    // FormData entries are either strings or files; reject the wrong kind
    // instead of casting so a text "audio" field cannot slip through.
    if (!file || typeof file === "string") {
      throw ValidationErrors.missingField("audio");
    }
    if (typeof classId !== "string" || !classId) {
      throw ValidationErrors.missingField("class_id");
    }

    // Verify user is enrolled in the class
    const enrolled = isUserEnrolledInClass(user.id, classId);
    if (!enrolled && user.role !== "admin") {
      return Response.json(
        { error: "Not enrolled in this class" },
        { status: 403 },
      );
    }

    let creationDate: Date | null = null;

    // Try client-provided timestamp first (most accurate - from original file)
    if (typeof fileTimestampStr === "string" && fileTimestampStr) {
      const timestamp = Number.parseInt(fileTimestampStr, 10);
      const candidate = new Date(timestamp);
      // Out-of-range values produce an Invalid Date whose toISOString()
      // throws, so validate the Date itself and not just the parsed number.
      if (timestamp > 0 && !Number.isNaN(candidate.getTime())) {
        creationDate = candidate;
        console.log(
          `[Upload] Using client-provided file timestamp: ${creationDate.toISOString()}`,
        );
      }
    }

    // Fallback: extract from audio file metadata
    if (!creationDate) {
      // Save file temporarily. The extension comes from a user-supplied
      // name, so only a short alphanumeric suffix is allowed into the path.
      const tempId = crypto.randomUUID();
      const rawExtension = file.name.split(".").pop()?.toLowerCase() ?? "";
      const fileExtension = /^[a-z0-9]{1,10}$/.test(rawExtension)
        ? rawExtension
        : "tmp";
      const tempFilename = `temp-${tempId}.${fileExtension}`;
      const tempPath = `./uploads/${tempFilename}`;

      await Bun.write(tempPath, file);

      try {
        creationDate = await extractAudioCreationDate(tempPath);
      } finally {
        // Clean up temp file
        try {
          await Bun.$`rm ${tempPath}`.quiet();
        } catch {
          // Ignore cleanup errors
        }
      }
    }

    if (!creationDate) {
      return Response.json({
        detected: false,
        meeting_time_id: null,
        message: "Could not extract creation date from audio file",
      });
    }

    // Get meeting times for this class
    const meetingTimes = getMeetingTimesForClass(classId);

    if (meetingTimes.length === 0) {
      return Response.json({
        detected: false,
        meeting_time_id: null,
        message: "No meeting times configured for this class",
      });
    }

    // Find matching meeting time based on day of week
    const matchedId = findMatchingMeetingTime(creationDate, meetingTimes);
    const dayName = getDayName(creationDate);

    if (matchedId) {
      return Response.json({
        detected: true,
        meeting_time_id: matchedId,
        day: dayName,
        date: creationDate.toISOString(),
      });
    }

    return Response.json({
      detected: false,
      meeting_time_id: null,
      day: dayName,
      date: creationDate.toISOString(),
      message: `No meeting time matches ${dayName}`,
    });
  } catch (error) {
    return handleError(error);
  }
},
+
},
"/api/transcriptions": {
GET: async (req) => {
try {
···
const uploadDir = "./uploads";
await Bun.write(`${uploadDir}/${filename}`, file);
+
// Auto-detect meeting time from audio metadata if class provided and no meeting_time_id
+
let finalMeetingTimeId = meetingTimeId;
+
if (classId && !meetingTimeId) {
+
try {
+
// Extract creation date from audio file
+
const creationDate = await extractAudioCreationDate(
+
`${uploadDir}/${filename}`,
+
);
+
+
if (creationDate) {
+
// Get meeting times for this class
+
const meetingTimes = getMeetingTimesForClass(classId);
+
+
if (meetingTimes.length > 0) {
+
// Find matching meeting time based on day of week
+
const matchedId = findMatchingMeetingTime(
+
creationDate,
+
meetingTimes,
+
);
+
+
if (matchedId) {
+
finalMeetingTimeId = matchedId;
+
const dayName = getDayName(creationDate);
+
console.log(
+
`[Upload] Auto-detected meeting time for ${dayName} (${creationDate.toISOString()}) -> ${matchedId}`,
+
);
+
} else {
+
const dayName = getDayName(creationDate);
+
console.log(
+
`[Upload] No meeting time matches ${dayName}, leaving unassigned`,
+
);
+
}
+
}
+
}
+
} catch (error) {
+
// Non-fatal: just log and continue without auto-detection
+
console.warn(
+
"[Upload] Failed to auto-detect meeting time:",
+
error instanceof Error ? error.message : "Unknown error",
+
);
+
}
+
}
+
// Create database record
db.run(
"INSERT INTO transcriptions (id, user_id, class_id, meeting_time_id, section_id, filename, original_filename, status) VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
···
transcriptionId,
user.id,
classId,
-
meetingTimeId,
+
finalMeetingTimeId,
sectionId,
filename,
file.name,
···
return Response.json(
id: transcriptionId,
+
meeting_time_id: finalMeetingTimeId,
message: "Upload successful",
},
{ status: 201 },
+55
src/lib/audio-metadata.integration.test.ts
···
+
import { afterAll, describe, expect, test } from "bun:test";
+
import { extractAudioCreationDate } from "./audio-metadata";
+
+
// Integration tests for extractAudioCreationDate. They shell out to ffmpeg
// to generate a one-second silent sample in the working directory.
describe("extractAudioCreationDate (integration)", () => {
  const testAudioPath = "./test-audio-sample.m4a";

  // Clean up test file after tests
  afterAll(async () => {
    try {
      if (await Bun.file(testAudioPath).exists()) {
        await Bun.$`rm ${testAudioPath}`;
      }
    } catch {
      // Ignore cleanup errors
    }
  });

  test("extracts creation date from audio file with metadata", async () => {
    // Create a test audio file with metadata using ffmpeg
    // 1 second silent audio with creation_time metadata
    const creationTime = "2024-01-15T14:30:00.000000Z";

    // Create the file with metadata
    await Bun.$`ffmpeg -f lavfi -i anullsrc=r=44100:cl=mono -t 1 -metadata creation_time=${creationTime} -y ${testAudioPath}`.quiet();

    const date = await extractAudioCreationDate(testAudioPath);

    expect(date).not.toBeNull();
    expect(date).toBeInstanceOf(Date);
    // JavaScript Date.toISOString() uses 3 decimal places, not 6 like the input
    expect(date?.toISOString()).toBe("2024-01-15T14:30:00.000Z");
  });

  // The title used to say "returns null", which contradicted the assertions:
  // without a creation_time tag the function falls back to a file timestamp.
  test("falls back to file timestamp for audio file without creation_time metadata", async () => {
    // Create audio file without metadata
    await Bun.$`ffmpeg -f lavfi -i anullsrc=r=44100:cl=mono -t 1 -y ${testAudioPath}`.quiet();

    const date = await extractAudioCreationDate(testAudioPath);

    // Should use the file's own timestamp as fallback
    expect(date).not.toBeNull();
    expect(date).toBeInstanceOf(Date);
    // Should be very recent (within last minute)
    const now = new Date();
    const diff = now.getTime() - (date?.getTime() ?? 0);
    expect(diff).toBeLessThan(60000); // Less than 1 minute
  });

  test("returns null for non-existent file", async () => {
    const date = await extractAudioCreationDate("./non-existent-file.m4a");
    expect(date).toBeNull();
  });
});
+128
src/lib/audio-metadata.test.ts
···
+
import { describe, expect, test } from "bun:test";
+
import {
+
findMatchingMeetingTime,
+
getDayName,
+
getDayOfWeek,
+
meetingTimeLabelMatchesDay,
+
} from "./audio-metadata";
+
+
describe("getDayOfWeek", () => {
  test("returns correct day number", () => {
    // Dates are built with the local-time constructor (noon) because
    // getDayOfWeek reads the local weekday; UTC-noon instants fall on the
    // next day for hosts at UTC+12 or later and would make this test fail.

    // January 1, 2024 is a Monday (day 1)
    const monday = new Date(2024, 0, 1, 12);
    expect(getDayOfWeek(monday)).toBe(1);

    // January 7, 2024 is a Sunday (day 0)
    const sunday = new Date(2024, 0, 7, 12);
    expect(getDayOfWeek(sunday)).toBe(0);

    // January 6, 2024 is a Saturday (day 6)
    const saturday = new Date(2024, 0, 6, 12);
    expect(getDayOfWeek(saturday)).toBe(6);
  });
});
+
+
describe("getDayName", () => {
  test("returns correct day name", () => {
    // January 1-7, 2024 runs Monday through Sunday. Local noon keeps the
    // weekday stable regardless of the host's time zone.
    const expectedNames = [
      "Monday",
      "Tuesday",
      "Wednesday",
      "Thursday",
      "Friday",
      "Saturday",
      "Sunday",
    ];

    expectedNames.forEach((expected, offset) => {
      expect(getDayName(new Date(2024, 0, 1 + offset, 12))).toBe(expected);
    });
  });
});
+
+
describe("meetingTimeLabelMatchesDay", () => {
  // Each case is a [label, dayName] pair; every pair in a table is expected
  // to produce the same result.
  type LabelCase = [label: string, dayName: string];

  const expectAll = (cases: LabelCase[], expected: boolean) => {
    for (const [label, dayName] of cases) {
      expect(meetingTimeLabelMatchesDay(label, dayName)).toBe(expected);
    }
  };

  test("matches full day names", () => {
    expectAll(
      [
        ["Monday Lecture", "Monday"],
        ["Tuesday Lab", "Tuesday"],
        ["Wednesday Discussion", "Wednesday"],
      ],
      true,
    );
  });

  test("matches 3-letter abbreviations", () => {
    expectAll(
      [
        ["Mon Lecture", "Monday"],
        ["Tue Lab", "Tuesday"],
        ["Wed Discussion", "Wednesday"],
        ["Thu Seminar", "Thursday"],
        ["Fri Workshop", "Friday"],
        ["Sat Review", "Saturday"],
        ["Sun Study", "Sunday"],
      ],
      true,
    );
  });

  test("is case insensitive", () => {
    expectAll(
      [
        ["MONDAY LECTURE", "Monday"],
        ["monday lecture", "Monday"],
        ["MoNdAy LeCTuRe", "Monday"],
      ],
      true,
    );
  });

  test("does not match wrong days", () => {
    expectAll(
      [
        ["Monday Lecture", "Tuesday"],
        ["Wednesday Lab", "Thursday"],
        ["Lecture Hall A", "Monday"],
      ],
      false,
    );
  });

  test("handles labels without day names", () => {
    expectAll(
      [
        ["Lecture", "Monday"],
        ["Lab Session", "Tuesday"],
        ["Section A", "Wednesday"],
      ],
      false,
    );
  });
});
+
+
describe("findMatchingMeetingTime", () => {
  const meetingTimes = [
    { id: "mt1", label: "Monday Lecture" },
    { id: "mt2", label: "Wednesday Discussion" },
    { id: "mt3", label: "Friday Lab" },
  ];

  // Local noon on the given day of January 2024 (Jan 1 is a Monday). The
  // matcher reads the local weekday, so fixtures must not depend on the
  // host's offset from UTC.
  const january2024 = (day: number) => new Date(2024, 0, day, 12);

  test("finds correct meeting time for full day name", () => {
    const monday = january2024(1); // Monday
    expect(findMatchingMeetingTime(monday, meetingTimes)).toBe("mt1");

    const wednesday = january2024(3); // Wednesday
    expect(findMatchingMeetingTime(wednesday, meetingTimes)).toBe("mt2");

    const friday = january2024(5); // Friday
    expect(findMatchingMeetingTime(friday, meetingTimes)).toBe("mt3");
  });

  test("finds correct meeting time for abbreviated day name", () => {
    const abbrevMeetingTimes = [
      { id: "mt1", label: "Mon Lecture" },
      { id: "mt2", label: "Wed Discussion" },
      { id: "mt3", label: "Fri Lab" },
    ];

    const monday = january2024(1);
    expect(findMatchingMeetingTime(monday, abbrevMeetingTimes)).toBe("mt1");
  });

  test("returns null when no match found", () => {
    const tuesday = january2024(2); // Tuesday
    expect(findMatchingMeetingTime(tuesday, meetingTimes)).toBe(null);

    const saturday = january2024(6); // Saturday
    expect(findMatchingMeetingTime(saturday, meetingTimes)).toBe(null);
  });

  test("returns null for empty meeting times", () => {
    const monday = january2024(1);
    expect(findMatchingMeetingTime(monday, [])).toBe(null);
  });

  test("returns first match when multiple matches exist", () => {
    const duplicateMeetingTimes = [
      { id: "mt1", label: "Monday Lecture" },
      { id: "mt2", label: "Monday Lab" },
    ];

    const monday = january2024(1);
    expect(findMatchingMeetingTime(monday, duplicateMeetingTimes)).toBe("mt1");
  });
});
+144
src/lib/audio-metadata.ts
···
+
import { $ } from "bun";
+
+
/**
 * Determines when an audio file was created.
 *
 * Runs `ffprobe` on the file and looks through a list of format tags that
 * may hold a date; the first one that parses to a valid date with at least
 * day precision wins. When no tag qualifies, falls back to the file's
 * filesystem birth time (or modification time when no birth time is
 * reported).
 *
 * @param filePath Path to audio file
 * @returns The detected date, or null when ffprobe, JSON parsing or the
 *   stat call fails (the error is logged)
 */
export async function extractAudioCreationDate(
  filePath: string,
): Promise<Date | null> {
  try {
    // Use ffprobe to extract creation_time metadata
    // -v quiet: suppress verbose output
    // -print_format json: output as JSON
    // -show_entries format_tags: show all tags to search for date fields
    const result =
      await $`ffprobe -v quiet -print_format json -show_entries format_tags ${filePath}`.text();

    const metadata = JSON.parse(result);
    const tags = metadata?.format?.tags || {};

    // Try multiple metadata fields that might contain creation date
    const dateFields = [
      tags.creation_time, // Standard creation_time
      tags.date, // Common date field
      tags.DATE, // Uppercase variant
      tags.year, // Year field
      tags.YEAR, // Uppercase variant
      tags["com.apple.quicktime.creationdate"], // Apple QuickTime
      tags.TDRC, // ID3v2 recording time
      tags.TDRL, // ID3v2 release time
    ];

    // A bare year ("2023") or year-month ("2023-05") still parses, but to
    // the first day of that period. Callers use the result to pick a
    // weekday, so such values would yield an arbitrary day; skip them.
    const lacksDayPrecision = /^\s*\d{4}(-\d{1,2})?\s*$/;

    for (const dateField of dateFields) {
      if (!dateField || lacksDayPrecision.test(String(dateField))) {
        continue;
      }

      const date = new Date(dateField);
      if (!Number.isNaN(date.getTime())) {
        console.log(
          `[AudioMetadata] Extracted creation date from metadata: ${date.toISOString()} from ${filePath}`,
        );
        return date;
      }
    }

    // Fallback: use file birth time (original creation time on filesystem)
    // NOTE(review): for a file the server has just written, this is
    // presumably the upload time rather than the recording time - confirm
    // that callers are happy with that.
    console.log(
      `[AudioMetadata] No creation_time metadata found, using file birth time`,
    );
    const file = Bun.file(filePath);
    const stat = await file.stat();
    const date = new Date(stat.birthtime || stat.mtime);
    console.log(
      `[AudioMetadata] Using file birth time: ${date.toISOString()} from ${filePath}`,
    );
    return date;
  } catch (error) {
    console.error(
      `[AudioMetadata] Failed to extract metadata from ${filePath}:`,
      error instanceof Error ? error.message : "Unknown error",
    );
    return null;
  }
}
+
+
/**
 * Returns the weekday index of a date as reported by `Date#getDay`.
 *
 * @param date Date to inspect
 * @returns 0 for Sunday through 6 for Saturday
 */
export function getDayOfWeek(date: Date): number {
  const weekdayIndex = date.getDay();
  return weekdayIndex;
}
+
+
/**
 * Returns the English weekday name for a date.
 *
 * @param date Date to inspect
 * @returns "Sunday" through "Saturday", or "Unknown" when `Date#getDay`
 *   does not yield a usable index (for example an invalid date)
 */
export function getDayName(date: Date): string {
  // Indexed by Date#getDay, which starts the week on Sunday.
  const weekdayNames = [
    "Sunday",
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
  ];

  const name = weekdayNames[date.getDay()];
  return name ? name : "Unknown";
}
+
+
/**
 * Checks if a meeting time label refers to a specific day.
 *
 * The label is split into words and a word matches when it is the day name,
 * its plural, or a leading abbreviation of at least three letters. So
 * "Monday Lecture", "Mondays 10am", "Mon Lecture", "Tues/Thurs Lab" and
 * "MonWedFri" all match their days. Matching is case insensitive.
 *
 * Whole words are compared rather than substrings, so "Common Lecture" or
 * "Monthly Review" do not match Monday and "Statue Drawing" does not match
 * Tuesday.
 *
 * @param label Meeting time label to inspect
 * @param dayName English day name, e.g. "Monday"
 * @returns true when some word of the label denotes `dayName`; always false
 *   for an empty day name
 */
export function meetingTimeLabelMatchesDay(
  label: string,
  dayName: string,
): boolean {
  const day = dayName.trim().toLowerCase();
  if (day === "") return false;

  // Collect words two ways: split on non-letters after lowercasing (keeps
  // "MoNdAy" together), and split the original text on camelCase boundaries
  // as well (separates "MonWedFri").
  const words = new Set<string>();
  for (const word of label.toLowerCase().split(/[^a-z]+/)) {
    words.add(word);
  }
  for (const word of label.split(/[^A-Za-z]+|(?<=[a-z])(?=[A-Z])/)) {
    words.add(word.toLowerCase());
  }

  for (const word of words) {
    // Shorter fragments ("m", "tu") are too ambiguous to count as a day.
    if (word.length < 3) continue;
    if (day.startsWith(word) || word === `${day}s`) {
      return true;
    }
  }

  return false;
}
+
+
/**
 * Finds the first meeting time whose label matches the weekday of a date.
 *
 * The weekday name comes from `getDayName` and each label is tested with
 * `meetingTimeLabelMatchesDay`; the outcome is logged either way.
 *
 * @param date Date from audio metadata
 * @param meetingTimes Available meeting times for the class
 * @returns ID of the first matching meeting time, or null when nothing
 *   matches or `date` is not a valid date
 */
export function findMatchingMeetingTime(
  date: Date,
  meetingTimes: Array<{ id: string; label: string }>,
): string | null {
  // An invalid date has no weekday; without this guard it would be turned
  // into the placeholder name "Unknown" and compared against labels.
  if (Number.isNaN(date.getTime())) {
    console.log("[AudioMetadata] Cannot match meeting time: invalid date");
    return null;
  }

  const dayName = getDayName(date);

  // Find meeting time that matches the day
  const match = meetingTimes.find((mt) =>
    meetingTimeLabelMatchesDay(mt.label, dayName),
  );

  if (match) {
    console.log(
      `[AudioMetadata] Matched ${dayName} to meeting time: ${match.label}`,
    );
    return match.id;
  }

  console.log(
    `[AudioMetadata] No meeting time found matching ${dayName} in available options: ${meetingTimes.map((mt) => mt.label).join(", ")}`,
  );
  return null;
}