🪻 distributed transcription service thistle.dunkirk.sh

chore: remove whisper server

dunkirk.sh e1c6d940 aa80aece

verified
Changed files
-327 whisper-server
-86 whisper-server/README.md
···
- # Whisper Transcription Server
-
- This is a FastAPI server that provides real-time audio transcription using the faster-whisper library.
-
- ## Features
-
- - Real-time transcription with streaming progress updates
- - Supports multiple audio formats (MP3, WAV, M4A, etc.)
- - Language detection
- - Segment-based transcription with timestamps
- - RESTful API endpoint
-
- ## Setup
-
- ### 1. Install Dependencies
-
- ```bash
- pip install -r requirements.txt
- ```
-
- ### 2. Run the Server
-
- **Option 1: Manual setup**
- ```bash
- pip install -r requirements.txt
- python main.py
- ```
-
- **Option 2: Quick start script**
- ```bash
- ./run.sh
- ```
-
- The server will start on `http://localhost:8000` and load the Whisper model (this may take a few minutes on first run).
-
- ## API Usage
-
- ### POST `/transcribe-with-progress`
-
- Upload an audio file to get real-time transcription progress.
-
- **Example with curl:**
- ```bash
- curl -X POST "http://localhost:8000/transcribe-with-progress" \
-   -F "file=@/path/to/your/audio.mp3"
- ```
-
- **Streaming Response:**
- The endpoint returns a stream of JSON objects:
-
- ```json
- {"status": "starting", "total_duration": 15.36, "language": "en", "language_probability": 0.99}
- {"status": "progress", "percentage": 25.59, "start": 0.0, "end": 3.93, "text": "This is a test of the transcription server."}
- {"status": "progress", "percentage": 57.68, "start": 3.93, "end": 8.86, "text": "It should be streaming the results back in real time."}
- {"status": "complete"}
- ```
-
- ### Response Format
-
- - `starting`: Initial metadata about the audio file
- - `progress`: Transcription segments with progress percentage
- - `complete`: Transcription finished successfully
- - `error`: An error occurred during transcription
-
- ## Configuration
-
- You can modify the model settings in `main.py`:
-
- ```python
- model_size = "base" # Options: tiny, base, small, medium, large-v1, large-v2, large-v3
- model = WhisperModel(model_size, device="cpu", compute_type="int8")
- ```
-
- For GPU acceleration, change to:
- ```python
- model = WhisperModel(model_size, device="cuda", compute_type="float16")
- ```
-
- ## Integration with Thistle
-
- This server is designed to work with the Thistle web application. Set the `WHISPER_SERVICE_URL` environment variable in Thistle to point to this server.
-
- ```bash
- # In Thistle's .env file
- WHISPER_SERVICE_URL=http://localhost:8000
- ```
···
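For reference, the removed README documents `POST /transcribe-with-progress` as a stream of newline-delimited JSON objects. A minimal client sketch under that assumption; it uses the `requests` library (not part of the removed `requirements.txt`), and the endpoint, field names, and `audio.mp3` input are taken from or modeled on the README rather than anything that remains in the repository:

```python
import json
import requests  # assumption: any HTTP client with streaming support would work

URL = "http://localhost:8000/transcribe-with-progress"

def stream_transcription(audio_path: str) -> str:
    """Upload a file and print progress as the server streams JSON lines back."""
    parts = []
    with open(audio_path, "rb") as f:
        with requests.post(URL, files={"file": f}, stream=True) as resp:
            resp.raise_for_status()
            for line in resp.iter_lines(decode_unicode=True):
                if not line:
                    continue  # skip blank keep-alive lines
                event = json.loads(line)
                if event["status"] == "progress":
                    parts.append(event["text"])
                    print(f"{event['percentage']:5.1f}%  {event['text']}")
                elif event["status"] == "error":
                    raise RuntimeError(f"transcription failed: {event}")
    return " ".join(parts)

if __name__ == "__main__":
    print(stream_transcription("audio.mp3"))  # hypothetical input file
```

Each documented event arrives on its own line, so `iter_lines` plus `json.loads` is enough here; no SSE framing is involved for this endpoint.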
-223 whisper-server/main.py
···
- import os
- import json
- import tempfile
- import asyncio
- import sqlite3
- import time
- import uuid
- from faster_whisper import WhisperModel
- from fastapi import FastAPI, UploadFile, File
- from fastapi.responses import StreamingResponse
- from sse_starlette.sse import EventSourceResponse
-
- # --- 1. Load Model on Startup ---
- # This loads the model only once, not on every request
- print("--- Loading faster-whisper model... ---")
- model_size = "medium.en"
- # You can change this to "cuda" and "float16" if you have a GPU
- model = WhisperModel(model_size, device="cpu", compute_type="int8")
- print(f"--- Model '{model_size}' loaded. Server is ready. ---")
-
- # --- 2. Setup Database for Job Tracking ---
- db_path = "./whisper.db" # Independent DB for Whisper server
- db = sqlite3.connect(db_path, check_same_thread=False)
- db.execute("""
-     CREATE TABLE IF NOT EXISTS whisper_jobs (
-         id TEXT PRIMARY KEY,
-         status TEXT DEFAULT 'pending',
-         progress REAL DEFAULT 0,
-         transcript TEXT DEFAULT '',
-         error_message TEXT DEFAULT '',
-         created_at INTEGER,
-         updated_at INTEGER
-     )
- """)
- db.commit()
-
- # --- 2. Create FastAPI App ---
- app = FastAPI(title="Whisper Transcription Server with Progress")
-
-
- # --- 3. Define the Transcription Function ---
- # Runs in background and updates DB
- def run_transcription(job_id: str, temp_file_path: str):
-     try:
-         # 1. Update to processing
-         db.execute("UPDATE whisper_jobs SET status = 'processing', updated_at = ? WHERE id = ?", (int(time.time()), job_id))
-         db.commit()
-
-         # 2. Get segments and total audio duration
-         segments, info = model.transcribe(
-             temp_file_path,
-             beam_size=5,
-             vad_filter=True
-         )
-
-         total_duration = round(info.duration, 2)
-         print(f"Job {job_id}: Total audio duration: {total_duration}s")
-         print(f"Job {job_id}: Detected language: {info.language}")
-
-         transcript = ""
-
-         # 3. Process each segment
-         for segment in segments:
-             progress_percent = (segment.end / total_duration) * 100
-             transcript += segment.text.strip() + " "
-
-             db.execute("""
-                 UPDATE whisper_jobs SET progress = ?, transcript = ?, updated_at = ? WHERE id = ?
-             """, (round(progress_percent, 2), transcript.strip(), int(time.time()), job_id))
-             db.commit()
-
-         # 4. Complete
-         db.execute("UPDATE whisper_jobs SET status = 'completed', progress = 100, updated_at = ? WHERE id = ?", (int(time.time()), job_id))
-         db.commit()
-
-     except Exception as e:
-         db.execute("UPDATE whisper_jobs SET status = 'failed', error_message = ?, updated_at = ? WHERE id = ?", (str(e), int(time.time()), job_id))
-         db.commit()
-
-     finally:
-         # Clean up temp file
-         print(f"Job {job_id}: Cleaning up temp file: {temp_file_path}")
-         os.remove(temp_file_path)
-
-
- # --- 4. Define the FastAPI Endpoints ---
- @app.post("/transcribe")
- async def transcribe_endpoint(file: UploadFile = File(...)):
-     """
-     Accepts an audio file, starts transcription in background, returns job ID.
-     """
-
-     # Generate job ID
-     job_id = str(uuid.uuid4())
-
-     # Save the uploaded file to a temporary file
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".tmp") as temp_file:
-         while content := await file.read(1024 * 1024):
-             temp_file.write(content)
-         temp_file_path = temp_file.name
-
-     print(f"Job {job_id}: File saved to temporary path: {temp_file_path}")
-
-     # Create job in DB
-     db.execute("INSERT INTO whisper_jobs (id, created_at, updated_at) VALUES (?, ?, ?)", (job_id, int(time.time()), int(time.time())))
-     db.commit()
-
-     # Start transcription in background
-     asyncio.create_task(asyncio.to_thread(run_transcription, job_id, temp_file_path))
-
-     return {"job_id": job_id}
-
- @app.get("/transcribe/{job_id}/stream")
- async def stream_transcription_status(job_id: str):
-     """
-     Stream the status and progress of a transcription job via SSE.
-     """
-     async def event_generator():
-         last_updated_at = None
-
-         while True:
-             row = db.execute("""
-                 SELECT status, progress, transcript, error_message, updated_at
-                 FROM whisper_jobs
-                 WHERE id = ?
-             """, (job_id,)).fetchone()
-
-             if not row:
-                 yield {
-                     "event": "error",
-                     "data": json.dumps({"error": "Job not found"})
-                 }
-                 return
-
-             status, progress, transcript, error_message, updated_at = row
-
-             # Only send if data changed
-             if updated_at != last_updated_at:
-                 last_updated_at = updated_at
-
-                 data = {
-                     "status": status,
-                     "progress": progress,
-                 }
-
-                 # Include transcript only if it changed (save bandwidth)
-                 if transcript:
-                     data["transcript"] = transcript
-
-                 if error_message:
-                     data["error_message"] = error_message
-
-                 yield {
-                     "event": "message",
-                     "data": json.dumps(data)
-                 }
-
-             # Close stream if job is complete or failed
-             if status in ('completed', 'failed'):
-                 return
-
-             # Poll every 500ms
-             await asyncio.sleep(0.5)
-
-     return EventSourceResponse(event_generator())
-
- @app.get("/transcribe/{job_id}")
- def get_transcription_status(job_id: str):
-     """
-     Get the status and progress of a transcription job.
-     """
-     row = db.execute("SELECT status, progress, transcript, error_message FROM whisper_jobs WHERE id = ?", (job_id,)).fetchone()
-     if not row:
-         return {"error": "Job not found"}, 404
-
-     status, progress, transcript, error_message = row
-     return {
-         "status": status,
-         "progress": progress,
-         "transcript": transcript,
-         "error_message": error_message
-     }
-
- @app.get("/jobs")
- def list_jobs():
-     """
-     List all jobs with their current status. Used for recovery/sync.
-     """
-     rows = db.execute("""
-         SELECT id, status, progress, created_at, updated_at
-         FROM whisper_jobs
-         ORDER BY created_at DESC
-     """).fetchall()
-
-     jobs = []
-     for row in rows:
-         jobs.append({
-             "id": row[0],
-             "status": row[1],
-             "progress": row[2],
-             "created_at": row[3],
-             "updated_at": row[4]
-         })
-
-     return {"jobs": jobs}
-
- @app.delete("/transcribe/{job_id}")
- def delete_job(job_id: str):
-     """
-     Delete a job from the database. Used for cleanup.
-     """
-     result = db.execute("DELETE FROM whisper_jobs WHERE id = ?", (job_id,))
-     db.commit()
-
-     if result.rowcount == 0:
-         return {"error": "Job not found"}, 404
-
-     return {"success": True}
-
-
- if __name__ == "__main__":
-     import uvicorn
-     uvicorn.run(app, host="0.0.0.0", port=8000)
···
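Note that the removed `main.py` exposes a job-based API rather than the `/transcribe-with-progress` endpoint the README describes: `POST /transcribe` returns a `job_id`, progress is written to `whisper.db`, and clients either poll `GET /transcribe/{job_id}` or follow the SSE stream at `GET /transcribe/{job_id}/stream`. A minimal polling client sketch; the endpoint paths, JSON keys, status values, and 0.5 s cadence come from the removed code, while `requests` and the `audio.mp3` input are illustrative assumptions:

```python
import time
import requests  # assumption: any HTTP client would do

BASE_URL = "http://localhost:8000"  # default host/port from the removed main.py

def transcribe(audio_path: str, poll_interval: float = 0.5) -> str:
    """Submit a file to the job-based API and poll until it completes or fails."""
    # POST /transcribe accepts a multipart upload under the field name "file"
    with open(audio_path, "rb") as f:
        job_id = requests.post(f"{BASE_URL}/transcribe", files={"file": f}).json()["job_id"]

    while True:
        # GET /transcribe/{job_id} returns status, progress, transcript, error_message
        job = requests.get(f"{BASE_URL}/transcribe/{job_id}").json()
        print(f"{job['status']}: {job['progress']}%")
        if job["status"] == "completed":
            return job["transcript"]
        if job["status"] == "failed":
            raise RuntimeError(job["error_message"])
        time.sleep(poll_interval)  # the SSE endpoint polls the same table at this rate

if __name__ == "__main__":
    print(transcribe("audio.mp3"))  # hypothetical input file
```

The SSE endpoint pushes the same rows over `text/event-stream`, so a streaming client would only need to parse the `data:` lines; polling is shown here because it needs no event-stream handling.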
-4 whisper-server/requirements.txt
···
- fastapi[all]==0.115.6
- uvicorn[standard]==0.32.1
- faster-whisper==1.1.1
- sse-starlette==2.2.1
···
-14 whisper-server/run.sh
···
- #!/bin/bash
-
- # Quick script to run the Whisper transcription server
-
- echo "Setting up Whisper transcription server..."
- echo "Installing dependencies..."
- pip3 install -r requirements.txt
-
- echo ""
- echo "Starting Whisper server on http://localhost:8000"
- echo "Press Ctrl+C to stop"
- echo ""
-
- python main.py
···