src/lib/transcribe.ts at f04d69d19857333075888e0fb7b0873e8845d7e3 · ptr.pet/trill

ptr.pet / trill
creates video voice memos from audio clips; with bluesky integration. trill.ptr.pet
trill / src / lib / transcribe.ts
at f04d69d19857333075888e0fb7b0873e8845d7e3 2.8 kB view raw
  1import {
  2  AutomaticSpeechRecognitionPipeline,
  3  pipeline,
  4} from "@huggingface/transformers";
  5import { toaster } from "~/components/Toaster";
  6import { defaultWhisperModel, whisperModel } from "./settings";
  7
  8let transcriberPromise: Promise<AutomaticSpeechRecognitionPipeline> | null =
  9  null;
 10let model: AutomaticSpeechRecognitionPipeline | null = null;
 11
 12const loadModel = () => {
 13  if (model) return Promise.resolve(model);
 14
 15  if (transcriberPromise) return transcriberPromise;
 16
 17  let toastId: string | undefined;
 18
 19  const modelName = whisperModel.get() ?? defaultWhisperModel;
 20
 21  transcriberPromise = pipeline("automatic-speech-recognition", modelName, {
 22    progress_callback: (data: any) => {
 23      // data contains: { status, file, name, loaded, total, progress }
 24      if (data.status === "initiate") {
 25        if (!toastId) {
 26          toastId = toaster.create({
 27            title: "downloading transcription model",
 28            description: `fetching ${data.file}...`,
 29            type: "info",
 30            duration: 999999,
 31          });
 32        }
 33      } else if (data.status === "progress" && toastId) {
 34        const percent = data.progress ? Math.round(data.progress) : 0;
 35        toaster.update(toastId, {
 36          title: "downloading transcription model",
 37          description: `fetching ${data.file} (at ${percent}%)...`,
 38          type: "info",
 39          duration: 999999,
 40        });
 41      }
 42    },
 43  })
 44    .then((transcriber) => {
 45      if (toastId) {
 46        toaster.update(toastId, {
 47          title: "transcription model loaded",
 48          description: `${modelName.split("/")[1]} is ready`,
 49          type: "success",
 50          duration: 3000,
 51        });
 52      }
 53      model = transcriber;
 54      return transcriber;
 55    })
 56    .catch((err) => {
 57      const toastOpts = {
 58        title: "transcription model download failed",
 59        description: `${err}`,
 60        type: "error",
 61        duration: 5000,
 62      };
 63      if (toastId) toaster.update(toastId, toastOpts);
 64      else toaster.create(toastOpts);
 65
 66      model = null;
 67
 68      throw err;
 69    })
 70    .finally(() => {
 71      transcriberPromise = null;
 72    });
 73
 74  return transcriberPromise;
 75};
 76
 77export const preloadModel = () => {
 78  model = null;
 79  loadModel().catch((e) => console.error("preload failed", e));
 80};
 81
 82export const transcribe = async (file: File): Promise<string> => {
 83  const url = URL.createObjectURL(file);
 84  try {
 85    await loadModel();
 86    if (!model) throw "model not loaded";
 87
 88    const output = await model(url);
 89    return [output].flat()[0].text.trim();
 90  } catch (err) {
 91    console.error("transcription failed", err);
 92    toaster.create({
 93      title: "transcription failed",
 94      description: `${err}`,
 95      type: "error",
 96    });
 97    throw err;
 98  } finally {
 99    URL.revokeObjectURL(url);
100  }
101};