Merge branch 'feature/x-local-audio-transcription' into 'develop'

Feature/x local audio transcription See merge request proj-wise2526-video2document/video2document!10
2026-06-15 18:01:52 +02:00 · 2025-11-08 14:47:05 +01:00
parent 086a41e515 cfa37b9f2f
commit 6182aec35b
7 changed files with 151 additions and 42 deletions
@@ -1,4 +1,5 @@
 {
  "type": "module",
  "dependencies": {
    "cli-progress": "^3.12.0",
    "ffmpeg-static": "^5.2.0",
@@ -12,4 +13,3 @@
    "typescript": "^5.9.3"
  }
 }
@@ -0,0 +1,5 @@
 # For each test video: run extract.ts on the video, then run transcribe.ts on the WAV file under ../storage/audio.
 # NOTE(review): the video paths are absolute paths inside one user's home directory — adjust them before running elsewhere.
 npx ts-node ./extract.ts /Users/mikehughes/Downloads/Testvideo/Kurzgesagt.mov
 npx ts-node ./transcribe.ts ../storage/audio/Kurzgesagt.wav
 npx ts-node ./extract.ts /Users/mikehughes/Downloads/Testvideo/GitLabMeeting.mov
 npx ts-node ./transcribe.ts ../storage/audio/GitLabMeeting.wav
@@ -0,0 +1,23 @@
 #!/usr/bin/env ts-node
 import { extractAudioFromVideo } from "../services/modules/extraction/ffmpegExtractor.ts";
 // First CLI argument: path of the video whose audio track should be extracted.
 const videoPath = process.argv[2];
 if (!videoPath) {
   // The usage text names the script the way it is actually invoked (./extract.ts).
   console.error("Usage: ts-node ./extract.ts <videoPath>");
   process.exit(1);
 }
 /**
  * Runs the audio extraction for one video and reports the outcome on the console.
  * Exits the process with status 1 when the extraction rejects.
  * @param inputPath Path to the input video file.
  */
 async function main(inputPath: string): Promise<void> {
   try {
     console.log(`Extracting audio from: ${inputPath}`);
     // Delegates to extractAudioFromVideo, imported from ffmpegExtractor.ts.
     await extractAudioFromVideo(inputPath);
     console.log("Audio extraction completed successfully.");
   } catch (err: unknown) {
     console.error("Audio extraction failed:", err);
     process.exit(1);
   }
 }
 // main() handles its own errors; `void` marks the promise as intentionally not awaited.
 void main(videoPath);
@@ -0,0 +1,18 @@
 import { whisperLocal } from "../services/modules/transcription/local/whisperLocal.ts";
 // First CLI argument: path of the audio file to transcribe.
 const audioPath = process.argv[2];
 if (!audioPath) {
   console.error("Please provide an audio file path as argument.");
   process.exit(1);
 }
 const whisper = new whisperLocal();
 /**
  * Transcribes one audio file and prints the transcript to stdout.
  * On failure the error is logged and the process exit code is set to 1.
  * @param inputPath Path to the audio file handed to whisperLocal.transcribe.
  */
 async function main(inputPath: string): Promise<void> {
   try {
     const text = await whisper.transcribe(inputPath);
     console.log(text);
   } catch (err: unknown) {
     console.error("Transcription failed:", err);
     // Report the failure to the calling shell instead of exiting with status 0.
     // exitCode (rather than exit()) lets pending console output flush first.
     process.exitCode = 1;
   }
 }
 // main() handles its own errors; `void` marks the promise as intentionally not awaited.
 void main(audioPath);
@@ -3,14 +3,14 @@ import ffmpeg from 'fluent-ffmpeg';
 import path from 'path';
 import fs from 'fs';
 import cliProgress from 'cli-progress';
 import { fileURLToPath } from 'url';
 // Base code reference: https://docs.yemreak.com/arsiv/programming/extract-audio-from-video-with-typescript-and-ffmpeg
-// Test command: npx ts-node ffmpegExtractor.ts /path/to/video.mp4
+// Test command: npx ts-node ./extract.ts /path/to/video.mp4
 /**
 * Extracts audio from a video file and saves it as WAV.
 * @param videoFilePath Path to the input video file.
 * @param outputAudioPath Path where the output WAV audio will be saved.
 */
 // Ensure ffmpeg binary is available
@@ -21,12 +21,16 @@ ffmpeg.setFfmpegPath(ffmpegPath);
 // Ensure an input video path is provided via CLI
 // NOTE(review): this is a top-level statement, so it presumably also runs when another script imports this module — confirm that is intended.
 if (process.argv.length < 3) {
-    console.error('Usage: ts-node ffmpegExtractor.ts <input-video-path>');
+  console.error('Usage: ts-node ./extract.ts <input-video-path>');
  process.exit(1);
 }
-// Prepare output directory (always relative to project root)
+// Resolve __dirname equivalent in ESM
-const outputDir = path.join(process.cwd(), 'storage', 'audio');
+const __filename = fileURLToPath(import.meta.url);
 const __dirname = path.dirname(__filename);
 // Prepare output directory (always storage/audio under project root)
 // The path is built from this module's directory (three levels up), not from the current working directory.
 const outputDir = path.join(__dirname, '..', '..', '..', 'storage', 'audio');
 // Create the directory, including missing parents, if it does not exist yet.
 if (!fs.existsSync(outputDir)) {
  fs.mkdirSync(outputDir, { recursive: true });
 }
@@ -50,34 +54,37 @@ const progressBar = new cliProgress.SingleBar({
 * - Shows CLI progress bar
 * - Logs extraction errors and rejects the returned promise
 */
-function extractAudioFromVideo(videoFilePath: string, outputAudioPath: string): Promise<void> {
+export function extractAudioFromVideo(videoFilePath: string): Promise<void> {
  // Wraps the event-based ffmpeg command in a Promise: resolves on 'end', rejects on 'error'.
  return new Promise((resolve, reject) => {
    ffmpeg(videoFilePath)
-            .outputFormat('wav') // Set output format to WAV
+      .outputFormat('wav')
      // Output audio settings: pcm_s16le codec, 1 channel, 16000 Hz sample rate.
      .audioCodec('pcm_s16le')
      .audioChannels(1)
      .audioFrequency(16000)
      .on('progress', (progress) => {
                // Start progress bar if not already active
        if (!progressBar.isActive) progressBar.start(100, 0, { timemark: '00:00:00' });
        // Only update when a percentage is reported (the check skips undefined and 0).
        if (progress.percent) {
          progressBar.update(progress.percent, { timemark: progress.timemark });
        }
      })
      .on('end', () => {
                // Finish progress bar
        progressBar.update(100, { timemark: 'done' });
        progressBar.stop();
        // NOTE(review): outputAudioPath is no longer a parameter in the added signature; it is presumably a module-level value defined outside this hunk — confirm.
        console.log(`Extraction completed: ${outputAudioPath}`);
        resolve();
      })
      .on('error', (err) => {
-                // Show extraction errors in a clear format
+        progressBar.stop();
        // Log the failure with a fixed prefix, then pass the original error to the caller.
        console.error(`failed_audio_extraction: ${err.message}`);
        reject(err);
      })
-            .save(outputAudioPath); // Save output file
+      .save(outputAudioPath);
  });
 }
-// Run extraction
+// Run extraction if executed directly from CLI
-extractAudioFromVideo(inputVideoPath, outputAudioPath)
+if (import.meta.url === `file://${process.argv[1]}`) {
  // NOTE(review): this is a plain string comparison; it presumably fails when the script path contains characters that are percent-encoded in a file URL (e.g. spaces) or on Windows paths. Comparing fileURLToPath(import.meta.url) with process.argv[1] may be more robust — confirm.
  extractAudioFromVideo(inputVideoPath)
    .then(() => console.log('Audio extraction successful.'))
    // A rejection is only logged here; the process exit code is not changed.
    .catch((err) => console.error(err));
 }
@@ -0,0 +1,55 @@
 import { exec, execFile } from "child_process"; // Node.js built-in module
 import path from "path"; // Path module
 import fs from "fs"; // File system module
 import { fileURLToPath } from "url"; // To handle __dirname in ES modules
 const __filename = fileURLToPath(import.meta.url); // Absolute path of this module (ES modules have no built-in __filename)
 const __dirname = path.dirname(__filename); // Directory that contains this module
 // Directory the transcript files are written to: storage/transcripts, four directory levels above this module.
 const transcriptsDir = path.resolve(__dirname, "../../../../storage/transcripts");
 /**
  * Transcribes audio files by running a locally built whisper.cpp command-line binary.
  * Used by transcribe.ts.
  */
 export class whisperLocal {
   /** Path to the compiled whisper.cpp binary, resolved relative to this module. */
   private readonly whisperBinary: string;
   /** Path to the model file handed to the binary via `-m`, resolved relative to this module. */
   private readonly modelPath: string;
   constructor() {
     this.whisperBinary = path.resolve(__dirname, "whisper.cpp/build/bin/whisper-cli");
     this.modelPath = path.resolve(__dirname, "whisper.cpp/models/ggml-base.en.bin");
   }
   /**
    * Runs the whisper binary on one audio file and returns the transcript text.
    *
    * The binary is asked (`-otxt -of <base>`) to write `<base>.txt` into the transcripts
    * directory, where `<base>` is the audio file name without its extension; that file is
    * then read back and returned.
    *
    * @param audioPath Path to the audio file to transcribe.
    * @returns The contents of the generated transcript file.
    * @throws Rejects when `audioPath` is empty, when the binary cannot be started or exits
    *         with an error, when the transcript file is missing ("No transcript file found"),
    *         or when the transcript file cannot be read.
    */
   async transcribe(audioPath: string): Promise<string> {
     return new Promise<string>((resolve, reject) => {
       if (!audioPath) {
         reject(new Error("audioPath must be a non-empty string"));
         return;
       }
       // recursive: true creates missing parents and does not fail when the directory already exists.
       fs.mkdirSync(transcriptsDir, { recursive: true });
       const outputBase = path.resolve(
         transcriptsDir,
         path.basename(audioPath, path.extname(audioPath))
       );
       // Arguments are passed as an array to execFile, so no shell parses them:
       // quotes, spaces or `$(...)` inside audioPath cannot alter the command.
       const args = ["-m", this.modelPath, "-f", audioPath, "-otxt", "-of", outputBase];
       execFile(this.whisperBinary, args, (error) => {
         if (error) {
           reject(error);
           return;
         }
         const outputTxt = `${outputBase}.txt`;
         // Asynchronous read: a read failure becomes a rejection instead of an
         // exception thrown inside this callback, outside the promise.
         fs.readFile(outputTxt, "utf8", (readError, transcript) => {
           if (!readError) {
             resolve(transcript);
           } else if (readError.code === "ENOENT") {
             reject(new Error("No transcript file found"));
           } else {
             reject(readError);
           }
         });
       });
     });
   }
 }