Merge branch 'feature/x-local-audio-transcription' into 'develop'

Feature/x local audio transcription See merge request proj-wise2526-video2document/video2document!10
2026-06-15 18:01:52 +02:00 · 2025-11-08 14:47:05 +01:00
parent 086a41e515 cfa37b9f2f
commit 6182aec35b
7 changed files with 151 additions and 42 deletions
@@ -1,4 +1,5 @@
 {
+  "type": "module",
  "dependencies": {
    "cli-progress": "^3.12.0",
    "ffmpeg-static": "^5.2.0",
@@ -12,4 +13,3 @@
    "typescript": "^5.9.3"
  }
 }
-
@@ -0,0 +1,5 @@
+npx ts-node ./extract.ts /Users/mikehughes/Downloads/Testvideo/Kurzgesagt.mov
+npx ts-node ./transcribe.ts ../storage/audio/Kurzgesagt.wav
+
+npx ts-node ./extract.ts /Users/mikehughes/Downloads/Testvideo/GitLabMeeting.mov
+npx ts-node ./transcribe.ts ../storage/audio/GitLabMeeting.wav
@@ -0,0 +1,23 @@
+#!/usr/bin/env ts-node
+
+import { extractAudioFromVideo } from "../services/modules/extraction/ffmpegExtractor.ts";
+
+// Path of the video to process, taken from the first CLI argument.
+const videoPath = process.argv[2];
+
+if (!videoPath) {
+  // The usage text names the script as it is actually invoked (extract.ts).
+  console.error("Usage: ts-node ./extract.ts <videoPath>");
+  process.exit(1);
+}
+
+// Async IIFE so the extraction promise can be awaited at the top level of the script.
+(async () => {
+  try {
+    console.log(`Extracting audio from: ${videoPath}`);
+
+    // Delegates the actual work to ffmpegExtractor.ts (services/modules/extraction).
+    await extractAudioFromVideo(videoPath);
+
+    console.log("Audio extraction completed successfully.");
+  } catch (err) {
+    // Report the failure and exit non-zero so calling scripts can detect it.
+    console.error("Audio extraction failed:", err);
+    process.exit(1);
+  }
+})();
@@ -0,0 +1,18 @@
+import { whisperLocal } from "../services/modules/transcription/local/whisperLocal.ts";
+
+// Path of the audio file to transcribe, taken from the first CLI argument.
+const audioPath = process.argv[2];
+if (!audioPath) {
+  console.error("Please provide an audio file path as argument.");
+  process.exit(1);
+}
+
+const whisper = new whisperLocal();
+
+// Async IIFE so the transcription promise can be awaited at the top level of the script.
+(async () => {
+  try {
+    const text = await whisper.transcribe(audioPath);
+    console.log(text);
+  } catch (err) {
+    console.error("Transcription failed:", err);
+    // Make the process end with a non-zero status so a calling shell can
+    // detect the failure; previously a failed run still exited with 0.
+    process.exitCode = 1;
+  }
+})();
@@ -3,32 +3,36 @@ import ffmpeg from 'fluent-ffmpeg';
 import path from 'path';
 import fs from 'fs';
 import cliProgress from 'cli-progress';
+import { fileURLToPath } from 'url';

 // Base code reference: https://docs.yemreak.com/arsiv/programming/extract-audio-from-video-with-typescript-and-ffmpeg
-// Test command: npx ts-node ffmpegExtractor.ts /path/to/video.mp4
+// Test command: npx ts-node ./extract.ts /path/to/video.mp4

 /**
 * Extracts audio from a video file and saves it as WAV.
 * @param videoFilePath Path to the input video file.
- * @param outputAudioPath Path where the output WAV audio will be saved.
 */

 // Ensure ffmpeg binary is available
 if (!ffmpegPath) {
-    throw new Error('FFmpeg binary not found!');
+  throw new Error('FFmpeg binary not found!');
 }
 ffmpeg.setFfmpegPath(ffmpegPath);

 // Ensure an input video path is provided via CLI
 if (process.argv.length < 3) {
-    console.error('Usage: ts-node ffmpegExtractor.ts <input-video-path>');
-    process.exit(1);
+  console.error('Usage: ts-node ./extract.ts <input-video-path>');
+  process.exit(1);
 }

-// Prepare output directory (always relative to project root)
-const outputDir = path.join(process.cwd(), 'storage', 'audio');
+// Resolve __dirname equivalent in ESM
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
+
+// Prepare output directory (always storage/audio under project root)
+const outputDir = path.join(__dirname, '..', '..', '..', 'storage', 'audio');
 if (!fs.existsSync(outputDir)) {
-    fs.mkdirSync(outputDir, { recursive: true });
+  fs.mkdirSync(outputDir, { recursive: true });
 }

 // Derive input and output paths
@@ -38,10 +42,10 @@ const outputAudioPath = path.join(outputDir, `${inputVideoName}.wav`);

 // Initialize CLI progress bar
 const progressBar = new cliProgress.SingleBar({
-    format: 'Processing |{bar}| {percentage}% | {timemark}',
-    barCompleteChar: '\u2588',
-    barIncompleteChar: '\u2591',
-    hideCursor: true
+  format: 'Processing |{bar}| {percentage}% | {timemark}',
+  barCompleteChar: '\u2588',
+  barIncompleteChar: '\u2591',
+  hideCursor: true
 });

 /**
@@ -50,34 +54,37 @@ const progressBar = new cliProgress.SingleBar({
 * - Shows CLI progress bar
 * - On an ffmpeg error, stops the progress bar and rejects the returned promise
 */
-function extractAudioFromVideo(videoFilePath: string, outputAudioPath: string): Promise<void> {
-    return new Promise((resolve, reject) => {
-        ffmpeg(videoFilePath)
-            .outputFormat('wav') // Set output format to WAV
-            .on('progress', (progress) => {
-                // Start progress bar if not already active
-                if (!progressBar.isActive) progressBar.start(100, 0, { timemark: '00:00:00' });
-                if (progress.percent) {
-                    progressBar.update(progress.percent, { timemark: progress.timemark });
-                }
-            })
-            .on('end', () => {
-                // Finish progress bar
-                progressBar.update(100, { timemark: 'done' });
-                progressBar.stop();
-                console.log(`Extraction completed: ${outputAudioPath}`);
-                resolve();
-            })
-            .on('error', (err) => {
-                // Show extraction errors in a clear format
-                console.error(`failed_audio_extraction: ${err.message}`);
-                reject(err);
-            })
-            .save(outputAudioPath); // Save output file
-    });
+export function extractAudioFromVideo(videoFilePath: string): Promise<void> {
+  // Name the output after the video passed to THIS call. Using the module-level
+  // path derived from process.argv would make any caller other than the CLI
+  // write to a file named after an unrelated command-line argument.
+  const videoBaseName = path.basename(videoFilePath, path.extname(videoFilePath));
+  const targetAudioPath = path.join(outputDir, `${videoBaseName}.wav`);
+
+  return new Promise((resolve, reject) => {
+    ffmpeg(videoFilePath)
+      .outputFormat('wav')
+      // 16-bit PCM, one channel, 16 kHz sample rate.
+      .audioCodec('pcm_s16le')
+      .audioChannels(1)
+      .audioFrequency(16000)
+      .on('progress', (progress) => {
+        // Start the bar lazily on the first progress event.
+        if (!progressBar.isActive) progressBar.start(100, 0, { timemark: '00:00:00' });
+        // percent may be missing on some events; only update when it is present.
+        if (progress.percent) {
+          progressBar.update(progress.percent, { timemark: progress.timemark });
+        }
+      })
+      .on('end', () => {
+        progressBar.update(100, { timemark: 'done' });
+        progressBar.stop();
+        console.log(`Extraction completed: ${targetAudioPath}`);
+        resolve();
+      })
+      .on('error', (err) => {
+        // Stop the bar first so the error message is not drawn over it.
+        progressBar.stop();
+        console.error(`failed_audio_extraction: ${err.message}`);
+        reject(err);
+      })
+      .save(targetAudioPath);
+  });
 }

-// Run extraction
-extractAudioFromVideo(inputVideoPath, outputAudioPath)
+// Run extraction if executed directly from CLI.
+// Compare resolved filesystem paths instead of building a file:// URL by hand:
+// a hand-built URL does not match import.meta.url when the script path is
+// relative, contains characters that get percent-encoded (e.g. spaces), or is
+// a Windows path.
+if (process.argv[1] !== undefined && path.resolve(process.argv[1]) === __filename) {
+  extractAudioFromVideo(inputVideoPath)
    .then(() => console.log('Audio extraction successful.'))
    .catch((err) => console.error(err));
+}
@@ -0,0 +1,55 @@
+import { exec, execFile } from "child_process"; // Node.js built-in module
+import path from "path"; // Path module
+import fs from "fs"; // File system module
+import { fileURLToPath } from "url"; // To handle __dirname in ES modules
+
+// ES modules do not provide __filename/__dirname, so rebuild them from import.meta.url.
+const __filename = fileURLToPath(import.meta.url); // Absolute path of this file
+const __dirname = path.dirname(__filename); // Directory containing this file
+
+
+/**
+ * Transcribes audio files by running a locally built whisper.cpp CLI binary.
+ * Instantiated and called by transcribe.ts.
+ */
+export class whisperLocal {
+  /** Absolute path to the compiled whisper.cpp CLI binary. */
+  private readonly whisperBinary: string;
+  /** Absolute path to the model file passed to the binary via `-m`. */
+  private readonly modelPath: string;
+
+  constructor() {
+    // Both paths are resolved relative to this file, not the working directory.
+    this.whisperBinary = path.resolve(__dirname, "whisper.cpp/build/bin/whisper-cli");
+    this.modelPath = path.resolve(__dirname, "whisper.cpp/models/ggml-base.en.bin");
+  }
+
+  /**
+   * Runs the whisper binary on an audio file and returns the transcript text.
+   *
+   * The binary is asked to write `<audio base name>.txt` into the transcripts
+   * storage directory; that file is then read back and returned.
+   *
+   * @param audioPath Path to the audio file to transcribe.
+   * @returns The contents of the generated transcript file.
+   * @throws Rejects if the binary fails to run or exits with an error, or with
+   *   "No transcript file found" if no transcript file exists afterwards.
+   */
+  transcribe(audioPath: string): Promise<string> {
+    return new Promise((resolve, reject) => {
+      // Storage directory for transcripts; created on demand.
+      const transcriptsDir = path.resolve(__dirname, "../../../../storage/transcripts");
+      fs.mkdirSync(transcriptsDir, { recursive: true });
+
+      // Output base path: same name as the audio file, without its extension.
+      const outputBase = path.resolve(
+        transcriptsDir,
+        path.basename(audioPath, path.extname(audioPath))
+      );
+      const outputTxt = `${outputBase}.txt`;
+
+      // Pass arguments as an array and run the binary without a shell, so
+      // quotes, spaces or shell metacharacters in a path cannot break or
+      // alter the command.
+      const args = ["-m", this.modelPath, "-f", audioPath, "-otxt", "-of", outputBase];
+
+      execFile(this.whisperBinary, args, (error) => {
+        if (error) {
+          reject(error);
+          return;
+        }
+
+        fs.readFile(outputTxt, "utf8", (readError, transcript) => {
+          if (!readError) {
+            resolve(transcript);
+            return;
+          }
+          // A missing file keeps the original error message; any other read
+          // failure is passed through unchanged.
+          reject(readError.code === "ENOENT" ? new Error("No transcript file found") : readError);
+        });
+      });
+    });
+  }
+}