Merge branch 'feature/x-local-audio-transcription' into 'develop'

Feature/x local audio transcription

See merge request proj-wise2526-video2document/video2document!10
This commit is contained in:
Hughes, Mike
2025-11-08 14:47:05 +01:00
7 changed files with 151 additions and 42 deletions
+1 -1
View File
@@ -1,4 +1,5 @@
{
"type": "module",
"dependencies": {
"cli-progress": "^3.12.0",
"ffmpeg-static": "^5.2.0",
@@ -12,4 +13,3 @@
"typescript": "^5.9.3"
}
}
+5
View File
@@ -0,0 +1,5 @@
npx ts-node ./extract.ts /Users/mikehughes/Downloads/Testvideo/Kurzgesagt.mov
npx ts-node ./transcribe.ts ../storage/audio/Kurzgesagt.wav
npx ts-node ./extract.ts /Users/mikehughes/Downloads/Testvideo/GitLabMeeting.mov
npx ts-node ./transcribe.ts ../storage/audio/GitLabMeeting.wav
+23
View File
@@ -0,0 +1,23 @@
#!/usr/bin/env ts-node
import { extractAudioFromVideo } from "../services/modules/extraction/ffmpegExtractor.ts";
// CLI entry point: extracts the audio track of the video given as the first argument.
const videoPath = process.argv[2];

if (!videoPath) {
  // The usage text names the script the way it is actually invoked (extract.ts).
  console.error("Usage: ts-node extract.ts <videoPath>");
  process.exit(1);
}

// Async IIFE so the extraction promise can be awaited and its failure mapped to an exit code.
(async () => {
  try {
    console.log(`Extracting audio from: ${videoPath}`);
    // Delegates to extractAudioFromVideo, imported from services/modules/extraction/ffmpegExtractor.ts.
    await extractAudioFromVideo(videoPath);
    console.log("Audio extraction completed successfully.");
  } catch (err) {
    console.error("Audio extraction failed:", err);
    // Non-zero status lets shell pipelines detect the failure.
    process.exit(1);
  }
})();
+18
View File
@@ -0,0 +1,18 @@
import { whisperLocal } from "../services/modules/transcription/local/whisperLocal.ts";
// CLI entry point: transcribes the audio file given as the first argument and prints the text.
const audioPath = process.argv[2];

if (!audioPath) {
  console.error("Please provide an audio file path as argument.");
  console.error("Usage: ts-node transcribe.ts <audioPath>");
  process.exit(1);
}

const whisper = new whisperLocal();

// Async IIFE so the transcription promise can be awaited and its failure reported.
(async () => {
  try {
    const text = await whisper.transcribe(audioPath);
    console.log(text);
  } catch (err) {
    console.error("Transcription failed:", err);
    // Report the failure through the exit status (previously the script exited with 0).
    // exitCode is used instead of process.exit() so pending output is flushed first.
    process.exitCode = 1;
  }
})();
+46 -39
View File
@@ -3,32 +3,36 @@ import ffmpeg from 'fluent-ffmpeg';
import path from 'path';
import fs from 'fs';
import cliProgress from 'cli-progress';
import { fileURLToPath } from 'url';
// Base code reference: https://docs.yemreak.com/arsiv/programming/extract-audio-from-video-with-typescript-and-ffmpeg
// Test command: npx ts-node ffmpegExtractor.ts /path/to/video.mp4
// Test command: npx ts-node ./extract.ts /path/to/video.mp4
/**
* Extracts audio from a video file and saves it as WAV.
* @param videoFilePath Path to the input video file.
* @param outputAudioPath Path where the output WAV audio will be saved.
*/
// Ensure ffmpeg binary is available
if (!ffmpegPath) {
throw new Error('FFmpeg binary not found!');
throw new Error('FFmpeg binary not found!');
}
ffmpeg.setFfmpegPath(ffmpegPath);
// Ensure an input video path is provided via CLI
if (process.argv.length < 3) {
console.error('Usage: ts-node ffmpegExtractor.ts <input-video-path>');
process.exit(1);
console.error('Usage: ts-node ./extract.ts <input-video-path>');
process.exit(1);
}
// Prepare output directory (always relative to project root)
const outputDir = path.join(process.cwd(), 'storage', 'audio');
// Resolve __dirname equivalent in ESM
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Prepare output directory (always storage/audio under project root)
const outputDir = path.join(__dirname, '..', '..', '..', 'storage', 'audio');
if (!fs.existsSync(outputDir)) {
fs.mkdirSync(outputDir, { recursive: true });
fs.mkdirSync(outputDir, { recursive: true });
}
// Derive input and output paths
@@ -38,10 +42,10 @@ const outputAudioPath = path.join(outputDir, `${inputVideoName}.wav`);
// Initialize CLI progress bar
const progressBar = new cliProgress.SingleBar({
format: 'Processing |{bar}| {percentage}% | {timemark}',
barCompleteChar: '\u2588',
barIncompleteChar: '\u2591',
hideCursor: true
format: 'Processing |{bar}| {percentage}% | {timemark}',
barCompleteChar: '\u2588',
barIncompleteChar: '\u2591',
hideCursor: true
});
/**
@@ -50,34 +54,37 @@ const progressBar = new cliProgress.SingleBar({
* - Shows CLI progress bar
* - Handles errors gracefully (logs the failure and rejects the returned promise)
*/
function extractAudioFromVideo(videoFilePath: string, outputAudioPath: string): Promise<void> {
return new Promise((resolve, reject) => {
ffmpeg(videoFilePath)
.outputFormat('wav') // Set output format to WAV
.on('progress', (progress) => {
// Start progress bar if not already active
if (!progressBar.isActive) progressBar.start(100, 0, { timemark: '00:00:00' });
if (progress.percent) {
progressBar.update(progress.percent, { timemark: progress.timemark });
}
})
.on('end', () => {
// Finish progress bar
progressBar.update(100, { timemark: 'done' });
progressBar.stop();
console.log(`Extraction completed: ${outputAudioPath}`);
resolve();
})
.on('error', (err) => {
// Show extraction errors in a clear format
console.error(`failed_audio_extraction: ${err.message}`);
reject(err);
})
.save(outputAudioPath); // Save output file
});
/**
 * Extracts the audio track of a video and saves it as a WAV file in the
 * module's output directory (`outputDir`), named after the input video.
 *
 * The audio is written as 16-bit PCM, mono, 16 kHz — presumably the input
 * format expected by the downstream transcription step (confirm against it).
 *
 * @param videoFilePath Path to the input video file.
 * @returns A promise that resolves when ffmpeg finishes and rejects with the
 *          ffmpeg error if extraction fails.
 */
export function extractAudioFromVideo(videoFilePath: string): Promise<void> {
  // Derive the output name from the argument itself. The module-level
  // `outputAudioPath` is computed once at import time (from `inputVideoName`),
  // so using it here would name the file after whatever the process was
  // started with instead of the video this call was asked to convert.
  const targetAudioPath = path.join(outputDir, `${path.parse(videoFilePath).name}.wav`);

  return new Promise((resolve, reject) => {
    ffmpeg(videoFilePath)
      .outputFormat('wav')
      .audioCodec('pcm_s16le')
      .audioChannels(1)
      .audioFrequency(16000)
      .on('progress', (progress) => {
        // Start the bar lazily on the first progress event.
        if (!progressBar.isActive) progressBar.start(100, 0, { timemark: '00:00:00' });
        // `percent` can be missing on some progress events; skip the update then.
        if (progress.percent) {
          progressBar.update(progress.percent, { timemark: progress.timemark });
        }
      })
      .on('end', () => {
        progressBar.update(100, { timemark: 'done' });
        progressBar.stop();
        console.log(`Extraction completed: ${targetAudioPath}`);
        resolve();
      })
      .on('error', (err) => {
        // Stop the bar first so the error message is not drawn over it.
        progressBar.stop();
        console.error(`failed_audio_extraction: ${err.message}`);
        reject(err);
      })
      .save(targetAudioPath);
  });
}
// Run extraction
extractAudioFromVideo(inputVideoPath, outputAudioPath)
// Run extraction only when this module is the script being executed, not when it is imported.
// Filesystem paths are compared instead of hand-building a `file://` URL: import.meta.url is
// percent-encoded (spaces, non-ASCII) and has the form file:///C:/... on Windows, so plain
// string concatenation with process.argv[1] does not match in those cases.
if (process.argv[1] && path.resolve(process.argv[1]) === __filename) {
  extractAudioFromVideo(inputVideoPath)
    .then(() => console.log('Audio extraction successful.'))
    .catch((err) => {
      console.error(err);
      // Signal the failure to the calling shell instead of exiting with status 0.
      process.exitCode = 1;
    });
}
Submodule services/modules/transcription/local/whisper.cpp added at 999a7e0cbf
@@ -0,0 +1,55 @@
import { exec, execFile } from "child_process"; // Node.js built-in module
import fs from "fs"; // File system module
import path from "path"; // Path module
import { fileURLToPath } from "url"; // To handle __dirname in ES modules
const __filename = fileURLToPath(import.meta.url); // Absolute path of this module (ESM has no built-in __filename)
const __dirname = path.dirname(__filename); // Directory containing this module

// Directory the transcripts are written to: storage/transcripts, resolved four
// directory levels above this module.
const transcriptsDir = path.resolve(__dirname, "../../../../storage/transcripts");

/**
 * Wrapper around a locally built whisper.cpp CLI binary.
 * Instantiated by the transcribe.ts script.
 */
export class whisperLocal {
  /** Absolute path to the compiled whisper.cpp CLI binary. */
  private readonly whisperBinary: string;
  /** Absolute path to the model file passed to the binary via `-m`. */
  private readonly modelPath: string;

  constructor() {
    this.whisperBinary = path.resolve(__dirname, "whisper.cpp/build/bin/whisper-cli");
    this.modelPath = path.resolve(__dirname, "whisper.cpp/models/ggml-base.en.bin");
  }

  /**
   * Transcribes an audio file by running the whisper binary and reading back
   * the text file it writes.
   *
   * The transcript is stored as `<transcriptsDir>/<audio base name>.txt`.
   *
   * @param audioPath Path to the audio file to transcribe.
   * @returns The transcript text.
   * @throws Rejects with the process error if the binary fails to run or exits
   *         non-zero, with `Error("No transcript file found")` if no output
   *         file was produced, or with the read error otherwise.
   */
  async transcribe(audioPath: string): Promise<string> {
    // Create the output directory on first use; `recursive` makes this a no-op if it exists.
    fs.mkdirSync(transcriptsDir, { recursive: true });

    // Output base path without extension; the binary appends ".txt" itself (-otxt / -of).
    const outputBase = path.resolve(
      transcriptsDir,
      path.basename(audioPath, path.extname(audioPath))
    );
    const outputTxt = `${outputBase}.txt`;

    // Arguments are passed as an array to execFile, so no shell is involved:
    // paths containing quotes, `$`, backticks or `;` can neither break the
    // command nor inject additional commands.
    const args = ["-m", this.modelPath, "-f", audioPath, "-otxt", "-of", outputBase];

    return new Promise<string>((resolve, reject) => {
      execFile(this.whisperBinary, args, (error) => {
        if (error) {
          reject(error);
          return;
        }
        // Read asynchronously so that a read failure rejects the promise
        // instead of throwing inside the child-process callback.
        fs.readFile(outputTxt, "utf8", (readError, transcript) => {
          if (readError) {
            reject(
              readError.code === "ENOENT"
                ? new Error("No transcript file found")
                : readError
            );
            return;
          }
          resolve(transcript);
        });
      });
    });
  }
}