mirror of
https://gitlab.rlp.net/proj-wise2526-video2document/video2document.git
synced 2026-06-15 18:01:52 +02:00
Merge branch 'feature/x-local-audio-transcription' into 'develop'
Feature/x local audio transcription See merge request proj-wise2526-video2document/video2document!10
This commit is contained in:
+1
-1
@@ -1,4 +1,5 @@
|
||||
{
|
||||
"type": "module",
|
||||
"dependencies": {
|
||||
"cli-progress": "^3.12.0",
|
||||
"ffmpeg-static": "^5.2.0",
|
||||
@@ -12,4 +13,3 @@
|
||||
"typescript": "^5.9.3"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
npx ts-node ./extract.ts /Users/mikehughes/Downloads/Testvideo/Kurzgesagt.mov
|
||||
npx ts-node ./transcribe.ts ../storage/audio/Kurzgesagt.wav
|
||||
|
||||
npx ts-node ./extract.ts /Users/mikehughes/Downloads/Testvideo/GitLabMeeting.mov
|
||||
npx ts-node ./transcribe.ts ../storage/audio/GitLabMeeting.wav
|
||||
@@ -0,0 +1,23 @@
|
||||
#!/usr/bin/env ts-node
|
||||
|
||||
import { extractAudioFromVideo } from "../services/modules/extraction/ffmpegExtractor.ts";
|
||||
|
||||
// CLI entry point: extracts the audio track of the video file given as the
// first command-line argument by delegating to extractAudioFromVideo.
const videoPath = process.argv[2];

if (!videoPath) {
  // The usage text names the script the way it is actually invoked (./extract.ts).
  console.error("Usage: ts-node ./extract.ts <videoPath>");
  process.exit(1);
}

(async () => {
  try {
    console.log(`Extracting audio from: ${videoPath}`);

    // Extraction itself lives in services/modules/extraction/ffmpegExtractor.ts.
    await extractAudioFromVideo(videoPath);

    console.log("Audio extraction completed successfully.");
  } catch (err) {
    console.error("Audio extraction failed:", err);
    // Exit non-zero so calling shells and scripts can detect the failure.
    process.exit(1);
  }
})();
|
||||
@@ -0,0 +1,18 @@
|
||||
import { whisperLocal } from "../services/modules/transcription/local/whisperLocal.ts";
|
||||
|
||||
// CLI entry point: transcribes the audio file given as the first command-line
// argument with the local whisper wrapper and prints the transcript to stdout.
const audioPath = process.argv[2];
if (!audioPath) {
  console.error("Please provide an audio file path as argument.");
  process.exit(1);
}

const whisper = new whisperLocal();

(async () => {
  try {
    const text = await whisper.transcribe(audioPath);
    console.log(text);
  } catch (err) {
    console.error("Transcription failed:", err);
    // Report the failure through the exit status as well; without this the
    // process would end with status 0 even though nothing was transcribed.
    process.exitCode = 1;
  }
})();
|
||||
@@ -3,32 +3,36 @@ import ffmpeg from 'fluent-ffmpeg';
|
||||
import path from 'path';
|
||||
import fs from 'fs';
|
||||
import cliProgress from 'cli-progress';
|
||||
import { fileURLToPath } from 'url';
|
||||
|
||||
// Base code reference: https://docs.yemreak.com/arsiv/programming/extract-audio-from-video-with-typescript-and-ffmpeg
|
||||
// Test command: npx ts-node ffmpegExtractor.ts /path/to/video.mp4
|
||||
// Test command: npx ts-node ./extract.ts /path/to/video.mp4
|
||||
|
||||
/**
|
||||
* Extracts audio from a video file and saves it as WAV.
|
||||
* @param videoFilePath Path to the input video file.
|
||||
* @param outputAudioPath Path where the output WAV audio will be saved.
|
||||
*/
|
||||
|
||||
// Ensure ffmpeg binary is available
|
||||
if (!ffmpegPath) {
|
||||
throw new Error('FFmpeg binary not found!');
|
||||
throw new Error('FFmpeg binary not found!');
|
||||
}
|
||||
ffmpeg.setFfmpegPath(ffmpegPath);
|
||||
|
||||
// Ensure an input video path is provided via CLI
|
||||
if (process.argv.length < 3) {
|
||||
console.error('Usage: ts-node ffmpegExtractor.ts <input-video-path>');
|
||||
process.exit(1);
|
||||
console.error('Usage: ts-node ./extract.ts <input-video-path>');
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Prepare output directory (always relative to project root)
|
||||
const outputDir = path.join(process.cwd(), 'storage', 'audio');
|
||||
// Resolve __dirname equivalent in ESM
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = path.dirname(__filename);
|
||||
|
||||
// Prepare output directory (always storage/audio under project root)
|
||||
const outputDir = path.join(__dirname, '..', '..', '..', 'storage', 'audio');
|
||||
if (!fs.existsSync(outputDir)) {
|
||||
fs.mkdirSync(outputDir, { recursive: true });
|
||||
fs.mkdirSync(outputDir, { recursive: true });
|
||||
}
|
||||
|
||||
// Derive input and output paths
|
||||
@@ -38,10 +42,10 @@ const outputAudioPath = path.join(outputDir, `${inputVideoName}.wav`);
|
||||
|
||||
// Initialize CLI progress bar
|
||||
const progressBar = new cliProgress.SingleBar({
|
||||
format: 'Processing |{bar}| {percentage}% | {timemark}',
|
||||
barCompleteChar: '\u2588',
|
||||
barIncompleteChar: '\u2591',
|
||||
hideCursor: true
|
||||
format: 'Processing |{bar}| {percentage}% | {timemark}',
|
||||
barCompleteChar: '\u2588',
|
||||
barIncompleteChar: '\u2591',
|
||||
hideCursor: true
|
||||
});
|
||||
|
||||
/**
|
||||
@@ -50,34 +54,37 @@ const progressBar = new cliProgress.SingleBar({
|
||||
* - Shows CLI progress bar
|
||||
* - Handles errors gracefully (logs the failure and rejects the returned promise)
|
||||
*/
|
||||
function extractAudioFromVideo(videoFilePath: string, outputAudioPath: string): Promise<void> {
|
||||
return new Promise((resolve, reject) => {
|
||||
ffmpeg(videoFilePath)
|
||||
.outputFormat('wav') // Set output format to WAV
|
||||
.on('progress', (progress) => {
|
||||
// Start progress bar if not already active
|
||||
if (!progressBar.isActive) progressBar.start(100, 0, { timemark: '00:00:00' });
|
||||
if (progress.percent) {
|
||||
progressBar.update(progress.percent, { timemark: progress.timemark });
|
||||
}
|
||||
})
|
||||
.on('end', () => {
|
||||
// Finish progress bar
|
||||
progressBar.update(100, { timemark: 'done' });
|
||||
progressBar.stop();
|
||||
console.log(`Extraction completed: ${outputAudioPath}`);
|
||||
resolve();
|
||||
})
|
||||
.on('error', (err) => {
|
||||
// Show extraction errors in a clear format
|
||||
console.error(`failed_audio_extraction: ${err.message}`);
|
||||
reject(err);
|
||||
})
|
||||
.save(outputAudioPath); // Save output file
|
||||
});
|
||||
/**
 * Extracts the audio track of a video file and writes it as a WAV file
 * (codec `pcm_s16le`, 1 channel, 16000 Hz), showing a CLI progress bar while
 * ffmpeg is running.
 *
 * @param videoFilePath Path to the input video file.
 * @param outputPath Destination of the WAV file. Defaults to the module-level
 *   `outputAudioPath`, so existing single-argument callers behave as before.
 * @returns A promise that resolves when ffmpeg reports `end` and rejects with
 *   the ffmpeg error when it reports `error`.
 */
export function extractAudioFromVideo(
  videoFilePath: string,
  outputPath: string = outputAudioPath,
): Promise<void> {
  return new Promise((resolve, reject) => {
    ffmpeg(videoFilePath)
      .outputFormat('wav')
      .audioCodec('pcm_s16le')
      .audioChannels(1)
      .audioFrequency(16000)
      .on('progress', (progress) => {
        // Start the bar lazily on the first progress event.
        if (!progressBar.isActive) progressBar.start(100, 0, { timemark: '00:00:00' });
        if (progress.percent) {
          progressBar.update(progress.percent, { timemark: progress.timemark });
        }
      })
      .on('end', () => {
        // Force the bar to 100% before closing it.
        progressBar.update(100, { timemark: 'done' });
        progressBar.stop();
        console.log(`Extraction completed: ${outputPath}`);
        resolve();
      })
      .on('error', (err) => {
        // Stop the bar first so the error message is not drawn over it.
        progressBar.stop();
        console.error(`failed_audio_extraction: ${err.message}`);
        reject(err);
      })
      .save(outputPath);
  });
}
|
||||
|
||||
// Run extraction
|
||||
extractAudioFromVideo(inputVideoPath, outputAudioPath)
|
||||
// Run extraction only when this module is the script that was launched, not
// when it is imported. The comparison is done on filesystem paths: building a
// `file://` string from argv by hand does not match import.meta.url for
// relative paths, for paths containing characters that URLs percent-encode
// (such as spaces), or for Windows paths.
const entryScriptPath = process.argv[1];
if (
  entryScriptPath !== undefined &&
  path.resolve(entryScriptPath) === fileURLToPath(import.meta.url)
) {
  extractAudioFromVideo(inputVideoPath)
    .then(() => console.log('Audio extraction successful.'))
    .catch((err) => {
      console.error(err);
      // Make the failure visible to the calling shell.
      process.exitCode = 1;
    });
}
|
||||
+1
Submodule services/modules/transcription/local/whisper.cpp added at 999a7e0cbf
@@ -0,0 +1,55 @@
|
||||
import { exec, execFile } from "child_process"; // Node.js built-in module
import path from "path"; // Path module
import fs from "fs"; // File system module
import { fileURLToPath } from "url"; // To handle __dirname in ES modules
|
||||
|
||||
// ES modules do not provide __filename/__dirname, so derive them from import.meta.url.
const __filename = fileURLToPath(import.meta.url); // Absolute path of this module file
const __dirname = path.dirname(__filename); // Directory that contains this module file
|
||||
|
||||
|
||||
/**
 * Wrapper around a locally built whisper.cpp command-line binary.
 * Instantiated by transcribe.ts.
 *
 * Both the binary and the model are resolved relative to this module's
 * directory, inside the `whisper.cpp` folder next to it.
 */
export class whisperLocal {
  /**
   * Upper bound for the stdout/stderr captured from the whisper process.
   * The captured output is discarded (the transcript is read from the file
   * the CLI writes), so this is raised above Node's default only to keep a
   * chatty run from being terminated for exceeding the buffer.
   */
  private static readonly MAX_OUTPUT_BYTES = 64 * 1024 * 1024;

  /** Path to the compiled whisper.cpp binary. */
  private readonly whisperBinary: string;

  /** Path to the English model file. */
  private readonly modelPath: string;

  constructor() {
    this.whisperBinary = path.resolve(__dirname, "whisper.cpp/build/bin/whisper-cli");
    this.modelPath = path.resolve(__dirname, "whisper.cpp/models/ggml-base.en.bin");
  }

  /**
   * Transcribes an audio file and returns the transcript text.
   *
   * The CLI is asked to write `<audio base name>.txt` into the
   * `storage/transcripts` directory (resolved four levels above this module);
   * that file is then read back and returned.
   *
   * @param audioPath Path to the audio file to transcribe.
   * @returns The contents of the generated transcript file.
   * @throws The process error if the whisper binary fails or cannot be started,
   *   or an `Error("No transcript file found")` if no transcript was written.
   */
  async transcribe(audioPath: string): Promise<string> {
    const transcriptsDir = path.resolve(__dirname, "../../../../storage/transcripts");

    // `recursive: true` makes this a no-op when the directory already exists.
    fs.mkdirSync(transcriptsDir, { recursive: true });

    // Transcript files are named after the audio file (without its extension).
    const outputBase = path.resolve(
      transcriptsDir,
      path.basename(audioPath, path.extname(audioPath)),
    );

    // Arguments are passed as an array and no shell is involved, so quotes,
    // spaces or shell metacharacters in any path are handed over verbatim
    // instead of being interpreted.
    const args = ["-m", this.modelPath, "-f", audioPath, "-otxt", "-of", outputBase];

    await new Promise<void>((resolve, reject) => {
      execFile(
        this.whisperBinary,
        args,
        { maxBuffer: whisperLocal.MAX_OUTPUT_BYTES },
        (error) => {
          if (error) {
            reject(error);
          } else {
            resolve();
          }
        },
      );
    });

    const outputTxt = `${outputBase}.txt`;
    if (!fs.existsSync(outputTxt)) {
      throw new Error("No transcript file found");
    }
    return fs.readFileSync(outputTxt, "utf8");
  }
}
|
||||
Reference in New Issue
Block a user