mirror of
https://gitlab.rlp.net/proj-wise2526-video2document/video2document.git
synced 2026-06-15 18:01:52 +02:00
Merge branch 'feature/x-local-audio-transcription' into 'develop'
Feature/x local audio transcription See merge request proj-wise2526-video2document/video2document!10
This commit is contained in:
+1
-1
@@ -1,4 +1,5 @@
|
|||||||
{
|
{
|
||||||
|
"type": "module",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"cli-progress": "^3.12.0",
|
"cli-progress": "^3.12.0",
|
||||||
"ffmpeg-static": "^5.2.0",
|
"ffmpeg-static": "^5.2.0",
|
||||||
@@ -12,4 +13,3 @@
|
|||||||
"typescript": "^5.9.3"
|
"typescript": "^5.9.3"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,5 @@
|
|||||||
|
npx ts-node ./extract.ts /Users/mikehughes/Downloads/Testvideo/Kurzgesagt.mov
|
||||||
|
npx ts-node ./transcribe.ts ../storage/audio/Kurzgesagt.wav
|
||||||
|
|
||||||
|
npx ts-node ./extract.ts /Users/mikehughes/Downloads/Testvideo/GitLabMeeting.mov
|
||||||
|
npx ts-node ./transcribe.ts ../storage/audio/GitLabMeeting.wav
|
||||||
@@ -0,0 +1,23 @@
|
|||||||
|
#!/usr/bin/env ts-node
|
||||||
|
|
||||||
|
import { extractAudioFromVideo } from "../services/modules/extraction/ffmpegExtractor.ts";
|
||||||
|
|
||||||
|
// Path of the video to process, taken from the first CLI argument.
const videoPath = process.argv[2];

if (!videoPath) {
  // The usage text names the script the way it is invoked (`npx ts-node ./extract.ts <video>`).
  console.error("Usage: ts-node ./extract.ts <videoPath>");
  process.exit(1);
}

// Entry point: run the extraction once and translate failure into a non-zero exit code.
// `void` marks the promise as intentionally not awaited; every rejection is handled in the catch below.
void (async () => {
  try {
    console.log(`Extracting audio from: ${videoPath}`);

    // Delegates the actual work to extractAudioFromVideo (imported from ffmpegExtractor.ts).
    await extractAudioFromVideo(videoPath);

    console.log("Audio extraction completed successfully.");
  } catch (err: unknown) {
    console.error("Audio extraction failed:", err);
    process.exit(1);
  }
})();
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
import { whisperLocal } from "../services/modules/transcription/local/whisperLocal.ts";
|
||||||
|
|
||||||
|
// Path of the audio file to transcribe, taken from the first CLI argument.
const audioPath = process.argv[2];

if (!audioPath) {
  console.error("Please provide an audio file path as argument.");
  process.exit(1);
}

const whisper = new whisperLocal();

// Entry point: transcribe the file, print the transcript to stdout and report
// failure through the process exit code.
void (async () => {
  try {
    const text = await whisper.transcribe(audioPath);
    console.log(text);
  } catch (err: unknown) {
    console.error("Transcription failed:", err);
    // Without this the process exits with status 0 even though nothing was transcribed,
    // so shell pipelines and CI cannot detect the failure. Setting exitCode (instead of
    // calling process.exit) lets pending stderr output flush first.
    process.exitCode = 1;
  }
})();
|
||||||
@@ -3,14 +3,14 @@ import ffmpeg from 'fluent-ffmpeg';
|
|||||||
import path from 'path';
|
import path from 'path';
|
||||||
import fs from 'fs';
|
import fs from 'fs';
|
||||||
import cliProgress from 'cli-progress';
|
import cliProgress from 'cli-progress';
|
||||||
|
import { fileURLToPath } from 'url';
|
||||||
|
|
||||||
// Base code reference: https://docs.yemreak.com/arsiv/programming/extract-audio-from-video-with-typescript-and-ffmpeg
|
// Base code reference: https://docs.yemreak.com/arsiv/programming/extract-audio-from-video-with-typescript-and-ffmpeg
|
||||||
// Test command: npx ts-node ffmpegExtractor.ts /path/to/video.mp4
|
// Test command: npx ts-node ./extract.ts /path/to/video.mp4
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extracts audio from a video file and saves it as WAV.
|
* Extracts audio from a video file and saves it as WAV.
|
||||||
* @param videoFilePath Path to the input video file.
|
* @param videoFilePath Path to the input video file.
|
||||||
* @param outputAudioPath Path where the output WAV audio will be saved.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// Ensure ffmpeg binary is available
|
// Ensure ffmpeg binary is available
|
||||||
@@ -21,12 +21,16 @@ ffmpeg.setFfmpegPath(ffmpegPath);
|
|||||||
|
|
||||||
// Ensure an input video path is provided via CLI
|
// Ensure an input video path is provided via CLI
|
||||||
if (process.argv.length < 3) {
|
if (process.argv.length < 3) {
|
||||||
console.error('Usage: ts-node ffmpegExtractor.ts <input-video-path>');
|
console.error('Usage: ts-node ./extract.ts <input-video-path>');
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Prepare output directory (always relative to project root)
|
// Resolve __dirname equivalent in ESM
|
||||||
const outputDir = path.join(process.cwd(), 'storage', 'audio');
|
const __filename = fileURLToPath(import.meta.url);
|
||||||
|
const __dirname = path.dirname(__filename);
|
||||||
|
|
||||||
|
// Prepare output directory (always storage/audio under project root)
|
||||||
|
const outputDir = path.join(__dirname, '..', '..', '..', 'storage', 'audio');
|
||||||
if (!fs.existsSync(outputDir)) {
|
if (!fs.existsSync(outputDir)) {
|
||||||
fs.mkdirSync(outputDir, { recursive: true });
|
fs.mkdirSync(outputDir, { recursive: true });
|
||||||
}
|
}
|
||||||
@@ -50,34 +54,37 @@ const progressBar = new cliProgress.SingleBar({
|
|||||||
* - Shows CLI progress bar
|
* - Shows CLI progress bar
|
||||||
* - Handles errors gracefully (without errors)
|
* - Handles errors gracefully (without errors)
|
||||||
*/
|
*/
|
||||||
// Extracts the audio track of `videoFilePath` into a WAV file and resolves once
// ffmpeg reports completion; rejects with ffmpeg's error otherwise.
// The destination is the `outputAudioPath` binding defined elsewhere in this module
// (it is not a parameter of this function).
export function extractAudioFromVideo(videoFilePath: string): Promise<void> {
  return new Promise((resolve, reject) => {
    ffmpeg(videoFilePath)
      .outputFormat('wav')
      .audioCodec('pcm_s16le') // 16-bit little-endian PCM
      .audioChannels(1) // mono
      .audioFrequency(16000) // 16 kHz; presumably the format the local transcriber expects (TODO confirm)
      .on('progress', (progress) => {
        // Start the bar lazily on the first progress event.
        if (!progressBar.isActive) progressBar.start(100, 0, { timemark: '00:00:00' });
        // `percent` can be missing or 0; only move the bar when a value is reported.
        if (progress.percent) {
          progressBar.update(progress.percent, { timemark: progress.timemark });
        }
      })
      .on('end', () => {
        // Force the bar to 100% before stopping so the final render shows completion.
        progressBar.update(100, { timemark: 'done' });
        progressBar.stop();
        console.log(`Extraction completed: ${outputAudioPath}`);
        resolve();
      })
      .on('error', (err) => {
        // Stop the bar first so the error message is not overwritten by a redraw.
        progressBar.stop();
        console.error(`failed_audio_extraction: ${err.message}`);
        reject(err);
      })
      .save(outputAudioPath);
  });
}
|
||||||
|
|
||||||
// Run extraction only when this module is the script being executed, not when it is
// imported by another module.
// The check compares file-system paths rather than building a `file://` string by hand:
// import.meta.url is percent-encoded (spaces become %20) and uses a different shape on
// Windows, so a hand-built `file://${process.argv[1]}` does not match in those cases and
// the script would silently do nothing.
const entryScript = process.argv[1];
if (entryScript !== undefined && fileURLToPath(import.meta.url) === path.resolve(entryScript)) {
  extractAudioFromVideo(inputVideoPath)
    .then(() => console.log('Audio extraction successful.'))
    .catch((err) => {
      console.error(err);
      // Report the failure to the calling shell instead of exiting with status 0.
      process.exitCode = 1;
    });
}
|
||||||
+1
Submodule services/modules/transcription/local/whisper.cpp added at 999a7e0cbf
@@ -0,0 +1,55 @@
|
|||||||
|
import { exec, execFile } from "child_process"; // Node.js built-in module
import path from "path"; // Path module
import fs from "fs"; // File system module
import { fileURLToPath } from "url"; // To handle __dirname in ES modules
|
||||||
|
|
||||||
|
const __filename = fileURLToPath(import.meta.url); // Absolute path of this module (ESM has no built-in __filename)
const __dirname = path.dirname(__filename); // Directory containing this module

// Directory that receives the generated transcript files: `storage/transcripts`,
// four levels above this module.
const transcriptsDir = path.resolve(__dirname, "../../../../storage/transcripts");

// Upper bound for the output captured from the whisper process. The child-process default
// is much smaller, and exceeding it aborts the run.
const MAX_WHISPER_OUTPUT_BYTES = 16 * 1024 * 1024;

/**
 * Thin wrapper around a locally built whisper.cpp command-line binary.
 *
 * Both the binary (`whisper.cpp/build/bin/whisper-cli`) and the model
 * (`whisper.cpp/models/ggml-base.en.bin`) are resolved relative to this module.
 * Used by the `transcribe.ts` CLI script.
 */
export class whisperLocal {
  /** Absolute path to the compiled whisper.cpp CLI binary. */
  private readonly whisperBinary: string;
  /** Absolute path to the model file passed to the binary via `-m`. */
  private readonly modelPath: string;

  constructor() {
    this.whisperBinary = path.resolve(__dirname, "whisper.cpp/build/bin/whisper-cli");
    this.modelPath = path.resolve(__dirname, "whisper.cpp/models/ggml-base.en.bin");
  }

  /**
   * Transcribes one audio file and returns the transcript text.
   *
   * The binary is asked to write `<transcriptsDir>/<audio base name>.txt`
   * (`-otxt -of <base>`); that file is read back and returned.
   *
   * @param audioPath Path of the audio file handed to the binary via `-f`.
   * @returns The contents of the generated `.txt` transcript.
   * @throws Error if the audio file does not exist, the whisper process fails,
   *         or no transcript file was produced.
   */
  async transcribe(audioPath: string): Promise<string> {
    // Fail early with a clear message instead of a generic process failure.
    if (!fs.existsSync(audioPath)) {
      throw new Error(`Audio file not found: ${audioPath}`);
    }

    // `recursive: true` makes this a no-op when the directory already exists.
    fs.mkdirSync(transcriptsDir, { recursive: true });

    // The transcript is named after the audio file, without its extension.
    const outputBase = path.resolve(
      transcriptsDir,
      path.basename(audioPath, path.extname(audioPath))
    );
    const outputTxt = `${outputBase}.txt`;

    // Arguments are passed as an array and no shell is involved, so quotes, spaces,
    // `$` or backticks in a file name cannot break or inject into the command.
    const args = ["-m", this.modelPath, "-f", audioPath, "-otxt", "-of", outputBase];

    return new Promise<string>((resolve, reject) => {
      execFile(this.whisperBinary, args, { maxBuffer: MAX_WHISPER_OUTPUT_BYTES }, (error) => {
        if (error) {
          reject(error);
          return;
        }

        if (!fs.existsSync(outputTxt)) {
          reject(new Error("No transcript file found"));
          return;
        }

        // A read failure must reject the promise; an exception thrown from this
        // callback would otherwise escape as an uncaught exception.
        try {
          resolve(fs.readFileSync(outputTxt, "utf8"));
        } catch (readError: unknown) {
          reject(readError instanceof Error ? readError : new Error(String(readError)));
        }
      });
    });
  }
}
|
||||||
Reference in New Issue
Block a user