From 9902b0421eea5d04c317f1b1fc6f66d049550b22 Mon Sep 17 00:00:00 2001 From: MikeHughes-BIN Date: Thu, 6 Nov 2025 09:58:28 +0100 Subject: [PATCH 1/2] Implemented local transcription solution with starting scripts --- package.json | 4 +- scripts/commands.txt | 2 + scripts/extract.ts | 23 +++++ scripts/transcribe.ts | 18 ++++ .../modules/extraction/ffmpegExtractor.ts | 87 ++++++++++--------- .../modules/transcription/local/whisper.cpp | 1 + .../transcription/local/whisperLocal.ts | 55 ++++++++++++ 7 files changed, 148 insertions(+), 42 deletions(-) create mode 100644 scripts/commands.txt create mode 100644 scripts/extract.ts create mode 100644 scripts/transcribe.ts create mode 160000 services/modules/transcription/local/whisper.cpp create mode 100644 services/modules/transcription/local/whisperLocal.ts diff --git a/package.json b/package.json index c120da8..649f09f 100644 --- a/package.json +++ b/package.json @@ -1,4 +1,5 @@ { + "type": "module", "dependencies": { "cli-progress": "^3.12.0", "ffmpeg-static": "^5.2.0", @@ -11,5 +12,4 @@ "ts-node": "^10.9.2", "typescript": "^5.9.3" } -} - +} \ No newline at end of file diff --git a/scripts/commands.txt b/scripts/commands.txt new file mode 100644 index 0000000..31b6ea7 --- /dev/null +++ b/scripts/commands.txt @@ -0,0 +1,2 @@ +npx ts-node ./extract.ts /Users/mikehughes/Downloads/sweetHomeAlabama.mov +npx ts-node ./transcribe.ts ../storage/audio/sweetHomeAlabama.wav diff --git a/scripts/extract.ts b/scripts/extract.ts new file mode 100644 index 0000000..2d1c7a0 --- /dev/null +++ b/scripts/extract.ts @@ -0,0 +1,23 @@ +#!/usr/bin/env ts-node + +import { extractAudioFromVideo } from "../services/modules/extraction/ffmpegExtractor.ts"; + +const videoPath = process.argv[2]; + +if (!videoPath) { + console.error("Usage: ts-node extractAudio.ts "); + process.exit(1); +} + +(async () => { + try { + console.log(`Extracting audio from: ${videoPath}`); + + await extractAudioFromVideo(videoPath); // Call the extraction function (ffmpegExtractor.ts in services/modules/extraction) + + console.log("Audio extraction completed successfully."); + } catch (err) { + console.error("Audio extraction failed:", err); + process.exit(1); + } +})(); \ No newline at end of file diff --git a/scripts/transcribe.ts b/scripts/transcribe.ts new file mode 100644 index 0000000..55d91a5 --- /dev/null +++ b/scripts/transcribe.ts @@ -0,0 +1,18 @@ +import { whisperLocal } from "../services/modules/transcription/local/whisperLocal.ts"; + +const audioPath = process.argv[2]; +if (!audioPath) { + console.error("Please provide an audio file path as argument."); + process.exit(1); +} + +const whisper = new whisperLocal(); + +(async () => { + try { + const text = await whisper.transcribe(audioPath); + console.log(text); + } catch (err) { + console.error("Transcription failed:", err); + } +})(); \ No newline at end of file diff --git a/services/modules/extraction/ffmpegExtractor.ts b/services/modules/extraction/ffmpegExtractor.ts index 27c280b..0831859 100644 --- a/services/modules/extraction/ffmpegExtractor.ts +++ b/services/modules/extraction/ffmpegExtractor.ts @@ -3,32 +3,36 @@ import ffmpeg from 'fluent-ffmpeg'; import path from 'path'; import fs from 'fs'; import cliProgress from 'cli-progress'; +import { fileURLToPath } from 'url'; // Base code reference: https://docs.yemreak.com/arsiv/programming/extract-audio-from-video-with-typescript-and-ffmpeg -// Test command: npx ts-node ffmpegExtractor.ts /path/to/video.mp4 +// Test command: npx ts-node ./extract.ts /path/to/video.mp4 /** * Extracts audio from a video file and saves it as WAV. * @param videoFilePath Path to the input video file. - * @param outputAudioPath Path where the output WAV audio will be saved. */ // Ensure ffmpeg binary is available if (!ffmpegPath) { - throw new Error('FFmpeg binary not found!'); + throw new Error('FFmpeg binary not found!'); } ffmpeg.setFfmpegPath(ffmpegPath); // Ensure an input video path is provided via CLI if (process.argv.length < 3) { - console.error('Usage: ts-node ffmpegExtractor.ts '); - process.exit(1); + console.error('Usage: ts-node ./extract.ts '); + process.exit(1); } -// Prepare output directory (always relative to project root) -const outputDir = path.join(process.cwd(), 'storage', 'audio'); +// Resolve __dirname equivalent in ESM +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); + +// Prepare output directory (always storage/audio under project root) +const outputDir = path.join(__dirname, '..', '..', 'storage', 'audio'); if (!fs.existsSync(outputDir)) { - fs.mkdirSync(outputDir, { recursive: true }); + fs.mkdirSync(outputDir, { recursive: true }); } // Derive input and output paths @@ -38,10 +42,10 @@ const outputAudioPath = path.join(outputDir, `${inputVideoName}.wav`); // Initialize CLI progress bar const progressBar = new cliProgress.SingleBar({ - format: 'Processing |{bar}| {percentage}% | {timemark}', - barCompleteChar: '\u2588', - barIncompleteChar: '\u2591', - hideCursor: true + format: 'Processing |{bar}| {percentage}% | {timemark}', + barCompleteChar: '\u2588', + barIncompleteChar: '\u2591', + hideCursor: true }); /** @@ -50,34 +54,37 @@ const progressBar = new cliProgress.SingleBar({ * - Shows CLI progress bar * - Handles errors gracefully (without errors) */ -function extractAudioFromVideo(videoFilePath: string, outputAudioPath: string): Promise { - return new Promise((resolve, reject) => { - ffmpeg(videoFilePath) - .outputFormat('wav') // Set output format to WAV - .on('progress', (progress) => { - // Start progress bar if not already active - if (!progressBar.isActive) progressBar.start(100, 0, { timemark: '00:00:00' }); - if (progress.percent) { - progressBar.update(progress.percent, { timemark: progress.timemark }); - } - }) - .on('end', () => { - // Finish progress bar - progressBar.update(100, { timemark: 'done' }); - progressBar.stop(); - console.log(`Extraction completed: ${outputAudioPath}`); - resolve(); - }) - .on('error', (err) => { - // Show extraction errors in a clear format - console.error(`failed_audio_extraction: ${err.message}`); - reject(err); - }) - .save(outputAudioPath); // Save output file - }); +export function extractAudioFromVideo(videoFilePath: string): Promise { + return new Promise((resolve, reject) => { + ffmpeg(videoFilePath) + .outputFormat('wav') + .audioCodec('pcm_s16le') + .audioChannels(1) + .audioFrequency(16000) + .on('progress', (progress) => { + if (!progressBar.isActive) progressBar.start(100, 0, { timemark: '00:00:00' }); + if (progress.percent) { + progressBar.update(progress.percent, { timemark: progress.timemark }); + } + }) + .on('end', () => { + progressBar.update(100, { timemark: 'done' }); + progressBar.stop(); + console.log(`Extraction completed: ${outputAudioPath}`); + resolve(); + }) + .on('error', (err) => { + progressBar.stop(); + console.error(`failed_audio_extraction: ${err.message}`); + reject(err); + }) + .save(outputAudioPath); + }); } -// Run extraction -extractAudioFromVideo(inputVideoPath, outputAudioPath) +// Run extraction if executed directly from CLI +if (import.meta.url === `file://${process.argv[1]}`) { + extractAudioFromVideo(inputVideoPath) .then(() => console.log('Audio extraction successful.')) - .catch((err) => console.error(err)); \ No newline at end of file + .catch((err) => console.error(err)); +} \ No newline at end of file diff --git a/services/modules/transcription/local/whisper.cpp b/services/modules/transcription/local/whisper.cpp new file mode 160000 index 0000000..999a7e0 --- /dev/null +++ b/services/modules/transcription/local/whisper.cpp @@ -0,0 +1 @@ +Subproject commit 999a7e0cbf8484dc2cea1e9f855d6b39f34f7ae9 diff --git a/services/modules/transcription/local/whisperLocal.ts b/services/modules/transcription/local/whisperLocal.ts new file mode 100644 index 0000000..e1923a0 --- /dev/null +++ b/services/modules/transcription/local/whisperLocal.ts @@ -0,0 +1,55 @@ +import { exec } from "child_process"; // Node.js built-in module +import path from "path"; // Path module +import fs from "fs"; // File system module +import { fileURLToPath } from "url"; // To handle __dirname in ES modules + +const __filename = fileURLToPath(import.meta.url); // Get current file path +const __dirname = path.dirname(__filename); // Get current directory path +const transcriptsDir = path.resolve(__dirname, "../../storage/transcriptions"); + + +export class whisperLocal { // is called by transcribe.ts + private whisperBinary: string; // Path to the whisper.cpp binary + private modelPath: string; // Path to the model file + + constructor() { + this.whisperBinary = path.resolve( + __dirname, + "whisper.cpp/build/bin/whisper-cli" //Path to the compiled whisper binary + ); + + this.modelPath = path.resolve( + __dirname, + "whisper.cpp/models/ggml-base.en.bin" // Path to the English model file + ); + } + + async transcribe(audioPath: string): Promise { //asyncronous function to transcribe audio + return new Promise((resolve, reject) => { + const transcriptsDir = path.resolve(__dirname, "../../../../storage/transcripts"); //storage directory for transcripts + + if (!fs.existsSync(transcriptsDir)) { //if transcripts directory does not exist, create it + fs.mkdirSync(transcriptsDir, { recursive: true }); + } + + const outputBase = path.resolve( // Base path for output transcript files, name is same as audio file (video file) + transcriptsDir, + path.basename(audioPath, path.extname(audioPath)) + ); + + const command = `"${this.whisperBinary}" -m "${this.modelPath}" -f "${audioPath}" -otxt -of "${outputBase}"`; // Command to execute whisper binary with model and audio file, outputting text file + + exec(command, (error, stdout, stderr) => { + if (error) return reject(error); + + const outputTxt = `${outputBase}.txt`; + if (fs.existsSync(outputTxt)) { + const transcript = fs.readFileSync(outputTxt, "utf8"); + resolve(transcript); + } else { + reject(new Error("No transcript file found")); + } + }); + }); + } +} \ No newline at end of file From cfa37b9f2fc2db0e6de6df8821eb17a3a804215a Mon Sep 17 00:00:00 2001 From: MikeHughes-BIN Date: Sat, 8 Nov 2025 13:26:35 +0100 Subject: [PATCH 2/2] additional test for commands.txt and fix for correct path in the ffmpegExtractor module --- scripts/commands.txt | 7 +++++-- services/modules/extraction/ffmpegExtractor.ts | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/scripts/commands.txt b/scripts/commands.txt index 31b6ea7..fc620fe 100644 --- a/scripts/commands.txt +++ b/scripts/commands.txt @@ -1,2 +1,5 @@ -npx ts-node ./extract.ts /Users/mikehughes/Downloads/sweetHomeAlabama.mov -npx ts-node ./transcribe.ts ../storage/audio/sweetHomeAlabama.wav +npx ts-node ./extract.ts /Users/mikehughes/Downloads/Testvideo/Kurzgesagt.mov +npx ts-node ./transcribe.ts ../storage/audio/Kurzgesagt.wav + +npx ts-node ./extract.ts /Users/mikehughes/Downloads/Testvideo/GitLabMeeting.mov +npx ts-node ./transcribe.ts ../storage/audio/GitLabMeeting.wav diff --git a/services/modules/extraction/ffmpegExtractor.ts b/services/modules/extraction/ffmpegExtractor.ts index 0831859..c23c879 100644 --- a/services/modules/extraction/ffmpegExtractor.ts +++ b/services/modules/extraction/ffmpegExtractor.ts @@ -30,7 +30,7 @@ const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); // Prepare output directory (always storage/audio under project root) -const outputDir = path.join(__dirname, '..', '..', 'storage', 'audio'); +const outputDir = path.join(__dirname, '..', '..', '..', 'storage', 'audio'); if (!fs.existsSync(outputDir)) { fs.mkdirSync(outputDir, { recursive: true }); }