From ee31d26116046318bf8b5790170af3158505d0e1 Mon Sep 17 00:00:00 2001
From: Azeufack Noupeu Willy
Date: Thu, 11 Dec 2025 14:52:48 +0100
Subject: [PATCH] Implemented local Parakeet transcription module (S4-07)

---
Review note: this patch was hand-edited during review (formatting restored,
spawn error handling, interpreter override, Windows-safe transcript id,
argument validation, UTF-8 stdio). The blob ids on the "index" lines and the
commit id above still refer to the original revision.

 .../modules/transcription-local/parakeet.js   | 76 +++++++++++++++
 .../parakeet_transcribe.py                    | 97 +++++++++++++++++++
 2 files changed, 173 insertions(+)
 create mode 100644 services/modules/transcription-local/parakeet.js
 create mode 100644 services/modules/transcription-local/parakeet_transcribe.py

diff --git a/services/modules/transcription-local/parakeet.js b/services/modules/transcription-local/parakeet.js
new file mode 100644
index 0000000..10c95e0
--- /dev/null
+++ b/services/modules/transcription-local/parakeet.js
@@ -0,0 +1,76 @@
+// -----------------------------------------------------------
+// Parakeet (Step 3A: spawn Python minimal integration)
+// -----------------------------------------------------------
+
+const fs = require("fs");
+const path = require("path");
+const { spawn } = require("child_process");
+
+// Python interpreter used to run parakeet_transcribe.py. Set the
+// PARAKEET_PYTHON environment variable to use a different install; the
+// fallback is the original hard-coded path so existing setups keep working.
+const DEFAULT_PYTHON =
+  "C:\\Users\\smith\\AppData\\Local\\Programs\\Python\\Python310\\python.exe";
+
+module.exports = {
+  name: "parakeet",
+  type: "transcription",
+  displayname: "NVIDIA Parakeet",
+
+  /**
+   * Transcribe an audio file by running parakeet_transcribe.py.
+   *
+   * @param {string} audioFilePath - Path of the audio file to transcribe.
+   * @returns {Promise} Resolves with the path (string) of the JSON
+   *   transcript once the Python process exits with code 0. Rejects if the
+   *   audio file does not exist, Python cannot be started, or it exits
+   *   with a non-zero code.
+   */
+  async function(audioFilePath) {
+    console.log("🦜 [Parakeet] Starting test integration (spawn only)...");
+    console.log("🦜 Input audio:", audioFilePath);
+
+    // Check audio exists
+    if (!fs.existsSync(audioFilePath)) {
+      throw new Error("Audio file does not exist: " + audioFilePath);
+    }
+
+    // Output path in storage/transcripts
+    const sessionId = path.basename(audioFilePath).replace(/\.[^.]+$/, "");
+    const outputDir = path.join(__dirname, "../../../storage/transcripts");
+    fs.mkdirSync(outputDir, { recursive: true });
+
+    const outputPath = path.join(outputDir, `${sessionId}.json`);
+
+    // -------------------------------------------------------
+    // SPAWN PYTHON SCRIPT (parakeet_transcribe.py)
+    // -------------------------------------------------------
+    return new Promise((resolve, reject) => {
+      const pythonBin = process.env.PARAKEET_PYTHON || DEFAULT_PYTHON;
+      const py = spawn(pythonBin, [
+        path.join(__dirname, "parakeet_transcribe.py"),
+        audioFilePath,
+        outputPath
+      ]);
+
+      py.stdout.on("data", data => console.log("🦜 [Python]", data.toString().trim()));
+      py.stderr.on("data", data => console.error("🦜 [Python ERR]", data.toString().trim()));
+
+      // A failed spawn (e.g. interpreter not found) emits "error". Without
+      // a listener Node throws it as an uncaught exception and the promise
+      // would never settle.
+      py.on("error", err => {
+        reject(new Error("Failed to start Python process: " + err.message));
+      });
+
+      py.on("close", code => {
+        if (code === 0) {
+          console.log("🦜 [Parakeet] Done (spawn test). Output:", outputPath);
+          resolve(outputPath);
+        } else {
+          reject(new Error("Python script failed with exit code " + code));
+        }
+      });
+    });
+  }
+};
diff --git a/services/modules/transcription-local/parakeet_transcribe.py b/services/modules/transcription-local/parakeet_transcribe.py
new file mode 100644
index 0000000..1272e46
--- /dev/null
+++ b/services/modules/transcription-local/parakeet_transcribe.py
@@ -0,0 +1,97 @@
+# -----------------------------------------------------------
+# Parakeet Real Transcriber (NVIDIA NeMo + PyTorch GPU)
+# -----------------------------------------------------------
+"""Transcribe one audio file with NVIDIA Parakeet and save the text as JSON.
+
+Usage:
+    python parakeet_transcribe.py INPUT_AUDIO OUTPUT_JSON
+"""
+
+import json
+import sys
+from pathlib import Path
+
+import soundfile as sf
+import torch
+from nemo.collections.asr.models import ASRModel
+
+MODEL_NAME = "nvidia/parakeet-ctc-0.6b"
+
+
+def main(argv):
+    """Transcribe argv[1] and write the JSON result to argv[2].
+
+    Args:
+        argv: Command-line arguments; argv[1] is the input audio path and
+            argv[2] is the output JSON path.
+
+    Returns:
+        Process exit code: 0 on success, 2 when arguments are missing.
+    """
+    # When spawned with piped stdio Python can fall back to a legacy locale
+    # encoding that cannot encode the emoji in the log lines; force UTF-8 so
+    # a print() cannot abort the run.
+    for stream in (sys.stdout, sys.stderr):
+        if hasattr(stream, "reconfigure"):
+            stream.reconfigure(encoding="utf-8", errors="replace")
+
+    if len(argv) < 3:
+        print(
+            "Usage: parakeet_transcribe.py INPUT_AUDIO OUTPUT_JSON",
+            file=sys.stderr,
+        )
+        return 2
+
+    audio_path, output_path = argv[1], argv[2]
+
+    print("🔥 Starting Parakeet model...")
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    print("🔥 Using device:", device)
+
+    # Load Parakeet model (NVIDIA pretrained ASR)
+    model = ASRModel.from_pretrained(model_name=MODEL_NAME)
+    model = model.to(device)
+    model.eval()
+
+    # Load audio
+    print("🎧 Loading audio:", audio_path)
+    audio, sr = sf.read(audio_path)
+
+    # model expects mono float32
+    if audio.ndim > 1:
+        audio = audio.mean(axis=1)
+    audio = audio.astype("float32")
+
+    # NOTE(review): the samples are passed to the model without resampling
+    # and `sr` is not checked; confirm inputs match the model's sample rate.
+
+    # Run inference
+    print("🧠 Running inference...")
+    with torch.no_grad():
+        hyp = model.transcribe([audio])[0]
+
+    # Extract only the text; fall back to str() when there is no .text
+    transcript = hyp.text if hasattr(hyp, "text") else str(hyp)
+
+    print("📄 Transcript:", transcript)
+
+    # Save JSON format compatible with V2D pipeline
+    result = {
+        # On Windows, Path also treats backslashes as separators, which
+        # split("/") did not, so the id is the bare file name again.
+        "id": Path(output_path).stem,
+        "tool": "nemo_parakeet",
+        "status": "completed",
+        "text": transcript,
+        "words": [],  # word timestamps are not requested from the model
+    }
+
+    with open(output_path, "w", encoding="utf-8") as f:
+        json.dump(result, f, indent=2, ensure_ascii=False)
+
+    print("✔ JSON saved at:", output_path)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv))