From ee31d26116046318bf8b5790170af3158505d0e1 Mon Sep 17 00:00:00 2001
From: Azeufack Noupeu Willy <willy.azeufack.noupeu@th-bingen.de>
Date: Thu, 11 Dec 2025 14:52:48 +0100
Subject: [PATCH] Implemented local Parakeet transcription module (S4-07)

---
 .../modules/transcription-local/parakeet.js   | 54 ++++++++++++++
 .../parakeet_transcribe.py                    | 71 +++++++++++++++++++
 2 files changed, 125 insertions(+)
 create mode 100644 services/modules/transcription-local/parakeet.js
 create mode 100644 services/modules/transcription-local/parakeet_transcribe.py

diff --git a/services/modules/transcription-local/parakeet.js b/services/modules/transcription-local/parakeet.js
new file mode 100644
index 0000000..10c95e0
--- /dev/null
+++ b/services/modules/transcription-local/parakeet.js
@@ -0,0 +1,78 @@
+// -----------------------------------------------------------
+//  Parakeet (Step 3A: spawn Python minimal integration)
+// -----------------------------------------------------------
+
+const fs = require("fs");
+const path = require("path");
+const { spawn } = require("child_process");
+
+module.exports = {
+    name: "parakeet",
+    type: "transcription",
+    displayname: "NVIDIA Parakeet",
+
+    /**
+     * Transcribe an audio file by spawning parakeet_transcribe.py.
+     *
+     * The transcript is written to storage/transcripts/<name>.json, where
+     * <name> is the audio file's basename without its extension.
+     *
+     * @param {string} audioFilePath Path of the audio file to transcribe.
+     * @returns {Promise<string>} Path of the JSON file written by the script.
+     * @throws {Error} If the audio file is missing, the Python interpreter
+     *     cannot be started, or the script exits with a non-zero code.
+     */
+    async function(audioFilePath) {
+        console.log("🦜 [Parakeet] Starting test integration (spawn only)...");
+        console.log("🦜 Input audio:", audioFilePath);
+
+        // Check audio exists
+        if (!fs.existsSync(audioFilePath)) {
+            throw new Error("Audio file does not exist: " + audioFilePath);
+        }
+
+        // Output path in storage/transcripts
+        const sessionId = path.basename(audioFilePath).replace(/\.[^.]+$/, "");
+        const outputDir = path.join(__dirname, "../../../storage/transcripts");
+        fs.mkdirSync(outputDir, { recursive: true });
+
+        const outputPath = path.join(outputDir, `${sessionId}.json`);
+
+        // Interpreter comes from PARAKEET_PYTHON when set, otherwise from PATH,
+        // so the module is not tied to one developer's machine.
+        const pythonBin = process.env.PARAKEET_PYTHON ||
+            (process.platform === "win32" ? "python" : "python3");
+
+        // -------------------------------------------------------
+        // SPAWN PYTHON SCRIPT
+        // -------------------------------------------------------
+        return new Promise((resolve, reject) => {
+            const py = spawn(
+                pythonBin,
+                [path.join(__dirname, "parakeet_transcribe.py"), audioFilePath, outputPath],
+                // UTF-8 stdio: the script prints emoji, which fails on pipes
+                // whose default encoding cannot represent them.
+                { env: { ...process.env, PYTHONIOENCODING: "utf-8" } }
+            );
+
+            py.stdout.on("data", data => console.log("🦜 [Python]", data.toString().trim()));
+            py.stderr.on("data", data => console.error("🦜 [Python ERR]", data.toString().trim()));
+
+            // "error" fires when the process cannot be spawned (e.g. the
+            // interpreter is not found); reject instead of crashing on an
+            // unhandled event.
+            py.on("error", err => {
+                reject(new Error("Failed to start Python (" + pythonBin + "): " + err.message));
+            });
+
+            py.on("close", code => {
+                if (code === 0) {
+                    console.log("🦜 [Parakeet] Done (spawn test). Output:", outputPath);
+                    resolve(outputPath);
+                } else {
+                    reject(new Error("Python script failed with exit code " + code));
+                }
+            });
+        });
+    }
+};
diff --git a/services/modules/transcription-local/parakeet_transcribe.py b/services/modules/transcription-local/parakeet_transcribe.py
new file mode 100644
index 0000000..1272e46
--- /dev/null
+++ b/services/modules/transcription-local/parakeet_transcribe.py
@@ -0,0 +1,96 @@
+# -----------------------------------------------------------
+#  Parakeet Real Transcriber (NVIDIA NeMo + PyTorch GPU)
+# -----------------------------------------------------------
+"""Transcribe one audio file with NVIDIA NeMo Parakeet and save it as JSON.
+
+Usage: parakeet_transcribe.py <input_audio_path> <output_json_path>
+"""
+
+import json
+import os
+import sys
+
+import soundfile as sf
+import torch
+from nemo.collections.asr.models import ASRModel
+
+MODEL_NAME = "nvidia/parakeet-ctc-0.6b"
+
+
+def main(argv):
+    """Run the transcription described by ``argv`` and return an exit code.
+
+    Args:
+        argv: Command line; ``argv[1]`` is the input audio path and
+            ``argv[2]`` is the output JSON path.
+
+    Returns:
+        0 on success, 2 when the required arguments are missing.
+    """
+    if len(argv) < 3:
+        print(
+            "Usage: parakeet_transcribe.py <input_audio> <output_json>",
+            file=sys.stderr,
+        )
+        return 2
+    audio_path, output_path = argv[1], argv[2]
+
+    # The log lines below contain emoji; force UTF-8 so printing does not
+    # raise UnicodeEncodeError when the default stream encoding is narrower.
+    for stream in (sys.stdout, sys.stderr):
+        if hasattr(stream, "reconfigure"):
+            stream.reconfigure(encoding="utf-8")
+
+    print("🔥 Starting Parakeet model...")
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    print("🔥 Using device:", device)
+
+    # Load Parakeet model (NVIDIA pretrained ASR)
+    model = ASRModel.from_pretrained(model_name=MODEL_NAME)
+    model = model.to(device)
+    model.eval()
+
+    # Load audio
+    print("🎧 Loading audio:", audio_path)
+    # NOTE(review): the file's sample rate is read but never used, so the
+    # samples are passed on without resampling — confirm the rate the model
+    # expects.
+    audio, _sample_rate = sf.read(audio_path)
+
+    # Down-mix multi-channel audio by averaging channels, then cast to float32.
+    if audio.ndim > 1:
+        audio = audio.mean(axis=1)
+    audio = audio.astype("float32")
+
+    # Run inference
+    print("🧠 Running inference...")
+    with torch.no_grad():
+        hyp = model.transcribe([audio])[0]
+
+    # Keep only the text; fall back to str() if ``text`` is absent.
+    transcript = hyp.text if hasattr(hyp, "text") else str(hyp)
+    print("📄 Transcript:", transcript)
+
+    # Derive the id with os.path so both "/" and "\\" separated paths work
+    # on Windows (splitting on "/" alone returned the whole path there).
+    file_name = os.path.basename(output_path)
+    transcript_id = file_name[:-5] if file_name.endswith(".json") else file_name
+
+    # Save JSON format compatible with V2D pipeline
+    result = {
+        "id": transcript_id,
+        "tool": "nemo_parakeet",
+        "status": "completed",
+        "text": transcript,
+        "words": [],  # word timestamps are not extracted by this script
+    }
+
+    with open(output_path, "w", encoding="utf-8") as f:
+        json.dump(result, f, indent=2, ensure_ascii=False)
+
+    print("✔ JSON saved at:", output_path)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv))