Merge branch 'feature/s4-07-parakeet' of https://gitlab.rlp.net/proj-wise2526-video2document/video2document into feature/s4-07-parakeet

Implemented local Parakeet transcription module (S4-07)
Merge branch 'develop' of https://gitlab.rlp.net/proj-wise2526-video2document/video2document into develop
2026-06-15 18:01:52 +02:00 · 2026-01-08 15:26:59 +01:00 · 2026-01-08 15:09:59 +01:00 · 2026-01-08 13:46:41 +01:00 · 2025-12-11 14:52:48 +01:00 · 2025-12-06 14:51:45 +01:00
2 changed files with 125 additions and 0 deletions
@@ -0,0 +1,54 @@
+// -----------------------------------------------------------
+//  Parakeet (Step 3A: spawn Python minimal integration)
+// -----------------------------------------------------------
+
+const fs = require("fs");
+const path = require("path");
+const { spawn } = require("child_process");
+
+module.exports = {
+    name: "parakeet",
+    type: "transcription",
+    displayname: "NVIDIA Parakeet",
+
+    async function(audioFilePath) {
+        console.log("🦜 [Parakeet] Starting test integration (spawn only)...");
+        console.log("🦜 Input audio:", audioFilePath);
+
+        // Check audio exists
+        if (!fs.existsSync(audioFilePath)) {
+            throw new Error("Audio file does not exist: " + audioFilePath);
+        }
+
+        // Output path in storage/transcripts
+        const sessionId = path.basename(audioFilePath).replace(/\.[^.]+$/, "");
+        const outputDir = path.join(__dirname, "../../../storage/transcripts");
+        fs.mkdirSync(outputDir, { recursive: true });
+
+        const outputPath = path.join(outputDir, `${sessionId}.json`);
+
+        // -------------------------------------------------------
+        // SPAWN PYTHON SCRIPT (step 3A — dummy script)
+        // -------------------------------------------------------
+        return new Promise((resolve, reject) => {
+            const python310 = "C:\\Users\\smith\\AppData\\Local\\Programs\\Python\\Python310\\python.exe";
+            const py = spawn(python310, [
+                path.join(__dirname, "parakeet_transcribe.py"),
+                audioFilePath,
+                outputPath
+            ]);
+
+            py.stdout.on("data", data => console.log("🦜 [Python]", data.toString().trim()));
+            py.stderr.on("data", data => console.error("🦜 [Python ERR]", data.toString().trim()));
+
+            py.on("close", code => {
+                if (code === 0) {
+                    console.log("🦜 [Parakeet] Done (spawn test). Output:", outputPath);
+                    resolve(outputPath);
+                } else {
+                    reject(new Error("Python script failed with exit code " + code));
+                }
+            });
+        });
+    }
+};
@@ -0,0 +1,71 @@
+# -----------------------------------------------------------
+#  Parakeet Real Transcriber (NVIDIA NeMo + PyTorch GPU)
+# -----------------------------------------------------------
+
+import sys
+import json
+import soundfile as sf
+import torch
+from nemo.collections.asr.models import ASRModel
+
+# Args:
+#   sys.argv[1] = input audio path
+#   sys.argv[2] = output JSON path
+
+audio_path = sys.argv[1]
+output_path = sys.argv[2]
+
+print("🔥 Starting Parakeet model...")
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print("🔥 Using device:", device)
+
+# -----------------------------------------------------------
+# Load Parakeet model (NVIDIA pretrained ASR)
+# -----------------------------------------------------------
+model = ASRModel.from_pretrained(model_name="nvidia/parakeet-ctc-0.6b")
+model = model.to(device)
+model.eval()
+
+# -----------------------------------------------------------
+# Load audio
+# -----------------------------------------------------------
+print("🎧 Loading audio:", audio_path)
+audio, sr = sf.read(audio_path)
+
+# model expects mono float32
+if len(audio.shape) > 1:
+    audio = audio.mean(axis=1)
+
+audio = audio.astype("float32")
+
+# -----------------------------------------------------------
+# Run inference
+# -----------------------------------------------------------
+print("🧠 Running inference...")
+with torch.no_grad():
+    hyp = model.transcribe([audio])[0]
+
+# Extract only the text
+if hasattr(hyp, "text"):
+    transcript = hyp.text
+else:
+    # fallback: convert to string (rare)
+    transcript = str(hyp)
+
+print("📄 Transcript:", transcript)
+
+# -----------------------------------------------------------
+# Save JSON format compatible with V2D pipeline
+# -----------------------------------------------------------
+result = {
+    "id": output_path.split("/")[-1].replace(".json", ""),
+    "tool": "nemo_parakeet",
+    "status": "completed",
+    "text": transcript,
+    "words": []  # Parakeet XS doesn’t return word timestamps
+}
+
+with open(output_path, "w", encoding="utf-8") as f:
+    json.dump(result, f, indent=2, ensure_ascii=False)
+
+print("✔ JSON saved at:", output_path)
Author	SHA1	Message	Date
Azeufack Noupeu Willy	7e4cf93cae	Merge branch 'feature/s4-07-parakeet' of https://gitlab.rlp.net/proj-wise2526-video2document/video2document into feature/s4-07-parakeet	2026-01-08 15:26:59 +01:00
Azeufack Noupeu Willy	fe86fa1a2f	Implemented local Parakeet transcription module (S4-07)	2026-01-08 15:09:59 +01:00
Azeufack Noupeu Willy	1e4bde93b1	Merge branch 'develop' of https://gitlab.rlp.net/proj-wise2526-video2document/video2document into develop	2026-01-08 13:46:41 +01:00
Azeufack Noupeu Willy	ee31d26116	Implemented local Parakeet transcription module (S4-07)	2025-12-11 14:52:48 +01:00
Azeufack Noupeu Willy	a13fea6734	Merge branch 'develop' of https://gitlab.rlp.net/proj-wise2526-video2document/video2document into develop	2025-12-06 14:51:45 +01:00
Azeufack Noupeu Willy	e82cf779da	Merge branch 'main' of https://gitlab.rlp.net/proj-wise2526-video2document/video2document into develop	2025-12-06 14:48:31 +01:00