Compare commits

...

10 Commits

Author SHA1 Message Date
Azeufack Noupeu Willy 7e4cf93cae Merge branch 'feature/s4-07-parakeet' of https://gitlab.rlp.net/proj-wise2526-video2document/video2document into feature/s4-07-parakeet 2026-01-08 15:26:59 +01:00
Azeufack Noupeu Willy fe86fa1a2f Implemented local Parakeet transcription module (S4-07) 2026-01-08 15:09:59 +01:00
Azeufack Noupeu Willy 1e4bde93b1 Merge branch 'develop' of https://gitlab.rlp.net/proj-wise2526-video2document/video2document into develop 2026-01-08 13:46:41 +01:00
Hughes, Mike c62ee5aa41 Merge branch 'fix/remove-misleading-env-file' into 'develop'
env example is in UTF 16 which, when copied and used as the .env, is...

See merge request proj-wise2526-video2document/video2document!79
2026-01-07 21:15:29 +01:00
MikeHughes-BIN 9c156a7df3 env example is in UTF 16 which, when copied and used as the .env, is unreadable and breaks the Programm 2025-12-23 13:48:27 +01:00
Hughes, Mike 797898fb8b Merge branch 'develop' into 'main'
Develop

See merge request proj-wise2526-video2document/video2document!78
2025-12-22 14:27:25 +01:00
Minning, Eric 7b630646cc Merge branch 'develop' into 'main'
Develop

See merge request proj-wise2526-video2document/video2document!76
2025-12-22 14:25:10 +01:00
Azeufack Noupeu Willy ee31d26116 Implemented local Parakeet transcription module (S4-07) 2025-12-11 14:52:48 +01:00
Azeufack Noupeu Willy a13fea6734 Merge branch 'develop' of https://gitlab.rlp.net/proj-wise2526-video2document/video2document into develop 2025-12-06 14:51:45 +01:00
Azeufack Noupeu Willy e82cf779da Merge branch 'main' of https://gitlab.rlp.net/proj-wise2526-video2document/video2document into develop 2025-12-06 14:48:31 +01:00
4 changed files with 125 additions and 0 deletions
Vendored
BIN
View File
Binary file not shown.
BIN
View File
Binary file not shown.
@@ -0,0 +1,54 @@
// -----------------------------------------------------------
// Parakeet (Step 3A: spawn Python minimal integration)
// -----------------------------------------------------------
const fs = require("fs");
const path = require("path");
const { spawn } = require("child_process");
module.exports = {
name: "parakeet",
type: "transcription",
displayname: "NVIDIA Parakeet",
async function(audioFilePath) {
console.log("🦜 [Parakeet] Starting test integration (spawn only)...");
console.log("🦜 Input audio:", audioFilePath);
// Check audio exists
if (!fs.existsSync(audioFilePath)) {
throw new Error("Audio file does not exist: " + audioFilePath);
}
// Output path in storage/transcripts
const sessionId = path.basename(audioFilePath).replace(/\.[^.]+$/, "");
const outputDir = path.join(__dirname, "../../../storage/transcripts");
fs.mkdirSync(outputDir, { recursive: true });
const outputPath = path.join(outputDir, `${sessionId}.json`);
// -------------------------------------------------------
// SPAWN PYTHON SCRIPT (step 3A — dummy script)
// -------------------------------------------------------
return new Promise((resolve, reject) => {
const python310 = "C:\\Users\\smith\\AppData\\Local\\Programs\\Python\\Python310\\python.exe";
const py = spawn(python310, [
path.join(__dirname, "parakeet_transcribe.py"),
audioFilePath,
outputPath
]);
py.stdout.on("data", data => console.log("🦜 [Python]", data.toString().trim()));
py.stderr.on("data", data => console.error("🦜 [Python ERR]", data.toString().trim()));
py.on("close", code => {
if (code === 0) {
console.log("🦜 [Parakeet] Done (spawn test). Output:", outputPath);
resolve(outputPath);
} else {
reject(new Error("Python script failed with exit code " + code));
}
});
});
}
};
@@ -0,0 +1,71 @@
# -----------------------------------------------------------
# Parakeet Real Transcriber (NVIDIA NeMo + PyTorch GPU)
# -----------------------------------------------------------
import sys
import json
import soundfile as sf
import torch
from nemo.collections.asr.models import ASRModel
# Args:
# sys.argv[1] = input audio path
# sys.argv[2] = output JSON path

MODEL_NAME = "nvidia/parakeet-ctc-0.6b"

# Sample rate the audio is assumed to need for this checkpoint.
# NOTE(review): taken from NVIDIA's model card (16 kHz mono input) — confirm
# if MODEL_NAME is ever changed.
EXPECTED_SAMPLE_RATE = 16000


def transcript_id_from_path(output_path):
    """Derive the transcript id from the output JSON path.

    The id is the file name without its directory and without a trailing
    ".json". Both "/" and "\\" are treated as separators, because the calling
    Node process passes native paths (backslashes on Windows).

    Args:
        output_path: Path of the JSON file that will be written.

    Returns:
        The bare file name with a trailing ".json" (any case) removed.
    """
    import ntpath  # splits on both "/" and "\\" regardless of the host OS

    file_name = ntpath.basename(output_path)
    if file_name.lower().endswith(".json"):
        file_name = file_name[: -len(".json")]
    return file_name


def main(argv):
    """Transcribe one audio file and write the result as JSON.

    Args:
        argv: Command-line vector; argv[1] is the input audio path and
            argv[2] is the output JSON path.

    Returns:
        Process exit code: 0 on success, 2 when arguments are missing.
    """
    if len(argv) < 3:
        print(
            "Usage: parakeet_transcribe.py <input_audio> <output_json>",
            file=sys.stderr,
        )
        return 2

    audio_path = argv[1]
    output_path = argv[2]

    # When stdout/stderr are pipes, Python may pick a legacy locale encoding
    # that cannot represent the emoji below or non-ASCII transcript text.
    for stream in (sys.stdout, sys.stderr):
        if hasattr(stream, "reconfigure"):
            stream.reconfigure(encoding="utf-8", errors="replace")

    print("🔥 Starting Parakeet model...")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print("🔥 Using device:", device)

    # -----------------------------------------------------------
    # Load Parakeet model (NVIDIA pretrained ASR)
    # -----------------------------------------------------------
    model = ASRModel.from_pretrained(model_name=MODEL_NAME)
    model = model.to(device)
    model.eval()

    # -----------------------------------------------------------
    # Load audio
    # -----------------------------------------------------------
    print("🎧 Loading audio:", audio_path)
    audio, sr = sf.read(audio_path)

    # The raw samples are handed to the model without resampling, so a
    # mismatching rate would silently degrade the transcript; make it visible.
    if sr != EXPECTED_SAMPLE_RATE:
        print(
            "Warning: audio sample rate is %d Hz, expected %d Hz; "
            "transcription quality may suffer." % (sr, EXPECTED_SAMPLE_RATE),
            file=sys.stderr,
        )

    # model expects mono float32: average the channels of multi-channel input
    if len(audio.shape) > 1:
        audio = audio.mean(axis=1)
    audio = audio.astype("float32")

    # -----------------------------------------------------------
    # Run inference
    # -----------------------------------------------------------
    print("🧠 Running inference...")
    with torch.no_grad():
        hyp = model.transcribe([audio])[0]

    # Extract only the text
    if hasattr(hyp, "text"):
        transcript = hyp.text
    else:
        # fallback: convert to string (rare)
        transcript = str(hyp)
    print("📄 Transcript:", transcript)

    # -----------------------------------------------------------
    # Save JSON format compatible with V2D pipeline
    # -----------------------------------------------------------
    result = {
        "id": transcript_id_from_path(output_path),
        "tool": "nemo_parakeet",
        "status": "completed",
        "text": transcript,
        "words": [],  # word timestamps are not extracted by this script
    }
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(result, f, indent=2, ensure_ascii=False)
    print("✔ JSON saved at:", output_path)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))