Merge branch 'develop' into 'feature/ui-test'

# Conflicts:
#   main.js
This commit is contained in:
Hughes, Mike
2026-01-18 17:38:36 +01:00
18 changed files with 1099 additions and 277 deletions
+2 -1
View File
@@ -8,7 +8,7 @@ if (!fs.existsSync(outputDir)) {
}
// Ensure SAIA API key is set in environment variables: export SAIA_API_KEY="your_api_key_here"
const SAIA_API_KEY = process.env.SAIA_API_KEY; // Ensure SAIA API key is set in environment variables
let SAIA_API_KEY // Ensure SAIA API key is set in environment variables
const SAIA_URL = "https://chat-ai.academiccloud.de/v1/chat/completions"; // URL for the REST call, used model and action
const module_exports = {
@@ -18,6 +18,7 @@ const module_exports = {
description: "Generates documents using OpenAI GPT OSS 120B via SAIA platform",
async function(parameter) {
SAIA_API_KEY = process.env.SAIA_API_KEY;
return new Promise(async (resolve, reject) => {
try {
// console.log("SAIA OpenAI GPT module invoked with parameters:", parameter);
+2 -1
View File
@@ -8,7 +8,7 @@ if (!fs.existsSync(outputDir)) {
}
// Ensure Google API key is set in environment variables: export GOOGLE_API_KEY="your_api_key_here"
const GEMINI_API_KEY = process.env.GOOGLE_API_KEY; // Ensure Google API key is set in environment variables: export GOOGLE_API_KEY="your_api_key_here"
let GEMINI_API_KEY; // Ensure Google API key is set in environment variables: export GOOGLE_API_KEY="your_api_key_here"
const GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent"; // URL for the REST call, used model and action
const module_exports = {
@@ -18,6 +18,7 @@ const module_exports = {
description: "Generates documents using Google Gemini LLM",
async function(parameter) {
GEMINI_API_KEY = process.env.GOOGLE_API_KEY;
return new Promise(async (resolve, reject) => {
try {
// console.log("Gemini LLM module invoked with parameters:", parameter);
+2 -1
View File
@@ -8,7 +8,7 @@ if (!fs.existsSync(outputDir)) {
}
// Ensure SAIA API key is set in environment variables: export SAIA_API_KEY="your_api_key_here"
const SAIA_API_KEY = process.env.SAIA_API_KEY;
let SAIA_API_KEY;
const SAIA_URL = "https://chat-ai.academiccloud.de/v1/chat/completions"; // URL for the REST call, used model and action
const module_exports = {
@@ -18,6 +18,7 @@ const module_exports = {
description: "Generates documents using QWEN 3 235B via SAIA platform",
async function(parameter) {
SAIA_API_KEY = process.env.SAIA_API_KEY;
return new Promise(async (resolve, reject) => {
try {
// console.log("SAIA QWEN 3 235B module invoked with parameters:", parameter);
@@ -0,0 +1,79 @@
const fs = require('fs');
const path = require('path');
/**
 * Processor module that rewrites speaker placeholders (e.g. "speakerA",
 * "Speaker A", "speaker_a") in the most recently modified HTML document
 * with the display names supplied by the caller.
 */
const module_exports = {
  name: "replace_speaker",
  type: "processor",
  displayname: "Speaker Name Replacer",
  description: "Replaces speaker placeholder names with actual names based on a mapping in HTML files",

  /**
   * Entry point: picks the newest `.html` file in the documents directory
   * and replaces the speaker placeholders in it.
   *
   * @param {Object<string, (string|{name: string})>} speakerMapping
   *   Placeholder -> display name (or an object carrying a `name` property).
   * @returns {Promise<string>} Path of the HTML file that was rewritten.
   * @throws {Error} If the documents directory contains no `.html` file.
   */
  async function(speakerMapping) {
    // Resolved relative to this file, not to the process working directory.
    const documentsDir = path.resolve(__dirname, '../../../storage/documents');
    const inputHtmlPath = await this.getNewestFile(documentsDir, '.html');

    if (!inputHtmlPath) {
      throw new Error(`No HTML files found in ${documentsDir}`);
    }

    return await this.replaceNames(inputHtmlPath, speakerMapping);
  },

  /**
   * Returns the entry of `dirPath` whose name ends with `extension` and whose
   * modification time (`mtimeMs`) is the largest.
   *
   * @param {string} dirPath - Directory to scan (not recursive).
   * @param {string} extension - Required file-name suffix, e.g. ".html".
   * @returns {Promise<string|null>} Full path of the newest match, or `null`
   *   when nothing matches.
   * @throws Re-throws any file-system error after logging it.
   */
  getNewestFile: async function(dirPath, extension) {
    try {
      const files = await fs.promises.readdir(dirPath);
      const filtered = files.filter(f => f.endsWith(extension));

      if (filtered.length === 0) return null;

      const filesWithStats = await Promise.all(
        filtered.map(async (f) => {
          const fullPath = path.join(dirPath, f);
          const stats = await fs.promises.stat(fullPath);
          return { path: fullPath, time: stats.mtimeMs };
        })
      );

      // `filtered` is non-empty here, so reduce without a seed is safe.
      return filesWithStats.reduce((newest, curr) =>
        curr.time > newest.time ? curr : newest
      ).path;
    } catch (error) {
      console.error("Error reading directory:", error);
      throw error;
    }
  },

  /**
   * Replaces every occurrence of each placeholder in the given HTML file and
   * writes the result back to the same file.
   *
   * For each mapping entry two spellings are matched case-insensitively:
   * "speaker" followed by optional whitespace / "_" / "-" and the LAST
   * character of the placeholder, and the placeholder itself as a whole word.
   *
   * Entries with an empty placeholder or without a usable name are skipped.
   *
   * @param {string} inputHtmlPath - HTML file to rewrite in place.
   * @param {Object<string, (string|{name: string})>} speakerMapping
   * @returns {Promise<string>} `inputHtmlPath`, once the file has been written.
   * @throws Re-throws any read/write error after logging it.
   */
  replaceNames: async function(inputHtmlPath, speakerMapping) {
    // Placeholders are data, not patterns: neutralise regex metacharacters.
    const escapeRegExp = (text) => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

    try {
      const htmlContent = await fs.promises.readFile(inputHtmlPath, "utf-8");
      let outputContent = htmlContent;

      for (const [placeholder, value] of Object.entries(speakerMapping || {})) {
        // Extract name if value is an object (tolerating null/undefined values).
        const displayName = typeof value === 'string' ? value : value && value.name;

        // An empty placeholder would degenerate into a pattern that matches at
        // every word boundary; a missing name would inject "undefined".
        if (placeholder === '' || displayName === undefined || displayName === null) {
          continue;
        }

        const suffix = escapeRegExp(placeholder.charAt(placeholder.length - 1));
        const literal = escapeRegExp(placeholder);

        // The 'i' flag already covers speaker / Speaker / SPEAKER.
        const regex = new RegExp(
          `\\bspeaker\\s*[_-]?\\s*${suffix}\\b|\\b${literal}\\b`,
          'gi'
        );

        // Function replacer: the name is inserted verbatim, so "$&", "$1" or
        // "$$" inside a display name are not interpreted as patterns.
        const replacement = String(displayName);
        outputContent = outputContent.replace(regex, () => replacement);
      }

      await fs.promises.writeFile(inputHtmlPath, outputContent, "utf-8");
      return inputHtmlPath;
    } catch (error) {
      console.error("Error replacing speaker names:", error);
      throw error;
    }
  }
};
module.exports = module_exports;
@@ -0,0 +1,54 @@
// -----------------------------------------------------------
// Parakeet (Step 3A: spawn Python minimal integration)
// -----------------------------------------------------------
const fs = require("fs");
const path = require("path");
const { spawn } = require("child_process");
module.exports = {
name: "parakeet",
type: "transcription",
displayname: "NVIDIA Parakeet",
async function(audioFilePath) {
console.log("🦜 [Parakeet] Starting test integration (spawn only)...");
console.log("🦜 Input audio:", audioFilePath);
// Check audio exists
if (!fs.existsSync(audioFilePath)) {
throw new Error("Audio file does not exist: " + audioFilePath);
}
// Output path in storage/transcripts
const sessionId = path.basename(audioFilePath).replace(/\.[^.]+$/, "");
const outputDir = path.join(__dirname, "../../../storage/transcripts");
fs.mkdirSync(outputDir, { recursive: true });
const outputPath = path.join(outputDir, `${sessionId}.json`);
// -------------------------------------------------------
// SPAWN PYTHON SCRIPT (step 3A — dummy script)
// -------------------------------------------------------
return new Promise((resolve, reject) => {
const python310 = "C:\\Users\\smith\\AppData\\Local\\Programs\\Python\\Python310\\python.exe";
const py = spawn(python310, [
path.join(__dirname, "parakeet_transcribe.py"),
audioFilePath,
outputPath
]);
py.stdout.on("data", data => console.log("🦜 [Python]", data.toString().trim()));
py.stderr.on("data", data => console.error("🦜 [Python ERR]", data.toString().trim()));
py.on("close", code => {
if (code === 0) {
console.log("🦜 [Parakeet] Done (spawn test). Output:", outputPath);
resolve(outputPath);
} else {
reject(new Error("Python script failed with exit code " + code));
}
});
});
}
};
@@ -0,0 +1,71 @@
# -----------------------------------------------------------
# Parakeet Real Transcriber (NVIDIA NeMo + PyTorch GPU)
# -----------------------------------------------------------
import json
import os
import sys

import soundfile as sf
import torch
from nemo.collections.asr.models import ASRModel

# Status messages below contain emoji. When stdout/stderr are piped (this
# script is spawned by the Node module) Python on Windows may fall back to a
# legacy code page and raise UnicodeEncodeError, so force UTF-8 where possible.
for _stream in (sys.stdout, sys.stderr):
    if hasattr(_stream, "reconfigure"):
        _stream.reconfigure(encoding="utf-8")

# Args:
# sys.argv[1] = input audio path
# sys.argv[2] = output JSON path
if len(sys.argv) < 3:
    print("Usage: parakeet_transcribe.py <input_audio> <output_json>", file=sys.stderr)
    sys.exit(2)

audio_path = sys.argv[1]
output_path = sys.argv[2]

print("🔥 Starting Parakeet model...")
device = "cuda" if torch.cuda.is_available() else "cpu"
print("🔥 Using device:", device)

# -----------------------------------------------------------
# Load Parakeet model (NVIDIA pretrained ASR)
# -----------------------------------------------------------
model = ASRModel.from_pretrained(model_name="nvidia/parakeet-ctc-0.6b")
model = model.to(device)
model.eval()

# -----------------------------------------------------------
# Load audio
# -----------------------------------------------------------
print("🎧 Loading audio:", audio_path)
audio, sr = sf.read(audio_path)
# NOTE(review): `sr` is read but never checked; the model presumably expects a
# specific sample rate (16 kHz?) — confirm and resample if required.

# Down-mix multi-channel audio to mono and convert to float32
if len(audio.shape) > 1:
    audio = audio.mean(axis=1)
audio = audio.astype("float32")

# -----------------------------------------------------------
# Run inference
# -----------------------------------------------------------
print("🧠 Running inference...")
with torch.no_grad():
    hyp = model.transcribe([audio])[0]

# Extract only the text
if hasattr(hyp, "text"):
    transcript = hyp.text
else:
    # fallback: convert to string (rare)
    transcript = str(hyp)

print("📄 Transcript:", transcript)

# -----------------------------------------------------------
# Save JSON format compatible with V2D pipeline
# -----------------------------------------------------------
# Derive the id from the file name only. os.path.basename handles the
# platform's separators (the caller builds the path with backslashes on
# Windows), and only a trailing ".json" is stripped.
result_id = os.path.basename(output_path)
if result_id.lower().endswith(".json"):
    result_id = result_id[: -len(".json")]

result = {
    "id": result_id,
    "tool": "nemo_parakeet",
    "status": "completed",
    "text": transcript,
    "words": []  # no word-level timestamps are extracted by this script
}

with open(output_path, "w", encoding="utf-8") as f:
    json.dump(result, f, indent=2, ensure_ascii=False)

print("✔ JSON saved at:", output_path)
@@ -1,4 +1,4 @@
const API_KEY = process.env.ASSEMBLYAI_API_KEY;
let API_KEY
const BASE_URL = 'https://api.assemblyai.com/v2';
//---------------------------------------------------Upload audio---------------------------------------------------
@@ -92,6 +92,7 @@ module.exports = {
audioformat: "mp3",
async function(audioFileName) {
API_KEY = process.env.ASSEMBLYAI_API_KEY;
return new Promise(async (resolve, reject) => {
try {
// audioFileName ist nur "datei.mp3"