mirror of
https://gitlab.rlp.net/proj-wise2526-video2document/video2document.git
synced 2026-06-15 18:01:52 +02:00
Merge branch 'develop' into 'feature/ui-test'
# Conflicts: # main.js
This commit is contained in:
@@ -8,7 +8,7 @@ if (!fs.existsSync(outputDir)) {
|
||||
}
|
||||
|
||||
// Ensure SAIA API key is set in environment variables: export SAIA_API_KEY="your_api_key_here"
|
||||
const SAIA_API_KEY = process.env.SAIA_API_KEY; // Ensure SAIA API key is set in environment variables
|
||||
let SAIA_API_KEY // Ensure SAIA API key is set in environment variables
|
||||
const SAIA_URL = "https://chat-ai.academiccloud.de/v1/chat/completions"; // URL for the REST call, used model and action
|
||||
|
||||
const module_exports = {
|
||||
@@ -18,6 +18,7 @@ const module_exports = {
|
||||
description: "Generates documents using OpenAI GPT OSS 120B via SAIA platform",
|
||||
|
||||
async function(parameter) {
|
||||
SAIA_API_KEY = process.env.SAIA_API_KEY;
|
||||
return new Promise(async (resolve, reject) => {
|
||||
try {
|
||||
// console.log("SAIA OpenAI GPT module invoked with parameters:", parameter);
|
||||
|
||||
@@ -8,7 +8,7 @@ if (!fs.existsSync(outputDir)) {
|
||||
}
|
||||
|
||||
// Ensure Google API key is set in environment variables: export GOOGLE_API_KEY="your_api_key_here"
|
||||
const GEMINI_API_KEY = process.env.GOOGLE_API_KEY; // Ensure Google API key is set in environment variables: export GOOGLE_API_KEY="your_api_key_here"
|
||||
let GEMINI_API_KEY; // Ensure Google API key is set in environment variables: export GOOGLE_API_KEY="your_api_key_here"
|
||||
const GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent"; // URL for the REST call, used model and action
|
||||
|
||||
const module_exports = {
|
||||
@@ -18,6 +18,7 @@ const module_exports = {
|
||||
description: "Generates documents using Google Gemini LLM",
|
||||
|
||||
async function(parameter) {
|
||||
GEMINI_API_KEY = process.env.GOOGLE_API_KEY;
|
||||
return new Promise(async (resolve, reject) => {
|
||||
try {
|
||||
// console.log("Gemini LLM module invoked with parameters:", parameter);
|
||||
|
||||
@@ -8,7 +8,7 @@ if (!fs.existsSync(outputDir)) {
|
||||
}
|
||||
|
||||
// Ensure SAIA API key is set in environment variables: export SAIA_API_KEY="your_api_key_here"
|
||||
const SAIA_API_KEY = process.env.SAIA_API_KEY;
|
||||
let SAIA_API_KEY;
|
||||
const SAIA_URL = "https://chat-ai.academiccloud.de/v1/chat/completions"; // URL for the REST call, used model and action
|
||||
|
||||
const module_exports = {
|
||||
@@ -18,6 +18,7 @@ const module_exports = {
|
||||
description: "Generates documents using QWEN 3 235B via SAIA platform",
|
||||
|
||||
async function(parameter) {
|
||||
SAIA_API_KEY = process.env.SAIA_API_KEY;
|
||||
return new Promise(async (resolve, reject) => {
|
||||
try {
|
||||
// console.log("SAIA QWEN 3 235B module invoked with parameters:", parameter);
|
||||
|
||||
@@ -0,0 +1,79 @@
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
/**
 * Processor module that replaces speaker placeholders (e.g. "speakerA",
 * "Speaker A", "speaker_a") in the newest HTML document with real names.
 */
const module_exports = {
    name: "replace_speaker",
    type: "processor",
    displayname: "Speaker Name Replacer",
    description: "Replaces speaker placeholder names with actual names based on a mapping in HTML files",

    /**
     * Entry point: finds the most recently modified HTML file in the
     * documents directory and rewrites its speaker placeholders in place.
     *
     * @param {Object<string, (string|{name: string})>} speakerMapping
     *        Placeholder -> display name (or an object carrying `name`).
     * @returns {Promise<string>} Path of the HTML file that was rewritten.
     * @throws {Error} If the documents directory contains no HTML file.
     */
    async function(speakerMapping) {
        // Resolved relative to this file's location
        const documentsDir = path.resolve(__dirname, '../../../storage/documents');
        const inputHtmlPath = await this.getNewestFile(documentsDir, '.html');

        if (!inputHtmlPath) {
            throw new Error(`No HTML files found in ${documentsDir}`);
        }

        return await this.replaceNames(inputHtmlPath, speakerMapping);
    },

    /**
     * Returns the full path of the most recently modified file in `dirPath`
     * whose name ends with `extension`.
     *
     * @param {string} dirPath - Directory to scan (not recursive).
     * @param {string} extension - File name suffix to match, e.g. ".html".
     * @returns {Promise<string|null>} Newest matching path, or null if none match.
     * @throws Re-throws any error from reading the directory or stat-ing a file.
     */
    getNewestFile: async function(dirPath, extension) {
        try {
            const files = await fs.promises.readdir(dirPath);
            const filtered = files.filter(f => f.endsWith(extension));

            if (filtered.length === 0) return null;

            const filesWithStats = await Promise.all(
                filtered.map(async (f) => {
                    const fullPath = path.join(dirPath, f);
                    const stats = await fs.promises.stat(fullPath);
                    return { path: fullPath, time: stats.mtimeMs };
                })
            );

            // Safe without an initial value: the list is non-empty here.
            return filesWithStats.reduce((newest, curr) =>
                curr.time > newest.time ? curr : newest
            ).path;
        } catch (error) {
            console.error("Error reading directory:", error);
            throw error;
        }
    },

    /**
     * Replaces every speaker placeholder in the given HTML file with its
     * mapped display name and writes the result back to the same file.
     *
     * For each mapping key, two spellings are matched case-insensitively:
     * "speaker" + optional whitespace/underscore/hyphen + the key's last
     * character (e.g. "Speaker A", "speaker_a"), and the key itself.
     *
     * Entries with an empty key or without a usable name are skipped with a
     * warning instead of corrupting the document.
     *
     * @param {string} inputHtmlPath - HTML file to rewrite in place.
     * @param {Object<string, (string|{name: string})>} speakerMapping
     * @returns {Promise<string>} `inputHtmlPath`, once the file is written.
     * @throws Re-throws any read/write error after logging it.
     */
    replaceNames: async function(inputHtmlPath, speakerMapping) {
        // Placeholders are data, not patterns: neutralise regex metacharacters.
        const escapeRegExp = (text) => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

        try {
            const htmlContent = await fs.promises.readFile(inputHtmlPath, "utf-8");

            let outputContent = htmlContent;
            for (const [placeholder, value] of Object.entries(speakerMapping)) {
                // Extract name if value is an object
                const rawName = typeof value === 'string'
                    ? value
                    : (value != null ? value.name : undefined);

                // An empty key would match at every word boundary, and a missing
                // name would write "undefined" into the document.
                if (placeholder.length === 0 || rawName == null) {
                    console.warn(`Skipping speaker mapping entry without usable name: "${placeholder}"`);
                    continue;
                }
                const displayName = String(rawName);

                // Replace all variations: speakerA, SpeakerA, SPEAKERA, speaker_a, Speaker A, etc.
                const suffix = escapeRegExp(placeholder.charAt(placeholder.length - 1));
                const regex = new RegExp(
                    `\\b[Ss]peaker\\s*[_-]?\\s*${suffix}\\b|\\b${escapeRegExp(placeholder)}\\b`,
                    'gi'
                );

                // A replacer function inserts the name verbatim; a plain string
                // would expand "$&", "$1", "$$" etc. found in the name.
                outputContent = outputContent.replace(regex, () => displayName);
            }

            await fs.promises.writeFile(inputHtmlPath, outputContent, "utf-8");

            return inputHtmlPath;
        } catch (error) {
            console.error("Error replacing speaker names:", error);
            throw error;
        }
    }
};
|
||||
|
||||
module.exports = module_exports;
|
||||
@@ -0,0 +1,54 @@
|
||||
// -----------------------------------------------------------
|
||||
// Parakeet (Step 3A: spawn Python minimal integration)
|
||||
// -----------------------------------------------------------
|
||||
|
||||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
const { spawn } = require("child_process");
|
||||
|
||||
module.exports = {
|
||||
name: "parakeet",
|
||||
type: "transcription",
|
||||
displayname: "NVIDIA Parakeet",
|
||||
|
||||
async function(audioFilePath) {
|
||||
console.log("🦜 [Parakeet] Starting test integration (spawn only)...");
|
||||
console.log("🦜 Input audio:", audioFilePath);
|
||||
|
||||
// Check audio exists
|
||||
if (!fs.existsSync(audioFilePath)) {
|
||||
throw new Error("Audio file does not exist: " + audioFilePath);
|
||||
}
|
||||
|
||||
// Output path in storage/transcripts
|
||||
const sessionId = path.basename(audioFilePath).replace(/\.[^.]+$/, "");
|
||||
const outputDir = path.join(__dirname, "../../../storage/transcripts");
|
||||
fs.mkdirSync(outputDir, { recursive: true });
|
||||
|
||||
const outputPath = path.join(outputDir, `${sessionId}.json`);
|
||||
|
||||
// -------------------------------------------------------
|
||||
// SPAWN PYTHON SCRIPT (step 3A — dummy script)
|
||||
// -------------------------------------------------------
|
||||
return new Promise((resolve, reject) => {
|
||||
const python310 = "C:\\Users\\smith\\AppData\\Local\\Programs\\Python\\Python310\\python.exe";
|
||||
const py = spawn(python310, [
|
||||
path.join(__dirname, "parakeet_transcribe.py"),
|
||||
audioFilePath,
|
||||
outputPath
|
||||
]);
|
||||
|
||||
py.stdout.on("data", data => console.log("🦜 [Python]", data.toString().trim()));
|
||||
py.stderr.on("data", data => console.error("🦜 [Python ERR]", data.toString().trim()));
|
||||
|
||||
py.on("close", code => {
|
||||
if (code === 0) {
|
||||
console.log("🦜 [Parakeet] Done (spawn test). Output:", outputPath);
|
||||
resolve(outputPath);
|
||||
} else {
|
||||
reject(new Error("Python script failed with exit code " + code));
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
};
|
||||
@@ -0,0 +1,71 @@
|
||||
# -----------------------------------------------------------
|
||||
# Parakeet Real Transcriber (NVIDIA NeMo + PyTorch GPU)
|
||||
# -----------------------------------------------------------
|
||||
|
||||
import json
import os
import sys

import soundfile as sf
import torch
from nemo.collections.asr.models import ASRModel
|
||||
|
||||
# Args:
|
||||
# sys.argv[1] = input audio path
|
||||
# sys.argv[2] = output JSON path
|
||||
|
||||
def transcript_id_from_path(output_path):
    """Return the transcript id: the output file name without a trailing ".json".

    Uses os.path so both separators are handled on Windows, and strips only
    the final ".json" suffix rather than every occurrence in the name.
    """
    file_name = os.path.basename(output_path)
    if file_name.endswith(".json"):
        return file_name[: -len(".json")]
    return file_name


def main(argv):
    """Transcribe one audio file with Parakeet and write the result as JSON.

    argv[1] is the input audio path, argv[2] the output JSON path.
    Returns the process exit code (0 on success, 2 on bad usage).
    """
    if len(argv) < 3:
        print("Usage: parakeet_transcribe.py <audio_path> <output_json_path>", file=sys.stderr)
        return 2

    audio_path = argv[1]
    output_path = argv[2]

    # The messages below contain emoji; a piped stdout on Windows may default
    # to a legacy code page and raise UnicodeEncodeError without this.
    for stream in (sys.stdout, sys.stderr):
        if hasattr(stream, "reconfigure"):
            stream.reconfigure(encoding="utf-8", errors="replace")

    print("🔥 Starting Parakeet model...")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print("🔥 Using device:", device)

    # -----------------------------------------------------------
    # Load Parakeet model (NVIDIA pretrained ASR)
    # -----------------------------------------------------------
    model = ASRModel.from_pretrained(model_name="nvidia/parakeet-ctc-0.6b")
    model = model.to(device)
    model.eval()

    # -----------------------------------------------------------
    # Load audio
    # -----------------------------------------------------------
    print("🎧 Loading audio:", audio_path)
    # NOTE(review): the sample rate is read but never checked or resampled;
    # presumably the model expects a specific rate (likely 16 kHz) — confirm.
    audio, sr = sf.read(audio_path)

    # Down-mix multi-channel audio to mono and convert to float32
    if len(audio.shape) > 1:
        audio = audio.mean(axis=1)

    audio = audio.astype("float32")

    # -----------------------------------------------------------
    # Run inference
    # -----------------------------------------------------------
    print("🧠 Running inference...")
    with torch.no_grad():
        hyp = model.transcribe([audio])[0]

    # Extract only the text
    if hasattr(hyp, "text"):
        transcript = hyp.text
    else:
        # fallback: the result has no .text attribute, use its string form
        transcript = str(hyp)

    print("📄 Transcript:", transcript)

    # -----------------------------------------------------------
    # Save JSON format compatible with V2D pipeline
    # -----------------------------------------------------------
    result = {
        "id": transcript_id_from_path(output_path),
        "tool": "nemo_parakeet",
        "status": "completed",
        "text": transcript,
        "words": [],  # this script extracts only the text, no word timestamps
    }

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(result, f, indent=2, ensure_ascii=False)

    print("✔ JSON saved at:", output_path)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|
||||
@@ -1,4 +1,4 @@
|
||||
const API_KEY = process.env.ASSEMBLYAI_API_KEY;
|
||||
let API_KEY
|
||||
const BASE_URL = 'https://api.assemblyai.com/v2';
|
||||
|
||||
//---------------------------------------------------Upload audio---------------------------------------------------
|
||||
@@ -92,6 +92,7 @@ module.exports = {
|
||||
audioformat: "mp3",
|
||||
|
||||
async function(audioFileName) {
|
||||
API_KEY = process.env.ASSEMBLYAI_API_KEY;
|
||||
return new Promise(async (resolve, reject) => {
|
||||
try {
|
||||
// audioFileName ist nur "datei.mp3"
|
||||
|
||||
Reference in New Issue
Block a user