mirror of
https://gitlab.rlp.net/proj-wise2526-video2document/video2document.git
synced 2026-06-15 18:01:52 +02:00
Compare commits
19 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 7e4cf93cae | |||
| fe86fa1a2f | |||
| 1e4bde93b1 | |||
| c62ee5aa41 | |||
| 9c156a7df3 | |||
| 797898fb8b | |||
| f81c9bc4e8 | |||
| 7b630646cc | |||
| b57972bf1b | |||
| a96a3bebd1 | |||
| 8c0130337e | |||
| 4a7aeec869 | |||
| 4f3b03f881 | |||
| 26ed7e3c58 | |||
| 853eb1a5eb | |||
| ee31d26116 | |||
| a13fea6734 | |||
| e82cf779da | |||
| 283b4ed6af |
Binary file not shown.
@@ -160,6 +160,7 @@ electron.ipcMain.on("file_submit", async (event, args) => {
|
||||
mainWindow.webContents.send("progress", {curstep:curstep, totalsteps:totalsteps})
|
||||
}).catch(err => {
|
||||
mainWindow.webContents.send("error", err)
|
||||
console.log(err);
|
||||
return
|
||||
})
|
||||
|
||||
@@ -174,6 +175,7 @@ electron.ipcMain.on("file_submit", async (event, args) => {
|
||||
mainWindow.webContents.send("progress", {curstep:curstep, totalsteps:totalsteps})
|
||||
}).catch(err => {
|
||||
mainWindow.webContents.send("error", err)
|
||||
console.log(err);
|
||||
return
|
||||
})
|
||||
|
||||
@@ -188,6 +190,7 @@ electron.ipcMain.on("file_submit", async (event, args) => {
|
||||
mainWindow.webContents.send("progress", {curstep:curstep, totalsteps:totalsteps})
|
||||
}).catch(err => {
|
||||
mainWindow.webContents.send("error", err)
|
||||
console.log(err);
|
||||
return
|
||||
})
|
||||
|
||||
@@ -204,6 +207,7 @@ electron.ipcMain.on("file_submit", async (event, args) => {
|
||||
mainWindow.webContents.send("progress", {curstep:curstep, totalsteps:totalsteps})
|
||||
}).catch(err => {
|
||||
mainWindow.webContents.send("error", err)
|
||||
console.log(err);
|
||||
return
|
||||
})
|
||||
|
||||
@@ -212,6 +216,7 @@ electron.ipcMain.on("file_submit", async (event, args) => {
|
||||
mainWindow.webContents.send("speakerAudios", resp)
|
||||
}).catch(err => {
|
||||
mainWindow.webContents.send("error", err)
|
||||
console.log(err);
|
||||
return
|
||||
})
|
||||
} catch (error) {
|
||||
|
||||
@@ -30,24 +30,27 @@ async function showSaveDialog(defaultName, format) {
|
||||
throw err;
|
||||
}
|
||||
} else if (platform === 'win32') {
|
||||
// Windows
|
||||
const safeName = decodeURIComponent(defaultName);
|
||||
|
||||
const powershell = `
|
||||
Add-Type -AssemblyName System.Windows.Forms
|
||||
$dialog = New-Object System.Windows.Forms.SaveFileDialog
|
||||
$dialog.FileName = "${defaultName}.${format}"
|
||||
$dialog.Filter = "${format.toUpperCase()} Dateien (*.${format})|*.${format}|Alle Dateien (*.*)|*.*"
|
||||
$dialog.Title = "Dokument speichern als"
|
||||
$result = $dialog.ShowDialog()
|
||||
Add-Type -AssemblyName System.Windows.Forms;
|
||||
$dialog = New-Object System.Windows.Forms.SaveFileDialog;
|
||||
$dialog.FileName = '${safeName}.${format}';
|
||||
$dialog.Filter = '${format.toUpperCase()} Dateien (*.${format})|*.${format}|Alle Dateien (*.*)|*.*';
|
||||
$dialog.Title = 'Dokument speichern als';
|
||||
$result = $dialog.ShowDialog();
|
||||
if ($result -eq 'OK') { $dialog.FileName }
|
||||
`;
|
||||
|
||||
try {
|
||||
const result = execSync(`powershell -Command "${powershell.replace(/\n/g, '; ')}"`, {
|
||||
encoding: 'utf8'
|
||||
});
|
||||
const result = execSync(
|
||||
`powershell -NoProfile -Command "${powershell.replace(/\r?\n/g, ' ')}"`,
|
||||
{ encoding: 'utf8' }
|
||||
);
|
||||
return result.trim() || null;
|
||||
} catch (err) {
|
||||
throw err;
|
||||
if (err.status === 1) return null; // User cancelled
|
||||
throw new Error("Save dialog failed: " + err.message);
|
||||
}
|
||||
} else {
|
||||
// Linux - zenity oder kdialog
|
||||
|
||||
@@ -0,0 +1,54 @@
|
||||
// -----------------------------------------------------------
|
||||
// Parakeet (Step 3A: spawn Python minimal integration)
|
||||
// -----------------------------------------------------------
|
||||
|
||||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
const { spawn } = require("child_process");
|
||||
|
||||
module.exports = {
|
||||
name: "parakeet",
|
||||
type: "transcription",
|
||||
displayname: "NVIDIA Parakeet",
|
||||
|
||||
async function(audioFilePath) {
|
||||
console.log("🦜 [Parakeet] Starting test integration (spawn only)...");
|
||||
console.log("🦜 Input audio:", audioFilePath);
|
||||
|
||||
// Check audio exists
|
||||
if (!fs.existsSync(audioFilePath)) {
|
||||
throw new Error("Audio file does not exist: " + audioFilePath);
|
||||
}
|
||||
|
||||
// Output path in storage/transcripts
|
||||
const sessionId = path.basename(audioFilePath).replace(/\.[^.]+$/, "");
|
||||
const outputDir = path.join(__dirname, "../../../storage/transcripts");
|
||||
fs.mkdirSync(outputDir, { recursive: true });
|
||||
|
||||
const outputPath = path.join(outputDir, `${sessionId}.json`);
|
||||
|
||||
// -------------------------------------------------------
|
||||
// SPAWN PYTHON SCRIPT (step 3A — dummy script)
|
||||
// -------------------------------------------------------
|
||||
return new Promise((resolve, reject) => {
|
||||
const python310 = "C:\\Users\\smith\\AppData\\Local\\Programs\\Python\\Python310\\python.exe";
|
||||
const py = spawn(python310, [
|
||||
path.join(__dirname, "parakeet_transcribe.py"),
|
||||
audioFilePath,
|
||||
outputPath
|
||||
]);
|
||||
|
||||
py.stdout.on("data", data => console.log("🦜 [Python]", data.toString().trim()));
|
||||
py.stderr.on("data", data => console.error("🦜 [Python ERR]", data.toString().trim()));
|
||||
|
||||
py.on("close", code => {
|
||||
if (code === 0) {
|
||||
console.log("🦜 [Parakeet] Done (spawn test). Output:", outputPath);
|
||||
resolve(outputPath);
|
||||
} else {
|
||||
reject(new Error("Python script failed with exit code " + code));
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
};
|
||||
@@ -0,0 +1,71 @@
|
||||
# -----------------------------------------------------------
# Parakeet Real Transcriber (NVIDIA NeMo + PyTorch GPU)
# -----------------------------------------------------------
#
# Usage:
#   python parakeet_transcribe.py <input audio path> <output JSON path>
#
# Loads the pretrained "nvidia/parakeet-ctc-0.6b" model, transcribes the
# given audio file and writes the result as a JSON document.

import json
import os
import sys


def transcript_id(output_path):
    """Return the transcript id for ``output_path``.

    The id is the file name of ``output_path`` (directories removed using the
    running platform's path rules) with one trailing ``.json`` stripped.
    Names that do not end in ``.json`` are returned unchanged.
    """
    name = os.path.basename(output_path)
    suffix = ".json"
    if name.endswith(suffix):
        return name[: -len(suffix)]
    return name


def main(argv):
    """Transcribe ``argv[1]`` and write the JSON result to ``argv[2]``.

    Args:
        argv: Command-line vector; ``argv[1]`` is the input audio path and
            ``argv[2]`` the output JSON path. Extra arguments are ignored.

    Returns:
        Process exit code: 0 on success, 2 when arguments are missing.
    """
    # The log lines below contain emoji; a piped stdout may default to a
    # non-UTF-8 locale encoding, which would make print() raise
    # UnicodeEncodeError. Force UTF-8 where the stream supports it.
    for stream in (sys.stdout, sys.stderr):
        if hasattr(stream, "reconfigure"):
            stream.reconfigure(encoding="utf-8")

    if len(argv) < 3:
        print(
            "usage: parakeet_transcribe.py <input audio path> <output JSON path>",
            file=sys.stderr,
        )
        return 2

    audio_path = argv[1]
    output_path = argv[2]

    # Third-party imports are deferred so the argument check above can fail
    # before they are loaded.
    import soundfile as sf
    import torch
    from nemo.collections.asr.models import ASRModel

    print("🔥 Starting Parakeet model...")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print("🔥 Using device:", device)

    # -----------------------------------------------------------
    # Load Parakeet model (NVIDIA pretrained ASR)
    # -----------------------------------------------------------
    model = ASRModel.from_pretrained(model_name="nvidia/parakeet-ctc-0.6b")
    model = model.to(device)
    model.eval()

    # -----------------------------------------------------------
    # Load audio
    # -----------------------------------------------------------
    print("🎧 Loading audio:", audio_path)
    # NOTE(review): the sample rate is read but never used; the audio is not
    # resampled. Presumably the model expects a fixed rate — confirm.
    audio, _sample_rate = sf.read(audio_path)

    # model expects mono float32
    if len(audio.shape) > 1:
        audio = audio.mean(axis=1)

    audio = audio.astype("float32")

    # -----------------------------------------------------------
    # Run inference
    # -----------------------------------------------------------
    print("🧠 Running inference...")
    with torch.no_grad():
        hyp = model.transcribe([audio])[0]

    # Extract only the text
    if hasattr(hyp, "text"):
        transcript = hyp.text
    else:
        # fallback: convert to string (rare)
        transcript = str(hyp)

    print("📄 Transcript:", transcript)

    # -----------------------------------------------------------
    # Save JSON format compatible with V2D pipeline
    # -----------------------------------------------------------
    result = {
        "id": transcript_id(output_path),
        "tool": "nemo_parakeet",
        "status": "completed",
        "text": transcript,
        "words": [],  # no word-level timestamps are extracted here
    }

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(result, f, indent=2, ensure_ascii=False)

    print("✔ JSON saved at:", output_path)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|
||||
Reference in New Issue
Block a user