From 0f548b301230c52a27a774793eb3eea7dc5a2f08 Mon Sep 17 00:00:00 2001 From: MikeHughes-BIN Date: Sun, 18 Jan 2026 19:05:55 +0100 Subject: [PATCH 1/3] Add function to send speaker packages and handle errors --- electron/main/script.js | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/electron/main/script.js b/electron/main/script.js index e9e661a..f4d2794 100644 --- a/electron/main/script.js +++ b/electron/main/script.js @@ -499,4 +499,14 @@ function reloadDocuments() { existingDocs.appendChild(option); }); }); -} \ No newline at end of file +} + +function sendSpeakerPackages() { + try { + window.submitSpeaker.speaker_submit(speakerAudios); + } catch (error) { + console.log(error); + } +} + +window.sendSpeakerPackages = sendSpeakerPackages; \ No newline at end of file From 8f2024df1432f41ade821f4108c11d92dec830c1 Mon Sep 17 00:00:00 2001 From: MikeHughes-BIN Date: Sun, 18 Jan 2026 19:12:04 +0100 Subject: [PATCH 2/3] Refactor HTML structure for improved readability and maintainability --- electron/main/index.html | 573 ++++++++++++++++++++++----------------- 1 file changed, 329 insertions(+), 244 deletions(-) diff --git a/electron/main/index.html b/electron/main/index.html index 0d9587f..093e9fa 100644 --- a/electron/main/index.html +++ b/electron/main/index.html @@ -1,255 +1,340 @@ - + + + + + Video to document + + + - - - - Video to document - - - + +
+
+
-

Video to document

+

Video to document

-
- -
- -
- -
-
1. Step
-
2. Step
-
3. Step
-
4. Step
-
5. Step
-
6. Step
-
- -
- - - -
- - - - - -
-

Upload your video here:

-
-

Drag and drop video file

- -
No video chosen
-
- -
- - -
-
- - - - - - - - - - - - - - - - - - +
+
1. Step
+
2. Step
+
3. Step
+
4. Step
+
5. Step
+
6. Step
+
-
- - - - - +
+ - \ No newline at end of file + +
+ + + + +
+

Upload your video here:

+
+

Drag and drop video file

+ +
No video chosen
+
+ +
+ + +
+
+ + + + + + + + + + + + + + + + + +
+ + +
+ + + + + + From b6276b005e1d03b01f208ee4322f0da0d12911f5 Mon Sep 17 00:00:00 2001 From: MikeHughes-BIN Date: Sun, 18 Jan 2026 19:13:18 +0100 Subject: [PATCH 3/3] Removed Parakeet again as it requires PyTorch which is too large for this project --- .../modules/transcription-local/parakeet.js | 54 -------------- .../parakeet_transcribe.py | 71 ------------------- 2 files changed, 125 deletions(-) delete mode 100644 services/modules/transcription-local/parakeet.js delete mode 100644 services/modules/transcription-local/parakeet_transcribe.py diff --git a/services/modules/transcription-local/parakeet.js b/services/modules/transcription-local/parakeet.js deleted file mode 100644 index 10c95e0..0000000 --- a/services/modules/transcription-local/parakeet.js +++ /dev/null @@ -1,54 +0,0 @@ -// ----------------------------------------------------------- -// Parakeet (Step 3A: spawn Python minimal integration) -// ----------------------------------------------------------- - -const fs = require("fs"); -const path = require("path"); -const { spawn } = require("child_process"); - -module.exports = { - name: "parakeet", - type: "transcription", - displayname: "NVIDIA Parakeet", - - async function(audioFilePath) { - console.log("🦜 [Parakeet] Starting test integration (spawn only)..."); - console.log("🦜 Input audio:", audioFilePath); - - // Check audio exists - if (!fs.existsSync(audioFilePath)) { - throw new Error("Audio file does not exist: " + audioFilePath); - } - - // Output path in storage/transcripts - const sessionId = path.basename(audioFilePath).replace(/\.[^.]+$/, ""); - const outputDir = path.join(__dirname, "../../../storage/transcripts"); - fs.mkdirSync(outputDir, { recursive: true }); - - const outputPath = path.join(outputDir, `${sessionId}.json`); - - // ------------------------------------------------------- - // SPAWN PYTHON SCRIPT (step 3A — dummy script) - // ------------------------------------------------------- - return new Promise((resolve, reject) => { - const python310 = "C:\\Users\\smith\\AppData\\Local\\Programs\\Python\\Python310\\python.exe"; - const py = spawn(python310, [ - path.join(__dirname, "parakeet_transcribe.py"), - audioFilePath, - outputPath - ]); - - py.stdout.on("data", data => console.log("🦜 [Python]", data.toString().trim())); - py.stderr.on("data", data => console.error("🦜 [Python ERR]", data.toString().trim())); - - py.on("close", code => { - if (code === 0) { - console.log("🦜 [Parakeet] Done (spawn test). Output:", outputPath); - resolve(outputPath); - } else { - reject(new Error("Python script failed with exit code " + code)); - } - }); - }); - } -}; diff --git a/services/modules/transcription-local/parakeet_transcribe.py b/services/modules/transcription-local/parakeet_transcribe.py deleted file mode 100644 index 1272e46..0000000 --- a/services/modules/transcription-local/parakeet_transcribe.py +++ /dev/null @@ -1,71 +0,0 @@ -# ----------------------------------------------------------- -# Parakeet Real Transcriber (NVIDIA NeMo + PyTorch GPU) -# ----------------------------------------------------------- - -import sys -import json -import soundfile as sf -import torch -from nemo.collections.asr.models import ASRModel - -# Args: -# sys.argv[1] = input audio path -# sys.argv[2] = output JSON path - -audio_path = sys.argv[1] -output_path = sys.argv[2] - -print("🔥 Starting Parakeet model...") -device = "cuda" if torch.cuda.is_available() else "cpu" -print("🔥 Using device:", device) - -# ----------------------------------------------------------- -# Load Parakeet model (NVIDIA pretrained ASR) -# ----------------------------------------------------------- -model = ASRModel.from_pretrained(model_name="nvidia/parakeet-ctc-0.6b") -model = model.to(device) -model.eval() - -# ----------------------------------------------------------- -# Load audio -# ----------------------------------------------------------- -print("🎧 Loading audio:", audio_path) -audio, sr = sf.read(audio_path) - -# model expects mono float32 -if len(audio.shape) > 1: - audio = audio.mean(axis=1) - -audio = audio.astype("float32") - -# ----------------------------------------------------------- -# Run inference -# ----------------------------------------------------------- -print("🧠 Running inference...") -with torch.no_grad(): - hyp = model.transcribe([audio])[0] - -# Extract only the text -if hasattr(hyp, "text"): - transcript = hyp.text -else: - # fallback: convert to string (rare) - transcript = str(hyp) - -print("📄 Transcript:", transcript) - -# ----------------------------------------------------------- -# Save JSON format compatible with V2D pipeline -# ----------------------------------------------------------- -result = { - "id": output_path.split("/")[-1].replace(".json", ""), - "tool": "nemo_parakeet", - "status": "completed", - "text": transcript, - "words": [] # Parakeet XS doesn’t return word timestamps -} - -with open(output_path, "w", encoding="utf-8") as f: - json.dump(result, f, indent=2, ensure_ascii=False) - -print("✔ JSON saved at:", output_path)