Compare commits

...

15 Commits

Author SHA1 Message Date
santa 2f5efee9c7 Summarizer und weiters angepasst 2025-11-24 14:27:49 +01:00
santa 465fe8bd41 Summarizer angepasst 2025-11-24 14:25:29 +01:00
santa b87bfd444d Test erstellt 2025-11-20 13:27:15 +01:00
santa 97b571b7f9 Einbindung des Summarizer 2025-11-20 11:51:34 +01:00
santa 455147a41b Summarizer Tool erstellt zur zusammenfassung der vom TranskriptionTool kommenden json 2025-11-20 10:15:11 +01:00
santa 9441699561 if Pruefung fuer Datei Endung gefixt 2025-11-16 14:46:19 +01:00
Spanier, Pit 2edc7f8351 Merge branch 'fix/transcription-module-fix' into 'develop'
New Folder structure

See merge request proj-wise2526-video2document/video2document!21
2025-11-15 15:11:35 +01:00
MikeHughes-BIN 6083773f88 New Folder structure 2025-11-15 14:45:13 +01:00
Spanier, Pit 444d408480 Merge branch 'feature/fixing-the-program' into 'develop'
fixed the program by moving the example module back to where the program can ACTUALLY load it

See merge request proj-wise2526-video2document/video2document!20
2025-11-14 14:30:16 +01:00
emily d9eacafc3a fixed the program by moving the example module back to where the program can ACTUALLY load it 2025-11-14 14:28:11 +01:00
Hughes, Mike ab737f0dc9 Merge branch 'feature/12-externe-transkription-s2-02b' into 'develop'
feat(S2-02b): Implement AssemblyAI external transcription with speaker diarization

See merge request proj-wise2526-video2document/video2document!18
2025-11-13 17:38:39 +01:00
MikeHughes-BIN 79e0c48755 Reduced Number of test paths to avoid redundancy 2025-11-13 17:35:40 +01:00
MikeHughes-BIN 9254ddc57f Changed the Folder Structure for better maintainability 2025-11-13 17:34:22 +01:00
Azeufack Noupeu Willy c021272ca4 merge: Resolve conflicts with develop 2025-11-13 15:22:33 +01:00
Azeufack Noupeu Willy e7e97a7f60 feat(S2-02b): Implement AssemblyAI external transcription with speaker diarization
- Add assembly.ts module for REST API transcription via AssemblyAI
- Implement 5-step pipeline: upload → create job → poll status → download → save
- Enable speaker_labels for diarization (Speaker A, B, C...)
- Add millisecond-precision timestamps for each utterance
- Store JSON transcripts in storage/transcripts/{session_id}.json
- Add axios, dotenv dependencies
- Add transcribeLatest.ts helper for quick testing

User Story: S2-02b - Externe Transkription per REST API
2025-11-13 13:07:18 +01:00
26 changed files with 13030 additions and 104 deletions
+3
View File
@@ -12,6 +12,9 @@ try {
contextBridge.exposeInMainWorld("electronAPI", {
getFilePath: (file) => {return webUtils.getPathForFile(file)}
})
contextBridge.exposeInMainWorld("summarizer", {
runFile: (file) => ipcRenderer.send("summarize-transcription", file)
});
} catch (error) {
console.log("Error in preload.js");
}
+9 -3
View File
@@ -16,15 +16,21 @@ uploadContainer.addEventListener("drop", (e) => {
e.preventDefault()
const files = e.dataTransfer.files
const filePath = window.explorer.onFileDrop(files[0])
var holdy = filePath + "";
if(holdy.endsWith(".mp4") || holdy.endsWith(".mov") || holdy.endsWith(".avi") || holdy.endsWith( ".mkv")){
console.log(filePath)
var holdy = String(filePath);
const lower = holdy.toLowerCase();
const validExt = [".mp4", ".mov", ".avi", ".mkv"];
if(validExt.some(ext => lower.endsWith(ext))){
console.log(filePath);
const files1 = e.dataTransfer.files;
handleFiles(files1);
}else{
console.log('Video format invalid!');
}
} catch (error) {
console.log("Error in renderer.js with the listerner for the drop function");
console.log(error);
}
+49 -43
View File
@@ -6,7 +6,7 @@ manualUploadBtn.addEventListener('click', () => {
} catch (error) {
console.log("Error in manualBtn EventListener click");
}
});
// function to check that at least one checkbox is checked
@@ -15,18 +15,24 @@ function checkBoxes() {
const checkboxes = document.querySelectorAll('input[name="docFormat"]');
let isChecked = false;
checkboxes.forEach(function(checkbox){
if(checkbox.checked){
checkboxes.forEach(function (checkbox) {
if (checkbox.checked) {
isChecked = true;
}
});
if(isChecked){
if (isChecked) {
//Code to submit the video
var pathTest = window.electronAPI.getFilePath(videoUpload.files[0]);
if(pathTest.endsWith(".mp4") || holdy.endsWith(".mov") || holdy.endsWith(".avi") || holdy.endsWith( ".mkv")){
window.extractor.extract({inputVideoPath: pathTest, outputType:"wav"})
const lower = pathTest.toLowerCase();
const validExt = [".mp4", ".mov", ".avi", ".mkv"];
if(validExt.some(ext => lower.endsWith(ext))){
window.extractor.extract({ inputVideoPath: pathTest, outputType: "wav" });
}
} else {
//language only english at the moment
alert('Please select at least one document type.');
@@ -34,43 +40,43 @@ function checkBoxes() {
} catch (error) {
console.log(error)
}
// mapFunctions.get("extraction-video-to-audio").function({inputVideoPath:"./a.mp4", outputType:"wav"})
// mapFunctions.get("extraction-video-to-audio").function({inputVideoPath:"./a.mp4", outputType:"wav"})
}
//language changing feature
/**
 * Switches every translatable UI label to the requested language.
 *
 * Known codes are 'en', 'de' and 'in' (the Devanagari-script texts, presumably Hindi).
 * Any other value leaves the page untouched.
 *
 * @param {string} language - Language code chosen in the UI.
 */
function changeLanguage(language) {
    // Ids of the elements to update; every translation row lists its texts in this same order.
    const elementIds = [
        'title',
        'h1',
        'p1',
        'fileName',
        'manualUploadBtn',
        'checkbox_group',
        'label_format',
        'label_summary',
        'submitButton',
    ];
    const translations = {
        en: [
            'Video to document',
            'Video to document',
            'Drag and drop video file',
            'No video chosen',
            'Search video',
            'Choose prefered document style:',
            'Meeting report',
            'Summary with timestamps',
            'Submit',
        ],
        de: [
            'Video zu Dokument',
            'Video zu Dokument',
            'Video per Drag & Drop ablegen',
            'Kein Video ausgewaehlt',
            'Video suchen',
            'Bevorzugte Dokumentvarianten:',
            'Meeting Bericht',
            'Zusammenfassung mit Zeitstempeln',
            'Absenden',
        ],
        in: [
            'दस्तावेज़ के लिए वीडियो',
            'दस्तावेज़ के लिए वीडियो',
            'वीडियो फ़ाइल खींचें और छोड़ें',
            'कोई वीडियो नहीं चुना गया',
            'वीडियो खोजें',
            'पसंदीदा दस्तावेज़ शैली चुनें:',
            'बैठक रिपोर्ट',
            'टाइमस्टैम्प के साथ सारांश',
            'जमा करना',
        ],
    };

    // Strict lookup: only own keys of the table count, so values such as "toString" never match.
    if (typeof language !== 'string' || !Object.hasOwn(translations, language)) {
        return;
    }

    const texts = translations[language];
    elementIds.forEach((id, index) => {
        document.getElementById(id).textContent = texts[index];
    });
}
@@ -81,7 +87,7 @@ videoUpload.addEventListener('change', () => {
} catch (error) {
console.log("Error in manualBtn EventListener change");
}
});
@@ -99,11 +105,11 @@ function handleFiles(files) {
} catch (error) {
console.log("Error in script.js handleFiles function");
}
}
//function to regulate the progress on the progressbar
function updateProgressBar(bar, value){
function updateProgressBar(bar, value) {
try {
value = Math.round(value);
bar.querySelector(".progress_fill").style.width = `${value}%`;
@@ -111,5 +117,5 @@ function updateProgressBar(bar, value){
} catch (error) {
console.log("Error in scripts.js updateProgressBar function");
}
}
+6 -1
View File
@@ -76,4 +76,9 @@ electron.app.whenReady().then(createWindow);
electron.ipcMain.on("extract", (event, args) => {
mapFunctions.get("extraction-video-to-audio").function(args)
})
})
electron.ipcMain.on("summarize-transcription", (event, args) => {
mapFunctions.get("summarize-transcription").function(args);
});
+22
View File
@@ -9,7 +9,9 @@
"version": "1.0.0",
"license": "ISC",
"dependencies": {
"@types/axios": "^0.9.36",
"cli-progress": "^3.12.0",
"dotenv": "^17.2.3",
"electron": "^39.1.1",
"express": "^5.1.0",
"ffmpeg-static": "^5.2.0",
@@ -149,6 +151,12 @@
"dev": true,
"license": "MIT"
},
"node_modules/@types/axios": {
"version": "0.9.36",
"resolved": "https://registry.npmjs.org/@types/axios/-/axios-0.9.36.tgz",
"integrity": "sha512-NLOpedx9o+rxo/X5ChbdiX6mS1atE4WHmEEIcR9NLenRVa5HoVjAvjafwU3FPTqnZEstpoqCaW7fagqSoTDNeg==",
"license": "MIT"
},
"node_modules/@types/cacheable-request": {
"version": "6.0.3",
"resolved": "https://registry.npmjs.org/@types/cacheable-request/-/cacheable-request-6.0.3.tgz",
@@ -198,6 +206,7 @@
"resolved": "https://registry.npmjs.org/@types/node/-/node-24.9.2.tgz",
"integrity": "sha512-uWN8YqxXxqFMX2RqGOrumsKeti4LlmIMIyV0lgut4jx7KQBcBiW6vkDtIBvHnHIquwNfJhk8v2OtmO8zXWHfPA==",
"license": "MIT",
"peer": true,
"dependencies": {
"undici-types": "~7.16.0"
}
@@ -584,6 +593,18 @@
"node": ">=0.3.1"
}
},
"node_modules/dotenv": {
"version": "17.2.3",
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-17.2.3.tgz",
"integrity": "sha512-JVUnt+DUIzu87TABbhPmNfVdBDt18BLOWjMUFJMSi/Qqg7NTYtabbvSNJGOJ7afbRuv9D/lngizHtP7QyLQ+9w==",
"license": "BSD-2-Clause",
"engines": {
"node": ">=12"
},
"funding": {
"url": "https://dotenvx.com"
}
},
"node_modules/dunder-proto": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz",
@@ -1824,6 +1845,7 @@
"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
"dev": true,
"license": "Apache-2.0",
"peer": true,
"bin": {
"tsc": "bin/tsc",
"tsserver": "bin/tsserver"
+2
View File
@@ -1,6 +1,8 @@
{
"dependencies": {
"@types/axios": "^0.9.36",
"cli-progress": "^3.12.0",
"dotenv": "^17.2.3",
"electron": "^39.1.1",
"express": "^5.1.0",
"ffmpeg-static": "^5.2.0",
-5
View File
@@ -1,5 +0,0 @@
npx ts-node ./extract.ts /Users/mikehughes/Downloads/Testvideo/Kurzgesagt.mov
npx ts-node ./transcribe.ts ../storage/audio/Kurzgesagt.wav
npx ts-node ./extract.ts /Users/mikehughes/Downloads/Testvideo/GitLabMeeting.mov
npx ts-node ./transcribe.ts ../storage/audio/GitLabMeeting.wav
-23
View File
@@ -1,23 +0,0 @@
#!/usr/bin/env ts-node
import { extractAudioFromVideo } from "../services/modules/extraction/ffmpegExtractor.ts";
const videoPath = process.argv[2];
if (!videoPath) {
console.error("Usage: ts-node extractAudio.ts <videoPath>");
process.exit(1);
}
(async () => {
try {
console.log(`Extracting audio from: ${videoPath}`);
await extractAudioFromVideo(videoPath); // Call the extraction function (ffmpegExtractor.ts in services/modules/extraction)
console.log("Audio extraction completed successfully.");
} catch (err) {
console.error("Audio extraction failed:", err);
process.exit(1);
}
})();
-18
View File
@@ -1,18 +0,0 @@
import { whisperLocal } from "../services/modules/transcription/local/whisperLocal.ts";
const audioPath = process.argv[2];
if (!audioPath) {
console.error("Please provide an audio file path as argument.");
process.exit(1);
}
const whisper = new whisperLocal();
(async () => {
try {
const text = await whisper.transcribe(audioPath);
console.log(text);
} catch (err) {
console.error("Transcription failed:", err);
}
})();
@@ -0,0 +1,138 @@
const fs = require("fs");
const path = require("path");
// Prepare output directory (always storage/transcriptionSummaries under project root)
const outputDir = `${__dirname}/../../../storage/transcriptionSummaries`;
if (!fs.existsSync(outputDir)) {
fs.mkdirSync(outputDir, { recursive: true });
}
//Speaker, ALL-Sentences, Start, End
module.exports = {
name: "summarize-transcription", // Unique name for our function that will later be used to get the function from the map via "mapFunctions.get("example").function()"
type: "summarizer", // value used to differentiate each module to order them in the UI
displayname: "Summarizer", // The displayname used within the UI
async function(args) {
let inputJson = args.json;
//JSON Path
if (args.jsonPath) {
try {
const raw = fs.readFileSync(args.jsonPath, "utf-8");
inputJson = JSON.parse(raw);
} catch (e) {
console.error("Failed to load JSON from file:", e);
return { error: "Could not read JSON from file path." };
}
}
// JSON parsen
if (typeof inputJson === "string") {
try {
inputJson = JSON.parse(inputJson);
} catch (e) {
console.log("Invalid JSON in summarize-transcription");
return { error: "Invalid JSON" };
}
}
const words = inputJson.words;
if (!Array.isArray(words)) {
return { error: "No words Array found" };
}
const ENDINGS = [".", "!", "?"]; // '...' auch als Satzende ?
const ABBREVIATIONS = new Set(["z.B.", "bzw.", "u.a.", "Dr.", "Mr.", "Mrs.", "Prof.", "etc."]); //TODO weitere Ergaenzen
const result = [];
let currentSentence = "";
let currentSpeaker = null;
let startTime = null;
let endTime = null;
for (const w of words) {
if (!currentSpeaker) currentSpeaker = w.speaker;
if (startTime === null) startTime = w.start;
endTime = w.end;
//speaker changing
if (currentSpeaker !== w.speaker && currentSentence) {
const lastEntry = result[result.length - 1];
if (lastEntry && lastEntry.speaker === currentSpeaker) {
lastEntry.sentence += " " + currentSentence;
lastEntry.end = endTime;
} else {
result.push({
speaker: currentSpeaker,
sentence: currentSentence,
start: startTime,
end: endTime
});
}
currentSentence = "";
startTime = w.start;
}
currentSpeaker = w.speaker;
currentSentence += (currentSentence ? " " : "") + w.text; //sentence beginning or not
const lastWord = w.text.trim();
const lastChar = lastWord.slice(-1);
const isAbbreviation = ABBREVIATIONS.has(lastWord);
//sentence ending
if (ENDINGS.includes(lastChar) && !isAbbreviation) {
const lastEntry = result[result.length - 1];
if (lastEntry && lastEntry.speaker === currentSpeaker) {
lastEntry.sentence += " " + currentSentence;
lastEntry.end = endTime;
} else {
result.push({
speaker: currentSpeaker,
sentence: currentSentence,
start: startTime,
end: endTime
});
}
currentSentence = "";
startTime = null;
endTime = null;
currentSpeaker = null;
}
}
// safe last sentence
if (currentSentence) {
const lastEntry = result[result.length - 1];
if (lastEntry && lastEntry.speaker === currentSpeaker) {
lastEntry.sentence += " " + currentSentence;
lastEntry.end = endTime;
} else {
result.push({
speaker: currentSpeaker,
sentence: currentSentence,
start: startTime,
end: endTime
});
}
}
// Output as Text
const output = result.map(r =>
`Sprecher ${r.speaker} [${r.start.toFixed(2)} - ${r.end.toFixed(2)}]: ${r.sentence}`
);
// Output on cosole
//console.log("\n------------\nMerged Transcription Result:\n", output, "\n------------\n");
try {
const jsonPath = path.join(outputDir, "transcription_result.json");
fs.writeFileSync(jsonPath, JSON.stringify(result, null, 2), "utf-8");
const txtPath = path.join(outputDir, "transcription_result.txt");
fs.writeFileSync(txtPath, output.join("\n"), "utf-8");
console.log(`Summary successfully saved:\n- ${jsonPath}\n- ${txtPath}`);
} catch (err) {
console.error("Error saving Summary:", err);
}
}
}
@@ -0,0 +1,121 @@
const fs = require("fs");
const path = require("path");
// Prepare output directory (always storage/transcriptionSummaries under project root)
const outputDir = `${__dirname}/../../../storage/transcriptionSummaries`;
if (!fs.existsSync(outputDir)) {
fs.mkdirSync(outputDir, { recursive: true });
}
//Speaker, Sentence, Start, End
module.exports = {
name: "summarize-transcription2", // Unique name for our function that will later be used to get the function from the map via "mapFunctions.get("example").function()"
type: "summarizer", // value used to differentiate each module to order them in the UI
displayname: "Summarizer", // The displayname used within the UI
async function(args) {
let inputJson = args.json;
//JSON Path
if (args.jsonPath) {
try {
const raw = fs.readFileSync(args.jsonPath, "utf-8");
inputJson = JSON.parse(raw);
} catch (e) {
console.error("Failed to load JSON from file:", e);
return { error: "Could not read JSON from file path." };
}
}
// JSON parsen
if (typeof inputJson === "string") {
try {
inputJson = JSON.parse(inputJson);
} catch (e) {
console.log("Invalid JSON in summarize-transcription");
return { error: "Invalid JSON" };
}
}
const words = inputJson.words;
if (!Array.isArray(words)) {
return { error: "No words Array found" };
}
const ENDINGS = [".", "!", "?"]; // '...' auch als Satzende ?
const ABBREVIATIONS = new Set(["z.B.", "bzw.", "u.a.", "Dr.", "Mr.", "Mrs.", "Prof.", "etc."]); //TODO weitere Ergaenzen
const result = [];
let currentSentence = "";
let currentSpeaker = null;
let startTime = null;
let endTime = null;
for (const w of words) {
if (!currentSpeaker) currentSpeaker = w.speaker;
if (startTime === null) startTime = w.start;
endTime = w.end;
//speaker changing
if (currentSpeaker !== w.speaker && currentSentence) {
result.push({
speaker: currentSpeaker,
sentence: currentSentence,
start: startTime,
end: endTime
});
currentSentence = "";
startTime = w.start;
}
currentSpeaker = w.speaker;
currentSentence += (currentSentence ? " " : "") + w.text; //sentence beginning or not
const lastWord = w.text.trim();
const lastChar = lastWord.slice(-1);
const isAbbreviation = ABBREVIATIONS.has(lastWord);
//sentence ending
if (ENDINGS.includes(lastChar) && !isAbbreviation) {
result.push({
speaker: currentSpeaker,
sentence: currentSentence,
start: startTime,
end: endTime
});
currentSentence = "";
startTime = null;
endTime = null;
currentSpeaker = null;
}
}
// safe last sentence
if (currentSentence) {
result.push({
speaker: currentSpeaker,
sentence: currentSentence,
start: startTime,
end: endTime
});
}
// Output as Text
const output = result.map(r =>
`Sprecher ${r.speaker} [${r.start.toFixed(2)} - ${r.end.toFixed(2)}]: ${r.sentence}`
);
// Output on cosole
//console.log("\n------------\nMerged Transcription Result:\n", output, "\n------------\n");
try {
const jsonPath = path.join(outputDir, "transcription_result.json");
fs.writeFileSync(jsonPath, JSON.stringify(result, null, 2), "utf-8");
const txtPath = path.join(outputDir, "transcription_result.txt");
fs.writeFileSync(txtPath, output.join("\n"), "utf-8");
console.log(`Summary successfully saved:\n- ${jsonPath}\n- ${txtPath}`);
} catch (err) {
console.error("Error saving Summary:", err);
}
}
}
@@ -5,7 +5,7 @@ import { fileURLToPath } from "url"; // To handle __dirname in ES modules
const __filename = fileURLToPath(import.meta.url); // Get current file path
const __dirname = path.dirname(__filename); // Get current directory path
const transcriptsDir = path.resolve(__dirname, "../../storage/transcriptions");
const transcriptsDir = path.resolve(__dirname, "../../../storage/transcriptions");
export class whisperLocal { // is called by transcribe.ts
@@ -26,7 +26,6 @@ export class whisperLocal { // is called by transcribe.ts
async transcribe(audioPath: string): Promise<string> { //asyncronous function to transcribe audio
return new Promise((resolve, reject) => {
const transcriptsDir = path.resolve(__dirname, "../../../../storage/transcripts"); //storage directory for transcripts
if (!fs.existsSync(transcriptsDir)) { //if transcripts directory does not exist, create it
fs.mkdirSync(transcriptsDir, { recursive: true });
@@ -0,0 +1,123 @@
require('dotenv').config(); // load variables from a .env file into process.env

// The functions of this module call fs.*, path.* and axios.*; without these
// requires every call fails with a ReferenceError.
const fs = require('fs');
const path = require('path');
const axios = require('axios');

// API_KEY keeps its precedence; ASSEMBLYAI_API_KEY (the name read by the
// TypeScript variant of this module) is accepted as a fallback.
const API_KEY = process.env.API_KEY || process.env.ASSEMBLYAI_API_KEY;
const BASE_URL = 'https://api.assemblyai.com/v2';
//---------------------------------------------------Upload audio---------------------------------------------------
/**
 * Reads a local audio file and posts its bytes to the `/upload` endpoint.
 *
 * @param {string} audioPath - Path of the audio file to read.
 * @returns {Promise<string>} The `upload_url` field of the response body.
 */
async function uploadAudio(audioPath) {
    const fileBytes = fs.readFileSync(audioPath);
    const requestConfig = {
        headers: {
            authorization: API_KEY,
            'content-type': 'application/octet-stream'
        }
    };

    const { data } = await axios.post(`${BASE_URL}/upload`, fileBytes, requestConfig);
    return data.upload_url;
}
////---------------------------------------------------Extract session id---------------------------------------------------
/**
 * Derives a session id (file name without its last extension) from a URL or a local path.
 *
 * @param {string} inputPath - URL or file path of the audio source.
 * @returns {string} Base name with the final extension removed.
 */
function getSessionId(inputPath) {
    let urlFileName;
    try {
        // Throws for anything that is not a parseable URL
        urlFileName = path.basename(new URL(inputPath).pathname);
    } catch {
        // Not a URL: treat the input as a file system path
        const extension = path.extname(inputPath);
        return path.basename(inputPath, extension);
    }
    return urlFileName.replace(/\.[^.]+$/, '');
}
//---------------------------------------------------Create transcript---------------------------------------------------
/**
 * Posts a transcription job for the given audio URL to the `/transcript` endpoint,
 * with speaker labels and language detection switched on.
 *
 * @param {string} audioUrl - URL of the audio to transcribe.
 * @returns {Promise<string>} The `id` field of the response body.
 */
async function createTranscript(audioUrl) {
    const jobRequest = {
        audio_url: audioUrl,
        speaker_labels: true,
        language_detection: true
    };
    const requestConfig = {
        headers: {
            authorization: API_KEY,
            'content-type': 'application/json'
        }
    };

    const { data: job } = await axios.post(`${BASE_URL}/transcript`, jobRequest, requestConfig);
    return job.id;
}
//---------------------------------------------------Poll transcript---------------------------------------------------
/**
 * Requests the transcript repeatedly until its status is 'completed' or 'error'.
 *
 * @param {string} transcriptId - Id of the transcription job.
 * @param {object} [options]
 * @param {number} [options.intervalMs=3000] - Pause between two status requests.
 * @param {number} [options.timeoutMs=Infinity] - Upper bound for the total waiting time;
 *        the default keeps the previous behavior of waiting without limit.
 * @returns {Promise<object>} The response body of the completed transcript.
 * @throws {Error} When the job reports status 'error' or the timeout is exceeded.
 */
async function pollTranscript(transcriptId, { intervalMs = 3000, timeoutMs = Infinity } = {}) {
    const deadline = Date.now() + timeoutMs;

    while (true) {
        const response = await axios.get(`${BASE_URL}/transcript/${transcriptId}`, {
            headers: { authorization: API_KEY }
        });

        const { status, error } = response.data;
        if (status === 'completed') return response.data;
        if (status === 'error') throw new Error(`Transcription failed: ${error}`);

        // Give up when the next request would start after the deadline
        if (Date.now() + intervalMs > deadline) {
            throw new Error(`Transcription ${transcriptId} did not finish within ${timeoutMs} ms`);
        }

        await new Promise(res => setTimeout(res, intervalMs));
    }
}
//---------------------------------------------------Save transcript---------------------------------------------------
/**
 * Writes a transcript as pretty-printed JSON to storage/transcripts/<sessionId>.json
 * (resolved relative to this module's directory), creating the folder when needed.
 *
 * @param {object} transcript - Data to serialize.
 * @param {string} sessionId - File name (without extension) of the output file.
 */
function saveTranscript(transcript, sessionId) {
    const transcriptsDir = path.join(__dirname, '../../../storage/transcripts');
    const folderMissing = !fs.existsSync(transcriptsDir);
    if (folderMissing) {
        fs.mkdirSync(transcriptsDir, { recursive: true });
    }

    const targetFile = path.join(transcriptsDir, `${sessionId}.json`);
    const serialized = JSON.stringify(transcript, null, 2);
    fs.writeFileSync(targetFile, serialized);
    console.log(`Transcript saved: ${targetFile}`);
}
//---------------------------------------------------Modul---------------------------------------------------
module.exports = {
name: 'assembly',
type: 'transcription',
displayname: 'AssemblyAI',
async function(audioFileName) {
try {
// audioFileName ist nur "datei.mp3"
const audioPath = path.join(
__dirname,
'../../../storage/audio',
audioFileName
);
let audioUrl;
if (/^https?:\/\//i.test(audioFileName)) {
audioUrl = audioFileName;
} else {
if (!fs.existsSync(audioPath)) {
throw new Error(`Audio file not found: ${audioPath}`);
}
audioUrl = await uploadAudio(audioPath);
}
const transcriptId = await createTranscript(audioUrl);
const transcript = await pollTranscript(transcriptId);
const sessionId = getSessionId(audioFileName);
saveTranscript(transcript, sessionId);
} catch (error) {
console.error('Transcription error:', error.message);
}
}
};
@@ -0,0 +1,133 @@
import 'dotenv/config';
import axios from 'axios';
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const API_KEY = process.env.ASSEMBLYAI_API_KEY;
const BASE_URL = 'https://api.assemblyai.com/v2';
/**
 * Reads a local audio file and posts its bytes to the `/upload` endpoint.
 *
 * @param audioPath path of the audio file to read
 * @returns the `upload_url` field of the response body
 */
async function uploadAudio(audioPath: string): Promise<string> {
  const fileBytes = fs.readFileSync(audioPath);
  const requestConfig = {
    headers: {
      'authorization': API_KEY,
      'content-type': 'application/octet-stream'
    }
  };

  const { data } = await axios.post<{ upload_url: string }>(`${BASE_URL}/upload`, fileBytes, requestConfig);
  return data.upload_url;
}
/**
 * Derives a session id (file name without its last extension) from a URL or a local path.
 *
 * @param inputPath URL or file path of the audio source
 * @returns base name with the final extension removed
 */
function getSessionId(inputPath: string): string {
  let urlFileName: string;
  try {
    // Throws for anything that is not a parseable URL
    urlFileName = path.basename(new URL(inputPath).pathname);
  } catch (err) {
    // Not a URL: treat the input as a file system path
    const extension = path.extname(inputPath);
    return path.basename(inputPath, extension);
  }
  return urlFileName.replace(/\.[^.]+$/, '');
}
/**
 * Posts a transcription job for the given audio URL to the `/transcript` endpoint,
 * with speaker labels and language detection switched on.
 *
 * @param audioUrl URL of the audio to transcribe
 * @returns the `id` field of the response body
 */
async function createTranscript(audioUrl: string): Promise<string> {
  const jobRequest = {
    audio_url: audioUrl,
    speaker_labels: true,
    language_detection: true
  };
  const requestConfig = {
    headers: {
      'authorization': API_KEY,
      'content-type': 'application/json'
    }
  };

  const { data: job } = await axios.post<{ id: string }>(`${BASE_URL}/transcript`, jobRequest, requestConfig);
  return job.id;
}
/**
 * Requests the transcript repeatedly until its status is 'completed' or 'error'.
 *
 * @param transcriptId id of the transcription job
 * @param options.intervalMs pause between two status requests (default 3000)
 * @param options.timeoutMs upper bound for the total waiting time; the default
 *        (Infinity) keeps the previous behavior of waiting without limit
 * @returns the response body of the completed transcript
 * @throws Error when the job reports status 'error' or the timeout is exceeded
 */
async function pollTranscript(
  transcriptId: string,
  { intervalMs = 3000, timeoutMs = Infinity }: { intervalMs?: number; timeoutMs?: number } = {}
): Promise<any> {
  const deadline = Date.now() + timeoutMs;

  while (true) {
    const response = await axios.get<any>(`${BASE_URL}/transcript/${transcriptId}`, {
      headers: { 'authorization': API_KEY }
    });

    const status = response.data.status;
    if (status === 'completed') {
      return response.data;
    }
    if (status === 'error') {
      throw new Error(`Transcription failed: ${response.data.error}`);
    }

    // Give up when the next request would start after the deadline
    if (Date.now() + intervalMs > deadline) {
      throw new Error(`Transcription ${transcriptId} did not finish within ${timeoutMs} ms`);
    }

    await new Promise(resolve => setTimeout(resolve, intervalMs));
  }
}
/**
 * Writes a transcript as pretty-printed JSON to storage/transcripts/<sessionId>.json
 * (resolved relative to this module's directory), creating the folder when needed.
 *
 * @param transcript data to serialize
 * @param sessionId file name (without extension) of the output file
 */
function saveTranscript(transcript: any, sessionId: string): void {
  const transcriptsDir = path.join(__dirname, '..', '..', '..', 'storage', 'transcripts');
  const folderMissing = !fs.existsSync(transcriptsDir);
  if (folderMissing) {
    fs.mkdirSync(transcriptsDir, { recursive: true });
  }

  const targetFile = path.join(transcriptsDir, `${sessionId}.json`);
  const serialized = JSON.stringify(transcript, null, 2);
  fs.writeFileSync(targetFile, serialized);
  console.log(`✅ Transcript saved: ${targetFile}`);
}
export default {
  name: "assembly",
  type: "transcription",
  displayname: "AssemblyAI",
  /**
   * Transcribes an audio source and stores the transcript as JSON.
   *
   * @param audioPath an http(s) URL, or the path of a local audio file
   * @returns the completed transcript
   * @throws rethrows any failure after logging its message
   */
  run: async (audioPath: string) => {
    const isRemote = /^https?:\/\//i.test(audioPath);

    try {
      // Remote sources are used as they are; local files are uploaded first
      let audioUrl: string;
      if (!isRemote) {
        console.log('🔄 Uploading local audio...');
        if (!fs.existsSync(audioPath)) {
          throw new Error(`Audio file not found: ${audioPath}`);
        }
        audioUrl = await uploadAudio(audioPath);
      } else {
        console.log('🔗 Using external audio URL...');
        audioUrl = audioPath;
      }

      console.log('🔄 Creating transcript job...');
      const transcriptId = await createTranscript(audioUrl);

      console.log('⏳ Waiting for transcription...');
      const transcript = await pollTranscript(transcriptId);

      saveTranscript(transcript, getSessionId(audioPath));
      return transcript;
    } catch (error: any) {
      console.error('❌ Transcription error:', error.message);
      throw error;
    }
  }
};
@@ -1,8 +0,0 @@
module.exports = {
name:"assembly", // Unique name for our function that will later be used to get the function from the map via "mapFunctions.get("example").function()"
type:"transcription", // value used to differentiate each module to order them in the UI
displayname:"Assembly", // The displayname used within the UI
async function(parameter){
// TODO add code to actually process the audio file
}
}
Submodule services/modules/transcription/local/whisper.cpp deleted from 999a7e0cbf
+7
View File
@@ -6,6 +6,13 @@ module.exports = {
// We are now calling the example function from the example folder
mapFunctions.get("example").function("Startup")
let transcript = await mapFunctions.get("assembly").function('../../storage/audio/IMG_2978.wav');
let summary = await mapFunctions.get("summarize-transcription").function({jsonPath:'/Users/santa/Proj25/video2document/storage/transcripts/IMG_2978.json'});
// mapFunctions.get("extraction-video-to-audio").function({inputVideoPath:"./a.mp4", outputType:"wav"})
// mapFunctions.get("extraction-video-to-audio").function({inputVideoPath:"./b.mp4", outputType:"wav"})
// mapFunctions.get("extraction-video-to-audio").function({inputVideoPath:"./b.mp4", outputType:"flac"})
@@ -0,0 +1,52 @@
// services/pipeline/jobs/transcribeLatest.ts
import path from 'path';
import fs from 'fs';
import assembly from '../../modules/transcription/assembly';
/**
 * Returns the path of the .wav file in <cwd>/storage/audio with the newest
 * modification time (the extension check ignores letter case).
 *
 * @throws Error when the folder holds no .wav file
 */
function getLatestWav(): string {
  const audioDir = path.join(process.cwd(), 'storage', 'audio');

  const wavNames: string[] = [];
  for (const entry of fs.readdirSync(audioDir)) {
    if (entry.toLowerCase().endsWith('.wav')) {
      wavNames.push(entry);
    }
  }
  if (wavNames.length === 0) throw new Error('⚠️ No .wav file found in storage/audio');

  // Linear scan for the largest mtime; on a tie the earlier directory entry wins
  let newestName = wavNames[0];
  let newestTime = fs.statSync(path.join(audioDir, newestName)).mtimeMs;
  for (const candidate of wavNames.slice(1)) {
    const modified = fs.statSync(path.join(audioDir, candidate)).mtimeMs;
    if (modified > newestTime) {
      newestName = candidate;
      newestTime = modified;
    }
  }

  return path.join(audioDir, newestName);
}
/**
 * Runs the transcription of the newest .wav file through the AssemblyAI module.
 *
 * The five workflow steps are only announced on the console here; the actual
 * work happens inside `assembly.run`:
 * 1. Audio Upload → AssemblyAI
 * 2. Job Creation (transcript_id)
 * 3. Polling Status (queued → processing → completed)
 * 4. Download Transcript JSON
 * 5. Storage: /transcripts/{session_id}.json
 */
async function main() {
  const audioPath = getLatestWav();

  console.log('1️⃣ Audio Upload → AssemblyAI');
  console.log(' Source:', audioPath);
  const remainingSteps = [
    '2️⃣ Job Creation (transcript_id)',
    '3️⃣ Polling Status (queued → processing → completed)',
    '4️⃣ Download Transcript JSON',
    '5️⃣ Storage: /transcripts/{session_id}.json'
  ];
  for (const step of remainingSteps) {
    console.log(step);
  }

  // The module performs upload, job creation, polling and storage
  const result = await assembly.run(audioPath);

  console.log('✅ Transcription completed successfully');
  console.log('🆔 Transcript ID:', result.id);
  console.log('📁 Transcript file saved under: storage/transcripts/');
}

// Logs the failure and ends the process with exit code 1
function reportPipelineFailure(err: any): void {
  console.error('❌ Transcription pipeline failed:', err.message || err);
  process.exit(1);
}

// Entry point
main().catch(reportPipelineFailure);
+14
View File
@@ -0,0 +1,14 @@
import 'dotenv/config';
import assemblyModule from '../../services/modules/transcription-remote/assembly.ts';
// Audio source: first command-line argument (URL or path), otherwise ./storage/audio/test.wav
const audioPath = process.argv[2] || './storage/audio/test.wav';

// Prints the id of the finished transcript
const reportSuccess = (result: any) => {
  console.log('✅ Success!');
  console.log('Transcript ID:', result.id);
};

// Prints the error message, or the raw value when it has none
const reportError = (error: any) => {
  console.error('❌ Error:', error?.message || error);
};

assemblyModule.run(audioPath).then(reportSuccess).catch(reportError);
+18
View File
@@ -0,0 +1,18 @@
require('dotenv').config();
const path = require('path');
const assemblyModule = require('../../services/modules/transcription-remote/assembly.js');
// Audio file or URL from the command line; falls back to ./storage/audio/IMG_2978.wav
const audioPath = process.argv[2] || './storage/audio/IMG_2978.wav';

// Runs one transcription and prints the transcript id and the number of utterances;
// any failure is logged instead of thrown.
async function runTranscriptionTest() {
    try {
        const transcript = await assemblyModule.run(audioPath);

        console.log('Transcription succesful');
        console.log('Transcript ID:', transcript?.id);
        console.log('Speaker labels:', transcript?.utterances?.length || 0);
    } catch (error) {
        console.error('Error in Transcription:', error?.message || error);
    }
}

runTranscriptionTest();
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,12 @@
const fs = require('fs');
const transSummarizer = require("../../services/modules/jsonTools/transcriptionSummarizer.js");
// Load the JSON fixture (path is relative to the current working directory)
const inputJson = JSON.parse(fs.readFileSync("./testFile.json", "utf8"));

// Hand the parsed transcription to the summarizer. The call is asynchronous, so the
// outcome is observed here: an `{ error }` result or a rejection marks the run as failed
// instead of passing silently.
Promise.resolve(transSummarizer.function({
    json: inputJson
}))
    .then((outcome) => {
        if (outcome && outcome.error) {
            console.error("Summarizer reported an error:", outcome.error);
            process.exitCode = 1;
        }
    })
    .catch((err) => {
        console.error("Summarizer test failed:", err);
        process.exitCode = 1;
    });
View File