From 9a0a34981329d442554866ce583f1b7378a6b8da Mon Sep 17 00:00:00 2001 From: Emily Date: Mon, 24 Nov 2025 17:01:49 +0100 Subject: [PATCH] Switched to transcription summarizer2 due to a better output format --- main.js | 2 +- .../jsonTools/transcriptionSummarizer2.js | 185 ++++++++++-------- 2 files changed, 99 insertions(+), 88 deletions(-) diff --git a/main.js b/main.js index 83045b1..39c408d 100644 --- a/main.js +++ b/main.js @@ -169,7 +169,7 @@ electron.ipcMain.on("file_submit", async (event, args) => { console.log("\n\n Running the Transcription Summarizer module"); // This code summarises the transcript, so that it can be used by an llm // await mapFunctions.get("summarize-transcription").function('A:\\programing\\@projects\\video2document\\storage\\transcripts\\IMG_2978.json').then(resp => { - await mapFunctions.get("summarize-transcription").function(transcriptpath).then(resp => { + await mapFunctions.get("summarize-transcription2").function(transcriptpath).then(resp => { console.log(resp); transcriptpath = resp curstep++ diff --git a/services/modules/jsonTools/transcriptionSummarizer2.js b/services/modules/jsonTools/transcriptionSummarizer2.js index 12d00bf..6993926 100644 --- a/services/modules/jsonTools/transcriptionSummarizer2.js +++ b/services/modules/jsonTools/transcriptionSummarizer2.js @@ -14,108 +14,119 @@ module.exports = { type: "summarizer", // value used to differentiate each module to order them in the UI displayname: "Summarizer", // The displayname used within the UI async function(args) { - let inputJson = args.json; + return new Promise(async (resolve, reject) => { + let inputJson = args.json; - //JSON Path - if (args.jsonPath) { - try { - const raw = fs.readFileSync(args.jsonPath, "utf-8"); - inputJson = JSON.parse(raw); - } catch (e) { - console.error("Failed to load JSON from file:", e); - return { error: "Could not read JSON from file path." }; + //JSON Path + if (args.jsonPath) { + try { + const raw = fs.readFileSync(args.jsonPath, "utf-8"); + inputJson = JSON.parse(raw); + } catch (e) { + console.error("Failed to load JSON from file:", e); + return { error: "Could not read JSON from file path." }; + } } - } - // JSON parsen - if (typeof inputJson === "string") { - try { - inputJson = JSON.parse(inputJson); - } catch (e) { - console.log("Invalid JSON in summarize-transcription"); - return { error: "Invalid JSON" }; + // JSON parsen + if (typeof args === "string") { + try { + await new Promise((res) => { + fs.readFile(args, 'utf8', function (err, data) { + if (err) throw err; + inputJson = JSON.parse(data); + res() + }); + }) + } catch (e) { + console.log("Invalid JSON in summarize-transcription"); + console.log(e) + return { error: "Invalid JSON" }; + } + } + + const words = inputJson.words; + if (!Array.isArray(words)) { + return { error: "No words Array found" }; } - } - const words = inputJson.words; - if (!Array.isArray(words)) { - return { error: "No words Array found" }; - } + const ENDINGS = [".", "!", "?"]; // '...' auch als Satzende ? + const ABBREVIATIONS = new Set(["z.B.", "bzw.", "u.a.", "Dr.", "Mr.", "Mrs.", "Prof.", "etc."]); //TODO weitere Ergaenzen - const ENDINGS = [".", "!", "?"]; // '...' auch als Satzende ? - const ABBREVIATIONS = new Set(["z.B.", "bzw.", "u.a.", "Dr.", "Mr.", "Mrs.", "Prof.", "etc."]); //TODO weitere Ergaenzen + const result = []; + let currentSentence = ""; + let currentSpeaker = null; + let startTime = null; + let endTime = null; - const result = []; - let currentSentence = ""; - let currentSpeaker = null; - let startTime = null; - let endTime = null; + for (const w of words) { + if (!currentSpeaker) currentSpeaker = w.speaker; + if (startTime === null) startTime = w.start; + endTime = w.end; + + //speaker changing + if (currentSpeaker !== w.speaker && currentSentence) { + result.push({ + speaker: currentSpeaker, + sentence: currentSentence, + start: startTime, + end: endTime + }); + currentSentence = ""; + startTime = w.start; + } + currentSpeaker = w.speaker; + currentSentence += (currentSentence ? " " : "") + w.text; //sentence beginning or not + const lastWord = w.text.trim(); + const lastChar = lastWord.slice(-1); + const isAbbreviation = ABBREVIATIONS.has(lastWord); - for (const w of words) { - if (!currentSpeaker) currentSpeaker = w.speaker; - if (startTime === null) startTime = w.start; - endTime = w.end; - - //speaker changing - if (currentSpeaker !== w.speaker && currentSentence) { + //sentence ending + if (ENDINGS.includes(lastChar) && !isAbbreviation) { + result.push({ + speaker: currentSpeaker, + sentence: currentSentence, + start: startTime, + end: endTime + }); + currentSentence = ""; + startTime = null; + endTime = null; + currentSpeaker = null; + } + } + + // safe last sentence + if (currentSentence) { result.push({ speaker: currentSpeaker, sentence: currentSentence, start: startTime, end: endTime }); - currentSentence = ""; - startTime = w.start; } - currentSpeaker = w.speaker; - currentSentence += (currentSentence ? " " : "") + w.text; //sentence beginning or not - const lastWord = w.text.trim(); - const lastChar = lastWord.slice(-1); - const isAbbreviation = ABBREVIATIONS.has(lastWord); - //sentence ending - if (ENDINGS.includes(lastChar) && !isAbbreviation) { - result.push({ - speaker: currentSpeaker, - sentence: currentSentence, - start: startTime, - end: endTime - }); - currentSentence = ""; - startTime = null; - endTime = null; - currentSpeaker = null; + + // Output as Text + const output = result.map(r => + `Sprecher ${r.speaker} [${r.start.toFixed(2)} - ${r.end.toFixed(2)}]: ${r.sentence}` + ); + + // Output on cosole + //console.log("\n------------\nMerged Transcription Result:\n", output, "\n------------\n"); + + try { + const jsonPath = path.join(outputDir, "transcription_result.json"); + fs.writeFileSync(jsonPath, JSON.stringify(result, null, 2), "utf-8"); + + const txtPath = path.join(outputDir, "transcription_result.txt"); + fs.writeFileSync(txtPath, output.join("\n"), "utf-8"); + + console.log(`Summary successfully saved:\n- ${jsonPath}\n- ${txtPath}`); + resolve(jsonPath); + } catch (err) { + console.error("Error saving Summary:", err); + reject(err); } - } - - // safe last sentence - if (currentSentence) { - result.push({ - speaker: currentSpeaker, - sentence: currentSentence, - start: startTime, - end: endTime - }); - } - - - // Output as Text - const output = result.map(r => - `Sprecher ${r.speaker} [${r.start.toFixed(2)} - ${r.end.toFixed(2)}]: ${r.sentence}` - ); - - // Output on cosole - //console.log("\n------------\nMerged Transcription Result:\n", output, "\n------------\n"); - - try { - const jsonPath = path.join(outputDir, "transcription_result.json"); - fs.writeFileSync(jsonPath, JSON.stringify(result, null, 2), "utf-8"); - - const txtPath = path.join(outputDir, "transcription_result.txt"); - fs.writeFileSync(txtPath, output.join("\n"), "utf-8"); - - console.log(`Summary successfully saved:\n- ${jsonPath}\n- ${txtPath}`); - } catch (err) { - console.error("Error saving Summary:", err); - } + }) } }