From 455147a41b49388c61cadad674f587e6152801c6 Mon Sep 17 00:00:00 2001 From: santa Date: Thu, 20 Nov 2025 10:15:11 +0100 Subject: [PATCH] Summarizer Tool erstellt zur zusammenfassung der vom TranskriptionTool kommenden json --- .../jsonTools/transcriptionSummarizer.js | 123 ++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 services/modules/jsonTools/transcriptionSummarizer.js diff --git a/services/modules/jsonTools/transcriptionSummarizer.js b/services/modules/jsonTools/transcriptionSummarizer.js new file mode 100644 index 0000000..dae3537 --- /dev/null +++ b/services/modules/jsonTools/transcriptionSummarizer.js @@ -0,0 +1,123 @@ +// Prepare output directory (always storage/transcriptionSummaries under project root) +const outputDir = `${__dirname}/../../../storage/transcriptionSummaries`; +if (!fs.existsSync(outputDir)) { + fs.mkdirSync(outputDir, { recursive: true }); +} + +module.exports = { + name: "summarize-transcription", // Unique name for our function that will later be used to get the function from the map via "mapFunctions.get("example").function()" + type: "transcription", // value used to differentiate each module to order them in the UI + displayname: "Summarizer", // The displayname used within the UI + async function(args) { + let inputJson = args.json; + + // JSON parsen + if (typeof inputJson === "string") { + try { + inputJson = JSON.parse(inputJson); + } catch (e) { + console.log("Invalid JSON in summarize-transcription"); + return { error: "Invalid JSON" }; + } + } + + const words = inputJson.words; + if (!Array.isArray(words)) { + return { error: "No words Array found" }; + } + + const ENDINGS = [".", "!", "?"]; // '...' auch als Satzende ? + const ABBREVIATIONS = new Set(["z.B.", "bzw.", "u.a.", "Dr.", "Mr.", "Mrs.", "Prof.", "etc."]); //TODO weitere Ergaenzen + + const result = []; + let currentSentence = ""; + let currentSpeaker = null; + let startTime = null; + let endTime = null; + + for (const w of words) { + if (!currentSpeaker) currentSpeaker = w.speaker; + if (startTime === null) startTime = w.start; + endTime = w.end; + + //speaker changing + if (currentSpeaker !== w.speaker && currentSentence) { + const lastEntry = result[result.length - 1]; + if (lastEntry && lastEntry.speaker === currentSpeaker) { + lastEntry.sentence += " " + currentSentence; + lastEntry.end = endTime; + } else { + result.push({ + speaker: currentSpeaker, + sentence: currentSentence, + start: startTime, + end: endTime + }); + } + currentSentence = ""; + startTime = w.start; + } + currentSpeaker = w.speaker; + currentSentence += (currentSentence ? " " : "") + w.text; //sentence beginning or not + const lastWord = w.text.trim(); + const lastChar = lastWord.slice(-1); + const isAbbreviation = ABBREVIATIONS.has(lastWord); + + //sentence ending + if (ENDINGS.includes(lastChar) && !isAbbreviation) { + const lastEntry = result[result.length - 1]; + if (lastEntry && lastEntry.speaker === currentSpeaker) { + lastEntry.sentence += " " + currentSentence; + lastEntry.end = endTime; + } else { + result.push({ + speaker: currentSpeaker, + sentence: currentSentence, + start: startTime, + end: endTime + }); + } + currentSentence = ""; + startTime = null; + endTime = null; + currentSpeaker = null; + } + } + + // safe last sentence + if (currentSentence) { + const lastEntry = result[result.length - 1]; + if (lastEntry && lastEntry.speaker === currentSpeaker) { + lastEntry.sentence += " " + currentSentence; + lastEntry.end = endTime; + } else { + result.push({ + speaker: currentSpeaker, + sentence: currentSentence, + start: startTime, + end: endTime + }); + } + } + + // Output as Text + const output = result.map(r => + `Sprecher ${r.speaker} [${r.start.toFixed(2)} - ${r.end.toFixed(2)}]: ${r.sentence}` + ); + + // Output on cosole + //console.log("\n------------\nMerged Transcription Result:\n", output, "\n------------\n"); + + try { + const jsonPath = path.join(outputDir, "transcription_result.json"); + fs.writeFileSync(jsonPath, JSON.stringify(result, null, 2), "utf-8"); + + const txtPath = path.join(outputDir, "transcription_result.txt"); + fs.writeFileSync(txtPath, output.join("\n"), "utf-8"); + + console.log(`Summary successfully saved:\n- ${jsonPath}\n- ${txtPath}`); + } catch (err) { + console.error("Error saving Summary:", err); + } + } +}