Switched to the summarize-transcription2 module because it produces a better output format

2026-06-15 18:01:52 +02:00 · 2025-11-24 17:01:49 +01:00
parent 9dfc05e987
commit 9a0a349813
2 changed files with 99 additions and 88 deletions
@@ -169,7 +169,7 @@ electron.ipcMain.on("file_submit", async (event, args) => {
        console.log("\n\n Running the Transcription Summarizer module");
        // This code summarises the transcript, so that it can be used by an llm
        // await mapFunctions.get("summarize-transcription").function('A:\\programing\\@projects\\video2document\\storage\\transcripts\\IMG_2978.json').then(resp => {
-        await mapFunctions.get("summarize-transcription").function(transcriptpath).then(resp => {
+        await mapFunctions.get("summarize-transcription2").function(transcriptpath).then(resp => {
            console.log(resp);
            transcriptpath = resp
            curstep++
@@ -14,108 +14,119 @@ module.exports = {
    type: "summarizer", // value used to differentiate each module to order them in the UI
    displayname: "Summarizer",  // The displayname used within the UI
    async function(args) {
-        let inputJson = args.json;
+        return new Promise(async (resolve, reject) => {
+            let inputJson = args.json;

-        //JSON Path
-        if (args.jsonPath) {
-            try {
-                const raw = fs.readFileSync(args.jsonPath, "utf-8");
-                inputJson = JSON.parse(raw);
-            } catch (e) {
-                console.error("Failed to load JSON from file:", e);
-                return { error: "Could not read JSON from file path." };
+            //JSON Path
+            if (args.jsonPath) {
+                try {
+                    const raw = fs.readFileSync(args.jsonPath, "utf-8");
+                    inputJson = JSON.parse(raw);
+                } catch (e) {
+                    console.error("Failed to load JSON from file:", e);
+                    return { error: "Could not read JSON from file path." };
+                }
            }
-        }
-        // JSON parsen
-        if (typeof inputJson === "string") {
-            try {
-                inputJson = JSON.parse(inputJson);
-            } catch (e) {
-                console.log("Invalid JSON in summarize-transcription");
-                return { error: "Invalid JSON" };
+            // JSON parsen
+                if (typeof args === "string") {
+                    try {
+                        await new Promise((res) => {
+                            fs.readFile(args, 'utf8', function (err, data) {
+                                if (err) throw err;
+                                inputJson = JSON.parse(data);
+                                res()
+                            });
+                        })
+                    } catch (e) {
+                        console.log("Invalid JSON in summarize-transcription");
+                        console.log(e)
+                        return { error: "Invalid JSON" };
+                    }
+                }
+
+            const words = inputJson.words;
+            if (!Array.isArray(words)) {
+                return { error: "No words Array found" };
            }
-        }

-        const words = inputJson.words;
-        if (!Array.isArray(words)) {
-            return { error: "No words Array found" };
-        }
+            const ENDINGS = [".", "!", "?"]; // '...' auch als Satzende ?
+            const ABBREVIATIONS = new Set(["z.B.", "bzw.", "u.a.", "Dr.", "Mr.", "Mrs.", "Prof.", "etc."]); //TODO weitere Ergaenzen

-        const ENDINGS = [".", "!", "?"]; // '...' auch als Satzende ?
-        const ABBREVIATIONS = new Set(["z.B.", "bzw.", "u.a.", "Dr.", "Mr.", "Mrs.", "Prof.", "etc."]); //TODO weitere Ergaenzen
+            const result = [];
+            let currentSentence = "";
+            let currentSpeaker = null;
+            let startTime = null;
+            let endTime = null;

-        const result = [];
-        let currentSentence = "";
-        let currentSpeaker = null;
-        let startTime = null;
-        let endTime = null;
+            for (const w of words) {
+                if (!currentSpeaker) currentSpeaker = w.speaker;
+                if (startTime === null) startTime = w.start;
+                endTime = w.end;
+        
+                //speaker changing
+                if (currentSpeaker !== w.speaker && currentSentence) {
+                    result.push({
+                        speaker: currentSpeaker,
+                        sentence: currentSentence,
+                        start: startTime,
+                        end: endTime
+                    });
+                    currentSentence = "";
+                    startTime = w.start;
+                }
+                currentSpeaker = w.speaker;
+                currentSentence += (currentSentence ? " " : "") + w.text; //sentence beginning or not
+                const lastWord = w.text.trim();
+                const lastChar = lastWord.slice(-1);
+                const isAbbreviation = ABBREVIATIONS.has(lastWord);

-        for (const w of words) {
-            if (!currentSpeaker) currentSpeaker = w.speaker;
-            if (startTime === null) startTime = w.start;
-            endTime = w.end;
-    
-            //speaker changing
-             if (currentSpeaker !== w.speaker && currentSentence) {
+                //sentence ending
+                if (ENDINGS.includes(lastChar) && !isAbbreviation) {
+                    result.push({
+                        speaker: currentSpeaker,
+                        sentence: currentSentence,
+                        start: startTime,
+                        end: endTime
+                    });
+                    currentSentence = "";
+                    startTime = null;
+                    endTime = null;
+                    currentSpeaker = null;
+                }
+            }
+
+            // safe last sentence 
+            if (currentSentence) {
                result.push({
                    speaker: currentSpeaker,
                    sentence: currentSentence,
                    start: startTime,
                    end: endTime
                });
-                currentSentence = "";
-                startTime = w.start;
            }
-            currentSpeaker = w.speaker;
-            currentSentence += (currentSentence ? " " : "") + w.text; //sentence beginning or not
-            const lastWord = w.text.trim();
-            const lastChar = lastWord.slice(-1);
-            const isAbbreviation = ABBREVIATIONS.has(lastWord);

-            //sentence ending
-            if (ENDINGS.includes(lastChar) && !isAbbreviation) {
-                result.push({
-                    speaker: currentSpeaker,
-                    sentence: currentSentence,
-                    start: startTime,
-                    end: endTime
-                });
-                currentSentence = "";
-                startTime = null;
-                endTime = null;
-                currentSpeaker = null;
+
+            // Output as Text
+            const output = result.map(r => 
+                `Sprecher ${r.speaker} [${r.start.toFixed(2)} - ${r.end.toFixed(2)}]: ${r.sentence}`
+            );
+
+            // Output on cosole
+            //console.log("\n------------\nMerged Transcription Result:\n", output, "\n------------\n");
+
+            try {
+                const jsonPath = path.join(outputDir, "transcription_result.json");
+                fs.writeFileSync(jsonPath, JSON.stringify(result, null, 2), "utf-8");
+
+                const txtPath = path.join(outputDir, "transcription_result.txt");
+                fs.writeFileSync(txtPath, output.join("\n"), "utf-8");
+
+                console.log(`Summary successfully saved:\n- ${jsonPath}\n- ${txtPath}`);
+                resolve(jsonPath);
+            } catch (err) {
+                console.error("Error saving Summary:", err);
+                reject(err);
            }
-        }
-
-        // safe last sentence 
-        if (currentSentence) {
-            result.push({
-                speaker: currentSpeaker,
-                sentence: currentSentence,
-                start: startTime,
-                end: endTime
-            });
-        }
-
-
-        // Output as Text
-        const output = result.map(r => 
-            `Sprecher ${r.speaker} [${r.start.toFixed(2)} - ${r.end.toFixed(2)}]: ${r.sentence}`
-        );
-
-        // Output on cosole
-        //console.log("\n------------\nMerged Transcription Result:\n", output, "\n------------\n");
-
-        try {
-            const jsonPath = path.join(outputDir, "transcription_result.json");
-            fs.writeFileSync(jsonPath, JSON.stringify(result, null, 2), "utf-8");
-
-            const txtPath = path.join(outputDir, "transcription_result.txt");
-            fs.writeFileSync(txtPath, output.join("\n"), "utf-8");
-
-            console.log(`Summary successfully saved:\n- ${jsonPath}\n- ${txtPath}`);
-        } catch (err) {
-            console.error("Error saving Summary:", err);
-        }
+        })
    }
 }