// video2document/services/modules/jsonTools/transcriptionSummarizer2.js

const fs = require("fs");
const path = require("path");

// Output directory for the generated summaries: storage/transcriptionSummaries,
// located three directory levels above this module's own directory.
// Built with path.join so the separators are correct on every platform.
const outputDir = path.join(__dirname, "..", "..", "..", "storage", "transcriptionSummaries");

// With `recursive: true`, mkdirSync creates missing parent directories and does
// not fail when the directory already exists, so no prior existsSync check
// (which would only add a check-then-act race) is needed.
fs.mkdirSync(outputDir, { recursive: true });

// Each summary entry has the fields: speaker, sentence, start, end

module.exports = {
    name: "summarize-transcription2", // Unique name for our function that will later be used to get the function from the map via "mapFunctions.get("example").function()"
    type: "summarizer", // value used to differentiate each module to order them in the UI
    displayname: "Summarizer",  // The displayname used within the UI
    async function(args) {
        let inputJson = args.json;

        //JSON Path
        if (args.jsonPath) {
            try {
                const raw = fs.readFileSync(args.jsonPath, "utf-8");
                inputJson = JSON.parse(raw);
            } catch (e) {
                console.error("Failed to load JSON from file:", e);
                return { error: "Could not read JSON from file path." };
            }
        }
        // JSON parsen
        if (typeof inputJson === "string") {
            try {
                inputJson = JSON.parse(inputJson);
            } catch (e) {
                console.log("Invalid JSON in summarize-transcription");
                return { error: "Invalid JSON" };
            }
        }

        const words = inputJson.words;
        if (!Array.isArray(words)) {
            return { error: "No words Array found" };
        }

        const ENDINGS = [".", "!", "?"]; // '...' auch als Satzende ?
        const ABBREVIATIONS = new Set(["z.B.", "bzw.", "u.a.", "Dr.", "Mr.", "Mrs.", "Prof.", "etc."]); //TODO weitere Ergaenzen

        const result = [];
        let currentSentence = "";
        let currentSpeaker = null;
        let startTime = null;
        let endTime = null;

        for (const w of words) {
            if (!currentSpeaker) currentSpeaker = w.speaker;
            if (startTime === null) startTime = w.start;
            endTime = w.end;

            //speaker changing
             if (currentSpeaker !== w.speaker && currentSentence) {
                result.push({
                    speaker: currentSpeaker,
                    sentence: currentSentence,
                    start: startTime,
                    end: endTime
                });
                currentSentence = "";
                startTime = w.start;
            }
            currentSpeaker = w.speaker;
            currentSentence += (currentSentence ? " " : "") + w.text; //sentence beginning or not
            const lastWord = w.text.trim();
            const lastChar = lastWord.slice(-1);
            const isAbbreviation = ABBREVIATIONS.has(lastWord);

            //sentence ending
            if (ENDINGS.includes(lastChar) && !isAbbreviation) {
                result.push({
                    speaker: currentSpeaker,
                    sentence: currentSentence,
                    start: startTime,
                    end: endTime
                });
                currentSentence = "";
                startTime = null;
                endTime = null;
                currentSpeaker = null;
            }
        }

        // safe last sentence
        if (currentSentence) {
            result.push({
                speaker: currentSpeaker,
                sentence: currentSentence,
                start: startTime,
                end: endTime
            });
        }


        // Output as Text
        const output = result.map(r =>
            `Sprecher ${r.speaker} [${r.start.toFixed(2)} - ${r.end.toFixed(2)}]: ${r.sentence}`
        );

        // Output on cosole
        //console.log("\n------------\nMerged Transcription Result:\n", output, "\n------------\n");

        try {
            const jsonPath = path.join(outputDir, "transcription_result.json");
            fs.writeFileSync(jsonPath, JSON.stringify(result, null, 2), "utf-8");

            const txtPath = path.join(outputDir, "transcription_result.txt");
            fs.writeFileSync(txtPath, output.join("\n"), "utf-8");

            console.log(`Summary successfully saved:\n- ${jsonPath}\n- ${txtPath}`);
        } catch (err) {
            console.error("Error saving Summary:", err);
        }
    }
}