Switched to transcription summarizer2 due to a better output format

This commit is contained in:
2025-11-24 17:01:49 +01:00
parent 9dfc05e987
commit 9a0a349813
2 changed files with 99 additions and 88 deletions
+1 -1
View File
@@ -169,7 +169,7 @@ electron.ipcMain.on("file_submit", async (event, args) => {
console.log("\n\n Running the Transcription Summarizer module"); console.log("\n\n Running the Transcription Summarizer module");
// This code summarises the transcript, so that it can be used by an llm // This code summarises the transcript, so that it can be used by an llm
// await mapFunctions.get("summarize-transcription").function('A:\\programing\\@projects\\video2document\\storage\\transcripts\\IMG_2978.json').then(resp => { // await mapFunctions.get("summarize-transcription").function('A:\\programing\\@projects\\video2document\\storage\\transcripts\\IMG_2978.json').then(resp => {
await mapFunctions.get("summarize-transcription").function(transcriptpath).then(resp => { await mapFunctions.get("summarize-transcription2").function(transcriptpath).then(resp => {
console.log(resp); console.log(resp);
transcriptpath = resp transcriptpath = resp
curstep++ curstep++
@@ -14,108 +14,119 @@ module.exports = {
type: "summarizer", // value used to differentiate each module to order them in the UI type: "summarizer", // value used to differentiate each module to order them in the UI
displayname: "Summarizer", // The displayname used within the UI displayname: "Summarizer", // The displayname used within the UI
async function(args) { async function(args) {
let inputJson = args.json; return new Promise(async (resolve, reject) => {
let inputJson = args.json;
//JSON Path //JSON Path
if (args.jsonPath) { if (args.jsonPath) {
try { try {
const raw = fs.readFileSync(args.jsonPath, "utf-8"); const raw = fs.readFileSync(args.jsonPath, "utf-8");
inputJson = JSON.parse(raw); inputJson = JSON.parse(raw);
} catch (e) { } catch (e) {
console.error("Failed to load JSON from file:", e); console.error("Failed to load JSON from file:", e);
return { error: "Could not read JSON from file path." }; return { error: "Could not read JSON from file path." };
}
} }
} // JSON parsen
// JSON parsen if (typeof args === "string") {
if (typeof inputJson === "string") { try {
try { await new Promise((res) => {
inputJson = JSON.parse(inputJson); fs.readFile(args, 'utf8', function (err, data) {
} catch (e) { if (err) throw err;
console.log("Invalid JSON in summarize-transcription"); inputJson = JSON.parse(data);
return { error: "Invalid JSON" }; res()
});
})
} catch (e) {
console.log("Invalid JSON in summarize-transcription");
console.log(e)
return { error: "Invalid JSON" };
}
}
const words = inputJson.words;
if (!Array.isArray(words)) {
return { error: "No words Array found" };
} }
}
const words = inputJson.words; const ENDINGS = [".", "!", "?"]; // '...' auch als Satzende ?
if (!Array.isArray(words)) { const ABBREVIATIONS = new Set(["z.B.", "bzw.", "u.a.", "Dr.", "Mr.", "Mrs.", "Prof.", "etc."]); //TODO weitere Ergaenzen
return { error: "No words Array found" };
}
const ENDINGS = [".", "!", "?"]; // '...' auch als Satzende ? const result = [];
const ABBREVIATIONS = new Set(["z.B.", "bzw.", "u.a.", "Dr.", "Mr.", "Mrs.", "Prof.", "etc."]); //TODO weitere Ergaenzen let currentSentence = "";
let currentSpeaker = null;
let startTime = null;
let endTime = null;
const result = []; for (const w of words) {
let currentSentence = ""; if (!currentSpeaker) currentSpeaker = w.speaker;
let currentSpeaker = null; if (startTime === null) startTime = w.start;
let startTime = null; endTime = w.end;
let endTime = null;
//speaker changing
if (currentSpeaker !== w.speaker && currentSentence) {
result.push({
speaker: currentSpeaker,
sentence: currentSentence,
start: startTime,
end: endTime
});
currentSentence = "";
startTime = w.start;
}
currentSpeaker = w.speaker;
currentSentence += (currentSentence ? " " : "") + w.text; //sentence beginning or not
const lastWord = w.text.trim();
const lastChar = lastWord.slice(-1);
const isAbbreviation = ABBREVIATIONS.has(lastWord);
for (const w of words) { //sentence ending
if (!currentSpeaker) currentSpeaker = w.speaker; if (ENDINGS.includes(lastChar) && !isAbbreviation) {
if (startTime === null) startTime = w.start; result.push({
endTime = w.end; speaker: currentSpeaker,
sentence: currentSentence,
//speaker changing start: startTime,
if (currentSpeaker !== w.speaker && currentSentence) { end: endTime
});
currentSentence = "";
startTime = null;
endTime = null;
currentSpeaker = null;
}
}
// safe last sentence
if (currentSentence) {
result.push({ result.push({
speaker: currentSpeaker, speaker: currentSpeaker,
sentence: currentSentence, sentence: currentSentence,
start: startTime, start: startTime,
end: endTime end: endTime
}); });
currentSentence = "";
startTime = w.start;
} }
currentSpeaker = w.speaker;
currentSentence += (currentSentence ? " " : "") + w.text; //sentence beginning or not
const lastWord = w.text.trim();
const lastChar = lastWord.slice(-1);
const isAbbreviation = ABBREVIATIONS.has(lastWord);
//sentence ending
if (ENDINGS.includes(lastChar) && !isAbbreviation) { // Output as Text
result.push({ const output = result.map(r =>
speaker: currentSpeaker, `Sprecher ${r.speaker} [${r.start.toFixed(2)} - ${r.end.toFixed(2)}]: ${r.sentence}`
sentence: currentSentence, );
start: startTime,
end: endTime // Output on cosole
}); //console.log("\n------------\nMerged Transcription Result:\n", output, "\n------------\n");
currentSentence = "";
startTime = null; try {
endTime = null; const jsonPath = path.join(outputDir, "transcription_result.json");
currentSpeaker = null; fs.writeFileSync(jsonPath, JSON.stringify(result, null, 2), "utf-8");
const txtPath = path.join(outputDir, "transcription_result.txt");
fs.writeFileSync(txtPath, output.join("\n"), "utf-8");
console.log(`Summary successfully saved:\n- ${jsonPath}\n- ${txtPath}`);
resolve(jsonPath);
} catch (err) {
console.error("Error saving Summary:", err);
reject(err);
} }
} })
// safe last sentence
if (currentSentence) {
result.push({
speaker: currentSpeaker,
sentence: currentSentence,
start: startTime,
end: endTime
});
}
// Output as Text
const output = result.map(r =>
`Sprecher ${r.speaker} [${r.start.toFixed(2)} - ${r.end.toFixed(2)}]: ${r.sentence}`
);
// Output on cosole
//console.log("\n------------\nMerged Transcription Result:\n", output, "\n------------\n");
try {
const jsonPath = path.join(outputDir, "transcription_result.json");
fs.writeFileSync(jsonPath, JSON.stringify(result, null, 2), "utf-8");
const txtPath = path.join(outputDir, "transcription_result.txt");
fs.writeFileSync(txtPath, output.join("\n"), "utf-8");
console.log(`Summary successfully saved:\n- ${jsonPath}\n- ${txtPath}`);
} catch (err) {
console.error("Error saving Summary:", err);
}
} }
} }