Files
video2document/services/modules/jsonTools/transcriptionSummarizer2.js
emily faee605f12 Initial version of the working CI pipeline
Added mocha based unit tests for each module
Did a bit of cleanup in the modules to remove debug console.log calls
Removed the Progress bar in the extractor and the library requirement
Promisified the gemini module to make sure it returns the path as a promise instead of just on the cli
Fixed gitignore so that it now only ignores the content int the storage directories, and not the whole directories
Added neetingReport.json for the LLMs to use
2025-12-09 22:07:43 +01:00

150 lines
5.4 KiB
JavaScript

// Prepare output directory (always storage/transcriptionSummaries under project root)
const outputDir = `${__dirname}/../../../storage/transcriptionSummaries`;
if (!fs.existsSync(outputDir)) {
fs.mkdirSync(outputDir, { recursive: true });
}
function getSessionId(inputPath) {
try {
const parsed = new URL(inputPath);
const base = path.basename(parsed.pathname);
return base.replace(/\.[^.]+$/, '');
} catch {
return path.basename(inputPath, path.extname(inputPath));
}
}
//Speaker, Sentence, Start, End
module.exports = {
name: "summarize-transcription2", // Unique name for our function that will later be used to get the function from the map via "mapFunctions.get("example").function()"
type: "summarizer", // value used to differentiate each module to order them in the UI
displayname: "Summarizer", // The displayname used within the UI
async function(args) {
return new Promise(async (resolve, reject) => {
let inputJson = args.json;
//JSON Path
if (args.jsonPath) {
try {
const raw = fs.readFileSync(args.jsonPath, "utf-8");
inputJson = JSON.parse(raw);
} catch (e) {
console.error("Failed to load JSON from file:", e);
reject("Could not read JSON from file path.")
return
}
}
// JSON parsen
if (typeof args === "string") {
try {
await new Promise((res, rej) => {
fs.readFile(args, 'utf8', function (err, data) {
if (err){
rej(err)
return
}
inputJson = JSON.parse(data);
res()
});
})
} catch (e) {
// console.log("Invalid JSON in summarize-transcription");
// console.log(e)
reject(e)
return
}
}
const words = inputJson.words;
if (!Array.isArray(words)) {
reject("No words Array found")
return;
}
const ENDINGS = [".", "!", "?"]; // '...' auch als Satzende ?
const ABBREVIATIONS = new Set(["z.B.", "bzw.", "u.a.", "Dr.", "Mr.", "Mrs.", "Prof.", "etc."]); //TODO weitere Ergaenzen
const result = [];
let currentSentence = "";
let currentSpeaker = null;
let startTime = null;
let endTime = null;
for (const w of words) {
if (!currentSpeaker) currentSpeaker = w.speaker;
if (startTime === null) startTime = w.start;
endTime = w.end;
//speaker changing
if (currentSpeaker !== w.speaker && currentSentence) {
result.push({
speaker: currentSpeaker,
sentence: currentSentence,
start: startTime,
end: endTime
});
currentSentence = "";
startTime = w.start;
}
currentSpeaker = w.speaker;
currentSentence += (currentSentence ? " " : "") + w.text; //sentence beginning or not
const lastWord = w.text.trim();
const lastChar = lastWord.slice(-1);
const isAbbreviation = ABBREVIATIONS.has(lastWord);
//sentence ending
if (ENDINGS.includes(lastChar) && !isAbbreviation) {
result.push({
speaker: currentSpeaker,
sentence: currentSentence,
start: startTime,
end: endTime
});
currentSentence = "";
startTime = null;
endTime = null;
currentSpeaker = null;
}
}
// safe last sentence
if (currentSentence) {
result.push({
speaker: currentSpeaker,
sentence: currentSentence,
start: startTime,
end: endTime
});
}
// Output as Text
const output = result.map(r =>
`Sprecher ${r.speaker} [${r.start.toFixed(2)} - ${r.end.toFixed(2)}]: ${r.sentence}`
);
// Output on cosole
//console.log("\n------------\nMerged Transcription Result:\n", output, "\n------------\n");
try {
let filename = getSessionId(args);
const jsonPath = path.join(outputDir, `${filename}-${new Date().getTime()}.json`);
fs.writeFileSync(jsonPath, JSON.stringify(result, null, 2), "utf-8");
const txtPath = path.join(outputDir, `${filename}-${new Date().getTime()}.txt`);
fs.writeFileSync(txtPath, output.join("\n"), "utf-8");
// console.log(`Summary successfully saved:\n- ${jsonPath}\n- ${txtPath}`);
resolve(jsonPath);
} catch (err) {
// console.error("Error saving Summary:", err);
reject(err);
}
})
}
}