// video2document/services/modules/replace_speaker/replaceSpeaker.js

const fs = require('fs');
const path = require('path');
// Directory that receives the generated HTML documents; resolved relative to
// this file's directory (three levels up, then storage/documents).
const outputDir = path.join(__dirname, "../../../storage/documents");

// Create the output directory (including missing parents) at load time so that
// later writes do not fail on a missing folder.
if (fs.existsSync(outputDir) === false) {
  fs.mkdirSync(outputDir, { recursive: true });
}
/**
 * Escapes every regular-expression metacharacter in `text` so the result can
 * be embedded in a RegExp source and will match `text` literally.
 *
 * @param {string} text - Raw text to escape.
 * @returns {string} Regex-safe version of `text`.
 */
function escapeRegExp(text) {
  return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Parses the content of a speaker mapping file.
 *
 * Two formats are accepted:
 *  - JSON object: {"Speaker A": "Mike", "Speaker B": "Stefan"}
 *  - simple format, one "placeholder,realName" pair per line
 *    (used whenever the content is not valid JSON).
 *
 * @param {string} mappingData - Raw text of the mapping file.
 * @returns {Map<string, *>} Placeholder -> replacement value.
 * @throws {TypeError} If the content is valid JSON but not a plain object.
 */
function parseSpeakerMapping(mappingData) {
  // A leading byte-order mark would make JSON.parse fail on otherwise valid JSON.
  const text = mappingData.replace(/^\uFEFF/, "");

  let parsed;
  try {
    parsed = JSON.parse(text);
  } catch (e) {
    // Not JSON: fall back to the simple "placeholder,realName" line format.
    const speakerMap = new Map();
    for (const line of text.split(/\r?\n/)) {
      const [placeholder, realName] = line.split(",").map((s) => s.trim());
      if (placeholder && realName) {
        speakerMap.set(placeholder, realName);
      }
    }
    return speakerMap;
  }

  // Arrays and primitives are valid JSON but are not a placeholder/name mapping.
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    throw new TypeError(
      "Speaker mapping JSON must be an object of placeholder/name pairs"
    );
  }
  return new Map(Object.entries(parsed));
}

/**
 * Replaces every whole-word, case-sensitive occurrence of each placeholder in
 * `htmlContent` with its mapped value.
 *
 * All placeholders are substituted in a single pass, so the output of one
 * replacement is never rewritten by another mapping entry (e.g. swapping two
 * speakers works). Placeholders and replacement values are treated as literal
 * text: regex metacharacters and `$` sequences have no special meaning.
 *
 * @param {string} htmlContent - Text to process.
 * @param {Map<string, *>} speakerMap - Placeholder -> replacement value.
 * @returns {string} The text with all placeholders replaced.
 */
function applySpeakerMapping(htmlContent, speakerMap) {
  // An empty placeholder would match at every word boundary; ignore it.
  const placeholders = [...speakerMap.keys()].filter((key) => key !== "");
  if (placeholders.length === 0) {
    return htmlContent;
  }

  // Longest first, so a placeholder that is a prefix of another one
  // ("Speaker" vs. "Speaker A") cannot shadow the longer entry.
  placeholders.sort((a, b) => b.length - a.length);

  const alternatives = placeholders.map((placeholder) => {
    // \b only makes sense next to a word character; adding it beside e.g. "["
    // would require an adjacent word character in the text instead.
    const leading = /^\w/.test(placeholder) ? "\\b" : "";
    const trailing = /\w$/.test(placeholder) ? "\\b" : "";
    return `${leading}${escapeRegExp(placeholder)}${trailing}`;
  });
  const pattern = new RegExp(`(?:${alternatives.join("|")})`, "g");

  // A replacer function keeps "$&", "$1", ... in real names literal.
  return htmlContent.replace(pattern, (match) => String(speakerMap.get(match)));
}

const module_exports = {
  name: "replace_speaker",
  type: "processor",
  displayname: "Speaker Name Replacer",
  description: "Replaces speaker placeholder names with actual names based on a mapping in HTML files",

  /**
   * Module entry point: forwards the two paths to `replaceNames`.
   *
   * @param {{inputHtmlPath: string, speakerMappingPath: string}} parameter
   *   Path to the input HTML file and path to the speaker mapping file.
   * @returns {Promise<string>} Path of the written output file.
   */
  async function(parameter) {
    return this.replaceNames(
      parameter.inputHtmlPath, // Path to input HTML file
      parameter.speakerMappingPath // Path to speaker mapping file (JSON or simple format)
    );
  },

  /**
   * Reads an HTML file and a speaker mapping file, replaces the placeholder
   * names, and writes the result to `<outputDir>/<input name>_replaced.html`.
   *
   * @param {string} inputHtmlPath - Path to the HTML file to process.
   * @param {string} speakerMappingPath - Path to the mapping file
   *   (JSON object or "placeholder,realName" lines).
   * @returns {Promise<string>} Path of the written output file.
   * @throws Rejects if a file cannot be read or written, or if the mapping is
   *   valid JSON but not an object.
   */
  replaceNames: async function(inputHtmlPath, speakerMappingPath) {
    const htmlContent = await fs.promises.readFile(inputHtmlPath, "utf-8");
    const mappingData = await fs.promises.readFile(speakerMappingPath, "utf-8");

    const speakerMap = parseSpeakerMapping(mappingData);
    const outputContent = applySpeakerMapping(htmlContent, speakerMap);

    // Output file name is derived from the input file name (extension dropped).
    const inputFileName = path.basename(inputHtmlPath, path.extname(inputHtmlPath));
    const outPath = path.join(outputDir, `${inputFileName}_replaced.html`);

    await fs.promises.writeFile(outPath, outputContent, "utf8");
    return outPath;
  }
};
// Public interface of this file: the module descriptor object.
module.exports = module_exports;

// CLI mode: when this file is executed directly (not loaded via require),
// take the two paths from the command line and run the replacement.
if (require.main === module) {
  (async () => {
    const args = process.argv.slice(2);
    if (args.length < 2) {
      // Show the name of the script actually being run in the usage text.
      const scriptName = path.basename(__filename);
      console.error(`Usage: node ${scriptName} <inputHtmlPath> <speakerMappingPath>`);
      console.error(`Example: node ${scriptName} ./document.html ./speaker_mapping.json`);
      console.error("\nMapping file formats:");
      console.error("JSON: {\"Speaker A\": \"Mike\", \"Speaker B\": \"Stefan\"}");
      console.error("or simple: Speaker A,Mike\\nSpeaker B,Stefan");
      process.exit(1);
    }

    const [inputHtmlPath, speakerMappingPath] = args;

    // Fail early with a readable message when an input file is missing.
    if (!fs.existsSync(inputHtmlPath)) {
      console.error(`ERROR: HTML file not found: ${inputHtmlPath}`);
      process.exit(1);
    }
    if (!fs.existsSync(speakerMappingPath)) {
      console.error(`ERROR: Speaker mapping file not found: ${speakerMappingPath}`);
      process.exit(1);
    }

    console.log("Starting speaker name replacement...");
    console.log(`HTML file: ${inputHtmlPath}`);
    console.log(`Mapping file: ${speakerMappingPath}`);

    try {
      const outPath = await module_exports.replaceNames(
        inputHtmlPath,
        speakerMappingPath
      );
      console.log(`Output file: ${outPath}`);
      console.log("Done!");
    } catch (error) {
      // Report the failure and signal it through the exit code instead of
      // leaving an unhandled promise rejection.
      console.error(`ERROR: Speaker name replacement failed: ${error.message}`);
      process.exitCode = 1;
    }
  })();
}