// Mirror of https://gitlab.rlp.net/proj-wise2526-video2document/video2document.git
// Synced 2026-06-15 18:01:52 +02:00 (237 lines, 7.1 KiB, JavaScript)
const fs = require("fs");
|
||
const path = require("path");
|
||
const puppeteer = require("puppeteer");
|
||
const htmlToDocx = require("html-to-docx");
|
||
const { execSync } = require("child_process");
|
||
const os = require("os");
|
||
|
||
// Default destination for converted documents; used whenever no save dialog
// is shown. The directory tree is created once, at module load, if it does
// not exist yet.
const outputDir = (() => {
  const dir = path.join(__dirname, "../../../storage/documents");
  if (fs.existsSync(dir) === false) {
    fs.mkdirSync(dir, { recursive: true });
  }
  return dir;
})();
|
||
|
||
/**
 * Opens the platform's native "save as" dialog and returns the chosen path.
 *
 * The dialog tools are started without a shell (argument arrays via
 * `execFileSync`), so characters such as quotes, `$` or backticks in the
 * suggested file name cannot break out of the command line.
 *
 * - macOS: `osascript` with an AppleScript `choose file name` prompt.
 * - Windows: PowerShell with `System.Windows.Forms.SaveFileDialog`.
 * - Other platforms: `zenity`, then `kdialog`; if neither can be started
 *   (or no graphical session is available) the path
 *   `~/Downloads/<defaultName>.<format>` is returned as a best-effort default.
 *
 * @param {string} defaultName - Suggested file name without extension.
 * @param {string} format - File extension without the leading dot.
 * @returns {Promise<string|null>} The selected path, or `null` when the user
 *   cancelled the dialog.
 * @throws {Error} On macOS/Windows when the dialog process fails for a reason
 *   other than cancellation (exit status 1).
 */
async function showSaveDialog(defaultName, format) {
  // Required locally so this function is self-contained; the file's top-level
  // require only provides `execSync`.
  const { execFileSync } = require("child_process");

  const platform = os.platform();
  const runDialog = (command, args) =>
    execFileSync(command, args, { encoding: "utf8" }).trim();

  if (platform === "darwin") {
    // Escape backslashes and double quotes for the AppleScript string literal.
    const suggestedName = `${defaultName}.${format}`
      .replace(/\\/g, "\\\\")
      .replace(/"/g, '\\"');
    const applescript = [
      `set defaultName to "${suggestedName}"`,
      'set theFile to choose file name with prompt "Dokument speichern als:" default name defaultName',
      "POSIX path of theFile",
    ].join("\n");

    try {
      return runDialog("osascript", ["-e", applescript]);
    } catch (err) {
      if (err.status === 1) return null; // User cancelled
      throw err;
    }
  }

  if (platform === "win32") {
    // The name may arrive URI-encoded; a stray "%" must not abort the dialog.
    let safeName;
    try {
      safeName = decodeURIComponent(defaultName);
    } catch (decodeErr) {
      safeName = defaultName;
    }

    // Inside a single-quoted PowerShell string a quote is escaped by doubling
    // it. PowerShell also accepts the typographic single quotes as delimiters.
    const psQuote = (value) =>
      String(value).replace(/['\u2018\u2019\u201A\u201B]/g, "$&$&");
    const psFormat = psQuote(format);

    const powershell = [
      "Add-Type -AssemblyName System.Windows.Forms;",
      "$dialog = New-Object System.Windows.Forms.SaveFileDialog;",
      `$dialog.FileName = '${psQuote(safeName)}.${psFormat}';`,
      `$dialog.Filter = '${psQuote(String(format).toUpperCase())} Dateien (*.${psFormat})|*.${psFormat}|Alle Dateien (*.*)|*.*';`,
      "$dialog.Title = 'Dokument speichern als';",
      "$result = $dialog.ShowDialog();",
      "if ($result -eq 'OK') { $dialog.FileName }",
    ].join(" ");

    try {
      // Empty output means the dialog was closed without choosing a file.
      return (
        runDialog("powershell", ["-NoProfile", "-Command", powershell]) || null
      );
    } catch (err) {
      if (err.status === 1) return null; // User cancelled
      throw new Error("Save dialog failed: " + err.message, { cause: err });
    }
  }

  // Linux and other platforms: zenity or kdialog.
  const suggestedFile = `${defaultName}.${format}`;
  const fallbackPath = path.join(os.homedir(), "Downloads", suggestedFile);

  // Without a graphical session neither tool can open a window.
  if (!process.env.DISPLAY && !process.env.WAYLAND_DISPLAY) {
    return fallbackPath;
  }

  const dialogTools = [
    [
      "zenity",
      [
        "--file-selection",
        "--save",
        "--confirm-overwrite",
        `--filename=${suggestedFile}`,
      ],
    ],
    ["kdialog", ["--getsavefilename", ".", suggestedFile]],
  ];

  for (const [command, args] of dialogTools) {
    try {
      return runDialog(command, args) || null;
    } catch (err) {
      // Exit status 1 is how both tools report "cancelled"; honour it instead
      // of opening a second dialog or silently writing to the fallback path.
      if (err.status === 1) return null;
      // Any other failure (e.g. the tool is not installed): try the next one.
    }
  }

  // Best-effort fallback when no dialog tool could be started.
  return fallbackPath;
}
|
||
|
||
/**
 * Converter plugin that turns LLM-generated HTML into PDF, DOCX, TXT or HTML.
 */
const module_exports = {
  name: "htmlDocumentConverter",
  type: "converter",
  displayname: "HTML Document Converter",
  description: "Converts LLM-generated HTML to PDF, DOCX, TXT, or HTML",

  /**
   * Main conversion function.
   *
   * Reads the HTML file, strips any `<think>…</think>` sections and writes
   * the result in the requested format.
   *
   * @param {Object} options
   * @param {string} options.inputPath - Path to the HTML input
   * @param {string} [options.format="pdf"] - 'pdf' | 'docx' | 'html' | 'txt'
   *   (case-insensitive, a dot such as in ".pdf" is tolerated)
   * @param {string} [options.outputName] - Optional output filename (without extension)
   * @param {boolean} [options.showDialog=false] - Show a native save dialog
   *   instead of writing to the default output directory
   * @returns {Promise<string|null>} Path of the written file, or `null` when
   *   the save dialog was cancelled.
   * @throws {Error} If the format is unsupported, the input file does not
   *   exist, or the conversion itself fails.
   */
  async convert({ inputPath, format = "pdf", outputName, showDialog = false }) {
    // String() turns a non-string format (e.g. null) into a clear
    // "Unsupported format" error instead of a TypeError.
    const targetFormat = String(format).toLowerCase().replace(".", "");

    if (!["pdf", "docx", "html", "txt"].includes(targetFormat)) {
      throw new Error(`Unsupported format: ${targetFormat}`);
    }
    if (!fs.existsSync(inputPath)) {
      throw new Error(`Input file not found: ${inputPath}`);
    }

    const ext = path.extname(inputPath).toLowerCase();
    const baseName = outputName || path.basename(inputPath, ext);

    let outputFile;
    if (showDialog) {
      // Let the user pick the destination in a native dialog.
      outputFile = await showSaveDialog(baseName, targetFormat);
      if (!outputFile) {
        console.log("Speichervorgang abgebrochen.");
        return null;
      }
    } else {
      // Use the default output directory.
      outputFile = path.join(outputDir, `${baseName}.${targetFormat}`);
    }

    // Remove <think> sections if present.
    const htmlContent = fs
      .readFileSync(inputPath, "utf8")
      .replace(/<think>[\s\S]*?<\/think>/gi, "");

    // targetFormat was validated above, so exactly one case matches.
    switch (targetFormat) {
      case "html":
        fs.writeFileSync(outputFile, htmlContent, "utf8");
        break;
      case "pdf":
        await this.htmlToPDF(htmlContent, outputFile);
        break;
      case "docx":
        await this.htmlToDOCX(htmlContent, outputFile);
        break;
      case "txt":
        fs.writeFileSync(outputFile, this.htmlToTXT(htmlContent), "utf8");
        break;
    }

    console.log(`Erfolgreich gespeichert: ${outputFile}`);
    return outputFile;
  },

  /**
   * HTML → PDF: renders the HTML in headless Chromium (puppeteer) and prints
   * it as an A4 page with 20mm margins.
   *
   * @param {string} html - HTML markup to render
   * @param {string} outputPath - Destination path of the PDF file
   * @returns {Promise<void>}
   */
  async htmlToPDF(html, outputPath) {
    let browser;
    try {
      browser = await puppeteer.launch({
        headless: true,
        // NOTE(review): the Chromium sandbox is disabled while rendering
        // generated HTML; confirm this is acceptable for the deployment.
        args: ["--no-sandbox", "--disable-setuid-sandbox"],
      });

      const page = await browser.newPage();
      await page.setContent(html, { waitUntil: "networkidle0" });
      await page.pdf({
        path: outputPath,
        format: "A4",
        printBackground: true,
        margin: {
          top: "20mm",
          right: "20mm",
          bottom: "20mm",
          left: "20mm",
        },
      });
    } finally {
      // Always release the browser process, even when rendering failed.
      if (browser) {
        await browser.close();
      }
    }
  },

  /**
   * HTML → DOCX: converts the HTML string into a Word document using the
   * html-to-docx library and writes the resulting buffer to disk.
   *
   * @param {string} html - HTML markup to convert
   * @param {string} outputPath - Destination path of the .docx file
   * @returns {Promise<void>}
   * @throws {Error} "DOCX conversion failed: …" with the original error
   *   attached as `cause`.
   */
  async htmlToDOCX(html, outputPath) {
    try {
      // Call shape: htmlToDocx(htmlString, headerHTMLString, documentOptions)
      const buffer = await htmlToDocx(html, null, {
        table: { row: { cantSplit: true } },
      });
      fs.writeFileSync(outputPath, buffer);
    } catch (err) {
      throw new Error(`DOCX conversion failed: ${err.message}`, { cause: err });
    }
  },

  /**
   * HTML → TXT: reduces HTML to a single line of plain text.
   *
   * Comments and the contents of `<script>`/`<style>` elements are dropped,
   * block-level element boundaries become word separators, the remaining
   * tags are stripped, common entities are decoded and all whitespace runs
   * are collapsed to single spaces.
   *
   * @param {string} html - HTML markup
   * @returns {string} Plain text without leading/trailing whitespace
   */
  htmlToTXT(html) {
    const namedEntities = new Map([
      ["amp", "&"],
      ["lt", "<"],
      ["gt", ">"],
      ["quot", '"'],
      ["apos", "'"],
      ["nbsp", " "],
    ]);

    // Unknown or out-of-range entities are left untouched.
    const decodeEntity = (match, body) => {
      if (body[0] !== "#") {
        const known = namedEntities.get(body.toLowerCase());
        return known === undefined ? match : known;
      }
      const isHex = body[1] === "x" || body[1] === "X";
      const codePoint = Number.parseInt(
        body.slice(isHex ? 2 : 1),
        isHex ? 16 : 10,
      );
      return codePoint > 0 && codePoint <= 0x10ffff
        ? String.fromCodePoint(codePoint)
        : match;
    };

    return (
      html
        // Comments and non-visible code never belong in the text output
        .replace(/<!--[\s\S]*?-->/g, " ")
        .replace(/<(script|style)\b[^>]*>[\s\S]*?<\/\1\s*>/gi, " ")
        // Block-level boundaries separate words ("<p>a</p><p>b</p>" → "a b")
        .replace(
          /<\/?(?:p|div|br|hr|h[1-6]|li|ul|ol|tr|td|th|table|title|section|article|header|footer|blockquote|pre)\b[^>]*>/gi,
          " ",
        )
        // Remove all remaining (inline) tags
        .replace(/<[^>]+>/g, "")
        // Decode entities only after tag stripping so "&lt;b&gt;" stays text
        .replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, decodeEntity)
        // Convert multiple whitespace into single spaces
        .replace(/\s+/g, " ")
        .trim()
    );
  },
};
|
||
|
||
module.exports = module_exports;

// CLI usage with dialog: when this file is executed directly, convert the
// given input file and let the user choose the destination in a native
// "save as" dialog.
if (require.main === module) {
  const usageLines = [
    "Usage: node htmlDocumentConverter.js <input.html> [format]",
    "Formats: pdf (default), docx, html, txt",
    "",
    'Ein nativer "Speichern unter" Dialog wird automatisch geöffnet.',
  ];

  const runFromCommandLine = async (cliArgs) => {
    // Without an input file there is nothing to convert: print help and stop.
    if (cliArgs.length === 0) {
      for (const line of usageLines) {
        console.log(line);
      }
      process.exit(1);
    }

    const [inputPath] = cliArgs;
    const format = cliArgs[1] || "pdf";

    try {
      await module_exports.convert({ inputPath, format, showDialog: true });
    } catch (err) {
      console.error("Konvertierung fehlgeschlagen:", err.message);
      process.exit(1);
    }
  };

  runFromCommandLine(process.argv.slice(2));
}
|