Refactor code formatting and improve error handling in htmlDocumentConverter

This commit is contained in:
MikeHughes-BIN
2026-01-18 17:28:41 +01:00
parent 1ed386fcf4
commit 18e791d56e
+76 -43
View File
@@ -1,9 +1,9 @@
const fs = require('fs'); const fs = require("fs");
const path = require('path'); const path = require("path");
const puppeteer = require('puppeteer'); const puppeteer = require("puppeteer");
const htmlToDocx = require('html-to-docx'); const htmlToDocx = require("html-to-docx");
const { execSync } = require('child_process'); const { execSync } = require("child_process");
const os = require('os'); const os = require("os");
const outputDir = path.join(__dirname, "../../../storage/documents"); const outputDir = path.join(__dirname, "../../../storage/documents");
@@ -14,7 +14,7 @@ if (!fs.existsSync(outputDir)) {
async function showSaveDialog(defaultName, format) { async function showSaveDialog(defaultName, format) {
const platform = os.platform(); const platform = os.platform();
if (platform === 'darwin') { if (platform === "darwin") {
// macOS // macOS
const applescript = ` const applescript = `
set defaultName to "${defaultName}.${format}" set defaultName to "${defaultName}.${format}"
@@ -23,13 +23,15 @@ async function showSaveDialog(defaultName, format) {
`; `;
try { try {
const result = execSync(`osascript -e '${applescript}'`, { encoding: 'utf8' }); const result = execSync(`osascript -e '${applescript}'`, {
encoding: "utf8",
});
return result.trim(); return result.trim();
} catch (err) { } catch (err) {
if (err.status === 1) return null; // User canceled if (err.status === 1) return null; // User canceled
throw err; throw err;
} }
} else if (platform === 'win32') { } else if (platform === "win32") {
const safeName = decodeURIComponent(defaultName); const safeName = decodeURIComponent(defaultName);
const powershell = ` const powershell = `
@@ -44,8 +46,8 @@ async function showSaveDialog(defaultName, format) {
try { try {
const result = execSync( const result = execSync(
`powershell -NoProfile -Command "${powershell.replace(/\r?\n/g, ' ')}"`, `powershell -NoProfile -Command "${powershell.replace(/\r?\n/g, " ")}"`,
{ encoding: 'utf8' } { encoding: "utf8" },
); );
return result.trim() || null; return result.trim() || null;
} catch (err) { } catch (err) {
@@ -57,19 +59,19 @@ async function showSaveDialog(defaultName, format) {
try { try {
const result = execSync( const result = execSync(
`zenity --file-selection --save --confirm-overwrite --filename="${defaultName}.${format}"`, `zenity --file-selection --save --confirm-overwrite --filename="${defaultName}.${format}"`,
{ encoding: 'utf8' } { encoding: "utf8" },
); );
return result.trim(); return result.trim();
} catch (err) { } catch (err) {
try { try {
const result = execSync( const result = execSync(
`kdialog --getsavefilename . "${defaultName}.${format}"`, `kdialog --getsavefilename . "${defaultName}.${format}"`,
{ encoding: 'utf8' } { encoding: "utf8" },
); );
return result.trim(); return result.trim();
} catch (err2) { } catch (err2) {
// Fallback // Fallback
return path.join(os.homedir(), 'Downloads', `${defaultName}.${format}`); return path.join(os.homedir(), "Downloads", `${defaultName}.${format}`);
} }
} }
} }
@@ -89,11 +91,10 @@ const module_exports = {
* @param {string} [options.outputName] - Optional output filename (without extension) * @param {string} [options.outputName] - Optional output filename (without extension)
* @param {boolean} [options.showDialog] - Show save dialog (default: false in module mode, true in CLI mode) * @param {boolean} [options.showDialog] - Show save dialog (default: false in module mode, true in CLI mode)
*/ */
async convert({ inputPath, format = 'pdf', outputName, showDialog = false }) { async convert({ inputPath, format = "pdf", outputName, showDialog = false }) {
format = format.toLowerCase().replace(".", ""); // <-- FIX
format = format.toLowerCase().replace('.', ''); // <-- FIX if (!["pdf", "docx", "html", "txt"].includes(format)) {
if (!['pdf', 'docx', 'html', 'txt'].includes(format)) {
throw new Error(`Unsupported format: ${format}`); throw new Error(`Unsupported format: ${format}`);
} }
if (!fs.existsSync(inputPath)) { if (!fs.existsSync(inputPath)) {
@@ -109,7 +110,7 @@ async convert({ inputPath, format = 'pdf', outputName, showDialog = false }) {
// Zeige nativen Dialog // Zeige nativen Dialog
outputFile = await showSaveDialog(baseName, format); outputFile = await showSaveDialog(baseName, format);
if (!outputFile) { if (!outputFile) {
console.log('Speichervorgang abgebrochen.'); console.log("Speichervorgang abgebrochen.");
return null; return null;
} }
} else { } else {
@@ -117,23 +118,23 @@ async convert({ inputPath, format = 'pdf', outputName, showDialog = false }) {
outputFile = path.join(outputDir, `${baseName}.${format.toLowerCase()}`); outputFile = path.join(outputDir, `${baseName}.${format.toLowerCase()}`);
} }
let htmlContent = fs.readFileSync(inputPath, 'utf8'); let htmlContent = fs.readFileSync(inputPath, "utf8");
// Remove <think> tags if present // Remove <think> tags if present
htmlContent = htmlContent.replace(/<think>[\s\S]*?<\/think>/gi, ''); htmlContent = htmlContent.replace(/<think>[\s\S]*?<\/think>/gi, "");
switch (format.toLowerCase()) { switch (format.toLowerCase()) {
case 'html': case "html":
fs.writeFileSync(outputFile, htmlContent, 'utf8'); fs.writeFileSync(outputFile, htmlContent, "utf8");
break; break;
case 'pdf': case "pdf":
await this.htmlToPDF(htmlContent, outputFile); await this.htmlToPDF(htmlContent, outputFile);
break; break;
case 'docx': case "docx":
await this.htmlToDOCX(htmlContent, outputFile); await this.htmlToDOCX(htmlContent, outputFile);
break; break;
case 'txt': case "txt":
fs.writeFileSync(outputFile, this.htmlToTXT(htmlContent), 'utf8'); fs.writeFileSync(outputFile, this.htmlToTXT(htmlContent), "utf8");
break; break;
default: default:
throw new Error(`Unsupported format: ${format}`); throw new Error(`Unsupported format: ${format}`);
@@ -145,31 +146,61 @@ async convert({ inputPath, format = 'pdf', outputName, showDialog = false }) {
// HTML → PDF // HTML → PDF
async htmlToPDF(html, outputPath) { async htmlToPDF(html, outputPath) {
const browser = await puppeteer.launch({ let browser;
try {
browser = await puppeteer.launch({
headless: true, headless: true,
args: ['--no-sandbox', '--disable-setuid-sandbox'] args: ["--no-sandbox", "--disable-setuid-sandbox"],
}); });
const page = await browser.newPage(); const page = await browser.newPage();
await page.setContent(html, { waitUntil: 'networkidle0' }); await page.setContent(html, { waitUntil: "networkidle0" });
await page.pdf({ await page.pdf({
path: outputPath, path: outputPath,
format: 'A4', format: "A4",
printBackground: true, printBackground: true,
margin: { top: '20mm', right: '20mm', bottom: '20mm', left: '20mm' } margin: {
top: "20mm",
right: "20mm",
bottom: "20mm",
left: "20mm",
},
}); });
} finally {
if (browser) {
await browser.close(); await browser.close();
}
}
}, },
// HTML → DOCX // HTML → DOCX
async htmlToDOCX(html, outputPath) { async htmlToDOCX(html, outputPath) {
const buffer = await htmlToDocx(html); try {
// The html-to-docx library converts an HTML string into a Word .docx buffer.
// Usage from the html-to-docx docs (https://github.com/privateOmega/html-to-docx):
// await HTMLtoDOCX(htmlString, headerHTMLString, documentOptions, footerHTMLString)
const buffer = await htmlToDocx(html, null, {
table: { row: { cantSplit: true } },
});
fs.writeFileSync(outputPath, buffer); fs.writeFileSync(outputPath, buffer);
} catch (err) {
throw new Error(`DOCX conversion failed: ${err.message}`);
}
}, },
// HTML → TXT (rudimentär) // HTML → TXT
htmlToTXT(html) { htmlToTXT(html) {
return html.replace(/<[^>]*>/g, '').replace(/\s+\n/g, '\n').trim(); // A decent plain text conversion: strip tags and collapse whitespace
} // If you want more advanced extraction consider using a library like `html-to-text` or `strip-html` [oai_citation:1‡GitHub](https://github.com/html-to-text/node-html-to-text?utm_source=chatgpt.com)
return (
html
// Remove all tags
.replace(/<[^>]+>/g, "")
// Convert multiple whitespace into single spaces
.replace(/\s+/g, " ")
.trim()
);
},
}; };
module.exports = module_exports; module.exports = module_exports;
@@ -179,24 +210,26 @@ if (require.main === module) {
(async () => { (async () => {
const args = process.argv.slice(2); const args = process.argv.slice(2);
if (args.length < 1) { if (args.length < 1) {
console.log('Usage: node htmlDocumentConverter.js <input.html> [format]'); console.log("Usage: node htmlDocumentConverter.js <input.html> [format]");
console.log('Formats: pdf (default), docx, html, txt'); console.log("Formats: pdf (default), docx, html, txt");
console.log(''); console.log("");
console.log('Ein nativer "Speichern unter" Dialog wird automatisch geöffnet.'); console.log(
'Ein nativer "Speichern unter" Dialog wird automatisch geöffnet.',
);
process.exit(1); process.exit(1);
} }
const inputPath = args[0]; const inputPath = args[0];
const format = args[1] || 'pdf'; const format = args[1] || "pdf";
try { try {
await module_exports.convert({ await module_exports.convert({
inputPath, inputPath,
format, format,
showDialog: true showDialog: true,
}); });
} catch (err) { } catch (err) {
console.error('Konvertierung fehlgeschlagen:', err.message); console.error("Konvertierung fehlgeschlagen:", err.message);
process.exit(1); process.exit(1);
} }
})(); })();