Refactor code formatting and improve error handling in htmlDocumentConverter

This commit is contained in:
MikeHughes-BIN
2026-01-18 17:28:41 +01:00
parent 1ed386fcf4
commit 18e791d56e
+76 -43
View File
@@ -1,9 +1,9 @@
const fs = require('fs');
const path = require('path');
const puppeteer = require('puppeteer');
const htmlToDocx = require('html-to-docx');
const { execSync } = require('child_process');
const os = require('os');
const fs = require("fs");
const path = require("path");
const puppeteer = require("puppeteer");
const htmlToDocx = require("html-to-docx");
const { execSync } = require("child_process");
const os = require("os");
const outputDir = path.join(__dirname, "../../../storage/documents");
@@ -14,7 +14,7 @@ if (!fs.existsSync(outputDir)) {
async function showSaveDialog(defaultName, format) {
const platform = os.platform();
if (platform === 'darwin') {
if (platform === "darwin") {
// macOS
const applescript = `
set defaultName to "${defaultName}.${format}"
@@ -23,13 +23,15 @@ async function showSaveDialog(defaultName, format) {
`;
try {
const result = execSync(`osascript -e '${applescript}'`, { encoding: 'utf8' });
const result = execSync(`osascript -e '${applescript}'`, {
encoding: "utf8",
});
return result.trim();
} catch (err) {
if (err.status === 1) return null; // User canceled
throw err;
}
} else if (platform === 'win32') {
} else if (platform === "win32") {
const safeName = decodeURIComponent(defaultName);
const powershell = `
@@ -44,8 +46,8 @@ async function showSaveDialog(defaultName, format) {
try {
const result = execSync(
`powershell -NoProfile -Command "${powershell.replace(/\r?\n/g, ' ')}"`,
{ encoding: 'utf8' }
`powershell -NoProfile -Command "${powershell.replace(/\r?\n/g, " ")}"`,
{ encoding: "utf8" },
);
return result.trim() || null;
} catch (err) {
@@ -57,19 +59,19 @@ async function showSaveDialog(defaultName, format) {
try {
const result = execSync(
`zenity --file-selection --save --confirm-overwrite --filename="${defaultName}.${format}"`,
{ encoding: 'utf8' }
{ encoding: "utf8" },
);
return result.trim();
} catch (err) {
try {
const result = execSync(
`kdialog --getsavefilename . "${defaultName}.${format}"`,
{ encoding: 'utf8' }
{ encoding: "utf8" },
);
return result.trim();
} catch (err2) {
// Fallback
return path.join(os.homedir(), 'Downloads', `${defaultName}.${format}`);
return path.join(os.homedir(), "Downloads", `${defaultName}.${format}`);
}
}
}
@@ -89,11 +91,10 @@ const module_exports = {
* @param {string} [options.outputName] - Optional output filename (without extension)
* @param {boolean} [options.showDialog] - Show save dialog (default: false in module mode, true in CLI mode)
*/
async convert({ inputPath, format = 'pdf', outputName, showDialog = false }) {
async convert({ inputPath, format = "pdf", outputName, showDialog = false }) {
format = format.toLowerCase().replace(".", ""); // <-- FIX
format = format.toLowerCase().replace('.', ''); // <-- FIX
if (!['pdf', 'docx', 'html', 'txt'].includes(format)) {
if (!["pdf", "docx", "html", "txt"].includes(format)) {
throw new Error(`Unsupported format: ${format}`);
}
if (!fs.existsSync(inputPath)) {
@@ -109,7 +110,7 @@ async convert({ inputPath, format = 'pdf', outputName, showDialog = false }) {
// Zeige nativen Dialog
outputFile = await showSaveDialog(baseName, format);
if (!outputFile) {
console.log('Speichervorgang abgebrochen.');
console.log("Speichervorgang abgebrochen.");
return null;
}
} else {
@@ -117,23 +118,23 @@ async convert({ inputPath, format = 'pdf', outputName, showDialog = false }) {
outputFile = path.join(outputDir, `${baseName}.${format.toLowerCase()}`);
}
let htmlContent = fs.readFileSync(inputPath, 'utf8');
let htmlContent = fs.readFileSync(inputPath, "utf8");
// Remove <think> tags if present
htmlContent = htmlContent.replace(/<think>[\s\S]*?<\/think>/gi, '');
htmlContent = htmlContent.replace(/<think>[\s\S]*?<\/think>/gi, "");
switch (format.toLowerCase()) {
case 'html':
fs.writeFileSync(outputFile, htmlContent, 'utf8');
case "html":
fs.writeFileSync(outputFile, htmlContent, "utf8");
break;
case 'pdf':
case "pdf":
await this.htmlToPDF(htmlContent, outputFile);
break;
case 'docx':
case "docx":
await this.htmlToDOCX(htmlContent, outputFile);
break;
case 'txt':
fs.writeFileSync(outputFile, this.htmlToTXT(htmlContent), 'utf8');
case "txt":
fs.writeFileSync(outputFile, this.htmlToTXT(htmlContent), "utf8");
break;
default:
throw new Error(`Unsupported format: ${format}`);
@@ -145,31 +146,61 @@ async convert({ inputPath, format = 'pdf', outputName, showDialog = false }) {
// HTML → PDF
async htmlToPDF(html, outputPath) {
const browser = await puppeteer.launch({
let browser;
try {
browser = await puppeteer.launch({
headless: true,
args: ['--no-sandbox', '--disable-setuid-sandbox']
args: ["--no-sandbox", "--disable-setuid-sandbox"],
});
const page = await browser.newPage();
await page.setContent(html, { waitUntil: 'networkidle0' });
await page.setContent(html, { waitUntil: "networkidle0" });
await page.pdf({
path: outputPath,
format: 'A4',
format: "A4",
printBackground: true,
margin: { top: '20mm', right: '20mm', bottom: '20mm', left: '20mm' }
margin: {
top: "20mm",
right: "20mm",
bottom: "20mm",
left: "20mm",
},
});
} finally {
if (browser) {
await browser.close();
}
}
},
// HTML → DOCX
async htmlToDOCX(html, outputPath) {
const buffer = await htmlToDocx(html);
try {
// The html-to-docx library converts an HTML string into a Word .docx buffer.
// Usage, per the html-to-docx README (https://github.com/privateOmega/html-to-docx):
// await HTMLtoDOCX(htmlString, headerHTMLString, documentOptions, footerHTMLString)
const buffer = await htmlToDocx(html, null, {
table: { row: { cantSplit: true } },
});
fs.writeFileSync(outputPath, buffer);
} catch (err) {
throw new Error(`DOCX conversion failed: ${err.message}`);
}
},
// HTML → TXT (rudimentär)
// HTML → TXT
htmlToTXT(html) {
return html.replace(/<[^>]*>/g, '').replace(/\s+\n/g, '\n').trim();
}
// Basic plain-text conversion: strip tags and collapse whitespace.
// For more advanced extraction, consider a library such as `html-to-text` (https://github.com/html-to-text/node-html-to-text) or `strip-html`.
return (
html
// Remove all tags
.replace(/<[^>]+>/g, "")
// Collapse every run of whitespace (including newlines) into a single space
.replace(/\s+/g, " ")
.trim()
);
},
};
module.exports = module_exports;
@@ -179,24 +210,26 @@ if (require.main === module) {
(async () => {
const args = process.argv.slice(2);
if (args.length < 1) {
console.log('Usage: node htmlDocumentConverter.js <input.html> [format]');
console.log('Formats: pdf (default), docx, html, txt');
console.log('');
console.log('Ein nativer "Speichern unter" Dialog wird automatisch geöffnet.');
console.log("Usage: node htmlDocumentConverter.js <input.html> [format]");
console.log("Formats: pdf (default), docx, html, txt");
console.log("");
console.log(
'Ein nativer "Speichern unter" Dialog wird automatisch geöffnet.',
);
process.exit(1);
}
const inputPath = args[0];
const format = args[1] || 'pdf';
const format = args[1] || "pdf";
try {
await module_exports.convert({
inputPath,
format,
showDialog: true
showDialog: true,
});
} catch (err) {
console.error('Konvertierung fehlgeschlagen:', err.message);
console.error("Konvertierung fehlgeschlagen:", err.message);
process.exit(1);
}
})();