Refactor code formatting and improve error handling in htmlDocumentConverter

This commit is contained in:
MikeHughes-BIN
2026-01-18 17:28:41 +01:00
parent 1ed386fcf4
commit 18e791d56e
+188 -155
View File
@@ -1,35 +1,37 @@
const fs = require('fs'); const fs = require("fs");
const path = require('path'); const path = require("path");
const puppeteer = require('puppeteer'); const puppeteer = require("puppeteer");
const htmlToDocx = require('html-to-docx'); const htmlToDocx = require("html-to-docx");
const { execSync } = require('child_process'); const { execSync } = require("child_process");
const os = require('os'); const os = require("os");
// Default target directory for converted documents when no save dialog is
// shown. It is resolved relative to this file and created at module load
// time if it does not exist yet.
const outputDir = path.join(__dirname, "..", "..", "..", "storage", "documents");
const outputDirExists = fs.existsSync(outputDir);
if (!outputDirExists) {
  fs.mkdirSync(outputDir, { recursive: true });
}
async function showSaveDialog(defaultName, format) { async function showSaveDialog(defaultName, format) {
const platform = os.platform(); const platform = os.platform();
if (platform === 'darwin') { if (platform === "darwin") {
// macOS // macOS
const applescript = ` const applescript = `
set defaultName to "${defaultName}.${format}" set defaultName to "${defaultName}.${format}"
set theFile to choose file name with prompt "Dokument speichern als:" default name defaultName set theFile to choose file name with prompt "Dokument speichern als:" default name defaultName
POSIX path of theFile POSIX path of theFile
`; `;
try { try {
const result = execSync(`osascript -e '${applescript}'`, { encoding: 'utf8' }); const result = execSync(`osascript -e '${applescript}'`, {
return result.trim(); encoding: "utf8",
} catch (err) { });
if (err.status === 1) return null; // User canceled return result.trim();
throw err; } catch (err) {
} if (err.status === 1) return null; // User canceled
} else if (platform === 'win32') { throw err;
}
} else if (platform === "win32") {
const safeName = decodeURIComponent(defaultName); const safeName = decodeURIComponent(defaultName);
const powershell = ` const powershell = `
@@ -43,161 +45,192 @@ async function showSaveDialog(defaultName, format) {
`; `;
try { try {
const result = execSync( const result = execSync(
`powershell -NoProfile -Command "${powershell.replace(/\r?\n/g, ' ')}"`, `powershell -NoProfile -Command "${powershell.replace(/\r?\n/g, " ")}"`,
{ encoding: 'utf8' } { encoding: "utf8" },
); );
return result.trim() || null; return result.trim() || null;
} catch (err) { } catch (err) {
if (err.status === 1) return null; // User cancelled if (err.status === 1) return null; // User cancelled
throw new Error("Save dialog failed: " + err.message); throw new Error("Save dialog failed: " + err.message);
} }
} else { } else {
// Linux - zenity oder kdialog // Linux - zenity oder kdialog
try { try {
const result = execSync( const result = execSync(
`zenity --file-selection --save --confirm-overwrite --filename="${defaultName}.${format}"`, `zenity --file-selection --save --confirm-overwrite --filename="${defaultName}.${format}"`,
{ encoding: 'utf8' } { encoding: "utf8" },
); );
return result.trim(); return result.trim();
} catch (err) { } catch (err) {
try { try {
const result = execSync( const result = execSync(
`kdialog --getsavefilename . "${defaultName}.${format}"`, `kdialog --getsavefilename . "${defaultName}.${format}"`,
{ encoding: 'utf8' } { encoding: "utf8" },
); );
return result.trim(); return result.trim();
} catch (err2) { } catch (err2) {
// Fallback // Fallback
return path.join(os.homedir(), 'Downloads', `${defaultName}.${format}`); return path.join(os.homedir(), "Downloads", `${defaultName}.${format}`);
} }
}
} }
}
} }
// Formats accepted by `convert`; each one is also used as the file extension.
const SUPPORTED_FORMATS = ["pdf", "docx", "html", "txt"];

// Named entities decoded by `htmlToTXT`. A Map is used so that names such as
// "constructor" can never resolve to an inherited object property.
const NAMED_HTML_ENTITIES = new Map([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
  ["nbsp", "\u00a0"],
]);

// Tags that visually separate text. They are replaced by whitespace rather
// than removed so that neighbouring paragraphs/cells do not fuse into one word.
const BLOCK_TAG_PATTERN =
  /<\/?(?:address|article|aside|blockquote|br|dd|div|dl|dt|figcaption|figure|footer|h[1-6]|header|hr|li|main|nav|ol|p|pre|section|table|tbody|td|tfoot|th|thead|tr|ul)\b[^>]*>/gi;

/**
 * Decodes numeric character references and a small set of named entities in
 * a single pass, so "&amp;lt;" becomes "&lt;" and is not decoded twice.
 * Unknown or out-of-range references are left untouched.
 * @param {string} text
 * @returns {string}
 */
function decodeHtmlEntities(text) {
  return text.replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (match, body) => {
    if (body[0] === "#") {
      const isHex = body[1] === "x" || body[1] === "X";
      const codePoint = Number.parseInt(
        body.slice(isHex ? 2 : 1),
        isHex ? 16 : 10,
      );
      // String.fromCodePoint throws on values outside the Unicode range.
      return codePoint > 0 && codePoint <= 0x10ffff
        ? String.fromCodePoint(codePoint)
        : match;
    }
    return NAMED_HTML_ENTITIES.has(body) ? NAMED_HTML_ENTITIES.get(body) : match;
  });
}

const module_exports = {
  name: "htmlDocumentConverter",
  type: "converter",
  displayname: "HTML Document Converter",
  description: "Converts LLM-generated HTML to PDF, DOCX, TXT, or HTML",

  /**
   * Main conversion function.
   *
   * Reads the HTML file at `inputPath`, strips `<think>…</think>` sections and
   * writes the result in the requested format.
   *
   * @param {Object} options
   * @param {string} options.inputPath - Path to the HTML input
   * @param {string} [options.format] - 'pdf' | 'docx' | 'html' | 'txt' (case-insensitive, a dot is tolerated; default 'pdf')
   * @param {string} [options.outputName] - Optional output filename (without extension)
   * @param {boolean} [options.showDialog] - Show save dialog (default: false in module mode, true in CLI mode)
   * @returns {Promise<string|null>} Path of the written file, or null when the save dialog was cancelled
   * @throws {Error} If the format is unsupported, the input file is missing, or the conversion fails
   */
  async convert({ inputPath, format = "pdf", outputName, showDialog = false } = {}) {
    // A non-string format would otherwise surface as an opaque TypeError.
    if (typeof format !== "string") {
      throw new Error(`Unsupported format: ${String(format)}`);
    }

    // Normalise once ("PDF", ".pdf" -> "pdf"); every later step uses this value.
    const targetFormat = format.toLowerCase().replace(".", "");
    if (!SUPPORTED_FORMATS.includes(targetFormat)) {
      throw new Error(`Unsupported format: ${targetFormat}`);
    }

    if (!fs.existsSync(inputPath)) {
      throw new Error(`Input file not found: ${inputPath}`);
    }

    const ext = path.extname(inputPath).toLowerCase();
    const baseName = outputName || path.basename(inputPath, ext);

    let outputFile;
    if (showDialog) {
      // Ask the user for the target path via the native dialog.
      outputFile = await showSaveDialog(baseName, targetFormat);
      if (!outputFile) {
        console.log("Speichervorgang abgebrochen.");
        return null;
      }
    } else {
      // Use the default output directory.
      outputFile = path.join(outputDir, `${baseName}.${targetFormat}`);
    }

    // Remove <think> sections if present.
    const htmlContent = fs
      .readFileSync(inputPath, "utf8")
      .replace(/<think>[\s\S]*?<\/think>/gi, "");

    switch (targetFormat) {
      case "html":
        fs.writeFileSync(outputFile, htmlContent, "utf8");
        break;
      case "pdf":
        await this.htmlToPDF(htmlContent, outputFile);
        break;
      case "docx":
        await this.htmlToDOCX(htmlContent, outputFile);
        break;
      case "txt":
        fs.writeFileSync(outputFile, this.htmlToTXT(htmlContent), "utf8");
        break;
      default:
        // Defensive only: SUPPORTED_FORMATS was already checked above.
        throw new Error(`Unsupported format: ${targetFormat}`);
    }

    console.log(`Erfolgreich gespeichert: ${outputFile}`);
    return outputFile;
  },

  /**
   * HTML → PDF: renders the HTML in headless Chromium via Puppeteer and
   * prints it as an A4 page with 20mm margins.
   *
   * @param {string} html - HTML markup to render
   * @param {string} outputPath - Destination path of the PDF file
   * @returns {Promise<void>}
   */
  async htmlToPDF(html, outputPath) {
    let browser;
    try {
      browser = await puppeteer.launch({
        headless: true,
        args: ["--no-sandbox", "--disable-setuid-sandbox"],
      });

      const page = await browser.newPage();
      await page.setContent(html, { waitUntil: "networkidle0" });

      await page.pdf({
        path: outputPath,
        format: "A4",
        printBackground: true,
        margin: {
          top: "20mm",
          right: "20mm",
          bottom: "20mm",
          left: "20mm",
        },
      });
    } finally {
      // Close the browser even when rendering failed; `browser` stays
      // undefined if the launch itself threw.
      if (browser) {
        await browser.close();
      }
    }
  },

  /**
   * HTML → DOCX: converts the HTML with html-to-docx and writes the result.
   *
   * Call shape per the html-to-docx README:
   * HTMLtoDOCX(htmlString, headerHTMLString, documentOptions, footerHTMLString)
   *
   * @param {string} html - HTML markup to convert
   * @param {string} outputPath - Destination path of the .docx file
   * @returns {Promise<void>}
   * @throws {Error} "DOCX conversion failed: …" with the original error attached as `cause`
   */
  async htmlToDOCX(html, outputPath) {
    try {
      const buffer = await htmlToDocx(html, null, {
        table: { row: { cantSplit: true } },
      });
      fs.writeFileSync(outputPath, buffer);
    } catch (err) {
      // Keep the underlying error (and its stack) reachable for callers.
      throw new Error(`DOCX conversion failed: ${err.message}`, { cause: err });
    }
  },

  /**
   * HTML → TXT: a regex-based plain-text extraction.
   *
   * Drops comments and the contents of <script>/<style>, turns block-level
   * tags and <br> into separators, removes the remaining tags, decodes basic
   * entities and collapses all whitespace runs into single spaces.
   * For more advanced extraction consider a library such as `html-to-text`.
   *
   * @param {string} html - HTML markup (null/undefined is treated as empty)
   * @returns {string} Plain text on a single line, trimmed
   */
  htmlToTXT(html) {
    const markup = html == null ? "" : String(html);
    const withoutTags = markup
      .replace(/<!--[\s\S]*?-->/g, " ")
      // Script and style bodies are code, not document text.
      .replace(/<(script|style)\b[^>]*>[\s\S]*?<\/\1\s*>/gi, " ")
      .replace(BLOCK_TAG_PATTERN, " ")
      // Inline tags vanish without a separator so "<b>bo</b>ld" stays "bold".
      .replace(/<[^>]+>/g, "");

    // Decode only after tag removal so "&lt;b&gt;" survives as literal "<b>".
    return decodeHtmlEntities(withoutTags).replace(/\s+/g, " ").trim();
  },
};
module.exports = module_exports;

// CLI usage with dialog: when this file is executed directly, convert the
// given input file and let the user choose the destination via the native
// save dialog.
if (require.main === module) {
  const runCli = async () => {
    const [inputPath, requestedFormat] = process.argv.slice(2);

    // No input file given: print the usage text and exit with an error code.
    if (inputPath === undefined) {
      const usageLines = [
        "Usage: node htmlDocumentConverter.js <input.html> [format]",
        "Formats: pdf (default), docx, html, txt",
        "",
        'Ein nativer "Speichern unter" Dialog wird automatisch geöffnet.',
      ];
      for (const line of usageLines) {
        console.log(line);
      }
      process.exit(1);
    }

    const format = requestedFormat || "pdf";

    try {
      await module_exports.convert({ inputPath, format, showDialog: true });
    } catch (err) {
      console.error("Konvertierung fehlgeschlagen:", err.message);
      process.exit(1);
    }
  };

  runCli();
}