const fs = require("fs");
const path = require("path");
const puppeteer = require("puppeteer");
const htmlToDocx = require("html-to-docx");
const { execSync, execFileSync } = require("child_process");
const os = require("os");

// Default target directory for converted documents when no save dialog is used.
const outputDir = path.join(__dirname, "../../../storage/documents");
if (!fs.existsSync(outputDir)) {
  fs.mkdirSync(outputDir, { recursive: true });
}

// Output formats accepted by `convert`.
const SUPPORTED_FORMATS = ["pdf", "docx", "html", "txt"];

// Elements whose boundaries separate words in plain-text output.
const BLOCK_TAG_PATTERN =
  /<\/?(?:address|article|aside|blockquote|br|dd|div|dl|dt|figcaption|figure|footer|h[1-6]|header|hr|li|main|nav|ol|p|pre|section|table|tbody|td|tfoot|th|thead|title|tr|ul)\b[^>]*>/gi;

// Named character references decoded by `htmlToTXT`.
const NAMED_ENTITIES = new Map([
  ["nbsp", " "],
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
]);

/**
 * Escapes a value for use inside a double-quoted AppleScript string literal.
 * @param {string} value
 * @returns {string}
 */
function escapeForAppleScript(value) {
  return String(value).replace(/\\/g, "\\\\").replace(/"/g, '\\"');
}

/**
 * Escapes a value for use inside a single-quoted PowerShell string literal.
 * PowerShell also accepts typographic single quotes as string delimiters,
 * so those are doubled as well.
 * @param {string} value
 * @returns {string}
 */
function escapeForPowerShell(value) {
  return String(value).replace(/['\u2018\u2019\u201A\u201B]/g, "$&$&");
}

/**
 * macOS: asks for a target path via an AppleScript "choose file name" dialog.
 * @param {string} suggestedName - Pre-filled file name including extension.
 * @returns {string|null} Chosen POSIX path, or null when the user cancelled.
 */
function showMacSaveDialog(suggestedName) {
  const script = [
    `set defaultName to "${escapeForAppleScript(suggestedName)}"`,
    'set theFile to choose file name with prompt "Dokument speichern als:" default name defaultName',
    "POSIX path of theFile",
  ].join("\n");

  try {
    // No shell is involved, so the file name cannot break out of the command.
    return execFileSync("osascript", ["-e", script], {
      encoding: "utf8",
    }).trim();
  } catch (err) {
    if (err.status === 1) return null; // User cancelled
    throw err;
  }
}

/**
 * Windows: asks for a target path via a WinForms SaveFileDialog in PowerShell.
 * @param {string} defaultName - Suggested base name (may be URI-encoded).
 * @param {string} format - File extension without the dot.
 * @returns {string|null} Chosen path, or null when the user cancelled.
 * @throws {Error} When PowerShell fails for a reason other than exit status 1.
 */
function showWindowsSaveDialog(defaultName, format) {
  let decodedName;
  try {
    decodedName = decodeURIComponent(defaultName);
  } catch (err) {
    // A literal "%" in the name is not valid URI encoding; use the raw name.
    if (!(err instanceof URIError)) throw err;
    decodedName = defaultName;
  }

  const psName = escapeForPowerShell(decodedName);
  const psFormat = escapeForPowerShell(format);
  const script = [
    "Add-Type -AssemblyName System.Windows.Forms;",
    "$dialog = New-Object System.Windows.Forms.SaveFileDialog;",
    `$dialog.FileName = '${psName}.${psFormat}';`,
    `$dialog.Filter = '${psFormat.toUpperCase()} Dateien (*.${psFormat})|*.${psFormat}|Alle Dateien (*.*)|*.*';`,
    "$dialog.Title = 'Dokument speichern als';",
    "$result = $dialog.ShowDialog();",
    "if ($result -eq 'OK') { $dialog.FileName }",
  ]
    .join(" ")
    .replace(/\r?\n/g, " ");

  try {
    const result = execFileSync(
      "powershell",
      ["-NoProfile", "-Command", script],
      { encoding: "utf8" },
    );
    // A cancelled dialog prints nothing.
    return result.trim() || null;
  } catch (err) {
    if (err.status === 1) return null; // Treated as cancel
    throw new Error(`Save dialog failed: ${err.message}`, { cause: err });
  }
}

/**
 * Linux: asks for a target path via zenity, then kdialog. When neither tool
 * can be used, falls back to a path in the user's Downloads folder.
 * @param {string} suggestedName - Pre-filled file name including extension.
 * @returns {string|null} Chosen path, fallback path, or null when cancelled.
 */
function showLinuxSaveDialog(suggestedName) {
  const fallbackPath = path.join(os.homedir(), "Downloads", suggestedName);

  // Without a display server neither dialog tool can open a window.
  if (!process.env.DISPLAY && !process.env.WAYLAND_DISPLAY) {
    return fallbackPath;
  }

  const dialogCommands = [
    [
      "zenity",
      [
        "--file-selection",
        "--save",
        "--confirm-overwrite",
        `--filename=${suggestedName}`,
      ],
    ],
    ["kdialog", ["--getsavefilename", ".", suggestedName]],
  ];

  for (const [command, args] of dialogCommands) {
    try {
      return execFileSync(command, args, { encoding: "utf8" }).trim();
    } catch (err) {
      if (err.code === "ENOENT") continue; // Tool not installed, try the next one
      if (err.status === 1) return null; // Both tools exit with 1 on cancel
      // Any other failure: try the next tool, then the fallback path.
    }
  }

  return fallbackPath;
}

/**
 * Opens the platform's native "save as" dialog.
 * @param {string} defaultName - Suggested file name without extension.
 * @param {string} format - File extension without the dot.
 * @returns {Promise<string|null>} Target path, or null when the user cancelled.
 */
async function showSaveDialog(defaultName, format) {
  const platform = os.platform();
  if (platform === "darwin") {
    return showMacSaveDialog(`${defaultName}.${format}`);
  }
  if (platform === "win32") {
    return showWindowsSaveDialog(defaultName, format);
  }
  return showLinuxSaveDialog(`${defaultName}.${format}`);
}

/**
 * Removes LLM reasoning wrapped in <think>…</think> from the markup.
 * @param {string} html
 * @returns {string}
 */
function stripThinkBlocks(html) {
  const withoutPairs = html.replace(/<think\b[^>]*>[\s\S]*?<\/think>/gi, "");
  // A closing tag without an opener: everything before it is reasoning output.
  return withoutPairs.replace(/^[\s\S]*<\/think>/i, "");
}

/**
 * Decodes one character reference matched by the entity pattern in `htmlToTXT`.
 * Unknown or invalid references are returned unchanged.
 * @param {string} match - The full reference, e.g. "&amp;".
 * @param {string} body - The part between "&" and ";".
 * @returns {string}
 */
function decodeEntity(match, body) {
  if (body[0] !== "#") {
    return NAMED_ENTITIES.has(body) ? NAMED_ENTITIES.get(body) : match;
  }
  const isHex = body[1] === "x" || body[1] === "X";
  const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
  if (Number.isNaN(codePoint) || codePoint < 0 || codePoint > 0x10ffff) {
    return match;
  }
  return String.fromCodePoint(codePoint);
}

const module_exports = {
  name: "htmlDocumentConverter",
  type: "converter",
  displayname: "HTML Document Converter",
  description: "Converts LLM-generated HTML to PDF, DOCX, TXT, or HTML",

  /**
   * Main conversion function.
   * @param {Object} options
   * @param {string} options.inputPath - Path to the HTML input
   * @param {string} [options.format] - 'pdf' | 'docx' | 'html' | 'txt' (default 'pdf'); case and a leading dot are ignored
   * @param {string} [options.outputName] - Optional output filename (without extension)
   * @param {boolean} [options.showDialog] - Show save dialog (default: false in module mode, true in CLI mode)
   * @returns {Promise<string|null>} Path of the written file, or null when the save dialog was cancelled
   * @throws {Error} On an unsupported format, a missing input file, or a failed conversion
   */
  async convert({ inputPath, format = "pdf", outputName, showDialog = false }) {
    const targetFormat = String(format).toLowerCase().replace(".", "");

    if (!SUPPORTED_FORMATS.includes(targetFormat)) {
      throw new Error(`Unsupported format: ${targetFormat}`);
    }
    if (!fs.existsSync(inputPath)) {
      throw new Error(`Input file not found: ${inputPath}`);
    }

    // path.parse strips the extension regardless of its letter case.
    const baseName = outputName || path.parse(inputPath).name;

    let outputFile;
    if (showDialog) {
      // Let the user pick the target through the native dialog.
      outputFile = await showSaveDialog(baseName, targetFormat);
      if (!outputFile) {
        console.log("Speichervorgang abgebrochen.");
        return null;
      }
    } else {
      // Use the default output directory.
      outputFile = path.join(outputDir, `${baseName}.${targetFormat}`);
    }

    // The fallback Downloads folder (or a removed output directory) may not exist.
    fs.mkdirSync(path.dirname(outputFile), { recursive: true });

    const htmlContent = stripThinkBlocks(fs.readFileSync(inputPath, "utf8"));

    switch (targetFormat) {
      case "html":
        fs.writeFileSync(outputFile, htmlContent, "utf8");
        break;
      case "pdf":
        await this.htmlToPDF(htmlContent, outputFile);
        break;
      case "docx":
        await this.htmlToDOCX(htmlContent, outputFile);
        break;
      case "txt":
        fs.writeFileSync(outputFile, this.htmlToTXT(htmlContent), "utf8");
        break;
    }

    console.log(`Erfolgreich gespeichert: ${outputFile}`);
    return outputFile;
  },

  /**
   * HTML → PDF: renders the markup in headless Chromium and prints it as A4.
   * @param {string} html - Markup to render
   * @param {string} outputPath - Target path of the PDF
   * @returns {Promise<void>}
   */
  async htmlToPDF(html, outputPath) {
    let browser;
    try {
      // NOTE(review): the Chromium sandbox is disabled here; confirm the HTML
      // rendered by this converter comes from a trusted source.
      browser = await puppeteer.launch({
        headless: true,
        args: ["--no-sandbox", "--disable-setuid-sandbox"],
      });
      const page = await browser.newPage();
      await page.setContent(html, { waitUntil: "networkidle0" });
      await page.pdf({
        path: outputPath,
        format: "A4",
        printBackground: true,
        margin: {
          top: "20mm",
          right: "20mm",
          bottom: "20mm",
          left: "20mm",
        },
      });
    } finally {
      // Always release the browser process, even when rendering failed.
      if (browser) {
        await browser.close();
      }
    }
  },

  /**
   * HTML → DOCX: converts the markup with html-to-docx and writes the buffer.
   * @param {string} html - Markup to convert
   * @param {string} outputPath - Target path of the .docx file
   * @returns {Promise<void>}
   * @throws {Error} When the conversion or the write fails
   */
  async htmlToDOCX(html, outputPath) {
    try {
      // Call shape: htmlToDocx(htmlString, headerHTMLString, documentOptions, footerHTMLString)
      const buffer = await htmlToDocx(html, null, {
        table: { row: { cantSplit: true } },
      });
      fs.writeFileSync(outputPath, buffer);
    } catch (err) {
      throw new Error(`DOCX conversion failed: ${err.message}`, { cause: err });
    }
  },

  /**
   * HTML → TXT: a simple plain-text extraction. Drops comments and
   * script/style contents, strips tags, decodes common character references
   * and collapses all whitespace, so the result is a single line.
   * Consider a dedicated library such as `html-to-text` for richer output.
   * @param {string} html - Markup to convert
   * @returns {string} Whitespace-collapsed text content
   */
  htmlToTXT(html) {
    return (
      html
        // Comments and non-rendered element contents are not text.
        .replace(/<!--[\s\S]*?-->/g, " ")
        .replace(/<(script|style)\b[^>]*>[\s\S]*?<\/\1\s*>/gi, " ")
        // Block boundaries separate words; inline tags do not.
        .replace(BLOCK_TAG_PATTERN, " ")
        .replace(/<[^>]+>/g, "")
        // Decode after stripping so escaped markup survives as text.
        .replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, decodeEntity)
        // Convert multiple whitespace into single spaces
        .replace(/\s+/g, " ")
        .trim()
    );
  },
};

module.exports = module_exports;

// CLI usage: always asks for the target through the native save dialog.
if (require.main === module) {
  (async () => {
    const args = process.argv.slice(2);
    if (args.length < 1) {
      console.log("Usage: node htmlDocumentConverter.js <input.html> [format]");
      console.log("Formats: pdf (default), docx, html, txt");
      console.log("");
      console.log(
        'Ein nativer "Speichern unter" Dialog wird automatisch geöffnet.',
      );
      process.exit(1);
    }

    const inputPath = args[0];
    const format = args[1] || "pdf";

    try {
      await module_exports.convert({
        inputPath,
        format,
        showDialog: true,
      });
    } catch (err) {
      console.error("Konvertierung fehlgeschlagen:", err.message);
      process.exit(1);
    }
  })();
}