diff --git a/services/modules/convert/convert.js b/services/modules/convert/convert.js index 508c24f..40045eb 100644 --- a/services/modules/convert/convert.js +++ b/services/modules/convert/convert.js @@ -1,35 +1,37 @@ -const fs = require('fs'); -const path = require('path'); -const puppeteer = require('puppeteer'); -const htmlToDocx = require('html-to-docx'); -const { execSync } = require('child_process'); -const os = require('os'); +const fs = require("fs"); +const path = require("path"); +const puppeteer = require("puppeteer"); +const htmlToDocx = require("html-to-docx"); +const { execSync } = require("child_process"); +const os = require("os"); const outputDir = path.join(__dirname, "../../../storage/documents"); if (!fs.existsSync(outputDir)) { - fs.mkdirSync(outputDir, { recursive: true }); + fs.mkdirSync(outputDir, { recursive: true }); } async function showSaveDialog(defaultName, format) { - const platform = os.platform(); - - if (platform === 'darwin') { - // macOS - const applescript = ` + const platform = os.platform(); + + if (platform === "darwin") { + // macOS + const applescript = ` set defaultName to "${defaultName}.${format}" set theFile to choose file name with prompt "Dokument speichern als:" default name defaultName POSIX path of theFile `; - - try { - const result = execSync(`osascript -e '${applescript}'`, { encoding: 'utf8' }); - return result.trim(); - } catch (err) { - if (err.status === 1) return null; // User canceled - throw err; - } - } else if (platform === 'win32') { + + try { + const result = execSync(`osascript -e '${applescript}'`, { + encoding: "utf8", + }); + return result.trim(); + } catch (err) { + if (err.status === 1) return null; // User canceled + throw err; + } + } else if (platform === "win32") { const safeName = decodeURIComponent(defaultName); const powershell = ` @@ -43,161 +45,192 @@ async function showSaveDialog(defaultName, format) { `; try { - const result = execSync( - `powershell -NoProfile -Command "${powershell.replace(/\r?\n/g, ' ')}"`, - { encoding: 'utf8' } - ); - return result.trim() || null; + const result = execSync( + `powershell -NoProfile -Command "${powershell.replace(/\r?\n/g, " ")}"`, + { encoding: "utf8" }, + ); + return result.trim() || null; } catch (err) { - if (err.status === 1) return null; // User cancelled - throw new Error("Save dialog failed: " + err.message); + if (err.status === 1) return null; // User cancelled + throw new Error("Save dialog failed: " + err.message); } - } else { - // Linux - zenity oder kdialog - try { - const result = execSync( - `zenity --file-selection --save --confirm-overwrite --filename="${defaultName}.${format}"`, - { encoding: 'utf8' } - ); - return result.trim(); - } catch (err) { - try { - const result = execSync( - `kdialog --getsavefilename . "${defaultName}.${format}"`, - { encoding: 'utf8' } - ); - return result.trim(); - } catch (err2) { - // Fallback - return path.join(os.homedir(), 'Downloads', `${defaultName}.${format}`); - } - } + } else { + // Linux - zenity oder kdialog + try { + const result = execSync( + `zenity --file-selection --save --confirm-overwrite --filename="${defaultName}.${format}"`, + { encoding: "utf8" }, + ); + return result.trim(); + } catch (err) { + try { + const result = execSync( + `kdialog --getsavefilename . "${defaultName}.${format}"`, + { encoding: "utf8" }, + ); + return result.trim(); + } catch (err2) { + // Fallback + return path.join(os.homedir(), "Downloads", `${defaultName}.${format}`); + } } + } } const module_exports = { - name: "htmlDocumentConverter", - type: "converter", - displayname: "HTML Document Converter", - description: "Converts LLM-generated HTML to PDF, DOCX, TXT, or HTML", + name: "htmlDocumentConverter", + type: "converter", + displayname: "HTML Document Converter", + description: "Converts LLM-generated HTML to PDF, DOCX, TXT, or HTML", - /** - * Main conversion function - * @param {Object} options - * @param {string} options.inputPath - Path to the HTML input - * @param {string} options.format - 'pdf' | 'docx' | 'html' | 'txt' - * @param {string} [options.outputName] - Optional output filename (without extension) - * @param {boolean} [options.showDialog] - Show save dialog (default: false in module mode, true in CLI mode) - */ -async convert({ inputPath, format = 'pdf', outputName, showDialog = false }) { + /** + * Main conversion function + * @param {Object} options + * @param {string} options.inputPath - Path to the HTML input + * @param {string} options.format - 'pdf' | 'docx' | 'html' | 'txt' + * @param {string} [options.outputName] - Optional output filename (without extension) + * @param {boolean} [options.showDialog] - Show save dialog (default: false in module mode, true in CLI mode) + */ + async convert({ inputPath, format = "pdf", outputName, showDialog = false }) { + format = format.toLowerCase().replace(".", ""); // <-- FIX - format = format.toLowerCase().replace('.', ''); // <-- FIX + if (!["pdf", "docx", "html", "txt"].includes(format)) { + throw new Error(`Unsupported format: ${format}`); + } + if (!fs.existsSync(inputPath)) { + throw new Error(`Input file not found: ${inputPath}`); + } - if (!['pdf', 'docx', 'html', 'txt'].includes(format)) { + const ext = path.extname(inputPath).toLowerCase(); + const baseName = outputName || path.basename(inputPath, ext); + + let outputFile; + + if (showDialog) { + // Zeige nativen Dialog + outputFile = await showSaveDialog(baseName, format); + if (!outputFile) { + console.log("Speichervorgang abgebrochen."); + return null; + } + } else { + // Nutze Standard-Ausgabeverzeichnis + outputFile = path.join(outputDir, `${baseName}.${format.toLowerCase()}`); + } + + let htmlContent = fs.readFileSync(inputPath, "utf8"); + + // Remove tags if present + htmlContent = htmlContent.replace(/[\s\S]*?<\/think>/gi, ""); + + switch (format.toLowerCase()) { + case "html": + fs.writeFileSync(outputFile, htmlContent, "utf8"); + break; + case "pdf": + await this.htmlToPDF(htmlContent, outputFile); + break; + case "docx": + await this.htmlToDOCX(htmlContent, outputFile); + break; + case "txt": + fs.writeFileSync(outputFile, this.htmlToTXT(htmlContent), "utf8"); + break; + default: throw new Error(`Unsupported format: ${format}`); } - if (!fs.existsSync(inputPath)) { - throw new Error(`Input file not found: ${inputPath}`); - } - const ext = path.extname(inputPath).toLowerCase(); - const baseName = outputName || path.basename(inputPath, ext); - - let outputFile; - - if (showDialog) { - // Zeige nativen Dialog - outputFile = await showSaveDialog(baseName, format); - if (!outputFile) { - console.log('Speichervorgang abgebrochen.'); - return null; - } - } else { - // Nutze Standard-Ausgabeverzeichnis - outputFile = path.join(outputDir, `${baseName}.${format.toLowerCase()}`); - } + console.log(`Erfolgreich gespeichert: ${outputFile}`); + return outputFile; + }, - let htmlContent = fs.readFileSync(inputPath, 'utf8'); + // HTML → PDF + async htmlToPDF(html, outputPath) { + let browser; + try { + browser = await puppeteer.launch({ + headless: true, + args: ["--no-sandbox", "--disable-setuid-sandbox"], + }); - // Remove tags if present - htmlContent = htmlContent.replace(/[\s\S]*?<\/think>/gi, ''); - - switch (format.toLowerCase()) { - case 'html': - fs.writeFileSync(outputFile, htmlContent, 'utf8'); - break; - case 'pdf': - await this.htmlToPDF(htmlContent, outputFile); - break; - case 'docx': - await this.htmlToDOCX(htmlContent, outputFile); - break; - case 'txt': - fs.writeFileSync(outputFile, this.htmlToTXT(htmlContent), 'utf8'); - break; - default: - throw new Error(`Unsupported format: ${format}`); - } - - console.log(`Erfolgreich gespeichert: ${outputFile}`); - return outputFile; - }, - - // HTML → PDF - async htmlToPDF(html, outputPath) { - const browser = await puppeteer.launch({ - headless: true, - args: ['--no-sandbox', '--disable-setuid-sandbox'] - }); - const page = await browser.newPage(); - await page.setContent(html, { waitUntil: 'networkidle0' }); - await page.pdf({ - path: outputPath, - format: 'A4', - printBackground: true, - margin: { top: '20mm', right: '20mm', bottom: '20mm', left: '20mm' } - }); + const page = await browser.newPage(); + await page.setContent(html, { waitUntil: "networkidle0" }); + await page.pdf({ + path: outputPath, + format: "A4", + printBackground: true, + margin: { + top: "20mm", + right: "20mm", + bottom: "20mm", + left: "20mm", + }, + }); + } finally { + if (browser) { await browser.close(); - }, - - // HTML → DOCX - async htmlToDOCX(html, outputPath) { - const buffer = await htmlToDocx(html); - fs.writeFileSync(outputPath, buffer); - }, - - // HTML → TXT (rudimentär) - htmlToTXT(html) { - return html.replace(/<[^>]*>/g, '').replace(/\s+\n/g, '\n').trim(); + } } + }, + + // HTML → DOCX + async htmlToDOCX(html, outputPath) { + try { + // html‑to‑docx library converts HTML string into a Word .docx buffer + // Usage from html‑to‑docx docs: + // await HTMLtoDOCX(htmlString, headerHTMLString, documentOptions, footerHTMLString) [oai_citation:0‡GitHub](https://github.com/privateOmega/html-to-docx?utm_source=chatgpt.com) + const buffer = await htmlToDocx(html, null, { + table: { row: { cantSplit: true } }, + }); + fs.writeFileSync(outputPath, buffer); + } catch (err) { + throw new Error(`DOCX conversion failed: ${err.message}`); + } + }, + + // HTML → TXT + htmlToTXT(html) { + // A decent plain text conversion: strip tags and collapse whitespace + // If you want more advanced extraction consider using a library like `html-to-text` or `strip-html` [oai_citation:1‡GitHub](https://github.com/html-to-text/node-html-to-text?utm_source=chatgpt.com) + return ( + html + // Remove all tags + .replace(/<[^>]+>/g, "") + // Convert multiple whitespace into single spaces + .replace(/\s+/g, " ") + .trim() + ); + }, }; module.exports = module_exports; // CLI usage mit Dialog if (require.main === module) { - (async () => { - const args = process.argv.slice(2); - if (args.length < 1) { - console.log('Usage: node htmlDocumentConverter.js [format]'); - console.log('Formats: pdf (default), docx, html, txt'); - console.log(''); - console.log('Ein nativer "Speichern unter" Dialog wird automatisch geöffnet.'); - process.exit(1); - } + (async () => { + const args = process.argv.slice(2); + if (args.length < 1) { + console.log("Usage: node htmlDocumentConverter.js [format]"); + console.log("Formats: pdf (default), docx, html, txt"); + console.log(""); + console.log( + 'Ein nativer "Speichern unter" Dialog wird automatisch geöffnet.', + ); + process.exit(1); + } - const inputPath = args[0]; - const format = args[1] || 'pdf'; + const inputPath = args[0]; + const format = args[1] || "pdf"; - try { - await module_exports.convert({ - inputPath, - format, - showDialog: true - }); - } catch (err) { - console.error('Konvertierung fehlgeschlagen:', err.message); - process.exit(1); - } - })(); -} \ No newline at end of file + try { + await module_exports.convert({ + inputPath, + format, + showDialog: true, + }); + } catch (err) { + console.error("Konvertierung fehlgeschlagen:", err.message); + process.exit(1); + } + })(); +}