mirror of
https://gitlab.rlp.net/proj-wise2526-video2document/video2document.git
synced 2026-06-15 18:01:52 +02:00
Refactor code formatting and improve error handling in htmlDocumentConverter
This commit is contained in:
+185
-152
@@ -1,35 +1,37 @@
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const puppeteer = require('puppeteer');
|
||||
const htmlToDocx = require('html-to-docx');
|
||||
const { execSync } = require('child_process');
|
||||
const os = require('os');
|
||||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
const puppeteer = require("puppeteer");
|
||||
const htmlToDocx = require("html-to-docx");
|
||||
const { execSync } = require("child_process");
|
||||
const os = require("os");
|
||||
|
||||
// Default target directory for converted documents (used whenever no save
// dialog is requested). Resolved relative to this module's own location.
const outputDir = path.join(__dirname, "../../../storage/documents");

// Make sure the directory, including any missing parents, exists at load time.
fs.existsSync(outputDir) || fs.mkdirSync(outputDir, { recursive: true });
|
||||
|
||||
async function showSaveDialog(defaultName, format) {
|
||||
const platform = os.platform();
|
||||
const platform = os.platform();
|
||||
|
||||
if (platform === 'darwin') {
|
||||
// macOS
|
||||
const applescript = `
|
||||
if (platform === "darwin") {
|
||||
// macOS
|
||||
const applescript = `
|
||||
set defaultName to "${defaultName}.${format}"
|
||||
set theFile to choose file name with prompt "Dokument speichern als:" default name defaultName
|
||||
POSIX path of theFile
|
||||
`;
|
||||
|
||||
try {
|
||||
const result = execSync(`osascript -e '${applescript}'`, { encoding: 'utf8' });
|
||||
return result.trim();
|
||||
} catch (err) {
|
||||
if (err.status === 1) return null; // User canceled
|
||||
throw err;
|
||||
}
|
||||
} else if (platform === 'win32') {
|
||||
try {
|
||||
const result = execSync(`osascript -e '${applescript}'`, {
|
||||
encoding: "utf8",
|
||||
});
|
||||
return result.trim();
|
||||
} catch (err) {
|
||||
if (err.status === 1) return null; // User canceled
|
||||
throw err;
|
||||
}
|
||||
} else if (platform === "win32") {
|
||||
const safeName = decodeURIComponent(defaultName);
|
||||
|
||||
const powershell = `
|
||||
@@ -43,161 +45,192 @@ async function showSaveDialog(defaultName, format) {
|
||||
`;
|
||||
|
||||
try {
|
||||
const result = execSync(
|
||||
`powershell -NoProfile -Command "${powershell.replace(/\r?\n/g, ' ')}"`,
|
||||
{ encoding: 'utf8' }
|
||||
);
|
||||
return result.trim() || null;
|
||||
const result = execSync(
|
||||
`powershell -NoProfile -Command "${powershell.replace(/\r?\n/g, " ")}"`,
|
||||
{ encoding: "utf8" },
|
||||
);
|
||||
return result.trim() || null;
|
||||
} catch (err) {
|
||||
if (err.status === 1) return null; // User cancelled
|
||||
throw new Error("Save dialog failed: " + err.message);
|
||||
if (err.status === 1) return null; // User cancelled
|
||||
throw new Error("Save dialog failed: " + err.message);
|
||||
}
|
||||
} else {
|
||||
// Linux - zenity oder kdialog
|
||||
try {
|
||||
const result = execSync(
|
||||
`zenity --file-selection --save --confirm-overwrite --filename="${defaultName}.${format}"`,
|
||||
{ encoding: 'utf8' }
|
||||
);
|
||||
return result.trim();
|
||||
} catch (err) {
|
||||
try {
|
||||
const result = execSync(
|
||||
`kdialog --getsavefilename . "${defaultName}.${format}"`,
|
||||
{ encoding: 'utf8' }
|
||||
);
|
||||
return result.trim();
|
||||
} catch (err2) {
|
||||
// Fallback
|
||||
return path.join(os.homedir(), 'Downloads', `${defaultName}.${format}`);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Linux - zenity oder kdialog
|
||||
try {
|
||||
const result = execSync(
|
||||
`zenity --file-selection --save --confirm-overwrite --filename="${defaultName}.${format}"`,
|
||||
{ encoding: "utf8" },
|
||||
);
|
||||
return result.trim();
|
||||
} catch (err) {
|
||||
try {
|
||||
const result = execSync(
|
||||
`kdialog --getsavefilename . "${defaultName}.${format}"`,
|
||||
{ encoding: "utf8" },
|
||||
);
|
||||
return result.trim();
|
||||
} catch (err2) {
|
||||
// Fallback
|
||||
return path.join(os.homedir(), "Downloads", `${defaultName}.${format}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const module_exports = {
  name: "htmlDocumentConverter",
  type: "converter",
  displayname: "HTML Document Converter",
  description: "Converts LLM-generated HTML to PDF, DOCX, TXT, or HTML",

  /**
   * Main conversion function: reads an HTML file, strips `<think>` sections
   * and writes it out in the requested format.
   *
   * @param {Object} options
   * @param {string} options.inputPath - Path to the HTML input
   * @param {string} [options.format] - 'pdf' | 'docx' | 'html' | 'txt'
   *   (case-insensitive, a leading dot is tolerated; default: 'pdf')
   * @param {string} [options.outputName] - Optional output filename (without extension)
   * @param {boolean} [options.showDialog] - Show save dialog (default: false in module mode, true in CLI mode)
   * @returns {Promise<string|null>} Path of the written file, or null when
   *   the save dialog was cancelled.
   * @throws {Error} If the format is unsupported, the input file does not
   *   exist, or the conversion itself fails.
   */
  async convert({ inputPath, format = "pdf", outputName, showDialog = false }) {
    // Normalise once ("PDF", ".pdf" -> "pdf") instead of re-lowercasing later.
    const targetFormat = String(format).toLowerCase().replace(".", "");

    if (!["pdf", "docx", "html", "txt"].includes(targetFormat)) {
      throw new Error(`Unsupported format: ${targetFormat}`);
    }
    if (!fs.existsSync(inputPath)) {
      throw new Error(`Input file not found: ${inputPath}`);
    }

    // Strip the extension exactly as it is spelled in the path. Lowercasing
    // it first made path.basename() miss upper-case extensions, so
    // "REPORT.HTML" used to produce "REPORT.HTML.pdf".
    const baseName =
      outputName || path.basename(inputPath, path.extname(inputPath));

    let outputFile;
    if (showDialog) {
      // Native "save as" dialog; a falsy result means the user cancelled.
      outputFile = await showSaveDialog(baseName, targetFormat);
      if (!outputFile) {
        console.log("Speichervorgang abgebrochen.");
        return null;
      }
    } else {
      // Fall back to the default output directory.
      outputFile = path.join(outputDir, `${baseName}.${targetFormat}`);
    }

    // Remove <think> sections if present.
    const htmlContent = fs
      .readFileSync(inputPath, "utf8")
      .replace(/<think>[\s\S]*?<\/think>/gi, "");

    switch (targetFormat) {
      case "html":
        fs.writeFileSync(outputFile, htmlContent, "utf8");
        break;
      case "pdf":
        await this.htmlToPDF(htmlContent, outputFile);
        break;
      case "docx":
        await this.htmlToDOCX(htmlContent, outputFile);
        break;
      case "txt":
        fs.writeFileSync(outputFile, this.htmlToTXT(htmlContent), "utf8");
        break;
      default:
        // Defensive only: the format was validated above.
        throw new Error(`Unsupported format: ${targetFormat}`);
    }

    console.log(`Erfolgreich gespeichert: ${outputFile}`);
    return outputFile;
  },

  /**
   * HTML -> PDF: renders the markup in headless Chromium via puppeteer and
   * prints it as an A4 page with 20mm margins.
   *
   * @param {string} html - HTML markup to render
   * @param {string} outputPath - Destination path of the PDF file
   * @returns {Promise<void>}
   */
  async htmlToPDF(html, outputPath) {
    let browser;
    try {
      // NOTE(review): the Chromium sandbox is disabled by these flags while
      // the rendered HTML is LLM-generated - confirm this is acceptable for
      // the deployment environment.
      browser = await puppeteer.launch({
        headless: true,
        args: ["--no-sandbox", "--disable-setuid-sandbox"],
      });

      const page = await browser.newPage();
      await page.setContent(html, { waitUntil: "networkidle0" });
      await page.pdf({
        path: outputPath,
        format: "A4",
        printBackground: true,
        margin: {
          top: "20mm",
          right: "20mm",
          bottom: "20mm",
          left: "20mm",
        },
      });
    } finally {
      // Always release the browser process, even when rendering failed.
      if (browser) {
        await browser.close();
      }
    }
  },

  /**
   * HTML -> DOCX: converts the markup with the html-to-docx library and
   * writes the resulting buffer to disk.
   *
   * @param {string} html - HTML markup to convert
   * @param {string} outputPath - Destination path of the .docx file
   * @returns {Promise<void>}
   * @throws {Error} "DOCX conversion failed: ..." with the underlying error
   *   attached as `cause`.
   */
  async htmlToDOCX(html, outputPath) {
    try {
      // Call shape per the html-to-docx docs:
      //   HTMLtoDOCX(htmlString, headerHTMLString, documentOptions, footerHTMLString)
      // The cantSplit option presumably keeps a table row from breaking
      // across pages - verify against the library documentation.
      const buffer = await htmlToDocx(html, null, {
        table: { row: { cantSplit: true } },
      });
      fs.writeFileSync(outputPath, buffer);
    } catch (err) {
      // Keep the original error reachable for debugging.
      throw new Error(`DOCX conversion failed: ${err.message}`, { cause: err });
    }
  },

  /**
   * HTML -> TXT: a lightweight plain-text extraction.
   *
   * Drops comments and script/style blocks, separates block-level elements
   * with a space (so adjacent paragraphs do not fuse into one word), strips
   * the remaining tags, decodes numeric and the most common named entities,
   * and collapses all whitespace into single spaces. For anything more
   * sophisticated a dedicated library such as `html-to-text` is the better
   * choice.
   *
   * @param {string} html - HTML markup (null/undefined is treated as empty)
   * @returns {string} Plain text on a single line
   */
  htmlToTXT(html) {
    const namedEntities = {
      amp: "&",
      lt: "<",
      gt: ">",
      quot: '"',
      apos: "'",
      nbsp: "\u00a0",
    };
    const blockTags =
      /<\/?(?:address|article|aside|blockquote|body|br|caption|dd|div|dl|dt|figcaption|figure|footer|form|h[1-6]|head|header|hr|html|li|main|nav|ol|option|p|pre|section|table|tbody|td|tfoot|th|thead|title|tr|ul)\b[^>]*>/gi;

    const decodeEntity = (entity, ref) => {
      if (ref[0] === "#") {
        const isHex = ref[1] === "x" || ref[1] === "X";
        const codePoint = parseInt(ref.slice(isHex ? 2 : 1), isHex ? 16 : 10);
        // Leave out-of-range references untouched instead of throwing.
        return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
      }
      return Object.prototype.hasOwnProperty.call(namedEntities, ref)
        ? namedEntities[ref]
        : entity;
    };

    return (
      (html == null ? "" : String(html))
        // Comments and script/style source are not document text.
        .replace(/<!--[\s\S]*?-->/g, " ")
        .replace(/<(script|style)\b[^>]*>[\s\S]*?<\/\1\s*>/gi, " ")
        // Block-level boundaries become a word separator ...
        .replace(blockTags, " ")
        // ... while inline tags vanish without splitting words.
        .replace(/<[^>]+>/g, "")
        // Decode after stripping so "&lt;b&gt;" is not mistaken for a tag.
        .replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, decodeEntity)
        // Convert multiple whitespace into single spaces.
        .replace(/\s+/g, " ")
        .trim()
    );
  },
};
|
||||
|
||||
module.exports = module_exports;
|
||||
|
||||
// CLI usage with dialog: runs only when this file is executed directly
// (`node htmlDocumentConverter.js <input.html> [format]`), not when required.
if (require.main === module) {
  const runCli = async () => {
    const cliArgs = process.argv.slice(2);

    // Without an input file there is nothing to convert: print usage and exit.
    if (cliArgs.length === 0) {
      const usageLines = [
        "Usage: node htmlDocumentConverter.js <input.html> [format]",
        "Formats: pdf (default), docx, html, txt",
        "",
        'Ein nativer "Speichern unter" Dialog wird automatisch geöffnet.',
      ];
      for (const line of usageLines) {
        console.log(line);
      }
      process.exit(1);
    }

    const inputPath = cliArgs[0];
    // An omitted (or empty) format argument falls back to PDF.
    const format = cliArgs[1] || "pdf";

    try {
      // In CLI mode the user always picks the destination via the native dialog.
      await module_exports.convert({ inputPath, format, showDialog: true });
    } catch (err) {
      console.error("Konvertierung fehlgeschlagen:", err.message);
      process.exit(1);
    }
  };

  runCli();
}
|
||||
Reference in New Issue
Block a user