mirror of
https://gitlab.rlp.net/proj-wise2526-video2document/video2document.git
synced 2026-06-15 18:01:52 +02:00
Refactor code formatting and improve error handling in htmlDocumentConverter
This commit is contained in:
+188
-155
@@ -1,35 +1,37 @@
|
|||||||
const fs = require('fs');
|
const fs = require("fs");
|
||||||
const path = require('path');
|
const path = require("path");
|
||||||
const puppeteer = require('puppeteer');
|
const puppeteer = require("puppeteer");
|
||||||
const htmlToDocx = require('html-to-docx');
|
const htmlToDocx = require("html-to-docx");
|
||||||
const { execSync } = require('child_process');
|
const { execSync } = require("child_process");
|
||||||
const os = require('os');
|
const os = require("os");
|
||||||
|
|
||||||
// Default directory for converted documents when no save dialog is used.
const outputDir = path.join(__dirname, "../../../storage/documents");

// `recursive: true` makes mkdirSync a no-op when the directory already exists,
// so a separate existsSync() check (and its check-then-create race) is not needed.
fs.mkdirSync(outputDir, { recursive: true });
|
||||||
|
|
||||||
async function showSaveDialog(defaultName, format) {
|
async function showSaveDialog(defaultName, format) {
|
||||||
const platform = os.platform();
|
const platform = os.platform();
|
||||||
|
|
||||||
if (platform === 'darwin') {
|
if (platform === "darwin") {
|
||||||
// macOS
|
// macOS
|
||||||
const applescript = `
|
const applescript = `
|
||||||
set defaultName to "${defaultName}.${format}"
|
set defaultName to "${defaultName}.${format}"
|
||||||
set theFile to choose file name with prompt "Dokument speichern als:" default name defaultName
|
set theFile to choose file name with prompt "Dokument speichern als:" default name defaultName
|
||||||
POSIX path of theFile
|
POSIX path of theFile
|
||||||
`;
|
`;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const result = execSync(`osascript -e '${applescript}'`, { encoding: 'utf8' });
|
const result = execSync(`osascript -e '${applescript}'`, {
|
||||||
return result.trim();
|
encoding: "utf8",
|
||||||
} catch (err) {
|
});
|
||||||
if (err.status === 1) return null; // User canceled
|
return result.trim();
|
||||||
throw err;
|
} catch (err) {
|
||||||
}
|
if (err.status === 1) return null; // User canceled
|
||||||
} else if (platform === 'win32') {
|
throw err;
|
||||||
|
}
|
||||||
|
} else if (platform === "win32") {
|
||||||
const safeName = decodeURIComponent(defaultName);
|
const safeName = decodeURIComponent(defaultName);
|
||||||
|
|
||||||
const powershell = `
|
const powershell = `
|
||||||
@@ -43,161 +45,192 @@ async function showSaveDialog(defaultName, format) {
|
|||||||
`;
|
`;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const result = execSync(
|
const result = execSync(
|
||||||
`powershell -NoProfile -Command "${powershell.replace(/\r?\n/g, ' ')}"`,
|
`powershell -NoProfile -Command "${powershell.replace(/\r?\n/g, " ")}"`,
|
||||||
{ encoding: 'utf8' }
|
{ encoding: "utf8" },
|
||||||
);
|
);
|
||||||
return result.trim() || null;
|
return result.trim() || null;
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
if (err.status === 1) return null; // User cancelled
|
if (err.status === 1) return null; // User cancelled
|
||||||
throw new Error("Save dialog failed: " + err.message);
|
throw new Error("Save dialog failed: " + err.message);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Linux - zenity oder kdialog
|
// Linux - zenity oder kdialog
|
||||||
try {
|
try {
|
||||||
const result = execSync(
|
const result = execSync(
|
||||||
`zenity --file-selection --save --confirm-overwrite --filename="${defaultName}.${format}"`,
|
`zenity --file-selection --save --confirm-overwrite --filename="${defaultName}.${format}"`,
|
||||||
{ encoding: 'utf8' }
|
{ encoding: "utf8" },
|
||||||
);
|
);
|
||||||
return result.trim();
|
return result.trim();
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
try {
|
try {
|
||||||
const result = execSync(
|
const result = execSync(
|
||||||
`kdialog --getsavefilename . "${defaultName}.${format}"`,
|
`kdialog --getsavefilename . "${defaultName}.${format}"`,
|
||||||
{ encoding: 'utf8' }
|
{ encoding: "utf8" },
|
||||||
);
|
);
|
||||||
return result.trim();
|
return result.trim();
|
||||||
} catch (err2) {
|
} catch (err2) {
|
||||||
// Fallback
|
// Fallback
|
||||||
return path.join(os.homedir(), 'Downloads', `${defaultName}.${format}`);
|
return path.join(os.homedir(), "Downloads", `${defaultName}.${format}`);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Output formats accepted by convert().
const SUPPORTED_FORMATS = ["pdf", "docx", "html", "txt"];

// Named character references decoded by htmlToTXT(); unknown names are left untouched.
const NAMED_ENTITIES = new Map([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
  ["nbsp", "\u00a0"],
]);

// Tags that end a line or block when rendered; they are replaced by a space so
// that neighbouring words are not glued together once the markup is removed.
const BLOCK_TAG_PATTERN =
  /<\/?(?:address|article|aside|blockquote|br|dd|div|dl|dt|figcaption|figure|footer|h[1-6]|header|hr|li|main|nav|ol|p|pre|section|table|tbody|td|tfoot|th|thead|title|tr|ul)\b[^>]*>/gi;

/**
 * Decodes numeric and a small set of named HTML character references.
 * Works in a single pass, so "&amp;lt;" becomes "&lt;" and is not decoded twice.
 * @param {string} text - Text that may contain character references
 * @returns {string} Text with recognised references replaced
 */
function decodeHtmlEntities(text) {
  return text.replace(/&(#x[0-9a-f]+|#[0-9]+|[a-z]+);/gi, (match, body) => {
    if (body[0] !== "#") {
      return NAMED_ENTITIES.has(body) ? NAMED_ENTITIES.get(body) : match;
    }

    const isHex = body[1] === "x" || body[1] === "X";
    const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
    const isValid = codePoint > 0 && codePoint <= 0x10ffff && !isSurrogate;

    // Leave invalid references as written instead of throwing a RangeError.
    return isValid ? String.fromCodePoint(codePoint) : match;
  });
}

const module_exports = {
  name: "htmlDocumentConverter",
  type: "converter",
  displayname: "HTML Document Converter",
  description: "Converts LLM-generated HTML to PDF, DOCX, TXT, or HTML",

  /**
   * Main conversion function.
   *
   * Reads the HTML file, strips any <think>...</think> sections and writes the
   * result in the requested format.
   *
   * @param {Object} options
   * @param {string} options.inputPath - Path to the HTML input
   * @param {string} options.format - 'pdf' | 'docx' | 'html' | 'txt' (case-insensitive, a "." is tolerated)
   * @param {string} [options.outputName] - Optional output filename (without extension)
   * @param {boolean} [options.showDialog] - Show save dialog (default: false in module mode, true in CLI mode)
   * @returns {Promise<string|null>} Path of the written file, or null when the save dialog was cancelled
   * @throws {Error} If the format is unsupported, the input file is missing, or a conversion fails
   */
  async convert({ inputPath, format = "pdf", outputName, showDialog = false } = {}) {
    // String() keeps a non-string format (e.g. null) from surfacing as a bare TypeError.
    const targetFormat = String(format).toLowerCase().replace(".", "");

    if (!SUPPORTED_FORMATS.includes(targetFormat)) {
      throw new Error(`Unsupported format: ${targetFormat}`);
    }
    if (!fs.existsSync(inputPath)) {
      throw new Error(`Input file not found: ${inputPath}`);
    }

    // path.parse() drops the extension regardless of its case ("Doc.HTML" -> "Doc").
    const baseName = outputName || path.parse(inputPath).name;

    let outputFile;
    if (showDialog) {
      // Show the native save dialog.
      outputFile = await showSaveDialog(baseName, targetFormat);
      if (!outputFile) {
        console.log("Speichervorgang abgebrochen.");
        return null;
      }
    } else {
      // Use the default output directory.
      outputFile = path.join(outputDir, `${baseName}.${targetFormat}`);
    }

    // Remove <think> tags if present.
    const htmlContent = fs
      .readFileSync(inputPath, "utf8")
      .replace(/<think>[\s\S]*?<\/think>/gi, "");

    switch (targetFormat) {
      case "html":
        fs.writeFileSync(outputFile, htmlContent, "utf8");
        break;
      case "pdf":
        await this.htmlToPDF(htmlContent, outputFile);
        break;
      case "docx":
        await this.htmlToDOCX(htmlContent, outputFile);
        break;
      case "txt":
        fs.writeFileSync(outputFile, this.htmlToTXT(htmlContent), "utf8");
        break;
      default:
        // Guards against SUPPORTED_FORMATS and this switch drifting apart.
        throw new Error(`Unsupported format: ${targetFormat}`);
    }

    console.log(`Erfolgreich gespeichert: ${outputFile}`);
    return outputFile;
  },

  /**
   * HTML → PDF: renders the HTML in headless Chromium and prints it as an A4 PDF.
   *
   * @param {string} html - HTML markup to render
   * @param {string} outputPath - Destination path of the PDF file
   * @returns {Promise<void>}
   */
  async htmlToPDF(html, outputPath) {
    let browser;
    try {
      // NOTE(review): the sandbox is disabled here; confirm this is acceptable
      // for the HTML that is rendered.
      browser = await puppeteer.launch({
        headless: true,
        args: ["--no-sandbox", "--disable-setuid-sandbox"],
      });

      const page = await browser.newPage();
      await page.setContent(html, { waitUntil: "networkidle0" });
      await page.pdf({
        path: outputPath,
        format: "A4",
        printBackground: true,
        margin: {
          top: "20mm",
          right: "20mm",
          bottom: "20mm",
          left: "20mm",
        },
      });
    } finally {
      // Close the browser even when rendering fails so no Chromium process leaks.
      if (browser) {
        await browser.close();
      }
    }
  },

  /**
   * HTML → DOCX: converts the HTML string with html-to-docx and writes the result.
   *
   * @param {string} html - HTML markup to convert
   * @param {string} outputPath - Destination path of the .docx file
   * @returns {Promise<void>}
   * @throws {Error} "DOCX conversion failed: ..." with the original error attached as `cause`
   */
  async htmlToDOCX(html, outputPath) {
    try {
      // Arguments: HTML string, header HTML (none), document options.
      const buffer = await htmlToDocx(html, null, {
        table: { row: { cantSplit: true } },
      });
      fs.writeFileSync(outputPath, buffer);
    } catch (err) {
      throw new Error(`DOCX conversion failed: ${err.message}`, { cause: err });
    }
  },

  /**
   * HTML → TXT: reduces HTML to a single line of plain text.
   *
   * Comments and the contents of <script>/<style> elements are dropped, all
   * tags are removed, character references are decoded and every run of
   * whitespace collapses to one space. For richer extraction a dedicated
   * library such as `html-to-text` would be needed.
   *
   * @param {string} html - HTML markup
   * @returns {string} Plain text without markup
   */
  htmlToTXT(html) {
    const visibleMarkup = html
      .replace(/<!--[\s\S]*?-->/g, "")
      .replace(/<(script|style)\b[^>]*>[\s\S]*?<\/\1\s*>/gi, "");

    const withoutTags = visibleMarkup
      .replace(BLOCK_TAG_PATTERN, " ")
      .replace(/<[^>]+>/g, "");

    // Decode only after the tags are gone so that "&lt;b&gt;" survives as the text "<b>".
    return decodeHtmlEntities(withoutTags).replace(/\s+/g, " ").trim();
  },
};
|
||||||
|
|
||||||
module.exports = module_exports;

// CLI usage: converts the given file and always opens the native save dialog.
if (require.main === module) {
  (async () => {
    const args = process.argv.slice(2);
    if (args.length < 1) {
      console.log("Usage: node htmlDocumentConverter.js <input.html> [format]");
      console.log("Formats: pdf (default), docx, html, txt");
      console.log("");
      console.log(
        'Ein nativer "Speichern unter" Dialog wird automatisch geöffnet.',
      );
      process.exit(1);
    }

    const inputPath = args[0];
    // `||` (not `??`) so that an empty format argument also falls back to PDF.
    const format = args[1] || "pdf";

    try {
      await module_exports.convert({
        inputPath,
        format,
        showDialog: true,
      });
    } catch (err) {
      console.error("Konvertierung fehlgeschlagen:", err.message);
      process.exit(1);
    }
  })();
}
|
||||||
|
|||||||
Reference in New Issue
Block a user