// Mirror of https://gitlab.rlp.net/proj-wise2526-video2document/video2document.git
// (synced 2026-06-15 18:01:52 +02:00; 197 lines, 6.6 KiB, JavaScript)
const { execFileSync, execSync } = require('child_process');
const fs = require('fs');
const os = require('os');
const path = require('path');

const htmlToDocx = require('html-to-docx');
const puppeteer = require('puppeteer');
|
|
|
|
// Default destination for converted documents when no save dialog is used.
const outputDir = path.join(__dirname, "../../../storage/documents");

// Create the directory (including missing parents) once at module load time,
// so later writes into it do not fail on a missing folder.
const outputDirExists = fs.existsSync(outputDir);
if (!outputDirExists) {
  fs.mkdirSync(outputDir, { recursive: true });
}
|
|
|
|
/**
 * Opens the operating system's native "save as" dialog and returns the path
 * the user picked.
 *
 * Every external program is started with an argument vector (no shell), and
 * the suggested file name is handed over as data (argv or environment
 * variable). Quotes or shell metacharacters in `defaultName` / `format`
 * therefore cannot change the command that is executed.
 *
 * @param {string} defaultName - Suggested file name without extension.
 * @param {string} format - File extension without the leading dot (e.g. 'pdf').
 * @returns {Promise<string|null>} The chosen path, or null when the user
 *   cancelled. On Linux, when no dialog tool can be used, the path
 *   `~/Downloads/<defaultName>.<format>` is returned instead.
 * @throws {Error} When the macOS or Windows dialog fails for a reason other
 *   than exit status 1 (which is treated as "cancelled").
 */
async function showSaveDialog(defaultName, format) {
  const platform = os.platform();
  const suggestedName = `${defaultName}.${format}`;

  if (platform === 'darwin') {
    // The file name reaches AppleScript through the run handler's argv, so it
    // is never spliced into the script source.
    const appleScript = [
      'on run argv',
      '  set theFile to choose file name with prompt "Dokument speichern als:" default name (item 1 of argv)',
      '  return POSIX path of theFile',
      'end run',
    ].join('\n');

    try {
      const chosen = execFileSync('osascript', ['-e', appleScript, suggestedName], {
        encoding: 'utf8',
      });
      return chosen.trim() || null;
    } catch (err) {
      if (err.status === 1) return null; // User cancelled
      throw err;
    }
  }

  if (platform === 'win32') {
    // Names may arrive URL-encoded; a stray '%' must not abort the export.
    let safeName;
    try {
      safeName = decodeURIComponent(defaultName);
    } catch (decodeErr) {
      safeName = defaultName;
    }

    // File name and filter are read from environment variables inside
    // PowerShell, so no user-controlled text is part of the script itself.
    const powershellScript = [
      'Add-Type -AssemblyName System.Windows.Forms;',
      '$dialog = New-Object System.Windows.Forms.SaveFileDialog;',
      '$dialog.FileName = $env:V2D_SAVE_NAME;',
      '$dialog.Filter = $env:V2D_SAVE_FILTER;',
      "$dialog.Title = 'Dokument speichern als';",
      '$result = $dialog.ShowDialog();',
      "if ($result -eq 'OK') { $dialog.FileName }",
    ].join(' ');

    const dialogEnv = {
      ...process.env,
      V2D_SAVE_NAME: `${safeName}.${format}`,
      V2D_SAVE_FILTER: `${format.toUpperCase()} Dateien (*.${format})|*.${format}|Alle Dateien (*.*)|*.*`,
    };

    try {
      const chosen = execFileSync(
        'powershell',
        ['-NoProfile', '-Command', powershellScript],
        { encoding: 'utf8', env: dialogEnv }
      );
      // A cancelled dialog prints nothing.
      return chosen.trim() || null;
    } catch (err) {
      if (err.status === 1) return null; // User cancelled
      throw new Error("Save dialog failed: " + err.message, { cause: err });
    }
  }

  // Linux and other platforms: try zenity first, then kdialog.
  const fallbackPath = path.join(os.homedir(), 'Downloads', suggestedName);

  // Without a display server no dialog can be shown; skip straight to the
  // fallback instead of mistaking the tool's failure for a user cancel.
  const hasDisplay = Boolean(process.env.DISPLAY || process.env.WAYLAND_DISPLAY);
  if (!hasDisplay) {
    return fallbackPath;
  }

  const dialogTools = [
    {
      command: 'zenity',
      args: ['--file-selection', '--save', '--confirm-overwrite', `--filename=${suggestedName}`],
    },
    {
      command: 'kdialog',
      args: ['--getsavefilename', '.', suggestedName],
    },
  ];

  for (const { command, args } of dialogTools) {
    try {
      const chosen = execFileSync(command, args, { encoding: 'utf8' });
      return chosen.trim() || null;
    } catch (err) {
      // Exit status 1 means the dialog was shown and dismissed: respect the
      // cancel instead of opening a second dialog or saving to the fallback.
      if (err.status === 1) return null;
      // Anything else (tool not installed, crash, ...) is deliberately not
      // fatal: fall through to the next tool.
    }
  }

  // Best-effort fallback when no dialog tool is usable.
  return fallbackPath;
}
|
|
|
|
/**
 * Converter module: turns an HTML file into a PDF, DOCX, TXT or HTML document.
 */
const module_exports = {
  name: "htmlDocumentConverter",
  type: "converter",
  displayname: "HTML Document Converter",
  description: "Converts LLM-generated HTML to PDF, DOCX, TXT, or HTML",

  /**
   * Main conversion function.
   *
   * @param {Object} options
   * @param {string} options.inputPath - Path to the HTML input
   * @param {string} options.format - 'pdf' | 'docx' | 'html' | 'txt' (case-insensitive)
   * @param {string} [options.outputName] - Optional output filename (without extension)
   * @param {boolean} [options.showDialog] - Show save dialog (default: false in module mode, true in CLI mode)
   * @returns {Promise<string|null>} Path of the written file, or null when the
   *   save dialog was cancelled.
   * @throws {Error} When the input file does not exist or the format is unsupported.
   */
  async convert({ inputPath, format = 'pdf', outputName, showDialog = false }) {
    if (!fs.existsSync(inputPath)) {
      throw new Error(`Input file not found: ${inputPath}`);
    }

    // Validate before any dialog is shown, so the user is never asked to pick
    // a destination for a conversion that cannot succeed.
    const targetFormat = String(format).toLowerCase();
    const supportedFormats = ['html', 'pdf', 'docx', 'txt'];
    if (!supportedFormats.includes(targetFormat)) {
      throw new Error(`Unsupported format: ${format}`);
    }

    const ext = path.extname(inputPath).toLowerCase();
    const baseName = outputName || path.basename(inputPath, ext);

    let outputFile;
    if (showDialog) {
      // Native save dialog; a falsy result means the user cancelled.
      outputFile = await showSaveDialog(baseName, targetFormat);
      if (!outputFile) {
        console.log('Speichervorgang abgebrochen.');
        return null;
      }
    } else {
      // Use the default output directory.
      outputFile = path.join(outputDir, `${baseName}.${targetFormat}`);
    }

    let htmlContent = fs.readFileSync(inputPath, 'utf8');

    // Remove <think> tags if present
    htmlContent = htmlContent.replace(/<think>[\s\S]*?<\/think>/gi, '');

    switch (targetFormat) {
      case 'html':
        fs.writeFileSync(outputFile, htmlContent, 'utf8');
        break;
      case 'pdf':
        await this.htmlToPDF(htmlContent, outputFile);
        break;
      case 'docx':
        await this.htmlToDOCX(htmlContent, outputFile);
        break;
      case 'txt':
        fs.writeFileSync(outputFile, this.htmlToTXT(htmlContent), 'utf8');
        break;
    }

    console.log(`Erfolgreich gespeichert: ${outputFile}`);
    return outputFile;
  },

  /**
   * HTML → PDF: renders the markup in headless Chromium and prints it as an
   * A4 page with 20mm margins.
   *
   * @param {string} html - Markup to render.
   * @param {string} outputPath - Destination of the PDF file.
   * @returns {Promise<void>}
   */
  async htmlToPDF(html, outputPath) {
    const browser = await puppeteer.launch({
      headless: true,
      args: ['--no-sandbox', '--disable-setuid-sandbox']
    });

    try {
      const page = await browser.newPage();
      await page.setContent(html, { waitUntil: 'networkidle0' });
      await page.pdf({
        path: outputPath,
        format: 'A4',
        printBackground: true,
        margin: { top: '20mm', right: '20mm', bottom: '20mm', left: '20mm' }
      });
    } finally {
      // Always shut the browser down, even when rendering fails; otherwise
      // every failed export leaves a Chromium process behind.
      await browser.close();
    }
  },

  /**
   * HTML → DOCX via the html-to-docx package.
   *
   * @param {string} html - Markup to convert.
   * @param {string} outputPath - Destination of the DOCX file.
   * @returns {Promise<void>}
   */
  async htmlToDOCX(html, outputPath) {
    const buffer = await htmlToDocx(html);
    fs.writeFileSync(outputPath, buffer);
  },

  /**
   * HTML → TXT (rudimentary, regex based).
   *
   * Drops <script>/<style> blocks and HTML comments together with their
   * contents, removes all remaining tags, decodes the most common entities
   * and strips whitespace that precedes a line break.
   *
   * @param {string} html - Markup to flatten.
   * @returns {string} Plain text.
   */
  htmlToTXT(html) {
    const withoutTags = html
      .replace(/<(script|style)\b[^>]*>[\s\S]*?<\/\1\s*>/gi, '')
      .replace(/<!--[\s\S]*?-->/g, '')
      .replace(/<[^>]*>/g, '');

    // Entities are decoded only after tag removal, so escaped markup such as
    // "&lt;b&gt;" survives as literal text. "&amp;" goes last to avoid
    // decoding "&amp;lt;" twice.
    const decoded = withoutTags
      .replace(/&nbsp;/gi, ' ')
      .replace(/&lt;/gi, '<')
      .replace(/&gt;/gi, '>')
      .replace(/&quot;/gi, '"')
      .replace(/&#39;/g, "'")
      .replace(/&amp;/gi, '&');

    return decoded.replace(/\s+\n/g, '\n').trim();
  }
};
|
|
|
|
module.exports = module_exports;

// Command-line entry point: only active when this file is run directly.
// The native save dialog is always used in this mode.
if (require.main === module) {
  // Prints the usage text, one console.log call per line.
  const printUsage = () => {
    const usageLines = [
      'Usage: node htmlDocumentConverter.js <input.html> [format]',
      'Formats: pdf (default), docx, html, txt',
      '',
      'Ein nativer "Speichern unter" Dialog wird automatisch geöffnet.',
    ];
    usageLines.forEach((line) => console.log(line));
  };

  // Reads <input.html> [format] from the command line and runs the conversion;
  // exits with status 1 on missing arguments or on a conversion error.
  const runFromCommandLine = async () => {
    const cliArgs = process.argv.slice(2);

    if (cliArgs.length === 0) {
      printUsage();
      process.exit(1);
    }

    const [inputPath] = cliArgs;
    const format = cliArgs[1] || 'pdf';

    try {
      await module_exports.convert({ inputPath, format, showDialog: true });
    } catch (err) {
      console.error('Konvertierung fehlgeschlagen:', err.message);
      process.exit(1);
    }
  };

  runFromCommandLine();
}