Files
video2document/services/modules/save-file/convert.js
T
2025-12-11 14:22:04 +01:00

411 lines
11 KiB
JavaScript

#!/usr/bin/env node
const fs = require('fs');
const path = require('path');
const { marked } = require('marked');
const PDFDocument = require('pdfkit');
const docx = require('docx');
const { Document, Paragraph, TextRun, HeadingLevel } = docx;
// Command-line arguments with the node executable and script path removed.
const args = process.argv.slice(2);
// An input file is mandatory: without one, print usage and exit with status 1.
if (args.length < 1) {
console.log('Usage: node convert.js <input.md> [format]');
console.log('Formats: pdf (default), docx, html, txt');
console.log('Example: node convert.js document.md docx');
process.exit(1);
}
// First argument: path of the Markdown file to convert.
const inputFile = args[0];
// Second argument: output format, case-insensitive; a missing or empty value means 'pdf'.
const format = (args[1] || 'pdf').toLowerCase();
// Fail early when the input path does not exist.
if (!fs.existsSync(inputFile)) {
console.error(`File not found: ${inputFile}`);
process.exit(1);
}
// Only these formats have a converter; anything else is rejected with status 1.
const validFormats = ['pdf', 'docx', 'html', 'txt'];
if (!validFormats.includes(format)) {
console.error(`Invalid format: ${format}`);
console.log('Valid formats: pdf, docx, html, txt');
process.exit(1);
}
// Register a marked.Renderer instance as marked's global renderer.
// NOTE(review): no renderer methods are overridden in the code visible here, so this
// presumably leaves marked's default HTML output unchanged — confirm before relying on it.
const renderer = new marked.Renderer();
marked.setOptions({ renderer });
/**
 * Ask the user where to save the converted file using a macOS "choose file name"
 * dialog driven through `osascript`.
 *
 * If `osascript` exits with status 1 (the status this code treats as "dialog
 * canceled"), a message is printed and the process exits with status 0.
 *
 * @param {string} defaultName - Suggested file name without extension.
 * @param {string} format - Extension appended to the suggested name.
 * @returns {Promise<string>} POSIX path chosen by the user, trimmed.
 * @throws {Error} Any `osascript` failure other than exit status 1.
 */
async function showSaveDialog(defaultName, format) {
  // Escape backslashes and double quotes so the name cannot terminate the
  // AppleScript string literal it is embedded in.
  const suggestedName = `${defaultName}.${format}`.replace(/[\\"]/g, '\\$&');
  const applescript = [
    `set defaultName to "${suggestedName}"`,
    'set theFile to choose file name with prompt "Save converted file as:" default name defaultName',
    'POSIX path of theFile',
  ].join('\n');

  try {
    const { execFileSync } = require('child_process');
    // Pass the script as an argv entry instead of building a shell command line:
    // no shell is involved, so quotes in the file name cannot break or inject commands.
    const result = execFileSync('osascript', ['-e', applescript], { encoding: 'utf8' });
    return result.trim();
  } catch (err) {
    if (err.status === 1) {
      console.log('Save canceled.');
      process.exit(0);
    }
    throw err;
  }
}
/**
 * Interactively ask on stdin/stdout which output format to use.
 *
 * @returns {Promise<string>} 'pdf', 'docx', 'html' or 'txt'. Any answer other
 *   than 1-4 (ignoring surrounding whitespace) resolves to 'pdf'.
 */
async function promptForFormat() {
  const rl = require('readline').createInterface({
    input: process.stdin,
    output: process.stdout
  });
  // A Map (rather than a plain object) so that answers such as "constructor"
  // cannot resolve to an inherited Object.prototype member.
  const formatByChoice = new Map([
    ['1', 'pdf'],
    ['2', 'docx'],
    ['3', 'html'],
    ['4', 'txt']
  ]);

  return new Promise((resolve) => {
    console.log('\nChoose output format:');
    console.log('1) PDF');
    console.log('2) DOCX (Word)');
    console.log('3) HTML');
    console.log('4) TXT (Plain text)');
    rl.question('\nEnter choice (1-4): ', (answer) => {
      rl.close();
      // Trim so that "2 " or " 3" is still recognized instead of silently becoming PDF.
      resolve(formatByChoice.get(answer.trim()) || 'pdf');
    });
  });
}
/**
 * Main workflow: read the input Markdown file, ask where to save the result,
 * and run the converter matching the module-level `format`.
 *
 * Any failure (reading the file, the save dialog, or the conversion itself) is
 * reported on stderr and terminates the process with status 1.
 *
 * @returns {Promise<void>}
 */
async function convertMarkdown() {
  try {
    // Reading the file and showing the dialog happen inside the try block so
    // their errors are reported like conversion errors instead of surfacing as
    // unhandled promise rejections.
    const mdContent = fs.readFileSync(inputFile, 'utf8');
    const defaultName = path.basename(inputFile, '.md');
    const outputFile = await showSaveDialog(defaultName, format);

    // Separate the input and output names so the message is readable.
    console.log(`\nConverting ${path.basename(inputFile)} → ${path.basename(outputFile)} ...`);

    switch (format) {
      case 'pdf':
        await convertToPDF(mdContent, outputFile);
        break;
      case 'docx':
        await convertToDOCX(mdContent, outputFile);
        break;
      case 'html':
        await convertToHTML(mdContent, outputFile);
        break;
      case 'txt':
        await convertToTXT(mdContent, outputFile);
        break;
      default:
        throw new Error(`Unsupported format: ${format}`);
    }
    console.log(`✓ Successfully saved: ${outputFile}\n`);
  } catch (err) {
    console.error('✗ Error during conversion:', err.message);
    process.exit(1);
  }
}
/**
 * Split a string into styled segments based on inline Markdown markers.
 *
 * Text wrapped in `**` or `__` becomes a 'bold' segment, text wrapped in
 * backticks becomes a 'code' segment, and everything else is 'normal'.
 * The markers themselves are removed from the returned segments.
 *
 * @param {string} text - Raw inline Markdown.
 * @returns {Array<{text: string, style: string}>} Segments in source order;
 *   when nothing was collected, a single 'normal' segment holding `text`.
 */
function parseInlineMarkdown(text) {
  const inlinePattern = /(\*\*|__|`)(.*?)\1/g;
  const segments = [];
  let cursor = 0;

  for (let hit = inlinePattern.exec(text); hit !== null; hit = inlinePattern.exec(text)) {
    const [, delimiter, inner] = hit;

    // Plain text between the previous match and this one.
    if (hit.index > cursor) {
      segments.push({ text: text.slice(cursor, hit.index), style: 'normal' });
    }

    if (delimiter === '`') {
      segments.push({ text: inner, style: 'code' });
    } else if (delimiter === '**' || delimiter === '__') {
      segments.push({ text: inner, style: 'bold' });
    }

    cursor = inlinePattern.lastIndex;
  }

  // Trailing plain text after the last match.
  if (cursor < text.length) {
    segments.push({ text: text.slice(cursor), style: 'normal' });
  }

  if (segments.length === 0) {
    return [{ text, style: 'normal' }];
  }
  return segments;
}
/**
 * Write a sequence of inline segments to the PDF as one continuous line of text.
 *
 * Font AND size are set for every segment so that the smaller size used for
 * inline code never leaks into the segments that follow it.
 *
 * @param {object} doc - PDFKit document being written.
 * @param {Array<{text: string, style: string}>} parts - Output of parseInlineMarkdown.
 */
function renderPdfInlineParts(doc, parts) {
  parts.forEach((part, idx) => {
    if (part.style === 'bold') {
      doc.font('Helvetica-Bold').fontSize(12);
    } else if (part.style === 'code') {
      doc.font('Courier').fontSize(11);
    } else {
      doc.font('Helvetica').fontSize(12);
    }
    // Keep the line open until the last segment has been written.
    doc.text(part.text, { continued: idx < parts.length - 1 });
  });
}

/**
 * Render Markdown to a PDF file with PDFKit.
 *
 * Handles heading, paragraph, list, code, blockquote and space tokens from
 * marked's lexer; other token types are skipped.
 *
 * @param {string} mdContent - Markdown source.
 * @param {string} outputFile - Destination path of the PDF.
 * @returns {Promise<void>} Resolves when the output stream has finished,
 *   rejects on a stream error.
 */
async function convertToPDF(mdContent, outputFile) {
  const doc = new PDFDocument({ margin: 50 });
  const stream = fs.createWriteStream(outputFile);
  doc.pipe(stream);

  const tokens = marked.lexer(mdContent);
  for (const token of tokens) {
    // Each case has its own block so lexical declarations stay local to it.
    switch (token.type) {
      case 'heading': {
        // Deeper headings get smaller type: depth 1 -> 25pt ... depth 6 -> 10pt.
        const size = 28 - (token.depth * 3);
        doc.fontSize(size)
          .font('Helvetica-Bold')
          .text(token.text, { continued: false })
          .moveDown(0.5);
        break;
      }
      case 'paragraph': {
        renderPdfInlineParts(doc, parseInlineMarkdown(token.text));
        doc.moveDown(0.5);
        break;
      }
      case 'list': {
        token.items.forEach((item) => {
          doc.fontSize(12);
          doc.font('Helvetica').text('• ', { indent: 20, continued: true });
          renderPdfInlineParts(doc, parseInlineMarkdown(item.text));
          doc.moveDown(0.3);
        });
        doc.moveDown(0.5);
        break;
      }
      case 'code': {
        doc.fontSize(10)
          .font('Courier')
          .fillColor('#333333')
          .text(token.text, { indent: 20 })
          .fillColor('#000000')
          .moveDown();
        break;
      }
      case 'blockquote': {
        doc.fontSize(11)
          .font('Helvetica-Oblique')
          .fillColor('#666666')
          .text(token.text, { indent: 20 })
          .fillColor('#000000')
          .moveDown();
        break;
      }
      case 'space': {
        doc.moveDown(0.5);
        break;
      }
    }
  }

  doc.end();
  return new Promise((resolve, reject) => {
    stream.on('finish', resolve);
    stream.on('error', reject);
  });
}
/**
 * Turn inline Markdown into docx TextRuns: bold segments are bold, code
 * segments use Courier New, everything else is default text.
 *
 * @param {string} text - Raw inline Markdown.
 * @returns {Array<object>} TextRun instances in source order.
 */
function buildDocxInlineRuns(text) {
  return parseInlineMarkdown(text).map((part) => new TextRun({
    text: part.text,
    bold: part.style === 'bold',
    font: part.style === 'code' ? 'Courier New' : undefined
  }));
}

/**
 * Turn multi-line text into one TextRun per line, with a line break before
 * every line after the first, so that newlines in the text are kept in the
 * document.
 *
 * @param {string} text - Text that may contain '\n'.
 * @param {object} runOptions - Extra TextRun options applied to every line.
 * @returns {Array<object>} TextRun instances, one per line.
 */
function buildDocxLineRuns(text, runOptions) {
  return text.split('\n').map((line, idx) => new TextRun(
    Object.assign({ text: line }, runOptions, idx > 0 ? { break: 1 } : {})
  ));
}

/**
 * Render Markdown to a .docx file using the docx library.
 *
 * Handles heading, paragraph, list, code, blockquote and space tokens from
 * marked's lexer; other token types are skipped.
 *
 * @param {string} mdContent - Markdown source.
 * @param {string} outputFile - Destination path of the .docx file.
 * @returns {Promise<void>}
 */
async function convertToDOCX(mdContent, outputFile) {
  const headingLevels = [
    HeadingLevel.HEADING_1,
    HeadingLevel.HEADING_2,
    HeadingLevel.HEADING_3,
    HeadingLevel.HEADING_4,
    HeadingLevel.HEADING_5,
    HeadingLevel.HEADING_6
  ];
  const tokens = marked.lexer(mdContent);
  const children = [];

  for (const token of tokens) {
    switch (token.type) {
      case 'heading': {
        children.push(new Paragraph({
          text: token.text,
          heading: headingLevels[token.depth - 1] || HeadingLevel.HEADING_1
        }));
        break;
      }
      case 'paragraph': {
        children.push(new Paragraph({ children: buildDocxInlineRuns(token.text) }));
        break;
      }
      case 'list': {
        token.items.forEach((item) => {
          children.push(new Paragraph({
            children: buildDocxInlineRuns(item.text),
            bullet: { level: 0 }
          }));
        });
        break;
      }
      case 'code': {
        // The document defines no "Code" paragraph style, so set the monospace
        // font directly on the runs instead of referencing a missing style.
        children.push(new Paragraph({
          children: buildDocxLineRuns(token.text, { font: 'Courier New' })
        }));
        break;
      }
      case 'blockquote': {
        // Italics is a run-level property; it has no effect as a Paragraph option.
        children.push(new Paragraph({
          children: buildDocxLineRuns(token.text, { italics: true }),
          indent: { left: 720 }
        }));
        break;
      }
      case 'space': {
        children.push(new Paragraph({ text: '' }));
        break;
      }
    }
  }

  const doc = new Document({
    sections: [{
      properties: {},
      children: children
    }]
  });
  const buffer = await docx.Packer.toBuffer(doc);
  fs.writeFileSync(outputFile, buffer);
}
/**
 * Render Markdown to a standalone HTML page with an embedded stylesheet.
 *
 * The page title is the input file's base name (module-level `inputFile`,
 * with a trailing '.md' removed), HTML-escaped.
 *
 * @param {string} mdContent - Markdown source.
 * @param {string} outputFile - Destination path of the HTML file.
 * @returns {Promise<void>}
 */
async function convertToHTML(mdContent, outputFile) {
  const htmlEntities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  // File names may contain characters such as & or <; escape them so the
  // <title> element stays well-formed.
  const escapeHtml = (value) => value.replace(/[&<>"']/g, (ch) => htmlEntities[ch]);

  const html = marked.parse(mdContent);
  const title = escapeHtml(path.basename(inputFile, '.md'));
  const fullHTML = `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>${title}</title>
<style>
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
max-width: 800px;
margin: 50px auto;
padding: 20px;
line-height: 1.6;
color: #333;
}
h1, h2, h3, h4, h5, h6 {
margin-top: 24px;
margin-bottom: 16px;
font-weight: 600;
line-height: 1.25;
}
h1 { font-size: 2em; border-bottom: 1px solid #eaecef; padding-bottom: 0.3em; }
h2 { font-size: 1.5em; border-bottom: 1px solid #eaecef; padding-bottom: 0.3em; }
code {
background: #f6f8fa;
padding: 2px 6px;
border-radius: 3px;
font-family: 'Courier New', Courier, monospace;
font-size: 0.9em;
}
pre {
background: #f6f8fa;
padding: 16px;
border-radius: 6px;
overflow-x: auto;
line-height: 1.45;
}
pre code {
background: none;
padding: 0;
}
blockquote {
border-left: 4px solid #dfe2e5;
padding-left: 16px;
color: #6a737d;
margin: 16px 0;
}
a {
color: #0366d6;
text-decoration: none;
}
a:hover {
text-decoration: underline;
}
strong {
font-weight: 600;
}
</style>
</head>
<body>
${html}
</body>
</html>`;
  fs.writeFileSync(outputFile, fullHTML, 'utf8');
}
/**
 * Render Markdown to plain text.
 *
 * Headings are upper-cased and framed with '=' rules, bold markers (`**`, `__`)
 * are stripped from paragraphs and list items, list items get a bullet, and
 * every line of a blockquote gets a ' > ' prefix. Token types other than
 * heading, paragraph, list, code and blockquote are skipped.
 *
 * @param {string} mdContent - Markdown source.
 * @param {string} outputFile - Destination path of the text file.
 * @returns {Promise<void>}
 */
async function convertToTXT(mdContent, outputFile) {
  // Remove bold markers while keeping the text they wrap.
  const stripBold = (value) =>
    value.replace(/\*\*(.+?)\*\*/g, '$1').replace(/__(.+?)__/g, '$1');

  const tokens = marked.lexer(mdContent);
  let text = '';

  for (const token of tokens) {
    switch (token.type) {
      case 'heading': {
        const rule = '='.repeat(token.text.length);
        text += '\n' + rule + '\n';
        text += token.text.toUpperCase() + '\n';
        text += rule + '\n\n';
        break;
      }
      case 'paragraph': {
        text += stripBold(token.text) + '\n\n';
        break;
      }
      case 'list': {
        token.items.forEach((item) => {
          text += ' • ' + stripBold(item.text) + '\n';
        });
        text += '\n';
        break;
      }
      case 'code': {
        text += '\n' + token.text + '\n\n';
        break;
      }
      case 'blockquote': {
        // Prefix every line, not just the first, so multi-line quotes stay quoted.
        text += token.text
          .split('\n')
          .map((line) => ' > ' + line)
          .join('\n') + '\n\n';
        break;
      }
    }
  }

  fs.writeFileSync(outputFile, text, 'utf8');
}
// Script entry point. The returned promise is handled explicitly so that a
// rejection is reported and sets a failing exit status instead of surfacing
// as an unhandled promise rejection.
convertMarkdown().catch((err) => {
  console.error('✗ Unexpected error:', err && err.message ? err.message : err);
  process.exit(1);
});