mirror of
https://gitlab.rlp.net/proj-wise2526-video2document/video2document.git
synced 2026-06-15 18:01:52 +02:00
411 lines
11 KiB
JavaScript
411 lines
11 KiB
JavaScript
#!/usr/bin/env node
|
|
const fs = require('fs');
|
|
const path = require('path');
|
|
const { marked } = require('marked');
|
|
const PDFDocument = require('pdfkit');
|
|
const docx = require('docx');
|
|
const { Document, Paragraph, TextRun, HeadingLevel } = docx;
|
|
// ---------------------------------------------------------------------------
// Command-line handling: everything after `node <script>`.
// ---------------------------------------------------------------------------
const args = process.argv.slice(2);

// Without an input file there is nothing to convert: print the usage text
// and stop with a non-zero exit code.
if (args.length < 1) {
  [
    'Usage: node convert.js <input.md> [format]',
    'Formats: pdf (default), docx, html, txt',
    'Example: node convert.js document.md docx',
  ].forEach((usageLine) => console.log(usageLine));
  process.exit(1);
}

// First argument: path of the Markdown source file.
const inputFile = args[0];
// Optional second argument: output format, compared case-insensitively;
// a missing (or empty) value falls back to PDF.
const format = (args[1] ? args[1] : 'pdf').toLowerCase();

// The input file must exist before anything else is checked.
const inputExists = fs.existsSync(inputFile);
if (!inputExists) {
  console.error(`File not found: ${inputFile}`);
  process.exit(1);
}

// Only these output formats have a converter in this script.
const validFormats = ['pdf', 'docx', 'html', 'txt'];

if (validFormats.indexOf(format) === -1) {
  console.error(`Invalid format: ${format}`);
  console.log('Valid formats: pdf, docx, html, txt');
  process.exit(1);
}

// Register a marked renderer instance globally. No methods are overridden on
// it here, so it is a stock marked.Renderer.
const renderer = new marked.Renderer();
marked.setOptions({ renderer });
|
|
|
|
/**
 * Escapes a value so it can be embedded in a double-quoted AppleScript
 * string literal (backslashes first, then double quotes).
 *
 * @param {string} value - Raw text, e.g. a suggested file name.
 * @returns {string} Text that is safe between AppleScript double quotes.
 */
function escapeAppleScriptString(value) {
  return String(value).replace(/\\/g, '\\\\').replace(/"/g, '\\"');
}

/**
 * Asks the user where the converted file should be saved by running an
 * AppleScript "choose file name" dialog through `osascript`.
 *
 * The script is handed to `osascript` as separate `-e` arguments via
 * `execFileSync`, i.e. without a shell, so quotes or other shell
 * metacharacters in the suggested file name cannot break or alter the
 * command.
 *
 * @param {string} defaultName - Suggested file name without extension.
 * @param {string} format - Extension appended to the suggested name.
 * @returns {Promise<string>} Trimmed output of the script (the chosen POSIX path).
 * @throws Rethrows any `osascript` failure whose exit status is not 1.
 *         An exit status of 1 is treated as "dialog canceled": a message is
 *         printed and the process exits with code 0.
 */
async function showSaveDialog(defaultName, format) {
  const { execFileSync } = require('child_process');

  const suggestedName = escapeAppleScriptString(`${defaultName}.${format}`);
  const scriptLines = [
    `set defaultName to "${suggestedName}"`,
    'set theFile to choose file name with prompt "Save converted file as:" default name defaultName',
    'POSIX path of theFile',
  ];

  // osascript builds a multi-line script from repeated `-e <line>` options.
  const osascriptArgs = [];
  for (const line of scriptLines) {
    osascriptArgs.push('-e', line);
  }

  try {
    const result = execFileSync('osascript', osascriptArgs, { encoding: 'utf8' });
    return result.trim();
  } catch (err) {
    if (err.status === 1) {
      console.log('Save canceled.');
      process.exit(0);
    }
    throw err;
  }
}
|
|
|
|
/**
 * Interactively asks on stdin/stdout which output format to use.
 *
 * The menu choices 1-4 map to 'pdf', 'docx', 'html' and 'txt'. Any other
 * answer falls back to 'pdf'. The lookup uses a Map so that answers such as
 * "constructor" or "__proto__" cannot pick up inherited object members, and
 * the answer is trimmed so surrounding whitespace does not defeat the match.
 *
 * @returns {Promise<string>} The selected format identifier.
 */
async function promptForFormat() {
  const rl = require('readline').createInterface({
    input: process.stdin,
    output: process.stdout
  });

  const formatByChoice = new Map([
    ['1', 'pdf'],
    ['2', 'docx'],
    ['3', 'html'],
    ['4', 'txt']
  ]);

  return new Promise((resolve) => {
    console.log('\nChoose output format:');
    console.log('1) PDF');
    console.log('2) DOCX (Word)');
    console.log('3) HTML');
    console.log('4) TXT (Plain text)');

    rl.question('\nEnter choice (1-4): ', (answer) => {
      rl.close();
      resolve(formatByChoice.get(String(answer).trim()) || 'pdf');
    });
  });
}
|
|
|
|
/**
 * Runs one conversion: reads the Markdown input, asks for the output
 * location, and hands both to the converter that matches the selected format.
 *
 * Reads the module-level `inputFile` and `format`. Errors raised by the
 * converter (or an unknown format) are reported on stderr and end the
 * process with exit code 1; reading the input and the save dialog happen
 * before that guarded section.
 *
 * @returns {Promise<void>}
 */
async function convertMarkdown() {
  const mdContent = fs.readFileSync(inputFile, 'utf8');

  // Suggested output name: the input's file name with a trailing ".md" removed.
  const defaultName = path.basename(inputFile, '.md');
  const outputFile = await showSaveDialog(defaultName, format);

  const sourceLabel = path.basename(inputFile);
  const targetLabel = path.basename(outputFile);
  console.log(`\nConverting ${sourceLabel} → ${targetLabel} ...`);

  try {
    // Format identifier -> converter function.
    const converters = new Map([
      ['pdf', convertToPDF],
      ['docx', convertToDOCX],
      ['html', convertToHTML],
      ['txt', convertToTXT],
    ]);

    const convert = converters.get(format);
    if (convert === undefined) {
      throw new Error(`Unsupported format: ${format}`);
    }

    await convert(mdContent, outputFile);
    console.log(`✓ Successfully saved: ${outputFile}\n`);
  } catch (err) {
    console.error('✗ Error during conversion:', err.message);
    process.exit(1);
  }
}
|
|
|
|
/**
 * Splits a line of Markdown into styled segments.
 *
 * Recognised inline markers are `**…**` and `__…__` (style 'bold') and
 * backticks (style 'code'); the markers themselves are dropped. Everything
 * between or around such spans is returned as style 'normal'. Single `*` or
 * `_` emphasis is not recognised and stays in the surrounding normal text.
 *
 * @param {string} text - Raw inline Markdown.
 * @returns {Array<{text: string, style: string}>} Segments in source order;
 *          when nothing was produced, a single 'normal' segment holding `text`.
 */
function parseInlineMarkdown(text) {
  const styleByMarker = { '**': 'bold', '__': 'bold', '`': 'code' };
  const pattern = /(\*\*|__|`)(.*?)\1/g;
  const segments = [];
  let cursor = 0;

  for (let hit = pattern.exec(text); hit !== null; hit = pattern.exec(text)) {
    // Plain text sitting between the previous span and this one.
    if (hit.index > cursor) {
      segments.push({ text: text.slice(cursor, hit.index), style: 'normal' });
    }

    const [, marker, inner] = hit;
    segments.push({ text: inner, style: styleByMarker[marker] });

    cursor = pattern.lastIndex;
  }

  // Trailing plain text after the last span.
  if (cursor < text.length) {
    segments.push({ text: text.slice(cursor), style: 'normal' });
  }

  if (segments.length === 0) {
    return [{ text, style: 'normal' }];
  }
  return segments;
}
|
|
|
|
/**
 * Writes the styled segments of one line into the PDF.
 *
 * Font AND font size are set for every segment. Previously an inline-code
 * segment switched the size to 11 and later segments of the same paragraph
 * kept that size instead of returning to 12.
 *
 * @param {object} doc - The PDFKit document being written.
 * @param {Array<{text: string, style: string}>} parts - Output of parseInlineMarkdown.
 */
function renderInlinePartsToPDF(doc, parts) {
  parts.forEach((part, idx) => {
    if (part.style === 'bold') {
      doc.font('Helvetica-Bold').fontSize(12);
    } else if (part.style === 'code') {
      doc.font('Courier').fontSize(11);
    } else {
      doc.font('Helvetica').fontSize(12);
    }

    // Every segment except the last is written with `continued: true` so the
    // following segment is appended to the same run of text.
    doc.text(part.text, { continued: idx < parts.length - 1 });
  });
}

/**
 * Renders Markdown into a PDF file using PDFKit.
 *
 * Handles the marked token types heading, paragraph, list, code, blockquote
 * and space; tokens of any other type are skipped.
 *
 * @param {string} mdContent - Markdown source text.
 * @param {string} outputFile - Path of the PDF file to create.
 * @returns {Promise<void>} Settles when the output stream emits 'finish';
 *          rejects when the stream emits 'error'.
 */
async function convertToPDF(mdContent, outputFile) {
  const doc = new PDFDocument({ margin: 50 });
  const stream = fs.createWriteStream(outputFile);
  doc.pipe(stream);

  const tokens = marked.lexer(mdContent);

  for (const token of tokens) {
    switch (token.type) {
      case 'heading': {
        // Depth 1 -> 25pt, each deeper level 3pt smaller.
        const size = 28 - (token.depth * 3);
        doc.fontSize(size)
          .font('Helvetica-Bold')
          .text(token.text, { continued: false })
          .moveDown(0.5);
        break;
      }
      case 'paragraph': {
        renderInlinePartsToPDF(doc, parseInlineMarkdown(token.text));
        doc.moveDown(0.5);
        break;
      }
      case 'list': {
        token.items.forEach((item) => {
          // Bullet first, then the item text continues after it.
          doc.font('Helvetica')
            .fontSize(12)
            .text('• ', { indent: 20, continued: true });
          renderInlinePartsToPDF(doc, parseInlineMarkdown(item.text));
          doc.moveDown(0.3);
        });
        doc.moveDown(0.5);
        break;
      }
      case 'code': {
        doc.fontSize(10)
          .font('Courier')
          .fillColor('#333333')
          .text(token.text, { indent: 20 })
          .fillColor('#000000')
          .moveDown();
        break;
      }
      case 'blockquote': {
        doc.fontSize(11)
          .font('Helvetica-Oblique')
          .fillColor('#666666')
          .text(token.text, { indent: 20 })
          .fillColor('#000000')
          .moveDown();
        break;
      }
      case 'space': {
        doc.moveDown(0.5);
        break;
      }
      default:
        // Token types without a renderer here are intentionally skipped.
        break;
    }
  }

  doc.end();

  return new Promise((resolve, reject) => {
    stream.on('finish', resolve);
    stream.on('error', reject);
  });
}
|
|
|
|
/**
 * Converts one line of inline Markdown into docx TextRuns: bold segments are
 * bold, inline-code segments use the 'Courier New' font.
 *
 * @param {string} text - Raw inline Markdown.
 * @returns {Array<object>} One TextRun per segment from parseInlineMarkdown.
 */
function inlineMarkdownToTextRuns(text) {
  return parseInlineMarkdown(text).map((part) => new TextRun({
    text: part.text,
    bold: part.style === 'bold',
    font: part.style === 'code' ? 'Courier New' : undefined
  }));
}

/**
 * Renders Markdown into a Word (.docx) file using the docx library.
 *
 * Handles the marked token types heading, paragraph, list, code, blockquote
 * and space; tokens of any other type are skipped.
 *
 * Fixes over the previous version:
 *  - Blockquotes: `italics` is a run-level option, so it is now set on a
 *    TextRun instead of being passed to Paragraph.
 *  - Code blocks: each source line becomes its own 'Courier New' run with an
 *    explicit line break in front of every line after the first, instead of
 *    relying on a 'Code' paragraph style that this document never defines
 *    and on raw newlines inside a single run.
 *
 * @param {string} mdContent - Markdown source text.
 * @param {string} outputFile - Path of the .docx file to create.
 * @returns {Promise<void>}
 */
async function convertToDOCX(mdContent, outputFile) {
  const headingLevels = [
    HeadingLevel.HEADING_1,
    HeadingLevel.HEADING_2,
    HeadingLevel.HEADING_3,
    HeadingLevel.HEADING_4,
    HeadingLevel.HEADING_5,
    HeadingLevel.HEADING_6
  ];

  const tokens = marked.lexer(mdContent);
  const children = [];

  for (const token of tokens) {
    switch (token.type) {
      case 'heading': {
        children.push(
          new Paragraph({
            text: token.text,
            // Depth outside 1-6 falls back to the top heading level.
            heading: headingLevels[token.depth - 1] || HeadingLevel.HEADING_1
          })
        );
        break;
      }
      case 'paragraph': {
        children.push(new Paragraph({ children: inlineMarkdownToTextRuns(token.text) }));
        break;
      }
      case 'list': {
        token.items.forEach((item) => {
          children.push(new Paragraph({
            children: inlineMarkdownToTextRuns(item.text),
            bullet: { level: 0 }
          }));
        });
        break;
      }
      case 'code': {
        const codeRuns = String(token.text).split('\n').map((line, idx) => new TextRun({
          text: line,
          font: 'Courier New',
          // Line break before every line except the first one.
          ...(idx > 0 ? { break: 1 } : {})
        }));
        children.push(new Paragraph({
          children: codeRuns,
          // Kept from the previous version; only has an effect if a 'Code'
          // paragraph style is defined for the document.
          style: 'Code'
        }));
        break;
      }
      case 'blockquote': {
        children.push(new Paragraph({
          children: [new TextRun({ text: token.text, italics: true })],
          indent: { left: 720 }
        }));
        break;
      }
      case 'space': {
        children.push(new Paragraph({ text: '' }));
        break;
      }
      default:
        // Token types without a mapping here are intentionally skipped.
        break;
    }
  }

  const doc = new Document({
    sections: [{
      properties: {},
      children: children
    }]
  });

  const buffer = await docx.Packer.toBuffer(doc);
  fs.writeFileSync(outputFile, buffer);
}
|
|
|
|
/**
 * Escapes the characters that are significant in HTML text and attribute
 * values.
 *
 * @param {string} value - Raw text.
 * @returns {string} Text safe to place inside an HTML element.
 */
function escapeHtmlText(value) {
  return String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Renders Markdown into a standalone HTML page with an embedded stylesheet.
 *
 * The page title is the module-level `inputFile`'s base name with a trailing
 * ".md" removed. It is HTML-escaped because file names may contain
 * characters such as `<` or `&` that would otherwise corrupt the markup.
 *
 * @param {string} mdContent - Markdown source text.
 * @param {string} outputFile - Path of the HTML file to create.
 * @returns {Promise<void>}
 */
async function convertToHTML(mdContent, outputFile) {
  const html = marked.parse(mdContent);
  const safeTitle = escapeHtmlText(path.basename(inputFile, '.md'));
  const fullHTML = `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>${safeTitle}</title>
<style>
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
max-width: 800px;
margin: 50px auto;
padding: 20px;
line-height: 1.6;
color: #333;
}
h1, h2, h3, h4, h5, h6 {
margin-top: 24px;
margin-bottom: 16px;
font-weight: 600;
line-height: 1.25;
}
h1 { font-size: 2em; border-bottom: 1px solid #eaecef; padding-bottom: 0.3em; }
h2 { font-size: 1.5em; border-bottom: 1px solid #eaecef; padding-bottom: 0.3em; }
code {
background: #f6f8fa;
padding: 2px 6px;
border-radius: 3px;
font-family: 'Courier New', Courier, monospace;
font-size: 0.9em;
}
pre {
background: #f6f8fa;
padding: 16px;
border-radius: 6px;
overflow-x: auto;
line-height: 1.45;
}
pre code {
background: none;
padding: 0;
}
blockquote {
border-left: 4px solid #dfe2e5;
padding-left: 16px;
color: #6a737d;
margin: 16px 0;
}
a {
color: #0366d6;
text-decoration: none;
}
a:hover {
text-decoration: underline;
}
strong {
font-weight: 600;
}
</style>
</head>
<body>
${html}
</body>
</html>`;

  fs.writeFileSync(outputFile, fullHTML, 'utf8');
}
|
|
|
|
/**
 * Renders Markdown into a plain-text file.
 *
 * Headings are upper-cased and framed by lines of '=' as long as the heading
 * text; paragraphs and list items have their `**…**` / `__…__` bold markers
 * removed; code blocks and blockquotes are copied with simple spacing or a
 * ' > ' prefix. Tokens of any other type contribute nothing.
 *
 * @param {string} mdContent - Markdown source text.
 * @param {string} outputFile - Path of the text file to create.
 * @returns {Promise<void>}
 */
async function convertToTXT(mdContent, outputFile) {
  // Drops bold markers but keeps the text they wrap.
  const stripBold = (raw) =>
    raw.replace(/\*\*(.+?)\*\*/g, '$1').replace(/__(.+?)__/g, '$1');

  const chunks = [];

  marked.lexer(mdContent).forEach((token) => {
    const kind = token.type;

    if (kind === 'heading') {
      const rule = '='.repeat(token.text.length);
      chunks.push(`\n${rule}\n`);
      chunks.push(`${token.text.toUpperCase()}\n`);
      chunks.push(`${rule}\n\n`);
    } else if (kind === 'paragraph') {
      chunks.push(`${stripBold(token.text)}\n\n`);
    } else if (kind === 'list') {
      for (const item of token.items) {
        chunks.push(` • ${stripBold(item.text)}\n`);
      }
      chunks.push('\n');
    } else if (kind === 'code') {
      chunks.push(`\n${token.text}\n\n`);
    } else if (kind === 'blockquote') {
      chunks.push(` > ${token.text}\n\n`);
    }
  });

  fs.writeFileSync(outputFile, chunks.join(''), 'utf8');
}
|
|
|
|
// Entry point. convertMarkdown() reads the input file and opens the save
// dialog before its own try/catch, so a rejection from those steps would
// otherwise be an unhandled promise rejection. Report it and exit non-zero.
convertMarkdown().catch((err) => {
  console.error('✗ Error:', err instanceof Error ? err.message : err);
  process.exit(1);
});