mirror of
https://gitlab.rlp.net/proj-wise2526-video2document/video2document.git
synced 2026-06-16 02:11:52 +02:00
Refactor document generation to output HTML format and update system instructions for clarity
This commit is contained in:
@@ -1,14 +1,15 @@
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
const outputDir = path.join(__dirname, "../../../storage/documents");
|
||||
const outputDir = path.join(__dirname, "../../../storage/documents"); // path for output directory
|
||||
|
||||
if (!fs.existsSync(outputDir)) {
|
||||
fs.mkdirSync(outputDir, { recursive: true });
|
||||
fs.mkdirSync(outputDir, { recursive: true }); // Create output directory if it doesn't exist
|
||||
}
|
||||
|
||||
// Ensure SAIA API key is set in environment variables: export SAIA_API_KEY="your_api_key_here"
|
||||
const SAIA_API_KEY = process.env.SAIA_API_KEY;
|
||||
const SAIA_URL = "https://chat-ai.academiccloud.de/v1/chat/completions";
|
||||
const SAIA_URL = "https://chat-ai.academiccloud.de/v1/chat/completions"; // URL for the REST call, used model and action
|
||||
|
||||
const module_exports = {
|
||||
name: "qwen3-235b-a22b",
|
||||
@@ -20,10 +21,10 @@ const module_exports = {
|
||||
try {
|
||||
console.log("SAIA QWEN 3 235B module invoked with parameters:", parameter);
|
||||
|
||||
await this.createDocumentFromTranscript(
|
||||
parameter.inputTranscriptPath,
|
||||
parameter.documentTypePath,
|
||||
parameter.language
|
||||
await this.createDocumentFromTranscript( // Call the function to create document with transcript, document type and language
|
||||
parameter.inputTranscriptPath, // Path to input transcript file
|
||||
parameter.documentTypePath, // Path to document type file which is chosen in the front end by the user
|
||||
parameter.language // Language for the document which is chosen in the front end by the user
|
||||
);
|
||||
|
||||
} catch (error) {
|
||||
@@ -31,11 +32,11 @@ const module_exports = {
|
||||
}
|
||||
},
|
||||
|
||||
createDocumentFromTranscript: async function(transcriptPath, documentTypePath, language = "en") {
|
||||
createDocumentFromTranscript: async function(transcriptPath, documentTypePath, language = "en") { // default language is English
|
||||
try {
|
||||
const transcript = await fs.promises.readFile(transcriptPath, "utf-8");
|
||||
const documentType = await fs.promises.readFile(documentTypePath, "utf-8");
|
||||
const promptText = `${documentType}, in language ${language}, transcript:\n\n${transcript}`;
|
||||
const transcript = await fs.promises.readFile(transcriptPath, "utf-8"); // read transcript file from Path
|
||||
const documentType = await fs.promises.readFile(documentTypePath, "utf-8"); // read document type from Path
|
||||
const promptText = `${documentType}, in language ${language}, transcript:\n\n${transcript}`; // combine doc type, language and transcript - Change prompt here if needed
|
||||
|
||||
// --- REST CALL ---
|
||||
const response = await fetch(SAIA_URL, {
|
||||
@@ -48,26 +49,29 @@ const module_exports = {
|
||||
body: JSON.stringify({
|
||||
model: "qwen3-235b-a22b",
|
||||
messages: [
|
||||
{ role: "system", content: "You are a helpful assistant that generates documents from transcripts. Do NOT include any <think>, reasoning, chain-of-thought, or internal analysis. Only output the final document content." },
|
||||
{ role: "system", content: "You are a helpful assistant that generates HTML documents from transcripts. Output only valid HTML content without any preamble, explanations, or markdown formatting." },
|
||||
{ role: "user", content: promptText }
|
||||
],
|
||||
temperature: 0
|
||||
})
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
if (!response.ok) { // ok is true when a response was successful
|
||||
const text = await response.text();
|
||||
throw new Error(`SAIA API error (${response.status}): ${text}`);
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
|
||||
// Get generated text from response or default to empty string (if null)
|
||||
// SAIA uses OpenAI-compatible structure: data.choices[x].message.content
|
||||
const output = data.choices?.[0]?.message?.content || "";
|
||||
|
||||
let inputTranscriptName = path.basename(transcriptPath, path.extname(transcriptPath));
|
||||
let inputTranscriptName = path.basename(transcriptPath, path.extname(transcriptPath)); // Name for the output file
|
||||
console.log(inputTranscriptName);
|
||||
|
||||
const outPath = path.join(outputDir, `${inputTranscriptName}.md`);
|
||||
fs.writeFileSync(outPath, output, "utf8");
|
||||
const outPath = path.join(outputDir, `${inputTranscriptName}.html`); // Output file path & name to make naming dynamic. Pulled from input transcript name
|
||||
fs.writeFileSync(outPath, output, "utf8"); // Write output to file
|
||||
|
||||
console.log("Generated document written to:", outPath);
|
||||
|
||||
@@ -79,24 +83,27 @@ const module_exports = {
|
||||
|
||||
module.exports = module_exports;
|
||||
|
||||
// CLI Mode: Allow direct execution
|
||||
if (require.main === module) {
|
||||
(async () => {
|
||||
const args = process.argv.slice(2);
|
||||
|
||||
if (args.length < 2) {
|
||||
console.error("Usage: node quen3.js <transcriptPath> <documentTypePath> [language]");
|
||||
console.error("Example: node quen3.js ./transcript.json ./docType.json de");
|
||||
console.error("Usage: node qwen3.js <transcriptPath> <documentTypePath> [language]");
|
||||
console.error("Example: node qwen3.js ./transcript.json ./docType.txt de");
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const [transcriptPath, documentTypePath, language] = args;
|
||||
|
||||
// Check if API key is set
|
||||
if (!SAIA_API_KEY) {
|
||||
console.error("ERROR: SAIA_API_KEY environment variable is not set!");
|
||||
console.error("Please set it with: export SAIA_API_KEY='your_api_key_here'");
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Check if files exist
|
||||
if (!fs.existsSync(transcriptPath)) {
|
||||
console.error(`ERROR: Transcript file not found: ${transcriptPath}`);
|
||||
process.exit(1);
|
||||
|
||||
Reference in New Issue
Block a user