Merge branch 'feature/35-backend-llm-chat-gpt-integration-s4-10' into 'develop'

Feature/35 backend llm chat gpt integration s4 10 See merge request proj-wise2526-video2document/video2document!42
2026-06-15 18:01:52 +02:00 · 2025-12-15 14:33:31 +01:00
parent d647f53790 1683502aa1
commit 4083d5a5cb
4 changed files with 306 additions and 32 deletions
@@ -50,7 +50,7 @@ const module_exports = {
                body: JSON.stringify({
                    model: "openai-gpt-oss-120b",
                    messages: [
-                        { role: "system", content: "You are a helpful assistant that generates documents from transcripts." },
+                        { role: "system", content: "You are a helpful assistant that generates HTML documents from transcripts. Output only valid HTML content without any preamble, explanations, or markdown formatting." },
                        { role: "user", content: promptText }
                    ],
                    temperature: 0
@@ -71,7 +71,7 @@ const module_exports = {
            let inputTranscriptName = path.basename(transcriptPath, path.extname(transcriptPath)); // Name for the output file 
            console.log(inputTranscriptName);
-            const outPath = path.join(outputDir, `${inputTranscriptName}.md`); // Output file path & name to make naming dynamic. Pulled from input transcript name
+            const outPath = path.join(outputDir, `${inputTranscriptName}.html`); // Output file path & name to make naming dynamic. Pulled from input transcript name
            fs.writeFileSync(outPath, output, "utf8"); // Write output to file
            console.log("Generated document written to:", outPath);
@@ -91,7 +91,7 @@ if (require.main === module) {
        if (args.length < 2) {
            console.error("Usage: node llm-openai-gpt.js <transcriptPath> <documentTypePath> [language]");
-            console.error("Example: node llm-openai-gpt.js ./transcript.json ./docType.json de");
+            console.error("Example: node llm-openai-gpt.js ./transcript.json ./docType.txt de");
            process.exit(1);
        }
@@ -1,15 +1,17 @@
-const outputDir = path.join(__dirname, "../../../storage/documents"); //  path for output directory
+const fs = require('fs');
 const path = require('path');
 const outputDir = path.join(__dirname, "../../../storage/documents"); // path for output directory
 if (!fs.existsSync(outputDir)) {
    fs.mkdirSync(outputDir, { recursive: true }); // Create output directory if it doesn't exist
 }
 // Ensure Google API key is set in environment variables: export GOOGLE_API_KEY="your_api_key_here"
 const GEMINI_API_KEY = process.env.GOOGLE_API_KEY; // Ensure Google API key is set in environment variables: export GOOGLE_API_KEY="your_api_key_here"
-const GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent"; //URL for the REST call, used model and action
+const GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent"; // URL for the REST call, used model and action
-module.exports = {
+const module_exports = {
    name: "llm-gemini",
    type: "llm",
    displayname: "Gemini LLM",
@@ -81,4 +83,52 @@ module.exports = {
            }
        })
    }
-};
+};
 module.exports = module_exports;
 // CLI Mode: Allow direct execution
 if (require.main === module) {
    // Writes every given message to stderr, then ends the process with exit status 1.
    const abort = (...messages) => {
        for (const message of messages) {
            console.error(message);
        }
        process.exit(1);
    };
    /**
     * Command-line entry point, used when this file is executed directly.
     *
     * Reads <transcriptPath> <documentTypePath> [language] from the command line,
     * verifies that the API key and both input files are present, and then hands
     * the arguments to module_exports.createDocumentFromTranscript.
     * Any failed check terminates the process with exit status 1.
     */
    const runCli = async () => {
        const cliArgs = process.argv.slice(2);
        // Both positional paths are mandatory; the language is optional.
        if (cliArgs.length < 2) {
            abort(
                "Usage: node llm-gemini.js <transcriptPath> <documentTypePath> [language]",
                "Example: node llm-gemini.js ./transcript.json ./docType.txt de"
            );
        }
        const [transcriptPath, documentTypePath, language] = cliArgs;
        // Without an API key the request cannot be authenticated, so stop early.
        if (!GEMINI_API_KEY) {
            abort(
                "ERROR: GOOGLE_API_KEY environment variable is not set!",
                "Please set it with: export GOOGLE_API_KEY='your_api_key_here'"
            );
        }
        // Both input files have to exist before generation starts.
        if (!fs.existsSync(transcriptPath)) {
            abort(`ERROR: Transcript file not found: ${transcriptPath}`);
        }
        if (!fs.existsSync(documentTypePath)) {
            abort(`ERROR: Document type file not found: ${documentTypePath}`);
        }
        const startupReport = [
            "Starting document generation...",
            `Transcript: ${transcriptPath}`,
            `Document Type: ${documentTypePath}`,
            `Language: ${language || 'en (default)'}`
        ];
        for (const line of startupReport) {
            console.log(line);
        }
        // Fall back to English when no language argument was supplied.
        const effectiveLanguage = language || 'en';
        await module_exports.createDocumentFromTranscript(
            transcriptPath,
            documentTypePath,
            effectiveLanguage
        );
        console.log("Done!");
    };
    runCli();
 }
@@ -1,41 +1,42 @@
 const fs = require('fs');
 const path = require('path');
-const outputDir = path.join(__dirname, "../../../storage/documents");
+const outputDir = path.join(__dirname, "../../../storage/documents"); // path for output directory
 if (!fs.existsSync(outputDir)) {
-    fs.mkdirSync(outputDir, { recursive: true });
+    fs.mkdirSync(outputDir, { recursive: true }); // Create output directory if it doesn't exist
 }
 // Ensure SAIA API key is set in environment variables: export SAIA_API_KEY="your_api_key_here"
 const SAIA_API_KEY = process.env.SAIA_API_KEY;
-const SAIA_URL = "https://chat-ai.academiccloud.de/v1/chat/completions";
+const SAIA_URL = "https://chat-ai.academiccloud.de/v1/chat/completions"; // URL for the REST call, used model and action
 const module_exports = {
-    name: "llm-saia_llama_3.3",
+    name: "qwen3-235b-a22b",
    type: "llm",
-    displayname: "LLAMA",
+    displayname: "QWEN 3 235B",
-    description: "Generates documents using Llama 3.3 70B Instruct via SAIA platform",
+    description: "Generates documents using QWEN 3 235B via SAIA platform",
    async function(parameter) {
        try {
-            console.log("SAIA Llama 3.3 70B module invoked with parameters:", parameter);
+            console.log("SAIA QWEN 3 235B module invoked with parameters:", parameter);
-            await this.createDocumentFromTranscript(
+            await this.createDocumentFromTranscript( // Call the function to create document with transcript, document type and language
-                parameter.inputTranscriptPath,
+                parameter.inputTranscriptPath, // Path to input transcript file
-                parameter.documentTypePath,
+                parameter.documentTypePath, // Path to document type file which is chosen in the front end by the user
-                parameter.language
+                parameter.language // Language for the document which is chosen in the front end by the user
            );
        } catch (error) {
-            console.error("Error in SAIA Llama 3.3 70B module:", error);
+            console.error("Error in SAIA QWEN 3 235B module:", error);
        }
    },
-    createDocumentFromTranscript: async function(transcriptPath, documentTypePath, language = "en") {
+    createDocumentFromTranscript: async function(transcriptPath, documentTypePath, language = "en") { // default language is English
        try {
-            const transcript = await fs.promises.readFile(transcriptPath, "utf-8");
+            const transcript = await fs.promises.readFile(transcriptPath, "utf-8"); // read transcript file from Path
-            const documentType = await fs.promises.readFile(documentTypePath, "utf-8");
+            const documentType = await fs.promises.readFile(documentTypePath, "utf-8"); // read document type from Path
-            const promptText = `${documentType}, in language ${language}, transcript:\n\n${transcript}`;
+            const promptText = `${documentType}, in language ${language}, transcript:\n\n${transcript}`; // combine doc type, language and transcript - Change prompt here if needed
            // --- REST CALL ---
            const response = await fetch(SAIA_URL, {
@@ -46,28 +47,31 @@ const module_exports = {
                    "Content-Type": "application/json"
                },
                body: JSON.stringify({
-                    model: "llama-3.3-70b-instruct",  // Korrekter Modellname!
+                    model: "qwen3-235b-a22b",  
                    messages: [
-                        { role: "system", content: "You are a helpful assistant that generates documents from transcripts." },
+                        { role: "system", content: "You are a helpful assistant that generates HTML documents from transcripts. Output only valid HTML content without any preamble, explanations, or markdown formatting." },
                        { role: "user", content: promptText }
                    ],
                    temperature: 0
                })
            });
-            if (!response.ok) {
+            if (!response.ok) { // ok is true when a response was successful
                const text = await response.text();
                throw new Error(`SAIA API error (${response.status}): ${text}`);
            }
            const data = await response.json();
            // Get generated text from response or default to empty string (if null)
            // SAIA uses OpenAI-compatible structure: data.choices[x].message.content
            const output = data.choices?.[0]?.message?.content || "";
-            let inputTranscriptName = path.basename(transcriptPath, path.extname(transcriptPath));
+            let inputTranscriptName = path.basename(transcriptPath, path.extname(transcriptPath)); // Name for the output file
            console.log(inputTranscriptName);
-            const outPath = path.join(outputDir, `${inputTranscriptName}.md`);
+            const outPath = path.join(outputDir, `${inputTranscriptName}.html`); // Output file path & name to make naming dynamic. Pulled from input transcript name
-            fs.writeFileSync(outPath, output, "utf8");
+            fs.writeFileSync(outPath, output, "utf8"); // Write output to file
            console.log("Generated document written to:", outPath);
@@ -79,24 +83,27 @@ const module_exports = {
 module.exports = module_exports;
 // CLI Mode: Allow direct execution
 if (require.main === module) {
    (async () => {
        const args = process.argv.slice(2);
        if (args.length < 2) {
-            console.error("Usage: node llm-llama-3.3.js <transcriptPath> <documentTypePath> [language]");
+            console.error("Usage: node qwen3.js <transcriptPath> <documentTypePath> [language]");
-            console.error("Example: node llm-llama-3.3.js ./transcript.json ./docType.json de");
+            console.error("Example: node qwen3.js ./transcript.json ./docType.txt de");
            process.exit(1);
        }
        const [transcriptPath, documentTypePath, language] = args;
        // Check if API key is set
        if (!SAIA_API_KEY) {
            console.error("ERROR: SAIA_API_KEY environment variable is not set!");
            console.error("Please set it with: export SAIA_API_KEY='your_api_key_here'");
            process.exit(1);
        }
        // Check if files exist
        if (!fs.existsSync(transcriptPath)) {
            console.error(`ERROR: Transcript file not found: ${transcriptPath}`);
            process.exit(1);
@@ -0,0 +1,217 @@
 Generate a structured meeting report in HTML using STRUCTURE and STYLE.
 Output ONLY the final HTML document — no meta comments, no explanations.
 Follow exactly the STRUCTURE defined below.
 Follow exactly the STYLE rules.
 Use timestamps in HH:MM:SS format.
 If information is missing, use: Unclear:<reason>.
 ==================== STRUCTURE & RULES ====================
 {
  "FORMAT": "HTML",
  "STRUCTURE": {
    "titlepage": [
      "title",
      "date",
      "start",
      "end",
      "duration",
      "location",
      "host",
      "participants"
    ],
    "toc": "[section](#anchor) — HH:MM:SS",
    "section": {
      "h2": "<topic> — HH:MM:SS",
      "summary": "exactly 1 concise sentence",
      "key_points": "maximum 5 bullet points; quotes optional",
      "decisions": "list items formatted as: decision text | owner | due date",
      "actions": "HTML table: id | task | owner | due | status"
    },
    "exec_summary": "exactly 3 short sentences",
    "consolidated": [
      "decisions",
      "actions"
    ],
    "appendix": "optional"
  },
  "STYLE": {
    "tone": "neutral, concise, professional",
    "ts_format": "HH:MM:SS",
    "no_meta": true
  },
  "PROCESS": {
    "timestamps": "use transcript timestamps if present; otherwise estimate minimal",
    "speakers": "use names if available; else Speaker X",
    "long_transcripts": "split → summarize → merge",
    "unclear": "Unclear:<reason>"
  },
  "PROMPT_SNIPPET": "Generate meeting report in HTML using STRUCTURE and STYLE. Output only the report."
 }
 ============================================================
 Insert all generated content into the following TEMPLATE (written in Markdown shorthand; render every element as the equivalent HTML):
 # {{title}}
 **Date:** {{date}}  
 **Start:** {{start}}  
 **End:** {{end}}  
 **Duration:** {{duration}}  
 **Location:** {{location}}  
 **Host:** {{host}}  
 **Participants:** {{participants}}
 ---
 ## Table of Contents
 {{toc}}
 ---
 Generate a structured meeting report in HTML using STRUCTURE and STYLE.
 Output ONLY the final HTML document — no meta comments, no explanations.
 Follow exactly the STRUCTURE defined below.
 Follow exactly the STYLE rules.
 Use timestamps in HH:MM:SS format.
 If information is missing, use: UNKLAR:<reason>.
 ==================== STRUCTURE & RULES ====================
 {
  "FORMAT": "HTML",
  "STRUCTURE": {
    "titlepage": [
      "title",
      "date",
      "start",
      "end",
      "duration",
      "location",
      "host",
      "participants"
    ],
    "toc": "[section](#anchor) — HH:MM:SS",
    "section": {
      "h2": "<topic> — HH:MM:SS",
      "summary": "exactly 1 concise sentence",
      "key_points": "maximum 5 bullet points; quotes optional",
      "decisions": "list items formatted as: decision text | owner | due date",
      "actions": "HTML table: id | task | owner | due | status"
    },
    "exec_summary": "exactly 3 short sentences",
    "consolidated": [
      "decisions",
      "actions"
    ],
    "appendix": "optional"
  },
  "STYLE": {
    "tone": "neutral, concise, professional",
    "ts_format": "HH:MM:SS",
    "no_meta": true
  },
  "PROCESS": {
    "timestamps": "use transcript timestamps if present; otherwise estimate minimal",
    "speakers": "use names if available; else Speaker X",
    "long_transcripts": "split → summarize → merge",
    "unclear": "UNKLAR:<reason>"
  },
  "PROMPT_SNIPPET": "Generate meeting report in HTML using STRUCTURE and STYLE. Output only the report."
 }
 ============================================================
 Insert all generated content into the following TEMPLATE (written in Markdown shorthand; render every element as the equivalent HTML):
 # {{title}}
 **Date:** {{date}}  
 **Start:** {{start}}  
 **End:** {{end}}  
 **Duration:** {{duration}}  
 **Location:** {{location}}  
 **Host:** {{host}}  
 **Participants:** {{participants}}
 ---
 ## Table of Contents
 {{toc}}
 ---
 ## Executive Summary
 {{exec_summary}}
 ---
 ## Sections
 {{sections}}
 ---
 ## Consolidated Decisions
 {{consolidated_decisions}}
 ---
 ## Consolidated Actions
 {{consolidated_actions}}
 ---
 ## Appendix
 {{appendix}}
 ============================================================
 Final Requirement:
 Output ONLY the completed HTML meeting report.
 ## Executive Summary
 {{exec_summary}}
 ---
 ## Sections
 {{sections}}
 ---
 ## Consolidated Decisions
 {{consolidated_decisions}}
 ---
 ## Consolidated Actions
 {{consolidated_actions}}
 ---
 ## Appendix
 {{appendix}}
 ============================================================
 Final Requirement:
 Output ONLY the completed HTML meeting report.