Merge branch 'develop' into 'feature/35-backend-llm-chat-gpt-integration-s4-10'

# Conflicts: # services/modules/llm-gemini/gemini.js
Update document processing to use prompts from styles and add structured meeting report template
2026-06-15 18:01:52 +02:00 · 2025-12-15 14:07:32 +01:00 · 2025-12-15 14:01:51 +01:00 · 2025-12-15 13:55:40 +01:00 · 2025-12-15 13:53:14 +01:00 · 2025-12-14 16:53:35 +01:00
4 changed files with 306 additions and 32 deletions
@@ -50,7 +50,7 @@ const module_exports = {
                body: JSON.stringify({
                    model: "openai-gpt-oss-120b",
                    messages: [
-                        { role: "system", content: "You are a helpful assistant that generates documents from transcripts." },
+                        { role: "system", content: "You are a helpful assistant that generates HTML documents from transcripts. Output only valid HTML content without any preamble, explanations, or markdown formatting." },
                        { role: "user", content: promptText }
                    ],
                    temperature: 0
@@ -71,7 +71,7 @@ const module_exports = {
            let inputTranscriptName = path.basename(transcriptPath, path.extname(transcriptPath)); // Name for the output file 
            console.log(inputTranscriptName);

-            const outPath = path.join(outputDir, `${inputTranscriptName}.md`); // Output file path & name to make naming dynamic. Pulled from input transcript name
+            const outPath = path.join(outputDir, `${inputTranscriptName}.html`); // Output file path & name to make naming dynamic. Pulled from input transcript name
            fs.writeFileSync(outPath, output, "utf8"); // Write output to file

            console.log("Generated document written to:", outPath);
@@ -91,7 +91,7 @@ if (require.main === module) {
        
        if (args.length < 2) {
            console.error("Usage: node llm-openai-gpt.js <transcriptPath> <documentTypePath> [language]");
-            console.error("Example: node llm-openai-gpt.js ./transcript.json ./docType.json de");
+            console.error("Example: node llm-openai-gpt.js ./transcript.json ./docType.txt de");
            process.exit(1);
        }

@@ -1,15 +1,17 @@
-const outputDir = path.join(__dirname, "../../../storage/documents"); //  path for output directory
+const fs = require('fs');
+const path = require('path');
+
+const outputDir = path.join(__dirname, "../../../storage/documents"); // path for output directory

 if (!fs.existsSync(outputDir)) {
    fs.mkdirSync(outputDir, { recursive: true }); // Create output directory if it doesn't exist
 }

 // Ensure Google API key is set in environment variables: export GOOGLE_API_KEY="your_api_key_here"
-
 const GEMINI_API_KEY = process.env.GOOGLE_API_KEY; // Ensure Google API key is set in environment variables: export GOOGLE_API_KEY="your_api_key_here"
-const GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent"; //URL for the REST call, used model and action
+const GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent"; // URL for the REST call, used model and action

-module.exports = {
+const module_exports = {
    name: "llm-gemini",
    type: "llm",
    displayname: "Gemini LLM",
@@ -81,4 +83,52 @@ module.exports = {
            }
        })
    }
-};
+};
+
+module.exports = module_exports;
+
+// CLI mode: when this file is executed directly (not loaded via require), generate a document from the command-line arguments.
+if (require.main === module) { // true only when Node started this file as the entry script
+    (async () => { // async IIFE so `await` can be used here; NOTE(review): this promise has no .catch() — confirm createDocumentFromTranscript handles its own errors
+        const args = process.argv.slice(2); // skip the node executable and script path, keeping only the user-supplied arguments
+        
+        if (args.length < 2) { // transcript path and document type path are required; language is optional
+            console.error("Usage: node llm-gemini.js <transcriptPath> <documentTypePath> [language]");
+            console.error("Example: node llm-gemini.js ./transcript.json ./docType.txt de");
+            process.exit(1); // non-zero exit code signals the usage error to the calling shell
+        }
+
+        const [transcriptPath, documentTypePath, language] = args; // language is undefined when only two arguments were given
+
+        // Fail early if the API key is missing (GEMINI_API_KEY is read from the GOOGLE_API_KEY environment variable)
+        if (!GEMINI_API_KEY) {
+            console.error("ERROR: GOOGLE_API_KEY environment variable is not set!");
+            console.error("Please set it with: export GOOGLE_API_KEY='your_api_key_here'");
+            process.exit(1);
+        }
+
+        // Fail early with a clear message if either input file does not exist
+        if (!fs.existsSync(transcriptPath)) {
+            console.error(`ERROR: Transcript file not found: ${transcriptPath}`);
+            process.exit(1);
+        }
+
+        if (!fs.existsSync(documentTypePath)) {
+            console.error(`ERROR: Document type file not found: ${documentTypePath}`);
+            process.exit(1);
+        }
+
+        console.log("Starting document generation...");
+        console.log(`Transcript: ${transcriptPath}`);
+        console.log(`Document Type: ${documentTypePath}`);
+        console.log(`Language: ${language || 'en (default)'}`);
+        
+        await module_exports.createDocumentFromTranscript( // same entry point the module exposes to other callers
+            transcriptPath,
+            documentTypePath,
+            language || 'en' // falls back to 'en' when no language argument was given (or it is an empty string)
+        );
+        
+        console.log("Done!");
+    })();
+}
@@ -1,41 +1,42 @@
 const fs = require('fs');
 const path = require('path');

-const outputDir = path.join(__dirname, "../../../storage/documents");
+const outputDir = path.join(__dirname, "../../../storage/documents"); // path for output directory

 if (!fs.existsSync(outputDir)) {
-    fs.mkdirSync(outputDir, { recursive: true });
+    fs.mkdirSync(outputDir, { recursive: true }); // Create output directory if it doesn't exist
 }

+// Ensure SAIA API key is set in environment variables: export SAIA_API_KEY="your_api_key_here"
 const SAIA_API_KEY = process.env.SAIA_API_KEY;
-const SAIA_URL = "https://chat-ai.academiccloud.de/v1/chat/completions";
+const SAIA_URL = "https://chat-ai.academiccloud.de/v1/chat/completions"; // URL for the REST call, used model and action

 const module_exports = {
-    name: "llm-saia_llama_3.3",
+    name: "qwen3-235b-a22b",
    type: "llm",
-    displayname: "LLAMA",
-    description: "Generates documents using Llama 3.3 70B Instruct via SAIA platform",
+    displayname: "QWEN 3 235B",
+    description: "Generates documents using QWEN 3 235B via SAIA platform",

    async function(parameter) {
        try {
-            console.log("SAIA Llama 3.3 70B module invoked with parameters:", parameter);
+            console.log("SAIA QWEN 3 235B module invoked with parameters:", parameter);

-            await this.createDocumentFromTranscript(
-                parameter.inputTranscriptPath,
-                parameter.documentTypePath,
-                parameter.language
+            await this.createDocumentFromTranscript( // Call the function to create document with transcript, document type and language
+                parameter.inputTranscriptPath, // Path to input transcript file
+                parameter.documentTypePath, // Path to document type file which is chosen in the front end by the user
+                parameter.language // Language for the document which is chosen in the front end by the user
            );

        } catch (error) {
-            console.error("Error in SAIA Llama 3.3 70B module:", error);
+            console.error("Error in SAIA QWEN 3 235B module:", error);
        }
    },

-    createDocumentFromTranscript: async function(transcriptPath, documentTypePath, language = "en") {
+    createDocumentFromTranscript: async function(transcriptPath, documentTypePath, language = "en") { // default language is English
        try {
-            const transcript = await fs.promises.readFile(transcriptPath, "utf-8");
-            const documentType = await fs.promises.readFile(documentTypePath, "utf-8");
-            const promptText = `${documentType}, in language ${language}, transcript:\n\n${transcript}`;
+            const transcript = await fs.promises.readFile(transcriptPath, "utf-8"); // read transcript file from Path
+            const documentType = await fs.promises.readFile(documentTypePath, "utf-8"); // read document type from Path
+            const promptText = `${documentType}, in language ${language}, transcript:\n\n${transcript}`; // combine doc type, language and transcript - Change prompt here if needed

            // --- REST CALL ---
            const response = await fetch(SAIA_URL, {
@@ -46,28 +47,31 @@ const module_exports = {
                    "Content-Type": "application/json"
                },
                body: JSON.stringify({
-                    model: "llama-3.3-70b-instruct",  // Korrekter Modellname!
+                    model: "qwen3-235b-a22b",  
                    messages: [
-                        { role: "system", content: "You are a helpful assistant that generates documents from transcripts." },
+                        { role: "system", content: "You are a helpful assistant that generates HTML documents from transcripts. Output only valid HTML content without any preamble, explanations, or markdown formatting." },
                        { role: "user", content: promptText }
                    ],
                    temperature: 0
                })
            });

-            if (!response.ok) {
+            if (!response.ok) { // ok is true when a response was successful
                const text = await response.text();
                throw new Error(`SAIA API error (${response.status}): ${text}`);
            }

            const data = await response.json();
+
+            // Get generated text from response or default to empty string (if null)
+            // SAIA uses OpenAI-compatible structure: data.choices[x].message.content
            const output = data.choices?.[0]?.message?.content || "";

-            let inputTranscriptName = path.basename(transcriptPath, path.extname(transcriptPath));
+            let inputTranscriptName = path.basename(transcriptPath, path.extname(transcriptPath)); // Name for the output file
            console.log(inputTranscriptName);

-            const outPath = path.join(outputDir, `${inputTranscriptName}.md`);
-            fs.writeFileSync(outPath, output, "utf8");
+            const outPath = path.join(outputDir, `${inputTranscriptName}.html`); // Output file path & name to make naming dynamic. Pulled from input transcript name
+            fs.writeFileSync(outPath, output, "utf8"); // Write output to file

            console.log("Generated document written to:", outPath);

@@ -79,24 +83,27 @@ const module_exports = {

 module.exports = module_exports;

+// CLI Mode: Allow direct execution
 if (require.main === module) {
    (async () => {
        const args = process.argv.slice(2);
        
        if (args.length < 2) {
-            console.error("Usage: node llm-llama-3.3.js <transcriptPath> <documentTypePath> [language]");
-            console.error("Example: node llm-llama-3.3.js ./transcript.json ./docType.json de");
+            console.error("Usage: node qwen3.js <transcriptPath> <documentTypePath> [language]");
+            console.error("Example: node qwen3.js ./transcript.json ./docType.txt de");
            process.exit(1);
        }

        const [transcriptPath, documentTypePath, language] = args;

+        // Check if API key is set
        if (!SAIA_API_KEY) {
            console.error("ERROR: SAIA_API_KEY environment variable is not set!");
            console.error("Please set it with: export SAIA_API_KEY='your_api_key_here'");
            process.exit(1);
        }

+        // Check if files exist
        if (!fs.existsSync(transcriptPath)) {
            console.error(`ERROR: Transcript file not found: ${transcriptPath}`);
            process.exit(1);
@@ -0,0 +1,217 @@
+Generate a structured meeting report in HTML using STRUCTURE and STYLE.
+Output ONLY the final HTML document — no meta comments, no explanations.
+
+Follow exactly the STRUCTURE defined below.
+Follow exactly the STYLE rules.
+Use timestamps in HH:MM:SS format.
+If information is missing, use: Unclear:<reason>.
+
+==================== STRUCTURE & RULES ====================
+
+{
+  "FORMAT": "HTML",
+
+  "STRUCTURE": {
+    "titlepage": [
+      "title",
+      "date",
+      "start",
+      "end",
+      "duration",
+      "location",
+      "host",
+      "participants"
+    ],
+
+    "toc": "[section](#anchor) — HH:MM:SS",
+
+    "section": {
+      "h2": "<topic> — HH:MM:SS",
+      "summary": "exactly 1 concise sentence",
+      "key_points": "maximum 5 bullet points; quotes optional",
+      "decisions": "list items formatted as: decision text | owner | due date",
+      "actions": "HTML table: id | task | owner | due | status"
+    },
+
+    "exec_summary": "exactly 3 short sentences",
+
+    "consolidated": [
+      "decisions",
+      "actions"
+    ],
+
+    "appendix": "optional"
+  },
+
+  "STYLE": {
+    "tone": "neutral, concise, professional",
+    "ts_format": "HH:MM:SS",
+    "no_meta": true
+  },
+
+  "PROCESS": {
+    "timestamps": "use transcript timestamps if present; otherwise estimate minimal",
+    "speakers": "use names if available; else Speaker X",
+    "long_transcripts": "split → summarize → merge",
+    "unclear": "Unclear:<reason>"
+  },
+
+  "PROMPT_SNIPPET": "Generate meeting report in HTML using STRUCTURE and STYLE. Output only the report."
+}
+
+============================================================
+
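+Insert all generated content into the following TEMPLATE, rendering its Markdown-style headings, bold labels, and horizontal rules as the equivalent HTML elements: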
+
+# {{title}}
+
+**Date:** {{date}}  
+**Start:** {{start}}  
+**End:** {{end}}  
+**Duration:** {{duration}}  
+**Location:** {{location}}  
+**Host:** {{host}}  
+**Participants:** {{participants}}
+
+---
+
+## Table of Contents
+{{toc}}
+
+---
+Generate a structured meeting report in HTML using STRUCTURE and STYLE.
+Output ONLY the final HTML document — no meta comments, no explanations.
+
+Follow exactly the STRUCTURE defined below.
+Follow exactly the STYLE rules.
+Use timestamps in HH:MM:SS format.
+If information is missing, use: UNKLAR:<reason>.
+
+==================== STRUCTURE & RULES ====================
+
+{
+  "FORMAT": "HTML",
+
+  "STRUCTURE": {
+    "titlepage": [
+      "title",
+      "date",
+      "start",
+      "end",
+      "duration",
+      "location",
+      "host",
+      "participants"
+    ],
+
+    "toc": "[section](#anchor) — HH:MM:SS",
+
+    "section": {
+      "h2": "<topic> — HH:MM:SS",
+      "summary": "exactly 1 concise sentence",
+      "key_points": "maximum 5 bullet points; quotes optional",
+      "decisions": "list items formatted as: decision text | owner | due date",
+      "actions": "HTML table: id | task | owner | due | status"
+    },
+
+    "exec_summary": "exactly 3 short sentences",
+
+    "consolidated": [
+      "decisions",
+      "actions"
+    ],
+
+    "appendix": "optional"
+  },
+
+  "STYLE": {
+    "tone": "neutral, concise, professional",
+    "ts_format": "HH:MM:SS",
+    "no_meta": true
+  },
+
+  "PROCESS": {
+    "timestamps": "use transcript timestamps if present; otherwise estimate minimal",
+    "speakers": "use names if available; else Speaker X",
+    "long_transcripts": "split → summarize → merge",
+    "unclear": "UNKLAR:<reason>"
+  },
+
+  "PROMPT_SNIPPET": "Generate meeting report in HTML using STRUCTURE and STYLE. Output only the report."
+}
+
+============================================================
+
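+Insert all generated content into the following TEMPLATE, rendering its Markdown-style headings, bold labels, and horizontal rules as the equivalent HTML elements: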
+
+# {{title}}
+
+**Date:** {{date}}  
+**Start:** {{start}}  
+**End:** {{end}}  
+**Duration:** {{duration}}  
+**Location:** {{location}}  
+**Host:** {{host}}  
+**Participants:** {{participants}}
+
+---
+
+## Table of Contents
+{{toc}}
+
+---
+
+## Executive Summary
+{{exec_summary}}
+
+---
+
+## Sections
+{{sections}}
+
+---
+
+## Consolidated Decisions
+{{consolidated_decisions}}
+
+---
+
+## Consolidated Actions
+{{consolidated_actions}}
+
+---
+
+## Appendix
+{{appendix}}
+
+============================================================
+
+Final Requirement:
+Output ONLY the completed HTML meeting report.
+## Executive Summary
+{{exec_summary}}
+
+---
+
+## Sections
+{{sections}}
+
+---
+
+## Consolidated Decisions
+{{consolidated_decisions}}
+
+---
+
+## Consolidated Actions
+{{consolidated_actions}}
+
+---
+
+## Appendix
+{{appendix}}
+
+============================================================
+
+Final Requirement:
+Output ONLY the completed HTML meeting report.
Author	SHA1	Message	Date
Hughes, Mike	1683502aa1	Merge branch 'develop' into 'feature/35-backend-llm-chat-gpt-integration-s4-10' # Conflicts: # services/modules/llm-gemini/gemini.js	2025-12-15 14:07:32 +01:00
MikeHughes-BIN	789ecd3a31	Update document processing to use prompts from styles and add structured meeting report template	2025-12-15 14:01:51 +01:00
MikeHughes-BIN	e72d03efbe	Added Pre's Document in the correct Folder	2025-12-15 13:55:40 +01:00
MikeHughes-BIN	ec57411992	Refactor document generation to output HTML format and update system instructions for clarity	2025-12-15 13:53:14 +01:00
MikeHughes-BIN	1e38cc79f4	Extended the Role description	2025-12-14 16:53:35 +01:00
MikeHughes-BIN	746fec05d4	Changed model from llama to quen3 because llama was timing out due to token limit	2025-12-14 16:28:48 +01:00