Compare commits

...

32 Commits

Author SHA1 Message Date
Aarthi Manivannan, Premanathan Aarthi Manivannan c00640c15d Update file meeting_report_prompt.txt 2025-12-14 11:52:21 +01:00
Aarthi Manivannan, Premanathan Aarthi Manivannan d80941ca65 Delete .gitkeep 2025-12-14 11:45:47 +01:00
Aarthi Manivannan, Premanathan Aarthi Manivannan 0e4147e893 Delete meeting_report_template.md 2025-12-14 11:45:38 +01:00
Aarthi Manivannan, Premanathan Aarthi Manivannan 9ab69b4b36 Delete .gitkeep 2025-12-14 11:44:59 +01:00
Aarthi Manivannan, Premanathan Aarthi Manivannan cd61d8e09b Delete transcript.txt 2025-12-14 11:44:51 +01:00
Aarthi Manivannan, Premanathan Aarthi Manivannan 893546d142 Delete Example_output.pdf.pdf 2025-12-14 11:44:41 +01:00
Aarthi Manivannan, Premanathan Aarthi Manivannan 9098dafbd5 Delete .gitkeep 2025-12-14 11:44:16 +01:00
Aarthi Manivannan, Premanathan Aarthi Manivannan d68192de8a Delete meeting_report_prompt.txt 2025-12-14 11:44:06 +01:00
Aarthi Manivannan, Premanathan Aarthi Manivannan 5a23ec9c2f Delete .gitkeep 2025-12-14 11:43:24 +01:00
Aarthi Manivannan, Premanathan Aarthi Manivannan a0237ade55 Delete meeting_report_structure.json 2025-12-14 11:39:09 +01:00
Aarthi Manivannan, Premanathan Aarthi Manivannan 10e3c902c5 Delete meeting_report_best_practices.md 2025-12-14 11:37:32 +01:00
Aarthi Manivannan, Premanathan Aarthi Manivannan b157a90671 Upload New File 2025-11-20 12:24:46 +01:00
Aarthi Manivannan, Premanathan Aarthi Manivannan 6931df22e0 Add new file 2025-11-20 12:12:42 +01:00
Aarthi Manivannan, Premanathan Aarthi Manivannan 9eaabe80b6 Add new directory 2025-11-20 12:09:35 +01:00
Aarthi Manivannan, Premanathan Aarthi Manivannan db2192dc30 Add new file 2025-11-20 12:02:32 +01:00
Aarthi Manivannan, Premanathan Aarthi Manivannan 76129982c3 Update file meeting_report_prompt.txt 2025-11-20 11:59:16 +01:00
Aarthi Manivannan, Premanathan Aarthi Manivannan ea769d3aec Add new file 2025-11-20 11:41:14 +01:00
Aarthi Manivannan, Premanathan Aarthi Manivannan 21f4fe95d7 Add new file 2025-11-20 11:38:32 +01:00
Aarthi Manivannan, Premanathan Aarthi Manivannan 8e07bcf028 Add new file 2025-11-20 11:35:47 +01:00
Aarthi Manivannan, Premanathan Aarthi Manivannan fd0798872a Add new directory 2025-11-20 11:32:46 +01:00
Aarthi Manivannan, Premanathan Aarthi Manivannan 0ea3fba436 Add new directory 2025-11-20 11:32:20 +01:00
Aarthi Manivannan, Premanathan Aarthi Manivannan 5615f7fd25 research and design 2025-11-20 11:29:33 +01:00
Hughes, Mike 283b4ed6af Merge branch 'develop' into 'main'
Implemented the general modular framework.

See merge request proj-wise2526-video2document/video2document!22
2025-11-15 15:14:24 +01:00
Spanier, Pit 2edc7f8351 Merge branch 'fix/transcription-module-fix' into 'develop'
New Folder structure

See merge request proj-wise2526-video2document/video2document!21
2025-11-15 15:11:35 +01:00
MikeHughes-BIN 6083773f88 New Folder structure 2025-11-15 14:45:13 +01:00
Spanier, Pit 444d408480 Merge branch 'feature/fixing-the-program' into 'develop'
fixed the program by moving the example module back to where the program can ACTUALLY load it

See merge request proj-wise2526-video2document/video2document!20
2025-11-14 14:30:16 +01:00
emily d9eacafc3a fixed the program by moving the example module back to where the program can ACTUALLY load it 2025-11-14 14:28:11 +01:00
Hughes, Mike ab737f0dc9 Merge branch 'feature/12-externe-transkription-s2-02b' into 'develop'
feat(S2-02b): Implement AssemblyAI external transcription with speaker diarization

See merge request proj-wise2526-video2document/video2document!18
2025-11-13 17:38:39 +01:00
MikeHughes-BIN 79e0c48755 Reduced Number of test paths to avoid redundancy 2025-11-13 17:35:40 +01:00
MikeHughes-BIN 9254ddc57f Changed the Folder Structure for better maintainability 2025-11-13 17:34:22 +01:00
Azeufack Noupeu Willy c021272ca4 merge: Resolve conflicts with develop 2025-11-13 15:22:33 +01:00
Azeufack Noupeu Willy e7e97a7f60 feat(S2-02b): Implement AssemblyAI external transcription with speaker diarization
- Add assembly.ts module for REST API transcription via AssemblyAI
- Implement 5-step pipeline: upload → create job → poll status → download → save
- Enable speaker_labels for diarization (Speaker A, B, C...)
- Add millisecond-precision timestamps for each utterance
- Store JSON transcripts in storage/transcripts/{session_id}.json
- Add axios, dotenv dependencies
- Add transcribeLatest.ts helper for quick testing

User Story: S2-02b - Externe Transkription per REST API
2025-11-13 13:07:18 +01:00
16 changed files with 333 additions and 49 deletions
+22
View File
@@ -9,7 +9,9 @@
"version": "1.0.0", "version": "1.0.0",
"license": "ISC", "license": "ISC",
"dependencies": { "dependencies": {
"@types/axios": "^0.9.36",
"cli-progress": "^3.12.0", "cli-progress": "^3.12.0",
"dotenv": "^17.2.3",
"electron": "^39.1.1", "electron": "^39.1.1",
"express": "^5.1.0", "express": "^5.1.0",
"ffmpeg-static": "^5.2.0", "ffmpeg-static": "^5.2.0",
@@ -149,6 +151,12 @@
"dev": true, "dev": true,
"license": "MIT" "license": "MIT"
}, },
"node_modules/@types/axios": {
"version": "0.9.36",
"resolved": "https://registry.npmjs.org/@types/axios/-/axios-0.9.36.tgz",
"integrity": "sha512-NLOpedx9o+rxo/X5ChbdiX6mS1atE4WHmEEIcR9NLenRVa5HoVjAvjafwU3FPTqnZEstpoqCaW7fagqSoTDNeg==",
"license": "MIT"
},
"node_modules/@types/cacheable-request": { "node_modules/@types/cacheable-request": {
"version": "6.0.3", "version": "6.0.3",
"resolved": "https://registry.npmjs.org/@types/cacheable-request/-/cacheable-request-6.0.3.tgz", "resolved": "https://registry.npmjs.org/@types/cacheable-request/-/cacheable-request-6.0.3.tgz",
@@ -198,6 +206,7 @@
"resolved": "https://registry.npmjs.org/@types/node/-/node-24.9.2.tgz", "resolved": "https://registry.npmjs.org/@types/node/-/node-24.9.2.tgz",
"integrity": "sha512-uWN8YqxXxqFMX2RqGOrumsKeti4LlmIMIyV0lgut4jx7KQBcBiW6vkDtIBvHnHIquwNfJhk8v2OtmO8zXWHfPA==", "integrity": "sha512-uWN8YqxXxqFMX2RqGOrumsKeti4LlmIMIyV0lgut4jx7KQBcBiW6vkDtIBvHnHIquwNfJhk8v2OtmO8zXWHfPA==",
"license": "MIT", "license": "MIT",
"peer": true,
"dependencies": { "dependencies": {
"undici-types": "~7.16.0" "undici-types": "~7.16.0"
} }
@@ -584,6 +593,18 @@
"node": ">=0.3.1" "node": ">=0.3.1"
} }
}, },
"node_modules/dotenv": {
"version": "17.2.3",
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-17.2.3.tgz",
"integrity": "sha512-JVUnt+DUIzu87TABbhPmNfVdBDt18BLOWjMUFJMSi/Qqg7NTYtabbvSNJGOJ7afbRuv9D/lngizHtP7QyLQ+9w==",
"license": "BSD-2-Clause",
"engines": {
"node": ">=12"
},
"funding": {
"url": "https://dotenvx.com"
}
},
"node_modules/dunder-proto": { "node_modules/dunder-proto": {
"version": "1.0.1", "version": "1.0.1",
"resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz",
@@ -1824,6 +1845,7 @@
"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
"dev": true, "dev": true,
"license": "Apache-2.0", "license": "Apache-2.0",
"peer": true,
"bin": { "bin": {
"tsc": "bin/tsc", "tsc": "bin/tsc",
"tsserver": "bin/tsserver" "tsserver": "bin/tsserver"
+2
View File
@@ -1,6 +1,8 @@
{ {
"dependencies": { "dependencies": {
"@types/axios": "^0.9.36",
"cli-progress": "^3.12.0", "cli-progress": "^3.12.0",
"dotenv": "^17.2.3",
"electron": "^39.1.1", "electron": "^39.1.1",
"express": "^5.1.0", "express": "^5.1.0",
"ffmpeg-static": "^5.2.0", "ffmpeg-static": "^5.2.0",
+109
View File
@@ -0,0 +1,109 @@
Generate a structured meeting report in MARKDOWN using STRUCTURE and STYLE.
Output ONLY the final .md document — no meta comments, no explanations.
Follow the STRUCTURE defined below exactly.
Follow the STYLE rules exactly.
Use timestamps in HH:MM:SS format.
If information is missing, use: UNKLAR:<reason>.
==================== STRUCTURE & RULES ====================
{
"FORMAT": "markdown",
"STRUCTURE": {
"titlepage": [
"title",
"date",
"start",
"end",
"duration",
"location",
"host",
"participants"
],
"toc": "[section](#anchor) — HH:MM:SS",
"section": {
"h2": "<topic> — HH:MM:SS",
"summary": "exactly 1 concise sentence",
"key_points": "maximum 5 bullet points; quotes optional",
"decisions": "list items formatted as: decision text | owner | due date",
"actions": "markdown table: id | task | owner | due | status"
},
"exec_summary": "exactly 3 short sentences",
"consolidated": [
"decisions",
"actions"
],
"appendix": "optional"
},
"STYLE": {
"tone": "neutral, concise, professional",
"ts_format": "HH:MM:SS",
"no_meta": true
},
"PROCESS": {
"timestamps": "use transcript timestamps if present; otherwise estimate minimal",
"speakers": "use names if available; else Speaker X",
"long_transcripts": "split → summarize → merge",
"unclear": "UNKLAR:<reason>"
},
"PROMPT_SNIPPET": "Generate meeting report in markdown using STRUCTURE and STYLE. Output only the report."
}
============================================================
Insert all generated content into the following MARKDOWN TEMPLATE:
# {{title}}
**Date:** {{date}}
**Start:** {{start}}
**End:** {{end}}
**Duration:** {{duration}}
**Location:** {{location}}
**Host:** {{host}}
**Participants:** {{participants}}
---
## Table of Contents
{{toc}}
---
## Executive Summary
{{exec_summary}}
---
## Sections
{{sections}}
---
## Consolidated Decisions
{{consolidated_decisions}}
---
## Consolidated Actions
{{consolidated_actions}}
---
## Appendix
{{appendix}}
============================================================
Final Requirement:
Output ONLY the completed Markdown meeting report.
-5
View File
@@ -1,5 +0,0 @@
npx ts-node ./extract.ts /Users/mikehughes/Downloads/Testvideo/Kurzgesagt.mov
npx ts-node ./transcribe.ts ../storage/audio/Kurzgesagt.wav
npx ts-node ./extract.ts /Users/mikehughes/Downloads/Testvideo/GitLabMeeting.mov
npx ts-node ./transcribe.ts ../storage/audio/GitLabMeeting.wav
-23
View File
@@ -1,23 +0,0 @@
#!/usr/bin/env ts-node
import { extractAudioFromVideo } from "../services/modules/extraction/ffmpegExtractor.ts";
// CLI entry: the first positional argument is the path of the video to process.
const [, , videoPath] = process.argv;

if (!videoPath) {
  console.error("Usage: ts-node extractAudio.ts <videoPath>");
  process.exit(1);
}

// Runs the extraction once; any failure is reported and ends the process with exit code 1.
const runExtraction = async () => {
  try {
    console.log(`Extracting audio from: ${videoPath}`);
    // Delegates the actual work to the imported extractAudioFromVideo function.
    await extractAudioFromVideo(videoPath);
    console.log("Audio extraction completed successfully.");
  } catch (err) {
    console.error("Audio extraction failed:", err);
    process.exit(1);
  }
};

runExtraction();
-18
View File
@@ -1,18 +0,0 @@
import { whisperLocal } from "../services/modules/transcription/local/whisperLocal.ts";
// CLI entry: transcribes the audio file given as the first positional argument
// with the whisperLocal module and prints the resulting text to stdout.
const audioPath = process.argv[2];

if (!audioPath) {
  console.error("Please provide an audio file path as argument.");
  process.exit(1);
}

const whisper = new whisperLocal();

(async () => {
  try {
    const text = await whisper.transcribe(audioPath);
    console.log(text);
  } catch (err) {
    console.error("Transcription failed:", err);
    // Report the failure to the calling shell/CI instead of finishing with status 0.
    // exitCode (rather than exit()) lets pending stdout/stderr writes flush first.
    process.exitCode = 1;
  }
})();
@@ -5,7 +5,7 @@ import { fileURLToPath } from "url"; // To handle __dirname in ES modules
const __filename = fileURLToPath(import.meta.url); // Get current file path const __filename = fileURLToPath(import.meta.url); // Get current file path
const __dirname = path.dirname(__filename); // Get current directory path const __dirname = path.dirname(__filename); // Get current directory path
const transcriptsDir = path.resolve(__dirname, "../../storage/transcriptions"); const transcriptsDir = path.resolve(__dirname, "../../../storage/transcriptions");
export class whisperLocal { // is called by transcribe.ts export class whisperLocal { // is called by transcribe.ts
@@ -26,7 +26,6 @@ export class whisperLocal { // is called by transcribe.ts
async transcribe(audioPath: string): Promise<string> { //asyncronous function to transcribe audio async transcribe(audioPath: string): Promise<string> { //asyncronous function to transcribe audio
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
const transcriptsDir = path.resolve(__dirname, "../../../../storage/transcripts"); //storage directory for transcripts
if (!fs.existsSync(transcriptsDir)) { //if transcripts directory does not exist, create it if (!fs.existsSync(transcriptsDir)) { //if transcripts directory does not exist, create it
fs.mkdirSync(transcriptsDir, { recursive: true }); fs.mkdirSync(transcriptsDir, { recursive: true });
@@ -0,0 +1,133 @@
import 'dotenv/config';
import axios from 'axios';
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
// ES modules do not provide __filename/__dirname, so both are derived from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// AssemblyAI credential taken from the environment; it is undefined when the variable is not set.
// NOTE(review): presumably populated from a .env file by the 'dotenv/config' import — confirm.
const API_KEY = process.env.ASSEMBLYAI_API_KEY;
// Common prefix for every AssemblyAI REST endpoint used in this module.
const BASE_URL = 'https://api.assemblyai.com/v2';
/**
 * Uploads a local audio file to AssemblyAI.
 *
 * @param audioPath - Path of the audio file on the local file system.
 * @returns The `upload_url` field of the upload endpoint's response.
 * @throws If the file cannot be read or the HTTP request fails.
 */
async function uploadAudio(audioPath: string): Promise<string> {
  // Read asynchronously so a large recording does not block the event loop.
  const audioData = await fs.promises.readFile(audioPath);
  const response = await axios.post<{ upload_url: string }>(`${BASE_URL}/upload`, audioData, {
    headers: {
      'authorization': API_KEY,
      'content-type': 'application/octet-stream'
    },
    // NOTE(review): some axios/follow-redirects versions cap request bodies at 10 MB by
    // default, which recordings easily exceed; lift the cap explicitly.
    maxBodyLength: Infinity
  });
  return response.data.upload_url;
}
/**
 * Derives a session id (file name without its extension) from a local path or an http(s) URL.
 *
 * Only inputs starting with `http://` or `https://` are parsed as URLs, matching the
 * check used by `run`. Parsing every input with `new URL()` would misread local file
 * names that contain a colon (e.g. `meeting-2025-11-13T10:30:00.wav`) as `scheme:path`.
 *
 * @param inputPath - Local file path or http(s) URL of the audio source.
 * @returns The base name of the file without its last extension.
 */
function getSessionId(inputPath: string): string {
  if (/^https?:\/\//i.test(inputPath)) {
    try {
      const { pathname } = new URL(inputPath);
      // URL paths always use "/" as separator, independent of the host OS.
      return path.posix.basename(pathname).replace(/\.[^.]+$/, '');
    } catch {
      // Malformed URL: fall through and treat the input like a local path.
    }
  }
  return path.basename(inputPath, path.extname(inputPath));
}
/**
 * Submits a transcription job for the given audio URL with speaker labels and
 * language detection enabled.
 *
 * @param audioUrl - URL of the audio to transcribe.
 * @returns The `id` field of the created transcript job.
 */
async function createTranscript(audioUrl: string): Promise<string> {
  const endpoint = `${BASE_URL}/transcript`;
  const jobRequest = {
    audio_url: audioUrl,
    speaker_labels: true,
    language_detection: true
  };
  const requestConfig = {
    headers: {
      'authorization': API_KEY,
      'content-type': 'application/json'
    }
  };

  const { data } = await axios.post<{ id: string }>(endpoint, jobRequest, requestConfig);
  return data.id;
}
/**
 * Polls a transcript job until it reports `completed` or `error`.
 *
 * @param transcriptId - Id of the transcript job to poll.
 * @param options - Optional tuning. `intervalMs` is the pause between polls (default 3000).
 *   `timeoutMs` is the maximum total wait; the default `Infinity` keeps the original
 *   behaviour of polling without an upper bound.
 * @returns The response body of the completed transcript.
 * @throws If the job reports status `error`, the timeout elapses, or a request fails.
 */
async function pollTranscript(
  transcriptId: string,
  { intervalMs = 3000, timeoutMs = Infinity }: { intervalMs?: number; timeoutMs?: number } = {}
): Promise<any> {
  const deadline = Date.now() + timeoutMs;

  while (true) {
    const response = await axios.get<any>(`${BASE_URL}/transcript/${transcriptId}`, {
      headers: { 'authorization': API_KEY }
    });
    const status = response.data.status;

    if (status === 'completed') {
      return response.data;
    }
    if (status === 'error') {
      throw new Error(`Transcription failed: ${response.data.error}`);
    }

    // Stop if the next poll would start after the deadline (never true for Infinity).
    if (Date.now() + intervalMs > deadline) {
      throw new Error(`Transcription timed out after ${timeoutMs} ms (last status: ${status})`);
    }

    // Wait before the next poll.
    await new Promise(resolve => setTimeout(resolve, intervalMs));
  }
}
/**
 * Writes the transcript as pretty-printed JSON to
 * `storage/transcripts/<sessionId>.json`, three directory levels above this module,
 * creating the directory first when it does not exist.
 *
 * @param transcript - Transcript object to serialize.
 * @param sessionId - Used as the file name (without extension).
 */
function saveTranscript(transcript: any, sessionId: string): void {
  const transcriptsDir = path.join(__dirname, '..', '..', '..', 'storage', 'transcripts');

  const dirExists = fs.existsSync(transcriptsDir);
  if (!dirExists) {
    fs.mkdirSync(transcriptsDir, { recursive: true });
  }

  const targetFile = path.join(transcriptsDir, `${sessionId}.json`);
  const serialized = JSON.stringify(transcript, null, 2);
  fs.writeFileSync(targetFile, serialized);

  console.log(`✅ Transcript saved: ${targetFile}`);
}
export default {
  name: "assembly",
  type: "transcription",
  displayname: "AssemblyAI",
  /**
   * Transcribes an audio source with AssemblyAI and stores the result as JSON.
   *
   * An input starting with http:// or https:// is passed to AssemblyAI as is; anything
   * else is treated as a local file and uploaded first. The finished transcript is
   * saved under a session id derived from the input name and then returned.
   *
   * @param audioPath - http(s) URL or local path of the audio.
   * @returns The completed transcript object.
   * @throws If the API key is missing, the local file does not exist, or any step fails.
   */
  run: async (audioPath: string) => {
    try {
      // Fail fast with a clear message instead of an opaque HTTP 401 from the API.
      if (!API_KEY) {
        throw new Error('ASSEMBLYAI_API_KEY is not set; define it in the environment or in a .env file');
      }

      // Determine if audioPath is an external URL or a local file
      let audioUrl: string;
      if (/^https?:\/\//i.test(audioPath)) {
        console.log('🔗 Using external audio URL...');
        audioUrl = audioPath;
      } else {
        console.log('🔄 Uploading local audio...');
        if (!fs.existsSync(audioPath)) {
          throw new Error(`Audio file not found: ${audioPath}`);
        }
        audioUrl = await uploadAudio(audioPath);
      }

      console.log('🔄 Creating transcript job...');
      const transcriptId = await createTranscript(audioUrl);

      console.log('⏳ Waiting for transcription...');
      const transcript = await pollTranscript(transcriptId);

      const sessionId = getSessionId(audioPath);
      saveTranscript(transcript, sessionId);
      return transcript;
    } catch (error: any) {
      // Null-safe: a thrown non-Error value must not break the error report itself.
      console.error('❌ Transcription error:', error?.message ?? error);
      throw error;
    }
  }
};
Submodule services/modules/transcription/local/whisper.cpp deleted from 999a7e0cbf
@@ -0,0 +1,52 @@
// services/pipeline/jobs/transcribeLatest.ts
import path from 'path';
import fs from 'fs';
import assembly from '../../modules/transcription/assembly';
/**
 * Returns the path of the .wav file (extension matched case-insensitively) in
 * `<cwd>/storage/audio` with the most recent modification time.
 *
 * @throws If the directory contains no .wav file.
 */
function getLatestWav(): string {
  const audioDir = path.join(process.cwd(), 'storage', 'audio');
  const wavNames = fs
    .readdirSync(audioDir)
    .filter((entry) => entry.toLowerCase().endsWith('.wav'));

  if (wavNames.length === 0) {
    throw new Error('⚠️ No .wav file found in storage/audio');
  }

  // Single pass keeping the first entry with the highest mtime.
  let latestName = wavNames[0];
  let latestMtime = fs.statSync(path.join(audioDir, latestName)).mtimeMs;
  for (const candidate of wavNames.slice(1)) {
    const candidateMtime = fs.statSync(path.join(audioDir, candidate)).mtimeMs;
    if (candidateMtime > latestMtime) {
      latestName = candidate;
      latestMtime = candidateMtime;
    }
  }

  return path.join(audioDir, latestName);
}
/**
 * Transcribes the newest .wav file via the AssemblyAI module.
 *
 * The five workflow steps (upload, job creation, status polling, transcript
 * download, storage under /transcripts/{session_id}.json) are printed up front as
 * an overview; the work itself happens inside the single `assembly.run` call.
 */
async function main() {
  const audioPath = getLatestWav();

  console.log('1️⃣ Audio Upload → AssemblyAI');
  console.log(' Source:', audioPath);

  const remainingSteps = [
    '2️⃣ Job Creation (transcript_id)',
    '3️⃣ Polling Status (queued → processing → completed)',
    '4️⃣ Download Transcript JSON',
    '5️⃣ Storage: /transcripts/{session_id}.json',
  ];
  for (const step of remainingSteps) {
    console.log(step);
  }

  // Hand the whole workflow over to the AssemblyAI module.
  const transcript = await assembly.run(audioPath);

  console.log('✅ Transcription completed successfully');
  console.log('🆔 Transcript ID:', transcript.id);
  console.log('📁 Transcript file saved under: storage/transcripts/');
}
// Entry point: run the pipeline; on failure print the reason and exit with status 1.
main().catch((failure) => {
  const reason = failure.message || failure;
  console.error('❌ Transcription pipeline failed:', reason);
  process.exit(1);
});
+14
View File
@@ -0,0 +1,14 @@
import 'dotenv/config';
import assemblyModule from '../../services/modules/transcription-remote/assembly.ts';
// Manual smoke test for the AssemblyAI module.
// Audio source: URL or local path passed as first CLI argument; falls back to
// ./storage/audio/test.wav when no argument is given.
const audioPath = process.argv[2] || './storage/audio/test.wav';

assemblyModule.run(audioPath)
  .then(result => {
    console.log('✅ Success!');
    console.log('Transcript ID:', result.id);
  })
  .catch(error => {
    console.error('❌ Error:', error?.message || error);
    // A failed test run must not finish with exit status 0.
    process.exitCode = 1;
  });
View File