Compare commits

..

17 Commits

Author SHA1 Message Date
Aarthi Manivannan, Premanathan Aarthi Manivannan eec0b2d356 Update 6 files
- /src/main/Resources/application.yml
- /src/main/config/AppProperties.java
- /src/main/service/ExternalApiService.java
- /src/main/controller/GenerateController.java
- /src/test/AppPropertiesTest.java
- /README.md
2025-12-14 19:03:33 +01:00
Aarthi Manivannan, Premanathan Aarthi Manivannan 77dcf6d812 Delete AppPropertiesTest.java 2025-12-14 18:03:41 +01:00
Aarthi Manivannan, Premanathan Aarthi Manivannan b9881d55e7 Delete application.yml 2025-12-14 18:03:26 +01:00
Aarthi Manivannan, Premanathan Aarthi Manivannan a7a8170cd5 Delete ExternalApiService.java 2025-12-14 18:03:05 +01:00
Aarthi Manivannan, Premanathan Aarthi Manivannan 15a4807567 Delete GenerateController.java 2025-12-14 18:02:46 +01:00
Aarthi Manivannan, Premanathan Aarthi Manivannan 3bbd82eddc Delete AppProperties.java 2025-12-14 18:02:13 +01:00
Aarthi Manivannan, Premanathan Aarthi Manivannan 55555bcc37 Update 6 files
- /src1/main/resources/application.yml
- /src1/main/java/com/v2d/document/config/AppProperties.java
- /src1/main/java/com/v2d/document/service/ExternalApiService.java
- /src1/main/java/com/v2d/document/controller/GenerateController.java
- /src1/test/AppPropertiesTest.java
- /README.md
2025-12-09 17:59:09 +01:00
Hughes, Mike 283b4ed6af Merge branch 'develop' into 'main'
Implemented the general modular framework.

See merge request proj-wise2526-video2document/video2document!22
2025-11-15 15:14:24 +01:00
Spanier, Pit 2edc7f8351 Merge branch 'fix/transcription-module-fix' into 'develop'
New Folder structure

See merge request proj-wise2526-video2document/video2document!21
2025-11-15 15:11:35 +01:00
MikeHughes-BIN 6083773f88 New Folder structure 2025-11-15 14:45:13 +01:00
Spanier, Pit 444d408480 Merge branch 'feature/fixing-the-program' into 'develop'
fixed the program by moving the example module back to where the program can ACTUALLY load it

See merge request proj-wise2526-video2document/video2document!20
2025-11-14 14:30:16 +01:00
emily d9eacafc3a fixed the program by moving the example module back to where the program can ACTUALLY load it 2025-11-14 14:28:11 +01:00
Hughes, Mike ab737f0dc9 Merge branch 'feature/12-externe-transkription-s2-02b' into 'develop'
feat(S2-02b): Implement AssemblyAI external transcription with speaker diarization

See merge request proj-wise2526-video2document/video2document!18
2025-11-13 17:38:39 +01:00
MikeHughes-BIN 79e0c48755 Reduced Number of test paths to avoid redundancy 2025-11-13 17:35:40 +01:00
MikeHughes-BIN 9254ddc57f Changed the Folder Structure for better maintainability 2025-11-13 17:34:22 +01:00
Azeufack Noupeu Willy c021272ca4 merge: Resolve conflicts with develop 2025-11-13 15:22:33 +01:00
Azeufack Noupeu Willy e7e97a7f60 feat(S2-02b): Implement AssemblyAI external transcription with speaker diarization
- Add assembly.ts module for REST API transcription via AssemblyAI
- Implement 5-step pipeline: upload → create job → poll status → download → save
- Enable speaker_labels for diarization (Speaker A, B, C...)
- Add millisecond-precision timestamps for each utterance
- Store JSON transcripts in storage/transcripts/{session_id}.json
- Add axios, dotenv dependencies
- Add transcribeLatest.ts helper for quick testing

User Story: S2-02b - Externe Transkription per REST API
2025-11-13 13:07:18 +01:00
21 changed files with 403 additions and 51 deletions
+23 -2
View File
@@ -88,6 +88,27 @@ Show your appreciation to those who have contributed to the project.
## License ## License
For open source projects, say how it is licensed. For open source projects, say how it is licensed.
---
## Project status ## Sprint 4 Secure API Key Management
If you have run out of energy or time for your project, put a note at the top of the README saying that development has slowed down or stopped completely. Someone may choose to fork your project or volunteer to step in as a maintainer or owner, allowing your project to keep going. You can also make an explicit request for maintainers.
In Sprint 4, secure handling of API keys was implemented for the V2D (Video to Document) framework.
### Implementation Overview
- API keys are **not stored in the source code**
- The backend loads the key from an **environment variable**
- A single configuration works for all users without manual setup
- Secrets are protected from being exposed in the repository or frontend
### Configuration
The backend expects the following environment variable:
This variable is injected at runtime by the deployment or CI/CD environment and referenced in `application.yml`.
### Security Benefits
- Prevents accidental exposure of API keys
- Ensures secure collaboration in GitLab
- Follows best practices for secret management
---
+22
View File
@@ -9,7 +9,9 @@
"version": "1.0.0", "version": "1.0.0",
"license": "ISC", "license": "ISC",
"dependencies": { "dependencies": {
"@types/axios": "^0.9.36",
"cli-progress": "^3.12.0", "cli-progress": "^3.12.0",
"dotenv": "^17.2.3",
"electron": "^39.1.1", "electron": "^39.1.1",
"express": "^5.1.0", "express": "^5.1.0",
"ffmpeg-static": "^5.2.0", "ffmpeg-static": "^5.2.0",
@@ -149,6 +151,12 @@
"dev": true, "dev": true,
"license": "MIT" "license": "MIT"
}, },
"node_modules/@types/axios": {
"version": "0.9.36",
"resolved": "https://registry.npmjs.org/@types/axios/-/axios-0.9.36.tgz",
"integrity": "sha512-NLOpedx9o+rxo/X5ChbdiX6mS1atE4WHmEEIcR9NLenRVa5HoVjAvjafwU3FPTqnZEstpoqCaW7fagqSoTDNeg==",
"license": "MIT"
},
"node_modules/@types/cacheable-request": { "node_modules/@types/cacheable-request": {
"version": "6.0.3", "version": "6.0.3",
"resolved": "https://registry.npmjs.org/@types/cacheable-request/-/cacheable-request-6.0.3.tgz", "resolved": "https://registry.npmjs.org/@types/cacheable-request/-/cacheable-request-6.0.3.tgz",
@@ -198,6 +206,7 @@
"resolved": "https://registry.npmjs.org/@types/node/-/node-24.9.2.tgz", "resolved": "https://registry.npmjs.org/@types/node/-/node-24.9.2.tgz",
"integrity": "sha512-uWN8YqxXxqFMX2RqGOrumsKeti4LlmIMIyV0lgut4jx7KQBcBiW6vkDtIBvHnHIquwNfJhk8v2OtmO8zXWHfPA==", "integrity": "sha512-uWN8YqxXxqFMX2RqGOrumsKeti4LlmIMIyV0lgut4jx7KQBcBiW6vkDtIBvHnHIquwNfJhk8v2OtmO8zXWHfPA==",
"license": "MIT", "license": "MIT",
"peer": true,
"dependencies": { "dependencies": {
"undici-types": "~7.16.0" "undici-types": "~7.16.0"
} }
@@ -584,6 +593,18 @@
"node": ">=0.3.1" "node": ">=0.3.1"
} }
}, },
"node_modules/dotenv": {
"version": "17.2.3",
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-17.2.3.tgz",
"integrity": "sha512-JVUnt+DUIzu87TABbhPmNfVdBDt18BLOWjMUFJMSi/Qqg7NTYtabbvSNJGOJ7afbRuv9D/lngizHtP7QyLQ+9w==",
"license": "BSD-2-Clause",
"engines": {
"node": ">=12"
},
"funding": {
"url": "https://dotenvx.com"
}
},
"node_modules/dunder-proto": { "node_modules/dunder-proto": {
"version": "1.0.1", "version": "1.0.1",
"resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz",
@@ -1824,6 +1845,7 @@
"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
"dev": true, "dev": true,
"license": "Apache-2.0", "license": "Apache-2.0",
"peer": true,
"bin": { "bin": {
"tsc": "bin/tsc", "tsc": "bin/tsc",
"tsserver": "bin/tsserver" "tsserver": "bin/tsserver"
+2
View File
@@ -1,6 +1,8 @@
{ {
"dependencies": { "dependencies": {
"@types/axios": "^0.9.36",
"cli-progress": "^3.12.0", "cli-progress": "^3.12.0",
"dotenv": "^17.2.3",
"electron": "^39.1.1", "electron": "^39.1.1",
"express": "^5.1.0", "express": "^5.1.0",
"ffmpeg-static": "^5.2.0", "ffmpeg-static": "^5.2.0",
-5
View File
@@ -1,5 +0,0 @@
npx ts-node ./extract.ts /Users/mikehughes/Downloads/Testvideo/Kurzgesagt.mov
npx ts-node ./transcribe.ts ../storage/audio/Kurzgesagt.wav
npx ts-node ./extract.ts /Users/mikehughes/Downloads/Testvideo/GitLabMeeting.mov
npx ts-node ./transcribe.ts ../storage/audio/GitLabMeeting.wav
-23
View File
@@ -1,23 +0,0 @@
#!/usr/bin/env ts-node
import { extractAudioFromVideo } from "../services/modules/extraction/ffmpegExtractor.ts";
const videoPath = process.argv[2];
if (!videoPath) {
console.error("Usage: ts-node extractAudio.ts <videoPath>");
process.exit(1);
}
(async () => {
try {
console.log(`Extracting audio from: ${videoPath}`);
await extractAudioFromVideo(videoPath); // Call the extraction function (ffmpegExtractor.ts in services/modules/extraction)
console.log("Audio extraction completed successfully.");
} catch (err) {
console.error("Audio extraction failed:", err);
process.exit(1);
}
})();
-18
View File
@@ -1,18 +0,0 @@
import { whisperLocal } from "../services/modules/transcription/local/whisperLocal.ts";
const audioPath = process.argv[2];
if (!audioPath) {
console.error("Please provide an audio file path as argument.");
process.exit(1);
}
const whisper = new whisperLocal();
(async () => {
try {
const text = await whisper.transcribe(audioPath);
console.log(text);
} catch (err) {
console.error("Transcription failed:", err);
}
})();
@@ -5,7 +5,7 @@ import { fileURLToPath } from "url"; // To handle __dirname in ES modules
const __filename = fileURLToPath(import.meta.url); // Get current file path const __filename = fileURLToPath(import.meta.url); // Get current file path
const __dirname = path.dirname(__filename); // Get current directory path const __dirname = path.dirname(__filename); // Get current directory path
const transcriptsDir = path.resolve(__dirname, "../../storage/transcriptions"); const transcriptsDir = path.resolve(__dirname, "../../../storage/transcriptions");
export class whisperLocal { // is called by transcribe.ts export class whisperLocal { // is called by transcribe.ts
@@ -26,7 +26,6 @@ export class whisperLocal { // is called by transcribe.ts
async transcribe(audioPath: string): Promise<string> { //asyncronous function to transcribe audio async transcribe(audioPath: string): Promise<string> { //asyncronous function to transcribe audio
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
const transcriptsDir = path.resolve(__dirname, "../../../../storage/transcripts"); //storage directory for transcripts
if (!fs.existsSync(transcriptsDir)) { //if transcripts directory does not exist, create it if (!fs.existsSync(transcriptsDir)) { //if transcripts directory does not exist, create it
fs.mkdirSync(transcriptsDir, { recursive: true }); fs.mkdirSync(transcriptsDir, { recursive: true });
@@ -0,0 +1,133 @@
import 'dotenv/config';
import axios from 'axios';
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const API_KEY = process.env.ASSEMBLYAI_API_KEY;
const BASE_URL = 'https://api.assemblyai.com/v2';
/**
* Uploads audio file to AssemblyAI
*/
async function uploadAudio(audioPath: string): Promise<string> {
const audioData = fs.readFileSync(audioPath);
const response = await axios.post<{ upload_url: string }>(`${BASE_URL}/upload`, audioData, {
headers: {
'authorization': API_KEY,
'content-type': 'application/octet-stream'
}
});
return response.data.upload_url;
}
/**
* Extract a session id (basename without extension) from a local path or a URL
*/
function getSessionId(inputPath: string): string {
try {
const parsed = new URL(inputPath);
const base = path.basename(parsed.pathname);
return base.replace(/\.[^.]+$/, '');
} catch (err) {
// not a URL, treat as local path
return path.basename(inputPath, path.extname(inputPath));
}
}
/**
* Creates transcription job with speaker diarization
*/
async function createTranscript(audioUrl: string): Promise<string> {
const response = await axios.post<{ id: string }>(`${BASE_URL}/transcript`, {
audio_url: audioUrl,
speaker_labels: true,
language_detection: true
}, {
headers: {
'authorization': API_KEY,
'content-type': 'application/json'
}
});
return response.data.id;
}
/**
* Polls transcript status until completed
*/
async function pollTranscript(transcriptId: string): Promise<any> {
while (true) {
const response = await axios.get<any>(`${BASE_URL}/transcript/${transcriptId}`, {
headers: { 'authorization': API_KEY }
});
const status = response.data.status;
if (status === 'completed') {
return response.data;
} else if (status === 'error') {
throw new Error(`Transcription failed: ${response.data.error}`);
}
// Wait 3 seconds before next poll
await new Promise(resolve => setTimeout(resolve, 3000));
}
}
/**
* Saves transcript to storage
*/
function saveTranscript(transcript: any, sessionId: string): void {
const outputDir = path.join(__dirname, '..', '..', '..', 'storage', 'transcripts');
if (!fs.existsSync(outputDir)) {
fs.mkdirSync(outputDir, { recursive: true });
}
const outputPath = path.join(outputDir, `${sessionId}.json`);
fs.writeFileSync(outputPath, JSON.stringify(transcript, null, 2));
console.log(`✅ Transcript saved: ${outputPath}`);
}
export default {
name: "assembly",
type: "transcription",
displayname: "AssemblyAI",
run: async (audioPath: string) => {
try {
// Determine if audioPath is an external URL or a local file
let audioUrl: string;
if (/^https?:\/\//i.test(audioPath)) {
console.log('🔗 Using external audio URL...');
audioUrl = audioPath;
} else {
console.log('🔄 Uploading local audio...');
if (!fs.existsSync(audioPath)) {
throw new Error(`Audio file not found: ${audioPath}`);
}
audioUrl = await uploadAudio(audioPath);
}
console.log('🔄 Creating transcript job...');
const transcriptId = await createTranscript(audioUrl);
console.log('⏳ Waiting for transcription...');
const transcript = await pollTranscript(transcriptId);
const sessionId = getSessionId(audioPath);
saveTranscript(transcript, sessionId);
return transcript;
} catch (error: any) {
console.error('❌ Transcription error:', error.message);
throw error;
}
}
};
Submodule services/modules/transcription/local/whisper.cpp deleted from 999a7e0cbf
@@ -0,0 +1,52 @@
// services/pipeline/jobs/transcribeLatest.ts
import path from 'path';
import fs from 'fs';
import assembly from '../../modules/transcription/assembly';
/**
* Finds the most recently modified .wav file in storage/audio/
*/
function getLatestWav(): string {
const audioDir = path.join(process.cwd(), 'storage', 'audio');
const files = fs.readdirSync(audioDir).filter(f => f.toLowerCase().endsWith('.wav'));
if (files.length === 0) throw new Error('⚠️ No .wav file found in storage/audio');
const newest = files
.map(f => ({ f, t: fs.statSync(path.join(audioDir, f)).mtimeMs }))
.sort((a, b) => b.t - a.t)[0].f;
return path.join(audioDir, newest);
}
/**
* Full transcription pipeline according to the defined workflow:
* 1. Audio Upload → AssemblyAI
* 2. Job Creation (transcript_id)
* 3. Polling Status (queued → processing → completed)
* 4. Download Transcript JSON
* 5. Storage: /transcripts/{session_id}.json
*/
async function main() {
const audioPath = getLatestWav();
console.log('1️⃣ Audio Upload → AssemblyAI');
console.log(' Source:', audioPath);
console.log('2️⃣ Job Creation (transcript_id)');
console.log('3️⃣ Polling Status (queued → processing → completed)');
console.log('4️⃣ Download Transcript JSON');
console.log('5️⃣ Storage: /transcripts/{session_id}.json');
// Execute the transcription process via the AssemblyAI module
const result = await assembly.run(audioPath);
console.log('✅ Transcription completed successfully');
console.log('🆔 Transcript ID:', result.id);
console.log('📁 Transcript file saved under: storage/transcripts/');
}
// Entry point
main().catch((err) => {
console.error('❌ Transcription pipeline failed:', err.message || err);
process.exit(1);
});
+7
View File
@@ -0,0 +1,7 @@
spring:
application:
name: v2d-document
app:
external:
apiKey: ${LLM_API_KEY:}
+13
View File
@@ -0,0 +1,13 @@
package com.v2d.document.config;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Configuration;
@Configuration
@ConfigurationProperties(prefix = "app.external")
public class AppProperties {
private String apiKey;
public String getApiKey() { return apiKey; }
public void setApiKey(String apiKey) { this.apiKey = apiKey; }
}
@@ -0,0 +1,26 @@
package com.v2d.document.controller;
import com.v2d.document.service.ExternalApiService;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import java.util.Map;
@RestController
@RequestMapping("/api/generate")
public class GenerateController {
private final ExternalApiService externalApiService;
public GenerateController(ExternalApiService externalApiService) {
this.externalApiService = externalApiService;
}
@PostMapping
public ResponseEntity<String> generate(@RequestBody Map<String,Object> body) {
// Build provider payload from the user's body (transform safely)
String payload = "{\"text\": \"use this text\"}"; // adapt for real usage
String providerResponse = externalApiService.callProvider(payload);
return ResponseEntity.ok(providerResponse);
}
}
+90
View File
@@ -0,0 +1,90 @@
package com.v2d.document.service;
import com.v2d.document.config.AppProperties;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
@Service
public class ExternalApiService {
private final Logger log = LoggerFactory.getLogger(ExternalApiService.class);
private final AppProperties props;
private final HttpClient http = HttpClient.newHttpClient();
public ExternalApiService(AppProperties props) {
this.props = props;
}
public String callProvider(String jsonPayload) {
String key = props.getApiKey();
if (key == null || key.isBlank()) {
log.warn("External API key is not configured.");
throw new IllegalStateException("External API key missing");
}
try {
HttpRequest req = HttpRequest.newBuilder()
.uri(URI.create("https://api.example.com/endpoint")) // replace with real endpoint
.header("Authorization", "Bearer " + key)
.header("Content-Type", "application/json")
.POST(HttpRequest.BodyPublishers.ofString(jsonPayload))
.build();
HttpResponse<String> resp = http.send(req, HttpResponse.BodyHandlers.ofString());
return resp.body();
} catch (Exception e) {
log.error("External API call failed: {}", e.getMessage());
throw new RuntimeException("External API call failed", e);
}
}
}
package com.v2d.document.service;
import com.v2d.document.config.AppProperties;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
@Service
public class ExternalApiService {
private final Logger log = LoggerFactory.getLogger(ExternalApiService.class);
private final AppProperties props;
private final HttpClient http = HttpClient.newHttpClient();
public ExternalApiService(AppProperties props) {
this.props = props;
}
public String callProvider(String jsonPayload) {
String key = props.getApiKey();
if (key == null || key.isBlank()) {
log.warn("External API key is not configured.");
throw new IllegalStateException("External API key missing");
}
try {
HttpRequest req = HttpRequest.newBuilder()
.uri(URI.create("https://api.example.com/endpoint")) // replace with real endpoint
.header("Authorization", "Bearer " + key)
.header("Content-Type", "application/json")
.POST(HttpRequest.BodyPublishers.ofString(jsonPayload))
.build();
HttpResponse<String> resp = http.send(req, HttpResponse.BodyHandlers.ofString());
return resp.body();
} catch (Exception e) {
log.error("External API call failed: {}", e.getMessage());
throw new RuntimeException("External API call failed", e);
}
}
}
+20
View File
@@ -0,0 +1,20 @@
package com.v2d.document.config;
import org.junit.jupiter.api.Test;
import org.springframework.boot.test.context.runner.ApplicationContextRunner;
import static org.assertj.core.api.Assertions.assertThat;
public class AppPropertiesTest {
private final ApplicationContextRunner contextRunner = new ApplicationContextRunner()
.withUserConfiguration(AppProperties.class)
.withPropertyValues("app.external.apiKey=TEST_KEY");
@Test
void bindsApiKey() {
contextRunner.run(context -> {
AppProperties props = context.getBean(AppProperties.class);
assertThat(props.getApiKey()).isEqualTo("TEST_KEY");
});
}
}
+14
View File
@@ -0,0 +1,14 @@
import 'dotenv/config';
import assemblyModule from '../../services/modules/transcription-remote/assembly.ts';
// Test: URL passed as argument OR local file ./storage/audio/test.wav
const audioPath = process.argv[2] || './storage/audio/test.wav';
assemblyModule.run(audioPath)
.then(result => {
console.log('✅ Success!');
console.log('Transcript ID:', result.id);
})
.catch(error => {
console.error('❌ Error:', error?.message || error);
});
View File