Files
emily faee605f12 Initial version of the working CI pipeline
Added mocha based unit tests for each module
Did a bit of cleanup in the modules to remove debug console.log calls
Removed the Progress bar in the extractor and the library requirement
Promisified the gemini module to make sure it returns the path as a promise instead of just on the cli
Fixed gitignore so that it now only ignores the content int the storage directories, and not the whole directories
Added neetingReport.json for the LLMs to use
2025-12-09 22:07:43 +01:00

126 lines
3.5 KiB
JavaScript

const API_KEY = process.env.ASSEMBLYAI_API_KEY;
const BASE_URL = 'https://api.assemblyai.com/v2';
//---------------------------------------------------Upload audio---------------------------------------------------
async function uploadAudio(audioPath) {
const audioData = fs.readFileSync(audioPath);
const response = await axios.post(`${BASE_URL}/upload`, audioData, {
headers: {
authorization: API_KEY,
'content-type': 'application/octet-stream'
}
});
return response.data.upload_url;
}
////---------------------------------------------------Extract session id---------------------------------------------------
function getSessionId(inputPath) {
try {
const parsed = new URL(inputPath);
const base = path.basename(parsed.pathname);
return base.replace(/\.[^.]+$/, '');
} catch {
return path.basename(inputPath, path.extname(inputPath));
}
}
//---------------------------------------------------Create transcript---------------------------------------------------
async function createTranscript(audioUrl) {
const response = await axios.post(
`${BASE_URL}/transcript`,
{
audio_url: audioUrl,
speaker_labels: true,
language_detection: true
},
{
headers: {
authorization: API_KEY,
'content-type': 'application/json'
}
}
);
return response.data.id;
}
//---------------------------------------------------Poll transcript---------------------------------------------------
async function pollTranscript(transcriptId) {
while (true) {
const response = await axios.get(`${BASE_URL}/transcript/${transcriptId}`, {
headers: { authorization: API_KEY }
});
const status = response.data.status;
if (status === 'completed') return response.data;
if (status === 'error') throw new Error(`Transcription failed: ${response.data.error}`);
await new Promise(res => setTimeout(res, 3000));
}
}
//---------------------------------------------------Save transcript---------------------------------------------------
function saveTranscript(transcript, sessionId) {
const outputDir = path.join(__dirname, '../../../storage/transcripts');
if (!fs.existsSync(outputDir)) {
fs.mkdirSync(outputDir, { recursive: true });
}
const outputPath = path.join(outputDir, `${sessionId}.json`);
fs.writeFileSync(outputPath, JSON.stringify(transcript, null, 2));
// console.log(`Transcript saved: ${outputPath}`);
return outputPath;
}
//---------------------------------------------------Modul---------------------------------------------------
module.exports = {
name: 'assembly',
type: 'transcription',
displayname: 'AssemblyAI',
audioformat: "mp3",
async function(audioFileName) {
return new Promise(async (resolve, reject) => {
try {
// audioFileName ist nur "datei.mp3"
const audioPath = audioFileName;
let audioUrl;
if (/^https?:\/\//i.test(audioFileName)) {
audioUrl = audioFileName;
} else {
if (!fs.existsSync(audioPath)) {
throw new Error(`Audio file not found: ${audioPath}`);
}
audioUrl = await uploadAudio(audioPath);
}
const transcriptId = await createTranscript(audioUrl);
const transcript = await pollTranscript(transcriptId);
const sessionId = getSessionId(audioFileName);
resolve(saveTranscript(transcript, sessionId));
} catch (error) {
// console.error('Transcription error:', error.message);
reject(error);
return
}
})
}
};