// Mirror of https://gitlab.rlp.net/proj-wise2526-video2document/video2document.git
// Synced 2026-06-15 18:01:52 +02:00 (124 lines, 3.4 KiB, JavaScript)
require('dotenv').config();

const fs = require('fs');
const path = require('path');

const axios = require('axios');

/** API key sent as the `authorization` header; read from the `API_KEY` environment variable. */
const { API_KEY } = process.env;

/** Root URL that the upload and transcript endpoints are appended to. */
const BASE_URL = 'https://api.assemblyai.com/v2';

// Surface a missing key at load time instead of as an opaque HTTP failure later.
if (!API_KEY) {
  console.warn('API_KEY is not set; AssemblyAI requests will be sent without credentials.');
}

//---------------------------------------------------Upload audio---------------------------------------------------

/**
 * Uploads a local audio file to the `/upload` endpoint and returns the URL
 * reported in the response.
 *
 * @param {string} audioPath - Path of the audio file to read and send.
 * @returns {Promise<string>} The `upload_url` field of the response body.
 * @throws Propagates any error raised while reading the file or by the HTTP request.
 */
async function uploadAudio(audioPath) {
  // Read asynchronously so a large file does not block the event loop.
  const audioData = await fs.promises.readFile(audioPath);

  const response = await axios.post(`${BASE_URL}/upload`, audioData, {
    headers: {
      authorization: API_KEY,
      'content-type': 'application/octet-stream',
    },
    // Lift the request-body size limit: some axios versions otherwise reject
    // large uploads (NOTE(review): confirm against the installed axios version).
    maxBodyLength: Infinity,
  });

  return response.data.upload_url;
}

//---------------------------------------------------Extract session id---------------------------------------------------

/**
 * Derives a session id from an audio reference: the last path segment with
 * its final extension removed.
 *
 * Inputs that start with `scheme://` are parsed as URLs, so query strings and
 * fragments are ignored. Everything else is treated as a file name or path.
 *
 * @param {string} inputPath - URL or file name/path of the audio.
 * @returns {string} Base name without its final extension.
 */
function getSessionId(inputPath) {
  // Require "scheme://" before URL parsing: `new URL()` also accepts bare
  // "word:rest" strings, which would cut a file name like "lecture:part1.mp3"
  // down to "part1".
  if (/^[a-z][a-z\d+.-]*:\/\//i.test(inputPath)) {
    try {
      const { pathname } = new URL(inputPath);
      return path.basename(pathname).replace(/\.[^.]+$/, '');
    } catch {
      // Not parseable as a URL after all; fall through to plain path handling.
    }
  }

  return path.basename(inputPath, path.extname(inputPath));
}

//---------------------------------------------------Create transcript---------------------------------------------------

/**
 * Submits a transcription job for an audio URL with speaker labels and
 * language detection switched on.
 *
 * @param {string} audioUrl - URL of the audio to transcribe.
 * @returns {Promise<string>} The `id` field of the response body.
 * @throws Propagates any error raised by the HTTP request.
 */
async function createTranscript(audioUrl) {
  const endpoint = `${BASE_URL}/transcript`;

  const payload = {
    audio_url: audioUrl,
    speaker_labels: true,
    language_detection: true,
  };

  const requestConfig = {
    headers: {
      authorization: API_KEY,
      'content-type': 'application/json',
    },
  };

  const { data } = await axios.post(endpoint, payload, requestConfig);
  return data.id;
}

//---------------------------------------------------Poll transcript---------------------------------------------------

/**
 * Polls a transcript until its status is `completed` or `error`.
 *
 * @param {string} transcriptId - Id of the transcript to poll.
 * @param {object} [options]
 * @param {number} [options.intervalMs=3000] - Pause between two polls, in milliseconds.
 * @param {number} [options.timeoutMs=Infinity] - Upper bound on the total wait.
 *   The default waits indefinitely.
 * @returns {Promise<object>} The response body once its status is `completed`.
 * @throws {Error} If the status is `error`, or if the next poll would fall past `timeoutMs`.
 *   Errors raised by the HTTP request propagate unchanged.
 */
async function pollTranscript(transcriptId, { intervalMs = 3000, timeoutMs = Infinity } = {}) {
  const deadline = Date.now() + timeoutMs;

  while (true) {
    const { data } = await axios.get(`${BASE_URL}/transcript/${transcriptId}`, {
      headers: { authorization: API_KEY },
    });

    if (data.status === 'completed') return data;
    if (data.status === 'error') throw new Error(`Transcription failed: ${data.error}`);

    // Give up before sleeping if the next poll would land past the deadline.
    if (Date.now() + intervalMs > deadline) {
      throw new Error(
        `Transcription ${transcriptId} still "${data.status}" after ${timeoutMs} ms`
      );
    }

    await new Promise((resolve) => setTimeout(resolve, intervalMs));
  }
}

//---------------------------------------------------Save transcript---------------------------------------------------

/**
 * Writes a transcript as pretty-printed JSON to
 * `../../../storage/transcripts/<sessionId>.json` (relative to this file),
 * creating the directory if needed, and logs the resulting path.
 *
 * @param {object} transcript - Value to serialize with `JSON.stringify`.
 * @param {string} sessionId - File name (without extension) for the output.
 * @returns {string} Path of the file that was written.
 * @throws Propagates any file-system error from creating the directory or writing the file.
 */
function saveTranscript(transcript, sessionId) {
  const outputDir = path.join(__dirname, '../../../storage/transcripts');

  // With `recursive: true`, mkdir succeeds when the directory already exists,
  // so no separate (and racy) existence check is needed.
  fs.mkdirSync(outputDir, { recursive: true });

  const outputPath = path.join(outputDir, `${sessionId}.json`);
  fs.writeFileSync(outputPath, JSON.stringify(transcript, null, 2));

  console.log(`Transcript saved: ${outputPath}`);
  return outputPath;
}

//---------------------------------------------------Module---------------------------------------------------

module.exports = {
|
|
name: 'assembly',
|
|
type: 'transcription',
|
|
displayname: 'AssemblyAI',
|
|
|
|
async function(audioFileName) {
|
|
try {
|
|
// audioFileName ist nur "datei.mp3"
|
|
const audioPath = path.join(
|
|
__dirname,
|
|
'../../../storage/audio',
|
|
audioFileName
|
|
);
|
|
|
|
let audioUrl;
|
|
|
|
if (/^https?:\/\//i.test(audioFileName)) {
|
|
audioUrl = audioFileName;
|
|
} else {
|
|
if (!fs.existsSync(audioPath)) {
|
|
throw new Error(`Audio file not found: ${audioPath}`);
|
|
}
|
|
audioUrl = await uploadAudio(audioPath);
|
|
}
|
|
|
|
const transcriptId = await createTranscript(audioUrl);
|
|
const transcript = await pollTranscript(transcriptId);
|
|
|
|
const sessionId = getSessionId(audioFileName);
|
|
saveTranscript(transcript, sessionId);
|
|
|
|
} catch (error) {
|
|
console.error('Transcription error:', error.message);
|
|
}
|
|
}
|
|
};
|