const { OpenAI } = require('openai');
const fs = require('fs');
const fsPromises = require('fs/promises');
const { AppError } = require('../middleware/errorHandler');
const config = require('../config/config');
const { Recording, Meeting, Transcript } = require('../models');
const path = require('path');
const { ElevenLabsClient } = require('@elevenlabs/elevenlabs-js');
// OpenAI SDK client pointed at the OpenRouter-compatible endpoint
const openai = new OpenAI({
  baseURL: config.services.openrouter.baseUrl,
  apiKey: config.services.openrouter.apiKey
});
// Collapse a diarized word list into "Speaker N: ..." lines, grouping
// consecutive words from the same speaker and skipping spacing tokens.
const formatTranscript = (json) => {
const result = [];
let currentSpeaker = null;
let buffer = [];
const speakerMap = {};
let speakerCount = 1;
for (const word of json.words) {
if (word.type === "spacing") continue;
if (!speakerMap[word.speakerId]) {
speakerMap[word.speakerId] = `Speaker ${speakerCount++}`;
}
if (word.speakerId !== currentSpeaker) {
if (buffer.length > 0 && currentSpeaker !== null) {
result.push(`${speakerMap[currentSpeaker]}: ${buffer.join(" ")}`);
}
currentSpeaker = word.speakerId;
buffer = [word.text];
} else {
buffer.push(word.text);
}
}
if (buffer.length > 0 && currentSpeaker !== null) {
result.push(`${speakerMap[currentSpeaker]}: ${buffer.join(" ")}`);
}
return { text: result.join("\n"), speakerCount: speakerCount - 1 };
};
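
// Illustrative example (hypothetical input, matching the fields read above):
//   formatTranscript({ words: [
//     { text: "hello", speakerId: "spk_a", type: "word" },
//     { text: " ",     speakerId: "spk_a", type: "spacing" },
//     { text: "hi",    speakerId: "spk_b", type: "word" },
//   ] })
//   // => { text: "Speaker 1: hello\nSpeaker 2: hi", speakerCount: 2 }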
// Provider implementations
const providers = {
elevenlabs: {
transcribe: async (audioPath) => {
      // Node 18+ exposes Blob globally; wrap the raw file bytes for upload
      const audioBlob = new Blob([await fsPromises.readFile(audioPath)], { type: "audio/wav" });
      // No explicit key passed: the SDK falls back to the ELEVENLABS_API_KEY environment variable
      const elevenlabs = new ElevenLabsClient();
const transcription = await elevenlabs.speechToText.convert({
file: audioBlob,
modelId: "scribe_v1",
tagAudioEvents: true,
languageCode: "ben",
diarize: true,
});
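      // Downstream code expects camelCase fields on this object
      // (words[].text/.type/.speakerId, languageCode, languageProbability).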
return transcription;
}
},
openrouter: {
transcribe: async (audioPath) => {
      const audioBuffer = await fsPromises.readFile(audioPath);
      const base64String = audioBuffer.toString('base64');
const response = await fetch("https://openrouter.ai/api/v1/chat/completions", {
method: "POST",
headers: {
Authorization: `Bearer ${config.services.openrouter.apiKey}`,
"Content-Type": "application/json",
},
body: JSON.stringify({
model: "google/gemini-2.5-flash",
messages: [
{
role: "user",
content: [
{
type: "text",
text: "Please transcribe this audio file with speaker diarization. Return the result as JSON with 'words' array containing objects with 'text', 'speakerId', and other relevant fields. also must provide languageCode and languageProbability key in the json.",
},
{
type: "input_audio",
input_audio: {
data: base64String,
format: "wav",
},
},
],
},
],
}),
});
if (!response.ok) {
throw new Error(`OpenRouter API error: ${response.statusText}`);
}
const data = await response.json();
const content = data.choices[0].message.content;
// Try to parse as JSON, otherwise create a basic structure
let transcription;
try {
// Remove markdown code block markers if present
        const cleanedContent = content.replace(/^```json\s*/, '').replace(/\s*```$/, '').trim();
transcription = JSON.parse(cleanedContent);
// Handle different response structures
if (Array.isArray(transcription)) {
// Direct array of segments
transcription = {
words: transcription.map(seg => ({
text: seg.text,
speakerId: seg.speakerId,
start: seg.start,
end: seg.end,
type: 'word'
})),
            languageCode: 'unknown', // a bare array carries no language info
            languageProbability: 0
};
} else if (transcription.sentences) {
// Handle structure with sentences
transcription.words = transcription.sentences.flatMap(sentence => sentence.words);
}
      } catch (e) {
        console.warn('Failed to parse transcription JSON, creating basic structure:', e.message);
        // Not JSON at all: fall back to a flat, single-speaker word list
        transcription = {
          words: content.split(' ').map((word) => ({
text: word,
speakerId: 'speaker1',
type: 'word'
})),
languageCode: 'unknown',
languageProbability: 0
};
}
return transcription;
}
}
};
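
// Usage sketch (illustrative): look up a provider by name and transcribe one file.
//   const provider = providers[config.transcription.defaultProvider];
//   const result = await provider.transcribe('/path/to/audio.wav');
//   console.log(formatTranscript(result).text);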
exports.generateTranscription = async (audioFilePath) => {
try {
const audioFile = fs.createReadStream(audioFilePath);
    // Note: this call requires the configured baseURL to support OpenAI's
    // /audio/transcriptions route.
    const transcription = await openai.audio.transcriptions.create({
      file: audioFile,
      model: "whisper-1",
      language: "en",
      response_format: "verbose_json"
    });
    return {
      text: transcription.text,
      // whisper-1's verbose_json carries no speaker labels, so this falls back to []
      speakers: transcription.speaker_labels || [],
      confidence: transcription.confidence,
      language: transcription.language
    };
} catch (error) {
throw new AppError('Transcription failed: ' + error.message, 500);
}
};
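
// Usage sketch (illustrative):
//   const { text, language } = await exports.generateTranscription('/path/to/audio.wav');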
exports.processRecording = async (recording_id) => {
// Fetch recording from DB
const recording = await Recording.findByPk(recording_id);
if (!recording) {
throw new AppError('Recording not found', 404);
}
// Check if recording has chunks
const { AudioChunk } = require('../models');
const chunks = await AudioChunk.findByRecording(recording_id, { order: [['chunk_index', 'ASC']] });
let fullTranscription = {
words: [],
languageCode: 'unknown',
languageProbability: 0
};
if (chunks.length > 0) {
// Process chunks
for (const chunk of chunks) {
if (chunk.transcription_status === 'completed' && chunk.transcription_data) {
const chunkTranscription = JSON.parse(chunk.transcription_data);
// Adjust timestamps to account for chunk position
if (chunkTranscription.words) {
          const adjustedWords = chunkTranscription.words.map(word => ({
            ...word,
            // use != null so a legitimate 0-second offset survives the shift
            start: word.start != null ? word.start + (chunk.start_time / 1000) : undefined,
            end: word.end != null ? word.end + (chunk.start_time / 1000) : undefined,
          }));
fullTranscription.words.push(...adjustedWords);
}
// Update language info if not set
if (fullTranscription.languageCode === 'unknown' && chunkTranscription.languageCode) {
fullTranscription.languageCode = chunkTranscription.languageCode;
}
if (chunkTranscription.languageProbability > fullTranscription.languageProbability) {
fullTranscription.languageProbability = chunkTranscription.languageProbability;
}
}
}
} else {
// Fallback to original processing if no chunks
const providerName = config.transcription.defaultProvider;
const provider = providers[providerName];
if (!provider) {
throw new AppError(`Unsupported transcription provider: ${providerName}`, 500);
}
const audioPath = recording.file_path;
let transcribePath = audioPath;
let needsCleanup = false;
// Check if file is encrypted
const { isFileEncrypted, decryptFile } = require('./encryptionService');
const isEncrypted = await isFileEncrypted(audioPath);
if (isEncrypted) {
// Decrypt to temporary location
const tempDir = path.join(path.dirname(audioPath), 'temp');
await fsPromises.mkdir(tempDir, { recursive: true });
const tempPath = path.join(tempDir, `decrypted_${recording.file_name}`);
await decryptFile(audioPath, tempPath);
transcribePath = tempPath;
needsCleanup = true;
}
const start = Date.now();
fullTranscription = await provider.transcribe(transcribePath);
const end = Date.now();
const processingTimeMs = end - start;
fullTranscription.processingTime = (processingTimeMs / 1000).toFixed(2);
// Clean up
if (needsCleanup) {
try {
await fsPromises.unlink(transcribePath);
} catch (error) {
console.warn('Failed to clean up temporary decrypted file:', error);
}
}
}
const processingTimeInSeconds = fullTranscription.processingTime || '0.00';
  const formattedTranscript = formatTranscript(fullTranscription);
const transcript = await Transcript.create({
recording_id: recording.id,
meeting_id: recording.meeting_id,
    content: formattedTranscript.text,
    speaker_count: formattedTranscript.speakerCount,
speaker_data: JSON.stringify(fullTranscription),
service_provider: config.transcription.defaultProvider,
processing_time: processingTimeInSeconds,
confidence: fullTranscription.languageProbability || 0.0,
language: fullTranscription.languageCode || 'unknown',
status: 'completed'
});
// Update recording transcription status
await recording.updateTranscriptionStatus('completed');
  return transcript;
};
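
// Usage sketch (illustrative): process a stored recording end-to-end.
//   const transcript = await exports.processRecording(recording.id);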
exports.providers = providers;
exports.formatTranscript = formatTranscript;