mirror of
https://github.com/EvolutionAPI/evolution-api.git
synced 2025-07-16 12:12:55 -06:00
feat(evoai): enhance media message handling and transcription capabilities
- Added support for audio message detection and transcription using OpenAI's Whisper API.
- Integrated media downloading for both audio and image messages, with appropriate error handling.
- Updated logging to redact sensitive information from payloads.
- Modified existing methods to accommodate the new message structure, ensuring seamless integration with EvoAI services.
This commit is contained in:
parent
71124755b0
commit
70a4fe8f6e
@ -848,6 +848,7 @@ export class EvoaiController extends ChatbotController implements ChatbotControl
|
|||||||
},
|
},
|
||||||
debouncedContent,
|
debouncedContent,
|
||||||
msg?.pushName,
|
msg?.pushName,
|
||||||
|
msg,
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
@ -872,6 +873,7 @@ export class EvoaiController extends ChatbotController implements ChatbotControl
|
|||||||
},
|
},
|
||||||
content,
|
content,
|
||||||
msg?.pushName,
|
msg?.pushName,
|
||||||
|
msg,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3,18 +3,20 @@ import { InstanceDto } from '@api/dto/instance.dto';
|
|||||||
import { PrismaRepository } from '@api/repository/repository.service';
|
import { PrismaRepository } from '@api/repository/repository.service';
|
||||||
import { WAMonitoringService } from '@api/services/monitor.service';
|
import { WAMonitoringService } from '@api/services/monitor.service';
|
||||||
import { Integration } from '@api/types/wa.types';
|
import { Integration } from '@api/types/wa.types';
|
||||||
|
import { ConfigService, Language } from '@config/env.config';
|
||||||
import { Logger } from '@config/logger.config';
|
import { Logger } from '@config/logger.config';
|
||||||
import { Evoai, EvoaiSetting, IntegrationSession } from '@prisma/client';
|
import { Evoai, EvoaiSetting, IntegrationSession } from '@prisma/client';
|
||||||
import { sendTelemetry } from '@utils/sendTelemetry';
|
import { sendTelemetry } from '@utils/sendTelemetry';
|
||||||
import axios from 'axios';
|
import axios from 'axios';
|
||||||
import path from 'path';
|
import { downloadMediaMessage } from 'baileys';
|
||||||
import { Readable } from 'stream';
|
import FormData from 'form-data';
|
||||||
import { v4 as uuidv4 } from 'uuid';
|
import { v4 as uuidv4 } from 'uuid';
|
||||||
|
|
||||||
export class EvoaiService {
|
export class EvoaiService {
|
||||||
constructor(
|
constructor(
|
||||||
private readonly waMonitor: WAMonitoringService,
|
private readonly waMonitor: WAMonitoringService,
|
||||||
private readonly prismaRepository: PrismaRepository,
|
private readonly prismaRepository: PrismaRepository,
|
||||||
|
private readonly configService: ConfigService,
|
||||||
) {}
|
) {}
|
||||||
|
|
||||||
private readonly logger = new Logger('EvoaiService');
|
private readonly logger = new Logger('EvoaiService');
|
||||||
@ -45,12 +47,34 @@ export class EvoaiService {
|
|||||||
return content.includes('imageMessage');
|
return content.includes('imageMessage');
|
||||||
}
|
}
|
||||||
|
|
||||||
private isJSON(str: string): boolean {
|
private isAudioMessage(content: string) {
|
||||||
|
return content.includes('audioMessage');
|
||||||
|
}
|
||||||
|
|
||||||
|
private async speechToText(audioBuffer: Buffer): Promise<string | null> {
|
||||||
try {
|
try {
|
||||||
JSON.parse(str);
|
const apiKey = this.configService.get<any>('OPENAI')?.API_KEY;
|
||||||
return true;
|
if (!apiKey) {
|
||||||
} catch (e) {
|
this.logger.error('[EvoAI] No OpenAI API key set for Whisper transcription');
|
||||||
return false;
|
return null;
|
||||||
|
}
|
||||||
|
const lang = this.configService.get<Language>('LANGUAGE').includes('pt')
|
||||||
|
? 'pt'
|
||||||
|
: this.configService.get<Language>('LANGUAGE');
|
||||||
|
const formData = new FormData();
|
||||||
|
formData.append('file', audioBuffer, 'audio.ogg');
|
||||||
|
formData.append('model', 'whisper-1');
|
||||||
|
formData.append('language', lang);
|
||||||
|
const response = await axios.post('https://api.openai.com/v1/audio/transcriptions', formData, {
|
||||||
|
headers: {
|
||||||
|
...formData.getHeaders(),
|
||||||
|
Authorization: `Bearer ${apiKey}`,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
return response?.data?.text || null;
|
||||||
|
} catch (err) {
|
||||||
|
this.logger.error(`[EvoAI] Whisper transcription failed: ${err}`);
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -62,6 +86,7 @@ export class EvoaiService {
|
|||||||
remoteJid: string,
|
remoteJid: string,
|
||||||
pushName: string,
|
pushName: string,
|
||||||
content: string,
|
content: string,
|
||||||
|
msg?: any,
|
||||||
) {
|
) {
|
||||||
try {
|
try {
|
||||||
const endpoint: string = evoai.agentUrl;
|
const endpoint: string = evoai.agentUrl;
|
||||||
@ -76,27 +101,52 @@ export class EvoaiService {
|
|||||||
},
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
// If content indicates an image/file, add as a file part
|
// If content indicates an image/file, fetch and encode as base64, then send as a file part
|
||||||
if (this.isImageMessage(content)) {
|
if ((this.isImageMessage(content) || this.isAudioMessage(content)) && msg) {
|
||||||
const contentSplit = content.split('|');
|
const isImage = this.isImageMessage(content);
|
||||||
const fileUrl = contentSplit[1].split('?')[0];
|
const isAudio = this.isAudioMessage(content);
|
||||||
const textPart = contentSplit[2] || content;
|
this.logger.debug(`[EvoAI] Media message detected: ${content}`);
|
||||||
parts[0].text = textPart;
|
|
||||||
|
|
||||||
// Try to fetch the file and encode as base64
|
let transcribedText = null;
|
||||||
try {
|
if (isAudio) {
|
||||||
const fileResponse = await axios.get(fileUrl, { responseType: 'arraybuffer' });
|
try {
|
||||||
const fileContent = Buffer.from(fileResponse.data).toString('base64');
|
this.logger.debug(`[EvoAI] Downloading audio for Whisper transcription`);
|
||||||
const fileName = path.basename(fileUrl);
|
const mediaBuffer = await downloadMediaMessage({ key: msg.key, message: msg.message }, 'buffer', {});
|
||||||
parts.push({
|
transcribedText = await this.speechToText(mediaBuffer);
|
||||||
type: 'file',
|
if (transcribedText) {
|
||||||
file: {
|
parts[0].text = transcribedText;
|
||||||
name: fileName,
|
} else {
|
||||||
bytes: fileContent,
|
parts[0].text = '[Audio message could not be transcribed]';
|
||||||
},
|
}
|
||||||
});
|
} catch (err) {
|
||||||
} catch (fileErr) {
|
this.logger.error(`[EvoAI] Failed to transcribe audio: ${err}`);
|
||||||
this.logger.error(`Failed to fetch or encode file for EvoAI: ${fileErr}`);
|
parts[0].text = '[Audio message could not be transcribed]';
|
||||||
|
}
|
||||||
|
} else if (isImage) {
|
||||||
|
const contentSplit = content.split('|');
|
||||||
|
parts[0].text = contentSplit[2] || content;
|
||||||
|
let fileContent = null,
|
||||||
|
fileName = null,
|
||||||
|
mimeType = null;
|
||||||
|
try {
|
||||||
|
this.logger.debug(
|
||||||
|
`[EvoAI] Fetching image using downloadMediaMessage with msg.key: ${JSON.stringify(msg.key)}`,
|
||||||
|
);
|
||||||
|
const mediaBuffer = await downloadMediaMessage({ key: msg.key, message: msg.message }, 'buffer', {});
|
||||||
|
fileContent = Buffer.from(mediaBuffer).toString('base64');
|
||||||
|
fileName = contentSplit[2] || `${msg.key.id}.jpg`;
|
||||||
|
mimeType = 'image/jpeg';
|
||||||
|
parts.push({
|
||||||
|
type: 'file',
|
||||||
|
file: {
|
||||||
|
name: fileName,
|
||||||
|
bytes: fileContent,
|
||||||
|
mimeType: mimeType,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
} catch (fileErr) {
|
||||||
|
this.logger.error(`[EvoAI] Failed to fetch or encode image for EvoAI: ${fileErr}`);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -115,7 +165,17 @@ export class EvoaiService {
|
|||||||
};
|
};
|
||||||
|
|
||||||
this.logger.debug(`[EvoAI] Sending request to: ${endpoint}`);
|
this.logger.debug(`[EvoAI] Sending request to: ${endpoint}`);
|
||||||
this.logger.debug(`[EvoAI] Payload: ${JSON.stringify(payload)}`);
|
// Redact base64 file bytes from payload log
|
||||||
|
const redactedPayload = JSON.parse(JSON.stringify(payload));
|
||||||
|
if (redactedPayload?.params?.message?.parts) {
|
||||||
|
redactedPayload.params.message.parts = redactedPayload.params.message.parts.map((part) => {
|
||||||
|
if (part.type === 'file' && part.file && part.file.bytes) {
|
||||||
|
return { ...part, file: { ...part.file, bytes: '[base64 omitted]' } };
|
||||||
|
}
|
||||||
|
return part;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
this.logger.debug(`[EvoAI] Payload: ${JSON.stringify(redactedPayload)}`);
|
||||||
|
|
||||||
if (instance.integration === Integration.WHATSAPP_BAILEYS) {
|
if (instance.integration === Integration.WHATSAPP_BAILEYS) {
|
||||||
await instance.client.presenceSubscribe(remoteJid);
|
await instance.client.presenceSubscribe(remoteJid);
|
||||||
@ -129,7 +189,7 @@ export class EvoaiService {
|
|||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
this.logger.debug(`[EvoAI] Response: ${JSON.stringify(response.data)}`);
|
this.logger.debug(`[EvoAI] Response: ${JSON.stringify(response.data.status)}`);
|
||||||
|
|
||||||
if (instance.integration === Integration.WHATSAPP_BAILEYS)
|
if (instance.integration === Integration.WHATSAPP_BAILEYS)
|
||||||
await instance.client.sendPresenceUpdate('paused', remoteJid);
|
await instance.client.sendPresenceUpdate('paused', remoteJid);
|
||||||
@ -341,6 +401,7 @@ export class EvoaiService {
|
|||||||
session: IntegrationSession,
|
session: IntegrationSession,
|
||||||
content: string,
|
content: string,
|
||||||
pushName?: string,
|
pushName?: string,
|
||||||
|
msg?: any,
|
||||||
) {
|
) {
|
||||||
const data = await this.createNewSession(instance, {
|
const data = await this.createNewSession(instance, {
|
||||||
remoteJid,
|
remoteJid,
|
||||||
@ -352,7 +413,7 @@ export class EvoaiService {
|
|||||||
session = data.session;
|
session = data.session;
|
||||||
}
|
}
|
||||||
|
|
||||||
await this.sendMessageToBot(instance, session, settings, evoai, remoteJid, pushName, content);
|
await this.sendMessageToBot(instance, session, settings, evoai, remoteJid, pushName, content, msg);
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -365,6 +426,7 @@ export class EvoaiService {
|
|||||||
settings: EvoaiSetting,
|
settings: EvoaiSetting,
|
||||||
content: string,
|
content: string,
|
||||||
pushName?: string,
|
pushName?: string,
|
||||||
|
msg?: any,
|
||||||
) {
|
) {
|
||||||
if (session && session.status !== 'opened') {
|
if (session && session.status !== 'opened') {
|
||||||
return;
|
return;
|
||||||
@ -398,13 +460,13 @@ export class EvoaiService {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
await this.initNewSession(instance, remoteJid, evoai, settings, session, content, pushName);
|
await this.initNewSession(instance, remoteJid, evoai, settings, session, content, pushName, msg);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!session) {
|
if (!session) {
|
||||||
await this.initNewSession(instance, remoteJid, evoai, settings, session, content, pushName);
|
await this.initNewSession(instance, remoteJid, evoai, settings, session, content, pushName, msg);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -455,7 +517,7 @@ export class EvoaiService {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
await this.sendMessageToBot(instance, session, settings, evoai, remoteJid, pushName, content);
|
await this.sendMessageToBot(instance, session, settings, evoai, remoteJid, pushName, content, msg);
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user