refactor(openai): improve service initialization and streamline audio transcription handling

- Updated OpenaiService and related classes to enhance the initialization process by ensuring the correct order of parameters.
- Simplified audio message handling by consolidating transcription logic and improving error handling.
- Refactored the OpenaiController to utilize the new structure, ensuring better integration with the base chatbot framework.
- Enhanced logging for better traceability during audio processing and API interactions.
This commit is contained in:
Guilherme Gomes
2025-05-21 12:16:12 -03:00
parent 69b4f1aa02
commit c30bae4c3a
18 changed files with 1127 additions and 1890 deletions

View File

@@ -1,4 +1,5 @@
import { TriggerOperator, TriggerType } from '@prisma/client';
import { BaseChatbotDto, BaseChatbotSettingDto } from '../../base-chatbot.dto';
export class EvoaiDto extends BaseChatbotDto {

View File

@@ -11,11 +11,7 @@ import { v4 as uuidv4 } from 'uuid';
import { BaseChatbotService } from '../../base-chatbot.service';
export class EvoaiService extends BaseChatbotService<Evoai, EvoaiSetting> {
constructor(
waMonitor: WAMonitoringService,
prismaRepository: PrismaRepository,
configService: ConfigService,
) {
constructor(waMonitor: WAMonitoringService, prismaRepository: PrismaRepository, configService: ConfigService) {
super(waMonitor, prismaRepository, 'EvoaiService', configService);
}
@@ -45,24 +41,24 @@ export class EvoaiService extends BaseChatbotService<Evoai, EvoaiSetting> {
): Promise<void> {
try {
this.logger.debug(`[EvoAI] Processing message with custom process method`);
// Check if this is an audio message that we should try to transcribe
if (msg?.messageType === 'audioMessage' && msg?.message?.audioMessage) {
this.logger.debug(`[EvoAI] Detected audio message, attempting transcription`);
try {
// Download the audio using the whole msg object
const mediaBuffer = await downloadMediaMessage(msg, 'buffer', {});
this.logger.debug(`[EvoAI] Downloaded audio: ${mediaBuffer?.length || 0} bytes`);
// Transcribe with OpenAI's Whisper
const transcribedText = await this.speechToText(mediaBuffer);
this.logger.debug(`[EvoAI] Transcription result: ${transcribedText || 'FAILED'}`);
if (transcribedText) {
// Use the transcribed text instead of the original content
this.logger.debug(`[EvoAI] Using transcribed text: ${transcribedText}`);
// Call the parent process method with the transcribed text
return super.process(instance, remoteJid, bot, session, settings, transcribedText, pushName, msg);
}
@@ -70,7 +66,7 @@ export class EvoaiService extends BaseChatbotService<Evoai, EvoaiSetting> {
this.logger.error(`[EvoAI] Audio transcription error: ${err}`);
}
}
// For non-audio messages or if transcription failed, proceed normally
return super.process(instance, remoteJid, bot, session, settings, content, pushName, msg);
} catch (error) {
@@ -91,7 +87,7 @@ export class EvoaiService extends BaseChatbotService<Evoai, EvoaiSetting> {
) {
try {
this.logger.debug(`[EvoAI] Sending message to bot with content: ${content}`);
const endpoint: string = evoai.agentUrl;
const callId = `call-${uuidv4()}`;
const taskId = `task-${uuidv4()}`;
@@ -108,13 +104,13 @@ export class EvoaiService extends BaseChatbotService<Evoai, EvoaiSetting> {
if (this.isImageMessage(content) && msg) {
const contentSplit = content.split('|');
parts[0].text = contentSplit[2] || content;
try {
// Download the image
const mediaBuffer = await downloadMediaMessage(msg, 'buffer', {});
const fileContent = Buffer.from(mediaBuffer).toString('base64');
const fileName = contentSplit[2] || `${msg.key?.id || 'image'}.jpg`;
parts.push({
type: 'file',
file: {