diff --git a/src/api/integrations/chatbot/base-chatbot.controller.ts b/src/api/integrations/chatbot/base-chatbot.controller.ts index 4d061923..0244184c 100644 --- a/src/api/integrations/chatbot/base-chatbot.controller.ts +++ b/src/api/integrations/chatbot/base-chatbot.controller.ts @@ -13,7 +13,7 @@ import { ChatbotController, ChatbotControllerInterface, EmitData } from './chatb // Common settings interface for all chatbot integrations export interface ChatbotSettings { expire: number; - keywordFinish: string[]; + keywordFinish: string; delayMessage: number; unknownMessage: string; listeningFromMe: boolean; @@ -344,7 +344,6 @@ export abstract class BaseChatbotController { return null; } - /** - * Transcribes audio to text using OpenAI's Whisper API - */ - protected async speechToText(audioBuffer: Buffer): Promise { - if (!this.configService) { - this.logger.error('ConfigService not available for speech-to-text transcription'); - return null; - } - - try { - // Try to get the API key from process.env directly since ConfigService might not access it correctly - const apiKey = this.configService.get('OPENAI')?.API_KEY || process.env.OPENAI_API_KEY; - if (!apiKey) { - this.logger.error('No OpenAI API key set for Whisper transcription'); - return null; - } - - const lang = this.configService.get('LANGUAGE').includes('pt') - ? 
'pt' - : this.configService.get('LANGUAGE'); - - const formData = new FormData(); - formData.append('file', audioBuffer, 'audio.ogg'); - formData.append('model', 'whisper-1'); - formData.append('language', lang); - - const response = await axios.post('https://api.openai.com/v1/audio/transcriptions', formData, { - headers: { - ...formData.getHeaders(), - Authorization: `Bearer ${apiKey}`, - }, - }); - - return response?.data?.text || null; - } catch (err) { - this.logger.error(`Whisper transcription failed: ${err}`); - return null; - } - } - /** * Create a new chatbot session */ @@ -174,12 +132,9 @@ export abstract class BaseChatbotService { } // For existing sessions, keywords might indicate the conversation should end - const keywordFinish = (settings as any)?.keywordFinish || []; + const keywordFinish = (settings as any)?.keywordFinish || ''; const normalizedContent = content.toLowerCase().trim(); - if ( - keywordFinish.length > 0 && - keywordFinish.some((keyword: string) => normalizedContent === keyword.toLowerCase()) - ) { + if (keywordFinish.length > 0 && normalizedContent === keywordFinish.toLowerCase()) { // Update session to closed and return await this.prismaRepository.integrationSession.update({ where: { diff --git a/src/api/integrations/chatbot/dify/services/dify.service.ts b/src/api/integrations/chatbot/dify/services/dify.service.ts index 01e433f6..467d27cb 100644 --- a/src/api/integrations/chatbot/dify/services/dify.service.ts +++ b/src/api/integrations/chatbot/dify/services/dify.service.ts @@ -6,13 +6,21 @@ import { Auth, ConfigService, HttpServer } from '@config/env.config'; import { Dify, DifySetting, IntegrationSession } from '@prisma/client'; import { sendTelemetry } from '@utils/sendTelemetry'; import axios from 'axios'; -import { downloadMediaMessage } from 'baileys'; import { BaseChatbotService } from '../../base-chatbot.service'; +import { OpenaiService } from '../../openai/services/openai.service'; export class DifyService extends 
BaseChatbotService { - constructor(waMonitor: WAMonitoringService, configService: ConfigService, prismaRepository: PrismaRepository) { + private openaiService: OpenaiService; + + constructor( + waMonitor: WAMonitoringService, + configService: ConfigService, + prismaRepository: PrismaRepository, + openaiService: OpenaiService, + ) { super(waMonitor, prismaRepository, 'DifyService', configService); + this.openaiService = openaiService; } /** @@ -73,10 +81,9 @@ export class DifyService extends BaseChatbotService { if (this.isAudioMessage(content) && msg) { try { this.logger.debug(`[Dify] Downloading audio for Whisper transcription`); - const mediaBuffer = await downloadMediaMessage({ key: msg.key, message: msg.message }, 'buffer', {}); - const transcribedText = await this.speechToText(mediaBuffer); - if (transcribedText) { - payload.query = transcribedText; + const transcription = await this.openaiService.speechToText(msg); + if (transcription) { + payload.query = transcription; } else { payload.query = '[Audio message could not be transcribed]'; } @@ -151,10 +158,9 @@ export class DifyService extends BaseChatbotService { if (this.isAudioMessage(content) && msg) { try { this.logger.debug(`[Dify] Downloading audio for Whisper transcription`); - const mediaBuffer = await downloadMediaMessage({ key: msg.key, message: msg.message }, 'buffer', {}); - const transcribedText = await this.speechToText(mediaBuffer); - if (transcribedText) { - payload.inputs.query = transcribedText; + const transcription = await this.openaiService.speechToText(msg); + if (transcription) { + payload.inputs.query = transcription; } else { payload.inputs.query = '[Audio message could not be transcribed]'; } @@ -229,10 +235,9 @@ export class DifyService extends BaseChatbotService { if (this.isAudioMessage(content) && msg) { try { this.logger.debug(`[Dify] Downloading audio for Whisper transcription`); - const mediaBuffer = await downloadMediaMessage({ key: msg.key, message: msg.message }, 'buffer', 
{}); - const transcribedText = await this.speechToText(mediaBuffer); - if (transcribedText) { - payload.query = transcribedText; + const transcription = await this.openaiService.speechToText(msg); + if (transcription) { + payload.query = transcription; } else { payload.query = '[Audio message could not be transcribed]'; } diff --git a/src/api/integrations/chatbot/evolutionBot/services/evolutionBot.service.ts b/src/api/integrations/chatbot/evolutionBot/services/evolutionBot.service.ts index 0a048a1a..94470f1d 100644 --- a/src/api/integrations/chatbot/evolutionBot/services/evolutionBot.service.ts +++ b/src/api/integrations/chatbot/evolutionBot/services/evolutionBot.service.ts @@ -8,10 +8,19 @@ import { sendTelemetry } from '@utils/sendTelemetry'; import axios from 'axios'; import { BaseChatbotService } from '../../base-chatbot.service'; +import { OpenaiService } from '../../openai/services/openai.service'; export class EvolutionBotService extends BaseChatbotService { - constructor(waMonitor: WAMonitoringService, configService: ConfigService, prismaRepository: PrismaRepository) { + private openaiService: OpenaiService; + + constructor( + waMonitor: WAMonitoringService, + configService: ConfigService, + prismaRepository: PrismaRepository, + openaiService: OpenaiService, + ) { super(waMonitor, prismaRepository, 'EvolutionBotService', configService); + this.openaiService = openaiService; } /** @@ -50,6 +59,21 @@ export class EvolutionBotService extends BaseChatbotService { - constructor(waMonitor: WAMonitoringService, configService: ConfigService, prismaRepository: PrismaRepository) { + private openaiService: OpenaiService; + constructor( + waMonitor: WAMonitoringService, + configService: ConfigService, + prismaRepository: PrismaRepository, + openaiService: OpenaiService, + ) { super(waMonitor, prismaRepository, 'FlowiseService', configService); + this.openaiService = openaiService; } /** @@ -49,6 +57,21 @@ export class FlowiseService extends BaseChatbotService }, }; + 
if (this.isAudioMessage(content) && msg) { + try { + this.logger.debug(`[EvolutionBot] Downloading audio for Whisper transcription`); + const transcription = await this.openaiService.speechToText(msg); + if (transcription) { + payload.query = transcription; + } else { + payload.query = '[Audio message could not be transcribed]'; + } + } catch (err) { + this.logger.error(`[EvolutionBot] Failed to transcribe audio: ${err}`); + payload.query = '[Audio message could not be transcribed]'; + } + } + if (this.isImageMessage(content)) { const contentSplit = content.split('|'); diff --git a/src/api/integrations/chatbot/n8n/services/n8n.service.ts b/src/api/integrations/chatbot/n8n/services/n8n.service.ts index 7046adec..45516df2 100644 --- a/src/api/integrations/chatbot/n8n/services/n8n.service.ts +++ b/src/api/integrations/chatbot/n8n/services/n8n.service.ts @@ -5,14 +5,21 @@ import { Auth, ConfigService, HttpServer } from '@config/env.config'; import { IntegrationSession, N8n, N8nSetting } from '@prisma/client'; import { sendTelemetry } from '@utils/sendTelemetry'; import axios from 'axios'; -import { downloadMediaMessage } from 'baileys'; import { BaseChatbotService } from '../../base-chatbot.service'; +import { OpenaiService } from '../../openai/services/openai.service'; import { N8nDto } from '../dto/n8n.dto'; - export class N8nService extends BaseChatbotService { - constructor(waMonitor: WAMonitoringService, prismaRepository: PrismaRepository, configService: ConfigService) { + private openaiService: OpenaiService; + + constructor( + waMonitor: WAMonitoringService, + prismaRepository: PrismaRepository, + configService: ConfigService, + openaiService: OpenaiService, + ) { super(waMonitor, prismaRepository, 'N8nService', configService); + this.openaiService = openaiService; } /** @@ -135,10 +142,9 @@ export class N8nService extends BaseChatbotService { if (this.isAudioMessage(content) && msg) { try { this.logger.debug(`[N8n] Downloading audio for Whisper transcription`); 
- const mediaBuffer = await downloadMediaMessage({ key: msg.key, message: msg.message }, 'buffer', {}); - const transcribedText = await this.speechToText(mediaBuffer); - if (transcribedText) { - payload.chatInput = transcribedText; + const transcription = await this.openaiService.speechToText(msg); + if (transcription) { + payload.chatInput = transcription; } else { payload.chatInput = '[Audio message could not be transcribed]'; } diff --git a/src/api/integrations/chatbot/openai/controllers/openai.controller.ts b/src/api/integrations/chatbot/openai/controllers/openai.controller.ts index 17973e97..b65299f1 100644 --- a/src/api/integrations/chatbot/openai/controllers/openai.controller.ts +++ b/src/api/integrations/chatbot/openai/controllers/openai.controller.ts @@ -176,7 +176,7 @@ export class OpenaiController extends BaseChatbotController } // Handle keyword finish - const keywordFinish = settings?.keywordFinish?.split(',') || []; + const keywordFinish = settings?.keywordFinish || ''; const normalizedContent = content.toLowerCase().trim(); - if ( - keywordFinish.length > 0 && - keywordFinish.some((keyword: string) => normalizedContent === keyword.toLowerCase().trim()) - ) { + if (keywordFinish.length > 0 && normalizedContent === keywordFinish.toLowerCase()) { if (settings?.keepOpen) { await this.prismaRepository.integrationSession.update({ where: { diff --git a/src/api/integrations/chatbot/typebot/services/typebot.service.ts b/src/api/integrations/chatbot/typebot/services/typebot.service.ts index 7be3cae7..29169999 100644 --- a/src/api/integrations/chatbot/typebot/services/typebot.service.ts +++ b/src/api/integrations/chatbot/typebot/services/typebot.service.ts @@ -6,10 +6,19 @@ import { sendTelemetry } from '@utils/sendTelemetry'; import axios from 'axios'; import { BaseChatbotService } from '../../base-chatbot.service'; +import { OpenaiService } from '../../openai/services/openai.service'; export class TypebotService extends BaseChatbotService { - 
constructor(waMonitor: WAMonitoringService, configService: ConfigService, prismaRepository: PrismaRepository) { + private openaiService: OpenaiService; + + constructor( + waMonitor: WAMonitoringService, + configService: ConfigService, + prismaRepository: PrismaRepository, + openaiService: OpenaiService, + ) { super(waMonitor, prismaRepository, 'TypebotService', configService); + this.openaiService = openaiService; } /** @@ -58,7 +67,7 @@ export class TypebotService extends BaseChatbotService { // Continue an existing chat const version = this.configService?.get('TYPEBOT').API_VERSION; let url: string; - let reqData: {}; + let reqData: any; if (version === 'latest') { url = `${bot.url}/api/v1/sessions/${session.sessionId.split('-')[1]}/continueChat`; @@ -71,6 +80,21 @@ export class TypebotService extends BaseChatbotService { }; } + if (this.isAudioMessage(content) && msg) { + try { + this.logger.debug(`[Typebot] Downloading audio for Whisper transcription`); + const transcription = await this.openaiService.speechToText(msg); + if (transcription) { + reqData.message = transcription; + } else { + reqData.message = '[Audio message could not be transcribed]'; + } + } catch (err) { + this.logger.error(`[Typebot] Failed to transcribe audio: ${err}`); + reqData.message = '[Audio message could not be transcribed]'; + } + } + const response = await axios.post(url, reqData); // Process the response and send the messages to WhatsApp diff --git a/src/api/server.module.ts b/src/api/server.module.ts index 598b014b..42e2558f 100644 --- a/src/api/server.module.ts +++ b/src/api/server.module.ts @@ -115,23 +115,24 @@ export const channelController = new ChannelController(prismaRepository, waMonitor); export const evolutionController = new EvolutionController(prismaRepository, waMonitor); export const metaController = new MetaController(prismaRepository, waMonitor); export const baileysController = new BaileysController(waMonitor); -// chatbots -const typebotService = new 
TypebotService(waMonitor, configService, prismaRepository); -export const typebotController = new TypebotController(typebotService, prismaRepository, waMonitor); const openaiService = new OpenaiService(waMonitor, prismaRepository, configService); export const openaiController = new OpenaiController(openaiService, prismaRepository, waMonitor); -const difyService = new DifyService(waMonitor, configService, prismaRepository); +// chatbots +const typebotService = new TypebotService(waMonitor, configService, prismaRepository, openaiService); +export const typebotController = new TypebotController(typebotService, prismaRepository, waMonitor); + +const difyService = new DifyService(waMonitor, configService, prismaRepository, openaiService); export const difyController = new DifyController(difyService, prismaRepository, waMonitor); -const evolutionBotService = new EvolutionBotService(waMonitor, configService, prismaRepository); +const evolutionBotService = new EvolutionBotService(waMonitor, configService, prismaRepository, openaiService); export const evolutionBotController = new EvolutionBotController(evolutionBotService, prismaRepository, waMonitor); -const flowiseService = new FlowiseService(waMonitor, configService, prismaRepository); +const flowiseService = new FlowiseService(waMonitor, configService, prismaRepository, openaiService); export const flowiseController = new FlowiseController(flowiseService, prismaRepository, waMonitor); -const n8nService = new N8nService(waMonitor, prismaRepository, configService); +const n8nService = new N8nService(waMonitor, prismaRepository, configService, openaiService); export const n8nController = new N8nController(n8nService, prismaRepository, waMonitor); const evoaiService = new EvoaiService(waMonitor, prismaRepository, configService);