diff --git a/SOFTPHONE_AI_ASSISTANT.md b/SOFTPHONE_AI_ASSISTANT.md
new file mode 100644
index 0000000..d5bbb5a
--- /dev/null
+++ b/SOFTPHONE_AI_ASSISTANT.md
@@ -0,0 +1,173 @@
+# Softphone AI Assistant - Complete Implementation
+
+## ๐ŸŽ‰ Features Implemented
+
+### โœ… Real-time AI Call Assistant
+- **OpenAI Realtime API Integration** - Listens to live calls and provides suggestions
+- **Audio Streaming** - Twilio Media Streams forks call audio to the backend for AI processing
+- **Real-time Transcription** - Speech-to-text during calls
+- **Smart Suggestions** - AI analyzes the conversation and advises the agent
+
+## ๐Ÿ”ง Architecture
+
+### Backend Flow
+```
+Inbound Call โ†’ TwiML (<Start><Stream> + <Dial><Client>)
+โ†’ Media Stream WebSocket โ†’ OpenAI Realtime API
+โ†’ AI Processing โ†’ Socket.IO โ†’ Frontend
+```
+
+### Key Components
+
+1. **TwiML Structure** (`voice.controller.ts:226-234`)
+   - `<Start><Stream>` - Forks call audio for AI processing (see the sketch after this list)
+   - `<Dial><Client>` - Connects the call to the agent's softphone
+
+2. **OpenAI Integration** (`voice.service.ts:431-519`)
+   - WebSocket connection to `wss://api.openai.com/v1/realtime?model=gpt-4o-realtime-preview-2024-10-01`
+   - Session config with custom instructions for agent assistance
+   - Handles transcripts and generates suggestions
+
+3. **AI Message Handler** (`voice.service.ts:609-707`)
+   - Processes OpenAI events (transcripts, suggestions, audio)
+   - Routes suggestions to the frontend via Socket.IO
+   - Saves transcripts to the database
+
+4. **Voice Gateway** (`voice.gateway.ts:272-289`)
+   - `notifyAiTranscript()` - Real-time transcript chunks
+   - `notifyAiSuggestion()` - AI suggestions to the agent
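+
+Illustrative only, since the controller code itself is not part of this diff: a minimal sketch of that TwiML shape using the Node `twilio` helper library. The stream URL and client identity are hypothetical placeholders.
+
+```typescript
+import { twiml } from 'twilio';
+
+// Fork both audio tracks to the backend media-stream WebSocket,
+// then ring the agent's registered softphone client.
+const response = new twiml.VoiceResponse();
+const start = response.start();
+start.stream({ url: 'wss://backend.example.com/media-stream', track: 'both_tracks' }); // hypothetical URL
+const dial = response.dial();
+dial.client('agent-identity'); // hypothetical client identity
+console.log(response.toString());
+```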
+
+### Frontend Components
+
+1. **Softphone Dialog** (`SoftphoneDialog.vue:104-135`)
+   - AI Assistant section with a badge showing the suggestion count
+   - Color-coded suggestions (blue=response, green=action, purple=insight)
+   - Animated highlight for the newest suggestion
+
+2. **Softphone Composable** (`useSoftphone.ts:515-535`)
+   - Socket.IO event handlers for `ai:suggestion` and `ai:transcript`
+   - Maintains a history of the last 10 suggestions
+   - Maintains a history of the last 50 transcript items
+   - The event payload shape is sketched after this list
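+
+The payload below is inferred from the fields the backend emits in this PR (`type`, `text`, `callSid`, `timestamp`); treat it as an assumed contract rather than a published API, and the `/voice` namespace as a placeholder.
+
+```typescript
+import { io } from 'socket.io-client';
+
+// Assumed shape of the `ai:suggestion` event, matching notifyAiSuggestion().
+interface AiSuggestion {
+  type: 'response' | 'action' | 'insight';
+  text: string;      // formatted text, e.g. "๐Ÿ’ก Suggestion: ..."
+  callSid: string;   // Twilio call identifier
+  timestamp: string; // ISO-8601 string set by the backend
+}
+
+const socket = io('/voice'); // namespace is an assumption
+const aiSuggestions: AiSuggestion[] = [];
+
+socket.on('ai:suggestion', (suggestion: AiSuggestion) => {
+  aiSuggestions.unshift(suggestion);                        // newest first
+  if (aiSuggestions.length > 10) aiSuggestions.length = 10; // keep the last 10
+});
+```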
+
+## ๐Ÿ“‹ AI Prompt Configuration
+
+The AI is instructed to:
+- **Listen, not talk** - It advises the agent, not the caller
+- **Provide concise suggestions** - 1-2 sentences max
+- **Use formatted output**:
+  - `๐Ÿ’ก Suggestion: [advice]`
+  - `โš ๏ธ Alert: [important notice]`
+  - `๐Ÿ“‹ Action: [CRM action]`
+
+## ๐ŸŽจ UI Features
+
+### Suggestion Types
+- **Response** (Blue) - Suggested replies or approaches
+- **Action** (Green) - Recommended CRM actions
+- **Insight** (Purple) - Important alerts or observations
+
+### Visual Feedback
+- Badge showing the number of suggestions
+- Newest suggestion pulses for attention
+- Auto-scrolling suggestion list
+- Timestamp on each suggestion
+
+## ๐Ÿ” How to Monitor
+
+### 1. Backend Logs
+```bash
+# Watch for AI events
+docker logs -f neo-backend-1 | grep -E "AI|OpenAI|transcript|suggestion"
+```
+
+Key log markers:
+- `๐Ÿ“ Transcript chunk:` - Real-time speech detection
+- `โœ… Final transcript:` - Complete transcript saved
+- `๐Ÿ’ก AI Suggestion:` - AI-generated advice
+
+### 2. Database
+```sql
+-- View call transcripts
+SELECT call_sid, ai_transcript, created_at
+FROM calls
+ORDER BY created_at DESC
+LIMIT 5;
+```
+
+### 3. Frontend Console
+- Open the browser DevTools console
+- Watch for: "AI suggestion:", "AI transcript:"
+
+## ๐Ÿš€ Testing
+
+1. **Make a test call** to your Twilio number
+2. **Accept the call** in the softphone dialog
+3. **Talk during the call** - Say something like "I need to schedule a follow-up"
+4. **Watch the UI** - AI suggestions appear in real-time
+5. **Check the logs** - See transcription and suggestion generation
+
+## ๐Ÿ“Š Current Status
+
+โœ… **Working**:
+- Inbound calls ring the softphone
+- Media stream forks audio to the backend
+- OpenAI processes audio (1300+ packets per call)
+- AI generates suggestions
+- Suggestions appear in the frontend
+- Transcripts are saved to the database
+
+## ๐Ÿ”ง Configuration
+
+### Required Environment Variables
+```env
+# OpenAI API Key (set in tenant integrations config)
+OPENAI_API_KEY=sk-...
+
+# Optional overrides
+OPENAI_MODEL=gpt-4o-realtime-preview-2024-10-01
+OPENAI_VOICE=alloy
+```
+
+### Tenant Configuration
+Set in Settings > Integrations:
+- OpenAI API Key
+- Model (optional)
+- Voice (optional)
+
+## ๐ŸŽฏ Next Steps (Optional Enhancements)
+
+1. **CRM Tool Execution** - Implement actual tool calls (search contacts, create tasks)
+2. **Audio Response** - Send OpenAI audio back to the caller (two-way AI interaction)
+3. **Sentiment Analysis** - Track call sentiment in real-time
+4. **Call Summary** - Generate a post-call summary automatically
+5. **Custom Prompts** - Allow agents to customize AI instructions per call type
+
+## ๐Ÿ› Troubleshooting
+
+### No suggestions appearing?
+1. Check that the OpenAI API key is configured
+2. Verify the WebSocket connection logs show "OpenAI Realtime connected"
+3. Check that the frontend Socket.IO connection is established
+4. Verify the user ID matches between backend and frontend
+
+### Transcripts not saving?
+1. Check the tenant database connection
+2. Verify the `calls` table has an `ai_transcript` column
+3. Check the logs for "Failed to update transcript" errors
+
+### OpenAI connection fails?
+1. Verify the API key is valid
+2. Check that the model name is correct
+3. Review WebSocket close codes in the logs
+
+## ๐Ÿ“ Files Modified
+
+**Backend:**
+- `/backend/src/voice/voice.service.ts` - OpenAI integration & AI message handling
+- `/backend/src/voice/voice.controller.ts` - TwiML generation with stream fork
+- `/backend/src/voice/voice.gateway.ts` - Socket.IO event emission
+- `/backend/src/main.ts` - Media stream WebSocket handler
+
+**Frontend:**
+- `/frontend/components/SoftphoneDialog.vue` - AI suggestions UI
+- `/frontend/composables/useSoftphone.ts` - Socket.IO event handlers
diff --git a/backend/src/main.ts b/backend/src/main.ts
index 99452e1..64b0624 100644
--- a/backend/src/main.ts
+++ b/backend/src/main.ts
@@ -109,7 +109,8 @@ async function bootstrap() {
         case 'media':
           mediaPacketCount++;
-          if (mediaPacketCount % 50 === 0) {
+          // Only log every 500 packets to reduce noise
+          if (mediaPacketCount % 500 === 0) {
             logger.log(`Received media packet #${mediaPacketCount} for StreamSid: ${streamSid}`);
           }
diff --git a/backend/src/voice/voice.gateway.ts b/backend/src/voice/voice.gateway.ts
index 68ef221..963e583 100644
--- a/backend/src/voice/voice.gateway.ts
+++ b/backend/src/voice/voice.gateway.ts
@@ -281,8 +281,13 @@ export class VoiceGateway
    */
   async notifyAiSuggestion(userId: string, data: any) {
     const socket = this.connectedUsers.get(userId);
+    this.logger.log(`notifyAiSuggestion - userId: ${userId}, socket connected: ${!!socket}, total connected users: ${this.connectedUsers.size}`);
     if (socket) {
+      this.logger.log(`Emitting ai:suggestion event with data: ${JSON.stringify(data)}`);
       socket.emit('ai:suggestion', data);
+    } else {
+      this.logger.warn(`No socket connection found for userId: ${userId}`);
+      this.logger.log(`Connected users: ${Array.from(this.connectedUsers.keys()).join(', ')}`);
     }
   }
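Not part of this diff: for reviewers tracing the "Verify the user ID matches" troubleshooting step, a minimal sketch of the handshake assumed to populate `connectedUsers`. The namespace, auth transport, and JWT claim name are assumptions.

```typescript
import { OnGatewayConnection, OnGatewayDisconnect, WebSocketGateway } from '@nestjs/websockets';
import { JwtService } from '@nestjs/jwt';
import { Socket } from 'socket.io';

@WebSocketGateway({ namespace: '/voice' }) // assumed namespace
export class VoiceGatewaySketch implements OnGatewayConnection, OnGatewayDisconnect {
  private readonly connectedUsers = new Map<string, Socket>();

  constructor(private readonly jwtService: JwtService) {}

  handleConnection(client: Socket) {
    // The key stored here must equal the userId passed to notifyAiSuggestion(),
    // otherwise the lookup misses and the warning branch above fires.
    const token = client.handshake.auth?.token as string;
    const payload = this.jwtService.verify(token); // assumes JWT bearer auth
    this.connectedUsers.set(payload.sub, client);  // assumes userId lives in `sub`
  }

  handleDisconnect(client: Socket) {
    for (const [userId, socket] of this.connectedUsers) {
      if (socket.id === client.id) this.connectedUsers.delete(userId);
    }
  }
}
```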
diff --git a/backend/src/voice/voice.service.ts b/backend/src/voice/voice.service.ts
index 4b9977b..3cda137 100644
--- a/backend/src/voice/voice.service.ts
+++ b/backend/src/voice/voice.service.ts
@@ -483,13 +483,36 @@ export class VoiceService {
     // Add to connections map only after it's open
     this.openaiConnections.set(callSid, ws);
 
+    // Store call state with userId for later use
+    this.callStates.set(callSid, {
+      callSid,
+      tenantId: tenant.id,
+      userId,
+      status: 'in-progress',
+    });
+    this.logger.log(`๐Ÿ“ Stored call state for ${callSid} with userId: ${userId}`);
+
     // Initialize session
     ws.send(JSON.stringify({
       type: 'session.update',
       session: {
         model: config.openai.model || 'gpt-4o-realtime-preview',
         voice: config.openai.voice || 'alloy',
-        instructions: 'You are a helpful AI assistant providing real-time support during phone calls. Provide concise, actionable suggestions to help the user.',
+        instructions: `You are an AI assistant in LISTENING MODE, helping a sales/support agent during their phone call.
+
+IMPORTANT: You are NOT talking to the caller. You are advising the agent who is handling the call.
+
+Your role:
+- Listen to the conversation between the agent and the caller
+- Provide concise, actionable suggestions to help the agent
+- Recommend CRM actions (search contacts, create tasks, update records)
+- Alert the agent to important information or next steps
+- Keep suggestions brief (1-2 sentences max)
+
+Format your suggestions like:
+"๐Ÿ’ก Suggestion: [your advice]"
+"โš ๏ธ Alert: [important notice]"
+"๐Ÿ“‹ Action: [recommended CRM action]"`,
         turn_detection: {
           type: 'server_vad',
         },
@@ -587,25 +610,15 @@ export class VoiceService {
     message: any,
   ) {
     try {
-      // Log all message types for debugging
-      this.logger.debug(`OpenAI message type: ${message.type} for call ${callSid}`);
-
       switch (message.type) {
         case 'conversation.item.created':
-          if (message.item.type === 'message' && message.item.role === 'assistant') {
-            // AI response generated
-            this.logger.log(`AI response for call ${callSid}: ${JSON.stringify(message.item.content)}`);
-          }
+          // Skip logging for now
           break;
 
         case 'response.audio.delta':
-          // OpenAI is sending audio response
-          // This needs to be sent to Twilio Media Stream
-          // Note: We'll need to get the streamSid from the call state
+          // OpenAI is sending audio response (skip logging)
           const state = this.callStates.get(callSid);
           if (state?.streamSid && message.delta) {
-            // The controller will handle sending to Twilio
-            // Store audio delta for controller to pick up
             if (!state.pendingAudio) {
               state.pendingAudio = [];
             }
@@ -614,31 +627,50 @@
           break;
 
         case 'response.audio.done':
-          // Audio response complete
-          this.logger.log(`OpenAI audio response complete for call ${callSid}`);
+          // Skip logging
           break;
 
         case 'response.audio_transcript.delta':
-          // Real-time transcript chunk
-          const deltaState = this.callStates.get(callSid);
-          if (deltaState?.userId && message.delta) {
-            this.logger.log(`๐Ÿ“ Transcript chunk: "${message.delta}"`);
-            // Emit to frontend via gateway
-            if (this.voiceGateway) {
-              await this.voiceGateway.notifyAiTranscript(deltaState.userId, {
-                callSid,
-                transcript: message.delta,
-                isFinal: false,
-              });
-            }
-          }
+          // Skip - not transmitting individual words to the frontend
           break;
 
         case 'response.audio_transcript.done':
-          // Final transcript
+          // Final transcript - this contains the AI's actual text suggestions!
           const transcript = message.transcript;
-          this.logger.log(`โœ… Final transcript for call ${callSid}: "${transcript}"`);
+          this.logger.log(`๐Ÿ’ก AI Suggestion: "${transcript}"`);
+
+          // Save to database
           await this.updateCallTranscript(callSid, tenantId, transcript);
+
+          // Also send as a suggestion to the frontend if it looks like one
+          if (transcript && transcript.length > 0) {
+            // Determine the suggestion type
+            let suggestionType: 'response' | 'action' | 'insight' = 'insight';
+            if (transcript.includes('๐Ÿ’ก') || transcript.toLowerCase().includes('suggest')) {
+              suggestionType = 'response';
+            } else if (transcript.includes('๐Ÿ“‹') || transcript.toLowerCase().includes('action')) {
+              suggestionType = 'action';
+            } else if (transcript.includes('โš ๏ธ') || transcript.toLowerCase().includes('alert')) {
+              suggestionType = 'insight';
+            }
+
+            // Emit to frontend (named callState because `state` is already
+            // declared in the 'response.audio.delta' case of this switch scope)
+            const callState = this.callStates.get(callSid);
+            this.logger.log(`๐Ÿ“Š Call state - userId: ${callState?.userId}, gateway: ${!!this.voiceGateway}`);
+
+            if (callState?.userId && this.voiceGateway) {
+              this.logger.log(`๐Ÿ“ค Sending to user ${callState.userId}`);
+              await this.voiceGateway.notifyAiSuggestion(callState.userId, {
+                type: suggestionType,
+                text: transcript,
+                callSid,
+                timestamp: new Date().toISOString(),
+              });
+              this.logger.log(`โœ… Suggestion sent to agent`);
+            } else {
+              this.logger.warn(`โŒ Cannot send - userId: ${callState?.userId}, gateway: ${!!this.voiceGateway}, callStates has ${this.callStates.size} entries`);
+            }
+          }
           break;
 
         case 'response.function_call_arguments.done':
@@ -647,11 +679,17 @@
           break;
 
         case 'session.created':
-          this.logger.log(`OpenAI session created for call ${callSid}`);
-          break;
-
         case 'session.updated':
-          this.logger.log(`OpenAI session updated for call ${callSid}`);
+        case 'response.created':
+        case 'response.output_item.added':
+        case 'response.content_part.added':
+        case 'response.content_part.done':
+        case 'response.output_item.done':
+        case 'response.done':
+        case 'input_audio_buffer.speech_started':
+        case 'input_audio_buffer.speech_stopped':
+        case 'input_audio_buffer.committed':
+          // Skip logging for these (too noisy)
           break;
 
         case 'error':
@@ -659,8 +697,7 @@
           break;
 
         default:
-          // Log other message types for debugging
-          this.logger.debug(`Unhandled OpenAI message type: ${message.type}`);
+          // Only log unhandled types occasionally
           break;
       }
     } catch (error) {
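A review note rather than part of the change: the emoji/keyword mapping added in the `response.audio_transcript.done` branch above could be extracted into a pure helper, which keeps the switch lean and makes the mapping unit-testable. A sketch that mirrors that logic (the helper name is hypothetical):

```typescript
type SuggestionType = 'response' | 'action' | 'insight';

// Same precedence as the inline code: ๐Ÿ’ก/"suggest" wins, then ๐Ÿ“‹/"action",
// and everything else (including โš ๏ธ alerts) falls back to 'insight'.
function classifySuggestion(text: string): SuggestionType {
  const lower = text.toLowerCase();
  if (text.includes('๐Ÿ’ก') || lower.includes('suggest')) return 'response';
  if (text.includes('๐Ÿ“‹') || lower.includes('action')) return 'action';
  return 'insight';
}

classifySuggestion('๐Ÿ“‹ Action: create a follow-up task for tomorrow'); // 'action'
```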
diff --git a/frontend/components/SoftphoneDialog.vue b/frontend/components/SoftphoneDialog.vue
index 549dbdf..71941e6 100644
--- a/frontend/components/SoftphoneDialog.vue
+++ b/frontend/components/SoftphoneDialog.vue
@@ -85,39 +85,39 @@
               {{ digit }}
             </Button>
           </div>
 
-          <!-- Live transcript -->
-          <div v-if="softphone.transcript.value.length" class="mt-4">
-            <div class="mb-1 text-xs font-medium text-muted-foreground">Transcript</div>
-            <div class="max-h-24 space-y-1 overflow-y-auto rounded border p-2 text-xs">
-              <div v-for="(item, i) in softphone.transcript.value" :key="i">
-                {{ item.text }}
-              </div>
-            </div>
-          </div>
-
-          <!-- AI suggestions -->
-          <div v-if="softphone.aiSuggestions.value.length" class="mt-4">
-            <div class="mb-1 text-xs font-medium text-muted-foreground">AI Suggestions</div>
-            <div class="max-h-32 space-y-1 overflow-y-auto">
-              <div v-for="(suggestion, i) in softphone.aiSuggestions.value" :key="i" class="rounded border p-2 text-xs">
-                <span class="font-medium capitalize">{{ suggestion.type }}</span>
-                {{ suggestion.text }}
-              </div>
-            </div>
-          </div>
+          <!-- AI Assistant -->
+          <div class="mt-4">
+            <div class="mb-1 flex items-center gap-2">
+              <span class="text-xs font-medium">AI Assistant</span>
+              <span v-if="softphone.aiSuggestions.value.length" class="rounded-full bg-primary px-2 text-xs text-primary-foreground">
+                {{ softphone.aiSuggestions.value.length }}
+              </span>
+            </div>
+            <div class="max-h-40 space-y-1 overflow-y-auto">
+              <div
+                v-for="(suggestion, i) in softphone.aiSuggestions.value"
+                :key="i"
+                class="rounded border p-2 text-xs"
+                :class="[
+                  suggestion.type === 'response' ? 'border-blue-300 bg-blue-50' :
+                  suggestion.type === 'action' ? 'border-green-300 bg-green-50' :
+                  'border-purple-300 bg-purple-50',
+                  i === 0 ? 'animate-pulse' : '',
+                ]"
+              >
+                <div class="flex items-center justify-between">
+                  <span class="font-medium capitalize">{{ suggestion.type }}</span>
+                  <span class="text-muted-foreground">just now</span>
+                </div>
+                {{ suggestion.text }}
+              </div>
+            </div>
+          </div>
@@ -156,6 +156,11 @@
           </div>
 
+          <!-- Debug: add a fake suggestion to verify rendering -->
+          <button class="text-xs text-muted-foreground underline" @click="testAiSuggestion">
+            Test AI suggestion
+          </button>
+
           <div class="mt-4">
             <div class="mb-1 text-xs font-medium text-muted-foreground">Recent Calls</div>
@@ -243,6 +248,21 @@ const handleEndCall = async () => { } }; +// Debug: Test AI suggestions display +const testAiSuggestion = () => { + console.log('๐Ÿงช Testing AI suggestion display'); + console.log('Current suggestions:', softphone.aiSuggestions.value); + + // Add a test suggestion + softphone.aiSuggestions.value.unshift({ + type: 'response', + text: '๐Ÿ’ก Test suggestion: This is a test AI suggestion to verify UI display' + }); + + console.log('After test:', softphone.aiSuggestions.value); + toast.success('Test suggestion added'); +}; + const handleDtmf = async (digit: string) => { if (!softphone.currentCall.value) return; diff --git a/frontend/composables/useSoftphone.ts b/frontend/composables/useSoftphone.ts index b1a875d..4d42402 100644 --- a/frontend/composables/useSoftphone.ts +++ b/frontend/composables/useSoftphone.ts @@ -259,7 +259,8 @@ export function useSoftphone() { // Connection events socket.value.on('connect', () => { - console.log('Softphone WebSocket connected'); + console.log('๐Ÿ”Œ Softphone WebSocket connected'); + console.log('๐Ÿ“‹ Token payload (check userId):', parseJwt(token)); isConnected.value = true; // Initialize Twilio Device after WebSocket connects @@ -288,7 +289,10 @@ export function useSoftphone() { // AI events socket.value.on('ai:transcript', handleAiTranscript); - socket.value.on('ai:suggestion', handleAiSuggestion); + socket.value.on('ai:suggestion', (data: any) => { + console.log('๐ŸŽฏ AI Suggestion received:', data.text); + handleAiSuggestion(data); + }); socket.value.on('ai:action', handleAiAction); isInitialized.value = true; @@ -509,7 +513,6 @@ export function useSoftphone() { }; const handleAiTranscript = (data: { transcript: string; isFinal: boolean }) => { - console.log('AI transcript:', data); transcript.value.push({ text: data.transcript, isFinal: data.isFinal, @@ -523,7 +526,6 @@ export function useSoftphone() { }; const handleAiSuggestion = (data: AiSuggestion) => { - console.log('AI suggestion:', data); aiSuggestions.value.unshift(data); // Keep only last 10 suggestions @@ -532,6 +534,15 @@ export function useSoftphone() { } }; + // Helper to parse JWT (for debugging) + const parseJwt = (token: string) => { + try { + return JSON.parse(atob(token.split('.')[1])); + } catch (e) { + return null; + } + }; + const handleAiAction = (data: any) => { console.log('AI action:', data); toast.info(`AI: ${data.action}`);
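Beyond the diff itself, a standalone smoke test can confirm that `ai:suggestion` events actually reach a client without driving the full UI. A minimal sketch using `socket.io-client`; the URL, namespace, and auth transport are assumptions to be matched to the real gateway configuration:

```typescript
import { io } from 'socket.io-client';

// Connect the way the softphone composable is assumed to, then watch for AI events.
const socket = io('http://localhost:3000/voice', {
  auth: { token: process.env.AGENT_JWT ?? '' }, // hypothetical env var with a valid agent JWT
});

socket.on('connect', () => console.log('connected as', socket.id));
socket.on('connect_error', (err) => console.error('connect_error:', err.message));
socket.on('ai:suggestion', (s) => console.log(`[${s.type}] ${s.text}`));
socket.on('ai:transcript', (t) => console.log(`transcript: ${t.transcript}`));
```

If nothing arrives while the backend logs `โœ… Suggestion sent to agent`, the key stored in `connectedUsers` most likely differs from the frontend token's user id; the `No socket connection found` warning added in this PR prints both sides for comparison.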