WIP - call connecting to softphone

This commit is contained in:
Francisco Gaona
2026-01-04 04:29:25 +01:00
parent 71cd6a07b7
commit 03cbb5eb1f
11 changed files with 1190 additions and 104 deletions

View File

@@ -13,6 +13,7 @@ import { FastifyRequest, FastifyReply } from 'fastify';
import { JwtAuthGuard } from '../auth/jwt-auth.guard';
import { VoiceService } from './voice.service';
import { VoiceGateway } from './voice.gateway';
import { AudioConverterService } from './audio-converter.service';
import { InitiateCallDto } from './dto/initiate-call.dto';
import { TenantId } from '../tenant/tenant.decorator';
@@ -20,9 +21,13 @@ import { TenantId } from '../tenant/tenant.decorator';
export class VoiceController {
private readonly logger = new Logger(VoiceController.name);
// Track active Media Streams connections: streamSid -> WebSocket
private mediaStreams: Map<string, any> = new Map();
constructor(
private readonly voiceService: VoiceService,
private readonly voiceGateway: VoiceGateway,
private readonly audioConverter: AudioConverterService,
) {}
/**
@@ -159,8 +164,12 @@ export class VoiceController {
const fromNumber = body.From;
const toNumber = body.To;
this.logger.log(`=== INBOUND CALL RECEIVED ===`);
this.logger.log(`CallSid: ${callSid}, From: ${fromNumber}, To: ${toNumber}`);
this.logger.log(`\n\n╔════════════════════════════════════════╗`);
this.logger.log(`║ === INBOUND CALL RECEIVED ===`);
this.logger.log(`╚════════════════════════════════════════╝`);
this.logger.log(`CallSid: ${callSid}`);
this.logger.log(`From: ${fromNumber}`);
this.logger.log(`To: ${toNumber}`);
this.logger.log(`Full body: ${JSON.stringify(body)}`);
try {
@@ -174,6 +183,9 @@ export class VoiceController {
const connectedUsers = this.voiceGateway.getConnectedUsers(tenantDomain);
this.logger.log(`Connected users for tenant ${tenantDomain}: ${connectedUsers.length}`);
if (connectedUsers.length > 0) {
this.logger.log(`Connected user IDs: ${connectedUsers.join(', ')}`);
}
if (connectedUsers.length === 0) {
// No users online - send to voicemail or play message
@@ -182,22 +194,52 @@ export class VoiceController {
<Say>Sorry, no agents are currently available. Please try again later.</Say>
<Hangup/>
</Response>`;
this.logger.log(`No users online - returning unavailable message`);
this.logger.log(`No users online - returning unavailable message`);
return res.type('text/xml').send(twiml);
}
// Build TwiML to dial all connected clients (first to answer gets the call)
// Build TwiML to dial all connected clients with Media Streams for AI
const clientElements = connectedUsers.map(userId => ` <Client>${userId}</Client>`).join('\n');
// Use wss:// for secure WebSocket (Traefik handles HTTPS)
const streamUrl = `wss://${host}/api/voice/media-stream`;
this.logger.log(`Stream URL: ${streamUrl}`);
this.logger.log(`Dialing ${connectedUsers.length} client(s)...`);
this.logger.log(`Client IDs to dial: ${connectedUsers.join(', ')}`);
// Verify we have client IDs in proper format
if (connectedUsers.length > 0) {
this.logger.log(`First Client ID format check: "${connectedUsers[0]}" (length: ${connectedUsers[0].length})`);
}
// Notify connected users about incoming call via Socket.IO
connectedUsers.forEach(userId => {
this.voiceGateway.notifyIncomingCall(userId, {
callSid,
fromNumber,
toNumber,
tenantDomain,
});
});
const twiml = `<?xml version="1.0" encoding="UTF-8"?>
<Response>
<Say>Connecting your call</Say>
<Dial timeout="30" action="${host}/api/voice/webhook/dial-status">
<Dial timeout="30">
${clientElements}
<OnAnswer>
<Connect>
<Stream url="${streamUrl}">
<Parameter name="tenantId" value="${tenantDomain}"/>
<Parameter name="userId" value="${connectedUsers[0]}"/>
</Stream>
</Connect>
</OnAnswer>
</Dial>
</Response>`;
this.logger.log(`Returning inbound TwiML - dialing ${connectedUsers.length} client(s)`);
this.logger.log(`Returning inbound TwiML with Media Streams - dialing ${connectedUsers.length} client(s)`);
this.logger.log(`Generated TwiML:\n${twiml}\n`);
res.type('text/xml').send(twiml);
} catch (error: any) {
this.logger.error(`Error generating inbound TwiML: ${error.message}`);
@@ -240,4 +282,216 @@ ${clientElements}
return { success: true };
}
/**
* Twilio Media Streams WebSocket endpoint
* Receives real-time audio from Twilio and forwards to OpenAI Realtime API
*
* This handles the HTTP GET request and upgrades it to WebSocket manually.
*/
@Get('media-stream')
mediaStream(@Req() req: FastifyRequest) {
// For WebSocket upgrade, we need to access the raw socket
let socket: any;
try {
this.logger.log(`=== MEDIA STREAM REQUEST ===`);
this.logger.log(`URL: ${req.url}`);
this.logger.log(`Headers keys: ${Object.keys(req.headers).join(', ')}`);
this.logger.log(`Headers: ${JSON.stringify(req.headers)}`);
// Check if this is a WebSocket upgrade request
const hasWebSocketKey = 'sec-websocket-key' in req.headers;
const hasWebSocketVersion = 'sec-websocket-version' in req.headers;
this.logger.log(`hasWebSocketKey: ${hasWebSocketKey}`);
this.logger.log(`hasWebSocketVersion: ${hasWebSocketVersion}`);
if (!hasWebSocketKey || !hasWebSocketVersion) {
this.logger.log('Not a WebSocket upgrade request - returning');
return;
}
this.logger.log('✓ WebSocket upgrade detected');
// Get the socket - try different ways
socket = (req.raw as any).socket;
this.logger.log(`Socket obtained: ${!!socket}`);
if (!socket) {
this.logger.error('Failed to get socket from req.raw');
return;
}
const rawRequest = req.raw;
const head = Buffer.alloc(0);
this.logger.log('Creating WebSocketServer...');
const WebSocketServer = require('ws').Server;
const wss = new WebSocketServer({ noServer: true });
this.logger.log('Calling handleUpgrade...');
// handleUpgrade will send the 101 response and take over the socket
wss.handleUpgrade(rawRequest, socket, head, (ws: any) => {
this.logger.log('=== TWILIO MEDIA STREAM WEBSOCKET UPGRADED SUCCESSFULLY ===');
this.handleMediaStreamSocket(ws);
});
this.logger.log('handleUpgrade completed');
} catch (error: any) {
this.logger.error(`=== FAILED TO UPGRADE TO WEBSOCKET ===`);
this.logger.error(`Error message: ${error.message}`);
this.logger.error(`Error stack: ${error.stack}`);
}
}
/**
* Handle incoming Media Stream WebSocket messages
*/
private handleMediaStreamSocket(ws: any) {
let streamSid: string | null = null;
let callSid: string | null = null;
let tenantDomain: string | null = null;
let mediaPacketCount = 0;
// WebSocket message handler
ws.on('message', async (message: Buffer) => {
try {
const msg = JSON.parse(message.toString());
switch (msg.event) {
case 'connected':
this.logger.log('=== MEDIA STREAM EVENT: CONNECTED ===');
this.logger.log(`Protocol: ${msg.protocol}`);
this.logger.log(`Version: ${msg.version}`);
break;
case 'start':
streamSid = msg.streamSid;
callSid = msg.start.callSid;
// Extract tenant from customParameters if available
tenantDomain = msg.start.customParameters?.tenantId || 'tenant1';
this.logger.log(`=== MEDIA STREAM EVENT: START ===`);
this.logger.log(`StreamSid: ${streamSid}`);
this.logger.log(`CallSid: ${callSid}`);
this.logger.log(`Tenant: ${tenantDomain}`);
this.logger.log(`AccountSid: ${msg.start.accountSid}`);
this.logger.log(`MediaFormat: ${JSON.stringify(msg.start.mediaFormat)}`);
this.logger.log(`Custom Parameters: ${JSON.stringify(msg.start.customParameters)}`);
// Store WebSocket connection
this.mediaStreams.set(streamSid, ws);
this.logger.log(`Stored WebSocket for streamSid: ${streamSid}. Total active streams: ${this.mediaStreams.size}`);
// Initialize OpenAI Realtime connection for this call
this.logger.log(`Initializing OpenAI Realtime for call ${callSid}...`);
await this.voiceService.initializeOpenAIRealtime({
callSid,
tenantId: tenantDomain,
userId: msg.start.customParameters?.userId || 'system',
});
this.logger.log(`✓ OpenAI Realtime initialized for call ${callSid}`);
break;
case 'media':
mediaPacketCount++;
if (mediaPacketCount % 50 === 0) {
// Log every 50th packet to avoid spam
this.logger.log(`Received media packet #${mediaPacketCount} for StreamSid: ${streamSid}, CallSid: ${callSid}, PayloadSize: ${msg.media.payload?.length || 0} bytes`);
}
if (!callSid || !tenantDomain) {
this.logger.warn('Received media before start event');
break;
}
// msg.media.payload is base64-encoded μ-law audio from Twilio
const twilioAudio = msg.media.payload;
// Convert Twilio audio (μ-law 8kHz) to OpenAI format (PCM16 24kHz)
const openaiAudio = this.audioConverter.twilioToOpenAI(twilioAudio);
// Send audio to OpenAI Realtime API
await this.voiceService.sendAudioToOpenAI(callSid, openaiAudio);
break;
case 'stop':
this.logger.log(`=== MEDIA STREAM EVENT: STOP ===`);
this.logger.log(`StreamSid: ${streamSid}`);
this.logger.log(`Total media packets received: ${mediaPacketCount}`);
if (streamSid) {
this.mediaStreams.delete(streamSid);
this.logger.log(`Removed WebSocket for streamSid: ${streamSid}. Remaining active streams: ${this.mediaStreams.size}`);
}
// Clean up OpenAI connection
if (callSid) {
this.logger.log(`Cleaning up OpenAI connection for call ${callSid}...`);
await this.voiceService.cleanupOpenAIConnection(callSid);
this.logger.log(`✓ OpenAI connection cleaned up for call ${callSid}`);
}
break;
default:
this.logger.debug(`Unknown media stream event: ${msg.event}`);
}
} catch (error: any) {
this.logger.error(`Error processing media stream message: ${error.message}`);
this.logger.error(`Stack: ${error.stack}`);
}
});
ws.on('close', () => {
this.logger.log(`=== MEDIA STREAM WEBSOCKET CLOSED ===`);
this.logger.log(`StreamSid: ${streamSid}`);
this.logger.log(`Total media packets in this stream: ${mediaPacketCount}`);
if (streamSid) {
this.mediaStreams.delete(streamSid);
this.logger.log(`Cleaned up streamSid on close. Remaining active streams: ${this.mediaStreams.size}`);
}
});
ws.on('error', (error: Error) => {
this.logger.error(`=== MEDIA STREAM WEBSOCKET ERROR ===`);
this.logger.error(`StreamSid: ${streamSid}`);
this.logger.error(`Error message: ${error.message}`);
this.logger.error(`Error stack: ${error.stack}`);
});
}
/**
* Send audio from OpenAI back to Twilio Media Stream
*/
async sendAudioToTwilio(streamSid: string, openaiAudioBase64: string) {
const ws = this.mediaStreams.get(streamSid);
if (!ws) {
this.logger.warn(`No Media Stream found for streamSid: ${streamSid}`);
return;
}
try {
// Convert OpenAI audio (PCM16 24kHz) to Twilio format (μ-law 8kHz)
const twilioAudio = this.audioConverter.openAIToTwilio(openaiAudioBase64);
// Send to Twilio Media Stream
const message = {
event: 'media',
streamSid,
media: {
payload: twilioAudio,
},
};
ws.send(JSON.stringify(message));
} catch (error: any) {
this.logger.error(`Error sending audio to Twilio: ${error.message}`);
}
}
}