WIP - call connecting to softphone

This commit is contained in:
Francisco Gaona
2026-01-04 04:29:25 +01:00
parent 71cd6a07b7
commit 03cbb5eb1f
11 changed files with 1190 additions and 104 deletions

View File

@@ -3,13 +3,15 @@ import {
FastifyAdapter,
NestFastifyApplication,
} from '@nestjs/platform-fastify';
import { ValidationPipe } from '@nestjs/common';
import { ValidationPipe, Logger } from '@nestjs/common';
import { AppModule } from './app.module';
import { VoiceService } from './voice/voice.service';
import { AudioConverterService } from './voice/audio-converter.service';
async function bootstrap() {
const app = await NestFactory.create<NestFastifyApplication>(
AppModule,
new FastifyAdapter({ logger: false }),
new FastifyAdapter({ logger: true }),
);
// Global validation pipe
@@ -33,6 +35,144 @@ async function bootstrap() {
const port = process.env.PORT || 3000;
await app.listen(port, '0.0.0.0');
// After app is listening, register WebSocket handler
const fastifyInstance = app.getHttpAdapter().getInstance();
const logger = new Logger('MediaStreamWS');
const voiceService = app.get(VoiceService);
const audioConverter = app.get(AudioConverterService);
const WebSocketServer = require('ws').Server;
const wss = new WebSocketServer({ noServer: true });
// Handle WebSocket upgrades at the server level
const server = (fastifyInstance.server as any);
// Track active Media Streams connections: streamSid -> WebSocket
const mediaStreams: Map<string, any> = new Map();
server.on('upgrade', (request: any, socket: any, head: any) => {
if (request.url === '/api/voice/media-stream') {
logger.log('=== MEDIA STREAM WEBSOCKET UPGRADE REQUEST ===');
logger.log(`Path: ${request.url}`);
wss.handleUpgrade(request, socket, head, (ws: any) => {
logger.log('=== MEDIA STREAM WEBSOCKET UPGRADED SUCCESSFULLY ===');
handleMediaStreamSocket(ws);
});
}
});
async function handleMediaStreamSocket(ws: any) {
let streamSid: string | null = null;
let callSid: string | null = null;
let tenantDomain: string | null = null;
let mediaPacketCount = 0;
ws.on('message', async (message: Buffer) => {
try {
const msg = JSON.parse(message.toString());
switch (msg.event) {
case 'connected':
logger.log('=== MEDIA STREAM EVENT: CONNECTED ===');
logger.log(`Protocol: ${msg.protocol}`);
logger.log(`Version: ${msg.version}`);
break;
case 'start':
streamSid = msg.streamSid;
callSid = msg.start.callSid;
tenantDomain = msg.start.customParameters?.tenantId || 'tenant1';
logger.log(`=== MEDIA STREAM EVENT: START ===`);
logger.log(`StreamSid: ${streamSid}`);
logger.log(`CallSid: ${callSid}`);
logger.log(`Tenant: ${tenantDomain}`);
logger.log(`MediaFormat: ${JSON.stringify(msg.start.mediaFormat)}`);
mediaStreams.set(streamSid, ws);
logger.log(`Stored WebSocket for streamSid: ${streamSid}. Total active streams: ${mediaStreams.size}`);
// Initialize OpenAI Realtime connection
logger.log(`Initializing OpenAI Realtime for call ${callSid}...`);
try {
await voiceService.initializeOpenAIRealtime({
callSid,
tenantId: tenantDomain,
userId: msg.start.customParameters?.userId || 'system',
});
logger.log(`✓ OpenAI Realtime initialized for call ${callSid}`);
} catch (error: any) {
logger.error(`Failed to initialize OpenAI: ${error.message}`);
}
break;
case 'media':
mediaPacketCount++;
if (mediaPacketCount % 50 === 0) {
logger.log(`Received media packet #${mediaPacketCount} for StreamSid: ${streamSid}`);
}
if (!callSid || !tenantDomain) {
logger.warn('Received media before start event');
break;
}
try {
// Convert Twilio audio (μ-law 8kHz) to OpenAI format (PCM16 24kHz)
const twilioAudio = msg.media.payload;
const openaiAudio = audioConverter.twilioToOpenAI(twilioAudio);
// Send audio to OpenAI Realtime API
await voiceService.sendAudioToOpenAI(callSid, openaiAudio);
} catch (error: any) {
logger.error(`Error processing media: ${error.message}`);
}
break;
case 'stop':
logger.log(`=== MEDIA STREAM EVENT: STOP ===`);
logger.log(`StreamSid: ${streamSid}`);
logger.log(`Total media packets received: ${mediaPacketCount}`);
if (streamSid) {
mediaStreams.delete(streamSid);
logger.log(`Removed WebSocket for streamSid: ${streamSid}`);
}
// Clean up OpenAI connection
if (callSid) {
try {
logger.log(`Cleaning up OpenAI connection for call ${callSid}...`);
await voiceService.cleanupOpenAIConnection(callSid);
logger.log(`✓ OpenAI connection cleaned up`);
} catch (error: any) {
logger.error(`Failed to cleanup OpenAI: ${error.message}`);
}
}
break;
default:
logger.debug(`Unknown media stream event: ${msg.event}`);
}
} catch (error: any) {
logger.error(`Error processing media stream message: ${error.message}`);
}
});
ws.on('close', () => {
logger.log(`=== MEDIA STREAM WEBSOCKET CLOSED ===`);
if (streamSid) {
mediaStreams.delete(streamSid);
}
});
ws.on('error', (error: Error) => {
logger.error(`=== MEDIA STREAM WEBSOCKET ERROR ===`);
logger.error(`Error message: ${error.message}`);
});
}
console.log(`🚀 Application is running on: http://localhost:${port}/api`);
}

View File

@@ -0,0 +1,214 @@
import { Injectable, Logger } from '@nestjs/common';
/**
* Audio format converter for Twilio <-> OpenAI audio streaming
*
* Twilio Media Streams format:
* - Codec: μ-law (G.711)
* - Sample rate: 8kHz
* - Encoding: base64
* - Chunk size: 20ms (160 bytes)
*
* OpenAI Realtime API format:
* - Codec: PCM16
* - Sample rate: 24kHz
* - Encoding: base64
* - Mono channel
*/
@Injectable()
export class AudioConverterService {
private readonly logger = new Logger(AudioConverterService.name);
// μ-law decode lookup table
private readonly MULAW_DECODE_TABLE = this.buildMuLawDecodeTable();
// μ-law encode lookup table
private readonly MULAW_ENCODE_TABLE = this.buildMuLawEncodeTable();
/**
* Build μ-law to linear PCM16 decode table
*/
private buildMuLawDecodeTable(): Int16Array {
const table = new Int16Array(256);
for (let i = 0; i < 256; i++) {
const mulaw = ~i;
const exponent = (mulaw >> 4) & 0x07;
const mantissa = mulaw & 0x0f;
let sample = (mantissa << 3) + 0x84;
sample <<= exponent;
sample -= 0x84;
if ((mulaw & 0x80) === 0) {
sample = -sample;
}
table[i] = sample;
}
return table;
}
/**
* Build linear PCM16 to μ-law encode table
*/
private buildMuLawEncodeTable(): Uint8Array {
const table = new Uint8Array(65536);
for (let i = 0; i < 65536; i++) {
const sample = (i - 32768);
const sign = sample < 0 ? 0x80 : 0x00;
const magnitude = Math.abs(sample);
// Add bias
let biased = magnitude + 0x84;
// Find exponent
let exponent = 7;
for (let exp = 0; exp < 8; exp++) {
if (biased <= (0xff << exp)) {
exponent = exp;
break;
}
}
// Extract mantissa
const mantissa = (biased >> (exponent + 3)) & 0x0f;
// Combine sign, exponent, mantissa
const mulaw = ~(sign | (exponent << 4) | mantissa);
table[i] = mulaw & 0xff;
}
return table;
}
/**
* Decode μ-law audio to linear PCM16
* @param mulawData - Buffer containing μ-law encoded audio
* @returns Buffer containing PCM16 audio (16-bit little-endian)
*/
decodeMuLaw(mulawData: Buffer): Buffer {
const pcm16 = Buffer.allocUnsafe(mulawData.length * 2);
for (let i = 0; i < mulawData.length; i++) {
const sample = this.MULAW_DECODE_TABLE[mulawData[i]];
pcm16.writeInt16LE(sample, i * 2);
}
return pcm16;
}
/**
* Encode linear PCM16 to μ-law
* @param pcm16Data - Buffer containing PCM16 audio (16-bit little-endian)
* @returns Buffer containing μ-law encoded audio
*/
encodeMuLaw(pcm16Data: Buffer): Buffer {
const mulaw = Buffer.allocUnsafe(pcm16Data.length / 2);
for (let i = 0; i < pcm16Data.length; i += 2) {
const sample = pcm16Data.readInt16LE(i);
const index = (sample + 32768) & 0xffff;
mulaw[i / 2] = this.MULAW_ENCODE_TABLE[index];
}
return mulaw;
}
/**
* Resample audio from 8kHz to 24kHz (linear interpolation)
* @param pcm16Data - Buffer containing 8kHz PCM16 audio
* @returns Buffer containing 24kHz PCM16 audio
*/
resample8kTo24k(pcm16Data: Buffer): Buffer {
const inputSamples = pcm16Data.length / 2;
const outputSamples = Math.floor(inputSamples * 3); // 8k * 3 = 24k
const output = Buffer.allocUnsafe(outputSamples * 2);
for (let i = 0; i < outputSamples; i++) {
const srcIndex = i / 3;
const srcIndexFloor = Math.floor(srcIndex);
const srcIndexCeil = Math.min(srcIndexFloor + 1, inputSamples - 1);
const fraction = srcIndex - srcIndexFloor;
const sample1 = pcm16Data.readInt16LE(srcIndexFloor * 2);
const sample2 = pcm16Data.readInt16LE(srcIndexCeil * 2);
// Linear interpolation
const interpolated = Math.round(sample1 + (sample2 - sample1) * fraction);
output.writeInt16LE(interpolated, i * 2);
}
return output;
}
/**
* Resample audio from 24kHz to 8kHz (decimation with averaging)
* @param pcm16Data - Buffer containing 24kHz PCM16 audio
* @returns Buffer containing 8kHz PCM16 audio
*/
resample24kTo8k(pcm16Data: Buffer): Buffer {
const inputSamples = pcm16Data.length / 2;
const outputSamples = Math.floor(inputSamples / 3); // 24k / 3 = 8k
const output = Buffer.allocUnsafe(outputSamples * 2);
for (let i = 0; i < outputSamples; i++) {
// Average 3 samples for anti-aliasing
const idx1 = Math.min(i * 3, inputSamples - 1);
const idx2 = Math.min(i * 3 + 1, inputSamples - 1);
const idx3 = Math.min(i * 3 + 2, inputSamples - 1);
const sample1 = pcm16Data.readInt16LE(idx1 * 2);
const sample2 = pcm16Data.readInt16LE(idx2 * 2);
const sample3 = pcm16Data.readInt16LE(idx3 * 2);
const averaged = Math.round((sample1 + sample2 + sample3) / 3);
output.writeInt16LE(averaged, i * 2);
}
return output;
}
/**
* Convert Twilio μ-law 8kHz to OpenAI PCM16 24kHz
* @param twilioBase64 - Base64-encoded μ-law audio from Twilio
* @returns Base64-encoded PCM16 24kHz audio for OpenAI
*/
twilioToOpenAI(twilioBase64: string): string {
try {
// Decode base64
const mulawBuffer = Buffer.from(twilioBase64, 'base64');
// μ-law -> PCM16
const pcm16_8k = this.decodeMuLaw(mulawBuffer);
// 8kHz -> 24kHz
const pcm16_24k = this.resample8kTo24k(pcm16_8k);
// Encode to base64
return pcm16_24k.toString('base64');
} catch (error) {
this.logger.error('Error converting Twilio to OpenAI audio', error);
throw error;
}
}
/**
* Convert OpenAI PCM16 24kHz to Twilio μ-law 8kHz
* @param openaiBase64 - Base64-encoded PCM16 24kHz audio from OpenAI
* @returns Base64-encoded μ-law 8kHz audio for Twilio
*/
openAIToTwilio(openaiBase64: string): string {
try {
// Decode base64
const pcm16_24k = Buffer.from(openaiBase64, 'base64');
// 24kHz -> 8kHz
const pcm16_8k = this.resample24kTo8k(pcm16_24k);
// PCM16 -> μ-law
const mulawBuffer = this.encodeMuLaw(pcm16_8k);
// Encode to base64
return mulawBuffer.toString('base64');
} catch (error) {
this.logger.error('Error converting OpenAI to Twilio audio', error);
throw error;
}
}
}

View File

@@ -13,6 +13,7 @@ import { FastifyRequest, FastifyReply } from 'fastify';
import { JwtAuthGuard } from '../auth/jwt-auth.guard';
import { VoiceService } from './voice.service';
import { VoiceGateway } from './voice.gateway';
import { AudioConverterService } from './audio-converter.service';
import { InitiateCallDto } from './dto/initiate-call.dto';
import { TenantId } from '../tenant/tenant.decorator';
@@ -20,9 +21,13 @@ import { TenantId } from '../tenant/tenant.decorator';
export class VoiceController {
private readonly logger = new Logger(VoiceController.name);
// Track active Media Streams connections: streamSid -> WebSocket
private mediaStreams: Map<string, any> = new Map();
constructor(
private readonly voiceService: VoiceService,
private readonly voiceGateway: VoiceGateway,
private readonly audioConverter: AudioConverterService,
) {}
/**
@@ -159,8 +164,12 @@ export class VoiceController {
const fromNumber = body.From;
const toNumber = body.To;
this.logger.log(`=== INBOUND CALL RECEIVED ===`);
this.logger.log(`CallSid: ${callSid}, From: ${fromNumber}, To: ${toNumber}`);
this.logger.log(`\n\n╔════════════════════════════════════════╗`);
this.logger.log(`║ === INBOUND CALL RECEIVED ===`);
this.logger.log(`╚════════════════════════════════════════╝`);
this.logger.log(`CallSid: ${callSid}`);
this.logger.log(`From: ${fromNumber}`);
this.logger.log(`To: ${toNumber}`);
this.logger.log(`Full body: ${JSON.stringify(body)}`);
try {
@@ -174,6 +183,9 @@ export class VoiceController {
const connectedUsers = this.voiceGateway.getConnectedUsers(tenantDomain);
this.logger.log(`Connected users for tenant ${tenantDomain}: ${connectedUsers.length}`);
if (connectedUsers.length > 0) {
this.logger.log(`Connected user IDs: ${connectedUsers.join(', ')}`);
}
if (connectedUsers.length === 0) {
// No users online - send to voicemail or play message
@@ -182,22 +194,52 @@ export class VoiceController {
<Say>Sorry, no agents are currently available. Please try again later.</Say>
<Hangup/>
</Response>`;
this.logger.log(`No users online - returning unavailable message`);
this.logger.log(`No users online - returning unavailable message`);
return res.type('text/xml').send(twiml);
}
// Build TwiML to dial all connected clients (first to answer gets the call)
// Build TwiML to dial all connected clients with Media Streams for AI
const clientElements = connectedUsers.map(userId => ` <Client>${userId}</Client>`).join('\n');
// Use wss:// for secure WebSocket (Traefik handles HTTPS)
const streamUrl = `wss://${host}/api/voice/media-stream`;
this.logger.log(`Stream URL: ${streamUrl}`);
this.logger.log(`Dialing ${connectedUsers.length} client(s)...`);
this.logger.log(`Client IDs to dial: ${connectedUsers.join(', ')}`);
// Verify we have client IDs in proper format
if (connectedUsers.length > 0) {
this.logger.log(`First Client ID format check: "${connectedUsers[0]}" (length: ${connectedUsers[0].length})`);
}
// Notify connected users about incoming call via Socket.IO
connectedUsers.forEach(userId => {
this.voiceGateway.notifyIncomingCall(userId, {
callSid,
fromNumber,
toNumber,
tenantDomain,
});
});
const twiml = `<?xml version="1.0" encoding="UTF-8"?>
<Response>
<Say>Connecting your call</Say>
<Dial timeout="30" action="${host}/api/voice/webhook/dial-status">
<Dial timeout="30">
${clientElements}
<OnAnswer>
<Connect>
<Stream url="${streamUrl}">
<Parameter name="tenantId" value="${tenantDomain}"/>
<Parameter name="userId" value="${connectedUsers[0]}"/>
</Stream>
</Connect>
</OnAnswer>
</Dial>
</Response>`;
this.logger.log(`Returning inbound TwiML - dialing ${connectedUsers.length} client(s)`);
this.logger.log(`Returning inbound TwiML with Media Streams - dialing ${connectedUsers.length} client(s)`);
this.logger.log(`Generated TwiML:\n${twiml}\n`);
res.type('text/xml').send(twiml);
} catch (error: any) {
this.logger.error(`Error generating inbound TwiML: ${error.message}`);
@@ -240,4 +282,216 @@ ${clientElements}
return { success: true };
}
/**
* Twilio Media Streams WebSocket endpoint
* Receives real-time audio from Twilio and forwards to OpenAI Realtime API
*
* This handles the HTTP GET request and upgrades it to WebSocket manually.
*/
@Get('media-stream')
mediaStream(@Req() req: FastifyRequest) {
// For WebSocket upgrade, we need to access the raw socket
let socket: any;
try {
this.logger.log(`=== MEDIA STREAM REQUEST ===`);
this.logger.log(`URL: ${req.url}`);
this.logger.log(`Headers keys: ${Object.keys(req.headers).join(', ')}`);
this.logger.log(`Headers: ${JSON.stringify(req.headers)}`);
// Check if this is a WebSocket upgrade request
const hasWebSocketKey = 'sec-websocket-key' in req.headers;
const hasWebSocketVersion = 'sec-websocket-version' in req.headers;
this.logger.log(`hasWebSocketKey: ${hasWebSocketKey}`);
this.logger.log(`hasWebSocketVersion: ${hasWebSocketVersion}`);
if (!hasWebSocketKey || !hasWebSocketVersion) {
this.logger.log('Not a WebSocket upgrade request - returning');
return;
}
this.logger.log('✓ WebSocket upgrade detected');
// Get the socket - try different ways
socket = (req.raw as any).socket;
this.logger.log(`Socket obtained: ${!!socket}`);
if (!socket) {
this.logger.error('Failed to get socket from req.raw');
return;
}
const rawRequest = req.raw;
const head = Buffer.alloc(0);
this.logger.log('Creating WebSocketServer...');
const WebSocketServer = require('ws').Server;
const wss = new WebSocketServer({ noServer: true });
this.logger.log('Calling handleUpgrade...');
// handleUpgrade will send the 101 response and take over the socket
wss.handleUpgrade(rawRequest, socket, head, (ws: any) => {
this.logger.log('=== TWILIO MEDIA STREAM WEBSOCKET UPGRADED SUCCESSFULLY ===');
this.handleMediaStreamSocket(ws);
});
this.logger.log('handleUpgrade completed');
} catch (error: any) {
this.logger.error(`=== FAILED TO UPGRADE TO WEBSOCKET ===`);
this.logger.error(`Error message: ${error.message}`);
this.logger.error(`Error stack: ${error.stack}`);
}
}
/**
* Handle incoming Media Stream WebSocket messages
*/
private handleMediaStreamSocket(ws: any) {
let streamSid: string | null = null;
let callSid: string | null = null;
let tenantDomain: string | null = null;
let mediaPacketCount = 0;
// WebSocket message handler
ws.on('message', async (message: Buffer) => {
try {
const msg = JSON.parse(message.toString());
switch (msg.event) {
case 'connected':
this.logger.log('=== MEDIA STREAM EVENT: CONNECTED ===');
this.logger.log(`Protocol: ${msg.protocol}`);
this.logger.log(`Version: ${msg.version}`);
break;
case 'start':
streamSid = msg.streamSid;
callSid = msg.start.callSid;
// Extract tenant from customParameters if available
tenantDomain = msg.start.customParameters?.tenantId || 'tenant1';
this.logger.log(`=== MEDIA STREAM EVENT: START ===`);
this.logger.log(`StreamSid: ${streamSid}`);
this.logger.log(`CallSid: ${callSid}`);
this.logger.log(`Tenant: ${tenantDomain}`);
this.logger.log(`AccountSid: ${msg.start.accountSid}`);
this.logger.log(`MediaFormat: ${JSON.stringify(msg.start.mediaFormat)}`);
this.logger.log(`Custom Parameters: ${JSON.stringify(msg.start.customParameters)}`);
// Store WebSocket connection
this.mediaStreams.set(streamSid, ws);
this.logger.log(`Stored WebSocket for streamSid: ${streamSid}. Total active streams: ${this.mediaStreams.size}`);
// Initialize OpenAI Realtime connection for this call
this.logger.log(`Initializing OpenAI Realtime for call ${callSid}...`);
await this.voiceService.initializeOpenAIRealtime({
callSid,
tenantId: tenantDomain,
userId: msg.start.customParameters?.userId || 'system',
});
this.logger.log(`✓ OpenAI Realtime initialized for call ${callSid}`);
break;
case 'media':
mediaPacketCount++;
if (mediaPacketCount % 50 === 0) {
// Log every 50th packet to avoid spam
this.logger.log(`Received media packet #${mediaPacketCount} for StreamSid: ${streamSid}, CallSid: ${callSid}, PayloadSize: ${msg.media.payload?.length || 0} bytes`);
}
if (!callSid || !tenantDomain) {
this.logger.warn('Received media before start event');
break;
}
// msg.media.payload is base64-encoded μ-law audio from Twilio
const twilioAudio = msg.media.payload;
// Convert Twilio audio (μ-law 8kHz) to OpenAI format (PCM16 24kHz)
const openaiAudio = this.audioConverter.twilioToOpenAI(twilioAudio);
// Send audio to OpenAI Realtime API
await this.voiceService.sendAudioToOpenAI(callSid, openaiAudio);
break;
case 'stop':
this.logger.log(`=== MEDIA STREAM EVENT: STOP ===`);
this.logger.log(`StreamSid: ${streamSid}`);
this.logger.log(`Total media packets received: ${mediaPacketCount}`);
if (streamSid) {
this.mediaStreams.delete(streamSid);
this.logger.log(`Removed WebSocket for streamSid: ${streamSid}. Remaining active streams: ${this.mediaStreams.size}`);
}
// Clean up OpenAI connection
if (callSid) {
this.logger.log(`Cleaning up OpenAI connection for call ${callSid}...`);
await this.voiceService.cleanupOpenAIConnection(callSid);
this.logger.log(`✓ OpenAI connection cleaned up for call ${callSid}`);
}
break;
default:
this.logger.debug(`Unknown media stream event: ${msg.event}`);
}
} catch (error: any) {
this.logger.error(`Error processing media stream message: ${error.message}`);
this.logger.error(`Stack: ${error.stack}`);
}
});
ws.on('close', () => {
this.logger.log(`=== MEDIA STREAM WEBSOCKET CLOSED ===`);
this.logger.log(`StreamSid: ${streamSid}`);
this.logger.log(`Total media packets in this stream: ${mediaPacketCount}`);
if (streamSid) {
this.mediaStreams.delete(streamSid);
this.logger.log(`Cleaned up streamSid on close. Remaining active streams: ${this.mediaStreams.size}`);
}
});
ws.on('error', (error: Error) => {
this.logger.error(`=== MEDIA STREAM WEBSOCKET ERROR ===`);
this.logger.error(`StreamSid: ${streamSid}`);
this.logger.error(`Error message: ${error.message}`);
this.logger.error(`Error stack: ${error.stack}`);
});
}
/**
* Send audio from OpenAI back to Twilio Media Stream
*/
async sendAudioToTwilio(streamSid: string, openaiAudioBase64: string) {
const ws = this.mediaStreams.get(streamSid);
if (!ws) {
this.logger.warn(`No Media Stream found for streamSid: ${streamSid}`);
return;
}
try {
// Convert OpenAI audio (PCM16 24kHz) to Twilio format (μ-law 8kHz)
const twilioAudio = this.audioConverter.openAIToTwilio(openaiAudioBase64);
// Send to Twilio Media Stream
const message = {
event: 'media',
streamSid,
media: {
payload: twilioAudio,
},
};
ws.send(JSON.stringify(message));
} catch (error: any) {
this.logger.error(`Error sending audio to Twilio: ${error.message}`);
}
}
}

View File

@@ -40,7 +40,10 @@ export class VoiceGateway
private readonly jwtService: JwtService,
private readonly voiceService: VoiceService,
private readonly tenantDbService: TenantDatabaseService,
) {}
) {
// Set gateway reference in service to avoid circular dependency
this.voiceService.setGateway(this);
}
async handleConnection(client: AuthenticatedSocket) {
try {
@@ -49,7 +52,7 @@ export class VoiceGateway
client.handshake.auth.token || client.handshake.headers.authorization?.split(' ')[1];
if (!token) {
this.logger.warn('Client connection rejected: No token provided');
this.logger.warn('Client connection rejected: No token provided');
client.disconnect();
return;
}
@@ -82,8 +85,9 @@ export class VoiceGateway
this.connectedUsers.set(client.userId, client);
this.logger.log(
`Client connected: ${client.id} (User: ${client.userId}, Domain: ${domain})`,
`Client connected: ${client.id} (User: ${client.userId}, Domain: ${domain})`,
);
this.logger.log(`Total connected users in ${domain}: ${this.getConnectedUsers(domain).length}`);
// Send current call state if any active call
const activeCallSid = this.activeCallsByUser.get(client.userId);
@@ -95,7 +99,7 @@ export class VoiceGateway
client.emit('call:state', callState);
}
} catch (error) {
this.logger.error('Authentication failed', error);
this.logger.error('Authentication failed', error);
client.disconnect();
}
}
@@ -103,7 +107,8 @@ export class VoiceGateway
handleDisconnect(client: AuthenticatedSocket) {
if (client.userId) {
this.connectedUsers.delete(client.userId);
this.logger.log(`Client disconnected: ${client.id} (User: ${client.userId})`);
this.logger.log(`Client disconnected: ${client.id} (User: ${client.userId})`);
this.logger.log(`Remaining connected users: ${this.connectedUsers.size}`);
}
}

View File

@@ -3,6 +3,7 @@ import { JwtModule } from '@nestjs/jwt';
import { VoiceGateway } from './voice.gateway';
import { VoiceService } from './voice.service';
import { VoiceController } from './voice.controller';
import { AudioConverterService } from './audio-converter.service';
import { TenantModule } from '../tenant/tenant.module';
import { AuthModule } from '../auth/auth.module';
@@ -15,7 +16,7 @@ import { AuthModule } from '../auth/auth.module';
signOptions: { expiresIn: process.env.JWT_EXPIRES_IN || '24h' },
}),
],
providers: [VoiceGateway, VoiceService],
providers: [VoiceGateway, VoiceService, AudioConverterService],
controllers: [VoiceController],
exports: [VoiceService],
})

View File

@@ -15,11 +15,19 @@ export class VoiceService {
private twilioClients: Map<string, Twilio.Twilio> = new Map();
private openaiConnections: Map<string, WebSocket> = new Map(); // callSid -> WebSocket
private callStates: Map<string, any> = new Map(); // callSid -> call state
private voiceGateway: any; // Reference to gateway (to avoid circular dependency)
constructor(
private readonly tenantDbService: TenantDatabaseService,
) {}
/**
* Set gateway reference (called by gateway on init)
*/
setGateway(gateway: any) {
this.voiceGateway = gateway;
}
/**
* Get Twilio client for a tenant
*/
@@ -487,6 +495,64 @@ export class VoiceService {
}
}
/**
* Send audio data to OpenAI Realtime API
*/
async sendAudioToOpenAI(callSid: string, audioBase64: string) {
const ws = this.openaiConnections.get(callSid);
if (!ws) {
this.logger.warn(`No OpenAI connection for call ${callSid}`);
return;
}
try {
// Send audio chunk to OpenAI
ws.send(JSON.stringify({
type: 'input_audio_buffer.append',
audio: audioBase64,
}));
} catch (error) {
this.logger.error(`Failed to send audio to OpenAI for call ${callSid}`, error);
}
}
/**
* Commit audio buffer to OpenAI (trigger processing)
*/
async commitAudioBuffer(callSid: string) {
const ws = this.openaiConnections.get(callSid);
if (!ws) {
return;
}
try {
ws.send(JSON.stringify({
type: 'input_audio_buffer.commit',
}));
} catch (error) {
this.logger.error(`Failed to commit audio buffer for call ${callSid}`, error);
}
}
/**
* Clean up OpenAI connection for a call
*/
async cleanupOpenAIConnection(callSid: string) {
const ws = this.openaiConnections.get(callSid);
if (ws) {
try {
ws.close();
this.openaiConnections.delete(callSid);
this.logger.log(`Cleaned up OpenAI connection for call ${callSid}`);
} catch (error) {
this.logger.error(`Error cleaning up OpenAI connection for call ${callSid}`, error);
}
}
}
/**
* Handle OpenAI Realtime messages
*/
@@ -505,9 +571,40 @@ export class VoiceService {
}
break;
case 'response.audio.delta':
// OpenAI is sending audio response
// This needs to be sent to Twilio Media Stream
// Note: We'll need to get the streamSid from the call state
const state = this.callStates.get(callSid);
if (state?.streamSid && message.delta) {
// The controller will handle sending to Twilio
// Store audio delta for controller to pick up
if (!state.pendingAudio) {
state.pendingAudio = [];
}
state.pendingAudio.push(message.delta);
}
break;
case 'response.audio.done':
// Audio response complete
this.logger.log(`OpenAI audio response complete for call ${callSid}`);
break;
case 'response.audio_transcript.delta':
// Real-time transcript
// TODO: Emit to gateway
// Real-time transcript chunk
const deltaState = this.callStates.get(callSid);
if (deltaState?.userId && message.delta) {
// Emit to frontend via gateway
if (this.voiceGateway) {
await this.voiceGateway.notifyAiTranscript(deltaState.userId, {
callSid,
transcript: message.delta,
isFinal: false,
});
}
this.logger.debug(`Transcript delta for call ${callSid}: ${message.delta}`);
}
break;
case 'response.audio_transcript.done':