Add twilio softphone with integrated AI assistant

This commit is contained in:
Francisco Gaona
2026-01-04 08:48:43 +01:00
parent 16907aadf8
commit 5f3fcef1ec
42 changed files with 5689 additions and 923 deletions

View File

@@ -1,13 +0,0 @@
exports.up = function (knex) {
return knex.schema
.table('record_shares', (table) => {
table.timestamp('updatedAt').defaultTo(knex.fn.now());
});
};
exports.down = function (knex) {
return knex.schema
.table('record_shares', (table) => {
table.dropColumn('updatedAt');
});
};

View File

@@ -0,0 +1,55 @@
/**
* @param { import("knex").Knex } knex
* @returns { Promise<void> }
*/
exports.up = async function (knex) {
// Create calls table for tracking voice calls
await knex.schema.createTable('calls', (table) => {
table.string('id', 36).primary();
table.string('call_sid', 100).unique().notNullable().comment('Twilio call SID');
table.enum('direction', ['inbound', 'outbound']).notNullable();
table.string('from_number', 20).notNullable();
table.string('to_number', 20).notNullable();
table.enum('status', [
'queued',
'ringing',
'in-progress',
'completed',
'busy',
'failed',
'no-answer',
'canceled'
]).notNullable().defaultTo('queued');
table.integer('duration_seconds').unsigned().nullable();
table.string('recording_url', 500).nullable();
table.text('ai_transcript').nullable().comment('Full transcript from OpenAI');
table.text('ai_summary').nullable().comment('AI-generated summary');
table.json('ai_insights').nullable().comment('Structured insights from AI');
table.string('user_id', 36).notNullable().comment('User who handled the call');
table.timestamp('started_at').nullable();
table.timestamp('ended_at').nullable();
table.timestamp('created_at').defaultTo(knex.fn.now());
table.timestamp('updated_at').defaultTo(knex.fn.now());
// Indexes
table.index('call_sid');
table.index('user_id');
table.index('status');
table.index('direction');
table.index(['created_at', 'user_id']);
// Foreign key to users table
table.foreign('user_id').references('id').inTable('users').onDelete('CASCADE');
});
console.log('✅ Created calls table');
};
/**
* @param { import("knex").Knex } knex
* @returns { Promise<void> }
*/
exports.down = async function (knex) {
await knex.schema.dropTableIfExists('calls');
console.log('✅ Dropped calls table');
};

File diff suppressed because it is too large Load Diff

View File

@@ -27,6 +27,7 @@
},
"dependencies": {
"@casl/ability": "^6.7.5",
"@fastify/websocket": "^10.0.1",
"@nestjs/bullmq": "^10.1.0",
"@nestjs/common": "^10.3.0",
"@nestjs/config": "^3.1.1",
@@ -34,6 +35,9 @@
"@nestjs/jwt": "^10.2.0",
"@nestjs/passport": "^10.0.3",
"@nestjs/platform-fastify": "^10.3.0",
"@nestjs/platform-socket.io": "^10.4.20",
"@nestjs/serve-static": "^4.0.2",
"@nestjs/websockets": "^10.4.20",
"@prisma/client": "^5.8.0",
"bcrypt": "^5.1.1",
"bullmq": "^5.1.0",
@@ -43,10 +47,14 @@
"knex": "^3.1.0",
"mysql2": "^3.15.3",
"objection": "^3.1.5",
"openai": "^6.15.0",
"passport": "^0.7.0",
"passport-jwt": "^4.0.1",
"reflect-metadata": "^0.2.1",
"rxjs": "^7.8.1"
"rxjs": "^7.8.1",
"socket.io": "^4.8.3",
"twilio": "^5.11.1",
"ws": "^8.18.3"
},
"devDependencies": {
"@nestjs/cli": "^10.3.0",

View File

@@ -0,0 +1,2 @@
-- AlterTable
ALTER TABLE `tenants` ADD COLUMN `integrationsConfig` JSON NULL;

View File

@@ -24,17 +24,18 @@ model User {
}
model Tenant {
id String @id @default(cuid())
name String
slug String @unique // Used for identification
dbHost String // Database host
dbPort Int @default(3306)
dbName String // Database name
dbUsername String // Database username
dbPassword String // Encrypted database password
status String @default("active") // active, suspended, deleted
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
id String @id @default(cuid())
name String
slug String @unique // Used for identification
dbHost String // Database host
dbPort Int @default(3306)
dbName String // Database name
dbUsername String // Database username
dbPassword String // Encrypted database password
integrationsConfig Json? // Encrypted JSON config for external services (Twilio, OpenAI, etc.)
status String @default("active") // active, suspended, deleted
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
domains Domain[]

View File

@@ -7,6 +7,7 @@ import { RbacModule } from './rbac/rbac.module';
import { ObjectModule } from './object/object.module';
import { AppBuilderModule } from './app-builder/app-builder.module';
import { PageLayoutModule } from './page-layout/page-layout.module';
import { VoiceModule } from './voice/voice.module';
@Module({
imports: [
@@ -20,6 +21,7 @@ import { PageLayoutModule } from './page-layout/page-layout.module';
ObjectModule,
AppBuilderModule,
PageLayoutModule,
VoiceModule,
],
})
export class AppModule {}

View File

@@ -3,13 +3,15 @@ import {
FastifyAdapter,
NestFastifyApplication,
} from '@nestjs/platform-fastify';
import { ValidationPipe } from '@nestjs/common';
import { ValidationPipe, Logger } from '@nestjs/common';
import { AppModule } from './app.module';
import { VoiceService } from './voice/voice.service';
import { AudioConverterService } from './voice/audio-converter.service';
async function bootstrap() {
const app = await NestFactory.create<NestFastifyApplication>(
AppModule,
new FastifyAdapter(),
new FastifyAdapter({ logger: true }),
);
// Global validation pipe
@@ -33,6 +35,145 @@ async function bootstrap() {
const port = process.env.PORT || 3000;
await app.listen(port, '0.0.0.0');
// After app is listening, register WebSocket handler
const fastifyInstance = app.getHttpAdapter().getInstance();
const logger = new Logger('MediaStreamWS');
const voiceService = app.get(VoiceService);
const audioConverter = app.get(AudioConverterService);
const WebSocketServer = require('ws').Server;
const wss = new WebSocketServer({ noServer: true });
// Handle WebSocket upgrades at the server level
const server = (fastifyInstance.server as any);
// Track active Media Streams connections: streamSid -> WebSocket
const mediaStreams: Map<string, any> = new Map();
server.on('upgrade', (request: any, socket: any, head: any) => {
if (request.url === '/api/voice/media-stream') {
logger.log('=== MEDIA STREAM WEBSOCKET UPGRADE REQUEST ===');
logger.log(`Path: ${request.url}`);
wss.handleUpgrade(request, socket, head, (ws: any) => {
logger.log('=== MEDIA STREAM WEBSOCKET UPGRADED SUCCESSFULLY ===');
handleMediaStreamSocket(ws);
});
}
});
async function handleMediaStreamSocket(ws: any) {
let streamSid: string | null = null;
let callSid: string | null = null;
let tenantDomain: string | null = null;
let mediaPacketCount = 0;
ws.on('message', async (message: Buffer) => {
try {
const msg = JSON.parse(message.toString());
switch (msg.event) {
case 'connected':
logger.log('=== MEDIA STREAM EVENT: CONNECTED ===');
logger.log(`Protocol: ${msg.protocol}`);
logger.log(`Version: ${msg.version}`);
break;
case 'start':
streamSid = msg.streamSid;
callSid = msg.start.callSid;
tenantDomain = msg.start.customParameters?.tenantId || 'tenant1';
logger.log(`=== MEDIA STREAM EVENT: START ===`);
logger.log(`StreamSid: ${streamSid}`);
logger.log(`CallSid: ${callSid}`);
logger.log(`Tenant: ${tenantDomain}`);
logger.log(`MediaFormat: ${JSON.stringify(msg.start.mediaFormat)}`);
mediaStreams.set(streamSid, ws);
logger.log(`Stored WebSocket for streamSid: ${streamSid}. Total active streams: ${mediaStreams.size}`);
// Initialize OpenAI Realtime connection
logger.log(`Initializing OpenAI Realtime for call ${callSid}...`);
try {
await voiceService.initializeOpenAIRealtime({
callSid,
tenantId: tenantDomain,
userId: msg.start.customParameters?.userId || 'system',
});
logger.log(`✓ OpenAI Realtime initialized for call ${callSid}`);
} catch (error: any) {
logger.error(`Failed to initialize OpenAI: ${error.message}`);
}
break;
case 'media':
mediaPacketCount++;
// Only log every 500 packets to reduce noise
if (mediaPacketCount % 500 === 0) {
logger.log(`Received media packet #${mediaPacketCount} for StreamSid: ${streamSid}`);
}
if (!callSid || !tenantDomain) {
logger.warn('Received media before start event');
break;
}
try {
// Convert Twilio audio (μ-law 8kHz) to OpenAI format (PCM16 24kHz)
const twilioAudio = msg.media.payload;
const openaiAudio = audioConverter.twilioToOpenAI(twilioAudio);
// Send audio to OpenAI Realtime API
await voiceService.sendAudioToOpenAI(callSid, openaiAudio);
} catch (error: any) {
logger.error(`Error processing media: ${error.message}`);
}
break;
case 'stop':
logger.log(`=== MEDIA STREAM EVENT: STOP ===`);
logger.log(`StreamSid: ${streamSid}`);
logger.log(`Total media packets received: ${mediaPacketCount}`);
if (streamSid) {
mediaStreams.delete(streamSid);
logger.log(`Removed WebSocket for streamSid: ${streamSid}`);
}
// Clean up OpenAI connection
if (callSid) {
try {
logger.log(`Cleaning up OpenAI connection for call ${callSid}...`);
await voiceService.cleanupOpenAIConnection(callSid);
logger.log(`✓ OpenAI connection cleaned up`);
} catch (error: any) {
logger.error(`Failed to cleanup OpenAI: ${error.message}`);
}
}
break;
default:
logger.debug(`Unknown media stream event: ${msg.event}`);
}
} catch (error: any) {
logger.error(`Error processing media stream message: ${error.message}`);
}
});
ws.on('close', () => {
logger.log(`=== MEDIA STREAM WEBSOCKET CLOSED ===`);
if (streamSid) {
mediaStreams.delete(streamSid);
}
});
ws.on('error', (error: Error) => {
logger.error(`=== MEDIA STREAM WEBSOCKET ERROR ===`);
logger.error(`Error message: ${error.message}`);
});
}
console.log(`🚀 Application is running on: http://localhost:${port}/api`);
}

View File

@@ -242,4 +242,26 @@ export class TenantDatabaseService {
decrypted += decipher.final('utf8');
return decrypted;
}
/**
* Encrypt integrations config JSON object
* @param config - Plain object containing integration credentials
* @returns Encrypted JSON string
*/
encryptIntegrationsConfig(config: any): string {
if (!config) return null;
const jsonString = JSON.stringify(config);
return this.encryptPassword(jsonString);
}
/**
* Decrypt integrations config JSON string
* @param encryptedConfig - Encrypted JSON string
* @returns Plain object with integration credentials
*/
decryptIntegrationsConfig(encryptedConfig: string): any {
if (!encryptedConfig) return null;
const decrypted = this.decryptPassword(encryptedConfig);
return JSON.parse(decrypted);
}
}

View File

@@ -176,7 +176,7 @@ export class TenantProvisioningService {
* Seed default data for new tenant
*/
private async seedDefaultData(tenantId: string) {
const tenantKnex = await this.tenantDbService.getTenantKnex(tenantId);
const tenantKnex = await this.tenantDbService.getTenantKnexById(tenantId);
try {
// Create default roles

View File

@@ -0,0 +1,155 @@
import {
Controller,
Get,
Put,
Body,
UseGuards,
Req,
} from '@nestjs/common';
import { JwtAuthGuard } from '../auth/jwt-auth.guard';
import { TenantDatabaseService } from './tenant-database.service';
import { getCentralPrisma } from '../prisma/central-prisma.service';
import { TenantId } from './tenant.decorator';
@Controller('tenant')
@UseGuards(JwtAuthGuard)
export class TenantController {
constructor(private readonly tenantDbService: TenantDatabaseService) {}
/**
* Get integrations configuration for the current tenant
*/
@Get('integrations')
async getIntegrationsConfig(@TenantId() domain: string) {
const centralPrisma = getCentralPrisma();
// Look up tenant by domain
const domainRecord = await centralPrisma.domain.findUnique({
where: { domain },
include: { tenant: { select: { id: true, integrationsConfig: true } } },
});
if (!domainRecord?.tenant || !domainRecord.tenant.integrationsConfig) {
return { data: null };
}
// Decrypt the config
const config = this.tenantDbService.decryptIntegrationsConfig(
domainRecord.tenant.integrationsConfig as any,
);
// Return config with sensitive fields masked
const maskedConfig = this.maskSensitiveFields(config);
return { data: maskedConfig };
}
/**
* Update integrations configuration for the current tenant
*/
@Put('integrations')
async updateIntegrationsConfig(
@TenantId() domain: string,
@Body() body: { integrationsConfig: any },
) {
const { integrationsConfig } = body;
if (!domain) {
throw new Error('Domain is missing from request');
}
// Look up tenant by domain
const centralPrisma = getCentralPrisma();
const domainRecord = await centralPrisma.domain.findUnique({
where: { domain },
include: { tenant: { select: { id: true, integrationsConfig: true } } },
});
if (!domainRecord?.tenant) {
throw new Error(`Tenant with domain ${domain} not found`);
}
// Merge with existing config to preserve masked values
let finalConfig = integrationsConfig;
if (domainRecord.tenant.integrationsConfig) {
const existingConfig = this.tenantDbService.decryptIntegrationsConfig(
domainRecord.tenant.integrationsConfig as any,
);
// Replace masked values with actual values from existing config
finalConfig = this.unmaskConfig(integrationsConfig, existingConfig);
}
// Encrypt the config
const encryptedConfig = this.tenantDbService.encryptIntegrationsConfig(
finalConfig,
);
// Update in database
await centralPrisma.tenant.update({
where: { id: domainRecord.tenant.id },
data: {
integrationsConfig: encryptedConfig as any,
},
});
return {
success: true,
message: 'Integrations configuration updated successfully',
};
}
/**
* Unmask config by replacing masked values with actual values from existing config
*/
private unmaskConfig(newConfig: any, existingConfig: any): any {
const result = { ...newConfig };
// Unmask Twilio credentials
if (result.twilio && existingConfig.twilio) {
if (result.twilio.authToken === '••••••••' && existingConfig.twilio.authToken) {
result.twilio.authToken = existingConfig.twilio.authToken;
}
if (result.twilio.apiSecret === '••••••••' && existingConfig.twilio.apiSecret) {
result.twilio.apiSecret = existingConfig.twilio.apiSecret;
}
}
// Unmask OpenAI credentials
if (result.openai && existingConfig.openai) {
if (result.openai.apiKey === '••••••••' && existingConfig.openai.apiKey) {
result.openai.apiKey = existingConfig.openai.apiKey;
}
}
return result;
}
/**
* Mask sensitive fields for API responses
*/
private maskSensitiveFields(config: any): any {
if (!config) return null;
const masked = { ...config };
// Mask Twilio credentials
if (masked.twilio) {
masked.twilio = {
...masked.twilio,
authToken: masked.twilio.authToken ? '••••••••' : '',
apiSecret: masked.twilio.apiSecret ? '••••••••' : '',
};
}
// Mask OpenAI credentials
if (masked.openai) {
masked.openai = {
...masked.openai,
apiKey: masked.openai.apiKey ? '••••••••' : '',
};
}
return masked;
}
}

View File

@@ -4,11 +4,12 @@ import { TenantDatabaseService } from './tenant-database.service';
import { TenantProvisioningService } from './tenant-provisioning.service';
import { TenantProvisioningController } from './tenant-provisioning.controller';
import { CentralAdminController } from './central-admin.controller';
import { TenantController } from './tenant.controller';
import { PrismaModule } from '../prisma/prisma.module';
@Module({
imports: [PrismaModule],
controllers: [TenantProvisioningController, CentralAdminController],
controllers: [TenantProvisioningController, CentralAdminController, TenantController],
providers: [
TenantDatabaseService,
TenantProvisioningService,

View File

@@ -0,0 +1,214 @@
import { Injectable, Logger } from '@nestjs/common';
/**
* Audio format converter for Twilio <-> OpenAI audio streaming
*
* Twilio Media Streams format:
* - Codec: μ-law (G.711)
* - Sample rate: 8kHz
* - Encoding: base64
* - Chunk size: 20ms (160 bytes)
*
* OpenAI Realtime API format:
* - Codec: PCM16
* - Sample rate: 24kHz
* - Encoding: base64
* - Mono channel
*/
@Injectable()
export class AudioConverterService {
private readonly logger = new Logger(AudioConverterService.name);
// μ-law decode lookup table
private readonly MULAW_DECODE_TABLE = this.buildMuLawDecodeTable();
// μ-law encode lookup table
private readonly MULAW_ENCODE_TABLE = this.buildMuLawEncodeTable();
/**
* Build μ-law to linear PCM16 decode table
*/
private buildMuLawDecodeTable(): Int16Array {
const table = new Int16Array(256);
for (let i = 0; i < 256; i++) {
const mulaw = ~i;
const exponent = (mulaw >> 4) & 0x07;
const mantissa = mulaw & 0x0f;
let sample = (mantissa << 3) + 0x84;
sample <<= exponent;
sample -= 0x84;
if ((mulaw & 0x80) === 0) {
sample = -sample;
}
table[i] = sample;
}
return table;
}
/**
* Build linear PCM16 to μ-law encode table
*/
private buildMuLawEncodeTable(): Uint8Array {
const table = new Uint8Array(65536);
for (let i = 0; i < 65536; i++) {
const sample = (i - 32768);
const sign = sample < 0 ? 0x80 : 0x00;
const magnitude = Math.abs(sample);
// Add bias
let biased = magnitude + 0x84;
// Find exponent
let exponent = 7;
for (let exp = 0; exp < 8; exp++) {
if (biased <= (0xff << exp)) {
exponent = exp;
break;
}
}
// Extract mantissa
const mantissa = (biased >> (exponent + 3)) & 0x0f;
// Combine sign, exponent, mantissa
const mulaw = ~(sign | (exponent << 4) | mantissa);
table[i] = mulaw & 0xff;
}
return table;
}
/**
* Decode μ-law audio to linear PCM16
* @param mulawData - Buffer containing μ-law encoded audio
* @returns Buffer containing PCM16 audio (16-bit little-endian)
*/
decodeMuLaw(mulawData: Buffer): Buffer {
const pcm16 = Buffer.allocUnsafe(mulawData.length * 2);
for (let i = 0; i < mulawData.length; i++) {
const sample = this.MULAW_DECODE_TABLE[mulawData[i]];
pcm16.writeInt16LE(sample, i * 2);
}
return pcm16;
}
/**
* Encode linear PCM16 to μ-law
* @param pcm16Data - Buffer containing PCM16 audio (16-bit little-endian)
* @returns Buffer containing μ-law encoded audio
*/
encodeMuLaw(pcm16Data: Buffer): Buffer {
const mulaw = Buffer.allocUnsafe(pcm16Data.length / 2);
for (let i = 0; i < pcm16Data.length; i += 2) {
const sample = pcm16Data.readInt16LE(i);
const index = (sample + 32768) & 0xffff;
mulaw[i / 2] = this.MULAW_ENCODE_TABLE[index];
}
return mulaw;
}
/**
* Resample audio from 8kHz to 24kHz (linear interpolation)
* @param pcm16Data - Buffer containing 8kHz PCM16 audio
* @returns Buffer containing 24kHz PCM16 audio
*/
resample8kTo24k(pcm16Data: Buffer): Buffer {
const inputSamples = pcm16Data.length / 2;
const outputSamples = Math.floor(inputSamples * 3); // 8k * 3 = 24k
const output = Buffer.allocUnsafe(outputSamples * 2);
for (let i = 0; i < outputSamples; i++) {
const srcIndex = i / 3;
const srcIndexFloor = Math.floor(srcIndex);
const srcIndexCeil = Math.min(srcIndexFloor + 1, inputSamples - 1);
const fraction = srcIndex - srcIndexFloor;
const sample1 = pcm16Data.readInt16LE(srcIndexFloor * 2);
const sample2 = pcm16Data.readInt16LE(srcIndexCeil * 2);
// Linear interpolation
const interpolated = Math.round(sample1 + (sample2 - sample1) * fraction);
output.writeInt16LE(interpolated, i * 2);
}
return output;
}
/**
* Resample audio from 24kHz to 8kHz (decimation with averaging)
* @param pcm16Data - Buffer containing 24kHz PCM16 audio
* @returns Buffer containing 8kHz PCM16 audio
*/
resample24kTo8k(pcm16Data: Buffer): Buffer {
const inputSamples = pcm16Data.length / 2;
const outputSamples = Math.floor(inputSamples / 3); // 24k / 3 = 8k
const output = Buffer.allocUnsafe(outputSamples * 2);
for (let i = 0; i < outputSamples; i++) {
// Average 3 samples for anti-aliasing
const idx1 = Math.min(i * 3, inputSamples - 1);
const idx2 = Math.min(i * 3 + 1, inputSamples - 1);
const idx3 = Math.min(i * 3 + 2, inputSamples - 1);
const sample1 = pcm16Data.readInt16LE(idx1 * 2);
const sample2 = pcm16Data.readInt16LE(idx2 * 2);
const sample3 = pcm16Data.readInt16LE(idx3 * 2);
const averaged = Math.round((sample1 + sample2 + sample3) / 3);
output.writeInt16LE(averaged, i * 2);
}
return output;
}
/**
* Convert Twilio μ-law 8kHz to OpenAI PCM16 24kHz
* @param twilioBase64 - Base64-encoded μ-law audio from Twilio
* @returns Base64-encoded PCM16 24kHz audio for OpenAI
*/
twilioToOpenAI(twilioBase64: string): string {
try {
// Decode base64
const mulawBuffer = Buffer.from(twilioBase64, 'base64');
// μ-law -> PCM16
const pcm16_8k = this.decodeMuLaw(mulawBuffer);
// 8kHz -> 24kHz
const pcm16_24k = this.resample8kTo24k(pcm16_8k);
// Encode to base64
return pcm16_24k.toString('base64');
} catch (error) {
this.logger.error('Error converting Twilio to OpenAI audio', error);
throw error;
}
}
/**
* Convert OpenAI PCM16 24kHz to Twilio μ-law 8kHz
* @param openaiBase64 - Base64-encoded PCM16 24kHz audio from OpenAI
* @returns Base64-encoded μ-law 8kHz audio for Twilio
*/
openAIToTwilio(openaiBase64: string): string {
try {
// Decode base64
const pcm16_24k = Buffer.from(openaiBase64, 'base64');
// 24kHz -> 8kHz
const pcm16_8k = this.resample24kTo8k(pcm16_24k);
// PCM16 -> μ-law
const mulawBuffer = this.encodeMuLaw(pcm16_8k);
// Encode to base64
return mulawBuffer.toString('base64');
} catch (error) {
this.logger.error('Error converting OpenAI to Twilio audio', error);
throw error;
}
}
}

View File

@@ -0,0 +1,25 @@
export interface CallEventDto {
callSid: string;
direction: 'inbound' | 'outbound';
fromNumber: string;
toNumber: string;
status: string;
}
export interface DtmfEventDto {
callSid: string;
digit: string;
}
export interface TranscriptEventDto {
callSid: string;
transcript: string;
isFinal: boolean;
}
export interface AiSuggestionDto {
callSid: string;
suggestion: string;
type: 'response' | 'action' | 'insight';
data?: any;
}

View File

@@ -0,0 +1,10 @@
import { IsString, IsNotEmpty, Matches } from 'class-validator';
export class InitiateCallDto {
@IsString()
@IsNotEmpty()
@Matches(/^\+?[1-9]\d{1,14}$/, {
message: 'Invalid phone number format (use E.164 format)',
})
toNumber: string;
}

View File

@@ -0,0 +1,20 @@
export interface TwilioConfig {
accountSid: string;
authToken: string;
phoneNumber: string;
apiKey?: string; // API Key SID for generating access tokens
apiSecret?: string; // API Key Secret
twimlAppSid?: string; // TwiML App SID for Voice SDK
}
export interface OpenAIConfig {
apiKey: string;
assistantId?: string;
model?: string;
voice?: string;
}
export interface IntegrationsConfig {
twilio?: TwilioConfig;
openai?: OpenAIConfig;
}

View File

@@ -0,0 +1,495 @@
import {
Controller,
Post,
Get,
Body,
Req,
Res,
UseGuards,
Logger,
Query,
} from '@nestjs/common';
import { FastifyRequest, FastifyReply } from 'fastify';
import { JwtAuthGuard } from '../auth/jwt-auth.guard';
import { VoiceService } from './voice.service';
import { VoiceGateway } from './voice.gateway';
import { AudioConverterService } from './audio-converter.service';
import { InitiateCallDto } from './dto/initiate-call.dto';
import { TenantId } from '../tenant/tenant.decorator';
@Controller('voice')
export class VoiceController {
private readonly logger = new Logger(VoiceController.name);
// Track active Media Streams connections: streamSid -> WebSocket
private mediaStreams: Map<string, any> = new Map();
constructor(
private readonly voiceService: VoiceService,
private readonly voiceGateway: VoiceGateway,
private readonly audioConverter: AudioConverterService,
) {}
/**
* Initiate outbound call via REST
*/
@Post('call')
@UseGuards(JwtAuthGuard)
async initiateCall(
@Body() body: InitiateCallDto,
@Req() req: any,
@TenantId() tenantId: string,
) {
const userId = req.user?.userId || req.user?.sub;
const result = await this.voiceService.initiateCall({
tenantId,
userId,
toNumber: body.toNumber,
});
return {
success: true,
data: result,
};
}
/**
* Generate Twilio access token for browser client
*/
@Get('token')
@UseGuards(JwtAuthGuard)
async getAccessToken(
@Req() req: any,
@TenantId() tenantId: string,
) {
const userId = req.user?.userId || req.user?.sub;
const token = await this.voiceService.generateAccessToken(tenantId, userId);
return {
success: true,
data: { token },
};
}
/**
* Get call history
*/
@Get('calls')
@UseGuards(JwtAuthGuard)
async getCallHistory(
@Req() req: any,
@TenantId() tenantId: string,
@Query('limit') limit?: string,
) {
const userId = req.user?.userId || req.user?.sub;
const calls = await this.voiceService.getCallHistory(
tenantId,
userId,
limit ? parseInt(limit) : 50,
);
return {
success: true,
data: calls,
};
}
/**
* TwiML for outbound calls from browser (Twilio Device)
*/
@Post('twiml/outbound')
async outboundTwiml(@Req() req: FastifyRequest, @Res() res: FastifyReply) {
const body = req.body as any;
const to = body.To;
const from = body.From;
const callSid = body.CallSid;
this.logger.log(`=== TwiML OUTBOUND REQUEST RECEIVED ===`);
this.logger.log(`CallSid: ${callSid}, Body From: ${from}, Body To: ${to}`);
this.logger.log(`Full body: ${JSON.stringify(body)}`);
try {
// Extract tenant domain from Host header
const host = req.headers.host || '';
const tenantDomain = host.split('.')[0]; // e.g., "tenant1" from "tenant1.routebox.co"
this.logger.log(`Extracted tenant domain: ${tenantDomain}`);
// Look up tenant's Twilio phone number from config
let callerId = to; // Fallback (will cause error if not found)
try {
// Get Twilio config to find the phone number
const { config } = await this.voiceService['getTwilioClient'](tenantDomain);
callerId = config.phoneNumber;
this.logger.log(`Retrieved Twilio phone number for tenant: ${callerId}`);
} catch (error: any) {
this.logger.error(`Failed to get Twilio config: ${error.message}`);
}
const dialNumber = to;
this.logger.log(`Using callerId: ${callerId}, dialNumber: ${dialNumber}`);
// Return TwiML to DIAL the phone number with proper callerId
const twiml = `<?xml version="1.0" encoding="UTF-8"?>
<Response>
<Dial callerId="${callerId}">
<Number>${dialNumber}</Number>
</Dial>
</Response>`;
this.logger.log(`Returning TwiML with Dial verb - callerId: ${callerId}, to: ${dialNumber}`);
res.type('text/xml').send(twiml);
} catch (error: any) {
this.logger.error(`=== ERROR GENERATING TWIML ===`);
this.logger.error(`Error: ${error.message}`);
this.logger.error(`Stack: ${error.stack}`);
const errorTwiml = `<?xml version="1.0" encoding="UTF-8"?>
<Response>
<Say>An error occurred while processing your call.</Say>
</Response>`;
res.type('text/xml').send(errorTwiml);
}
}
/**
* TwiML for inbound calls
*/
@Post('twiml/inbound')
async inboundTwiml(@Req() req: FastifyRequest, @Res() res: FastifyReply) {
const body = req.body as any;
const callSid = body.CallSid;
const fromNumber = body.From;
const toNumber = body.To;
this.logger.log(`\n\n╔════════════════════════════════════════╗`);
this.logger.log(`║ === INBOUND CALL RECEIVED ===`);
this.logger.log(`╚════════════════════════════════════════╝`);
this.logger.log(`CallSid: ${callSid}`);
this.logger.log(`From: ${fromNumber}`);
this.logger.log(`To: ${toNumber}`);
this.logger.log(`Full body: ${JSON.stringify(body)}`);
try {
// Extract tenant domain from Host header
const host = req.headers.host || '';
const tenantDomain = host.split('.')[0]; // e.g., "tenant1" from "tenant1.routebox.co"
this.logger.log(`Extracted tenant domain: ${tenantDomain}`);
// Get all connected users for this tenant
const connectedUsers = this.voiceGateway.getConnectedUsers(tenantDomain);
this.logger.log(`Connected users for tenant ${tenantDomain}: ${connectedUsers.length}`);
if (connectedUsers.length > 0) {
this.logger.log(`Connected user IDs: ${connectedUsers.join(', ')}`);
}
if (connectedUsers.length === 0) {
// No users online - send to voicemail or play message
const twiml = `<?xml version="1.0" encoding="UTF-8"?>
<Response>
<Say>Sorry, no agents are currently available. Please try again later.</Say>
<Hangup/>
</Response>`;
this.logger.log(`❌ No users online - returning unavailable message`);
return res.type('text/xml').send(twiml);
}
// Build TwiML to dial all connected clients with Media Streams for AI
const clientElements = connectedUsers.map(userId => ` <Client>${userId}</Client>`).join('\n');
// Use wss:// for secure WebSocket (Traefik handles HTTPS)
const streamUrl = `wss://${host}/api/voice/media-stream`;
this.logger.log(`Stream URL: ${streamUrl}`);
this.logger.log(`Dialing ${connectedUsers.length} client(s)...`);
this.logger.log(`Client IDs to dial: ${connectedUsers.join(', ')}`);
// Verify we have client IDs in proper format
if (connectedUsers.length > 0) {
this.logger.log(`First Client ID format check: "${connectedUsers[0]}" (length: ${connectedUsers[0].length})`);
}
// Notify connected users about incoming call via Socket.IO
connectedUsers.forEach(userId => {
this.voiceGateway.notifyIncomingCall(userId, {
callSid,
fromNumber,
toNumber,
tenantDomain,
});
});
const twiml = `<?xml version="1.0" encoding="UTF-8"?>
<Response>
<Start>
<Stream url="${streamUrl}">
<Parameter name="tenantId" value="${tenantDomain}"/>
<Parameter name="userId" value="${connectedUsers[0]}"/>
</Stream>
</Start>
<Dial timeout="30">
${clientElements}
</Dial>
</Response>`;
this.logger.log(`✓ Returning inbound TwiML with Media Streams - dialing ${connectedUsers.length} client(s)`);
this.logger.log(`Generated TwiML:\n${twiml}\n`);
res.type('text/xml').send(twiml);
} catch (error: any) {
this.logger.error(`Error generating inbound TwiML: ${error.message}`);
const errorTwiml = `<?xml version="1.0" encoding="UTF-8"?>
<Response>
<Say>Sorry, we are unable to connect your call at this time.</Say>
<Hangup/>
</Response>`;
res.type('text/xml').send(errorTwiml);
}
}
/**
* Twilio status webhook
*/
@Post('webhook/status')
async statusWebhook(@Req() req: FastifyRequest) {
const body = req.body as any;
const callSid = body.CallSid;
const status = body.CallStatus;
const duration = body.CallDuration ? parseInt(body.CallDuration) : undefined;
this.logger.log(`Call status webhook - CallSid: ${callSid}, Status: ${status}, Duration: ${duration}`);
this.logger.log(`Full status webhook body:`, JSON.stringify(body));
return { success: true };
}
/**
* Twilio recording webhook
*/
@Post('webhook/recording')
async recordingWebhook(@Req() req: FastifyRequest) {
const body = req.body as any;
const callSid = body.CallSid;
const recordingSid = body.RecordingSid;
const recordingStatus = body.RecordingStatus;
this.logger.log(`Recording webhook - CallSid: ${callSid}, RecordingSid: ${recordingSid}, Status: ${recordingStatus}`);
return { success: true };
}
/**
* Twilio Media Streams WebSocket endpoint
* Receives real-time audio from Twilio and forwards to OpenAI Realtime API
*
* This handles the HTTP GET request and upgrades it to WebSocket manually.
*/
@Get('media-stream')
mediaStream(@Req() req: FastifyRequest) {
// For WebSocket upgrade, we need to access the raw socket
let socket: any;
try {
this.logger.log(`=== MEDIA STREAM REQUEST ===`);
this.logger.log(`URL: ${req.url}`);
this.logger.log(`Headers keys: ${Object.keys(req.headers).join(', ')}`);
this.logger.log(`Headers: ${JSON.stringify(req.headers)}`);
// Check if this is a WebSocket upgrade request
const hasWebSocketKey = 'sec-websocket-key' in req.headers;
const hasWebSocketVersion = 'sec-websocket-version' in req.headers;
this.logger.log(`hasWebSocketKey: ${hasWebSocketKey}`);
this.logger.log(`hasWebSocketVersion: ${hasWebSocketVersion}`);
if (!hasWebSocketKey || !hasWebSocketVersion) {
this.logger.log('Not a WebSocket upgrade request - returning');
return;
}
this.logger.log('✓ WebSocket upgrade detected');
// Get the socket - try different ways
socket = (req.raw as any).socket;
this.logger.log(`Socket obtained: ${!!socket}`);
if (!socket) {
this.logger.error('Failed to get socket from req.raw');
return;
}
const rawRequest = req.raw;
const head = Buffer.alloc(0);
this.logger.log('Creating WebSocketServer...');
const WebSocketServer = require('ws').Server;
const wss = new WebSocketServer({ noServer: true });
this.logger.log('Calling handleUpgrade...');
// handleUpgrade will send the 101 response and take over the socket
wss.handleUpgrade(rawRequest, socket, head, (ws: any) => {
this.logger.log('=== TWILIO MEDIA STREAM WEBSOCKET UPGRADED SUCCESSFULLY ===');
this.handleMediaStreamSocket(ws);
});
this.logger.log('handleUpgrade completed');
} catch (error: any) {
this.logger.error(`=== FAILED TO UPGRADE TO WEBSOCKET ===`);
this.logger.error(`Error message: ${error.message}`);
this.logger.error(`Error stack: ${error.stack}`);
}
}
/**
* Handle incoming Media Stream WebSocket messages
*/
private handleMediaStreamSocket(ws: any) {
let streamSid: string | null = null;
let callSid: string | null = null;
let tenantDomain: string | null = null;
let mediaPacketCount = 0;
// WebSocket message handler
ws.on('message', async (message: Buffer) => {
try {
const msg = JSON.parse(message.toString());
switch (msg.event) {
case 'connected':
this.logger.log('=== MEDIA STREAM EVENT: CONNECTED ===');
this.logger.log(`Protocol: ${msg.protocol}`);
this.logger.log(`Version: ${msg.version}`);
break;
case 'start':
streamSid = msg.streamSid;
callSid = msg.start.callSid;
// Extract tenant from customParameters if available
tenantDomain = msg.start.customParameters?.tenantId || 'tenant1';
this.logger.log(`=== MEDIA STREAM EVENT: START ===`);
this.logger.log(`StreamSid: ${streamSid}`);
this.logger.log(`CallSid: ${callSid}`);
this.logger.log(`Tenant: ${tenantDomain}`);
this.logger.log(`AccountSid: ${msg.start.accountSid}`);
this.logger.log(`MediaFormat: ${JSON.stringify(msg.start.mediaFormat)}`);
this.logger.log(`Custom Parameters: ${JSON.stringify(msg.start.customParameters)}`);
// Store WebSocket connection
this.mediaStreams.set(streamSid, ws);
this.logger.log(`Stored WebSocket for streamSid: ${streamSid}. Total active streams: ${this.mediaStreams.size}`);
// Initialize OpenAI Realtime connection for this call
this.logger.log(`Initializing OpenAI Realtime for call ${callSid}...`);
await this.voiceService.initializeOpenAIRealtime({
callSid,
tenantId: tenantDomain,
userId: msg.start.customParameters?.userId || 'system',
});
this.logger.log(`✓ OpenAI Realtime initialized for call ${callSid}`);
break;
case 'media':
mediaPacketCount++;
if (mediaPacketCount % 50 === 0) {
// Log every 50th packet to avoid spam
this.logger.log(`Received media packet #${mediaPacketCount} for StreamSid: ${streamSid}, CallSid: ${callSid}, PayloadSize: ${msg.media.payload?.length || 0} bytes`);
}
if (!callSid || !tenantDomain) {
this.logger.warn('Received media before start event');
break;
}
// msg.media.payload is base64-encoded μ-law audio from Twilio
const twilioAudio = msg.media.payload;
// Convert Twilio audio (μ-law 8kHz) to OpenAI format (PCM16 24kHz)
const openaiAudio = this.audioConverter.twilioToOpenAI(twilioAudio);
// Send audio to OpenAI Realtime API
await this.voiceService.sendAudioToOpenAI(callSid, openaiAudio);
break;
case 'stop':
this.logger.log(`=== MEDIA STREAM EVENT: STOP ===`);
this.logger.log(`StreamSid: ${streamSid}`);
this.logger.log(`Total media packets received: ${mediaPacketCount}`);
if (streamSid) {
this.mediaStreams.delete(streamSid);
this.logger.log(`Removed WebSocket for streamSid: ${streamSid}. Remaining active streams: ${this.mediaStreams.size}`);
}
// Clean up OpenAI connection
if (callSid) {
this.logger.log(`Cleaning up OpenAI connection for call ${callSid}...`);
await this.voiceService.cleanupOpenAIConnection(callSid);
this.logger.log(`✓ OpenAI connection cleaned up for call ${callSid}`);
}
break;
default:
this.logger.debug(`Unknown media stream event: ${msg.event}`);
}
} catch (error: any) {
this.logger.error(`Error processing media stream message: ${error.message}`);
this.logger.error(`Stack: ${error.stack}`);
}
});
ws.on('close', () => {
this.logger.log(`=== MEDIA STREAM WEBSOCKET CLOSED ===`);
this.logger.log(`StreamSid: ${streamSid}`);
this.logger.log(`Total media packets in this stream: ${mediaPacketCount}`);
if (streamSid) {
this.mediaStreams.delete(streamSid);
this.logger.log(`Cleaned up streamSid on close. Remaining active streams: ${this.mediaStreams.size}`);
}
});
ws.on('error', (error: Error) => {
this.logger.error(`=== MEDIA STREAM WEBSOCKET ERROR ===`);
this.logger.error(`StreamSid: ${streamSid}`);
this.logger.error(`Error message: ${error.message}`);
this.logger.error(`Error stack: ${error.stack}`);
});
}
/**
* Send audio from OpenAI back to Twilio Media Stream
*/
async sendAudioToTwilio(streamSid: string, openaiAudioBase64: string) {
const ws = this.mediaStreams.get(streamSid);
if (!ws) {
this.logger.warn(`No Media Stream found for streamSid: ${streamSid}`);
return;
}
try {
// Convert OpenAI audio (PCM16 24kHz) to Twilio format (μ-law 8kHz)
const twilioAudio = this.audioConverter.openAIToTwilio(openaiAudioBase64);
// Send to Twilio Media Stream
const message = {
event: 'media',
streamSid,
media: {
payload: twilioAudio,
},
};
ws.send(JSON.stringify(message));
} catch (error: any) {
this.logger.error(`Error sending audio to Twilio: ${error.message}`);
}
}
}

View File

@@ -0,0 +1,319 @@
import {
WebSocketGateway,
WebSocketServer,
SubscribeMessage,
OnGatewayConnection,
OnGatewayDisconnect,
ConnectedSocket,
MessageBody,
} from '@nestjs/websockets';
import { Server, Socket } from 'socket.io';
import { Logger, UseGuards } from '@nestjs/common';
import { JwtService } from '@nestjs/jwt';
import { VoiceService } from './voice.service';
import { TenantDatabaseService } from '../tenant/tenant-database.service';
interface AuthenticatedSocket extends Socket {
tenantId?: string;
userId?: string;
tenantSlug?: string;
}
@WebSocketGateway({
namespace: 'voice',
cors: {
origin: true,
credentials: true,
},
})
export class VoiceGateway
implements OnGatewayConnection, OnGatewayDisconnect
{
@WebSocketServer()
server: Server;
private readonly logger = new Logger(VoiceGateway.name);
private connectedUsers: Map<string, AuthenticatedSocket> = new Map();
private activeCallsByUser: Map<string, string> = new Map(); // userId -> callSid
constructor(
private readonly jwtService: JwtService,
private readonly voiceService: VoiceService,
private readonly tenantDbService: TenantDatabaseService,
) {
// Set gateway reference in service to avoid circular dependency
this.voiceService.setGateway(this);
}
async handleConnection(client: AuthenticatedSocket) {
try {
// Extract token from handshake auth
const token =
client.handshake.auth.token || client.handshake.headers.authorization?.split(' ')[1];
if (!token) {
this.logger.warn('❌ Client connection rejected: No token provided');
client.disconnect();
return;
}
// Verify JWT token
const payload = await this.jwtService.verifyAsync(token);
// Extract domain from origin header (e.g., http://tenant1.routebox.co:3001)
// The domains table stores just the subdomain part (e.g., "tenant1")
const origin = client.handshake.headers.origin || client.handshake.headers.referer;
let domain = 'localhost';
if (origin) {
try {
const url = new URL(origin);
const hostname = url.hostname; // e.g., tenant1.routebox.co or localhost
// Extract first part of subdomain as domain
// tenant1.routebox.co -> tenant1
// localhost -> localhost
domain = hostname.split('.')[0];
} catch (error) {
this.logger.warn(`Failed to parse origin: ${origin}`);
}
}
client.tenantId = domain; // Store the subdomain as tenantId
client.userId = payload.sub;
client.tenantSlug = domain; // Same as subdomain
this.connectedUsers.set(client.userId, client);
this.logger.log(
`✓ Client connected: ${client.id} (User: ${client.userId}, Domain: ${domain})`,
);
this.logger.log(`Total connected users in ${domain}: ${this.getConnectedUsers(domain).length}`);
// Send current call state if any active call
const activeCallSid = this.activeCallsByUser.get(client.userId);
if (activeCallSid) {
const callState = await this.voiceService.getCallState(
activeCallSid,
client.tenantId,
);
client.emit('call:state', callState);
}
} catch (error) {
this.logger.error('❌ Authentication failed', error);
client.disconnect();
}
}
handleDisconnect(client: AuthenticatedSocket) {
if (client.userId) {
this.connectedUsers.delete(client.userId);
this.logger.log(`✓ Client disconnected: ${client.id} (User: ${client.userId})`);
this.logger.log(`Remaining connected users: ${this.connectedUsers.size}`);
}
}
/**
* Initiate outbound call
*/
@SubscribeMessage('call:initiate')
async handleInitiateCall(
@ConnectedSocket() client: AuthenticatedSocket,
@MessageBody() data: { toNumber: string },
) {
try {
this.logger.log(`Initiating call from user ${client.userId} to ${data.toNumber}`);
const result = await this.voiceService.initiateCall({
tenantId: client.tenantId,
userId: client.userId,
toNumber: data.toNumber,
});
this.activeCallsByUser.set(client.userId, result.callSid);
client.emit('call:initiated', {
callSid: result.callSid,
toNumber: data.toNumber,
status: 'queued',
});
return { success: true, callSid: result.callSid };
} catch (error) {
this.logger.error('Failed to initiate call', error);
client.emit('call:error', {
message: error.message || 'Failed to initiate call',
});
return { success: false, error: error.message };
}
}
/**
* Accept incoming call
*/
@SubscribeMessage('call:accept')
async handleAcceptCall(
@ConnectedSocket() client: AuthenticatedSocket,
@MessageBody() data: { callSid: string },
) {
try {
this.logger.log(`User ${client.userId} accepting call ${data.callSid}`);
await this.voiceService.acceptCall({
callSid: data.callSid,
tenantId: client.tenantId,
userId: client.userId,
});
this.activeCallsByUser.set(client.userId, data.callSid);
client.emit('call:accepted', { callSid: data.callSid });
return { success: true };
} catch (error) {
this.logger.error('Failed to accept call', error);
return { success: false, error: error.message };
}
}
/**
* Reject incoming call
*/
@SubscribeMessage('call:reject')
async handleRejectCall(
@ConnectedSocket() client: AuthenticatedSocket,
@MessageBody() data: { callSid: string },
) {
try {
this.logger.log(`User ${client.userId} rejecting call ${data.callSid}`);
await this.voiceService.rejectCall(data.callSid, client.tenantId);
client.emit('call:rejected', { callSid: data.callSid });
return { success: true };
} catch (error) {
this.logger.error('Failed to reject call', error);
return { success: false, error: error.message };
}
}
/**
* End active call
*/
@SubscribeMessage('call:end')
async handleEndCall(
@ConnectedSocket() client: AuthenticatedSocket,
@MessageBody() data: { callSid: string },
) {
try {
this.logger.log(`User ${client.userId} ending call ${data.callSid}`);
await this.voiceService.endCall(data.callSid, client.tenantId);
this.activeCallsByUser.delete(client.userId);
client.emit('call:ended', { callSid: data.callSid });
return { success: true };
} catch (error) {
this.logger.error('Failed to end call', error);
return { success: false, error: error.message };
}
}
/**
* Send DTMF tones
*/
@SubscribeMessage('call:dtmf')
async handleDtmf(
@ConnectedSocket() client: AuthenticatedSocket,
@MessageBody() data: { callSid: string; digit: string },
) {
try {
await this.voiceService.sendDtmf(
data.callSid,
data.digit,
client.tenantId,
);
return { success: true };
} catch (error) {
this.logger.error('Failed to send DTMF', error);
return { success: false, error: error.message };
}
}
/**
* Emit incoming call notification to specific user
*/
async notifyIncomingCall(userId: string, callData: any) {
const socket = this.connectedUsers.get(userId);
if (socket) {
socket.emit('call:incoming', callData);
this.logger.log(`Notified user ${userId} of incoming call`);
} else {
this.logger.warn(`User ${userId} not connected to receive call notification`);
}
}
/**
* Emit call status update to user
*/
async notifyCallUpdate(userId: string, callData: any) {
const socket = this.connectedUsers.get(userId);
if (socket) {
socket.emit('call:update', callData);
}
}
/**
* Emit AI transcript to user
*/
async notifyAiTranscript(userId: string, data: { callSid: string; transcript: string; isFinal: boolean }) {
const socket = this.connectedUsers.get(userId);
if (socket) {
socket.emit('ai:transcript', data);
}
}
/**
* Emit AI suggestion to user
*/
async notifyAiSuggestion(userId: string, data: any) {
const socket = this.connectedUsers.get(userId);
this.logger.log(`notifyAiSuggestion - userId: ${userId}, socket connected: ${!!socket}, total connected users: ${this.connectedUsers.size}`);
if (socket) {
this.logger.log(`Emitting ai:suggestion event with data:`, JSON.stringify(data));
socket.emit('ai:suggestion', data);
} else {
this.logger.warn(`No socket connection found for userId: ${userId}`);
this.logger.log(`Connected users: ${Array.from(this.connectedUsers.keys()).join(', ')}`);
}
}
/**
* Emit AI action result to user
*/
async notifyAiAction(userId: string, data: any) {
const socket = this.connectedUsers.get(userId);
if (socket) {
socket.emit('ai:action', data);
}
}
/**
* Get connected users for a tenant
*/
getConnectedUsers(tenantDomain?: string): string[] {
const userIds: string[] = [];
for (const [userId, socket] of this.connectedUsers.entries()) {
// If tenantDomain specified, filter by tenant
if (!tenantDomain || socket.tenantSlug === tenantDomain) {
userIds.push(userId);
}
}
return userIds;
}
}

View File

@@ -0,0 +1,23 @@
import { Module } from '@nestjs/common';
import { JwtModule } from '@nestjs/jwt';
import { VoiceGateway } from './voice.gateway';
import { VoiceService } from './voice.service';
import { VoiceController } from './voice.controller';
import { AudioConverterService } from './audio-converter.service';
import { TenantModule } from '../tenant/tenant.module';
import { AuthModule } from '../auth/auth.module';
@Module({
imports: [
TenantModule,
AuthModule,
JwtModule.register({
secret: process.env.JWT_SECRET || 'your-jwt-secret',
signOptions: { expiresIn: process.env.JWT_EXPIRES_IN || '24h' },
}),
],
providers: [VoiceGateway, VoiceService, AudioConverterService],
controllers: [VoiceController],
exports: [VoiceService],
})
export class VoiceModule {}

View File

@@ -0,0 +1,826 @@
import { Injectable, Logger } from '@nestjs/common';
import { TenantDatabaseService } from '../tenant/tenant-database.service';
import { getCentralPrisma } from '../prisma/central-prisma.service';
import { IntegrationsConfig, TwilioConfig, OpenAIConfig } from './interfaces/integration-config.interface';
import * as Twilio from 'twilio';
import { WebSocket } from 'ws';
import { v4 as uuidv4 } from 'uuid';
const AccessToken = Twilio.jwt.AccessToken;
const VoiceGrant = AccessToken.VoiceGrant;
@Injectable()
export class VoiceService {
private readonly logger = new Logger(VoiceService.name);
private twilioClients: Map<string, Twilio.Twilio> = new Map();
private openaiConnections: Map<string, WebSocket> = new Map(); // callSid -> WebSocket
private callStates: Map<string, any> = new Map(); // callSid -> call state
private voiceGateway: any; // Reference to gateway (to avoid circular dependency)
constructor(
private readonly tenantDbService: TenantDatabaseService,
) {}
/**
* Set gateway reference (called by gateway on init)
*/
setGateway(gateway: any) {
this.voiceGateway = gateway;
}
/**
* Get Twilio client for a tenant
*/
private async getTwilioClient(tenantIdOrDomain: string): Promise<{ client: Twilio.Twilio; config: TwilioConfig; tenantId: string }> {
// Check cache first
if (this.twilioClients.has(tenantIdOrDomain)) {
const centralPrisma = getCentralPrisma();
// Look up tenant by domain
const domainRecord = await centralPrisma.domain.findUnique({
where: { domain: tenantIdOrDomain },
include: { tenant: { select: { id: true, integrationsConfig: true } } },
});
const config = this.getIntegrationConfig(domainRecord?.tenant?.integrationsConfig as any);
return {
client: this.twilioClients.get(tenantIdOrDomain),
config: config.twilio,
tenantId: domainRecord.tenant.id
};
}
// Fetch tenant integrations config
const centralPrisma = getCentralPrisma();
this.logger.log(`Looking up domain: ${tenantIdOrDomain}`);
const domainRecord = await centralPrisma.domain.findUnique({
where: { domain: tenantIdOrDomain },
include: { tenant: { select: { id: true, integrationsConfig: true } } },
});
this.logger.log(`Domain record found: ${!!domainRecord}, Tenant: ${!!domainRecord?.tenant}, Config: ${!!domainRecord?.tenant?.integrationsConfig}`);
if (!domainRecord?.tenant) {
throw new Error(`Domain ${tenantIdOrDomain} not found`);
}
if (!domainRecord.tenant.integrationsConfig) {
throw new Error('Tenant integrations config not found. Please configure Twilio credentials in Settings > Integrations');
}
const config = this.getIntegrationConfig(domainRecord.tenant.integrationsConfig as any);
this.logger.log(`Config decrypted: ${!!config.twilio}, AccountSid: ${config.twilio?.accountSid?.substring(0, 10)}..., AuthToken: ${config.twilio?.authToken?.substring(0, 10)}..., Phone: ${config.twilio?.phoneNumber}`);
if (!config.twilio?.accountSid || !config.twilio?.authToken) {
throw new Error('Twilio credentials not configured for tenant');
}
const client = Twilio.default(config.twilio.accountSid, config.twilio.authToken);
this.twilioClients.set(tenantIdOrDomain, client);
return { client, config: config.twilio, tenantId: domainRecord.tenant.id };
}
/**
* Decrypt and parse integrations config
*/
private getIntegrationConfig(encryptedConfig: any): IntegrationsConfig {
if (!encryptedConfig) {
return {};
}
// If it's already decrypted (object), return it
if (typeof encryptedConfig === 'object' && encryptedConfig.twilio) {
return encryptedConfig;
}
// If it's encrypted (string), decrypt it
if (typeof encryptedConfig === 'string') {
return this.tenantDbService.decryptIntegrationsConfig(encryptedConfig);
}
return {};
}
/**
* Generate Twilio access token for browser Voice SDK
*/
async generateAccessToken(tenantDomain: string, userId: string): Promise<string> {
const { config, tenantId } = await this.getTwilioClient(tenantDomain);
if (!config.accountSid || !config.apiKey || !config.apiSecret) {
throw new Error('Twilio API credentials not configured. Please add API Key and Secret in Settings > Integrations');
}
// Create an access token
const token = new AccessToken(
config.accountSid,
config.apiKey,
config.apiSecret,
{ identity: userId, ttl: 3600 } // 1 hour expiry
);
// Create a Voice grant
const voiceGrant = new VoiceGrant({
outgoingApplicationSid: config.twimlAppSid, // TwiML App SID for outbound calls
incomingAllow: true, // Allow incoming calls
});
token.addGrant(voiceGrant);
return token.toJwt();
}
/**
* Initiate outbound call
*/
async initiateCall(params: {
tenantId: string;
userId: string;
toNumber: string;
}) {
const { tenantId: tenantDomain, userId, toNumber } = params;
try {
this.logger.log(`=== INITIATING CALL ===`);
this.logger.log(`Domain: ${tenantDomain}, To: ${toNumber}, User: ${userId}`);
// Validate phone number
if (!toNumber.match(/^\+?[1-9]\d{1,14}$/)) {
throw new Error(`Invalid phone number format: ${toNumber}. Use E.164 format (e.g., +1234567890)`);
}
const { client, config, tenantId } = await this.getTwilioClient(tenantDomain);
this.logger.log(`Twilio client obtained for tenant: ${tenantId}`);
// Get from number
const fromNumber = config.phoneNumber;
if (!fromNumber) {
throw new Error('Twilio phone number not configured');
}
this.logger.log(`From number: ${fromNumber}`);
// Construct tenant-specific webhook URLs using HTTPS (for Traefik)
const backendUrl = `https://${tenantDomain}`;
const twimlUrl = `${backendUrl}/api/voice/twiml/outbound?phoneNumber=${encodeURIComponent(fromNumber)}&toNumber=${encodeURIComponent(toNumber)}`;
const statusUrl = `${backendUrl}/api/voice/webhook/status`;
this.logger.log(`TwiML URL: ${twimlUrl}`);
this.logger.log(`Status URL: ${statusUrl}`);
// Create call record in database
const tenantKnex = await this.tenantDbService.getTenantKnexById(tenantId);
const callId = uuidv4();
// Initiate call via Twilio
this.logger.log(`Calling Twilio API...`);
// For Device-to-Number calls, we need to use a TwiML App SID
// The Twilio SDK will handle the Device connection, and we return TwiML with Dial
const call = await client.calls.create({
to: toNumber,
from: fromNumber, // Your Twilio phone number
url: twimlUrl,
statusCallback: statusUrl,
statusCallbackEvent: ['initiated', 'ringing', 'answered', 'completed'],
statusCallbackMethod: 'POST',
record: false,
machineDetection: 'Enable', // Optional: detect answering machines
});
this.logger.log(`Call created successfully: ${call.sid}, Status: ${call.status}`);
// Store call in database
await tenantKnex('calls').insert({
id: callId,
call_sid: call.sid,
direction: 'outbound',
from_number: fromNumber,
to_number: toNumber,
status: 'queued',
user_id: userId,
created_at: tenantKnex.fn.now(),
updated_at: tenantKnex.fn.now(),
});
// Store call state in memory
this.callStates.set(call.sid, {
callId,
callSid: call.sid,
tenantId,
userId,
direction: 'outbound',
status: 'queued',
});
this.logger.log(`Outbound call initiated: ${call.sid}`);
return {
callId,
callSid: call.sid,
status: 'queued',
};
} catch (error) {
this.logger.error('Failed to initiate call', error);
throw error;
}
}
/**
* Accept incoming call
*/
async acceptCall(params: {
callSid: string;
tenantId: string;
userId: string;
}) {
const { callSid, tenantId, userId } = params;
try {
// Note: Twilio doesn't support updating call to 'in-progress' via API
// Call status is managed by TwiML and call flow
// We'll update our database status instead
// Update database
const tenantKnex = await this.tenantDbService.getTenantKnexById(tenantId);
await tenantKnex('calls')
.where({ call_sid: callSid })
.update({
status: 'in-progress',
user_id: userId,
started_at: tenantKnex.fn.now(),
updated_at: tenantKnex.fn.now(),
});
// Update state
const state = this.callStates.get(callSid) || {};
this.callStates.set(callSid, {
...state,
status: 'in-progress',
userId,
});
this.logger.log(`Call accepted: ${callSid} by user ${userId}`);
} catch (error) {
this.logger.error('Failed to accept call', error);
throw error;
}
}
/**
* Reject incoming call
*/
async rejectCall(callSid: string, tenantId: string) {
try {
const { client } = await this.getTwilioClient(tenantId);
// End the call
await client.calls(callSid).update({
status: 'completed',
});
// Update database
const tenantKnex = await this.tenantDbService.getTenantKnexById(tenantId);
await tenantKnex('calls')
.where({ call_sid: callSid })
.update({
status: 'canceled',
updated_at: tenantKnex.fn.now(),
});
// Clean up state
this.callStates.delete(callSid);
this.logger.log(`Call rejected: ${callSid}`);
} catch (error) {
this.logger.error('Failed to reject call', error);
throw error;
}
}
/**
* End active call
*/
async endCall(callSid: string, tenantId: string) {
try {
const { client } = await this.getTwilioClient(tenantId);
// End the call
await client.calls(callSid).update({
status: 'completed',
});
// Clean up OpenAI connection if exists
const openaiWs = this.openaiConnections.get(callSid);
if (openaiWs) {
openaiWs.close();
this.openaiConnections.delete(callSid);
}
// Update database
const tenantKnex = await this.tenantDbService.getTenantKnexById(tenantId);
await tenantKnex('calls')
.where({ call_sid: callSid })
.update({
status: 'completed',
ended_at: tenantKnex.fn.now(),
updated_at: tenantKnex.fn.now(),
});
// Clean up state
this.callStates.delete(callSid);
this.logger.log(`Call ended: ${callSid}`);
} catch (error) {
this.logger.error('Failed to end call', error);
throw error;
}
}
/**
* Send DTMF tones
*/
async sendDtmf(callSid: string, digit: string, tenantId: string) {
try {
const { client } = await this.getTwilioClient(tenantId);
// Twilio doesn't support sending DTMF directly via API
// This would need to be handled via TwiML <Play> of DTMF tones
this.logger.log(`DTMF requested for call ${callSid}: ${digit}`);
// TODO: Implement DTMF sending via TwiML update
} catch (error) {
this.logger.error('Failed to send DTMF', error);
throw error;
}
}
/**
* Get call state
*/
async getCallState(callSid: string, tenantId: string) {
// Try memory first
if (this.callStates.has(callSid)) {
return this.callStates.get(callSid);
}
// Fallback to database
const tenantKnex = await this.tenantDbService.getTenantKnexById(tenantId);
const call = await tenantKnex('calls')
.where({ call_sid: callSid })
.first();
return call || null;
}
/**
* Update call status from webhook
*/
async updateCallStatus(params: {
callSid: string;
tenantId: string;
status: string;
duration?: number;
recordingUrl?: string;
}) {
const { callSid, tenantId, status, duration, recordingUrl } = params;
try {
const tenantKnex = await this.tenantDbService.getTenantKnexById(tenantId);
const updateData: any = {
status,
updated_at: tenantKnex.fn.now(),
};
if (duration !== undefined) {
updateData.duration_seconds = duration;
}
if (recordingUrl) {
updateData.recording_url = recordingUrl;
}
if (status === 'completed') {
updateData.ended_at = tenantKnex.fn.now();
}
await tenantKnex('calls')
.where({ call_sid: callSid })
.update(updateData);
// Update state
const state = this.callStates.get(callSid);
if (state) {
this.callStates.set(callSid, { ...state, status });
}
this.logger.log(`Call status updated: ${callSid} -> ${status}`);
} catch (error) {
this.logger.error('Failed to update call status', error);
throw error;
}
}
/**
* Initialize OpenAI Realtime connection for call
*/
async initializeOpenAIRealtime(params: {
callSid: string;
tenantId: string;
userId: string;
}) {
const { callSid, tenantId, userId } = params;
try {
// Get OpenAI config - tenantId might be a domain, so look it up
const centralPrisma = getCentralPrisma();
// Try to find tenant by domain first (if tenantId is like "tenant1")
let tenant;
if (!tenantId.match(/^[0-9a-f]{8}-[0-9a-f]{4}-/i)) {
// Looks like a domain, not a UUID
const domainRecord = await centralPrisma.domain.findUnique({
where: { domain: tenantId },
include: { tenant: { select: { id: true, integrationsConfig: true } } },
});
tenant = domainRecord?.tenant;
} else {
// It's a UUID
tenant = await centralPrisma.tenant.findUnique({
where: { id: tenantId },
select: { id: true, integrationsConfig: true },
});
}
if (!tenant) {
this.logger.warn(`Tenant not found for identifier: ${tenantId}`);
return;
}
const config = this.getIntegrationConfig(tenant?.integrationsConfig as any);
if (!config.openai?.apiKey) {
this.logger.warn('OpenAI not configured for tenant, skipping AI features');
return;
}
// Connect to OpenAI Realtime API
const model = config.openai.model || 'gpt-4o-realtime-preview-2024-10-01';
const ws = new WebSocket(`wss://api.openai.com/v1/realtime?model=${model}`, {
headers: {
'Authorization': `Bearer ${config.openai.apiKey}`,
'OpenAI-Beta': 'realtime=v1',
},
});
ws.on('open', () => {
this.logger.log(`OpenAI Realtime connected for call ${callSid}`);
// Add to connections map only after it's open
this.openaiConnections.set(callSid, ws);
// Store call state with userId for later use
this.callStates.set(callSid, {
callSid,
tenantId: tenant.id,
userId,
status: 'in-progress',
});
this.logger.log(`📝 Stored call state for ${callSid} with userId: ${userId}`);
// Initialize session
ws.send(JSON.stringify({
type: 'session.update',
session: {
model: config.openai.model || 'gpt-4o-realtime-preview',
voice: config.openai.voice || 'alloy',
instructions: `You are an AI assistant in LISTENING MODE, helping a sales/support agent during their phone call.
IMPORTANT: You are NOT talking to the caller. You are advising the agent who is handling the call.
Your role:
- Listen to the conversation between the agent and the caller
- Provide concise, actionable suggestions to help the agent
- Recommend CRM actions (search contacts, create tasks, update records)
- Alert the agent to important information or next steps
- Keep suggestions brief (1-2 sentences max)
Format your suggestions like:
"💡 Suggestion: [your advice]"
"⚠️ Alert: [important notice]"
"📋 Action: [recommended CRM action]"`,
turn_detection: {
type: 'server_vad',
},
tools: this.getOpenAITools(),
},
}));
});
ws.on('message', (data: Buffer) => {
// Pass the tenant UUID (tenant.id) instead of the domain string
this.handleOpenAIMessage(callSid, tenant.id, userId, JSON.parse(data.toString()));
});
ws.on('error', (error) => {
this.logger.error(`OpenAI WebSocket error for call ${callSid}:`, error);
this.openaiConnections.delete(callSid);
});
ws.on('close', (code, reason) => {
this.logger.log(`OpenAI Realtime disconnected for call ${callSid} - Code: ${code}, Reason: ${reason.toString()}`);
this.openaiConnections.delete(callSid);
});
// Don't add to connections here - wait for 'open' event
} catch (error) {
this.logger.error('Failed to initialize OpenAI Realtime', error);
}
}
/**
* Send audio data to OpenAI Realtime API
*/
async sendAudioToOpenAI(callSid: string, audioBase64: string) {
const ws = this.openaiConnections.get(callSid);
if (!ws) {
this.logger.warn(`No OpenAI connection for call ${callSid}`);
return;
}
try {
// Send audio chunk to OpenAI
ws.send(JSON.stringify({
type: 'input_audio_buffer.append',
audio: audioBase64,
}));
} catch (error) {
this.logger.error(`Failed to send audio to OpenAI for call ${callSid}`, error);
}
}
/**
* Commit audio buffer to OpenAI (trigger processing)
*/
async commitAudioBuffer(callSid: string) {
const ws = this.openaiConnections.get(callSid);
if (!ws) {
return;
}
try {
ws.send(JSON.stringify({
type: 'input_audio_buffer.commit',
}));
} catch (error) {
this.logger.error(`Failed to commit audio buffer for call ${callSid}`, error);
}
}
/**
* Clean up OpenAI connection for a call
*/
async cleanupOpenAIConnection(callSid: string) {
const ws = this.openaiConnections.get(callSid);
if (ws) {
try {
ws.close();
this.openaiConnections.delete(callSid);
this.logger.log(`Cleaned up OpenAI connection for call ${callSid}`);
} catch (error) {
this.logger.error(`Error cleaning up OpenAI connection for call ${callSid}`, error);
}
}
}
/**
* Handle OpenAI Realtime messages
*/
private async handleOpenAIMessage(
callSid: string,
tenantId: string,
userId: string,
message: any,
) {
try {
switch (message.type) {
case 'conversation.item.created':
// Skip logging for now
break;
case 'response.audio.delta':
// OpenAI is sending audio response (skip logging)
const state = this.callStates.get(callSid);
if (state?.streamSid && message.delta) {
if (!state.pendingAudio) {
state.pendingAudio = [];
}
state.pendingAudio.push(message.delta);
}
break;
case 'response.audio.done':
// Skip logging
break;
case 'response.audio_transcript.delta':
// Skip - not transmitting individual words to frontend
break;
case 'response.audio_transcript.done':
// Final transcript - this contains the AI's actual text suggestions!
const transcript = message.transcript;
this.logger.log(`💡 AI Suggestion: "${transcript}"`);
// Save to database
await this.updateCallTranscript(callSid, tenantId, transcript);
// Also send as suggestion to frontend if it looks like a suggestion
if (transcript && transcript.length > 0) {
// Determine suggestion type
let suggestionType: 'response' | 'action' | 'insight' = 'insight';
if (transcript.includes('💡') || transcript.toLowerCase().includes('suggest')) {
suggestionType = 'response';
} else if (transcript.includes('📋') || transcript.toLowerCase().includes('action')) {
suggestionType = 'action';
} else if (transcript.includes('⚠️') || transcript.toLowerCase().includes('alert')) {
suggestionType = 'insight';
}
// Emit to frontend
const state = this.callStates.get(callSid);
this.logger.log(`📊 Call state - userId: ${state?.userId}, gateway: ${!!this.voiceGateway}`);
if (state?.userId && this.voiceGateway) {
this.logger.log(`📤 Sending to user ${state.userId}`);
await this.voiceGateway.notifyAiSuggestion(state.userId, {
type: suggestionType,
text: transcript,
callSid,
timestamp: new Date().toISOString(),
});
this.logger.log(`✅ Suggestion sent to agent`);
} else {
this.logger.warn(`❌ Cannot send - userId: ${state?.userId}, gateway: ${!!this.voiceGateway}, callStates has ${this.callStates.size} entries`);
}
}
break;
case 'response.function_call_arguments.done':
// Tool call completed
await this.handleToolCall(callSid, tenantId, userId, message);
break;
case 'session.created':
case 'session.updated':
case 'response.created':
case 'response.output_item.added':
case 'response.content_part.added':
case 'response.content_part.done':
case 'response.output_item.done':
case 'response.done':
case 'input_audio_buffer.speech_started':
case 'input_audio_buffer.speech_stopped':
case 'input_audio_buffer.committed':
// Skip logging for these (too noisy)
break;
case 'error':
this.logger.error(`OpenAI error for call ${callSid}: ${JSON.stringify(message.error)}`);
break;
default:
// Only log unhandled types occasionally
break;
}
} catch (error) {
this.logger.error('Failed to handle OpenAI message', error);
}
}
/**
* Define OpenAI tools for CRM actions
*/
private getOpenAITools(): any[] {
return [
{
type: 'function',
name: 'search_contact',
description: 'Search for a contact by name, email, or phone number',
parameters: {
type: 'object',
properties: {
query: {
type: 'string',
description: 'Search query (name, email, or phone)',
},
},
required: ['query'],
},
},
{
type: 'function',
name: 'create_task',
description: 'Create a follow-up task based on the call',
parameters: {
type: 'object',
properties: {
title: {
type: 'string',
description: 'Task title',
},
description: {
type: 'string',
description: 'Task description',
},
dueDate: {
type: 'string',
description: 'Due date (ISO format)',
},
},
required: ['title'],
},
},
{
type: 'function',
name: 'update_contact',
description: 'Update contact information',
parameters: {
type: 'object',
properties: {
contactId: {
type: 'string',
description: 'Contact ID',
},
fields: {
type: 'object',
description: 'Fields to update',
},
},
required: ['contactId', 'fields'],
},
},
];
}
/**
* Handle tool calls from OpenAI
*/
private async handleToolCall(
callSid: string,
tenantId: string,
userId: string,
message: any,
) {
// TODO: Implement actual tool execution
// This would call the appropriate services based on the tool name
// Respecting RBAC permissions for the user
this.logger.log(`Tool call for call ${callSid}: ${message.name}`);
}
/**
* Update call transcript
*/
private async updateCallTranscript(
callSid: string,
tenantId: string,
transcript: string,
) {
try {
const tenantKnex = await this.tenantDbService.getTenantKnexById(tenantId);
await tenantKnex('calls')
.where({ call_sid: callSid })
.update({
ai_transcript: transcript,
updated_at: tenantKnex.fn.now(),
});
} catch (error) {
this.logger.error('Failed to update transcript', error);
}
}
/**
* Get call history for user
*/
async getCallHistory(tenantId: string, userId: string, limit = 50) {
try {
const tenantKnex = await this.tenantDbService.getTenantKnexById(tenantId);
const calls = await tenantKnex('calls')
.where({ user_id: userId })
.orderBy('created_at', 'desc')
.limit(limit);
return calls;
} catch (error) {
this.logger.error('Failed to get call history', error);
throw error;
}
}
}