Add twilio softphone with integrated AI assistant

This commit is contained in:
Francisco Gaona
2026-01-04 08:48:43 +01:00
parent 16907aadf8
commit 5f3fcef1ec
42 changed files with 5689 additions and 923 deletions

View File

@@ -0,0 +1,214 @@
import { Injectable, Logger } from '@nestjs/common';
/**
* Audio format converter for Twilio <-> OpenAI audio streaming
*
* Twilio Media Streams format:
* - Codec: μ-law (G.711)
* - Sample rate: 8kHz
* - Encoding: base64
* - Chunk size: 20ms (160 bytes)
*
* OpenAI Realtime API format:
* - Codec: PCM16
* - Sample rate: 24kHz
* - Encoding: base64
* - Mono channel
*/
@Injectable()
export class AudioConverterService {
private readonly logger = new Logger(AudioConverterService.name);
// μ-law decode lookup table
private readonly MULAW_DECODE_TABLE = this.buildMuLawDecodeTable();
// μ-law encode lookup table
private readonly MULAW_ENCODE_TABLE = this.buildMuLawEncodeTable();
/**
* Build μ-law to linear PCM16 decode table
*/
private buildMuLawDecodeTable(): Int16Array {
const table = new Int16Array(256);
for (let i = 0; i < 256; i++) {
const mulaw = ~i;
const exponent = (mulaw >> 4) & 0x07;
const mantissa = mulaw & 0x0f;
let sample = (mantissa << 3) + 0x84;
sample <<= exponent;
sample -= 0x84;
if ((mulaw & 0x80) === 0) {
sample = -sample;
}
table[i] = sample;
}
return table;
}
/**
* Build linear PCM16 to μ-law encode table
*/
private buildMuLawEncodeTable(): Uint8Array {
const table = new Uint8Array(65536);
for (let i = 0; i < 65536; i++) {
const sample = (i - 32768);
const sign = sample < 0 ? 0x80 : 0x00;
const magnitude = Math.abs(sample);
// Add bias
let biased = magnitude + 0x84;
// Find exponent
let exponent = 7;
for (let exp = 0; exp < 8; exp++) {
if (biased <= (0xff << exp)) {
exponent = exp;
break;
}
}
// Extract mantissa
const mantissa = (biased >> (exponent + 3)) & 0x0f;
// Combine sign, exponent, mantissa
const mulaw = ~(sign | (exponent << 4) | mantissa);
table[i] = mulaw & 0xff;
}
return table;
}
/**
* Decode μ-law audio to linear PCM16
* @param mulawData - Buffer containing μ-law encoded audio
* @returns Buffer containing PCM16 audio (16-bit little-endian)
*/
decodeMuLaw(mulawData: Buffer): Buffer {
const pcm16 = Buffer.allocUnsafe(mulawData.length * 2);
for (let i = 0; i < mulawData.length; i++) {
const sample = this.MULAW_DECODE_TABLE[mulawData[i]];
pcm16.writeInt16LE(sample, i * 2);
}
return pcm16;
}
/**
* Encode linear PCM16 to μ-law
* @param pcm16Data - Buffer containing PCM16 audio (16-bit little-endian)
* @returns Buffer containing μ-law encoded audio
*/
encodeMuLaw(pcm16Data: Buffer): Buffer {
const mulaw = Buffer.allocUnsafe(pcm16Data.length / 2);
for (let i = 0; i < pcm16Data.length; i += 2) {
const sample = pcm16Data.readInt16LE(i);
const index = (sample + 32768) & 0xffff;
mulaw[i / 2] = this.MULAW_ENCODE_TABLE[index];
}
return mulaw;
}
/**
* Resample audio from 8kHz to 24kHz (linear interpolation)
* @param pcm16Data - Buffer containing 8kHz PCM16 audio
* @returns Buffer containing 24kHz PCM16 audio
*/
resample8kTo24k(pcm16Data: Buffer): Buffer {
const inputSamples = pcm16Data.length / 2;
const outputSamples = Math.floor(inputSamples * 3); // 8k * 3 = 24k
const output = Buffer.allocUnsafe(outputSamples * 2);
for (let i = 0; i < outputSamples; i++) {
const srcIndex = i / 3;
const srcIndexFloor = Math.floor(srcIndex);
const srcIndexCeil = Math.min(srcIndexFloor + 1, inputSamples - 1);
const fraction = srcIndex - srcIndexFloor;
const sample1 = pcm16Data.readInt16LE(srcIndexFloor * 2);
const sample2 = pcm16Data.readInt16LE(srcIndexCeil * 2);
// Linear interpolation
const interpolated = Math.round(sample1 + (sample2 - sample1) * fraction);
output.writeInt16LE(interpolated, i * 2);
}
return output;
}
/**
* Resample audio from 24kHz to 8kHz (decimation with averaging)
* @param pcm16Data - Buffer containing 24kHz PCM16 audio
* @returns Buffer containing 8kHz PCM16 audio
*/
resample24kTo8k(pcm16Data: Buffer): Buffer {
const inputSamples = pcm16Data.length / 2;
const outputSamples = Math.floor(inputSamples / 3); // 24k / 3 = 8k
const output = Buffer.allocUnsafe(outputSamples * 2);
for (let i = 0; i < outputSamples; i++) {
// Average 3 samples for anti-aliasing
const idx1 = Math.min(i * 3, inputSamples - 1);
const idx2 = Math.min(i * 3 + 1, inputSamples - 1);
const idx3 = Math.min(i * 3 + 2, inputSamples - 1);
const sample1 = pcm16Data.readInt16LE(idx1 * 2);
const sample2 = pcm16Data.readInt16LE(idx2 * 2);
const sample3 = pcm16Data.readInt16LE(idx3 * 2);
const averaged = Math.round((sample1 + sample2 + sample3) / 3);
output.writeInt16LE(averaged, i * 2);
}
return output;
}
/**
* Convert Twilio μ-law 8kHz to OpenAI PCM16 24kHz
* @param twilioBase64 - Base64-encoded μ-law audio from Twilio
* @returns Base64-encoded PCM16 24kHz audio for OpenAI
*/
twilioToOpenAI(twilioBase64: string): string {
try {
// Decode base64
const mulawBuffer = Buffer.from(twilioBase64, 'base64');
// μ-law -> PCM16
const pcm16_8k = this.decodeMuLaw(mulawBuffer);
// 8kHz -> 24kHz
const pcm16_24k = this.resample8kTo24k(pcm16_8k);
// Encode to base64
return pcm16_24k.toString('base64');
} catch (error) {
this.logger.error('Error converting Twilio to OpenAI audio', error);
throw error;
}
}
/**
* Convert OpenAI PCM16 24kHz to Twilio μ-law 8kHz
* @param openaiBase64 - Base64-encoded PCM16 24kHz audio from OpenAI
* @returns Base64-encoded μ-law 8kHz audio for Twilio
*/
openAIToTwilio(openaiBase64: string): string {
try {
// Decode base64
const pcm16_24k = Buffer.from(openaiBase64, 'base64');
// 24kHz -> 8kHz
const pcm16_8k = this.resample24kTo8k(pcm16_24k);
// PCM16 -> μ-law
const mulawBuffer = this.encodeMuLaw(pcm16_8k);
// Encode to base64
return mulawBuffer.toString('base64');
} catch (error) {
this.logger.error('Error converting OpenAI to Twilio audio', error);
throw error;
}
}
}