WIP - enable embeddings
This commit is contained in:
@@ -1,6 +1,8 @@
|
|||||||
import { Injectable, Logger } from '@nestjs/common';
|
import { Injectable, Logger } from '@nestjs/common';
|
||||||
import { TenantDatabaseService } from '../../tenant/tenant-database.service';
|
import { TenantDatabaseService } from '../../tenant/tenant-database.service';
|
||||||
import { MeilisearchService } from '../../search/meilisearch.service';
|
import { MeilisearchService } from '../../search/meilisearch.service';
|
||||||
|
import { getCentralPrisma } from '../../prisma/central-prisma.service';
|
||||||
|
import { OpenAIConfig } from '../../voice/interfaces/integration-config.interface';
|
||||||
import {
|
import {
|
||||||
DefaultSemanticProjectionAdapter,
|
DefaultSemanticProjectionAdapter,
|
||||||
SemanticProjectionAdapter,
|
SemanticProjectionAdapter,
|
||||||
@@ -12,6 +14,9 @@ import { SemanticLinkService } from './semantic-link.service';
|
|||||||
export class SemanticOrchestratorService {
|
export class SemanticOrchestratorService {
|
||||||
private readonly logger = new Logger(SemanticOrchestratorService.name);
|
private readonly logger = new Logger(SemanticOrchestratorService.name);
|
||||||
private readonly adapters: SemanticProjectionAdapter[] = [new DefaultSemanticProjectionAdapter()];
|
private readonly adapters: SemanticProjectionAdapter[] = [new DefaultSemanticProjectionAdapter()];
|
||||||
|
private readonly defaultEmbeddingModel =
|
||||||
|
process.env.OPENAI_EMBEDDING_MODEL || 'text-embedding-3-small';
|
||||||
|
private readonly semanticEmbedderName = 'semantic-openai';
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
private readonly tenantDbService: TenantDatabaseService,
|
private readonly tenantDbService: TenantDatabaseService,
|
||||||
@@ -61,8 +66,9 @@ export class SemanticOrchestratorService {
|
|||||||
const chunks = this.chunkerService.chunkText(projection.narrative, comments);
|
const chunks = this.chunkerService.chunkText(projection.narrative, comments);
|
||||||
await this.replaceChunks(knex, documentId, chunks);
|
await this.replaceChunks(knex, documentId, chunks);
|
||||||
|
|
||||||
await this.indexChunks(resolvedTenantId, projection, chunks);
|
const openAiConfig = await this.getOpenAiConfig(resolvedTenantId);
|
||||||
await this.generateSuggestions(resolvedTenantId, projection, chunks, userId, trigger);
|
await this.indexChunks(resolvedTenantId, projection, chunks, openAiConfig);
|
||||||
|
await this.generateSuggestions(resolvedTenantId, projection, chunks, openAiConfig, userId, trigger);
|
||||||
|
|
||||||
return { documentId, chunkCount: chunks.length };
|
return { documentId, chunkCount: chunks.length };
|
||||||
}
|
}
|
||||||
@@ -139,12 +145,25 @@ export class SemanticOrchestratorService {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
private async indexChunks(tenantId: string, projection: any, chunks: any[]) {
|
private async indexChunks(
|
||||||
|
tenantId: string,
|
||||||
|
projection: any,
|
||||||
|
chunks: any[],
|
||||||
|
openAiConfig: OpenAIConfig | null,
|
||||||
|
) {
|
||||||
if (!this.meilisearchService.isEnabled()) {
|
if (!this.meilisearchService.isEnabled()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const indexName = this.meilisearchService.buildSemanticChunkIndexName(tenantId);
|
const indexName = this.meilisearchService.buildSemanticChunkIndexName(tenantId);
|
||||||
|
if (openAiConfig?.apiKey) {
|
||||||
|
await this.meilisearchService.ensureOpenAiEmbedder(indexName, {
|
||||||
|
embedderName: this.semanticEmbedderName,
|
||||||
|
apiKey: openAiConfig.apiKey,
|
||||||
|
model: openAiConfig.embeddingModel || this.defaultEmbeddingModel,
|
||||||
|
documentTemplate: '{{doc.title}}\n{{doc.text}}',
|
||||||
|
});
|
||||||
|
}
|
||||||
await this.meilisearchService.upsertDocuments(indexName, chunks.map((chunk) => ({
|
await this.meilisearchService.upsertDocuments(indexName, chunks.map((chunk) => ({
|
||||||
id: `${projection.entityType}:${projection.entityId}:${chunk.chunkIndex}`,
|
id: `${projection.entityType}:${projection.entityId}:${chunk.chunkIndex}`,
|
||||||
entityType: projection.entityType,
|
entityType: projection.entityType,
|
||||||
@@ -160,6 +179,7 @@ export class SemanticOrchestratorService {
|
|||||||
tenantId: string,
|
tenantId: string,
|
||||||
projection: any,
|
projection: any,
|
||||||
chunks: any[],
|
chunks: any[],
|
||||||
|
openAiConfig: OpenAIConfig | null,
|
||||||
userId?: string,
|
userId?: string,
|
||||||
trigger: string = 'semantic_refresh',
|
trigger: string = 'semantic_refresh',
|
||||||
) {
|
) {
|
||||||
@@ -169,7 +189,12 @@ export class SemanticOrchestratorService {
|
|||||||
|
|
||||||
const indexName = this.meilisearchService.buildSemanticChunkIndexName(tenantId);
|
const indexName = this.meilisearchService.buildSemanticChunkIndexName(tenantId);
|
||||||
const queryText = chunks.slice(0, 3).map((chunk) => chunk.text).join(' ').slice(0, 1200);
|
const queryText = chunks.slice(0, 3).map((chunk) => chunk.text).join(' ').slice(0, 1200);
|
||||||
const search = await this.meilisearchService.searchIndex(indexName, queryText, 20);
|
const search = await this.meilisearchService.searchIndex(
|
||||||
|
indexName,
|
||||||
|
queryText,
|
||||||
|
20,
|
||||||
|
openAiConfig?.apiKey ? { embedder: this.semanticEmbedderName } : undefined,
|
||||||
|
);
|
||||||
|
|
||||||
const grouped = new Map<string, any[]>();
|
const grouped = new Map<string, any[]>();
|
||||||
for (const hit of search.hits || []) {
|
for (const hit of search.hits || []) {
|
||||||
@@ -222,4 +247,38 @@ export class SemanticOrchestratorService {
|
|||||||
|
|
||||||
return `${objectDefinition.apiName.toLowerCase()}s`;
|
return `${objectDefinition.apiName.toLowerCase()}s`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Resolves the OpenAI settings used for semantic embeddings of a tenant.
 *
 * Reads the tenant's `integrationsConfig` from the central database
 * (decrypting it when it is stored as a string). A tenant-level API key wins;
 * otherwise the `OPENAI_API_KEY` environment variable is used together with
 * the service's default embedding model.
 *
 * @param tenantId Tenant identifier; it is passed through `resolveTenantId` before the lookup.
 * @returns The API key and embedding model to use, or `null` when no API key is available.
 */
private async getOpenAiConfig(tenantId: string): Promise<OpenAIConfig | null> {
  const resolvedTenantId = await this.tenantDbService.resolveTenantId(tenantId);
  const tenant = await getCentralPrisma().tenant.findUnique({
    where: { id: resolvedTenantId },
    select: { integrationsConfig: true },
  });

  // The stored value may be an encrypted string or an already-parsed object.
  const storedConfig = tenant?.integrationsConfig;
  const config = storedConfig
    ? typeof storedConfig === 'string'
      ? this.tenantDbService.decryptIntegrationsConfig(storedConfig)
      : storedConfig
    : null;

  const tenantOpenAi = config?.openai;
  if (tenantOpenAi?.apiKey) {
    return {
      apiKey: tenantOpenAi.apiKey,
      // `||` on purpose: an empty model string should also fall back to the default.
      embeddingModel: tenantOpenAi.embeddingModel || this.defaultEmbeddingModel,
    };
  }

  // Fall back to the environment key whenever the tenant has no usable key.
  // Checking only for the presence of `config.openai` would skip this fallback
  // for tenants whose `openai` entry exists but carries no `apiKey`.
  const envApiKey = process.env.OPENAI_API_KEY;
  if (envApiKey) {
    return {
      apiKey: envApiKey,
      embeddingModel: this.defaultEmbeddingModel,
    };
  }

  return null;
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,9 +8,22 @@ type MeiliConfig = {
|
|||||||
indexPrefix: string;
|
indexPrefix: string;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Optional hybrid-search settings accepted by the index search method, which
 * forwards this object as the `hybrid` field of the search request body.
 *
 * - `embedder`: name of the embedder to search with; the semantic orchestrator
 *   passes its `semanticEmbedderName` here.
 * - `semanticRatio`: not set by any caller visible here; presumably the
 *   Meilisearch semantic-vs-keyword weighting — TODO confirm.
 */
type HybridSearchOptions = {
|
||||||
|
embedder: string;
|
||||||
|
semanticRatio?: number;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * Settings consumed by `ensureOpenAiEmbedder` when it registers an embedder
 * on an index.
 *
 * - `embedderName`: key under which the embedder is sent in the settings payload.
 * - `apiKey`: OpenAI API key sent with the embedder definition; when it is
 *   missing, `ensureOpenAiEmbedder` returns without doing anything.
 * - `model`: embedding model name sent with the embedder definition.
 * - `documentTemplate`: template string sent with the embedder definition
 *   (the orchestrator passes one built from `doc.title` and `doc.text`).
 */
type OpenAiEmbedderConfig = {
|
||||||
|
embedderName: string;
|
||||||
|
apiKey: string;
|
||||||
|
model: string;
|
||||||
|
documentTemplate: string;
|
||||||
|
};
|
||||||
|
|
||||||
@Injectable()
|
@Injectable()
|
||||||
export class MeilisearchService {
|
export class MeilisearchService {
|
||||||
private readonly logger = new Logger(MeilisearchService.name);
|
private readonly logger = new Logger(MeilisearchService.name);
|
||||||
|
private readonly embedderCache = new Map<string, string>();
|
||||||
|
|
||||||
isEnabled(): boolean {
|
isEnabled(): boolean {
|
||||||
return Boolean(this.getConfig());
|
return Boolean(this.getConfig());
|
||||||
@@ -183,6 +196,7 @@ export class MeilisearchService {
|
|||||||
indexName: string,
|
indexName: string,
|
||||||
query: string,
|
query: string,
|
||||||
limit = 20,
|
limit = 20,
|
||||||
|
hybrid?: HybridSearchOptions,
|
||||||
): Promise<{ hits: any[]; total: number }> {
|
): Promise<{ hits: any[]; total: number }> {
|
||||||
const config = this.getConfig();
|
const config = this.getConfig();
|
||||||
if (!config) return { hits: [], total: 0 };
|
if (!config) return { hits: [], total: 0 };
|
||||||
@@ -192,7 +206,11 @@ export class MeilisearchService {
|
|||||||
const response = await this.requestJson(
|
const response = await this.requestJson(
|
||||||
'POST',
|
'POST',
|
||||||
url,
|
url,
|
||||||
{ q: query, limit },
|
{
|
||||||
|
q: query,
|
||||||
|
limit,
|
||||||
|
...(hybrid ? { hybrid } : {}),
|
||||||
|
},
|
||||||
this.buildHeaders(config),
|
this.buildHeaders(config),
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -250,7 +268,7 @@ export class MeilisearchService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private requestJson(
|
private requestJson(
|
||||||
method: 'POST' | 'DELETE',
|
method: 'POST' | 'DELETE' | 'PATCH',
|
||||||
url: string,
|
url: string,
|
||||||
payload: any,
|
payload: any,
|
||||||
headers: Record<string, string>,
|
headers: Record<string, string>,
|
||||||
@@ -293,4 +311,49 @@ export class MeilisearchService {
|
|||||||
request.end();
|
request.end();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Registers (or updates) an OpenAI embedder on a Meilisearch index.
 *
 * Sends a PATCH to `/indexes/{indexName}/settings/embedders` with the embedder
 * definition. Successful updates are remembered in an in-memory cache keyed by
 * index and embedder name, so repeated calls with identical settings do not
 * hit Meilisearch again. Failures are logged as warnings and never thrown.
 *
 * @param indexName Name of the index whose embedder settings are updated.
 * @param config Embedder name, OpenAI API key, model and document template.
 * @returns Resolves once the update request finished or was skipped.
 */
async ensureOpenAiEmbedder(
  indexName: string,
  config: OpenAiEmbedderConfig,
): Promise<void> {
  const meiliConfig = this.getConfig();
  if (!meiliConfig || !config?.apiKey) return;

  const { embedderName, model, documentTemplate, apiKey } = config;

  // The signature includes the API key so that a rotated key triggers a new
  // settings update. Note that the raw key is therefore held in memory.
  const signature = JSON.stringify({ embedderName, model, documentTemplate, apiKey });
  const cacheKey = `${indexName}:${embedderName}`;
  if (this.embedderCache.get(cacheKey) === signature) {
    return;
  }

  const url = `${meiliConfig.host}/indexes/${encodeURIComponent(indexName)}/settings/embedders`;
  try {
    const response = await this.requestJson(
      'PATCH',
      url,
      {
        [embedderName]: {
          source: 'openAi',
          model,
          apiKey,
          documentTemplate,
        },
      },
      this.buildHeaders(meiliConfig),
    );

    if (!this.isSuccessStatus(response.status)) {
      this.logger.warn(
        `Meilisearch embedder update failed for index ${indexName}: ${response.status}`,
      );
      return;
    }

    // Only cache after a successful response so failed attempts are retried.
    this.embedderCache.set(cacheKey, signature);
  } catch (error) {
    // A thrown value is not guaranteed to be an Error; narrow before reading
    // `.message` so the log never prints "undefined".
    const reason = error instanceof Error ? error.message : String(error);
    this.logger.warn(`Meilisearch embedder update failed: ${reason}`);
  }
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ export interface OpenAIConfig {
|
|||||||
apiKey: string;
|
apiKey: string;
|
||||||
assistantId?: string;
|
assistantId?: string;
|
||||||
model?: string;
|
model?: string;
|
||||||
|
embeddingModel?: string;
|
||||||
voice?: string;
|
voice?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user