WIP - some progress with semantic linking but still needs a lot of work
This commit is contained in:
@@ -10,6 +10,8 @@ export type SemanticProjection = {
|
||||
entityId: string;
|
||||
title: string;
|
||||
narrative: string;
|
||||
/** Plain text used for embedding — no 'key: value' labels, no comments (chunker handles those separately). */
|
||||
embeddingNarrative: string;
|
||||
metadata: Record<string, any>;
|
||||
sourceSummary: {
|
||||
includedFieldCount: number;
|
||||
@@ -61,11 +63,17 @@ export class DefaultSemanticProjectionAdapter implements SemanticProjectionAdapt
|
||||
|
||||
const narrative = [fieldNarrative, commentNarrative].filter(Boolean).join('\n\n');
|
||||
|
||||
// Plain values only — no 'key:' prefixes, no comments (chunker adds those separately).
|
||||
const embeddingNarrative = fieldEntries
|
||||
.map(([, value]) => String(value))
|
||||
.join('\n');
|
||||
|
||||
return {
|
||||
entityType: input.objectApiName,
|
||||
entityId: input.record.id,
|
||||
title,
|
||||
narrative,
|
||||
embeddingNarrative,
|
||||
metadata: {
|
||||
objectApiName: input.objectApiName,
|
||||
hasComments: (input.comments || []).length > 0,
|
||||
|
||||
@@ -33,6 +33,9 @@ export class CommentService {
|
||||
})
|
||||
.returning('*');
|
||||
|
||||
console.log(
|
||||
`[Knowledge] Comment created: ${dto.parentObjectApiName}:${dto.parentRecordId} by ${userId}`,
|
||||
);
|
||||
await this.semanticOrchestratorService.refreshRecord(
|
||||
tenantId,
|
||||
dto.parentObjectApiName,
|
||||
@@ -63,6 +66,9 @@ export class CommentService {
|
||||
updated_at: knex.fn.now(),
|
||||
});
|
||||
|
||||
console.log(
|
||||
`[Knowledge] Comment updated: ${existing.parent_object_api_name}:${existing.parent_record_id} by ${userId}`,
|
||||
);
|
||||
await this.semanticOrchestratorService.refreshRecord(
|
||||
tenantId,
|
||||
existing.parent_object_api_name,
|
||||
@@ -88,6 +94,9 @@ export class CommentService {
|
||||
|
||||
await knex('comments').where({ id: commentId }).delete();
|
||||
|
||||
console.log(
|
||||
`[Knowledge] Comment deleted: ${existing.parent_object_api_name}:${existing.parent_record_id} by ${userId}`,
|
||||
);
|
||||
await this.semanticOrchestratorService.refreshRecord(
|
||||
tenantId,
|
||||
existing.parent_object_api_name,
|
||||
|
||||
@@ -51,7 +51,10 @@ export class SemanticLinkService {
|
||||
);
|
||||
|
||||
const payload = {
|
||||
...normalized,
|
||||
source_entity_type: normalized.sourceEntityType,
|
||||
source_entity_id: normalized.sourceEntityId,
|
||||
target_entity_type: normalized.targetEntityType,
|
||||
target_entity_id: normalized.targetEntityId,
|
||||
link_type: input.linkType || 'related_to',
|
||||
status: input.status || 'suggested',
|
||||
origin: input.origin || 'semantic',
|
||||
|
||||
@@ -3,6 +3,7 @@ import { TenantDatabaseService } from '../../tenant/tenant-database.service';
|
||||
import { MeilisearchService } from '../../search/meilisearch.service';
|
||||
import { getCentralPrisma } from '../../prisma/central-prisma.service';
|
||||
import { OpenAIConfig } from '../../voice/interfaces/integration-config.interface';
|
||||
import { randomUUID } from 'crypto';
|
||||
import {
|
||||
DefaultSemanticProjectionAdapter,
|
||||
SemanticProjectionAdapter,
|
||||
@@ -16,7 +17,7 @@ export class SemanticOrchestratorService {
|
||||
private readonly adapters: SemanticProjectionAdapter[] = [new DefaultSemanticProjectionAdapter()];
|
||||
private readonly defaultEmbeddingModel =
|
||||
process.env.OPENAI_EMBEDDING_MODEL || 'text-embedding-3-small';
|
||||
private readonly semanticEmbedderName = 'semantic-openai';
|
||||
private readonly semanticEmbedderName = 'default';
|
||||
|
||||
constructor(
|
||||
private readonly tenantDbService: TenantDatabaseService,
|
||||
@@ -32,6 +33,9 @@ export class SemanticOrchestratorService {
|
||||
userId?: string,
|
||||
trigger: string = 'manual',
|
||||
) {
|
||||
this.logger.log(
|
||||
`Semantic refresh start: ${objectApiName}:${recordId} (trigger=${trigger})`,
|
||||
);
|
||||
const resolvedTenantId = await this.tenantDbService.resolveTenantId(tenantId);
|
||||
const knex = await this.tenantDbService.getTenantKnexById(resolvedTenantId);
|
||||
|
||||
@@ -44,6 +48,7 @@ export class SemanticOrchestratorService {
|
||||
const tableName = this.getTableName(objectDefinition);
|
||||
const record = await knex(tableName).where({ id: recordId }).first();
|
||||
if (!record) {
|
||||
this.logger.warn(`Record not found for semantic refresh: ${objectApiName}:${recordId}`);
|
||||
return { skipped: true };
|
||||
}
|
||||
|
||||
@@ -53,6 +58,9 @@ export class SemanticOrchestratorService {
|
||||
parent_record_id: recordId,
|
||||
})
|
||||
.orderBy('created_at', 'asc');
|
||||
this.logger.log(
|
||||
`Semantic refresh source: ${objectApiName}:${recordId} comments=${comments.length}`,
|
||||
);
|
||||
|
||||
const adapter = this.adapters.find((candidate) => candidate.supports(objectApiName))!;
|
||||
const projection = adapter.buildProjection({
|
||||
@@ -63,13 +71,30 @@ export class SemanticOrchestratorService {
|
||||
});
|
||||
|
||||
const documentId = await this.upsertSemanticDocument(knex, projection);
|
||||
const chunks = this.chunkerService.chunkText(projection.narrative, comments);
|
||||
// Use embeddingNarrative (plain values, no labels) so lexical noise from 'key:'
|
||||
// prefixes doesn't inflate match scores. Comments are passed separately so they
|
||||
// are not double-counted (narrative already embeds them with 'Comment N:' prefix).
|
||||
const chunks = this.chunkerService.chunkText(projection.embeddingNarrative, comments);
|
||||
this.logger.log(
|
||||
`Semantic refresh chunking: ${objectApiName}:${recordId} chunks=${chunks.length}`,
|
||||
);
|
||||
await this.replaceChunks(knex, documentId, chunks);
|
||||
|
||||
const openAiConfig = await this.getOpenAiConfig(resolvedTenantId);
|
||||
await this.indexChunks(resolvedTenantId, projection, chunks, openAiConfig);
|
||||
await this.generateSuggestions(resolvedTenantId, projection, chunks, openAiConfig, userId, trigger);
|
||||
const embedderReady = await this.indexChunks(resolvedTenantId, projection, chunks, openAiConfig);
|
||||
await this.generateSuggestions(
|
||||
resolvedTenantId,
|
||||
projection,
|
||||
chunks,
|
||||
openAiConfig,
|
||||
embedderReady,
|
||||
userId,
|
||||
trigger,
|
||||
);
|
||||
|
||||
this.logger.log(
|
||||
`Semantic refresh complete: ${objectApiName}:${recordId} document=${documentId}`,
|
||||
);
|
||||
return { documentId, chunkCount: chunks.length };
|
||||
}
|
||||
|
||||
@@ -111,8 +136,10 @@ export class SemanticOrchestratorService {
|
||||
return existing.id;
|
||||
}
|
||||
|
||||
const newId = randomUUID();
|
||||
const [created] = await knex('semantic_documents')
|
||||
.insert({
|
||||
id: newId,
|
||||
entity_type: projection.entityType,
|
||||
entity_id: projection.entityId,
|
||||
title: projection.title,
|
||||
@@ -124,10 +151,18 @@ export class SemanticOrchestratorService {
|
||||
})
|
||||
.returning('id');
|
||||
|
||||
return typeof created === 'string' ? created : created.id;
|
||||
if (created && typeof created === 'object' && created.id) {
|
||||
return created.id;
|
||||
}
|
||||
// MySQL may return a numeric insert id (often 0 for UUID PKs). Always trust the generated UUID.
|
||||
return newId;
|
||||
}
|
||||
|
||||
private async replaceChunks(knex: any, documentId: string, chunks: any[]) {
|
||||
if (!documentId) {
|
||||
this.logger.warn('Skipping chunk replace: missing semantic document id.');
|
||||
return;
|
||||
}
|
||||
await knex('semantic_chunks').where({ semantic_document_id: documentId }).delete();
|
||||
if (!chunks.length) return;
|
||||
|
||||
@@ -152,20 +187,28 @@ export class SemanticOrchestratorService {
|
||||
openAiConfig: OpenAIConfig | null,
|
||||
) {
|
||||
if (!this.meilisearchService.isEnabled()) {
|
||||
return;
|
||||
this.logger.warn('Meilisearch disabled; skipping semantic chunk indexing.');
|
||||
return false;
|
||||
}
|
||||
|
||||
const indexName = this.meilisearchService.buildSemanticChunkIndexName(tenantId);
|
||||
let embedderReady = false;
|
||||
if (openAiConfig?.apiKey) {
|
||||
await this.meilisearchService.ensureOpenAiEmbedder(indexName, {
|
||||
embedderReady = await this.meilisearchService.ensureOpenAiEmbedder(indexName, {
|
||||
embedderName: this.semanticEmbedderName,
|
||||
apiKey: openAiConfig.apiKey,
|
||||
model: openAiConfig.embeddingModel || this.defaultEmbeddingModel,
|
||||
documentTemplate: '{{doc.title}}\n{{doc.text}}',
|
||||
});
|
||||
this.logger.log(
|
||||
`Meilisearch embedder ensured: index=${indexName} model=${openAiConfig.embeddingModel || this.defaultEmbeddingModel}`,
|
||||
);
|
||||
} else {
|
||||
this.logger.warn('OpenAI embedder not configured; semantic search will be lexical only.');
|
||||
}
|
||||
this.logger.log(`Indexing semantic chunks: index=${indexName} count=${chunks.length}`);
|
||||
await this.meilisearchService.upsertDocuments(indexName, chunks.map((chunk) => ({
|
||||
id: `${projection.entityType}:${projection.entityId}:${chunk.chunkIndex}`,
|
||||
id: `${projection.entityType}_${projection.entityId}_${chunk.chunkIndex}`,
|
||||
entityType: projection.entityType,
|
||||
entityId: projection.entityId,
|
||||
title: projection.title,
|
||||
@@ -173,6 +216,7 @@ export class SemanticOrchestratorService {
|
||||
sourceRefId: chunk.sourceRefId,
|
||||
text: chunk.text,
|
||||
})));
|
||||
return embedderReady;
|
||||
}
|
||||
|
||||
private async generateSuggestions(
|
||||
@@ -180,20 +224,32 @@ export class SemanticOrchestratorService {
|
||||
projection: any,
|
||||
chunks: any[],
|
||||
openAiConfig: OpenAIConfig | null,
|
||||
embedderReady: boolean,
|
||||
userId?: string,
|
||||
trigger: string = 'semantic_refresh',
|
||||
) {
|
||||
if (!this.meilisearchService.isEnabled() || !chunks.length) {
|
||||
this.logger.warn(
|
||||
`Skipping suggestion generation: meili=${this.meilisearchService.isEnabled()} chunks=${chunks.length}`,
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
const indexName = this.meilisearchService.buildSemanticChunkIndexName(tenantId);
|
||||
const queryText = chunks.slice(0, 3).map((chunk) => chunk.text).join(' ').slice(0, 1200);
|
||||
this.logger.log(
|
||||
`Generating suggestions: index=${indexName} queryLen=${queryText.length} hybrid=${embedderReady}`,
|
||||
);
|
||||
const search = await this.meilisearchService.searchIndex(
|
||||
indexName,
|
||||
queryText,
|
||||
20,
|
||||
openAiConfig?.apiKey ? { embedder: this.semanticEmbedderName } : undefined,
|
||||
// semanticRatio:1.0 = pure vector search, no lexical component that would
|
||||
// match on shared tokens like 'name:' or 'Comment 1:' across all records.
|
||||
embedderReady ? { embedder: this.semanticEmbedderName, semanticRatio: 1.0 } : undefined,
|
||||
);
|
||||
this.logger.log(
|
||||
`Meilisearch results: index=${indexName} hits=${search.hits?.length || 0} total=${search.total}`,
|
||||
);
|
||||
|
||||
const grouped = new Map<string, any[]>();
|
||||
@@ -201,6 +257,10 @@ export class SemanticOrchestratorService {
|
||||
if (hit.entityType === projection.entityType && hit.entityId === projection.entityId) {
|
||||
continue;
|
||||
}
|
||||
// Skip self-links where source and target resolve to the same entity
|
||||
if (hit.entityId === projection.entityId) {
|
||||
continue;
|
||||
}
|
||||
const key = `${hit.entityType}:${hit.entityId}`;
|
||||
if (!grouped.has(key)) grouped.set(key, []);
|
||||
grouped.get(key).push(hit);
|
||||
|
||||
@@ -24,6 +24,7 @@ type OpenAiEmbedderConfig = {
|
||||
export class MeilisearchService {
|
||||
private readonly logger = new Logger(MeilisearchService.name);
|
||||
private readonly embedderCache = new Map<string, string>();
|
||||
private vectorStoreEnabled = false;
|
||||
|
||||
isEnabled(): boolean {
|
||||
return Boolean(this.getConfig());
|
||||
@@ -186,6 +187,16 @@ export class MeilisearchService {
|
||||
const response = await this.requestJson('POST', url, documents, this.buildHeaders(config));
|
||||
if (!this.isSuccessStatus(response.status)) {
|
||||
this.logger.warn(`Meilisearch document upsert failed for index ${indexName}: ${response.status}`);
|
||||
return;
|
||||
}
|
||||
// Meilisearch indexes (and embeds) documents asynchronously. Wait for the task
|
||||
// to complete so callers can immediately search and see the new documents.
|
||||
const taskUid = response.body?.taskUid ?? response.body?.uid;
|
||||
if (Number.isFinite(Number(taskUid))) {
|
||||
const succeeded = await this.waitForTask(config, Number(taskUid), 30000);
|
||||
if (!succeeded) {
|
||||
this.logger.warn(`Meilisearch indexing task did not succeed within timeout: taskUid=${taskUid} index=${indexName}`);
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
this.logger.warn(`Meilisearch document upsert failed: ${error.message}`);
|
||||
@@ -215,7 +226,33 @@ export class MeilisearchService {
|
||||
);
|
||||
|
||||
if (!this.isSuccessStatus(response.status)) {
|
||||
this.logger.warn(`Meilisearch search failed for index ${indexName}: ${response.status}`);
|
||||
this.logger.warn(
|
||||
`Meilisearch search failed for index ${indexName}: ${response.status}`,
|
||||
);
|
||||
this.logger.warn(
|
||||
`Meilisearch search payload: ${JSON.stringify({ q: query, limit, hybrid })}`,
|
||||
);
|
||||
this.logger.warn(
|
||||
`Meilisearch search error body: ${JSON.stringify(response.body)}`,
|
||||
);
|
||||
// If hybrid is invalid (embedder missing), retry once without hybrid
|
||||
if (hybrid && response.body?.code === 'invalid_embedder') {
|
||||
const fallback = await this.requestJson(
|
||||
'POST',
|
||||
url,
|
||||
{ q: query, limit },
|
||||
this.buildHeaders(config),
|
||||
);
|
||||
if (this.isSuccessStatus(fallback.status)) {
|
||||
const hits = Array.isArray(fallback.body?.hits) ? fallback.body.hits : [];
|
||||
const total =
|
||||
fallback.body?.estimatedTotalHits ?? fallback.body?.nbHits ?? hits.length;
|
||||
this.logger.warn(
|
||||
`Meilisearch hybrid failed; fell back to lexical search for index ${indexName}.`,
|
||||
);
|
||||
return { hits, total };
|
||||
}
|
||||
}
|
||||
return { hits: [], total: 0 };
|
||||
}
|
||||
|
||||
@@ -268,7 +305,7 @@ export class MeilisearchService {
|
||||
}
|
||||
|
||||
private requestJson(
|
||||
method: 'POST' | 'DELETE' | 'PATCH',
|
||||
method: 'POST' | 'DELETE' | 'PATCH' | 'GET',
|
||||
url: string,
|
||||
payload: any,
|
||||
headers: Record<string, string>,
|
||||
@@ -305,19 +342,49 @@ export class MeilisearchService {
|
||||
);
|
||||
|
||||
request.on('error', reject);
|
||||
if (payload !== undefined) {
|
||||
if (payload !== undefined && method !== 'GET') {
|
||||
request.write(JSON.stringify(payload));
|
||||
}
|
||||
request.end();
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Switches on the Meilisearch `vectorStore` experimental feature by sending
 * PATCH `<host>/experimental-features` with `{ vectorStore: true }`.
 *
 * Currently a no-op: the unconditional return at the top short-circuits the
 * method to avoid the overhead of checking on every save. Delete that return
 * to bring the logic underneath back into play.
 */
private async enableVectorStore(): Promise<void> {
  // Temporarily disabled; remove this early return to re-enable.
  return;
  if (this.vectorStoreEnabled) return; // eslint-disable-line no-unreachable
  const settings = this.getConfig();
  if (!settings) return;

  const endpoint = `${settings.host}/experimental-features`;
  try {
    const { status, body } = await this.requestJson(
      'PATCH',
      endpoint,
      { vectorStore: true },
      this.buildHeaders(settings),
    );
    if (!this.isSuccessStatus(status)) {
      this.logger.warn(
        `Failed to enable Meilisearch vector store: ${status} ${JSON.stringify(body)}`,
      );
      return;
    }
    // Remember the success so later calls can return without another request.
    this.vectorStoreEnabled = true;
    this.logger.log('Meilisearch vector store experimental feature enabled');
  } catch (error) {
    this.logger.warn(`Failed to enable Meilisearch vector store: ${error.message}`);
  }
}
|
||||
|
||||
async ensureOpenAiEmbedder(
|
||||
indexName: string,
|
||||
config: OpenAiEmbedderConfig,
|
||||
): Promise<void> {
|
||||
): Promise<boolean> {
|
||||
const meiliConfig = this.getConfig();
|
||||
if (!meiliConfig || !config?.apiKey) return;
|
||||
if (!meiliConfig || !config?.apiKey) return false;
|
||||
|
||||
await this.enableVectorStore();
|
||||
|
||||
const signature = JSON.stringify({
|
||||
embedderName: config.embedderName,
|
||||
@@ -327,7 +394,7 @@ export class MeilisearchService {
|
||||
});
|
||||
const cacheKey = `${indexName}:${config.embedderName}`;
|
||||
if (this.embedderCache.get(cacheKey) === signature) {
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
|
||||
const url = `${meiliConfig.host}/indexes/${encodeURIComponent(indexName)}/settings/embedders`;
|
||||
@@ -349,11 +416,67 @@ export class MeilisearchService {
|
||||
this.logger.warn(
|
||||
`Meilisearch embedder update failed for index ${indexName}: ${response.status}`,
|
||||
);
|
||||
return;
|
||||
this.logger.warn(
|
||||
`Meilisearch embedder error body: ${JSON.stringify(response.body)}`,
|
||||
);
|
||||
return false;
|
||||
}
|
||||
const taskUid = response.body?.taskUid ?? response.body?.uid;
|
||||
if (Number.isFinite(Number(taskUid))) {
|
||||
const succeeded = await this.waitForTask(meiliConfig, Number(taskUid), 8000);
|
||||
if (!succeeded) {
|
||||
this.logger.warn(`Meilisearch embedder task did not succeed: ${taskUid}`);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
const hasEmbedder = await this.hasEmbedder(meiliConfig, indexName, config.embedderName);
|
||||
if (!hasEmbedder) {
|
||||
this.logger.warn(`Meilisearch embedder missing after update: ${config.embedderName}`);
|
||||
return false;
|
||||
}
|
||||
this.embedderCache.set(cacheKey, signature);
|
||||
return true;
|
||||
} catch (error) {
|
||||
this.logger.warn(`Meilisearch embedder update failed: ${error.message}`);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Polls GET `<host>/tasks/<taskUid>` until the task reaches a terminal status
 * or the time budget runs out.
 *
 * @param config Connection settings; `host` builds the URL and the object is
 *   passed to `buildHeaders` for every request.
 * @param taskUid Uid of the task to watch.
 * @param timeoutMs Polling budget in milliseconds. It is checked only before
 *   each request, so the call may return slightly after the budget expires.
 * @returns `true` only when the task status is `'succeeded'`; `false` on a
 *   non-success HTTP status, on `'failed'` / `'canceled'`, or on timeout.
 */
private async waitForTask(
  config: MeiliConfig,
  taskUid: number,
  timeoutMs = 8000,
): Promise<boolean> {
  const taskUrl = `${config.host}/tasks/${taskUid}`;
  const deadline = Date.now() + timeoutMs;
  const pause = () => new Promise((resolve) => setTimeout(resolve, 300));

  while (Date.now() < deadline) {
    const reply = await this.requestJson('GET', taskUrl, undefined, this.buildHeaders(config));
    if (!this.isSuccessStatus(reply.status)) {
      return false;
    }

    switch (reply.body?.status) {
      case 'succeeded':
        return true;
      case 'failed':
      case 'canceled':
        this.logger.warn(`Meilisearch task ${taskUid} failed: ${JSON.stringify(reply.body?.error)}`);
        return false;
      default:
        // Any other status: wait 300 ms, then poll again.
        await pause();
    }
  }

  // Budget exhausted without seeing a terminal status.
  return false;
}
|
||||
|
||||
/**
 * Reports whether an embedder with the given name is present in the index's
 * embedder settings, read via GET `<host>/indexes/<index>/settings/embedders`.
 *
 * @param config Connection settings; `host` builds the URL and the object is
 *   passed to `buildHeaders`.
 * @param indexName Index to inspect; URI-encoded before use in the path.
 * @param embedderName Key to look up in the response body.
 * @returns `true` when the response body holds a truthy entry under
 *   `embedderName`; `false` otherwise, including on a non-success HTTP status.
 */
private async hasEmbedder(
  config: MeiliConfig,
  indexName: string,
  embedderName: string,
): Promise<boolean> {
  const settingsUrl = `${config.host}/indexes/${encodeURIComponent(indexName)}/settings/embedders`;
  const { status, body } = await this.requestJson(
    'GET',
    settingsUrl,
    undefined,
    this.buildHeaders(config),
  );
  if (!this.isSuccessStatus(status)) return false;

  // An empty or missing body is treated as "no embedders configured".
  const configured = body || {};
  return Boolean(configured[embedderName]);
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user