diff --git a/backend/src/knowledge/adapters/semantic-projection.adapter.ts b/backend/src/knowledge/adapters/semantic-projection.adapter.ts index f49edff..9e21912 100644 --- a/backend/src/knowledge/adapters/semantic-projection.adapter.ts +++ b/backend/src/knowledge/adapters/semantic-projection.adapter.ts @@ -10,6 +10,8 @@ export type SemanticProjection = { entityId: string; title: string; narrative: string; + /** Plain text used for embedding — no 'key: value' labels, no comments (chunker handles those separately). */ + embeddingNarrative: string; metadata: Record; sourceSummary: { includedFieldCount: number; @@ -61,11 +63,17 @@ export class DefaultSemanticProjectionAdapter implements SemanticProjectionAdapt const narrative = [fieldNarrative, commentNarrative].filter(Boolean).join('\n\n'); + // Plain values only — no 'key:' prefixes, no comments (chunker adds those separately). + const embeddingNarrative = fieldEntries + .map(([, value]) => String(value)) + .join('\n'); + return { entityType: input.objectApiName, entityId: input.record.id, title, narrative, + embeddingNarrative, metadata: { objectApiName: input.objectApiName, hasComments: (input.comments || []).length > 0, diff --git a/backend/src/knowledge/services/comment.service.ts b/backend/src/knowledge/services/comment.service.ts index 8c35ad6..ad1e2ac 100644 --- a/backend/src/knowledge/services/comment.service.ts +++ b/backend/src/knowledge/services/comment.service.ts @@ -33,6 +33,9 @@ export class CommentService { }) .returning('*'); + console.log( + `[Knowledge] Comment created: ${dto.parentObjectApiName}:${dto.parentRecordId} by ${userId}`, + ); await this.semanticOrchestratorService.refreshRecord( tenantId, dto.parentObjectApiName, @@ -63,6 +66,9 @@ export class CommentService { updated_at: knex.fn.now(), }); + console.log( + `[Knowledge] Comment updated: ${existing.parent_object_api_name}:${existing.parent_record_id} by ${userId}`, + ); await this.semanticOrchestratorService.refreshRecord( tenantId, existing.parent_object_api_name, @@ -88,6 +94,9 @@ export class CommentService { await knex('comments').where({ id: commentId }).delete(); + console.log( + `[Knowledge] Comment deleted: ${existing.parent_object_api_name}:${existing.parent_record_id} by ${userId}`, + ); await this.semanticOrchestratorService.refreshRecord( tenantId, existing.parent_object_api_name, diff --git a/backend/src/knowledge/services/semantic-link.service.ts b/backend/src/knowledge/services/semantic-link.service.ts index 5ee9cc1..589611c 100644 --- a/backend/src/knowledge/services/semantic-link.service.ts +++ b/backend/src/knowledge/services/semantic-link.service.ts @@ -51,7 +51,10 @@ export class SemanticLinkService { ); const payload = { - ...normalized, + source_entity_type: normalized.sourceEntityType, + source_entity_id: normalized.sourceEntityId, + target_entity_type: normalized.targetEntityType, + target_entity_id: normalized.targetEntityId, link_type: input.linkType || 'related_to', status: input.status || 'suggested', origin: input.origin || 'semantic', diff --git a/backend/src/knowledge/services/semantic-orchestrator.service.ts b/backend/src/knowledge/services/semantic-orchestrator.service.ts index af5894c..d5911ed 100644 --- a/backend/src/knowledge/services/semantic-orchestrator.service.ts +++ b/backend/src/knowledge/services/semantic-orchestrator.service.ts @@ -3,6 +3,7 @@ import { TenantDatabaseService } from '../../tenant/tenant-database.service'; import { MeilisearchService } from '../../search/meilisearch.service'; import { getCentralPrisma } from '../../prisma/central-prisma.service'; import { OpenAIConfig } from '../../voice/interfaces/integration-config.interface'; +import { randomUUID } from 'crypto'; import { DefaultSemanticProjectionAdapter, SemanticProjectionAdapter, @@ -16,7 +17,7 @@ export class SemanticOrchestratorService { private readonly adapters: SemanticProjectionAdapter[] = [new DefaultSemanticProjectionAdapter()]; private readonly defaultEmbeddingModel = process.env.OPENAI_EMBEDDING_MODEL || 'text-embedding-3-small'; - private readonly semanticEmbedderName = 'semantic-openai'; + private readonly semanticEmbedderName = 'default'; constructor( private readonly tenantDbService: TenantDatabaseService, @@ -32,6 +33,9 @@ export class SemanticOrchestratorService { userId?: string, trigger: string = 'manual', ) { + this.logger.log( + `Semantic refresh start: ${objectApiName}:${recordId} (trigger=${trigger})`, + ); const resolvedTenantId = await this.tenantDbService.resolveTenantId(tenantId); const knex = await this.tenantDbService.getTenantKnexById(resolvedTenantId); @@ -44,6 +48,7 @@ export class SemanticOrchestratorService { const tableName = this.getTableName(objectDefinition); const record = await knex(tableName).where({ id: recordId }).first(); if (!record) { + this.logger.warn(`Record not found for semantic refresh: ${objectApiName}:${recordId}`); return { skipped: true }; } @@ -53,6 +58,9 @@ export class SemanticOrchestratorService { parent_record_id: recordId, }) .orderBy('created_at', 'asc'); + this.logger.log( + `Semantic refresh source: ${objectApiName}:${recordId} comments=${comments.length}`, + ); const adapter = this.adapters.find((candidate) => candidate.supports(objectApiName))!; const projection = adapter.buildProjection({ @@ -63,13 +71,30 @@ export class SemanticOrchestratorService { }); const documentId = await this.upsertSemanticDocument(knex, projection); - const chunks = this.chunkerService.chunkText(projection.narrative, comments); + // Use embeddingNarrative (plain values, no labels) so lexical noise from 'key:' + // prefixes doesn't inflate match scores. Comments are passed separately so they + // are not double-counted (narrative already embeds them with 'Comment N:' prefix). + const chunks = this.chunkerService.chunkText(projection.embeddingNarrative, comments); + this.logger.log( + `Semantic refresh chunking: ${objectApiName}:${recordId} chunks=${chunks.length}`, + ); await this.replaceChunks(knex, documentId, chunks); const openAiConfig = await this.getOpenAiConfig(resolvedTenantId); - await this.indexChunks(resolvedTenantId, projection, chunks, openAiConfig); - await this.generateSuggestions(resolvedTenantId, projection, chunks, openAiConfig, userId, trigger); + const embedderReady = await this.indexChunks(resolvedTenantId, projection, chunks, openAiConfig); + await this.generateSuggestions( + resolvedTenantId, + projection, + chunks, + openAiConfig, + embedderReady, + userId, + trigger, + ); + this.logger.log( + `Semantic refresh complete: ${objectApiName}:${recordId} document=${documentId}`, + ); return { documentId, chunkCount: chunks.length }; } @@ -111,8 +136,10 @@ export class SemanticOrchestratorService { return existing.id; } + const newId = randomUUID(); const [created] = await knex('semantic_documents') .insert({ + id: newId, entity_type: projection.entityType, entity_id: projection.entityId, title: projection.title, @@ -124,10 +151,18 @@ export class SemanticOrchestratorService { }) .returning('id'); - return typeof created === 'string' ? created : created.id; + if (created && typeof created === 'object' && created.id) { + return created.id; + } + // MySQL may return a numeric insert id (often 0 for UUID PKs). Always trust the generated UUID. + return newId; } private async replaceChunks(knex: any, documentId: string, chunks: any[]) { + if (!documentId) { + this.logger.warn('Skipping chunk replace: missing semantic document id.'); + return; + } await knex('semantic_chunks').where({ semantic_document_id: documentId }).delete(); if (!chunks.length) return; @@ -152,20 +187,28 @@ export class SemanticOrchestratorService { openAiConfig: OpenAIConfig | null, ) { if (!this.meilisearchService.isEnabled()) { - return; + this.logger.warn('Meilisearch disabled; skipping semantic chunk indexing.'); + return false; } const indexName = this.meilisearchService.buildSemanticChunkIndexName(tenantId); + let embedderReady = false; if (openAiConfig?.apiKey) { - await this.meilisearchService.ensureOpenAiEmbedder(indexName, { + embedderReady = await this.meilisearchService.ensureOpenAiEmbedder(indexName, { embedderName: this.semanticEmbedderName, apiKey: openAiConfig.apiKey, model: openAiConfig.embeddingModel || this.defaultEmbeddingModel, documentTemplate: '{{doc.title}}\n{{doc.text}}', }); + this.logger.log( + `Meilisearch embedder ensured: index=${indexName} model=${openAiConfig.embeddingModel || this.defaultEmbeddingModel}`, + ); + } else { + this.logger.warn('OpenAI embedder not configured; semantic search will be lexical only.'); } + this.logger.log(`Indexing semantic chunks: index=${indexName} count=${chunks.length}`); await this.meilisearchService.upsertDocuments(indexName, chunks.map((chunk) => ({ - id: `${projection.entityType}:${projection.entityId}:${chunk.chunkIndex}`, + id: `${projection.entityType}_${projection.entityId}_${chunk.chunkIndex}`, entityType: projection.entityType, entityId: projection.entityId, title: projection.title, @@ -173,6 +216,7 @@ export class SemanticOrchestratorService { sourceRefId: chunk.sourceRefId, text: chunk.text, }))); + return embedderReady; } private async generateSuggestions( @@ -180,20 +224,32 @@ export class SemanticOrchestratorService { projection: any, chunks: any[], openAiConfig: OpenAIConfig | null, + embedderReady: boolean, userId?: string, trigger: string = 'semantic_refresh', ) { if (!this.meilisearchService.isEnabled() || !chunks.length) { + this.logger.warn( + `Skipping suggestion generation: meili=${this.meilisearchService.isEnabled()} chunks=${chunks.length}`, + ); return; } const indexName = this.meilisearchService.buildSemanticChunkIndexName(tenantId); const queryText = chunks.slice(0, 3).map((chunk) => chunk.text).join(' ').slice(0, 1200); + this.logger.log( + `Generating suggestions: index=${indexName} queryLen=${queryText.length} hybrid=${embedderReady}`, + ); const search = await this.meilisearchService.searchIndex( indexName, queryText, 20, - openAiConfig?.apiKey ? { embedder: this.semanticEmbedderName } : undefined, + // semanticRatio:1.0 = pure vector search, no lexical component that would + // match on shared tokens like 'name:' or 'Comment 1:' across all records. + embedderReady ? { embedder: this.semanticEmbedderName, semanticRatio: 1.0 } : undefined, + ); + this.logger.log( + `Meilisearch results: index=${indexName} hits=${search.hits?.length || 0} total=${search.total}`, ); const grouped = new Map(); @@ -201,6 +257,10 @@ export class SemanticOrchestratorService { if (hit.entityType === projection.entityType && hit.entityId === projection.entityId) { continue; } + // Skip self-links where source and target resolve to the same entity + if (hit.entityId === projection.entityId) { + continue; + } const key = `${hit.entityType}:${hit.entityId}`; if (!grouped.has(key)) grouped.set(key, []); grouped.get(key).push(hit); diff --git a/backend/src/search/meilisearch.service.ts b/backend/src/search/meilisearch.service.ts index 5549366..2381f82 100644 --- a/backend/src/search/meilisearch.service.ts +++ b/backend/src/search/meilisearch.service.ts @@ -24,6 +24,7 @@ type OpenAiEmbedderConfig = { export class MeilisearchService { private readonly logger = new Logger(MeilisearchService.name); private readonly embedderCache = new Map(); + private vectorStoreEnabled = false; isEnabled(): boolean { return Boolean(this.getConfig()); @@ -186,6 +187,16 @@ export class MeilisearchService { const response = await this.requestJson('POST', url, documents, this.buildHeaders(config)); if (!this.isSuccessStatus(response.status)) { this.logger.warn(`Meilisearch document upsert failed for index ${indexName}: ${response.status}`); + return; + } + // Meilisearch indexes (and embeds) documents asynchronously. Wait for the task + // to complete so callers can immediately search and see the new documents. + const taskUid = response.body?.taskUid ?? response.body?.uid; + if (Number.isFinite(Number(taskUid))) { + const succeeded = await this.waitForTask(config, Number(taskUid), 30000); + if (!succeeded) { + this.logger.warn(`Meilisearch indexing task did not succeed within timeout: taskUid=${taskUid} index=${indexName}`); + } } } catch (error) { this.logger.warn(`Meilisearch document upsert failed: ${error.message}`); @@ -215,7 +226,33 @@ export class MeilisearchService { ); if (!this.isSuccessStatus(response.status)) { - this.logger.warn(`Meilisearch search failed for index ${indexName}: ${response.status}`); + this.logger.warn( + `Meilisearch search failed for index ${indexName}: ${response.status}`, + ); + this.logger.warn( + `Meilisearch search payload: ${JSON.stringify({ q: query, limit, hybrid })}`, + ); + this.logger.warn( + `Meilisearch search error body: ${JSON.stringify(response.body)}`, + ); + // If hybrid is invalid (embedder missing), retry once without hybrid + if (hybrid && response.body?.code === 'invalid_embedder') { + const fallback = await this.requestJson( + 'POST', + url, + { q: query, limit }, + this.buildHeaders(config), + ); + if (this.isSuccessStatus(fallback.status)) { + const hits = Array.isArray(fallback.body?.hits) ? fallback.body.hits : []; + const total = + fallback.body?.estimatedTotalHits ?? fallback.body?.nbHits ?? hits.length; + this.logger.warn( + `Meilisearch hybrid failed; fell back to lexical search for index ${indexName}.`, + ); + return { hits, total }; + } + } return { hits: [], total: 0 }; } @@ -268,7 +305,7 @@ export class MeilisearchService { } private requestJson( - method: 'POST' | 'DELETE' | 'PATCH', + method: 'POST' | 'DELETE' | 'PATCH' | 'GET', url: string, payload: any, headers: Record, @@ -305,19 +342,49 @@ export class MeilisearchService { ); request.on('error', reject); - if (payload !== undefined) { + if (payload !== undefined && method !== 'GET') { request.write(JSON.stringify(payload)); } request.end(); }); } + private async enableVectorStore(): Promise { + // Temporarily disabled to avoid the overhead of checking on every save. + // Re-enable by removing the early return below. + return; + if (this.vectorStoreEnabled) return; // eslint-disable-line no-unreachable + const meiliConfig = this.getConfig(); + if (!meiliConfig) return; + const url = `${meiliConfig.host}/experimental-features`; + try { + const response = await this.requestJson( + 'PATCH', + url, + { vectorStore: true }, + this.buildHeaders(meiliConfig), + ); + if (this.isSuccessStatus(response.status)) { + this.vectorStoreEnabled = true; + this.logger.log('Meilisearch vector store experimental feature enabled'); + } else { + this.logger.warn( + `Failed to enable Meilisearch vector store: ${response.status} ${JSON.stringify(response.body)}`, + ); + } + } catch (error) { + this.logger.warn(`Failed to enable Meilisearch vector store: ${error.message}`); + } + } + async ensureOpenAiEmbedder( indexName: string, config: OpenAiEmbedderConfig, - ): Promise { + ): Promise { const meiliConfig = this.getConfig(); - if (!meiliConfig || !config?.apiKey) return; + if (!meiliConfig || !config?.apiKey) return false; + + await this.enableVectorStore(); const signature = JSON.stringify({ embedderName: config.embedderName, @@ -327,7 +394,7 @@ export class MeilisearchService { }); const cacheKey = `${indexName}:${config.embedderName}`; if (this.embedderCache.get(cacheKey) === signature) { - return; + return true; } const url = `${meiliConfig.host}/indexes/${encodeURIComponent(indexName)}/settings/embedders`; @@ -349,11 +416,67 @@ export class MeilisearchService { this.logger.warn( `Meilisearch embedder update failed for index ${indexName}: ${response.status}`, ); - return; + this.logger.warn( + `Meilisearch embedder error body: ${JSON.stringify(response.body)}`, + ); + return false; + } + const taskUid = response.body?.taskUid ?? response.body?.uid; + if (Number.isFinite(Number(taskUid))) { + const succeeded = await this.waitForTask(meiliConfig, Number(taskUid), 8000); + if (!succeeded) { + this.logger.warn(`Meilisearch embedder task did not succeed: ${taskUid}`); + return false; + } + } + + const hasEmbedder = await this.hasEmbedder(meiliConfig, indexName, config.embedderName); + if (!hasEmbedder) { + this.logger.warn(`Meilisearch embedder missing after update: ${config.embedderName}`); + return false; } this.embedderCache.set(cacheKey, signature); + return true; } catch (error) { this.logger.warn(`Meilisearch embedder update failed: ${error.message}`); + return false; } } + + private async waitForTask( + config: MeiliConfig, + taskUid: number, + timeoutMs = 8000, + ): Promise { + const url = `${config.host}/tasks/${taskUid}`; + const start = Date.now(); + while (Date.now() - start < timeoutMs) { + const response = await this.requestJson('GET', url, undefined, this.buildHeaders(config)); + if (!this.isSuccessStatus(response.status)) { + return false; + } + const status = response.body?.status; + if (status === 'succeeded') return true; + if (status === 'failed' || status === 'canceled') { + this.logger.warn(`Meilisearch task ${taskUid} failed: ${JSON.stringify(response.body?.error)}`); + return false; + } + await new Promise((resolve) => setTimeout(resolve, 300)); + } + return false; + } + + private async hasEmbedder( + config: MeiliConfig, + indexName: string, + embedderName: string, + ): Promise { + const url = `${config.host}/indexes/${encodeURIComponent(indexName)}/settings/embedders`; + const response = await this.requestJson('GET', url, undefined, this.buildHeaders(config)); + if (!this.isSuccessStatus(response.status)) { + return false; + } + const embedders = response.body || {}; + return Boolean(embedders && embedders[embedderName]); + } }