From 385a842ab8cc0f8883d9dcd1511a6aa4424b4889 Mon Sep 17 00:00:00 2001 From: Francisco Gaona Date: Sun, 12 Apr 2026 09:26:23 +0200 Subject: [PATCH] WIP - semantic linking working with just name --- .../adapters/semantic-projection.adapter.ts | 12 +++-- .../services/semantic-orchestrator.service.ts | 44 ++++++++++++------- backend/src/search/meilisearch.service.ts | 3 +- 3 files changed, 38 insertions(+), 21 deletions(-) diff --git a/backend/src/knowledge/adapters/semantic-projection.adapter.ts b/backend/src/knowledge/adapters/semantic-projection.adapter.ts index 9e21912..e74ae52 100644 --- a/backend/src/knowledge/adapters/semantic-projection.adapter.ts +++ b/backend/src/knowledge/adapters/semantic-projection.adapter.ts @@ -53,9 +53,14 @@ export class DefaultSemanticProjectionAdapter implements SemanticProjectionAdapt input.record?.subject || `${input.objectApiName} ${input.record?.id || ''}`.trim(); + /* const fieldNarrative = fieldEntries .map(([key, value]) => `${key}: ${String(value)}`) .join('\n'); + */ + const fieldNarrative = fieldEntries + .map(([key, value]) => `${String(value)}`) + .join('\n'); const commentNarrative = (input.comments || []) .map((comment, index) => `Comment ${index + 1}: ${comment.content}`) @@ -63,10 +68,9 @@ export class DefaultSemanticProjectionAdapter implements SemanticProjectionAdapt const narrative = [fieldNarrative, commentNarrative].filter(Boolean).join('\n\n'); - // Plain values only — no 'key:' prefixes, no comments (chunker adds those separately). - const embeddingNarrative = fieldEntries - .map(([, value]) => String(value)) - .join('\n'); + // Temporary: use only the name field for embedding to test pure semantic matching. + // Widen this back to all field values once semantic results are validated. + const embeddingNarrative = String(input.record?.name || title); return { entityType: input.objectApiName, diff --git a/backend/src/knowledge/services/semantic-orchestrator.service.ts b/backend/src/knowledge/services/semantic-orchestrator.service.ts index d5911ed..7f93d23 100644 --- a/backend/src/knowledge/services/semantic-orchestrator.service.ts +++ b/backend/src/knowledge/services/semantic-orchestrator.service.ts @@ -18,6 +18,7 @@ export class SemanticOrchestratorService { private readonly defaultEmbeddingModel = process.env.OPENAI_EMBEDDING_MODEL || 'text-embedding-3-small'; private readonly semanticEmbedderName = 'default'; + private readonly MIN_SEMANTIC_CONFIDENCE = 0.7; constructor( private readonly tenantDbService: TenantDatabaseService, @@ -71,10 +72,9 @@ export class SemanticOrchestratorService { }); const documentId = await this.upsertSemanticDocument(knex, projection); - // Use embeddingNarrative (plain values, no labels) so lexical noise from 'key:' - // prefixes doesn't inflate match scores. Comments are passed separately so they - // are not double-counted (narrative already embeds them with 'Comment N:' prefix). - const chunks = this.chunkerService.chunkText(projection.embeddingNarrative, comments); + // Temporary: pass empty comments so only embeddingNarrative (name field) is indexed. + // Re-enable by replacing [] with `comments` once semantic matching is validated. + const chunks = this.chunkerService.chunkText(projection.embeddingNarrative, []); this.logger.log( `Semantic refresh chunking: ${objectApiName}:${recordId} chunks=${chunks.length}`, ); @@ -252,26 +252,37 @@ export class SemanticOrchestratorService { `Meilisearch results: index=${indexName} hits=${search.hits?.length || 0} total=${search.total}`, ); - const grouped = new Map(); + const candidates = new Map(); for (const hit of search.hits || []) { - if (hit.entityType === projection.entityType && hit.entityId === projection.entityId) { - continue; - } - // Skip self-links where source and target resolve to the same entity - if (hit.entityId === projection.entityId) { + // Skip self + if (hit.entityId === projection.entityId) continue; + + const confidence = hit._semanticScore ?? hit._rankingScore ?? 0; + this.logger.log( + `Suggestion candidate: ${hit.entityType}:${hit.entityId} confidence=${confidence.toFixed(4)} text="${String(hit.text || '').substring(0, 60)}"`, + ); + + if (confidence < this.MIN_SEMANTIC_CONFIDENCE) { + this.logger.log( + `Skipping low-confidence match: ${hit.entityType}:${hit.entityId} confidence=${confidence.toFixed(4)} < ${this.MIN_SEMANTIC_CONFIDENCE}`, + ); continue; } + const key = `${hit.entityType}:${hit.entityId}`; - if (!grouped.has(key)) grouped.set(key, []); - grouped.get(key).push(hit); + const existing = candidates.get(key); + if (!existing || confidence > existing.confidence) { + candidates.set(key, { hit, confidence }); + } } + this.logger.log(`Filtered suggestions: ${candidates.size} passed threshold ${this.MIN_SEMANTIC_CONFIDENCE}`); + const resolvedTenantId = await this.tenantDbService.resolveTenantId(tenantId); const knex = await this.tenantDbService.getTenantKnexById(resolvedTenantId); - for (const [key, hits] of grouped.entries()) { + for (const [key, { hit, confidence }] of candidates.entries()) { const [targetType, targetId] = key.split(':'); - const confidence = Math.min(0.99, 0.3 + hits.length * 0.1); await this.semanticLinkService.upsertSuggestedLink(knex, { sourceEntityType: projection.entityType, sourceEntityId: projection.entityId, @@ -288,10 +299,11 @@ export class SemanticOrchestratorService { sourceKind: chunk.sourceKind, text: chunk.text.slice(0, 180), })), - matchedChunks: hits.slice(0, 3).map((hit) => ({ + matchedChunks: [{ sourceKind: hit.sourceKind, text: String(hit.text || '').slice(0, 180), - })), + score: confidence, + }], }, suggestedByUserId: userId || null, }); diff --git a/backend/src/search/meilisearch.service.ts b/backend/src/search/meilisearch.service.ts index 2381f82..6377edd 100644 --- a/backend/src/search/meilisearch.service.ts +++ b/backend/src/search/meilisearch.service.ts @@ -220,7 +220,8 @@ export class MeilisearchService { { q: query, limit, - ...(hybrid ? { hybrid } : {}), + showRankingScore: true, + ...(hybrid ? { hybrid, showRankingScoreDetails: true } : {}), }, this.buildHeaders(config), );