From efa57c4ba82b2ebcd337e9bbd2747df67b29bb8d Mon Sep 17 00:00:00 2001 From: Francisco Gaona Date: Sun, 12 Apr 2026 11:09:43 +0200 Subject: [PATCH] WIP - semantic linking seems to be working fine --- .../services/semantic-orchestrator.service.ts | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/backend/src/knowledge/services/semantic-orchestrator.service.ts b/backend/src/knowledge/services/semantic-orchestrator.service.ts index c0d5d3e..23a9491 100644 --- a/backend/src/knowledge/services/semantic-orchestrator.service.ts +++ b/backend/src/knowledge/services/semantic-orchestrator.service.ts @@ -18,7 +18,8 @@ export class SemanticOrchestratorService { private readonly defaultEmbeddingModel = process.env.OPENAI_EMBEDDING_MODEL || 'text-embedding-3-small'; private readonly semanticEmbedderName = 'default'; - private readonly MIN_SEMANTIC_CONFIDENCE = 0.7; + private readonly MIN_CONFIDENCE_BASE = 0.7; + private readonly MIN_CONFIDENCE_COMMENT = 0.52; constructor( private readonly tenantDbService: TenantDatabaseService, @@ -234,7 +235,12 @@ export class SemanticOrchestratorService { } const indexName = this.meilisearchService.buildSemanticChunkIndexName(tenantId); - const queryText = chunks.slice(0, 3).map((chunk) => chunk.text).join(' ').slice(0, 1200); + // Build query from all chunks (base record + comments), prioritising comments + // since they carry the most distinctive semantic signal. + const commentChunks = chunks.filter((c) => c.sourceKind === 'comment'); + const baseChunks = chunks.filter((c) => c.sourceKind !== 'comment'); + const orderedChunks = [...commentChunks, ...baseChunks]; + const queryText = orderedChunks.map((chunk) => chunk.text).join(' ').slice(0, 1200); this.logger.log( `Generating suggestions: index=${indexName} queryLen=${queryText.length} hybrid=${embedderReady}`, ); @@ -256,13 +262,17 @@ export class SemanticOrchestratorService { if (hit.entityId === projection.entityId) continue; const confidence = hit._semanticScore ?? hit._rankingScore ?? 0; + // Use a lower threshold for comment chunks (short, conversational text + // naturally produces lower cosine similarity than structured field values). + const isComment = hit.sourceKind === 'comment'; + const threshold = isComment ? this.MIN_CONFIDENCE_COMMENT : this.MIN_CONFIDENCE_BASE; this.logger.log( - `Suggestion candidate: ${hit.entityType}:${hit.entityId} confidence=${confidence.toFixed(4)} text="${String(hit.text || '').substring(0, 60)}"`, + `Suggestion candidate: ${hit.entityType}:${hit.entityId} confidence=${confidence.toFixed(4)} kind=${hit.sourceKind || 'base'} threshold=${threshold} text="${String(hit.text || '').substring(0, 60)}"`, ); - if (confidence < this.MIN_SEMANTIC_CONFIDENCE) { + if (confidence < threshold) { this.logger.log( - `Skipping low-confidence match: ${hit.entityType}:${hit.entityId} confidence=${confidence.toFixed(4)} < ${this.MIN_SEMANTIC_CONFIDENCE}`, + `Skipping low-confidence match: ${hit.entityType}:${hit.entityId} confidence=${confidence.toFixed(4)} < ${threshold} (${isComment ? 'comment' : 'base'})`, ); continue; } @@ -274,7 +284,7 @@ export class SemanticOrchestratorService { } } - this.logger.log(`Filtered suggestions: ${candidates.size} passed threshold ${this.MIN_SEMANTIC_CONFIDENCE}`); + this.logger.log(`Filtered suggestions: ${candidates.size} passed thresholds (base=${this.MIN_CONFIDENCE_BASE}, comment=${this.MIN_CONFIDENCE_COMMENT})`); const resolvedTenantId = await this.tenantDbService.resolveTenantId(tenantId); const knex = await this.tenantDbService.getTenantKnexById(resolvedTenantId);