WIP: semantic linking appears to be working (separate confidence thresholds for comment vs. base chunks; query text is now built comment-first)

This commit is contained in:
Francisco Gaona
2026-04-12 11:09:43 +02:00
parent 3f9be316ce
commit efa57c4ba8

View File

@@ -18,7 +18,8 @@ export class SemanticOrchestratorService {
private readonly defaultEmbeddingModel =
process.env.OPENAI_EMBEDDING_MODEL || 'text-embedding-3-small';
private readonly semanticEmbedderName = 'default';
private readonly MIN_SEMANTIC_CONFIDENCE = 0.7;
private readonly MIN_CONFIDENCE_BASE = 0.7;
private readonly MIN_CONFIDENCE_COMMENT = 0.52;
constructor(
private readonly tenantDbService: TenantDatabaseService,
@@ -234,7 +235,12 @@ export class SemanticOrchestratorService {
}
const indexName = this.meilisearchService.buildSemanticChunkIndexName(tenantId);
const queryText = chunks.slice(0, 3).map((chunk) => chunk.text).join(' ').slice(0, 1200);
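// Build the query from all chunks (base record + comments), placing comment
// chunks first: the joined text is truncated to 1200 characters below, so only
// the leading chunks are guaranteed to reach the query, and comments carry the
// most distinctive semantic signal.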
const commentChunks = chunks.filter((c) => c.sourceKind === 'comment');
const baseChunks = chunks.filter((c) => c.sourceKind !== 'comment');
const orderedChunks = [...commentChunks, ...baseChunks];
const queryText = orderedChunks.map((chunk) => chunk.text).join(' ').slice(0, 1200);
this.logger.log(
`Generating suggestions: index=${indexName} queryLen=${queryText.length} hybrid=${embedderReady}`,
);
@@ -256,13 +262,17 @@ export class SemanticOrchestratorService {
if (hit.entityId === projection.entityId) continue;
const confidence = hit._semanticScore ?? hit._rankingScore ?? 0;
// Use a lower threshold for comment chunks (short, conversational text
// naturally produces lower cosine similarity than structured field values).
const isComment = hit.sourceKind === 'comment';
const threshold = isComment ? this.MIN_CONFIDENCE_COMMENT : this.MIN_CONFIDENCE_BASE;
this.logger.log(
`Suggestion candidate: ${hit.entityType}:${hit.entityId} confidence=${confidence.toFixed(4)} text="${String(hit.text || '').substring(0, 60)}"`,
`Suggestion candidate: ${hit.entityType}:${hit.entityId} confidence=${confidence.toFixed(4)} kind=${hit.sourceKind || 'base'} threshold=${threshold} text="${String(hit.text || '').substring(0, 60)}"`,
);
if (confidence < this.MIN_SEMANTIC_CONFIDENCE) {
if (confidence < threshold) {
this.logger.log(
`Skipping low-confidence match: ${hit.entityType}:${hit.entityId} confidence=${confidence.toFixed(4)} < ${this.MIN_SEMANTIC_CONFIDENCE}`,
`Skipping low-confidence match: ${hit.entityType}:${hit.entityId} confidence=${confidence.toFixed(4)} < ${threshold} (${isComment ? 'comment' : 'base'})`,
);
continue;
}
@@ -274,7 +284,7 @@ export class SemanticOrchestratorService {
}
}
this.logger.log(`Filtered suggestions: ${candidates.size} passed threshold ${this.MIN_SEMANTIC_CONFIDENCE}`);
this.logger.log(`Filtered suggestions: ${candidates.size} passed thresholds (base=${this.MIN_CONFIDENCE_BASE}, comment=${this.MIN_CONFIDENCE_COMMENT})`);
const resolvedTenantId = await this.tenantDbService.resolveTenantId(tenantId);
const knex = await this.tenantDbService.getTenantKnexById(resolvedTenantId);