WIP - semantic linking seems to be working fine
This commit is contained in:
@@ -18,7 +18,8 @@ export class SemanticOrchestratorService {
|
||||
private readonly defaultEmbeddingModel =
|
||||
process.env.OPENAI_EMBEDDING_MODEL || 'text-embedding-3-small';
|
||||
private readonly semanticEmbedderName = 'default';
|
||||
private readonly MIN_SEMANTIC_CONFIDENCE = 0.7;
|
||||
private readonly MIN_CONFIDENCE_BASE = 0.7;
|
||||
private readonly MIN_CONFIDENCE_COMMENT = 0.52;
|
||||
|
||||
constructor(
|
||||
private readonly tenantDbService: TenantDatabaseService,
|
||||
@@ -234,7 +235,12 @@ export class SemanticOrchestratorService {
|
||||
}
|
||||
|
||||
const indexName = this.meilisearchService.buildSemanticChunkIndexName(tenantId);
|
||||
const queryText = chunks.slice(0, 3).map((chunk) => chunk.text).join(' ').slice(0, 1200);
|
||||
// Build the query from every chunk (base record + comments), placing comment
|
||||
// chunks first: the joined text is cut to 1200 characters, so this ordering keeps the comments, which carry the most distinctive semantic signal.
|
||||
const commentChunks = chunks.filter((c) => c.sourceKind === 'comment');
|
||||
const baseChunks = chunks.filter((c) => c.sourceKind !== 'comment');
|
||||
const orderedChunks = [...commentChunks, ...baseChunks];
|
||||
const queryText = orderedChunks.map((chunk) => chunk.text).join(' ').slice(0, 1200);
|
||||
this.logger.log(
|
||||
`Generating suggestions: index=${indexName} queryLen=${queryText.length} hybrid=${embedderReady}`,
|
||||
);
|
||||
@@ -256,13 +262,17 @@ export class SemanticOrchestratorService {
|
||||
if (hit.entityId === projection.entityId) continue;
|
||||
|
||||
const confidence = hit._semanticScore ?? hit._rankingScore ?? 0;
|
||||
// Use a lower threshold for comment chunks (short, conversational text
|
||||
// naturally produces lower cosine similarity than structured field values).
|
||||
const isComment = hit.sourceKind === 'comment';
|
||||
const threshold = isComment ? this.MIN_CONFIDENCE_COMMENT : this.MIN_CONFIDENCE_BASE;
|
||||
this.logger.log(
|
||||
`Suggestion candidate: ${hit.entityType}:${hit.entityId} confidence=${confidence.toFixed(4)} text="${String(hit.text || '').substring(0, 60)}"`,
|
||||
`Suggestion candidate: ${hit.entityType}:${hit.entityId} confidence=${confidence.toFixed(4)} kind=${hit.sourceKind || 'base'} threshold=${threshold} text="${String(hit.text || '').substring(0, 60)}"`,
|
||||
);
|
||||
|
||||
if (confidence < this.MIN_SEMANTIC_CONFIDENCE) {
|
||||
if (confidence < threshold) {
|
||||
this.logger.log(
|
||||
`Skipping low-confidence match: ${hit.entityType}:${hit.entityId} confidence=${confidence.toFixed(4)} < ${this.MIN_SEMANTIC_CONFIDENCE}`,
|
||||
`Skipping low-confidence match: ${hit.entityType}:${hit.entityId} confidence=${confidence.toFixed(4)} < ${threshold} (${isComment ? 'comment' : 'base'})`,
|
||||
);
|
||||
continue;
|
||||
}
|
||||
@@ -274,7 +284,7 @@ export class SemanticOrchestratorService {
|
||||
}
|
||||
}
|
||||
|
||||
this.logger.log(`Filtered suggestions: ${candidates.size} passed threshold ${this.MIN_SEMANTIC_CONFIDENCE}`);
|
||||
this.logger.log(`Filtered suggestions: ${candidates.size} passed thresholds (base=${this.MIN_CONFIDENCE_BASE}, comment=${this.MIN_CONFIDENCE_COMMENT})`);
|
||||
|
||||
const resolvedTenantId = await this.tenantDbService.resolveTenantId(tenantId);
|
||||
const knex = await this.tenantDbService.getTenantKnexById(resolvedTenantId);
|
||||
|
||||
Reference in New Issue
Block a user