WIP - semantic linking working with just the name field

This commit is contained in:
Francisco Gaona
2026-04-12 09:26:23 +02:00
parent 320f8c4266
commit 385a842ab8
3 changed files with 38 additions and 21 deletions

View File

@@ -53,9 +53,14 @@ export class DefaultSemanticProjectionAdapter implements SemanticProjectionAdapt
input.record?.subject ||
`${input.objectApiName} ${input.record?.id || ''}`.trim();
/*
const fieldNarrative = fieldEntries
.map(([key, value]) => `${key}: ${String(value)}`)
.join('\n');
*/
const fieldNarrative = fieldEntries
.map(([key, value]) => `${String(value)}`)
.join('\n');
const commentNarrative = (input.comments || [])
.map((comment, index) => `Comment ${index + 1}: ${comment.content}`)
@@ -63,10 +68,9 @@ export class DefaultSemanticProjectionAdapter implements SemanticProjectionAdapt
const narrative = [fieldNarrative, commentNarrative].filter(Boolean).join('\n\n');
// Plain values only — no 'key:' prefixes, no comments (chunker adds those separately).
const embeddingNarrative = fieldEntries
.map(([, value]) => String(value))
.join('\n');
// Temporary: use only the name field for embedding to test pure semantic matching.
// Widen this back to all field values once semantic results are validated.
const embeddingNarrative = String(input.record?.name || title);
return {
entityType: input.objectApiName,

View File

@@ -18,6 +18,7 @@ export class SemanticOrchestratorService {
private readonly defaultEmbeddingModel =
process.env.OPENAI_EMBEDDING_MODEL || 'text-embedding-3-small';
private readonly semanticEmbedderName = 'default';
private readonly MIN_SEMANTIC_CONFIDENCE = 0.7;
constructor(
private readonly tenantDbService: TenantDatabaseService,
@@ -71,10 +72,9 @@ export class SemanticOrchestratorService {
});
const documentId = await this.upsertSemanticDocument(knex, projection);
// Use embeddingNarrative (plain values, no labels) so lexical noise from 'key:'
// prefixes doesn't inflate match scores. Comments are passed separately so they
// are not double-counted (narrative already embeds them with 'Comment N:' prefix).
const chunks = this.chunkerService.chunkText(projection.embeddingNarrative, comments);
// Temporary: pass empty comments so only embeddingNarrative (name field) is indexed.
// Re-enable by replacing [] with `comments` once semantic matching is validated.
const chunks = this.chunkerService.chunkText(projection.embeddingNarrative, []);
this.logger.log(
`Semantic refresh chunking: ${objectApiName}:${recordId} chunks=${chunks.length}`,
);
@@ -252,26 +252,37 @@ export class SemanticOrchestratorService {
`Meilisearch results: index=${indexName} hits=${search.hits?.length || 0} total=${search.total}`,
);
const grouped = new Map<string, any[]>();
const candidates = new Map<string, { hit: any; confidence: number }>();
for (const hit of search.hits || []) {
if (hit.entityType === projection.entityType && hit.entityId === projection.entityId) {
continue;
}
// Skip self-links where source and target resolve to the same entity
if (hit.entityId === projection.entityId) {
// Skip self
if (hit.entityId === projection.entityId) continue;
const confidence = hit._semanticScore ?? hit._rankingScore ?? 0;
this.logger.log(
`Suggestion candidate: ${hit.entityType}:${hit.entityId} confidence=${confidence.toFixed(4)} text="${String(hit.text || '').substring(0, 60)}"`,
);
if (confidence < this.MIN_SEMANTIC_CONFIDENCE) {
this.logger.log(
`Skipping low-confidence match: ${hit.entityType}:${hit.entityId} confidence=${confidence.toFixed(4)} < ${this.MIN_SEMANTIC_CONFIDENCE}`,
);
continue;
}
const key = `${hit.entityType}:${hit.entityId}`;
if (!grouped.has(key)) grouped.set(key, []);
grouped.get(key).push(hit);
const existing = candidates.get(key);
if (!existing || confidence > existing.confidence) {
candidates.set(key, { hit, confidence });
}
}
this.logger.log(`Filtered suggestions: ${candidates.size} passed threshold ${this.MIN_SEMANTIC_CONFIDENCE}`);
const resolvedTenantId = await this.tenantDbService.resolveTenantId(tenantId);
const knex = await this.tenantDbService.getTenantKnexById(resolvedTenantId);
for (const [key, hits] of grouped.entries()) {
for (const [key, { hit, confidence }] of candidates.entries()) {
const [targetType, targetId] = key.split(':');
const confidence = Math.min(0.99, 0.3 + hits.length * 0.1);
await this.semanticLinkService.upsertSuggestedLink(knex, {
sourceEntityType: projection.entityType,
sourceEntityId: projection.entityId,
@@ -288,10 +299,11 @@ export class SemanticOrchestratorService {
sourceKind: chunk.sourceKind,
text: chunk.text.slice(0, 180),
})),
matchedChunks: hits.slice(0, 3).map((hit) => ({
matchedChunks: [{
sourceKind: hit.sourceKind,
text: String(hit.text || '').slice(0, 180),
})),
score: confidence,
}],
},
suggestedByUserId: userId || null,
});

View File

@@ -220,7 +220,8 @@ export class MeilisearchService {
{
q: query,
limit,
...(hybrid ? { hybrid } : {}),
showRankingScore: true,
...(hybrid ? { hybrid, showRankingScoreDetails: true } : {}),
},
this.buildHeaders(config),
);