WIP: semantic linking working with the name field only
This commit is contained in:
@@ -53,9 +53,14 @@ export class DefaultSemanticProjectionAdapter implements SemanticProjectionAdapt
|
||||
input.record?.subject ||
|
||||
`${input.objectApiName} ${input.record?.id || ''}`.trim();
|
||||
|
||||
/*
|
||||
const fieldNarrative = fieldEntries
|
||||
.map(([key, value]) => `${key}: ${String(value)}`)
|
||||
.join('\n');
|
||||
*/
|
||||
const fieldNarrative = fieldEntries
|
||||
.map(([key, value]) => `${String(value)}`)
|
||||
.join('\n');
|
||||
|
||||
const commentNarrative = (input.comments || [])
|
||||
.map((comment, index) => `Comment ${index + 1}: ${comment.content}`)
|
||||
@@ -63,10 +68,9 @@ export class DefaultSemanticProjectionAdapter implements SemanticProjectionAdapt
|
||||
|
||||
const narrative = [fieldNarrative, commentNarrative].filter(Boolean).join('\n\n');
|
||||
|
||||
// Plain values only — no 'key:' prefixes, no comments (chunker adds those separately).
|
||||
const embeddingNarrative = fieldEntries
|
||||
.map(([, value]) => String(value))
|
||||
.join('\n');
|
||||
// Temporary: use only the name field for embedding to test pure semantic matching.
|
||||
// Widen this back to all field values once semantic results are validated.
|
||||
const embeddingNarrative = String(input.record?.name || title);
|
||||
|
||||
return {
|
||||
entityType: input.objectApiName,
|
||||
|
||||
@@ -18,6 +18,7 @@ export class SemanticOrchestratorService {
|
||||
private readonly defaultEmbeddingModel =
|
||||
process.env.OPENAI_EMBEDDING_MODEL || 'text-embedding-3-small';
|
||||
private readonly semanticEmbedderName = 'default';
|
||||
private readonly MIN_SEMANTIC_CONFIDENCE = 0.7;
|
||||
|
||||
constructor(
|
||||
private readonly tenantDbService: TenantDatabaseService,
|
||||
@@ -71,10 +72,9 @@ export class SemanticOrchestratorService {
|
||||
});
|
||||
|
||||
const documentId = await this.upsertSemanticDocument(knex, projection);
|
||||
// Use embeddingNarrative (plain values, no labels) so lexical noise from 'key:'
|
||||
// prefixes doesn't inflate match scores. Comments are passed separately so they
|
||||
// are not double-counted (narrative already embeds them with 'Comment N:' prefix).
|
||||
const chunks = this.chunkerService.chunkText(projection.embeddingNarrative, comments);
|
||||
// Temporary: pass empty comments so only embeddingNarrative (name field) is indexed.
|
||||
// Re-enable by replacing [] with `comments` once semantic matching is validated.
|
||||
const chunks = this.chunkerService.chunkText(projection.embeddingNarrative, []);
|
||||
this.logger.log(
|
||||
`Semantic refresh chunking: ${objectApiName}:${recordId} chunks=${chunks.length}`,
|
||||
);
|
||||
@@ -252,26 +252,37 @@ export class SemanticOrchestratorService {
|
||||
`Meilisearch results: index=${indexName} hits=${search.hits?.length || 0} total=${search.total}`,
|
||||
);
|
||||
|
||||
const grouped = new Map<string, any[]>();
|
||||
const candidates = new Map<string, { hit: any; confidence: number }>();
|
||||
for (const hit of search.hits || []) {
|
||||
if (hit.entityType === projection.entityType && hit.entityId === projection.entityId) {
|
||||
continue;
|
||||
}
|
||||
// Skip self-links where source and target resolve to the same entity
|
||||
if (hit.entityId === projection.entityId) {
|
||||
// Skip self
|
||||
if (hit.entityId === projection.entityId) continue;
|
||||
|
||||
const confidence = hit._semanticScore ?? hit._rankingScore ?? 0;
|
||||
this.logger.log(
|
||||
`Suggestion candidate: ${hit.entityType}:${hit.entityId} confidence=${confidence.toFixed(4)} text="${String(hit.text || '').substring(0, 60)}"`,
|
||||
);
|
||||
|
||||
if (confidence < this.MIN_SEMANTIC_CONFIDENCE) {
|
||||
this.logger.log(
|
||||
`Skipping low-confidence match: ${hit.entityType}:${hit.entityId} confidence=${confidence.toFixed(4)} < ${this.MIN_SEMANTIC_CONFIDENCE}`,
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
const key = `${hit.entityType}:${hit.entityId}`;
|
||||
if (!grouped.has(key)) grouped.set(key, []);
|
||||
grouped.get(key).push(hit);
|
||||
const existing = candidates.get(key);
|
||||
if (!existing || confidence > existing.confidence) {
|
||||
candidates.set(key, { hit, confidence });
|
||||
}
|
||||
}
|
||||
|
||||
this.logger.log(`Filtered suggestions: ${candidates.size} passed threshold ${this.MIN_SEMANTIC_CONFIDENCE}`);
|
||||
|
||||
const resolvedTenantId = await this.tenantDbService.resolveTenantId(tenantId);
|
||||
const knex = await this.tenantDbService.getTenantKnexById(resolvedTenantId);
|
||||
|
||||
for (const [key, hits] of grouped.entries()) {
|
||||
for (const [key, { hit, confidence }] of candidates.entries()) {
|
||||
const [targetType, targetId] = key.split(':');
|
||||
const confidence = Math.min(0.99, 0.3 + hits.length * 0.1);
|
||||
await this.semanticLinkService.upsertSuggestedLink(knex, {
|
||||
sourceEntityType: projection.entityType,
|
||||
sourceEntityId: projection.entityId,
|
||||
@@ -288,10 +299,11 @@ export class SemanticOrchestratorService {
|
||||
sourceKind: chunk.sourceKind,
|
||||
text: chunk.text.slice(0, 180),
|
||||
})),
|
||||
matchedChunks: hits.slice(0, 3).map((hit) => ({
|
||||
matchedChunks: [{
|
||||
sourceKind: hit.sourceKind,
|
||||
text: String(hit.text || '').slice(0, 180),
|
||||
})),
|
||||
score: confidence,
|
||||
}],
|
||||
},
|
||||
suggestedByUserId: userId || null,
|
||||
});
|
||||
|
||||
@@ -220,7 +220,8 @@ export class MeilisearchService {
|
||||
{
|
||||
q: query,
|
||||
limit,
|
||||
...(hybrid ? { hybrid } : {}),
|
||||
showRankingScore: true,
|
||||
...(hybrid ? { hybrid, showRankingScoreDetails: true } : {}),
|
||||
},
|
||||
this.buildHeaders(config),
|
||||
);
|
||||
|
||||
Reference in New Issue
Block a user