WIP - semantic linking working with just name
This commit is contained in:
@@ -53,9 +53,14 @@ export class DefaultSemanticProjectionAdapter implements SemanticProjectionAdapt
|
|||||||
input.record?.subject ||
|
input.record?.subject ||
|
||||||
`${input.objectApiName} ${input.record?.id || ''}`.trim();
|
`${input.objectApiName} ${input.record?.id || ''}`.trim();
|
||||||
|
|
||||||
|
/*
|
||||||
const fieldNarrative = fieldEntries
|
const fieldNarrative = fieldEntries
|
||||||
.map(([key, value]) => `${key}: ${String(value)}`)
|
.map(([key, value]) => `${key}: ${String(value)}`)
|
||||||
.join('\n');
|
.join('\n');
|
||||||
|
*/
|
||||||
|
const fieldNarrative = fieldEntries
|
||||||
|
.map(([key, value]) => `${String(value)}`)
|
||||||
|
.join('\n');
|
||||||
|
|
||||||
const commentNarrative = (input.comments || [])
|
const commentNarrative = (input.comments || [])
|
||||||
.map((comment, index) => `Comment ${index + 1}: ${comment.content}`)
|
.map((comment, index) => `Comment ${index + 1}: ${comment.content}`)
|
||||||
@@ -63,10 +68,9 @@ export class DefaultSemanticProjectionAdapter implements SemanticProjectionAdapt
|
|||||||
|
|
||||||
const narrative = [fieldNarrative, commentNarrative].filter(Boolean).join('\n\n');
|
const narrative = [fieldNarrative, commentNarrative].filter(Boolean).join('\n\n');
|
||||||
|
|
||||||
// Plain values only — no 'key:' prefixes, no comments (chunker adds those separately).
|
// Temporary: use only the name field for embedding to test pure semantic matching.
|
||||||
const embeddingNarrative = fieldEntries
|
// Widen this back to all field values once semantic results are validated.
|
||||||
.map(([, value]) => String(value))
|
const embeddingNarrative = String(input.record?.name || title);
|
||||||
.join('\n');
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
entityType: input.objectApiName,
|
entityType: input.objectApiName,
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ export class SemanticOrchestratorService {
|
|||||||
private readonly defaultEmbeddingModel =
|
private readonly defaultEmbeddingModel =
|
||||||
process.env.OPENAI_EMBEDDING_MODEL || 'text-embedding-3-small';
|
process.env.OPENAI_EMBEDDING_MODEL || 'text-embedding-3-small';
|
||||||
private readonly semanticEmbedderName = 'default';
|
private readonly semanticEmbedderName = 'default';
|
||||||
|
private readonly MIN_SEMANTIC_CONFIDENCE = 0.7;
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
private readonly tenantDbService: TenantDatabaseService,
|
private readonly tenantDbService: TenantDatabaseService,
|
||||||
@@ -71,10 +72,9 @@ export class SemanticOrchestratorService {
|
|||||||
});
|
});
|
||||||
|
|
||||||
const documentId = await this.upsertSemanticDocument(knex, projection);
|
const documentId = await this.upsertSemanticDocument(knex, projection);
|
||||||
// Use embeddingNarrative (plain values, no labels) so lexical noise from 'key:'
|
// Temporary: pass empty comments so only embeddingNarrative (name field) is indexed.
|
||||||
// prefixes doesn't inflate match scores. Comments are passed separately so they
|
// Re-enable by replacing [] with `comments` once semantic matching is validated.
|
||||||
// are not double-counted (narrative already embeds them with 'Comment N:' prefix).
|
const chunks = this.chunkerService.chunkText(projection.embeddingNarrative, []);
|
||||||
const chunks = this.chunkerService.chunkText(projection.embeddingNarrative, comments);
|
|
||||||
this.logger.log(
|
this.logger.log(
|
||||||
`Semantic refresh chunking: ${objectApiName}:${recordId} chunks=${chunks.length}`,
|
`Semantic refresh chunking: ${objectApiName}:${recordId} chunks=${chunks.length}`,
|
||||||
);
|
);
|
||||||
@@ -252,26 +252,37 @@ export class SemanticOrchestratorService {
|
|||||||
`Meilisearch results: index=${indexName} hits=${search.hits?.length || 0} total=${search.total}`,
|
`Meilisearch results: index=${indexName} hits=${search.hits?.length || 0} total=${search.total}`,
|
||||||
);
|
);
|
||||||
|
|
||||||
const grouped = new Map<string, any[]>();
|
const candidates = new Map<string, { hit: any; confidence: number }>();
|
||||||
for (const hit of search.hits || []) {
|
for (const hit of search.hits || []) {
|
||||||
if (hit.entityType === projection.entityType && hit.entityId === projection.entityId) {
|
// Skip self
|
||||||
continue;
|
if (hit.entityId === projection.entityId) continue;
|
||||||
}
|
|
||||||
// Skip self-links where source and target resolve to the same entity
|
const confidence = hit._semanticScore ?? hit._rankingScore ?? 0;
|
||||||
if (hit.entityId === projection.entityId) {
|
this.logger.log(
|
||||||
|
`Suggestion candidate: ${hit.entityType}:${hit.entityId} confidence=${confidence.toFixed(4)} text="${String(hit.text || '').substring(0, 60)}"`,
|
||||||
|
);
|
||||||
|
|
||||||
|
if (confidence < this.MIN_SEMANTIC_CONFIDENCE) {
|
||||||
|
this.logger.log(
|
||||||
|
`Skipping low-confidence match: ${hit.entityType}:${hit.entityId} confidence=${confidence.toFixed(4)} < ${this.MIN_SEMANTIC_CONFIDENCE}`,
|
||||||
|
);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
const key = `${hit.entityType}:${hit.entityId}`;
|
const key = `${hit.entityType}:${hit.entityId}`;
|
||||||
if (!grouped.has(key)) grouped.set(key, []);
|
const existing = candidates.get(key);
|
||||||
grouped.get(key).push(hit);
|
if (!existing || confidence > existing.confidence) {
|
||||||
|
candidates.set(key, { hit, confidence });
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
this.logger.log(`Filtered suggestions: ${candidates.size} passed threshold ${this.MIN_SEMANTIC_CONFIDENCE}`);
|
||||||
|
|
||||||
const resolvedTenantId = await this.tenantDbService.resolveTenantId(tenantId);
|
const resolvedTenantId = await this.tenantDbService.resolveTenantId(tenantId);
|
||||||
const knex = await this.tenantDbService.getTenantKnexById(resolvedTenantId);
|
const knex = await this.tenantDbService.getTenantKnexById(resolvedTenantId);
|
||||||
|
|
||||||
for (const [key, hits] of grouped.entries()) {
|
for (const [key, { hit, confidence }] of candidates.entries()) {
|
||||||
const [targetType, targetId] = key.split(':');
|
const [targetType, targetId] = key.split(':');
|
||||||
const confidence = Math.min(0.99, 0.3 + hits.length * 0.1);
|
|
||||||
await this.semanticLinkService.upsertSuggestedLink(knex, {
|
await this.semanticLinkService.upsertSuggestedLink(knex, {
|
||||||
sourceEntityType: projection.entityType,
|
sourceEntityType: projection.entityType,
|
||||||
sourceEntityId: projection.entityId,
|
sourceEntityId: projection.entityId,
|
||||||
@@ -288,10 +299,11 @@ export class SemanticOrchestratorService {
|
|||||||
sourceKind: chunk.sourceKind,
|
sourceKind: chunk.sourceKind,
|
||||||
text: chunk.text.slice(0, 180),
|
text: chunk.text.slice(0, 180),
|
||||||
})),
|
})),
|
||||||
matchedChunks: hits.slice(0, 3).map((hit) => ({
|
matchedChunks: [{
|
||||||
sourceKind: hit.sourceKind,
|
sourceKind: hit.sourceKind,
|
||||||
text: String(hit.text || '').slice(0, 180),
|
text: String(hit.text || '').slice(0, 180),
|
||||||
})),
|
score: confidence,
|
||||||
|
}],
|
||||||
},
|
},
|
||||||
suggestedByUserId: userId || null,
|
suggestedByUserId: userId || null,
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -220,7 +220,8 @@ export class MeilisearchService {
|
|||||||
{
|
{
|
||||||
q: query,
|
q: query,
|
||||||
limit,
|
limit,
|
||||||
...(hybrid ? { hybrid } : {}),
|
showRankingScore: true,
|
||||||
|
...(hybrid ? { hybrid, showRankingScoreDetails: true } : {}),
|
||||||
},
|
},
|
||||||
this.buildHeaders(config),
|
this.buildHeaders(config),
|
||||||
);
|
);
|
||||||
|
|||||||
Reference in New Issue
Block a user