Enhance semantic links with LLM classification and richer UI labels

This commit is contained in:
phyroslam
2026-04-12 21:37:07 -07:00
parent 12a82372f4
commit 96989d0ec3
3 changed files with 283 additions and 24 deletions

View File

@@ -98,7 +98,73 @@ export class SemanticLinkService {
query.andWhere({ status });
}
return query;
const links = await query;
if (!links.length) return links;
const typeSet = new Set<string>();
for (const link of links) {
typeSet.add(link.source_entity_type);
typeSet.add(link.target_entity_type);
}
const definitions = await knex('object_definitions')
.whereIn('apiName', Array.from(typeSet))
.select('apiName', 'label', 'pluralLabel', 'tableName', 'fields');
const definitionByType = new Map<string, any>(
definitions.map((item: any) => [item.apiName, item]),
);
const displayNameCache = new Map<string, string>();
const getDisplayField = (definition: any) => {
let fields = [];
if (Array.isArray(definition?.fields)) {
fields = definition.fields;
} else if (typeof definition?.fields === 'string') {
try {
fields = JSON.parse(definition.fields);
} catch {
fields = [];
}
}
if (fields.some((field: any) => field?.apiName === 'name')) return 'name';
const textField = fields.find((field: any) =>
['STRING', 'TEXT', 'EMAIL'].includes(String(field?.type || '').toUpperCase()),
);
return textField?.apiName || 'id';
};
const resolveTableName = (definition: any) => {
if (definition?.tableName) return definition.tableName;
if (definition?.pluralLabel) {
return String(definition.pluralLabel).toLowerCase().replace(/[^a-z0-9]+/g, '_');
}
return `${String(definition?.apiName || '').toLowerCase()}s`;
};
const loadDisplayName = async (type: string, id: string) => {
const cacheKey = `${type}:${id}`;
if (displayNameCache.has(cacheKey)) return displayNameCache.get(cacheKey);
const definition = definitionByType.get(type);
if (!definition) {
displayNameCache.set(cacheKey, id);
return id;
}
const tableName = resolveTableName(definition);
const displayField = getDisplayField(definition);
const record = await knex(tableName).where({ id }).first();
const display = record?.[displayField] ? String(record[displayField]) : id;
displayNameCache.set(cacheKey, display);
return display;
};
for (const link of links) {
link.source_entity_label = definitionByType.get(link.source_entity_type)?.label || link.source_entity_type;
link.target_entity_label = definitionByType.get(link.target_entity_type)?.label || link.target_entity_type;
link.source_entity_name = await loadDisplayName(link.source_entity_type, link.source_entity_id);
link.target_entity_name = await loadDisplayName(link.target_entity_type, link.target_entity_id);
}
return links;
}
async reviewLink(knex: any, linkId: string, status: string, reviewerUserId: string) {

View File

@@ -1,4 +1,6 @@
import { Injectable, Logger } from '@nestjs/common';
import { HumanMessage, SystemMessage } from '@langchain/core/messages';
import { ChatOpenAI } from '@langchain/openai';
import { TenantDatabaseService } from '../../tenant/tenant-database.service';
import { MeilisearchService } from '../../search/meilisearch.service';
import { getCentralPrisma } from '../../prisma/central-prisma.service';
@@ -20,6 +22,7 @@ export class SemanticOrchestratorService {
private readonly semanticEmbedderName = 'default';
private readonly MIN_CONFIDENCE_BASE = 0.7;
private readonly MIN_CONFIDENCE_COMMENT = 0.52;
private readonly defaultChatModel = process.env.OPENAI_CHAT_MODEL || 'gpt-4o-mini';
constructor(
private readonly tenantDbService: TenantDatabaseService,
@@ -256,7 +259,7 @@ export class SemanticOrchestratorService {
`Meilisearch results: index=${indexName} hits=${search.hits?.length || 0} total=${search.total}`,
);
const candidates = new Map<string, { hit: any; confidence: number }>();
const candidates = new Map<string, { hit: any; confidence: number; rankingDetails?: any }>();
for (const hit of search.hits || []) {
// Skip self
if (hit.entityId === projection.entityId) continue;
@@ -280,7 +283,11 @@ export class SemanticOrchestratorService {
const key = `${hit.entityType}:${hit.entityId}`;
const existing = candidates.get(key);
if (!existing || confidence > existing.confidence) {
candidates.set(key, { hit, confidence });
candidates.set(key, {
hit,
confidence,
rankingDetails: hit._rankingScoreDetails || null,
});
}
}
@@ -289,35 +296,204 @@ export class SemanticOrchestratorService {
const resolvedTenantId = await this.tenantDbService.resolveTenantId(tenantId);
const knex = await this.tenantDbService.getTenantKnexById(resolvedTenantId);
for (const [key, { hit, confidence }] of candidates.entries()) {
for (const [key, { hit, confidence, rankingDetails }] of candidates.entries()) {
const [targetType, targetId] = key.split(':');
const llmAssessment = await this.assessLinkWithLlm(
openAiConfig,
trigger,
projection,
chunks,
hit,
confidence,
rankingDetails,
);
const reason =
llmAssessment?.reason ||
this.humanizeTrigger(trigger) ||
'Suggested from semantic similarity.';
await this.semanticLinkService.upsertSuggestedLink(knex, {
sourceEntityType: projection.entityType,
sourceEntityId: projection.entityId,
targetEntityType: targetType,
targetEntityId: targetId,
linkType: 'related_to',
linkType: llmAssessment?.linkType || 'related',
status: 'suggested',
origin: 'semantic',
confidence,
reason: `Suggested from semantic similarity (${trigger})`,
evidence: {
reason,
evidence: this.buildEvidencePayload(
trigger,
sourceSignals: chunks.slice(0, 2).map((chunk) => ({
sourceKind: chunk.sourceKind,
text: chunk.text.slice(0, 180),
})),
matchedChunks: [{
sourceKind: hit.sourceKind,
text: String(hit.text || '').slice(0, 180),
score: confidence,
}],
},
chunks,
hit,
confidence,
rankingDetails,
llmAssessment,
),
suggestedByUserId: userId || null,
});
}
}
/**
 * Assembles the JSON evidence payload persisted alongside a suggested link.
 *
 * @param trigger        Internal trigger identifier (e.g. 'comment_created').
 * @param chunks         Source-side projection chunks; only the first two are kept.
 * @param hit            The Meilisearch hit that produced the candidate.
 * @param confidence     Final similarity confidence for the match.
 * @param rankingDetails Raw Meilisearch ranking-score details, if available.
 * @param llmAssessment  Optional LLM classification; its explanation/reason and
 *                       matched signals are folded into the payload when present.
 */
private buildEvidencePayload(
  trigger: string,
  chunks: any[],
  hit: any,
  confidence: number,
  rankingDetails: any,
  llmAssessment?: {
    reason?: string;
    explanation?: string;
    matchedSignals?: string[];
  } | null,
) {
  return {
    trigger,
    // Prefer the LLM's long-form explanation, then its short reason, then a generic fallback.
    explanation:
      llmAssessment?.explanation ||
      llmAssessment?.reason ||
      'Suggested using semantic similarity and ranked chunk evidence.',
    sourceSignals: chunks.slice(0, 2).map((chunk) => ({
      sourceKind: chunk.sourceKind,
      // Coerce defensively: chunk.text may be null/undefined, and the matched-chunk
      // text below is already guarded the same way (String(... || '')).
      text: String(chunk.text || '').slice(0, 220),
    })),
    matchedSignals: llmAssessment?.matchedSignals || [],
    matchedChunks: [
      {
        sourceKind: hit.sourceKind,
        text: String(hit.text || '').slice(0, 220),
        score: confidence,
        rankingDetails: rankingDetails || null,
      },
    ],
  };
}
/**
 * Asks the configured OpenAI chat model to classify the relationship between
 * the source projection and a candidate hit.
 *
 * Returns a normalized assessment ({ linkType, reason?, explanation?,
 * matchedSignals? }) or null when no API key is configured, the model output
 * contains no parseable JSON object, or the call fails (failures are logged
 * and swallowed so link suggestion still proceeds without LLM enrichment).
 */
private async assessLinkWithLlm(
  openAiConfig: OpenAIConfig | null,
  trigger: string,
  projection: any,
  chunks: any[],
  hit: any,
  confidence: number,
  rankingDetails: any,
): Promise<{ linkType: string; reason?: string; explanation?: string; matchedSignals?: string[] } | null> {
  if (!openAiConfig?.apiKey) {
    return null;
  }
  // Compact, truncated context for the prompt — keeps token usage bounded.
  const promptPayload = {
    trigger,
    source: {
      entityType: projection.entityType,
      title: projection.title,
      narrative: String(projection.narrative || '').slice(0, 900),
      keySignals: chunks.slice(0, 3).map((chunk) => ({
        sourceKind: chunk.sourceKind,
        text: String(chunk.text || '').slice(0, 220),
      })),
    },
    target: {
      entityType: hit.entityType,
      title: hit.title,
      sourceKind: hit.sourceKind,
      text: String(hit.text || '').slice(0, 300),
    },
    confidence,
    rankingDetails: rankingDetails || {},
    // Keep in sync with the supported set in normalizeLinkType.
    allowedLinkTypes: [
      'related',
      'supports',
      'contradicts',
      'expands',
      'duplicate_of',
      'references',
      'depends_on',
    ],
  };
  try {
    const model = new ChatOpenAI({
      apiKey: openAiConfig.apiKey,
      model: openAiConfig.model || this.defaultChatModel,
      temperature: 0.1,
    });
    const response = await model.invoke([
      new SystemMessage(
        'Classify semantic relationship. Return valid JSON only with keys: linkType, reason, explanation, matchedSignals. linkType must be one of related|supports|contradicts|expands|duplicate_of|references|depends_on.',
      ),
      new HumanMessage(JSON.stringify(promptPayload)),
    ]);
    // LangChain message content may be a string or an array of content parts.
    const content = typeof response.content === 'string'
      ? response.content
      : Array.isArray(response.content)
        ? response.content.map((part: any) => (typeof part === 'string' ? part : part?.text || '')).join('')
        : '';
    const normalized = this.extractJsonObject(content);
    if (!normalized) return null;
    const linkType = this.normalizeLinkType(normalized.linkType);
    return {
      linkType,
      reason: typeof normalized.reason === 'string' ? normalized.reason.trim() : undefined,
      explanation:
        typeof normalized.explanation === 'string' ? normalized.explanation.trim() : undefined,
      matchedSignals: Array.isArray(normalized.matchedSignals)
        ? normalized.matchedSignals
            .map((item: any) => String(item || '').trim())
            .filter(Boolean)
            .slice(0, 3)
        : undefined,
    };
  } catch (error) {
    // Under strict TS the catch variable is `unknown`; narrow before reading .message.
    const message = error instanceof Error ? error.message : String(error);
    this.logger.warn(`Semantic LLM assessment failed: ${message}`);
    return null;
  }
}
/**
 * Best-effort extraction of a JSON *object* from raw LLM output.
 *
 * Tries a direct parse first, then falls back to the first `{...}` span,
 * since models often wrap JSON in prose or markdown fences. Any parse that
 * yields a non-object (number, string, boolean, array, or null) is rejected
 * rather than returned under the declared Record type.
 */
private extractJsonObject(raw: string): Record<string, any> | null {
  if (!raw) return null;
  const trimmed = raw.trim();
  // Only a plain (non-array, non-null) object satisfies the return type.
  const asObject = (value: any): Record<string, any> | null =>
    value && typeof value === 'object' && !Array.isArray(value) ? value : null;
  try {
    const direct = asObject(JSON.parse(trimmed));
    if (direct) return direct;
  } catch {
    // fall through to the brace-span fallback below
  }
  const match = trimmed.match(/\{[\s\S]*\}/);
  if (!match) return null;
  try {
    return asObject(JSON.parse(match[0]));
  } catch {
    return null;
  }
}
/**
 * Coerces an arbitrary LLM-provided value into one of the supported semantic
 * link types; anything unrecognized (or empty) falls back to 'related'.
 */
private normalizeLinkType(value: any): string {
  const allowed = [
    'related',
    'supports',
    'contradicts',
    'expands',
    'duplicate_of',
    'references',
    'depends_on',
  ];
  const candidate = String(value || '').trim().toLowerCase();
  return allowed.includes(candidate) ? candidate : 'related';
}
/**
 * Maps an internal trigger identifier to a short, user-facing sentence
 * describing why a link was suggested. Unknown or empty triggers get a
 * generic similarity message.
 */
private humanizeTrigger(trigger: string): string {
  switch (trigger) {
    case 'comment_created':
      return 'Suggested based on a comment added to the record.';
    case 'comment_updated':
      return 'Suggested based on a comment update.';
    case 'manual_refresh':
      return 'Suggested after a manual semantic refresh.';
    case 'batch_reindex':
      return 'Suggested during semantic reindexing.';
    default:
      return 'Suggested from semantic similarity.';
  }
}
private getTableName(objectDefinition: any): string {
if (objectDefinition.tableName) return objectDefinition.tableName;

View File

@@ -12,6 +12,10 @@ type SemanticLink = {
source_entity_id: string
target_entity_type: string
target_entity_id: string
source_entity_label?: string
target_entity_label?: string
source_entity_name?: string
target_entity_name?: string
link_type: string
status: string
origin: string
@@ -53,9 +57,18 @@ const getOtherSide = (link: SemanticLink) => {
return {
entityType: isSource ? link.target_entity_type : link.source_entity_type,
entityId: isSource ? link.target_entity_id : link.source_entity_id,
entityLabel: isSource ? link.target_entity_label : link.source_entity_label,
entityName: isSource ? link.target_entity_name : link.source_entity_name,
}
}
// Render a snake_case link type as a Title Case label
// (e.g. 'depends_on' -> 'Depends On'); missing values show as 'Related'.
const formatLinkType = (value?: string) => {
  if (!value) return 'Related'
  const spaced = value.replace(/_/g, ' ')
  return spaced.replace(/\b\w/g, (letter) => letter.toUpperCase())
}
const parseEvidence = (raw: any) => {
if (!raw) return null
if (typeof raw === 'object') return raw
@@ -142,10 +155,11 @@ watch(
>
<div class="flex flex-wrap items-center justify-between gap-2">
<div class="text-sm font-medium">
{{ getOtherSide(link).entityType }} · {{ getOtherSide(link).entityId }}
{{ getOtherSide(link).entityLabel || getOtherSide(link).entityType }} ·
{{ getOtherSide(link).entityName || getOtherSide(link).entityId }}
</div>
<div class="text-xs text-muted-foreground">
{{ link.link_type }} {{ link.origin }} {{ formatConfidence(link.confidence) }}
{{ formatLinkType(link.link_type) }} {{ link.origin }} {{ formatConfidence(link.confidence) }}
</div>
</div>
@@ -160,19 +174,22 @@ watch(
<Separator />
<div>
<div class="font-medium text-foreground">Evidence</div>
<div v-if="parseEvidence(link.evidence)?.sourceSignals?.length">
<div class="mt-1">Source signals:</div>
<p v-if="parseEvidence(link.evidence)?.explanation" class="mt-1 text-foreground">
{{ parseEvidence(link.evidence).explanation }}
</p>
<div v-if="parseEvidence(link.evidence)?.matchedSignals?.length" class="mt-2">
<div>Matched context:</div>
<ul class="list-disc pl-4">
<li
v-for="(signal, idx) in parseEvidence(link.evidence).sourceSignals"
v-for="(signal, idx) in parseEvidence(link.evidence).matchedSignals"
:key="idx"
>
{{ signal.sourceKind }}: {{ signal.text }}
{{ signal }}
</li>
</ul>
</div>
<div v-if="parseEvidence(link.evidence)?.matchedChunks?.length" class="mt-2">
<div>Matched:</div>
<div>Matched excerpts:</div>
<ul class="list-disc pl-4">
<li
v-for="(match, idx) in parseEvidence(link.evidence).matchedChunks"