Enhance semantic links with LLM classification and richer UI labels
This commit is contained in:
@@ -98,7 +98,73 @@ export class SemanticLinkService {
|
||||
query.andWhere({ status });
|
||||
}
|
||||
|
||||
return query;
|
||||
const links = await query;
|
||||
if (!links.length) return links;
|
||||
|
||||
const typeSet = new Set<string>();
|
||||
for (const link of links) {
|
||||
typeSet.add(link.source_entity_type);
|
||||
typeSet.add(link.target_entity_type);
|
||||
}
|
||||
|
||||
const definitions = await knex('object_definitions')
|
||||
.whereIn('apiName', Array.from(typeSet))
|
||||
.select('apiName', 'label', 'pluralLabel', 'tableName', 'fields');
|
||||
const definitionByType = new Map<string, any>(
|
||||
definitions.map((item: any) => [item.apiName, item]),
|
||||
);
|
||||
|
||||
const displayNameCache = new Map<string, string>();
|
||||
const getDisplayField = (definition: any) => {
|
||||
let fields = [];
|
||||
if (Array.isArray(definition?.fields)) {
|
||||
fields = definition.fields;
|
||||
} else if (typeof definition?.fields === 'string') {
|
||||
try {
|
||||
fields = JSON.parse(definition.fields);
|
||||
} catch {
|
||||
fields = [];
|
||||
}
|
||||
}
|
||||
if (fields.some((field: any) => field?.apiName === 'name')) return 'name';
|
||||
const textField = fields.find((field: any) =>
|
||||
['STRING', 'TEXT', 'EMAIL'].includes(String(field?.type || '').toUpperCase()),
|
||||
);
|
||||
return textField?.apiName || 'id';
|
||||
};
|
||||
|
||||
const resolveTableName = (definition: any) => {
|
||||
if (definition?.tableName) return definition.tableName;
|
||||
if (definition?.pluralLabel) {
|
||||
return String(definition.pluralLabel).toLowerCase().replace(/[^a-z0-9]+/g, '_');
|
||||
}
|
||||
return `${String(definition?.apiName || '').toLowerCase()}s`;
|
||||
};
|
||||
|
||||
const loadDisplayName = async (type: string, id: string) => {
|
||||
const cacheKey = `${type}:${id}`;
|
||||
if (displayNameCache.has(cacheKey)) return displayNameCache.get(cacheKey);
|
||||
const definition = definitionByType.get(type);
|
||||
if (!definition) {
|
||||
displayNameCache.set(cacheKey, id);
|
||||
return id;
|
||||
}
|
||||
const tableName = resolveTableName(definition);
|
||||
const displayField = getDisplayField(definition);
|
||||
const record = await knex(tableName).where({ id }).first();
|
||||
const display = record?.[displayField] ? String(record[displayField]) : id;
|
||||
displayNameCache.set(cacheKey, display);
|
||||
return display;
|
||||
};
|
||||
|
||||
for (const link of links) {
|
||||
link.source_entity_label = definitionByType.get(link.source_entity_type)?.label || link.source_entity_type;
|
||||
link.target_entity_label = definitionByType.get(link.target_entity_type)?.label || link.target_entity_type;
|
||||
link.source_entity_name = await loadDisplayName(link.source_entity_type, link.source_entity_id);
|
||||
link.target_entity_name = await loadDisplayName(link.target_entity_type, link.target_entity_id);
|
||||
}
|
||||
|
||||
return links;
|
||||
}
|
||||
|
||||
async reviewLink(knex: any, linkId: string, status: string, reviewerUserId: string) {
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import { HumanMessage, SystemMessage } from '@langchain/core/messages';
|
||||
import { ChatOpenAI } from '@langchain/openai';
|
||||
import { TenantDatabaseService } from '../../tenant/tenant-database.service';
|
||||
import { MeilisearchService } from '../../search/meilisearch.service';
|
||||
import { getCentralPrisma } from '../../prisma/central-prisma.service';
|
||||
@@ -20,6 +22,7 @@ export class SemanticOrchestratorService {
|
||||
private readonly semanticEmbedderName = 'default';
|
||||
private readonly MIN_CONFIDENCE_BASE = 0.7;
|
||||
private readonly MIN_CONFIDENCE_COMMENT = 0.52;
|
||||
private readonly defaultChatModel = process.env.OPENAI_CHAT_MODEL || 'gpt-4o-mini';
|
||||
|
||||
constructor(
|
||||
private readonly tenantDbService: TenantDatabaseService,
|
||||
@@ -256,7 +259,7 @@ export class SemanticOrchestratorService {
|
||||
`Meilisearch results: index=${indexName} hits=${search.hits?.length || 0} total=${search.total}`,
|
||||
);
|
||||
|
||||
const candidates = new Map<string, { hit: any; confidence: number }>();
|
||||
const candidates = new Map<string, { hit: any; confidence: number; rankingDetails?: any }>();
|
||||
for (const hit of search.hits || []) {
|
||||
// Skip self
|
||||
if (hit.entityId === projection.entityId) continue;
|
||||
@@ -280,7 +283,11 @@ export class SemanticOrchestratorService {
|
||||
const key = `${hit.entityType}:${hit.entityId}`;
|
||||
const existing = candidates.get(key);
|
||||
if (!existing || confidence > existing.confidence) {
|
||||
candidates.set(key, { hit, confidence });
|
||||
candidates.set(key, {
|
||||
hit,
|
||||
confidence,
|
||||
rankingDetails: hit._rankingScoreDetails || null,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -289,35 +296,204 @@ export class SemanticOrchestratorService {
|
||||
const resolvedTenantId = await this.tenantDbService.resolveTenantId(tenantId);
|
||||
const knex = await this.tenantDbService.getTenantKnexById(resolvedTenantId);
|
||||
|
||||
for (const [key, { hit, confidence }] of candidates.entries()) {
|
||||
for (const [key, { hit, confidence, rankingDetails }] of candidates.entries()) {
|
||||
const [targetType, targetId] = key.split(':');
|
||||
const llmAssessment = await this.assessLinkWithLlm(
|
||||
openAiConfig,
|
||||
trigger,
|
||||
projection,
|
||||
chunks,
|
||||
hit,
|
||||
confidence,
|
||||
rankingDetails,
|
||||
);
|
||||
const reason =
|
||||
llmAssessment?.reason ||
|
||||
this.humanizeTrigger(trigger) ||
|
||||
'Suggested from semantic similarity.';
|
||||
await this.semanticLinkService.upsertSuggestedLink(knex, {
|
||||
sourceEntityType: projection.entityType,
|
||||
sourceEntityId: projection.entityId,
|
||||
targetEntityType: targetType,
|
||||
targetEntityId: targetId,
|
||||
linkType: 'related_to',
|
||||
linkType: llmAssessment?.linkType || 'related',
|
||||
status: 'suggested',
|
||||
origin: 'semantic',
|
||||
confidence,
|
||||
reason: `Suggested from semantic similarity (${trigger})`,
|
||||
evidence: {
|
||||
reason,
|
||||
evidence: this.buildEvidencePayload(
|
||||
trigger,
|
||||
sourceSignals: chunks.slice(0, 2).map((chunk) => ({
|
||||
sourceKind: chunk.sourceKind,
|
||||
text: chunk.text.slice(0, 180),
|
||||
})),
|
||||
matchedChunks: [{
|
||||
sourceKind: hit.sourceKind,
|
||||
text: String(hit.text || '').slice(0, 180),
|
||||
score: confidence,
|
||||
}],
|
||||
},
|
||||
chunks,
|
||||
hit,
|
||||
confidence,
|
||||
rankingDetails,
|
||||
llmAssessment,
|
||||
),
|
||||
suggestedByUserId: userId || null,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Assembles the evidence object persisted with a suggested semantic link.
 * Combines the trigger, truncated source/target excerpts, the similarity
 * score with Meilisearch ranking details, and (when available) the LLM's
 * explanation and matched signals.
 */
private buildEvidencePayload(
  trigger: string,
  chunks: any[],
  hit: any,
  confidence: number,
  rankingDetails: any,
  llmAssessment?: {
    reason?: string;
    explanation?: string;
    matchedSignals?: string[];
  } | null,
) {
  // Prefer the richer LLM explanation, then its short reason, then a generic fallback.
  const explanation =
    llmAssessment?.explanation ||
    llmAssessment?.reason ||
    'Suggested using semantic similarity and ranked chunk evidence.';

  // Keep at most two source-side excerpts, truncated to bound payload size.
  const sourceSignals = chunks.slice(0, 2).map((chunk) => ({
    sourceKind: chunk.sourceKind,
    text: chunk.text.slice(0, 220),
  }));

  const matchedChunk = {
    sourceKind: hit.sourceKind,
    text: String(hit.text || '').slice(0, 220),
    score: confidence,
    rankingDetails: rankingDetails || null,
  };

  return {
    trigger,
    explanation,
    sourceSignals,
    matchedSignals: llmAssessment?.matchedSignals || [],
    matchedChunks: [matchedChunk],
  };
}
|
||||
|
||||
private async assessLinkWithLlm(
|
||||
openAiConfig: OpenAIConfig | null,
|
||||
trigger: string,
|
||||
projection: any,
|
||||
chunks: any[],
|
||||
hit: any,
|
||||
confidence: number,
|
||||
rankingDetails: any,
|
||||
): Promise<{ linkType: string; reason?: string; explanation?: string; matchedSignals?: string[] } | null> {
|
||||
if (!openAiConfig?.apiKey) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const promptPayload = {
|
||||
trigger,
|
||||
source: {
|
||||
entityType: projection.entityType,
|
||||
title: projection.title,
|
||||
narrative: String(projection.narrative || '').slice(0, 900),
|
||||
keySignals: chunks.slice(0, 3).map((chunk) => ({
|
||||
sourceKind: chunk.sourceKind,
|
||||
text: String(chunk.text || '').slice(0, 220),
|
||||
})),
|
||||
},
|
||||
target: {
|
||||
entityType: hit.entityType,
|
||||
title: hit.title,
|
||||
sourceKind: hit.sourceKind,
|
||||
text: String(hit.text || '').slice(0, 300),
|
||||
},
|
||||
confidence,
|
||||
rankingDetails: rankingDetails || {},
|
||||
allowedLinkTypes: [
|
||||
'related',
|
||||
'supports',
|
||||
'contradicts',
|
||||
'expands',
|
||||
'duplicate_of',
|
||||
'references',
|
||||
'depends_on',
|
||||
],
|
||||
};
|
||||
|
||||
try {
|
||||
const model = new ChatOpenAI({
|
||||
apiKey: openAiConfig.apiKey,
|
||||
model: openAiConfig.model || this.defaultChatModel,
|
||||
temperature: 0.1,
|
||||
});
|
||||
|
||||
const response = await model.invoke([
|
||||
new SystemMessage(
|
||||
'Classify semantic relationship. Return valid JSON only with keys: linkType, reason, explanation, matchedSignals. linkType must be one of related|supports|contradicts|expands|duplicate_of|references|depends_on.',
|
||||
),
|
||||
new HumanMessage(JSON.stringify(promptPayload)),
|
||||
]);
|
||||
|
||||
const content = typeof response.content === 'string'
|
||||
? response.content
|
||||
: Array.isArray(response.content)
|
||||
? response.content.map((part: any) => (typeof part === 'string' ? part : part?.text || '')).join('')
|
||||
: '';
|
||||
const normalized = this.extractJsonObject(content);
|
||||
if (!normalized) return null;
|
||||
|
||||
const linkType = this.normalizeLinkType(normalized.linkType);
|
||||
return {
|
||||
linkType,
|
||||
reason: typeof normalized.reason === 'string' ? normalized.reason.trim() : undefined,
|
||||
explanation:
|
||||
typeof normalized.explanation === 'string' ? normalized.explanation.trim() : undefined,
|
||||
matchedSignals: Array.isArray(normalized.matchedSignals)
|
||||
? normalized.matchedSignals
|
||||
.map((item: any) => String(item || '').trim())
|
||||
.filter(Boolean)
|
||||
.slice(0, 3)
|
||||
: undefined,
|
||||
};
|
||||
} catch (error) {
|
||||
this.logger.warn(`Semantic LLM assessment failed: ${error.message}`);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Best-effort JSON extraction from an LLM reply: tries the whole string
 * first, then the outermost {...} span (handles markdown fences / prose
 * around the object). Returns null when nothing parses.
 */
private extractJsonObject(raw: string): Record<string, any> | null {
  if (!raw) return null;
  const trimmed = raw.trim();

  // undefined signals a parse failure; JSON.parse itself can never yield it.
  const tryParse = (candidate: string): any => {
    try {
      return JSON.parse(candidate);
    } catch {
      return undefined;
    }
  };

  const direct = tryParse(trimmed);
  if (direct !== undefined) return direct;

  const embedded = trimmed.match(/\{[\s\S]*\}/);
  if (!embedded) return null;
  const parsed = tryParse(embedded[0]);
  return parsed === undefined ? null : parsed;
}
|
||||
|
||||
/**
 * Coerces an arbitrary LLM-provided value to one of the supported link
 * types, falling back to 'related' for anything unrecognized or empty.
 */
private normalizeLinkType(value: any): string {
  const candidate = String(value || '').trim().toLowerCase();
  const supported = [
    'related',
    'supports',
    'contradicts',
    'expands',
    'duplicate_of',
    'references',
    'depends_on',
  ];
  return supported.includes(candidate) ? candidate : 'related';
}
|
||||
|
||||
/**
 * Maps an internal trigger identifier to a human-readable suggestion
 * reason. Unknown or empty triggers fall back to a generic message.
 */
private humanizeTrigger(trigger: string): string {
  switch (trigger) {
    case 'comment_created':
      return 'Suggested based on a comment added to the record.';
    case 'comment_updated':
      return 'Suggested based on a comment update.';
    case 'manual_refresh':
      return 'Suggested after a manual semantic refresh.';
    case 'batch_reindex':
      return 'Suggested during semantic reindexing.';
    default:
      return 'Suggested from semantic similarity.';
  }
}
|
||||
|
||||
private getTableName(objectDefinition: any): string {
|
||||
if (objectDefinition.tableName) return objectDefinition.tableName;
|
||||
|
||||
|
||||
@@ -12,6 +12,10 @@ type SemanticLink = {
|
||||
source_entity_id: string
|
||||
target_entity_type: string
|
||||
target_entity_id: string
|
||||
source_entity_label?: string
|
||||
target_entity_label?: string
|
||||
source_entity_name?: string
|
||||
target_entity_name?: string
|
||||
link_type: string
|
||||
status: string
|
||||
origin: string
|
||||
@@ -53,9 +57,18 @@ const getOtherSide = (link: SemanticLink) => {
|
||||
return {
|
||||
entityType: isSource ? link.target_entity_type : link.source_entity_type,
|
||||
entityId: isSource ? link.target_entity_id : link.source_entity_id,
|
||||
entityLabel: isSource ? link.target_entity_label : link.source_entity_label,
|
||||
entityName: isSource ? link.target_entity_name : link.source_entity_name,
|
||||
}
|
||||
}
|
||||
|
||||
const formatLinkType = (value?: string) => {
|
||||
if (!value) return 'Related'
|
||||
return value
|
||||
.replace(/_/g, ' ')
|
||||
.replace(/\b\w/g, (c) => c.toUpperCase())
|
||||
}
|
||||
|
||||
const parseEvidence = (raw: any) => {
|
||||
if (!raw) return null
|
||||
if (typeof raw === 'object') return raw
|
||||
@@ -142,10 +155,11 @@ watch(
|
||||
>
|
||||
<div class="flex flex-wrap items-center justify-between gap-2">
|
||||
<div class="text-sm font-medium">
|
||||
{{ getOtherSide(link).entityType }} · {{ getOtherSide(link).entityId }}
|
||||
{{ getOtherSide(link).entityLabel || getOtherSide(link).entityType }} ·
|
||||
{{ getOtherSide(link).entityName || getOtherSide(link).entityId }}
|
||||
</div>
|
||||
<div class="text-xs text-muted-foreground">
|
||||
{{ link.link_type }} • {{ link.origin }} • {{ formatConfidence(link.confidence) }}
|
||||
{{ formatLinkType(link.link_type) }} • {{ link.origin }} • {{ formatConfidence(link.confidence) }}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -160,19 +174,22 @@ watch(
|
||||
<Separator />
|
||||
<div>
|
||||
<div class="font-medium text-foreground">Evidence</div>
|
||||
<div v-if="parseEvidence(link.evidence)?.sourceSignals?.length">
|
||||
<div class="mt-1">Source signals:</div>
|
||||
<p v-if="parseEvidence(link.evidence)?.explanation" class="mt-1 text-foreground">
|
||||
{{ parseEvidence(link.evidence).explanation }}
|
||||
</p>
|
||||
<div v-if="parseEvidence(link.evidence)?.matchedSignals?.length" class="mt-2">
|
||||
<div>Matched context:</div>
|
||||
<ul class="list-disc pl-4">
|
||||
<li
|
||||
v-for="(signal, idx) in parseEvidence(link.evidence).sourceSignals"
|
||||
v-for="(signal, idx) in parseEvidence(link.evidence).matchedSignals"
|
||||
:key="idx"
|
||||
>
|
||||
{{ signal.sourceKind }}: {{ signal.text }}
|
||||
{{ signal }}
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div v-if="parseEvidence(link.evidence)?.matchedChunks?.length" class="mt-2">
|
||||
<div>Matched:</div>
|
||||
<div>Matched excerpts:</div>
|
||||
<ul class="list-disc pl-4">
|
||||
<li
|
||||
v-for="(match, idx) in parseEvidence(link.evidence).matchedChunks"
|
||||
|
||||
Reference in New Issue
Block a user