Enhance semantic links with LLM classification and richer UI labels
This commit is contained in:
@@ -98,7 +98,73 @@ export class SemanticLinkService {
|
|||||||
query.andWhere({ status });
|
query.andWhere({ status });
|
||||||
}
|
}
|
||||||
|
|
||||||
return query;
|
const links = await query;
|
||||||
|
if (!links.length) return links;
|
||||||
|
|
||||||
|
const typeSet = new Set<string>();
|
||||||
|
for (const link of links) {
|
||||||
|
typeSet.add(link.source_entity_type);
|
||||||
|
typeSet.add(link.target_entity_type);
|
||||||
|
}
|
||||||
|
|
||||||
|
const definitions = await knex('object_definitions')
|
||||||
|
.whereIn('apiName', Array.from(typeSet))
|
||||||
|
.select('apiName', 'label', 'pluralLabel', 'tableName', 'fields');
|
||||||
|
const definitionByType = new Map<string, any>(
|
||||||
|
definitions.map((item: any) => [item.apiName, item]),
|
||||||
|
);
|
||||||
|
|
||||||
|
const displayNameCache = new Map<string, string>();
|
||||||
|
// Chooses which column of a record is used as its human-readable name.
// Preference order: a field whose apiName is 'name', then the first field whose
// type is STRING/TEXT/EMAIL (case-insensitive), then the 'id' column.
// `definition.fields` is accepted either as an array or as a JSON string; anything
// else — including JSON that parses to a non-array — is treated as "no fields".
const getDisplayField = (definition: any) => {
  let fields: any[] = [];
  if (Array.isArray(definition?.fields)) {
    fields = definition.fields;
  } else if (typeof definition?.fields === 'string') {
    try {
      const parsed = JSON.parse(definition.fields);
      // Valid JSON such as 'null' or '{}' is not a field list; calling
      // .some()/.find() on it would throw, so fall back to an empty list.
      fields = Array.isArray(parsed) ? parsed : [];
    } catch {
      fields = [];
    }
  }
  if (fields.some((field: any) => field?.apiName === 'name')) return 'name';
  const textField = fields.find((field: any) =>
    ['STRING', 'TEXT', 'EMAIL'].includes(String(field?.type || '').toUpperCase()),
  );
  return textField?.apiName || 'id';
};
|
||||||
|
|
||||||
|
// Works out which table holds the records of an object definition.
// An explicit tableName wins; otherwise the plural label is lower-cased and every
// run of characters outside [a-z0-9] becomes a single underscore; failing both,
// the lower-cased apiName gets an 's' appended.
const resolveTableName = (definition: any) => {
  const explicitTable = definition?.tableName;
  if (explicitTable) return explicitTable;

  const plural = definition?.pluralLabel;
  if (!plural) {
    const apiName = String(definition?.apiName || '');
    return `${apiName.toLowerCase()}s`;
  }

  const lowered = String(plural).toLowerCase();
  return lowered.replace(/[^a-z0-9]+/g, '_');
};
|
||||||
|
|
||||||
|
// Resolves the display name of one linked record, memoised per `${type}:${id}`
// in displayNameCache so repeated references cost a single lookup.
// Falls back to the raw id when the type has no object definition, when no row
// is found, or when the chosen display column holds a falsy value.
const loadDisplayName = async (type: string, id: string) => {
  const cacheKey = `${type}:${id}`;
  if (displayNameCache.has(cacheKey)) {
    return displayNameCache.get(cacheKey);
  }

  // Store the resolved value under this key and hand it back to the caller.
  const remember = (value: string) => {
    displayNameCache.set(cacheKey, value);
    return value;
  };

  const objectDefinition = definitionByType.get(type);
  if (!objectDefinition) return remember(id);

  const table = resolveTableName(objectDefinition);
  const column = getDisplayField(objectDefinition);
  const row = await knex(table).where({ id }).first();

  return remember(row?.[column] ? String(row[column]) : id);
};
|
||||||
|
|
||||||
|
for (const link of links) {
|
||||||
|
link.source_entity_label = definitionByType.get(link.source_entity_type)?.label || link.source_entity_type;
|
||||||
|
link.target_entity_label = definitionByType.get(link.target_entity_type)?.label || link.target_entity_type;
|
||||||
|
link.source_entity_name = await loadDisplayName(link.source_entity_type, link.source_entity_id);
|
||||||
|
link.target_entity_name = await loadDisplayName(link.target_entity_type, link.target_entity_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
return links;
|
||||||
}
|
}
|
||||||
|
|
||||||
async reviewLink(knex: any, linkId: string, status: string, reviewerUserId: string) {
|
async reviewLink(knex: any, linkId: string, status: string, reviewerUserId: string) {
|
||||||
|
|||||||
@@ -1,4 +1,6 @@
|
|||||||
import { Injectable, Logger } from '@nestjs/common';
|
import { Injectable, Logger } from '@nestjs/common';
|
||||||
|
import { HumanMessage, SystemMessage } from '@langchain/core/messages';
|
||||||
|
import { ChatOpenAI } from '@langchain/openai';
|
||||||
import { TenantDatabaseService } from '../../tenant/tenant-database.service';
|
import { TenantDatabaseService } from '../../tenant/tenant-database.service';
|
||||||
import { MeilisearchService } from '../../search/meilisearch.service';
|
import { MeilisearchService } from '../../search/meilisearch.service';
|
||||||
import { getCentralPrisma } from '../../prisma/central-prisma.service';
|
import { getCentralPrisma } from '../../prisma/central-prisma.service';
|
||||||
@@ -20,6 +22,7 @@ export class SemanticOrchestratorService {
|
|||||||
private readonly semanticEmbedderName = 'default';
|
private readonly semanticEmbedderName = 'default';
|
||||||
private readonly MIN_CONFIDENCE_BASE = 0.7;
|
private readonly MIN_CONFIDENCE_BASE = 0.7;
|
||||||
private readonly MIN_CONFIDENCE_COMMENT = 0.52;
|
private readonly MIN_CONFIDENCE_COMMENT = 0.52;
|
||||||
|
private readonly defaultChatModel = process.env.OPENAI_CHAT_MODEL || 'gpt-4o-mini';
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
private readonly tenantDbService: TenantDatabaseService,
|
private readonly tenantDbService: TenantDatabaseService,
|
||||||
@@ -256,7 +259,7 @@ export class SemanticOrchestratorService {
|
|||||||
`Meilisearch results: index=${indexName} hits=${search.hits?.length || 0} total=${search.total}`,
|
`Meilisearch results: index=${indexName} hits=${search.hits?.length || 0} total=${search.total}`,
|
||||||
);
|
);
|
||||||
|
|
||||||
const candidates = new Map<string, { hit: any; confidence: number }>();
|
const candidates = new Map<string, { hit: any; confidence: number; rankingDetails?: any }>();
|
||||||
for (const hit of search.hits || []) {
|
for (const hit of search.hits || []) {
|
||||||
// Skip self
|
// Skip self
|
||||||
if (hit.entityId === projection.entityId) continue;
|
if (hit.entityId === projection.entityId) continue;
|
||||||
@@ -280,7 +283,11 @@ export class SemanticOrchestratorService {
|
|||||||
const key = `${hit.entityType}:${hit.entityId}`;
|
const key = `${hit.entityType}:${hit.entityId}`;
|
||||||
const existing = candidates.get(key);
|
const existing = candidates.get(key);
|
||||||
if (!existing || confidence > existing.confidence) {
|
if (!existing || confidence > existing.confidence) {
|
||||||
candidates.set(key, { hit, confidence });
|
candidates.set(key, {
|
||||||
|
hit,
|
||||||
|
confidence,
|
||||||
|
rankingDetails: hit._rankingScoreDetails || null,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -289,35 +296,204 @@ export class SemanticOrchestratorService {
|
|||||||
const resolvedTenantId = await this.tenantDbService.resolveTenantId(tenantId);
|
const resolvedTenantId = await this.tenantDbService.resolveTenantId(tenantId);
|
||||||
const knex = await this.tenantDbService.getTenantKnexById(resolvedTenantId);
|
const knex = await this.tenantDbService.getTenantKnexById(resolvedTenantId);
|
||||||
|
|
||||||
for (const [key, { hit, confidence }] of candidates.entries()) {
|
for (const [key, { hit, confidence, rankingDetails }] of candidates.entries()) {
|
||||||
const [targetType, targetId] = key.split(':');
|
const [targetType, targetId] = key.split(':');
|
||||||
|
const llmAssessment = await this.assessLinkWithLlm(
|
||||||
|
openAiConfig,
|
||||||
|
trigger,
|
||||||
|
projection,
|
||||||
|
chunks,
|
||||||
|
hit,
|
||||||
|
confidence,
|
||||||
|
rankingDetails,
|
||||||
|
);
|
||||||
|
const reason =
|
||||||
|
llmAssessment?.reason ||
|
||||||
|
this.humanizeTrigger(trigger) ||
|
||||||
|
'Suggested from semantic similarity.';
|
||||||
await this.semanticLinkService.upsertSuggestedLink(knex, {
|
await this.semanticLinkService.upsertSuggestedLink(knex, {
|
||||||
sourceEntityType: projection.entityType,
|
sourceEntityType: projection.entityType,
|
||||||
sourceEntityId: projection.entityId,
|
sourceEntityId: projection.entityId,
|
||||||
targetEntityType: targetType,
|
targetEntityType: targetType,
|
||||||
targetEntityId: targetId,
|
targetEntityId: targetId,
|
||||||
linkType: 'related_to',
|
linkType: llmAssessment?.linkType || 'related',
|
||||||
status: 'suggested',
|
status: 'suggested',
|
||||||
origin: 'semantic',
|
origin: 'semantic',
|
||||||
confidence,
|
confidence,
|
||||||
reason: `Suggested from semantic similarity (${trigger})`,
|
reason,
|
||||||
evidence: {
|
evidence: this.buildEvidencePayload(
|
||||||
trigger,
|
trigger,
|
||||||
sourceSignals: chunks.slice(0, 2).map((chunk) => ({
|
chunks,
|
||||||
sourceKind: chunk.sourceKind,
|
hit,
|
||||||
text: chunk.text.slice(0, 180),
|
confidence,
|
||||||
})),
|
rankingDetails,
|
||||||
matchedChunks: [{
|
llmAssessment,
|
||||||
sourceKind: hit.sourceKind,
|
),
|
||||||
text: String(hit.text || '').slice(0, 180),
|
|
||||||
score: confidence,
|
|
||||||
}],
|
|
||||||
},
|
|
||||||
suggestedByUserId: userId || null,
|
suggestedByUserId: userId || null,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Builds the `evidence` object passed along with a suggested link.
 *
 * @param trigger Name of the event that caused the suggestion; copied through as-is.
 * @param chunks Source-side chunks; only the first two are included, each with its
 *   text cut to 220 characters.
 * @param hit The matched search hit; its `sourceKind` and the first 220 characters
 *   of its `text` become the single `matchedChunks` entry.
 * @param confidence Score recorded on the matched chunk.
 * @param rankingDetails Ranking details for the hit; stored as `null` when falsy.
 * @param llmAssessment Optional LLM output. Its `explanation` (or, failing that, its
 *   `reason`) becomes the explanation; its `matchedSignals` are copied through.
 * @returns A plain object with `trigger`, `explanation`, `sourceSignals`,
 *   `matchedSignals` and `matchedChunks`.
 */
private buildEvidencePayload(
  trigger: string,
  chunks: any[],
  hit: any,
  confidence: number,
  rankingDetails: any,
  llmAssessment?: {
    reason?: string;
    explanation?: string;
    matchedSignals?: string[];
  } | null,
) {
  return {
    trigger,
    explanation:
      llmAssessment?.explanation ||
      llmAssessment?.reason ||
      'Suggested using semantic similarity and ranked chunk evidence.',
    sourceSignals: chunks.slice(0, 2).map((chunk) => ({
      sourceKind: chunk.sourceKind,
      // Coerce before slicing so a chunk without text yields '' instead of throwing.
      text: String(chunk.text || '').slice(0, 220),
    })),
    matchedSignals: llmAssessment?.matchedSignals || [],
    matchedChunks: [
      {
        sourceKind: hit.sourceKind,
        text: String(hit.text || '').slice(0, 220),
        score: confidence,
        rankingDetails: rankingDetails || null,
      },
    ],
  };
}
|
||||||
|
|
||||||
|
/**
 * Asks an OpenAI chat model to classify the relationship between the source
 * projection and one candidate hit.
 *
 * The prompt is a JSON payload holding the trigger, a trimmed view of the source
 * (narrative cut to 900 characters, first three chunks cut to 220 each), a trimmed
 * view of the target hit (text cut to 300 characters), the confidence, the ranking
 * details and the list of allowed link types.
 *
 * @returns The normalized assessment, or `null` when no API key is configured, the
 *   reply contains no parsable JSON object, or the call fails. Failures are logged
 *   as warnings and never thrown, so callers can fall back to defaults.
 */
private async assessLinkWithLlm(
  openAiConfig: OpenAIConfig | null,
  trigger: string,
  projection: any,
  chunks: any[],
  hit: any,
  confidence: number,
  rankingDetails: any,
): Promise<{ linkType: string; reason?: string; explanation?: string; matchedSignals?: string[] } | null> {
  if (!openAiConfig?.apiKey) {
    return null;
  }

  const promptPayload = {
    trigger,
    source: {
      entityType: projection.entityType,
      title: projection.title,
      narrative: String(projection.narrative || '').slice(0, 900),
      keySignals: chunks.slice(0, 3).map((chunk) => ({
        sourceKind: chunk.sourceKind,
        text: String(chunk.text || '').slice(0, 220),
      })),
    },
    target: {
      entityType: hit.entityType,
      title: hit.title,
      sourceKind: hit.sourceKind,
      text: String(hit.text || '').slice(0, 300),
    },
    confidence,
    rankingDetails: rankingDetails || {},
    allowedLinkTypes: [
      'related',
      'supports',
      'contradicts',
      'expands',
      'duplicate_of',
      'references',
      'depends_on',
    ],
  };

  try {
    const model = new ChatOpenAI({
      apiKey: openAiConfig.apiKey,
      model: openAiConfig.model || this.defaultChatModel,
      temperature: 0.1,
    });

    const response = await model.invoke([
      new SystemMessage(
        'Classify semantic relationship. Return valid JSON only with keys: linkType, reason, explanation, matchedSignals. linkType must be one of related|supports|contradicts|expands|duplicate_of|references|depends_on.',
      ),
      new HumanMessage(JSON.stringify(promptPayload)),
    ]);

    // The reply content is either a plain string or an array of parts; flatten
    // the array form by concatenating each part's text.
    const content = typeof response.content === 'string'
      ? response.content
      : Array.isArray(response.content)
        ? response.content.map((part: any) => (typeof part === 'string' ? part : part?.text || '')).join('')
        : '';
    const normalized = this.extractJsonObject(content);
    if (!normalized) return null;

    const linkType = this.normalizeLinkType(normalized.linkType);
    return {
      linkType,
      reason: typeof normalized.reason === 'string' ? normalized.reason.trim() : undefined,
      explanation:
        typeof normalized.explanation === 'string' ? normalized.explanation.trim() : undefined,
      // Keep at most three non-empty signals, each coerced to a trimmed string.
      matchedSignals: Array.isArray(normalized.matchedSignals)
        ? normalized.matchedSignals
            .map((item: any) => String(item || '').trim())
            .filter(Boolean)
            .slice(0, 3)
        : undefined,
    };
  } catch (error) {
    // A thrown value is not guaranteed to be an Error (it may even be null), so
    // narrow before reading `.message` to keep this handler itself from throwing.
    const message = error instanceof Error ? error.message : String(error);
    this.logger.warn(`Semantic LLM assessment failed: ${message}`);
    return null;
  }
}
|
||||||
|
|
||||||
|
/**
 * Extracts a JSON object from raw model output.
 *
 * Tries the whole trimmed string first; if that does not yield a plain object,
 * retries on the span from the first '{' to the last '}' so that replies wrapped
 * in prose, code fences or an array can still be used.
 *
 * @param raw Text returned by the model.
 * @returns The parsed object, or `null` when the input is empty or no plain JSON
 *   object (non-null, non-array) can be recovered.
 */
private extractJsonObject(raw: string): Record<string, any> | null {
  if (!raw) return null;
  const trimmed = raw.trim();

  // JSON.parse also accepts arrays, numbers, strings and null; only a plain
  // object satisfies the declared return type.
  const parsePlainObject = (text: string): Record<string, any> | null => {
    try {
      const parsed = JSON.parse(text);
      const isPlainObject =
        parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
      return isPlainObject ? parsed : null;
    } catch {
      return null;
    }
  };

  const direct = parsePlainObject(trimmed);
  if (direct) return direct;

  const match = trimmed.match(/\{[\s\S]*\}/);
  return match ? parsePlainObject(match[0]) : null;
}
|
||||||
|
|
||||||
|
/**
 * Maps an arbitrary value onto one of the supported link types.
 *
 * The value is stringified, trimmed and lower-cased before comparison; anything
 * that is not a supported type (including empty or falsy input) becomes 'related'.
 */
private normalizeLinkType(value: any): string {
  const candidate = String(value || '').trim().toLowerCase();
  const allowedTypes = [
    'related',
    'supports',
    'contradicts',
    'expands',
    'duplicate_of',
    'references',
    'depends_on',
  ];
  return allowedTypes.includes(candidate) ? candidate : 'related';
}
|
||||||
|
|
||||||
|
/**
 * Turns a trigger identifier into a user-facing reason sentence.
 *
 * @param trigger Trigger name such as 'comment_created' or 'manual_refresh'.
 * @returns The matching sentence, or the generic similarity sentence when the
 *   trigger is empty or unknown.
 */
private humanizeTrigger(trigger: string): string {
  const fallback = 'Suggested from semantic similarity.';
  if (!trigger) return fallback;

  // A Map only matches the keys listed here. A plain-object lookup would also
  // resolve inherited keys such as 'constructor' or 'toString' and return a
  // function where a string is expected.
  const messages = new Map<string, string>([
    ['comment_created', 'Suggested based on a comment added to the record.'],
    ['comment_updated', 'Suggested based on a comment update.'],
    ['manual_refresh', 'Suggested after a manual semantic refresh.'],
    ['batch_reindex', 'Suggested during semantic reindexing.'],
  ]);
  return messages.get(trigger) || fallback;
}
|
||||||
|
|
||||||
private getTableName(objectDefinition: any): string {
|
private getTableName(objectDefinition: any): string {
|
||||||
if (objectDefinition.tableName) return objectDefinition.tableName;
|
if (objectDefinition.tableName) return objectDefinition.tableName;
|
||||||
|
|
||||||
|
|||||||
@@ -12,6 +12,10 @@ type SemanticLink = {
|
|||||||
source_entity_id: string
|
source_entity_id: string
|
||||||
target_entity_type: string
|
target_entity_type: string
|
||||||
target_entity_id: string
|
target_entity_id: string
|
||||||
|
source_entity_label?: string
|
||||||
|
target_entity_label?: string
|
||||||
|
source_entity_name?: string
|
||||||
|
target_entity_name?: string
|
||||||
link_type: string
|
link_type: string
|
||||||
status: string
|
status: string
|
||||||
origin: string
|
origin: string
|
||||||
@@ -53,9 +57,18 @@ const getOtherSide = (link: SemanticLink) => {
|
|||||||
return {
|
return {
|
||||||
entityType: isSource ? link.target_entity_type : link.source_entity_type,
|
entityType: isSource ? link.target_entity_type : link.source_entity_type,
|
||||||
entityId: isSource ? link.target_entity_id : link.source_entity_id,
|
entityId: isSource ? link.target_entity_id : link.source_entity_id,
|
||||||
|
entityLabel: isSource ? link.target_entity_label : link.source_entity_label,
|
||||||
|
entityName: isSource ? link.target_entity_name : link.source_entity_name,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Turns a stored link type such as 'duplicate_of' into a display label
// ('Duplicate Of'): underscores become spaces and the first character of every
// word is upper-cased. A missing or empty value is shown as 'Related'.
const formatLinkType = (value?: string) => {
  if (!value) {
    return 'Related'
  }
  const spaced = value.split('_').join(' ')
  const capitalizeInitial = (letter: string) => letter.toUpperCase()
  return spaced.replace(/\b\w/g, capitalizeInitial)
}
|
||||||
|
|
||||||
const parseEvidence = (raw: any) => {
|
const parseEvidence = (raw: any) => {
|
||||||
if (!raw) return null
|
if (!raw) return null
|
||||||
if (typeof raw === 'object') return raw
|
if (typeof raw === 'object') return raw
|
||||||
@@ -142,10 +155,11 @@ watch(
|
|||||||
>
|
>
|
||||||
<div class="flex flex-wrap items-center justify-between gap-2">
|
<div class="flex flex-wrap items-center justify-between gap-2">
|
||||||
<div class="text-sm font-medium">
|
<div class="text-sm font-medium">
|
||||||
{{ getOtherSide(link).entityType }} · {{ getOtherSide(link).entityId }}
|
{{ getOtherSide(link).entityLabel || getOtherSide(link).entityType }} ·
|
||||||
|
{{ getOtherSide(link).entityName || getOtherSide(link).entityId }}
|
||||||
</div>
|
</div>
|
||||||
<div class="text-xs text-muted-foreground">
|
<div class="text-xs text-muted-foreground">
|
||||||
{{ link.link_type }} • {{ link.origin }} • {{ formatConfidence(link.confidence) }}
|
{{ formatLinkType(link.link_type) }} • {{ link.origin }} • {{ formatConfidence(link.confidence) }}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@@ -160,19 +174,22 @@ watch(
|
|||||||
<Separator />
|
<Separator />
|
||||||
<div>
|
<div>
|
||||||
<div class="font-medium text-foreground">Evidence</div>
|
<div class="font-medium text-foreground">Evidence</div>
|
||||||
<div v-if="parseEvidence(link.evidence)?.sourceSignals?.length">
|
<p v-if="parseEvidence(link.evidence)?.explanation" class="mt-1 text-foreground">
|
||||||
<div class="mt-1">Source signals:</div>
|
{{ parseEvidence(link.evidence).explanation }}
|
||||||
|
</p>
|
||||||
|
<div v-if="parseEvidence(link.evidence)?.matchedSignals?.length" class="mt-2">
|
||||||
|
<div>Matched context:</div>
|
||||||
<ul class="list-disc pl-4">
|
<ul class="list-disc pl-4">
|
||||||
<li
|
<li
|
||||||
v-for="(signal, idx) in parseEvidence(link.evidence).sourceSignals"
|
v-for="(signal, idx) in parseEvidence(link.evidence).matchedSignals"
|
||||||
:key="idx"
|
:key="idx"
|
||||||
>
|
>
|
||||||
{{ signal.sourceKind }}: {{ signal.text }}
|
{{ signal }}
|
||||||
</li>
|
</li>
|
||||||
</ul>
|
</ul>
|
||||||
</div>
|
</div>
|
||||||
<div v-if="parseEvidence(link.evidence)?.matchedChunks?.length" class="mt-2">
|
<div v-if="parseEvidence(link.evidence)?.matchedChunks?.length" class="mt-2">
|
||||||
<div>Matched:</div>
|
<div>Matched excerpts:</div>
|
||||||
<ul class="list-disc pl-4">
|
<ul class="list-disc pl-4">
|
||||||
<li
|
<li
|
||||||
v-for="(match, idx) in parseEvidence(link.evidence).matchedChunks"
|
v-for="(match, idx) in parseEvidence(link.evidence).matchedChunks"
|
||||||
|
|||||||
Reference in New Issue
Block a user