WIP: semantic linking working with the name field only
This commit is contained in:
@@ -53,9 +53,14 @@ export class DefaultSemanticProjectionAdapter implements SemanticProjectionAdapt
|
||||
input.record?.subject ||
|
||||
`${input.objectApiName} ${input.record?.id || ''}`.trim();
|
||||
|
||||
/*
|
||||
const fieldNarrative = fieldEntries
|
||||
.map(([key, value]) => `${key}: ${String(value)}`)
|
||||
.join('\n');
|
||||
*/
|
||||
const fieldNarrative = fieldEntries
|
||||
.map(([key, value]) => `${String(value)}`)
|
||||
.join('\n');
|
||||
|
||||
const commentNarrative = (input.comments || [])
|
||||
.map((comment, index) => `Comment ${index + 1}: ${comment.content}`)
|
||||
@@ -63,10 +68,9 @@ export class DefaultSemanticProjectionAdapter implements SemanticProjectionAdapt
|
||||
|
||||
const narrative = [fieldNarrative, commentNarrative].filter(Boolean).join('\n\n');
|
||||
|
||||
// Plain values only — no 'key:' prefixes, no comments (chunker adds those separately).
|
||||
const embeddingNarrative = fieldEntries
|
||||
.map(([, value]) => String(value))
|
||||
.join('\n');
|
||||
// Temporary: use only the name field for embedding to test pure semantic matching.
|
||||
// Widen this back to all field values once semantic results are validated.
|
||||
const embeddingNarrative = String(input.record?.name || title);
|
||||
|
||||
return {
|
||||
entityType: input.objectApiName,
|
||||
|
||||
@@ -18,6 +18,7 @@ export class SemanticOrchestratorService {
|
||||
private readonly defaultEmbeddingModel =
|
||||
process.env.OPENAI_EMBEDDING_MODEL || 'text-embedding-3-small';
|
||||
private readonly semanticEmbedderName = 'default';
|
||||
private readonly MIN_SEMANTIC_CONFIDENCE = 0.7;
|
||||
|
||||
constructor(
|
||||
private readonly tenantDbService: TenantDatabaseService,
|
||||
@@ -71,10 +72,9 @@ export class SemanticOrchestratorService {
|
||||
});
|
||||
|
||||
const documentId = await this.upsertSemanticDocument(knex, projection);
|
||||
// Use embeddingNarrative (plain values, no labels) so lexical noise from 'key:'
|
||||
// prefixes doesn't inflate match scores. Comments are passed separately so they
|
||||
// are not double-counted (narrative already embeds them with 'Comment N:' prefix).
|
||||
const chunks = this.chunkerService.chunkText(projection.embeddingNarrative, comments);
|
||||
// Temporary: pass empty comments so only embeddingNarrative (name field) is indexed.
|
||||
// Re-enable by replacing [] with `comments` once semantic matching is validated.
|
||||
const chunks = this.chunkerService.chunkText(projection.embeddingNarrative, []);
|
||||
this.logger.log(
|
||||
`Semantic refresh chunking: ${objectApiName}:${recordId} chunks=${chunks.length}`,
|
||||
);
|
||||
@@ -252,26 +252,37 @@ export class SemanticOrchestratorService {
|
||||
`Meilisearch results: index=${indexName} hits=${search.hits?.length || 0} total=${search.total}`,
|
||||
);
|
||||
|
||||
const grouped = new Map<string, any[]>();
|
||||
const candidates = new Map<string, { hit: any; confidence: number }>();
|
||||
for (const hit of search.hits || []) {
|
||||
if (hit.entityType === projection.entityType && hit.entityId === projection.entityId) {
|
||||
continue;
|
||||
}
|
||||
// Skip self-links where source and target resolve to the same entity
|
||||
if (hit.entityId === projection.entityId) {
|
||||
// Skip self
|
||||
if (hit.entityId === projection.entityId) continue;
|
||||
|
||||
const confidence = hit._semanticScore ?? hit._rankingScore ?? 0;
|
||||
this.logger.log(
|
||||
`Suggestion candidate: ${hit.entityType}:${hit.entityId} confidence=${confidence.toFixed(4)} text="${String(hit.text || '').substring(0, 60)}"`,
|
||||
);
|
||||
|
||||
if (confidence < this.MIN_SEMANTIC_CONFIDENCE) {
|
||||
this.logger.log(
|
||||
`Skipping low-confidence match: ${hit.entityType}:${hit.entityId} confidence=${confidence.toFixed(4)} < ${this.MIN_SEMANTIC_CONFIDENCE}`,
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
const key = `${hit.entityType}:${hit.entityId}`;
|
||||
if (!grouped.has(key)) grouped.set(key, []);
|
||||
grouped.get(key).push(hit);
|
||||
const existing = candidates.get(key);
|
||||
if (!existing || confidence > existing.confidence) {
|
||||
candidates.set(key, { hit, confidence });
|
||||
}
|
||||
}
|
||||
|
||||
this.logger.log(`Filtered suggestions: ${candidates.size} passed threshold ${this.MIN_SEMANTIC_CONFIDENCE}`);
|
||||
|
||||
const resolvedTenantId = await this.tenantDbService.resolveTenantId(tenantId);
|
||||
const knex = await this.tenantDbService.getTenantKnexById(resolvedTenantId);
|
||||
|
||||
for (const [key, hits] of grouped.entries()) {
|
||||
for (const [key, { hit, confidence }] of candidates.entries()) {
|
||||
const [targetType, targetId] = key.split(':');
|
||||
const confidence = Math.min(0.99, 0.3 + hits.length * 0.1);
|
||||
await this.semanticLinkService.upsertSuggestedLink(knex, {
|
||||
sourceEntityType: projection.entityType,
|
||||
sourceEntityId: projection.entityId,
|
||||
@@ -288,10 +299,11 @@ export class SemanticOrchestratorService {
|
||||
sourceKind: chunk.sourceKind,
|
||||
text: chunk.text.slice(0, 180),
|
||||
})),
|
||||
matchedChunks: hits.slice(0, 3).map((hit) => ({
|
||||
matchedChunks: [{
|
||||
sourceKind: hit.sourceKind,
|
||||
text: String(hit.text || '').slice(0, 180),
|
||||
})),
|
||||
score: confidence,
|
||||
}],
|
||||
},
|
||||
suggestedByUserId: userId || null,
|
||||
});
|
||||
|
||||
@@ -220,7 +220,8 @@ export class MeilisearchService {
|
||||
{
|
||||
q: query,
|
||||
limit,
|
||||
...(hybrid ? { hybrid } : {}),
|
||||
showRankingScore: true,
|
||||
...(hybrid ? { hybrid, showRankingScoreDetails: true } : {}),
|
||||
},
|
||||
this.buildHeaders(config),
|
||||
);
|
||||
|
||||
Reference in New Issue
Block a user