From df183230d863d316231662b42103cfc24809dc2f Mon Sep 17 00:00:00 2001 From: phyroslam Date: Sat, 11 Apr 2026 11:40:01 -0700 Subject: [PATCH] Add phased knowledge layer: comments and semantic linking pipeline --- ...411000001_create_knowledge_layer_tables.js | 93 ++++++++ backend/src/app.module.ts | 2 + .../adapters/semantic-projection.adapter.ts | 80 +++++++ backend/src/knowledge/dto/comment.dto.ts | 24 ++ .../src/knowledge/dto/semantic-link.dto.ts | 52 ++++ backend/src/knowledge/knowledge.controller.ts | 124 ++++++++++ backend/src/knowledge/knowledge.module.ts | 16 ++ .../src/knowledge/services/comment.service.ts | 106 +++++++++ .../services/semantic-chunker.service.spec.ts | 20 ++ .../services/semantic-chunker.service.ts | 71 ++++++ .../services/semantic-link.service.spec.ts | 20 ++ .../services/semantic-link.service.ts | 117 +++++++++ .../services/semantic-orchestrator.service.ts | 225 ++++++++++++++++++ backend/src/object/object.module.ts | 3 +- backend/src/object/object.service.ts | 16 ++ backend/src/search/meilisearch.service.ts | 52 ++++ 16 files changed, 1020 insertions(+), 1 deletion(-) create mode 100644 backend/migrations/tenant/20260411000001_create_knowledge_layer_tables.js create mode 100644 backend/src/knowledge/adapters/semantic-projection.adapter.ts create mode 100644 backend/src/knowledge/dto/comment.dto.ts create mode 100644 backend/src/knowledge/dto/semantic-link.dto.ts create mode 100644 backend/src/knowledge/knowledge.controller.ts create mode 100644 backend/src/knowledge/knowledge.module.ts create mode 100644 backend/src/knowledge/services/comment.service.ts create mode 100644 backend/src/knowledge/services/semantic-chunker.service.spec.ts create mode 100644 backend/src/knowledge/services/semantic-chunker.service.ts create mode 100644 backend/src/knowledge/services/semantic-link.service.spec.ts create mode 100644 backend/src/knowledge/services/semantic-link.service.ts create mode 100644 backend/src/knowledge/services/semantic-orchestrator.service.ts diff --git a/backend/migrations/tenant/20260411000001_create_knowledge_layer_tables.js b/backend/migrations/tenant/20260411000001_create_knowledge_layer_tables.js new file mode 100644 index 0000000..a1bbfd8 --- /dev/null +++ b/backend/migrations/tenant/20260411000001_create_knowledge_layer_tables.js @@ -0,0 +1,93 @@ +/** + * @param { import("knex").Knex } knex + * @returns { Promise } + */ +exports.up = async function (knex) { + await knex.schema.createTable('comments', (table) => { + table.uuid('id').primary().defaultTo(knex.raw('(UUID())')); + table.string('parent_object_api_name').notNullable(); + table.uuid('parent_record_id').notNullable(); + table.uuid('author_user_id').notNullable(); + table.text('content').notNullable(); + table.timestamps(true, true); + + table.foreign('author_user_id').references('id').inTable('users').onDelete('CASCADE'); + table.index(['parent_object_api_name', 'parent_record_id'], 'comments_parent_idx'); + table.index(['author_user_id'], 'comments_author_idx'); + }); + + await knex.schema.createTable('semantic_documents', (table) => { + table.uuid('id').primary().defaultTo(knex.raw('(UUID())')); + table.string('entity_type').notNullable(); + table.uuid('entity_id').notNullable(); + table.string('title').nullable(); + table.text('narrative').nullable(); + table.json('metadata').nullable(); + table.json('source_summary').nullable(); + table.timestamps(true, true); + + table.unique(['entity_type', 'entity_id'], { + indexName: 'semantic_documents_entity_unique', + }); + table.index(['entity_type'], 'semantic_documents_type_idx'); + }); + + await knex.schema.createTable('semantic_chunks', (table) => { + table.uuid('id').primary().defaultTo(knex.raw('(UUID())')); + table.uuid('semantic_document_id').notNullable(); + table.integer('chunk_index').notNullable(); + table.string('source_kind').notNullable().defaultTo('base_record'); + table.uuid('source_ref_id').nullable(); + table.text('text').notNullable(); + table.json('metadata').nullable(); + table.timestamps(true, true); + + table.foreign('semantic_document_id').references('id').inTable('semantic_documents').onDelete('CASCADE'); + table.unique(['semantic_document_id', 'chunk_index'], { + indexName: 'semantic_chunks_doc_index_unique', + }); + table.index(['semantic_document_id'], 'semantic_chunks_document_idx'); + table.index(['source_kind'], 'semantic_chunks_source_kind_idx'); + }); + + await knex.schema.createTable('semantic_links', (table) => { + table.uuid('id').primary().defaultTo(knex.raw('(UUID())')); + table.string('source_entity_type').notNullable(); + table.uuid('source_entity_id').notNullable(); + table.string('target_entity_type').notNullable(); + table.uuid('target_entity_id').notNullable(); + table.string('link_type').notNullable().defaultTo('related_to'); + table.string('status').notNullable().defaultTo('suggested'); + table.string('origin').notNullable().defaultTo('semantic'); + table.decimal('confidence', 5, 4).notNullable().defaultTo(0); + table.text('reason').nullable(); + table.json('evidence').nullable(); + table.uuid('suggested_by_user_id').nullable(); + table.uuid('reviewed_by_user_id').nullable(); + table.timestamp('reviewed_at').nullable(); + table.timestamps(true, true); + + table.foreign('suggested_by_user_id').references('id').inTable('users').onDelete('SET NULL'); + table.foreign('reviewed_by_user_id').references('id').inTable('users').onDelete('SET NULL'); + + table.unique( + ['source_entity_type', 'source_entity_id', 'target_entity_type', 'target_entity_id', 'link_type'], + { indexName: 'semantic_links_unique_pair_type' }, + ); + + table.index(['source_entity_type', 'source_entity_id'], 'semantic_links_source_idx'); + table.index(['target_entity_type', 'target_entity_id'], 'semantic_links_target_idx'); + table.index(['status'], 'semantic_links_status_idx'); + }); +}; + +/** + * @param { import("knex").Knex } knex + * @returns { Promise } + */ +exports.down = async function (knex) { + await knex.schema.dropTableIfExists('semantic_links'); + await knex.schema.dropTableIfExists('semantic_chunks'); + await knex.schema.dropTableIfExists('semantic_documents'); + await knex.schema.dropTableIfExists('comments'); +}; diff --git a/backend/src/app.module.ts b/backend/src/app.module.ts index f2fe194..fa47d08 100644 --- a/backend/src/app.module.ts +++ b/backend/src/app.module.ts @@ -10,6 +10,7 @@ import { PageLayoutModule } from './page-layout/page-layout.module'; import { VoiceModule } from './voice/voice.module'; import { AiAssistantModule } from './ai-assistant/ai-assistant.module'; import { SavedListViewModule } from './saved-list-view/saved-list-view.module'; +import { KnowledgeModule } from './knowledge/knowledge.module'; @Module({ imports: [ @@ -26,6 +27,7 @@ import { SavedListViewModule } from './saved-list-view/saved-list-view.module'; VoiceModule, AiAssistantModule, SavedListViewModule, + KnowledgeModule, ], }) export class AppModule {} diff --git a/backend/src/knowledge/adapters/semantic-projection.adapter.ts b/backend/src/knowledge/adapters/semantic-projection.adapter.ts new file mode 100644 index 0000000..f49edff --- /dev/null +++ b/backend/src/knowledge/adapters/semantic-projection.adapter.ts @@ -0,0 +1,80 @@ +export type SemanticProjectionInput = { + objectApiName: string; + record: Record; + objectDefinition?: any; + comments: Array<{ id: string; content: string; author_user_id: string; created_at?: string }>; +}; + +export type SemanticProjection = { + entityType: string; + entityId: string; + title: string; + narrative: string; + metadata: Record; + sourceSummary: { + includedFieldCount: number; + includedCommentCount: number; + includesComments: boolean; + }; +}; + +export interface SemanticProjectionAdapter { + supports(objectApiName: string): boolean; + buildProjection(input: SemanticProjectionInput): SemanticProjection; +} + +const EXCLUDED_FIELDS = new Set([ + 'id', + 'created_at', + 'updated_at', + 'ownerId', + 'owner_id', + 'tenantId', + 'tenant_id', +]); + +export class DefaultSemanticProjectionAdapter implements SemanticProjectionAdapter { + supports(): boolean { + return true; + } + + buildProjection(input: SemanticProjectionInput): SemanticProjection { + const fieldEntries = Object.entries(input.record || {}).filter(([key, value]) => { + if (EXCLUDED_FIELDS.has(key)) return false; + if (value === null || value === undefined || value === '') return false; + return ['string', 'number', 'boolean'].includes(typeof value); + }); + + const title = + input.record?.name || + input.record?.title || + input.record?.subject || + `${input.objectApiName} ${input.record?.id || ''}`.trim(); + + const fieldNarrative = fieldEntries + .map(([key, value]) => `${key}: ${String(value)}`) + .join('\n'); + + const commentNarrative = (input.comments || []) + .map((comment, index) => `Comment ${index + 1}: ${comment.content}`) + .join('\n'); + + const narrative = [fieldNarrative, commentNarrative].filter(Boolean).join('\n\n'); + + return { + entityType: input.objectApiName, + entityId: input.record.id, + title, + narrative, + metadata: { + objectApiName: input.objectApiName, + hasComments: (input.comments || []).length > 0, + }, + sourceSummary: { + includedFieldCount: fieldEntries.length, + includedCommentCount: (input.comments || []).length, + includesComments: (input.comments || []).length > 0, + }, + }; + } +} diff --git a/backend/src/knowledge/dto/comment.dto.ts b/backend/src/knowledge/dto/comment.dto.ts new file mode 100644 index 0000000..6b4a4fa --- /dev/null +++ b/backend/src/knowledge/dto/comment.dto.ts @@ -0,0 +1,24 @@ +import { IsNotEmpty, IsOptional, IsString, MaxLength, MinLength } from 'class-validator'; + +export class CreateCommentDto { + @IsString() + @IsNotEmpty() + parentObjectApiName: string; + + @IsString() + @IsNotEmpty() + parentRecordId: string; + + @IsString() + @MinLength(1) + @MaxLength(10000) + content: string; +} + +export class UpdateCommentDto { + @IsOptional() + @IsString() + @MinLength(1) + @MaxLength(10000) + content?: string; +} diff --git a/backend/src/knowledge/dto/semantic-link.dto.ts b/backend/src/knowledge/dto/semantic-link.dto.ts new file mode 100644 index 0000000..c6131d8 --- /dev/null +++ b/backend/src/knowledge/dto/semantic-link.dto.ts @@ -0,0 +1,52 @@ +import { IsIn, IsNumber, IsObject, IsOptional, IsString, Max, Min } from 'class-validator'; + +export const SEMANTIC_LINK_STATUSES = ['suggested', 'approved', 'rejected', 'dismissed'] as const; +export const SEMANTIC_LINK_ORIGINS = ['manual', 'semantic', 'llm', 'hybrid', 'rule_based'] as const; + +export class ReviewSemanticLinkDto { + @IsString() + @IsIn(SEMANTIC_LINK_STATUSES) + status: (typeof SEMANTIC_LINK_STATUSES)[number]; +} + +export class UpsertSemanticLinkDto { + @IsString() + sourceEntityType: string; + + @IsString() + sourceEntityId: string; + + @IsString() + targetEntityType: string; + + @IsString() + targetEntityId: string; + + @IsOptional() + @IsString() + linkType?: string; + + @IsOptional() + @IsString() + @IsIn(SEMANTIC_LINK_STATUSES) + status?: (typeof SEMANTIC_LINK_STATUSES)[number]; + + @IsOptional() + @IsString() + @IsIn(SEMANTIC_LINK_ORIGINS) + origin?: (typeof SEMANTIC_LINK_ORIGINS)[number]; + + @IsOptional() + @IsNumber() + @Min(0) + @Max(1) + confidence?: number; + + @IsOptional() + @IsString() + reason?: string; + + @IsOptional() + @IsObject() + evidence?: Record; +} diff --git a/backend/src/knowledge/knowledge.controller.ts b/backend/src/knowledge/knowledge.controller.ts new file mode 100644 index 0000000..3415673 --- /dev/null +++ b/backend/src/knowledge/knowledge.controller.ts @@ -0,0 +1,124 @@ +import { + Body, + Controller, + Delete, + Get, + Param, + Patch, + Post, + Query, + UseGuards, +} from '@nestjs/common'; +import { JwtAuthGuard } from '../auth/jwt-auth.guard'; +import { CurrentUser } from '../auth/current-user.decorator'; +import { TenantId } from '../tenant/tenant.decorator'; +import { CreateCommentDto, UpdateCommentDto } from './dto/comment.dto'; +import { ReviewSemanticLinkDto } from './dto/semantic-link.dto'; +import { CommentService } from './services/comment.service'; +import { SemanticOrchestratorService } from './services/semantic-orchestrator.service'; +import { SemanticLinkService } from './services/semantic-link.service'; +import { TenantDatabaseService } from '../tenant/tenant-database.service'; + +@Controller('knowledge') +@UseGuards(JwtAuthGuard) +export class KnowledgeController { + constructor( + private readonly commentService: CommentService, + private readonly semanticOrchestratorService: SemanticOrchestratorService, + private readonly semanticLinkService: SemanticLinkService, + private readonly tenantDbService: TenantDatabaseService, + ) {} + + @Get('comments/:objectApiName/:recordId') + async getComments( + @TenantId() tenantId: string, + @Param('objectApiName') objectApiName: string, + @Param('recordId') recordId: string, + ) { + return this.commentService.listComments(tenantId, objectApiName, recordId); + } + + @Post('comments') + async createComment( + @TenantId() tenantId: string, + @Body() dto: CreateCommentDto, + @CurrentUser() user: any, + ) { + return this.commentService.createComment(tenantId, dto, user.userId); + } + + @Patch('comments/:id') + async updateComment( + @TenantId() tenantId: string, + @Param('id') id: string, + @Body() dto: UpdateCommentDto, + @CurrentUser() user: any, + ) { + return this.commentService.updateComment(tenantId, id, dto, user.userId); + } + + @Delete('comments/:id') + async deleteComment( + @TenantId() tenantId: string, + @Param('id') id: string, + @CurrentUser() user: any, + ) { + return this.commentService.deleteComment(tenantId, id, user.userId); + } + + @Post('semantic/refresh/:objectApiName/:recordId') + async refreshSemantic( + @TenantId() tenantId: string, + @Param('objectApiName') objectApiName: string, + @Param('recordId') recordId: string, + @CurrentUser() user: any, + ) { + return this.semanticOrchestratorService.refreshRecord( + tenantId, + objectApiName, + recordId, + user.userId, + 'manual_refresh', + ); + } + + @Post('semantic/reindex/:objectApiName') + async reindexObject( + @TenantId() tenantId: string, + @Param('objectApiName') objectApiName: string, + @CurrentUser() user: any, + @Query('limit') limit?: string, + ) { + const parsedLimit = Number.isFinite(Number(limit)) ? Number(limit) : 250; + return this.semanticOrchestratorService.reindexObject( + tenantId, + objectApiName, + user.userId, + parsedLimit, + ); + } + + @Get('semantic/links/:objectApiName/:recordId') + async listLinks( + @TenantId() tenantId: string, + @Param('objectApiName') objectApiName: string, + @Param('recordId') recordId: string, + @Query('status') status?: string, + ) { + const resolvedTenantId = await this.tenantDbService.resolveTenantId(tenantId); + const knex = await this.tenantDbService.getTenantKnexById(resolvedTenantId); + return this.semanticLinkService.listForRecord(knex, objectApiName, recordId, status); + } + + @Patch('semantic/links/:id/review') + async reviewLink( + @TenantId() tenantId: string, + @Param('id') id: string, + @Body() dto: ReviewSemanticLinkDto, + @CurrentUser() user: any, + ) { + const resolvedTenantId = await this.tenantDbService.resolveTenantId(tenantId); + const knex = await this.tenantDbService.getTenantKnexById(resolvedTenantId); + return this.semanticLinkService.reviewLink(knex, id, dto.status, user.userId); + } +} diff --git a/backend/src/knowledge/knowledge.module.ts b/backend/src/knowledge/knowledge.module.ts new file mode 100644 index 0000000..98803e7 --- /dev/null +++ b/backend/src/knowledge/knowledge.module.ts @@ -0,0 +1,16 @@ +import { Module } from '@nestjs/common'; +import { KnowledgeController } from './knowledge.controller'; +import { CommentService } from './services/comment.service'; +import { SemanticOrchestratorService } from './services/semantic-orchestrator.service'; +import { SemanticChunkerService } from './services/semantic-chunker.service'; +import { SemanticLinkService } from './services/semantic-link.service'; +import { TenantModule } from '../tenant/tenant.module'; +import { MeilisearchModule } from '../search/meilisearch.module'; + +@Module({ + imports: [TenantModule, MeilisearchModule], + controllers: [KnowledgeController], + providers: [CommentService, SemanticOrchestratorService, SemanticChunkerService, SemanticLinkService], + exports: [SemanticOrchestratorService], +}) +export class KnowledgeModule {} diff --git a/backend/src/knowledge/services/comment.service.ts b/backend/src/knowledge/services/comment.service.ts new file mode 100644 index 0000000..8c35ad6 --- /dev/null +++ b/backend/src/knowledge/services/comment.service.ts @@ -0,0 +1,106 @@ +import { ForbiddenException, Injectable, NotFoundException } from '@nestjs/common'; +import { TenantDatabaseService } from '../../tenant/tenant-database.service'; +import { CreateCommentDto, UpdateCommentDto } from '../dto/comment.dto'; +import { SemanticOrchestratorService } from './semantic-orchestrator.service'; + +@Injectable() +export class CommentService { + constructor( + private readonly tenantDbService: TenantDatabaseService, + private readonly semanticOrchestratorService: SemanticOrchestratorService, + ) {} + + async listComments(tenantId: string, parentObjectApiName: string, parentRecordId: string) { + const knex = await this.getKnex(tenantId); + return knex('comments') + .where({ + parent_object_api_name: parentObjectApiName, + parent_record_id: parentRecordId, + }) + .orderBy('created_at', 'desc'); + } + + async createComment(tenantId: string, dto: CreateCommentDto, userId: string) { + const knex = await this.getKnex(tenantId); + const [created] = await knex('comments') + .insert({ + parent_object_api_name: dto.parentObjectApiName, + parent_record_id: dto.parentRecordId, + author_user_id: userId, + content: dto.content, + created_at: knex.fn.now(), + updated_at: knex.fn.now(), + }) + .returning('*'); + + await this.semanticOrchestratorService.refreshRecord( + tenantId, + dto.parentObjectApiName, + dto.parentRecordId, + userId, + 'comment_created', + ); + + return created; + } + + async updateComment(tenantId: string, commentId: string, dto: UpdateCommentDto, userId: string) { + const knex = await this.getKnex(tenantId); + const existing = await knex('comments').where({ id: commentId }).first(); + + if (!existing) { + throw new NotFoundException('Comment not found'); + } + + if (existing.author_user_id !== userId) { + throw new ForbiddenException('Only the author can edit this comment'); + } + + await knex('comments') + .where({ id: commentId }) + .update({ + ...(dto.content ? { content: dto.content } : {}), + updated_at: knex.fn.now(), + }); + + await this.semanticOrchestratorService.refreshRecord( + tenantId, + existing.parent_object_api_name, + existing.parent_record_id, + userId, + 'comment_updated', + ); + + return knex('comments').where({ id: commentId }).first(); + } + + async deleteComment(tenantId: string, commentId: string, userId: string) { + const knex = await this.getKnex(tenantId); + const existing = await knex('comments').where({ id: commentId }).first(); + + if (!existing) { + throw new NotFoundException('Comment not found'); + } + + if (existing.author_user_id !== userId) { + throw new ForbiddenException('Only the author can delete this comment'); + } + + await knex('comments').where({ id: commentId }).delete(); + + await this.semanticOrchestratorService.refreshRecord( + tenantId, + existing.parent_object_api_name, + existing.parent_record_id, + userId, + 'comment_deleted', + ); + + return { success: true }; + } + + private async getKnex(tenantId: string) { + const resolvedTenantId = await this.tenantDbService.resolveTenantId(tenantId); + return this.tenantDbService.getTenantKnexById(resolvedTenantId); + } +} diff --git a/backend/src/knowledge/services/semantic-chunker.service.spec.ts b/backend/src/knowledge/services/semantic-chunker.service.spec.ts new file mode 100644 index 0000000..b6b1ff6 --- /dev/null +++ b/backend/src/knowledge/services/semantic-chunker.service.spec.ts @@ -0,0 +1,20 @@ +import { SemanticChunkerService } from './semantic-chunker.service'; + +describe('SemanticChunkerService', () => { + let service: SemanticChunkerService; + + beforeEach(() => { + service = new SemanticChunkerService(); + }); + + it('creates chunks from base narrative and comments', () => { + const chunks = service.chunkText('Intro paragraph\n\nSecond paragraph', [ + { id: 'c-1', content: 'Comment body' }, + ]); + + expect(chunks).toHaveLength(3); + expect(chunks[0].sourceKind).toBe('base_record'); + expect(chunks[2].sourceKind).toBe('comment'); + expect(chunks[2].sourceRefId).toBe('c-1'); + }); +}); diff --git a/backend/src/knowledge/services/semantic-chunker.service.ts b/backend/src/knowledge/services/semantic-chunker.service.ts new file mode 100644 index 0000000..decd18d --- /dev/null +++ b/backend/src/knowledge/services/semantic-chunker.service.ts @@ -0,0 +1,71 @@ +import { Injectable } from '@nestjs/common'; + +export type SemanticChunk = { + chunkIndex: number; + sourceKind: 'base_record' | 'comment' | 'mixed'; + sourceRefId: string | null; + text: string; + metadata: Record; +}; + +@Injectable() +export class SemanticChunkerService { + chunkText( + baseNarrative: string, + comments: Array<{ id: string; content: string }>, + ): SemanticChunk[] { + const chunks: SemanticChunk[] = []; + + const baseParts = this.splitText(baseNarrative); + for (const [index, text] of baseParts.entries()) { + chunks.push({ + chunkIndex: chunks.length, + sourceKind: 'base_record', + sourceRefId: null, + text, + metadata: { section: 'base', localIndex: index }, + }); + } + + for (const comment of comments || []) { + const commentParts = this.splitText(comment.content); + for (const [index, text] of commentParts.entries()) { + chunks.push({ + chunkIndex: chunks.length, + sourceKind: 'comment', + sourceRefId: comment.id, + text, + metadata: { section: 'comment', localIndex: index, commentId: comment.id }, + }); + } + } + + return chunks; + } + + private splitText(text: string): string[] { + const normalized = (text || '').trim(); + if (!normalized) return []; + + const paragraphs = normalized + .split(/\n{2,}/) + .map((part) => part.trim()) + .filter(Boolean); + + const chunks: string[] = []; + for (const paragraph of paragraphs) { + if (paragraph.length <= 500) { + chunks.push(paragraph); + continue; + } + + let cursor = 0; + while (cursor < paragraph.length) { + chunks.push(paragraph.slice(cursor, cursor + 500).trim()); + cursor += 500; + } + } + + return chunks.filter(Boolean); + } +} diff --git a/backend/src/knowledge/services/semantic-link.service.spec.ts b/backend/src/knowledge/services/semantic-link.service.spec.ts new file mode 100644 index 0000000..2db60fb --- /dev/null +++ b/backend/src/knowledge/services/semantic-link.service.spec.ts @@ -0,0 +1,20 @@ +import { SemanticLinkService } from './semantic-link.service'; + +describe('SemanticLinkService', () => { + let service: SemanticLinkService; + + beforeEach(() => { + service = new SemanticLinkService(); + }); + + it('normalizes undirected pairs in deterministic order', () => { + const normalized = service.normalizeUndirectedPair('Contact', 'b-id', 'Account', 'a-id'); + + expect(normalized).toEqual({ + sourceEntityType: 'Account', + sourceEntityId: 'a-id', + targetEntityType: 'Contact', + targetEntityId: 'b-id', + }); + }); +}); diff --git a/backend/src/knowledge/services/semantic-link.service.ts b/backend/src/knowledge/services/semantic-link.service.ts new file mode 100644 index 0000000..5ee9cc1 --- /dev/null +++ b/backend/src/knowledge/services/semantic-link.service.ts @@ -0,0 +1,117 @@ +import { Injectable, NotFoundException } from '@nestjs/common'; + +export type SemanticLinkUpsertInput = { + sourceEntityType: string; + sourceEntityId: string; + targetEntityType: string; + targetEntityId: string; + linkType?: string; + status?: string; + origin?: string; + confidence?: number; + reason?: string; + evidence?: Record; + suggestedByUserId?: string | null; +}; + +@Injectable() +export class SemanticLinkService { + normalizeUndirectedPair( + sourceEntityType: string, + sourceEntityId: string, + targetEntityType: string, + targetEntityId: string, + ) { + const sourceKey = `${sourceEntityType}:${sourceEntityId}`; + const targetKey = `${targetEntityType}:${targetEntityId}`; + + if (sourceKey <= targetKey) { + return { + sourceEntityType, + sourceEntityId, + targetEntityType, + targetEntityId, + }; + } + + return { + sourceEntityType: targetEntityType, + sourceEntityId: targetEntityId, + targetEntityType: sourceEntityType, + targetEntityId: sourceEntityId, + }; + } + + async upsertSuggestedLink(knex: any, input: SemanticLinkUpsertInput) { + const normalized = this.normalizeUndirectedPair( + input.sourceEntityType, + input.sourceEntityId, + input.targetEntityType, + input.targetEntityId, + ); + + const payload = { + ...normalized, + link_type: input.linkType || 'related_to', + status: input.status || 'suggested', + origin: input.origin || 'semantic', + confidence: input.confidence ?? 0, + reason: input.reason || null, + evidence: input.evidence ? JSON.stringify(input.evidence) : null, + suggested_by_user_id: input.suggestedByUserId || null, + updated_at: knex.fn.now(), + created_at: knex.fn.now(), + }; + + await knex('semantic_links') + .insert(payload) + .onConflict([ + 'source_entity_type', + 'source_entity_id', + 'target_entity_type', + 'target_entity_id', + 'link_type', + ]) + .merge({ + status: knex.raw("IF(status = 'approved', status, VALUES(status))"), + origin: payload.origin, + confidence: knex.raw('GREATEST(confidence, VALUES(confidence))'), + reason: payload.reason, + evidence: payload.evidence, + updated_at: knex.fn.now(), + }); + } + + async listForRecord(knex: any, entityType: string, entityId: string, status?: string) { + const query = knex('semantic_links') + .where((builder: any) => { + builder + .where({ source_entity_type: entityType, source_entity_id: entityId }) + .orWhere({ target_entity_type: entityType, target_entity_id: entityId }); + }) + .orderBy('updated_at', 'desc'); + + if (status) { + query.andWhere({ status }); + } + + return query; + } + + async reviewLink(knex: any, linkId: string, status: string, reviewerUserId: string) { + const updated = await knex('semantic_links') + .where({ id: linkId }) + .update({ + status, + reviewed_by_user_id: reviewerUserId, + reviewed_at: knex.fn.now(), + updated_at: knex.fn.now(), + }); + + if (!updated) { + throw new NotFoundException('Semantic link not found'); + } + + return knex('semantic_links').where({ id: linkId }).first(); + } +} diff --git a/backend/src/knowledge/services/semantic-orchestrator.service.ts b/backend/src/knowledge/services/semantic-orchestrator.service.ts new file mode 100644 index 0000000..d2e5e55 --- /dev/null +++ b/backend/src/knowledge/services/semantic-orchestrator.service.ts @@ -0,0 +1,225 @@ +import { Injectable, Logger } from '@nestjs/common'; +import { TenantDatabaseService } from '../../tenant/tenant-database.service'; +import { MeilisearchService } from '../../search/meilisearch.service'; +import { + DefaultSemanticProjectionAdapter, + SemanticProjectionAdapter, +} from '../adapters/semantic-projection.adapter'; +import { SemanticChunkerService } from './semantic-chunker.service'; +import { SemanticLinkService } from './semantic-link.service'; + +@Injectable() +export class SemanticOrchestratorService { + private readonly logger = new Logger(SemanticOrchestratorService.name); + private readonly adapters: SemanticProjectionAdapter[] = [new DefaultSemanticProjectionAdapter()]; + + constructor( + private readonly tenantDbService: TenantDatabaseService, + private readonly meilisearchService: MeilisearchService, + private readonly chunkerService: SemanticChunkerService, + private readonly semanticLinkService: SemanticLinkService, + ) {} + + async refreshRecord( + tenantId: string, + objectApiName: string, + recordId: string, + userId?: string, + trigger: string = 'manual', + ) { + const resolvedTenantId = await this.tenantDbService.resolveTenantId(tenantId); + const knex = await this.tenantDbService.getTenantKnexById(resolvedTenantId); + + const objectDefinition = await knex('object_definitions').where({ apiName: objectApiName }).first(); + if (!objectDefinition) { + this.logger.warn(`Object definition ${objectApiName} not found. Skipping semantic refresh.`); + return { skipped: true }; + } + + const tableName = this.getTableName(objectDefinition); + const record = await knex(tableName).where({ id: recordId }).first(); + if (!record) { + return { skipped: true }; + } + + const comments = await knex('comments') + .where({ + parent_object_api_name: objectApiName, + parent_record_id: recordId, + }) + .orderBy('created_at', 'asc'); + + const adapter = this.adapters.find((candidate) => candidate.supports(objectApiName))!; + const projection = adapter.buildProjection({ + objectApiName, + record, + objectDefinition, + comments, + }); + + const documentId = await this.upsertSemanticDocument(knex, projection); + const chunks = this.chunkerService.chunkText(projection.narrative, comments); + await this.replaceChunks(knex, documentId, chunks); + + await this.indexChunks(resolvedTenantId, projection, chunks); + await this.generateSuggestions(resolvedTenantId, projection, chunks, userId, trigger); + + return { documentId, chunkCount: chunks.length }; + } + + async reindexObject(tenantId: string, objectApiName: string, userId?: string, limit = 250) { + const resolvedTenantId = await this.tenantDbService.resolveTenantId(tenantId); + const knex = await this.tenantDbService.getTenantKnexById(resolvedTenantId); + const objectDefinition = await knex('object_definitions').where({ apiName: objectApiName }).first(); + if (!objectDefinition) { + return { total: 0, processed: 0 }; + } + + const tableName = this.getTableName(objectDefinition); + const records = await knex(tableName).select('id').limit(limit); + + let processed = 0; + for (const record of records) { + await this.refreshRecord(resolvedTenantId, objectApiName, record.id, userId, 'batch_reindex'); + processed += 1; + } + + return { total: records.length, processed }; + } + + private async upsertSemanticDocument(knex: any, projection: any): Promise { + const existing = await knex('semantic_documents') + .where({ entity_type: projection.entityType, entity_id: projection.entityId }) + .first(); + + if (existing) { + await knex('semantic_documents') + .where({ id: existing.id }) + .update({ + title: projection.title, + narrative: projection.narrative, + metadata: JSON.stringify(projection.metadata || {}), + source_summary: JSON.stringify(projection.sourceSummary || {}), + updated_at: knex.fn.now(), + }); + return existing.id; + } + + const [created] = await knex('semantic_documents') + .insert({ + entity_type: projection.entityType, + entity_id: projection.entityId, + title: projection.title, + narrative: projection.narrative, + metadata: JSON.stringify(projection.metadata || {}), + source_summary: JSON.stringify(projection.sourceSummary || {}), + created_at: knex.fn.now(), + updated_at: knex.fn.now(), + }) + .returning('id'); + + return typeof created === 'string' ? created : created.id; + } + + private async replaceChunks(knex: any, documentId: string, chunks: any[]) { + await knex('semantic_chunks').where({ semantic_document_id: documentId }).delete(); + if (!chunks.length) return; + + await knex('semantic_chunks').insert( + chunks.map((chunk) => ({ + semantic_document_id: documentId, + chunk_index: chunk.chunkIndex, + source_kind: chunk.sourceKind, + source_ref_id: chunk.sourceRefId, + text: chunk.text, + metadata: JSON.stringify(chunk.metadata || {}), + created_at: knex.fn.now(), + updated_at: knex.fn.now(), + })), + ); + } + + private async indexChunks(tenantId: string, projection: any, chunks: any[]) { + if (!this.meilisearchService.isEnabled()) { + return; + } + + const indexName = this.meilisearchService.buildSemanticChunkIndexName(tenantId); + await this.meilisearchService.upsertDocuments(indexName, chunks.map((chunk) => ({ + id: `${projection.entityType}:${projection.entityId}:${chunk.chunkIndex}`, + entityType: projection.entityType, + entityId: projection.entityId, + title: projection.title, + sourceKind: chunk.sourceKind, + sourceRefId: chunk.sourceRefId, + text: chunk.text, + }))); + } + + private async generateSuggestions( + tenantId: string, + projection: any, + chunks: any[], + userId?: string, + trigger: string = 'semantic_refresh', + ) { + if (!this.meilisearchService.isEnabled() || !chunks.length) { + return; + } + + const indexName = this.meilisearchService.buildSemanticChunkIndexName(tenantId); + const queryText = chunks.slice(0, 3).map((chunk) => chunk.text).join(' ').slice(0, 1200); + const search = await this.meilisearchService.searchIndex(indexName, queryText, 20); + + const grouped = new Map(); + for (const hit of search.hits || []) { + if (hit.entityType === projection.entityType && hit.entityId === projection.entityId) { + continue; + } + const key = `${hit.entityType}:${hit.entityId}`; + if (!grouped.has(key)) grouped.set(key, []); + grouped.get(key).push(hit); + } + + const resolvedTenantId = await this.tenantDbService.resolveTenantId(tenantId); + const knex = await this.tenantDbService.getTenantKnexById(resolvedTenantId); + + for (const [key, hits] of grouped.entries()) { + const [targetType, targetId] = key.split(':'); + const confidence = Math.min(0.99, 0.3 + hits.length * 0.1); + await this.semanticLinkService.upsertSuggestedLink(knex, { + sourceEntityType: projection.entityType, + sourceEntityId: projection.entityId, + targetEntityType: targetType, + targetEntityId: targetId, + linkType: 'related_to', + status: 'suggested', + origin: 'semantic', + confidence, + reason: `Suggested from semantic similarity (${trigger})`, + evidence: { + trigger, + sourceSignals: chunks.slice(0, 2).map((chunk) => ({ + sourceKind: chunk.sourceKind, + text: chunk.text.slice(0, 180), + })), + matchedChunks: hits.slice(0, 3).map((hit) => ({ + sourceKind: hit.sourceKind, + text: String(hit.text || '').slice(0, 180), + })), + }, + suggestedByUserId: userId || null, + }); + } + } + + private getTableName(objectDefinition: any): string { + if (objectDefinition.tableName) return objectDefinition.tableName; + + if (objectDefinition.pluralLabel) { + return objectDefinition.pluralLabel.toLowerCase().replace(/[^a-z0-9]+/g, '_'); + } + + return `${objectDefinition.apiName.toLowerCase()}s`; + } +} diff --git a/backend/src/object/object.module.ts b/backend/src/object/object.module.ts index 7a8e873..7d93390 100644 --- a/backend/src/object/object.module.ts +++ b/backend/src/object/object.module.ts @@ -10,9 +10,10 @@ import { RbacModule } from '../rbac/rbac.module'; import { ModelRegistry } from './models/model.registry'; import { ModelService } from './models/model.service'; import { MeilisearchModule } from '../search/meilisearch.module'; +import { KnowledgeModule } from '../knowledge/knowledge.module'; @Module({ - imports: [TenantModule, MigrationModule, RbacModule, MeilisearchModule], + imports: [TenantModule, MigrationModule, RbacModule, MeilisearchModule, KnowledgeModule], providers: [ ObjectService, SchemaManagementService, diff --git a/backend/src/object/object.service.ts b/backend/src/object/object.service.ts index 263b00f..dfd08d7 100644 --- a/backend/src/object/object.service.ts +++ b/backend/src/object/object.service.ts @@ -9,6 +9,7 @@ import { FieldDefinition } from '../models/field-definition.model'; import { User } from '../models/user.model'; import { ObjectMetadata } from './models/dynamic-model.factory'; import { MeilisearchService } from '../search/meilisearch.service'; +import { SemanticOrchestratorService } from '../knowledge/services/semantic-orchestrator.service'; type SearchFilter = { field: string; @@ -39,6 +40,7 @@ export class ObjectService { private modelService: ModelService, private authService: AuthorizationService, private meilisearchService: MeilisearchService, + private semanticOrchestratorService: SemanticOrchestratorService, ) {} // Setup endpoints - Object metadata management @@ -1128,6 +1130,13 @@ export class ObjectService { ); const record = await boundModel.query().insert(normalizedRecordData); await this.indexRecord(resolvedTenantId, objectApiName, objectDefModel.fields, record); + await this.semanticOrchestratorService.refreshRecord( + resolvedTenantId, + objectApiName, + record.id, + userId, + 'record_created', + ); return record; } @@ -1197,6 +1206,13 @@ export class ObjectService { await boundModel.query().patch(normalizedEditableData).where({ id: recordId }); const record = await boundModel.query().where({ id: recordId }).first(); await this.indexRecord(resolvedTenantId, objectApiName, objectDefModel.fields, record); + await this.semanticOrchestratorService.refreshRecord( + resolvedTenantId, + objectApiName, + recordId, + userId, + 'record_updated', + ); return record; } diff --git a/backend/src/search/meilisearch.service.ts b/backend/src/search/meilisearch.service.ts index ecd5d2a..e8f2bbb 100644 --- a/backend/src/search/meilisearch.service.ts +++ b/backend/src/search/meilisearch.service.ts @@ -158,6 +158,58 @@ export class MeilisearchService { } } + buildSemanticChunkIndexName(tenantId: string): string { + const config = this.getConfig(); + const prefix = config?.indexPrefix || 'tenant_'; + return `${prefix}${tenantId}_semantic_chunks`.toLowerCase(); + } + + async upsertDocuments(indexName: string, documents: Record[]): Promise { + const config = this.getConfig(); + if (!config || !Array.isArray(documents) || documents.length === 0) return; + + const url = `${config.host}/indexes/${encodeURIComponent(indexName)}/documents?primaryKey=id`; + try { + const response = await this.requestJson('POST', url, documents, this.buildHeaders(config)); + if (!this.isSuccessStatus(response.status)) { + this.logger.warn(`Meilisearch document upsert failed for index ${indexName}: ${response.status}`); + } + } catch (error) { + this.logger.warn(`Meilisearch document upsert failed: ${error.message}`); + } + } + + async searchIndex( + indexName: string, + query: string, + limit = 20, + ): Promise<{ hits: any[]; total: number }> { + const config = this.getConfig(); + if (!config) return { hits: [], total: 0 }; + + const url = `${config.host}/indexes/${encodeURIComponent(indexName)}/search`; + try { + const response = await this.requestJson( + 'POST', + url, + { q: query, limit }, + this.buildHeaders(config), + ); + + if (!this.isSuccessStatus(response.status)) { + this.logger.warn(`Meilisearch search failed for index ${indexName}: ${response.status}`); + return { hits: [], total: 0 }; + } + + const hits = Array.isArray(response.body?.hits) ? response.body.hits : []; + const total = response.body?.estimatedTotalHits ?? response.body?.nbHits ?? hits.length; + return { hits, total }; + } catch (error) { + this.logger.warn(`Meilisearch search failed: ${error.message}`); + return { hits: [], total: 0 }; + } + } + private getConfig(): MeiliConfig | null { const host = process.env.MEILI_HOST || process.env.MEILISEARCH_HOST; if (!host) return null;