Add phased knowledge layer: comments and semantic linking pipeline

This commit is contained in:
phyroslam
2026-04-11 11:40:01 -07:00
parent baf3997fb6
commit df183230d8
16 changed files with 1020 additions and 1 deletions

View File

@@ -0,0 +1,71 @@
import { Injectable } from '@nestjs/common';
/** Allowed values for the origin tag carried by a chunk. */
type SemanticChunkSourceKind = 'base_record' | 'comment' | 'mixed';

/**
 * A single piece of text cut out of a record for downstream semantic
 * processing, together with where it came from.
 */
export type SemanticChunk = {
  /** Zero-based position of this chunk within the list it was emitted in. */
  chunkIndex: number;

  /** Which kind of source the chunk text was taken from. */
  sourceKind: SemanticChunkSourceKind;

  /**
   * Identifier of the source item the text belongs to, or `null` when the
   * chunk is not tied to an identified item.
   */
  sourceRefId: string | null;

  /** The chunk's text content. */
  text: string;

  /** Free-form key/value details about the chunk; the shape is not enforced. */
  metadata: Record<string, any>;
};
@Injectable()
export class SemanticChunkerService {
  // Breaks a record's base narrative and its comments into an ordered list of
  // size-bounded text chunks, each tagged with where it came from.

  /** Upper bound, in UTF-16 code units, on the length of any emitted chunk. */
  private static readonly MAX_CHUNK_LENGTH = 500;

  /**
   * Chunks the base narrative first, then every comment in the given order.
   *
   * `chunkIndex` is global across the whole result (0, 1, 2, ...), while
   * `metadata.localIndex` restarts at 0 for the base narrative and for each
   * comment. Blank or missing text contributes no chunks.
   *
   * @param baseNarrative Main text of the record; may be empty.
   * @param comments Comments to chunk after the narrative; a nullish value is
   *   treated as "no comments".
   * @returns Chunks in emission order: base-record chunks, then comment chunks.
   */
  chunkText(
    baseNarrative: string,
    comments: Array<{ id: string; content: string }>,
  ): SemanticChunk[] {
    const chunks: SemanticChunk[] = [];

    for (const [index, text] of this.splitText(baseNarrative).entries()) {
      chunks.push({
        chunkIndex: chunks.length,
        sourceKind: 'base_record',
        sourceRefId: null,
        text,
        metadata: { section: 'base', localIndex: index },
      });
    }

    for (const comment of comments ?? []) {
      for (const [index, text] of this.splitText(comment.content).entries()) {
        chunks.push({
          chunkIndex: chunks.length,
          sourceKind: 'comment',
          sourceRefId: comment.id,
          text,
          metadata: { section: 'comment', localIndex: index, commentId: comment.id },
        });
      }
    }

    return chunks;
  }

  /**
   * Splits text into trimmed, non-empty pieces of at most
   * `MAX_CHUNK_LENGTH` code units.
   *
   * Paragraphs (separated by two or more newlines) are kept whole when they
   * fit; longer paragraphs are cut into consecutive fixed-size windows.
   *
   * @param text Raw text; nullish or whitespace-only input yields `[]`.
   * @returns The pieces in document order.
   */
  private splitText(text: string): string[] {
    const normalized = (text ?? '').trim();
    if (!normalized) return [];

    const limit = SemanticChunkerService.MAX_CHUNK_LENGTH;
    const pieces: string[] = [];

    for (const rawParagraph of normalized.split(/\n{2,}/)) {
      const paragraph = rawParagraph.trim();
      if (!paragraph) continue;

      if (paragraph.length <= limit) {
        pieces.push(paragraph);
        continue;
      }

      let cursor = 0;
      while (cursor < paragraph.length) {
        // The window end may be pulled back by one code unit so that a
        // surrogate pair is never torn across two chunks.
        const end = this.safeSliceEnd(paragraph, cursor + limit);
        const piece = paragraph.slice(cursor, end).trim();
        // A window made only of whitespace trims to '' and is dropped.
        if (piece) pieces.push(piece);
        cursor = end;
      }
    }

    return pieces;
  }

  /**
   * Clamps a proposed slice end to the text length and moves it back by one
   * code unit when it would fall between a high and a low surrogate.
   *
   * @param text Text being sliced.
   * @param proposedEnd Exclusive end index the caller would like to use.
   * @returns An exclusive end index that does not split a surrogate pair.
   */
  private safeSliceEnd(text: string, proposedEnd: number): number {
    if (proposedEnd >= text.length) return text.length;

    const before = text.charCodeAt(proposedEnd - 1);
    const after = text.charCodeAt(proposedEnd);
    const splitsPair =
      before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;

    return splitsPair ? proposedEnd - 1 : proposedEnd;
  }
}