// Mirror of https://github.com/zadam/trilium.git (synced 2025-11-06 21:36:05 +01:00).
// 615 lines, 18 KiB, TypeScript.
/**
 * Context extraction module for LLM features.
 *
 * Provides methods to extract relevant context from notes for LLM processing.
 */
|
|
|
|
import becca from '../../../becca/becca.js';
|
|
import { getNoteContent, formatNoteContent, sanitizeHtmlContent } from './note_content.js';
|
|
import { detectLanguage, extractCodeStructure } from './code_handlers.js';
|
|
import { chunkContent, semanticChunking } from './content_chunking.js';
|
|
import type { ContentChunk, ChunkOptions } from './content_chunking.js';
|
|
import { summarizeContent, extractKeyPoints } from './summarization.js';
|
|
import { getParentNotes, getParentContext, getChildContext, getLinkedNotesContext } from './hierarchy.js';
|
|
import { getSemanticContext } from './semantic_context.js';
|
|
|
|
/**
 * Options for context extraction.
 *
 * Every field is optional; fields that are omitted fall back to the values in
 * `DEFAULT_CONTEXT_OPTIONS` when passed to `ContextExtractor.extractContext`.
 */
export interface ContextOptions {
    /** Include the parent hierarchy section in the context. */
    includeParents?: boolean;

    /** Include child notes in the context. */
    includeChildren?: boolean;

    /** Include linked notes in the context. */
    includeLinks?: boolean;

    /** Include semantically similar notes in the context. */
    includeSimilar?: boolean;

    /** Include the note's own content in the context. */
    includeContent?: boolean;

    /** Maximum depth for the parent hierarchy. */
    maxParentDepth?: number;

    /** Maximum number of children to include. */
    maxChildren?: number;

    /** Maximum number of linked notes to include. */
    maxLinks?: number;

    /** Maximum number of similar notes to include. */
    maxSimilarNotes?: number;

    /**
     * Maximum content length (in characters). Content longer than this is
     * replaced by a summary when the context is assembled.
     */
    maxContentLength?: number;
}
|
|
|
|
/**
 * Default options for context extraction.
 *
 * Used as the base that caller-supplied `ContextOptions` are merged over.
 * The object is frozen so the shared defaults cannot be mutated by accident.
 */
const DEFAULT_CONTEXT_OPTIONS: Readonly<Required<ContextOptions>> = Object.freeze({
    includeParents: true,
    includeChildren: true,
    includeLinks: true,
    // Similar-note lookup is opt-in.
    includeSimilar: false,
    includeContent: true,
    maxParentDepth: 3,
    maxChildren: 10,
    maxLinks: 10,
    maxSimilarNotes: 5,
    maxContentLength: 2000
});
|
|
|
|
/**
 * Context Extractor class.
 *
 * Handles extraction of context from notes for LLM processing. Most members
 * are thin wrappers around the helper modules imported by this file; every
 * operation is available both as a static method and as an instance method
 * (the instance method simply forwards to the static one).
 */
export class ContextExtractor {
    /**
     * Get content of a note.
     *
     * Delegates to `getNoteContent` from `./note_content.js`.
     *
     * @param noteId - ID of the note to read
     * @returns The note content, or `null` when the helper yields none
     */
    static async getNoteContent(noteId: string): Promise<string | null> {
        return getNoteContent(noteId);
    }

    /** Get content of a note - instance method. */
    async getNoteContent(noteId: string): Promise<string | null> {
        return ContextExtractor.getNoteContent(noteId);
    }

    /**
     * Format note content based on its type.
     *
     * Delegates to `formatNoteContent` from `./note_content.js`.
     */
    static formatNoteContent(content: string, type: string, mime: string, title: string): string {
        return formatNoteContent(content, type, mime, title);
    }

    /** Format note content based on its type - instance method. */
    formatNoteContent(content: string, type: string, mime: string, title: string): string {
        return ContextExtractor.formatNoteContent(content, type, mime, title);
    }

    /**
     * Sanitize HTML content to plain text.
     *
     * Delegates to `sanitizeHtmlContent` from `./note_content.js`.
     */
    static sanitizeHtmlContent(html: string): string {
        return sanitizeHtmlContent(html);
    }

    /** Sanitize HTML content to plain text - instance method. */
    sanitizeHtmlContent(html: string): string {
        return ContextExtractor.sanitizeHtmlContent(html);
    }

    /**
     * Detect programming language from content.
     *
     * Delegates to `detectLanguage` from `./code_handlers.js`.
     */
    static detectLanguage(content: string, mime: string): string {
        return detectLanguage(content, mime);
    }

    /** Detect programming language from content - instance method. */
    detectLanguage(content: string, mime: string): string {
        return ContextExtractor.detectLanguage(content, mime);
    }

    /**
     * Extract structure from code.
     *
     * Delegates to `extractCodeStructure` from `./code_handlers.js`.
     */
    static extractCodeStructure(content: string, language: string): string {
        return extractCodeStructure(content, language);
    }

    /** Extract structure from code - instance method. */
    extractCodeStructure(content: string, language: string): string {
        return ContextExtractor.extractCodeStructure(content, language);
    }

    /**
     * Chunk content into smaller pieces.
     *
     * Delegates to `chunkContent` from `./content_chunking.js`.
     */
    static async chunkContent(
        content: string,
        title: string = '',
        noteId: string = '',
        options: ChunkOptions = {}
    ): Promise<ContentChunk[]> {
        return chunkContent(content, title, noteId, options);
    }

    /** Chunk content into smaller pieces - instance method. */
    async chunkContent(
        content: string,
        title: string = '',
        noteId: string = '',
        options: ChunkOptions = {}
    ): Promise<ContentChunk[]> {
        return ContextExtractor.chunkContent(content, title, noteId, options);
    }

    /**
     * Smarter chunking that respects semantic boundaries.
     *
     * Delegates to `semanticChunking` from `./content_chunking.js`.
     */
    static async semanticChunking(
        content: string,
        title: string = '',
        noteId: string = '',
        options: ChunkOptions = {}
    ): Promise<ContentChunk[]> {
        return semanticChunking(content, title, noteId, options);
    }

    /** Smarter chunking that respects semantic boundaries - instance method. */
    async semanticChunking(
        content: string,
        title: string = '',
        noteId: string = '',
        options: ChunkOptions = {}
    ): Promise<ContentChunk[]> {
        return ContextExtractor.semanticChunking(content, title, noteId, options);
    }

    /**
     * Summarize content.
     *
     * Delegates to `summarizeContent` from `./summarization.js`.
     */
    static summarizeContent(content: string, title: string = ''): string {
        return summarizeContent(content, title);
    }

    /** Summarize content - instance method. */
    summarizeContent(content: string, title: string = ''): string {
        return ContextExtractor.summarizeContent(content, title);
    }

    /**
     * Extract key points from content.
     *
     * Delegates to `extractKeyPoints` from `./summarization.js`.
     */
    static extractKeyPoints(content: string, maxPoints: number = 5): string[] {
        return extractKeyPoints(content, maxPoints);
    }

    /** Extract key points from content - instance method. */
    extractKeyPoints(content: string, maxPoints: number = 5): string[] {
        return ContextExtractor.extractKeyPoints(content, maxPoints);
    }

    /**
     * Get parent notes.
     *
     * Delegates to `getParentNotes` from `./hierarchy.js`.
     */
    static async getParentNotes(
        noteId: string,
        maxParents: number = 5
    ): Promise<{id: string, title: string}[]> {
        return getParentNotes(noteId, maxParents);
    }

    /** Get parent notes - instance method. */
    async getParentNotes(
        noteId: string,
        maxParents: number = 5
    ): Promise<{id: string, title: string}[]> {
        return ContextExtractor.getParentNotes(noteId, maxParents);
    }

    /**
     * Get hierarchical parent context.
     *
     * Delegates to `getParentContext` from `./hierarchy.js`.
     */
    static async getParentContext(
        noteId: string,
        maxDepth: number = 3,
        maxParents: number = 3
    ): Promise<string> {
        return getParentContext(noteId, maxDepth, maxParents);
    }

    /** Get hierarchical parent context - instance method. */
    async getParentContext(
        noteId: string,
        maxDepth: number = 3,
        maxParents: number = 3
    ): Promise<string> {
        return ContextExtractor.getParentContext(noteId, maxDepth, maxParents);
    }

    /**
     * Get child context.
     *
     * Delegates to `getChildContext` from `./hierarchy.js`.
     */
    static async getChildContext(
        noteId: string,
        maxChildren: number = 10,
        includeContent: boolean = false
    ): Promise<string> {
        return getChildContext(noteId, maxChildren, includeContent);
    }

    /** Get child context - instance method. */
    async getChildContext(
        noteId: string,
        maxChildren: number = 10,
        includeContent: boolean = false
    ): Promise<string> {
        return ContextExtractor.getChildContext(noteId, maxChildren, includeContent);
    }

    /**
     * Get linked notes context.
     *
     * Delegates to `getLinkedNotesContext` from `./hierarchy.js`.
     */
    static async getLinkedNotesContext(
        noteId: string,
        maxRelations: number = 10
    ): Promise<string> {
        return getLinkedNotesContext(noteId, maxRelations);
    }

    /** Get linked notes context - instance method. */
    async getLinkedNotesContext(
        noteId: string,
        maxRelations: number = 10
    ): Promise<string> {
        return ContextExtractor.getLinkedNotesContext(noteId, maxRelations);
    }

    /**
     * Get semantic context.
     *
     * Delegates to `getSemanticContext` from `./semantic_context.js`.
     *
     * @param noteId - ID of the note to find similar notes for
     * @param maxSimilarNotesOrQuery - New API: the maximum number of similar
     *   notes. Old API: a query string; it is still accepted for backward
     *   compatibility, but the query itself is not forwarded and the default
     *   count of 5 is used instead.
     */
    static async getSemanticContext(
        noteId: string,
        maxSimilarNotesOrQuery: number | string = 5
    ): Promise<string> {
        const maxSimilarNotes = typeof maxSimilarNotesOrQuery === 'string'
            ? 5
            : maxSimilarNotesOrQuery;

        return getSemanticContext(noteId, { maxSimilarNotes });
    }

    /** Get semantic context - instance method. */
    async getSemanticContext(
        noteId: string,
        maxSimilarNotesOrQuery: number | string = 5
    ): Promise<string> {
        return ContextExtractor.getSemanticContext(noteId, maxSimilarNotesOrQuery);
    }

    /**
     * Merge caller options over `DEFAULT_CONTEXT_OPTIONS`.
     *
     * Entries explicitly set to `undefined` are dropped first: a plain object
     * spread would copy them over the defaults and leave the "required"
     * config with holes (e.g. `includeParents: undefined` disabling parents).
     */
    private static resolveOptions(options: ContextOptions): Required<ContextOptions> {
        const provided = Object.fromEntries(
            Object.entries(options ?? {}).filter(([, value]) => value !== undefined)
        ) as ContextOptions;

        return { ...DEFAULT_CONTEXT_OPTIONS, ...provided };
    }

    /**
     * Extract full context for a note.
     *
     * Combines various context sources based on the provided options and
     * returns them as one Markdown-style string. Sections are appended in
     * this order, each only when enabled and non-empty: parent hierarchy,
     * note content, child notes, linked notes, similar notes.
     *
     * @param noteId - ID of the note to build context for
     * @param options - Overrides for `DEFAULT_CONTEXT_OPTIONS`
     * @returns The assembled context, or `"Note not found."` when the note
     *   does not exist
     */
    static async extractContext(
        noteId: string,
        options: ContextOptions = {}
    ): Promise<string> {
        const config = ContextExtractor.resolveOptions(options);
        const note = becca.getNote(noteId);

        if (!note) {
            return "Note not found.";
        }

        let context = `# Context for note: ${note.title}\n\n`;

        // Include parent context
        if (config.includeParents) {
            const parentContext = await ContextExtractor.getParentContext(
                noteId,
                config.maxParentDepth,
                3 // Default to 3 parents per level
            );

            if (parentContext) {
                context += `## Parent Hierarchy\n${parentContext}\n\n`;
            }
        }

        // Include note content
        if (config.includeContent) {
            const content = await ContextExtractor.getNoteContent(noteId);

            if (content) {
                let contentSection = content;

                // If content is too large, summarize it
                if (content.length > config.maxContentLength) {
                    contentSection = ContextExtractor.summarizeContent(content, note.title);
                    contentSection += "\n\n[Content summarized due to length]";
                }

                context += `## Note Content\n${contentSection}\n\n`;
            }
        }

        // Include child context
        if (config.includeChildren) {
            const childContext = await ContextExtractor.getChildContext(
                noteId,
                config.maxChildren,
                false // Don't include child content by default
            );

            // "No child notes." is treated as the helper's empty-result sentinel
            if (childContext && childContext !== "No child notes.") {
                context += `## Child Notes\n${childContext}\n\n`;
            }
        }

        // Include linked notes
        if (config.includeLinks) {
            const linkedContext = await ContextExtractor.getLinkedNotesContext(
                noteId,
                config.maxLinks
            );

            // "No linked notes." is treated as the helper's empty-result sentinel
            if (linkedContext && linkedContext !== "No linked notes.") {
                context += `## Linked Notes\n${linkedContext}\n\n`;
            }
        }

        // Include semantically similar notes
        if (config.includeSimilar) {
            const semanticContext = await ContextExtractor.getSemanticContext(
                noteId,
                config.maxSimilarNotes
            );

            if (semanticContext && !semanticContext.includes("No semantically similar notes found.")) {
                context += `## Similar Notes\n${semanticContext}\n\n`;
            }
        }

        return context;
    }

    /** Extract full context for a note - instance method. */
    async extractContext(
        noteId: string,
        options: ContextOptions = {}
    ): Promise<string> {
        return ContextExtractor.extractContext(noteId, options);
    }

    /**
     * Look up the context service through the AI service manager.
     *
     * The manager module is imported dynamically at call time rather than at
     * module load. May return a falsy value when no context service is
     * available; rejects if the import or lookup fails.
     */
    private static async loadContextService() {
        const { default: aiServiceManager } = await import('../ai_service_manager.js');
        return aiServiceManager.getInstance().getContextService();
    }

    /**
     * Get progressively loaded context based on depth level.
     * This provides different levels of context detail depending on the depth parameter.
     *
     * Falls back to `extractContext` with default options when the context
     * service is unavailable or progressive loading throws.
     *
     * @param noteId - The ID of the note to get context for
     * @param depth - Depth level (1-4) determining how much context to include
     * @returns Context appropriate for the requested depth
     */
    static async getProgressiveContext(noteId: string, depth = 1): Promise<string> {
        try {
            // Use the new context service
            const contextService = await ContextExtractor.loadContextService();

            if (!contextService) {
                return ContextExtractor.extractContext(noteId);
            }

            // `await` here so a rejection is caught below and triggers the fallback
            return await contextService.getProgressiveContext(noteId, depth);
        } catch (error) {
            // Fall back to regular context if progressive loading fails
            console.error('Error in progressive context loading:', error);
            return ContextExtractor.extractContext(noteId);
        }
    }

    /** Get progressively loaded context based on depth level - instance method. */
    async getProgressiveContext(noteId: string, depth = 1): Promise<string> {
        return ContextExtractor.getProgressiveContext(noteId, depth);
    }

    /**
     * Get smart context based on the query complexity.
     * This automatically selects the appropriate context depth and relevance.
     *
     * Falls back to `extractContext` with default options when the context
     * service is unavailable or smart context selection throws.
     *
     * @param noteId - The ID of the note to get context for
     * @param query - The user's query for semantic relevance matching
     * @returns The optimal context for answering the query
     */
    static async getSmartContext(noteId: string, query: string): Promise<string> {
        try {
            // Use the new context service
            const contextService = await ContextExtractor.loadContextService();

            if (!contextService) {
                return ContextExtractor.extractContext(noteId);
            }

            // `await` here so a rejection is caught below and triggers the fallback
            return await contextService.getSmartContext(noteId, query);
        } catch (error) {
            // Fall back to regular context if smart context fails
            console.error('Error in smart context selection:', error);
            return ContextExtractor.extractContext(noteId);
        }
    }

    /** Get smart context based on the query complexity - instance method. */
    async getSmartContext(noteId: string, query: string): Promise<string> {
        return ContextExtractor.getSmartContext(noteId, query);
    }

    /**
     * Get the full context for a note, including parent hierarchy, content, and children.
     * Legacy method for backwards compatibility; equivalent to `extractContext`
     * with default options.
     */
    static async getFullContext(noteId: string): Promise<string> {
        return ContextExtractor.extractContext(noteId);
    }

    /** Get the full context for a note - instance method. */
    async getFullContext(noteId: string): Promise<string> {
        return ContextExtractor.getFullContext(noteId);
    }

    /**
     * Get note summary - for backward compatibility.
     *
     * @param noteId - ID of the note to summarize
     * @param maxLength - Longest content (in characters) returned verbatim
     * @returns `''` when the note or its content is missing, the content
     *   itself when it fits within `maxLength`, otherwise a summary
     */
    static async getNoteSummary(noteId: string, maxLength = 5000): Promise<string> {
        const note = becca.getNote(noteId);
        if (!note) return '';

        const content = await getNoteContent(noteId);
        if (!content) return '';

        // Content that fits within the limit (inclusive) is returned unchanged,
        // matching the "longer than max" rule used by extractContext.
        if (content.length <= maxLength) return content;

        // For larger content, generate a summary
        return summarizeContent(content, note.title);
    }

    /** Get note summary - instance method. */
    async getNoteSummary(noteId: string, maxLength = 5000): Promise<string> {
        return ContextExtractor.getNoteSummary(noteId, maxLength);
    }

    /**
     * Split a large note into smaller, semantically meaningful chunks.
     * This is useful for handling large notes that exceed the context window of LLMs.
     * Kept for backward compatibility: returns plain strings rather than
     * `ContentChunk` objects.
     *
     * @param noteId - ID of the note to split
     * @param maxChunkSize - Passed to the chunker as `maxChunkSize`
     * @returns The `content` of each chunk, or `[]` when the note has no content
     */
    static async getChunkedNoteContent(noteId: string, maxChunkSize = 2000): Promise<string[]> {
        const content = await getNoteContent(noteId);
        if (!content) return [];

        // Use the new chunking functionality
        const chunks = await ContextExtractor.chunkContent(
            content,
            '',
            noteId,
            { maxChunkSize, respectBoundaries: true }
        );

        // Convert to the old API format which was an array of strings
        return chunks.map(chunk => chunk.content);
    }

    /** Split a large note into smaller chunks - instance method. */
    async getChunkedNoteContent(noteId: string, maxChunkSize = 2000): Promise<string[]> {
        return ContextExtractor.getChunkedNoteContent(noteId, maxChunkSize);
    }
}
|
|
|
|
// Re-export the helper functions imported at the top of this file so callers
// can use them directly without going through ContextExtractor.
export {
    getNoteContent,
    formatNoteContent,
    sanitizeHtmlContent,
    detectLanguage,
    extractCodeStructure,
    chunkContent,
    semanticChunking,
    summarizeContent,
    extractKeyPoints,
    getParentNotes,
    getParentContext,
    getChildContext,
    getLinkedNotesContext,
    getSemanticContext
};

// Re-export the chunking types (type-only, erased at compile time)
export type {
    ContentChunk,
    ChunkOptions
};
|