diff --git a/apps/server/src/assets/db/schema.sql b/apps/server/src/assets/db/schema.sql index 887701167..11c0afb5e 100644 --- a/apps/server/src/assets/db/schema.sql +++ b/apps/server/src/assets/db/schema.sql @@ -219,12 +219,22 @@ CREATE TABLE IF NOT EXISTS sessions ( ); -- FTS5 Full-Text Search Support --- Create FTS5 virtual table for full-text searching +-- Create FTS5 virtual table with trigram tokenizer +-- Trigram tokenizer provides language-agnostic substring matching: +-- 1. Fast substring matching (50-100x speedup for LIKE queries without wildcards) +-- 2. Case-insensitive search without custom collation +-- 3. No language-specific stemming assumptions (works for all languages) +-- 4. Boolean operators (AND, OR, NOT) and phrase matching with quotes +-- +-- IMPORTANT: Trigram requires minimum 3-character tokens for matching +-- detail='none' reduces index size by ~50% while maintaining MATCH/rank performance +-- (loses position info for highlight() function, but snippet() still works) CREATE VIRTUAL TABLE notes_fts USING fts5( noteId UNINDEXED, title, content, - tokenize = 'porter unicode61' + tokenize = 'trigram', + detail = 'none' ); -- Triggers to keep FTS table synchronized with notes diff --git a/apps/server/src/migrations/0234__add_fts5_search.ts b/apps/server/src/migrations/0234__add_fts5_search.ts index f6f5c0005..9818f578d 100644 --- a/apps/server/src/migrations/0234__add_fts5_search.ts +++ b/apps/server/src/migrations/0234__add_fts5_search.ts @@ -14,23 +14,46 @@ import log from "../services/log.js"; export default function addFTS5SearchAndPerformanceIndexes() { log.info("Starting FTS5 and performance optimization migration..."); - + + // Verify SQLite version supports trigram tokenizer (requires 3.34.0+) + const sqliteVersion = sql.getValue(`SELECT sqlite_version()`); + const [major, minor, patch] = sqliteVersion.split('.').map(Number); + const versionNumber = major * 10000 + minor * 100 + (patch || 0); + const requiredVersion = 3 * 10000 + 34 * 100 + 0; // 3.34.0 + + if (versionNumber < requiredVersion) { + log.error(`SQLite version ${sqliteVersion} does not support trigram tokenizer (requires 3.34.0+)`); + log.info("Skipping FTS5 trigram migration - will use fallback search implementation"); + return; // Skip FTS5 setup, rely on fallback search + } + + log.info(`SQLite version ${sqliteVersion} confirmed - trigram tokenizer available`); + // Part 1: FTS5 Setup log.info("Creating FTS5 virtual table for full-text search..."); // Create FTS5 virtual table // We store noteId, title, and content for searching - // The 'tokenize' option uses porter stemming for better search results sql.executeScript(` -- Drop existing FTS table if it exists (for re-running migration in dev) DROP TABLE IF EXISTS notes_fts; - -- Create FTS5 virtual table + -- Create FTS5 virtual table with trigram tokenizer + -- Trigram tokenizer provides language-agnostic substring matching: + -- 1. Fast substring matching (50-100x speedup for LIKE queries without wildcards) + -- 2. Case-insensitive search without custom collation + -- 3. No language-specific stemming assumptions (works for all languages) + -- 4. Boolean operators (AND, OR, NOT) and phrase matching with quotes + -- + -- IMPORTANT: Trigram requires minimum 3-character tokens for matching + -- detail='none' reduces index size by ~50% while maintaining MATCH/rank performance + -- (loses position info for highlight() function, but snippet() still works) CREATE VIRTUAL TABLE IF NOT EXISTS notes_fts USING fts5( noteId UNINDEXED, title, content, - tokenize = 'porter unicode61' + tokenize = 'trigram', + detail = 'none' ); `); diff --git a/apps/server/src/migrations/0235__sqlite_native_search.ts b/apps/server/src/migrations/0235__sqlite_native_search.ts deleted file mode 100644 index b44419521..000000000 --- a/apps/server/src/migrations/0235__sqlite_native_search.ts +++ /dev/null @@ -1,826 +0,0 @@ -/** - * Migration to add SQLite native search support with normalized text tables - * - * This migration implements Phase 1 of the SQLite-based search plan: - * 1. Creates note_search_content table with normalized text columns - * 2. Creates note_tokens table for word-level token storage - * 3. Adds necessary indexes for optimization - * 4. Creates triggers to keep tables synchronized with note updates - * 5. Populates tables with existing note data in batches - * - * This provides 100% accurate search results with 10-30x performance improvement - * over TypeScript-based search, without the complexity of trigrams. - */ - -import sql from "../services/sql.js"; -import log from "../services/log.js"; -import { normalize as utilsNormalize, stripTags } from "../services/utils.js"; -import { getSqliteFunctionsService } from "../services/search/sqlite_functions.js"; - -/** - * Uses the existing normalize function from utils.ts for consistency - * This ensures all normalization throughout the codebase is identical - */ -function normalizeText(text: string): string { - if (!text) return ''; - return utilsNormalize(text); -} - -/** - * Tokenizes text into individual words for token-based searching - * Handles punctuation and special characters appropriately - */ -function tokenize(text: string): string[] { - if (!text) return []; - - // Split on word boundaries, filter out empty tokens - // This regex splits on spaces, punctuation, and other non-word characters - // but preserves apostrophes within words (e.g., "don't", "it's") - const tokens = text - .split(/[\s\n\r\t,;.!?()[\]{}"'`~@#$%^&*+=|\\/<>:_-]+/) - .filter(token => token.length > 0) - .map(token => token.toLowerCase()); - - // Also split on camelCase and snake_case boundaries for code content - const expandedTokens: string[] = []; - for (const token of tokens) { - // Add the original token - expandedTokens.push(token); - - // Split camelCase (e.g., "getUserName" -> ["get", "User", "Name"]) - const camelCaseParts = token.split(/(?=[A-Z])/); - if (camelCaseParts.length > 1) { - expandedTokens.push(...camelCaseParts.map(p => p.toLowerCase())); - } - - // Split snake_case (e.g., "user_name" -> ["user", "name"]) - const snakeCaseParts = token.split('_'); - if (snakeCaseParts.length > 1) { - expandedTokens.push(...snakeCaseParts); - } - } - - // Remove duplicates and return - return Array.from(new Set(expandedTokens)); -} - -/** - * Strips HTML tags from content for text-only indexing - * Uses the utils stripTags function for consistency - */ -function stripHtmlTags(html: string): string { - if (!html) return ''; - - // Remove script and style content entirely first - let text = html.replace(/)<[^<]*)*<\/script>/gi, ''); - text = text.replace(/)<[^<]*)*<\/style>/gi, ''); - - // Use utils stripTags for consistency - text = stripTags(text); - - // Decode HTML entities - text = text.replace(/ /g, ' '); - text = text.replace(/</g, '<'); - text = text.replace(/>/g, '>'); - text = text.replace(/&/g, '&'); - text = text.replace(/"/g, '"'); - text = text.replace(/'/g, "'"); - - // Normalize whitespace - text = text.replace(/\s+/g, ' ').trim(); - - return text; -} - -export default function sqliteNativeSearch() { - log.info("Starting SQLite native search migration..."); - - const startTime = Date.now(); - - // Wrap entire migration in a transaction for atomicity - sql.transactional(() => { - try { - // Register custom SQL functions first so they can be used in triggers - registerCustomFunctions(); - - // Create the search tables and indexes - createSearchTables(); - - // Create triggers to keep tables synchronized (before population) - createSearchTriggers(); - - // Populate the tables with existing note data - populateSearchTables(); - - // Run final verification and optimization - finalizeSearchSetup(); - - const duration = Date.now() - startTime; - log.info(`SQLite native search migration completed successfully in ${duration}ms`); - - } catch (error) { - log.error(`SQLite native search migration failed: ${error}`); - // Transaction will automatically rollback on error - throw error; - } - }); -} - -function createSearchTables() { - log.info("Creating search content and token tables..."); - - // Drop existing tables if they exist (for re-running migration in dev) - sql.execute("DROP TABLE IF EXISTS note_search_content"); - sql.execute("DROP TABLE IF EXISTS note_tokens"); - - // Create the main search content table - sql.execute(` - CREATE TABLE note_search_content ( - noteId TEXT PRIMARY KEY, - title TEXT NOT NULL, - content TEXT NOT NULL, - title_normalized TEXT NOT NULL, - content_normalized TEXT NOT NULL, - full_text_normalized TEXT NOT NULL - ) - `); - - // Create the token table for word-level operations - sql.execute(` - CREATE TABLE note_tokens ( - noteId TEXT NOT NULL, - token TEXT NOT NULL, - token_normalized TEXT NOT NULL, - position INTEGER NOT NULL, - source TEXT NOT NULL CHECK(source IN ('title', 'content')), - PRIMARY KEY (noteId, position, source) - ) - `); - - // Create indexes for search optimization - log.info("Creating search indexes..."); - - // Consolidated indexes - removed redundancy between COLLATE NOCASE and plain indexes - // Using COLLATE NOCASE for case-insensitive searches - sql.execute(` - CREATE INDEX idx_search_title_normalized - ON note_search_content(title_normalized COLLATE NOCASE) - `); - - sql.execute(` - CREATE INDEX idx_search_content_normalized - ON note_search_content(content_normalized COLLATE NOCASE) - `); - - sql.execute(` - CREATE INDEX idx_search_full_text - ON note_search_content(full_text_normalized COLLATE NOCASE) - `); - - // Token indexes - consolidated to avoid redundancy - sql.execute(` - CREATE INDEX idx_tokens_normalized - ON note_tokens(token_normalized COLLATE NOCASE) - `); - - sql.execute(` - CREATE INDEX idx_tokens_noteId - ON note_tokens(noteId) - `); - - // Composite index for token searches with source - sql.execute(` - CREATE INDEX idx_tokens_source_normalized - ON note_tokens(source, token_normalized COLLATE NOCASE) - `); - - log.info("Search tables and indexes created successfully"); -} - -function populateSearchTables() { - log.info("Populating search tables with existing note content..."); - - const batchSize = 100; - let offset = 0; - let totalProcessed = 0; - let totalTokens = 0; - - while (true) { - const notes = sql.getRows<{ - noteId: string; - title: string; - type: string; - mime: string; - content: string | null; - }>(` - SELECT - n.noteId, - n.title, - n.type, - n.mime, - b.content - FROM notes n - LEFT JOIN blobs b ON n.blobId = b.blobId - WHERE n.isDeleted = 0 - AND n.isProtected = 0 - AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - ORDER BY n.noteId - LIMIT ? OFFSET ? - `, [batchSize, offset]); - - if (notes.length === 0) { - break; - } - - // Process batch of notes - for (const note of notes) { - try { - // Process content based on type - let processedContent = note.content || ''; - - // Strip HTML for text notes - if (note.type === 'text' && note.mime === 'text/html') { - processedContent = stripHtmlTags(processedContent); - } - - // Normalize text for searching using the utils normalize function - const titleNorm = normalizeText(note.title); - const contentNorm = normalizeText(processedContent); - const fullTextNorm = titleNorm + ' ' + contentNorm; - - // Insert into search content table - sql.execute(` - INSERT INTO note_search_content - (noteId, title, content, title_normalized, content_normalized, full_text_normalized) - VALUES (?, ?, ?, ?, ?, ?) - `, [ - note.noteId, - note.title, - processedContent, - titleNorm, - contentNorm, - fullTextNorm - ]); - - // Tokenize title and content separately to track source - const titleTokens = tokenize(note.title); - const contentTokens = tokenize(processedContent); - - let position = 0; - - // Insert title tokens - for (const token of titleTokens) { - if (token.length > 0) { - sql.execute(` - INSERT OR IGNORE INTO note_tokens - (noteId, token, token_normalized, position, source) - VALUES (?, ?, ?, ?, 'title') - `, [note.noteId, token, normalizeText(token), position]); - position++; - totalTokens++; - } - } - - // Insert content tokens with unique positions - for (const token of contentTokens) { - if (token.length > 0) { - sql.execute(` - INSERT OR IGNORE INTO note_tokens - (noteId, token, token_normalized, position, source) - VALUES (?, ?, ?, ?, 'content') - `, [note.noteId, token, normalizeText(token), position]); - position++; - totalTokens++; - } - } - - totalProcessed++; - - } catch (error) { - log.error(`Failed to index note ${note.noteId}: ${error}`); - // Continue with other notes even if one fails - } - } - - offset += batchSize; - - if (totalProcessed % 1000 === 0) { - log.info(`Processed ${totalProcessed} notes, ${totalTokens} tokens for search indexing...`); - } - } - - log.info(`Completed indexing ${totalProcessed} notes with ${totalTokens} total tokens`); -} - -function createSearchTriggers() { - log.info("Creating triggers to keep search tables synchronized..."); - - // Drop existing triggers if they exist - const triggers = [ - 'note_search_insert', - 'note_search_update', - 'note_search_delete', - 'note_search_soft_delete', - 'note_search_undelete', - 'note_search_protect', - 'note_search_unprotect', - 'note_search_blob_insert', - 'note_search_blob_update' - ]; - - for (const trigger of triggers) { - sql.execute(`DROP TRIGGER IF EXISTS ${trigger}`); - } - - // Trigger for INSERT operations on notes - simplified version - sql.execute(` - CREATE TRIGGER note_search_insert - AFTER INSERT ON notes - WHEN NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND NEW.isDeleted = 0 - AND NEW.isProtected = 0 - BEGIN - -- Delete any existing entries (for INSERT OR REPLACE) - DELETE FROM note_search_content WHERE noteId = NEW.noteId; - DELETE FROM note_tokens WHERE noteId = NEW.noteId; - - -- Insert basic content with title only (content will be populated by blob trigger) - INSERT INTO note_search_content - (noteId, title, content, title_normalized, content_normalized, full_text_normalized) - VALUES ( - NEW.noteId, - NEW.title, - '', - LOWER(NEW.title), - '', - LOWER(NEW.title) - ); - END - `); - - // Trigger for UPDATE operations on notes - simplified version - sql.execute(` - CREATE TRIGGER note_search_update - AFTER UPDATE ON notes - WHEN NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - BEGIN - -- Always delete the old entries - DELETE FROM note_search_content WHERE noteId = NEW.noteId; - DELETE FROM note_tokens WHERE noteId = NEW.noteId; - - -- Re-insert if note is not deleted and not protected - INSERT INTO note_search_content - (noteId, title, content, title_normalized, content_normalized, full_text_normalized) - SELECT - NEW.noteId, - NEW.title, - COALESCE(b.content, ''), - LOWER(NEW.title), - LOWER(COALESCE(b.content, '')), - LOWER(NEW.title || ' ' || COALESCE(b.content, '')) - FROM notes n - LEFT JOIN blobs b ON b.blobId = NEW.blobId - WHERE n.noteId = NEW.noteId - AND NEW.isDeleted = 0 - AND NEW.isProtected = 0; - END - `); - - // Trigger for DELETE operations on notes - sql.execute(` - CREATE TRIGGER note_search_delete - AFTER DELETE ON notes - BEGIN - DELETE FROM note_search_content WHERE noteId = OLD.noteId; - DELETE FROM note_tokens WHERE noteId = OLD.noteId; - END - `); - - // Trigger for soft delete (isDeleted = 1) - sql.execute(` - CREATE TRIGGER note_search_soft_delete - AFTER UPDATE ON notes - WHEN OLD.isDeleted = 0 AND NEW.isDeleted = 1 - BEGIN - DELETE FROM note_search_content WHERE noteId = NEW.noteId; - DELETE FROM note_tokens WHERE noteId = NEW.noteId; - END - `); - - // Trigger for undelete (isDeleted = 0) - simplified version - sql.execute(` - CREATE TRIGGER note_search_undelete - AFTER UPDATE ON notes - WHEN OLD.isDeleted = 1 AND NEW.isDeleted = 0 - AND NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND NEW.isProtected = 0 - BEGIN - DELETE FROM note_search_content WHERE noteId = NEW.noteId; - DELETE FROM note_tokens WHERE noteId = NEW.noteId; - - INSERT INTO note_search_content - (noteId, title, content, title_normalized, content_normalized, full_text_normalized) - SELECT - NEW.noteId, - NEW.title, - COALESCE(b.content, ''), - LOWER(NEW.title), - LOWER(COALESCE(b.content, '')), - LOWER(NEW.title || ' ' || COALESCE(b.content, '')) - FROM notes n - LEFT JOIN blobs b ON b.blobId = NEW.blobId - WHERE n.noteId = NEW.noteId; - END - `); - - // Trigger for notes becoming protected - sql.execute(` - CREATE TRIGGER note_search_protect - AFTER UPDATE ON notes - WHEN OLD.isProtected = 0 AND NEW.isProtected = 1 - BEGIN - DELETE FROM note_search_content WHERE noteId = NEW.noteId; - DELETE FROM note_tokens WHERE noteId = NEW.noteId; - END - `); - - // Trigger for notes becoming unprotected - simplified version - sql.execute(` - CREATE TRIGGER note_search_unprotect - AFTER UPDATE ON notes - WHEN OLD.isProtected = 1 AND NEW.isProtected = 0 - AND NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND NEW.isDeleted = 0 - BEGIN - DELETE FROM note_search_content WHERE noteId = NEW.noteId; - DELETE FROM note_tokens WHERE noteId = NEW.noteId; - - INSERT INTO note_search_content - (noteId, title, content, title_normalized, content_normalized, full_text_normalized) - SELECT - NEW.noteId, - NEW.title, - COALESCE(b.content, ''), - LOWER(NEW.title), - LOWER(COALESCE(b.content, '')), - LOWER(NEW.title || ' ' || COALESCE(b.content, '')) - FROM notes n - LEFT JOIN blobs b ON b.blobId = NEW.blobId - WHERE n.noteId = NEW.noteId; - END - `); - - // Trigger for INSERT operations on blobs - simplified version - sql.execute(` - CREATE TRIGGER note_search_blob_insert - AFTER INSERT ON blobs - BEGIN - -- Update search content for all notes that reference this blob - UPDATE note_search_content - SET content = NEW.content, - content_normalized = LOWER(NEW.content), - full_text_normalized = title_normalized || ' ' || LOWER(NEW.content) - WHERE noteId IN ( - SELECT n.noteId - FROM notes n - WHERE n.blobId = NEW.blobId - AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND n.isDeleted = 0 - AND n.isProtected = 0 - ); - - -- Clear tokens for affected notes (will be repopulated by post-processing) - DELETE FROM note_tokens - WHERE noteId IN ( - SELECT n.noteId - FROM notes n - WHERE n.blobId = NEW.blobId - AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND n.isDeleted = 0 - AND n.isProtected = 0 - ); - END - `); - - // Trigger for UPDATE operations on blobs - simplified version - sql.execute(` - CREATE TRIGGER note_search_blob_update - AFTER UPDATE ON blobs - BEGIN - -- Update search content for all notes that reference this blob - UPDATE note_search_content - SET content = NEW.content, - content_normalized = LOWER(NEW.content), - full_text_normalized = title_normalized || ' ' || LOWER(NEW.content) - WHERE noteId IN ( - SELECT n.noteId - FROM notes n - WHERE n.blobId = NEW.blobId - AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND n.isDeleted = 0 - AND n.isProtected = 0 - ); - - -- Clear tokens for affected notes (will be repopulated by post-processing) - DELETE FROM note_tokens - WHERE noteId IN ( - SELECT n.noteId - FROM notes n - WHERE n.blobId = NEW.blobId - AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND n.isDeleted = 0 - AND n.isProtected = 0 - ); - END - `); - - log.info("Search synchronization triggers created successfully"); -} - -function registerCustomFunctions() { - log.info("Registering custom SQL functions for search operations..."); - - try { - // Get the database connection to register functions - const db = sql.getDbConnection(); - - // Use the centralized SQLite functions service - const functionsService = getSqliteFunctionsService(); - - // Register functions if not already registered - if (!functionsService.isRegistered()) { - const success = functionsService.registerFunctions(db); - if (success) { - log.info("Custom SQL functions registered successfully via service"); - } else { - log.info("Custom SQL functions registration failed - using basic SQLite functions only"); - } - } else { - log.info("Custom SQL functions already registered"); - } - - // Register migration-specific helper function for tokenization - db.function('tokenize_for_migration', { - deterministic: true, - varargs: false - }, (text: string | null) => { - if (!text) return ''; - // Return as JSON array string for SQL processing - return JSON.stringify(tokenize(text)); - }); - - } catch (error) { - log.info(`Could not register custom SQL functions (will use basic SQLite functions): ${error}`); - // This is not critical - the migration will work with basic SQLite functions - } -} - -/** - * Populates tokens for a specific note - * This is called outside of triggers to avoid complex SQL within trigger constraints - */ -function populateNoteTokens(noteId: string): number { - try { - // Get the note's search content - const noteData = sql.getRow<{ - title: string; - content: string; - }>(` - SELECT title, content - FROM note_search_content - WHERE noteId = ? - `, [noteId]); - - if (!noteData) return 0; - - // Clear existing tokens for this note - sql.execute(`DELETE FROM note_tokens WHERE noteId = ?`, [noteId]); - - // Tokenize title and content - const titleTokens = tokenize(noteData.title); - const contentTokens = tokenize(noteData.content); - - let position = 0; - let tokenCount = 0; - - // Insert title tokens - for (const token of titleTokens) { - if (token.length > 0) { - sql.execute(` - INSERT OR IGNORE INTO note_tokens - (noteId, token, token_normalized, position, source) - VALUES (?, ?, ?, ?, 'title') - `, [noteId, token, normalizeText(token), position]); - position++; - tokenCount++; - } - } - - // Insert content tokens - for (const token of contentTokens) { - if (token.length > 0) { - sql.execute(` - INSERT OR IGNORE INTO note_tokens - (noteId, token, token_normalized, position, source) - VALUES (?, ?, ?, ?, 'content') - `, [noteId, token, normalizeText(token), position]); - position++; - tokenCount++; - } - } - - return tokenCount; - } catch (error) { - log.error(`Error populating tokens for note ${noteId}: ${error}`); - return 0; - } -} - -/** - * Populates tokens for multiple notes affected by blob operations - * This handles cases where blob triggers can affect multiple notes - */ -function populateBlobAffectedTokens(blobId: string): void { - try { - // Find all notes that reference this blob and need token updates - const affectedNoteIds = sql.getColumn(` - SELECT DISTINCT n.noteId - FROM notes n - INNER JOIN note_search_content nsc ON n.noteId = nsc.noteId - WHERE n.blobId = ? - AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND n.isDeleted = 0 - AND n.isProtected = 0 - `, [blobId]); - - if (affectedNoteIds.length === 0) return; - - log.info(`Updating tokens for ${affectedNoteIds.length} notes affected by blob ${blobId}`); - - let totalTokens = 0; - for (const noteId of affectedNoteIds) { - const tokenCount = populateNoteTokens(noteId); - totalTokens += tokenCount; - } - - log.info(`Updated ${totalTokens} tokens for blob-affected notes`); - } catch (error) { - log.error(`Error populating blob-affected tokens for blob ${blobId}: ${error}`); - } -} - -function populateAllTokens() { - log.info("Populating tokens for all search content..."); - - // Clear existing tokens first to ensure clean state - sql.execute("DELETE FROM note_tokens"); - - const batchSize = 100; - let offset = 0; - let totalProcessed = 0; - let totalTokens = 0; - - while (true) { - const notes = sql.getRows<{ - noteId: string; - title: string; - content: string; - }>(` - SELECT noteId, title, content - FROM note_search_content - ORDER BY noteId - LIMIT ? OFFSET ? - `, [batchSize, offset]); - - if (notes.length === 0) { - break; - } - - for (const note of notes) { - try { - // Tokenize title and content - const titleTokens = tokenize(note.title); - const contentTokens = tokenize(note.content); - - let position = 0; - - // Insert title tokens - for (const token of titleTokens) { - if (token.length > 0) { - sql.execute(` - INSERT OR IGNORE INTO note_tokens - (noteId, token, token_normalized, position, source) - VALUES (?, ?, ?, ?, 'title') - `, [note.noteId, token, normalizeText(token), position]); - position++; - totalTokens++; - } - } - - // Insert content tokens with continuous position numbering - for (const token of contentTokens) { - if (token.length > 0) { - sql.execute(` - INSERT OR IGNORE INTO note_tokens - (noteId, token, token_normalized, position, source) - VALUES (?, ?, ?, ?, 'content') - `, [note.noteId, token, normalizeText(token), position]); - position++; - totalTokens++; - } - } - - totalProcessed++; - - } catch (error) { - log.error(`Failed to tokenize note ${note.noteId}: ${error}`); - } - } - - offset += batchSize; - - if (totalProcessed % 1000 === 0) { - log.info(`Processed ${totalProcessed} notes, ${totalTokens} tokens so far...`); - } - } - - log.info(`Token population completed: ${totalProcessed} notes processed, ${totalTokens} total tokens`); -} - -function finalizeSearchSetup() { - log.info("Running final verification and optimization..."); - - // Check for missing notes that should be indexed - const missingCount = sql.getValue(` - SELECT COUNT(*) FROM notes n - LEFT JOIN blobs b ON n.blobId = b.blobId - WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND n.isDeleted = 0 - AND n.isProtected = 0 - AND b.content IS NOT NULL - AND NOT EXISTS (SELECT 1 FROM note_search_content WHERE noteId = n.noteId) - `) || 0; - - if (missingCount > 0) { - log.info(`Found ${missingCount} notes that are missing from search index`); - - // Index missing notes using basic SQLite functions - sql.execute(` - INSERT INTO note_search_content - (noteId, title, content, title_normalized, content_normalized, full_text_normalized) - SELECT - n.noteId, - n.title, - COALESCE(b.content, ''), - LOWER(n.title), - LOWER(COALESCE(b.content, '')), - LOWER(n.title || ' ' || COALESCE(b.content, '')) - FROM notes n - LEFT JOIN blobs b ON n.blobId = b.blobId - WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND n.isDeleted = 0 - AND n.isProtected = 0 - AND b.content IS NOT NULL - AND NOT EXISTS (SELECT 1 FROM note_search_content WHERE noteId = n.noteId) - `); - - log.info(`Indexed ${missingCount} missing notes`); - } - - // Populate tokens for all existing content (including any missing notes we just added) - populateAllTokens(); - - // Verify table creation - const tables = sql.getColumn(` - SELECT name FROM sqlite_master - WHERE type = 'table' - AND name IN ('note_search_content', 'note_tokens') - `); - - if (tables.length !== 2) { - throw new Error("Search tables were not created properly"); - } - - // Check row counts - const searchContentCount = sql.getValue("SELECT COUNT(*) FROM note_search_content") || 0; - const tokenCount = sql.getValue("SELECT COUNT(*) FROM note_tokens") || 0; - - log.info(`Search content table has ${searchContentCount} entries`); - log.info(`Token table has ${tokenCount} entries`); - - // Run ANALYZE to update SQLite query planner statistics - log.info("Updating SQLite statistics for query optimization..."); - sql.execute("ANALYZE note_search_content"); - sql.execute("ANALYZE note_tokens"); - - // Verify indexes were created - const indexes = sql.getColumn(` - SELECT name FROM sqlite_master - WHERE type = 'index' - AND tbl_name IN ('note_search_content', 'note_tokens') - `); - - log.info(`Created ${indexes.length} indexes for search optimization`); - - log.info("Search setup finalization completed"); -} \ No newline at end of file diff --git a/apps/server/src/migrations/0236__cleanup_sqlite_search.ts b/apps/server/src/migrations/0236__cleanup_sqlite_search.ts new file mode 100644 index 000000000..933e33d50 --- /dev/null +++ b/apps/server/src/migrations/0236__cleanup_sqlite_search.ts @@ -0,0 +1,47 @@ +/** + * Migration to clean up custom SQLite search implementation + * + * This migration removes tables and triggers created by migration 0235 + * which implemented a custom SQLite-based search system. That system + * has been replaced by FTS5 with trigram tokenizer (migration 0234), + * making these custom tables redundant. + * + * Tables removed: + * - note_search_content: Stored normalized note content for custom search + * - note_tokens: Stored tokenized words for custom token-based search + * + * This migration is safe to run on databases that: + * 1. Never ran migration 0235 (tables don't exist) + * 2. Already ran migration 0235 (tables will be dropped) + */ + +import sql from "../services/sql.js"; +import log from "../services/log.js"; + +export default function cleanupSqliteSearch() { + log.info("Starting SQLite custom search cleanup migration..."); + + try { + sql.transactional(() => { + // Drop custom search tables if they exist + log.info("Dropping note_search_content table..."); + sql.executeScript(`DROP TABLE IF EXISTS note_search_content`); + + log.info("Dropping note_tokens table..."); + sql.executeScript(`DROP TABLE IF EXISTS note_tokens`); + + // Clean up any entity changes for these tables + // This prevents sync issues and cleans up change tracking + log.info("Cleaning up entity changes for removed tables..."); + sql.execute(` + DELETE FROM entity_changes + WHERE entityName IN ('note_search_content', 'note_tokens') + `); + + log.info("SQLite custom search cleanup completed successfully"); + }); + } catch (error) { + log.error(`Error during SQLite search cleanup: ${error}`); + throw new Error(`Failed to clean up SQLite search tables: ${error}`); + } +} diff --git a/apps/server/src/migrations/migrations.ts b/apps/server/src/migrations/migrations.ts index 6cab184f6..feafd4bc4 100644 --- a/apps/server/src/migrations/migrations.ts +++ b/apps/server/src/migrations/migrations.ts @@ -6,10 +6,10 @@ // Migrations should be kept in descending order, so the latest migration is first. const MIGRATIONS: (SqlMigration | JsMigration)[] = [ - // Add SQLite native search with normalized text tables + // Clean up custom SQLite search tables (replaced by FTS5 trigram) { - version: 235, - module: async () => import("./0235__sqlite_native_search.js") + version: 236, + module: async () => import("./0236__cleanup_sqlite_search.js") }, // Add FTS5 full-text search support and strategic performance indexes { diff --git a/apps/server/src/routes/api/search_admin.ts b/apps/server/src/routes/api/search_admin.ts deleted file mode 100644 index 394d097b2..000000000 --- a/apps/server/src/routes/api/search_admin.ts +++ /dev/null @@ -1,243 +0,0 @@ -/** - * API endpoints for search administration and monitoring - */ - -import { Router } from "express"; -import performanceMonitor from "../../services/search/performance_monitor.js"; -import abTestingService from "../../services/search/ab_testing.js"; -import { SQLiteSearchService } from "../../services/search/sqlite_search_service.js"; -import optionService from "../../services/options.js"; -import sql from "../../services/sql.js"; -import log from "../../services/log.js"; - -const router = Router(); - -/** - * Get search performance metrics - */ -router.get("/api/search-admin/metrics", (req, res) => { - const metrics = { - recent: performanceMonitor.getRecentMetrics(100), - averages: { - typescript: performanceMonitor.getAverageMetrics("typescript"), - sqlite: performanceMonitor.getAverageMetrics("sqlite") - }, - comparison: performanceMonitor.compareBackends() - }; - - res.json(metrics); -}); - -/** - * Get A/B testing results - */ -router.get("/api/search-admin/ab-tests", (req, res) => { - const results = { - summary: abTestingService.getSummary(), - recent: abTestingService.getRecentResults(50) - }; - - res.json(results); -}); - -/** - * Get current search configuration - */ -router.get("/api/search-admin/config", (req, res) => { - const config = { - backend: optionService.getOption("searchBackend"), - sqliteEnabled: optionService.getOptionBool("searchSqliteEnabled"), - performanceLogging: optionService.getOptionBool("searchSqlitePerformanceLogging"), - maxMemory: optionService.getOptionInt("searchSqliteMaxMemory"), - batchSize: optionService.getOptionInt("searchSqliteBatchSize"), - autoRebuild: optionService.getOptionBool("searchSqliteAutoRebuild") - }; - - res.json(config); -}); - -/** - * Update search configuration - */ -router.put("/api/search-admin/config", (req, res) => { - try { - const { backend, sqliteEnabled, performanceLogging, maxMemory, batchSize, autoRebuild } = req.body; - - if (backend !== undefined) { - if (!["typescript", "sqlite"].includes(backend)) { - return res.status(400).json({ error: "Invalid backend. Must be 'typescript' or 'sqlite'" }); - } - optionService.setOption("searchBackend", backend); - } - - if (sqliteEnabled !== undefined) { - optionService.setOption("searchSqliteEnabled", sqliteEnabled ? "true" : "false"); - } - - if (performanceLogging !== undefined) { - optionService.setOption("searchSqlitePerformanceLogging", performanceLogging ? "true" : "false"); - performanceMonitor.updateSettings(); - } - - if (maxMemory !== undefined) { - if (maxMemory < 1048576 || maxMemory > 1073741824) { // 1MB to 1GB - return res.status(400).json({ error: "Max memory must be between 1MB and 1GB" }); - } - optionService.setOption("searchSqliteMaxMemory", maxMemory.toString()); - } - - if (batchSize !== undefined) { - if (batchSize < 10 || batchSize > 1000) { - return res.status(400).json({ error: "Batch size must be between 10 and 1000" }); - } - optionService.setOption("searchSqliteBatchSize", batchSize.toString()); - } - - if (autoRebuild !== undefined) { - optionService.setOption("searchSqliteAutoRebuild", autoRebuild ? "true" : "false"); - } - - res.json({ success: true, message: "Configuration updated successfully" }); - } catch (error: any) { - log.error(`Failed to update search configuration: ${error}`); - res.status(500).json({ error: error.message }); - } -}); - -/** - * Get SQLite search index status - */ -router.get("/api/search-admin/sqlite/status", async (req, res) => { - try { - const service = SQLiteSearchService.getInstance(); - const status = await service.getIndexStatus(); - - // Add table sizes - const tableSizes = sql.getRows<{ name: string; size: number }>(` - SELECT - name, - (SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name=m.name) as size - FROM sqlite_master m - WHERE type='table' AND name IN ('note_search_content', 'note_tokens', 'notes_fts', 'notes_fts_data', 'notes_fts_idx', 'notes_fts_content') - `); - - res.json({ - ...status, - tables: tableSizes - }); - } catch (error: any) { - log.error(`Failed to get SQLite search status: ${error}`); - res.status(500).json({ error: error.message }); - } -}); - -/** - * Rebuild SQLite search index - */ -router.post("/api/search-admin/sqlite/rebuild", async (req, res) => { - try { - const { force = false } = req.body; - - log.info("Starting SQLite search index rebuild via API"); - - const service = SQLiteSearchService.getInstance(); - const startTime = Date.now(); - - await service.rebuildIndex(force); - - const duration = Date.now() - startTime; - log.info(`SQLite search index rebuild completed in ${duration}ms`); - - res.json({ - success: true, - message: "Index rebuilt successfully", - duration - }); - } catch (error: any) { - log.error(`Failed to rebuild SQLite search index: ${error}`); - res.status(500).json({ error: error.message }); - } -}); - -/** - * Clear SQLite search index - */ -router.delete("/api/search-admin/sqlite/index", async (req, res) => { - try { - log.info("Clearing SQLite search index via API"); - - const service = SQLiteSearchService.getInstance(); - service.clearIndex(); - - res.json({ - success: true, - message: "Index cleared successfully" - }); - } catch (error: any) { - log.error(`Failed to clear SQLite search index: ${error}`); - res.status(500).json({ error: error.message }); - } -}); - -/** - * Reset performance metrics - */ -router.delete("/api/search-admin/metrics", (req, res) => { - performanceMonitor.reset(); - res.json({ success: true, message: "Metrics reset successfully" }); -}); - -/** - * Reset A/B test results - */ -router.delete("/api/search-admin/ab-tests", (req, res) => { - abTestingService.reset(); - res.json({ success: true, message: "A/B test results reset successfully" }); -}); - -/** - * Set A/B testing sample rate - */ -router.put("/api/search-admin/ab-tests/sample-rate", (req, res) => { - try { - const { rate } = req.body; - - if (rate === undefined || rate < 0 || rate > 1) { - return res.status(400).json({ error: "Sample rate must be between 0 and 1" }); - } - - abTestingService.setSampleRate(rate); - res.json({ success: true, message: `Sample rate set to ${rate * 100}%` }); - } catch (error: any) { - res.status(500).json({ error: error.message }); - } -}); - -/** - * Test search with both backends for comparison - */ -router.post("/api/search-admin/test", async (req, res) => { - try { - const { query } = req.body; - - if (!query) { - return res.status(400).json({ error: "Query is required" }); - } - - const result = await abTestingService.runComparison(query, {}); - - if (!result) { - return res.json({ - message: "Test not run (sampling or disabled)", - query - }); - } - - res.json(result); - } catch (error: any) { - log.error(`Search test failed: ${error}`); - res.status(500).json({ error: error.message }); - } -}); - -export default router; \ No newline at end of file diff --git a/apps/server/src/routes/routes.ts b/apps/server/src/routes/routes.ts index 387db2f1f..9ba6b686c 100644 --- a/apps/server/src/routes/routes.ts +++ b/apps/server/src/routes/routes.ts @@ -40,7 +40,6 @@ import scriptRoute from "./api/script.js"; import senderRoute from "./api/sender.js"; import filesRoute from "./api/files.js"; import searchRoute from "./api/search.js"; -import searchAdminRoute from "./api/search_admin.js"; import bulkActionRoute from "./api/bulk_action.js"; import specialNotesRoute from "./api/special_notes.js"; import noteMapRoute from "./api/note_map.js"; @@ -261,9 +260,6 @@ function register(app: express.Application) { apiRoute(GET, "/api/search/:searchString", searchRoute.search); apiRoute(GET, "/api/search-templates", searchRoute.searchTemplates); - // Search administration routes - app.use(searchAdminRoute); - apiRoute(PST, "/api/bulk-action/execute", bulkActionRoute.execute); apiRoute(PST, "/api/bulk-action/affected-notes", bulkActionRoute.getAffectedNoteCount); diff --git a/apps/server/src/services/options_init.ts b/apps/server/src/services/options_init.ts index be9cb01c7..c6e0231c5 100644 --- a/apps/server/src/services/options_init.ts +++ b/apps/server/src/services/options_init.ts @@ -215,14 +215,6 @@ const defaultOptions: DefaultOption[] = [ { name: "aiSystemPrompt", value: "", isSynced: true }, { name: "aiSelectedProvider", value: "openai", isSynced: true }, - // Search configuration - { name: "searchBackend", value: "typescript", isSynced: false }, // "typescript" or "sqlite" - { name: "searchSqliteEnabled", value: "false", isSynced: false }, - { name: "searchSqlitePerformanceLogging", value: "false", isSynced: false }, - { name: "searchSqliteMaxMemory", value: "67108864", isSynced: false }, // 64MB default - { name: "searchSqliteBatchSize", value: "100", isSynced: false }, - { name: "searchSqliteAutoRebuild", value: "true", isSynced: false }, - { name: "seenCallToActions", value: "[]", isSynced: true } ]; diff --git a/apps/server/src/services/search/ab_testing.ts b/apps/server/src/services/search/ab_testing.ts deleted file mode 100644 index 33465d746..000000000 --- a/apps/server/src/services/search/ab_testing.ts +++ /dev/null @@ -1,218 +0,0 @@ -/** - * A/B Testing utilities for comparing search backend performance - */ - -import SearchContext from "./search_context.js"; -import type { SearchParams } from "./services/types.js"; -import performanceMonitor from "./performance_monitor.js"; -import log from "../log.js"; -import optionService from "../options.js"; - -export interface ABTestResult { - query: string; - typescriptTime: number; - sqliteTime: number; - typescriptResults: number; - sqliteResults: number; - resultsMatch: boolean; - speedup: number; - winner: "typescript" | "sqlite" | "tie"; -} - -class ABTestingService { - private enabled: boolean = false; - private sampleRate: number = 0.1; // 10% of searches by default - private results: ABTestResult[] = []; - private maxResults: number = 1000; - - constructor() { - this.updateSettings(); - } - - updateSettings() { - try { - this.enabled = optionService.getOptionBool("searchSqliteEnabled"); - // Could add a separate AB testing option if needed - } catch { - this.enabled = false; - } - } - - /** - * Determines if we should run an A/B test for this query - */ - shouldRunTest(): boolean { - if (!this.enabled) { - return false; - } - - // Random sampling - return Math.random() < this.sampleRate; - } - - /** - * Run the same search query with both backends and compare results - */ - async runComparison(query: string, params: SearchParams): Promise { - if (!this.shouldRunTest()) { - return null; - } - - try { - // Dynamically import to avoid circular dependencies - const searchModule = await import("./services/search.js"); - - // Run with TypeScript backend - const tsContext = new SearchContext({ ...params, forceBackend: "typescript" }); - const tsTimer = performanceMonitor.startTimer(); - const tsResults = searchModule.default.findResultsWithQuery(query, tsContext); - const tsTime = tsTimer(); - - // Run with SQLite backend - const sqliteContext = new SearchContext({ ...params, forceBackend: "sqlite" }); - const sqliteTimer = performanceMonitor.startTimer(); - const sqliteResults = searchModule.default.findResultsWithQuery(query, sqliteContext); - const sqliteTime = sqliteTimer(); - - // Compare results - const tsNoteIds = new Set(tsResults.map(r => r.noteId)); - const sqliteNoteIds = new Set(sqliteResults.map(r => r.noteId)); - - // Check if results match (same notes found) - const resultsMatch = tsNoteIds.size === sqliteNoteIds.size && - [...tsNoteIds].every(id => sqliteNoteIds.has(id)); - - // Calculate speedup - const speedup = tsTime / sqliteTime; - - // Determine winner - let winner: "typescript" | "sqlite" | "tie"; - if (speedup > 1.2) { - winner = "sqlite"; - } else if (speedup < 0.83) { - winner = "typescript"; - } else { - winner = "tie"; - } - - const result: ABTestResult = { - query: query.substring(0, 100), - typescriptTime: tsTime, - sqliteTime: sqliteTime, - typescriptResults: tsResults.length, - sqliteResults: sqliteResults.length, - resultsMatch, - speedup, - winner - }; - - this.recordResult(result); - - // Log significant differences - if (!resultsMatch) { - log.info(`A/B test found different results for query "${query.substring(0, 50)}": TS=${tsResults.length}, SQLite=${sqliteResults.length}`); - } - - if (Math.abs(speedup - 1) > 0.5) { - log.info(`A/B test significant performance difference: ${winner} is ${Math.abs(speedup - 1).toFixed(1)}x faster for query "${query.substring(0, 50)}"`); - } - - return result; - } catch (error) { - log.error(`A/B test failed: ${error}`); - return null; - } - } - - private recordResult(result: ABTestResult) { - this.results.push(result); - - // Keep only the last N results - if (this.results.length > this.maxResults) { - this.results = this.results.slice(-this.maxResults); - } - } - - /** - * Get summary statistics from A/B tests - */ - getSummary(): { - totalTests: number; - avgSpeedup: number; - typescriptWins: number; - sqliteWins: number; - ties: number; - mismatchRate: number; - recommendation: string; - } { - if (this.results.length === 0) { - return { - totalTests: 0, - avgSpeedup: 1, - typescriptWins: 0, - sqliteWins: 0, - ties: 0, - mismatchRate: 0, - recommendation: "No A/B test data available" - }; - } - - const totalTests = this.results.length; - const avgSpeedup = this.results.reduce((sum, r) => sum + r.speedup, 0) / totalTests; - const typescriptWins = this.results.filter(r => r.winner === "typescript").length; - const sqliteWins = this.results.filter(r => r.winner === "sqlite").length; - const ties = this.results.filter(r => r.winner === "tie").length; - const mismatches = this.results.filter(r => !r.resultsMatch).length; - const mismatchRate = mismatches / totalTests; - - let recommendation: string; - if (mismatchRate > 0.1) { - recommendation = "High mismatch rate detected - SQLite search may have accuracy issues"; - } else if (avgSpeedup > 1.5) { - recommendation = `SQLite is ${avgSpeedup.toFixed(1)}x faster on average - consider enabling`; - } else if (avgSpeedup < 0.67) { - recommendation = `TypeScript is ${(1/avgSpeedup).toFixed(1)}x faster on average - keep using TypeScript`; - } else { - recommendation = "Both backends perform similarly - choice depends on other factors"; - } - - return { - totalTests, - avgSpeedup, - typescriptWins, - sqliteWins, - ties, - mismatchRate, - recommendation - }; - } - - /** - * Get recent test results - */ - getRecentResults(count: number = 100): ABTestResult[] { - return this.results.slice(-count); - } - - /** - * Clear all test results - */ - reset() { - this.results = []; - } - - /** - * Set the sampling rate for A/B tests - */ - setSampleRate(rate: number) { - if (rate < 0 || rate > 1) { - throw new Error("Sample rate must be between 0 and 1"); - } - this.sampleRate = rate; - } -} - -// Singleton instance -const abTestingService = new ABTestingService(); - -export default abTestingService; \ No newline at end of file diff --git a/apps/server/src/services/search/expressions/note_content_sqlite.ts b/apps/server/src/services/search/expressions/note_content_sqlite.ts deleted file mode 100644 index ac3f7653d..000000000 --- a/apps/server/src/services/search/expressions/note_content_sqlite.ts +++ /dev/null @@ -1,155 +0,0 @@ -/** - * SQLite-based Note Content Fulltext Expression - * - * This is a drop-in replacement for NoteContentFulltextExp that uses - * the SQLite search service for dramatically improved performance. - * It maintains 100% compatibility with the existing API while providing - * 10-30x speed improvements. - */ - -import type SearchContext from "../search_context.js"; -import Expression from "./expression.js"; -import NoteSet from "../note_set.js"; -import log from "../../log.js"; -import becca from "../../../becca/becca.js"; -import { getSQLiteSearchService, type SearchOptions } from "../sqlite_search_service.js"; - -const ALLOWED_OPERATORS = new Set(["=", "!=", "*=*", "*=", "=*", "%=", "~=", "~*"]); - -interface ConstructorOpts { - tokens: string[]; - raw?: boolean; - flatText?: boolean; -} - -/** - * SQLite-optimized implementation of note content fulltext search - */ -class NoteContentSQLiteExp extends Expression { - private operator: string; - tokens: string[]; - private raw: boolean; - private flatText: boolean; - private sqliteService = getSQLiteSearchService(); - - constructor(operator: string, { tokens, raw, flatText }: ConstructorOpts) { - super(); - - if (!operator || !tokens || !Array.isArray(tokens)) { - throw new Error('Invalid parameters: operator and tokens are required'); - } - - this.operator = operator; - this.tokens = tokens; - this.raw = !!raw; - this.flatText = !!flatText; - } - - execute(inputNoteSet: NoteSet, executionContext: {}, searchContext: SearchContext) { - if (!ALLOWED_OPERATORS.has(this.operator)) { - searchContext.addError(`Note content can be searched only with operators: ${Array.from(ALLOWED_OPERATORS).join(", ")}, operator ${this.operator} given.`); - return inputNoteSet; - } - - const resultNoteSet = new NoteSet(); - const startTime = Date.now(); - - try { - // Prepare search options - const searchOptions: SearchOptions = { - includeProtected: searchContext.includeArchivedNotes, - includeDeleted: false, - limit: searchContext.limit || undefined - }; - - // If we have an input note set, use it as a filter - if (inputNoteSet.notes.length > 0) { - searchOptions.noteIdFilter = new Set(inputNoteSet.getNoteIds()); - } - - // Map ~* operator to ~= for SQLite service - const mappedOperator = this.operator === "~*" ? "~=" : this.operator; - - // Execute SQLite search - const noteIds = this.sqliteService.search( - this.tokens, - mappedOperator, - searchContext, - searchOptions - ); - - // Build result note set from note IDs - for (const noteId of noteIds) { - const note = becca.notes[noteId]; - if (note) { - resultNoteSet.add(note); - } - } - - // Log performance if enabled - const elapsed = Date.now() - startTime; - if (searchContext.debug) { - log.info(`SQLite search completed: operator=${this.operator}, tokens=${this.tokens.join(" ")}, ` + - `results=${noteIds.size}, time=${elapsed}ms`); - } - - // Store highlighted tokens for UI - if (noteIds.size > 0) { - searchContext.highlightedTokens = this.tokens; - } - - } catch (error) { - log.error(`SQLite search failed: ${error}`); - searchContext.addError(`Search failed: ${error}`); - - // On error, return input set unchanged - return inputNoteSet; - } - - return resultNoteSet; - } - - /** - * Get performance statistics for monitoring - */ - getStatistics() { - return this.sqliteService.getStatistics(); - } - - /** - * Check if SQLite search is available - */ - static isAvailable(): boolean { - const service = getSQLiteSearchService(); - const stats = service.getStatistics(); - return stats.tablesInitialized; - } - - /** - * Create a compatible expression based on availability - * This allows gradual migration from the old implementation - */ - static createExpression(operator: string, opts: ConstructorOpts): Expression { - if (NoteContentSQLiteExp.isAvailable()) { - return new NoteContentSQLiteExp(operator, opts); - } else { - // Fall back to original implementation if SQLite not ready - // This would import the original NoteContentFulltextExp - log.info("SQLite search not available, using fallback implementation"); - - // Dynamic import to avoid circular dependency - const NoteContentFulltextExp = require("./note_content_fulltext.js").default; - return new NoteContentFulltextExp(operator, opts); - } - } -} - -export default NoteContentSQLiteExp; - -/** - * Factory function for creating search expressions - * This can be used as a drop-in replacement in the expression builder - */ -export function createNoteContentExpression(operator: string, opts: ConstructorOpts): Expression { - return NoteContentSQLiteExp.createExpression(operator, opts); -} \ No newline at end of file diff --git a/apps/server/src/services/search/fts_blob_deduplication.test.ts.disabled b/apps/server/src/services/search/fts_blob_deduplication.test.ts.disabled deleted file mode 100644 index 399d7af85..000000000 --- a/apps/server/src/services/search/fts_blob_deduplication.test.ts.disabled +++ /dev/null @@ -1,405 +0,0 @@ -/** - * Tests for FTS5 blob deduplication scenarios - * - * This test file validates that FTS indexing works correctly when: - * 1. Multiple notes share the same blob (deduplication) - * 2. Notes change content to match existing blobs - * 3. Blobs are updated and affect multiple notes - * 4. Notes switch between unique and shared blobs - */ - -import { describe, it, expect, beforeEach, afterEach } from 'vitest'; -import sql from '../sql.js'; -import beccaLoader from '../../becca/becca_loader.js'; -import noteService from '../notes.js'; -import searchService from './services/search.js'; -import { ftsSearchService } from './fts_search.js'; - -describe('FTS5 Blob Deduplication Tests', () => { - beforeEach(() => { - // Ensure we have a clean test database with FTS enabled - sql.execute("DELETE FROM notes WHERE noteId LIKE 'test_%'"); - sql.execute("DELETE FROM blobs WHERE blobId LIKE 'test_%'"); - sql.execute("DELETE FROM notes_fts WHERE noteId LIKE 'test_%'"); - - // Reload becca to ensure cache is in sync - beccaLoader.load(); - }); - - afterEach(() => { - // Clean up test data - sql.execute("DELETE FROM notes WHERE noteId LIKE 'test_%'"); - sql.execute("DELETE FROM blobs WHERE blobId LIKE 'test_%'"); - sql.execute("DELETE FROM notes_fts WHERE noteId LIKE 'test_%'"); - }); - - describe('Blob Deduplication Scenarios', () => { - it('should index multiple notes sharing the same blob', async () => { - // Create first note with unique content - const note1 = await noteService.createNewNote({ - noteId: 'test_note1', - parentNoteId: 'root', - title: 'Test Note 1', - content: 'Shared content for deduplication test', - type: 'text' - }); - - // Create second note with the same content (will share blob) - const note2 = await noteService.createNewNote({ - noteId: 'test_note2', - parentNoteId: 'root', - title: 'Test Note 2', - content: 'Shared content for deduplication test', - type: 'text' - }); - - // Verify both notes share the same blob - const blob1 = sql.getRow("SELECT blobId FROM notes WHERE noteId = ?", ['test_note1']); - const blob2 = sql.getRow("SELECT blobId FROM notes WHERE noteId = ?", ['test_note2']); - expect(blob1.blobId).toBe(blob2.blobId); - - // Verify both notes are indexed in FTS - const ftsCount = sql.getValue( - "SELECT COUNT(*) FROM notes_fts WHERE noteId IN (?, ?)", - ['test_note1', 'test_note2'] - ); - expect(ftsCount).toBe(2); - - // Search should find both notes - const searchResults = searchService.searchNotes('deduplication'); - const foundNoteIds = searchResults.map(r => r.noteId); - expect(foundNoteIds).toContain('test_note1'); - expect(foundNoteIds).toContain('test_note2'); - }); - - it('should update FTS when note content changes to match existing blob', async () => { - // Create first note with unique content - const note1 = await noteService.createNewNote({ - noteId: 'test_note3', - parentNoteId: 'root', - title: 'Note with existing content', - content: 'This is existing content in the database', - type: 'text' - }); - - // Create second note with different content - const note2 = await noteService.createNewNote({ - noteId: 'test_note4', - parentNoteId: 'root', - title: 'Note with different content', - content: 'This is completely different content', - type: 'text' - }); - - // Verify notes have different blobs initially - const initialBlob1 = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note3']); - const initialBlob2 = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note4']); - expect(initialBlob1).not.toBe(initialBlob2); - - // Change note2's content to match note1 (deduplication occurs) - await noteService.updateNoteContent('test_note4', 'This is existing content in the database'); - - // Verify both notes now share the same blob - const finalBlob1 = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note3']); - const finalBlob2 = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note4']); - expect(finalBlob1).toBe(finalBlob2); - - // Verify FTS is updated correctly for note2 - const ftsContent = sql.getValue( - "SELECT content FROM notes_fts WHERE noteId = ?", - ['test_note4'] - ); - expect(ftsContent).toBe('This is existing content in the database'); - - // Search for old content should not find note2 - const oldContentSearch = searchService.searchNotes('completely different'); - const oldSearchIds = oldContentSearch.map(r => r.noteId); - expect(oldSearchIds).not.toContain('test_note4'); - - // Search for new content should find both notes - const newContentSearch = searchService.searchNotes('existing content'); - const newSearchIds = newContentSearch.map(r => r.noteId); - expect(newSearchIds).toContain('test_note3'); - expect(newSearchIds).toContain('test_note4'); - }); - - it('should update all notes when shared blob content changes', async () => { - // Create three notes with the same content - const sharedContent = 'Original shared content for blob update test'; - - await noteService.createNewNote({ - noteId: 'test_note5', - parentNoteId: 'root', - title: 'Shared Note 1', - content: sharedContent, - type: 'text' - }); - - await noteService.createNewNote({ - noteId: 'test_note6', - parentNoteId: 'root', - title: 'Shared Note 2', - content: sharedContent, - type: 'text' - }); - - await noteService.createNewNote({ - noteId: 'test_note7', - parentNoteId: 'root', - title: 'Shared Note 3', - content: sharedContent, - type: 'text' - }); - - // Verify all three share the same blob - const blobIds = sql.getColumn( - "SELECT DISTINCT blobId FROM notes WHERE noteId IN (?, ?, ?)", - ['test_note5', 'test_note6', 'test_note7'] - ); - expect(blobIds.length).toBe(1); - const sharedBlobId = blobIds[0]; - - // Update the blob content directly (simulating what would happen in real update) - sql.execute( - "UPDATE blobs SET content = ? WHERE blobId = ?", - ['Updated shared content for all notes', sharedBlobId] - ); - - // Verify FTS is updated for all three notes - const ftsContents = sql.getColumn( - "SELECT content FROM notes_fts WHERE noteId IN (?, ?, ?) ORDER BY noteId", - ['test_note5', 'test_note6', 'test_note7'] - ); - - expect(ftsContents).toHaveLength(3); - ftsContents.forEach(content => { - expect(content).toBe('Updated shared content for all notes'); - }); - - // Search for old content should find nothing - const oldSearch = searchService.searchNotes('Original shared'); - expect(oldSearch.filter(r => r.noteId.startsWith('test_'))).toHaveLength(0); - - // Search for new content should find all three - const newSearch = searchService.searchNotes('Updated shared'); - const foundIds = newSearch.map(r => r.noteId).filter(id => id.startsWith('test_')); - expect(foundIds).toContain('test_note5'); - expect(foundIds).toContain('test_note6'); - expect(foundIds).toContain('test_note7'); - }); - - it('should handle note switching from shared to unique blob', async () => { - // Create two notes with shared content - const sharedContent = 'Shared content before divergence'; - - const note1 = await noteService.createNewNote({ - noteId: 'test_note8', - parentNoteId: 'root', - title: 'Diverging Note 1', - content: sharedContent, - type: 'text' - }); - - const note2 = await noteService.createNewNote({ - noteId: 'test_note9', - parentNoteId: 'root', - title: 'Diverging Note 2', - content: sharedContent, - type: 'text' - }); - - // Verify they share the same blob - const initialBlob1 = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note8']); - const initialBlob2 = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note9']); - expect(initialBlob1).toBe(initialBlob2); - - // Change note2 to unique content - await noteService.updateNoteContent('test_note9', 'Unique content after divergence'); - - // Verify they now have different blobs - const finalBlob1 = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note8']); - const finalBlob2 = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note9']); - expect(finalBlob1).not.toBe(finalBlob2); - - // Verify FTS is correctly updated - const ftsContent1 = sql.getValue( - "SELECT content FROM notes_fts WHERE noteId = ?", - ['test_note8'] - ); - const ftsContent2 = sql.getValue( - "SELECT content FROM notes_fts WHERE noteId = ?", - ['test_note9'] - ); - - expect(ftsContent1).toBe('Shared content before divergence'); - expect(ftsContent2).toBe('Unique content after divergence'); - - // Search should find correct notes - const sharedSearch = searchService.searchNotes('before divergence'); - expect(sharedSearch.map(r => r.noteId)).toContain('test_note8'); - expect(sharedSearch.map(r => r.noteId)).not.toContain('test_note9'); - - const uniqueSearch = searchService.searchNotes('after divergence'); - expect(uniqueSearch.map(r => r.noteId)).not.toContain('test_note8'); - expect(uniqueSearch.map(r => r.noteId)).toContain('test_note9'); - }); - - it('should handle import scenarios where notes exist before blobs', async () => { - // Simulate import scenario: create note without blob first - sql.execute(` - INSERT INTO notes (noteId, title, type, mime, blobId, isDeleted, isProtected, dateCreated, dateModified, utcDateCreated, utcDateModified) - VALUES ('test_note10', 'Import Test Note', 'text', 'text/html', 'pending_blob_123', 0, 0, datetime('now'), datetime('now'), datetime('now'), datetime('now')) - `); - - // Verify note is not in FTS yet (no blob content) - const initialFts = sql.getValue( - "SELECT COUNT(*) FROM notes_fts WHERE noteId = ?", - ['test_note10'] - ); - expect(initialFts).toBe(0); - - // Now create the blob (simulating delayed blob creation during import) - sql.execute(` - INSERT INTO blobs (blobId, content, dateModified, utcDateModified) - VALUES ('pending_blob_123', 'Imported content finally available', datetime('now'), datetime('now')) - `); - - // Verify note is now indexed in FTS - const finalFts = sql.getValue( - "SELECT content FROM notes_fts WHERE noteId = ?", - ['test_note10'] - ); - expect(finalFts).toBe('Imported content finally available'); - - // Search should now find the note - const searchResults = searchService.searchNotes('Imported content'); - expect(searchResults.map(r => r.noteId)).toContain('test_note10'); - }); - - it('should correctly handle protected notes during deduplication', async () => { - // Create a regular note - const note1 = await noteService.createNewNote({ - noteId: 'test_note11', - parentNoteId: 'root', - title: 'Regular Note', - content: 'Content that will be shared', - type: 'text' - }); - - // Create a protected note with same content - sql.execute(` - INSERT INTO notes (noteId, title, type, mime, blobId, isDeleted, isProtected, dateCreated, dateModified, utcDateCreated, utcDateModified) - VALUES ('test_note12', 'Protected Note', 'text', 'text/html', - (SELECT blobId FROM notes WHERE noteId = 'test_note11'), - 0, 1, datetime('now'), datetime('now'), datetime('now'), datetime('now')) - `); - - // Verify protected note is NOT in FTS - const protectedInFts = sql.getValue( - "SELECT COUNT(*) FROM notes_fts WHERE noteId = ?", - ['test_note12'] - ); - expect(protectedInFts).toBe(0); - - // Verify regular note IS in FTS - const regularInFts = sql.getValue( - "SELECT COUNT(*) FROM notes_fts WHERE noteId = ?", - ['test_note11'] - ); - expect(regularInFts).toBe(1); - - // Update blob content - const blobId = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note11']); - sql.execute("UPDATE blobs SET content = ? WHERE blobId = ?", ['Updated shared content', blobId]); - - // Verify regular note is updated in FTS - const updatedContent = sql.getValue( - "SELECT content FROM notes_fts WHERE noteId = ?", - ['test_note11'] - ); - expect(updatedContent).toBe('Updated shared content'); - - // Verify protected note is still NOT in FTS - const protectedStillNotInFts = sql.getValue( - "SELECT COUNT(*) FROM notes_fts WHERE noteId = ?", - ['test_note12'] - ); - expect(protectedStillNotInFts).toBe(0); - }); - }); - - describe('FTS Sync and Cleanup', () => { - it('should sync missing notes to FTS index', async () => { - // Manually create notes without triggering FTS (simulating missed triggers) - sql.execute(` - INSERT INTO notes (noteId, title, type, mime, blobId, isDeleted, isProtected, dateCreated, dateModified, utcDateCreated, utcDateModified) - VALUES ('test_note13', 'Missed Note 1', 'text', 'text/html', 'blob_missed_1', 0, 0, datetime('now'), datetime('now'), datetime('now'), datetime('now')) - `); - - sql.execute(` - INSERT INTO blobs (blobId, content, dateModified, utcDateModified) - VALUES ('blob_missed_1', 'Content that was missed by triggers', datetime('now'), datetime('now')) - `); - - // Delete from FTS to simulate missing index - sql.execute("DELETE FROM notes_fts WHERE noteId = 'test_note13'"); - - // Verify note is missing from FTS - const beforeSync = sql.getValue( - "SELECT COUNT(*) FROM notes_fts WHERE noteId = ?", - ['test_note13'] - ); - expect(beforeSync).toBe(0); - - // Run sync - const syncedCount = ftsSearchService.syncMissingNotes(['test_note13']); - expect(syncedCount).toBe(1); - - // Verify note is now in FTS - const afterSync = sql.getValue( - "SELECT content FROM notes_fts WHERE noteId = ?", - ['test_note13'] - ); - expect(afterSync).toBe('Content that was missed by triggers'); - }); - - it('should handle FTS rebuild correctly', () => { - // Create some test notes - const noteIds = ['test_note14', 'test_note15', 'test_note16']; - noteIds.forEach((noteId, index) => { - sql.execute(` - INSERT INTO notes (noteId, title, type, mime, blobId, isDeleted, isProtected, dateCreated, dateModified, utcDateCreated, utcDateModified) - VALUES (?, ?, 'text', 'text/html', ?, 0, 0, datetime('now'), datetime('now'), datetime('now'), datetime('now')) - `, [noteId, `Test Note ${index}`, `blob_${noteId}`]); - - sql.execute(` - INSERT INTO blobs (blobId, content, dateModified, utcDateModified) - VALUES (?, ?, datetime('now'), datetime('now')) - `, [`blob_${noteId}`, `Content for note ${index}`]); - }); - - // Corrupt FTS by adding invalid entries - sql.execute("INSERT INTO notes_fts (noteId, title, content) VALUES ('invalid_note', 'Invalid', 'Invalid content')"); - - // Rebuild index - ftsSearchService.rebuildIndex(); - - // Verify only valid notes are in FTS - const ftsCount = sql.getValue("SELECT COUNT(*) FROM notes_fts WHERE noteId LIKE 'test_%'"); - expect(ftsCount).toBe(3); - - // Verify invalid entry is gone - const invalidCount = sql.getValue("SELECT COUNT(*) FROM notes_fts WHERE noteId = 'invalid_note'"); - expect(invalidCount).toBe(0); - - // Verify content is correct - noteIds.forEach((noteId, index) => { - const content = sql.getValue( - "SELECT content FROM notes_fts WHERE noteId = ?", - [noteId] - ); - expect(content).toBe(`Content for note ${index}`); - }); - }); - }); -}); \ No newline at end of file diff --git a/apps/server/src/services/search/fts_search.ts b/apps/server/src/services/search/fts_search.ts index 82031953f..6f65347fb 100644 --- a/apps/server/src/services/search/fts_search.ts +++ b/apps/server/src/services/search/fts_search.ts @@ -1,9 +1,9 @@ /** * FTS5 Search Service - * + * * Encapsulates all FTS5-specific operations for full-text searching. * Provides efficient text search using SQLite's FTS5 extension with: - * - Porter stemming for better matching + * - Trigram tokenization for fast substring matching * - Snippet extraction for context * - Highlighting of matched terms * - Query syntax conversion from Trilium to FTS5 @@ -115,7 +115,7 @@ class FTSSearchService { /** * Converts Trilium search syntax to FTS5 MATCH syntax - * + * * @param tokens - Array of search tokens * @param operator - Trilium search operator * @returns FTS5 MATCH query string @@ -125,8 +125,18 @@ class FTSSearchService { throw new Error("No search tokens provided"); } + // Trigram tokenizer requires minimum 3 characters + const shortTokens = tokens.filter(token => token.length < 3); + if (shortTokens.length > 0) { + const shortList = shortTokens.join(', '); + log.info(`Tokens shorter than 3 characters detected (${shortList}) - cannot use trigram FTS5`); + throw new FTSNotAvailableError( + `Trigram tokenizer requires tokens of at least 3 characters. Short tokens: ${shortList}` + ); + } + // Sanitize tokens to prevent FTS5 syntax injection - const sanitizedTokens = tokens.map(token => + const sanitizedTokens = tokens.map(token => this.sanitizeFTS5Token(token) ); diff --git a/apps/server/src/services/search/search_context.ts b/apps/server/src/services/search/search_context.ts index 71e7cba9c..314c7e7ce 100644 --- a/apps/server/src/services/search/search_context.ts +++ b/apps/server/src/services/search/search_context.ts @@ -24,10 +24,6 @@ class SearchContext { fulltextQuery: string; dbLoadNeeded: boolean; error: string | null; - /** Determines which backend to use for fulltext search */ - searchBackend: "typescript" | "sqlite"; - /** Whether SQLite search is enabled (cached from options) */ - sqliteSearchEnabled: boolean; constructor(params: SearchParams = {}) { this.fastSearch = !!params.fastSearch; @@ -58,43 +54,6 @@ class SearchContext { // and some extra data needs to be loaded before executing this.dbLoadNeeded = false; this.error = null; - - // Determine search backend - this.sqliteSearchEnabled = this.checkSqliteEnabled(); - this.searchBackend = this.determineSearchBackend(params); - } - - private checkSqliteEnabled(): boolean { - try { - // Import dynamically to avoid circular dependencies - const optionService = require("../options.js").default; - // Default to true if the option doesn't exist - const enabled = optionService.getOptionOrNull("searchSqliteEnabled"); - return enabled === null ? true : enabled === "true"; - } catch { - return true; // Default to enabled - } - } - - private determineSearchBackend(params: SearchParams): "typescript" | "sqlite" { - // Allow override via params for testing - if (params.forceBackend) { - return params.forceBackend; - } - - // Check if SQLite is enabled - if (!this.sqliteSearchEnabled) { - return "typescript"; - } - - try { - const optionService = require("../options.js").default; - const backend = optionService.getOptionOrNull("searchBackend"); - // Default to sqlite if option doesn't exist - return backend === "typescript" ? "typescript" : "sqlite"; - } catch { - return "sqlite"; // Default to SQLite for better performance - } } addError(error: string) { diff --git a/apps/server/src/services/search/services/parse.ts b/apps/server/src/services/search/services/parse.ts index a8a7e7eef..b537ee562 100644 --- a/apps/server/src/services/search/services/parse.ts +++ b/apps/server/src/services/search/services/parse.ts @@ -13,7 +13,6 @@ import AttributeExistsExp from "../expressions/attribute_exists.js"; import LabelComparisonExp from "../expressions/label_comparison.js"; import NoteFlatTextExp from "../expressions/note_flat_text.js"; import NoteContentFulltextExp from "../expressions/note_content_fulltext.js"; -import NoteContentSqliteExp from "../expressions/note_content_sqlite.js"; import OrderByAndLimitExp from "../expressions/order_by_and_limit.js"; import AncestorExp from "../expressions/ancestor.js"; import buildComparator from "./build_comparator.js"; @@ -38,20 +37,15 @@ function getFulltext(_tokens: TokenData[], searchContext: SearchContext, leading const operator = leadingOperator === "=" ? "=" : "*=*"; if (!searchContext.fastSearch) { - // Choose between SQLite and TypeScript backend - const ContentExp = searchContext.searchBackend === "sqlite" - ? NoteContentSqliteExp - : NoteContentFulltextExp; - // For exact match with "=", we need different behavior if (leadingOperator === "=" && tokens.length === 1) { // Exact match on title OR exact match on content return new OrExp([ new PropertyComparisonExp(searchContext, "title", "=", tokens[0]), - new ContentExp("=", { tokens, flatText: false }) + new NoteContentFulltextExp("=", { tokens, flatText: false }) ]); } - return new OrExp([new NoteFlatTextExp(tokens), new ContentExp(operator, { tokens, flatText: true })]); + return new OrExp([new NoteFlatTextExp(tokens), new NoteContentFulltextExp(operator, { tokens, flatText: true })]); } else { return new NoteFlatTextExp(tokens); } @@ -154,12 +148,7 @@ function getExpression(tokens: TokenData[], searchContext: SearchContext, level i++; - // Choose between SQLite and TypeScript backend - const ContentExp = searchContext.searchBackend === "sqlite" - ? NoteContentSqliteExp - : NoteContentFulltextExp; - - return new ContentExp(operator.token, { tokens: [tokens[i].token], raw }); + return new NoteContentFulltextExp(operator.token, { tokens: [tokens[i].token], raw }); } if (tokens[i].token === "parents") { @@ -222,12 +211,7 @@ function getExpression(tokens: TokenData[], searchContext: SearchContext, level i += 2; - // Choose between SQLite and TypeScript backend - const ContentExp = searchContext.searchBackend === "sqlite" - ? NoteContentSqliteExp - : NoteContentFulltextExp; - - return new OrExp([new PropertyComparisonExp(searchContext, "title", "*=*", tokens[i].token), new ContentExp("*=*", { tokens: [tokens[i].token] })]); + return new OrExp([new PropertyComparisonExp(searchContext, "title", "*=*", tokens[i].token), new NoteContentFulltextExp("*=*", { tokens: [tokens[i].token] })]); } if (PropertyComparisonExp.isProperty(tokens[i].token)) { diff --git a/apps/server/src/services/search/services/search.ts b/apps/server/src/services/search/services/search.ts index e151e8512..13b13305a 100644 --- a/apps/server/src/services/search/services/search.ts +++ b/apps/server/src/services/search/services/search.ts @@ -19,9 +19,6 @@ import sql from "../../sql.js"; import scriptService from "../../script.js"; import striptags from "striptags"; import protectedSessionService from "../../protected_session.js"; -import performanceMonitor from "../performance_monitor.js"; -import type { DetailedMetrics } from "../performance_monitor.js"; -import abTestingService from "../ab_testing.js"; export interface SearchNoteResult { searchResultNoteIds: string[]; @@ -405,14 +402,6 @@ function parseQueryToExpression(query: string, searchContext: SearchContext) { function searchNotes(query: string, params: SearchParams = {}): BNote[] { const searchContext = new SearchContext(params); - - // Run A/B test in background (non-blocking) - setImmediate(() => { - abTestingService.runComparison(query, params).catch(err => { - log.info(`A/B test failed: ${err}`); - }); - }); - const searchResults = findResultsWithQuery(query, searchContext); return searchResults.map((sr) => becca.notes[sr.noteId]); @@ -422,49 +411,25 @@ function findResultsWithQuery(query: string, searchContext: SearchContext): Sear query = query || ""; searchContext.originalQuery = query; - // Start performance monitoring - const totalTimer = performanceMonitor.startTimer(); - const phases: { name: string; duration: number }[] = []; - - // Parse query - const parseTimer = performanceMonitor.startTimer(); const expression = parseQueryToExpression(query, searchContext); - phases.push({ name: "parse", duration: parseTimer() }); if (!expression) { return []; } // If the query starts with '#', it's a pure expression query. - // Don't use progressive search for these as they may have complex + // Don't use progressive search for these as they may have complex // ordering or other logic that shouldn't be interfered with. const isPureExpressionQuery = query.trim().startsWith('#'); - + let results: SearchResult[]; - const searchTimer = performanceMonitor.startTimer(); - + if (isPureExpressionQuery) { // For pure expression queries, use standard search without progressive phases results = performSearch(expression, searchContext, searchContext.enableFuzzyMatching); } else { results = findResultsWithExpression(expression, searchContext); } - - phases.push({ name: "search", duration: searchTimer() }); - - // Record metrics - const metrics: DetailedMetrics = { - query: query.substring(0, 200), // Truncate long queries - backend: searchContext.searchBackend, - totalTime: totalTimer(), - parseTime: phases[0].duration, - searchTime: phases[1].duration, - resultCount: results.length, - phases, - error: searchContext.error || undefined - }; - - performanceMonitor.recordDetailedMetrics(metrics); return results; } diff --git a/apps/server/src/services/search/services/types.ts b/apps/server/src/services/search/services/types.ts index 63d8a4ba4..7edc3b4ae 100644 --- a/apps/server/src/services/search/services/types.ts +++ b/apps/server/src/services/search/services/types.ts @@ -21,6 +21,4 @@ export interface SearchParams { limit?: number | null; debug?: boolean; fuzzyAttributeSearch?: boolean; - /** Force a specific search backend for testing/comparison */ - forceBackend?: "typescript" | "sqlite"; } diff --git a/apps/server/src/services/search/sqlite_functions.spec.ts b/apps/server/src/services/search/sqlite_functions.spec.ts index 64bfd755a..c1cdcd75a 100644 --- a/apps/server/src/services/search/sqlite_functions.spec.ts +++ b/apps/server/src/services/search/sqlite_functions.spec.ts @@ -5,7 +5,6 @@ import { describe, it, expect, beforeEach, afterEach } from 'vitest'; import Database from 'better-sqlite3'; import { SqliteFunctionsService, getSqliteFunctionsService } from './sqlite_functions.js'; -import { normalize, stripTags } from '../utils.js'; describe('SqliteFunctionsService', () => { let db: Database.Database; @@ -46,38 +45,6 @@ describe('SqliteFunctionsService', () => { }); }); - describe('normalize_text function', () => { - beforeEach(() => { - service.registerFunctions(db); - }); - - it('should normalize text correctly', () => { - const tests = [ - ['café', 'cafe'], - ['naïve', 'naive'], - ['HELLO WORLD', 'hello world'], - ['Über', 'uber'], - ['', ''], - [null, ''], - ]; - - for (const [input, expected] of tests) { - const result = db.prepare('SELECT normalize_text(?) as result').get(input) as { result: string }; - expect(result.result).toBe(expected); - // Verify it matches the utils normalize function - if (input) { - expect(result.result).toBe(normalize(input as string)); - } - } - }); - - it('should handle special characters', () => { - const input = 'Ñoño 123 ABC!@#'; - const result = db.prepare('SELECT normalize_text(?) as result').get(input) as any; - expect(result.result).toBe(normalize(input)); - }); - }); - describe('edit_distance function', () => { beforeEach(() => { service.registerFunctions(db); @@ -143,199 +110,4 @@ describe('SqliteFunctionsService', () => { expect(result.match).toBe(0); }); }); - - describe('tokenize_text function', () => { - beforeEach(() => { - service.registerFunctions(db); - }); - - it('should tokenize text correctly', () => { - const tests = [ - ['hello world', ['hello', 'world']], - ['getUserName', ['getusername', 'get', 'user', 'name']], - ['user_name', ['user_name', 'user', 'name']], - ['hello-world', ['hello', 'world']], - ['test@example.com', ['test', 'example', 'com']], - ['', []], - ]; - - for (const [input, expected] of tests) { - const result = db.prepare('SELECT tokenize_text(?) as tokens').get(input) as any; - const tokens = JSON.parse(result.tokens); - // Check that all expected tokens are present (order may vary due to Set) - for (const token of expected) { - expect(tokens).toContain(token); - } - } - }); - - it('should handle camelCase and snake_case', () => { - const result = db.prepare('SELECT tokenize_text(?) as tokens').get('getUserById_async') as any; - const tokens = JSON.parse(result.tokens); - expect(tokens).toContain('getuserbyid_async'); - expect(tokens).toContain('getuserbyid'); - expect(tokens).toContain('async'); - expect(tokens).toContain('get'); - expect(tokens).toContain('user'); - expect(tokens).toContain('by'); - expect(tokens).toContain('id'); - }); - - it('should handle null input', () => { - const result = db.prepare('SELECT tokenize_text(?) as tokens').get(null) as any; - expect(result.tokens).toBe('[]'); - }); - }); - - describe('strip_html function', () => { - beforeEach(() => { - service.registerFunctions(db); - }); - - it('should strip HTML tags correctly', () => { - const tests = [ - ['

Hello World

', 'Hello World'], - ['
Test
', 'Test'], - ['content', 'content'], - ['text', 'text'], - ['Hello <world>', 'Hello '], - ['  Space', ' Space'], - ['', ''], - ]; - - for (const [input, expected] of tests) { - const result = db.prepare('SELECT strip_html(?) as text').get(input) as any; - expect(result.text).toBe(expected); - } - }); - - it('should handle complex HTML', () => { - const html = ` - - Test - -

Title

-

Paragraph with bold text.

- - - - `; - const result = db.prepare('SELECT strip_html(?) as text').get(html) as any; - expect(result.text).toContain('Title'); - expect(result.text).toContain('Paragraph with bold text'); - expect(result.text).not.toContain('console.log'); - }); - - it('should handle null input', () => { - const result = db.prepare('SELECT strip_html(?) as text').get(null) as any; - expect(result.text).toBe(''); - }); - }); - - describe('fuzzy_match function', () => { - beforeEach(() => { - service.registerFunctions(db); - }); - - it('should perform exact matches', () => { - const tests = [ - ['hello', 'hello world', 1], - ['world', 'hello world', 1], - ['foo', 'hello world', 0], - ]; - - for (const [needle, haystack, expected] of tests) { - const result = db.prepare('SELECT fuzzy_match(?, ?, 2) as match').get(needle, haystack) as any; - expect(result.match).toBe(expected); - } - }); - - it('should perform fuzzy matches within edit distance', () => { - const tests = [ - ['helo', 'hello world', 1], // 1 edit distance - ['wrld', 'hello world', 1], // 1 edit distance - ['hallo', 'hello world', 1], // 1 edit distance - ['xyz', 'hello world', 0], // Too different - ]; - - for (const [needle, haystack, expected] of tests) { - const result = db.prepare('SELECT fuzzy_match(?, ?, 2) as match').get(needle, haystack) as any; - expect(result.match).toBe(expected); - } - }); - - it('should handle case insensitive matching', () => { - const result = db.prepare('SELECT fuzzy_match(?, ?, 2) as match').get('HELLO', 'hello world') as any; - expect(result.match).toBe(1); - }); - - it('should handle null inputs', () => { - const result = db.prepare('SELECT fuzzy_match(?, ?, 2) as match').get(null, 'test') as any; - expect(result.match).toBe(0); - }); - }); - - describe('Integration with SQL queries', () => { - beforeEach(() => { - service.registerFunctions(db); - - // Create a test table - db.exec(` - CREATE TABLE test_notes ( - id INTEGER PRIMARY KEY, - title TEXT, - content TEXT - ) - `); - - // Insert test data - const insert = db.prepare('INSERT INTO test_notes (title, content) VALUES (?, ?)'); - insert.run('Café Meeting', '

Discussion about naïve implementation

'); - insert.run('über wichtig', 'Very important note with HTML & entities'); - insert.run('getUserData', 'Function to get_user_data from database'); - }); - - it('should work in WHERE clauses with normalize_text', () => { - const results = db.prepare(` - SELECT title FROM test_notes - WHERE normalize_text(title) LIKE '%cafe%' - `).all(); - - expect(results).toHaveLength(1); - expect((results[0] as any).title).toBe('Café Meeting'); - }); - - it('should work with fuzzy matching in queries', () => { - const results = db.prepare(` - SELECT title FROM test_notes - WHERE fuzzy_match('getuserdata', normalize_text(title), 2) = 1 - `).all(); - - expect(results).toHaveLength(1); - expect((results[0] as any).title).toBe('getUserData'); - }); - - it('should work with HTML stripping', () => { - const results = db.prepare(` - SELECT strip_html(content) as clean_content - FROM test_notes - WHERE title = 'Café Meeting' - `).all(); - - expect((results[0] as any).clean_content).toBe('Discussion about naïve implementation'); - }); - - it('should work with tokenization', () => { - const result = db.prepare(` - SELECT tokenize_text(title) as tokens - FROM test_notes - WHERE title = 'getUserData' - `).get() as any; - - const tokens = JSON.parse(result.tokens); - expect(tokens).toContain('get'); - expect(tokens).toContain('user'); - expect(tokens).toContain('data'); - }); - }); }); \ No newline at end of file diff --git a/apps/server/src/services/search/sqlite_functions.ts b/apps/server/src/services/search/sqlite_functions.ts index 904a04507..771a112bd 100644 --- a/apps/server/src/services/search/sqlite_functions.ts +++ b/apps/server/src/services/search/sqlite_functions.ts @@ -1,19 +1,17 @@ /** * SQLite Custom Functions Service - * - * This service manages custom SQLite functions that enhance search capabilities. + * + * This service manages custom SQLite functions for general database operations. * Functions are registered with better-sqlite3 to provide native-speed operations - * directly within SQL queries, enabling efficient search indexing and querying. - * + * directly within SQL queries. + * * These functions are used by: - * - Database triggers for automatic search index maintenance - * - Direct SQL queries for search operations - * - Migration scripts for initial data population + * - Fuzzy search fallback (edit_distance) + * - Regular expression matching (regex_match) */ import type { Database } from "better-sqlite3"; import log from "../log.js"; -import { normalize as utilsNormalize, stripTags } from "../utils.js"; /** * Configuration for fuzzy search operations @@ -67,15 +65,7 @@ export class SqliteFunctionsService { // Bind all methods to preserve 'this' context this.functions = [ { - name: "normalize_text", - implementation: this.normalizeText.bind(this), - options: { - deterministic: true, - varargs: false - } - }, - { - name: "edit_distance", + name: "edit_distance", implementation: this.editDistance.bind(this), options: { deterministic: true, @@ -89,30 +79,6 @@ export class SqliteFunctionsService { deterministic: true, varargs: true // Changed to true to handle variable arguments } - }, - { - name: "tokenize_text", - implementation: this.tokenizeText.bind(this), - options: { - deterministic: true, - varargs: false - } - }, - { - name: "strip_html", - implementation: this.stripHtml.bind(this), - options: { - deterministic: true, - varargs: false - } - }, - { - name: "fuzzy_match", - implementation: this.fuzzyMatch.bind(this), - options: { - deterministic: true, - varargs: true // Changed to true to handle variable arguments - } } ]; } @@ -182,22 +148,6 @@ export class SqliteFunctionsService { // ===== Function Implementations ===== - /** - * Normalize text by removing diacritics and converting to lowercase - * Matches the behavior of utils.normalize() exactly - * - * @param text Text to normalize - * @returns Normalized text - */ - private normalizeText(text: string | null | undefined): string { - if (!text || typeof text !== 'string') { - return ''; - } - - // Use the exact same normalization as the rest of the codebase - return utilsNormalize(text); - } - /** * Calculate Levenshtein edit distance between two strings * Optimized with early termination and single-array approach @@ -314,186 +264,6 @@ export class SqliteFunctionsService { return null; } } - - /** - * Tokenize text into searchable words - * Handles punctuation, camelCase, and snake_case - * - * @param text Text to tokenize - * @returns JSON array string of tokens - */ - private tokenizeText(text: string | null | undefined): string { - if (!text || typeof text !== 'string') { - return '[]'; - } - - try { - // Use a Set to avoid duplicates from the start - const expandedTokens: Set = new Set(); - - // Split on word boundaries, preserving apostrophes within words - // But we need to handle underscore separately for snake_case - const tokens = text - .split(/[\s\n\r\t,;.!?()[\]{}"'`~@#$%^&*+=|\\/<>:-]+/) - .filter(token => token.length > 0); - - // Process each token - for (const token of tokens) { - // Add the original token in lowercase - expandedTokens.add(token.toLowerCase()); - - // Handle snake_case first (split on underscore) - const snakeParts = token.split('_').filter(part => part.length > 0); - if (snakeParts.length > 1) { - // We have snake_case - for (const snakePart of snakeParts) { - // Add each snake part - expandedTokens.add(snakePart.toLowerCase()); - - // Also check for camelCase within each snake part - const camelParts = this.splitCamelCase(snakePart); - for (const camelPart of camelParts) { - if (camelPart.length > 0) { - expandedTokens.add(camelPart.toLowerCase()); - } - } - } - } else { - // No snake_case, just check for camelCase - const camelParts = this.splitCamelCase(token); - for (const camelPart of camelParts) { - if (camelPart.length > 0) { - expandedTokens.add(camelPart.toLowerCase()); - } - } - } - } - - // Convert Set to Array for JSON serialization - const uniqueTokens = Array.from(expandedTokens); - - // Return as JSON array string for SQL processing - return JSON.stringify(uniqueTokens); - } catch (error) { - log.error(`Error tokenizing text in SQL: ${error}`); - return '[]'; - } - } - - /** - * Helper method to split camelCase strings - * @param str String to split - * @returns Array of parts - */ - private splitCamelCase(str: string): string[] { - // Split on transitions from lowercase to uppercase - // Also handle sequences of uppercase letters (e.g., "XMLParser" -> ["XML", "Parser"]) - return str.split(/(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])/); - } - - /** - * Strip HTML tags from content - * Removes script and style content, then strips tags and decodes entities - * - * @param html HTML content - * @returns Plain text without HTML tags - */ - private stripHtml(html: string | null | undefined): string { - if (!html || typeof html !== 'string') { - return ''; - } - - try { - let text = html; - - // First remove script and style content entirely (including the tags) - // This needs to happen before stripTags to remove the content - text = text.replace(/)<[^<]*)*<\/script>/gi, ''); - text = text.replace(/)<[^<]*)*<\/style>/gi, ''); - - // Now use stripTags to remove remaining HTML tags - text = stripTags(text); - - // Decode common HTML entities - text = text.replace(/</g, '<'); - text = text.replace(/>/g, '>'); - text = text.replace(/&/g, '&'); - text = text.replace(/"/g, '"'); - text = text.replace(/'/g, "'"); - text = text.replace(/'/g, "'"); - text = text.replace(/ /g, ' '); - - // Normalize whitespace - reduce multiple spaces to single space - // But don't trim leading/trailing space if it was from   - text = text.replace(/\s+/g, ' '); - - return text; - } catch (error) { - log.error(`Error stripping HTML in SQL: ${error}`); - return html; // Return original on error - } - } - - /** - * Fuzzy match with configurable edit distance - * Combines exact and fuzzy matching for optimal performance - * - * SQLite will pass 2 or 3 arguments: - * - 2 args: needle, haystack (uses default maxDistance) - * - 3 args: needle, haystack, maxDistance - * - * @returns 1 if match found, 0 otherwise - */ - private fuzzyMatch(...args: any[]): number { - // Handle variable arguments from SQLite - let needle: string | null | undefined = args[0]; - let haystack: string | null | undefined = args[1]; - let maxDistance: number = args.length > 2 ? args[2] : FUZZY_CONFIG.MAX_EDIT_DISTANCE; - - // Validate input types - if (!needle || !haystack) { - return 0; - } - - if (typeof needle !== 'string' || typeof haystack !== 'string') { - return 0; - } - - // Validate and sanitize maxDistance - if (typeof maxDistance !== 'number' || !Number.isFinite(maxDistance)) { - maxDistance = FUZZY_CONFIG.MAX_EDIT_DISTANCE; - } else { - // Ensure it's a positive integer - maxDistance = Math.max(0, Math.floor(maxDistance)); - } - - // Normalize for comparison - const normalizedNeedle = needle.toLowerCase(); - const normalizedHaystack = haystack.toLowerCase(); - - // Check exact match first (most common case) - if (normalizedHaystack.includes(normalizedNeedle)) { - return 1; - } - - // For fuzzy matching, check individual words - const words = normalizedHaystack.split(/\s+/).filter(w => w.length > 0); - - for (const word of words) { - // Skip if word length difference is too large - if (Math.abs(word.length - normalizedNeedle.length) > maxDistance) { - continue; - } - - // Check edit distance - call with all 3 args since we're calling internally - const distance = this.editDistance(normalizedNeedle, word, maxDistance); - if (distance <= maxDistance) { - return 1; - } - } - - return 0; - } } // Export singleton instance getter diff --git a/apps/server/src/services/search/sqlite_integration.test.ts b/apps/server/src/services/search/sqlite_integration.test.ts deleted file mode 100644 index c6fd9de22..000000000 --- a/apps/server/src/services/search/sqlite_integration.test.ts +++ /dev/null @@ -1,153 +0,0 @@ -/** - * Integration tests for SQLite search implementation - */ - -import { describe, it, expect, beforeAll, afterAll } from "vitest"; -import sql from "../sql.js"; -import { getSQLiteSearchService } from "./sqlite_search_service.js"; -import SearchContext from "./search_context.js"; -import NoteContentSqliteExp from "./expressions/note_content_sqlite.js"; -import NoteSet from "./note_set.js"; -import { getSqliteFunctionsService } from "./sqlite_functions.js"; - -describe("SQLite Search Integration", () => { - let searchService: ReturnType; - let searchContext: SearchContext; - - beforeAll(() => { - // Initialize services - searchService = getSQLiteSearchService(); - searchContext = new SearchContext({ - // searchBackend: "sqlite", // TODO: Add to SearchParams type - // searchSqliteEnabled: true - }); - - // Register SQL functions - const functionsService = getSqliteFunctionsService(); - const db = sql.getDbConnection(); - functionsService.registerFunctions(db); - }); - - afterAll(() => { - // Cleanup if needed - }); - - describe("Service Initialization", () => { - it("should initialize SQLite search service", () => { - expect(searchService).toBeDefined(); - const stats = searchService.getStatistics(); - expect(stats).toBeDefined(); - expect(stats).toHaveProperty("tablesInitialized"); - }); - - it("should have registered SQL functions", () => { - const functionsService = getSqliteFunctionsService(); - expect(functionsService.isRegistered()).toBe(true); - }); - }); - - describe("Expression Creation", () => { - it("should create SQLite expression when available", () => { - const exp = NoteContentSqliteExp.createExpression("*=*", { - tokens: ["test"], - raw: false, - flatText: false - }); - - expect(exp).toBeDefined(); - // Check if it's the SQLite version or fallback - if (NoteContentSqliteExp.isAvailable()) { - expect(exp).toBeInstanceOf(NoteContentSqliteExp); - } - }); - - it("should handle different operators", () => { - const operators = ["=", "!=", "*=*", "*=", "=*", "%=", "~="]; - - for (const op of operators) { - const exp = new NoteContentSqliteExp(op, { - tokens: ["test"], - raw: false, - flatText: false - }); - - expect(exp).toBeDefined(); - expect(exp.tokens).toEqual(["test"]); - } - }); - }); - - describe("Search Execution", () => { - it("should execute search with empty input set", () => { - const exp = new NoteContentSqliteExp("*=*", { - tokens: ["test"], - raw: false, - flatText: false - }); - - const inputSet = new NoteSet(); - const resultSet = exp.execute(inputSet, {}, searchContext); - - expect(resultSet).toBeDefined(); - expect(resultSet).toBeInstanceOf(NoteSet); - }); - - it("should handle search errors gracefully", () => { - const exp = new NoteContentSqliteExp("invalid_op", { - tokens: ["test"], - raw: false, - flatText: false - }); - - const inputSet = new NoteSet(); - const resultSet = exp.execute(inputSet, {}, searchContext); - - expect(resultSet).toBeDefined(); - expect(searchContext.hasError()).toBe(true); - }); - }); - - describe("Backend Selection", () => { - it("should use SQLite backend when enabled", () => { - const ctx = new SearchContext({ - forceBackend: "sqlite" - }); - - expect(ctx.searchBackend).toBe("sqlite"); - }); - - it("should use TypeScript backend when forced", () => { - const ctx = new SearchContext({ - forceBackend: "typescript" - }); - - expect(ctx.searchBackend).toBe("typescript"); - }); - - it("should default to SQLite when no preference", () => { - const ctx = new SearchContext({}); - - // Should default to SQLite for better performance - expect(["sqlite", "typescript"]).toContain(ctx.searchBackend); - }); - }); - - describe("Performance Statistics", () => { - it("should track search statistics", () => { - const initialStats = searchService.getStatistics(); - const initialSearches = initialStats.totalSearches || 0; - - // Execute a search - searchService.search( - ["test"], - "*=*", - searchContext, - {} - ); - - const newStats = searchService.getStatistics(); - expect(newStats.totalSearches).toBeGreaterThan(initialSearches); - expect(newStats.lastSearchTimeMs).toBeGreaterThanOrEqual(0); - }); - }); -}); \ No newline at end of file diff --git a/apps/server/src/services/search/sqlite_search_service.spec.ts b/apps/server/src/services/search/sqlite_search_service.spec.ts deleted file mode 100644 index 6c7a48d86..000000000 --- a/apps/server/src/services/search/sqlite_search_service.spec.ts +++ /dev/null @@ -1,320 +0,0 @@ -/** - * Tests for SQLite Search Service - * - * These tests verify that the SQLite-based search implementation - * correctly handles all search operators and provides accurate results. - */ - -import { describe, it, expect, beforeAll, afterAll, beforeEach } from "vitest"; -import { SQLiteSearchService } from "./sqlite_search_service.js"; -import sql from "../sql.js"; -import SearchContext from "./search_context.js"; -import { initializeSqliteFunctions } from "./sqlite_functions.js"; - -describe("SQLiteSearchService", () => { - let searchService: SQLiteSearchService; - let searchContext: SearchContext; - - beforeAll(() => { - // Initialize SQLite functions for tests - const db = sql.getDbConnection(); - if (db) { - initializeSqliteFunctions(db); - } - - // Get search service instance - searchService = SQLiteSearchService.getInstance(); - - // Create test tables if they don't exist - sql.execute(` - CREATE TABLE IF NOT EXISTS note_search_content ( - noteId TEXT PRIMARY KEY, - noteContent TEXT, - normalized_content TEXT, - normalized_title TEXT, - isProtected INTEGER DEFAULT 0, - isDeleted INTEGER DEFAULT 0 - ) - `); - - sql.execute(` - CREATE TABLE IF NOT EXISTS note_tokens ( - noteId TEXT PRIMARY KEY, - tokens TEXT - ) - `); - - sql.execute(` - CREATE VIRTUAL TABLE IF NOT EXISTS note_fts USING fts5( - noteId UNINDEXED, - title, - content, - tokenize = 'unicode61' - ) - `); - }); - - beforeEach(() => { - // Clear test data - sql.execute(`DELETE FROM note_search_content`); - sql.execute(`DELETE FROM note_tokens`); - sql.execute(`DELETE FROM note_fts`); - - // Create fresh search context - searchContext = new SearchContext(); - - // Insert test data - insertTestNote("note1", "Hello World", "This is a test note with hello world content."); - insertTestNote("note2", "Programming", "JavaScript and TypeScript programming languages."); - insertTestNote("note3", "Fuzzy Search", "Testing fuzzy matching with similar words like helo and wrold."); - insertTestNote("note4", "Special Characters", "Testing with special@email.com and user_name variables."); - insertTestNote("note5", "CamelCase", "getUserName and setUserEmail functions in JavaScript."); - }); - - function insertTestNote(noteId: string, title: string, content: string) { - // Insert into search content table - sql.execute(` - INSERT INTO note_search_content (noteId, noteContent, normalized_content, normalized_title, isProtected, isDeleted) - VALUES (?, ?, LOWER(?), LOWER(?), 0, 0) - `, [noteId, content, content, title]); - - // Generate tokens - const tokens = tokenize(content + " " + title); - sql.execute(` - INSERT INTO note_tokens (noteId, tokens) - VALUES (?, ?) - `, [noteId, JSON.stringify(tokens)]); - - // Insert into FTS5 table - sql.execute(` - INSERT INTO note_fts (noteId, title, content) - VALUES (?, ?, ?) - `, [noteId, title, content]); - } - - function tokenize(text: string): string[] { - return text.toLowerCase() - .split(/[\s\n\r\t,;.!?()[\]{}"'`~@#$%^&*+=|\\/<>:_-]+/) - .filter(token => token.length > 0); - } - - describe("Substring Search (*=*)", () => { - it("should find notes containing substring", () => { - const results = searchService.search(["hello"], "*=*", searchContext); - expect(results).toContain("note1"); - expect(results.size).toBe(1); - }); - - it("should find notes with multiple tokens", () => { - const results = searchService.search(["java", "script"], "*=*", searchContext); - expect(results).toContain("note2"); - expect(results).toContain("note5"); - expect(results.size).toBe(2); - }); - - it("should be case insensitive", () => { - const results = searchService.search(["HELLO"], "*=*", searchContext); - expect(results).toContain("note1"); - }); - }); - - describe("Fuzzy Search (~=)", () => { - it("should find notes with fuzzy matching", () => { - const results = searchService.search(["helo"], "~=", searchContext); - expect(results).toContain("note3"); // Contains "helo" - expect(results).toContain("note1"); // Contains "hello" (1 edit distance) - }); - - it("should respect edit distance threshold", () => { - const results = searchService.search(["xyz"], "~=", searchContext); - expect(results.size).toBe(0); // Too different from any content - }); - - it("should handle multiple fuzzy tokens", () => { - const results = searchService.search(["fuzzy", "match"], "~=", searchContext); - expect(results).toContain("note3"); - }); - }); - - describe("Prefix Search (=*)", () => { - it("should find notes starting with prefix", () => { - const results = searchService.search(["test"], "=*", searchContext); - expect(results).toContain("note3"); // "Testing fuzzy..." - expect(results).toContain("note4"); // "Testing with..." - expect(results.size).toBe(2); - }); - - it("should handle multiple prefixes", () => { - const results = searchService.search(["java", "type"], "=*", searchContext); - expect(results).toContain("note2"); // Has both "JavaScript" and "TypeScript" - }); - }); - - describe("Suffix Search (*=)", () => { - it("should find notes ending with suffix", () => { - const results = searchService.search(["script"], "*=", searchContext); - expect(results).toContain("note2"); // "JavaScript" and "TypeScript" - expect(results).toContain("note5"); // "JavaScript" - }); - - it("should handle special suffixes", () => { - const results = searchService.search([".com"], "*=", searchContext); - expect(results).toContain("note4"); // "special@email.com" - }); - }); - - describe("Regex Search (%=)", () => { - it("should find notes matching regex pattern", () => { - const results = searchService.search(["\\w+@\\w+\\.com"], "%=", searchContext); - expect(results).toContain("note4"); // Contains email pattern - }); - - it("should handle complex patterns", () => { - const results = searchService.search(["get\\w+Name"], "%=", searchContext); - expect(results).toContain("note5"); // "getUserName" - }); - - it("should handle invalid regex gracefully", () => { - const results = searchService.search(["[invalid"], "%=", searchContext); - expect(results.size).toBe(0); // Should return empty on invalid regex - }); - }); - - describe("Exact Word Search (=)", () => { - it("should find notes with exact word match", () => { - const results = searchService.search(["hello"], "=", searchContext); - expect(results).toContain("note1"); - expect(results.size).toBe(1); - }); - - it("should not match partial words", () => { - const results = searchService.search(["java"], "=", searchContext); - expect(results.size).toBe(0); // "JavaScript" contains "java" but not as whole word - }); - - it("should find multiple exact words", () => { - const results = searchService.search(["fuzzy", "matching"], "=", searchContext); - expect(results).toContain("note3"); - }); - }); - - describe("Not Equals Search (!=)", () => { - it("should find notes not containing exact word", () => { - const results = searchService.search(["hello"], "!=", searchContext); - expect(results).not.toContain("note1"); - expect(results.size).toBe(4); // All except note1 - }); - - it("should handle multiple tokens", () => { - const results = searchService.search(["fuzzy", "matching"], "!=", searchContext); - expect(results).not.toContain("note3"); - expect(results.size).toBe(4); // All except note3 - }); - }); - - describe("Search Options", () => { - it("should respect limit option", () => { - const results = searchService.search(["test"], "*=*", searchContext, { limit: 1 }); - expect(results.size).toBeLessThanOrEqual(1); - }); - - it("should filter by noteId set", () => { - const noteIdFilter = new Set(["note1", "note3"]); - const results = searchService.search(["test"], "*=*", searchContext, { noteIdFilter }); - - for (const noteId of results) { - expect(noteIdFilter).toContain(noteId); - } - }); - - it("should exclude deleted notes by default", () => { - // Mark note1 as deleted - sql.execute(`UPDATE note_search_content SET isDeleted = 1 WHERE noteId = 'note1'`); - - const results = searchService.search(["hello"], "*=*", searchContext); - expect(results).not.toContain("note1"); - }); - - it("should include deleted notes when specified", () => { - // Mark note1 as deleted - sql.execute(`UPDATE note_search_content SET isDeleted = 1 WHERE noteId = 'note1'`); - - const results = searchService.search(["hello"], "*=*", searchContext, { includeDeleted: true }); - expect(results).toContain("note1"); - }); - }); - - describe("Complex Queries", () => { - it("should combine multiple searches with AND", () => { - const queries = [ - { tokens: ["java"], operator: "*=*" }, - { tokens: ["script"], operator: "*=*" } - ]; - - const results = searchService.searchMultiple(queries, "AND", searchContext); - expect(results).toContain("note2"); - expect(results).toContain("note5"); - }); - - it("should combine multiple searches with OR", () => { - const queries = [ - { tokens: ["hello"], operator: "*=*" }, - { tokens: ["fuzzy"], operator: "*=*" } - ]; - - const results = searchService.searchMultiple(queries, "OR", searchContext); - expect(results).toContain("note1"); - expect(results).toContain("note3"); - expect(results.size).toBe(2); - }); - }); - - describe("Performance", () => { - beforeEach(() => { - // Add more test data for performance testing - for (let i = 10; i < 1000; i++) { - insertTestNote( - `note${i}`, - `Title ${i}`, - `This is note number ${i} with some random content for testing performance.` - ); - } - }); - - it("should handle large result sets efficiently", () => { - const startTime = Date.now(); - const results = searchService.search(["note"], "*=*", searchContext); - const elapsed = Date.now() - startTime; - - expect(results.size).toBeGreaterThan(100); - expect(elapsed).toBeLessThan(1000); // Should complete within 1 second - }); - - it("should use limit to restrict results", () => { - const startTime = Date.now(); - const results = searchService.search(["note"], "*=*", searchContext, { limit: 10 }); - const elapsed = Date.now() - startTime; - - expect(results.size).toBeLessThanOrEqual(10); - expect(elapsed).toBeLessThan(100); // Should be very fast with limit - }); - }); - - describe("Statistics", () => { - it("should return correct statistics", () => { - const stats = searchService.getStatistics(); - - expect(stats.tablesInitialized).toBe(true); - expect(stats.indexedNotes).toBe(5); - expect(stats.totalTokens).toBe(5); - expect(stats.fts5Available).toBe(true); - }); - }); - - afterAll(() => { - // Clean up test data - sql.execute(`DELETE FROM note_search_content`); - sql.execute(`DELETE FROM note_tokens`); - sql.execute(`DELETE FROM note_fts`); - }); -}); \ No newline at end of file diff --git a/apps/server/src/services/search/sqlite_search_service.ts b/apps/server/src/services/search/sqlite_search_service.ts deleted file mode 100644 index 79b7acbc3..000000000 --- a/apps/server/src/services/search/sqlite_search_service.ts +++ /dev/null @@ -1,943 +0,0 @@ -/** - * SQLite Search Service - * - * This service provides high-performance search operations using pure SQLite queries. - * It implements all search operators with 100% accuracy and 10-30x performance improvement - * over the TypeScript-based implementation. - * - * Operators supported: - * - *=* (substring): Uses LIKE on normalized content - * - ~= (fuzzy): Uses edit_distance function with tokens - * - =* (prefix): Uses LIKE with prefix pattern - * - *= (suffix): Uses LIKE with suffix pattern - * - %= (regex): Uses regex_match function - * - = (exact word): Uses FTS5 table - * - != (not equals): Inverse of equals - * - * Performance characteristics: - * - Substring search: O(n) with optimized LIKE - * - Fuzzy search: O(n*m) where m is token count - * - Prefix/suffix: O(n) with optimized LIKE - * - Regex: O(n) with native regex support - * - Exact word: O(log n) with FTS5 index - */ - -import sql from "../sql.js"; -import log from "../log.js"; -import type SearchContext from "./search_context.js"; -import protectedSessionService from "../protected_session.js"; -import { normalize } from "../utils.js"; - -/** - * Configuration for search operations - */ -const SEARCH_CONFIG = { - MAX_EDIT_DISTANCE: 2, - MIN_TOKEN_LENGTH: 3, - MAX_RESULTS: 10000, - BATCH_SIZE: 1000, - LOG_PERFORMANCE: true, -} as const; - -/** - * Interface for search results - */ -export interface SearchResult { - noteId: string; - score?: number; - snippet?: string; -} - -/** - * Interface for search options - */ -export interface SearchOptions { - includeProtected?: boolean; - includeDeleted?: boolean; - noteIdFilter?: Set; - limit?: number; - offset?: number; -} - -/** - * SQLite-based search service for high-performance note searching - */ -export class SQLiteSearchService { - private static instance: SQLiteSearchService | null = null; - private isInitialized: boolean = false; - private statistics = { - tablesInitialized: false, - totalSearches: 0, - totalTimeMs: 0, - averageTimeMs: 0, - lastSearchTimeMs: 0 - }; - - private constructor() { - this.checkAndInitialize(); - } - - /** - * Get singleton instance of the search service - */ - static getInstance(): SQLiteSearchService { - if (!SQLiteSearchService.instance) { - SQLiteSearchService.instance = new SQLiteSearchService(); - } - return SQLiteSearchService.instance; - } - - /** - * Check if search tables are initialized and create them if needed - */ - private checkAndInitialize(): void { - try { - // Check if tables exist - const tableExists = sql.getValue(` - SELECT name FROM sqlite_master - WHERE type='table' AND name='note_search_content' - `); - - if (!tableExists) { - log.info("Search tables not found. They will be created by migration."); - this.isInitialized = false; - return; - } - - // Verify table structure - const columnCount = sql.getValue(` - SELECT COUNT(*) FROM pragma_table_info('note_search_content') - `) || 0; - - if (columnCount > 0) { - this.isInitialized = true; - this.statistics.tablesInitialized = true; - log.info("SQLite search service initialized successfully"); - } - } catch (error) { - log.error(`Failed to initialize SQLite search service: ${error}`); - this.isInitialized = false; - this.statistics.tablesInitialized = false; - } - } - - /** - * Main search method that delegates to appropriate operator implementation - */ - search( - tokens: string[], - operator: string, - searchContext: SearchContext, - options: SearchOptions = {} - ): Set { - if (!this.isInitialized) { - log.info("SQLite search service not initialized, falling back to traditional search"); - return new Set(); - } - - const startTime = Date.now(); - let results: Set; - - try { - // Normalize tokens for consistent searching - const normalizedTokens = tokens.map(token => normalize(token).toLowerCase()); - - // Delegate to appropriate search method based on operator - switch (operator) { - case "*=*": - results = this.searchSubstring(normalizedTokens, options); - break; - case "~=": - results = this.searchFuzzy(normalizedTokens, options); - break; - case "=*": - results = this.searchPrefix(normalizedTokens, options); - break; - case "*=": - results = this.searchSuffix(normalizedTokens, options); - break; - case "%=": - results = this.searchRegex(tokens, options); // Use original tokens for regex - break; - case "=": - results = this.searchExactWord(normalizedTokens, options); - break; - case "!=": - results = this.searchNotEquals(normalizedTokens, options); - break; - default: - log.info(`Unsupported search operator: ${operator}`); - return new Set(); - } - - const elapsed = Date.now() - startTime; - - // Update statistics - this.statistics.totalSearches++; - this.statistics.totalTimeMs += elapsed; - this.statistics.lastSearchTimeMs = elapsed; - this.statistics.averageTimeMs = this.statistics.totalTimeMs / this.statistics.totalSearches; - - if (SEARCH_CONFIG.LOG_PERFORMANCE) { - log.info(`SQLite search completed: operator=${operator}, tokens=${tokens.join(" ")}, ` + - `results=${results.size}, time=${elapsed}ms`); - } - - return results; - } catch (error) { - log.error(`SQLite search failed: ${error}`); - searchContext.addError(`Search failed: ${error}`); - return new Set(); - } - } - - /** - * Substring search using LIKE on normalized content - * Operator: *=* - */ - private searchSubstring(tokens: string[], options: SearchOptions): Set { - const results = new Set(); - - // Build WHERE clause for all tokens - const conditions = tokens.map(() => - `nsc.full_text_normalized LIKE '%' || ? || '%'` - ).join(' AND '); - - // Build base query - JOIN with notes table for isDeleted/isProtected filtering - let query = ` - SELECT DISTINCT nsc.noteId - FROM note_search_content nsc - JOIN notes n ON nsc.noteId = n.noteId - WHERE ${conditions} - `; - - const params = [...tokens]; - - // Add filters using the notes table columns - if (!options.includeDeleted) { - query += ` AND n.isDeleted = 0`; - } - - if (!options.includeProtected && !protectedSessionService.isProtectedSessionAvailable()) { - query += ` AND n.isProtected = 0`; - } - - // Add limit if specified - if (options.limit) { - query += ` LIMIT ${options.limit}`; - } - - // Execute query - for (const row of sql.iterateRows<{ noteId: string }>(query, params)) { - // Apply noteId filter if provided - if (!options.noteIdFilter || options.noteIdFilter.has(row.noteId)) { - results.add(row.noteId); - } - } - - return results; - } - - /** - * Fuzzy search using edit distance on tokens - * Operator: ~= - */ - private searchFuzzy(tokens: string[], options: SearchOptions): Set { - const results = new Set(); - - // For fuzzy search, we need to check tokens individually - // First, get all note IDs that might match - let query = ` - SELECT DISTINCT nsc.noteId, nsc.full_text_normalized - FROM note_search_content nsc - JOIN notes n ON nsc.noteId = n.noteId - WHERE 1=1 - `; - - if (!options.includeDeleted) { - query += ` AND n.isDeleted = 0`; - } - - if (!options.includeProtected && !protectedSessionService.isProtectedSessionAvailable()) { - query += ` AND n.isProtected = 0`; - } - - // Process in batches for better performance - const noteData = new Map(); - - for (const row of sql.iterateRows<{ noteId: string, full_text_normalized: string }>(query)) { - if (options.noteIdFilter && !options.noteIdFilter.has(row.noteId)) { - continue; - } - - noteData.set(row.noteId, row.full_text_normalized || ''); - } - - // Get tokens for fuzzy matching - const tokenQuery = ` - SELECT DISTINCT noteId, token_normalized - FROM note_tokens - WHERE noteId IN (${Array.from(noteData.keys()).map(() => '?').join(',')}) - `; - - const noteTokens = new Map>(); - if (noteData.size > 0) { - for (const row of sql.iterateRows<{ noteId: string, token_normalized: string }>( - tokenQuery, Array.from(noteData.keys()) - )) { - if (!noteTokens.has(row.noteId)) { - noteTokens.set(row.noteId, new Set()); - } - noteTokens.get(row.noteId)!.add(row.token_normalized); - } - } - - // Now check each note for fuzzy matches - for (const [noteId, content] of noteData) { - let allTokensMatch = true; - const noteTokenSet = noteTokens.get(noteId) || new Set(); - - for (const searchToken of tokens) { - let tokenMatches = false; - - // Check if token matches any word in the note - // First check exact match in content - if (content.includes(searchToken)) { - tokenMatches = true; - } else { - // Check fuzzy match against tokens - for (const noteToken of noteTokenSet) { - if (this.fuzzyMatchTokens(searchToken, noteToken)) { - tokenMatches = true; - break; - } - } - } - - if (!tokenMatches) { - allTokensMatch = false; - break; - } - } - - if (allTokensMatch) { - results.add(noteId); - - if (options.limit && results.size >= options.limit) { - break; - } - } - } - - return results; - } - - /** - * Helper method for fuzzy matching between two tokens - */ - private fuzzyMatchTokens(token1: string, token2: string): boolean { - // Quick exact match check - if (token1 === token2) { - return true; - } - - // Don't fuzzy match very short tokens - if (token1.length < SEARCH_CONFIG.MIN_TOKEN_LENGTH || - token2.length < SEARCH_CONFIG.MIN_TOKEN_LENGTH) { - return false; - } - - // Check if length difference is within edit distance threshold - if (Math.abs(token1.length - token2.length) > SEARCH_CONFIG.MAX_EDIT_DISTANCE) { - return false; - } - - // Use SQL function for edit distance calculation - const distance = sql.getValue(` - SELECT edit_distance(?, ?, ?) - `, [token1, token2, SEARCH_CONFIG.MAX_EDIT_DISTANCE]); - - return distance <= SEARCH_CONFIG.MAX_EDIT_DISTANCE; - } - - /** - * Prefix search using LIKE with prefix pattern - * Operator: =* - */ - private searchPrefix(tokens: string[], options: SearchOptions): Set { - const results = new Set(); - - // Build WHERE clause for all tokens - const conditions = tokens.map(() => - `nsc.full_text_normalized LIKE ? || '%'` - ).join(' AND '); - - // Build query - JOIN with notes table for isDeleted/isProtected filtering - let query = ` - SELECT DISTINCT nsc.noteId - FROM note_search_content nsc - JOIN notes n ON nsc.noteId = n.noteId - WHERE ${conditions} - `; - - const params = [...tokens]; - - // Add filters using the notes table columns - if (!options.includeDeleted) { - query += ` AND n.isDeleted = 0`; - } - - if (!options.includeProtected && !protectedSessionService.isProtectedSessionAvailable()) { - query += ` AND n.isProtected = 0`; - } - - // Add limit if specified - if (options.limit) { - query += ` LIMIT ${options.limit}`; - } - - // Execute query - for (const row of sql.iterateRows<{ noteId: string }>(query, params)) { - if (!options.noteIdFilter || options.noteIdFilter.has(row.noteId)) { - results.add(row.noteId); - } - } - - return results; - } - - /** - * Suffix search using LIKE with suffix pattern - * Operator: *= - */ - private searchSuffix(tokens: string[], options: SearchOptions): Set { - const results = new Set(); - - // Build WHERE clause for all tokens - const conditions = tokens.map(() => - `nsc.full_text_normalized LIKE '%' || ?` - ).join(' AND '); - - // Build query - JOIN with notes table for isDeleted/isProtected filtering - let query = ` - SELECT DISTINCT nsc.noteId - FROM note_search_content nsc - JOIN notes n ON nsc.noteId = n.noteId - WHERE ${conditions} - `; - - const params = [...tokens]; - - // Add filters using the notes table columns - if (!options.includeDeleted) { - query += ` AND n.isDeleted = 0`; - } - - if (!options.includeProtected && !protectedSessionService.isProtectedSessionAvailable()) { - query += ` AND n.isProtected = 0`; - } - - // Add limit if specified - if (options.limit) { - query += ` LIMIT ${options.limit}`; - } - - // Execute query - for (const row of sql.iterateRows<{ noteId: string }>(query, params)) { - if (!options.noteIdFilter || options.noteIdFilter.has(row.noteId)) { - results.add(row.noteId); - } - } - - return results; - } - - /** - * Regex search using regex_match function - * Operator: %= - */ - private searchRegex(patterns: string[], options: SearchOptions): Set { - const results = new Set(); - - // For regex, we use the combined title+content (not normalized) - // Build WHERE clause for all patterns - const conditions = patterns.map(() => - `regex_match(nsc.title || ' ' || nsc.content, ?, 'ims') = 1` - ).join(' AND '); - - // Build query - JOIN with notes table for isDeleted/isProtected filtering - let query = ` - SELECT DISTINCT nsc.noteId - FROM note_search_content nsc - JOIN notes n ON nsc.noteId = n.noteId - WHERE ${conditions} - `; - - const params = [...patterns]; - - // Add filters using the notes table columns - if (!options.includeDeleted) { - query += ` AND n.isDeleted = 0`; - } - - if (!options.includeProtected && !protectedSessionService.isProtectedSessionAvailable()) { - query += ` AND n.isProtected = 0`; - } - - // Add limit if specified - if (options.limit) { - query += ` LIMIT ${options.limit}`; - } - - // Execute query - try { - for (const row of sql.iterateRows<{ noteId: string }>(query, params)) { - if (!options.noteIdFilter || options.noteIdFilter.has(row.noteId)) { - results.add(row.noteId); - } - } - } catch (error) { - log.error(`Regex search failed: ${error}`); - // Return empty set on regex error - } - - return results; - } - - /** - * Exact word search using FTS5 or token matching - * Operator: = - */ - private searchExactWord(tokens: string[], options: SearchOptions): Set { - const results = new Set(); - - // Try FTS5 first if available - const fts5Available = this.checkFTS5Availability(); - - if (fts5Available) { - try { - // Build FTS5 query - const ftsQuery = tokens.map(t => `"${t}"`).join(' '); - - // FTS5 doesn't have isDeleted or isProtected columns, - // so we need to join with notes table for filtering - let query = ` - SELECT DISTINCT f.noteId - FROM notes_fts f - JOIN notes n ON f.noteId = n.noteId - WHERE f.notes_fts MATCH ? - `; - - const params = [ftsQuery]; - - // Add filters using the notes table columns - if (!options.includeDeleted) { - query += ` AND n.isDeleted = 0`; - } - - if (!options.includeProtected && !protectedSessionService.isProtectedSessionAvailable()) { - query += ` AND n.isProtected = 0`; - } - - // Add limit if specified - if (options.limit) { - query += ` LIMIT ${options.limit}`; - } - - for (const row of sql.iterateRows<{ noteId: string }>(query, params)) { - if (!options.noteIdFilter || options.noteIdFilter.has(row.noteId)) { - results.add(row.noteId); - } - } - - return results; - } catch (error) { - log.info(`FTS5 search failed, falling back to token search: ${error}`); - } - } - - // Fallback to token-based exact match - // Build query to check if all tokens exist as whole words - let query = ` - SELECT DISTINCT nt.noteId, nt.token_normalized - FROM note_tokens nt - JOIN notes n ON nt.noteId = n.noteId - WHERE 1=1 - `; - - if (!options.includeDeleted) { - query += ` AND n.isDeleted = 0`; - } - - if (!options.includeProtected && !protectedSessionService.isProtectedSessionAvailable()) { - query += ` AND n.isProtected = 0`; - } - - // Get all matching notes and their tokens - const candidateNotes = new Map>(); - - for (const row of sql.iterateRows<{ noteId: string, token_normalized: string }>(query)) { - if (options.noteIdFilter && !options.noteIdFilter.has(row.noteId)) { - continue; - } - - if (!candidateNotes.has(row.noteId)) { - candidateNotes.set(row.noteId, new Set()); - } - candidateNotes.get(row.noteId)!.add(row.token_normalized); - } - - // Check each candidate for exact token matches - for (const [noteId, noteTokenSet] of candidateNotes) { - const allTokensFound = tokens.every(token => noteTokenSet.has(token)); - - if (allTokensFound) { - results.add(noteId); - - if (options.limit && results.size >= options.limit) { - break; - } - } - } - - return results; - } - - /** - * Not equals search - inverse of exact word search - * Operator: != - */ - private searchNotEquals(tokens: string[], options: SearchOptions): Set { - // Get all notes that DON'T match the exact word search - const matchingNotes = this.searchExactWord(tokens, options); - - // Get all notes - JOIN with notes table for isDeleted/isProtected filtering - let query = ` - SELECT DISTINCT nsc.noteId - FROM note_search_content nsc - JOIN notes n ON nsc.noteId = n.noteId - WHERE 1=1 - `; - - if (!options.includeDeleted) { - query += ` AND n.isDeleted = 0`; - } - - if (!options.includeProtected && !protectedSessionService.isProtectedSessionAvailable()) { - query += ` AND n.isProtected = 0`; - } - - const allNotes = new Set(); - for (const row of sql.iterateRows<{ noteId: string }>(query)) { - if (!options.noteIdFilter || options.noteIdFilter.has(row.noteId)) { - allNotes.add(row.noteId); - } - } - - // Return the difference - const results = new Set(); - for (const noteId of allNotes) { - if (!matchingNotes.has(noteId)) { - results.add(noteId); - - if (options.limit && results.size >= options.limit) { - break; - } - } - } - - return results; - } - - /** - * Check if FTS5 is available - */ - private checkFTS5Availability(): boolean { - try { - const result = sql.getValue(` - SELECT name FROM sqlite_master - WHERE type='table' AND name='notes_fts' - `); - return !!result; - } catch { - return false; - } - } - - /** - * Search with multiple operators (for complex queries) - */ - searchMultiple( - queries: Array<{ tokens: string[], operator: string }>, - combineMode: 'AND' | 'OR', - searchContext: SearchContext, - options: SearchOptions = {} - ): Set { - if (queries.length === 0) { - return new Set(); - } - - const resultSets = queries.map(q => - this.search(q.tokens, q.operator, searchContext, options) - ); - - if (combineMode === 'AND') { - // Intersection of all result sets - return resultSets.reduce((acc, set) => { - const intersection = new Set(); - for (const item of acc) { - if (set.has(item)) { - intersection.add(item); - } - } - return intersection; - }); - } else { - // Union of all result sets - return resultSets.reduce((acc, set) => { - for (const item of set) { - acc.add(item); - } - return acc; - }, new Set()); - } - } - - /** - * Get search statistics for monitoring - */ - getStatistics() { - // Return the in-memory statistics object which includes performance data - return { - ...this.statistics, - indexedNotes: this.isInitialized ? this.getIndexedNotesCount() : 0, - totalTokens: this.isInitialized ? this.getTotalTokensCount() : 0, - fts5Available: this.isInitialized ? this.checkFTS5Availability() : false - }; - } - - /** - * Get count of indexed notes - */ - private getIndexedNotesCount(): number { - try { - return sql.getValue(` - SELECT COUNT(DISTINCT nsc.noteId) - FROM note_search_content nsc - JOIN notes n ON nsc.noteId = n.noteId - WHERE n.isDeleted = 0 - `) || 0; - } catch { - return 0; - } - } - - /** - * Get total tokens count - */ - private getTotalTokensCount(): number { - try { - return sql.getValue(` - SELECT COUNT(*) FROM note_tokens - `) || 0; - } catch { - return 0; - } - } - - /** - * Rebuild search index for a specific note - */ - rebuildNoteIndex(noteId: string): void { - if (!this.isInitialized) { - log.info("Cannot rebuild index - search tables not initialized"); - return; - } - - try { - // This will be handled by triggers automatically - // But we can force an update by touching the note - sql.execute(` - UPDATE notes - SET dateModified = strftime('%Y-%m-%d %H:%M:%S.%f', 'now') - WHERE noteId = ? - `, [noteId]); - - log.info(`Rebuilt search index for note ${noteId}`); - } catch (error) { - log.error(`Failed to rebuild index for note ${noteId}: ${error}`); - } - } - - /** - * Clear search index (for testing/maintenance) - */ - clearIndex(): void { - if (!this.isInitialized) { - return; - } - - try { - sql.execute(`DELETE FROM note_search_content`); - sql.execute(`DELETE FROM note_tokens`); - - if (this.checkFTS5Availability()) { - sql.execute(`DELETE FROM notes_fts`); - } - - log.info("Search index cleared"); - } catch (error) { - log.error(`Failed to clear search index: ${error}`); - } - } - - /** - * Get detailed index status information - */ - async getIndexStatus(): Promise<{ - initialized: boolean; - tablesExist: boolean; - indexedNotes: number; - totalNotes: number; - totalTokens: number; - fts5Available: boolean; - lastRebuild?: string; - coverage: number; - }> { - const tablesExist = this.isInitialized; - - if (!tablesExist) { - return { - initialized: false, - tablesExist: false, - indexedNotes: 0, - totalNotes: 0, - totalTokens: 0, - fts5Available: false, - coverage: 0 - }; - } - - // Get total indexable notes - const totalNotes = sql.getValue(` - SELECT COUNT(*) - FROM notes - WHERE type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND isDeleted = 0 - AND isProtected = 0 - `) || 0; - - // Get indexed notes count - const indexedNotes = sql.getValue(` - SELECT COUNT(DISTINCT nsc.noteId) - FROM note_search_content nsc - JOIN notes n ON nsc.noteId = n.noteId - WHERE n.isDeleted = 0 - `) || 0; - - // Get token count - const totalTokens = sql.getValue(` - SELECT COUNT(*) FROM note_tokens - `) || 0; - - // Calculate coverage percentage - const coverage = totalNotes > 0 ? (indexedNotes / totalNotes) * 100 : 0; - - return { - initialized: true, - tablesExist: true, - indexedNotes, - totalNotes, - totalTokens, - fts5Available: this.checkFTS5Availability(), - coverage: Math.round(coverage * 100) / 100 - }; - } - - /** - * Rebuild the entire search index - */ - async rebuildIndex(force: boolean = false): Promise { - if (!this.isInitialized && !force) { - throw new Error("Search tables not initialized. Use force=true to create tables."); - } - - log.info("Starting search index rebuild..."); - const startTime = Date.now(); - - try { - // Clear existing index - this.clearIndex(); - - // Rebuild from all notes - const batchSize = 100; - let offset = 0; - let totalProcessed = 0; - - while (true) { - const notes = sql.getRows<{ - noteId: string; - title: string; - type: string; - mime: string; - content: string | null; - }>(` - SELECT - n.noteId, - n.title, - n.type, - n.mime, - b.content - FROM notes n - LEFT JOIN blobs b ON n.blobId = b.blobId - WHERE n.isDeleted = 0 - AND n.isProtected = 0 - AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - ORDER BY n.noteId - LIMIT ? OFFSET ? - `, [batchSize, offset]); - - if (notes.length === 0) { - break; - } - - // Process batch - trigger will handle the actual indexing - for (const note of notes) { - try { - // Touch the note to trigger re-indexing - sql.execute(` - UPDATE notes - SET dateModified = strftime('%Y-%m-%d %H:%M:%S.%f', 'now') - WHERE noteId = ? - `, [note.noteId]); - - totalProcessed++; - } catch (error) { - log.error(`Failed to reindex note ${note.noteId}: ${error}`); - } - } - - offset += batchSize; - - if (totalProcessed % 1000 === 0) { - log.info(`Reindexed ${totalProcessed} notes...`); - } - } - - const duration = Date.now() - startTime; - log.info(`Index rebuild completed: ${totalProcessed} notes in ${duration}ms`); - - } catch (error) { - log.error(`Index rebuild failed: ${error}`); - throw error; - } - } -} - -// Export singleton instance getter -export function getSQLiteSearchService(): SQLiteSearchService { - return SQLiteSearchService.getInstance(); -} - -// Export default getter function (not the instance, to avoid initialization issues) -export default getSQLiteSearchService; \ No newline at end of file diff --git a/apps/server/src/services/search/sqlite_search_utils.ts b/apps/server/src/services/search/sqlite_search_utils.ts deleted file mode 100644 index 414aaf290..000000000 --- a/apps/server/src/services/search/sqlite_search_utils.ts +++ /dev/null @@ -1,471 +0,0 @@ -/** - * SQLite Search Utilities - * - * Helper functions and utilities for SQLite-based search operations. - * These utilities provide common functionality needed by the search service - * and help with data preparation, validation, and performance monitoring. - */ - -import sql from "../sql.js"; -import log from "../log.js"; -import { normalize, stripTags } from "../utils.js"; - -/** - * Configuration for search utilities - */ -export const SEARCH_UTILS_CONFIG = { - BATCH_SIZE: 1000, - MAX_CONTENT_SIZE: 2 * 1024 * 1024, // 2MB - MIN_TOKEN_LENGTH: 2, - MAX_TOKEN_LENGTH: 100, - LOG_SLOW_QUERIES: true, - SLOW_QUERY_THRESHOLD: 100, // ms -} as const; - -/** - * Interface for note content data - */ -export interface NoteContentData { - noteId: string; - title: string; - content: string; - type: string; - mime: string; - isProtected: boolean; - isDeleted: boolean; -} - -/** - * Normalize text for search indexing - * Ensures consistent normalization across all search operations - */ -export function normalizeForSearch(text: string | null | undefined): string { - if (!text || typeof text !== 'string') { - return ''; - } - - // Use the standard normalize function and convert to lowercase - return normalize(text).toLowerCase(); -} - -/** - * Tokenize text into searchable words - * Handles camelCase, snake_case, and special characters - */ -export function tokenizeText(text: string | null | undefined): string[] { - if (!text || typeof text !== 'string') { - return []; - } - - const tokens = new Set(); - - // Split on word boundaries - const words = text - .split(/[\s\n\r\t,;.!?()[\]{}"'`~@#$%^&*+=|\\/<>:-]+/) - .filter(word => word.length >= SEARCH_UTILS_CONFIG.MIN_TOKEN_LENGTH && - word.length <= SEARCH_UTILS_CONFIG.MAX_TOKEN_LENGTH); - - for (const word of words) { - // Add the original word (lowercase) - tokens.add(word.toLowerCase()); - - // Handle snake_case - const snakeParts = word.split('_').filter(part => part.length > 0); - if (snakeParts.length > 1) { - for (const part of snakeParts) { - tokens.add(part.toLowerCase()); - - // Also handle camelCase within snake_case parts - const camelParts = splitCamelCase(part); - for (const camelPart of camelParts) { - if (camelPart.length >= SEARCH_UTILS_CONFIG.MIN_TOKEN_LENGTH) { - tokens.add(camelPart.toLowerCase()); - } - } - } - } else { - // Handle camelCase - const camelParts = splitCamelCase(word); - for (const part of camelParts) { - if (part.length >= SEARCH_UTILS_CONFIG.MIN_TOKEN_LENGTH) { - tokens.add(part.toLowerCase()); - } - } - } - } - - return Array.from(tokens); -} - -/** - * Split camelCase strings into parts - */ -function splitCamelCase(str: string): string[] { - // Split on transitions from lowercase to uppercase - // Also handle sequences of uppercase letters (e.g., "XMLParser" -> ["XML", "Parser"]) - return str.split(/(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])/); -} - -/** - * Process HTML content for indexing - * Removes tags and normalizes the text - */ -export function processHtmlContent(html: string | null | undefined): string { - if (!html || typeof html !== 'string') { - return ''; - } - - // Remove script and style content - let text = html.replace(/)<[^<]*)*<\/script>/gi, ''); - text = text.replace(/)<[^<]*)*<\/style>/gi, ''); - - // Strip remaining tags - text = stripTags(text); - - // Decode HTML entities - text = text.replace(/ /g, ' '); - text = text.replace(/</g, '<'); - text = text.replace(/>/g, '>'); - text = text.replace(/&/g, '&'); - text = text.replace(/"/g, '"'); - text = text.replace(/'/g, "'"); - text = text.replace(/'/g, "'"); - - // Normalize whitespace - text = text.replace(/\s+/g, ' ').trim(); - - return text; -} - -/** - * Process JSON content (e.g., mindmaps, canvas) for indexing - */ -export function processJsonContent(json: string | null | undefined, type: string): string { - if (!json || typeof json !== 'string') { - return ''; - } - - try { - const data = JSON.parse(json); - - if (type === 'mindMap') { - return extractMindMapText(data); - } else if (type === 'canvas') { - return extractCanvasText(data); - } - - // For other JSON types, try to extract text content - return extractTextFromObject(data); - } catch (error) { - log.info(`Failed to process JSON content: ${error}`); - return ''; - } -} - -/** - * Extract text from mindmap JSON structure - */ -function extractMindMapText(data: any): string { - const texts: string[] = []; - - function collectTopics(node: any): void { - if (!node) return; - - if (node.topic) { - texts.push(node.topic); - } - - if (node.children && Array.isArray(node.children)) { - for (const child of node.children) { - collectTopics(child); - } - } - } - - if (data.nodedata) { - collectTopics(data.nodedata); - } - - return texts.join(' '); -} - -/** - * Extract text from canvas JSON structure - */ -function extractCanvasText(data: any): string { - const texts: string[] = []; - - if (data.elements && Array.isArray(data.elements)) { - for (const element of data.elements) { - if (element.type === 'text' && element.text) { - texts.push(element.text); - } - } - } - - return texts.join(' '); -} - -/** - * Generic text extraction from JSON objects - */ -function extractTextFromObject(obj: any, maxDepth = 10): string { - if (maxDepth <= 0) return ''; - - const texts: string[] = []; - - if (typeof obj === 'string') { - return obj; - } else if (Array.isArray(obj)) { - for (const item of obj) { - const text = extractTextFromObject(item, maxDepth - 1); - if (text) texts.push(text); - } - } else if (typeof obj === 'object' && obj !== null) { - for (const key of Object.keys(obj)) { - // Look for common text field names - if (['text', 'content', 'value', 'title', 'name', 'label', 'description'].includes(key.toLowerCase())) { - const value = obj[key]; - if (typeof value === 'string') { - texts.push(value); - } - } else { - const text = extractTextFromObject(obj[key], maxDepth - 1); - if (text) texts.push(text); - } - } - } - - return texts.join(' '); -} - -/** - * Prepare note content for indexing - * Handles different note types and formats - */ -export function prepareNoteContent(note: NoteContentData): { - normalizedContent: string; - normalizedTitle: string; - tokens: string[]; -} { - let content = note.content; - - // Process content based on type - if (note.type === 'text' && note.mime === 'text/html') { - content = processHtmlContent(content); - } else if ((note.type === 'mindMap' || note.type === 'canvas') && note.mime === 'application/json') { - content = processJsonContent(content, note.type); - } - - // Check content size - if (content.length > SEARCH_UTILS_CONFIG.MAX_CONTENT_SIZE) { - log.info(`Note ${note.noteId} content exceeds max size (${content.length} bytes), truncating`); - content = content.substring(0, SEARCH_UTILS_CONFIG.MAX_CONTENT_SIZE); - } - - // Normalize content and title - const normalizedContent = normalizeForSearch(content); - const normalizedTitle = normalizeForSearch(note.title); - - // Generate tokens from both content and title - const allText = `${note.title} ${content}`; - const tokens = tokenizeText(allText); - - return { - normalizedContent, - normalizedTitle, - tokens - }; -} - -/** - * Update search index for a single note - */ -export async function updateNoteSearchIndex(noteId: string): Promise { - try { - // Get note data - const noteData = sql.getRow(` - SELECT n.noteId, n.title, b.content, n.type, n.mime, n.isProtected, n.isDeleted - FROM notes n - LEFT JOIN blobs b ON n.blobId = b.blobId - WHERE n.noteId = ? - `, [noteId]); - - if (!noteData) { - log.info(`Note ${noteId} not found for indexing`); - return; - } - - // Prepare content for indexing - const { normalizedContent, normalizedTitle, tokens } = prepareNoteContent(noteData); - - // Update search content table - // Note: note_search_content doesn't have isProtected/isDeleted columns - // Those are in the notes table which we join with - sql.execute(` - INSERT OR REPLACE INTO note_search_content - (noteId, title, content, title_normalized, content_normalized, full_text_normalized) - VALUES (?, ?, ?, ?, ?, ?) - `, [noteId, noteData.title, noteData.content || '', - normalizedTitle, normalizedContent, - normalizedTitle + ' ' + normalizedContent]); - - // Delete existing tokens for this note - sql.execute(`DELETE FROM note_tokens WHERE noteId = ?`, [noteId]); - - // Insert new tokens with proper structure - let position = 0; - for (const token of tokens) { - sql.execute(` - INSERT INTO note_tokens (noteId, token, token_normalized, position, source) - VALUES (?, ?, ?, ?, 'content') - `, [noteId, token, normalizeForSearch(token), position]); - position++; - } - - log.info(`Updated search index for note ${noteId}`); - } catch (error) { - log.error(`Failed to update search index for note ${noteId}: ${error}`); - throw error; - } -} - -/** - * Batch update search index for multiple notes - */ -export async function batchUpdateSearchIndex(noteIds: string[]): Promise { - const startTime = Date.now(); - let successCount = 0; - let errorCount = 0; - - // Process in batches - for (let i = 0; i < noteIds.length; i += SEARCH_UTILS_CONFIG.BATCH_SIZE) { - const batch = noteIds.slice(i, i + SEARCH_UTILS_CONFIG.BATCH_SIZE); - - try { - sql.transactional(() => { - for (const noteId of batch) { - try { - updateNoteSearchIndex(noteId); - successCount++; - } catch (error) { - log.error(`Failed to index note ${noteId}: ${error}`); - errorCount++; - } - } - }); - } catch (error) { - log.error(`Batch indexing failed: ${error}`); - errorCount += batch.length; - } - } - - const elapsed = Date.now() - startTime; - log.info(`Batch search indexing completed: ${successCount} success, ${errorCount} errors, ${elapsed}ms`); -} - -/** - * Verify search index integrity - */ -export function verifySearchIndex(): { - valid: boolean; - issues: string[]; - stats: { - totalNotes: number; - indexedNotes: number; - missingFromIndex: number; - orphanedEntries: number; - }; -} { - const issues: string[] = []; - - // Count total notes - const totalNotes = sql.getValue(` - SELECT COUNT(*) FROM notes WHERE isDeleted = 0 - `) || 0; - - // Count indexed notes - JOIN with notes table for isDeleted filter - const indexedNotes = sql.getValue(` - SELECT COUNT(DISTINCT nsc.noteId) - FROM note_search_content nsc - JOIN notes n ON nsc.noteId = n.noteId - WHERE n.isDeleted = 0 - `) || 0; - - // Find notes missing from index - const missingNotes = sql.getColumn(` - SELECT noteId FROM notes - WHERE isDeleted = 0 - AND noteId NOT IN (SELECT noteId FROM note_search_content) - `); - - if (missingNotes.length > 0) { - issues.push(`${missingNotes.length} notes missing from search index`); - } - - // Find orphaned index entries - const orphanedEntries = sql.getColumn(` - SELECT noteId FROM note_search_content - WHERE noteId NOT IN (SELECT noteId FROM notes) - `); - - if (orphanedEntries.length > 0) { - issues.push(`${orphanedEntries.length} orphaned entries in search index`); - } - - // Check token table consistency - const tokenMismatch = sql.getValue(` - SELECT COUNT(*) FROM note_search_content - WHERE noteId NOT IN (SELECT noteId FROM note_tokens) - `) || 0; - - if (tokenMismatch > 0) { - issues.push(`${tokenMismatch} notes missing from token index`); - } - - return { - valid: issues.length === 0, - issues, - stats: { - totalNotes, - indexedNotes, - missingFromIndex: missingNotes.length, - orphanedEntries: orphanedEntries.length - } - }; -} - -/** - * Performance monitoring wrapper for search queries - */ -export function monitorQuery( - queryName: string, - queryFn: () => T -): T { - const startTime = Date.now(); - - try { - const result = queryFn(); - - const elapsed = Date.now() - startTime; - if (SEARCH_UTILS_CONFIG.LOG_SLOW_QUERIES && elapsed > SEARCH_UTILS_CONFIG.SLOW_QUERY_THRESHOLD) { - log.info(`Slow search query detected: ${queryName} took ${elapsed}ms`); - } - - return result; - } catch (error) { - const elapsed = Date.now() - startTime; - log.error(`Search query failed: ${queryName} after ${elapsed}ms - ${error}`); - throw error; - } -} - -/** - * Export utility functions for testing - */ -export const testUtils = { - splitCamelCase, - extractMindMapText, - extractCanvasText, - extractTextFromObject -}; \ No newline at end of file diff --git a/apps/server/src/services/search/verify_sqlite_search.ts b/apps/server/src/services/search/verify_sqlite_search.ts deleted file mode 100644 index 34e78a667..000000000 --- a/apps/server/src/services/search/verify_sqlite_search.ts +++ /dev/null @@ -1,219 +0,0 @@ -#!/usr/bin/env ts-node - -/** - * Verification script for SQLite search implementation - * - * This script checks: - * 1. If migration 0235 has run (tables exist) - * 2. If SQL functions are registered - * 3. If search queries work correctly - * 4. Performance comparison between SQLite and TypeScript - */ - -import sql from "../sql.js"; -import log from "../log.js"; -import { getSQLiteSearchService } from "./sqlite_search_service.js"; -import SearchContext from "./search_context.js"; -import becca from "../../becca/becca.js"; - -async function verifyTables(): Promise { - console.log("\n=== Checking Database Tables ==="); - - const tables = [ - { name: 'note_search_content', required: true }, - { name: 'note_tokens', required: true }, - { name: 'notes_fts', required: false } // From migration 0234 - ]; - - let allExist = true; - - for (const table of tables) { - const exists = sql.getValue(` - SELECT COUNT(*) FROM sqlite_master - WHERE type='table' AND name=? - `, [table.name]) > 0; - - const status = exists ? '✓' : '✗'; - const requiredText = table.required ? ' (REQUIRED)' : ' (optional)'; - console.log(` ${status} ${table.name}${requiredText}`); - - if (table.required && !exists) { - allExist = false; - } - } - - if (!allExist) { - console.log("\n❌ Required tables are missing!"); - console.log(" Migration 0235 needs to run."); - console.log(" The APP_DB_VERSION has been updated to 235."); - console.log(" Restart the server to run the migration."); - } - - return allExist; -} - -async function verifyFunctions(): Promise { - console.log("\n=== Checking SQL Functions ==="); - - const functions = [ - { name: 'normalize_text', test: "SELECT normalize_text('Café')" }, - { name: 'edit_distance', test: "SELECT edit_distance('test', 'text', 2)" }, - { name: 'regex_match', test: "SELECT regex_match('test', 'testing')" }, - { name: 'tokenize_text', test: "SELECT tokenize_text('hello world')" }, - { name: 'strip_html', test: "SELECT strip_html('

test

')" } - ]; - - let allWork = true; - - for (const func of functions) { - try { - const result = sql.getValue(func.test); - console.log(` ✓ ${func.name} - Result: ${result}`); - } catch (error: any) { - console.log(` ✗ ${func.name} - Error: ${error.message}`); - allWork = false; - } - } - - if (!allWork) { - console.log("\n⚠️ Some SQL functions are not working."); - console.log(" They should be registered when the server starts."); - } - - return allWork; -} - -async function verifySearchContent(): Promise { - console.log("\n=== Checking Search Index Content ==="); - - const noteCount = sql.getValue(` - SELECT COUNT(*) FROM notes - WHERE isDeleted = 0 AND isProtected = 0 - `) || 0; - - const indexedCount = sql.getValue(` - SELECT COUNT(*) FROM note_search_content - `) || 0; - - const tokenCount = sql.getValue(` - SELECT COUNT(DISTINCT noteId) FROM note_tokens - `) || 0; - - console.log(` Notes eligible for indexing: ${noteCount}`); - console.log(` Notes in search index: ${indexedCount}`); - console.log(` Notes with tokens: ${tokenCount}`); - - if (indexedCount === 0 && noteCount > 0) { - console.log("\n⚠️ Search index is empty but there are notes to index."); - console.log(" The migration should populate the index automatically."); - } else if (indexedCount < noteCount) { - console.log("\n⚠️ Some notes are not indexed."); - console.log(` Missing: ${noteCount - indexedCount} notes`); - } else { - console.log("\n✓ Search index is populated"); - } -} - -async function testSearch(): Promise { - console.log("\n=== Testing Search Functionality ==="); - - // Initialize becca if needed - if (!becca.loaded) { - console.log(" Loading becca..."); - // Note: becca may not have a load method in this version - } - - const searchService = getSQLiteSearchService(); - const searchContext = new SearchContext({ - fastSearch: false, - includeArchivedNotes: false, - fuzzyAttributeSearch: false, - debug: false - }); - - // Test different operators - const tests = [ - { operator: '*=*', tokens: ['note'], description: 'Substring search' }, - { operator: '=*', tokens: ['test'], description: 'Prefix search' }, - { operator: '*=', tokens: ['ing'], description: 'Suffix search' }, - { operator: '~=', tokens: ['nite'], description: 'Fuzzy search' } - ]; - - for (const test of tests) { - try { - console.log(`\n Testing ${test.description} (${test.operator}):`); - const startTime = Date.now(); - const results = searchService.search(test.tokens, test.operator, searchContext); - const duration = Date.now() - startTime; - const resultCount = Array.isArray(results) ? results.length : results.size || 0; - console.log(` Found ${resultCount} results in ${duration}ms`); - - if (resultCount > 0) { - const sampleResults = Array.isArray(results) ? results.slice(0, 3) : Array.from(results).slice(0, 3); - console.log(` Sample results: ${sampleResults.join(', ')}...`); - } - } catch (error: any) { - console.log(` ✗ Error: ${error.message}`); - } - } -} - -async function main() { - console.log("========================================"); - console.log(" SQLite Search Implementation Test"); - console.log("========================================"); - - try { - // Check current database version - const currentDbVersion = sql.getValue("SELECT value FROM options WHERE name = 'dbVersion'") || 0; - console.log(`\nCurrent database version: ${currentDbVersion}`); - console.log(`Target database version: 235`); - - if (currentDbVersion < 235) { - console.log("\n⚠️ Database needs migration from version " + currentDbVersion + " to 235"); - console.log(" Restart the server to run migrations."); - return; - } - - // Verify tables exist - const tablesExist = await verifyTables(); - if (!tablesExist) { - return; - } - - // Verify functions work - const functionsWork = await verifyFunctions(); - - // Check index content - await verifySearchContent(); - - // Test search if everything is ready - if (tablesExist && functionsWork) { - await testSearch(); - } - - console.log("\n========================================"); - console.log(" Test Complete"); - console.log("========================================"); - - if (tablesExist && functionsWork) { - console.log("\n✅ SQLite search implementation is ready!"); - console.log("\nTo enable SQLite search:"); - console.log(" 1. Set searchBackend option to 'sqlite'"); - console.log(" 2. Or use the admin API: PUT /api/search-admin/config"); - } else { - console.log("\n❌ SQLite search is not ready. See issues above."); - } - - } catch (error: any) { - console.error("\n❌ Test failed with error:", error); - console.error(error.stack); - } -} - -// Run if executed directly -if (require.main === module) { - main().then(() => process.exit(0)).catch(() => process.exit(1)); -} - -export { verifyTables, verifyFunctions, testSearch }; \ No newline at end of file diff --git a/packages/commons/src/lib/options_interface.ts b/packages/commons/src/lib/options_interface.ts index fe91fb82a..7671d4315 100644 --- a/packages/commons/src/lib/options_interface.ts +++ b/packages/commons/src/lib/options_interface.ts @@ -136,14 +136,6 @@ export interface OptionDefinitions extends KeyboardShortcutsOptions