	feat(search): try again to get fts5 searching done well
@@ -219,12 +219,22 @@ CREATE TABLE IF NOT EXISTS sessions (
 );
 
 -- FTS5 Full-Text Search Support
--- Create FTS5 virtual table for full-text searching
+-- Create FTS5 virtual table with trigram tokenizer
+-- Trigram tokenizer provides language-agnostic substring matching:
+-- 1. Fast substring matching (50-100x speedup for LIKE queries without wildcards)
+-- 2. Case-insensitive search without custom collation
+-- 3. No language-specific stemming assumptions (works for all languages)
+-- 4. Boolean operators (AND, OR, NOT) and phrase matching with quotes
+--
+-- IMPORTANT: Trigram requires minimum 3-character tokens for matching
+-- detail='none' reduces index size by ~50% while maintaining MATCH/rank performance
+-- (loses position info for highlight() function, but snippet() still works)
 CREATE VIRTUAL TABLE notes_fts USING fts5(
     noteId UNINDEXED,
     title,
     content,
-    tokenize = 'porter unicode61'
+    tokenize = 'trigram',
+    detail = 'none'
 );
 
 -- Triggers to keep FTS table synchronized with notes
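
A sketch of what the trigram index enables once the table is populated, assuming the `sql` service API used by the migrations in this commit (the helper below is hypothetical and not part of the change): an ordinary LIKE substring query over the FTS5 table's stored text, which the trigram index can accelerate for terms of three or more characters.

    import sql from "../services/sql.js";

    // Hypothetical helper: case-insensitive substring search over notes_fts.
    // Terms shorter than 3 characters fall below the trigram minimum and
    // degrade to a scan; LIKE wildcards inside `term` would need escaping
    // in a real implementation.
    function searchNotesSubstring(term: string): { noteId: string; title: string }[] {
        return sql.getRows<{ noteId: string; title: string }>(`
            SELECT noteId, title
            FROM notes_fts
            WHERE content LIKE '%' || ? || '%'
        `, [term]);
    }
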
@@ -15,22 +15,45 @@ import log from "../services/log.js";
 export default function addFTS5SearchAndPerformanceIndexes() {
     log.info("Starting FTS5 and performance optimization migration...");
 
+    // Verify SQLite version supports trigram tokenizer (requires 3.34.0+)
+    const sqliteVersion = sql.getValue<string>(`SELECT sqlite_version()`);
+    const [major, minor, patch] = sqliteVersion.split('.').map(Number);
+    const versionNumber = major * 10000 + minor * 100 + (patch || 0);
+    const requiredVersion = 3 * 10000 + 34 * 100 + 0; // 3.34.0
+
+    if (versionNumber < requiredVersion) {
+        log.error(`SQLite version ${sqliteVersion} does not support trigram tokenizer (requires 3.34.0+)`);
+        log.info("Skipping FTS5 trigram migration - will use fallback search implementation");
+        return; // Skip FTS5 setup, rely on fallback search
+    }
+
+    log.info(`SQLite version ${sqliteVersion} confirmed - trigram tokenizer available`);
+
     // Part 1: FTS5 Setup
     log.info("Creating FTS5 virtual table for full-text search...");
 
     // Create FTS5 virtual table
     // We store noteId, title, and content for searching
-    // The 'tokenize' option uses porter stemming for better search results
     sql.executeScript(`
         -- Drop existing FTS table if it exists (for re-running migration in dev)
         DROP TABLE IF EXISTS notes_fts;
 
-        -- Create FTS5 virtual table
+        -- Create FTS5 virtual table with trigram tokenizer
+        -- Trigram tokenizer provides language-agnostic substring matching:
+        -- 1. Fast substring matching (50-100x speedup for LIKE queries without wildcards)
+        -- 2. Case-insensitive search without custom collation
+        -- 3. No language-specific stemming assumptions (works for all languages)
+        -- 4. Boolean operators (AND, OR, NOT) and phrase matching with quotes
+        --
+        -- IMPORTANT: Trigram requires minimum 3-character tokens for matching
+        -- detail='none' reduces index size by ~50% while maintaining MATCH/rank performance
+        -- (loses position info for highlight() function, but snippet() still works)
         CREATE VIRTUAL TABLE IF NOT EXISTS notes_fts USING fts5(
             noteId UNINDEXED,
             title,
             content,
-            tokenize = 'porter unicode61'
+            tokenize = 'trigram',
+            detail = 'none'
         );
     `);
 
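
As a worked example of the version gate above (version strings hypothetical):

    // "3.45.1" -> 3*10000 + 45*100 + 1 = 34501 >= 33400, so the migration proceeds
    // "3.33.0" -> 3*10000 + 33*100 + 0 = 33300 <  33400, so FTS5 setup is skipped
    // "3.34"   -> patch is undefined, so (patch || 0) keeps the result at exactly 33400
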
@@ -1,826 +0,0 @@
-/**
- * Migration to add SQLite native search support with normalized text tables
- *
- * This migration implements Phase 1 of the SQLite-based search plan:
- * 1. Creates note_search_content table with normalized text columns
- * 2. Creates note_tokens table for word-level token storage
- * 3. Adds necessary indexes for optimization
- * 4. Creates triggers to keep tables synchronized with note updates
- * 5. Populates tables with existing note data in batches
- *
- * This provides 100% accurate search results with 10-30x performance improvement
- * over TypeScript-based search, without the complexity of trigrams.
- */
-
-import sql from "../services/sql.js";
-import log from "../services/log.js";
-import { normalize as utilsNormalize, stripTags } from "../services/utils.js";
-import { getSqliteFunctionsService } from "../services/search/sqlite_functions.js";
-
-/**
- * Uses the existing normalize function from utils.ts for consistency
- * This ensures all normalization throughout the codebase is identical
- */
-function normalizeText(text: string): string {
-    if (!text) return '';
-    return utilsNormalize(text);
-}
-
-/**
- * Tokenizes text into individual words for token-based searching
- * Handles punctuation and special characters appropriately
- */
-function tokenize(text: string): string[] {
-    if (!text) return [];
-
-    // Split on word boundaries, filter out empty tokens
-    // This regex splits on spaces, punctuation, and other non-word characters
-    // but preserves apostrophes within words (e.g., "don't", "it's")
-    const tokens = text
-        .split(/[\s\n\r\t,;.!?()[\]{}"'`~@#$%^&*+=|\\/<>:_-]+/)
-        .filter(token => token.length > 0)
-        .map(token => token.toLowerCase());
-
-    // Also split on camelCase and snake_case boundaries for code content
-    const expandedTokens: string[] = [];
-    for (const token of tokens) {
-        // Add the original token
-        expandedTokens.push(token);
-
-        // Split camelCase (e.g., "getUserName" -> ["get", "User", "Name"])
-        const camelCaseParts = token.split(/(?=[A-Z])/);
-        if (camelCaseParts.length > 1) {
-            expandedTokens.push(...camelCaseParts.map(p => p.toLowerCase()));
-        }
-
-        // Split snake_case (e.g., "user_name" -> ["user", "name"])
-        const snakeCaseParts = token.split('_');
-        if (snakeCaseParts.length > 1) {
-            expandedTokens.push(...snakeCaseParts);
-        }
-    }
-
-    // Remove duplicates and return
-    return Array.from(new Set(expandedTokens));
-}
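
(Reviewer note, not part of the commit: the first pass lowercases every token, and its split pattern already includes `_` and `'`, so the camelCase branch never sees an uppercase letter, the snake_case branch never sees an underscore, and the apostrophe-preservation claim above does not match the pattern; the trace below illustrates.)

    tokenize("getUserName user_id don't");
    // pass 1 splits on _, ' and whitespace, then lowercases:
    //   ["getusername", "user", "id", "don", "t"]
    // /(?=[A-Z])/ matches nothing in lowercased tokens and no "_" remains,
    // so the deduplicated result is unchanged:
    //   ["getusername", "user", "id", "don", "t"]
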
-
-/**
- * Strips HTML tags from content for text-only indexing
- * Uses the utils stripTags function for consistency
- */
-function stripHtmlTags(html: string): string {
-    if (!html) return '';
-
-    // Remove script and style content entirely first
-    let text = html.replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '');
-    text = text.replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, '');
-
-    // Use utils stripTags for consistency
-    text = stripTags(text);
-
-    // Decode HTML entities
-    text = text.replace(/&nbsp;/g, ' ');
-    text = text.replace(/&lt;/g, '<');
-    text = text.replace(/&gt;/g, '>');
-    text = text.replace(/&amp;/g, '&');
-    text = text.replace(/&quot;/g, '"');
-    text = text.replace(/&#39;/g, "'");
-
-    // Normalize whitespace
-    text = text.replace(/\s+/g, ' ').trim();
-
-    return text;
-}
-
-export default function sqliteNativeSearch() {
-    log.info("Starting SQLite native search migration...");
-
-    const startTime = Date.now();
-
-    // Wrap entire migration in a transaction for atomicity
-    sql.transactional(() => {
-        try {
-            // Register custom SQL functions first so they can be used in triggers
-            registerCustomFunctions();
-
-            // Create the search tables and indexes
-            createSearchTables();
-
-            // Create triggers to keep tables synchronized (before population)
-            createSearchTriggers();
-
-            // Populate the tables with existing note data
-            populateSearchTables();
-
-            // Run final verification and optimization
-            finalizeSearchSetup();
-
-            const duration = Date.now() - startTime;
-            log.info(`SQLite native search migration completed successfully in ${duration}ms`);
-
-        } catch (error) {
-            log.error(`SQLite native search migration failed: ${error}`);
-            // Transaction will automatically rollback on error
-            throw error;
-        }
-    });
-}
-
-function createSearchTables() {
-    log.info("Creating search content and token tables...");
-
-    // Drop existing tables if they exist (for re-running migration in dev)
-    sql.execute("DROP TABLE IF EXISTS note_search_content");
-    sql.execute("DROP TABLE IF EXISTS note_tokens");
-
-    // Create the main search content table
-    sql.execute(`
-        CREATE TABLE note_search_content (
-            noteId TEXT PRIMARY KEY,
-            title TEXT NOT NULL,
-            content TEXT NOT NULL,
-            title_normalized TEXT NOT NULL,
-            content_normalized TEXT NOT NULL,
-            full_text_normalized TEXT NOT NULL
-        )
-    `);
-
-    // Create the token table for word-level operations
-    sql.execute(`
-        CREATE TABLE note_tokens (
-            noteId TEXT NOT NULL,
-            token TEXT NOT NULL,
-            token_normalized TEXT NOT NULL,
-            position INTEGER NOT NULL,
-            source TEXT NOT NULL CHECK(source IN ('title', 'content')),
-            PRIMARY KEY (noteId, position, source)
-        )
-    `);
-
-    // Create indexes for search optimization
-    log.info("Creating search indexes...");
-
-    // Consolidated indexes - removed redundancy between COLLATE NOCASE and plain indexes
-    // Using COLLATE NOCASE for case-insensitive searches
-    sql.execute(`
-        CREATE INDEX idx_search_title_normalized
-        ON note_search_content(title_normalized COLLATE NOCASE)
-    `);
-
-    sql.execute(`
-        CREATE INDEX idx_search_content_normalized
-        ON note_search_content(content_normalized COLLATE NOCASE)
-    `);
-
-    sql.execute(`
-        CREATE INDEX idx_search_full_text
-        ON note_search_content(full_text_normalized COLLATE NOCASE)
-    `);
-
-    // Token indexes - consolidated to avoid redundancy
-    sql.execute(`
-        CREATE INDEX idx_tokens_normalized
-        ON note_tokens(token_normalized COLLATE NOCASE)
-    `);
-
-    sql.execute(`
-        CREATE INDEX idx_tokens_noteId
-        ON note_tokens(noteId)
-    `);
-
-    // Composite index for token searches with source
-    sql.execute(`
-        CREATE INDEX idx_tokens_source_normalized
-        ON note_tokens(source, token_normalized COLLATE NOCASE)
-    `);
-
-    log.info("Search tables and indexes created successfully");
-}
-
-function populateSearchTables() {
-    log.info("Populating search tables with existing note content...");
-
-    const batchSize = 100;
-    let offset = 0;
-    let totalProcessed = 0;
-    let totalTokens = 0;
-
-    while (true) {
-        const notes = sql.getRows<{
-            noteId: string;
-            title: string;
-            type: string;
-            mime: string;
-            content: string | null;
-        }>(`
-            SELECT
-                n.noteId,
-                n.title,
-                n.type,
-                n.mime,
-                b.content
-            FROM notes n
-            LEFT JOIN blobs b ON n.blobId = b.blobId
-            WHERE n.isDeleted = 0
-                AND n.isProtected = 0
-                AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
-            ORDER BY n.noteId
-            LIMIT ? OFFSET ?
-        `, [batchSize, offset]);
-
-        if (notes.length === 0) {
-            break;
-        }
-
-        // Process batch of notes
-        for (const note of notes) {
-            try {
-                // Process content based on type
-                let processedContent = note.content || '';
-
-                // Strip HTML for text notes
-                if (note.type === 'text' && note.mime === 'text/html') {
-                    processedContent = stripHtmlTags(processedContent);
-                }
-
-                // Normalize text for searching using the utils normalize function
-                const titleNorm = normalizeText(note.title);
-                const contentNorm = normalizeText(processedContent);
-                const fullTextNorm = titleNorm + ' ' + contentNorm;
-
-                // Insert into search content table
-                sql.execute(`
-                    INSERT INTO note_search_content
-                    (noteId, title, content, title_normalized, content_normalized, full_text_normalized)
-                    VALUES (?, ?, ?, ?, ?, ?)
-                `, [
-                    note.noteId,
-                    note.title,
-                    processedContent,
-                    titleNorm,
-                    contentNorm,
-                    fullTextNorm
-                ]);
-
-                // Tokenize title and content separately to track source
-                const titleTokens = tokenize(note.title);
-                const contentTokens = tokenize(processedContent);
-
-                let position = 0;
-
-                // Insert title tokens
-                for (const token of titleTokens) {
-                    if (token.length > 0) {
-                        sql.execute(`
-                            INSERT OR IGNORE INTO note_tokens
-                            (noteId, token, token_normalized, position, source)
-                            VALUES (?, ?, ?, ?, 'title')
-                        `, [note.noteId, token, normalizeText(token), position]);
-                        position++;
-                        totalTokens++;
-                    }
-                }
-
-                // Insert content tokens with unique positions
-                for (const token of contentTokens) {
-                    if (token.length > 0) {
-                        sql.execute(`
-                            INSERT OR IGNORE INTO note_tokens
-                            (noteId, token, token_normalized, position, source)
-                            VALUES (?, ?, ?, ?, 'content')
-                        `, [note.noteId, token, normalizeText(token), position]);
-                        position++;
-                        totalTokens++;
-                    }
-                }
-
-                totalProcessed++;
-
-            } catch (error) {
-                log.error(`Failed to index note ${note.noteId}: ${error}`);
-                // Continue with other notes even if one fails
-            }
-        }
-
-        offset += batchSize;
-
-        if (totalProcessed % 1000 === 0) {
-            log.info(`Processed ${totalProcessed} notes, ${totalTokens} tokens for search indexing...`);
-        }
-    }
-
-    log.info(`Completed indexing ${totalProcessed} notes with ${totalTokens} total tokens`);
-}
-
-function createSearchTriggers() {
-    log.info("Creating triggers to keep search tables synchronized...");
-
-    // Drop existing triggers if they exist
-    const triggers = [
-        'note_search_insert',
-        'note_search_update',
-        'note_search_delete',
-        'note_search_soft_delete',
-        'note_search_undelete',
-        'note_search_protect',
-        'note_search_unprotect',
-        'note_search_blob_insert',
-        'note_search_blob_update'
-    ];
-
-    for (const trigger of triggers) {
-        sql.execute(`DROP TRIGGER IF EXISTS ${trigger}`);
-    }
-
-    // Trigger for INSERT operations on notes - simplified version
-    sql.execute(`
-        CREATE TRIGGER note_search_insert
-        AFTER INSERT ON notes
-        WHEN NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
-            AND NEW.isDeleted = 0
-            AND NEW.isProtected = 0
-        BEGIN
-            -- Delete any existing entries (for INSERT OR REPLACE)
-            DELETE FROM note_search_content WHERE noteId = NEW.noteId;
-            DELETE FROM note_tokens WHERE noteId = NEW.noteId;
-
-            -- Insert basic content with title only (content will be populated by blob trigger)
-            INSERT INTO note_search_content
-            (noteId, title, content, title_normalized, content_normalized, full_text_normalized)
-            VALUES (
-                NEW.noteId,
-                NEW.title,
-                '',
-                LOWER(NEW.title),
-                '',
-                LOWER(NEW.title)
-            );
-        END
-    `);
-
-    // Trigger for UPDATE operations on notes - simplified version
-    sql.execute(`
-        CREATE TRIGGER note_search_update
-        AFTER UPDATE ON notes
-        WHEN NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
-        BEGIN
-            -- Always delete the old entries
-            DELETE FROM note_search_content WHERE noteId = NEW.noteId;
-            DELETE FROM note_tokens WHERE noteId = NEW.noteId;
-
-            -- Re-insert if note is not deleted and not protected
-            INSERT INTO note_search_content
-            (noteId, title, content, title_normalized, content_normalized, full_text_normalized)
-            SELECT
-                NEW.noteId,
-                NEW.title,
-                COALESCE(b.content, ''),
-                LOWER(NEW.title),
-                LOWER(COALESCE(b.content, '')),
-                LOWER(NEW.title || ' ' || COALESCE(b.content, ''))
-            FROM notes n
-            LEFT JOIN blobs b ON b.blobId = NEW.blobId
-            WHERE n.noteId = NEW.noteId
-                AND NEW.isDeleted = 0
-                AND NEW.isProtected = 0;
-        END
-    `);
-
-    // Trigger for DELETE operations on notes
-    sql.execute(`
-        CREATE TRIGGER note_search_delete
-        AFTER DELETE ON notes
-        BEGIN
-            DELETE FROM note_search_content WHERE noteId = OLD.noteId;
-            DELETE FROM note_tokens WHERE noteId = OLD.noteId;
-        END
-    `);
-
-    // Trigger for soft delete (isDeleted = 1)
-    sql.execute(`
-        CREATE TRIGGER note_search_soft_delete
-        AFTER UPDATE ON notes
-        WHEN OLD.isDeleted = 0 AND NEW.isDeleted = 1
-        BEGIN
-            DELETE FROM note_search_content WHERE noteId = NEW.noteId;
-            DELETE FROM note_tokens WHERE noteId = NEW.noteId;
-        END
-    `);
-
-    // Trigger for undelete (isDeleted = 0) - simplified version
-    sql.execute(`
-        CREATE TRIGGER note_search_undelete
-        AFTER UPDATE ON notes
-        WHEN OLD.isDeleted = 1 AND NEW.isDeleted = 0
-            AND NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
-            AND NEW.isProtected = 0
-        BEGIN
-            DELETE FROM note_search_content WHERE noteId = NEW.noteId;
-            DELETE FROM note_tokens WHERE noteId = NEW.noteId;
-
-            INSERT INTO note_search_content
-            (noteId, title, content, title_normalized, content_normalized, full_text_normalized)
-            SELECT
-                NEW.noteId,
-                NEW.title,
-                COALESCE(b.content, ''),
-                LOWER(NEW.title),
-                LOWER(COALESCE(b.content, '')),
-                LOWER(NEW.title || ' ' || COALESCE(b.content, ''))
-            FROM notes n
-            LEFT JOIN blobs b ON b.blobId = NEW.blobId
-            WHERE n.noteId = NEW.noteId;
-        END
-    `);
-
-    // Trigger for notes becoming protected
-    sql.execute(`
-        CREATE TRIGGER note_search_protect
-        AFTER UPDATE ON notes
-        WHEN OLD.isProtected = 0 AND NEW.isProtected = 1
-        BEGIN
-            DELETE FROM note_search_content WHERE noteId = NEW.noteId;
-            DELETE FROM note_tokens WHERE noteId = NEW.noteId;
-        END
-    `);
-
-    // Trigger for notes becoming unprotected - simplified version
-    sql.execute(`
-        CREATE TRIGGER note_search_unprotect
-        AFTER UPDATE ON notes
-        WHEN OLD.isProtected = 1 AND NEW.isProtected = 0
-            AND NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
-            AND NEW.isDeleted = 0
-        BEGIN
-            DELETE FROM note_search_content WHERE noteId = NEW.noteId;
-            DELETE FROM note_tokens WHERE noteId = NEW.noteId;
-
-            INSERT INTO note_search_content
-            (noteId, title, content, title_normalized, content_normalized, full_text_normalized)
-            SELECT
-                NEW.noteId,
-                NEW.title,
-                COALESCE(b.content, ''),
-                LOWER(NEW.title),
-                LOWER(COALESCE(b.content, '')),
-                LOWER(NEW.title || ' ' || COALESCE(b.content, ''))
-            FROM notes n
-            LEFT JOIN blobs b ON b.blobId = NEW.blobId
-            WHERE n.noteId = NEW.noteId;
-        END
-    `);
-
-    // Trigger for INSERT operations on blobs - simplified version
-    sql.execute(`
-        CREATE TRIGGER note_search_blob_insert
-        AFTER INSERT ON blobs
-        BEGIN
-            -- Update search content for all notes that reference this blob
-            UPDATE note_search_content
-            SET content = NEW.content,
-                content_normalized = LOWER(NEW.content),
-                full_text_normalized = title_normalized || ' ' || LOWER(NEW.content)
-            WHERE noteId IN (
-                SELECT n.noteId
-                FROM notes n
-                WHERE n.blobId = NEW.blobId
-                    AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
-                    AND n.isDeleted = 0
-                    AND n.isProtected = 0
-            );
-
-            -- Clear tokens for affected notes (will be repopulated by post-processing)
-            DELETE FROM note_tokens
-            WHERE noteId IN (
-                SELECT n.noteId
-                FROM notes n
-                WHERE n.blobId = NEW.blobId
-                    AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
-                    AND n.isDeleted = 0
-                    AND n.isProtected = 0
-            );
-        END
-    `);
-
-    // Trigger for UPDATE operations on blobs - simplified version
-    sql.execute(`
-        CREATE TRIGGER note_search_blob_update
-        AFTER UPDATE ON blobs
-        BEGIN
-            -- Update search content for all notes that reference this blob
-            UPDATE note_search_content
-            SET content = NEW.content,
-                content_normalized = LOWER(NEW.content),
-                full_text_normalized = title_normalized || ' ' || LOWER(NEW.content)
-            WHERE noteId IN (
-                SELECT n.noteId
-                FROM notes n
-                WHERE n.blobId = NEW.blobId
-                    AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
-                    AND n.isDeleted = 0
-                    AND n.isProtected = 0
-            );
-
-            -- Clear tokens for affected notes (will be repopulated by post-processing)
-            DELETE FROM note_tokens
-            WHERE noteId IN (
-                SELECT n.noteId
-                FROM notes n
-                WHERE n.blobId = NEW.blobId
-                    AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
-                    AND n.isDeleted = 0
-                    AND n.isProtected = 0
-            );
-        END
-    `);
-
-    log.info("Search synchronization triggers created successfully");
-}
-
-function registerCustomFunctions() {
-    log.info("Registering custom SQL functions for search operations...");
-
-    try {
-        // Get the database connection to register functions
-        const db = sql.getDbConnection();
-
-        // Use the centralized SQLite functions service
-        const functionsService = getSqliteFunctionsService();
-
-        // Register functions if not already registered
-        if (!functionsService.isRegistered()) {
-            const success = functionsService.registerFunctions(db);
-            if (success) {
-                log.info("Custom SQL functions registered successfully via service");
-            } else {
-                log.info("Custom SQL functions registration failed - using basic SQLite functions only");
-            }
-        } else {
-            log.info("Custom SQL functions already registered");
-        }
-
-        // Register migration-specific helper function for tokenization
-        db.function('tokenize_for_migration', {
-            deterministic: true,
-            varargs: false
-        }, (text: string | null) => {
-            if (!text) return '';
-            // Return as JSON array string for SQL processing
-            return JSON.stringify(tokenize(text));
-        });
-
-    } catch (error) {
-        log.info(`Could not register custom SQL functions (will use basic SQLite functions): ${error}`);
-        // This is not critical - the migration will work with basic SQLite functions
-    }
-}
-
-/**
- * Populates tokens for a specific note
- * This is called outside of triggers to avoid complex SQL within trigger constraints
- */
-function populateNoteTokens(noteId: string): number {
-    try {
-        // Get the note's search content
-        const noteData = sql.getRow<{
-            title: string;
-            content: string;
-        }>(`
-            SELECT title, content
-            FROM note_search_content
-            WHERE noteId = ?
-        `, [noteId]);
-
-        if (!noteData) return 0;
-
-        // Clear existing tokens for this note
-        sql.execute(`DELETE FROM note_tokens WHERE noteId = ?`, [noteId]);
-
-        // Tokenize title and content
-        const titleTokens = tokenize(noteData.title);
-        const contentTokens = tokenize(noteData.content);
-
-        let position = 0;
-        let tokenCount = 0;
-
-        // Insert title tokens
-        for (const token of titleTokens) {
-            if (token.length > 0) {
-                sql.execute(`
-                    INSERT OR IGNORE INTO note_tokens
-                    (noteId, token, token_normalized, position, source)
-                    VALUES (?, ?, ?, ?, 'title')
-                `, [noteId, token, normalizeText(token), position]);
-                position++;
-                tokenCount++;
-            }
-        }
-
-        // Insert content tokens
-        for (const token of contentTokens) {
-            if (token.length > 0) {
-                sql.execute(`
-                    INSERT OR IGNORE INTO note_tokens
-                    (noteId, token, token_normalized, position, source)
-                    VALUES (?, ?, ?, ?, 'content')
-                `, [noteId, token, normalizeText(token), position]);
-                position++;
-                tokenCount++;
-            }
-        }
-
-        return tokenCount;
-    } catch (error) {
-        log.error(`Error populating tokens for note ${noteId}: ${error}`);
-        return 0;
-    }
-}
-
-/**
- * Populates tokens for multiple notes affected by blob operations
- * This handles cases where blob triggers can affect multiple notes
- */
-function populateBlobAffectedTokens(blobId: string): void {
-    try {
-        // Find all notes that reference this blob and need token updates
-        const affectedNoteIds = sql.getColumn<string>(`
-            SELECT DISTINCT n.noteId
-            FROM notes n
-            INNER JOIN note_search_content nsc ON n.noteId = nsc.noteId
-            WHERE n.blobId = ?
-                AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
-                AND n.isDeleted = 0
-                AND n.isProtected = 0
-        `, [blobId]);
-
-        if (affectedNoteIds.length === 0) return;
-
-        log.info(`Updating tokens for ${affectedNoteIds.length} notes affected by blob ${blobId}`);
-
-        let totalTokens = 0;
-        for (const noteId of affectedNoteIds) {
-            const tokenCount = populateNoteTokens(noteId);
-            totalTokens += tokenCount;
-        }
-
-        log.info(`Updated ${totalTokens} tokens for blob-affected notes`);
-    } catch (error) {
-        log.error(`Error populating blob-affected tokens for blob ${blobId}: ${error}`);
-    }
-}
-
-function populateAllTokens() {
-    log.info("Populating tokens for all search content...");
-
-    // Clear existing tokens first to ensure clean state
-    sql.execute("DELETE FROM note_tokens");
-
-    const batchSize = 100;
-    let offset = 0;
-    let totalProcessed = 0;
-    let totalTokens = 0;
-
-    while (true) {
-        const notes = sql.getRows<{
-            noteId: string;
-            title: string;
-            content: string;
-        }>(`
-            SELECT noteId, title, content
-            FROM note_search_content
-            ORDER BY noteId
-            LIMIT ? OFFSET ?
-        `, [batchSize, offset]);
-
-        if (notes.length === 0) {
-            break;
-        }
-
-        for (const note of notes) {
-            try {
-                // Tokenize title and content
-                const titleTokens = tokenize(note.title);
-                const contentTokens = tokenize(note.content);
-
-                let position = 0;
-
-                // Insert title tokens
-                for (const token of titleTokens) {
-                    if (token.length > 0) {
-                        sql.execute(`
-                            INSERT OR IGNORE INTO note_tokens
-                            (noteId, token, token_normalized, position, source)
-                            VALUES (?, ?, ?, ?, 'title')
-                        `, [note.noteId, token, normalizeText(token), position]);
-                        position++;
-                        totalTokens++;
-                    }
-                }
-
-                // Insert content tokens with continuous position numbering
-                for (const token of contentTokens) {
-                    if (token.length > 0) {
-                        sql.execute(`
-                            INSERT OR IGNORE INTO note_tokens
-                            (noteId, token, token_normalized, position, source)
-                            VALUES (?, ?, ?, ?, 'content')
-                        `, [note.noteId, token, normalizeText(token), position]);
-                        position++;
-                        totalTokens++;
-                    }
-                }
-
-                totalProcessed++;
-
-            } catch (error) {
-                log.error(`Failed to tokenize note ${note.noteId}: ${error}`);
-            }
-        }
-
-        offset += batchSize;
-
-        if (totalProcessed % 1000 === 0) {
-            log.info(`Processed ${totalProcessed} notes, ${totalTokens} tokens so far...`);
-        }
-    }
-
-    log.info(`Token population completed: ${totalProcessed} notes processed, ${totalTokens} total tokens`);
-}
-
-function finalizeSearchSetup() {
-    log.info("Running final verification and optimization...");
-
-    // Check for missing notes that should be indexed
-    const missingCount = sql.getValue<number>(`
-        SELECT COUNT(*) FROM notes n
-        LEFT JOIN blobs b ON n.blobId = b.blobId
-        WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
-            AND n.isDeleted = 0
-            AND n.isProtected = 0
-            AND b.content IS NOT NULL
-            AND NOT EXISTS (SELECT 1 FROM note_search_content WHERE noteId = n.noteId)
-    `) || 0;
-
-    if (missingCount > 0) {
-        log.info(`Found ${missingCount} notes that are missing from search index`);
-
-        // Index missing notes using basic SQLite functions
-        sql.execute(`
-            INSERT INTO note_search_content
-            (noteId, title, content, title_normalized, content_normalized, full_text_normalized)
-            SELECT
-                n.noteId,
-                n.title,
-                COALESCE(b.content, ''),
-                LOWER(n.title),
-                LOWER(COALESCE(b.content, '')),
-                LOWER(n.title || ' ' || COALESCE(b.content, ''))
-            FROM notes n
-            LEFT JOIN blobs b ON n.blobId = b.blobId
-            WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
-                AND n.isDeleted = 0
-                AND n.isProtected = 0
-                AND b.content IS NOT NULL
-                AND NOT EXISTS (SELECT 1 FROM note_search_content WHERE noteId = n.noteId)
-        `);
-
-        log.info(`Indexed ${missingCount} missing notes`);
-    }
-
-    // Populate tokens for all existing content (including any missing notes we just added)
-    populateAllTokens();
-
-    // Verify table creation
-    const tables = sql.getColumn<string>(`
-        SELECT name FROM sqlite_master
-        WHERE type = 'table'
-        AND name IN ('note_search_content', 'note_tokens')
-    `);
-
-    if (tables.length !== 2) {
-        throw new Error("Search tables were not created properly");
-    }
-
-    // Check row counts
-    const searchContentCount = sql.getValue<number>("SELECT COUNT(*) FROM note_search_content") || 0;
-    const tokenCount = sql.getValue<number>("SELECT COUNT(*) FROM note_tokens") || 0;
-
-    log.info(`Search content table has ${searchContentCount} entries`);
-    log.info(`Token table has ${tokenCount} entries`);
-
-    // Run ANALYZE to update SQLite query planner statistics
-    log.info("Updating SQLite statistics for query optimization...");
-    sql.execute("ANALYZE note_search_content");
-    sql.execute("ANALYZE note_tokens");
-
-    // Verify indexes were created
-    const indexes = sql.getColumn<string>(`
-        SELECT name FROM sqlite_master
-        WHERE type = 'index'
-        AND tbl_name IN ('note_search_content', 'note_tokens')
-    `);
-
-    log.info(`Created ${indexes.length} indexes for search optimization`);
-
-    log.info("Search setup finalization completed");
-}
apps/server/src/migrations/0236__cleanup_sqlite_search.ts (new file, 47 lines)
@@ -0,0 +1,47 @@
+/**
+ * Migration to clean up custom SQLite search implementation
+ *
+ * This migration removes tables and triggers created by migration 0235
+ * which implemented a custom SQLite-based search system. That system
+ * has been replaced by FTS5 with trigram tokenizer (migration 0234),
+ * making these custom tables redundant.
+ *
+ * Tables removed:
+ * - note_search_content: Stored normalized note content for custom search
+ * - note_tokens: Stored tokenized words for custom token-based search
+ *
+ * This migration is safe to run on databases that:
+ * 1. Never ran migration 0235 (tables don't exist)
+ * 2. Already ran migration 0235 (tables will be dropped)
+ */
+
+import sql from "../services/sql.js";
+import log from "../services/log.js";
+
+export default function cleanupSqliteSearch() {
+    log.info("Starting SQLite custom search cleanup migration...");
+
+    try {
+        sql.transactional(() => {
+            // Drop custom search tables if they exist
+            log.info("Dropping note_search_content table...");
+            sql.executeScript(`DROP TABLE IF EXISTS note_search_content`);
+
+            log.info("Dropping note_tokens table...");
+            sql.executeScript(`DROP TABLE IF EXISTS note_tokens`);
+
+            // Clean up any entity changes for these tables
+            // This prevents sync issues and cleans up change tracking
+            log.info("Cleaning up entity changes for removed tables...");
+            sql.execute(`
+                DELETE FROM entity_changes
+                WHERE entityName IN ('note_search_content', 'note_tokens')
+            `);
+
+            log.info("SQLite custom search cleanup completed successfully");
+        });
+    } catch (error) {
+        log.error(`Error during SQLite search cleanup: ${error}`);
+        throw new Error(`Failed to clean up SQLite search tables: ${error}`);
+    }
+}
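
A minimal sketch of how the "safe either way" claim above could be verified after the migration runs, assuming the same `sql` and `log` services (the check itself is hypothetical, not part of the commit):

    // Confirm the custom search tables are gone, whether or not 0235 ever ran.
    const leftover = sql.getColumn<string>(`
        SELECT name FROM sqlite_master
        WHERE type = 'table' AND name IN ('note_search_content', 'note_tokens')
    `);
    if (leftover.length > 0) {
        log.error(`Cleanup incomplete, tables still present: ${leftover.join(', ')}`);
    }
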
@@ -6,10 +6,10 @@
 
 // Migrations should be kept in descending order, so the latest migration is first.
 const MIGRATIONS: (SqlMigration | JsMigration)[] = [
-    // Add SQLite native search with normalized text tables
+    // Clean up custom SQLite search tables (replaced by FTS5 trigram)
     {
-        version: 235,
-        module: async () => import("./0235__sqlite_native_search.js")
+        version: 236,
+        module: async () => import("./0236__cleanup_sqlite_search.js")
     },
     // Add FTS5 full-text search support and strategic performance indexes
     {
| @@ -1,243 +0,0 @@ | |||||||
| /** |  | ||||||
|  * API endpoints for search administration and monitoring |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| import { Router } from "express"; |  | ||||||
| import performanceMonitor from "../../services/search/performance_monitor.js"; |  | ||||||
| import abTestingService from "../../services/search/ab_testing.js"; |  | ||||||
| import { SQLiteSearchService } from "../../services/search/sqlite_search_service.js"; |  | ||||||
| import optionService from "../../services/options.js"; |  | ||||||
| import sql from "../../services/sql.js"; |  | ||||||
| import log from "../../services/log.js"; |  | ||||||
|  |  | ||||||
| const router = Router(); |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Get search performance metrics |  | ||||||
|  */ |  | ||||||
| router.get("/api/search-admin/metrics", (req, res) => { |  | ||||||
|     const metrics = { |  | ||||||
|         recent: performanceMonitor.getRecentMetrics(100), |  | ||||||
|         averages: { |  | ||||||
|             typescript: performanceMonitor.getAverageMetrics("typescript"), |  | ||||||
|             sqlite: performanceMonitor.getAverageMetrics("sqlite") |  | ||||||
|         }, |  | ||||||
|         comparison: performanceMonitor.compareBackends() |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     res.json(metrics); |  | ||||||
| }); |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Get A/B testing results |  | ||||||
|  */ |  | ||||||
| router.get("/api/search-admin/ab-tests", (req, res) => { |  | ||||||
|     const results = { |  | ||||||
|         summary: abTestingService.getSummary(), |  | ||||||
|         recent: abTestingService.getRecentResults(50) |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     res.json(results); |  | ||||||
| }); |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Get current search configuration |  | ||||||
|  */ |  | ||||||
| router.get("/api/search-admin/config", (req, res) => { |  | ||||||
|     const config = { |  | ||||||
|         backend: optionService.getOption("searchBackend"), |  | ||||||
|         sqliteEnabled: optionService.getOptionBool("searchSqliteEnabled"), |  | ||||||
|         performanceLogging: optionService.getOptionBool("searchSqlitePerformanceLogging"), |  | ||||||
|         maxMemory: optionService.getOptionInt("searchSqliteMaxMemory"), |  | ||||||
|         batchSize: optionService.getOptionInt("searchSqliteBatchSize"), |  | ||||||
|         autoRebuild: optionService.getOptionBool("searchSqliteAutoRebuild") |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     res.json(config); |  | ||||||
| }); |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Update search configuration |  | ||||||
|  */ |  | ||||||
| router.put("/api/search-admin/config", (req, res) => { |  | ||||||
|     try { |  | ||||||
|         const { backend, sqliteEnabled, performanceLogging, maxMemory, batchSize, autoRebuild } = req.body; |  | ||||||
|  |  | ||||||
|         if (backend !== undefined) { |  | ||||||
|             if (!["typescript", "sqlite"].includes(backend)) { |  | ||||||
|                 return res.status(400).json({ error: "Invalid backend. Must be 'typescript' or 'sqlite'" }); |  | ||||||
|             } |  | ||||||
|             optionService.setOption("searchBackend", backend); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if (sqliteEnabled !== undefined) { |  | ||||||
|             optionService.setOption("searchSqliteEnabled", sqliteEnabled ? "true" : "false"); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if (performanceLogging !== undefined) { |  | ||||||
|             optionService.setOption("searchSqlitePerformanceLogging", performanceLogging ? "true" : "false"); |  | ||||||
|             performanceMonitor.updateSettings(); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if (maxMemory !== undefined) { |  | ||||||
|             if (maxMemory < 1048576 || maxMemory > 1073741824) { // 1MB to 1GB |  | ||||||
|                 return res.status(400).json({ error: "Max memory must be between 1MB and 1GB" }); |  | ||||||
|             } |  | ||||||
|             optionService.setOption("searchSqliteMaxMemory", maxMemory.toString()); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if (batchSize !== undefined) { |  | ||||||
|             if (batchSize < 10 || batchSize > 1000) { |  | ||||||
|                 return res.status(400).json({ error: "Batch size must be between 10 and 1000" }); |  | ||||||
|             } |  | ||||||
|             optionService.setOption("searchSqliteBatchSize", batchSize.toString()); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if (autoRebuild !== undefined) { |  | ||||||
|             optionService.setOption("searchSqliteAutoRebuild", autoRebuild ? "true" : "false"); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         res.json({ success: true, message: "Configuration updated successfully" }); |  | ||||||
|     } catch (error: any) { |  | ||||||
|         log.error(`Failed to update search configuration: ${error}`); |  | ||||||
|         res.status(500).json({ error: error.message }); |  | ||||||
|     } |  | ||||||
| }); |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Get SQLite search index status |  | ||||||
|  */ |  | ||||||
| router.get("/api/search-admin/sqlite/status", async (req, res) => { |  | ||||||
|     try { |  | ||||||
|         const service = SQLiteSearchService.getInstance(); |  | ||||||
|         const status = await service.getIndexStatus(); |  | ||||||
|  |  | ||||||
|         // List which search-related tables exist (sqlite_master carries no |  | ||||||
|         // size information, so this is a presence check, not a byte count) |  | ||||||
|         const tableSizes = sql.getRows<{ name: string }>(` |  | ||||||
|             SELECT name |  | ||||||
|             FROM sqlite_master |  | ||||||
|             WHERE type='table' AND name IN ('note_search_content', 'note_tokens', 'notes_fts', 'notes_fts_data', 'notes_fts_idx', 'notes_fts_content') |  | ||||||
|         `); |  | ||||||
|  |  | ||||||
|         res.json({ |  | ||||||
|             ...status, |  | ||||||
|             tables: tableSizes |  | ||||||
|         }); |  | ||||||
|     } catch (error: any) { |  | ||||||
|         log.error(`Failed to get SQLite search status: ${error}`); |  | ||||||
|         res.status(500).json({ error: error.message }); |  | ||||||
|     } |  | ||||||
| }); |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Rebuild SQLite search index |  | ||||||
|  */ |  | ||||||
| router.post("/api/search-admin/sqlite/rebuild", async (req, res) => { |  | ||||||
|     try { |  | ||||||
|         const { force = false } = req.body; |  | ||||||
|          |  | ||||||
|         log.info("Starting SQLite search index rebuild via API"); |  | ||||||
|          |  | ||||||
|         const service = SQLiteSearchService.getInstance(); |  | ||||||
|         const startTime = Date.now(); |  | ||||||
|          |  | ||||||
|         await service.rebuildIndex(force); |  | ||||||
|          |  | ||||||
|         const duration = Date.now() - startTime; |  | ||||||
|         log.info(`SQLite search index rebuild completed in ${duration}ms`); |  | ||||||
|  |  | ||||||
|         res.json({ |  | ||||||
|             success: true, |  | ||||||
|             message: "Index rebuilt successfully", |  | ||||||
|             duration |  | ||||||
|         }); |  | ||||||
|     } catch (error: any) { |  | ||||||
|         log.error(`Failed to rebuild SQLite search index: ${error}`); |  | ||||||
|         res.status(500).json({ error: error.message }); |  | ||||||
|     } |  | ||||||
| }); |  | ||||||
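| // Example invocation (a sketch; assumes the default server port and an |  | ||||||
| // authenticated session): |  | ||||||
| // |  | ||||||
| //   curl -X POST -H "Content-Type: application/json" \ |  | ||||||
| //        -d '{"force": true}' http://localhost:8080/api/search-admin/sqlite/rebuild |  | ||||||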
|  |  | ||||||
| /** |  | ||||||
|  * Clear SQLite search index |  | ||||||
|  */ |  | ||||||
| router.delete("/api/search-admin/sqlite/index", async (req, res) => { |  | ||||||
|     try { |  | ||||||
|         log.info("Clearing SQLite search index via API"); |  | ||||||
|          |  | ||||||
|         const service = SQLiteSearchService.getInstance(); |  | ||||||
|         service.clearIndex(); |  | ||||||
|          |  | ||||||
|         res.json({ |  | ||||||
|             success: true, |  | ||||||
|             message: "Index cleared successfully" |  | ||||||
|         }); |  | ||||||
|     } catch (error: any) { |  | ||||||
|         log.error(`Failed to clear SQLite search index: ${error}`); |  | ||||||
|         res.status(500).json({ error: error.message }); |  | ||||||
|     } |  | ||||||
| }); |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Reset performance metrics |  | ||||||
|  */ |  | ||||||
| router.delete("/api/search-admin/metrics", (req, res) => { |  | ||||||
|     performanceMonitor.reset(); |  | ||||||
|     res.json({ success: true, message: "Metrics reset successfully" }); |  | ||||||
| }); |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Reset A/B test results |  | ||||||
|  */ |  | ||||||
| router.delete("/api/search-admin/ab-tests", (req, res) => { |  | ||||||
|     abTestingService.reset(); |  | ||||||
|     res.json({ success: true, message: "A/B test results reset successfully" }); |  | ||||||
| }); |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Set A/B testing sample rate |  | ||||||
|  */ |  | ||||||
| router.put("/api/search-admin/ab-tests/sample-rate", (req, res) => { |  | ||||||
|     try { |  | ||||||
|         const { rate } = req.body; |  | ||||||
|          |  | ||||||
|         if (typeof rate !== "number" || rate < 0 || rate > 1) { |  | ||||||
|             return res.status(400).json({ error: "Sample rate must be between 0 and 1" }); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         abTestingService.setSampleRate(rate); |  | ||||||
|         res.json({ success: true, message: `Sample rate set to ${rate * 100}%` }); |  | ||||||
|     } catch (error: any) { |  | ||||||
|         res.status(500).json({ error: error.message }); |  | ||||||
|     } |  | ||||||
| }); |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Test search with both backends for comparison |  | ||||||
|  */ |  | ||||||
| router.post("/api/search-admin/test", async (req, res) => { |  | ||||||
|     try { |  | ||||||
|         const { query } = req.body; |  | ||||||
|          |  | ||||||
|         if (!query) { |  | ||||||
|             return res.status(400).json({ error: "Query is required" }); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         const result = await abTestingService.runComparison(query, {}); |  | ||||||
|          |  | ||||||
|         if (!result) { |  | ||||||
|             return res.json({ |  | ||||||
|                 message: "Test was not run (query not sampled, or A/B testing is disabled)", |  | ||||||
|                 query |  | ||||||
|             }); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         res.json(result); |  | ||||||
|     } catch (error: any) { |  | ||||||
|         log.error(`Search test failed: ${error}`); |  | ||||||
|         res.status(500).json({ error: error.message }); |  | ||||||
|     } |  | ||||||
| }); |  | ||||||
|  |  | ||||||
| export default router; |  | ||||||
| @@ -40,7 +40,6 @@ import scriptRoute from "./api/script.js"; | |||||||
| import senderRoute from "./api/sender.js"; | import senderRoute from "./api/sender.js"; | ||||||
| import filesRoute from "./api/files.js"; | import filesRoute from "./api/files.js"; | ||||||
| import searchRoute from "./api/search.js"; | import searchRoute from "./api/search.js"; | ||||||
| import searchAdminRoute from "./api/search_admin.js"; |  | ||||||
| import bulkActionRoute from "./api/bulk_action.js"; | import bulkActionRoute from "./api/bulk_action.js"; | ||||||
| import specialNotesRoute from "./api/special_notes.js"; | import specialNotesRoute from "./api/special_notes.js"; | ||||||
| import noteMapRoute from "./api/note_map.js"; | import noteMapRoute from "./api/note_map.js"; | ||||||
| @@ -261,9 +260,6 @@ function register(app: express.Application) { | |||||||
|     apiRoute(GET, "/api/search/:searchString", searchRoute.search); |     apiRoute(GET, "/api/search/:searchString", searchRoute.search); | ||||||
|     apiRoute(GET, "/api/search-templates", searchRoute.searchTemplates); |     apiRoute(GET, "/api/search-templates", searchRoute.searchTemplates); | ||||||
|  |  | ||||||
|     // Search administration routes |  | ||||||
|     app.use(searchAdminRoute); |  | ||||||
|  |  | ||||||
|     apiRoute(PST, "/api/bulk-action/execute", bulkActionRoute.execute); |     apiRoute(PST, "/api/bulk-action/execute", bulkActionRoute.execute); | ||||||
|     apiRoute(PST, "/api/bulk-action/affected-notes", bulkActionRoute.getAffectedNoteCount); |     apiRoute(PST, "/api/bulk-action/affected-notes", bulkActionRoute.getAffectedNoteCount); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -215,14 +215,6 @@ const defaultOptions: DefaultOption[] = [ | |||||||
|     { name: "aiSystemPrompt", value: "", isSynced: true }, |     { name: "aiSystemPrompt", value: "", isSynced: true }, | ||||||
|     { name: "aiSelectedProvider", value: "openai", isSynced: true }, |     { name: "aiSelectedProvider", value: "openai", isSynced: true }, | ||||||
|  |  | ||||||
|     // Search configuration |  | ||||||
|     { name: "searchBackend", value: "typescript", isSynced: false }, // "typescript" or "sqlite" |  | ||||||
|     { name: "searchSqliteEnabled", value: "false", isSynced: false }, |  | ||||||
|     { name: "searchSqlitePerformanceLogging", value: "false", isSynced: false }, |  | ||||||
|     { name: "searchSqliteMaxMemory", value: "67108864", isSynced: false }, // 64MB default |  | ||||||
|     { name: "searchSqliteBatchSize", value: "100", isSynced: false }, |  | ||||||
|     { name: "searchSqliteAutoRebuild", value: "true", isSynced: false }, |  | ||||||
|  |  | ||||||
|     { name: "seenCallToActions", value: "[]", isSynced: true } |     { name: "seenCallToActions", value: "[]", isSynced: true } | ||||||
| ]; | ]; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,218 +0,0 @@ | |||||||
| /** |  | ||||||
|  * A/B Testing utilities for comparing search backend performance |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| import SearchContext from "./search_context.js"; |  | ||||||
| import type { SearchParams } from "./services/types.js"; |  | ||||||
| import performanceMonitor from "./performance_monitor.js"; |  | ||||||
| import log from "../log.js"; |  | ||||||
| import optionService from "../options.js"; |  | ||||||
|  |  | ||||||
| export interface ABTestResult { |  | ||||||
|     query: string; |  | ||||||
|     typescriptTime: number; |  | ||||||
|     sqliteTime: number; |  | ||||||
|     typescriptResults: number; |  | ||||||
|     sqliteResults: number; |  | ||||||
|     resultsMatch: boolean; |  | ||||||
|     speedup: number; |  | ||||||
|     winner: "typescript" | "sqlite" | "tie"; |  | ||||||
| } |  | ||||||
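| // Example of one recorded result (illustrative values only): |  | ||||||
| //   { query: "trilium sync", typescriptTime: 42, sqliteTime: 7, |  | ||||||
| //     typescriptResults: 18, sqliteResults: 18, resultsMatch: true, |  | ||||||
| //     speedup: 6.0, winner: "sqlite" } |  | ||||||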
|  |  | ||||||
| class ABTestingService { |  | ||||||
|     private enabled: boolean = false; |  | ||||||
|     private sampleRate: number = 0.1; // 10% of searches by default |  | ||||||
|     private results: ABTestResult[] = []; |  | ||||||
|     private maxResults: number = 1000; |  | ||||||
|  |  | ||||||
|     constructor() { |  | ||||||
|         this.updateSettings(); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     updateSettings() { |  | ||||||
|         try { |  | ||||||
|             this.enabled = optionService.getOptionBool("searchSqliteEnabled"); |  | ||||||
|             // Could add a separate AB testing option if needed |  | ||||||
|         } catch { |  | ||||||
|             this.enabled = false; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Determines if we should run an A/B test for this query |  | ||||||
|      */ |  | ||||||
|     shouldRunTest(): boolean { |  | ||||||
|         if (!this.enabled) { |  | ||||||
|             return false; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Bernoulli sampling: with the default 10% rate, roughly one search |  | ||||||
|         // in ten is mirrored to both backends |  | ||||||
|         return Math.random() < this.sampleRate; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Run the same search query with both backends and compare results |  | ||||||
|      */ |  | ||||||
|     async runComparison(query: string, params: SearchParams): Promise<ABTestResult | null> { |  | ||||||
|         if (!this.shouldRunTest()) { |  | ||||||
|             return null; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         try { |  | ||||||
|             // Dynamically import to avoid circular dependencies |  | ||||||
|             const searchModule = await import("./services/search.js"); |  | ||||||
|              |  | ||||||
|             // Run with TypeScript backend |  | ||||||
|             const tsContext = new SearchContext({ ...params, forceBackend: "typescript" }); |  | ||||||
|             const tsTimer = performanceMonitor.startTimer(); |  | ||||||
|             const tsResults = searchModule.default.findResultsWithQuery(query, tsContext); |  | ||||||
|             const tsTime = tsTimer(); |  | ||||||
|  |  | ||||||
|             // Run with SQLite backend |  | ||||||
|             const sqliteContext = new SearchContext({ ...params, forceBackend: "sqlite" }); |  | ||||||
|             const sqliteTimer = performanceMonitor.startTimer(); |  | ||||||
|             const sqliteResults = searchModule.default.findResultsWithQuery(query, sqliteContext); |  | ||||||
|             const sqliteTime = sqliteTimer(); |  | ||||||
|  |  | ||||||
|             // Compare results |  | ||||||
|             const tsNoteIds = new Set(tsResults.map(r => r.noteId)); |  | ||||||
|             const sqliteNoteIds = new Set(sqliteResults.map(r => r.noteId)); |  | ||||||
|              |  | ||||||
|             // Check if results match (same notes found) |  | ||||||
|             const resultsMatch = tsNoteIds.size === sqliteNoteIds.size && |  | ||||||
|                 [...tsNoteIds].every(id => sqliteNoteIds.has(id)); |  | ||||||
|  |  | ||||||
|             // Calculate speedup (clamp the divisor so a 0ms SQLite timing |  | ||||||
|             // cannot yield an Infinity speedup) |  | ||||||
|             const speedup = tsTime / Math.max(sqliteTime, 1); |  | ||||||
|  |  | ||||||
|             // Determine winner |  | ||||||
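|             // (the 1.2 / 0.83 cutoffs form a symmetric ~20% band, 0.83 being |  | ||||||
|             //  roughly 1/1.2, so small timing jitter is reported as a tie) |  | ||||||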
|             let winner: "typescript" | "sqlite" | "tie"; |  | ||||||
|             if (speedup > 1.2) { |  | ||||||
|                 winner = "sqlite"; |  | ||||||
|             } else if (speedup < 0.83) { |  | ||||||
|                 winner = "typescript"; |  | ||||||
|             } else { |  | ||||||
|                 winner = "tie"; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             const result: ABTestResult = { |  | ||||||
|                 query: query.substring(0, 100), |  | ||||||
|                 typescriptTime: tsTime, |  | ||||||
|                 sqliteTime: sqliteTime, |  | ||||||
|                 typescriptResults: tsResults.length, |  | ||||||
|                 sqliteResults: sqliteResults.length, |  | ||||||
|                 resultsMatch, |  | ||||||
|                 speedup, |  | ||||||
|                 winner |  | ||||||
|             }; |  | ||||||
|  |  | ||||||
|             this.recordResult(result); |  | ||||||
|  |  | ||||||
|             // Log significant differences |  | ||||||
|             if (!resultsMatch) { |  | ||||||
|                 log.info(`A/B test found different results for query "${query.substring(0, 50)}": TS=${tsResults.length}, SQLite=${sqliteResults.length}`); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             if (Math.abs(speedup - 1) > 0.5) { |  | ||||||
|                 const factor = speedup >= 1 ? speedup : 1 / speedup; |  | ||||||
|                 log.info(`A/B test significant performance difference: ${winner} is ${factor.toFixed(1)}x faster for query "${query.substring(0, 50)}"`); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             return result; |  | ||||||
|         } catch (error) { |  | ||||||
|             log.error(`A/B test failed: ${error}`); |  | ||||||
|             return null; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     private recordResult(result: ABTestResult) { |  | ||||||
|         this.results.push(result); |  | ||||||
|  |  | ||||||
|         // Keep only the last N results |  | ||||||
|         if (this.results.length > this.maxResults) { |  | ||||||
|             this.results = this.results.slice(-this.maxResults); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Get summary statistics from A/B tests |  | ||||||
|      */ |  | ||||||
|     getSummary(): { |  | ||||||
|         totalTests: number; |  | ||||||
|         avgSpeedup: number; |  | ||||||
|         typescriptWins: number; |  | ||||||
|         sqliteWins: number; |  | ||||||
|         ties: number; |  | ||||||
|         mismatchRate: number; |  | ||||||
|         recommendation: string; |  | ||||||
|     } { |  | ||||||
|         if (this.results.length === 0) { |  | ||||||
|             return { |  | ||||||
|                 totalTests: 0, |  | ||||||
|                 avgSpeedup: 1, |  | ||||||
|                 typescriptWins: 0, |  | ||||||
|                 sqliteWins: 0, |  | ||||||
|                 ties: 0, |  | ||||||
|                 mismatchRate: 0, |  | ||||||
|                 recommendation: "No A/B test data available" |  | ||||||
|             }; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         const totalTests = this.results.length; |  | ||||||
|         const avgSpeedup = this.results.reduce((sum, r) => sum + r.speedup, 0) / totalTests; |  | ||||||
|         const typescriptWins = this.results.filter(r => r.winner === "typescript").length; |  | ||||||
|         const sqliteWins = this.results.filter(r => r.winner === "sqlite").length; |  | ||||||
|         const ties = this.results.filter(r => r.winner === "tie").length; |  | ||||||
|         const mismatches = this.results.filter(r => !r.resultsMatch).length; |  | ||||||
|         const mismatchRate = mismatches / totalTests; |  | ||||||
|  |  | ||||||
|         let recommendation: string; |  | ||||||
|         if (mismatchRate > 0.1) { |  | ||||||
|             recommendation = "High mismatch rate detected - SQLite search may have accuracy issues"; |  | ||||||
|         } else if (avgSpeedup > 1.5) { |  | ||||||
|             recommendation = `SQLite is ${avgSpeedup.toFixed(1)}x faster on average - consider enabling`; |  | ||||||
|         } else if (avgSpeedup < 0.67) { |  | ||||||
|             recommendation = `TypeScript is ${(1/avgSpeedup).toFixed(1)}x faster on average - keep using TypeScript`; |  | ||||||
|         } else { |  | ||||||
|             recommendation = "Both backends perform similarly - choice depends on other factors"; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         return { |  | ||||||
|             totalTests, |  | ||||||
|             avgSpeedup, |  | ||||||
|             typescriptWins, |  | ||||||
|             sqliteWins, |  | ||||||
|             ties, |  | ||||||
|             mismatchRate, |  | ||||||
|             recommendation |  | ||||||
|         }; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Get recent test results |  | ||||||
|      */ |  | ||||||
|     getRecentResults(count: number = 100): ABTestResult[] { |  | ||||||
|         return this.results.slice(-count); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Clear all test results |  | ||||||
|      */ |  | ||||||
|     reset() { |  | ||||||
|         this.results = []; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Set the sampling rate for A/B tests |  | ||||||
|      */ |  | ||||||
|     setSampleRate(rate: number) { |  | ||||||
|         if (rate < 0 || rate > 1) { |  | ||||||
|             throw new Error("Sample rate must be between 0 and 1"); |  | ||||||
|         } |  | ||||||
|         this.sampleRate = rate; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Singleton instance |  | ||||||
| const abTestingService = new ABTestingService(); |  | ||||||
|  |  | ||||||
| export default abTestingService; |  | ||||||
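| // Typical wiring (a sketch; `searchParams` stands in for the caller's own |  | ||||||
| // SearchParams object): |  | ||||||
| // |  | ||||||
| //   const comparison = await abTestingService.runComparison("trilium", searchParams); |  | ||||||
| //   if (comparison && !comparison.resultsMatch) { |  | ||||||
| //       log.info(`Backends disagreed: ${JSON.stringify(comparison)}`); |  | ||||||
| //   } |  | ||||||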
| @@ -1,155 +0,0 @@ | |||||||
| /** |  | ||||||
|  * SQLite-based Note Content Fulltext Expression |  | ||||||
|  *  |  | ||||||
|  * This is a drop-in replacement for NoteContentFulltextExp that delegates |  | ||||||
|  * content matching to the SQLite search service. It keeps the existing |  | ||||||
|  * expression API while typically delivering 10-30x faster content searches. |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| import type SearchContext from "../search_context.js"; |  | ||||||
| import Expression from "./expression.js"; |  | ||||||
| import NoteSet from "../note_set.js"; |  | ||||||
| import log from "../../log.js"; |  | ||||||
| import becca from "../../../becca/becca.js"; |  | ||||||
| import protectedSessionService from "../../protected_session.js"; |  | ||||||
| import { getSQLiteSearchService, type SearchOptions } from "../sqlite_search_service.js"; |  | ||||||
|  |  | ||||||
| const ALLOWED_OPERATORS = new Set(["=", "!=", "*=*", "*=", "=*", "%=", "~=", "~*"]); |  | ||||||
|  |  | ||||||
| interface ConstructorOpts { |  | ||||||
|     tokens: string[]; |  | ||||||
|     raw?: boolean; |  | ||||||
|     flatText?: boolean; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * SQLite-optimized implementation of note content fulltext search |  | ||||||
|  */ |  | ||||||
| class NoteContentSQLiteExp extends Expression { |  | ||||||
|     private operator: string; |  | ||||||
|     tokens: string[]; |  | ||||||
|     private raw: boolean; |  | ||||||
|     private flatText: boolean; |  | ||||||
|     private sqliteService = getSQLiteSearchService(); |  | ||||||
|  |  | ||||||
|     constructor(operator: string, { tokens, raw, flatText }: ConstructorOpts) { |  | ||||||
|         super(); |  | ||||||
|  |  | ||||||
|         if (!operator || !tokens || !Array.isArray(tokens)) { |  | ||||||
|             throw new Error('Invalid parameters: operator and tokens are required'); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         this.operator = operator; |  | ||||||
|         this.tokens = tokens; |  | ||||||
|         this.raw = !!raw; |  | ||||||
|         this.flatText = !!flatText; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     execute(inputNoteSet: NoteSet, executionContext: {}, searchContext: SearchContext) { |  | ||||||
|         if (!ALLOWED_OPERATORS.has(this.operator)) { |  | ||||||
|             searchContext.addError(`Note content can only be searched with these operators: ${Array.from(ALLOWED_OPERATORS).join(", ")}; operator ${this.operator} was given.`); |  | ||||||
|             return inputNoteSet; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         const resultNoteSet = new NoteSet(); |  | ||||||
|         const startTime = Date.now(); |  | ||||||
|  |  | ||||||
|         try { |  | ||||||
|             // Prepare search options; protected notes are only searchable |  | ||||||
|             // while a protected session is active |  | ||||||
|             const searchOptions: SearchOptions = { |  | ||||||
|                 includeProtected: protectedSessionService.isProtectedSessionAvailable(), |  | ||||||
|                 includeDeleted: false, |  | ||||||
|                 limit: searchContext.limit || undefined |  | ||||||
|             }; |  | ||||||
|  |  | ||||||
|             // If we have an input note set, use it as a filter |  | ||||||
|             if (inputNoteSet.notes.length > 0) { |  | ||||||
|                 searchOptions.noteIdFilter = new Set(inputNoteSet.getNoteIds()); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Map ~* operator to ~= for SQLite service |  | ||||||
|             const mappedOperator = this.operator === "~*" ? "~=" : this.operator; |  | ||||||
|  |  | ||||||
|             // Execute SQLite search |  | ||||||
|             const noteIds = this.sqliteService.search( |  | ||||||
|                 this.tokens, |  | ||||||
|                 mappedOperator, |  | ||||||
|                 searchContext, |  | ||||||
|                 searchOptions |  | ||||||
|             ); |  | ||||||
|  |  | ||||||
|             // Build result note set from note IDs |  | ||||||
|             for (const noteId of noteIds) { |  | ||||||
|                 const note = becca.notes[noteId]; |  | ||||||
|                 if (note) { |  | ||||||
|                     resultNoteSet.add(note); |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Log performance if enabled |  | ||||||
|             const elapsed = Date.now() - startTime; |  | ||||||
|             if (searchContext.debug) { |  | ||||||
|                 log.info(`SQLite search completed: operator=${this.operator}, tokens=${this.tokens.join(" ")}, ` + |  | ||||||
|                         `results=${noteIds.size}, time=${elapsed}ms`); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Store highlighted tokens for UI |  | ||||||
|             if (noteIds.size > 0) { |  | ||||||
|                 searchContext.highlightedTokens = this.tokens; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|         } catch (error) { |  | ||||||
|             log.error(`SQLite search failed: ${error}`); |  | ||||||
|             searchContext.addError(`Search failed: ${error}`); |  | ||||||
|              |  | ||||||
|             // On error, return input set unchanged |  | ||||||
|             return inputNoteSet; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         return resultNoteSet; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Get performance statistics for monitoring |  | ||||||
|      */ |  | ||||||
|     getStatistics() { |  | ||||||
|         return this.sqliteService.getStatistics(); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Check if SQLite search is available |  | ||||||
|      */ |  | ||||||
|     static isAvailable(): boolean { |  | ||||||
|         const service = getSQLiteSearchService(); |  | ||||||
|         const stats = service.getStatistics(); |  | ||||||
|         return stats.tablesInitialized; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Create a compatible expression based on availability |  | ||||||
|      * This allows gradual migration from the old implementation |  | ||||||
|      */ |  | ||||||
|     static createExpression(operator: string, opts: ConstructorOpts): Expression { |  | ||||||
|         if (NoteContentSQLiteExp.isAvailable()) { |  | ||||||
|             return new NoteContentSQLiteExp(operator, opts); |  | ||||||
|         } else { |  | ||||||
|             // Fall back to the original implementation when the SQLite search |  | ||||||
|             // tables are not ready yet |  | ||||||
|             log.info("SQLite search not available, using fallback implementation"); |  | ||||||
|  |  | ||||||
|             // Lazy require (instead of a top-level import) to avoid a circular |  | ||||||
|             // dependency at module load time |  | ||||||
|             const NoteContentFulltextExp = require("./note_content_fulltext.js").default; |  | ||||||
|             return new NoteContentFulltextExp(operator, opts); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| export default NoteContentSQLiteExp; |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Factory function for creating search expressions |  | ||||||
|  * This can be used as a drop-in replacement in the expression builder |  | ||||||
|  */ |  | ||||||
| export function createNoteContentExpression(operator: string, opts: ConstructorOpts): Expression { |  | ||||||
|     return NoteContentSQLiteExp.createExpression(operator, opts); |  | ||||||
| } |  | ||||||
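| // Example (illustrative): build a substring-match expression over content. |  | ||||||
| // The factory transparently falls back to the TypeScript implementation |  | ||||||
| // when the SQLite tables are not initialized. |  | ||||||
| // |  | ||||||
| //   const exp = createNoteContentExpression("*=*", { tokens: ["trilium"] }); |  | ||||||
| //   const resultSet = exp.execute(inputNoteSet, {}, searchContext); |  | ||||||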
| @@ -1,405 +0,0 @@ | |||||||
| /** |  | ||||||
|  * Tests for FTS5 blob deduplication scenarios |  | ||||||
|  *  |  | ||||||
|  * This test file validates that FTS indexing works correctly when: |  | ||||||
|  * 1. Multiple notes share the same blob (deduplication) |  | ||||||
|  * 2. Notes change content to match existing blobs |  | ||||||
|  * 3. Blobs are updated and affect multiple notes |  | ||||||
|  * 4. Notes switch between unique and shared blobs |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| import { describe, it, expect, beforeEach, afterEach } from 'vitest'; |  | ||||||
| import sql from '../sql.js'; |  | ||||||
| import beccaLoader from '../../becca/becca_loader.js'; |  | ||||||
| import noteService from '../notes.js'; |  | ||||||
| import searchService from './services/search.js'; |  | ||||||
| import { ftsSearchService } from './fts_search.js'; |  | ||||||
|  |  | ||||||
| describe('FTS5 Blob Deduplication Tests', () => { |  | ||||||
|     beforeEach(() => { |  | ||||||
|         // Ensure we have a clean test database with FTS enabled |  | ||||||
|         sql.execute("DELETE FROM notes WHERE noteId LIKE 'test_%'"); |  | ||||||
|         sql.execute("DELETE FROM blobs WHERE blobId LIKE 'test_%'"); |  | ||||||
|         sql.execute("DELETE FROM notes_fts WHERE noteId LIKE 'test_%'"); |  | ||||||
|          |  | ||||||
|         // Reload becca to ensure cache is in sync |  | ||||||
|         beccaLoader.load(); |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     afterEach(() => { |  | ||||||
|         // Clean up test data |  | ||||||
|         sql.execute("DELETE FROM notes WHERE noteId LIKE 'test_%'"); |  | ||||||
|         sql.execute("DELETE FROM blobs WHERE blobId LIKE 'test_%'"); |  | ||||||
|         sql.execute("DELETE FROM notes_fts WHERE noteId LIKE 'test_%'"); |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     describe('Blob Deduplication Scenarios', () => { |  | ||||||
|         it('should index multiple notes sharing the same blob', async () => { |  | ||||||
|             // Create first note with unique content |  | ||||||
|             const note1 = await noteService.createNewNote({ |  | ||||||
|                 noteId: 'test_note1', |  | ||||||
|                 parentNoteId: 'root', |  | ||||||
|                 title: 'Test Note 1', |  | ||||||
|                 content: 'Shared content for deduplication test', |  | ||||||
|                 type: 'text' |  | ||||||
|             }); |  | ||||||
|  |  | ||||||
|             // Create second note with the same content (will share blob) |  | ||||||
|             const note2 = await noteService.createNewNote({ |  | ||||||
|                 noteId: 'test_note2', |  | ||||||
|                 parentNoteId: 'root', |  | ||||||
|                 title: 'Test Note 2', |  | ||||||
|                 content: 'Shared content for deduplication test', |  | ||||||
|                 type: 'text' |  | ||||||
|             }); |  | ||||||
|  |  | ||||||
|             // Verify both notes share the same blob |  | ||||||
|             const blob1 = sql.getRow("SELECT blobId FROM notes WHERE noteId = ?", ['test_note1']); |  | ||||||
|             const blob2 = sql.getRow("SELECT blobId FROM notes WHERE noteId = ?", ['test_note2']); |  | ||||||
|             expect(blob1.blobId).toBe(blob2.blobId); |  | ||||||
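|             // (identical content maps to the same blobId because blobIds are |  | ||||||
|             //  derived from a hash of the content, so both notes share one row) |  | ||||||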
|  |  | ||||||
|             // Verify both notes are indexed in FTS |  | ||||||
|             const ftsCount = sql.getValue( |  | ||||||
|                 "SELECT COUNT(*) FROM notes_fts WHERE noteId IN (?, ?)", |  | ||||||
|                 ['test_note1', 'test_note2'] |  | ||||||
|             ); |  | ||||||
|             expect(ftsCount).toBe(2); |  | ||||||
|  |  | ||||||
|             // Search should find both notes |  | ||||||
|             const searchResults = searchService.searchNotes('deduplication'); |  | ||||||
|             const foundNoteIds = searchResults.map(r => r.noteId); |  | ||||||
|             expect(foundNoteIds).toContain('test_note1'); |  | ||||||
|             expect(foundNoteIds).toContain('test_note2'); |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it('should update FTS when note content changes to match existing blob', async () => { |  | ||||||
|             // Create first note with unique content |  | ||||||
|             const note1 = await noteService.createNewNote({ |  | ||||||
|                 noteId: 'test_note3', |  | ||||||
|                 parentNoteId: 'root', |  | ||||||
|                 title: 'Note with existing content', |  | ||||||
|                 content: 'This is existing content in the database', |  | ||||||
|                 type: 'text' |  | ||||||
|             }); |  | ||||||
|  |  | ||||||
|             // Create second note with different content |  | ||||||
|             const note2 = await noteService.createNewNote({ |  | ||||||
|                 noteId: 'test_note4', |  | ||||||
|                 parentNoteId: 'root', |  | ||||||
|                 title: 'Note with different content', |  | ||||||
|                 content: 'This is completely different content', |  | ||||||
|                 type: 'text' |  | ||||||
|             }); |  | ||||||
|  |  | ||||||
|             // Verify notes have different blobs initially |  | ||||||
|             const initialBlob1 = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note3']); |  | ||||||
|             const initialBlob2 = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note4']); |  | ||||||
|             expect(initialBlob1).not.toBe(initialBlob2); |  | ||||||
|  |  | ||||||
|             // Change note2's content to match note1 (deduplication occurs) |  | ||||||
|             await noteService.updateNoteContent('test_note4', 'This is existing content in the database'); |  | ||||||
|              |  | ||||||
|             // Verify both notes now share the same blob |  | ||||||
|             const finalBlob1 = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note3']); |  | ||||||
|             const finalBlob2 = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note4']); |  | ||||||
|             expect(finalBlob1).toBe(finalBlob2); |  | ||||||
|  |  | ||||||
|             // Verify FTS is updated correctly for note2 |  | ||||||
|             const ftsContent = sql.getValue( |  | ||||||
|                 "SELECT content FROM notes_fts WHERE noteId = ?", |  | ||||||
|                 ['test_note4'] |  | ||||||
|             ); |  | ||||||
|             expect(ftsContent).toBe('This is existing content in the database'); |  | ||||||
|  |  | ||||||
|             // Search for old content should not find note2 |  | ||||||
|             const oldContentSearch = searchService.searchNotes('completely different'); |  | ||||||
|             const oldSearchIds = oldContentSearch.map(r => r.noteId); |  | ||||||
|             expect(oldSearchIds).not.toContain('test_note4'); |  | ||||||
|  |  | ||||||
|             // Search for new content should find both notes |  | ||||||
|             const newContentSearch = searchService.searchNotes('existing content'); |  | ||||||
|             const newSearchIds = newContentSearch.map(r => r.noteId); |  | ||||||
|             expect(newSearchIds).toContain('test_note3'); |  | ||||||
|             expect(newSearchIds).toContain('test_note4'); |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it('should update all notes when shared blob content changes', async () => { |  | ||||||
|             // Create three notes with the same content |  | ||||||
|             const sharedContent = 'Original shared content for blob update test'; |  | ||||||
|              |  | ||||||
|             await noteService.createNewNote({ |  | ||||||
|                 noteId: 'test_note5', |  | ||||||
|                 parentNoteId: 'root', |  | ||||||
|                 title: 'Shared Note 1', |  | ||||||
|                 content: sharedContent, |  | ||||||
|                 type: 'text' |  | ||||||
|             }); |  | ||||||
|  |  | ||||||
|             await noteService.createNewNote({ |  | ||||||
|                 noteId: 'test_note6', |  | ||||||
|                 parentNoteId: 'root', |  | ||||||
|                 title: 'Shared Note 2', |  | ||||||
|                 content: sharedContent, |  | ||||||
|                 type: 'text' |  | ||||||
|             }); |  | ||||||
|  |  | ||||||
|             await noteService.createNewNote({ |  | ||||||
|                 noteId: 'test_note7', |  | ||||||
|                 parentNoteId: 'root', |  | ||||||
|                 title: 'Shared Note 3', |  | ||||||
|                 content: sharedContent, |  | ||||||
|                 type: 'text' |  | ||||||
|             }); |  | ||||||
|  |  | ||||||
|             // Verify all three share the same blob |  | ||||||
|             const blobIds = sql.getColumn( |  | ||||||
|                 "SELECT DISTINCT blobId FROM notes WHERE noteId IN (?, ?, ?)", |  | ||||||
|                 ['test_note5', 'test_note6', 'test_note7'] |  | ||||||
|             ); |  | ||||||
|             expect(blobIds.length).toBe(1); |  | ||||||
|             const sharedBlobId = blobIds[0]; |  | ||||||
|  |  | ||||||
|             // Update the blob content directly (simulating what would happen in real update) |  | ||||||
|             sql.execute( |  | ||||||
|                 "UPDATE blobs SET content = ? WHERE blobId = ?", |  | ||||||
|                 ['Updated shared content for all notes', sharedBlobId] |  | ||||||
|             ); |  | ||||||
|  |  | ||||||
|             // Verify FTS is updated for all three notes |  | ||||||
|             const ftsContents = sql.getColumn( |  | ||||||
|                 "SELECT content FROM notes_fts WHERE noteId IN (?, ?, ?) ORDER BY noteId", |  | ||||||
|                 ['test_note5', 'test_note6', 'test_note7'] |  | ||||||
|             ); |  | ||||||
|              |  | ||||||
|             expect(ftsContents).toHaveLength(3); |  | ||||||
|             ftsContents.forEach(content => { |  | ||||||
|                 expect(content).toBe('Updated shared content for all notes'); |  | ||||||
|             }); |  | ||||||
|  |  | ||||||
|             // Search for old content should find nothing |  | ||||||
|             const oldSearch = searchService.searchNotes('Original shared'); |  | ||||||
|             expect(oldSearch.filter(r => r.noteId.startsWith('test_'))).toHaveLength(0); |  | ||||||
|  |  | ||||||
|             // Search for new content should find all three |  | ||||||
|             const newSearch = searchService.searchNotes('Updated shared'); |  | ||||||
|             const foundIds = newSearch.map(r => r.noteId).filter(id => id.startsWith('test_')); |  | ||||||
|             expect(foundIds).toContain('test_note5'); |  | ||||||
|             expect(foundIds).toContain('test_note6'); |  | ||||||
|             expect(foundIds).toContain('test_note7'); |  | ||||||
|         }); |  | ||||||
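|         // (the update propagates because the FTS sync triggers also fire on |  | ||||||
|         //  blob updates, reindexing every note that references the blobId) |  | ||||||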
|  |  | ||||||
|         it('should handle note switching from shared to unique blob', async () => { |  | ||||||
|             // Create two notes with shared content |  | ||||||
|             const sharedContent = 'Shared content before divergence'; |  | ||||||
|              |  | ||||||
|             const note1 = await noteService.createNewNote({ |  | ||||||
|                 noteId: 'test_note8', |  | ||||||
|                 parentNoteId: 'root', |  | ||||||
|                 title: 'Diverging Note 1', |  | ||||||
|                 content: sharedContent, |  | ||||||
|                 type: 'text' |  | ||||||
|             }); |  | ||||||
|  |  | ||||||
|             const note2 = await noteService.createNewNote({ |  | ||||||
|                 noteId: 'test_note9', |  | ||||||
|                 parentNoteId: 'root', |  | ||||||
|                 title: 'Diverging Note 2', |  | ||||||
|                 content: sharedContent, |  | ||||||
|                 type: 'text' |  | ||||||
|             }); |  | ||||||
|  |  | ||||||
|             // Verify they share the same blob |  | ||||||
|             const initialBlob1 = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note8']); |  | ||||||
|             const initialBlob2 = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note9']); |  | ||||||
|             expect(initialBlob1).toBe(initialBlob2); |  | ||||||
|  |  | ||||||
|             // Change note2 to unique content |  | ||||||
|             await noteService.updateNoteContent('test_note9', 'Unique content after divergence'); |  | ||||||
|  |  | ||||||
|             // Verify they now have different blobs |  | ||||||
|             const finalBlob1 = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note8']); |  | ||||||
|             const finalBlob2 = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note9']); |  | ||||||
|             expect(finalBlob1).not.toBe(finalBlob2); |  | ||||||
|  |  | ||||||
|             // Verify FTS is correctly updated |  | ||||||
|             const ftsContent1 = sql.getValue( |  | ||||||
|                 "SELECT content FROM notes_fts WHERE noteId = ?", |  | ||||||
|                 ['test_note8'] |  | ||||||
|             ); |  | ||||||
|             const ftsContent2 = sql.getValue( |  | ||||||
|                 "SELECT content FROM notes_fts WHERE noteId = ?", |  | ||||||
|                 ['test_note9'] |  | ||||||
|             ); |  | ||||||
|              |  | ||||||
|             expect(ftsContent1).toBe('Shared content before divergence'); |  | ||||||
|             expect(ftsContent2).toBe('Unique content after divergence'); |  | ||||||
|  |  | ||||||
|             // Search should find correct notes |  | ||||||
|             const sharedSearch = searchService.searchNotes('before divergence'); |  | ||||||
|             expect(sharedSearch.map(r => r.noteId)).toContain('test_note8'); |  | ||||||
|             expect(sharedSearch.map(r => r.noteId)).not.toContain('test_note9'); |  | ||||||
|  |  | ||||||
|             const uniqueSearch = searchService.searchNotes('after divergence'); |  | ||||||
|             expect(uniqueSearch.map(r => r.noteId)).not.toContain('test_note8'); |  | ||||||
|             expect(uniqueSearch.map(r => r.noteId)).toContain('test_note9'); |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it('should handle import scenarios where notes exist before blobs', async () => { |  | ||||||
|             // Simulate import scenario: create note without blob first |  | ||||||
|             sql.execute(` |  | ||||||
|                 INSERT INTO notes (noteId, title, type, mime, blobId, isDeleted, isProtected, dateCreated, dateModified, utcDateCreated, utcDateModified) |  | ||||||
|                 VALUES ('test_note10', 'Import Test Note', 'text', 'text/html', 'pending_blob_123', 0, 0, datetime('now'), datetime('now'), datetime('now'), datetime('now')) |  | ||||||
|             `); |  | ||||||
|  |  | ||||||
|             // Verify note is not in FTS yet (no blob content) |  | ||||||
|             const initialFts = sql.getValue( |  | ||||||
|                 "SELECT COUNT(*) FROM notes_fts WHERE noteId = ?", |  | ||||||
|                 ['test_note10'] |  | ||||||
|             ); |  | ||||||
|             expect(initialFts).toBe(0); |  | ||||||
|  |  | ||||||
|             // Now create the blob (simulating delayed blob creation during import) |  | ||||||
|             sql.execute(` |  | ||||||
|                 INSERT INTO blobs (blobId, content, dateModified, utcDateModified) |  | ||||||
|                 VALUES ('pending_blob_123', 'Imported content finally available', datetime('now'), datetime('now')) |  | ||||||
|             `); |  | ||||||
|  |  | ||||||
|             // Verify note is now indexed in FTS |  | ||||||
|             const finalFts = sql.getValue( |  | ||||||
|                 "SELECT content FROM notes_fts WHERE noteId = ?", |  | ||||||
|                 ['test_note10'] |  | ||||||
|             ); |  | ||||||
|             expect(finalFts).toBe('Imported content finally available'); |  | ||||||
|  |  | ||||||
|             // Search should now find the note |  | ||||||
|             const searchResults = searchService.searchNotes('Imported content'); |  | ||||||
|             expect(searchResults.map(r => r.noteId)).toContain('test_note10'); |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it('should correctly handle protected notes during deduplication', async () => { |  | ||||||
|             // Create a regular note |  | ||||||
|             const note1 = await noteService.createNewNote({ |  | ||||||
|                 noteId: 'test_note11', |  | ||||||
|                 parentNoteId: 'root', |  | ||||||
|                 title: 'Regular Note', |  | ||||||
|                 content: 'Content that will be shared', |  | ||||||
|                 type: 'text' |  | ||||||
|             }); |  | ||||||
|  |  | ||||||
|             // Create a protected note with same content |  | ||||||
|             sql.execute(` |  | ||||||
|                 INSERT INTO notes (noteId, title, type, mime, blobId, isDeleted, isProtected, dateCreated, dateModified, utcDateCreated, utcDateModified) |  | ||||||
|                 VALUES ('test_note12', 'Protected Note', 'text', 'text/html',  |  | ||||||
|                     (SELECT blobId FROM notes WHERE noteId = 'test_note11'), |  | ||||||
|                     0, 1, datetime('now'), datetime('now'), datetime('now'), datetime('now')) |  | ||||||
|             `); |  | ||||||
|  |  | ||||||
|             // Verify protected note is NOT in FTS |  | ||||||
|             const protectedInFts = sql.getValue( |  | ||||||
|                 "SELECT COUNT(*) FROM notes_fts WHERE noteId = ?", |  | ||||||
|                 ['test_note12'] |  | ||||||
|             ); |  | ||||||
|             expect(protectedInFts).toBe(0); |  | ||||||
|  |  | ||||||
|             // Verify regular note IS in FTS |  | ||||||
|             const regularInFts = sql.getValue( |  | ||||||
|                 "SELECT COUNT(*) FROM notes_fts WHERE noteId = ?", |  | ||||||
|                 ['test_note11'] |  | ||||||
|             ); |  | ||||||
|             expect(regularInFts).toBe(1); |  | ||||||
|  |  | ||||||
|             // Update blob content |  | ||||||
|             const blobId = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note11']); |  | ||||||
|             sql.execute("UPDATE blobs SET content = ? WHERE blobId = ?", ['Updated shared content', blobId]); |  | ||||||
|  |  | ||||||
|             // Verify regular note is updated in FTS |  | ||||||
|             const updatedContent = sql.getValue( |  | ||||||
|                 "SELECT content FROM notes_fts WHERE noteId = ?", |  | ||||||
|                 ['test_note11'] |  | ||||||
|             ); |  | ||||||
|             expect(updatedContent).toBe('Updated shared content'); |  | ||||||
|  |  | ||||||
|             // Verify protected note is still NOT in FTS |  | ||||||
|             const protectedStillNotInFts = sql.getValue( |  | ||||||
|                 "SELECT COUNT(*) FROM notes_fts WHERE noteId = ?", |  | ||||||
|                 ['test_note12'] |  | ||||||
|             ); |  | ||||||
|             expect(protectedStillNotInFts).toBe(0); |  | ||||||
|         }); |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     describe('FTS Sync and Cleanup', () => { |  | ||||||
|         it('should sync missing notes to FTS index', async () => { |  | ||||||
|             // Manually create notes without triggering FTS (simulating missed triggers) |  | ||||||
|             sql.execute(` |  | ||||||
|                 INSERT INTO notes (noteId, title, type, mime, blobId, isDeleted, isProtected, dateCreated, dateModified, utcDateCreated, utcDateModified) |  | ||||||
|                 VALUES ('test_note13', 'Missed Note 1', 'text', 'text/html', 'blob_missed_1', 0, 0, datetime('now'), datetime('now'), datetime('now'), datetime('now')) |  | ||||||
|             `); |  | ||||||
|              |  | ||||||
|             sql.execute(` |  | ||||||
|                 INSERT INTO blobs (blobId, content, dateModified, utcDateModified) |  | ||||||
|                 VALUES ('blob_missed_1', 'Content that was missed by triggers', datetime('now'), datetime('now')) |  | ||||||
|             `); |  | ||||||
|  |  | ||||||
|             // Delete from FTS to simulate missing index |  | ||||||
|             sql.execute("DELETE FROM notes_fts WHERE noteId = 'test_note13'"); |  | ||||||
|  |  | ||||||
|             // Verify note is missing from FTS |  | ||||||
|             const beforeSync = sql.getValue( |  | ||||||
|                 "SELECT COUNT(*) FROM notes_fts WHERE noteId = ?", |  | ||||||
|                 ['test_note13'] |  | ||||||
|             ); |  | ||||||
|             expect(beforeSync).toBe(0); |  | ||||||
|  |  | ||||||
|             // Run sync |  | ||||||
|             const syncedCount = ftsSearchService.syncMissingNotes(['test_note13']); |  | ||||||
|             expect(syncedCount).toBe(1); |  | ||||||
|  |  | ||||||
|             // Verify note is now in FTS |  | ||||||
|             const afterSync = sql.getValue( |  | ||||||
|                 "SELECT content FROM notes_fts WHERE noteId = ?", |  | ||||||
|                 ['test_note13'] |  | ||||||
|             ); |  | ||||||
|             expect(afterSync).toBe('Content that was missed by triggers'); |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it('should handle FTS rebuild correctly', () => { |  | ||||||
|             // Create some test notes |  | ||||||
|             const noteIds = ['test_note14', 'test_note15', 'test_note16']; |  | ||||||
|             noteIds.forEach((noteId, index) => { |  | ||||||
|                 sql.execute(` |  | ||||||
|                     INSERT INTO notes (noteId, title, type, mime, blobId, isDeleted, isProtected, dateCreated, dateModified, utcDateCreated, utcDateModified) |  | ||||||
|                     VALUES (?, ?, 'text', 'text/html', ?, 0, 0, datetime('now'), datetime('now'), datetime('now'), datetime('now')) |  | ||||||
|                 `, [noteId, `Test Note ${index}`, `blob_${noteId}`]); |  | ||||||
|                  |  | ||||||
|                 sql.execute(` |  | ||||||
|                     INSERT INTO blobs (blobId, content, dateModified, utcDateModified) |  | ||||||
|                     VALUES (?, ?, datetime('now'), datetime('now')) |  | ||||||
|                 `, [`blob_${noteId}`, `Content for note ${index}`]); |  | ||||||
|             }); |  | ||||||
|  |  | ||||||
|             // Corrupt FTS by adding invalid entries |  | ||||||
|             sql.execute("INSERT INTO notes_fts (noteId, title, content) VALUES ('invalid_note', 'Invalid', 'Invalid content')"); |  | ||||||
|  |  | ||||||
|             // Rebuild index |  | ||||||
|             ftsSearchService.rebuildIndex(); |  | ||||||
|  |  | ||||||
|             // Verify only valid notes are in FTS |  | ||||||
|             const ftsCount = sql.getValue("SELECT COUNT(*) FROM notes_fts WHERE noteId LIKE 'test_%'"); |  | ||||||
|             expect(ftsCount).toBe(3); |  | ||||||
|  |  | ||||||
|             // Verify invalid entry is gone |  | ||||||
|             const invalidCount = sql.getValue("SELECT COUNT(*) FROM notes_fts WHERE noteId = 'invalid_note'"); |  | ||||||
|             expect(invalidCount).toBe(0); |  | ||||||
|  |  | ||||||
|             // Verify content is correct |  | ||||||
|             noteIds.forEach((noteId, index) => { |  | ||||||
|                 const content = sql.getValue( |  | ||||||
|                     "SELECT content FROM notes_fts WHERE noteId = ?", |  | ||||||
|                     [noteId] |  | ||||||
|                 ); |  | ||||||
|                 expect(content).toBe(`Content for note ${index}`); |  | ||||||
|             }); |  | ||||||
|         }); |  | ||||||
|     }); |  | ||||||
| }); |  | ||||||
| @@ -3,7 +3,7 @@ | |||||||
|  * |  * | ||||||
|  * Encapsulates all FTS5-specific operations for full-text searching. |  * Encapsulates all FTS5-specific operations for full-text searching. | ||||||
|  * Provides efficient text search using SQLite's FTS5 extension with: |  * Provides efficient text search using SQLite's FTS5 extension with: | ||||||
|  * - Porter stemming for better matching |  * - Trigram tokenization for fast substring matching | ||||||
|  * - Snippet extraction for context |  * - Snippet extraction for context | ||||||
|  * - Highlighting of matched terms |  * - Highlighting of matched terms | ||||||
|  * - Query syntax conversion from Trilium to FTS5 |  * - Query syntax conversion from Trilium to FTS5 | ||||||
| @@ -125,6 +125,16 @@ class FTSSearchService { | |||||||
|             throw new Error("No search tokens provided"); |             throw new Error("No search tokens provided"); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |         // Trigram tokenizer requires minimum 3 characters | ||||||
|  |         const shortTokens = tokens.filter(token => token.length < 3); | ||||||
|  |         if (shortTokens.length > 0) { | ||||||
|  |             const shortList = shortTokens.join(', '); | ||||||
|  |             log.info(`Tokens shorter than 3 characters detected (${shortList}) - cannot use trigram FTS5`); | ||||||
|  |             throw new FTSNotAvailableError( | ||||||
|  |                 `Trigram tokenizer requires tokens of at least 3 characters. Short tokens: ${shortList}` | ||||||
|  |             ); | ||||||
|  |         } | ||||||
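|  |         // e.g. tokens ["ab"] are rejected while ["abc", "abcd"] pass; | ||||||
|  |         // callers catch FTSNotAvailableError and use the fallback path | ||||||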
|  |  | ||||||
|         // Sanitize tokens to prevent FTS5 syntax injection |         // Sanitize tokens to prevent FTS5 syntax injection | ||||||
|         const sanitizedTokens = tokens.map(token => |         const sanitizedTokens = tokens.map(token => | ||||||
|             this.sanitizeFTS5Token(token) |             this.sanitizeFTS5Token(token) | ||||||
|   | |||||||
| @@ -24,10 +24,6 @@ class SearchContext { | |||||||
|     fulltextQuery: string; |     fulltextQuery: string; | ||||||
|     dbLoadNeeded: boolean; |     dbLoadNeeded: boolean; | ||||||
|     error: string | null; |     error: string | null; | ||||||
|     /** Determines which backend to use for fulltext search */ |  | ||||||
|     searchBackend: "typescript" | "sqlite"; |  | ||||||
|     /** Whether SQLite search is enabled (cached from options) */ |  | ||||||
|     sqliteSearchEnabled: boolean; |  | ||||||
|  |  | ||||||
|     constructor(params: SearchParams = {}) { |     constructor(params: SearchParams = {}) { | ||||||
|         this.fastSearch = !!params.fastSearch; |         this.fastSearch = !!params.fastSearch; | ||||||
| @@ -58,43 +54,6 @@ class SearchContext { | |||||||
|         // and some extra data needs to be loaded before executing |         // and some extra data needs to be loaded before executing | ||||||
|         this.dbLoadNeeded = false; |         this.dbLoadNeeded = false; | ||||||
|         this.error = null; |         this.error = null; | ||||||
|          |  | ||||||
|         // Determine search backend |  | ||||||
|         this.sqliteSearchEnabled = this.checkSqliteEnabled(); |  | ||||||
|         this.searchBackend = this.determineSearchBackend(params); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     private checkSqliteEnabled(): boolean { |  | ||||||
|         try { |  | ||||||
|             // Import dynamically to avoid circular dependencies |  | ||||||
|             const optionService = require("../options.js").default; |  | ||||||
|             // Default to true if the option doesn't exist |  | ||||||
|             const enabled = optionService.getOptionOrNull("searchSqliteEnabled"); |  | ||||||
|             return enabled === null ? true : enabled === "true"; |  | ||||||
|         } catch { |  | ||||||
|             return true; // Default to enabled |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     private determineSearchBackend(params: SearchParams): "typescript" | "sqlite" { |  | ||||||
|         // Allow override via params for testing |  | ||||||
|         if (params.forceBackend) { |  | ||||||
|             return params.forceBackend; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Check if SQLite is enabled |  | ||||||
|         if (!this.sqliteSearchEnabled) { |  | ||||||
|             return "typescript"; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         try { |  | ||||||
|             const optionService = require("../options.js").default; |  | ||||||
|             const backend = optionService.getOptionOrNull("searchBackend"); |  | ||||||
|             // Default to sqlite if option doesn't exist |  | ||||||
|             return backend === "typescript" ? "typescript" : "sqlite"; |  | ||||||
|         } catch { |  | ||||||
|             return "sqlite"; // Default to SQLite for better performance |  | ||||||
|         } |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     addError(error: string) { |     addError(error: string) { | ||||||
|   | |||||||
| @@ -13,7 +13,6 @@ import AttributeExistsExp from "../expressions/attribute_exists.js"; | |||||||
| import LabelComparisonExp from "../expressions/label_comparison.js"; | import LabelComparisonExp from "../expressions/label_comparison.js"; | ||||||
| import NoteFlatTextExp from "../expressions/note_flat_text.js"; | import NoteFlatTextExp from "../expressions/note_flat_text.js"; | ||||||
| import NoteContentFulltextExp from "../expressions/note_content_fulltext.js"; | import NoteContentFulltextExp from "../expressions/note_content_fulltext.js"; | ||||||
| import NoteContentSqliteExp from "../expressions/note_content_sqlite.js"; |  | ||||||
| import OrderByAndLimitExp from "../expressions/order_by_and_limit.js"; | import OrderByAndLimitExp from "../expressions/order_by_and_limit.js"; | ||||||
| import AncestorExp from "../expressions/ancestor.js"; | import AncestorExp from "../expressions/ancestor.js"; | ||||||
| import buildComparator from "./build_comparator.js"; | import buildComparator from "./build_comparator.js"; | ||||||
| @@ -38,20 +37,15 @@ function getFulltext(_tokens: TokenData[], searchContext: SearchContext, leading | |||||||
|     const operator = leadingOperator === "=" ? "=" : "*=*"; |     const operator = leadingOperator === "=" ? "=" : "*=*"; | ||||||
|  |  | ||||||
|     if (!searchContext.fastSearch) { |     if (!searchContext.fastSearch) { | ||||||
|         // Choose between SQLite and TypeScript backend |  | ||||||
|         const ContentExp = searchContext.searchBackend === "sqlite"  |  | ||||||
|             ? NoteContentSqliteExp  |  | ||||||
|             : NoteContentFulltextExp; |  | ||||||
|          |  | ||||||
|         // For exact match with "=", we need different behavior |         // For exact match with "=", we need different behavior | ||||||
|         if (leadingOperator === "=" && tokens.length === 1) { |         if (leadingOperator === "=" && tokens.length === 1) { | ||||||
|             // Exact match on title OR exact match on content |             // Exact match on title OR exact match on content | ||||||
|             return new OrExp([ |             return new OrExp([ | ||||||
|                 new PropertyComparisonExp(searchContext, "title", "=", tokens[0]), |                 new PropertyComparisonExp(searchContext, "title", "=", tokens[0]), | ||||||
|                 new ContentExp("=", { tokens, flatText: false }) |                 new NoteContentFulltextExp("=", { tokens, flatText: false }) | ||||||
|             ]); |             ]); | ||||||
|         } |         } | ||||||
|         return new OrExp([new NoteFlatTextExp(tokens), new ContentExp(operator, { tokens, flatText: true })]); |         return new OrExp([new NoteFlatTextExp(tokens), new NoteContentFulltextExp(operator, { tokens, flatText: true })]); | ||||||
|     } else { |     } else { | ||||||
|         return new NoteFlatTextExp(tokens); |         return new NoteFlatTextExp(tokens); | ||||||
|     } |     } | ||||||
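To make the simplification above concrete, here is a hedged sketch (class names taken from the imports in this same file) of the expression tree a default fulltext query now always produces, with no backend branching:

    // e.g. for tokens ["hello"] with the default "*=*" operator:
    const exp = new OrExp([
        new NoteFlatTextExp(["hello"]),
        new NoteContentFulltextExp("*=*", { tokens: ["hello"], flatText: true })
    ]);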
| @@ -154,12 +148,7 @@ function getExpression(tokens: TokenData[], searchContext: SearchContext, level | |||||||
|  |  | ||||||
|             i++; |             i++; | ||||||
|  |  | ||||||
|             // Choose between SQLite and TypeScript backend |             return new NoteContentFulltextExp(operator.token, { tokens: [tokens[i].token], raw }); | ||||||
|             const ContentExp = searchContext.searchBackend === "sqlite"  |  | ||||||
|                 ? NoteContentSqliteExp  |  | ||||||
|                 : NoteContentFulltextExp; |  | ||||||
|  |  | ||||||
|             return new ContentExp(operator.token, { tokens: [tokens[i].token], raw }); |  | ||||||
|         } |         } | ||||||
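For clarity on the collapsed branch above, a hedged sketch of the now-direct mapping (`raw` is the local flag already in scope; the operator is just an example):

    // e.g. a query fragment like `note.content %= "\\w+@\\w+\\.com"` parses straight to:
    return new NoteContentFulltextExp("%=", { tokens: ["\\w+@\\w+\\.com"], raw });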
|  |  | ||||||
|         if (tokens[i].token === "parents") { |         if (tokens[i].token === "parents") { | ||||||
| @@ -222,12 +211,7 @@ function getExpression(tokens: TokenData[], searchContext: SearchContext, level | |||||||
|  |  | ||||||
|             i += 2; |             i += 2; | ||||||
|  |  | ||||||
|             // Choose between SQLite and TypeScript backend |             return new OrExp([new PropertyComparisonExp(searchContext, "title", "*=*", tokens[i].token), new NoteContentFulltextExp("*=*", { tokens: [tokens[i].token] })]); | ||||||
|             const ContentExp = searchContext.searchBackend === "sqlite"  |  | ||||||
|                 ? NoteContentSqliteExp  |  | ||||||
|                 : NoteContentFulltextExp; |  | ||||||
|  |  | ||||||
|             return new OrExp([new PropertyComparisonExp(searchContext, "title", "*=*", tokens[i].token), new ContentExp("*=*", { tokens: [tokens[i].token] })]); |  | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         if (PropertyComparisonExp.isProperty(tokens[i].token)) { |         if (PropertyComparisonExp.isProperty(tokens[i].token)) { | ||||||
|   | |||||||
| @@ -19,9 +19,6 @@ import sql from "../../sql.js"; | |||||||
| import scriptService from "../../script.js"; | import scriptService from "../../script.js"; | ||||||
| import striptags from "striptags"; | import striptags from "striptags"; | ||||||
| import protectedSessionService from "../../protected_session.js"; | import protectedSessionService from "../../protected_session.js"; | ||||||
| import performanceMonitor from "../performance_monitor.js"; |  | ||||||
| import type { DetailedMetrics } from "../performance_monitor.js"; |  | ||||||
| import abTestingService from "../ab_testing.js"; |  | ||||||
|  |  | ||||||
| export interface SearchNoteResult { | export interface SearchNoteResult { | ||||||
|     searchResultNoteIds: string[]; |     searchResultNoteIds: string[]; | ||||||
| @@ -405,14 +402,6 @@ function parseQueryToExpression(query: string, searchContext: SearchContext) { | |||||||
|  |  | ||||||
| function searchNotes(query: string, params: SearchParams = {}): BNote[] { | function searchNotes(query: string, params: SearchParams = {}): BNote[] { | ||||||
|     const searchContext = new SearchContext(params); |     const searchContext = new SearchContext(params); | ||||||
|      |  | ||||||
|     // Run A/B test in background (non-blocking) |  | ||||||
|     setImmediate(() => { |  | ||||||
|         abTestingService.runComparison(query, params).catch(err => { |  | ||||||
|             log.info(`A/B test failed: ${err}`); |  | ||||||
|         }); |  | ||||||
|     }); |  | ||||||
|      |  | ||||||
|     const searchResults = findResultsWithQuery(query, searchContext); |     const searchResults = findResultsWithQuery(query, searchContext); | ||||||
|  |  | ||||||
|     return searchResults.map((sr) => becca.notes[sr.noteId]); |     return searchResults.map((sr) => becca.notes[sr.noteId]); | ||||||
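A minimal usage sketch of the now-synchronous path, assuming the becca cache is loaded (parameter names come from the SearchParams interface shown later in this diff):

    // No background A/B comparison anymore; this is a plain blocking call.
    const notes = searchNotes("kubernetes", { limit: 20, debug: false });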
| @@ -422,14 +411,7 @@ function findResultsWithQuery(query: string, searchContext: SearchContext): Sear | |||||||
|     query = query || ""; |     query = query || ""; | ||||||
|     searchContext.originalQuery = query; |     searchContext.originalQuery = query; | ||||||
|  |  | ||||||
|     // Start performance monitoring |  | ||||||
|     const totalTimer = performanceMonitor.startTimer(); |  | ||||||
|     const phases: { name: string; duration: number }[] = []; |  | ||||||
|  |  | ||||||
|     // Parse query |  | ||||||
|     const parseTimer = performanceMonitor.startTimer(); |  | ||||||
|     const expression = parseQueryToExpression(query, searchContext); |     const expression = parseQueryToExpression(query, searchContext); | ||||||
|     phases.push({ name: "parse", duration: parseTimer() }); |  | ||||||
|  |  | ||||||
|     if (!expression) { |     if (!expression) { | ||||||
|         return []; |         return []; | ||||||
| @@ -441,7 +423,6 @@ function findResultsWithQuery(query: string, searchContext: SearchContext): Sear | |||||||
|     const isPureExpressionQuery = query.trim().startsWith('#'); |     const isPureExpressionQuery = query.trim().startsWith('#'); | ||||||
|  |  | ||||||
|     let results: SearchResult[]; |     let results: SearchResult[]; | ||||||
|     const searchTimer = performanceMonitor.startTimer(); |  | ||||||
|  |  | ||||||
|     if (isPureExpressionQuery) { |     if (isPureExpressionQuery) { | ||||||
|         // For pure expression queries, use standard search without progressive phases |         // For pure expression queries, use standard search without progressive phases | ||||||
| @@ -450,22 +431,6 @@ function findResultsWithQuery(query: string, searchContext: SearchContext): Sear | |||||||
|         results = findResultsWithExpression(expression, searchContext); |         results = findResultsWithExpression(expression, searchContext); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     phases.push({ name: "search", duration: searchTimer() }); |  | ||||||
|  |  | ||||||
|     // Record metrics |  | ||||||
|     const metrics: DetailedMetrics = { |  | ||||||
|         query: query.substring(0, 200), // Truncate long queries |  | ||||||
|         backend: searchContext.searchBackend, |  | ||||||
|         totalTime: totalTimer(), |  | ||||||
|         parseTime: phases[0].duration, |  | ||||||
|         searchTime: phases[1].duration, |  | ||||||
|         resultCount: results.length, |  | ||||||
|         phases, |  | ||||||
|         error: searchContext.error || undefined |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     performanceMonitor.recordDetailedMetrics(metrics); |  | ||||||
|  |  | ||||||
|     return results; |     return results; | ||||||
| } | } | ||||||
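As a hedged illustration of the `#`-prefix shortcut retained above:

    findResultsWithQuery("#todo", new SearchContext({}));          // pure expression query, skips progressive phases
    findResultsWithQuery("meeting notes", new SearchContext({}));  // regular fulltext path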
|  |  | ||||||
|   | |||||||
| @@ -21,6 +21,4 @@ export interface SearchParams { | |||||||
|     limit?: number | null; |     limit?: number | null; | ||||||
|     debug?: boolean; |     debug?: boolean; | ||||||
|     fuzzyAttributeSearch?: boolean; |     fuzzyAttributeSearch?: boolean; | ||||||
|     /** Force a specific search backend for testing/comparison */ |  | ||||||
|     forceBackend?: "typescript" | "sqlite"; |  | ||||||
| } | } | ||||||
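A hedged example of constructing the trimmed-down params object, using only fields visible in this hunk:

    const params: SearchParams = {
        limit: 50,
        debug: false,
        fuzzyAttributeSearch: true
    };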
|   | |||||||
| @@ -5,7 +5,6 @@ | |||||||
| import { describe, it, expect, beforeEach, afterEach } from 'vitest'; | import { describe, it, expect, beforeEach, afterEach } from 'vitest'; | ||||||
| import Database from 'better-sqlite3'; | import Database from 'better-sqlite3'; | ||||||
| import { SqliteFunctionsService, getSqliteFunctionsService } from './sqlite_functions.js'; | import { SqliteFunctionsService, getSqliteFunctionsService } from './sqlite_functions.js'; | ||||||
| import { normalize, stripTags } from '../utils.js'; |  | ||||||
|  |  | ||||||
| describe('SqliteFunctionsService', () => { | describe('SqliteFunctionsService', () => { | ||||||
|     let db: Database.Database; |     let db: Database.Database; | ||||||
| @@ -46,38 +45,6 @@ describe('SqliteFunctionsService', () => { | |||||||
|         }); |         }); | ||||||
|     }); |     }); | ||||||
|  |  | ||||||
|     describe('normalize_text function', () => { |  | ||||||
|         beforeEach(() => { |  | ||||||
|             service.registerFunctions(db); |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it('should normalize text correctly', () => { |  | ||||||
|             const tests = [ |  | ||||||
|                 ['café', 'cafe'], |  | ||||||
|                 ['naïve', 'naive'], |  | ||||||
|                 ['HELLO WORLD', 'hello world'], |  | ||||||
|                 ['Über', 'uber'], |  | ||||||
|                 ['', ''], |  | ||||||
|                 [null, ''], |  | ||||||
|             ]; |  | ||||||
|  |  | ||||||
|             for (const [input, expected] of tests) { |  | ||||||
|                 const result = db.prepare('SELECT normalize_text(?) as result').get(input) as { result: string }; |  | ||||||
|                 expect(result.result).toBe(expected); |  | ||||||
|                 // Verify it matches the utils normalize function |  | ||||||
|                 if (input) { |  | ||||||
|                     expect(result.result).toBe(normalize(input as string)); |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it('should handle special characters', () => { |  | ||||||
|             const input = 'Ñoño 123 ABC!@#'; |  | ||||||
|             const result = db.prepare('SELECT normalize_text(?) as result').get(input) as any; |  | ||||||
|             expect(result.result).toBe(normalize(input)); |  | ||||||
|         }); |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     describe('edit_distance function', () => { |     describe('edit_distance function', () => { | ||||||
|         beforeEach(() => { |         beforeEach(() => { | ||||||
|             service.registerFunctions(db); |             service.registerFunctions(db); | ||||||
| @@ -143,199 +110,4 @@ describe('SqliteFunctionsService', () => { | |||||||
|             expect(result.match).toBe(0); |             expect(result.match).toBe(0); | ||||||
|         }); |         }); | ||||||
|     }); |     }); | ||||||
|  |  | ||||||
|     describe('tokenize_text function', () => { |  | ||||||
|         beforeEach(() => { |  | ||||||
|             service.registerFunctions(db); |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it('should tokenize text correctly', () => { |  | ||||||
|             const tests = [ |  | ||||||
|                 ['hello world', ['hello', 'world']], |  | ||||||
|                 ['getUserName', ['getusername', 'get', 'user', 'name']], |  | ||||||
|                 ['user_name', ['user_name', 'user', 'name']], |  | ||||||
|                 ['hello-world', ['hello', 'world']], |  | ||||||
|                 ['test@example.com', ['test', 'example', 'com']], |  | ||||||
|                 ['', []], |  | ||||||
|             ]; |  | ||||||
|  |  | ||||||
|             for (const [input, expected] of tests) { |  | ||||||
|                 const result = db.prepare('SELECT tokenize_text(?) as tokens').get(input) as any; |  | ||||||
|                 const tokens = JSON.parse(result.tokens); |  | ||||||
|                 // Check that all expected tokens are present (order may vary due to Set) |  | ||||||
|                 for (const token of expected) { |  | ||||||
|                     expect(tokens).toContain(token); |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it('should handle camelCase and snake_case', () => { |  | ||||||
|             const result = db.prepare('SELECT tokenize_text(?) as tokens').get('getUserById_async') as any; |  | ||||||
|             const tokens = JSON.parse(result.tokens); |  | ||||||
|             expect(tokens).toContain('getuserbyid_async'); |  | ||||||
|             expect(tokens).toContain('getuserbyid'); |  | ||||||
|             expect(tokens).toContain('async'); |  | ||||||
|             expect(tokens).toContain('get'); |  | ||||||
|             expect(tokens).toContain('user'); |  | ||||||
|             expect(tokens).toContain('by'); |  | ||||||
|             expect(tokens).toContain('id'); |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it('should handle null input', () => { |  | ||||||
|             const result = db.prepare('SELECT tokenize_text(?) as tokens').get(null) as any; |  | ||||||
|             expect(result.tokens).toBe('[]'); |  | ||||||
|         }); |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     describe('strip_html function', () => { |  | ||||||
|         beforeEach(() => { |  | ||||||
|             service.registerFunctions(db); |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it('should strip HTML tags correctly', () => { |  | ||||||
|             const tests = [ |  | ||||||
|                 ['<p>Hello World</p>', 'Hello World'], |  | ||||||
|                 ['<div><span>Test</span></div>', 'Test'], |  | ||||||
|                 ['<script>alert("bad")</script>content', 'content'], |  | ||||||
|                 ['<style>body{color:red}</style>text', 'text'], |  | ||||||
|                 ['Hello &lt;world&gt;', 'Hello <world>'], |  | ||||||
|                 ['&nbsp; Space', ' Space'], |  | ||||||
|                 ['', ''], |  | ||||||
|             ]; |  | ||||||
|  |  | ||||||
|             for (const [input, expected] of tests) { |  | ||||||
|                 const result = db.prepare('SELECT strip_html(?) as text').get(input) as any; |  | ||||||
|                 expect(result.text).toBe(expected); |  | ||||||
|             } |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it('should handle complex HTML', () => { |  | ||||||
|             const html = ` |  | ||||||
|                 <html> |  | ||||||
|                     <head><title>Test</title></head> |  | ||||||
|                     <body> |  | ||||||
|                         <h1>Title</h1> |  | ||||||
|                         <p>Paragraph with <strong>bold</strong> text.</p> |  | ||||||
|                         <script>console.log("test")</script> |  | ||||||
|                     </body> |  | ||||||
|                 </html> |  | ||||||
|             `; |  | ||||||
|             const result = db.prepare('SELECT strip_html(?) as text').get(html) as any; |  | ||||||
|             expect(result.text).toContain('Title'); |  | ||||||
|             expect(result.text).toContain('Paragraph with bold text'); |  | ||||||
|             expect(result.text).not.toContain('console.log'); |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it('should handle null input', () => { |  | ||||||
|             const result = db.prepare('SELECT strip_html(?) as text').get(null) as any; |  | ||||||
|             expect(result.text).toBe(''); |  | ||||||
|         }); |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     describe('fuzzy_match function', () => { |  | ||||||
|         beforeEach(() => { |  | ||||||
|             service.registerFunctions(db); |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it('should perform exact matches', () => { |  | ||||||
|             const tests = [ |  | ||||||
|                 ['hello', 'hello world', 1], |  | ||||||
|                 ['world', 'hello world', 1], |  | ||||||
|                 ['foo', 'hello world', 0], |  | ||||||
|             ]; |  | ||||||
|  |  | ||||||
|             for (const [needle, haystack, expected] of tests) { |  | ||||||
|                 const result = db.prepare('SELECT fuzzy_match(?, ?, 2) as match').get(needle, haystack) as any; |  | ||||||
|                 expect(result.match).toBe(expected); |  | ||||||
|             } |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it('should perform fuzzy matches within edit distance', () => { |  | ||||||
|             const tests = [ |  | ||||||
|                 ['helo', 'hello world', 1],  // 1 edit distance |  | ||||||
|                 ['wrld', 'hello world', 1],  // 1 edit distance |  | ||||||
|                 ['hallo', 'hello world', 1], // 1 edit distance |  | ||||||
|                 ['xyz', 'hello world', 0],   // Too different |  | ||||||
|             ]; |  | ||||||
|  |  | ||||||
|             for (const [needle, haystack, expected] of tests) { |  | ||||||
|                 const result = db.prepare('SELECT fuzzy_match(?, ?, 2) as match').get(needle, haystack) as any; |  | ||||||
|                 expect(result.match).toBe(expected); |  | ||||||
|             } |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it('should handle case insensitive matching', () => { |  | ||||||
|             const result = db.prepare('SELECT fuzzy_match(?, ?, 2) as match').get('HELLO', 'hello world') as any; |  | ||||||
|             expect(result.match).toBe(1); |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it('should handle null inputs', () => { |  | ||||||
|             const result = db.prepare('SELECT fuzzy_match(?, ?, 2) as match').get(null, 'test') as any; |  | ||||||
|             expect(result.match).toBe(0); |  | ||||||
|         }); |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     describe('Integration with SQL queries', () => { |  | ||||||
|         beforeEach(() => { |  | ||||||
|             service.registerFunctions(db); |  | ||||||
|              |  | ||||||
|             // Create a test table |  | ||||||
|             db.exec(` |  | ||||||
|                 CREATE TABLE test_notes ( |  | ||||||
|                     id INTEGER PRIMARY KEY, |  | ||||||
|                     title TEXT, |  | ||||||
|                     content TEXT |  | ||||||
|                 ) |  | ||||||
|             `); |  | ||||||
|              |  | ||||||
|             // Insert test data |  | ||||||
|             const insert = db.prepare('INSERT INTO test_notes (title, content) VALUES (?, ?)'); |  | ||||||
|             insert.run('Café Meeting', '<p>Discussion about naïve implementation</p>'); |  | ||||||
|             insert.run('über wichtig', 'Very important note with HTML & entities'); |  | ||||||
|             insert.run('getUserData', 'Function to get_user_data from database'); |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it('should work in WHERE clauses with normalize_text', () => { |  | ||||||
|             const results = db.prepare(` |  | ||||||
|                 SELECT title FROM test_notes  |  | ||||||
|                 WHERE normalize_text(title) LIKE '%cafe%' |  | ||||||
|             `).all(); |  | ||||||
|              |  | ||||||
|             expect(results).toHaveLength(1); |  | ||||||
|             expect((results[0] as any).title).toBe('Café Meeting'); |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it('should work with fuzzy matching in queries', () => { |  | ||||||
|             const results = db.prepare(` |  | ||||||
|                 SELECT title FROM test_notes  |  | ||||||
|                 WHERE fuzzy_match('getuserdata', normalize_text(title), 2) = 1 |  | ||||||
|             `).all(); |  | ||||||
|              |  | ||||||
|             expect(results).toHaveLength(1); |  | ||||||
|             expect((results[0] as any).title).toBe('getUserData'); |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it('should work with HTML stripping', () => { |  | ||||||
|             const results = db.prepare(` |  | ||||||
|                 SELECT strip_html(content) as clean_content  |  | ||||||
|                 FROM test_notes  |  | ||||||
|                 WHERE title = 'Café Meeting' |  | ||||||
|             `).all(); |  | ||||||
|              |  | ||||||
|             expect((results[0] as any).clean_content).toBe('Discussion about naïve implementation'); |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it('should work with tokenization', () => { |  | ||||||
|             const result = db.prepare(` |  | ||||||
|                 SELECT tokenize_text(title) as tokens  |  | ||||||
|                 FROM test_notes  |  | ||||||
|                 WHERE title = 'getUserData' |  | ||||||
|             `).get() as any; |  | ||||||
|              |  | ||||||
|             const tokens = JSON.parse(result.tokens); |  | ||||||
|             expect(tokens).toContain('get'); |  | ||||||
|             expect(tokens).toContain('user'); |  | ||||||
|             expect(tokens).toContain('data'); |  | ||||||
|         }); |  | ||||||
|     }); |  | ||||||
| }); | }); | ||||||
| @@ -1,19 +1,17 @@ | |||||||
| /** | /** | ||||||
|  * SQLite Custom Functions Service |  * SQLite Custom Functions Service | ||||||
|  * |  * | ||||||
|  * This service manages custom SQLite functions that enhance search capabilities. |  * This service manages custom SQLite functions for general database operations. | ||||||
|  * Functions are registered with better-sqlite3 to provide native-speed operations |  * Functions are registered with better-sqlite3 to provide native-speed operations | ||||||
|  * directly within SQL queries, enabling efficient search indexing and querying. |  * directly within SQL queries. | ||||||
|  * |  * | ||||||
|  * These functions are used by: |  * These functions are used by: | ||||||
|  * - Database triggers for automatic search index maintenance |  * - Fuzzy search fallback (edit_distance) | ||||||
|  * - Direct SQL queries for search operations |  * - Regular expression matching (regex_match) | ||||||
|  * - Migration scripts for initial data population |  | ||||||
|  */ |  */ | ||||||
|  |  | ||||||
| import type { Database } from "better-sqlite3"; | import type { Database } from "better-sqlite3"; | ||||||
| import log from "../log.js"; | import log from "../log.js"; | ||||||
| import { normalize as utilsNormalize, stripTags } from "../utils.js"; |  | ||||||
|  |  | ||||||
| /** | /** | ||||||
|  * Configuration for fuzzy search operations |  * Configuration for fuzzy search operations | ||||||
| @@ -66,14 +64,6 @@ export class SqliteFunctionsService { | |||||||
|     private initializeFunctions(): void { |     private initializeFunctions(): void { | ||||||
|         // Bind all methods to preserve 'this' context |         // Bind all methods to preserve 'this' context | ||||||
|         this.functions = [ |         this.functions = [ | ||||||
|             { |  | ||||||
|                 name: "normalize_text", |  | ||||||
|                 implementation: this.normalizeText.bind(this), |  | ||||||
|                 options: { |  | ||||||
|                     deterministic: true, |  | ||||||
|                     varargs: false |  | ||||||
|                 } |  | ||||||
|             }, |  | ||||||
|             { |             { | ||||||
|                 name: "edit_distance", |                 name: "edit_distance", | ||||||
|                 implementation: this.editDistance.bind(this), |                 implementation: this.editDistance.bind(this), | ||||||
| @@ -89,30 +79,6 @@ export class SqliteFunctionsService { | |||||||
|                     deterministic: true, |                     deterministic: true, | ||||||
|                     varargs: true  // Changed to true to handle variable arguments |                     varargs: true  // Changed to true to handle variable arguments | ||||||
|                 } |                 } | ||||||
|             }, |  | ||||||
|             { |  | ||||||
|                 name: "tokenize_text", |  | ||||||
|                 implementation: this.tokenizeText.bind(this), |  | ||||||
|                 options: { |  | ||||||
|                     deterministic: true, |  | ||||||
|                     varargs: false |  | ||||||
|                 } |  | ||||||
|             }, |  | ||||||
|             { |  | ||||||
|                 name: "strip_html", |  | ||||||
|                 implementation: this.stripHtml.bind(this), |  | ||||||
|                 options: { |  | ||||||
|                     deterministic: true, |  | ||||||
|                     varargs: false |  | ||||||
|                 } |  | ||||||
|             }, |  | ||||||
|             { |  | ||||||
|                 name: "fuzzy_match", |  | ||||||
|                 implementation: this.fuzzyMatch.bind(this), |  | ||||||
|                 options: { |  | ||||||
|                     deterministic: true, |  | ||||||
|                     varargs: true  // Changed to true to handle variable arguments |  | ||||||
|                 } |  | ||||||
|             } |             } | ||||||
|         ]; |         ]; | ||||||
|     } |     } | ||||||
| @@ -182,22 +148,6 @@ export class SqliteFunctionsService { | |||||||
|  |  | ||||||
|     // ===== Function Implementations ===== |     // ===== Function Implementations ===== | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Normalize text by removing diacritics and converting to lowercase |  | ||||||
|      * Matches the behavior of utils.normalize() exactly |  | ||||||
|      *  |  | ||||||
|      * @param text Text to normalize |  | ||||||
|      * @returns Normalized text |  | ||||||
|      */ |  | ||||||
|     private normalizeText(text: string | null | undefined): string { |  | ||||||
|         if (!text || typeof text !== 'string') { |  | ||||||
|             return ''; |  | ||||||
|         } |  | ||||||
|          |  | ||||||
|         // Use the exact same normalization as the rest of the codebase |  | ||||||
|         return utilsNormalize(text); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |     /** | ||||||
|      * Calculate Levenshtein edit distance between two strings |      * Calculate Levenshtein edit distance between two strings | ||||||
|      * Optimized with early termination and single-array approach |      * Optimized with early termination and single-array approach | ||||||
| @@ -314,186 +264,6 @@ export class SqliteFunctionsService { | |||||||
|             return null; |             return null; | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Tokenize text into searchable words |  | ||||||
|      * Handles punctuation, camelCase, and snake_case |  | ||||||
|      *  |  | ||||||
|      * @param text Text to tokenize |  | ||||||
|      * @returns JSON array string of tokens |  | ||||||
|      */ |  | ||||||
|     private tokenizeText(text: string | null | undefined): string { |  | ||||||
|         if (!text || typeof text !== 'string') { |  | ||||||
|             return '[]'; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         try { |  | ||||||
|             // Use a Set to avoid duplicates from the start |  | ||||||
|             const expandedTokens: Set<string> = new Set(); |  | ||||||
|              |  | ||||||
|             // Split on whitespace and punctuation (apostrophes included); |  | ||||||
|             // underscore is deliberately left out so snake_case survives for the split below |  | ||||||
|             const tokens = text |  | ||||||
|                 .split(/[\s\n\r\t,;.!?()[\]{}"'`~@#$%^&*+=|\\/<>:-]+/) |  | ||||||
|                 .filter(token => token.length > 0); |  | ||||||
|              |  | ||||||
|             // Process each token |  | ||||||
|             for (const token of tokens) { |  | ||||||
|                 // Add the original token in lowercase |  | ||||||
|                 expandedTokens.add(token.toLowerCase()); |  | ||||||
|                  |  | ||||||
|                 // Handle snake_case first (split on underscore) |  | ||||||
|                 const snakeParts = token.split('_').filter(part => part.length > 0); |  | ||||||
|                 if (snakeParts.length > 1) { |  | ||||||
|                     // We have snake_case |  | ||||||
|                     for (const snakePart of snakeParts) { |  | ||||||
|                         // Add each snake part |  | ||||||
|                         expandedTokens.add(snakePart.toLowerCase()); |  | ||||||
|                          |  | ||||||
|                         // Also check for camelCase within each snake part |  | ||||||
|                         const camelParts = this.splitCamelCase(snakePart); |  | ||||||
|                         for (const camelPart of camelParts) { |  | ||||||
|                             if (camelPart.length > 0) { |  | ||||||
|                                 expandedTokens.add(camelPart.toLowerCase()); |  | ||||||
|                             } |  | ||||||
|                         } |  | ||||||
|                     } |  | ||||||
|                 } else { |  | ||||||
|                     // No snake_case, just check for camelCase |  | ||||||
|                     const camelParts = this.splitCamelCase(token); |  | ||||||
|                     for (const camelPart of camelParts) { |  | ||||||
|                         if (camelPart.length > 0) { |  | ||||||
|                             expandedTokens.add(camelPart.toLowerCase()); |  | ||||||
|                         } |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|              |  | ||||||
|             // Convert Set to Array for JSON serialization |  | ||||||
|             const uniqueTokens = Array.from(expandedTokens); |  | ||||||
|              |  | ||||||
|             // Return as JSON array string for SQL processing |  | ||||||
|             return JSON.stringify(uniqueTokens); |  | ||||||
|         } catch (error) { |  | ||||||
|             log.error(`Error tokenizing text in SQL: ${error}`); |  | ||||||
|             return '[]'; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|      |  | ||||||
|     /** |  | ||||||
|      * Helper method to split camelCase strings |  | ||||||
|      * @param str String to split |  | ||||||
|      * @returns Array of parts |  | ||||||
|      */ |  | ||||||
|     private splitCamelCase(str: string): string[] { |  | ||||||
|         // Split on transitions from lowercase to uppercase |  | ||||||
|         // Also handle sequences of uppercase letters (e.g., "XMLParser" -> ["XML", "Parser"]) |  | ||||||
|         return str.split(/(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])/); |  | ||||||
|     } |  | ||||||
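A quick worked illustration of that lookbehind/lookahead pattern (hypothetical REPL output):

    const parts = "XMLParser".split(/(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])/);
    // -> ["XML", "Parser"]; "getUserById" likewise yields ["get", "User", "By", "Id"]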
|  |  | ||||||
|     /** |  | ||||||
|      * Strip HTML tags from content |  | ||||||
|      * Removes script and style content, then strips tags and decodes entities |  | ||||||
|      *  |  | ||||||
|      * @param html HTML content |  | ||||||
|      * @returns Plain text without HTML tags |  | ||||||
|      */ |  | ||||||
|     private stripHtml(html: string | null | undefined): string { |  | ||||||
|         if (!html || typeof html !== 'string') { |  | ||||||
|             return ''; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         try { |  | ||||||
|             let text = html; |  | ||||||
|              |  | ||||||
|             // First remove script and style content entirely (including the tags) |  | ||||||
|             // This needs to happen before stripTags to remove the content |  | ||||||
|             text = text.replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, ''); |  | ||||||
|             text = text.replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, ''); |  | ||||||
|              |  | ||||||
|             // Now use stripTags to remove remaining HTML tags |  | ||||||
|             text = stripTags(text); |  | ||||||
|              |  | ||||||
|             // Decode common HTML entities |  | ||||||
|             text = text.replace(/&lt;/g, '<'); |  | ||||||
|             text = text.replace(/&gt;/g, '>'); |  | ||||||
|             text = text.replace(/&amp;/g, '&'); |  | ||||||
|             text = text.replace(/&quot;/g, '"'); |  | ||||||
|             text = text.replace(/&#39;/g, "'"); |  | ||||||
|             text = text.replace(/&apos;/g, "'"); |  | ||||||
|             text = text.replace(/&nbsp;/g, ' '); |  | ||||||
|              |  | ||||||
|             // Normalize whitespace - reduce multiple spaces to single space |  | ||||||
|             // But don't trim leading/trailing space if it came from &nbsp; |  | ||||||
|             text = text.replace(/\s+/g, ' '); |  | ||||||
|              |  | ||||||
|             return text; |  | ||||||
|         } catch (error) { |  | ||||||
|             log.error(`Error stripping HTML in SQL: ${error}`); |  | ||||||
|             return html; // Return original on error |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
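A hedged before/after for the decoding step above, mirroring the removed spec:

    // strip_html('<p>Fish &amp; Chips</p>')  -> 'Fish & Chips'
    // strip_html('Hello &lt;world&gt;')      -> 'Hello <world>'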
|  |  | ||||||
|     /** |  | ||||||
|      * Fuzzy match with configurable edit distance |  | ||||||
|      * Combines exact and fuzzy matching for optimal performance |  | ||||||
|      *  |  | ||||||
|      * SQLite will pass 2 or 3 arguments: |  | ||||||
|      * - 2 args: needle, haystack (uses default maxDistance) |  | ||||||
|      * - 3 args: needle, haystack, maxDistance |  | ||||||
|      *  |  | ||||||
|      * @returns 1 if match found, 0 otherwise |  | ||||||
|      */ |  | ||||||
|     private fuzzyMatch(...args: any[]): number { |  | ||||||
|         // Handle variable arguments from SQLite |  | ||||||
|         let needle: string | null | undefined = args[0]; |  | ||||||
|         let haystack: string | null | undefined = args[1]; |  | ||||||
|         let maxDistance: number = args.length > 2 ? args[2] : FUZZY_CONFIG.MAX_EDIT_DISTANCE; |  | ||||||
|          |  | ||||||
|         // Validate input types |  | ||||||
|         if (!needle || !haystack) { |  | ||||||
|             return 0; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if (typeof needle !== 'string' || typeof haystack !== 'string') { |  | ||||||
|             return 0; |  | ||||||
|         } |  | ||||||
|          |  | ||||||
|         // Validate and sanitize maxDistance |  | ||||||
|         if (typeof maxDistance !== 'number' || !Number.isFinite(maxDistance)) { |  | ||||||
|             maxDistance = FUZZY_CONFIG.MAX_EDIT_DISTANCE; |  | ||||||
|         } else { |  | ||||||
|             // Ensure it's a positive integer |  | ||||||
|             maxDistance = Math.max(0, Math.floor(maxDistance)); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Normalize for comparison |  | ||||||
|         const normalizedNeedle = needle.toLowerCase(); |  | ||||||
|         const normalizedHaystack = haystack.toLowerCase(); |  | ||||||
|  |  | ||||||
|         // Check exact match first (most common case) |  | ||||||
|         if (normalizedHaystack.includes(normalizedNeedle)) { |  | ||||||
|             return 1; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // For fuzzy matching, check individual words |  | ||||||
|         const words = normalizedHaystack.split(/\s+/).filter(w => w.length > 0); |  | ||||||
|          |  | ||||||
|         for (const word of words) { |  | ||||||
|             // Skip if word length difference is too large |  | ||||||
|             if (Math.abs(word.length - normalizedNeedle.length) > maxDistance) { |  | ||||||
|                 continue; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Check edit distance, passing maxDistance explicitly for this internal call |  | ||||||
|             const distance = this.editDistance(normalizedNeedle, word, maxDistance); |  | ||||||
|             if (distance <= maxDistance) { |  | ||||||
|                 return 1; |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         return 0; |  | ||||||
|     } |  | ||||||
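The two functions that survive this commit (edit_distance and regex_match, per the updated header comment) remain callable from SQL; a hedged better-sqlite3 sketch:

    const row = db.prepare(`SELECT edit_distance('hello', 'helo', 2) AS d`).get();
    // row.d === 1 (a single deletion)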
| } | } | ||||||
|  |  | ||||||
| // Export singleton instance getter | // Export singleton instance getter | ||||||
|   | |||||||
| @@ -1,153 +0,0 @@ | |||||||
| /** |  | ||||||
|  * Integration tests for SQLite search implementation |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| import { describe, it, expect, beforeAll, afterAll } from "vitest"; |  | ||||||
| import sql from "../sql.js"; |  | ||||||
| import { getSQLiteSearchService } from "./sqlite_search_service.js"; |  | ||||||
| import SearchContext from "./search_context.js"; |  | ||||||
| import NoteContentSqliteExp from "./expressions/note_content_sqlite.js"; |  | ||||||
| import NoteSet from "./note_set.js"; |  | ||||||
| import { getSqliteFunctionsService } from "./sqlite_functions.js"; |  | ||||||
|  |  | ||||||
| describe("SQLite Search Integration", () => { |  | ||||||
|     let searchService: ReturnType<typeof getSQLiteSearchService>; |  | ||||||
|     let searchContext: SearchContext; |  | ||||||
|  |  | ||||||
|     beforeAll(() => { |  | ||||||
|         // Initialize services |  | ||||||
|         searchService = getSQLiteSearchService(); |  | ||||||
|         searchContext = new SearchContext({ |  | ||||||
|             // searchBackend: "sqlite", // TODO: Add to SearchParams type |  | ||||||
|             // searchSqliteEnabled: true |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         // Register SQL functions |  | ||||||
|         const functionsService = getSqliteFunctionsService(); |  | ||||||
|         const db = sql.getDbConnection(); |  | ||||||
|         functionsService.registerFunctions(db); |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     afterAll(() => { |  | ||||||
|         // Cleanup if needed |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     describe("Service Initialization", () => { |  | ||||||
|         it("should initialize SQLite search service", () => { |  | ||||||
|             expect(searchService).toBeDefined(); |  | ||||||
|             const stats = searchService.getStatistics(); |  | ||||||
|             expect(stats).toBeDefined(); |  | ||||||
|             expect(stats).toHaveProperty("tablesInitialized"); |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it("should have registered SQL functions", () => { |  | ||||||
|             const functionsService = getSqliteFunctionsService(); |  | ||||||
|             expect(functionsService.isRegistered()).toBe(true); |  | ||||||
|         }); |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     describe("Expression Creation", () => { |  | ||||||
|         it("should create SQLite expression when available", () => { |  | ||||||
|             const exp = NoteContentSqliteExp.createExpression("*=*", { |  | ||||||
|                 tokens: ["test"], |  | ||||||
|                 raw: false, |  | ||||||
|                 flatText: false |  | ||||||
|             }); |  | ||||||
|              |  | ||||||
|             expect(exp).toBeDefined(); |  | ||||||
|             // Check if it's the SQLite version or fallback |  | ||||||
|             if (NoteContentSqliteExp.isAvailable()) { |  | ||||||
|                 expect(exp).toBeInstanceOf(NoteContentSqliteExp); |  | ||||||
|             } |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it("should handle different operators", () => { |  | ||||||
|             const operators = ["=", "!=", "*=*", "*=", "=*", "%=", "~="]; |  | ||||||
|              |  | ||||||
|             for (const op of operators) { |  | ||||||
|                 const exp = new NoteContentSqliteExp(op, { |  | ||||||
|                     tokens: ["test"], |  | ||||||
|                     raw: false, |  | ||||||
|                     flatText: false |  | ||||||
|                 }); |  | ||||||
|                  |  | ||||||
|                 expect(exp).toBeDefined(); |  | ||||||
|                 expect(exp.tokens).toEqual(["test"]); |  | ||||||
|             } |  | ||||||
|         }); |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     describe("Search Execution", () => { |  | ||||||
|         it("should execute search with empty input set", () => { |  | ||||||
|             const exp = new NoteContentSqliteExp("*=*", { |  | ||||||
|                 tokens: ["test"], |  | ||||||
|                 raw: false, |  | ||||||
|                 flatText: false |  | ||||||
|             }); |  | ||||||
|              |  | ||||||
|             const inputSet = new NoteSet(); |  | ||||||
|             const resultSet = exp.execute(inputSet, {}, searchContext); |  | ||||||
|              |  | ||||||
|             expect(resultSet).toBeDefined(); |  | ||||||
|             expect(resultSet).toBeInstanceOf(NoteSet); |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it("should handle search errors gracefully", () => { |  | ||||||
|             const exp = new NoteContentSqliteExp("invalid_op", { |  | ||||||
|                 tokens: ["test"], |  | ||||||
|                 raw: false, |  | ||||||
|                 flatText: false |  | ||||||
|             }); |  | ||||||
|              |  | ||||||
|             const inputSet = new NoteSet(); |  | ||||||
|             const resultSet = exp.execute(inputSet, {}, searchContext); |  | ||||||
|              |  | ||||||
|             expect(resultSet).toBeDefined(); |  | ||||||
|             expect(searchContext.hasError()).toBe(true); |  | ||||||
|         }); |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     describe("Backend Selection", () => { |  | ||||||
|         it("should use SQLite backend when enabled", () => { |  | ||||||
|             const ctx = new SearchContext({ |  | ||||||
|                 forceBackend: "sqlite" |  | ||||||
|             }); |  | ||||||
|              |  | ||||||
|             expect(ctx.searchBackend).toBe("sqlite"); |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it("should use TypeScript backend when forced", () => { |  | ||||||
|             const ctx = new SearchContext({ |  | ||||||
|                 forceBackend: "typescript" |  | ||||||
|             }); |  | ||||||
|              |  | ||||||
|             expect(ctx.searchBackend).toBe("typescript"); |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it("should default to SQLite when no preference", () => { |  | ||||||
|             const ctx = new SearchContext({}); |  | ||||||
|              |  | ||||||
|             // Defaults to SQLite, but accept either backend depending on environment |  | ||||||
|             expect(["sqlite", "typescript"]).toContain(ctx.searchBackend); |  | ||||||
|         }); |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     describe("Performance Statistics", () => { |  | ||||||
|         it("should track search statistics", () => { |  | ||||||
|             const initialStats = searchService.getStatistics(); |  | ||||||
|             const initialSearches = initialStats.totalSearches || 0; |  | ||||||
|              |  | ||||||
|             // Execute a search |  | ||||||
|             searchService.search( |  | ||||||
|                 ["test"], |  | ||||||
|                 "*=*", |  | ||||||
|                 searchContext, |  | ||||||
|                 {} |  | ||||||
|             ); |  | ||||||
|              |  | ||||||
|             const newStats = searchService.getStatistics(); |  | ||||||
|             expect(newStats.totalSearches).toBeGreaterThan(initialSearches); |  | ||||||
|             expect(newStats.lastSearchTimeMs).toBeGreaterThanOrEqual(0); |  | ||||||
|         }); |  | ||||||
|     }); |  | ||||||
| }); |  | ||||||
| @@ -1,320 +0,0 @@ | |||||||
| /** |  | ||||||
|  * Tests for SQLite Search Service |  | ||||||
|  *  |  | ||||||
|  * These tests verify that the SQLite-based search implementation |  | ||||||
|  * correctly handles all search operators and provides accurate results. |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| import { describe, it, expect, beforeAll, afterAll, beforeEach } from "vitest"; |  | ||||||
| import { SQLiteSearchService } from "./sqlite_search_service.js"; |  | ||||||
| import sql from "../sql.js"; |  | ||||||
| import SearchContext from "./search_context.js"; |  | ||||||
| import { initializeSqliteFunctions } from "./sqlite_functions.js"; |  | ||||||
|  |  | ||||||
| describe("SQLiteSearchService", () => { |  | ||||||
|     let searchService: SQLiteSearchService; |  | ||||||
|     let searchContext: SearchContext; |  | ||||||
|  |  | ||||||
|     beforeAll(() => { |  | ||||||
|         // Initialize SQLite functions for tests |  | ||||||
|         const db = sql.getDbConnection(); |  | ||||||
|         if (db) { |  | ||||||
|             initializeSqliteFunctions(db); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Get search service instance |  | ||||||
|         searchService = SQLiteSearchService.getInstance(); |  | ||||||
|          |  | ||||||
|         // Create test tables if they don't exist |  | ||||||
|         sql.execute(` |  | ||||||
|             CREATE TABLE IF NOT EXISTS note_search_content ( |  | ||||||
|                 noteId TEXT PRIMARY KEY, |  | ||||||
|                 noteContent TEXT, |  | ||||||
|                 normalized_content TEXT, |  | ||||||
|                 normalized_title TEXT, |  | ||||||
|                 isProtected INTEGER DEFAULT 0, |  | ||||||
|                 isDeleted INTEGER DEFAULT 0 |  | ||||||
|             ) |  | ||||||
|         `); |  | ||||||
|  |  | ||||||
|         sql.execute(` |  | ||||||
|             CREATE TABLE IF NOT EXISTS note_tokens ( |  | ||||||
|                 noteId TEXT PRIMARY KEY, |  | ||||||
|                 tokens TEXT |  | ||||||
|             ) |  | ||||||
|         `); |  | ||||||
|  |  | ||||||
|         sql.execute(` |  | ||||||
|             CREATE VIRTUAL TABLE IF NOT EXISTS note_fts USING fts5( |  | ||||||
|                 noteId UNINDEXED, |  | ||||||
|                 title, |  | ||||||
|                 content, |  | ||||||
|                 tokenize = 'unicode61' |  | ||||||
|             ) |  | ||||||
|         `); |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     beforeEach(() => { |  | ||||||
|         // Clear test data |  | ||||||
|         sql.execute(`DELETE FROM note_search_content`); |  | ||||||
|         sql.execute(`DELETE FROM note_tokens`); |  | ||||||
|         sql.execute(`DELETE FROM note_fts`); |  | ||||||
|  |  | ||||||
|         // Create fresh search context |  | ||||||
|         searchContext = new SearchContext(); |  | ||||||
|  |  | ||||||
|         // Insert test data |  | ||||||
|         insertTestNote("note1", "Hello World", "This is a test note with hello world content."); |  | ||||||
|         insertTestNote("note2", "Programming", "JavaScript and TypeScript programming languages."); |  | ||||||
|         insertTestNote("note3", "Fuzzy Search", "Testing fuzzy matching with similar words like helo and wrold."); |  | ||||||
|         insertTestNote("note4", "Special Characters", "Testing with special@email.com and user_name variables."); |  | ||||||
|         insertTestNote("note5", "CamelCase", "getUserName and setUserEmail functions in JavaScript."); |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     function insertTestNote(noteId: string, title: string, content: string) { |  | ||||||
|         // Insert into search content table |  | ||||||
|         sql.execute(` |  | ||||||
|             INSERT INTO note_search_content (noteId, noteContent, normalized_content, normalized_title, isProtected, isDeleted) |  | ||||||
|             VALUES (?, ?, LOWER(?), LOWER(?), 0, 0) |  | ||||||
|         `, [noteId, content, content, title]); |  | ||||||
|  |  | ||||||
|         // Generate tokens |  | ||||||
|         const tokens = tokenize(content + " " + title); |  | ||||||
|         sql.execute(` |  | ||||||
|             INSERT INTO note_tokens (noteId, tokens) |  | ||||||
|             VALUES (?, ?) |  | ||||||
|         `, [noteId, JSON.stringify(tokens)]); |  | ||||||
|  |  | ||||||
|         // Insert into FTS5 table |  | ||||||
|         sql.execute(` |  | ||||||
|             INSERT INTO note_fts (noteId, title, content) |  | ||||||
|             VALUES (?, ?, ?) |  | ||||||
|         `, [noteId, title, content]); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     function tokenize(text: string): string[] { |  | ||||||
|         return text.toLowerCase() |  | ||||||
|             .split(/[\s\n\r\t,;.!?()[\]{}"'`~@#$%^&*+=|\\/<>:_-]+/) |  | ||||||
|             .filter(token => token.length > 0); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     describe("Substring Search (*=*)", () => { |  | ||||||
|         it("should find notes containing substring", () => { |  | ||||||
|             const results = searchService.search(["hello"], "*=*", searchContext); |  | ||||||
|             expect(results).toContain("note1"); |  | ||||||
|             expect(results.size).toBe(1); |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it("should find notes with multiple tokens", () => { |  | ||||||
|             const results = searchService.search(["java", "script"], "*=*", searchContext); |  | ||||||
|             expect(results).toContain("note2"); |  | ||||||
|             expect(results).toContain("note5"); |  | ||||||
|             expect(results.size).toBe(2); |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it("should be case insensitive", () => { |  | ||||||
|             const results = searchService.search(["HELLO"], "*=*", searchContext); |  | ||||||
|             expect(results).toContain("note1"); |  | ||||||
|         }); |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     describe("Fuzzy Search (~=)", () => { |  | ||||||
|         it("should find notes with fuzzy matching", () => { |  | ||||||
|             const results = searchService.search(["helo"], "~=", searchContext); |  | ||||||
|             expect(results).toContain("note3"); // Contains "helo" |  | ||||||
|             expect(results).toContain("note1"); // Contains "hello" (1 edit distance) |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it("should respect edit distance threshold", () => { |  | ||||||
|             const results = searchService.search(["xyz"], "~=", searchContext); |  | ||||||
|             expect(results.size).toBe(0); // Too different from any content |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it("should handle multiple fuzzy tokens", () => { |  | ||||||
|             const results = searchService.search(["fuzzy", "match"], "~=", searchContext); |  | ||||||
|             expect(results).toContain("note3"); |  | ||||||
|         }); |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     describe("Prefix Search (=*)", () => { |  | ||||||
|         it("should find notes starting with prefix", () => { |  | ||||||
|             const results = searchService.search(["test"], "=*", searchContext); |  | ||||||
|             expect(results).toContain("note3"); // "Testing fuzzy..." |  | ||||||
|             expect(results).toContain("note4"); // "Testing with..." |  | ||||||
|             expect(results.size).toBe(2); |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it("should handle multiple prefixes", () => { |  | ||||||
|             const results = searchService.search(["java", "type"], "=*", searchContext); |  | ||||||
|             expect(results).toContain("note2"); // Has both "JavaScript" and "TypeScript" |  | ||||||
|         }); |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     describe("Suffix Search (*=)", () => { |  | ||||||
|         it("should find notes ending with suffix", () => { |  | ||||||
|             const results = searchService.search(["script"], "*=", searchContext); |  | ||||||
|             expect(results).toContain("note2"); // "JavaScript" and "TypeScript" |  | ||||||
|             expect(results).toContain("note5"); // "JavaScript" |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it("should handle special suffixes", () => { |  | ||||||
|             const results = searchService.search([".com"], "*=", searchContext); |  | ||||||
|             expect(results).toContain("note4"); // "special@email.com" |  | ||||||
|         }); |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     describe("Regex Search (%=)", () => { |  | ||||||
|         it("should find notes matching regex pattern", () => { |  | ||||||
|             const results = searchService.search(["\\w+@\\w+\\.com"], "%=", searchContext); |  | ||||||
|             expect(results).toContain("note4"); // Contains email pattern |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it("should handle complex patterns", () => { |  | ||||||
|             const results = searchService.search(["get\\w+Name"], "%=", searchContext); |  | ||||||
|             expect(results).toContain("note5"); // "getUserName" |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it("should handle invalid regex gracefully", () => { |  | ||||||
|             const results = searchService.search(["[invalid"], "%=", searchContext); |  | ||||||
|             expect(results.size).toBe(0); // Should return empty on invalid regex |  | ||||||
|         }); |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     describe("Exact Word Search (=)", () => { |  | ||||||
|         it("should find notes with exact word match", () => { |  | ||||||
|             const results = searchService.search(["hello"], "=", searchContext); |  | ||||||
|             expect(results).toContain("note1"); |  | ||||||
|             expect(results.size).toBe(1); |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it("should not match partial words", () => { |  | ||||||
|             const results = searchService.search(["java"], "=", searchContext); |  | ||||||
|             expect(results.size).toBe(0); // "JavaScript" contains "java" but not as whole word |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it("should find multiple exact words", () => { |  | ||||||
|             const results = searchService.search(["fuzzy", "matching"], "=", searchContext); |  | ||||||
|             expect(results).toContain("note3"); |  | ||||||
|         }); |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     describe("Not Equals Search (!=)", () => { |  | ||||||
|         it("should find notes not containing exact word", () => { |  | ||||||
|             const results = searchService.search(["hello"], "!=", searchContext); |  | ||||||
|             expect(results).not.toContain("note1"); |  | ||||||
|             expect(results.size).toBe(4); // All except note1 |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it("should handle multiple tokens", () => { |  | ||||||
|             const results = searchService.search(["fuzzy", "matching"], "!=", searchContext); |  | ||||||
|             expect(results).not.toContain("note3"); |  | ||||||
|             expect(results.size).toBe(4); // All except note3 |  | ||||||
|         }); |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     describe("Search Options", () => { |  | ||||||
|         it("should respect limit option", () => { |  | ||||||
|             const results = searchService.search(["test"], "*=*", searchContext, { limit: 1 }); |  | ||||||
|             expect(results.size).toBeLessThanOrEqual(1); |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it("should filter by noteId set", () => { |  | ||||||
|             const noteIdFilter = new Set(["note1", "note3"]); |  | ||||||
|             const results = searchService.search(["test"], "*=*", searchContext, { noteIdFilter }); |  | ||||||
|              |  | ||||||
|             for (const noteId of results) { |  | ||||||
|                 expect(noteIdFilter).toContain(noteId); |  | ||||||
|             } |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it("should exclude deleted notes by default", () => { |  | ||||||
|             // Mark note1 as deleted |  | ||||||
|             sql.execute(`UPDATE note_search_content SET isDeleted = 1 WHERE noteId = 'note1'`); |  | ||||||
|              |  | ||||||
|             const results = searchService.search(["hello"], "*=*", searchContext); |  | ||||||
|             expect(results).not.toContain("note1"); |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it("should include deleted notes when specified", () => { |  | ||||||
|             // Mark note1 as deleted |  | ||||||
|             sql.execute(`UPDATE note_search_content SET isDeleted = 1 WHERE noteId = 'note1'`); |  | ||||||
|              |  | ||||||
|             const results = searchService.search(["hello"], "*=*", searchContext, { includeDeleted: true }); |  | ||||||
|             expect(results).toContain("note1"); |  | ||||||
|         }); |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     describe("Complex Queries", () => { |  | ||||||
|         it("should combine multiple searches with AND", () => { |  | ||||||
|             const queries = [ |  | ||||||
|                 { tokens: ["java"], operator: "*=*" }, |  | ||||||
|                 { tokens: ["script"], operator: "*=*" } |  | ||||||
|             ]; |  | ||||||
|              |  | ||||||
|             const results = searchService.searchMultiple(queries, "AND", searchContext); |  | ||||||
|             expect(results).toContain("note2"); |  | ||||||
|             expect(results).toContain("note5"); |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it("should combine multiple searches with OR", () => { |  | ||||||
|             const queries = [ |  | ||||||
|                 { tokens: ["hello"], operator: "*=*" }, |  | ||||||
|                 { tokens: ["fuzzy"], operator: "*=*" } |  | ||||||
|             ]; |  | ||||||
|              |  | ||||||
|             const results = searchService.searchMultiple(queries, "OR", searchContext); |  | ||||||
|             expect(results).toContain("note1"); |  | ||||||
|             expect(results).toContain("note3"); |  | ||||||
|             expect(results.size).toBe(2); |  | ||||||
|         }); |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     describe("Performance", () => { |  | ||||||
|         beforeEach(() => { |  | ||||||
|             // Add more test data for performance testing |  | ||||||
|             for (let i = 10; i < 1000; i++) { |  | ||||||
|                 insertTestNote( |  | ||||||
|                     `note${i}`, |  | ||||||
|                     `Title ${i}`, |  | ||||||
|                     `This is note number ${i} with some random content for testing performance.` |  | ||||||
|                 ); |  | ||||||
|             } |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it("should handle large result sets efficiently", () => { |  | ||||||
|             const startTime = Date.now(); |  | ||||||
|             const results = searchService.search(["note"], "*=*", searchContext); |  | ||||||
|             const elapsed = Date.now() - startTime; |  | ||||||
|              |  | ||||||
|             expect(results.size).toBeGreaterThan(100); |  | ||||||
|             expect(elapsed).toBeLessThan(1000); // Should complete within 1 second |  | ||||||
|         }); |  | ||||||
|  |  | ||||||
|         it("should use limit to restrict results", () => { |  | ||||||
|             const startTime = Date.now(); |  | ||||||
|             const results = searchService.search(["note"], "*=*", searchContext, { limit: 10 }); |  | ||||||
|             const elapsed = Date.now() - startTime; |  | ||||||
|              |  | ||||||
|             expect(results.size).toBeLessThanOrEqual(10); |  | ||||||
|             expect(elapsed).toBeLessThan(100); // Should be very fast with limit |  | ||||||
|         }); |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     describe("Statistics", () => { |  | ||||||
|         it("should return correct statistics", () => { |  | ||||||
|             const stats = searchService.getStatistics(); |  | ||||||
|              |  | ||||||
|             expect(stats.tablesInitialized).toBe(true); |  | ||||||
|             expect(stats.indexedNotes).toBe(5); |  | ||||||
|             expect(stats.totalTokens).toBe(5); |  | ||||||
|             expect(stats.fts5Available).toBe(true); |  | ||||||
|         }); |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     afterAll(() => { |  | ||||||
|         // Clean up test data |  | ||||||
|         sql.execute(`DELETE FROM note_search_content`); |  | ||||||
|         sql.execute(`DELETE FROM note_tokens`); |  | ||||||
|         sql.execute(`DELETE FROM note_fts`); |  | ||||||
|     }); |  | ||||||
| }); |  | ||||||
| @@ -1,943 +0,0 @@ | |||||||
| /** |  | ||||||
|  * SQLite Search Service |  | ||||||
|  *  |  | ||||||
|  * This service provides high-performance search operations using pure SQLite queries. |  | ||||||
|  * It implements all search operators with results identical to the TypeScript-based |  | ||||||
|  * implementation, typically 10-30x faster. |  | ||||||
|  *  |  | ||||||
|  * Operators supported: |  | ||||||
|  * - *=* (substring): Uses LIKE on normalized content |  | ||||||
|  * - ~= (fuzzy): Uses edit_distance function with tokens |  | ||||||
|  * - =* (prefix): Uses LIKE with prefix pattern |  | ||||||
|  * - *= (suffix): Uses LIKE with suffix pattern |  | ||||||
|  * - %= (regex): Uses regex_match function |  | ||||||
|  * - = (exact word): Uses FTS5 table |  | ||||||
|  * - != (not equals): Inverse of equals |  | ||||||
|  *  |  | ||||||
|  * Performance characteristics: |  | ||||||
|  * - Substring search: O(n) with optimized LIKE |  | ||||||
|  * - Fuzzy search: O(n*m) where m is token count |  | ||||||
|  * - Prefix/suffix: O(n) with optimized LIKE |  | ||||||
|  * - Regex: O(n) with native regex support |  | ||||||
|  * - Exact word: O(log n) with FTS5 index |  | ||||||
|  */ |  | ||||||
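|  |  | ||||||
| // Illustrative sketch (not exercised at runtime) of how the operators above |  | ||||||
| // translate into SQL; the exact predicates are built in the methods below: |  | ||||||
| // |  | ||||||
| //   "*=*"  ->  full_text_normalized LIKE '%' || token || '%' |  | ||||||
| //   "=*"   ->  full_text_normalized LIKE token || '%' |  | ||||||
| //   "*="   ->  full_text_normalized LIKE '%' || token |  | ||||||
| //   "~="   ->  edit_distance(note_token, token, 2) <= 2 |  | ||||||
| //   "%="   ->  regex_match(title || ' ' || content, pattern, 'ims') = 1 |  | ||||||
| //   "="    ->  notes_fts MATCH '"token"' |  | ||||||
| //   "!="   ->  set difference: all notes minus the "=" result |  | ||||||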
|  |  | ||||||
| import sql from "../sql.js"; |  | ||||||
| import log from "../log.js"; |  | ||||||
| import type SearchContext from "./search_context.js"; |  | ||||||
| import protectedSessionService from "../protected_session.js"; |  | ||||||
| import { normalize } from "../utils.js"; |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Configuration for search operations |  | ||||||
|  */ |  | ||||||
| const SEARCH_CONFIG = { |  | ||||||
|     MAX_EDIT_DISTANCE: 2, |  | ||||||
|     MIN_TOKEN_LENGTH: 3, |  | ||||||
|     MAX_RESULTS: 10000, |  | ||||||
|     BATCH_SIZE: 1000, |  | ||||||
|     LOG_PERFORMANCE: true, |  | ||||||
| } as const; |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Interface for search results |  | ||||||
|  */ |  | ||||||
| export interface SearchResult { |  | ||||||
|     noteId: string; |  | ||||||
|     score?: number; |  | ||||||
|     snippet?: string; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Interface for search options |  | ||||||
|  */ |  | ||||||
| export interface SearchOptions { |  | ||||||
|     includeProtected?: boolean; |  | ||||||
|     includeDeleted?: boolean; |  | ||||||
|     noteIdFilter?: Set<string>; |  | ||||||
|     limit?: number; |  | ||||||
|     offset?: number; |  | ||||||
| } |  | ||||||
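|  |  | ||||||
| // A minimal usage sketch (illustrative; assumes the caller already holds a |  | ||||||
| // SearchContext, as the query executor does): |  | ||||||
| // |  | ||||||
| //   const service = getSQLiteSearchService(); |  | ||||||
| //   const hits = service.search(["hello", "world"], "*=*", searchContext, { |  | ||||||
| //       includeDeleted: false, |  | ||||||
| //       limit: 100, |  | ||||||
| //   }); |  | ||||||
| //   // hits is a Set<string> of matching noteIds |  | ||||||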
|  |  | ||||||
| /** |  | ||||||
|  * SQLite-based search service for high-performance note searching |  | ||||||
|  */ |  | ||||||
| export class SQLiteSearchService { |  | ||||||
|     private static instance: SQLiteSearchService | null = null; |  | ||||||
|     private isInitialized: boolean = false; |  | ||||||
|     private statistics = { |  | ||||||
|         tablesInitialized: false, |  | ||||||
|         totalSearches: 0, |  | ||||||
|         totalTimeMs: 0, |  | ||||||
|         averageTimeMs: 0, |  | ||||||
|         lastSearchTimeMs: 0 |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     private constructor() { |  | ||||||
|         this.checkAndInitialize(); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Get singleton instance of the search service |  | ||||||
|      */ |  | ||||||
|     static getInstance(): SQLiteSearchService { |  | ||||||
|         if (!SQLiteSearchService.instance) { |  | ||||||
|             SQLiteSearchService.instance = new SQLiteSearchService(); |  | ||||||
|         } |  | ||||||
|         return SQLiteSearchService.instance; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Check if search tables are initialized and create them if needed |  | ||||||
|      */ |  | ||||||
|     private checkAndInitialize(): void { |  | ||||||
|         try { |  | ||||||
|             // Check if tables exist |  | ||||||
|             const tableExists = sql.getValue(` |  | ||||||
|                 SELECT name FROM sqlite_master  |  | ||||||
|                 WHERE type='table' AND name='note_search_content' |  | ||||||
|             `); |  | ||||||
|  |  | ||||||
|             if (!tableExists) { |  | ||||||
|                 log.info("Search tables not found. They will be created by migration."); |  | ||||||
|                 this.isInitialized = false; |  | ||||||
|                 return; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // Verify table structure |  | ||||||
|             const columnCount = sql.getValue<number>(` |  | ||||||
|                 SELECT COUNT(*) FROM pragma_table_info('note_search_content') |  | ||||||
|             `) || 0; |  | ||||||
|  |  | ||||||
|             if (columnCount > 0) { |  | ||||||
|                 this.isInitialized = true; |  | ||||||
|                 this.statistics.tablesInitialized = true; |  | ||||||
|                 log.info("SQLite search service initialized successfully"); |  | ||||||
|             } |  | ||||||
|         } catch (error) { |  | ||||||
|             log.error(`Failed to initialize SQLite search service: ${error}`); |  | ||||||
|             this.isInitialized = false; |  | ||||||
|             this.statistics.tablesInitialized = false; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Main search method that delegates to appropriate operator implementation |  | ||||||
|      */ |  | ||||||
|     search( |  | ||||||
|         tokens: string[], |  | ||||||
|         operator: string, |  | ||||||
|         searchContext: SearchContext, |  | ||||||
|         options: SearchOptions = {} |  | ||||||
|     ): Set<string> { |  | ||||||
|         if (!this.isInitialized) { |  | ||||||
|             log.info("SQLite search service not initialized, falling back to traditional search"); |  | ||||||
|             return new Set(); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         const startTime = Date.now(); |  | ||||||
|         let results: Set<string>; |  | ||||||
|  |  | ||||||
|         try { |  | ||||||
|             // Normalize tokens for consistent searching |  | ||||||
|             const normalizedTokens = tokens.map(token => normalize(token).toLowerCase()); |  | ||||||
|  |  | ||||||
|             // Delegate to appropriate search method based on operator |  | ||||||
|             switch (operator) { |  | ||||||
|                 case "*=*": |  | ||||||
|                     results = this.searchSubstring(normalizedTokens, options); |  | ||||||
|                     break; |  | ||||||
|                 case "~=": |  | ||||||
|                     results = this.searchFuzzy(normalizedTokens, options); |  | ||||||
|                     break; |  | ||||||
|                 case "=*": |  | ||||||
|                     results = this.searchPrefix(normalizedTokens, options); |  | ||||||
|                     break; |  | ||||||
|                 case "*=": |  | ||||||
|                     results = this.searchSuffix(normalizedTokens, options); |  | ||||||
|                     break; |  | ||||||
|                 case "%=": |  | ||||||
|                     results = this.searchRegex(tokens, options); // Use original tokens for regex |  | ||||||
|                     break; |  | ||||||
|                 case "=": |  | ||||||
|                     results = this.searchExactWord(normalizedTokens, options); |  | ||||||
|                     break; |  | ||||||
|                 case "!=": |  | ||||||
|                     results = this.searchNotEquals(normalizedTokens, options); |  | ||||||
|                     break; |  | ||||||
|                 default: |  | ||||||
|                     log.info(`Unsupported search operator: ${operator}`); |  | ||||||
|                     return new Set(); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             const elapsed = Date.now() - startTime; |  | ||||||
|              |  | ||||||
|             // Update statistics |  | ||||||
|             this.statistics.totalSearches++; |  | ||||||
|             this.statistics.totalTimeMs += elapsed; |  | ||||||
|             this.statistics.lastSearchTimeMs = elapsed; |  | ||||||
|             this.statistics.averageTimeMs = this.statistics.totalTimeMs / this.statistics.totalSearches; |  | ||||||
|              |  | ||||||
|             if (SEARCH_CONFIG.LOG_PERFORMANCE) { |  | ||||||
|                 log.info(`SQLite search completed: operator=${operator}, tokens=${tokens.join(" ")}, ` + |  | ||||||
|                         `results=${results.size}, time=${elapsed}ms`); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             return results; |  | ||||||
|         } catch (error) { |  | ||||||
|             log.error(`SQLite search failed: ${error}`); |  | ||||||
|             searchContext.addError(`Search failed: ${error}`); |  | ||||||
|             return new Set(); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Substring search using LIKE on normalized content |  | ||||||
|      * Operator: *=* |  | ||||||
|      */ |  | ||||||
|     private searchSubstring(tokens: string[], options: SearchOptions): Set<string> { |  | ||||||
|         const results = new Set<string>(); |  | ||||||
|          |  | ||||||
|         // Build WHERE clause for all tokens |  | ||||||
|         const conditions = tokens.map(() =>  |  | ||||||
|             `nsc.full_text_normalized LIKE '%' || ? || '%'` |  | ||||||
|         ).join(' AND '); |  | ||||||
|  |  | ||||||
|         // Build base query - JOIN with notes table for isDeleted/isProtected filtering |  | ||||||
|         let query = ` |  | ||||||
|             SELECT DISTINCT nsc.noteId  |  | ||||||
|             FROM note_search_content nsc |  | ||||||
|             JOIN notes n ON nsc.noteId = n.noteId |  | ||||||
|             WHERE ${conditions} |  | ||||||
|         `; |  | ||||||
|  |  | ||||||
|         const params = [...tokens]; |  | ||||||
|  |  | ||||||
|         // Add filters using the notes table columns |  | ||||||
|         if (!options.includeDeleted) { |  | ||||||
|             query += ` AND n.isDeleted = 0`; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if (!options.includeProtected && !protectedSessionService.isProtectedSessionAvailable()) { |  | ||||||
|             query += ` AND n.isProtected = 0`; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Add limit if specified. Note: the LIMIT applies before the |  | ||||||
|         // in-memory noteIdFilter below, so a filtered search may return |  | ||||||
|         // fewer than `limit` rows. |  | ||||||
|         if (options.limit) { |  | ||||||
|             query += ` LIMIT ${options.limit}`; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Execute query |  | ||||||
|         for (const row of sql.iterateRows<{ noteId: string }>(query, params)) { |  | ||||||
|             // Apply noteId filter if provided |  | ||||||
|             if (!options.noteIdFilter || options.noteIdFilter.has(row.noteId)) { |  | ||||||
|                 results.add(row.noteId); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         return results; |  | ||||||
|     } |  | ||||||
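|  |  | ||||||
|     // For tokens ["foo", "bar"] (default options, no protected session) the |  | ||||||
|     // method above effectively runs -- illustrative sketch of the final SQL: |  | ||||||
|     // |  | ||||||
|     //   SELECT DISTINCT nsc.noteId |  | ||||||
|     //   FROM note_search_content nsc |  | ||||||
|     //   JOIN notes n ON nsc.noteId = n.noteId |  | ||||||
|     //   WHERE nsc.full_text_normalized LIKE '%' || 'foo' || '%' |  | ||||||
|     //     AND nsc.full_text_normalized LIKE '%' || 'bar' || '%' |  | ||||||
|     //     AND n.isDeleted = 0 AND n.isProtected = 0 |  | ||||||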
|  |  | ||||||
|     /** |  | ||||||
|      * Fuzzy search using edit distance on tokens |  | ||||||
|      * Operator: ~= |  | ||||||
|      */ |  | ||||||
|     private searchFuzzy(tokens: string[], options: SearchOptions): Set<string> { |  | ||||||
|         const results = new Set<string>(); |  | ||||||
|          |  | ||||||
|         // For fuzzy search, we need to check tokens individually |  | ||||||
|         // First, get all note IDs that might match |  | ||||||
|         let query = ` |  | ||||||
|             SELECT DISTINCT nsc.noteId, nsc.full_text_normalized |  | ||||||
|             FROM note_search_content nsc |  | ||||||
|             JOIN notes n ON nsc.noteId = n.noteId |  | ||||||
|             WHERE 1=1 |  | ||||||
|         `; |  | ||||||
|  |  | ||||||
|         if (!options.includeDeleted) { |  | ||||||
|             query += ` AND n.isDeleted = 0`; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if (!options.includeProtected && !protectedSessionService.isProtectedSessionAvailable()) { |  | ||||||
|             query += ` AND n.isProtected = 0`; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Load candidate note content into memory for token comparison |  | ||||||
|         const noteData = new Map<string, string>(); |  | ||||||
|          |  | ||||||
|         for (const row of sql.iterateRows<{ noteId: string, full_text_normalized: string }>(query)) { |  | ||||||
|             if (options.noteIdFilter && !options.noteIdFilter.has(row.noteId)) { |  | ||||||
|                 continue; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             noteData.set(row.noteId, row.full_text_normalized || ''); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Get tokens for fuzzy matching. The IN (...) clause is built only |  | ||||||
|         // when there are candidate notes; an empty placeholder list would |  | ||||||
|         // produce invalid SQL. |  | ||||||
|         const noteTokens = new Map<string, Set<string>>(); |  | ||||||
|         if (noteData.size > 0) { |  | ||||||
|             const noteIds = Array.from(noteData.keys()); |  | ||||||
|             const tokenQuery = ` |  | ||||||
|                 SELECT DISTINCT noteId, token_normalized |  | ||||||
|                 FROM note_tokens |  | ||||||
|                 WHERE noteId IN (${noteIds.map(() => '?').join(',')}) |  | ||||||
|             `; |  | ||||||
|  |  | ||||||
|             for (const row of sql.iterateRows<{ noteId: string, token_normalized: string }>( |  | ||||||
|                 tokenQuery, noteIds |  | ||||||
|             )) { |  | ||||||
|                 if (!noteTokens.has(row.noteId)) { |  | ||||||
|                     noteTokens.set(row.noteId, new Set()); |  | ||||||
|                 } |  | ||||||
|                 noteTokens.get(row.noteId)!.add(row.token_normalized); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Now check each note for fuzzy matches |  | ||||||
|         for (const [noteId, content] of noteData) { |  | ||||||
|             let allTokensMatch = true; |  | ||||||
|             const noteTokenSet = noteTokens.get(noteId) || new Set(); |  | ||||||
|  |  | ||||||
|             for (const searchToken of tokens) { |  | ||||||
|                 let tokenMatches = false; |  | ||||||
|  |  | ||||||
|                 // Check if token matches any word in the note |  | ||||||
|                 // First check exact match in content |  | ||||||
|                 if (content.includes(searchToken)) { |  | ||||||
|                     tokenMatches = true; |  | ||||||
|                 } else { |  | ||||||
|                     // Check fuzzy match against tokens |  | ||||||
|                     for (const noteToken of noteTokenSet) { |  | ||||||
|                         if (this.fuzzyMatchTokens(searchToken, noteToken)) { |  | ||||||
|                             tokenMatches = true; |  | ||||||
|                             break; |  | ||||||
|                         } |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|  |  | ||||||
|                 if (!tokenMatches) { |  | ||||||
|                     allTokensMatch = false; |  | ||||||
|                     break; |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             if (allTokensMatch) { |  | ||||||
|                 results.add(noteId); |  | ||||||
|                  |  | ||||||
|                 if (options.limit && results.size >= options.limit) { |  | ||||||
|                     break; |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         return results; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Helper method for fuzzy matching between two tokens |  | ||||||
|      */ |  | ||||||
|     private fuzzyMatchTokens(token1: string, token2: string): boolean { |  | ||||||
|         // Quick exact match check |  | ||||||
|         if (token1 === token2) { |  | ||||||
|             return true; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Don't fuzzy match very short tokens |  | ||||||
|         if (token1.length < SEARCH_CONFIG.MIN_TOKEN_LENGTH ||  |  | ||||||
|             token2.length < SEARCH_CONFIG.MIN_TOKEN_LENGTH) { |  | ||||||
|             return false; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Check if length difference is within edit distance threshold |  | ||||||
|         if (Math.abs(token1.length - token2.length) > SEARCH_CONFIG.MAX_EDIT_DISTANCE) { |  | ||||||
|             return false; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Use the custom edit_distance SQL function for the calculation; |  | ||||||
|         // treat a missing result as "no match" rather than letting a null |  | ||||||
|         // coerce to 0 and match everything |  | ||||||
|         const distance = sql.getValue<number>(` |  | ||||||
|             SELECT edit_distance(?, ?, ?) |  | ||||||
|         `, [token1, token2, SEARCH_CONFIG.MAX_EDIT_DISTANCE]); |  | ||||||
|  |  | ||||||
|         return (distance ?? Infinity) <= SEARCH_CONFIG.MAX_EDIT_DISTANCE; |  | ||||||
|     } |  | ||||||
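|  |  | ||||||
|     /** |  | ||||||
|      * Reference sketch (not called by the service) of the bounded Levenshtein |  | ||||||
|      * distance that the edit_distance SQL function is assumed to compute; |  | ||||||
|      * handy for tests or as a fallback if the custom function is missing. |  | ||||||
|      */ |  | ||||||
|     private editDistanceReference(a: string, b: string, max: number): number { |  | ||||||
|         if (Math.abs(a.length - b.length) > max) return max + 1; |  | ||||||
|         // single-row dynamic programming table |  | ||||||
|         const prev = Array.from({ length: b.length + 1 }, (_, i) => i); |  | ||||||
|         for (let i = 1; i <= a.length; i++) { |  | ||||||
|             let diag = prev[0]; // d[i-1][j-1] |  | ||||||
|             prev[0] = i; |  | ||||||
|             for (let j = 1; j <= b.length; j++) { |  | ||||||
|                 const tmp = prev[j]; // d[i-1][j] |  | ||||||
|                 prev[j] = Math.min( |  | ||||||
|                     prev[j] + 1,     // deletion |  | ||||||
|                     prev[j - 1] + 1, // insertion |  | ||||||
|                     diag + (a[i - 1] === b[j - 1] ? 0 : 1) // substitution |  | ||||||
|                 ); |  | ||||||
|                 diag = tmp; |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|         return prev[b.length]; |  | ||||||
|     } |  | ||||||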
|  |  | ||||||
|     /** |  | ||||||
|      * Prefix search using LIKE with prefix pattern |  | ||||||
|      * Operator: =* |  | ||||||
|      */ |  | ||||||
|     private searchPrefix(tokens: string[], options: SearchOptions): Set<string> { |  | ||||||
|         const results = new Set<string>(); |  | ||||||
|          |  | ||||||
|         // Build WHERE clause for all tokens |  | ||||||
|         const conditions = tokens.map(() =>  |  | ||||||
|             `nsc.full_text_normalized LIKE ? || '%'` |  | ||||||
|         ).join(' AND '); |  | ||||||
|  |  | ||||||
|         // Build query - JOIN with notes table for isDeleted/isProtected filtering |  | ||||||
|         let query = ` |  | ||||||
|             SELECT DISTINCT nsc.noteId  |  | ||||||
|             FROM note_search_content nsc |  | ||||||
|             JOIN notes n ON nsc.noteId = n.noteId |  | ||||||
|             WHERE ${conditions} |  | ||||||
|         `; |  | ||||||
|  |  | ||||||
|         const params = [...tokens]; |  | ||||||
|  |  | ||||||
|         // Add filters using the notes table columns |  | ||||||
|         if (!options.includeDeleted) { |  | ||||||
|             query += ` AND n.isDeleted = 0`; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if (!options.includeProtected && !protectedSessionService.isProtectedSessionAvailable()) { |  | ||||||
|             query += ` AND n.isProtected = 0`; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Add limit if specified |  | ||||||
|         if (options.limit) { |  | ||||||
|             query += ` LIMIT ${options.limit}`; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Execute query |  | ||||||
|         for (const row of sql.iterateRows<{ noteId: string }>(query, params)) { |  | ||||||
|             if (!options.noteIdFilter || options.noteIdFilter.has(row.noteId)) { |  | ||||||
|                 results.add(row.noteId); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         return results; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Suffix search using LIKE with suffix pattern |  | ||||||
|      * Operator: *= |  | ||||||
|      */ |  | ||||||
|     private searchSuffix(tokens: string[], options: SearchOptions): Set<string> { |  | ||||||
|         const results = new Set<string>(); |  | ||||||
|          |  | ||||||
|         // Build WHERE clause for all tokens |  | ||||||
|         const conditions = tokens.map(() =>  |  | ||||||
|             `nsc.full_text_normalized LIKE '%' || ?` |  | ||||||
|         ).join(' AND '); |  | ||||||
|  |  | ||||||
|         // Build query - JOIN with notes table for isDeleted/isProtected filtering |  | ||||||
|         let query = ` |  | ||||||
|             SELECT DISTINCT nsc.noteId  |  | ||||||
|             FROM note_search_content nsc |  | ||||||
|             JOIN notes n ON nsc.noteId = n.noteId |  | ||||||
|             WHERE ${conditions} |  | ||||||
|         `; |  | ||||||
|  |  | ||||||
|         const params = [...tokens]; |  | ||||||
|  |  | ||||||
|         // Add filters using the notes table columns |  | ||||||
|         if (!options.includeDeleted) { |  | ||||||
|             query += ` AND n.isDeleted = 0`; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if (!options.includeProtected && !protectedSessionService.isProtectedSessionAvailable()) { |  | ||||||
|             query += ` AND n.isProtected = 0`; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Add limit if specified |  | ||||||
|         if (options.limit) { |  | ||||||
|             query += ` LIMIT ${options.limit}`; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Execute query |  | ||||||
|         for (const row of sql.iterateRows<{ noteId: string }>(query, params)) { |  | ||||||
|             if (!options.noteIdFilter || options.noteIdFilter.has(row.noteId)) { |  | ||||||
|                 results.add(row.noteId); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         return results; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Regex search using regex_match function |  | ||||||
|      * Operator: %= |  | ||||||
|      */ |  | ||||||
|     private searchRegex(patterns: string[], options: SearchOptions): Set<string> { |  | ||||||
|         const results = new Set<string>(); |  | ||||||
|          |  | ||||||
|         // For regex, we use the combined title+content (not normalized) |  | ||||||
|         // Build WHERE clause for all patterns |  | ||||||
|         const conditions = patterns.map(() =>  |  | ||||||
|             `regex_match(nsc.title || ' ' || nsc.content, ?, 'ims') = 1` |  | ||||||
|         ).join(' AND '); |  | ||||||
|  |  | ||||||
|         // Build query - JOIN with notes table for isDeleted/isProtected filtering |  | ||||||
|         let query = ` |  | ||||||
|             SELECT DISTINCT nsc.noteId  |  | ||||||
|             FROM note_search_content nsc |  | ||||||
|             JOIN notes n ON nsc.noteId = n.noteId |  | ||||||
|             WHERE ${conditions} |  | ||||||
|         `; |  | ||||||
|  |  | ||||||
|         const params = [...patterns]; |  | ||||||
|  |  | ||||||
|         // Add filters using the notes table columns |  | ||||||
|         if (!options.includeDeleted) { |  | ||||||
|             query += ` AND n.isDeleted = 0`; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if (!options.includeProtected && !protectedSessionService.isProtectedSessionAvailable()) { |  | ||||||
|             query += ` AND n.isProtected = 0`; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Add limit if specified |  | ||||||
|         if (options.limit) { |  | ||||||
|             query += ` LIMIT ${options.limit}`; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Execute query |  | ||||||
|         try { |  | ||||||
|             for (const row of sql.iterateRows<{ noteId: string }>(query, params)) { |  | ||||||
|                 if (!options.noteIdFilter || options.noteIdFilter.has(row.noteId)) { |  | ||||||
|                     results.add(row.noteId); |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|         } catch (error) { |  | ||||||
|             log.error(`Regex search failed: ${error}`); |  | ||||||
|             // Return empty set on regex error |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         return results; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Exact word search using FTS5 or token matching |  | ||||||
|      * Operator: = |  | ||||||
|      */ |  | ||||||
|     private searchExactWord(tokens: string[], options: SearchOptions): Set<string> { |  | ||||||
|         const results = new Set<string>(); |  | ||||||
|          |  | ||||||
|         // Try FTS5 first if available |  | ||||||
|         const fts5Available = this.checkFTS5Availability(); |  | ||||||
|          |  | ||||||
|         if (fts5Available) { |  | ||||||
|             try { |  | ||||||
|                 // Build FTS5 query; escape embedded double quotes by |  | ||||||
|                 // doubling them, per FTS5 string syntax |  | ||||||
|                 const ftsQuery = tokens.map(t => `"${t.replace(/"/g, '""')}"`).join(' '); |  | ||||||
|                  |  | ||||||
|                 // FTS5 doesn't have isDeleted or isProtected columns, |  | ||||||
|                 // so we need to join with notes table for filtering |  | ||||||
|                 let query = ` |  | ||||||
|                     SELECT DISTINCT f.noteId |  | ||||||
|                     FROM notes_fts f |  | ||||||
|                     JOIN notes n ON f.noteId = n.noteId |  | ||||||
|                     WHERE f.notes_fts MATCH ? |  | ||||||
|                 `; |  | ||||||
|  |  | ||||||
|                 const params = [ftsQuery]; |  | ||||||
|  |  | ||||||
|                 // Add filters using the notes table columns |  | ||||||
|                 if (!options.includeDeleted) { |  | ||||||
|                     query += ` AND n.isDeleted = 0`; |  | ||||||
|                 } |  | ||||||
|  |  | ||||||
|                 if (!options.includeProtected && !protectedSessionService.isProtectedSessionAvailable()) { |  | ||||||
|                     query += ` AND n.isProtected = 0`; |  | ||||||
|                 } |  | ||||||
|  |  | ||||||
|                 // Add limit if specified |  | ||||||
|                 if (options.limit) { |  | ||||||
|                     query += ` LIMIT ${options.limit}`; |  | ||||||
|                 } |  | ||||||
|  |  | ||||||
|                 for (const row of sql.iterateRows<{ noteId: string }>(query, params)) { |  | ||||||
|                     if (!options.noteIdFilter || options.noteIdFilter.has(row.noteId)) { |  | ||||||
|                         results.add(row.noteId); |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|  |  | ||||||
|                 return results; |  | ||||||
|             } catch (error) { |  | ||||||
|                 log.info(`FTS5 search failed, falling back to token search: ${error}`); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Fallback to token-based exact match |  | ||||||
|         // Build query to check if all tokens exist as whole words |  | ||||||
|         let query = ` |  | ||||||
|             SELECT DISTINCT nt.noteId, nt.token_normalized |  | ||||||
|             FROM note_tokens nt |  | ||||||
|             JOIN notes n ON nt.noteId = n.noteId |  | ||||||
|             WHERE 1=1 |  | ||||||
|         `; |  | ||||||
|  |  | ||||||
|         if (!options.includeDeleted) { |  | ||||||
|             query += ` AND n.isDeleted = 0`; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if (!options.includeProtected && !protectedSessionService.isProtectedSessionAvailable()) { |  | ||||||
|             query += ` AND n.isProtected = 0`; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Get all matching notes and their tokens |  | ||||||
|         const candidateNotes = new Map<string, Set<string>>(); |  | ||||||
|          |  | ||||||
|         for (const row of sql.iterateRows<{ noteId: string, token_normalized: string }>(query)) { |  | ||||||
|             if (options.noteIdFilter && !options.noteIdFilter.has(row.noteId)) { |  | ||||||
|                 continue; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             if (!candidateNotes.has(row.noteId)) { |  | ||||||
|                 candidateNotes.set(row.noteId, new Set()); |  | ||||||
|             } |  | ||||||
|             candidateNotes.get(row.noteId)!.add(row.token_normalized); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Check each candidate for exact token matches |  | ||||||
|         for (const [noteId, noteTokenSet] of candidateNotes) { |  | ||||||
|             const allTokensFound = tokens.every(token => noteTokenSet.has(token)); |  | ||||||
|              |  | ||||||
|             if (allTokensFound) { |  | ||||||
|                 results.add(noteId); |  | ||||||
|                  |  | ||||||
|                 if (options.limit && results.size >= options.limit) { |  | ||||||
|                     break; |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         return results; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Not equals search - inverse of exact word search |  | ||||||
|      * Operator: != |  | ||||||
|      */ |  | ||||||
|     private searchNotEquals(tokens: string[], options: SearchOptions): Set<string> { |  | ||||||
|         // First collect the notes that DO match the exact word search |  | ||||||
|         const matchingNotes = this.searchExactWord(tokens, options); |  | ||||||
|          |  | ||||||
|         // Get all notes - JOIN with notes table for isDeleted/isProtected filtering |  | ||||||
|         let query = ` |  | ||||||
|             SELECT DISTINCT nsc.noteId  |  | ||||||
|             FROM note_search_content nsc |  | ||||||
|             JOIN notes n ON nsc.noteId = n.noteId |  | ||||||
|             WHERE 1=1 |  | ||||||
|         `; |  | ||||||
|  |  | ||||||
|         if (!options.includeDeleted) { |  | ||||||
|             query += ` AND n.isDeleted = 0`; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if (!options.includeProtected && !protectedSessionService.isProtectedSessionAvailable()) { |  | ||||||
|             query += ` AND n.isProtected = 0`; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         const allNotes = new Set<string>(); |  | ||||||
|         for (const row of sql.iterateRows<{ noteId: string }>(query)) { |  | ||||||
|             if (!options.noteIdFilter || options.noteIdFilter.has(row.noteId)) { |  | ||||||
|                 allNotes.add(row.noteId); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Return the difference |  | ||||||
|         const results = new Set<string>(); |  | ||||||
|         for (const noteId of allNotes) { |  | ||||||
|             if (!matchingNotes.has(noteId)) { |  | ||||||
|                 results.add(noteId); |  | ||||||
|                  |  | ||||||
|                 if (options.limit && results.size >= options.limit) { |  | ||||||
|                     break; |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         return results; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Check if FTS5 is available |  | ||||||
|      */ |  | ||||||
|     private checkFTS5Availability(): boolean { |  | ||||||
|         try { |  | ||||||
|             const result = sql.getValue(` |  | ||||||
|                 SELECT name FROM sqlite_master  |  | ||||||
|                 WHERE type='table' AND name='notes_fts' |  | ||||||
|             `); |  | ||||||
|             return !!result; |  | ||||||
|         } catch { |  | ||||||
|             return false; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Search with multiple operators (for complex queries) |  | ||||||
|      */ |  | ||||||
|     searchMultiple( |  | ||||||
|         queries: Array<{ tokens: string[], operator: string }>, |  | ||||||
|         combineMode: 'AND' | 'OR', |  | ||||||
|         searchContext: SearchContext, |  | ||||||
|         options: SearchOptions = {} |  | ||||||
|     ): Set<string> { |  | ||||||
|         if (queries.length === 0) { |  | ||||||
|             return new Set(); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         const resultSets = queries.map(q =>  |  | ||||||
|             this.search(q.tokens, q.operator, searchContext, options) |  | ||||||
|         ); |  | ||||||
|  |  | ||||||
|         if (combineMode === 'AND') { |  | ||||||
|             // Intersection of all result sets |  | ||||||
|             return resultSets.reduce((acc, set) => { |  | ||||||
|                 const intersection = new Set<string>(); |  | ||||||
|                 for (const item of acc) { |  | ||||||
|                     if (set.has(item)) { |  | ||||||
|                         intersection.add(item); |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|                 return intersection; |  | ||||||
|             }); |  | ||||||
|         } else { |  | ||||||
|             // Union of all result sets |  | ||||||
|             return resultSets.reduce((acc, set) => { |  | ||||||
|                 for (const item of set) { |  | ||||||
|                     acc.add(item); |  | ||||||
|                 } |  | ||||||
|                 return acc; |  | ||||||
|             }, new Set<string>()); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
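|  |  | ||||||
|     // Example (illustrative) -- AND-combining two substring queries, as the |  | ||||||
|     // unit tests do: |  | ||||||
|     // |  | ||||||
|     //   service.searchMultiple( |  | ||||||
|     //       [ |  | ||||||
|     //           { tokens: ["java"],   operator: "*=*" }, |  | ||||||
|     //           { tokens: ["script"], operator: "*=*" }, |  | ||||||
|     //       ], |  | ||||||
|     //       "AND", |  | ||||||
|     //       searchContext |  | ||||||
|     //   ); |  | ||||||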
|  |  | ||||||
|     /** |  | ||||||
|      * Get search statistics for monitoring |  | ||||||
|      */ |  | ||||||
|     getStatistics() { |  | ||||||
|         // Return the in-memory statistics object which includes performance data |  | ||||||
|         return { |  | ||||||
|             ...this.statistics, |  | ||||||
|             indexedNotes: this.isInitialized ? this.getIndexedNotesCount() : 0, |  | ||||||
|             totalTokens: this.isInitialized ? this.getTotalTokensCount() : 0, |  | ||||||
|             fts5Available: this.isInitialized ? this.checkFTS5Availability() : false |  | ||||||
|         }; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Get count of indexed notes |  | ||||||
|      */ |  | ||||||
|     private getIndexedNotesCount(): number { |  | ||||||
|         try { |  | ||||||
|             return sql.getValue<number>(` |  | ||||||
|                 SELECT COUNT(DISTINCT nsc.noteId)  |  | ||||||
|                 FROM note_search_content nsc |  | ||||||
|                 JOIN notes n ON nsc.noteId = n.noteId |  | ||||||
|                 WHERE n.isDeleted = 0 |  | ||||||
|             `) || 0; |  | ||||||
|         } catch { |  | ||||||
|             return 0; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Get total tokens count |  | ||||||
|      */ |  | ||||||
|     private getTotalTokensCount(): number { |  | ||||||
|         try { |  | ||||||
|             return sql.getValue<number>(` |  | ||||||
|                 SELECT COUNT(*) FROM note_tokens |  | ||||||
|             `) || 0; |  | ||||||
|         } catch { |  | ||||||
|             return 0; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Rebuild search index for a specific note |  | ||||||
|      */ |  | ||||||
|     rebuildNoteIndex(noteId: string): void { |  | ||||||
|         if (!this.isInitialized) { |  | ||||||
|             log.info("Cannot rebuild index - search tables not initialized"); |  | ||||||
|             return; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         try { |  | ||||||
|             // This will be handled by triggers automatically |  | ||||||
|             // But we can force an update by touching the note |  | ||||||
|             sql.execute(` |  | ||||||
|                 UPDATE notes  |  | ||||||
|                 SET dateModified = strftime('%Y-%m-%d %H:%M:%S.%f', 'now') |  | ||||||
|                 WHERE noteId = ? |  | ||||||
|             `, [noteId]); |  | ||||||
|  |  | ||||||
|             log.info(`Rebuilt search index for note ${noteId}`); |  | ||||||
|         } catch (error) { |  | ||||||
|             log.error(`Failed to rebuild index for note ${noteId}: ${error}`); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Clear search index (for testing/maintenance) |  | ||||||
|      */ |  | ||||||
|     clearIndex(): void { |  | ||||||
|         if (!this.isInitialized) { |  | ||||||
|             return; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         try { |  | ||||||
|             sql.execute(`DELETE FROM note_search_content`); |  | ||||||
|             sql.execute(`DELETE FROM note_tokens`); |  | ||||||
|              |  | ||||||
|             if (this.checkFTS5Availability()) { |  | ||||||
|                 sql.execute(`DELETE FROM notes_fts`); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             log.info("Search index cleared"); |  | ||||||
|         } catch (error) { |  | ||||||
|             log.error(`Failed to clear search index: ${error}`); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Get detailed index status information |  | ||||||
|      */ |  | ||||||
|     async getIndexStatus(): Promise<{ |  | ||||||
|         initialized: boolean; |  | ||||||
|         tablesExist: boolean; |  | ||||||
|         indexedNotes: number; |  | ||||||
|         totalNotes: number; |  | ||||||
|         totalTokens: number; |  | ||||||
|         fts5Available: boolean; |  | ||||||
|         lastRebuild?: string; |  | ||||||
|         coverage: number; |  | ||||||
|     }> { |  | ||||||
|         const tablesExist = this.isInitialized; |  | ||||||
|          |  | ||||||
|         if (!tablesExist) { |  | ||||||
|             return { |  | ||||||
|                 initialized: false, |  | ||||||
|                 tablesExist: false, |  | ||||||
|                 indexedNotes: 0, |  | ||||||
|                 totalNotes: 0, |  | ||||||
|                 totalTokens: 0, |  | ||||||
|                 fts5Available: false, |  | ||||||
|                 coverage: 0 |  | ||||||
|             }; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Get total indexable notes |  | ||||||
|         const totalNotes = sql.getValue<number>(` |  | ||||||
|             SELECT COUNT(*)  |  | ||||||
|             FROM notes  |  | ||||||
|             WHERE type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') |  | ||||||
|                 AND isDeleted = 0 |  | ||||||
|                 AND isProtected = 0 |  | ||||||
|         `) || 0; |  | ||||||
|  |  | ||||||
|         // Get indexed notes count |  | ||||||
|         const indexedNotes = sql.getValue<number>(` |  | ||||||
|             SELECT COUNT(DISTINCT nsc.noteId)  |  | ||||||
|             FROM note_search_content nsc |  | ||||||
|             JOIN notes n ON nsc.noteId = n.noteId |  | ||||||
|             WHERE n.isDeleted = 0 |  | ||||||
|         `) || 0; |  | ||||||
|  |  | ||||||
|         // Get token count |  | ||||||
|         const totalTokens = sql.getValue<number>(` |  | ||||||
|             SELECT COUNT(*) FROM note_tokens |  | ||||||
|         `) || 0; |  | ||||||
|  |  | ||||||
|         // Calculate coverage percentage |  | ||||||
|         const coverage = totalNotes > 0 ? (indexedNotes / totalNotes) * 100 : 0; |  | ||||||
|  |  | ||||||
|         return { |  | ||||||
|             initialized: true, |  | ||||||
|             tablesExist: true, |  | ||||||
|             indexedNotes, |  | ||||||
|             totalNotes, |  | ||||||
|             totalTokens, |  | ||||||
|             fts5Available: this.checkFTS5Availability(), |  | ||||||
|             coverage: Math.round(coverage * 100) / 100 |  | ||||||
|         }; |  | ||||||
|     } |  | ||||||
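|  |  | ||||||
|     // Illustrative return value (figures invented for the example): |  | ||||||
|     // |  | ||||||
|     //   { |  | ||||||
|     //       initialized: true, |  | ||||||
|     //       tablesExist: true, |  | ||||||
|     //       indexedNotes: 4821, |  | ||||||
|     //       totalNotes: 4903, |  | ||||||
|     //       totalTokens: 312450, |  | ||||||
|     //       fts5Available: true, |  | ||||||
|     //       coverage: 98.33 |  | ||||||
|     //   } |  | ||||||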
|  |  | ||||||
|     /** |  | ||||||
|      * Rebuild the entire search index |  | ||||||
|      */ |  | ||||||
|     async rebuildIndex(force: boolean = false): Promise<void> { |  | ||||||
|         if (!this.isInitialized && !force) { |  | ||||||
|             throw new Error("Search tables not initialized. Use force=true to create tables."); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         log.info("Starting search index rebuild..."); |  | ||||||
|         const startTime = Date.now(); |  | ||||||
|  |  | ||||||
|         try { |  | ||||||
|             // Clear existing index |  | ||||||
|             this.clearIndex(); |  | ||||||
|  |  | ||||||
|             // Rebuild from all notes |  | ||||||
|             const batchSize = 100; |  | ||||||
|             let offset = 0; |  | ||||||
|             let totalProcessed = 0; |  | ||||||
|  |  | ||||||
|             while (true) { |  | ||||||
|                 const notes = sql.getRows<{ |  | ||||||
|                     noteId: string; |  | ||||||
|                     title: string; |  | ||||||
|                     type: string; |  | ||||||
|                     mime: string; |  | ||||||
|                     content: string | null; |  | ||||||
|                 }>(` |  | ||||||
|                     SELECT  |  | ||||||
|                         n.noteId, |  | ||||||
|                         n.title, |  | ||||||
|                         n.type, |  | ||||||
|                         n.mime, |  | ||||||
|                         b.content |  | ||||||
|                     FROM notes n |  | ||||||
|                     LEFT JOIN blobs b ON n.blobId = b.blobId |  | ||||||
|                     WHERE n.isDeleted = 0 |  | ||||||
|                         AND n.isProtected = 0 |  | ||||||
|                         AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') |  | ||||||
|                     ORDER BY n.noteId |  | ||||||
|                     LIMIT ? OFFSET ? |  | ||||||
|                 `, [batchSize, offset]); |  | ||||||
|  |  | ||||||
|                 if (notes.length === 0) { |  | ||||||
|                     break; |  | ||||||
|                 } |  | ||||||
|  |  | ||||||
|                 // Process batch - trigger will handle the actual indexing |  | ||||||
|                 for (const note of notes) { |  | ||||||
|                     try { |  | ||||||
|                         // Touch the note to trigger re-indexing |  | ||||||
|                         sql.execute(` |  | ||||||
|                             UPDATE notes  |  | ||||||
|                             SET dateModified = strftime('%Y-%m-%d %H:%M:%S.%f', 'now') |  | ||||||
|                             WHERE noteId = ? |  | ||||||
|                         `, [note.noteId]); |  | ||||||
|                          |  | ||||||
|                         totalProcessed++; |  | ||||||
|                     } catch (error) { |  | ||||||
|                         log.error(`Failed to reindex note ${note.noteId}: ${error}`); |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|  |  | ||||||
|                 offset += batchSize; |  | ||||||
|                  |  | ||||||
|                 if (totalProcessed % 1000 === 0) { |  | ||||||
|                     log.info(`Reindexed ${totalProcessed} notes...`); |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             const duration = Date.now() - startTime; |  | ||||||
|             log.info(`Index rebuild completed: ${totalProcessed} notes in ${duration}ms`); |  | ||||||
|  |  | ||||||
|         } catch (error) { |  | ||||||
|             log.error(`Index rebuild failed: ${error}`); |  | ||||||
|             throw error; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Export singleton instance getter |  | ||||||
| export function getSQLiteSearchService(): SQLiteSearchService { |  | ||||||
|     return SQLiteSearchService.getInstance(); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Export default getter function (not the instance, to avoid initialization issues) |  | ||||||
| export default getSQLiteSearchService; |  | ||||||
| @@ -1,471 +0,0 @@ | |||||||
| /** |  | ||||||
|  * SQLite Search Utilities |  | ||||||
|  *  |  | ||||||
|  * Helper functions and utilities for SQLite-based search operations. |  | ||||||
|  * These utilities provide common functionality needed by the search service |  | ||||||
|  * and help with data preparation, validation, and performance monitoring. |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| import sql from "../sql.js"; |  | ||||||
| import log from "../log.js"; |  | ||||||
| import { normalize, stripTags } from "../utils.js"; |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Configuration for search utilities |  | ||||||
|  */ |  | ||||||
| export const SEARCH_UTILS_CONFIG = { |  | ||||||
|     BATCH_SIZE: 1000, |  | ||||||
|     MAX_CONTENT_SIZE: 2 * 1024 * 1024, // 2MB |  | ||||||
|     MIN_TOKEN_LENGTH: 2, |  | ||||||
|     MAX_TOKEN_LENGTH: 100, |  | ||||||
|     LOG_SLOW_QUERIES: true, |  | ||||||
|     SLOW_QUERY_THRESHOLD: 100, // ms |  | ||||||
| } as const; |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Interface for note content data |  | ||||||
|  */ |  | ||||||
| export interface NoteContentData { |  | ||||||
|     noteId: string; |  | ||||||
|     title: string; |  | ||||||
|     content: string; |  | ||||||
|     type: string; |  | ||||||
|     mime: string; |  | ||||||
|     isProtected: boolean; |  | ||||||
|     isDeleted: boolean; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Normalize text for search indexing |  | ||||||
|  * Ensures consistent normalization across all search operations |  | ||||||
|  */ |  | ||||||
| export function normalizeForSearch(text: string | null | undefined): string { |  | ||||||
|     if (!text || typeof text !== 'string') { |  | ||||||
|         return ''; |  | ||||||
|     } |  | ||||||
|      |  | ||||||
|     // Use the standard normalize function and convert to lowercase |  | ||||||
|     return normalize(text).toLowerCase(); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Tokenize text into searchable words |  | ||||||
|  * Handles camelCase, snake_case, and special characters |  | ||||||
|  */ |  | ||||||
| export function tokenizeText(text: string | null | undefined): string[] { |  | ||||||
|     if (!text || typeof text !== 'string') { |  | ||||||
|         return []; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     const tokens = new Set<string>(); |  | ||||||
|      |  | ||||||
|     // Split on word boundaries |  | ||||||
|     const words = text |  | ||||||
|         .split(/[\s\n\r\t,;.!?()[\]{}"'`~@#$%^&*+=|\\/<>:-]+/) |  | ||||||
|         .filter(word => word.length >= SEARCH_UTILS_CONFIG.MIN_TOKEN_LENGTH && |  | ||||||
|                        word.length <= SEARCH_UTILS_CONFIG.MAX_TOKEN_LENGTH); |  | ||||||
|      |  | ||||||
|     for (const word of words) { |  | ||||||
|         // Add the original word (lowercase) |  | ||||||
|         tokens.add(word.toLowerCase()); |  | ||||||
|          |  | ||||||
|         // Handle snake_case |  | ||||||
|         const snakeParts = word.split('_').filter(part => part.length > 0); |  | ||||||
|         if (snakeParts.length > 1) { |  | ||||||
|             for (const part of snakeParts) { |  | ||||||
|                 tokens.add(part.toLowerCase()); |  | ||||||
|                  |  | ||||||
|                 // Also handle camelCase within snake_case parts |  | ||||||
|                 const camelParts = splitCamelCase(part); |  | ||||||
|                 for (const camelPart of camelParts) { |  | ||||||
|                     if (camelPart.length >= SEARCH_UTILS_CONFIG.MIN_TOKEN_LENGTH) { |  | ||||||
|                         tokens.add(camelPart.toLowerCase()); |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|         } else { |  | ||||||
|             // Handle camelCase |  | ||||||
|             const camelParts = splitCamelCase(word); |  | ||||||
|             for (const part of camelParts) { |  | ||||||
|                 if (part.length >= SEARCH_UTILS_CONFIG.MIN_TOKEN_LENGTH) { |  | ||||||
|                     tokens.add(part.toLowerCase()); |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|      |  | ||||||
|     return Array.from(tokens); |  | ||||||
| } |  | ||||||
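|  |  | ||||||
| // Example (illustrative) of the expected token set -- order not guaranteed, |  | ||||||
| // duplicates collapsed: |  | ||||||
| // |  | ||||||
| //   tokenizeText("getUserName snake_case_id") |  | ||||||
| //   // -> ["getusername", "get", "user", "name", |  | ||||||
| //   //     "snake_case_id", "snake", "case", "id"] |  | ||||||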
|  |  | ||||||
| /** |  | ||||||
|  * Split camelCase strings into parts |  | ||||||
|  */ |  | ||||||
| function splitCamelCase(str: string): string[] { |  | ||||||
|     // Split on transitions from lowercase to uppercase |  | ||||||
|     // Also handle sequences of uppercase letters (e.g., "XMLParser" -> ["XML", "Parser"]) |  | ||||||
|     return str.split(/(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])/); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Process HTML content for indexing |  | ||||||
|  * Removes tags and normalizes the text |  | ||||||
|  */ |  | ||||||
| export function processHtmlContent(html: string | null | undefined): string { |  | ||||||
|     if (!html || typeof html !== 'string') { |  | ||||||
|         return ''; |  | ||||||
|     } |  | ||||||
|      |  | ||||||
|     // Remove script and style content |  | ||||||
|     let text = html.replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, ''); |  | ||||||
|     text = text.replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, ''); |  | ||||||
|      |  | ||||||
|     // Strip remaining tags |  | ||||||
|     text = stripTags(text); |  | ||||||
|      |  | ||||||
|     // Decode common HTML entities (&amp; last, so that e.g. &amp;lt; |  | ||||||
|     // is not double-decoded into '<') |  | ||||||
|     text = text.replace(/&nbsp;/g, ' '); |  | ||||||
|     text = text.replace(/&lt;/g, '<'); |  | ||||||
|     text = text.replace(/&gt;/g, '>'); |  | ||||||
|     text = text.replace(/&quot;/g, '"'); |  | ||||||
|     text = text.replace(/&#39;/g, "'"); |  | ||||||
|     text = text.replace(/&apos;/g, "'"); |  | ||||||
|     text = text.replace(/&amp;/g, '&'); |  | ||||||
|      |  | ||||||
|     // Normalize whitespace |  | ||||||
|     text = text.replace(/\s+/g, ' ').trim(); |  | ||||||
|      |  | ||||||
|     return text; |  | ||||||
| } |  | ||||||
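|  |  | ||||||
| // Example (illustrative): |  | ||||||
| // |  | ||||||
| //   processHtmlContent("<p>Hello&nbsp;<b>world</b> &amp; friends</p>") |  | ||||||
| //   // -> "Hello world & friends" |  | ||||||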
|  |  | ||||||
| /** |  | ||||||
|  * Process JSON content (e.g., mindmaps, canvas) for indexing |  | ||||||
|  */ |  | ||||||
| export function processJsonContent(json: string | null | undefined, type: string): string { |  | ||||||
|     if (!json || typeof json !== 'string') { |  | ||||||
|         return ''; |  | ||||||
|     } |  | ||||||
|      |  | ||||||
|     try { |  | ||||||
|         const data = JSON.parse(json); |  | ||||||
|          |  | ||||||
|         if (type === 'mindMap') { |  | ||||||
|             return extractMindMapText(data); |  | ||||||
|         } else if (type === 'canvas') { |  | ||||||
|             return extractCanvasText(data); |  | ||||||
|         } |  | ||||||
|          |  | ||||||
|         // For other JSON types, try to extract text content |  | ||||||
|         return extractTextFromObject(data); |  | ||||||
|     } catch (error) { |  | ||||||
|         log.info(`Failed to process JSON content: ${error}`); |  | ||||||
|         return ''; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Extract text from mindmap JSON structure |  | ||||||
|  */ |  | ||||||
| function extractMindMapText(data: any): string { |  | ||||||
|     const texts: string[] = []; |  | ||||||
|      |  | ||||||
|     function collectTopics(node: any): void { |  | ||||||
|         if (!node) return; |  | ||||||
|          |  | ||||||
|         if (node.topic) { |  | ||||||
|             texts.push(node.topic); |  | ||||||
|         } |  | ||||||
|          |  | ||||||
|         if (node.children && Array.isArray(node.children)) { |  | ||||||
|             for (const child of node.children) { |  | ||||||
|                 collectTopics(child); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|      |  | ||||||
|     if (data.nodedata) { |  | ||||||
|         collectTopics(data.nodedata); |  | ||||||
|     } |  | ||||||
|      |  | ||||||
|     return texts.join(' '); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Extract text from canvas JSON structure |  | ||||||
|  */ |  | ||||||
| function extractCanvasText(data: any): string { |  | ||||||
|     const texts: string[] = []; |  | ||||||
|      |  | ||||||
|     if (data.elements && Array.isArray(data.elements)) { |  | ||||||
|         for (const element of data.elements) { |  | ||||||
|             if (element.type === 'text' && element.text) { |  | ||||||
|                 texts.push(element.text); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|      |  | ||||||
|     return texts.join(' '); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Generic text extraction from JSON objects |  | ||||||
|  */ |  | ||||||
| function extractTextFromObject(obj: any, maxDepth = 10): string { |  | ||||||
|     if (maxDepth <= 0) return ''; |  | ||||||
|      |  | ||||||
|     const texts: string[] = []; |  | ||||||
|      |  | ||||||
|     if (typeof obj === 'string') { |  | ||||||
|         return obj; |  | ||||||
|     } else if (Array.isArray(obj)) { |  | ||||||
|         for (const item of obj) { |  | ||||||
|             const text = extractTextFromObject(item, maxDepth - 1); |  | ||||||
|             if (text) texts.push(text); |  | ||||||
|         } |  | ||||||
|     } else if (typeof obj === 'object' && obj !== null) { |  | ||||||
|         for (const key of Object.keys(obj)) { |  | ||||||
|             // Look for common text field names |  | ||||||
|             if (['text', 'content', 'value', 'title', 'name', 'label', 'description'].includes(key.toLowerCase())) { |  | ||||||
|                 const value = obj[key]; |  | ||||||
|                 if (typeof value === 'string') { |  | ||||||
|                     texts.push(value); |  | ||||||
|                 } |  | ||||||
|             } else { |  | ||||||
|                 const text = extractTextFromObject(obj[key], maxDepth - 1); |  | ||||||
|                 if (text) texts.push(text); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|      |  | ||||||
|     return texts.join(' '); |  | ||||||
| } |  | ||||||
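A quick sketch of the generic fallback on a nested object. Only whitelisted key names contribute their string values directly; everything else is recursed with a shrinking depth budget, and non-string leaves are dropped:

```typescript
// Hypothetical payload for a JSON note type with no dedicated extractor.
const data = {
    title: "Meeting notes",
    meta: { author: { name: "alice" }, revision: 7 }, // the number is dropped
    items: [{ label: "agenda" }, { label: "minutes" }]
};

extractTextFromObject(data); // "Meeting notes alice agenda minutes"
```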
|  |  | ||||||
| /** |  | ||||||
|  * Prepare note content for indexing |  | ||||||
|  * Handles different note types and formats |  | ||||||
|  */ |  | ||||||
| export function prepareNoteContent(note: NoteContentData): { |  | ||||||
|     normalizedContent: string; |  | ||||||
|     normalizedTitle: string; |  | ||||||
|     tokens: string[]; |  | ||||||
| } { |  | ||||||
|     // content can be null when the note has no blob row (callers LEFT JOIN blobs) |  |
|     let content = note.content || ''; |  |
|      |  | ||||||
|     // Process content based on type |  | ||||||
|     if (note.type === 'text' && note.mime === 'text/html') { |  | ||||||
|         content = processHtmlContent(content); |  | ||||||
|     } else if ((note.type === 'mindMap' || note.type === 'canvas') && note.mime === 'application/json') { |  | ||||||
|         content = processJsonContent(content, note.type); |  | ||||||
|     } |  | ||||||
|      |  | ||||||
|     // Check content size |  | ||||||
|     if (content.length > SEARCH_UTILS_CONFIG.MAX_CONTENT_SIZE) { |  | ||||||
|         log.info(`Note ${note.noteId} content exceeds max size (${content.length} chars), truncating`); |  |
|         content = content.substring(0, SEARCH_UTILS_CONFIG.MAX_CONTENT_SIZE); |  | ||||||
|     } |  | ||||||
|      |  | ||||||
|     // Normalize content and title |  | ||||||
|     const normalizedContent = normalizeForSearch(content); |  | ||||||
|     const normalizedTitle = normalizeForSearch(note.title); |  | ||||||
|      |  | ||||||
|     // Generate tokens from both content and title |  | ||||||
|     const allText = `${note.title} ${content}`; |  | ||||||
|     const tokens = tokenizeText(allText); |  | ||||||
|      |  | ||||||
|     return { |  | ||||||
|         normalizedContent, |  | ||||||
|         normalizedTitle, |  | ||||||
|         tokens |  | ||||||
|     }; |  | ||||||
| } |  | ||||||
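A rough usage sketch; the literal below assumes only the NoteContentData fields accessed above, and the outputs shown in comments are illustrative, since normalizeForSearch and tokenizeText are defined earlier in this module:

```typescript
const prepared = prepareNoteContent({
    noteId: "abc123",                        // hypothetical id
    title: "Café Plans",
    content: "<p>Meet at <b>noon</b></p>",
    type: "text",
    mime: "text/html",
    isProtected: false,
    isDeleted: false
} as NoteContentData);

// prepared.normalizedTitle   -> e.g. "cafe plans" (lowercased, diacritics folded)
// prepared.normalizedContent -> HTML stripped first, then normalized
// prepared.tokens            -> tokens drawn from title + content
```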
|  |  | ||||||
| /** |  |
|  * Update search index for a single note. |  |
|  * Kept synchronous on purpose: the sql wrapper is synchronous, and batch |  |
|  * callers invoke this inside sql.transactional(), which expects the work |  |
|  * to finish before its callback returns (an async version would let errors |  |
|  * escape the per-note try/catch as unhandled rejections). |  |
|  */ |  |
| export function updateNoteSearchIndex(noteId: string): void { |  |
|     try { |  | ||||||
|         // Get note data |  | ||||||
|         const noteData = sql.getRow<NoteContentData>(` |  | ||||||
|             SELECT n.noteId, n.title, b.content, n.type, n.mime, n.isProtected, n.isDeleted |  | ||||||
|             FROM notes n |  | ||||||
|             LEFT JOIN blobs b ON n.blobId = b.blobId |  | ||||||
|             WHERE n.noteId = ? |  | ||||||
|         `, [noteId]); |  | ||||||
|          |  | ||||||
|         if (!noteData) { |  | ||||||
|             log.info(`Note ${noteId} not found for indexing`); |  | ||||||
|             return; |  | ||||||
|         } |  | ||||||
|          |  | ||||||
|         // Prepare content for indexing |  | ||||||
|         const { normalizedContent, normalizedTitle, tokens } = prepareNoteContent(noteData); |  | ||||||
|          |  | ||||||
|         // Update search content table |  | ||||||
|         // Note: note_search_content doesn't have isProtected/isDeleted columns |  | ||||||
|         // Those are in the notes table which we join with |  | ||||||
|         sql.execute(` |  | ||||||
|             INSERT OR REPLACE INTO note_search_content  |  | ||||||
|             (noteId, title, content, title_normalized, content_normalized, full_text_normalized) |  | ||||||
|             VALUES (?, ?, ?, ?, ?, ?) |  | ||||||
|         `, [noteId, noteData.title, noteData.content || '',  |  | ||||||
|             normalizedTitle, normalizedContent,  |  | ||||||
|             normalizedTitle + ' ' + normalizedContent]); |  | ||||||
|          |  | ||||||
|         // Delete existing tokens for this note |  | ||||||
|         sql.execute(`DELETE FROM note_tokens WHERE noteId = ?`, [noteId]); |  | ||||||
|          |  | ||||||
|         // Insert the new tokens, one row per token with its ordinal position |  |
|         let position = 0; |  | ||||||
|         for (const token of tokens) { |  | ||||||
|             sql.execute(` |  | ||||||
|                 INSERT INTO note_tokens (noteId, token, token_normalized, position, source) |  | ||||||
|                 VALUES (?, ?, ?, ?, 'content') |  | ||||||
|             `, [noteId, token, normalizeForSearch(token), position]); |  | ||||||
|             position++; |  | ||||||
|         } |  | ||||||
|          |  | ||||||
|         log.info(`Updated search index for note ${noteId}`); |  | ||||||
|     } catch (error) { |  | ||||||
|         log.error(`Failed to update search index for note ${noteId}: ${error}`); |  | ||||||
|         throw error; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
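The token loop above issues one INSERT per token. For large notes, chunking tokens into multi-row INSERTs cuts statement overhead; a minimal sketch, assuming sql.execute accepts a flat parameter array as in the calls above:

```typescript
// Sketch: batch token inserts into chunks of one multi-row INSERT each.
const CHUNK = 100;
for (let i = 0; i < tokens.length; i += CHUNK) {
    const chunk = tokens.slice(i, i + CHUNK);
    const placeholders = chunk.map(() => "(?, ?, ?, ?, 'content')").join(", ");
    const params = chunk.flatMap((token, j) =>
        [noteId, token, normalizeForSearch(token), i + j]);
    sql.execute(
        `INSERT INTO note_tokens (noteId, token, token_normalized, position, source)
         VALUES ${placeholders}`,
        params
    );
}
```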
|  |  | ||||||
| /** |  | ||||||
|  * Batch update search index for multiple notes |  | ||||||
|  */ |  | ||||||
| export async function batchUpdateSearchIndex(noteIds: string[]): Promise<void> { |  | ||||||
|     const startTime = Date.now(); |  | ||||||
|     let successCount = 0; |  | ||||||
|     let errorCount = 0; |  | ||||||
|      |  | ||||||
|     // Process in batches |  | ||||||
|     for (let i = 0; i < noteIds.length; i += SEARCH_UTILS_CONFIG.BATCH_SIZE) { |  | ||||||
|         const batch = noteIds.slice(i, i + SEARCH_UTILS_CONFIG.BATCH_SIZE); |  | ||||||
|          |  | ||||||
|         try { |  | ||||||
|             sql.transactional(() => { |  | ||||||
|                 for (const noteId of batch) { |  |
|                     try { |  |
|                         // synchronous call - a failure here is caught per note, |  |
|                         // so one bad note does not abort the whole batch |  |
|                         updateNoteSearchIndex(noteId); |  |
|                         successCount++; |  | ||||||
|                     } catch (error) { |  | ||||||
|                         log.error(`Failed to index note ${noteId}: ${error}`); |  | ||||||
|                         errorCount++; |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|             }); |  | ||||||
|         } catch (error) { |  | ||||||
|             log.error(`Batch indexing failed: ${error}`); |  | ||||||
|             errorCount += batch.length; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|      |  | ||||||
|     const elapsed = Date.now() - startTime; |  | ||||||
|     log.info(`Batch search indexing completed: ${successCount} success, ${errorCount} errors, ${elapsed}ms`); |  | ||||||
| } |  | ||||||
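Typical usage is a full rebuild over all live notes; a sketch using the same sql helper the functions above rely on:

```typescript
// Re-index every non-deleted, non-protected note.
const allNoteIds = sql.getColumn<string>(
    `SELECT noteId FROM notes WHERE isDeleted = 0 AND isProtected = 0`
);
await batchUpdateSearchIndex(allNoteIds);
```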
|  |  | ||||||
| /** |  | ||||||
|  * Verify search index integrity |  | ||||||
|  */ |  | ||||||
| export function verifySearchIndex(): { |  | ||||||
|     valid: boolean; |  | ||||||
|     issues: string[]; |  | ||||||
|     stats: { |  | ||||||
|         totalNotes: number; |  | ||||||
|         indexedNotes: number; |  | ||||||
|         missingFromIndex: number; |  | ||||||
|         orphanedEntries: number; |  | ||||||
|     }; |  | ||||||
| } { |  | ||||||
|     const issues: string[] = []; |  | ||||||
|      |  | ||||||
|     // Count total notes |  | ||||||
|     const totalNotes = sql.getValue<number>(` |  | ||||||
|         SELECT COUNT(*) FROM notes WHERE isDeleted = 0 |  | ||||||
|     `) || 0; |  | ||||||
|      |  | ||||||
|     // Count indexed notes - JOIN with notes table for isDeleted filter |  | ||||||
|     const indexedNotes = sql.getValue<number>(` |  | ||||||
|         SELECT COUNT(DISTINCT nsc.noteId)  |  | ||||||
|         FROM note_search_content nsc |  | ||||||
|         JOIN notes n ON nsc.noteId = n.noteId |  | ||||||
|         WHERE n.isDeleted = 0 |  | ||||||
|     `) || 0; |  | ||||||
|      |  | ||||||
|     // Find notes missing from index |  | ||||||
|     const missingNotes = sql.getColumn<string>(` |  | ||||||
|         SELECT noteId FROM notes  |  | ||||||
|         WHERE isDeleted = 0  |  | ||||||
|         AND noteId NOT IN (SELECT noteId FROM note_search_content) |  | ||||||
|     `); |  | ||||||
|      |  | ||||||
|     if (missingNotes.length > 0) { |  | ||||||
|         issues.push(`${missingNotes.length} notes missing from search index`); |  | ||||||
|     } |  | ||||||
|      |  | ||||||
|     // Find orphaned index entries |  | ||||||
|     const orphanedEntries = sql.getColumn<string>(` |  | ||||||
|         SELECT noteId FROM note_search_content  |  | ||||||
|         WHERE noteId NOT IN (SELECT noteId FROM notes) |  | ||||||
|     `); |  | ||||||
|      |  | ||||||
|     if (orphanedEntries.length > 0) { |  | ||||||
|         issues.push(`${orphanedEntries.length} orphaned entries in search index`); |  | ||||||
|     } |  | ||||||
|      |  | ||||||
|     // Check token table consistency |  | ||||||
|     const tokenMismatch = sql.getValue<number>(` |  | ||||||
|         SELECT COUNT(*) FROM note_search_content  |  | ||||||
|         WHERE noteId NOT IN (SELECT noteId FROM note_tokens) |  | ||||||
|     `) || 0; |  | ||||||
|      |  | ||||||
|     if (tokenMismatch > 0) { |  | ||||||
|         issues.push(`${tokenMismatch} notes missing from token index`); |  | ||||||
|     } |  | ||||||
|      |  | ||||||
|     return { |  | ||||||
|         valid: issues.length === 0, |  | ||||||
|         issues, |  | ||||||
|         stats: { |  | ||||||
|             totalNotes, |  | ||||||
|             indexedNotes, |  | ||||||
|             missingFromIndex: missingNotes.length, |  | ||||||
|             orphanedEntries: orphanedEntries.length |  | ||||||
|         } |  | ||||||
|     }; |  | ||||||
| } |  | ||||||
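A sketch of how a caller might act on the report; the re-index follow-up is hypothetical:

```typescript
const report = verifySearchIndex();
if (!report.valid) {
    log.error(`Search index integrity check failed: ${report.issues.join("; ")}`);
    // a caller could re-run batchUpdateSearchIndex() over the missing notes here
}
log.info(`Index coverage: ${report.stats.indexedNotes}/${report.stats.totalNotes} notes`);
```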
|  |  | ||||||
| /** |  | ||||||
|  * Performance monitoring wrapper for search queries |  | ||||||
|  */ |  | ||||||
| export function monitorQuery<T>( |  | ||||||
|     queryName: string, |  | ||||||
|     queryFn: () => T |  | ||||||
| ): T { |  | ||||||
|     const startTime = Date.now(); |  | ||||||
|      |  | ||||||
|     try { |  | ||||||
|         const result = queryFn(); |  | ||||||
|          |  | ||||||
|         const elapsed = Date.now() - startTime; |  | ||||||
|         if (SEARCH_UTILS_CONFIG.LOG_SLOW_QUERIES && elapsed > SEARCH_UTILS_CONFIG.SLOW_QUERY_THRESHOLD) { |  | ||||||
|             log.info(`Slow search query detected: ${queryName} took ${elapsed}ms`); |  | ||||||
|         } |  | ||||||
|          |  | ||||||
|         return result; |  | ||||||
|     } catch (error) { |  | ||||||
|         const elapsed = Date.now() - startTime; |  | ||||||
|         log.error(`Search query failed: ${queryName} after ${elapsed}ms - ${error}`); |  | ||||||
|         throw error; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
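A usage sketch wrapping a direct index lookup; the LIKE query below is illustrative, using the note_search_content columns populated earlier in this module:

```typescript
// Slow executions of this lookup will be reported by monitorQuery.
const matches = monitorQuery("substring-search", () =>
    sql.getColumn<string>(
        `SELECT noteId FROM note_search_content WHERE content_normalized LIKE ?`,
        [`%${normalizeForSearch("fts5")}%`]
    )
);
```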
|  |  | ||||||
| /** |  | ||||||
|  * Export utility functions for testing |  | ||||||
|  */ |  | ||||||
| export const testUtils = { |  | ||||||
|     splitCamelCase, |  | ||||||
|     extractMindMapText, |  | ||||||
|     extractCanvasText, |  | ||||||
|     extractTextFromObject |  | ||||||
| }; |  | ||||||
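Because the private helpers are exposed through testUtils, they can be unit-tested without a database. A hypothetical vitest-style sketch (the test runner and module path are assumptions):

```typescript
import { describe, expect, it } from "vitest";
import { testUtils } from "./search_utils.js"; // hypothetical module path

describe("extractMindMapText", () => {
    it("collects topics depth-first", () => {
        const text = testUtils.extractMindMapText({
            nodedata: { topic: "root", children: [{ topic: "leaf" }] }
        });
        expect(text).toBe("root leaf");
    });
});
```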
| @@ -1,219 +0,0 @@ | |||||||
| #!/usr/bin/env ts-node |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Verification script for SQLite search implementation |  | ||||||
|  *  |  | ||||||
|  * This script checks: |  | ||||||
|  * 1. If migration 0235 has run (tables exist) |  | ||||||
|  * 2. If SQL functions are registered |  | ||||||
|  * 3. If search queries work correctly |  | ||||||
|  * 4. Performance comparison between SQLite and TypeScript |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| import sql from "../sql.js"; |  | ||||||
| import log from "../log.js"; |  | ||||||
| import { getSQLiteSearchService } from "./sqlite_search_service.js"; |  | ||||||
| import SearchContext from "./search_context.js"; |  | ||||||
| import becca from "../../becca/becca.js"; |  | ||||||
|  |  | ||||||
| async function verifyTables(): Promise<boolean> { |  | ||||||
|     console.log("\n=== Checking Database Tables ==="); |  | ||||||
|      |  | ||||||
|     const tables = [ |  | ||||||
|         { name: 'note_search_content', required: true }, |  | ||||||
|         { name: 'note_tokens', required: true }, |  | ||||||
|         { name: 'notes_fts', required: false } // From migration 0234 |  | ||||||
|     ]; |  | ||||||
|      |  | ||||||
|     let allExist = true; |  | ||||||
|      |  | ||||||
|     for (const table of tables) { |  | ||||||
|         const exists = (sql.getValue<number>(` |  |
|             SELECT COUNT(*) FROM sqlite_master  |  |
|             WHERE type='table' AND name=? |  |
|         `, [table.name]) || 0) > 0; |  |
|          |  | ||||||
|         const status = exists ? '✓' : '✗'; |  | ||||||
|         const requiredText = table.required ? ' (REQUIRED)' : ' (optional)'; |  | ||||||
|         console.log(`  ${status} ${table.name}${requiredText}`); |  | ||||||
|          |  | ||||||
|         if (table.required && !exists) { |  | ||||||
|             allExist = false; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|      |  | ||||||
|     if (!allExist) { |  | ||||||
|         console.log("\n❌ Required tables are missing!"); |  | ||||||
|         console.log("   Migration 0235 needs to run."); |  | ||||||
|         console.log("   The APP_DB_VERSION has been updated to 235."); |  | ||||||
|         console.log("   Restart the server to run the migration."); |  | ||||||
|     } |  | ||||||
|      |  | ||||||
|     return allExist; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| async function verifyFunctions(): Promise<boolean> { |  | ||||||
|     console.log("\n=== Checking SQL Functions ==="); |  | ||||||
|      |  | ||||||
|     const functions = [ |  | ||||||
|         { name: 'normalize_text', test: "SELECT normalize_text('Café')" }, |  | ||||||
|         { name: 'edit_distance', test: "SELECT edit_distance('test', 'text', 2)" }, |  | ||||||
|         { name: 'regex_match', test: "SELECT regex_match('test', 'testing')" }, |  | ||||||
|         { name: 'tokenize_text', test: "SELECT tokenize_text('hello world')" }, |  | ||||||
|         { name: 'strip_html', test: "SELECT strip_html('<p>test</p>')" } |  | ||||||
|     ]; |  | ||||||
|      |  | ||||||
|     let allWork = true; |  | ||||||
|      |  | ||||||
|     for (const func of functions) { |  | ||||||
|         try { |  | ||||||
|             const result = sql.getValue(func.test); |  | ||||||
|             console.log(`  ✓ ${func.name} - Result: ${result}`); |  | ||||||
|         } catch (error: any) { |  | ||||||
|             console.log(`  ✗ ${func.name} - Error: ${error.message}`); |  | ||||||
|             allWork = false; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|      |  | ||||||
|     if (!allWork) { |  | ||||||
|         console.log("\n⚠️  Some SQL functions are not working."); |  | ||||||
|         console.log("   They should be registered when the server starts."); |  | ||||||
|     } |  | ||||||
|      |  | ||||||
|     return allWork; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| async function verifySearchContent(): Promise<void> { |  | ||||||
|     console.log("\n=== Checking Search Index Content ==="); |  | ||||||
|      |  | ||||||
|     const noteCount = sql.getValue<number>(` |  | ||||||
|         SELECT COUNT(*) FROM notes  |  | ||||||
|         WHERE isDeleted = 0 AND isProtected = 0 |  | ||||||
|     `) || 0; |  | ||||||
|      |  | ||||||
|     const indexedCount = sql.getValue<number>(` |  | ||||||
|         SELECT COUNT(*) FROM note_search_content |  | ||||||
|     `) || 0; |  | ||||||
|      |  | ||||||
|     const tokenCount = sql.getValue<number>(` |  | ||||||
|         SELECT COUNT(DISTINCT noteId) FROM note_tokens |  | ||||||
|     `) || 0; |  | ||||||
|      |  | ||||||
|     console.log(`  Notes eligible for indexing: ${noteCount}`); |  | ||||||
|     console.log(`  Notes in search index: ${indexedCount}`); |  | ||||||
|     console.log(`  Notes with tokens: ${tokenCount}`); |  | ||||||
|      |  | ||||||
|     if (indexedCount === 0 && noteCount > 0) { |  | ||||||
|         console.log("\n⚠️  Search index is empty but there are notes to index."); |  | ||||||
|         console.log("   The migration should populate the index automatically."); |  | ||||||
|     } else if (indexedCount < noteCount) { |  | ||||||
|         console.log("\n⚠️  Some notes are not indexed."); |  | ||||||
|         console.log(`   Missing: ${noteCount - indexedCount} notes`); |  | ||||||
|     } else { |  | ||||||
|         console.log("\n✓ Search index is populated"); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| async function testSearch(): Promise<void> { |  | ||||||
|     console.log("\n=== Testing Search Functionality ==="); |  | ||||||
|      |  | ||||||
|     // becca (the in-memory entity cache) may not be loaded when this script |  |
|     // runs standalone; searches still hit the database directly, but any |  |
|     // becca-dependent post-processing of results may be incomplete |  |
|     if (!becca.loaded) { |  |
|         console.log("  Warning: becca not loaded - proceeding with database-only checks"); |  |
|     } |  |
|      |  | ||||||
|     const searchService = getSQLiteSearchService(); |  | ||||||
|     const searchContext = new SearchContext({ |  | ||||||
|         fastSearch: false, |  | ||||||
|         includeArchivedNotes: false, |  | ||||||
|         fuzzyAttributeSearch: false, |  | ||||||
|         debug: false |  | ||||||
|     }); |  | ||||||
|      |  | ||||||
|     // Test different operators |  | ||||||
|     const tests = [ |  | ||||||
|         { operator: '*=*', tokens: ['note'], description: 'Substring search' }, |  | ||||||
|         { operator: '=*', tokens: ['test'], description: 'Prefix search' }, |  | ||||||
|         { operator: '*=', tokens: ['ing'], description: 'Suffix search' }, |  | ||||||
|         { operator: '~=', tokens: ['nite'], description: 'Fuzzy search' } |  | ||||||
|     ]; |  | ||||||
|      |  | ||||||
|     for (const test of tests) { |  | ||||||
|         try { |  | ||||||
|             console.log(`\n  Testing ${test.description} (${test.operator}):`); |  | ||||||
|             const startTime = Date.now(); |  | ||||||
|             const results = searchService.search(test.tokens, test.operator, searchContext); |  | ||||||
|             const duration = Date.now() - startTime; |  | ||||||
|             const resultCount = Array.isArray(results) ? results.length : results.size || 0; |  | ||||||
|             console.log(`    Found ${resultCount} results in ${duration}ms`); |  | ||||||
|              |  | ||||||
|             if (resultCount > 0) { |  | ||||||
|                 const sampleResults = Array.isArray(results) ? results.slice(0, 3) : Array.from(results).slice(0, 3); |  | ||||||
|                 console.log(`    Sample results: ${sampleResults.join(', ')}...`); |  | ||||||
|             } |  | ||||||
|         } catch (error: any) { |  | ||||||
|             console.log(`    ✗ Error: ${error.message}`); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| async function main() { |  | ||||||
|     console.log("========================================"); |  | ||||||
|     console.log("   SQLite Search Implementation Test"); |  | ||||||
|     console.log("========================================"); |  | ||||||
|      |  | ||||||
|     try { |  | ||||||
|         // Check current database version |  | ||||||
|         const currentDbVersion = parseInt(sql.getValue<string>("SELECT value FROM options WHERE name = 'dbVersion'") || "0", 10); |  |
|         console.log(`\nCurrent database version: ${currentDbVersion}`); |  | ||||||
|         console.log(`Target database version: 235`); |  | ||||||
|          |  | ||||||
|         if (currentDbVersion < 235) { |  | ||||||
|             console.log("\n⚠️  Database needs migration from version " + currentDbVersion + " to 235"); |  | ||||||
|             console.log("   Restart the server to run migrations."); |  | ||||||
|             return; |  | ||||||
|         } |  | ||||||
|          |  | ||||||
|         // Verify tables exist |  | ||||||
|         const tablesExist = await verifyTables(); |  | ||||||
|         if (!tablesExist) { |  | ||||||
|             return; |  | ||||||
|         } |  | ||||||
|          |  | ||||||
|         // Verify functions work |  | ||||||
|         const functionsWork = await verifyFunctions(); |  | ||||||
|          |  | ||||||
|         // Check index content |  | ||||||
|         await verifySearchContent(); |  | ||||||
|          |  | ||||||
|         // Test search if everything is ready |  | ||||||
|         if (tablesExist && functionsWork) { |  | ||||||
|             await testSearch(); |  | ||||||
|         } |  | ||||||
|          |  | ||||||
|         console.log("\n========================================"); |  | ||||||
|         console.log("   Test Complete"); |  | ||||||
|         console.log("========================================"); |  | ||||||
|          |  | ||||||
|         if (tablesExist && functionsWork) { |  | ||||||
|             console.log("\n✅ SQLite search implementation is ready!"); |  | ||||||
|             console.log("\nTo enable SQLite search:"); |  | ||||||
|             console.log("  1. Set searchBackend option to 'sqlite'"); |  | ||||||
|             console.log("  2. Or use the admin API: PUT /api/search-admin/config"); |  | ||||||
|         } else { |  | ||||||
|             console.log("\n❌ SQLite search is not ready. See issues above."); |  | ||||||
|         } |  | ||||||
|          |  | ||||||
|     } catch (error: any) { |  | ||||||
|         console.error("\n❌ Test failed with error:", error); |  | ||||||
|         console.error(error.stack); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Run if executed directly (ESM-safe: require.main does not exist in ES modules) |  |
| if (import.meta.url === `file://${process.argv[1]}`) { |  |
|     main().then(() => process.exit(0)).catch(() => process.exit(1)); |  |
| } |  |
| } |  | ||||||
|  |  | ||||||
| export { verifyTables, verifyFunctions, testSearch }; |  | ||||||
| @@ -136,14 +136,6 @@ export interface OptionDefinitions extends KeyboardShortcutsOptions<KeyboardActi | |||||||
|     redirectBareDomain: boolean; |     redirectBareDomain: boolean; | ||||||
|     showLoginInShareTheme: boolean; |     showLoginInShareTheme: boolean; | ||||||
|  |  | ||||||
|     // Search settings |  | ||||||
|     searchBackend: string; |  | ||||||
|     searchSqliteEnabled: boolean; |  | ||||||
|     searchSqlitePerformanceLogging: boolean; |  | ||||||
|     searchSqliteMaxMemory: number; |  | ||||||
|     searchSqliteBatchSize: number; |  | ||||||
|     searchSqliteAutoRebuild: boolean; |  | ||||||
|  |  | ||||||
|     // AI/LLM integration options |     // AI/LLM integration options | ||||||
|     aiEnabled: boolean; |     aiEnabled: boolean; | ||||||
|     aiProvider: string; |     aiProvider: string; | ||||||
|   | |||||||