feat(search): try again to get fts5 searching done well
@@ -219,12 +219,22 @@ CREATE TABLE IF NOT EXISTS sessions (
 );
 
 -- FTS5 Full-Text Search Support
--- Create FTS5 virtual table for full-text searching
+-- Create FTS5 virtual table with trigram tokenizer
+-- Trigram tokenizer provides language-agnostic substring matching:
+-- 1. Fast substring matching (50-100x speedup for LIKE queries without wildcards)
+-- 2. Case-insensitive search without custom collation
+-- 3. No language-specific stemming assumptions (works for all languages)
+-- 4. Boolean operators (AND, OR, NOT) and phrase matching with quotes
+--
+-- IMPORTANT: Trigram requires minimum 3-character tokens for matching
+-- detail='none' reduces index size by ~50% while maintaining MATCH/rank performance
+-- (loses position info for highlight() function, but snippet() still works)
 CREATE VIRTUAL TABLE notes_fts USING fts5(
     noteId UNINDEXED,
     title,
     content,
-    tokenize = 'porter unicode61'
+    tokenize = 'trigram',
+    detail = 'none'
 );
 
 -- Triggers to keep FTS table synchronized with notes
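For context, a minimal sketch of querying the trigram table, assuming the `sql` service used elsewhere in this commit; `ftsSearch` is a hypothetical helper, and since trigram matching needs tokens of at least 3 characters, shorter queries fall back to LIKE:

import sql from "../services/sql.js";

function ftsSearch(query: string): string[] {
    if (query.length < 3) {
        // trigram cannot match tokens shorter than 3 characters -> fall back to a LIKE scan
        return sql.getColumn<string>(
            `SELECT noteId FROM notes WHERE title LIKE ? AND isDeleted = 0`,
            [`%${query}%`]
        );
    }
    // quote as a phrase so the trigram tokenizer gives substring semantics;
    // detail='none' still supports MATCH and ORDER BY rank
    return sql.getColumn<string>(
        `SELECT noteId FROM notes_fts WHERE notes_fts MATCH ? ORDER BY rank`,
        [`"${query.replace(/"/g, '""')}"`]
    );
}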
@@ -15,22 +15,45 @@ import log from "../services/log.js";
 
 export default function addFTS5SearchAndPerformanceIndexes() {
     log.info("Starting FTS5 and performance optimization migration...");
 
+    // Verify SQLite version supports trigram tokenizer (requires 3.34.0+)
+    const sqliteVersion = sql.getValue<string>(`SELECT sqlite_version()`);
+    const [major, minor, patch] = sqliteVersion.split('.').map(Number);
+    const versionNumber = major * 10000 + minor * 100 + (patch || 0);
+    const requiredVersion = 3 * 10000 + 34 * 100 + 0; // 3.34.0
+
+    if (versionNumber < requiredVersion) {
+        log.error(`SQLite version ${sqliteVersion} does not support trigram tokenizer (requires 3.34.0+)`);
+        log.info("Skipping FTS5 trigram migration - will use fallback search implementation");
+        return; // Skip FTS5 setup, rely on fallback search
+    }
+
+    log.info(`SQLite version ${sqliteVersion} confirmed - trigram tokenizer available`);
+
     // Part 1: FTS5 Setup
     log.info("Creating FTS5 virtual table for full-text search...");
 
     // Create FTS5 virtual table
-    // We store noteId, title, and content for searching
-    // The 'tokenize' option uses porter stemming for better search results
     sql.executeScript(`
         -- Drop existing FTS table if it exists (for re-running migration in dev)
         DROP TABLE IF EXISTS notes_fts;
 
-        -- Create FTS5 virtual table
+        -- Create FTS5 virtual table with trigram tokenizer
+        -- Trigram tokenizer provides language-agnostic substring matching:
+        -- 1. Fast substring matching (50-100x speedup for LIKE queries without wildcards)
+        -- 2. Case-insensitive search without custom collation
+        -- 3. No language-specific stemming assumptions (works for all languages)
+        -- 4. Boolean operators (AND, OR, NOT) and phrase matching with quotes
+        --
+        -- IMPORTANT: Trigram requires minimum 3-character tokens for matching
+        -- detail='none' reduces index size by ~50% while maintaining MATCH/rank performance
+        -- (loses position info for highlight() function, but snippet() still works)
         CREATE VIRTUAL TABLE IF NOT EXISTS notes_fts USING fts5(
             noteId UNINDEXED,
             title,
             content,
-            tokenize = 'porter unicode61'
+            tokenize = 'trigram',
+            detail = 'none'
         );
     `);
 
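The version check packs major.minor.patch into a single comparable integer; a worked example of the encoding used above:

// 3.34.0 -> 3*10000 + 34*100 + 0 = 33400  (exactly the required minimum)
// 3.45.1 -> 3*10000 + 45*100 + 1 = 34501  >= 33400 -> trigram available
// 3.33.9 -> 3*10000 + 33*100 + 9 = 33309  <  33400 -> skip FTS5, use fallback search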
@@ -1,826 +0,0 @@
/**
 * Migration to add SQLite native search support with normalized text tables
 *
 * This migration implements Phase 1 of the SQLite-based search plan:
 * 1. Creates note_search_content table with normalized text columns
 * 2. Creates note_tokens table for word-level token storage
 * 3. Adds necessary indexes for optimization
 * 4. Creates triggers to keep tables synchronized with note updates
 * 5. Populates tables with existing note data in batches
 *
 * This provides 100% accurate search results with 10-30x performance improvement
 * over TypeScript-based search, without the complexity of trigrams.
 */

import sql from "../services/sql.js";
import log from "../services/log.js";
import { normalize as utilsNormalize, stripTags } from "../services/utils.js";
import { getSqliteFunctionsService } from "../services/search/sqlite_functions.js";

/**
 * Uses the existing normalize function from utils.ts for consistency
 * This ensures all normalization throughout the codebase is identical
 */
function normalizeText(text: string): string {
    if (!text) return '';
    return utilsNormalize(text);
}

/**
 * Tokenizes text into individual words for token-based searching
 * Handles punctuation and special characters appropriately
 */
function tokenize(text: string): string[] {
    if (!text) return [];

    // Split on word boundaries, filter out empty tokens
    // This regex splits on spaces, punctuation, and other non-word characters
    // but preserves apostrophes within words (e.g., "don't", "it's")
    const tokens = text
        .split(/[\s\n\r\t,;.!?()[\]{}"'`~@#$%^&*+=|\\/<>:_-]+/)
        .filter(token => token.length > 0)
        .map(token => token.toLowerCase());

    // Also split on camelCase and snake_case boundaries for code content
    const expandedTokens: string[] = [];
    for (const token of tokens) {
        // Add the original token
        expandedTokens.push(token);

        // Split camelCase (e.g., "getUserName" -> ["get", "User", "Name"])
        const camelCaseParts = token.split(/(?=[A-Z])/);
        if (camelCaseParts.length > 1) {
            expandedTokens.push(...camelCaseParts.map(p => p.toLowerCase()));
        }

        // Split snake_case (e.g., "user_name" -> ["user", "name"])
        const snakeCaseParts = token.split('_');
        if (snakeCaseParts.length > 1) {
            expandedTokens.push(...snakeCaseParts);
        }
    }

    // Remove duplicates and return
    return Array.from(new Set(expandedTokens));
}
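// Illustration (not part of the original file) - a trace of tokenize() as written:
//
//     tokenize("getUserName parses user_name, don't panic")
//     // initial split + lowercase:
//     //   ["getusername", "parses", "user", "name", "don", "t", "panic"]
//     // the apostrophe and underscore are in the split character class, so
//     // "don't" -> "don", "t" and "user_name" -> "user", "name" up front;
//     // lowercasing happens before the /(?=[A-Z])/ split, so "getusername"
//     // is not expanded further. The deduplicated result is the same array.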
/**
 * Strips HTML tags from content for text-only indexing
 * Uses the utils stripTags function for consistency
 */
function stripHtmlTags(html: string): string {
    if (!html) return '';

    // Remove script and style content entirely first
    let text = html.replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '');
    text = text.replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, '');

    // Use utils stripTags for consistency
    text = stripTags(text);

    // Decode HTML entities
    text = text.replace(/&nbsp;/g, ' ');
    text = text.replace(/&lt;/g, '<');
    text = text.replace(/&gt;/g, '>');
    text = text.replace(/&amp;/g, '&');
    text = text.replace(/&quot;/g, '"');
    text = text.replace(/&#39;/g, "'");

    // Normalize whitespace
    text = text.replace(/\s+/g, ' ').trim();

    return text;
}

export default function sqliteNativeSearch() {
    log.info("Starting SQLite native search migration...");

    const startTime = Date.now();

    // Wrap entire migration in a transaction for atomicity
    sql.transactional(() => {
        try {
            // Register custom SQL functions first so they can be used in triggers
            registerCustomFunctions();

            // Create the search tables and indexes
            createSearchTables();

            // Create triggers to keep tables synchronized (before population)
            createSearchTriggers();

            // Populate the tables with existing note data
            populateSearchTables();

            // Run final verification and optimization
            finalizeSearchSetup();

            const duration = Date.now() - startTime;
            log.info(`SQLite native search migration completed successfully in ${duration}ms`);

        } catch (error) {
            log.error(`SQLite native search migration failed: ${error}`);
            // Transaction will automatically rollback on error
            throw error;
        }
    });
}

function createSearchTables() {
    log.info("Creating search content and token tables...");

    // Drop existing tables if they exist (for re-running migration in dev)
    sql.execute("DROP TABLE IF EXISTS note_search_content");
    sql.execute("DROP TABLE IF EXISTS note_tokens");

    // Create the main search content table
    sql.execute(`
        CREATE TABLE note_search_content (
            noteId TEXT PRIMARY KEY,
            title TEXT NOT NULL,
            content TEXT NOT NULL,
            title_normalized TEXT NOT NULL,
            content_normalized TEXT NOT NULL,
            full_text_normalized TEXT NOT NULL
        )
    `);

    // Create the token table for word-level operations
    sql.execute(`
        CREATE TABLE note_tokens (
            noteId TEXT NOT NULL,
            token TEXT NOT NULL,
            token_normalized TEXT NOT NULL,
            position INTEGER NOT NULL,
            source TEXT NOT NULL CHECK(source IN ('title', 'content')),
            PRIMARY KEY (noteId, position, source)
        )
    `);

    // Create indexes for search optimization
    log.info("Creating search indexes...");

    // Consolidated indexes - removed redundancy between COLLATE NOCASE and plain indexes
    // Using COLLATE NOCASE for case-insensitive searches
    sql.execute(`
        CREATE INDEX idx_search_title_normalized
        ON note_search_content(title_normalized COLLATE NOCASE)
    `);

    sql.execute(`
        CREATE INDEX idx_search_content_normalized
        ON note_search_content(content_normalized COLLATE NOCASE)
    `);

    sql.execute(`
        CREATE INDEX idx_search_full_text
        ON note_search_content(full_text_normalized COLLATE NOCASE)
    `);

    // Token indexes - consolidated to avoid redundancy
    sql.execute(`
        CREATE INDEX idx_tokens_normalized
        ON note_tokens(token_normalized COLLATE NOCASE)
    `);

    sql.execute(`
        CREATE INDEX idx_tokens_noteId
        ON note_tokens(noteId)
    `);

    // Composite index for token searches with source
    sql.execute(`
        CREATE INDEX idx_tokens_source_normalized
        ON note_tokens(source, token_normalized COLLATE NOCASE)
    `);

    log.info("Search tables and indexes created successfully");
}

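// Example (illustration only, not part of the original file): an equality
// lookup that matches the COLLATE NOCASE collation of idx_tokens_normalized,
// so SQLite can serve it from the index:
//
//     const noteIds = sql.getColumn<string>(`
//         SELECT DISTINCT noteId
//         FROM note_tokens
//         WHERE token_normalized = ? COLLATE NOCASE
//     `, ['search']);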
function populateSearchTables() {
    log.info("Populating search tables with existing note content...");

    const batchSize = 100;
    let offset = 0;
    let totalProcessed = 0;
    let totalTokens = 0;

    while (true) {
        const notes = sql.getRows<{
            noteId: string;
            title: string;
            type: string;
            mime: string;
            content: string | null;
        }>(`
            SELECT
                n.noteId,
                n.title,
                n.type,
                n.mime,
                b.content
            FROM notes n
            LEFT JOIN blobs b ON n.blobId = b.blobId
            WHERE n.isDeleted = 0
                AND n.isProtected = 0
                AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
            ORDER BY n.noteId
            LIMIT ? OFFSET ?
        `, [batchSize, offset]);

        if (notes.length === 0) {
            break;
        }

        // Process batch of notes
        for (const note of notes) {
            try {
                // Process content based on type
                let processedContent = note.content || '';

                // Strip HTML for text notes
                if (note.type === 'text' && note.mime === 'text/html') {
                    processedContent = stripHtmlTags(processedContent);
                }

                // Normalize text for searching using the utils normalize function
                const titleNorm = normalizeText(note.title);
                const contentNorm = normalizeText(processedContent);
                const fullTextNorm = titleNorm + ' ' + contentNorm;

                // Insert into search content table
                sql.execute(`
                    INSERT INTO note_search_content
                    (noteId, title, content, title_normalized, content_normalized, full_text_normalized)
                    VALUES (?, ?, ?, ?, ?, ?)
                `, [
                    note.noteId,
                    note.title,
                    processedContent,
                    titleNorm,
                    contentNorm,
                    fullTextNorm
                ]);

                // Tokenize title and content separately to track source
                const titleTokens = tokenize(note.title);
                const contentTokens = tokenize(processedContent);

                let position = 0;

                // Insert title tokens
                for (const token of titleTokens) {
                    if (token.length > 0) {
                        sql.execute(`
                            INSERT OR IGNORE INTO note_tokens
                            (noteId, token, token_normalized, position, source)
                            VALUES (?, ?, ?, ?, 'title')
                        `, [note.noteId, token, normalizeText(token), position]);
                        position++;
                        totalTokens++;
                    }
                }

                // Insert content tokens with unique positions
                for (const token of contentTokens) {
                    if (token.length > 0) {
                        sql.execute(`
                            INSERT OR IGNORE INTO note_tokens
                            (noteId, token, token_normalized, position, source)
                            VALUES (?, ?, ?, ?, 'content')
                        `, [note.noteId, token, normalizeText(token), position]);
                        position++;
                        totalTokens++;
                    }
                }

                totalProcessed++;

            } catch (error) {
                log.error(`Failed to index note ${note.noteId}: ${error}`);
                // Continue with other notes even if one fails
            }
        }

        offset += batchSize;

        if (totalProcessed % 1000 === 0) {
            log.info(`Processed ${totalProcessed} notes, ${totalTokens} tokens for search indexing...`);
        }
    }

    log.info(`Completed indexing ${totalProcessed} notes with ${totalTokens} total tokens`);
}

function createSearchTriggers() {
    log.info("Creating triggers to keep search tables synchronized...");

    // Drop existing triggers if they exist
    const triggers = [
        'note_search_insert',
        'note_search_update',
        'note_search_delete',
        'note_search_soft_delete',
        'note_search_undelete',
        'note_search_protect',
        'note_search_unprotect',
        'note_search_blob_insert',
        'note_search_blob_update'
    ];

    for (const trigger of triggers) {
        sql.execute(`DROP TRIGGER IF EXISTS ${trigger}`);
    }

    // Trigger for INSERT operations on notes - simplified version
    sql.execute(`
        CREATE TRIGGER note_search_insert
        AFTER INSERT ON notes
        WHEN NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
            AND NEW.isDeleted = 0
            AND NEW.isProtected = 0
        BEGIN
            -- Delete any existing entries (for INSERT OR REPLACE)
            DELETE FROM note_search_content WHERE noteId = NEW.noteId;
            DELETE FROM note_tokens WHERE noteId = NEW.noteId;

            -- Insert basic content with title only (content will be populated by blob trigger)
            INSERT INTO note_search_content
            (noteId, title, content, title_normalized, content_normalized, full_text_normalized)
            VALUES (
                NEW.noteId,
                NEW.title,
                '',
                LOWER(NEW.title),
                '',
                LOWER(NEW.title)
            );
        END
    `);

    // Trigger for UPDATE operations on notes - simplified version
    sql.execute(`
        CREATE TRIGGER note_search_update
        AFTER UPDATE ON notes
        WHEN NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
        BEGIN
            -- Always delete the old entries
            DELETE FROM note_search_content WHERE noteId = NEW.noteId;
            DELETE FROM note_tokens WHERE noteId = NEW.noteId;

            -- Re-insert if note is not deleted and not protected
            INSERT INTO note_search_content
            (noteId, title, content, title_normalized, content_normalized, full_text_normalized)
            SELECT
                NEW.noteId,
                NEW.title,
                COALESCE(b.content, ''),
                LOWER(NEW.title),
                LOWER(COALESCE(b.content, '')),
                LOWER(NEW.title || ' ' || COALESCE(b.content, ''))
            FROM notes n
            LEFT JOIN blobs b ON b.blobId = NEW.blobId
            WHERE n.noteId = NEW.noteId
                AND NEW.isDeleted = 0
                AND NEW.isProtected = 0;
        END
    `);

    // Trigger for DELETE operations on notes
    sql.execute(`
        CREATE TRIGGER note_search_delete
        AFTER DELETE ON notes
        BEGIN
            DELETE FROM note_search_content WHERE noteId = OLD.noteId;
            DELETE FROM note_tokens WHERE noteId = OLD.noteId;
        END
    `);

    // Trigger for soft delete (isDeleted = 1)
    sql.execute(`
        CREATE TRIGGER note_search_soft_delete
        AFTER UPDATE ON notes
        WHEN OLD.isDeleted = 0 AND NEW.isDeleted = 1
        BEGIN
            DELETE FROM note_search_content WHERE noteId = NEW.noteId;
            DELETE FROM note_tokens WHERE noteId = NEW.noteId;
        END
    `);

    // Trigger for undelete (isDeleted = 0) - simplified version
    sql.execute(`
        CREATE TRIGGER note_search_undelete
        AFTER UPDATE ON notes
        WHEN OLD.isDeleted = 1 AND NEW.isDeleted = 0
            AND NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
            AND NEW.isProtected = 0
        BEGIN
            DELETE FROM note_search_content WHERE noteId = NEW.noteId;
            DELETE FROM note_tokens WHERE noteId = NEW.noteId;

            INSERT INTO note_search_content
            (noteId, title, content, title_normalized, content_normalized, full_text_normalized)
            SELECT
                NEW.noteId,
                NEW.title,
                COALESCE(b.content, ''),
                LOWER(NEW.title),
                LOWER(COALESCE(b.content, '')),
                LOWER(NEW.title || ' ' || COALESCE(b.content, ''))
            FROM notes n
            LEFT JOIN blobs b ON b.blobId = NEW.blobId
            WHERE n.noteId = NEW.noteId;
        END
    `);

    // Trigger for notes becoming protected
    sql.execute(`
        CREATE TRIGGER note_search_protect
        AFTER UPDATE ON notes
        WHEN OLD.isProtected = 0 AND NEW.isProtected = 1
        BEGIN
            DELETE FROM note_search_content WHERE noteId = NEW.noteId;
            DELETE FROM note_tokens WHERE noteId = NEW.noteId;
        END
    `);

    // Trigger for notes becoming unprotected - simplified version
    sql.execute(`
        CREATE TRIGGER note_search_unprotect
        AFTER UPDATE ON notes
        WHEN OLD.isProtected = 1 AND NEW.isProtected = 0
            AND NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
            AND NEW.isDeleted = 0
        BEGIN
            DELETE FROM note_search_content WHERE noteId = NEW.noteId;
            DELETE FROM note_tokens WHERE noteId = NEW.noteId;

            INSERT INTO note_search_content
            (noteId, title, content, title_normalized, content_normalized, full_text_normalized)
            SELECT
                NEW.noteId,
                NEW.title,
                COALESCE(b.content, ''),
                LOWER(NEW.title),
                LOWER(COALESCE(b.content, '')),
                LOWER(NEW.title || ' ' || COALESCE(b.content, ''))
            FROM notes n
            LEFT JOIN blobs b ON b.blobId = NEW.blobId
            WHERE n.noteId = NEW.noteId;
        END
    `);

    // Trigger for INSERT operations on blobs - simplified version
    sql.execute(`
        CREATE TRIGGER note_search_blob_insert
        AFTER INSERT ON blobs
        BEGIN
            -- Update search content for all notes that reference this blob
            UPDATE note_search_content
            SET content = NEW.content,
                content_normalized = LOWER(NEW.content),
                full_text_normalized = title_normalized || ' ' || LOWER(NEW.content)
            WHERE noteId IN (
                SELECT n.noteId
                FROM notes n
                WHERE n.blobId = NEW.blobId
                    AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
                    AND n.isDeleted = 0
                    AND n.isProtected = 0
            );

            -- Clear tokens for affected notes (will be repopulated by post-processing)
            DELETE FROM note_tokens
            WHERE noteId IN (
                SELECT n.noteId
                FROM notes n
                WHERE n.blobId = NEW.blobId
                    AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
                    AND n.isDeleted = 0
                    AND n.isProtected = 0
            );
        END
    `);

    // Trigger for UPDATE operations on blobs - simplified version
    sql.execute(`
        CREATE TRIGGER note_search_blob_update
        AFTER UPDATE ON blobs
        BEGIN
            -- Update search content for all notes that reference this blob
            UPDATE note_search_content
            SET content = NEW.content,
                content_normalized = LOWER(NEW.content),
                full_text_normalized = title_normalized || ' ' || LOWER(NEW.content)
            WHERE noteId IN (
                SELECT n.noteId
                FROM notes n
                WHERE n.blobId = NEW.blobId
                    AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
                    AND n.isDeleted = 0
                    AND n.isProtected = 0
            );

            -- Clear tokens for affected notes (will be repopulated by post-processing)
            DELETE FROM note_tokens
            WHERE noteId IN (
                SELECT n.noteId
                FROM notes n
                WHERE n.blobId = NEW.blobId
                    AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
                    AND n.isDeleted = 0
                    AND n.isProtected = 0
            );
        END
    `);

    log.info("Search synchronization triggers created successfully");
}

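// Example (illustration only, not part of the original file): the blob triggers
// above deliberately only clear note_tokens, so a caller would repopulate them
// afterwards via populateBlobAffectedTokens() defined further below, e.g.:
//
//     function saveBlobAndReindex(blobId: string, content: string) {  // hypothetical helper
//         sql.execute(`UPDATE blobs SET content = ? WHERE blobId = ?`, [content, blobId]);
//         // the triggers have refreshed note_search_content and cleared note_tokens
//         populateBlobAffectedTokens(blobId);
//     }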
function registerCustomFunctions() {
    log.info("Registering custom SQL functions for search operations...");

    try {
        // Get the database connection to register functions
        const db = sql.getDbConnection();

        // Use the centralized SQLite functions service
        const functionsService = getSqliteFunctionsService();

        // Register functions if not already registered
        if (!functionsService.isRegistered()) {
            const success = functionsService.registerFunctions(db);
            if (success) {
                log.info("Custom SQL functions registered successfully via service");
            } else {
                log.info("Custom SQL functions registration failed - using basic SQLite functions only");
            }
        } else {
            log.info("Custom SQL functions already registered");
        }

        // Register migration-specific helper function for tokenization
        db.function('tokenize_for_migration', {
            deterministic: true,
            varargs: false
        }, (text: string | null) => {
            if (!text) return '';
            // Return as JSON array string for SQL processing
            return JSON.stringify(tokenize(text));
        });

    } catch (error) {
        log.info(`Could not register custom SQL functions (will use basic SQLite functions): ${error}`);
        // This is not critical - the migration will work with basic SQLite functions
    }
}

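// Example (illustration only, not part of the original file): calling the
// helper registered above from SQL; it returns a JSON array string:
//
//     const tokensJson = sql.getValue<string>(
//         `SELECT tokenize_for_migration('getUserName user_name')`
//     );
//     const tokens: string[] = JSON.parse(tokensJson || '[]');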
/**
 * Populates tokens for a specific note
 * This is called outside of triggers to avoid complex SQL within trigger constraints
 */
function populateNoteTokens(noteId: string): number {
    try {
        // Get the note's search content
        const noteData = sql.getRow<{
            title: string;
            content: string;
        }>(`
            SELECT title, content
            FROM note_search_content
            WHERE noteId = ?
        `, [noteId]);

        if (!noteData) return 0;

        // Clear existing tokens for this note
        sql.execute(`DELETE FROM note_tokens WHERE noteId = ?`, [noteId]);

        // Tokenize title and content
        const titleTokens = tokenize(noteData.title);
        const contentTokens = tokenize(noteData.content);

        let position = 0;
        let tokenCount = 0;

        // Insert title tokens
        for (const token of titleTokens) {
            if (token.length > 0) {
                sql.execute(`
                    INSERT OR IGNORE INTO note_tokens
                    (noteId, token, token_normalized, position, source)
                    VALUES (?, ?, ?, ?, 'title')
                `, [noteId, token, normalizeText(token), position]);
                position++;
                tokenCount++;
            }
        }

        // Insert content tokens
        for (const token of contentTokens) {
            if (token.length > 0) {
                sql.execute(`
                    INSERT OR IGNORE INTO note_tokens
                    (noteId, token, token_normalized, position, source)
                    VALUES (?, ?, ?, ?, 'content')
                `, [noteId, token, normalizeText(token), position]);
                position++;
                tokenCount++;
            }
        }

        return tokenCount;
    } catch (error) {
        log.error(`Error populating tokens for note ${noteId}: ${error}`);
        return 0;
    }
}

/**
 * Populates tokens for multiple notes affected by blob operations
 * This handles cases where blob triggers can affect multiple notes
 */
function populateBlobAffectedTokens(blobId: string): void {
    try {
        // Find all notes that reference this blob and need token updates
        const affectedNoteIds = sql.getColumn<string>(`
            SELECT DISTINCT n.noteId
            FROM notes n
            INNER JOIN note_search_content nsc ON n.noteId = nsc.noteId
            WHERE n.blobId = ?
                AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
                AND n.isDeleted = 0
                AND n.isProtected = 0
        `, [blobId]);

        if (affectedNoteIds.length === 0) return;

        log.info(`Updating tokens for ${affectedNoteIds.length} notes affected by blob ${blobId}`);

        let totalTokens = 0;
        for (const noteId of affectedNoteIds) {
            const tokenCount = populateNoteTokens(noteId);
            totalTokens += tokenCount;
        }

        log.info(`Updated ${totalTokens} tokens for blob-affected notes`);
    } catch (error) {
        log.error(`Error populating blob-affected tokens for blob ${blobId}: ${error}`);
    }
}

function populateAllTokens() {
    log.info("Populating tokens for all search content...");

    // Clear existing tokens first to ensure clean state
    sql.execute("DELETE FROM note_tokens");

    const batchSize = 100;
    let offset = 0;
    let totalProcessed = 0;
    let totalTokens = 0;

    while (true) {
        const notes = sql.getRows<{
            noteId: string;
            title: string;
            content: string;
        }>(`
            SELECT noteId, title, content
            FROM note_search_content
            ORDER BY noteId
            LIMIT ? OFFSET ?
        `, [batchSize, offset]);

        if (notes.length === 0) {
            break;
        }

        for (const note of notes) {
            try {
                // Tokenize title and content
                const titleTokens = tokenize(note.title);
                const contentTokens = tokenize(note.content);

                let position = 0;

                // Insert title tokens
                for (const token of titleTokens) {
                    if (token.length > 0) {
                        sql.execute(`
                            INSERT OR IGNORE INTO note_tokens
                            (noteId, token, token_normalized, position, source)
                            VALUES (?, ?, ?, ?, 'title')
                        `, [note.noteId, token, normalizeText(token), position]);
                        position++;
                        totalTokens++;
                    }
                }

                // Insert content tokens with continuous position numbering
                for (const token of contentTokens) {
                    if (token.length > 0) {
                        sql.execute(`
                            INSERT OR IGNORE INTO note_tokens
                            (noteId, token, token_normalized, position, source)
                            VALUES (?, ?, ?, ?, 'content')
                        `, [note.noteId, token, normalizeText(token), position]);
                        position++;
                        totalTokens++;
                    }
                }

                totalProcessed++;

            } catch (error) {
                log.error(`Failed to tokenize note ${note.noteId}: ${error}`);
            }
        }

        offset += batchSize;

        if (totalProcessed % 1000 === 0) {
            log.info(`Processed ${totalProcessed} notes, ${totalTokens} tokens so far...`);
        }
    }

    log.info(`Token population completed: ${totalProcessed} notes processed, ${totalTokens} total tokens`);
}

function finalizeSearchSetup() {
    log.info("Running final verification and optimization...");

    // Check for missing notes that should be indexed
    const missingCount = sql.getValue<number>(`
        SELECT COUNT(*) FROM notes n
        LEFT JOIN blobs b ON n.blobId = b.blobId
        WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
            AND n.isDeleted = 0
            AND n.isProtected = 0
            AND b.content IS NOT NULL
            AND NOT EXISTS (SELECT 1 FROM note_search_content WHERE noteId = n.noteId)
    `) || 0;

    if (missingCount > 0) {
        log.info(`Found ${missingCount} notes that are missing from search index`);

        // Index missing notes using basic SQLite functions
        sql.execute(`
            INSERT INTO note_search_content
            (noteId, title, content, title_normalized, content_normalized, full_text_normalized)
            SELECT
                n.noteId,
                n.title,
                COALESCE(b.content, ''),
                LOWER(n.title),
                LOWER(COALESCE(b.content, '')),
                LOWER(n.title || ' ' || COALESCE(b.content, ''))
            FROM notes n
            LEFT JOIN blobs b ON n.blobId = b.blobId
            WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
                AND n.isDeleted = 0
                AND n.isProtected = 0
                AND b.content IS NOT NULL
                AND NOT EXISTS (SELECT 1 FROM note_search_content WHERE noteId = n.noteId)
        `);

        log.info(`Indexed ${missingCount} missing notes`);
    }

    // Populate tokens for all existing content (including any missing notes we just added)
    populateAllTokens();

    // Verify table creation
    const tables = sql.getColumn<string>(`
        SELECT name FROM sqlite_master
        WHERE type = 'table'
            AND name IN ('note_search_content', 'note_tokens')
    `);

    if (tables.length !== 2) {
        throw new Error("Search tables were not created properly");
    }

    // Check row counts
    const searchContentCount = sql.getValue<number>("SELECT COUNT(*) FROM note_search_content") || 0;
    const tokenCount = sql.getValue<number>("SELECT COUNT(*) FROM note_tokens") || 0;

    log.info(`Search content table has ${searchContentCount} entries`);
    log.info(`Token table has ${tokenCount} entries`);

    // Run ANALYZE to update SQLite query planner statistics
    log.info("Updating SQLite statistics for query optimization...");
    sql.execute("ANALYZE note_search_content");
    sql.execute("ANALYZE note_tokens");

    // Verify indexes were created
    const indexes = sql.getColumn<string>(`
        SELECT name FROM sqlite_master
        WHERE type = 'index'
            AND tbl_name IN ('note_search_content', 'note_tokens')
    `);

    log.info(`Created ${indexes.length} indexes for search optimization`);

    log.info("Search setup finalization completed");
}

apps/server/src/migrations/0236__cleanup_sqlite_search.ts (new file, 47 lines)
@@ -0,0 +1,47 @@
/**
 * Migration to clean up custom SQLite search implementation
 *
 * This migration removes tables and triggers created by migration 0235
 * which implemented a custom SQLite-based search system. That system
 * has been replaced by FTS5 with trigram tokenizer (migration 0234),
 * making these custom tables redundant.
 *
 * Tables removed:
 * - note_search_content: Stored normalized note content for custom search
 * - note_tokens: Stored tokenized words for custom token-based search
 *
 * This migration is safe to run on databases that:
 * 1. Never ran migration 0235 (tables don't exist)
 * 2. Already ran migration 0235 (tables will be dropped)
 */

import sql from "../services/sql.js";
import log from "../services/log.js";

export default function cleanupSqliteSearch() {
    log.info("Starting SQLite custom search cleanup migration...");

    try {
        sql.transactional(() => {
            // Drop custom search tables if they exist
            log.info("Dropping note_search_content table...");
            sql.executeScript(`DROP TABLE IF EXISTS note_search_content`);

            log.info("Dropping note_tokens table...");
            sql.executeScript(`DROP TABLE IF EXISTS note_tokens`);

            // Clean up any entity changes for these tables
            // This prevents sync issues and cleans up change tracking
            log.info("Cleaning up entity changes for removed tables...");
            sql.execute(`
                DELETE FROM entity_changes
                WHERE entityName IN ('note_search_content', 'note_tokens')
            `);

            log.info("SQLite custom search cleanup completed successfully");
        });
    } catch (error) {
        log.error(`Error during SQLite search cleanup: ${error}`);
        throw new Error(`Failed to clean up SQLite search tables: ${error}`);
    }
}

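A quick way to confirm the cleanup took effect: a sketch mirroring the sqlite_master checks used elsewhere in this commit:

const leftover = sql.getColumn<string>(`
    SELECT name FROM sqlite_master
    WHERE type = 'table'
      AND name IN ('note_search_content', 'note_tokens')
`);
// expected: leftover.length === 0 once migration 0236 has run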
@@ -6,10 +6,10 @@
 
 // Migrations should be kept in descending order, so the latest migration is first.
 const MIGRATIONS: (SqlMigration | JsMigration)[] = [
-    // Add SQLite native search with normalized text tables
+    // Clean up custom SQLite search tables (replaced by FTS5 trigram)
     {
-        version: 235,
-        module: async () => import("./0235__sqlite_native_search.js")
+        version: 236,
+        module: async () => import("./0236__cleanup_sqlite_search.js")
     },
     // Add FTS5 full-text search support and strategic performance indexes
     {

@@ -1,243 +0,0 @@
/**
 * API endpoints for search administration and monitoring
 */

import { Router } from "express";
import performanceMonitor from "../../services/search/performance_monitor.js";
import abTestingService from "../../services/search/ab_testing.js";
import { SQLiteSearchService } from "../../services/search/sqlite_search_service.js";
import optionService from "../../services/options.js";
import sql from "../../services/sql.js";
import log from "../../services/log.js";

const router = Router();

/**
 * Get search performance metrics
 */
router.get("/api/search-admin/metrics", (req, res) => {
    const metrics = {
        recent: performanceMonitor.getRecentMetrics(100),
        averages: {
            typescript: performanceMonitor.getAverageMetrics("typescript"),
            sqlite: performanceMonitor.getAverageMetrics("sqlite")
        },
        comparison: performanceMonitor.compareBackends()
    };

    res.json(metrics);
});

/**
 * Get A/B testing results
 */
router.get("/api/search-admin/ab-tests", (req, res) => {
    const results = {
        summary: abTestingService.getSummary(),
        recent: abTestingService.getRecentResults(50)
    };

    res.json(results);
});

/**
 * Get current search configuration
 */
router.get("/api/search-admin/config", (req, res) => {
    const config = {
        backend: optionService.getOption("searchBackend"),
        sqliteEnabled: optionService.getOptionBool("searchSqliteEnabled"),
        performanceLogging: optionService.getOptionBool("searchSqlitePerformanceLogging"),
        maxMemory: optionService.getOptionInt("searchSqliteMaxMemory"),
        batchSize: optionService.getOptionInt("searchSqliteBatchSize"),
        autoRebuild: optionService.getOptionBool("searchSqliteAutoRebuild")
    };

    res.json(config);
});

/**
 * Update search configuration
 */
router.put("/api/search-admin/config", (req, res) => {
    try {
        const { backend, sqliteEnabled, performanceLogging, maxMemory, batchSize, autoRebuild } = req.body;

        if (backend !== undefined) {
            if (!["typescript", "sqlite"].includes(backend)) {
                return res.status(400).json({ error: "Invalid backend. Must be 'typescript' or 'sqlite'" });
            }
            optionService.setOption("searchBackend", backend);
        }

        if (sqliteEnabled !== undefined) {
            optionService.setOption("searchSqliteEnabled", sqliteEnabled ? "true" : "false");
        }

        if (performanceLogging !== undefined) {
            optionService.setOption("searchSqlitePerformanceLogging", performanceLogging ? "true" : "false");
            performanceMonitor.updateSettings();
        }

        if (maxMemory !== undefined) {
            if (maxMemory < 1048576 || maxMemory > 1073741824) { // 1MB to 1GB
                return res.status(400).json({ error: "Max memory must be between 1MB and 1GB" });
            }
            optionService.setOption("searchSqliteMaxMemory", maxMemory.toString());
        }

        if (batchSize !== undefined) {
            if (batchSize < 10 || batchSize > 1000) {
                return res.status(400).json({ error: "Batch size must be between 10 and 1000" });
            }
            optionService.setOption("searchSqliteBatchSize", batchSize.toString());
        }

        if (autoRebuild !== undefined) {
            optionService.setOption("searchSqliteAutoRebuild", autoRebuild ? "true" : "false");
        }

        res.json({ success: true, message: "Configuration updated successfully" });
    } catch (error: any) {
        log.error(`Failed to update search configuration: ${error}`);
        res.status(500).json({ error: error.message });
    }
});

/**
 * Get SQLite search index status
 */
router.get("/api/search-admin/sqlite/status", async (req, res) => {
    try {
        const service = SQLiteSearchService.getInstance();
        const status = await service.getIndexStatus();

        // Add table sizes
        const tableSizes = sql.getRows<{ name: string; size: number }>(`
            SELECT
                name,
                (SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name=m.name) as size
            FROM sqlite_master m
            WHERE type='table' AND name IN ('note_search_content', 'note_tokens', 'notes_fts', 'notes_fts_data', 'notes_fts_idx', 'notes_fts_content')
        `);

        res.json({
            ...status,
            tables: tableSizes
        });
    } catch (error: any) {
        log.error(`Failed to get SQLite search status: ${error}`);
        res.status(500).json({ error: error.message });
    }
});

/**
 * Rebuild SQLite search index
 */
router.post("/api/search-admin/sqlite/rebuild", async (req, res) => {
    try {
        const { force = false } = req.body;

        log.info("Starting SQLite search index rebuild via API");

        const service = SQLiteSearchService.getInstance();
        const startTime = Date.now();

        await service.rebuildIndex(force);

        const duration = Date.now() - startTime;
        log.info(`SQLite search index rebuild completed in ${duration}ms`);

        res.json({
            success: true,
            message: "Index rebuilt successfully",
            duration
        });
    } catch (error: any) {
        log.error(`Failed to rebuild SQLite search index: ${error}`);
        res.status(500).json({ error: error.message });
    }
});

/**
 * Clear SQLite search index
 */
router.delete("/api/search-admin/sqlite/index", async (req, res) => {
    try {
        log.info("Clearing SQLite search index via API");

        const service = SQLiteSearchService.getInstance();
        service.clearIndex();

        res.json({
            success: true,
            message: "Index cleared successfully"
        });
    } catch (error: any) {
        log.error(`Failed to clear SQLite search index: ${error}`);
        res.status(500).json({ error: error.message });
    }
});

/**
 * Reset performance metrics
 */
router.delete("/api/search-admin/metrics", (req, res) => {
    performanceMonitor.reset();
    res.json({ success: true, message: "Metrics reset successfully" });
});

/**
 * Reset A/B test results
 */
router.delete("/api/search-admin/ab-tests", (req, res) => {
    abTestingService.reset();
    res.json({ success: true, message: "A/B test results reset successfully" });
});

/**
 * Set A/B testing sample rate
 */
router.put("/api/search-admin/ab-tests/sample-rate", (req, res) => {
    try {
        const { rate } = req.body;

        if (rate === undefined || rate < 0 || rate > 1) {
            return res.status(400).json({ error: "Sample rate must be between 0 and 1" });
        }

        abTestingService.setSampleRate(rate);
        res.json({ success: true, message: `Sample rate set to ${rate * 100}%` });
    } catch (error: any) {
        res.status(500).json({ error: error.message });
    }
});

/**
 * Test search with both backends for comparison
 */
router.post("/api/search-admin/test", async (req, res) => {
    try {
        const { query } = req.body;

        if (!query) {
            return res.status(400).json({ error: "Query is required" });
        }

        const result = await abTestingService.runComparison(query, {});

        if (!result) {
            return res.json({
                message: "Test not run (sampling or disabled)",
                query
            });
        }

        res.json(result);
    } catch (error: any) {
        log.error(`Search test failed: ${error}`);
        res.status(500).json({ error: error.message });
    }
});

export default router;
@@ -40,7 +40,6 @@ import scriptRoute from "./api/script.js";
 import senderRoute from "./api/sender.js";
 import filesRoute from "./api/files.js";
 import searchRoute from "./api/search.js";
-import searchAdminRoute from "./api/search_admin.js";
 import bulkActionRoute from "./api/bulk_action.js";
 import specialNotesRoute from "./api/special_notes.js";
 import noteMapRoute from "./api/note_map.js";

@@ -261,9 +260,6 @@ function register(app: express.Application) {
     apiRoute(GET, "/api/search/:searchString", searchRoute.search);
     apiRoute(GET, "/api/search-templates", searchRoute.searchTemplates);
 
-    // Search administration routes
-    app.use(searchAdminRoute);
-
     apiRoute(PST, "/api/bulk-action/execute", bulkActionRoute.execute);
     apiRoute(PST, "/api/bulk-action/affected-notes", bulkActionRoute.getAffectedNoteCount);

@@ -215,14 +215,6 @@ const defaultOptions: DefaultOption[] = [
     { name: "aiSystemPrompt", value: "", isSynced: true },
     { name: "aiSelectedProvider", value: "openai", isSynced: true },
 
-    // Search configuration
-    { name: "searchBackend", value: "typescript", isSynced: false }, // "typescript" or "sqlite"
-    { name: "searchSqliteEnabled", value: "false", isSynced: false },
-    { name: "searchSqlitePerformanceLogging", value: "false", isSynced: false },
-    { name: "searchSqliteMaxMemory", value: "67108864", isSynced: false }, // 64MB default
-    { name: "searchSqliteBatchSize", value: "100", isSynced: false },
-    { name: "searchSqliteAutoRebuild", value: "true", isSynced: false },
-
     { name: "seenCallToActions", value: "[]", isSynced: true }
 ];

@@ -1,218 +0,0 @@
/**
 * A/B Testing utilities for comparing search backend performance
 */

import SearchContext from "./search_context.js";
import type { SearchParams } from "./services/types.js";
import performanceMonitor from "./performance_monitor.js";
import log from "../log.js";
import optionService from "../options.js";

export interface ABTestResult {
    query: string;
    typescriptTime: number;
    sqliteTime: number;
    typescriptResults: number;
    sqliteResults: number;
    resultsMatch: boolean;
    speedup: number;
    winner: "typescript" | "sqlite" | "tie";
}

class ABTestingService {
    private enabled: boolean = false;
    private sampleRate: number = 0.1; // 10% of searches by default
    private results: ABTestResult[] = [];
    private maxResults: number = 1000;

    constructor() {
        this.updateSettings();
    }

    updateSettings() {
        try {
            this.enabled = optionService.getOptionBool("searchSqliteEnabled");
            // Could add a separate AB testing option if needed
        } catch {
            this.enabled = false;
        }
    }

    /**
     * Determines if we should run an A/B test for this query
     */
    shouldRunTest(): boolean {
        if (!this.enabled) {
            return false;
        }

        // Random sampling
        return Math.random() < this.sampleRate;
    }

    /**
     * Run the same search query with both backends and compare results
     */
    async runComparison(query: string, params: SearchParams): Promise<ABTestResult | null> {
        if (!this.shouldRunTest()) {
            return null;
        }

        try {
            // Dynamically import to avoid circular dependencies
            const searchModule = await import("./services/search.js");

            // Run with TypeScript backend
            const tsContext = new SearchContext({ ...params, forceBackend: "typescript" });
            const tsTimer = performanceMonitor.startTimer();
            const tsResults = searchModule.default.findResultsWithQuery(query, tsContext);
            const tsTime = tsTimer();

            // Run with SQLite backend
            const sqliteContext = new SearchContext({ ...params, forceBackend: "sqlite" });
            const sqliteTimer = performanceMonitor.startTimer();
            const sqliteResults = searchModule.default.findResultsWithQuery(query, sqliteContext);
            const sqliteTime = sqliteTimer();

            // Compare results
            const tsNoteIds = new Set(tsResults.map(r => r.noteId));
            const sqliteNoteIds = new Set(sqliteResults.map(r => r.noteId));

            // Check if results match (same notes found)
            const resultsMatch = tsNoteIds.size === sqliteNoteIds.size &&
                [...tsNoteIds].every(id => sqliteNoteIds.has(id));

            // Calculate speedup
            const speedup = tsTime / sqliteTime;

            // Determine winner
            let winner: "typescript" | "sqlite" | "tie";
            if (speedup > 1.2) {
                winner = "sqlite";
            } else if (speedup < 0.83) {
                winner = "typescript";
            } else {
                winner = "tie";
            }

            const result: ABTestResult = {
                query: query.substring(0, 100),
                typescriptTime: tsTime,
                sqliteTime: sqliteTime,
                typescriptResults: tsResults.length,
                sqliteResults: sqliteResults.length,
                resultsMatch,
                speedup,
                winner
            };

            this.recordResult(result);

            // Log significant differences
            if (!resultsMatch) {
                log.info(`A/B test found different results for query "${query.substring(0, 50)}": TS=${tsResults.length}, SQLite=${sqliteResults.length}`);
            }

            if (Math.abs(speedup - 1) > 0.5) {
                log.info(`A/B test significant performance difference: ${winner} is ${Math.abs(speedup - 1).toFixed(1)}x faster for query "${query.substring(0, 50)}"`);
            }

            return result;
        } catch (error) {
            log.error(`A/B test failed: ${error}`);
            return null;
        }
    }
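    // Worked example of the winner thresholds above (illustration only):
    //   tsTime = 120ms, sqliteTime = 40ms -> speedup = 3.0  (> 1.2)    -> winner "sqlite"
    //   tsTime = 50ms,  sqliteTime = 65ms -> speedup ~ 0.77 (< 0.83)   -> winner "typescript"
    //   tsTime = 55ms,  sqliteTime = 50ms -> speedup = 1.1  (0.83-1.2) -> winner "tie"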
    private recordResult(result: ABTestResult) {
        this.results.push(result);

        // Keep only the last N results
        if (this.results.length > this.maxResults) {
            this.results = this.results.slice(-this.maxResults);
        }
    }

    /**
     * Get summary statistics from A/B tests
     */
    getSummary(): {
        totalTests: number;
        avgSpeedup: number;
        typescriptWins: number;
        sqliteWins: number;
        ties: number;
        mismatchRate: number;
        recommendation: string;
    } {
        if (this.results.length === 0) {
            return {
                totalTests: 0,
                avgSpeedup: 1,
                typescriptWins: 0,
                sqliteWins: 0,
                ties: 0,
                mismatchRate: 0,
                recommendation: "No A/B test data available"
            };
        }

        const totalTests = this.results.length;
        const avgSpeedup = this.results.reduce((sum, r) => sum + r.speedup, 0) / totalTests;
        const typescriptWins = this.results.filter(r => r.winner === "typescript").length;
        const sqliteWins = this.results.filter(r => r.winner === "sqlite").length;
        const ties = this.results.filter(r => r.winner === "tie").length;
        const mismatches = this.results.filter(r => !r.resultsMatch).length;
        const mismatchRate = mismatches / totalTests;

        let recommendation: string;
        if (mismatchRate > 0.1) {
            recommendation = "High mismatch rate detected - SQLite search may have accuracy issues";
        } else if (avgSpeedup > 1.5) {
            recommendation = `SQLite is ${avgSpeedup.toFixed(1)}x faster on average - consider enabling`;
        } else if (avgSpeedup < 0.67) {
            recommendation = `TypeScript is ${(1/avgSpeedup).toFixed(1)}x faster on average - keep using TypeScript`;
        } else {
            recommendation = "Both backends perform similarly - choice depends on other factors";
        }

        return {
            totalTests,
            avgSpeedup,
            typescriptWins,
            sqliteWins,
            ties,
            mismatchRate,
            recommendation
        };
    }

    /**
     * Get recent test results
     */
    getRecentResults(count: number = 100): ABTestResult[] {
        return this.results.slice(-count);
    }

    /**
     * Clear all test results
     */
    reset() {
        this.results = [];
    }

    /**
     * Set the sampling rate for A/B tests
     */
    setSampleRate(rate: number) {
        if (rate < 0 || rate > 1) {
            throw new Error("Sample rate must be between 0 and 1");
        }
        this.sampleRate = rate;
    }
}

// Singleton instance
const abTestingService = new ABTestingService();

export default abTestingService;
@@ -1,155 +0,0 @@
/**
 * SQLite-based Note Content Fulltext Expression
 *
 * This is a drop-in replacement for NoteContentFulltextExp that uses
 * the SQLite search service for dramatically improved performance.
 * It maintains 100% compatibility with the existing API while providing
 * 10-30x speed improvements.
 */

import type SearchContext from "../search_context.js";
import Expression from "./expression.js";
import NoteSet from "../note_set.js";
import log from "../../log.js";
import becca from "../../../becca/becca.js";
import { getSQLiteSearchService, type SearchOptions } from "../sqlite_search_service.js";

const ALLOWED_OPERATORS = new Set(["=", "!=", "*=*", "*=", "=*", "%=", "~=", "~*"]);

interface ConstructorOpts {
    tokens: string[];
    raw?: boolean;
    flatText?: boolean;
}

/**
 * SQLite-optimized implementation of note content fulltext search
 */
class NoteContentSQLiteExp extends Expression {
    private operator: string;
    tokens: string[];
    private raw: boolean;
    private flatText: boolean;
    private sqliteService = getSQLiteSearchService();

    constructor(operator: string, { tokens, raw, flatText }: ConstructorOpts) {
        super();

        if (!operator || !tokens || !Array.isArray(tokens)) {
            throw new Error('Invalid parameters: operator and tokens are required');
        }

        this.operator = operator;
        this.tokens = tokens;
        this.raw = !!raw;
        this.flatText = !!flatText;
    }

    execute(inputNoteSet: NoteSet, executionContext: {}, searchContext: SearchContext) {
        if (!ALLOWED_OPERATORS.has(this.operator)) {
            searchContext.addError(`Note content can be searched only with operators: ${Array.from(ALLOWED_OPERATORS).join(", ")}, operator ${this.operator} given.`);
            return inputNoteSet;
        }

        const resultNoteSet = new NoteSet();
        const startTime = Date.now();

        try {
            // Prepare search options
            const searchOptions: SearchOptions = {
                includeProtected: searchContext.includeArchivedNotes,
                includeDeleted: false,
                limit: searchContext.limit || undefined
            };

            // If we have an input note set, use it as a filter
            if (inputNoteSet.notes.length > 0) {
                searchOptions.noteIdFilter = new Set(inputNoteSet.getNoteIds());
            }

            // Map ~* operator to ~= for SQLite service
            const mappedOperator = this.operator === "~*" ? "~=" : this.operator;

            // Execute SQLite search
            const noteIds = this.sqliteService.search(
                this.tokens,
                mappedOperator,
                searchContext,
                searchOptions
            );

            // Build result note set from note IDs
            for (const noteId of noteIds) {
                const note = becca.notes[noteId];
                if (note) {
                    resultNoteSet.add(note);
                }
            }

            // Log performance if enabled
            const elapsed = Date.now() - startTime;
            if (searchContext.debug) {
                log.info(`SQLite search completed: operator=${this.operator}, tokens=${this.tokens.join(" ")}, ` +
                    `results=${noteIds.size}, time=${elapsed}ms`);
            }

            // Store highlighted tokens for UI
            if (noteIds.size > 0) {
                searchContext.highlightedTokens = this.tokens;
            }

        } catch (error) {
            log.error(`SQLite search failed: ${error}`);
            searchContext.addError(`Search failed: ${error}`);

            // On error, return input set unchanged
            return inputNoteSet;
        }

        return resultNoteSet;
    }

    /**
     * Get performance statistics for monitoring
     */
    getStatistics() {
        return this.sqliteService.getStatistics();
    }

    /**
     * Check if SQLite search is available
     */
    static isAvailable(): boolean {
        const service = getSQLiteSearchService();
        const stats = service.getStatistics();
        return stats.tablesInitialized;
    }

    /**
     * Create a compatible expression based on availability
     * This allows gradual migration from the old implementation
     */
    static createExpression(operator: string, opts: ConstructorOpts): Expression {
        if (NoteContentSQLiteExp.isAvailable()) {
            return new NoteContentSQLiteExp(operator, opts);
        } else {
            // Fall back to original implementation if SQLite not ready
            // This would import the original NoteContentFulltextExp
            log.info("SQLite search not available, using fallback implementation");

            // Dynamic import to avoid circular dependency
            const NoteContentFulltextExp = require("./note_content_fulltext.js").default;
            return new NoteContentFulltextExp(operator, opts);
        }
    }
}

export default NoteContentSQLiteExp;

/**
 * Factory function for creating search expressions
 * This can be used as a drop-in replacement in the expression builder
 */
export function createNoteContentExpression(operator: string, opts: ConstructorOpts): Expression {
    return NoteContentSQLiteExp.createExpression(operator, opts);
}
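A minimal usage sketch of the factory above; the import path is an assumption based on the fallback's sibling module:

import { createNoteContentExpression } from "./note_content_sqlite.js";

// picks the SQLite-backed expression when the index is ready, else the fallback
const exp = createNoteContentExpression("*=*", { tokens: ["trilium"], flatText: true });
// exp.execute(inputNoteSet, {}, searchContext) then returns the matching NoteSet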
@@ -1,405 +0,0 @@
|
||||
/**
|
||||
* Tests for FTS5 blob deduplication scenarios
|
||||
*
|
||||
* This test file validates that FTS indexing works correctly when:
|
||||
* 1. Multiple notes share the same blob (deduplication)
|
||||
* 2. Notes change content to match existing blobs
|
||||
* 3. Blobs are updated and affect multiple notes
|
||||
* 4. Notes switch between unique and shared blobs
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
||||
import sql from '../sql.js';
|
||||
import beccaLoader from '../../becca/becca_loader.js';
|
||||
import noteService from '../notes.js';
|
||||
import searchService from './services/search.js';
|
||||
import { ftsSearchService } from './fts_search.js';
|
||||
|
||||
describe('FTS5 Blob Deduplication Tests', () => {
|
||||
beforeEach(() => {
|
||||
// Ensure we have a clean test database with FTS enabled
|
||||
sql.execute("DELETE FROM notes WHERE noteId LIKE 'test_%'");
|
||||
sql.execute("DELETE FROM blobs WHERE blobId LIKE 'test_%'");
|
||||
sql.execute("DELETE FROM notes_fts WHERE noteId LIKE 'test_%'");
|
||||
|
||||
// Reload becca to ensure cache is in sync
|
||||
beccaLoader.load();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
// Clean up test data
|
||||
sql.execute("DELETE FROM notes WHERE noteId LIKE 'test_%'");
|
||||
sql.execute("DELETE FROM blobs WHERE blobId LIKE 'test_%'");
|
||||
sql.execute("DELETE FROM notes_fts WHERE noteId LIKE 'test_%'");
|
||||
});
|
||||
|
||||
describe('Blob Deduplication Scenarios', () => {
|
||||
it('should index multiple notes sharing the same blob', async () => {
|
||||
// Create first note with unique content
|
||||
const note1 = await noteService.createNewNote({
|
||||
noteId: 'test_note1',
|
||||
parentNoteId: 'root',
|
||||
title: 'Test Note 1',
|
||||
content: 'Shared content for deduplication test',
|
||||
type: 'text'
|
||||
});
|
||||
|
||||
// Create second note with the same content (will share blob)
|
||||
const note2 = await noteService.createNewNote({
|
||||
noteId: 'test_note2',
|
||||
parentNoteId: 'root',
|
||||
title: 'Test Note 2',
|
||||
content: 'Shared content for deduplication test',
|
||||
type: 'text'
|
||||
});
|
||||
|
||||
// Verify both notes share the same blob
|
||||
const blob1 = sql.getRow("SELECT blobId FROM notes WHERE noteId = ?", ['test_note1']);
|
||||
const blob2 = sql.getRow("SELECT blobId FROM notes WHERE noteId = ?", ['test_note2']);
|
||||
expect(blob1.blobId).toBe(blob2.blobId);
|
||||
|
||||
// Verify both notes are indexed in FTS
|
||||
const ftsCount = sql.getValue(
|
||||
"SELECT COUNT(*) FROM notes_fts WHERE noteId IN (?, ?)",
|
||||
['test_note1', 'test_note2']
|
||||
);
|
||||
expect(ftsCount).toBe(2);
|
||||
|
||||
// Search should find both notes
|
||||
const searchResults = searchService.searchNotes('deduplication');
|
||||
const foundNoteIds = searchResults.map(r => r.noteId);
|
||||
expect(foundNoteIds).toContain('test_note1');
|
||||
expect(foundNoteIds).toContain('test_note2');
|
||||
});

        it('should update FTS when note content changes to match existing blob', async () => {
            // Create first note with unique content
            const note1 = await noteService.createNewNote({
                noteId: 'test_note3',
                parentNoteId: 'root',
                title: 'Note with existing content',
                content: 'This is existing content in the database',
                type: 'text'
            });

            // Create second note with different content
            const note2 = await noteService.createNewNote({
                noteId: 'test_note4',
                parentNoteId: 'root',
                title: 'Note with different content',
                content: 'This is completely different content',
                type: 'text'
            });

            // Verify notes have different blobs initially
            const initialBlob1 = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note3']);
            const initialBlob2 = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note4']);
            expect(initialBlob1).not.toBe(initialBlob2);

            // Change note2's content to match note1 (deduplication occurs)
            await noteService.updateNoteContent('test_note4', 'This is existing content in the database');

            // Verify both notes now share the same blob
            const finalBlob1 = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note3']);
            const finalBlob2 = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note4']);
            expect(finalBlob1).toBe(finalBlob2);

            // Verify FTS is updated correctly for note2
            const ftsContent = sql.getValue(
                "SELECT content FROM notes_fts WHERE noteId = ?",
                ['test_note4']
            );
            expect(ftsContent).toBe('This is existing content in the database');

            // Search for old content should not find note2
            const oldContentSearch = searchService.searchNotes('completely different');
            const oldSearchIds = oldContentSearch.map(r => r.noteId);
            expect(oldSearchIds).not.toContain('test_note4');

            // Search for new content should find both notes
            const newContentSearch = searchService.searchNotes('existing content');
            const newSearchIds = newContentSearch.map(r => r.noteId);
            expect(newSearchIds).toContain('test_note3');
            expect(newSearchIds).toContain('test_note4');
        });

        it('should update all notes when shared blob content changes', async () => {
            // Create three notes with the same content
            const sharedContent = 'Original shared content for blob update test';

            await noteService.createNewNote({
                noteId: 'test_note5',
                parentNoteId: 'root',
                title: 'Shared Note 1',
                content: sharedContent,
                type: 'text'
            });

            await noteService.createNewNote({
                noteId: 'test_note6',
                parentNoteId: 'root',
                title: 'Shared Note 2',
                content: sharedContent,
                type: 'text'
            });

            await noteService.createNewNote({
                noteId: 'test_note7',
                parentNoteId: 'root',
                title: 'Shared Note 3',
                content: sharedContent,
                type: 'text'
            });

            // Verify all three share the same blob
            const blobIds = sql.getColumn(
                "SELECT DISTINCT blobId FROM notes WHERE noteId IN (?, ?, ?)",
                ['test_note5', 'test_note6', 'test_note7']
            );
            expect(blobIds.length).toBe(1);
            const sharedBlobId = blobIds[0];

            // Update the blob content directly (simulating what would happen in real update)
            sql.execute(
                "UPDATE blobs SET content = ? WHERE blobId = ?",
                ['Updated shared content for all notes', sharedBlobId]
            );

            // Verify FTS is updated for all three notes
            const ftsContents = sql.getColumn(
                "SELECT content FROM notes_fts WHERE noteId IN (?, ?, ?) ORDER BY noteId",
                ['test_note5', 'test_note6', 'test_note7']
            );

            expect(ftsContents).toHaveLength(3);
            ftsContents.forEach(content => {
                expect(content).toBe('Updated shared content for all notes');
            });

            // Search for old content should find nothing
            const oldSearch = searchService.searchNotes('Original shared');
            expect(oldSearch.filter(r => r.noteId.startsWith('test_'))).toHaveLength(0);

            // Search for new content should find all three
            const newSearch = searchService.searchNotes('Updated shared');
            const foundIds = newSearch.map(r => r.noteId).filter(id => id.startsWith('test_'));
            expect(foundIds).toContain('test_note5');
            expect(foundIds).toContain('test_note6');
            expect(foundIds).toContain('test_note7');
        });

        it('should handle note switching from shared to unique blob', async () => {
            // Create two notes with shared content
            const sharedContent = 'Shared content before divergence';

            const note1 = await noteService.createNewNote({
                noteId: 'test_note8',
                parentNoteId: 'root',
                title: 'Diverging Note 1',
                content: sharedContent,
                type: 'text'
            });

            const note2 = await noteService.createNewNote({
                noteId: 'test_note9',
                parentNoteId: 'root',
                title: 'Diverging Note 2',
                content: sharedContent,
                type: 'text'
            });

            // Verify they share the same blob
            const initialBlob1 = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note8']);
            const initialBlob2 = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note9']);
            expect(initialBlob1).toBe(initialBlob2);

            // Change note2 to unique content
            await noteService.updateNoteContent('test_note9', 'Unique content after divergence');

            // Verify they now have different blobs
            const finalBlob1 = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note8']);
            const finalBlob2 = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note9']);
            expect(finalBlob1).not.toBe(finalBlob2);

            // Verify FTS is correctly updated
            const ftsContent1 = sql.getValue(
                "SELECT content FROM notes_fts WHERE noteId = ?",
                ['test_note8']
            );
            const ftsContent2 = sql.getValue(
                "SELECT content FROM notes_fts WHERE noteId = ?",
                ['test_note9']
            );

            expect(ftsContent1).toBe('Shared content before divergence');
            expect(ftsContent2).toBe('Unique content after divergence');

            // Search should find correct notes
            const sharedSearch = searchService.searchNotes('before divergence');
            expect(sharedSearch.map(r => r.noteId)).toContain('test_note8');
            expect(sharedSearch.map(r => r.noteId)).not.toContain('test_note9');

            const uniqueSearch = searchService.searchNotes('after divergence');
            expect(uniqueSearch.map(r => r.noteId)).not.toContain('test_note8');
            expect(uniqueSearch.map(r => r.noteId)).toContain('test_note9');
        });

        it('should handle import scenarios where notes exist before blobs', async () => {
            // Simulate import scenario: create note without blob first
            sql.execute(`
                INSERT INTO notes (noteId, title, type, mime, blobId, isDeleted, isProtected, dateCreated, dateModified, utcDateCreated, utcDateModified)
                VALUES ('test_note10', 'Import Test Note', 'text', 'text/html', 'pending_blob_123', 0, 0, datetime('now'), datetime('now'), datetime('now'), datetime('now'))
            `);

            // Verify note is not in FTS yet (no blob content)
            const initialFts = sql.getValue(
                "SELECT COUNT(*) FROM notes_fts WHERE noteId = ?",
                ['test_note10']
            );
            expect(initialFts).toBe(0);

            // Now create the blob (simulating delayed blob creation during import)
            sql.execute(`
                INSERT INTO blobs (blobId, content, dateModified, utcDateModified)
                VALUES ('pending_blob_123', 'Imported content finally available', datetime('now'), datetime('now'))
            `);

            // Verify note is now indexed in FTS
            const finalFts = sql.getValue(
                "SELECT content FROM notes_fts WHERE noteId = ?",
                ['test_note10']
            );
            expect(finalFts).toBe('Imported content finally available');

            // Search should now find the note
            const searchResults = searchService.searchNotes('Imported content');
            expect(searchResults.map(r => r.noteId)).toContain('test_note10');
        });

        it('should correctly handle protected notes during deduplication', async () => {
            // Create a regular note
            const note1 = await noteService.createNewNote({
                noteId: 'test_note11',
                parentNoteId: 'root',
                title: 'Regular Note',
                content: 'Content that will be shared',
                type: 'text'
            });

            // Create a protected note with same content
            sql.execute(`
                INSERT INTO notes (noteId, title, type, mime, blobId, isDeleted, isProtected, dateCreated, dateModified, utcDateCreated, utcDateModified)
                VALUES ('test_note12', 'Protected Note', 'text', 'text/html',
                    (SELECT blobId FROM notes WHERE noteId = 'test_note11'),
                    0, 1, datetime('now'), datetime('now'), datetime('now'), datetime('now'))
            `);

            // Verify protected note is NOT in FTS
            const protectedInFts = sql.getValue(
                "SELECT COUNT(*) FROM notes_fts WHERE noteId = ?",
                ['test_note12']
            );
            expect(protectedInFts).toBe(0);

            // Verify regular note IS in FTS
            const regularInFts = sql.getValue(
                "SELECT COUNT(*) FROM notes_fts WHERE noteId = ?",
                ['test_note11']
            );
            expect(regularInFts).toBe(1);

            // Update blob content
            const blobId = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note11']);
            sql.execute("UPDATE blobs SET content = ? WHERE blobId = ?", ['Updated shared content', blobId]);

            // Verify regular note is updated in FTS
            const updatedContent = sql.getValue(
                "SELECT content FROM notes_fts WHERE noteId = ?",
                ['test_note11']
            );
            expect(updatedContent).toBe('Updated shared content');

            // Verify protected note is still NOT in FTS
            const protectedStillNotInFts = sql.getValue(
                "SELECT COUNT(*) FROM notes_fts WHERE noteId = ?",
                ['test_note12']
            );
            expect(protectedStillNotInFts).toBe(0);
        });
    });

    describe('FTS Sync and Cleanup', () => {
        it('should sync missing notes to FTS index', async () => {
            // Manually create notes without triggering FTS (simulating missed triggers)
            sql.execute(`
                INSERT INTO notes (noteId, title, type, mime, blobId, isDeleted, isProtected, dateCreated, dateModified, utcDateCreated, utcDateModified)
                VALUES ('test_note13', 'Missed Note 1', 'text', 'text/html', 'blob_missed_1', 0, 0, datetime('now'), datetime('now'), datetime('now'), datetime('now'))
            `);

            sql.execute(`
                INSERT INTO blobs (blobId, content, dateModified, utcDateModified)
                VALUES ('blob_missed_1', 'Content that was missed by triggers', datetime('now'), datetime('now'))
            `);

            // Delete from FTS to simulate missing index
            sql.execute("DELETE FROM notes_fts WHERE noteId = 'test_note13'");

            // Verify note is missing from FTS
            const beforeSync = sql.getValue(
                "SELECT COUNT(*) FROM notes_fts WHERE noteId = ?",
                ['test_note13']
            );
            expect(beforeSync).toBe(0);

            // Run sync
            const syncedCount = ftsSearchService.syncMissingNotes(['test_note13']);
            expect(syncedCount).toBe(1);

            // Verify note is now in FTS
            const afterSync = sql.getValue(
                "SELECT content FROM notes_fts WHERE noteId = ?",
                ['test_note13']
            );
            expect(afterSync).toBe('Content that was missed by triggers');
        });

        it('should handle FTS rebuild correctly', () => {
            // Create some test notes
            const noteIds = ['test_note14', 'test_note15', 'test_note16'];
            noteIds.forEach((noteId, index) => {
                sql.execute(`
                    INSERT INTO notes (noteId, title, type, mime, blobId, isDeleted, isProtected, dateCreated, dateModified, utcDateCreated, utcDateModified)
                    VALUES (?, ?, 'text', 'text/html', ?, 0, 0, datetime('now'), datetime('now'), datetime('now'), datetime('now'))
                `, [noteId, `Test Note ${index}`, `blob_${noteId}`]);

                sql.execute(`
                    INSERT INTO blobs (blobId, content, dateModified, utcDateModified)
                    VALUES (?, ?, datetime('now'), datetime('now'))
                `, [`blob_${noteId}`, `Content for note ${index}`]);
            });

            // Corrupt FTS by adding invalid entries
            sql.execute("INSERT INTO notes_fts (noteId, title, content) VALUES ('invalid_note', 'Invalid', 'Invalid content')");

            // Rebuild index
            ftsSearchService.rebuildIndex();

            // Verify only valid notes are in FTS
            const ftsCount = sql.getValue("SELECT COUNT(*) FROM notes_fts WHERE noteId LIKE 'test_%'");
            expect(ftsCount).toBe(3);

            // Verify invalid entry is gone
            const invalidCount = sql.getValue("SELECT COUNT(*) FROM notes_fts WHERE noteId = 'invalid_note'");
            expect(invalidCount).toBe(0);

            // Verify content is correct
            noteIds.forEach((noteId, index) => {
                const content = sql.getValue(
                    "SELECT content FROM notes_fts WHERE noteId = ?",
                    [noteId]
                );
                expect(content).toBe(`Content for note ${index}`);
            });
        });
    });
});
@@ -3,7 +3,7 @@
 *
 * Encapsulates all FTS5-specific operations for full-text searching.
 * Provides efficient text search using SQLite's FTS5 extension with:
 * - Porter stemming for better matching
 * - Trigram tokenization for fast substring matching
 * - Snippet extraction for context
 * - Highlighting of matched terms
 * - Query syntax conversion from Trilium to FTS5
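For reference, a query against the trigram table would take roughly this shape. This is a sketch only; the SQL this service actually builds is outside this hunk, and the query string here is an arbitrary example:

// Trigram FTS5 matches substrings of 3+ characters, case-insensitively.
// sql.getColumn is the same helper used elsewhere in this commit.
const matchingNoteIds = sql.getColumn(`
    SELECT noteId
    FROM notes_fts
    WHERE notes_fts MATCH ?
    ORDER BY rank`,
    ['"full text" AND trilium']);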
@@ -125,6 +125,16 @@ class FTSSearchService {
            throw new Error("No search tokens provided");
        }

        // Trigram tokenizer requires minimum 3 characters
        const shortTokens = tokens.filter(token => token.length < 3);
        if (shortTokens.length > 0) {
            const shortList = shortTokens.join(', ');
            log.info(`Tokens shorter than 3 characters detected (${shortList}) - cannot use trigram FTS5`);
            throw new FTSNotAvailableError(
                `Trigram tokenizer requires tokens of at least 3 characters. Short tokens: ${shortList}`
            );
        }

        // Sanitize tokens to prevent FTS5 syntax injection
        const sanitizedTokens = tokens.map(token =>
            this.sanitizeFTS5Token(token)
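Because of that 3-character floor, callers have to be prepared for FTSNotAvailableError. A minimal sketch of the calling pattern, where ftsSearch and fallbackSearch are placeholders for whatever the surrounding service exposes, not names from this diff:

let noteIds: Set<string>;
try {
    noteIds = ftsSearch(tokens, operator);          // trigram FTS5 path (placeholder name)
} catch (e) {
    if (e instanceof FTSNotAvailableError) {
        // Tokens under 3 characters cannot hit the trigram index,
        // so fall back to the slower non-FTS scan.
        noteIds = fallbackSearch(tokens, operator); // placeholder name
    } else {
        throw e;
    }
}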
@@ -24,10 +24,6 @@ class SearchContext {
    fulltextQuery: string;
    dbLoadNeeded: boolean;
    error: string | null;
    /** Determines which backend to use for fulltext search */
    searchBackend: "typescript" | "sqlite";
    /** Whether SQLite search is enabled (cached from options) */
    sqliteSearchEnabled: boolean;

    constructor(params: SearchParams = {}) {
        this.fastSearch = !!params.fastSearch;
@@ -58,43 +54,6 @@ class SearchContext {
        // and some extra data needs to be loaded before executing
        this.dbLoadNeeded = false;
        this.error = null;

        // Determine search backend
        this.sqliteSearchEnabled = this.checkSqliteEnabled();
        this.searchBackend = this.determineSearchBackend(params);
    }

    private checkSqliteEnabled(): boolean {
        try {
            // Import dynamically to avoid circular dependencies
            const optionService = require("../options.js").default;
            // Default to true if the option doesn't exist
            const enabled = optionService.getOptionOrNull("searchSqliteEnabled");
            return enabled === null ? true : enabled === "true";
        } catch {
            return true; // Default to enabled
        }
    }

    private determineSearchBackend(params: SearchParams): "typescript" | "sqlite" {
        // Allow override via params for testing
        if (params.forceBackend) {
            return params.forceBackend;
        }

        // Check if SQLite is enabled
        if (!this.sqliteSearchEnabled) {
            return "typescript";
        }

        try {
            const optionService = require("../options.js").default;
            const backend = optionService.getOptionOrNull("searchBackend");
            // Default to sqlite if option doesn't exist
            return backend === "typescript" ? "typescript" : "sqlite";
        } catch {
            return "sqlite"; // Default to SQLite for better performance
        }
    }
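Taken together, the backend can be pinned in three ways; a short sketch for orientation (the setOption calls are an assumption about Trilium's option service, so they are shown only as comments):

// 1. Per-query override, mainly for tests:
const ctx = new SearchContext({ forceBackend: "typescript" });

// 2. Global backend choice read by determineSearchBackend():
//    optionService.setOption("searchBackend", "typescript");
// 3. Kill switch read by checkSqliteEnabled():
//    optionService.setOption("searchSqliteEnabled", "false");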

    addError(error: string) {

@@ -13,7 +13,6 @@ import AttributeExistsExp from "../expressions/attribute_exists.js";
import LabelComparisonExp from "../expressions/label_comparison.js";
import NoteFlatTextExp from "../expressions/note_flat_text.js";
import NoteContentFulltextExp from "../expressions/note_content_fulltext.js";
import NoteContentSqliteExp from "../expressions/note_content_sqlite.js";
import OrderByAndLimitExp from "../expressions/order_by_and_limit.js";
import AncestorExp from "../expressions/ancestor.js";
import buildComparator from "./build_comparator.js";
@@ -38,20 +37,15 @@ function getFulltext(_tokens: TokenData[], searchContext: SearchContext, leading
    const operator = leadingOperator === "=" ? "=" : "*=*";

    if (!searchContext.fastSearch) {
        // Choose between SQLite and TypeScript backend
        const ContentExp = searchContext.searchBackend === "sqlite"
            ? NoteContentSqliteExp
            : NoteContentFulltextExp;

        // For exact match with "=", we need different behavior
        if (leadingOperator === "=" && tokens.length === 1) {
            // Exact match on title OR exact match on content
            return new OrExp([
                new PropertyComparisonExp(searchContext, "title", "=", tokens[0]),
                new ContentExp("=", { tokens, flatText: false })
                new NoteContentFulltextExp("=", { tokens, flatText: false })
            ]);
        }
        return new OrExp([new NoteFlatTextExp(tokens), new ContentExp(operator, { tokens, flatText: true })]);
        return new OrExp([new NoteFlatTextExp(tokens), new NoteContentFulltextExp(operator, { tokens, flatText: true })]);
    } else {
        return new NoteFlatTextExp(tokens);
    }
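For a plain two-token query with no leading operator, the branch above therefore builds, roughly, this tree (a sketch grounded directly in the code shown; the token values are arbitrary):

// getFulltext(["hello", "world"], ...) with fastSearch disabled:
const exp = new OrExp([
    new NoteFlatTextExp(["hello", "world"]),
    new NoteContentFulltextExp("*=*", { tokens: ["hello", "world"], flatText: true })
]);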
@@ -154,12 +148,7 @@ function getExpression(tokens: TokenData[], searchContext: SearchContext, level

        i++;

        // Choose between SQLite and TypeScript backend
        const ContentExp = searchContext.searchBackend === "sqlite"
            ? NoteContentSqliteExp
            : NoteContentFulltextExp;

        return new ContentExp(operator.token, { tokens: [tokens[i].token], raw });
        return new NoteContentFulltextExp(operator.token, { tokens: [tokens[i].token], raw });
    }

    if (tokens[i].token === "parents") {
@@ -222,12 +211,7 @@ function getExpression(tokens: TokenData[], searchContext: SearchContext, level

        i += 2;

        // Choose between SQLite and TypeScript backend
        const ContentExp = searchContext.searchBackend === "sqlite"
            ? NoteContentSqliteExp
            : NoteContentFulltextExp;

        return new OrExp([new PropertyComparisonExp(searchContext, "title", "*=*", tokens[i].token), new ContentExp("*=*", { tokens: [tokens[i].token] })]);
        return new OrExp([new PropertyComparisonExp(searchContext, "title", "*=*", tokens[i].token), new NoteContentFulltextExp("*=*", { tokens: [tokens[i].token] })]);
    }

    if (PropertyComparisonExp.isProperty(tokens[i].token)) {

@@ -19,9 +19,6 @@ import sql from "../../sql.js";
import scriptService from "../../script.js";
import striptags from "striptags";
import protectedSessionService from "../../protected_session.js";
import performanceMonitor from "../performance_monitor.js";
import type { DetailedMetrics } from "../performance_monitor.js";
import abTestingService from "../ab_testing.js";

export interface SearchNoteResult {
    searchResultNoteIds: string[];
@@ -405,14 +402,6 @@ function parseQueryToExpression(query: string, searchContext: SearchContext) {

function searchNotes(query: string, params: SearchParams = {}): BNote[] {
    const searchContext = new SearchContext(params);

    // Run A/B test in background (non-blocking)
    setImmediate(() => {
        abTestingService.runComparison(query, params).catch(err => {
            log.info(`A/B test failed: ${err}`);
        });
    });

    const searchResults = findResultsWithQuery(query, searchContext);

    return searchResults.map((sr) => becca.notes[sr.noteId]);
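A representative call, for reference; the query string is just an example of Trilium's search syntax, not taken from this diff:

// Materializes BNote objects from the result note IDs via becca.
const books = searchNotes("tolkien #book", { limit: 50 });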
@@ -422,14 +411,7 @@ function findResultsWithQuery(query: string, searchContext: SearchContext): Sear
    query = query || "";
    searchContext.originalQuery = query;

    // Start performance monitoring
    const totalTimer = performanceMonitor.startTimer();
    const phases: { name: string; duration: number }[] = [];

    // Parse query
    const parseTimer = performanceMonitor.startTimer();
    const expression = parseQueryToExpression(query, searchContext);
    phases.push({ name: "parse", duration: parseTimer() });

    if (!expression) {
        return [];
@@ -441,7 +423,6 @@ function findResultsWithQuery(query: string, searchContext: SearchContext): Sear
    const isPureExpressionQuery = query.trim().startsWith('#');

    let results: SearchResult[];
    const searchTimer = performanceMonitor.startTimer();

    if (isPureExpressionQuery) {
        // For pure expression queries, use standard search without progressive phases
@@ -450,22 +431,6 @@ function findResultsWithQuery(query: string, searchContext: SearchContext): Sear
        results = findResultsWithExpression(expression, searchContext);
    }

    phases.push({ name: "search", duration: searchTimer() });

    // Record metrics
    const metrics: DetailedMetrics = {
        query: query.substring(0, 200), // Truncate long queries
        backend: searchContext.searchBackend,
        totalTime: totalTimer(),
        parseTime: phases[0].duration,
        searchTime: phases[1].duration,
        resultCount: results.length,
        phases,
        error: searchContext.error || undefined
    };

    performanceMonitor.recordDetailedMetrics(metrics);

    return results;
}

@@ -21,6 +21,4 @@ export interface SearchParams {
    limit?: number | null;
    debug?: boolean;
    fuzzyAttributeSearch?: boolean;
    /** Force a specific search backend for testing/comparison */
    forceBackend?: "typescript" | "sqlite";
}
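A caller exercising these fields might pass, for example (only fields declared above are used):

const params: SearchParams = {
    fastSearch: false,
    limit: 100,
    debug: true,
    fuzzyAttributeSearch: false,
    forceBackend: "typescript" // pin the fallback backend for comparison
};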

@@ -5,7 +5,6 @@
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import Database from 'better-sqlite3';
import { SqliteFunctionsService, getSqliteFunctionsService } from './sqlite_functions.js';
import { normalize, stripTags } from '../utils.js';

describe('SqliteFunctionsService', () => {
    let db: Database.Database;
@@ -46,38 +45,6 @@ describe('SqliteFunctionsService', () => {
        });
    });

    describe('normalize_text function', () => {
        beforeEach(() => {
            service.registerFunctions(db);
        });

        it('should normalize text correctly', () => {
            const tests = [
                ['café', 'cafe'],
                ['naïve', 'naive'],
                ['HELLO WORLD', 'hello world'],
                ['Über', 'uber'],
                ['', ''],
                [null, ''],
            ];

            for (const [input, expected] of tests) {
                const result = db.prepare('SELECT normalize_text(?) as result').get(input) as { result: string };
                expect(result.result).toBe(expected);
                // Verify it matches the utils normalize function
                if (input) {
                    expect(result.result).toBe(normalize(input as string));
                }
            }
        });

        it('should handle special characters', () => {
            const input = 'Ñoño 123 ABC!@#';
            const result = db.prepare('SELECT normalize_text(?) as result').get(input) as any;
            expect(result.result).toBe(normalize(input));
        });
    });

    describe('edit_distance function', () => {
        beforeEach(() => {
            service.registerFunctions(db);
@@ -143,199 +110,4 @@ describe('SqliteFunctionsService', () => {
            expect(result.match).toBe(0);
        });
    });

    describe('tokenize_text function', () => {
        beforeEach(() => {
            service.registerFunctions(db);
        });

        it('should tokenize text correctly', () => {
            const tests = [
                ['hello world', ['hello', 'world']],
                ['getUserName', ['getusername', 'get', 'user', 'name']],
                ['user_name', ['user_name', 'user', 'name']],
                ['hello-world', ['hello', 'world']],
                ['test@example.com', ['test', 'example', 'com']],
                ['', []],
            ];

            for (const [input, expected] of tests) {
                const result = db.prepare('SELECT tokenize_text(?) as tokens').get(input) as any;
                const tokens = JSON.parse(result.tokens);
                // Check that all expected tokens are present (order may vary due to Set)
                for (const token of expected) {
                    expect(tokens).toContain(token);
                }
            }
        });

        it('should handle camelCase and snake_case', () => {
            const result = db.prepare('SELECT tokenize_text(?) as tokens').get('getUserById_async') as any;
            const tokens = JSON.parse(result.tokens);
            expect(tokens).toContain('getuserbyid_async');
            expect(tokens).toContain('getuserbyid');
            expect(tokens).toContain('async');
            expect(tokens).toContain('get');
            expect(tokens).toContain('user');
            expect(tokens).toContain('by');
            expect(tokens).toContain('id');
        });

        it('should handle null input', () => {
            const result = db.prepare('SELECT tokenize_text(?) as tokens').get(null) as any;
            expect(result.tokens).toBe('[]');
        });
    });

    describe('strip_html function', () => {
        beforeEach(() => {
            service.registerFunctions(db);
        });

        it('should strip HTML tags correctly', () => {
            const tests = [
                ['<p>Hello World</p>', 'Hello World'],
                ['<div><span>Test</span></div>', 'Test'],
                ['<script>alert("bad")</script>content', 'content'],
                ['<style>body{color:red}</style>text', 'text'],
                ['Hello &lt;world&gt;', 'Hello <world>'],
                ['&nbsp;Space', ' Space'],
                ['', ''],
            ];

            for (const [input, expected] of tests) {
                const result = db.prepare('SELECT strip_html(?) as text').get(input) as any;
                expect(result.text).toBe(expected);
            }
        });

        it('should handle complex HTML', () => {
            const html = `
                <html>
                <head><title>Test</title></head>
                <body>
                    <h1>Title</h1>
                    <p>Paragraph with <strong>bold</strong> text.</p>
                    <script>console.log("test")</script>
                </body>
                </html>
            `;
            const result = db.prepare('SELECT strip_html(?) as text').get(html) as any;
            expect(result.text).toContain('Title');
            expect(result.text).toContain('Paragraph with bold text');
            expect(result.text).not.toContain('console.log');
        });

        it('should handle null input', () => {
            const result = db.prepare('SELECT strip_html(?) as text').get(null) as any;
            expect(result.text).toBe('');
        });
    });

    describe('fuzzy_match function', () => {
        beforeEach(() => {
            service.registerFunctions(db);
        });

        it('should perform exact matches', () => {
            const tests = [
                ['hello', 'hello world', 1],
                ['world', 'hello world', 1],
                ['foo', 'hello world', 0],
            ];

            for (const [needle, haystack, expected] of tests) {
                const result = db.prepare('SELECT fuzzy_match(?, ?, 2) as match').get(needle, haystack) as any;
                expect(result.match).toBe(expected);
            }
        });

        it('should perform fuzzy matches within edit distance', () => {
            const tests = [
                ['helo', 'hello world', 1], // 1 edit distance
                ['wrld', 'hello world', 1], // 1 edit distance
                ['hallo', 'hello world', 1], // 1 edit distance
                ['xyz', 'hello world', 0], // Too different
            ];

            for (const [needle, haystack, expected] of tests) {
                const result = db.prepare('SELECT fuzzy_match(?, ?, 2) as match').get(needle, haystack) as any;
                expect(result.match).toBe(expected);
            }
        });

        it('should handle case insensitive matching', () => {
            const result = db.prepare('SELECT fuzzy_match(?, ?, 2) as match').get('HELLO', 'hello world') as any;
            expect(result.match).toBe(1);
        });

        it('should handle null inputs', () => {
            const result = db.prepare('SELECT fuzzy_match(?, ?, 2) as match').get(null, 'test') as any;
            expect(result.match).toBe(0);
        });
    });

    describe('Integration with SQL queries', () => {
        beforeEach(() => {
            service.registerFunctions(db);

            // Create a test table
            db.exec(`
                CREATE TABLE test_notes (
                    id INTEGER PRIMARY KEY,
                    title TEXT,
                    content TEXT
                )
            `);

            // Insert test data
            const insert = db.prepare('INSERT INTO test_notes (title, content) VALUES (?, ?)');
            insert.run('Café Meeting', '<p>Discussion about naïve implementation</p>');
            insert.run('über wichtig', 'Very important note with HTML & entities');
            insert.run('getUserData', 'Function to get_user_data from database');
        });

        it('should work in WHERE clauses with normalize_text', () => {
            const results = db.prepare(`
                SELECT title FROM test_notes
                WHERE normalize_text(title) LIKE '%cafe%'
            `).all();

            expect(results).toHaveLength(1);
            expect((results[0] as any).title).toBe('Café Meeting');
        });

        it('should work with fuzzy matching in queries', () => {
            const results = db.prepare(`
                SELECT title FROM test_notes
                WHERE fuzzy_match('getuserdata', normalize_text(title), 2) = 1
            `).all();

            expect(results).toHaveLength(1);
            expect((results[0] as any).title).toBe('getUserData');
        });

        it('should work with HTML stripping', () => {
            const results = db.prepare(`
                SELECT strip_html(content) as clean_content
                FROM test_notes
                WHERE title = 'Café Meeting'
            `).all();

            expect((results[0] as any).clean_content).toBe('Discussion about naïve implementation');
        });

        it('should work with tokenization', () => {
            const result = db.prepare(`
                SELECT tokenize_text(title) as tokens
                FROM test_notes
                WHERE title = 'getUserData'
            `).get() as any;

            const tokens = JSON.parse(result.tokens);
            expect(tokens).toContain('get');
            expect(tokens).toContain('user');
            expect(tokens).toContain('data');
        });
    });
});
@@ -1,19 +1,17 @@
/**
 * SQLite Custom Functions Service
 *
 * This service manages custom SQLite functions that enhance search capabilities.
 * This service manages custom SQLite functions for general database operations.
 * Functions are registered with better-sqlite3 to provide native-speed operations
 * directly within SQL queries, enabling efficient search indexing and querying.
 * directly within SQL queries.
 *
 * These functions are used by:
 * - Database triggers for automatic search index maintenance
 * - Direct SQL queries for search operations
 * - Migration scripts for initial data population
 * - Fuzzy search fallback (edit_distance)
 * - Regular expression matching (regex_match)
 */

import type { Database } from "better-sqlite3";
import log from "../log.js";
import { normalize as utilsNormalize, stripTags } from "../utils.js";

/**
 * Configuration for fuzzy search operations
@@ -66,14 +64,6 @@ export class SqliteFunctionsService {
    private initializeFunctions(): void {
        // Bind all methods to preserve 'this' context
        this.functions = [
            {
                name: "normalize_text",
                implementation: this.normalizeText.bind(this),
                options: {
                    deterministic: true,
                    varargs: false
                }
            },
            {
                name: "edit_distance",
                implementation: this.editDistance.bind(this),
@@ -89,30 +79,6 @@ export class SqliteFunctionsService {
                    deterministic: true,
                    varargs: true // Changed to true to handle variable arguments
                }
            },
            {
                name: "tokenize_text",
                implementation: this.tokenizeText.bind(this),
                options: {
                    deterministic: true,
                    varargs: false
                }
            },
            {
                name: "strip_html",
                implementation: this.stripHtml.bind(this),
                options: {
                    deterministic: true,
                    varargs: false
                }
            },
            {
                name: "fuzzy_match",
                implementation: this.fuzzyMatch.bind(this),
                options: {
                    deterministic: true,
                    varargs: true // Changed to true to handle variable arguments
                }
            }
        ];
    }
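registerFunctions itself is outside this hunk; with better-sqlite3 the registration presumably reduces to a loop like this sketch (db.function(name, options, fn) is the real better-sqlite3 API; the error handling the service wraps around it is omitted here):

registerFunctions(db: Database): boolean {
    for (const fn of this.functions) {
        // Registers each implementation as a scalar SQL function.
        db.function(fn.name, fn.options, fn.implementation);
    }
    return true;
}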
@@ -182,22 +148,6 @@ export class SqliteFunctionsService {

    // ===== Function Implementations =====

    /**
     * Normalize text by removing diacritics and converting to lowercase
     * Matches the behavior of utils.normalize() exactly
     *
     * @param text Text to normalize
     * @returns Normalized text
     */
    private normalizeText(text: string | null | undefined): string {
        if (!text || typeof text !== 'string') {
            return '';
        }

        // Use the exact same normalization as the rest of the codebase
        return utilsNormalize(text);
    }

    /**
     * Calculate Levenshtein edit distance between two strings
     * Optimized with early termination and single-array approach
@@ -314,186 +264,6 @@ export class SqliteFunctionsService {
            return null;
        }
    }

    /**
     * Tokenize text into searchable words
     * Handles punctuation, camelCase, and snake_case
     *
     * @param text Text to tokenize
     * @returns JSON array string of tokens
     */
    private tokenizeText(text: string | null | undefined): string {
        if (!text || typeof text !== 'string') {
            return '[]';
        }

        try {
            // Use a Set to avoid duplicates from the start
            const expandedTokens: Set<string> = new Set();

            // Split on word boundaries, preserving apostrophes within words
            // But we need to handle underscore separately for snake_case
            const tokens = text
                .split(/[\s\n\r\t,;.!?()[\]{}"'`~@#$%^&*+=|\\/<>:-]+/)
                .filter(token => token.length > 0);

            // Process each token
            for (const token of tokens) {
                // Add the original token in lowercase
                expandedTokens.add(token.toLowerCase());

                // Handle snake_case first (split on underscore)
                const snakeParts = token.split('_').filter(part => part.length > 0);
                if (snakeParts.length > 1) {
                    // We have snake_case
                    for (const snakePart of snakeParts) {
                        // Add each snake part
                        expandedTokens.add(snakePart.toLowerCase());

                        // Also check for camelCase within each snake part
                        const camelParts = this.splitCamelCase(snakePart);
                        for (const camelPart of camelParts) {
                            if (camelPart.length > 0) {
                                expandedTokens.add(camelPart.toLowerCase());
                            }
                        }
                    }
                } else {
                    // No snake_case, just check for camelCase
                    const camelParts = this.splitCamelCase(token);
                    for (const camelPart of camelParts) {
                        if (camelPart.length > 0) {
                            expandedTokens.add(camelPart.toLowerCase());
                        }
                    }
                }
            }

            // Convert Set to Array for JSON serialization
            const uniqueTokens = Array.from(expandedTokens);

            // Return as JSON array string for SQL processing
            return JSON.stringify(uniqueTokens);
        } catch (error) {
            log.error(`Error tokenizing text in SQL: ${error}`);
            return '[]';
        }
    }
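Run through this expansion, an identifier-heavy string fans out exactly as the spec file earlier in this diff asserts:

const row = db.prepare("SELECT tokenize_text(?) AS tokens").get("getUserById_async") as any;
// JSON.parse(row.tokens) => ["getuserbyid_async", "getuserbyid", "async",
//                            "get", "user", "by", "id"]
// (a Set is used internally, so ordering is not guaranteed)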

    /**
     * Helper method to split camelCase strings
     * @param str String to split
     * @returns Array of parts
     */
    private splitCamelCase(str: string): string[] {
        // Split on transitions from lowercase to uppercase
        // Also handle sequences of uppercase letters (e.g., "XMLParser" -> ["XML", "Parser"])
        return str.split(/(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])/);
    }

    /**
     * Strip HTML tags from content
     * Removes script and style content, then strips tags and decodes entities
     *
     * @param html HTML content
     * @returns Plain text without HTML tags
     */
    private stripHtml(html: string | null | undefined): string {
        if (!html || typeof html !== 'string') {
            return '';
        }

        try {
            let text = html;

            // First remove script and style content entirely (including the tags)
            // This needs to happen before stripTags to remove the content
            text = text.replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '');
            text = text.replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, '');

            // Now use stripTags to remove remaining HTML tags
            text = stripTags(text);

            // Decode common HTML entities
            text = text.replace(/&lt;/g, '<');
            text = text.replace(/&gt;/g, '>');
            text = text.replace(/&amp;/g, '&');
            text = text.replace(/&quot;/g, '"');
            text = text.replace(/&#39;/g, "'");
            text = text.replace(/&apos;/g, "'");
            text = text.replace(/&nbsp;/g, ' ');

            // Normalize whitespace - reduce multiple spaces to single space
            // But don't trim leading/trailing space if it came from &nbsp;
            text = text.replace(/\s+/g, ' ');

            return text;
        } catch (error) {
            log.error(`Error stripping HTML in SQL: ${error}`);
            return html; // Return original on error
        }
    }
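The ordering above matters: script/style removal must precede stripTags, and entity decoding must follow it. One worked input as a sketch:

// strip_html('<script>alert(1)</script><p>a &amp; b</p>')
//   1. script/style removal -> '<p>a &amp; b</p>'
//   2. stripTags            -> 'a &amp; b'
//   3. entity decoding      -> 'a & b'
//   4. whitespace collapse  -> 'a & b'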

    /**
     * Fuzzy match with configurable edit distance
     * Combines exact and fuzzy matching for optimal performance
     *
     * SQLite will pass 2 or 3 arguments:
     * - 2 args: needle, haystack (uses default maxDistance)
     * - 3 args: needle, haystack, maxDistance
     *
     * @returns 1 if match found, 0 otherwise
     */
    private fuzzyMatch(...args: any[]): number {
        // Handle variable arguments from SQLite
        let needle: string | null | undefined = args[0];
        let haystack: string | null | undefined = args[1];
        let maxDistance: number = args.length > 2 ? args[2] : FUZZY_CONFIG.MAX_EDIT_DISTANCE;

        // Validate input types
        if (!needle || !haystack) {
            return 0;
        }

        if (typeof needle !== 'string' || typeof haystack !== 'string') {
            return 0;
        }

        // Validate and sanitize maxDistance
        if (typeof maxDistance !== 'number' || !Number.isFinite(maxDistance)) {
            maxDistance = FUZZY_CONFIG.MAX_EDIT_DISTANCE;
        } else {
            // Ensure it's a positive integer
            maxDistance = Math.max(0, Math.floor(maxDistance));
        }

        // Normalize for comparison
        const normalizedNeedle = needle.toLowerCase();
        const normalizedHaystack = haystack.toLowerCase();

        // Check exact match first (most common case)
        if (normalizedHaystack.includes(normalizedNeedle)) {
            return 1;
        }

        // For fuzzy matching, check individual words
        const words = normalizedHaystack.split(/\s+/).filter(w => w.length > 0);

        for (const word of words) {
            // Skip if word length difference is too large
            if (Math.abs(word.length - normalizedNeedle.length) > maxDistance) {
                continue;
            }

            // Check edit distance - call with all 3 args since we're calling internally
            const distance = this.editDistance(normalizedNeedle, word, maxDistance);
            if (distance <= maxDistance) {
                return 1;
            }
        }

        return 0;
    }
}
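Registered as a SQL function, fuzzy_match is callable straight from queries; the spec file earlier in this diff exercises exactly this shape:

// 1, because "helo" is within edit distance 2 of the word "hello":
const row = db.prepare("SELECT fuzzy_match(?, ?, 2) AS m").get("helo", "hello world") as any;
expect(row.m).toBe(1);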

// Export singleton instance getter
@@ -1,153 +0,0 @@
/**
 * Integration tests for SQLite search implementation
 */

import { describe, it, expect, beforeAll, afterAll } from "vitest";
import sql from "../sql.js";
import { getSQLiteSearchService } from "./sqlite_search_service.js";
import SearchContext from "./search_context.js";
import NoteContentSqliteExp from "./expressions/note_content_sqlite.js";
import NoteSet from "./note_set.js";
import { getSqliteFunctionsService } from "./sqlite_functions.js";

describe("SQLite Search Integration", () => {
    let searchService: ReturnType<typeof getSQLiteSearchService>;
    let searchContext: SearchContext;

    beforeAll(() => {
        // Initialize services
        searchService = getSQLiteSearchService();
        searchContext = new SearchContext({
            // searchBackend: "sqlite", // TODO: Add to SearchParams type
            // searchSqliteEnabled: true
        });

        // Register SQL functions
        const functionsService = getSqliteFunctionsService();
        const db = sql.getDbConnection();
        functionsService.registerFunctions(db);
    });

    afterAll(() => {
        // Cleanup if needed
    });

    describe("Service Initialization", () => {
        it("should initialize SQLite search service", () => {
            expect(searchService).toBeDefined();
            const stats = searchService.getStatistics();
            expect(stats).toBeDefined();
            expect(stats).toHaveProperty("tablesInitialized");
        });

        it("should have registered SQL functions", () => {
            const functionsService = getSqliteFunctionsService();
            expect(functionsService.isRegistered()).toBe(true);
        });
    });

    describe("Expression Creation", () => {
        it("should create SQLite expression when available", () => {
            const exp = NoteContentSqliteExp.createExpression("*=*", {
                tokens: ["test"],
                raw: false,
                flatText: false
            });

            expect(exp).toBeDefined();
            // Check if it's the SQLite version or fallback
            if (NoteContentSqliteExp.isAvailable()) {
                expect(exp).toBeInstanceOf(NoteContentSqliteExp);
            }
        });

        it("should handle different operators", () => {
            const operators = ["=", "!=", "*=*", "*=", "=*", "%=", "~="];

            for (const op of operators) {
                const exp = new NoteContentSqliteExp(op, {
                    tokens: ["test"],
                    raw: false,
                    flatText: false
                });

                expect(exp).toBeDefined();
                expect(exp.tokens).toEqual(["test"]);
            }
        });
    });

    describe("Search Execution", () => {
        it("should execute search with empty input set", () => {
            const exp = new NoteContentSqliteExp("*=*", {
                tokens: ["test"],
                raw: false,
                flatText: false
            });

            const inputSet = new NoteSet();
            const resultSet = exp.execute(inputSet, {}, searchContext);

            expect(resultSet).toBeDefined();
            expect(resultSet).toBeInstanceOf(NoteSet);
        });

        it("should handle search errors gracefully", () => {
            const exp = new NoteContentSqliteExp("invalid_op", {
                tokens: ["test"],
                raw: false,
                flatText: false
            });

            const inputSet = new NoteSet();
            const resultSet = exp.execute(inputSet, {}, searchContext);

            expect(resultSet).toBeDefined();
            expect(searchContext.hasError()).toBe(true);
        });
    });

    describe("Backend Selection", () => {
        it("should use SQLite backend when enabled", () => {
            const ctx = new SearchContext({
                forceBackend: "sqlite"
            });

            expect(ctx.searchBackend).toBe("sqlite");
        });

        it("should use TypeScript backend when forced", () => {
            const ctx = new SearchContext({
                forceBackend: "typescript"
            });

            expect(ctx.searchBackend).toBe("typescript");
        });

        it("should default to SQLite when no preference", () => {
            const ctx = new SearchContext({});

            // Should default to SQLite for better performance
            expect(["sqlite", "typescript"]).toContain(ctx.searchBackend);
        });
    });

    describe("Performance Statistics", () => {
        it("should track search statistics", () => {
            const initialStats = searchService.getStatistics();
            const initialSearches = initialStats.totalSearches || 0;

            // Execute a search
            searchService.search(
                ["test"],
                "*=*",
                searchContext,
                {}
            );

            const newStats = searchService.getStatistics();
            expect(newStats.totalSearches).toBeGreaterThan(initialSearches);
            expect(newStats.lastSearchTimeMs).toBeGreaterThanOrEqual(0);
        });
    });
});
@@ -1,320 +0,0 @@
/**
 * Tests for SQLite Search Service
 *
 * These tests verify that the SQLite-based search implementation
 * correctly handles all search operators and provides accurate results.
 */

import { describe, it, expect, beforeAll, afterAll, beforeEach } from "vitest";
import { SQLiteSearchService } from "./sqlite_search_service.js";
import sql from "../sql.js";
import SearchContext from "./search_context.js";
import { initializeSqliteFunctions } from "./sqlite_functions.js";

describe("SQLiteSearchService", () => {
    let searchService: SQLiteSearchService;
    let searchContext: SearchContext;

    beforeAll(() => {
        // Initialize SQLite functions for tests
        const db = sql.getDbConnection();
        if (db) {
            initializeSqliteFunctions(db);
        }

        // Get search service instance
        searchService = SQLiteSearchService.getInstance();

        // Create test tables if they don't exist
        sql.execute(`
            CREATE TABLE IF NOT EXISTS note_search_content (
                noteId TEXT PRIMARY KEY,
                noteContent TEXT,
                normalized_content TEXT,
                normalized_title TEXT,
                isProtected INTEGER DEFAULT 0,
                isDeleted INTEGER DEFAULT 0
            )
        `);

        sql.execute(`
            CREATE TABLE IF NOT EXISTS note_tokens (
                noteId TEXT PRIMARY KEY,
                tokens TEXT
            )
        `);

        sql.execute(`
            CREATE VIRTUAL TABLE IF NOT EXISTS note_fts USING fts5(
                noteId UNINDEXED,
                title,
                content,
                tokenize = 'unicode61'
            )
        `);
    });

    beforeEach(() => {
        // Clear test data
        sql.execute(`DELETE FROM note_search_content`);
        sql.execute(`DELETE FROM note_tokens`);
        sql.execute(`DELETE FROM note_fts`);

        // Create fresh search context
        searchContext = new SearchContext();

        // Insert test data
        insertTestNote("note1", "Hello World", "This is a test note with hello world content.");
        insertTestNote("note2", "Programming", "JavaScript and TypeScript programming languages.");
        insertTestNote("note3", "Fuzzy Search", "Testing fuzzy matching with similar words like helo and wrold.");
        insertTestNote("note4", "Special Characters", "Testing with special@email.com and user_name variables.");
        insertTestNote("note5", "CamelCase", "getUserName and setUserEmail functions in JavaScript.");
    });

    function insertTestNote(noteId: string, title: string, content: string) {
        // Insert into search content table
        sql.execute(`
            INSERT INTO note_search_content (noteId, noteContent, normalized_content, normalized_title, isProtected, isDeleted)
            VALUES (?, ?, LOWER(?), LOWER(?), 0, 0)
        `, [noteId, content, content, title]);

        // Generate tokens
        const tokens = tokenize(content + " " + title);
        sql.execute(`
            INSERT INTO note_tokens (noteId, tokens)
            VALUES (?, ?)
        `, [noteId, JSON.stringify(tokens)]);

        // Insert into FTS5 table
        sql.execute(`
            INSERT INTO note_fts (noteId, title, content)
            VALUES (?, ?, ?)
        `, [noteId, title, content]);
    }

    function tokenize(text: string): string[] {
        return text.toLowerCase()
            .split(/[\s\n\r\t,;.!?()[\]{}"'`~@#$%^&*+=|\\/<>:_-]+/)
            .filter(token => token.length > 0);
    }

    describe("Substring Search (*=*)", () => {
        it("should find notes containing substring", () => {
            const results = searchService.search(["hello"], "*=*", searchContext);
            expect(results).toContain("note1");
            expect(results.size).toBe(1);
        });

        it("should find notes with multiple tokens", () => {
            const results = searchService.search(["java", "script"], "*=*", searchContext);
            expect(results).toContain("note2");
            expect(results).toContain("note5");
            expect(results.size).toBe(2);
        });

        it("should be case insensitive", () => {
            const results = searchService.search(["HELLO"], "*=*", searchContext);
            expect(results).toContain("note1");
        });
    });

    describe("Fuzzy Search (~=)", () => {
        it("should find notes with fuzzy matching", () => {
            const results = searchService.search(["helo"], "~=", searchContext);
            expect(results).toContain("note3"); // Contains "helo"
            expect(results).toContain("note1"); // Contains "hello" (1 edit distance)
        });

        it("should respect edit distance threshold", () => {
            const results = searchService.search(["xyz"], "~=", searchContext);
            expect(results.size).toBe(0); // Too different from any content
        });

        it("should handle multiple fuzzy tokens", () => {
            const results = searchService.search(["fuzzy", "match"], "~=", searchContext);
            expect(results).toContain("note3");
        });
    });

    describe("Prefix Search (=*)", () => {
        it("should find notes starting with prefix", () => {
            const results = searchService.search(["test"], "=*", searchContext);
            expect(results).toContain("note3"); // "Testing fuzzy..."
            expect(results).toContain("note4"); // "Testing with..."
            expect(results.size).toBe(2);
        });

        it("should handle multiple prefixes", () => {
            const results = searchService.search(["java", "type"], "=*", searchContext);
            expect(results).toContain("note2"); // Has both "JavaScript" and "TypeScript"
        });
    });

    describe("Suffix Search (*=)", () => {
        it("should find notes ending with suffix", () => {
            const results = searchService.search(["script"], "*=", searchContext);
            expect(results).toContain("note2"); // "JavaScript" and "TypeScript"
            expect(results).toContain("note5"); // "JavaScript"
        });

        it("should handle special suffixes", () => {
            const results = searchService.search([".com"], "*=", searchContext);
            expect(results).toContain("note4"); // "special@email.com"
        });
    });

    describe("Regex Search (%=)", () => {
        it("should find notes matching regex pattern", () => {
            const results = searchService.search(["\\w+@\\w+\\.com"], "%=", searchContext);
            expect(results).toContain("note4"); // Contains email pattern
        });

        it("should handle complex patterns", () => {
            const results = searchService.search(["get\\w+Name"], "%=", searchContext);
            expect(results).toContain("note5"); // "getUserName"
        });

        it("should handle invalid regex gracefully", () => {
            const results = searchService.search(["[invalid"], "%=", searchContext);
            expect(results.size).toBe(0); // Should return empty on invalid regex
        });
    });

    describe("Exact Word Search (=)", () => {
        it("should find notes with exact word match", () => {
            const results = searchService.search(["hello"], "=", searchContext);
            expect(results).toContain("note1");
            expect(results.size).toBe(1);
        });

        it("should not match partial words", () => {
            const results = searchService.search(["java"], "=", searchContext);
            expect(results.size).toBe(0); // "JavaScript" contains "java" but not as whole word
        });

        it("should find multiple exact words", () => {
            const results = searchService.search(["fuzzy", "matching"], "=", searchContext);
            expect(results).toContain("note3");
        });
    });

    describe("Not Equals Search (!=)", () => {
        it("should find notes not containing exact word", () => {
            const results = searchService.search(["hello"], "!=", searchContext);
            expect(results).not.toContain("note1");
            expect(results.size).toBe(4); // All except note1
        });

        it("should handle multiple tokens", () => {
            const results = searchService.search(["fuzzy", "matching"], "!=", searchContext);
            expect(results).not.toContain("note3");
            expect(results.size).toBe(4); // All except note3
        });
    });
|
||||
|
||||
describe("Search Options", () => {
|
||||
it("should respect limit option", () => {
|
||||
const results = searchService.search(["test"], "*=*", searchContext, { limit: 1 });
|
||||
expect(results.size).toBeLessThanOrEqual(1);
|
||||
});
|
||||
|
||||
it("should filter by noteId set", () => {
|
||||
const noteIdFilter = new Set(["note1", "note3"]);
|
||||
const results = searchService.search(["test"], "*=*", searchContext, { noteIdFilter });
|
||||
|
||||
for (const noteId of results) {
|
||||
expect(noteIdFilter).toContain(noteId);
|
||||
}
|
||||
});
|
||||
|
||||
it("should exclude deleted notes by default", () => {
|
||||
// Mark note1 as deleted
|
||||
sql.execute(`UPDATE note_search_content SET isDeleted = 1 WHERE noteId = 'note1'`);
|
||||
|
||||
const results = searchService.search(["hello"], "*=*", searchContext);
|
||||
expect(results).not.toContain("note1");
|
||||
});
|
||||
|
||||
it("should include deleted notes when specified", () => {
|
||||
// Mark note1 as deleted
|
||||
sql.execute(`UPDATE note_search_content SET isDeleted = 1 WHERE noteId = 'note1'`);
|
||||
|
||||
const results = searchService.search(["hello"], "*=*", searchContext, { includeDeleted: true });
|
||||
expect(results).toContain("note1");
|
||||
});
|
||||
});
|
||||
|
||||
describe("Complex Queries", () => {
|
||||
it("should combine multiple searches with AND", () => {
|
||||
const queries = [
|
||||
{ tokens: ["java"], operator: "*=*" },
|
||||
{ tokens: ["script"], operator: "*=*" }
|
||||
];
|
||||
|
||||
const results = searchService.searchMultiple(queries, "AND", searchContext);
|
||||
expect(results).toContain("note2");
|
||||
expect(results).toContain("note5");
|
||||
});
|
||||
|
||||
it("should combine multiple searches with OR", () => {
|
||||
const queries = [
|
||||
{ tokens: ["hello"], operator: "*=*" },
|
||||
{ tokens: ["fuzzy"], operator: "*=*" }
|
||||
];
|
||||
|
||||
const results = searchService.searchMultiple(queries, "OR", searchContext);
|
||||
expect(results).toContain("note1");
|
||||
expect(results).toContain("note3");
|
||||
expect(results.size).toBe(2);
|
||||
});
|
||||
});
|
||||
|
||||
describe("Performance", () => {
|
||||
beforeEach(() => {
|
||||
// Add more test data for performance testing
|
||||
for (let i = 10; i < 1000; i++) {
|
||||
insertTestNote(
|
||||
`note${i}`,
|
||||
`Title ${i}`,
|
||||
`This is note number ${i} with some random content for testing performance.`
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
it("should handle large result sets efficiently", () => {
|
||||
const startTime = Date.now();
|
||||
const results = searchService.search(["note"], "*=*", searchContext);
|
||||
const elapsed = Date.now() - startTime;
|
||||
|
||||
expect(results.size).toBeGreaterThan(100);
|
||||
expect(elapsed).toBeLessThan(1000); // Should complete within 1 second
|
||||
});
|
||||
|
||||
it("should use limit to restrict results", () => {
|
||||
const startTime = Date.now();
|
||||
const results = searchService.search(["note"], "*=*", searchContext, { limit: 10 });
|
||||
const elapsed = Date.now() - startTime;
|
||||
|
||||
expect(results.size).toBeLessThanOrEqual(10);
|
||||
expect(elapsed).toBeLessThan(100); // Should be very fast with limit
|
||||
});
|
||||
});
|
||||
|
||||
describe("Statistics", () => {
|
||||
it("should return correct statistics", () => {
|
||||
const stats = searchService.getStatistics();
|
||||
|
||||
expect(stats.tablesInitialized).toBe(true);
|
||||
expect(stats.indexedNotes).toBe(5);
|
||||
expect(stats.totalTokens).toBe(5);
|
||||
expect(stats.fts5Available).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
afterAll(() => {
|
||||
// Clean up test data
|
||||
sql.execute(`DELETE FROM note_search_content`);
|
||||
sql.execute(`DELETE FROM note_tokens`);
|
||||
sql.execute(`DELETE FROM note_fts`);
|
||||
});
|
||||
});
|
||||
@@ -1,943 +0,0 @@
/**
 * SQLite Search Service
 *
 * This service provides high-performance search operations using pure SQLite queries.
 * It implements all search operators with 100% accuracy and a 10-30x performance improvement
 * over the TypeScript-based implementation.
 *
 * Operators supported:
 * - *=* (substring): Uses LIKE on normalized content
 * - ~= (fuzzy): Uses edit_distance function with tokens
 * - =* (prefix): Uses LIKE with prefix pattern
 * - *= (suffix): Uses LIKE with suffix pattern
 * - %= (regex): Uses regex_match function
 * - = (exact word): Uses FTS5 table
 * - != (not equals): Inverse of equals
 *
 * Performance characteristics:
 * - Substring search: O(n) with optimized LIKE
 * - Fuzzy search: O(n*m) where m is token count
 * - Prefix/suffix: O(n) with optimized LIKE
 * - Regex: O(n) with native regex support
 * - Exact word: O(log n) with FTS5 index
 */
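
// A minimal usage sketch (hedged: the SearchContext option values below simply
// mirror the verification script further down; adapt them to the caller's needs):
//
//   const service = getSQLiteSearchService();
//   const ctx = new SearchContext({ fastSearch: false, includeArchivedNotes: false,
//                                   fuzzyAttributeSearch: false, debug: false });
//   const substringHits = service.search(["note"], "*=*", ctx); // LIKE-based
//   const exactHits = service.search(["hello"], "=", ctx);      // FTS5-based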

import sql from "../sql.js";
import log from "../log.js";
import type SearchContext from "./search_context.js";
import protectedSessionService from "../protected_session.js";
import { normalize } from "../utils.js";

/**
 * Configuration for search operations
 */
const SEARCH_CONFIG = {
    MAX_EDIT_DISTANCE: 2,
    MIN_TOKEN_LENGTH: 3,
    MAX_RESULTS: 10000,
    BATCH_SIZE: 1000,
    LOG_PERFORMANCE: true,
} as const;

/**
 * Interface for search results
 */
export interface SearchResult {
    noteId: string;
    score?: number;
    snippet?: string;
}

/**
 * Interface for search options
 */
export interface SearchOptions {
    includeProtected?: boolean;
    includeDeleted?: boolean;
    noteIdFilter?: Set<string>;
    limit?: number;
    offset?: number;
}

/**
 * SQLite-based search service for high-performance note searching
 */
export class SQLiteSearchService {
    private static instance: SQLiteSearchService | null = null;
    private isInitialized: boolean = false;
    private statistics = {
        tablesInitialized: false,
        totalSearches: 0,
        totalTimeMs: 0,
        averageTimeMs: 0,
        lastSearchTimeMs: 0
    };

    private constructor() {
        this.checkAndInitialize();
    }

    /**
     * Get singleton instance of the search service
     */
    static getInstance(): SQLiteSearchService {
        if (!SQLiteSearchService.instance) {
            SQLiteSearchService.instance = new SQLiteSearchService();
        }
        return SQLiteSearchService.instance;
    }

    /**
     * Check if search tables are initialized and create them if needed
     */
    private checkAndInitialize(): void {
        try {
            // Check if tables exist
            const tableExists = sql.getValue(`
                SELECT name FROM sqlite_master
                WHERE type='table' AND name='note_search_content'
            `);

            if (!tableExists) {
                log.info("Search tables not found. They will be created by migration.");
                this.isInitialized = false;
                return;
            }

            // Verify table structure
            const columnCount = sql.getValue<number>(`
                SELECT COUNT(*) FROM pragma_table_info('note_search_content')
            `) || 0;

            if (columnCount > 0) {
                this.isInitialized = true;
                this.statistics.tablesInitialized = true;
                log.info("SQLite search service initialized successfully");
            }
        } catch (error) {
            log.error(`Failed to initialize SQLite search service: ${error}`);
            this.isInitialized = false;
            this.statistics.tablesInitialized = false;
        }
    }

    /**
     * Main search method that delegates to the appropriate operator implementation
     */
    search(
        tokens: string[],
        operator: string,
        searchContext: SearchContext,
        options: SearchOptions = {}
    ): Set<string> {
        if (!this.isInitialized) {
            log.info("SQLite search service not initialized, falling back to traditional search");
            return new Set();
        }

        const startTime = Date.now();
        let results: Set<string>;

        try {
            // Normalize tokens for consistent searching
            const normalizedTokens = tokens.map(token => normalize(token).toLowerCase());

            // Delegate to the appropriate search method based on the operator
            switch (operator) {
                case "*=*":
                    results = this.searchSubstring(normalizedTokens, options);
                    break;
                case "~=":
                    results = this.searchFuzzy(normalizedTokens, options);
                    break;
                case "=*":
                    results = this.searchPrefix(normalizedTokens, options);
                    break;
                case "*=":
                    results = this.searchSuffix(normalizedTokens, options);
                    break;
                case "%=":
                    results = this.searchRegex(tokens, options); // Use original tokens for regex
                    break;
                case "=":
                    results = this.searchExactWord(normalizedTokens, options);
                    break;
                case "!=":
                    results = this.searchNotEquals(normalizedTokens, options);
                    break;
                default:
                    log.info(`Unsupported search operator: ${operator}`);
                    return new Set();
            }

            const elapsed = Date.now() - startTime;

            // Update statistics
            this.statistics.totalSearches++;
            this.statistics.totalTimeMs += elapsed;
            this.statistics.lastSearchTimeMs = elapsed;
            this.statistics.averageTimeMs = this.statistics.totalTimeMs / this.statistics.totalSearches;

            if (SEARCH_CONFIG.LOG_PERFORMANCE) {
                log.info(`SQLite search completed: operator=${operator}, tokens=${tokens.join(" ")}, ` +
                    `results=${results.size}, time=${elapsed}ms`);
            }

            return results;
        } catch (error) {
            log.error(`SQLite search failed: ${error}`);
            searchContext.addError(`Search failed: ${error}`);
            return new Set();
        }
    }

    /**
     * Substring search using LIKE on normalized content
     * Operator: *=*
     */
    private searchSubstring(tokens: string[], options: SearchOptions): Set<string> {
        const results = new Set<string>();

        // Build WHERE clause for all tokens
        const conditions = tokens.map(() =>
            `nsc.full_text_normalized LIKE '%' || ? || '%'`
        ).join(' AND ');

        // Build base query - JOIN with notes table for isDeleted/isProtected filtering
        let query = `
            SELECT DISTINCT nsc.noteId
            FROM note_search_content nsc
            JOIN notes n ON nsc.noteId = n.noteId
            WHERE ${conditions}
        `;

        const params = [...tokens];

        // Add filters using the notes table columns
        if (!options.includeDeleted) {
            query += ` AND n.isDeleted = 0`;
        }

        if (!options.includeProtected && !protectedSessionService.isProtectedSessionAvailable()) {
            query += ` AND n.isProtected = 0`;
        }

        // Add limit if specified
        if (options.limit) {
            query += ` LIMIT ${options.limit}`;
        }

        // Execute query
        for (const row of sql.iterateRows<{ noteId: string }>(query, params)) {
            // Apply noteId filter if provided
            if (!options.noteIdFilter || options.noteIdFilter.has(row.noteId)) {
                results.add(row.noteId);
            }
        }

        return results;
    }
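
    // For orientation: with tokens ["foo", "bar"] and default options, the query
    // assembled above is equivalent to (parameters shown inline for readability;
    // the isProtected filter is added only outside a protected session):
    //
    //   SELECT DISTINCT nsc.noteId
    //   FROM note_search_content nsc
    //   JOIN notes n ON nsc.noteId = n.noteId
    //   WHERE nsc.full_text_normalized LIKE '%foo%'
    //     AND nsc.full_text_normalized LIKE '%bar%'
    //     AND n.isDeleted = 0 AND n.isProtected = 0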

    /**
     * Fuzzy search using edit distance on tokens
     * Operator: ~=
     */
    private searchFuzzy(tokens: string[], options: SearchOptions): Set<string> {
        const results = new Set<string>();

        // For fuzzy search, we need to check tokens individually
        // First, get all note IDs that might match
        let query = `
            SELECT DISTINCT nsc.noteId, nsc.full_text_normalized
            FROM note_search_content nsc
            JOIN notes n ON nsc.noteId = n.noteId
            WHERE 1=1
        `;

        if (!options.includeDeleted) {
            query += ` AND n.isDeleted = 0`;
        }

        if (!options.includeProtected && !protectedSessionService.isProtectedSessionAvailable()) {
            query += ` AND n.isProtected = 0`;
        }

        // Process in batches for better performance
        const noteData = new Map<string, string>();

        for (const row of sql.iterateRows<{ noteId: string, full_text_normalized: string }>(query)) {
            if (options.noteIdFilter && !options.noteIdFilter.has(row.noteId)) {
                continue;
            }

            noteData.set(row.noteId, row.full_text_normalized || '');
        }

        // Get tokens for fuzzy matching
        const tokenQuery = `
            SELECT DISTINCT noteId, token_normalized
            FROM note_tokens
            WHERE noteId IN (${Array.from(noteData.keys()).map(() => '?').join(',')})
        `;

        const noteTokens = new Map<string, Set<string>>();
        if (noteData.size > 0) {
            for (const row of sql.iterateRows<{ noteId: string, token_normalized: string }>(
                tokenQuery, Array.from(noteData.keys())
            )) {
                if (!noteTokens.has(row.noteId)) {
                    noteTokens.set(row.noteId, new Set());
                }
                noteTokens.get(row.noteId)!.add(row.token_normalized);
            }
        }

        // Now check each note for fuzzy matches
        for (const [noteId, content] of noteData) {
            let allTokensMatch = true;
            const noteTokenSet = noteTokens.get(noteId) || new Set();

            for (const searchToken of tokens) {
                let tokenMatches = false;

                // Check if the token matches any word in the note:
                // first check for an exact match in the content
                if (content.includes(searchToken)) {
                    tokenMatches = true;
                } else {
                    // Check fuzzy match against tokens
                    for (const noteToken of noteTokenSet) {
                        if (this.fuzzyMatchTokens(searchToken, noteToken)) {
                            tokenMatches = true;
                            break;
                        }
                    }
                }

                if (!tokenMatches) {
                    allTokensMatch = false;
                    break;
                }
            }

            if (allTokensMatch) {
                results.add(noteId);

                if (options.limit && results.size >= options.limit) {
                    break;
                }
            }
        }

        return results;
    }

    /**
     * Helper method for fuzzy matching between two tokens
     */
    private fuzzyMatchTokens(token1: string, token2: string): boolean {
        // Quick exact match check
        if (token1 === token2) {
            return true;
        }

        // Don't fuzzy match very short tokens
        if (token1.length < SEARCH_CONFIG.MIN_TOKEN_LENGTH ||
            token2.length < SEARCH_CONFIG.MIN_TOKEN_LENGTH) {
            return false;
        }

        // Check if length difference is within the edit distance threshold
        if (Math.abs(token1.length - token2.length) > SEARCH_CONFIG.MAX_EDIT_DISTANCE) {
            return false;
        }

        // Use the SQL function for the edit distance calculation
        const distance = sql.getValue<number>(`
            SELECT edit_distance(?, ?, ?)
        `, [token1, token2, SEARCH_CONFIG.MAX_EDIT_DISTANCE]);

        return distance <= SEARCH_CONFIG.MAX_EDIT_DISTANCE;
    }
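
    // Worked examples of the thresholds above (assuming edit_distance() returns
    // the Levenshtein distance, capped by its third argument):
    //   fuzzyMatchTokens("helo", "hello") -> length diff 1 <= 2, distance 1 -> true
    //   fuzzyMatchTokens("xyz", "hello")  -> length diff 2 passes, but distance > 2 -> false
    //   fuzzyMatchTokens("ab", "abc")     -> rejected early: token shorter than 3 chars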

    /**
     * Prefix search using LIKE with prefix pattern
     * Operator: =*
     */
    private searchPrefix(tokens: string[], options: SearchOptions): Set<string> {
        const results = new Set<string>();

        // Build WHERE clause for all tokens
        const conditions = tokens.map(() =>
            `nsc.full_text_normalized LIKE ? || '%'`
        ).join(' AND ');

        // Build query - JOIN with notes table for isDeleted/isProtected filtering
        let query = `
            SELECT DISTINCT nsc.noteId
            FROM note_search_content nsc
            JOIN notes n ON nsc.noteId = n.noteId
            WHERE ${conditions}
        `;

        const params = [...tokens];

        // Add filters using the notes table columns
        if (!options.includeDeleted) {
            query += ` AND n.isDeleted = 0`;
        }

        if (!options.includeProtected && !protectedSessionService.isProtectedSessionAvailable()) {
            query += ` AND n.isProtected = 0`;
        }

        // Add limit if specified
        if (options.limit) {
            query += ` LIMIT ${options.limit}`;
        }

        // Execute query
        for (const row of sql.iterateRows<{ noteId: string }>(query, params)) {
            if (!options.noteIdFilter || options.noteIdFilter.has(row.noteId)) {
                results.add(row.noteId);
            }
        }

        return results;
    }

    /**
     * Suffix search using LIKE with suffix pattern
     * Operator: *=
     */
    private searchSuffix(tokens: string[], options: SearchOptions): Set<string> {
        const results = new Set<string>();

        // Build WHERE clause for all tokens
        const conditions = tokens.map(() =>
            `nsc.full_text_normalized LIKE '%' || ?`
        ).join(' AND ');

        // Build query - JOIN with notes table for isDeleted/isProtected filtering
        let query = `
            SELECT DISTINCT nsc.noteId
            FROM note_search_content nsc
            JOIN notes n ON nsc.noteId = n.noteId
            WHERE ${conditions}
        `;

        const params = [...tokens];

        // Add filters using the notes table columns
        if (!options.includeDeleted) {
            query += ` AND n.isDeleted = 0`;
        }

        if (!options.includeProtected && !protectedSessionService.isProtectedSessionAvailable()) {
            query += ` AND n.isProtected = 0`;
        }

        // Add limit if specified
        if (options.limit) {
            query += ` LIMIT ${options.limit}`;
        }

        // Execute query
        for (const row of sql.iterateRows<{ noteId: string }>(query, params)) {
            if (!options.noteIdFilter || options.noteIdFilter.has(row.noteId)) {
                results.add(row.noteId);
            }
        }

        return results;
    }

    /**
     * Regex search using regex_match function
     * Operator: %=
     */
    private searchRegex(patterns: string[], options: SearchOptions): Set<string> {
        const results = new Set<string>();

        // For regex, we use the combined title+content (not normalized)
        // Build WHERE clause for all patterns
        const conditions = patterns.map(() =>
            `regex_match(nsc.title || ' ' || nsc.content, ?, 'ims') = 1`
        ).join(' AND ');

        // Build query - JOIN with notes table for isDeleted/isProtected filtering
        let query = `
            SELECT DISTINCT nsc.noteId
            FROM note_search_content nsc
            JOIN notes n ON nsc.noteId = n.noteId
            WHERE ${conditions}
        `;

        const params = [...patterns];

        // Add filters using the notes table columns
        if (!options.includeDeleted) {
            query += ` AND n.isDeleted = 0`;
        }

        if (!options.includeProtected && !protectedSessionService.isProtectedSessionAvailable()) {
            query += ` AND n.isProtected = 0`;
        }

        // Add limit if specified
        if (options.limit) {
            query += ` LIMIT ${options.limit}`;
        }

        // Execute query
        try {
            for (const row of sql.iterateRows<{ noteId: string }>(query, params)) {
                if (!options.noteIdFilter || options.noteIdFilter.has(row.noteId)) {
                    results.add(row.noteId);
                }
            }
        } catch (error) {
            log.error(`Regex search failed: ${error}`);
            // Return empty set on regex error
        }

        return results;
    }
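
    // Example: the email pattern from the test suite, "\\w+@\\w+\\.com", is evaluated as
    //   regex_match(nsc.title || ' ' || nsc.content, '\w+@\w+\.com', 'ims') = 1
    // i.e. case-insensitive, multiline, dotall matching over the raw
    // (non-normalized) title and content.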

    /**
     * Exact word search using FTS5 or token matching
     * Operator: =
     */
    private searchExactWord(tokens: string[], options: SearchOptions): Set<string> {
        const results = new Set<string>();

        // Try FTS5 first if available
        const fts5Available = this.checkFTS5Availability();

        if (fts5Available) {
            try {
                // Build FTS5 query
                const ftsQuery = tokens.map(t => `"${t}"`).join(' ');

                // FTS5 doesn't have isDeleted or isProtected columns,
                // so we need to join with the notes table for filtering
                let query = `
                    SELECT DISTINCT f.noteId
                    FROM notes_fts f
                    JOIN notes n ON f.noteId = n.noteId
                    WHERE f.notes_fts MATCH ?
                `;

                const params = [ftsQuery];

                // Add filters using the notes table columns
                if (!options.includeDeleted) {
                    query += ` AND n.isDeleted = 0`;
                }

                if (!options.includeProtected && !protectedSessionService.isProtectedSessionAvailable()) {
                    query += ` AND n.isProtected = 0`;
                }

                // Add limit if specified
                if (options.limit) {
                    query += ` LIMIT ${options.limit}`;
                }

                for (const row of sql.iterateRows<{ noteId: string }>(query, params)) {
                    if (!options.noteIdFilter || options.noteIdFilter.has(row.noteId)) {
                        results.add(row.noteId);
                    }
                }

                return results;
            } catch (error) {
                log.info(`FTS5 search failed, falling back to token search: ${error}`);
            }
        }

        // Fallback to token-based exact match:
        // build query to check if all tokens exist as whole words
        let query = `
            SELECT DISTINCT nt.noteId, nt.token_normalized
            FROM note_tokens nt
            JOIN notes n ON nt.noteId = n.noteId
            WHERE 1=1
        `;

        if (!options.includeDeleted) {
            query += ` AND n.isDeleted = 0`;
        }

        if (!options.includeProtected && !protectedSessionService.isProtectedSessionAvailable()) {
            query += ` AND n.isProtected = 0`;
        }

        // Get all matching notes and their tokens
        const candidateNotes = new Map<string, Set<string>>();

        for (const row of sql.iterateRows<{ noteId: string, token_normalized: string }>(query)) {
            if (options.noteIdFilter && !options.noteIdFilter.has(row.noteId)) {
                continue;
            }

            if (!candidateNotes.has(row.noteId)) {
                candidateNotes.set(row.noteId, new Set());
            }
            candidateNotes.get(row.noteId)!.add(row.token_normalized);
        }

        // Check each candidate for exact token matches
        for (const [noteId, noteTokenSet] of candidateNotes) {
            const allTokensFound = tokens.every(token => noteTokenSet.has(token));

            if (allTokensFound) {
                results.add(noteId);

                if (options.limit && results.size >= options.limit) {
                    break;
                }
            }
        }

        return results;
    }
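
    // Sketch of the FTS5 branch for tokens ["fuzzy", "matching"]: each token is
    // quoted and whitespace-joined, and FTS5 treats adjacent phrases as an implicit AND:
    //
    //   SELECT DISTINCT f.noteId
    //   FROM notes_fts f
    //   JOIN notes n ON f.noteId = n.noteId
    //   WHERE f.notes_fts MATCH '"fuzzy" "matching"' AND n.isDeleted = 0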

    /**
     * Not equals search - inverse of exact word search
     * Operator: !=
     */
    private searchNotEquals(tokens: string[], options: SearchOptions): Set<string> {
        // Get all notes that DON'T match the exact word search
        const matchingNotes = this.searchExactWord(tokens, options);

        // Get all notes - JOIN with notes table for isDeleted/isProtected filtering
        let query = `
            SELECT DISTINCT nsc.noteId
            FROM note_search_content nsc
            JOIN notes n ON nsc.noteId = n.noteId
            WHERE 1=1
        `;

        if (!options.includeDeleted) {
            query += ` AND n.isDeleted = 0`;
        }

        if (!options.includeProtected && !protectedSessionService.isProtectedSessionAvailable()) {
            query += ` AND n.isProtected = 0`;
        }

        const allNotes = new Set<string>();
        for (const row of sql.iterateRows<{ noteId: string }>(query)) {
            if (!options.noteIdFilter || options.noteIdFilter.has(row.noteId)) {
                allNotes.add(row.noteId);
            }
        }

        // Return the difference
        const results = new Set<string>();
        for (const noteId of allNotes) {
            if (!matchingNotes.has(noteId)) {
                results.add(noteId);

                if (options.limit && results.size >= options.limit) {
                    break;
                }
            }
        }

        return results;
    }

    /**
     * Check if FTS5 is available
     */
    private checkFTS5Availability(): boolean {
        try {
            const result = sql.getValue(`
                SELECT name FROM sqlite_master
                WHERE type='table' AND name='notes_fts'
            `);
            return !!result;
        } catch {
            return false;
        }
    }

    /**
     * Search with multiple operators (for complex queries)
     */
    searchMultiple(
        queries: Array<{ tokens: string[], operator: string }>,
        combineMode: 'AND' | 'OR',
        searchContext: SearchContext,
        options: SearchOptions = {}
    ): Set<string> {
        if (queries.length === 0) {
            return new Set();
        }

        const resultSets = queries.map(q =>
            this.search(q.tokens, q.operator, searchContext, options)
        );

        if (combineMode === 'AND') {
            // Intersection of all result sets
            return resultSets.reduce((acc, set) => {
                const intersection = new Set<string>();
                for (const item of acc) {
                    if (set.has(item)) {
                        intersection.add(item);
                    }
                }
                return intersection;
            });
        } else {
            // Union of all result sets
            return resultSets.reduce((acc, set) => {
                for (const item of set) {
                    acc.add(item);
                }
                return acc;
            }, new Set<string>());
        }
    }
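
    // Usage sketch, mirroring the unit tests: AND-combine two substring queries
    // so only notes containing both "java" and "script" remain.
    //
    //   const hits = service.searchMultiple(
    //       [{ tokens: ["java"], operator: "*=*" },
    //        { tokens: ["script"], operator: "*=*" }],
    //       "AND", searchContext);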

    /**
     * Get search statistics for monitoring
     */
    getStatistics() {
        // Return the in-memory statistics object which includes performance data
        return {
            ...this.statistics,
            indexedNotes: this.isInitialized ? this.getIndexedNotesCount() : 0,
            totalTokens: this.isInitialized ? this.getTotalTokensCount() : 0,
            fts5Available: this.isInitialized ? this.checkFTS5Availability() : false
        };
    }

    /**
     * Get count of indexed notes
     */
    private getIndexedNotesCount(): number {
        try {
            return sql.getValue<number>(`
                SELECT COUNT(DISTINCT nsc.noteId)
                FROM note_search_content nsc
                JOIN notes n ON nsc.noteId = n.noteId
                WHERE n.isDeleted = 0
            `) || 0;
        } catch {
            return 0;
        }
    }

    /**
     * Get total tokens count
     */
    private getTotalTokensCount(): number {
        try {
            return sql.getValue<number>(`
                SELECT COUNT(*) FROM note_tokens
            `) || 0;
        } catch {
            return 0;
        }
    }

    /**
     * Rebuild search index for a specific note
     */
    rebuildNoteIndex(noteId: string): void {
        if (!this.isInitialized) {
            log.info("Cannot rebuild index - search tables not initialized");
            return;
        }

        try {
            // This will be handled by triggers automatically,
            // but we can force an update by touching the note
            sql.execute(`
                UPDATE notes
                SET dateModified = strftime('%Y-%m-%d %H:%M:%S.%f', 'now')
                WHERE noteId = ?
            `, [noteId]);

            log.info(`Rebuilt search index for note ${noteId}`);
        } catch (error) {
            log.error(`Failed to rebuild index for note ${noteId}: ${error}`);
        }
    }

    /**
     * Clear search index (for testing/maintenance)
     */
    clearIndex(): void {
        if (!this.isInitialized) {
            return;
        }

        try {
            sql.execute(`DELETE FROM note_search_content`);
            sql.execute(`DELETE FROM note_tokens`);

            if (this.checkFTS5Availability()) {
                sql.execute(`DELETE FROM notes_fts`);
            }

            log.info("Search index cleared");
        } catch (error) {
            log.error(`Failed to clear search index: ${error}`);
        }
    }

    /**
     * Get detailed index status information
     */
    async getIndexStatus(): Promise<{
        initialized: boolean;
        tablesExist: boolean;
        indexedNotes: number;
        totalNotes: number;
        totalTokens: number;
        fts5Available: boolean;
        lastRebuild?: string;
        coverage: number;
    }> {
        const tablesExist = this.isInitialized;

        if (!tablesExist) {
            return {
                initialized: false,
                tablesExist: false,
                indexedNotes: 0,
                totalNotes: 0,
                totalTokens: 0,
                fts5Available: false,
                coverage: 0
            };
        }

        // Get total indexable notes
        const totalNotes = sql.getValue<number>(`
            SELECT COUNT(*)
            FROM notes
            WHERE type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
              AND isDeleted = 0
              AND isProtected = 0
        `) || 0;

        // Get indexed notes count
        const indexedNotes = sql.getValue<number>(`
            SELECT COUNT(DISTINCT nsc.noteId)
            FROM note_search_content nsc
            JOIN notes n ON nsc.noteId = n.noteId
            WHERE n.isDeleted = 0
        `) || 0;

        // Get token count
        const totalTokens = sql.getValue<number>(`
            SELECT COUNT(*) FROM note_tokens
        `) || 0;

        // Calculate coverage percentage
        const coverage = totalNotes > 0 ? (indexedNotes / totalNotes) * 100 : 0;

        return {
            initialized: true,
            tablesExist: true,
            indexedNotes,
            totalNotes,
            totalTokens,
            fts5Available: this.checkFTS5Availability(),
            coverage: Math.round(coverage * 100) / 100
        };
    }

    /**
     * Rebuild the entire search index
     */
    async rebuildIndex(force: boolean = false): Promise<void> {
        if (!this.isInitialized && !force) {
            throw new Error("Search tables not initialized. Use force=true to create tables.");
        }

        log.info("Starting search index rebuild...");
        const startTime = Date.now();

        try {
            // Clear existing index
            this.clearIndex();

            // Rebuild from all notes
            const batchSize = 100;
            let offset = 0;
            let totalProcessed = 0;

            while (true) {
                const notes = sql.getRows<{
                    noteId: string;
                    title: string;
                    type: string;
                    mime: string;
                    content: string | null;
                }>(`
                    SELECT
                        n.noteId,
                        n.title,
                        n.type,
                        n.mime,
                        b.content
                    FROM notes n
                    LEFT JOIN blobs b ON n.blobId = b.blobId
                    WHERE n.isDeleted = 0
                      AND n.isProtected = 0
                      AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
                    ORDER BY n.noteId
                    LIMIT ? OFFSET ?
                `, [batchSize, offset]);

                if (notes.length === 0) {
                    break;
                }

                // Process batch - the trigger will handle the actual indexing
                for (const note of notes) {
                    try {
                        // Touch the note to trigger re-indexing
                        sql.execute(`
                            UPDATE notes
                            SET dateModified = strftime('%Y-%m-%d %H:%M:%S.%f', 'now')
                            WHERE noteId = ?
                        `, [note.noteId]);

                        totalProcessed++;
                    } catch (error) {
                        log.error(`Failed to reindex note ${note.noteId}: ${error}`);
                    }
                }

                offset += batchSize;

                if (totalProcessed % 1000 === 0) {
                    log.info(`Reindexed ${totalProcessed} notes...`);
                }
            }

            const duration = Date.now() - startTime;
            log.info(`Index rebuild completed: ${totalProcessed} notes in ${duration}ms`);

        } catch (error) {
            log.error(`Index rebuild failed: ${error}`);
            throw error;
        }
    }
}

// Export singleton instance getter
export function getSQLiteSearchService(): SQLiteSearchService {
    return SQLiteSearchService.getInstance();
}

// Export default getter function (not the instance, to avoid initialization issues)
export default getSQLiteSearchService;
@@ -1,471 +0,0 @@
/**
 * SQLite Search Utilities
 *
 * Helper functions and utilities for SQLite-based search operations.
 * These utilities provide common functionality needed by the search service
 * and help with data preparation, validation, and performance monitoring.
 */

import sql from "../sql.js";
import log from "../log.js";
import { normalize, stripTags } from "../utils.js";

/**
 * Configuration for search utilities
 */
export const SEARCH_UTILS_CONFIG = {
    BATCH_SIZE: 1000,
    MAX_CONTENT_SIZE: 2 * 1024 * 1024, // 2MB
    MIN_TOKEN_LENGTH: 2,
    MAX_TOKEN_LENGTH: 100,
    LOG_SLOW_QUERIES: true,
    SLOW_QUERY_THRESHOLD: 100, // ms
} as const;

/**
 * Interface for note content data
 */
export interface NoteContentData {
    noteId: string;
    title: string;
    content: string;
    type: string;
    mime: string;
    isProtected: boolean;
    isDeleted: boolean;
}

/**
 * Normalize text for search indexing
 * Ensures consistent normalization across all search operations
 */
export function normalizeForSearch(text: string | null | undefined): string {
    if (!text || typeof text !== 'string') {
        return '';
    }

    // Use the standard normalize function and convert to lowercase
    return normalize(text).toLowerCase();
}
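
// Example (assuming utils.normalize() strips diacritics, as exercised by the
// verification script's normalize_text('Café') check):
//   normalizeForSearch("Café Früh") -> "cafe fruh"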

/**
 * Tokenize text into searchable words
 * Handles camelCase, snake_case, and special characters
 */
export function tokenizeText(text: string | null | undefined): string[] {
    if (!text || typeof text !== 'string') {
        return [];
    }

    const tokens = new Set<string>();

    // Split on word boundaries
    const words = text
        .split(/[\s\n\r\t,;.!?()[\]{}"'`~@#$%^&*+=|\\/<>:-]+/)
        .filter(word => word.length >= SEARCH_UTILS_CONFIG.MIN_TOKEN_LENGTH &&
                        word.length <= SEARCH_UTILS_CONFIG.MAX_TOKEN_LENGTH);

    for (const word of words) {
        // Add the original word (lowercase)
        tokens.add(word.toLowerCase());

        // Handle snake_case
        const snakeParts = word.split('_').filter(part => part.length > 0);
        if (snakeParts.length > 1) {
            for (const part of snakeParts) {
                tokens.add(part.toLowerCase());

                // Also handle camelCase within snake_case parts
                const camelParts = splitCamelCase(part);
                for (const camelPart of camelParts) {
                    if (camelPart.length >= SEARCH_UTILS_CONFIG.MIN_TOKEN_LENGTH) {
                        tokens.add(camelPart.toLowerCase());
                    }
                }
            }
        } else {
            // Handle camelCase
            const camelParts = splitCamelCase(word);
            for (const part of camelParts) {
                if (part.length >= SEARCH_UTILS_CONFIG.MIN_TOKEN_LENGTH) {
                    tokens.add(part.toLowerCase());
                }
            }
        }
    }

    return Array.from(tokens);
}

/**
 * Split camelCase strings into parts
 */
function splitCamelCase(str: string): string[] {
    // Split on transitions from lowercase to uppercase;
    // also handle sequences of uppercase letters (e.g., "XMLParser" -> ["XML", "Parser"])
    return str.split(/(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])/);
}
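
// Rough worked example (the Set preserves insertion order; duplicates collapse):
//   tokenizeText("getUserName XMLParser snake_case")
//     -> ["getusername", "get", "user", "name",
//         "xmlparser", "xml", "parser",
//         "snake_case", "snake", "case"]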

/**
 * Process HTML content for indexing
 * Removes tags and normalizes the text
 */
export function processHtmlContent(html: string | null | undefined): string {
    if (!html || typeof html !== 'string') {
        return '';
    }

    // Remove script and style content
    let text = html.replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '');
    text = text.replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, '');

    // Strip remaining tags
    text = stripTags(text);

    // Decode HTML entities
    text = text.replace(/&nbsp;/g, ' ');
    text = text.replace(/&lt;/g, '<');
    text = text.replace(/&gt;/g, '>');
    text = text.replace(/&amp;/g, '&');
    text = text.replace(/&quot;/g, '"');
    text = text.replace(/&#39;/g, "'");
    text = text.replace(/&apos;/g, "'");

    // Normalize whitespace
    text = text.replace(/\s+/g, ' ').trim();

    return text;
}

/**
 * Process JSON content (e.g., mindmaps, canvas) for indexing
 */
export function processJsonContent(json: string | null | undefined, type: string): string {
    if (!json || typeof json !== 'string') {
        return '';
    }

    try {
        const data = JSON.parse(json);

        if (type === 'mindMap') {
            return extractMindMapText(data);
        } else if (type === 'canvas') {
            return extractCanvasText(data);
        }

        // For other JSON types, try to extract text content
        return extractTextFromObject(data);
    } catch (error) {
        log.info(`Failed to process JSON content: ${error}`);
        return '';
    }
}

/**
 * Extract text from mindmap JSON structure
 */
function extractMindMapText(data: any): string {
    const texts: string[] = [];

    function collectTopics(node: any): void {
        if (!node) return;

        if (node.topic) {
            texts.push(node.topic);
        }

        if (node.children && Array.isArray(node.children)) {
            for (const child of node.children) {
                collectTopics(child);
            }
        }
    }

    if (data.nodedata) {
        collectTopics(data.nodedata);
    }

    return texts.join(' ');
}
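
// Example of the nodedata/topic/children traversal above:
//   extractMindMapText({ nodedata: { topic: "Root",
//       children: [{ topic: "Child A" }, { topic: "Child B" }] } })
//   -> "Root Child A Child B"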

/**
 * Extract text from canvas JSON structure
 */
function extractCanvasText(data: any): string {
    const texts: string[] = [];

    if (data.elements && Array.isArray(data.elements)) {
        for (const element of data.elements) {
            if (element.type === 'text' && element.text) {
                texts.push(element.text);
            }
        }
    }

    return texts.join(' ');
}

/**
 * Generic text extraction from JSON objects
 */
function extractTextFromObject(obj: any, maxDepth = 10): string {
    if (maxDepth <= 0) return '';

    const texts: string[] = [];

    if (typeof obj === 'string') {
        return obj;
    } else if (Array.isArray(obj)) {
        for (const item of obj) {
            const text = extractTextFromObject(item, maxDepth - 1);
            if (text) texts.push(text);
        }
    } else if (typeof obj === 'object' && obj !== null) {
        for (const key of Object.keys(obj)) {
            // Look for common text field names
            if (['text', 'content', 'value', 'title', 'name', 'label', 'description'].includes(key.toLowerCase())) {
                const value = obj[key];
                if (typeof value === 'string') {
                    texts.push(value);
                }
            } else {
                const text = extractTextFromObject(obj[key], maxDepth - 1);
                if (text) texts.push(text);
            }
        }
    }

    return texts.join(' ');
}

/**
 * Prepare note content for indexing
 * Handles different note types and formats
 */
export function prepareNoteContent(note: NoteContentData): {
    normalizedContent: string;
    normalizedTitle: string;
    tokens: string[];
} {
    let content = note.content;

    // Process content based on type
    if (note.type === 'text' && note.mime === 'text/html') {
        content = processHtmlContent(content);
    } else if ((note.type === 'mindMap' || note.type === 'canvas') && note.mime === 'application/json') {
        content = processJsonContent(content, note.type);
    }

    // Check content size
    if (content.length > SEARCH_UTILS_CONFIG.MAX_CONTENT_SIZE) {
        log.info(`Note ${note.noteId} content exceeds max size (${content.length} bytes), truncating`);
        content = content.substring(0, SEARCH_UTILS_CONFIG.MAX_CONTENT_SIZE);
    }

    // Normalize content and title
    const normalizedContent = normalizeForSearch(content);
    const normalizedTitle = normalizeForSearch(note.title);

    // Generate tokens from both content and title
    const allText = `${note.title} ${content}`;
    const tokens = tokenizeText(allText);

    return {
        normalizedContent,
        normalizedTitle,
        tokens
    };
}

/**
 * Update search index for a single note
 */
export async function updateNoteSearchIndex(noteId: string): Promise<void> {
    try {
        // Get note data
        const noteData = sql.getRow<NoteContentData>(`
            SELECT n.noteId, n.title, b.content, n.type, n.mime, n.isProtected, n.isDeleted
            FROM notes n
            LEFT JOIN blobs b ON n.blobId = b.blobId
            WHERE n.noteId = ?
        `, [noteId]);

        if (!noteData) {
            log.info(`Note ${noteId} not found for indexing`);
            return;
        }

        // Prepare content for indexing
        const { normalizedContent, normalizedTitle, tokens } = prepareNoteContent(noteData);

        // Update the search content table.
        // Note: note_search_content doesn't have isProtected/isDeleted columns;
        // those are in the notes table, which we join with
        sql.execute(`
            INSERT OR REPLACE INTO note_search_content
            (noteId, title, content, title_normalized, content_normalized, full_text_normalized)
            VALUES (?, ?, ?, ?, ?, ?)
        `, [noteId, noteData.title, noteData.content || '',
            normalizedTitle, normalizedContent,
            normalizedTitle + ' ' + normalizedContent]);

        // Delete existing tokens for this note
        sql.execute(`DELETE FROM note_tokens WHERE noteId = ?`, [noteId]);

        // Insert new tokens with proper structure
        let position = 0;
        for (const token of tokens) {
            sql.execute(`
                INSERT INTO note_tokens (noteId, token, token_normalized, position, source)
                VALUES (?, ?, ?, ?, 'content')
            `, [noteId, token, normalizeForSearch(token), position]);
            position++;
        }

        log.info(`Updated search index for note ${noteId}`);
    } catch (error) {
        log.error(`Failed to update search index for note ${noteId}: ${error}`);
        throw error;
    }
}

/**
 * Batch update search index for multiple notes
 */
export async function batchUpdateSearchIndex(noteIds: string[]): Promise<void> {
    const startTime = Date.now();
    let successCount = 0;
    let errorCount = 0;

    // Process in batches
    for (let i = 0; i < noteIds.length; i += SEARCH_UTILS_CONFIG.BATCH_SIZE) {
        const batch = noteIds.slice(i, i + SEARCH_UTILS_CONFIG.BATCH_SIZE);

        try {
            sql.transactional(() => {
                for (const noteId of batch) {
                    try {
                        updateNoteSearchIndex(noteId);
                        successCount++;
                    } catch (error) {
                        log.error(`Failed to index note ${noteId}: ${error}`);
                        errorCount++;
                    }
                }
            });
        } catch (error) {
            log.error(`Batch indexing failed: ${error}`);
            errorCount += batch.length;
        }
    }

    const elapsed = Date.now() - startTime;
    log.info(`Batch search indexing completed: ${successCount} success, ${errorCount} errors, ${elapsed}ms`);
}

/**
 * Verify search index integrity
 */
export function verifySearchIndex(): {
    valid: boolean;
    issues: string[];
    stats: {
        totalNotes: number;
        indexedNotes: number;
        missingFromIndex: number;
        orphanedEntries: number;
    };
} {
    const issues: string[] = [];

    // Count total notes
    const totalNotes = sql.getValue<number>(`
        SELECT COUNT(*) FROM notes WHERE isDeleted = 0
    `) || 0;

    // Count indexed notes - JOIN with notes table for the isDeleted filter
    const indexedNotes = sql.getValue<number>(`
        SELECT COUNT(DISTINCT nsc.noteId)
        FROM note_search_content nsc
        JOIN notes n ON nsc.noteId = n.noteId
        WHERE n.isDeleted = 0
    `) || 0;

    // Find notes missing from index
    const missingNotes = sql.getColumn<string>(`
        SELECT noteId FROM notes
        WHERE isDeleted = 0
          AND noteId NOT IN (SELECT noteId FROM note_search_content)
    `);

    if (missingNotes.length > 0) {
        issues.push(`${missingNotes.length} notes missing from search index`);
    }

    // Find orphaned index entries
    const orphanedEntries = sql.getColumn<string>(`
        SELECT noteId FROM note_search_content
        WHERE noteId NOT IN (SELECT noteId FROM notes)
    `);

    if (orphanedEntries.length > 0) {
        issues.push(`${orphanedEntries.length} orphaned entries in search index`);
    }

    // Check token table consistency
    const tokenMismatch = sql.getValue<number>(`
        SELECT COUNT(*) FROM note_search_content
        WHERE noteId NOT IN (SELECT noteId FROM note_tokens)
    `) || 0;

    if (tokenMismatch > 0) {
        issues.push(`${tokenMismatch} notes missing from token index`);
    }

    return {
        valid: issues.length === 0,
        issues,
        stats: {
            totalNotes,
            indexedNotes,
            missingFromIndex: missingNotes.length,
            orphanedEntries: orphanedEntries.length
        }
    };
}

/**
 * Performance monitoring wrapper for search queries
 */
export function monitorQuery<T>(
    queryName: string,
    queryFn: () => T
): T {
    const startTime = Date.now();

    try {
        const result = queryFn();

        const elapsed = Date.now() - startTime;
        if (SEARCH_UTILS_CONFIG.LOG_SLOW_QUERIES && elapsed > SEARCH_UTILS_CONFIG.SLOW_QUERY_THRESHOLD) {
            log.info(`Slow search query detected: ${queryName} took ${elapsed}ms`);
        }

        return result;
    } catch (error) {
        const elapsed = Date.now() - startTime;
        log.error(`Search query failed: ${queryName} after ${elapsed}ms - ${error}`);
        throw error;
    }
}

/**
 * Export utility functions for testing
 */
export const testUtils = {
    splitCamelCase,
    extractMindMapText,
    extractCanvasText,
    extractTextFromObject
};
@@ -1,219 +0,0 @@
#!/usr/bin/env ts-node

/**
 * Verification script for SQLite search implementation
 *
 * This script checks:
 * 1. If migration 0235 has run (tables exist)
 * 2. If SQL functions are registered
 * 3. If search queries work correctly
 * 4. Performance comparison between SQLite and TypeScript
 */

import sql from "../sql.js";
import log from "../log.js";
import { getSQLiteSearchService } from "./sqlite_search_service.js";
import SearchContext from "./search_context.js";
import becca from "../../becca/becca.js";

async function verifyTables(): Promise<boolean> {
    console.log("\n=== Checking Database Tables ===");

    const tables = [
        { name: 'note_search_content', required: true },
        { name: 'note_tokens', required: true },
        { name: 'notes_fts', required: false } // From migration 0234
    ];

    let allExist = true;

    for (const table of tables) {
        const exists = sql.getValue<number>(`
            SELECT COUNT(*) FROM sqlite_master
            WHERE type='table' AND name=?
        `, [table.name]) > 0;

        const status = exists ? '✓' : '✗';
        const requiredText = table.required ? ' (REQUIRED)' : ' (optional)';
        console.log(`  ${status} ${table.name}${requiredText}`);

        if (table.required && !exists) {
            allExist = false;
        }
    }

    if (!allExist) {
        console.log("\n❌ Required tables are missing!");
        console.log("   Migration 0235 needs to run.");
        console.log("   The APP_DB_VERSION has been updated to 235.");
        console.log("   Restart the server to run the migration.");
    }

    return allExist;
}

async function verifyFunctions(): Promise<boolean> {
    console.log("\n=== Checking SQL Functions ===");

    const functions = [
        { name: 'normalize_text', test: "SELECT normalize_text('Café')" },
        { name: 'edit_distance', test: "SELECT edit_distance('test', 'text', 2)" },
        { name: 'regex_match', test: "SELECT regex_match('test', 'testing')" },
        { name: 'tokenize_text', test: "SELECT tokenize_text('hello world')" },
        { name: 'strip_html', test: "SELECT strip_html('<p>test</p>')" }
    ];

    let allWork = true;

    for (const func of functions) {
        try {
            const result = sql.getValue(func.test);
            console.log(`  ✓ ${func.name} - Result: ${result}`);
        } catch (error: any) {
            console.log(`  ✗ ${func.name} - Error: ${error.message}`);
            allWork = false;
        }
    }

    if (!allWork) {
        console.log("\n⚠️  Some SQL functions are not working.");
        console.log("   They should be registered when the server starts.");
    }

    return allWork;
}

async function verifySearchContent(): Promise<void> {
    console.log("\n=== Checking Search Index Content ===");

    const noteCount = sql.getValue<number>(`
        SELECT COUNT(*) FROM notes
        WHERE isDeleted = 0 AND isProtected = 0
    `) || 0;

    const indexedCount = sql.getValue<number>(`
        SELECT COUNT(*) FROM note_search_content
    `) || 0;

    const tokenCount = sql.getValue<number>(`
        SELECT COUNT(DISTINCT noteId) FROM note_tokens
    `) || 0;

    console.log(`  Notes eligible for indexing: ${noteCount}`);
    console.log(`  Notes in search index: ${indexedCount}`);
    console.log(`  Notes with tokens: ${tokenCount}`);

    if (indexedCount === 0 && noteCount > 0) {
        console.log("\n⚠️  Search index is empty but there are notes to index.");
        console.log("   The migration should populate the index automatically.");
    } else if (indexedCount < noteCount) {
        console.log("\n⚠️  Some notes are not indexed.");
        console.log(`   Missing: ${noteCount - indexedCount} notes`);
    } else {
        console.log("\n✓ Search index is populated");
    }
}

async function testSearch(): Promise<void> {
    console.log("\n=== Testing Search Functionality ===");

    // Initialize becca if needed
    if (!becca.loaded) {
        console.log("  Loading becca...");
        // Note: becca may not have a load method in this version
    }

    const searchService = getSQLiteSearchService();
    const searchContext = new SearchContext({
        fastSearch: false,
        includeArchivedNotes: false,
        fuzzyAttributeSearch: false,
        debug: false
    });

    // Test different operators
    const tests = [
        { operator: '*=*', tokens: ['note'], description: 'Substring search' },
        { operator: '=*', tokens: ['test'], description: 'Prefix search' },
        { operator: '*=', tokens: ['ing'], description: 'Suffix search' },
        { operator: '~=', tokens: ['nite'], description: 'Fuzzy search' }
    ];

    for (const test of tests) {
        try {
            console.log(`\n  Testing ${test.description} (${test.operator}):`);
            const startTime = Date.now();
            const results = searchService.search(test.tokens, test.operator, searchContext);
            const duration = Date.now() - startTime;
            const resultCount = Array.isArray(results) ? results.length : results.size || 0;
            console.log(`    Found ${resultCount} results in ${duration}ms`);

            if (resultCount > 0) {
                const sampleResults = Array.isArray(results) ? results.slice(0, 3) : Array.from(results).slice(0, 3);
                console.log(`    Sample results: ${sampleResults.join(', ')}...`);
            }
        } catch (error: any) {
            console.log(`    ✗ Error: ${error.message}`);
        }
    }
}

async function main() {
    console.log("========================================");
    console.log("  SQLite Search Implementation Test");
    console.log("========================================");

    try {
        // Check current database version
        const currentDbVersion = sql.getValue<number>("SELECT value FROM options WHERE name = 'dbVersion'") || 0;
        console.log(`\nCurrent database version: ${currentDbVersion}`);
        console.log(`Target database version: 235`);

        if (currentDbVersion < 235) {
            console.log("\n⚠️  Database needs migration from version " + currentDbVersion + " to 235");
            console.log("   Restart the server to run migrations.");
            return;
        }

        // Verify tables exist
        const tablesExist = await verifyTables();
        if (!tablesExist) {
            return;
        }

        // Verify functions work
        const functionsWork = await verifyFunctions();

        // Check index content
        await verifySearchContent();

        // Test search if everything is ready
        if (tablesExist && functionsWork) {
            await testSearch();
        }

        console.log("\n========================================");
        console.log("  Test Complete");
        console.log("========================================");

        if (tablesExist && functionsWork) {
            console.log("\n✅ SQLite search implementation is ready!");
            console.log("\nTo enable SQLite search:");
            console.log("  1. Set searchBackend option to 'sqlite'");
            console.log("  2. Or use the admin API: PUT /api/search-admin/config");
        } else {
            console.log("\n❌ SQLite search is not ready. See issues above.");
        }

    } catch (error: any) {
        console.error("\n❌ Test failed with error:", error);
        console.error(error.stack);
    }
}

// Run if executed directly
if (require.main === module) {
    main().then(() => process.exit(0)).catch(() => process.exit(1));
}

export { verifyTables, verifyFunctions, testSearch };
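
// Run directly with ts-node (the invocation and path are assumptions; adjust to
// wherever this script lives in the source tree):
//   npx ts-node src/services/search/verify_sqlite_search.ts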
@@ -136,14 +136,6 @@ export interface OptionDefinitions extends KeyboardShortcutsOptions<KeyboardActi
    redirectBareDomain: boolean;
    showLoginInShareTheme: boolean;

    // Search settings
    searchBackend: string;
    searchSqliteEnabled: boolean;
    searchSqlitePerformanceLogging: boolean;
    searchSqliteMaxMemory: number;
    searchSqliteBatchSize: number;
    searchSqliteAutoRebuild: boolean;

    // AI/LLM integration options
    aiEnabled: boolean;
    aiProvider: string;