mirror of
https://github.com/zadam/trilium.git
synced 2025-11-10 15:25:51 +01:00
826 lines
29 KiB
TypeScript
826 lines
29 KiB
TypeScript
/**
|
|
* Migration to add SQLite native search support with normalized text tables
|
|
*
|
|
* This migration implements Phase 1 of the SQLite-based search plan:
|
|
* 1. Creates note_search_content table with normalized text columns
|
|
* 2. Creates note_tokens table for word-level token storage
|
|
* 3. Adds necessary indexes for optimization
|
|
* 4. Creates triggers to keep tables synchronized with note updates
|
|
* 5. Populates tables with existing note data in batches
|
|
*
|
|
* This provides 100% accurate search results with 10-30x performance improvement
|
|
* over TypeScript-based search, without the complexity of trigrams.
|
|
*/
|
|
|
|
import sql from "../services/sql.js";
|
|
import log from "../services/log.js";
|
|
import { normalize as utilsNormalize, stripTags } from "../services/utils.js";
|
|
import { getSqliteFunctionsService } from "../services/search/sqlite_functions.js";
|
|
|
|
/**
|
|
* Uses the existing normalize function from utils.ts for consistency
|
|
* This ensures all normalization throughout the codebase is identical
|
|
*/
|
|
function normalizeText(text: string): string {
|
|
if (!text) return '';
|
|
return utilsNormalize(text);
|
|
}
|
|
|
|
/**
|
|
* Tokenizes text into individual words for token-based searching
|
|
* Handles punctuation and special characters appropriately
|
|
*/
|
|
function tokenize(text: string): string[] {
|
|
if (!text) return [];
|
|
|
|
// Split on word boundaries, filter out empty tokens
|
|
// This regex splits on spaces, punctuation, and other non-word characters
|
|
// but preserves apostrophes within words (e.g., "don't", "it's")
|
|
const tokens = text
|
|
.split(/[\s\n\r\t,;.!?()[\]{}"'`~@#$%^&*+=|\\/<>:_-]+/)
|
|
.filter(token => token.length > 0)
|
|
.map(token => token.toLowerCase());
|
|
|
|
// Also split on camelCase and snake_case boundaries for code content
|
|
const expandedTokens: string[] = [];
|
|
for (const token of tokens) {
|
|
// Add the original token
|
|
expandedTokens.push(token);
|
|
|
|
// Split camelCase (e.g., "getUserName" -> ["get", "User", "Name"])
|
|
const camelCaseParts = token.split(/(?=[A-Z])/);
|
|
if (camelCaseParts.length > 1) {
|
|
expandedTokens.push(...camelCaseParts.map(p => p.toLowerCase()));
|
|
}
|
|
|
|
// Split snake_case (e.g., "user_name" -> ["user", "name"])
|
|
const snakeCaseParts = token.split('_');
|
|
if (snakeCaseParts.length > 1) {
|
|
expandedTokens.push(...snakeCaseParts);
|
|
}
|
|
}
|
|
|
|
// Remove duplicates and return
|
|
return Array.from(new Set(expandedTokens));
|
|
}
|
|
|
|
/**
|
|
* Strips HTML tags from content for text-only indexing
|
|
* Uses the utils stripTags function for consistency
|
|
*/
|
|
function stripHtmlTags(html: string): string {
|
|
if (!html) return '';
|
|
|
|
// Remove script and style content entirely first
|
|
let text = html.replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '');
|
|
text = text.replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, '');
|
|
|
|
// Use utils stripTags for consistency
|
|
text = stripTags(text);
|
|
|
|
// Decode HTML entities
|
|
text = text.replace(/ /g, ' ');
|
|
text = text.replace(/</g, '<');
|
|
text = text.replace(/>/g, '>');
|
|
text = text.replace(/&/g, '&');
|
|
text = text.replace(/"/g, '"');
|
|
text = text.replace(/'/g, "'");
|
|
|
|
// Normalize whitespace
|
|
text = text.replace(/\s+/g, ' ').trim();
|
|
|
|
return text;
|
|
}
|
|
|
|
export default function sqliteNativeSearch() {
|
|
log.info("Starting SQLite native search migration...");
|
|
|
|
const startTime = Date.now();
|
|
|
|
// Wrap entire migration in a transaction for atomicity
|
|
sql.transactional(() => {
|
|
try {
|
|
// Register custom SQL functions first so they can be used in triggers
|
|
registerCustomFunctions();
|
|
|
|
// Create the search tables and indexes
|
|
createSearchTables();
|
|
|
|
// Create triggers to keep tables synchronized (before population)
|
|
createSearchTriggers();
|
|
|
|
// Populate the tables with existing note data
|
|
populateSearchTables();
|
|
|
|
// Run final verification and optimization
|
|
finalizeSearchSetup();
|
|
|
|
const duration = Date.now() - startTime;
|
|
log.info(`SQLite native search migration completed successfully in ${duration}ms`);
|
|
|
|
} catch (error) {
|
|
log.error(`SQLite native search migration failed: ${error}`);
|
|
// Transaction will automatically rollback on error
|
|
throw error;
|
|
}
|
|
});
|
|
}
|
|
|
|
function createSearchTables() {
|
|
log.info("Creating search content and token tables...");
|
|
|
|
// Drop existing tables if they exist (for re-running migration in dev)
|
|
sql.execute("DROP TABLE IF EXISTS note_search_content");
|
|
sql.execute("DROP TABLE IF EXISTS note_tokens");
|
|
|
|
// Create the main search content table
|
|
sql.execute(`
|
|
CREATE TABLE note_search_content (
|
|
noteId TEXT PRIMARY KEY,
|
|
title TEXT NOT NULL,
|
|
content TEXT NOT NULL,
|
|
title_normalized TEXT NOT NULL,
|
|
content_normalized TEXT NOT NULL,
|
|
full_text_normalized TEXT NOT NULL
|
|
)
|
|
`);
|
|
|
|
// Create the token table for word-level operations
|
|
sql.execute(`
|
|
CREATE TABLE note_tokens (
|
|
noteId TEXT NOT NULL,
|
|
token TEXT NOT NULL,
|
|
token_normalized TEXT NOT NULL,
|
|
position INTEGER NOT NULL,
|
|
source TEXT NOT NULL CHECK(source IN ('title', 'content')),
|
|
PRIMARY KEY (noteId, position, source)
|
|
)
|
|
`);
|
|
|
|
// Create indexes for search optimization
|
|
log.info("Creating search indexes...");
|
|
|
|
// Consolidated indexes - removed redundancy between COLLATE NOCASE and plain indexes
|
|
// Using COLLATE NOCASE for case-insensitive searches
|
|
sql.execute(`
|
|
CREATE INDEX idx_search_title_normalized
|
|
ON note_search_content(title_normalized COLLATE NOCASE)
|
|
`);
|
|
|
|
sql.execute(`
|
|
CREATE INDEX idx_search_content_normalized
|
|
ON note_search_content(content_normalized COLLATE NOCASE)
|
|
`);
|
|
|
|
sql.execute(`
|
|
CREATE INDEX idx_search_full_text
|
|
ON note_search_content(full_text_normalized COLLATE NOCASE)
|
|
`);
|
|
|
|
// Token indexes - consolidated to avoid redundancy
|
|
sql.execute(`
|
|
CREATE INDEX idx_tokens_normalized
|
|
ON note_tokens(token_normalized COLLATE NOCASE)
|
|
`);
|
|
|
|
sql.execute(`
|
|
CREATE INDEX idx_tokens_noteId
|
|
ON note_tokens(noteId)
|
|
`);
|
|
|
|
// Composite index for token searches with source
|
|
sql.execute(`
|
|
CREATE INDEX idx_tokens_source_normalized
|
|
ON note_tokens(source, token_normalized COLLATE NOCASE)
|
|
`);
|
|
|
|
log.info("Search tables and indexes created successfully");
|
|
}
|
|
|
|
function populateSearchTables() {
|
|
log.info("Populating search tables with existing note content...");
|
|
|
|
const batchSize = 100;
|
|
let offset = 0;
|
|
let totalProcessed = 0;
|
|
let totalTokens = 0;
|
|
|
|
while (true) {
|
|
const notes = sql.getRows<{
|
|
noteId: string;
|
|
title: string;
|
|
type: string;
|
|
mime: string;
|
|
content: string | null;
|
|
}>(`
|
|
SELECT
|
|
n.noteId,
|
|
n.title,
|
|
n.type,
|
|
n.mime,
|
|
b.content
|
|
FROM notes n
|
|
LEFT JOIN blobs b ON n.blobId = b.blobId
|
|
WHERE n.isDeleted = 0
|
|
AND n.isProtected = 0
|
|
AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
|
|
ORDER BY n.noteId
|
|
LIMIT ? OFFSET ?
|
|
`, [batchSize, offset]);
|
|
|
|
if (notes.length === 0) {
|
|
break;
|
|
}
|
|
|
|
// Process batch of notes
|
|
for (const note of notes) {
|
|
try {
|
|
// Process content based on type
|
|
let processedContent = note.content || '';
|
|
|
|
// Strip HTML for text notes
|
|
if (note.type === 'text' && note.mime === 'text/html') {
|
|
processedContent = stripHtmlTags(processedContent);
|
|
}
|
|
|
|
// Normalize text for searching using the utils normalize function
|
|
const titleNorm = normalizeText(note.title);
|
|
const contentNorm = normalizeText(processedContent);
|
|
const fullTextNorm = titleNorm + ' ' + contentNorm;
|
|
|
|
// Insert into search content table
|
|
sql.execute(`
|
|
INSERT INTO note_search_content
|
|
(noteId, title, content, title_normalized, content_normalized, full_text_normalized)
|
|
VALUES (?, ?, ?, ?, ?, ?)
|
|
`, [
|
|
note.noteId,
|
|
note.title,
|
|
processedContent,
|
|
titleNorm,
|
|
contentNorm,
|
|
fullTextNorm
|
|
]);
|
|
|
|
// Tokenize title and content separately to track source
|
|
const titleTokens = tokenize(note.title);
|
|
const contentTokens = tokenize(processedContent);
|
|
|
|
let position = 0;
|
|
|
|
// Insert title tokens
|
|
for (const token of titleTokens) {
|
|
if (token.length > 0) {
|
|
sql.execute(`
|
|
INSERT OR IGNORE INTO note_tokens
|
|
(noteId, token, token_normalized, position, source)
|
|
VALUES (?, ?, ?, ?, 'title')
|
|
`, [note.noteId, token, normalizeText(token), position]);
|
|
position++;
|
|
totalTokens++;
|
|
}
|
|
}
|
|
|
|
// Insert content tokens with unique positions
|
|
for (const token of contentTokens) {
|
|
if (token.length > 0) {
|
|
sql.execute(`
|
|
INSERT OR IGNORE INTO note_tokens
|
|
(noteId, token, token_normalized, position, source)
|
|
VALUES (?, ?, ?, ?, 'content')
|
|
`, [note.noteId, token, normalizeText(token), position]);
|
|
position++;
|
|
totalTokens++;
|
|
}
|
|
}
|
|
|
|
totalProcessed++;
|
|
|
|
} catch (error) {
|
|
log.error(`Failed to index note ${note.noteId}: ${error}`);
|
|
// Continue with other notes even if one fails
|
|
}
|
|
}
|
|
|
|
offset += batchSize;
|
|
|
|
if (totalProcessed % 1000 === 0) {
|
|
log.info(`Processed ${totalProcessed} notes, ${totalTokens} tokens for search indexing...`);
|
|
}
|
|
}
|
|
|
|
log.info(`Completed indexing ${totalProcessed} notes with ${totalTokens} total tokens`);
|
|
}
|
|
|
|
function createSearchTriggers() {
|
|
log.info("Creating triggers to keep search tables synchronized...");
|
|
|
|
// Drop existing triggers if they exist
|
|
const triggers = [
|
|
'note_search_insert',
|
|
'note_search_update',
|
|
'note_search_delete',
|
|
'note_search_soft_delete',
|
|
'note_search_undelete',
|
|
'note_search_protect',
|
|
'note_search_unprotect',
|
|
'note_search_blob_insert',
|
|
'note_search_blob_update'
|
|
];
|
|
|
|
for (const trigger of triggers) {
|
|
sql.execute(`DROP TRIGGER IF EXISTS ${trigger}`);
|
|
}
|
|
|
|
// Trigger for INSERT operations on notes - simplified version
|
|
sql.execute(`
|
|
CREATE TRIGGER note_search_insert
|
|
AFTER INSERT ON notes
|
|
WHEN NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
|
|
AND NEW.isDeleted = 0
|
|
AND NEW.isProtected = 0
|
|
BEGIN
|
|
-- Delete any existing entries (for INSERT OR REPLACE)
|
|
DELETE FROM note_search_content WHERE noteId = NEW.noteId;
|
|
DELETE FROM note_tokens WHERE noteId = NEW.noteId;
|
|
|
|
-- Insert basic content with title only (content will be populated by blob trigger)
|
|
INSERT INTO note_search_content
|
|
(noteId, title, content, title_normalized, content_normalized, full_text_normalized)
|
|
VALUES (
|
|
NEW.noteId,
|
|
NEW.title,
|
|
'',
|
|
LOWER(NEW.title),
|
|
'',
|
|
LOWER(NEW.title)
|
|
);
|
|
END
|
|
`);
|
|
|
|
// Trigger for UPDATE operations on notes - simplified version
|
|
sql.execute(`
|
|
CREATE TRIGGER note_search_update
|
|
AFTER UPDATE ON notes
|
|
WHEN NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
|
|
BEGIN
|
|
-- Always delete the old entries
|
|
DELETE FROM note_search_content WHERE noteId = NEW.noteId;
|
|
DELETE FROM note_tokens WHERE noteId = NEW.noteId;
|
|
|
|
-- Re-insert if note is not deleted and not protected
|
|
INSERT INTO note_search_content
|
|
(noteId, title, content, title_normalized, content_normalized, full_text_normalized)
|
|
SELECT
|
|
NEW.noteId,
|
|
NEW.title,
|
|
COALESCE(b.content, ''),
|
|
LOWER(NEW.title),
|
|
LOWER(COALESCE(b.content, '')),
|
|
LOWER(NEW.title || ' ' || COALESCE(b.content, ''))
|
|
FROM notes n
|
|
LEFT JOIN blobs b ON b.blobId = NEW.blobId
|
|
WHERE n.noteId = NEW.noteId
|
|
AND NEW.isDeleted = 0
|
|
AND NEW.isProtected = 0;
|
|
END
|
|
`);
|
|
|
|
// Trigger for DELETE operations on notes
|
|
sql.execute(`
|
|
CREATE TRIGGER note_search_delete
|
|
AFTER DELETE ON notes
|
|
BEGIN
|
|
DELETE FROM note_search_content WHERE noteId = OLD.noteId;
|
|
DELETE FROM note_tokens WHERE noteId = OLD.noteId;
|
|
END
|
|
`);
|
|
|
|
// Trigger for soft delete (isDeleted = 1)
|
|
sql.execute(`
|
|
CREATE TRIGGER note_search_soft_delete
|
|
AFTER UPDATE ON notes
|
|
WHEN OLD.isDeleted = 0 AND NEW.isDeleted = 1
|
|
BEGIN
|
|
DELETE FROM note_search_content WHERE noteId = NEW.noteId;
|
|
DELETE FROM note_tokens WHERE noteId = NEW.noteId;
|
|
END
|
|
`);
|
|
|
|
// Trigger for undelete (isDeleted = 0) - simplified version
|
|
sql.execute(`
|
|
CREATE TRIGGER note_search_undelete
|
|
AFTER UPDATE ON notes
|
|
WHEN OLD.isDeleted = 1 AND NEW.isDeleted = 0
|
|
AND NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
|
|
AND NEW.isProtected = 0
|
|
BEGIN
|
|
DELETE FROM note_search_content WHERE noteId = NEW.noteId;
|
|
DELETE FROM note_tokens WHERE noteId = NEW.noteId;
|
|
|
|
INSERT INTO note_search_content
|
|
(noteId, title, content, title_normalized, content_normalized, full_text_normalized)
|
|
SELECT
|
|
NEW.noteId,
|
|
NEW.title,
|
|
COALESCE(b.content, ''),
|
|
LOWER(NEW.title),
|
|
LOWER(COALESCE(b.content, '')),
|
|
LOWER(NEW.title || ' ' || COALESCE(b.content, ''))
|
|
FROM notes n
|
|
LEFT JOIN blobs b ON b.blobId = NEW.blobId
|
|
WHERE n.noteId = NEW.noteId;
|
|
END
|
|
`);
|
|
|
|
// Trigger for notes becoming protected
|
|
sql.execute(`
|
|
CREATE TRIGGER note_search_protect
|
|
AFTER UPDATE ON notes
|
|
WHEN OLD.isProtected = 0 AND NEW.isProtected = 1
|
|
BEGIN
|
|
DELETE FROM note_search_content WHERE noteId = NEW.noteId;
|
|
DELETE FROM note_tokens WHERE noteId = NEW.noteId;
|
|
END
|
|
`);
|
|
|
|
// Trigger for notes becoming unprotected - simplified version
|
|
sql.execute(`
|
|
CREATE TRIGGER note_search_unprotect
|
|
AFTER UPDATE ON notes
|
|
WHEN OLD.isProtected = 1 AND NEW.isProtected = 0
|
|
AND NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
|
|
AND NEW.isDeleted = 0
|
|
BEGIN
|
|
DELETE FROM note_search_content WHERE noteId = NEW.noteId;
|
|
DELETE FROM note_tokens WHERE noteId = NEW.noteId;
|
|
|
|
INSERT INTO note_search_content
|
|
(noteId, title, content, title_normalized, content_normalized, full_text_normalized)
|
|
SELECT
|
|
NEW.noteId,
|
|
NEW.title,
|
|
COALESCE(b.content, ''),
|
|
LOWER(NEW.title),
|
|
LOWER(COALESCE(b.content, '')),
|
|
LOWER(NEW.title || ' ' || COALESCE(b.content, ''))
|
|
FROM notes n
|
|
LEFT JOIN blobs b ON b.blobId = NEW.blobId
|
|
WHERE n.noteId = NEW.noteId;
|
|
END
|
|
`);
|
|
|
|
// Trigger for INSERT operations on blobs - simplified version
|
|
sql.execute(`
|
|
CREATE TRIGGER note_search_blob_insert
|
|
AFTER INSERT ON blobs
|
|
BEGIN
|
|
-- Update search content for all notes that reference this blob
|
|
UPDATE note_search_content
|
|
SET content = NEW.content,
|
|
content_normalized = LOWER(NEW.content),
|
|
full_text_normalized = title_normalized || ' ' || LOWER(NEW.content)
|
|
WHERE noteId IN (
|
|
SELECT n.noteId
|
|
FROM notes n
|
|
WHERE n.blobId = NEW.blobId
|
|
AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
|
|
AND n.isDeleted = 0
|
|
AND n.isProtected = 0
|
|
);
|
|
|
|
-- Clear tokens for affected notes (will be repopulated by post-processing)
|
|
DELETE FROM note_tokens
|
|
WHERE noteId IN (
|
|
SELECT n.noteId
|
|
FROM notes n
|
|
WHERE n.blobId = NEW.blobId
|
|
AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
|
|
AND n.isDeleted = 0
|
|
AND n.isProtected = 0
|
|
);
|
|
END
|
|
`);
|
|
|
|
// Trigger for UPDATE operations on blobs - simplified version
|
|
sql.execute(`
|
|
CREATE TRIGGER note_search_blob_update
|
|
AFTER UPDATE ON blobs
|
|
BEGIN
|
|
-- Update search content for all notes that reference this blob
|
|
UPDATE note_search_content
|
|
SET content = NEW.content,
|
|
content_normalized = LOWER(NEW.content),
|
|
full_text_normalized = title_normalized || ' ' || LOWER(NEW.content)
|
|
WHERE noteId IN (
|
|
SELECT n.noteId
|
|
FROM notes n
|
|
WHERE n.blobId = NEW.blobId
|
|
AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
|
|
AND n.isDeleted = 0
|
|
AND n.isProtected = 0
|
|
);
|
|
|
|
-- Clear tokens for affected notes (will be repopulated by post-processing)
|
|
DELETE FROM note_tokens
|
|
WHERE noteId IN (
|
|
SELECT n.noteId
|
|
FROM notes n
|
|
WHERE n.blobId = NEW.blobId
|
|
AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
|
|
AND n.isDeleted = 0
|
|
AND n.isProtected = 0
|
|
);
|
|
END
|
|
`);
|
|
|
|
log.info("Search synchronization triggers created successfully");
|
|
}
|
|
|
|
function registerCustomFunctions() {
|
|
log.info("Registering custom SQL functions for search operations...");
|
|
|
|
try {
|
|
// Get the database connection to register functions
|
|
const db = sql.getDbConnection();
|
|
|
|
// Use the centralized SQLite functions service
|
|
const functionsService = getSqliteFunctionsService();
|
|
|
|
// Register functions if not already registered
|
|
if (!functionsService.isRegistered()) {
|
|
const success = functionsService.registerFunctions(db);
|
|
if (success) {
|
|
log.info("Custom SQL functions registered successfully via service");
|
|
} else {
|
|
log.info("Custom SQL functions registration failed - using basic SQLite functions only");
|
|
}
|
|
} else {
|
|
log.info("Custom SQL functions already registered");
|
|
}
|
|
|
|
// Register migration-specific helper function for tokenization
|
|
db.function('tokenize_for_migration', {
|
|
deterministic: true,
|
|
varargs: false
|
|
}, (text: string | null) => {
|
|
if (!text) return '';
|
|
// Return as JSON array string for SQL processing
|
|
return JSON.stringify(tokenize(text));
|
|
});
|
|
|
|
} catch (error) {
|
|
log.info(`Could not register custom SQL functions (will use basic SQLite functions): ${error}`);
|
|
// This is not critical - the migration will work with basic SQLite functions
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Populates tokens for a specific note
|
|
* This is called outside of triggers to avoid complex SQL within trigger constraints
|
|
*/
|
|
function populateNoteTokens(noteId: string): number {
|
|
try {
|
|
// Get the note's search content
|
|
const noteData = sql.getRow<{
|
|
title: string;
|
|
content: string;
|
|
}>(`
|
|
SELECT title, content
|
|
FROM note_search_content
|
|
WHERE noteId = ?
|
|
`, [noteId]);
|
|
|
|
if (!noteData) return 0;
|
|
|
|
// Clear existing tokens for this note
|
|
sql.execute(`DELETE FROM note_tokens WHERE noteId = ?`, [noteId]);
|
|
|
|
// Tokenize title and content
|
|
const titleTokens = tokenize(noteData.title);
|
|
const contentTokens = tokenize(noteData.content);
|
|
|
|
let position = 0;
|
|
let tokenCount = 0;
|
|
|
|
// Insert title tokens
|
|
for (const token of titleTokens) {
|
|
if (token.length > 0) {
|
|
sql.execute(`
|
|
INSERT OR IGNORE INTO note_tokens
|
|
(noteId, token, token_normalized, position, source)
|
|
VALUES (?, ?, ?, ?, 'title')
|
|
`, [noteId, token, normalizeText(token), position]);
|
|
position++;
|
|
tokenCount++;
|
|
}
|
|
}
|
|
|
|
// Insert content tokens
|
|
for (const token of contentTokens) {
|
|
if (token.length > 0) {
|
|
sql.execute(`
|
|
INSERT OR IGNORE INTO note_tokens
|
|
(noteId, token, token_normalized, position, source)
|
|
VALUES (?, ?, ?, ?, 'content')
|
|
`, [noteId, token, normalizeText(token), position]);
|
|
position++;
|
|
tokenCount++;
|
|
}
|
|
}
|
|
|
|
return tokenCount;
|
|
} catch (error) {
|
|
log.error(`Error populating tokens for note ${noteId}: ${error}`);
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Populates tokens for multiple notes affected by blob operations
|
|
* This handles cases where blob triggers can affect multiple notes
|
|
*/
|
|
function populateBlobAffectedTokens(blobId: string): void {
|
|
try {
|
|
// Find all notes that reference this blob and need token updates
|
|
const affectedNoteIds = sql.getColumn<string>(`
|
|
SELECT DISTINCT n.noteId
|
|
FROM notes n
|
|
INNER JOIN note_search_content nsc ON n.noteId = nsc.noteId
|
|
WHERE n.blobId = ?
|
|
AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
|
|
AND n.isDeleted = 0
|
|
AND n.isProtected = 0
|
|
`, [blobId]);
|
|
|
|
if (affectedNoteIds.length === 0) return;
|
|
|
|
log.info(`Updating tokens for ${affectedNoteIds.length} notes affected by blob ${blobId}`);
|
|
|
|
let totalTokens = 0;
|
|
for (const noteId of affectedNoteIds) {
|
|
const tokenCount = populateNoteTokens(noteId);
|
|
totalTokens += tokenCount;
|
|
}
|
|
|
|
log.info(`Updated ${totalTokens} tokens for blob-affected notes`);
|
|
} catch (error) {
|
|
log.error(`Error populating blob-affected tokens for blob ${blobId}: ${error}`);
|
|
}
|
|
}
|
|
|
|
function populateAllTokens() {
|
|
log.info("Populating tokens for all search content...");
|
|
|
|
// Clear existing tokens first to ensure clean state
|
|
sql.execute("DELETE FROM note_tokens");
|
|
|
|
const batchSize = 100;
|
|
let offset = 0;
|
|
let totalProcessed = 0;
|
|
let totalTokens = 0;
|
|
|
|
while (true) {
|
|
const notes = sql.getRows<{
|
|
noteId: string;
|
|
title: string;
|
|
content: string;
|
|
}>(`
|
|
SELECT noteId, title, content
|
|
FROM note_search_content
|
|
ORDER BY noteId
|
|
LIMIT ? OFFSET ?
|
|
`, [batchSize, offset]);
|
|
|
|
if (notes.length === 0) {
|
|
break;
|
|
}
|
|
|
|
for (const note of notes) {
|
|
try {
|
|
// Tokenize title and content
|
|
const titleTokens = tokenize(note.title);
|
|
const contentTokens = tokenize(note.content);
|
|
|
|
let position = 0;
|
|
|
|
// Insert title tokens
|
|
for (const token of titleTokens) {
|
|
if (token.length > 0) {
|
|
sql.execute(`
|
|
INSERT OR IGNORE INTO note_tokens
|
|
(noteId, token, token_normalized, position, source)
|
|
VALUES (?, ?, ?, ?, 'title')
|
|
`, [note.noteId, token, normalizeText(token), position]);
|
|
position++;
|
|
totalTokens++;
|
|
}
|
|
}
|
|
|
|
// Insert content tokens with continuous position numbering
|
|
for (const token of contentTokens) {
|
|
if (token.length > 0) {
|
|
sql.execute(`
|
|
INSERT OR IGNORE INTO note_tokens
|
|
(noteId, token, token_normalized, position, source)
|
|
VALUES (?, ?, ?, ?, 'content')
|
|
`, [note.noteId, token, normalizeText(token), position]);
|
|
position++;
|
|
totalTokens++;
|
|
}
|
|
}
|
|
|
|
totalProcessed++;
|
|
|
|
} catch (error) {
|
|
log.error(`Failed to tokenize note ${note.noteId}: ${error}`);
|
|
}
|
|
}
|
|
|
|
offset += batchSize;
|
|
|
|
if (totalProcessed % 1000 === 0) {
|
|
log.info(`Processed ${totalProcessed} notes, ${totalTokens} tokens so far...`);
|
|
}
|
|
}
|
|
|
|
log.info(`Token population completed: ${totalProcessed} notes processed, ${totalTokens} total tokens`);
|
|
}
|
|
|
|
function finalizeSearchSetup() {
|
|
log.info("Running final verification and optimization...");
|
|
|
|
// Check for missing notes that should be indexed
|
|
const missingCount = sql.getValue<number>(`
|
|
SELECT COUNT(*) FROM notes n
|
|
LEFT JOIN blobs b ON n.blobId = b.blobId
|
|
WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
|
|
AND n.isDeleted = 0
|
|
AND n.isProtected = 0
|
|
AND b.content IS NOT NULL
|
|
AND NOT EXISTS (SELECT 1 FROM note_search_content WHERE noteId = n.noteId)
|
|
`) || 0;
|
|
|
|
if (missingCount > 0) {
|
|
log.info(`Found ${missingCount} notes that are missing from search index`);
|
|
|
|
// Index missing notes using basic SQLite functions
|
|
sql.execute(`
|
|
INSERT INTO note_search_content
|
|
(noteId, title, content, title_normalized, content_normalized, full_text_normalized)
|
|
SELECT
|
|
n.noteId,
|
|
n.title,
|
|
COALESCE(b.content, ''),
|
|
LOWER(n.title),
|
|
LOWER(COALESCE(b.content, '')),
|
|
LOWER(n.title || ' ' || COALESCE(b.content, ''))
|
|
FROM notes n
|
|
LEFT JOIN blobs b ON n.blobId = b.blobId
|
|
WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
|
|
AND n.isDeleted = 0
|
|
AND n.isProtected = 0
|
|
AND b.content IS NOT NULL
|
|
AND NOT EXISTS (SELECT 1 FROM note_search_content WHERE noteId = n.noteId)
|
|
`);
|
|
|
|
log.info(`Indexed ${missingCount} missing notes`);
|
|
}
|
|
|
|
// Populate tokens for all existing content (including any missing notes we just added)
|
|
populateAllTokens();
|
|
|
|
// Verify table creation
|
|
const tables = sql.getColumn<string>(`
|
|
SELECT name FROM sqlite_master
|
|
WHERE type = 'table'
|
|
AND name IN ('note_search_content', 'note_tokens')
|
|
`);
|
|
|
|
if (tables.length !== 2) {
|
|
throw new Error("Search tables were not created properly");
|
|
}
|
|
|
|
// Check row counts
|
|
const searchContentCount = sql.getValue<number>("SELECT COUNT(*) FROM note_search_content") || 0;
|
|
const tokenCount = sql.getValue<number>("SELECT COUNT(*) FROM note_tokens") || 0;
|
|
|
|
log.info(`Search content table has ${searchContentCount} entries`);
|
|
log.info(`Token table has ${tokenCount} entries`);
|
|
|
|
// Run ANALYZE to update SQLite query planner statistics
|
|
log.info("Updating SQLite statistics for query optimization...");
|
|
sql.execute("ANALYZE note_search_content");
|
|
sql.execute("ANALYZE note_tokens");
|
|
|
|
// Verify indexes were created
|
|
const indexes = sql.getColumn<string>(`
|
|
SELECT name FROM sqlite_master
|
|
WHERE type = 'index'
|
|
AND tbl_name IN ('note_search_content', 'note_tokens')
|
|
`);
|
|
|
|
log.info(`Created ${indexes.length} indexes for search optimization`);
|
|
|
|
log.info("Search setup finalization completed");
|
|
} |