mirror of
				https://github.com/zadam/trilium.git
				synced 2025-11-03 20:06:08 +01:00 
			
		
		
		
	create embedding services
This commit is contained in:
		@@ -11,7 +11,7 @@ export abstract class BaseAIService implements AIService {
 | 
			
		||||
    abstract generateChatCompletion(messages: Message[], options?: ChatCompletionOptions): Promise<ChatResponse>;
 | 
			
		||||
 | 
			
		||||
    isAvailable(): boolean {
 | 
			
		||||
        return options.getOption('aiEnabled') === 'true'; // Base check if AI is enabled globally
 | 
			
		||||
        return options.getOptionBool('aiEnabled'); // Base check if AI is enabled globally
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    getName(): string {
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										299
									
								
								src/services/llm/embeddings/providers.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										299
									
								
								src/services/llm/embeddings/providers.ts
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,299 @@
 | 
			
		||||
import options from "../../options.js";
 | 
			
		||||
import log from "../../log.js";
 | 
			
		||||
import sql from "../../sql.js";
 | 
			
		||||
import dateUtils from "../../date_utils.js";
 | 
			
		||||
import { randomString } from "../../utils.js";
 | 
			
		||||
import type { EmbeddingProvider, EmbeddingConfig } from "./embeddings_interface.js";
 | 
			
		||||
import { OpenAIEmbeddingProvider } from "./providers/openai.js";
 | 
			
		||||
import { OllamaEmbeddingProvider } from "./providers/ollama.js";
 | 
			
		||||
import { AnthropicEmbeddingProvider } from "./providers/anthropic.js";
 | 
			
		||||
import { LocalEmbeddingProvider } from "./providers/local.js";
 | 
			
		||||
 | 
			
		||||
const providers = new Map<string, EmbeddingProvider>();
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Register a new embedding provider
 | 
			
		||||
 */
 | 
			
		||||
export function registerEmbeddingProvider(provider: EmbeddingProvider) {
 | 
			
		||||
    providers.set(provider.name, provider);
 | 
			
		||||
    log.info(`Registered embedding provider: ${provider.name}`);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Get all registered embedding providers
 | 
			
		||||
 */
 | 
			
		||||
export function getEmbeddingProviders(): EmbeddingProvider[] {
 | 
			
		||||
    return Array.from(providers.values());
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Get a specific embedding provider by name
 | 
			
		||||
 */
 | 
			
		||||
export function getEmbeddingProvider(name: string): EmbeddingProvider | undefined {
 | 
			
		||||
    return providers.get(name);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Get all enabled embedding providers
 | 
			
		||||
 */
 | 
			
		||||
export async function getEnabledEmbeddingProviders(): Promise<EmbeddingProvider[]> {
 | 
			
		||||
    if (!(await options.getOptionBool('aiEnabled'))) {
 | 
			
		||||
        return [];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Get enabled providers from database
 | 
			
		||||
    const enabledProviders = await sql.getRows(`
 | 
			
		||||
        SELECT providerId, name, config
 | 
			
		||||
        FROM embedding_providers
 | 
			
		||||
        WHERE isEnabled = 1
 | 
			
		||||
        ORDER BY priority DESC`
 | 
			
		||||
    );
 | 
			
		||||
 | 
			
		||||
    const result: EmbeddingProvider[] = [];
 | 
			
		||||
 | 
			
		||||
    for (const row of enabledProviders) {
 | 
			
		||||
        const rowData = row as any;
 | 
			
		||||
        const provider = providers.get(rowData.name);
 | 
			
		||||
 | 
			
		||||
        if (provider) {
 | 
			
		||||
            result.push(provider);
 | 
			
		||||
        } else {
 | 
			
		||||
            // Use error instead of warn if warn is not available
 | 
			
		||||
            log.error(`Enabled embedding provider ${rowData.name} not found in registered providers`);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return result;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Create a new embedding provider configuration in the database
 | 
			
		||||
 */
 | 
			
		||||
export async function createEmbeddingProviderConfig(
 | 
			
		||||
    name: string,
 | 
			
		||||
    config: EmbeddingConfig,
 | 
			
		||||
    isEnabled = false,
 | 
			
		||||
    priority = 0
 | 
			
		||||
): Promise<string> {
 | 
			
		||||
    const providerId = randomString(16);
 | 
			
		||||
    const now = dateUtils.localNowDateTime();
 | 
			
		||||
    const utcNow = dateUtils.utcNowDateTime();
 | 
			
		||||
 | 
			
		||||
    await sql.execute(`
 | 
			
		||||
        INSERT INTO embedding_providers
 | 
			
		||||
        (providerId, name, isEnabled, priority, config,
 | 
			
		||||
         dateCreated, utcDateCreated, dateModified, utcDateModified)
 | 
			
		||||
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
 | 
			
		||||
        [providerId, name, isEnabled ? 1 : 0, priority, JSON.stringify(config),
 | 
			
		||||
         now, utcNow, now, utcNow]
 | 
			
		||||
    );
 | 
			
		||||
 | 
			
		||||
    return providerId;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Update an existing embedding provider configuration
 | 
			
		||||
 */
 | 
			
		||||
export async function updateEmbeddingProviderConfig(
 | 
			
		||||
    providerId: string,
 | 
			
		||||
    isEnabled?: boolean,
 | 
			
		||||
    priority?: number,
 | 
			
		||||
    config?: EmbeddingConfig
 | 
			
		||||
): Promise<boolean> {
 | 
			
		||||
    const now = dateUtils.localNowDateTime();
 | 
			
		||||
    const utcNow = dateUtils.utcNowDateTime();
 | 
			
		||||
 | 
			
		||||
    // Get existing provider
 | 
			
		||||
    const provider = await sql.getRow(
 | 
			
		||||
        "SELECT * FROM embedding_providers WHERE providerId = ?",
 | 
			
		||||
        [providerId]
 | 
			
		||||
    );
 | 
			
		||||
 | 
			
		||||
    if (!provider) {
 | 
			
		||||
        return false;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Build update query parts
 | 
			
		||||
    const updates = [];
 | 
			
		||||
    const params: any[] = [];
 | 
			
		||||
 | 
			
		||||
    if (isEnabled !== undefined) {
 | 
			
		||||
        updates.push("isEnabled = ?");
 | 
			
		||||
        params.push(isEnabled ? 1 : 0);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (priority !== undefined) {
 | 
			
		||||
        updates.push("priority = ?");
 | 
			
		||||
        params.push(priority);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (config) {
 | 
			
		||||
        updates.push("config = ?");
 | 
			
		||||
        params.push(JSON.stringify(config));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (updates.length === 0) {
 | 
			
		||||
        return true; // Nothing to update
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    updates.push("dateModified = ?");
 | 
			
		||||
    updates.push("utcDateModified = ?");
 | 
			
		||||
    params.push(now, utcNow);
 | 
			
		||||
 | 
			
		||||
    params.push(providerId);
 | 
			
		||||
 | 
			
		||||
    // Execute update
 | 
			
		||||
    await sql.execute(
 | 
			
		||||
        `UPDATE embedding_providers SET ${updates.join(", ")} WHERE providerId = ?`,
 | 
			
		||||
        params
 | 
			
		||||
    );
 | 
			
		||||
 | 
			
		||||
    return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Delete an embedding provider configuration
 | 
			
		||||
 */
 | 
			
		||||
export async function deleteEmbeddingProviderConfig(providerId: string): Promise<boolean> {
 | 
			
		||||
    const result = await sql.execute(
 | 
			
		||||
        "DELETE FROM embedding_providers WHERE providerId = ?",
 | 
			
		||||
        [providerId]
 | 
			
		||||
    );
 | 
			
		||||
 | 
			
		||||
    return result.changes > 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Get all embedding provider configurations from the database
 | 
			
		||||
 */
 | 
			
		||||
export async function getEmbeddingProviderConfigs() {
 | 
			
		||||
    return await sql.getRows("SELECT * FROM embedding_providers ORDER BY priority DESC");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Initialize the default embedding providers
 | 
			
		||||
 */
 | 
			
		||||
export async function initializeDefaultProviders() {
 | 
			
		||||
    // Register built-in providers
 | 
			
		||||
    try {
 | 
			
		||||
        // Register OpenAI provider if API key is configured
 | 
			
		||||
        const openaiApiKey = await options.getOption('openaiApiKey');
 | 
			
		||||
        if (openaiApiKey) {
 | 
			
		||||
            const openaiModel = await options.getOption('openaiDefaultModel') || 'text-embedding-3-small';
 | 
			
		||||
            const openaiBaseUrl = await options.getOption('openaiBaseUrl') || 'https://api.openai.com/v1';
 | 
			
		||||
 | 
			
		||||
            registerEmbeddingProvider(new OpenAIEmbeddingProvider({
 | 
			
		||||
                model: openaiModel,
 | 
			
		||||
                dimension: 1536, // OpenAI's typical dimension
 | 
			
		||||
                type: 'float32',
 | 
			
		||||
                apiKey: openaiApiKey,
 | 
			
		||||
                baseUrl: openaiBaseUrl
 | 
			
		||||
            }));
 | 
			
		||||
 | 
			
		||||
            // Create OpenAI provider config if it doesn't exist
 | 
			
		||||
            const existingOpenAI = await sql.getRow(
 | 
			
		||||
                "SELECT * FROM embedding_providers WHERE name = ?",
 | 
			
		||||
                ['openai']
 | 
			
		||||
            );
 | 
			
		||||
 | 
			
		||||
            if (!existingOpenAI) {
 | 
			
		||||
                await createEmbeddingProviderConfig('openai', {
 | 
			
		||||
                    model: openaiModel,
 | 
			
		||||
                    dimension: 1536,
 | 
			
		||||
                    type: 'float32'
 | 
			
		||||
                }, true, 100);
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Register Anthropic provider if API key is configured
 | 
			
		||||
        const anthropicApiKey = await options.getOption('anthropicApiKey');
 | 
			
		||||
        if (anthropicApiKey) {
 | 
			
		||||
            const anthropicModel = await options.getOption('anthropicDefaultModel') || 'claude-3-haiku-20240307';
 | 
			
		||||
            const anthropicBaseUrl = await options.getOption('anthropicBaseUrl') || 'https://api.anthropic.com/v1';
 | 
			
		||||
 | 
			
		||||
            registerEmbeddingProvider(new AnthropicEmbeddingProvider({
 | 
			
		||||
                model: anthropicModel,
 | 
			
		||||
                dimension: 1024, // Anthropic's embedding dimension
 | 
			
		||||
                type: 'float32',
 | 
			
		||||
                apiKey: anthropicApiKey,
 | 
			
		||||
                baseUrl: anthropicBaseUrl
 | 
			
		||||
            }));
 | 
			
		||||
 | 
			
		||||
            // Create Anthropic provider config if it doesn't exist
 | 
			
		||||
            const existingAnthropic = await sql.getRow(
 | 
			
		||||
                "SELECT * FROM embedding_providers WHERE name = ?",
 | 
			
		||||
                ['anthropic']
 | 
			
		||||
            );
 | 
			
		||||
 | 
			
		||||
            if (!existingAnthropic) {
 | 
			
		||||
                await createEmbeddingProviderConfig('anthropic', {
 | 
			
		||||
                    model: anthropicModel,
 | 
			
		||||
                    dimension: 1024,
 | 
			
		||||
                    type: 'float32'
 | 
			
		||||
                }, true, 75);
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Register Ollama provider if enabled
 | 
			
		||||
        if (await options.getOptionBool('ollamaEnabled')) {
 | 
			
		||||
            const ollamaModel = await options.getOption('ollamaDefaultModel') || 'llama3';
 | 
			
		||||
            const ollamaBaseUrl = await options.getOption('ollamaBaseUrl') || 'http://localhost:11434';
 | 
			
		||||
 | 
			
		||||
            registerEmbeddingProvider(new OllamaEmbeddingProvider({
 | 
			
		||||
                model: ollamaModel,
 | 
			
		||||
                dimension: 4096, // Typical for Ollama models
 | 
			
		||||
                type: 'float32',
 | 
			
		||||
                baseUrl: ollamaBaseUrl
 | 
			
		||||
            }));
 | 
			
		||||
 | 
			
		||||
            // Create Ollama provider config if it doesn't exist
 | 
			
		||||
            const existingOllama = await sql.getRow(
 | 
			
		||||
                "SELECT * FROM embedding_providers WHERE name = ?",
 | 
			
		||||
                ['ollama']
 | 
			
		||||
            );
 | 
			
		||||
 | 
			
		||||
            if (!existingOllama) {
 | 
			
		||||
                await createEmbeddingProviderConfig('ollama', {
 | 
			
		||||
                    model: ollamaModel,
 | 
			
		||||
                    dimension: 4096,
 | 
			
		||||
                    type: 'float32'
 | 
			
		||||
                }, true, 50);
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Always register local provider as fallback
 | 
			
		||||
        registerEmbeddingProvider(new LocalEmbeddingProvider({
 | 
			
		||||
            model: 'local',
 | 
			
		||||
            dimension: 384,
 | 
			
		||||
            type: 'float32'
 | 
			
		||||
        }));
 | 
			
		||||
 | 
			
		||||
        // Create local provider config if it doesn't exist
 | 
			
		||||
        const existingLocal = await sql.getRow(
 | 
			
		||||
            "SELECT * FROM embedding_providers WHERE name = ?",
 | 
			
		||||
            ['local']
 | 
			
		||||
        );
 | 
			
		||||
 | 
			
		||||
        if (!existingLocal) {
 | 
			
		||||
            await createEmbeddingProviderConfig('local', {
 | 
			
		||||
                model: 'local',
 | 
			
		||||
                dimension: 384,
 | 
			
		||||
                type: 'float32'
 | 
			
		||||
            }, true, 10);
 | 
			
		||||
        }
 | 
			
		||||
    } catch (error: any) {
 | 
			
		||||
        log.error(`Error initializing default embedding providers: ${error.message || 'Unknown error'}`);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
export default {
 | 
			
		||||
    registerEmbeddingProvider,
 | 
			
		||||
    getEmbeddingProviders,
 | 
			
		||||
    getEmbeddingProvider,
 | 
			
		||||
    getEnabledEmbeddingProviders,
 | 
			
		||||
    createEmbeddingProviderConfig,
 | 
			
		||||
    updateEmbeddingProviderConfig,
 | 
			
		||||
    deleteEmbeddingProviderConfig,
 | 
			
		||||
    getEmbeddingProviderConfigs,
 | 
			
		||||
    initializeDefaultProviders
 | 
			
		||||
};
 | 
			
		||||
							
								
								
									
										78
									
								
								src/services/llm/embeddings/providers/anthropic.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										78
									
								
								src/services/llm/embeddings/providers/anthropic.ts
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,78 @@
 | 
			
		||||
import { BaseEmbeddingProvider } from "../base_embeddings.js";
 | 
			
		||||
import type { EmbeddingConfig } from "../embeddings_interface.js";
 | 
			
		||||
import axios from "axios";
 | 
			
		||||
import log from "../../../log.js";
 | 
			
		||||
 | 
			
		||||
interface AnthropicEmbeddingConfig extends EmbeddingConfig {
 | 
			
		||||
    apiKey: string;
 | 
			
		||||
    baseUrl: string;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Anthropic (Claude) embedding provider implementation
 | 
			
		||||
 */
 | 
			
		||||
export class AnthropicEmbeddingProvider extends BaseEmbeddingProvider {
 | 
			
		||||
    name = "anthropic";
 | 
			
		||||
    private apiKey: string;
 | 
			
		||||
    private baseUrl: string;
 | 
			
		||||
 | 
			
		||||
    constructor(config: AnthropicEmbeddingConfig) {
 | 
			
		||||
        super(config);
 | 
			
		||||
        this.apiKey = config.apiKey;
 | 
			
		||||
        this.baseUrl = config.baseUrl;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Generate embeddings for a single text
 | 
			
		||||
     */
 | 
			
		||||
    async generateEmbeddings(text: string): Promise<Float32Array> {
 | 
			
		||||
        try {
 | 
			
		||||
            const response = await axios.post(
 | 
			
		||||
                `${this.baseUrl}/embeddings`,
 | 
			
		||||
                {
 | 
			
		||||
                    model: this.config.model || "claude-3-haiku-20240307",
 | 
			
		||||
                    input: text,
 | 
			
		||||
                    encoding_format: "float"
 | 
			
		||||
                },
 | 
			
		||||
                {
 | 
			
		||||
                    headers: {
 | 
			
		||||
                        "Content-Type": "application/json",
 | 
			
		||||
                        "x-api-key": this.apiKey,
 | 
			
		||||
                        "anthropic-version": "2023-06-01"
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            );
 | 
			
		||||
 | 
			
		||||
            if (response.data && response.data.embedding) {
 | 
			
		||||
                const embedding = response.data.embedding;
 | 
			
		||||
                return new Float32Array(embedding);
 | 
			
		||||
            } else {
 | 
			
		||||
                throw new Error("Unexpected response structure from Anthropic API");
 | 
			
		||||
            }
 | 
			
		||||
        } catch (error: any) {
 | 
			
		||||
            const errorMessage = error.response?.data?.error?.message || error.message || "Unknown error";
 | 
			
		||||
            log.error(`Anthropic embedding error: ${errorMessage}`);
 | 
			
		||||
            throw new Error(`Anthropic embedding error: ${errorMessage}`);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Generate embeddings for multiple texts in a single batch
 | 
			
		||||
     *
 | 
			
		||||
     * Note: Anthropic doesn't currently support batch embedding, so we process each text individually
 | 
			
		||||
     */
 | 
			
		||||
    async generateBatchEmbeddings(texts: string[]): Promise<Float32Array[]> {
 | 
			
		||||
        if (texts.length === 0) {
 | 
			
		||||
            return [];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        const results: Float32Array[] = [];
 | 
			
		||||
 | 
			
		||||
        for (const text of texts) {
 | 
			
		||||
            const embedding = await this.generateEmbeddings(text);
 | 
			
		||||
            results.push(embedding);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        return results;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										73
									
								
								src/services/llm/embeddings/providers/local.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										73
									
								
								src/services/llm/embeddings/providers/local.ts
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,73 @@
 | 
			
		||||
import { BaseEmbeddingProvider } from "../base_embeddings.js";
 | 
			
		||||
import type { EmbeddingConfig } from "../embeddings_interface.js";
 | 
			
		||||
import crypto from "crypto";
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Local embedding provider implementation
 | 
			
		||||
 *
 | 
			
		||||
 * This is a fallback provider that generates simple deterministic embeddings
 | 
			
		||||
 * using cryptographic hashing. These are not semantic vectors but can be used
 | 
			
		||||
 * for exact matches when no other providers are available.
 | 
			
		||||
 */
 | 
			
		||||
export class LocalEmbeddingProvider extends BaseEmbeddingProvider {
 | 
			
		||||
    name = "local";
 | 
			
		||||
 | 
			
		||||
    constructor(config: EmbeddingConfig) {
 | 
			
		||||
        super(config);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Generate a simple embedding by hashing the text
 | 
			
		||||
     */
 | 
			
		||||
    async generateEmbeddings(text: string): Promise<Float32Array> {
 | 
			
		||||
        const dimension = this.config.dimension || 384;
 | 
			
		||||
        const result = new Float32Array(dimension);
 | 
			
		||||
 | 
			
		||||
        // Generate a hash of the input text
 | 
			
		||||
        const hash = crypto.createHash('sha256').update(text).digest();
 | 
			
		||||
 | 
			
		||||
        // Use the hash to seed a deterministic PRNG
 | 
			
		||||
        let seed = 0;
 | 
			
		||||
        for (let i = 0; i < hash.length; i += 4) {
 | 
			
		||||
            seed = (seed * 65536 + hash.readUInt32LE(i % (hash.length - 3))) >>> 0;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Generate pseudo-random but deterministic values for the embedding
 | 
			
		||||
        for (let i = 0; i < dimension; i++) {
 | 
			
		||||
            // Generate next pseudo-random number
 | 
			
		||||
            seed = (seed * 1664525 + 1013904223) >>> 0;
 | 
			
		||||
 | 
			
		||||
            // Convert to a float between -1 and 1
 | 
			
		||||
            result[i] = (seed / 2147483648) - 1;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Normalize the vector
 | 
			
		||||
        let magnitude = 0;
 | 
			
		||||
        for (let i = 0; i < dimension; i++) {
 | 
			
		||||
            magnitude += result[i] * result[i];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        magnitude = Math.sqrt(magnitude);
 | 
			
		||||
        if (magnitude > 0) {
 | 
			
		||||
            for (let i = 0; i < dimension; i++) {
 | 
			
		||||
                result[i] /= magnitude;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        return result;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Generate embeddings for multiple texts
 | 
			
		||||
     */
 | 
			
		||||
    async generateBatchEmbeddings(texts: string[]): Promise<Float32Array[]> {
 | 
			
		||||
        const results: Float32Array[] = [];
 | 
			
		||||
 | 
			
		||||
        for (const text of texts) {
 | 
			
		||||
            const embedding = await this.generateEmbeddings(text);
 | 
			
		||||
            results.push(embedding);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        return results;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										77
									
								
								src/services/llm/embeddings/providers/ollama.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										77
									
								
								src/services/llm/embeddings/providers/ollama.ts
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,77 @@
 | 
			
		||||
import { BaseEmbeddingProvider } from "../base_embeddings.js";
 | 
			
		||||
import type { EmbeddingConfig } from "../embeddings_interface.js";
 | 
			
		||||
import axios from "axios";
 | 
			
		||||
import log from "../../../log.js";
 | 
			
		||||
 | 
			
		||||
interface OllamaEmbeddingConfig extends EmbeddingConfig {
 | 
			
		||||
    baseUrl: string;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Ollama embedding provider implementation
 | 
			
		||||
 */
 | 
			
		||||
export class OllamaEmbeddingProvider extends BaseEmbeddingProvider {
 | 
			
		||||
    name = "ollama";
 | 
			
		||||
    private baseUrl: string;
 | 
			
		||||
 | 
			
		||||
    constructor(config: OllamaEmbeddingConfig) {
 | 
			
		||||
        super(config);
 | 
			
		||||
        this.baseUrl = config.baseUrl;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Generate embeddings for a single text
 | 
			
		||||
     */
 | 
			
		||||
    async generateEmbeddings(text: string): Promise<Float32Array> {
 | 
			
		||||
        try {
 | 
			
		||||
            const response = await axios.post(
 | 
			
		||||
                `${this.baseUrl}/api/embeddings`,
 | 
			
		||||
                {
 | 
			
		||||
                    model: this.config.model || "llama3",
 | 
			
		||||
                    prompt: text
 | 
			
		||||
                },
 | 
			
		||||
                {
 | 
			
		||||
                    headers: {
 | 
			
		||||
                        "Content-Type": "application/json"
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            );
 | 
			
		||||
 | 
			
		||||
            if (response.data && Array.isArray(response.data.embedding)) {
 | 
			
		||||
                return new Float32Array(response.data.embedding);
 | 
			
		||||
            } else {
 | 
			
		||||
                throw new Error("Unexpected response structure from Ollama API");
 | 
			
		||||
            }
 | 
			
		||||
        } catch (error: any) {
 | 
			
		||||
            const errorMessage = error.response?.data?.error?.message || error.message || "Unknown error";
 | 
			
		||||
            log.error(`Ollama embedding error: ${errorMessage}`);
 | 
			
		||||
            throw new Error(`Ollama embedding error: ${errorMessage}`);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Generate embeddings for multiple texts
 | 
			
		||||
     *
 | 
			
		||||
     * Note: Ollama API doesn't support batch embedding, so we process them sequentially
 | 
			
		||||
     */
 | 
			
		||||
    async generateBatchEmbeddings(texts: string[]): Promise<Float32Array[]> {
 | 
			
		||||
        if (texts.length === 0) {
 | 
			
		||||
            return [];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        const results: Float32Array[] = [];
 | 
			
		||||
 | 
			
		||||
        for (const text of texts) {
 | 
			
		||||
            try {
 | 
			
		||||
                const embedding = await this.generateEmbeddings(text);
 | 
			
		||||
                results.push(embedding);
 | 
			
		||||
            } catch (error: any) {
 | 
			
		||||
                const errorMessage = error.response?.data?.error?.message || error.message || "Unknown error";
 | 
			
		||||
                log.error(`Ollama batch embedding error: ${errorMessage}`);
 | 
			
		||||
                throw new Error(`Ollama batch embedding error: ${errorMessage}`);
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        return results;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										107
									
								
								src/services/llm/embeddings/providers/openai.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										107
									
								
								src/services/llm/embeddings/providers/openai.ts
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,107 @@
 | 
			
		||||
import { BaseEmbeddingProvider } from "../base_embeddings.js";
 | 
			
		||||
import type { EmbeddingConfig } from "../embeddings_interface.js";
 | 
			
		||||
import axios from "axios";
 | 
			
		||||
import log from "../../../log.js";
 | 
			
		||||
 | 
			
		||||
interface OpenAIEmbeddingConfig extends EmbeddingConfig {
 | 
			
		||||
    apiKey: string;
 | 
			
		||||
    baseUrl: string;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * OpenAI embedding provider implementation
 | 
			
		||||
 */
 | 
			
		||||
export class OpenAIEmbeddingProvider extends BaseEmbeddingProvider {
 | 
			
		||||
    name = "openai";
 | 
			
		||||
    private apiKey: string;
 | 
			
		||||
    private baseUrl: string;
 | 
			
		||||
 | 
			
		||||
    constructor(config: OpenAIEmbeddingConfig) {
 | 
			
		||||
        super(config);
 | 
			
		||||
        this.apiKey = config.apiKey;
 | 
			
		||||
        this.baseUrl = config.baseUrl;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Generate embeddings for a single text
 | 
			
		||||
     */
 | 
			
		||||
    async generateEmbeddings(text: string): Promise<Float32Array> {
 | 
			
		||||
        try {
 | 
			
		||||
            const response = await axios.post(
 | 
			
		||||
                `${this.baseUrl}/embeddings`,
 | 
			
		||||
                {
 | 
			
		||||
                    input: text,
 | 
			
		||||
                    model: this.config.model || "text-embedding-3-small",
 | 
			
		||||
                    encoding_format: "float"
 | 
			
		||||
                },
 | 
			
		||||
                {
 | 
			
		||||
                    headers: {
 | 
			
		||||
                        "Content-Type": "application/json",
 | 
			
		||||
                        "Authorization": `Bearer ${this.apiKey}`
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            );
 | 
			
		||||
 | 
			
		||||
            if (response.data && response.data.data && response.data.data[0] && response.data.data[0].embedding) {
 | 
			
		||||
                const embedding = response.data.data[0].embedding;
 | 
			
		||||
                return new Float32Array(embedding);
 | 
			
		||||
            } else {
 | 
			
		||||
                throw new Error("Unexpected response structure from OpenAI API");
 | 
			
		||||
            }
 | 
			
		||||
        } catch (error: any) {
 | 
			
		||||
            const errorMessage = error.response?.data?.error?.message || error.message || "Unknown error";
 | 
			
		||||
            log.error(`OpenAI embedding error: ${errorMessage}`);
 | 
			
		||||
            throw new Error(`OpenAI embedding error: ${errorMessage}`);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Generate embeddings for multiple texts in a single batch
 | 
			
		||||
     */
 | 
			
		||||
    async generateBatchEmbeddings(texts: string[]): Promise<Float32Array[]> {
 | 
			
		||||
        if (texts.length === 0) {
 | 
			
		||||
            return [];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        const batchSize = this.config.batchSize || 10;
 | 
			
		||||
        const results: Float32Array[] = [];
 | 
			
		||||
 | 
			
		||||
        // Process in batches to avoid API limits
 | 
			
		||||
        for (let i = 0; i < texts.length; i += batchSize) {
 | 
			
		||||
            const batch = texts.slice(i, i + batchSize);
 | 
			
		||||
            try {
 | 
			
		||||
                const response = await axios.post(
 | 
			
		||||
                    `${this.baseUrl}/embeddings`,
 | 
			
		||||
                    {
 | 
			
		||||
                        input: batch,
 | 
			
		||||
                        model: this.config.model || "text-embedding-3-small",
 | 
			
		||||
                        encoding_format: "float"
 | 
			
		||||
                    },
 | 
			
		||||
                    {
 | 
			
		||||
                        headers: {
 | 
			
		||||
                            "Content-Type": "application/json",
 | 
			
		||||
                            "Authorization": `Bearer ${this.apiKey}`
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
                );
 | 
			
		||||
 | 
			
		||||
                if (response.data && response.data.data) {
 | 
			
		||||
                    // Sort the embeddings by index to ensure they match the input order
 | 
			
		||||
                    const sortedEmbeddings = response.data.data
 | 
			
		||||
                        .sort((a: any, b: any) => a.index - b.index)
 | 
			
		||||
                        .map((item: any) => new Float32Array(item.embedding));
 | 
			
		||||
 | 
			
		||||
                    results.push(...sortedEmbeddings);
 | 
			
		||||
                } else {
 | 
			
		||||
                    throw new Error("Unexpected response structure from OpenAI API");
 | 
			
		||||
                }
 | 
			
		||||
            } catch (error: any) {
 | 
			
		||||
                const errorMessage = error.response?.data?.error?.message || error.message || "Unknown error";
 | 
			
		||||
                log.error(`OpenAI batch embedding error: ${errorMessage}`);
 | 
			
		||||
                throw new Error(`OpenAI batch embedding error: ${errorMessage}`);
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        return results;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										483
									
								
								src/services/llm/embeddings/vector_store.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										483
									
								
								src/services/llm/embeddings/vector_store.ts
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,483 @@
 | 
			
		||||
import sql from "../../sql.js";
 | 
			
		||||
import { randomString } from "../../utils.js";
 | 
			
		||||
import options from "../../options.js";
 | 
			
		||||
import dateUtils from "../../date_utils.js";
 | 
			
		||||
import log from "../../log.js";
 | 
			
		||||
import becca from "../../../becca/becca.js";
 | 
			
		||||
import type { NoteEmbeddingContext } from "./embeddings_interface.js";
 | 
			
		||||
import { getEmbeddingProviders, getEnabledEmbeddingProviders } from "./providers.js";
 | 
			
		||||
 | 
			
		||||
// Type definition for embedding result
 | 
			
		||||
interface EmbeddingResult {
 | 
			
		||||
    embedId: string;
 | 
			
		||||
    noteId: string;
 | 
			
		||||
    providerId: string;
 | 
			
		||||
    modelId: string;
 | 
			
		||||
    dimension: number;
 | 
			
		||||
    embedding: Float32Array;
 | 
			
		||||
    version: number;
 | 
			
		||||
    dateCreated: string;
 | 
			
		||||
    utcDateCreated: string;
 | 
			
		||||
    dateModified: string;
 | 
			
		||||
    utcDateModified: string;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Type for queue item
 | 
			
		||||
interface QueueItem {
 | 
			
		||||
    noteId: string;
 | 
			
		||||
    operation: string;
 | 
			
		||||
    attempts: number;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Computes the cosine similarity between two vectors
 | 
			
		||||
 */
 | 
			
		||||
export function cosineSimilarity(a: Float32Array, b: Float32Array): number {
 | 
			
		||||
    if (a.length !== b.length) {
 | 
			
		||||
        throw new Error(`Vector dimensions don't match: ${a.length} vs ${b.length}`);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    let dotProduct = 0;
 | 
			
		||||
    let aMagnitude = 0;
 | 
			
		||||
    let bMagnitude = 0;
 | 
			
		||||
 | 
			
		||||
    for (let i = 0; i < a.length; i++) {
 | 
			
		||||
        dotProduct += a[i] * b[i];
 | 
			
		||||
        aMagnitude += a[i] * a[i];
 | 
			
		||||
        bMagnitude += b[i] * b[i];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    aMagnitude = Math.sqrt(aMagnitude);
 | 
			
		||||
    bMagnitude = Math.sqrt(bMagnitude);
 | 
			
		||||
 | 
			
		||||
    if (aMagnitude === 0 || bMagnitude === 0) {
 | 
			
		||||
        return 0;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return dotProduct / (aMagnitude * bMagnitude);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Converts embedding Float32Array to Buffer for storage in SQLite
 | 
			
		||||
 */
 | 
			
		||||
export function embeddingToBuffer(embedding: Float32Array): Buffer {
 | 
			
		||||
    return Buffer.from(embedding.buffer);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Converts Buffer from SQLite back to Float32Array
 | 
			
		||||
 */
 | 
			
		||||
export function bufferToEmbedding(buffer: Buffer, dimension: number): Float32Array {
 | 
			
		||||
    return new Float32Array(buffer.buffer, buffer.byteOffset, dimension);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Creates or updates an embedding for a note
 | 
			
		||||
 */
 | 
			
		||||
export async function storeNoteEmbedding(
 | 
			
		||||
    noteId: string,
 | 
			
		||||
    providerId: string,
 | 
			
		||||
    modelId: string,
 | 
			
		||||
    embedding: Float32Array
 | 
			
		||||
): Promise<string> {
 | 
			
		||||
    const dimension = embedding.length;
 | 
			
		||||
    const embeddingBlob = embeddingToBuffer(embedding);
 | 
			
		||||
    const now = dateUtils.localNowDateTime();
 | 
			
		||||
    const utcNow = dateUtils.utcNowDateTime();
 | 
			
		||||
 | 
			
		||||
    // Check if an embedding already exists for this note and provider/model
 | 
			
		||||
    const existingEmbed = await getEmbeddingForNote(noteId, providerId, modelId);
 | 
			
		||||
 | 
			
		||||
    if (existingEmbed) {
 | 
			
		||||
        // Update existing embedding
 | 
			
		||||
        await sql.execute(`
 | 
			
		||||
            UPDATE note_embeddings
 | 
			
		||||
            SET embedding = ?, dimension = ?, version = version + 1,
 | 
			
		||||
                dateModified = ?, utcDateModified = ?
 | 
			
		||||
            WHERE embedId = ?`,
 | 
			
		||||
            [embeddingBlob, dimension, now, utcNow, existingEmbed.embedId]
 | 
			
		||||
        );
 | 
			
		||||
        return existingEmbed.embedId;
 | 
			
		||||
    } else {
 | 
			
		||||
        // Create new embedding
 | 
			
		||||
        const embedId = randomString(16);
 | 
			
		||||
        await sql.execute(`
 | 
			
		||||
            INSERT INTO note_embeddings
 | 
			
		||||
            (embedId, noteId, providerId, modelId, dimension, embedding,
 | 
			
		||||
             dateCreated, utcDateCreated, dateModified, utcDateModified)
 | 
			
		||||
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
 | 
			
		||||
            [embedId, noteId, providerId, modelId, dimension, embeddingBlob,
 | 
			
		||||
             now, utcNow, now, utcNow]
 | 
			
		||||
        );
 | 
			
		||||
        return embedId;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Retrieves embedding for a specific note
 | 
			
		||||
 */
 | 
			
		||||
export async function getEmbeddingForNote(noteId: string, providerId: string, modelId: string): Promise<EmbeddingResult | null> {
 | 
			
		||||
    const row = await sql.getRow(`
 | 
			
		||||
        SELECT embedId, noteId, providerId, modelId, dimension, embedding, version,
 | 
			
		||||
               dateCreated, utcDateCreated, dateModified, utcDateModified
 | 
			
		||||
        FROM note_embeddings
 | 
			
		||||
        WHERE noteId = ? AND providerId = ? AND modelId = ?`,
 | 
			
		||||
        [noteId, providerId, modelId]
 | 
			
		||||
    );
 | 
			
		||||
 | 
			
		||||
    if (!row) {
 | 
			
		||||
        return null;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Need to cast row to any as it doesn't have type information
 | 
			
		||||
    const rowData = row as any;
 | 
			
		||||
 | 
			
		||||
    return {
 | 
			
		||||
        ...rowData,
 | 
			
		||||
        embedding: bufferToEmbedding(rowData.embedding, rowData.dimension)
 | 
			
		||||
    };
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Finds similar notes based on vector similarity
 | 
			
		||||
 */
 | 
			
		||||
export async function findSimilarNotes(
 | 
			
		||||
    embedding: Float32Array,
 | 
			
		||||
    providerId: string,
 | 
			
		||||
    modelId: string,
 | 
			
		||||
    limit = 10,
 | 
			
		||||
    threshold = 0.7
 | 
			
		||||
): Promise<{noteId: string, similarity: number}[]> {
 | 
			
		||||
    // Get all embeddings for the given provider and model
 | 
			
		||||
    const rows = await sql.getRows(`
 | 
			
		||||
        SELECT embedId, noteId, providerId, modelId, dimension, embedding
 | 
			
		||||
        FROM note_embeddings
 | 
			
		||||
        WHERE providerId = ? AND modelId = ?`,
 | 
			
		||||
        [providerId, modelId]
 | 
			
		||||
    );
 | 
			
		||||
 | 
			
		||||
    if (!rows.length) {
 | 
			
		||||
        return [];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Calculate similarity for each embedding
 | 
			
		||||
    const similarities = rows.map(row => {
 | 
			
		||||
        const rowData = row as any;
 | 
			
		||||
        const rowEmbedding = bufferToEmbedding(rowData.embedding, rowData.dimension);
 | 
			
		||||
        return {
 | 
			
		||||
            noteId: rowData.noteId,
 | 
			
		||||
            similarity: cosineSimilarity(embedding, rowEmbedding)
 | 
			
		||||
        };
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
    // Filter by threshold and sort by similarity (descending)
 | 
			
		||||
    return similarities
 | 
			
		||||
        .filter(item => item.similarity >= threshold)
 | 
			
		||||
        .sort((a, b) => b.similarity - a.similarity)
 | 
			
		||||
        .slice(0, limit);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Gets context for a note to be embedded
 | 
			
		||||
 */
 | 
			
		||||
export async function getNoteEmbeddingContext(noteId: string): Promise<NoteEmbeddingContext> {
 | 
			
		||||
    const note = becca.getNote(noteId);
 | 
			
		||||
 | 
			
		||||
    if (!note) {
 | 
			
		||||
        throw new Error(`Note ${noteId} not found`);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Get parent note titles
 | 
			
		||||
    const parentNotes = note.getParentNotes();
 | 
			
		||||
    const parentTitles = parentNotes.map(note => note.title);
 | 
			
		||||
 | 
			
		||||
    // Get child note titles
 | 
			
		||||
    const childNotes = note.getChildNotes();
 | 
			
		||||
    const childTitles = childNotes.map(note => note.title);
 | 
			
		||||
 | 
			
		||||
    // Get attributes
 | 
			
		||||
    const attributes = note.getOwnedAttributes().map(attr => ({
 | 
			
		||||
        type: attr.type,
 | 
			
		||||
        name: attr.name,
 | 
			
		||||
        value: attr.value
 | 
			
		||||
    }));
 | 
			
		||||
 | 
			
		||||
    // Get attachments
 | 
			
		||||
    const attachments = note.getAttachments().map(att => ({
 | 
			
		||||
        title: att.title,
 | 
			
		||||
        mime: att.mime
 | 
			
		||||
    }));
 | 
			
		||||
 | 
			
		||||
    // Get content
 | 
			
		||||
    let content = "";
 | 
			
		||||
    if (note.type === 'text') {
 | 
			
		||||
        content = String(await note.getContent());
 | 
			
		||||
    } else if (note.type === 'code') {
 | 
			
		||||
        content = String(await note.getContent());
 | 
			
		||||
    } else if (note.type === 'image' || note.type === 'file') {
 | 
			
		||||
        content = `[${note.type} attachment: ${note.mime}]`;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return {
 | 
			
		||||
        noteId: note.noteId,
 | 
			
		||||
        title: note.title,
 | 
			
		||||
        content: content,
 | 
			
		||||
        type: note.type,
 | 
			
		||||
        mime: note.mime,
 | 
			
		||||
        dateCreated: note.dateCreated || "",
 | 
			
		||||
        dateModified: note.dateModified || "",
 | 
			
		||||
        attributes,
 | 
			
		||||
        parentTitles,
 | 
			
		||||
        childTitles,
 | 
			
		||||
        attachments
 | 
			
		||||
    };
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Queues a note for embedding update
 | 
			
		||||
 */
 | 
			
		||||
export async function queueNoteForEmbedding(noteId: string, operation = 'UPDATE') {
 | 
			
		||||
    const now = dateUtils.localNowDateTime();
 | 
			
		||||
    const utcNow = dateUtils.utcNowDateTime();
 | 
			
		||||
 | 
			
		||||
    // Check if note is already in queue
 | 
			
		||||
    const existing = await sql.getValue(
 | 
			
		||||
        "SELECT 1 FROM embedding_queue WHERE noteId = ?",
 | 
			
		||||
        [noteId]
 | 
			
		||||
    );
 | 
			
		||||
 | 
			
		||||
    if (existing) {
 | 
			
		||||
        // Update existing queue entry
 | 
			
		||||
        await sql.execute(`
 | 
			
		||||
            UPDATE embedding_queue
 | 
			
		||||
            SET operation = ?, dateQueued = ?, utcDateQueued = ?, attempts = 0, error = NULL
 | 
			
		||||
            WHERE noteId = ?`,
 | 
			
		||||
            [operation, now, utcNow, noteId]
 | 
			
		||||
        );
 | 
			
		||||
    } else {
 | 
			
		||||
        // Add new queue entry
 | 
			
		||||
        await sql.execute(`
 | 
			
		||||
            INSERT INTO embedding_queue
 | 
			
		||||
            (noteId, operation, dateQueued, utcDateQueued)
 | 
			
		||||
            VALUES (?, ?, ?, ?)`,
 | 
			
		||||
            [noteId, operation, now, utcNow]
 | 
			
		||||
        );
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Deletes all embeddings for a note
 | 
			
		||||
 */
 | 
			
		||||
export async function deleteNoteEmbeddings(noteId: string) {
 | 
			
		||||
    await sql.execute(
 | 
			
		||||
        "DELETE FROM note_embeddings WHERE noteId = ?",
 | 
			
		||||
        [noteId]
 | 
			
		||||
    );
 | 
			
		||||
 | 
			
		||||
    // Remove from queue if present
 | 
			
		||||
    await sql.execute(
 | 
			
		||||
        "DELETE FROM embedding_queue WHERE noteId = ?",
 | 
			
		||||
        [noteId]
 | 
			
		||||
    );
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Process the embedding queue
 | 
			
		||||
 */
 | 
			
		||||
export async function processEmbeddingQueue() {
 | 
			
		||||
    if (!(await options.getOptionBool('aiEnabled'))) {
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const batchSize = parseInt(await options.getOption('embeddingBatchSize') || '10', 10);
 | 
			
		||||
    const enabledProviders = await getEnabledEmbeddingProviders();
 | 
			
		||||
 | 
			
		||||
    if (enabledProviders.length === 0) {
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Get notes from queue
 | 
			
		||||
    const notes = await sql.getRows(`
 | 
			
		||||
        SELECT noteId, operation, attempts
 | 
			
		||||
        FROM embedding_queue
 | 
			
		||||
        ORDER BY priority DESC, utcDateQueued ASC
 | 
			
		||||
        LIMIT ?`,
 | 
			
		||||
        [batchSize]
 | 
			
		||||
    );
 | 
			
		||||
 | 
			
		||||
    if (notes.length === 0) {
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    for (const note of notes) {
 | 
			
		||||
        try {
 | 
			
		||||
            const noteData = note as unknown as QueueItem;
 | 
			
		||||
 | 
			
		||||
            // Skip if note no longer exists
 | 
			
		||||
            if (!becca.getNote(noteData.noteId)) {
 | 
			
		||||
                await sql.execute(
 | 
			
		||||
                    "DELETE FROM embedding_queue WHERE noteId = ?",
 | 
			
		||||
                    [noteData.noteId]
 | 
			
		||||
                );
 | 
			
		||||
                await deleteNoteEmbeddings(noteData.noteId);
 | 
			
		||||
                continue;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            if (noteData.operation === 'DELETE') {
 | 
			
		||||
                await deleteNoteEmbeddings(noteData.noteId);
 | 
			
		||||
                await sql.execute(
 | 
			
		||||
                    "DELETE FROM embedding_queue WHERE noteId = ?",
 | 
			
		||||
                    [noteData.noteId]
 | 
			
		||||
                );
 | 
			
		||||
                continue;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            // Get note context for embedding
 | 
			
		||||
            const context = await getNoteEmbeddingContext(noteData.noteId);
 | 
			
		||||
 | 
			
		||||
            // Process with each enabled provider
 | 
			
		||||
            for (const provider of enabledProviders) {
 | 
			
		||||
                try {
 | 
			
		||||
                    // Generate embedding
 | 
			
		||||
                    const embedding = await provider.generateNoteEmbeddings(context);
 | 
			
		||||
 | 
			
		||||
                    // Store embedding
 | 
			
		||||
                    const config = provider.getConfig();
 | 
			
		||||
                    await storeNoteEmbedding(noteData.noteId, provider.name, config.model, embedding);
 | 
			
		||||
                } catch (providerError: any) {
 | 
			
		||||
                    log.error(`Error generating embedding with provider ${provider.name} for note ${noteData.noteId}: ${providerError.message || 'Unknown error'}`);
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            // Remove from queue on success
 | 
			
		||||
            await sql.execute(
 | 
			
		||||
                "DELETE FROM embedding_queue WHERE noteId = ?",
 | 
			
		||||
                [noteData.noteId]
 | 
			
		||||
            );
 | 
			
		||||
        } catch (error: any) {
 | 
			
		||||
            const noteData = note as unknown as QueueItem;
 | 
			
		||||
 | 
			
		||||
            // Update attempt count and log error
 | 
			
		||||
            await sql.execute(`
 | 
			
		||||
                UPDATE embedding_queue
 | 
			
		||||
                SET attempts = attempts + 1,
 | 
			
		||||
                    lastAttempt = ?,
 | 
			
		||||
                    error = ?
 | 
			
		||||
                WHERE noteId = ?`,
 | 
			
		||||
                [dateUtils.utcNowDateTime(), error.message || 'Unknown error', noteData.noteId]
 | 
			
		||||
            );
 | 
			
		||||
 | 
			
		||||
            log.error(`Error processing embedding for note ${noteData.noteId}: ${error.message || 'Unknown error'}`);
 | 
			
		||||
 | 
			
		||||
            // Remove from queue if too many attempts
 | 
			
		||||
            if (noteData.attempts + 1 >= 3) {
 | 
			
		||||
                await sql.execute(
 | 
			
		||||
                    "DELETE FROM embedding_queue WHERE noteId = ?",
 | 
			
		||||
                    [noteData.noteId]
 | 
			
		||||
                );
 | 
			
		||||
                log.error(`Removed note ${noteData.noteId} from embedding queue after multiple failures`);
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Setup note event listeners to keep embeddings up to date
 | 
			
		||||
 */
 | 
			
		||||
export function setupEmbeddingEventListeners() {
 | 
			
		||||
    require("../../../becca/entity_events.js").subscribe({
 | 
			
		||||
        entityName: "notes",
 | 
			
		||||
        eventType: "created",
 | 
			
		||||
        handler: (note: { noteId: string }) => queueNoteForEmbedding(note.noteId, 'CREATE')
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
    require("../../../becca/entity_events.js").subscribe({
 | 
			
		||||
        entityName: "notes",
 | 
			
		||||
        eventType: "updated",
 | 
			
		||||
        handler: ({entity}: { entity: { noteId: string } }) => queueNoteForEmbedding(entity.noteId, 'UPDATE')
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
    require("../../../becca/entity_events.js").subscribe({
 | 
			
		||||
        entityName: "notes",
 | 
			
		||||
        eventType: "deleted",
 | 
			
		||||
        handler: (note: { noteId: string }) => queueNoteForEmbedding(note.noteId, 'DELETE')
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
    require("../../../becca/entity_events.js").subscribe({
 | 
			
		||||
        entityName: "attributes",
 | 
			
		||||
        eventType: ["created", "updated", "deleted"],
 | 
			
		||||
        handler: ({entity}: { entity: { noteId: string } }) => queueNoteForEmbedding(entity.noteId, 'UPDATE')
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
    require("../../../becca/entity_events.js").subscribe({
 | 
			
		||||
        entityName: "branches",
 | 
			
		||||
        eventType: ["created", "updated", "deleted"],
 | 
			
		||||
        handler: ({entity}: { entity: { noteId: string } }) => queueNoteForEmbedding(entity.noteId, 'UPDATE')
 | 
			
		||||
    });
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Setup background processing of the embedding queue
 | 
			
		||||
 */
 | 
			
		||||
export async function setupEmbeddingBackgroundProcessing() {
 | 
			
		||||
    const interval = parseInt(await options.getOption('embeddingUpdateInterval') || '5000', 10);
 | 
			
		||||
 | 
			
		||||
    setInterval(async () => {
 | 
			
		||||
        try {
 | 
			
		||||
            await processEmbeddingQueue();
 | 
			
		||||
        } catch (error: any) {
 | 
			
		||||
            log.error(`Error in background embedding processing: ${error.message || 'Unknown error'}`);
 | 
			
		||||
        }
 | 
			
		||||
    }, interval);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Initialize embeddings system
 | 
			
		||||
 */
 | 
			
		||||
export async function initEmbeddings() {
 | 
			
		||||
    if (await options.getOptionBool('aiEnabled')) {
 | 
			
		||||
        setupEmbeddingEventListeners();
 | 
			
		||||
        await setupEmbeddingBackgroundProcessing();
 | 
			
		||||
        log.info("Embeddings system initialized");
 | 
			
		||||
    } else {
 | 
			
		||||
        log.info("Embeddings system disabled");
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Reprocess all notes to update embeddings
 | 
			
		||||
 */
 | 
			
		||||
export async function reprocessAllNotes() {
 | 
			
		||||
    if (!(await options.getOptionBool('aiEnabled'))) {
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    log.info("Queueing all notes for embedding updates");
 | 
			
		||||
 | 
			
		||||
    const noteIds = await sql.getColumn(
 | 
			
		||||
        "SELECT noteId FROM notes WHERE isDeleted = 0"
 | 
			
		||||
    );
 | 
			
		||||
 | 
			
		||||
    log.info(`Adding ${noteIds.length} notes to embedding queue`);
 | 
			
		||||
 | 
			
		||||
    for (const noteId of noteIds) {
 | 
			
		||||
        await queueNoteForEmbedding(noteId as string, 'UPDATE');
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
export default {
 | 
			
		||||
    cosineSimilarity,
 | 
			
		||||
    embeddingToBuffer,
 | 
			
		||||
    bufferToEmbedding,
 | 
			
		||||
    storeNoteEmbedding,
 | 
			
		||||
    getEmbeddingForNote,
 | 
			
		||||
    findSimilarNotes,
 | 
			
		||||
    getNoteEmbeddingContext,
 | 
			
		||||
    queueNoteForEmbedding,
 | 
			
		||||
    deleteNoteEmbeddings,
 | 
			
		||||
    processEmbeddingQueue,
 | 
			
		||||
    setupEmbeddingEventListeners,
 | 
			
		||||
    setupEmbeddingBackgroundProcessing,
 | 
			
		||||
    initEmbeddings,
 | 
			
		||||
    reprocessAllNotes
 | 
			
		||||
};
 | 
			
		||||
		Reference in New Issue
	
	Block a user