	add Voyage AI as Embedding provider
		| @@ -201,9 +201,10 @@ export default class AiSettingsWidget extends OptionsWidget { | ||||
|  | ||||
|         <nav class="options-section-tabs"> | ||||
|             <div class="nav nav-tabs" id="nav-tab" role="tablist"> | ||||
|                 <button class="nav-link active" id="nav-openai-tab" data-bs-toggle="tab" data-bs-target="#nav-openai" type="button" role="tab" aria-controls="nav-openai" aria-selected="true">OpenAI</button> | ||||
|                 <button class="nav-link" id="nav-anthropic-tab" data-bs-toggle="tab" data-bs-target="#nav-anthropic" type="button" role="tab" aria-controls="nav-anthropic" aria-selected="false">Anthropic</button> | ||||
|                 <button class="nav-link" id="nav-ollama-tab" data-bs-toggle="tab" data-bs-target="#nav-ollama" type="button" role="tab" aria-controls="nav-ollama" aria-selected="false">Ollama</button> | ||||
|                 <button class="nav-link active" id="nav-openai-tab" data-bs-toggle="tab" data-bs-target="#nav-openai" type="button" role="tab" aria-controls="nav-openai" aria-selected="true">${t("ai_llm.openai_tab")}</button> | ||||
|                 <button class="nav-link" id="nav-anthropic-tab" data-bs-toggle="tab" data-bs-target="#nav-anthropic" type="button" role="tab" aria-controls="nav-anthropic" aria-selected="false">${t("ai_llm.anthropic_tab")}</button> | ||||
|                 <button class="nav-link" id="nav-voyage-tab" data-bs-toggle="tab" data-bs-target="#nav-voyage" type="button" role="tab" aria-controls="nav-voyage" aria-selected="false">${t("ai_llm.voyage_tab")}</button> | ||||
|                 <button class="nav-link" id="nav-ollama-tab" data-bs-toggle="tab" data-bs-target="#nav-ollama" type="button" role="tab" aria-controls="nav-ollama" aria-selected="false">${t("ai_llm.ollama_tab")}</button> | ||||
|             </div> | ||||
|         </nav> | ||||
|         <div class="options-section"> | ||||
| @@ -280,6 +281,29 @@ export default class AiSettingsWidget extends OptionsWidget { | ||||
|                         </div> | ||||
|                     </div> | ||||
|                 </div> | ||||
|                 <div class="tab-pane fade" id="nav-voyage" role="tabpanel" aria-labelledby="nav-voyage-tab"> | ||||
|                     <div class="card"> | ||||
|                         <div class="card-header"> | ||||
|                             <h5>${t("ai_llm.voyage_configuration")}</h5> | ||||
|                         </div> | ||||
|                         <div class="card-body"> | ||||
|                             <div class="form-group"> | ||||
|                                 <label>${t("ai_llm.api_key")}</label> | ||||
|                                 <input type="password" class="voyage-api-key form-control" autocomplete="off"> | ||||
|                                 <div class="form-text">${t("ai_llm.voyage_api_key_description")}</div> | ||||
|                             </div> | ||||
|  | ||||
|                             <div class="form-group"> | ||||
|                                 <label>${t("ai_llm.embedding_model")}</label> | ||||
|                                 <select class="voyage-embedding-model form-control"> | ||||
|                                     <option value="voyage-2">voyage-2 (recommended)</option> | ||||
|                                     <option value="voyage-large-2">voyage-large-2</option> | ||||
|                                 </select> | ||||
|                                 <div class="form-text">${t("ai_llm.voyage_embedding_model_description")}</div> | ||||
|                             </div> | ||||
|                         </div> | ||||
|                     </div> | ||||
|                 </div> | ||||
|                 <div class="tab-pane fade" id="nav-ollama" role="tabpanel" aria-labelledby="nav-ollama-tab"> | ||||
|                     <div class="card"> | ||||
|                         <div class="card-header"> | ||||
| @@ -333,7 +357,7 @@ export default class AiSettingsWidget extends OptionsWidget { | ||||
|                 <label>${t("ai_llm.embedding_default_provider")}</label> | ||||
|                 <select class="embedding-default-provider form-control"> | ||||
|                     <option value="openai">OpenAI</option> | ||||
|                     <option value="anthropic">Anthropic</option> | ||||
|                     <option value="voyage">Voyage AI</option> | ||||
|                     <option value="ollama">Ollama</option> | ||||
|                     <option value="local">Local</option> | ||||
|                 </select> | ||||
| @@ -366,16 +390,16 @@ export default class AiSettingsWidget extends OptionsWidget { | ||||
|                                     <span class="bx bx-x"></span> | ||||
|                                 </button> | ||||
|                             </li> | ||||
|                             <li class="standard-list-item d-flex align-items-center" data-provider="ollama"> | ||||
|                             <li class="standard-list-item d-flex align-items-center" data-provider="voyage"> | ||||
|                                 <span class="bx bx-menu handle me-2"></span> | ||||
|                                 <strong class="flex-grow-1">Ollama</strong> | ||||
|                                 <strong class="flex-grow-1">Voyage AI</strong> | ||||
|                                 <button class="icon-action remove-provider" title="${t("ai_llm.remove_provider")}"> | ||||
|                                     <span class="bx bx-x"></span> | ||||
|                                 </button> | ||||
|                             </li> | ||||
|                             <li class="standard-list-item d-flex align-items-center" data-provider="anthropic"> | ||||
|                             <li class="standard-list-item d-flex align-items-center" data-provider="ollama"> | ||||
|                                 <span class="bx bx-menu handle me-2"></span> | ||||
|                                 <strong class="flex-grow-1">Anthropic</strong> | ||||
|                                 <strong class="flex-grow-1">Ollama</strong> | ||||
|                                 <button class="icon-action remove-provider" title="${t("ai_llm.remove_provider")}"> | ||||
|                                     <span class="bx bx-x"></span> | ||||
|                                 </button> | ||||
| @@ -560,6 +584,16 @@ export default class AiSettingsWidget extends OptionsWidget { | ||||
|             await this.updateOption('anthropicBaseUrl', $anthropicBaseUrl.val() as string); | ||||
|         }); | ||||
|  | ||||
|         const $voyageApiKey = this.$widget.find('.voyage-api-key'); | ||||
|         $voyageApiKey.on('change', async () => { | ||||
|             await this.updateOption('voyageApiKey', $voyageApiKey.val() as string); | ||||
|         }); | ||||
|  | ||||
|         const $voyageEmbeddingModel = this.$widget.find('.voyage-embedding-model'); | ||||
|         $voyageEmbeddingModel.on('change', async () => { | ||||
|             await this.updateOption('voyageEmbeddingModel', $voyageEmbeddingModel.val() as string); | ||||
|         }); | ||||
|  | ||||
|         const $ollamaBaseUrl = this.$widget.find('.ollama-base-url'); | ||||
|         $ollamaBaseUrl.on('change', async () => { | ||||
|             await this.updateOption('ollamaBaseUrl', $ollamaBaseUrl.val() as string); | ||||
| @@ -1001,7 +1035,7 @@ export default class AiSettingsWidget extends OptionsWidget { | ||||
|             if (!savedValue) return; | ||||
|  | ||||
|             // Get all available providers | ||||
|             const allProviders = ['openai', 'anthropic', 'ollama', 'local']; | ||||
|             const allProviders = ['openai', 'voyage', 'anthropic', 'ollama', 'local']; | ||||
|             const savedProviders = savedValue.split(','); | ||||
|  | ||||
|             // Find disabled providers (providers in allProviders but not in savedProviders) | ||||
| @@ -1153,6 +1187,10 @@ export default class AiSettingsWidget extends OptionsWidget { | ||||
|         this.$widget.find('.anthropic-default-model').val(options.anthropicDefaultModel || 'claude-3-opus-20240229'); | ||||
|         this.$widget.find('.anthropic-base-url').val(options.anthropicBaseUrl || 'https://api.anthropic.com/v1'); | ||||
|  | ||||
|         // Voyage Section | ||||
|         this.$widget.find('.voyage-api-key').val(options.voyageApiKey || ''); | ||||
|         this.$widget.find('.voyage-embedding-model').val(options.voyageEmbeddingModel || 'voyage-2'); | ||||
|  | ||||
|         // Ollama Section | ||||
|         this.$widget.find('.ollama-enabled').prop('checked', options.ollamaEnabled !== 'false'); | ||||
|         this.$widget.find('.ollama-base-url').val(options.ollamaBaseUrl || 'http://localhost:11434'); | ||||
| @@ -1847,7 +1885,7 @@ export default class AiSettingsWidget extends OptionsWidget { | ||||
|             if (!savedValue) return; | ||||
|  | ||||
|             // Get all available providers | ||||
|             const allProviders = ['openai', 'anthropic', 'ollama']; | ||||
|             const allProviders = ['openai', 'voyage', 'anthropic', 'ollama']; | ||||
|             const savedProviders = savedValue.split(','); | ||||
|  | ||||
|             // Find disabled providers (providers in allProviders but not in savedProviders) | ||||
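The widget hunk above is truncated right after this comment; as a sketch of the missing step (an assumption, since the filter itself is outside the visible diff), the disabled providers follow from the saved comma-separated value like this:

// Sketch only: derive disabled providers from the saved precedence string (`savedValue` above).
const allProviders = ['openai', 'voyage', 'anthropic', 'ollama', 'local'];
const savedProviders = savedValue.split(',');

// providers known to the widget but missing from the saved list are shown as disabled
const disabledProviders = allProviders.filter(provider => !savedProviders.includes(provider));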
|   | ||||
| @@ -1122,7 +1122,11 @@ | ||||
|     "layout-horizontal-description": "launcher bar is underneath the tab bar, the tab bar is now full width." | ||||
|   }, | ||||
|   "ai_llm": { | ||||
|     "title": "AI/LLM Integration", | ||||
|     "title": "AI & Embedding Settings", | ||||
|     "openai_tab": "OpenAI", | ||||
|     "anthropic_tab": "Anthropic", | ||||
|     "voyage_tab": "Voyage AI", | ||||
|     "ollama_tab": "Ollama", | ||||
|     "enable_ai": "Enable AI/LLM features", | ||||
|     "enable_ai_desc": "Enable AI features like note summarization, content generation, and other LLM capabilities", | ||||
|     "enable_ai_features": "Enable AI/LLM features", | ||||
| @@ -1149,8 +1153,10 @@ | ||||
|     "openai_url_description": "Default: https://api.openai.com/v1", | ||||
|     "anthropic_configuration": "Anthropic Configuration", | ||||
|     "anthropic_model_description": "Examples: claude-3-opus-20240229, claude-3-sonnet-20240229", | ||||
|     "anthropic_embedding_model_description": "Anthropic embedding model (not available yet)", | ||||
|     "anthropic_url_description": "Default: https://api.anthropic.com/v1", | ||||
|     "voyage_embedding_model_description": "Voyage AI embedding models for text embeddings (voyage-2 recommended)", | ||||
|     "voyage_configuration": "Voyage AI Configuration", | ||||
|     "voyage_api_key_description": "Your Voyage AI API key for generating embeddings", | ||||
|     "voyage_url_description": "Default: https://api.voyageai.com/v1", | ||||
|     "ollama_configuration": "Ollama Configuration", | ||||
|     "enable_ollama": "Enable Ollama", | ||||
|     "enable_ollama_description": "Enable Ollama for local AI model usage", | ||||
|   | ||||
| @@ -21,6 +21,7 @@ export const LLM_CONSTANTS = { | ||||
|         OLLAMA: 6000, | ||||
|         OPENAI: 12000, | ||||
|         ANTHROPIC: 15000, | ||||
|         VOYAGE: 12000, | ||||
|         DEFAULT: 6000 | ||||
|     }, | ||||
|  | ||||
| @@ -38,6 +39,9 @@ export const LLM_CONSTANTS = { | ||||
|         ANTHROPIC: { | ||||
|             CLAUDE: 1024, | ||||
|             DEFAULT: 1024 | ||||
|         }, | ||||
|         VOYAGE: { | ||||
|             DEFAULT: 1024 | ||||
|         } | ||||
|     }, | ||||
|  | ||||
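These constants add Voyage entries next to the existing providers. Assuming the first block belongs to LLM_CONSTANTS.CONTEXT_WINDOW and the second to LLM_CONSTANTS.EMBEDDING_DIMENSIONS (the key names referenced by the provider code elsewhere in this commit), the new defaults can be read as:

import { LLM_CONSTANTS } from "./routes/api/llm.js"; // relative path varies per caller

// Assumed key names; the values are the ones added in the hunk above.
const voyageContextWindow = LLM_CONSTANTS.CONTEXT_WINDOW.VOYAGE;           // 12000
const voyageDimension = LLM_CONSTANTS.EMBEDDING_DIMENSIONS.VOYAGE.DEFAULT; // 1024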
|   | ||||
| @@ -87,7 +87,8 @@ const ALLOWED_OPTIONS = new Set([ | ||||
|     "openaiBaseUrl", | ||||
|     "anthropicApiKey", | ||||
|     "anthropicDefaultModel", | ||||
|     "anthropicEmbeddingModel", | ||||
|     "voyageEmbeddingModel", | ||||
|     "voyageApiKey", | ||||
|     "anthropicBaseUrl", | ||||
|     "ollamaEnabled", | ||||
|     "ollamaBaseUrl", | ||||
|   | ||||
| @@ -6,7 +6,8 @@ import { randomString } from "../../utils.js"; | ||||
| import type { EmbeddingProvider, EmbeddingConfig } from "./embeddings_interface.js"; | ||||
| import { OpenAIEmbeddingProvider } from "./providers/openai.js"; | ||||
| import { OllamaEmbeddingProvider } from "./providers/ollama.js"; | ||||
| import { AnthropicEmbeddingProvider } from "./providers/anthropic.js"; | ||||
| import { VoyageEmbeddingProvider } from "./providers/voyage.js"; | ||||
| import type { OptionDefinitions } from "../../options_interface.js"; | ||||
|  | ||||
| /** | ||||
|  * Simple local embedding provider implementation | ||||
| @@ -250,29 +251,29 @@ export async function initializeDefaultProviders() { | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // Register Anthropic provider if API key is configured | ||||
|         const anthropicApiKey = await options.getOption('anthropicApiKey'); | ||||
|         if (anthropicApiKey) { | ||||
|             const anthropicModel = await options.getOption('anthropicDefaultModel') || 'claude-3-haiku-20240307'; | ||||
|             const anthropicBaseUrl = await options.getOption('anthropicBaseUrl') || 'https://api.anthropic.com/v1'; | ||||
|         // Register Voyage provider if API key is configured | ||||
|         const voyageApiKey = await options.getOption('voyageApiKey' as any); | ||||
|         if (voyageApiKey) { | ||||
|             const voyageModel = await options.getOption('voyageEmbeddingModel') || 'voyage-2'; | ||||
|             const voyageBaseUrl = 'https://api.voyageai.com/v1'; | ||||
|  | ||||
|             registerEmbeddingProvider(new AnthropicEmbeddingProvider({ | ||||
|                 model: anthropicModel, | ||||
|                 dimension: 1024, // Anthropic's embedding dimension | ||||
|             registerEmbeddingProvider(new VoyageEmbeddingProvider({ | ||||
|                 model: voyageModel, | ||||
|                 dimension: 1024, // Voyage's embedding dimension | ||||
|                 type: 'float32', | ||||
|                 apiKey: anthropicApiKey, | ||||
|                 baseUrl: anthropicBaseUrl | ||||
|                 apiKey: voyageApiKey, | ||||
|                 baseUrl: voyageBaseUrl | ||||
|             })); | ||||
|  | ||||
|             // Create Anthropic provider config if it doesn't exist | ||||
|             const existingAnthropic = await sql.getRow( | ||||
|             // Create Voyage provider config if it doesn't exist | ||||
|             const existingVoyage = await sql.getRow( | ||||
|                 "SELECT * FROM embedding_providers WHERE name = ?", | ||||
|                 ['anthropic'] | ||||
|                 ['voyage'] | ||||
|             ); | ||||
|  | ||||
|             if (!existingAnthropic) { | ||||
|                 await createEmbeddingProviderConfig('anthropic', { | ||||
|                     model: anthropicModel, | ||||
|             if (!existingVoyage) { | ||||
|                 await createEmbeddingProviderConfig('voyage', { | ||||
|                     model: voyageModel, | ||||
|                     dimension: 1024, | ||||
|                     type: 'float32' | ||||
|                 }, true, 75); | ||||
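Since the hunk interleaves the removed Anthropic branch with the added Voyage one, the resulting registration path reads more easily flattened out. A condensed sketch, with the enclosing try/catch and logging omitted; everything shown comes from the hunk above:

// Register the Voyage embedding provider only when an API key has been saved.
const voyageApiKey = await options.getOption('voyageApiKey');
if (voyageApiKey) {
    const voyageModel = await options.getOption('voyageEmbeddingModel') || 'voyage-2';

    registerEmbeddingProvider(new VoyageEmbeddingProvider({
        model: voyageModel,
        dimension: 1024,                        // voyage-2 embedding dimension
        type: 'float32',
        apiKey: voyageApiKey,
        baseUrl: 'https://api.voyageai.com/v1'
    }));

    // Seed a provider row once so 'voyage' appears in the precedence settings.
    const existingVoyage = await sql.getRow(
        "SELECT * FROM embedding_providers WHERE name = ?",
        ['voyage']
    );

    if (!existingVoyage) {
        await createEmbeddingProviderConfig('voyage', {
            model: voyageModel,
            dimension: 1024,
            type: 'float32'
        }, true, 75);
    }
}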
|   | ||||
src/services/llm/embeddings/providers/anthropic.ts (deleted file, 218 lines)
| @@ -1,218 +0,0 @@ | ||||
| import axios from "axios"; | ||||
| import log from "../../../log.js"; | ||||
| import { BaseEmbeddingProvider } from "../base_embeddings.js"; | ||||
| import type { EmbeddingConfig, EmbeddingModelInfo } from "../embeddings_interface.js"; | ||||
| import { LLM_CONSTANTS } from "../../../../routes/api/llm.js"; | ||||
|  | ||||
| // Anthropic model context window sizes - as of current API version | ||||
| const ANTHROPIC_MODEL_CONTEXT_WINDOWS: Record<string, number> = { | ||||
|     "claude-3-opus-20240229": 200000, | ||||
|     "claude-3-sonnet-20240229": 180000, | ||||
|     "claude-3-haiku-20240307": 48000, | ||||
|     "claude-2.1": 200000, | ||||
|     "claude-2.0": 100000, | ||||
|     "claude-instant-1.2": 100000, | ||||
|     "default": 100000 | ||||
| }; | ||||
|  | ||||
| /** | ||||
|  * Anthropic embedding provider implementation | ||||
|  */ | ||||
| export class AnthropicEmbeddingProvider extends BaseEmbeddingProvider { | ||||
|     name = "anthropic"; | ||||
|  | ||||
|     constructor(config: EmbeddingConfig) { | ||||
|         super(config); | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Initialize the provider by detecting model capabilities | ||||
|      */ | ||||
|     async initialize(): Promise<void> { | ||||
|         const modelName = this.config.model || "claude-3-haiku-20240307"; | ||||
|         try { | ||||
|             // Detect model capabilities | ||||
|             const modelInfo = await this.getModelInfo(modelName); | ||||
|  | ||||
|             // Update the config dimension | ||||
|             this.config.dimension = modelInfo.dimension; | ||||
|  | ||||
|             log.info(`Anthropic model ${modelName} initialized with dimension ${this.config.dimension} and context window ${modelInfo.contextWindow}`); | ||||
|         } catch (error: any) { | ||||
|             log.error(`Error initializing Anthropic provider: ${error.message}`); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Try to determine Anthropic model capabilities | ||||
|      * Note: Anthropic doesn't have a public endpoint for model metadata, so we use a combination | ||||
|      * of known values and detection by test embeddings | ||||
|      */ | ||||
|     private async fetchModelCapabilities(modelName: string): Promise<EmbeddingModelInfo | null> { | ||||
|         // Anthropic doesn't have a model info endpoint, but we can look up known context sizes | ||||
|         // and detect embedding dimensions by making a test request | ||||
|  | ||||
|         try { | ||||
|             // Get context window size from our local registry of known models | ||||
|             const modelBase = Object.keys(ANTHROPIC_MODEL_CONTEXT_WINDOWS).find( | ||||
|                 model => modelName.startsWith(model) | ||||
|             ) || "default"; | ||||
|  | ||||
|             const contextWindow = ANTHROPIC_MODEL_CONTEXT_WINDOWS[modelBase]; | ||||
|  | ||||
|             // For embedding dimension, we'll return null and let getModelInfo detect it | ||||
|             return { | ||||
|                 dimension: 0, // Will be detected by test embedding | ||||
|                 contextWindow | ||||
|             }; | ||||
|         } catch (error) { | ||||
|             log.info(`Could not determine capabilities for Anthropic model ${modelName}: ${error}`); | ||||
|             return null; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Get model information including embedding dimensions | ||||
|      */ | ||||
|     async getModelInfo(modelName: string): Promise<EmbeddingModelInfo> { | ||||
|         // Check cache first | ||||
|         if (this.modelInfoCache.has(modelName)) { | ||||
|             return this.modelInfoCache.get(modelName); | ||||
|         } | ||||
|  | ||||
|         // Try to determine model capabilities | ||||
|         const capabilities = await this.fetchModelCapabilities(modelName); | ||||
|         const contextWindow = capabilities?.contextWindow || LLM_CONSTANTS.CONTEXT_WINDOW.ANTHROPIC; | ||||
|  | ||||
|         // For Anthropic, we need to detect embedding dimension with a test call | ||||
|         try { | ||||
|             // Detect dimension with a test embedding | ||||
|             const testEmbedding = await this.generateEmbeddings("Test"); | ||||
|             const dimension = testEmbedding.length; | ||||
|  | ||||
|             const modelInfo: EmbeddingModelInfo = { | ||||
|                 dimension, | ||||
|                 contextWindow | ||||
|             }; | ||||
|  | ||||
|             this.modelInfoCache.set(modelName, modelInfo); | ||||
|             this.config.dimension = dimension; | ||||
|  | ||||
|             log.info(`Detected Anthropic model ${modelName} with dimension ${dimension} (context: ${contextWindow})`); | ||||
|             return modelInfo; | ||||
|         } catch (error: any) { | ||||
|             // If detection fails, use defaults | ||||
|             const dimension = LLM_CONSTANTS.EMBEDDING_DIMENSIONS.ANTHROPIC.DEFAULT; | ||||
|  | ||||
|             log.info(`Using default parameters for Anthropic model ${modelName}: dimension ${dimension}, context ${contextWindow}`); | ||||
|  | ||||
|             const modelInfo: EmbeddingModelInfo = { dimension, contextWindow }; | ||||
|             this.modelInfoCache.set(modelName, modelInfo); | ||||
|             this.config.dimension = dimension; | ||||
|  | ||||
|             return modelInfo; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Generate embeddings for a single text | ||||
|      */ | ||||
|     async generateEmbeddings(text: string): Promise<Float32Array> { | ||||
|         try { | ||||
|             if (!text.trim()) { | ||||
|                 return new Float32Array(this.config.dimension); | ||||
|             } | ||||
|  | ||||
|             // Get model info to check context window | ||||
|             const modelName = this.config.model || "claude-3-haiku-20240307"; | ||||
|             const modelInfo = await this.getModelInfo(modelName); | ||||
|  | ||||
|             // Trim text if it might exceed context window (rough character estimate) | ||||
|             const charLimit = modelInfo.contextWindow * 4; // Rough estimate: avg 4 chars per token | ||||
|             const trimmedText = text.length > charLimit ? text.substring(0, charLimit) : text; | ||||
|  | ||||
|             const response = await axios.post( | ||||
|                 `${this.baseUrl}/embeddings`, | ||||
|                 { | ||||
|                     model: modelName, | ||||
|                     input: trimmedText, | ||||
|                     encoding_format: "float" | ||||
|                 }, | ||||
|                 { | ||||
|                     headers: { | ||||
|                         "Content-Type": "application/json", | ||||
|                         "x-api-key": this.apiKey, | ||||
|                         "anthropic-version": "2023-06-01" | ||||
|                     } | ||||
|                 } | ||||
|             ); | ||||
|  | ||||
|             if (response.data && response.data.embedding) { | ||||
|                 return new Float32Array(response.data.embedding); | ||||
|             } else { | ||||
|                 throw new Error("Unexpected response structure from Anthropic API"); | ||||
|             } | ||||
|         } catch (error: any) { | ||||
|             const errorMessage = error.response?.data?.error?.message || error.message || "Unknown error"; | ||||
|             log.error(`Anthropic embedding error: ${errorMessage}`); | ||||
|             throw new Error(`Anthropic embedding error: ${errorMessage}`); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * More specific implementation of batch size error detection for Anthropic | ||||
|      */ | ||||
|     protected isBatchSizeError(error: any): boolean { | ||||
|         const errorMessage = error?.message || error?.response?.data?.error?.message || ''; | ||||
|         const anthropicBatchSizeErrorPatterns = [ | ||||
|             'batch size', 'too many inputs', 'context length exceeded', | ||||
|             'token limit', 'rate limit', 'limit exceeded', | ||||
|             'too long', 'request too large', 'content too large' | ||||
|         ]; | ||||
|  | ||||
|         return anthropicBatchSizeErrorPatterns.some(pattern => | ||||
|             errorMessage.toLowerCase().includes(pattern.toLowerCase()) | ||||
|         ); | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Generate embeddings for multiple texts in a single batch | ||||
|      * | ||||
|      * Note: Anthropic doesn't currently support batch embedding, so we process each text individually | ||||
|      * but using the adaptive batch processor to handle errors and retries | ||||
|      */ | ||||
|     async generateBatchEmbeddings(texts: string[]): Promise<Float32Array[]> { | ||||
|         if (texts.length === 0) { | ||||
|             return []; | ||||
|         } | ||||
|  | ||||
|         try { | ||||
|             return await this.processWithAdaptiveBatch( | ||||
|                 texts, | ||||
|                 async (batch) => { | ||||
|                     const results: Float32Array[] = []; | ||||
|  | ||||
|                     // For Anthropic, we have to process one at a time | ||||
|                     for (const text of batch) { | ||||
|                         // Skip empty texts | ||||
|                         if (!text.trim()) { | ||||
|                             results.push(new Float32Array(this.config.dimension)); | ||||
|                             continue; | ||||
|                         } | ||||
|  | ||||
|                         const embedding = await this.generateEmbeddings(text); | ||||
|                         results.push(embedding); | ||||
|                     } | ||||
|  | ||||
|                     return results; | ||||
|                 }, | ||||
|                 this.isBatchSizeError | ||||
|             ); | ||||
|         } | ||||
|         catch (error: any) { | ||||
|             const errorMessage = error.message || "Unknown error"; | ||||
|             log.error(`Anthropic batch embedding error: ${errorMessage}`); | ||||
|             throw new Error(`Anthropic batch embedding error: ${errorMessage}`); | ||||
|         } | ||||
|     } | ||||
| } | ||||
src/services/llm/embeddings/providers/voyage.ts (new file, 254 lines)
							| @@ -0,0 +1,254 @@ | ||||
| import axios from "axios"; | ||||
| import log from "../../../log.js"; | ||||
| import { BaseEmbeddingProvider } from "../base_embeddings.js"; | ||||
| import type { EmbeddingConfig, EmbeddingModelInfo } from "../embeddings_interface.js"; | ||||
| import { LLM_CONSTANTS } from "../../../../routes/api/llm.js"; | ||||
|  | ||||
| // Voyage model context window sizes - as of current API version | ||||
| const VOYAGE_MODEL_CONTEXT_WINDOWS: Record<string, number> = { | ||||
|     "voyage-large-2": 8192, | ||||
|     "voyage-2": 8192, | ||||
|     "default": 8192 | ||||
| }; | ||||
|  | ||||
| // Voyage embedding dimensions | ||||
| const VOYAGE_MODEL_DIMENSIONS: Record<string, number> = { | ||||
|     "voyage-large-2": 1536, | ||||
|     "voyage-2": 1024, | ||||
|     "default": 1024 | ||||
| }; | ||||
|  | ||||
| /** | ||||
|  * Voyage AI embedding provider implementation | ||||
|  */ | ||||
| export class VoyageEmbeddingProvider extends BaseEmbeddingProvider { | ||||
|     name = "voyage"; | ||||
|  | ||||
|     constructor(config: EmbeddingConfig) { | ||||
|         super(config); | ||||
|  | ||||
|         // Set default base URL if not provided | ||||
|         if (!this.baseUrl) { | ||||
|             this.baseUrl = "https://api.voyageai.com/v1"; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Initialize the provider by detecting model capabilities | ||||
|      */ | ||||
|     async initialize(): Promise<void> { | ||||
|         const modelName = this.config.model || "voyage-2"; | ||||
|         try { | ||||
|             // Detect model capabilities | ||||
|             const modelInfo = await this.getModelInfo(modelName); | ||||
|  | ||||
|             // Update the config dimension | ||||
|             this.config.dimension = modelInfo.dimension; | ||||
|  | ||||
|             log.info(`Voyage AI model ${modelName} initialized with dimension ${this.config.dimension} and context window ${modelInfo.contextWindow}`); | ||||
|         } catch (error: any) { | ||||
|             log.error(`Error initializing Voyage AI provider: ${error.message}`); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Try to determine Voyage AI model capabilities | ||||
|      */ | ||||
|     private async fetchModelCapabilities(modelName: string): Promise<EmbeddingModelInfo | null> { | ||||
|         try { | ||||
|             // Get context window size from our local registry of known models | ||||
|             const modelBase = Object.keys(VOYAGE_MODEL_CONTEXT_WINDOWS).find( | ||||
|                 model => modelName.startsWith(model) | ||||
|             ) || "default"; | ||||
|  | ||||
|             const contextWindow = VOYAGE_MODEL_CONTEXT_WINDOWS[modelBase]; | ||||
|  | ||||
|             // Get dimension from our registry of known models | ||||
|             const dimension = VOYAGE_MODEL_DIMENSIONS[modelBase] || VOYAGE_MODEL_DIMENSIONS.default; | ||||
|  | ||||
|             return { | ||||
|                 dimension, | ||||
|                 contextWindow | ||||
|             }; | ||||
|         } catch (error) { | ||||
|             log.info(`Could not determine capabilities for Voyage AI model ${modelName}: ${error}`); | ||||
|             return null; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Get model information including embedding dimensions | ||||
|      */ | ||||
|     async getModelInfo(modelName: string): Promise<EmbeddingModelInfo> { | ||||
|         // Check cache first | ||||
|         if (this.modelInfoCache.has(modelName)) { | ||||
|             return this.modelInfoCache.get(modelName); | ||||
|         } | ||||
|  | ||||
|         // Try to determine model capabilities | ||||
|         const capabilities = await this.fetchModelCapabilities(modelName); | ||||
|         const contextWindow = capabilities?.contextWindow || 8192; // Default context window for Voyage | ||||
|         const knownDimension = capabilities?.dimension || 1024; // Default dimension for Voyage models | ||||
|  | ||||
|         // For Voyage, we can use known dimensions or detect with a test call | ||||
|         try { | ||||
|             if (knownDimension) { | ||||
|                 // Use known dimension | ||||
|                 const modelInfo: EmbeddingModelInfo = { | ||||
|                     dimension: knownDimension, | ||||
|                     contextWindow | ||||
|                 }; | ||||
|  | ||||
|                 this.modelInfoCache.set(modelName, modelInfo); | ||||
|                 this.config.dimension = knownDimension; | ||||
|  | ||||
|                 log.info(`Using known parameters for Voyage AI model ${modelName}: dimension ${knownDimension}, context ${contextWindow}`); | ||||
|                 return modelInfo; | ||||
|             } else { | ||||
|                 // Detect dimension with a test embedding as fallback | ||||
|                 const testEmbedding = await this.generateEmbeddings("Test"); | ||||
|                 const dimension = testEmbedding.length; | ||||
|  | ||||
|                 const modelInfo: EmbeddingModelInfo = { | ||||
|                     dimension, | ||||
|                     contextWindow | ||||
|                 }; | ||||
|  | ||||
|                 this.modelInfoCache.set(modelName, modelInfo); | ||||
|                 this.config.dimension = dimension; | ||||
|  | ||||
|                 log.info(`Detected Voyage AI model ${modelName} with dimension ${dimension} (context: ${contextWindow})`); | ||||
|                 return modelInfo; | ||||
|             } | ||||
|         } catch (error: any) { | ||||
|             // If detection fails, use defaults | ||||
|             const dimension = 1024; // Default for Voyage models | ||||
|  | ||||
|             log.info(`Using default parameters for Voyage AI model ${modelName}: dimension ${dimension}, context ${contextWindow}`); | ||||
|  | ||||
|             const modelInfo: EmbeddingModelInfo = { dimension, contextWindow }; | ||||
|             this.modelInfoCache.set(modelName, modelInfo); | ||||
|             this.config.dimension = dimension; | ||||
|  | ||||
|             return modelInfo; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Generate embeddings for a single text | ||||
|      */ | ||||
|     async generateEmbeddings(text: string): Promise<Float32Array> { | ||||
|         try { | ||||
|             if (!text.trim()) { | ||||
|                 return new Float32Array(this.config.dimension); | ||||
|             } | ||||
|  | ||||
|             // Get model info to check context window | ||||
|             const modelName = this.config.model || "voyage-2"; | ||||
|             const modelInfo = await this.getModelInfo(modelName); | ||||
|  | ||||
|             // Trim text if it might exceed context window (rough character estimate) | ||||
|             const charLimit = modelInfo.contextWindow * 4; // Rough estimate: avg 4 chars per token | ||||
|             const trimmedText = text.length > charLimit ? text.substring(0, charLimit) : text; | ||||
|  | ||||
|             const response = await axios.post( | ||||
|                 `${this.baseUrl}/embeddings`, | ||||
|                 { | ||||
|                     model: modelName, | ||||
|                     input: trimmedText, | ||||
|                     input_type: "text", | ||||
|                     truncation: true | ||||
|                 }, | ||||
|                 { | ||||
|                     headers: { | ||||
|                         "Content-Type": "application/json", | ||||
|                         "Authorization": `Bearer ${this.apiKey}` | ||||
|                     } | ||||
|                 } | ||||
|             ); | ||||
|  | ||||
|             if (response.data && response.data.data && response.data.data[0] && response.data.data[0].embedding) { | ||||
|                 return new Float32Array(response.data.data[0].embedding); | ||||
|             } else { | ||||
|                 throw new Error("Unexpected response structure from Voyage AI API"); | ||||
|             } | ||||
|         } catch (error: any) { | ||||
|             const errorMessage = error.response?.data?.error?.message || error.message || "Unknown error"; | ||||
|             log.error(`Voyage AI embedding error: ${errorMessage}`); | ||||
|             throw new Error(`Voyage AI embedding error: ${errorMessage}`); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * More specific implementation of batch size error detection for Voyage AI | ||||
|      */ | ||||
|     protected isBatchSizeError(error: any): boolean { | ||||
|         const errorMessage = error?.message || error?.response?.data?.error?.message || ''; | ||||
|         const voyageBatchSizeErrorPatterns = [ | ||||
|             'batch size', 'too many inputs', 'context length exceeded', | ||||
|             'token limit', 'rate limit', 'limit exceeded', | ||||
|             'too long', 'request too large', 'content too large' | ||||
|         ]; | ||||
|  | ||||
|         return voyageBatchSizeErrorPatterns.some(pattern => | ||||
|             errorMessage.toLowerCase().includes(pattern.toLowerCase()) | ||||
|         ); | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Generate embeddings for multiple texts in a single batch | ||||
|      */ | ||||
|     async generateBatchEmbeddings(texts: string[]): Promise<Float32Array[]> { | ||||
|         if (texts.length === 0) { | ||||
|             return []; | ||||
|         } | ||||
|  | ||||
|         try { | ||||
|             return await this.processWithAdaptiveBatch( | ||||
|                 texts, | ||||
|                 async (batch) => { | ||||
|                     if (batch.length === 0) return []; | ||||
|                     if (batch.length === 1) { | ||||
|                         return [await this.generateEmbeddings(batch[0])]; | ||||
|                     } | ||||
|  | ||||
|                     // For Voyage AI, we can batch embeddings | ||||
|                     const modelName = this.config.model || "voyage-2"; | ||||
|  | ||||
|                     // Replace empty or whitespace-only texts with a single space so the batch keeps its length and order | ||||
|                     const validBatch = batch.map(text => text.trim() || " "); | ||||
|  | ||||
|                     const response = await axios.post( | ||||
|                         `${this.baseUrl}/embeddings`, | ||||
|                         { | ||||
|                             model: modelName, | ||||
|                             input: validBatch, | ||||
|                             input_type: "text", | ||||
|                             truncation: true | ||||
|                         }, | ||||
|                         { | ||||
|                             headers: { | ||||
|                                 "Content-Type": "application/json", | ||||
|                                 "Authorization": `Bearer ${this.apiKey}` | ||||
|                             } | ||||
|                         } | ||||
|                     ); | ||||
|  | ||||
|                     if (response.data && response.data.data && Array.isArray(response.data.data)) { | ||||
|                         return response.data.data.map((item: any) => | ||||
|                             new Float32Array(item.embedding || []) | ||||
|                         ); | ||||
|                     } else { | ||||
|                         throw new Error("Unexpected response structure from Voyage AI batch API"); | ||||
|                     } | ||||
|                 }, | ||||
|                 this.isBatchSizeError | ||||
|             ); | ||||
|         } | ||||
|         catch (error: any) { | ||||
|             const errorMessage = error.message || "Unknown error"; | ||||
|             log.error(`Voyage AI batch embedding error: ${errorMessage}`); | ||||
|             throw new Error(`Voyage AI batch embedding error: ${errorMessage}`); | ||||
|         } | ||||
|     } | ||||
| } | ||||
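For completeness, a minimal standalone usage sketch of the new provider. The environment variable name and the idea of constructing the provider directly (rather than going through initializeDefaultProviders) are illustrative assumptions; the config fields and method names match the class above:

import { VoyageEmbeddingProvider } from "./src/services/llm/embeddings/providers/voyage.js";

async function demo() {
    const provider = new VoyageEmbeddingProvider({
        model: "voyage-2",
        dimension: 1024,
        type: "float32",
        apiKey: process.env.VOYAGE_API_KEY ?? "",   // hypothetical env var holding the key
        baseUrl: "https://api.voyageai.com/v1"
    });

    await provider.initialize(); // resolves dimension and context window from the registries above

    const single = await provider.generateEmbeddings("Trilium note content");
    const batch = await provider.generateBatchEmbeddings(["first note", "second note"]);

    console.log(single.length, batch.length); // expected: 1024 2
}

demo().catch(console.error);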
| @@ -176,7 +176,8 @@ const defaultOptions: DefaultOption[] = [ | ||||
|     { name: "openaiBaseUrl", value: "https://api.openai.com/v1", isSynced: true }, | ||||
|     { name: "anthropicApiKey", value: "", isSynced: false }, | ||||
|     { name: "anthropicDefaultModel", value: "claude-3-opus-20240229", isSynced: true }, | ||||
|     { name: "anthropicEmbeddingModel", value: "", isSynced: true }, | ||||
|     { name: "voyageEmbeddingModel", value: "voyage-2", isSynced: true }, | ||||
|     { name: "voyageApiKey", value: "", isSynced: false }, | ||||
|     { name: "anthropicBaseUrl", value: "https://api.anthropic.com/v1", isSynced: true }, | ||||
|     { name: "ollamaEnabled", value: "false", isSynced: true }, | ||||
|     { name: "ollamaDefaultModel", value: "llama3", isSynced: true }, | ||||
| @@ -189,7 +190,7 @@ const defaultOptions: DefaultOption[] = [ | ||||
|     { name: "aiSystemPrompt", value: "", isSynced: true }, | ||||
|     { name: "aiProviderPrecedence", value: "openai,anthropic,ollama", isSynced: true }, | ||||
|     { name: "embeddingsDefaultProvider", value: "openai", isSynced: true }, | ||||
|     { name: "embeddingProviderPrecedence", value: "openai,ollama", isSynced: true }, | ||||
|     { name: "embeddingProviderPrecedence", value: "openai,voyage,ollama", isSynced: true }, | ||||
|     { name: "embeddingDimensionStrategy", value: "adapt", isSynced: true }, | ||||
|     { name: "enableAutomaticIndexing", value: "true", isSynced: true }, | ||||
|     { name: "embeddingSimilarityThreshold", value: "0.65", isSynced: true }, | ||||
|   | ||||
| @@ -57,7 +57,8 @@ export interface OptionDefinitions extends KeyboardShortcutsOptions<KeyboardActi | ||||
|     openaiBaseUrl: string; | ||||
|     anthropicApiKey: string; | ||||
|     anthropicDefaultModel: string; | ||||
|     anthropicEmbeddingModel: string; | ||||
|     voyageEmbeddingModel: string; | ||||
|     voyageApiKey: string; | ||||
|     anthropicBaseUrl: string; | ||||
|     ollamaEnabled: boolean; | ||||
|     ollamaBaseUrl: string; | ||||
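Together with the defaults seeded above, these interface entries make the new settings readable like any other typed option; a brief sketch following the getOption calls used earlier in this diff (the import path is an assumption):

import options from "./src/services/options.js"; // assumed location of the options service

async function voyageSettings() {
    // Falls back to the seeded defaults ("" and "voyage-2") until the user saves values.
    const apiKey = await options.getOption('voyageApiKey');        // stored unsynced, per defaultOptions above
    const model = await options.getOption('voyageEmbeddingModel'); // synced, defaults to "voyage-2"
    return { apiKey, model };
}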
|   | ||||