do a wayyy better job at building the messages with context

This commit is contained in:
perf3ct
2025-03-28 22:50:15 +00:00
parent ea4d3ac800
commit 72c380b6f4
9 changed files with 856 additions and 468 deletions

View File

@@ -956,8 +956,8 @@ async function sendMessage(req: Request, res: Response) {
     log.info(`Context ends with: "...${context.substring(context.length - 200)}"`);
     log.info(`Number of notes included: ${sourceNotes.length}`);

-    // Get messages with context properly formatted for the specific LLM provider
-    const aiMessages = contextService.buildMessagesWithContext(
+    // Format messages for the LLM using the proper context
+    const aiMessages = await contextService.buildMessagesWithContext(
         session.messages.slice(-LLM_CONSTANTS.SESSION.MAX_SESSION_MESSAGES).map(msg => ({
             role: msg.role,
             content: msg.content
@@ -1104,7 +1104,7 @@ async function sendMessage(req: Request, res: Response) {
     const context = buildContextFromNotes(relevantNotes, messageContent);

     // Get messages with context properly formatted for the specific LLM provider
-    const aiMessages = contextService.buildMessagesWithContext(
+    const aiMessages = await contextService.buildMessagesWithContext(
         session.messages.slice(-LLM_CONSTANTS.SESSION.MAX_SESSION_MESSAGES).map(msg => ({
             role: msg.role,
             content: msg.content

View File

@@ -265,10 +265,10 @@ export class ChatService {
     );

     // Create messages array with context using the improved method
-    const messagesWithContext = contextService.buildMessagesWithContext(
+    const messagesWithContext = await contextService.buildMessagesWithContext(
         session.messages,
         enhancedContext,
-        aiServiceManager.getService() // Get the default service
+        aiServiceManager.getService()
     );

     // Generate AI response

View File

@@ -11,6 +11,7 @@ import { ContextExtractor } from './context/index.js';
 import type { NoteSearchResult } from './interfaces/context_interfaces.js';
 import type { Message } from './ai_interface.js';
 import type { LLMServiceInterface } from './interfaces/agent_tool_interfaces.js';
+import { MessageFormatterFactory } from './interfaces/message_formatter.js';

 /**
  * Main Context Service for Trilium Notes
@@ -189,72 +190,62 @@ class TriliumContextService {
     }

     /**
-     * Build messages with proper context for an LLM-enhanced chat
-     * This takes a set of messages and adds context in the appropriate format for each LLM provider
-     *
-     * @param messages Array of messages to enhance with context
-     * @param context The context to add (built from relevant notes)
-     * @param llmService The LLM service to format messages for
-     * @returns Promise resolving to the messages array with context properly integrated
+     * Builds messages with context for LLM service
      */
-    buildMessagesWithContext(messages: Message[], context: string, llmService: LLMServiceInterface): Message[] {
-        // For simple conversations just add context to the system message
+    async buildMessagesWithContext(
+        messages: Message[],
+        context: string,
+        llmService: LLMServiceInterface
+    ): Promise<Message[]> {
         try {
             if (!messages || messages.length === 0) {
-                return [{ role: 'system', content: context }];
+                log.info('No messages provided to buildMessagesWithContext');
+                return [];
             }

-            const result: Message[] = [];
-            let hasSystemMessage = false;
-
-            // First pass: identify if there's a system message
-            for (const msg of messages) {
-                if (msg.role === 'system') {
-                    hasSystemMessage = true;
-                    break;
-                }
-            }
-
-            // If we have a system message, prepend context to it
-            // Otherwise create a new system message with the context
-            if (hasSystemMessage) {
-                for (const msg of messages) {
-                    if (msg.role === 'system') {
-                        // For Ollama, use a cleaner approach with just one system message
-                        if (llmService.constructor.name === 'OllamaService') {
-                            // If this is the first system message we've seen,
-                            // add context to it, otherwise skip (Ollama handles multiple
-                            // system messages poorly)
-                            if (result.findIndex(m => m.role === 'system') === -1) {
-                                result.push({
-                                    role: 'system',
-                                    content: `${context}\n\n${msg.content}`
-                                });
-                            }
-                        } else {
-                            // For other providers, include all system messages
-                            result.push({
-                                role: 'system',
-                                content: msg.content.includes(context) ?
-                                    msg.content : // Avoid duplicate context
-                                    `${context}\n\n${msg.content}`
-                            });
-                        }
-                    } else {
-                        result.push(msg);
-                    }
-                }
-            } else {
-                // No system message found, prepend one with the context
-                result.push({ role: 'system', content: context });
-                // Add all the original messages
-                result.push(...messages);
-            }
-
-            return result;
+            if (!context || context.trim() === '') {
+                log.info('No context provided to buildMessagesWithContext, returning original messages');
+                return messages;
+            }
+
+            // Get the provider name, handling service classes and raw provider names
+            let providerName: string;
+            if (typeof llmService === 'string') {
+                // If llmService is a string, assume it's the provider name
+                providerName = llmService;
+            } else if (llmService.constructor && llmService.constructor.name) {
+                // Extract provider name from service class name (e.g., OllamaService -> ollama)
+                providerName = llmService.constructor.name.replace('Service', '').toLowerCase();
+            } else {
+                // Fallback to default
+                providerName = 'default';
+            }
+
+            log.info(`Using formatter for provider: ${providerName}`);
+
+            // Get the appropriate formatter for this provider
+            const formatter = MessageFormatterFactory.getFormatter(providerName);
+
+            // Format messages with context using the provider-specific formatter
+            const formattedMessages = formatter.formatMessages(
+                messages,
+                undefined, // No system prompt override - use what's in the messages
+                context
+            );
+
+            log.info(`Formatted ${messages.length} messages into ${formattedMessages.length} messages for ${providerName}`);
+
+            return formattedMessages;
         } catch (error) {
             log.error(`Error building messages with context: ${error}`);
-
-            // Fallback: prepend a system message with context
-            const safeMessages = Array.isArray(messages) ? messages : [];
-            return [
-                { role: 'system', content: context },
-                ...safeMessages.filter(msg => msg.role !== 'system')
-            ];
+            // Fallback to original messages in case of error
+            return messages;
         }
     }
 }
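Since the whole refactor hinges on deriving a provider key from the service instance, here is a minimal sketch of that derivation in isolation. The service class names mirror the ones this commit uses; `providerNameFor` itself is a hypothetical standalone helper, not code from the commit:

```ts
// Hypothetical helper mirroring the providerName logic above.
class OllamaService {}
class OpenAIService {}

function providerNameFor(llmService: object | string): string {
    if (typeof llmService === 'string') {
        // A raw provider name passes straight through
        return llmService;
    }
    if (llmService.constructor && llmService.constructor.name) {
        // "OllamaService" -> "ollama", "OpenAIService" -> "openai"
        return llmService.constructor.name.replace('Service', '').toLowerCase();
    }
    return 'default';
}

console.log(providerNameFor(new OllamaService())); // "ollama"
console.log(providerNameFor(new OpenAIService())); // "openai"
console.log(providerNameFor('anthropic'));         // "anthropic"
```

One caveat of keying off `constructor.name`: under minification or class renaming the derived key no longer matches a known provider, and the factory introduced below falls back to its default formatter.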

View File

@@ -0,0 +1,223 @@
import sanitizeHtml from 'sanitize-html';
import type { Message } from '../ai_interface.js';
import { BaseMessageFormatter } from './base_formatter.js';
/**
* Anthropic-specific message formatter
* Optimized for Claude's API and preferences
*/
export class AnthropicMessageFormatter extends BaseMessageFormatter {
/**
* Maximum recommended context length for Anthropic models
* Claude has a very large context window
*/
private static MAX_CONTEXT_LENGTH = 100000;
/**
* Format messages for the Anthropic API
*/
formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[] {
const formattedMessages: Message[] = [];
// For Anthropic, system prompts work best as the first user message with <instructions> XML tags
// First, collect all non-system messages
const userAssistantMessages = messages.filter(msg => msg.role === 'user' || msg.role === 'assistant');
// For Anthropic, we need to handle context differently
// 1. If explicit context is provided, we format it with XML tags
if (context) {
// Build the system message with context
const baseInstructions = this.getDefaultSystemPrompt(systemPrompt);
const formattedContext =
`<instructions>\n${baseInstructions}\n\n` +
`Use the following information from the user's notes to answer their questions:\n\n` +
`<user_notes>\n${this.cleanContextContent(context)}\n</user_notes>\n\n` +
`When responding:\n` +
`- Focus on the most relevant information from the notes\n` +
`- Be concise and direct in your answers\n` +
`- If quoting from notes, mention which note it's from\n` +
`- If the notes don't contain relevant information, say so clearly\n` +
`</instructions>`;
// If there's at least one user message, add the context to the first one
if (userAssistantMessages.length > 0 && userAssistantMessages[0].role === 'user') {
// Add system as a new first message
formattedMessages.push({
role: 'user',
content: formattedContext
});
// Add system response acknowledgment
formattedMessages.push({
role: 'assistant',
content: "I'll help you with your notes based on the context provided."
});
// Add remaining messages
for (const msg of userAssistantMessages) {
formattedMessages.push(msg);
}
}
// If no user messages, create a placeholder
else {
formattedMessages.push({
role: 'user',
content: formattedContext
});
formattedMessages.push({
role: 'assistant',
content: "I'll help you with your notes based on the context provided. What would you like to know?"
});
// Add any existing assistant messages if they exist
const assistantMsgs = userAssistantMessages.filter(msg => msg.role === 'assistant');
for (const msg of assistantMsgs) {
formattedMessages.push(msg);
}
}
}
// 2. If no explicit context but we have system messages, convert them to Claude format
else if (messages.some(msg => msg.role === 'system')) {
// Get system messages
const systemMessages = messages.filter(msg => msg.role === 'system');
// Build system content with XML tags
const systemContent =
`<instructions>\n${systemMessages.map(msg => this.cleanContextContent(msg.content)).join('\n\n')}\n</instructions>`;
// Add as first user message
formattedMessages.push({
role: 'user',
content: systemContent
});
// Add assistant acknowledgment
formattedMessages.push({
role: 'assistant',
content: "I understand. I'll follow those instructions."
});
// Add remaining user/assistant messages
for (const msg of userAssistantMessages) {
formattedMessages.push(msg);
}
}
// 3. Just a system prompt, no context
else if (systemPrompt) {
// Add as first user message with XML tags
formattedMessages.push({
role: 'user',
content: `<instructions>\n${systemPrompt}\n</instructions>`
});
// Add assistant acknowledgment
formattedMessages.push({
role: 'assistant',
content: "I understand. I'll follow those instructions."
});
// Add all other messages
for (const msg of userAssistantMessages) {
formattedMessages.push(msg);
}
}
// 4. No system prompt, use default from constants
else if (userAssistantMessages.length > 0) {
// Add default system prompt with XML tags
formattedMessages.push({
role: 'user',
content: `<instructions>\n${this.getDefaultSystemPrompt()}\n</instructions>`
});
// Add assistant acknowledgment
formattedMessages.push({
role: 'assistant',
content: "I understand. I'll follow those instructions."
});
// Add all user messages
for (const msg of userAssistantMessages) {
formattedMessages.push(msg);
}
}
// 5. No special handling needed
else {
// Just add all messages as-is
for (const msg of userAssistantMessages) {
formattedMessages.push(msg);
}
}
console.log(`Anthropic formatter: ${messages.length} messages → ${formattedMessages.length} messages`);
return formattedMessages;
}
/**
* Clean context content for Anthropic
* Claude works well with XML-structured content
*/
cleanContextContent(content: string): string {
if (!content) return '';
try {
// Convert HTML to a Claude-friendly format
const cleaned = sanitizeHtml(content, {
allowedTags: ['b', 'i', 'em', 'strong', 'a', 'p', 'br', 'ul', 'ol', 'li', 'h1', 'h2', 'h3', 'h4', 'h5', 'code', 'pre'],
allowedAttributes: {
'a': ['href']
}
});
// Convert to markdown but preserve some structure
let markdown = cleaned
.replace(/<h1[^>]*>(.*?)<\/h1>/gi, '# $1\n')
.replace(/<h2[^>]*>(.*?)<\/h2>/gi, '## $1\n')
.replace(/<h3[^>]*>(.*?)<\/h3>/gi, '### $1\n')
.replace(/<h4[^>]*>(.*?)<\/h4>/gi, '#### $1\n')
.replace(/<h5[^>]*>(.*?)<\/h5>/gi, '##### $1\n')
.replace(/<p[^>]*>(.*?)<\/p>/gi, '$1\n\n')
.replace(/<br[^>]*>/gi, '\n')
.replace(/<a[^>]*href=["'](.*?)["'][^>]*>(.*?)<\/a>/gi, '[$2]($1)')
.replace(/<strong[^>]*>(.*?)<\/strong>/gi, '**$1**')
.replace(/<b[^>]*>(.*?)<\/b>/gi, '**$1**')
.replace(/<em[^>]*>(.*?)<\/em>/gi, '*$1*')
.replace(/<i[^>]*>(.*?)<\/i>/gi, '*$1*')
.replace(/<code[^>]*>(.*?)<\/code>/gi, '`$1`')
.replace(/<pre[^>]*>(.*?)<\/pre>/gi, '```\n$1\n```')
// Process lists
.replace(/<ul[^>]*>(.*?)<\/ul>/gs, (match, content) => {
return content.replace(/<li[^>]*>(.*?)<\/li>/gi, '- $1\n');
})
.replace(/<ol[^>]*>(.*?)<\/ol>/gs, (match, content) => {
let index = 1;
return content.replace(/<li[^>]*>(.*?)<\/li>/gi, (m: string, item: string) => {
return `${index++}. ${item}\n`;
});
})
// Clean up any remaining HTML tags
.replace(/<[^>]*>/g, '')
// Clean up excessive newlines
.replace(/\n{3,}/g, '\n\n')
// Fix common HTML entities
.replace(/&nbsp;/g, ' ')
.replace(/&lt;/g, '<')
.replace(/&gt;/g, '>')
.replace(/&amp;/g, '&')
.replace(/&quot;/g, '"');
return markdown.trim();
} catch (error) {
console.error("Error cleaning content for Anthropic:", error);
return content; // Return original if cleaning fails
}
}
/**
* Get the maximum recommended context length for Anthropic
*/
getMaxContextLength(): number {
return AnthropicMessageFormatter.MAX_CONTEXT_LENGTH;
}
}
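To make the branching concrete, this is roughly what the formatter produces for a single user question plus note context. The import path is assumed from this commit's layout, and the output comments paraphrase rather than reproduce the exact strings:

```ts
import type { Message } from './ai_interface.js';
import { AnthropicMessageFormatter } from './formatters/anthropic_formatter.js';

const messages: Message[] = [{ role: 'user', content: 'What did I write about Rust?' }];
const formatted = new AnthropicMessageFormatter().formatMessages(
    messages,
    undefined,
    '<p>Rust notes: ownership, borrowing, lifetimes</p>'
);
// formatted[0] (user):      <instructions> wrapping the default prompt plus
//                           <user_notes>Rust notes: ownership, borrowing, lifetimes</user_notes>
// formatted[1] (assistant): "I'll help you with your notes based on the context provided."
// formatted[2] (user):      the original question, unchanged
```

Note that the first branch pushes the context as a new leading user message rather than merging it into the existing first message, so the original question still appears verbatim afterwards.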

View File

@@ -0,0 +1,161 @@
import sanitizeHtml from 'sanitize-html';
import type { Message } from '../ai_interface.js';
import type { MessageFormatter } from '../interfaces/message_formatter.js';
import { DEFAULT_SYSTEM_PROMPT } from '../constants/llm_prompt_constants.js';
/**
* Base formatter with common functionality for all providers
* Provider-specific formatters should extend this class
*/
export abstract class BaseMessageFormatter implements MessageFormatter {
/**
* Format messages for the LLM API
* Each provider should override this method with its specific formatting logic
*/
abstract formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[];
/**
* Get the maximum recommended context length for this provider
* Each provider should override this with appropriate value
*/
abstract getMaxContextLength(): number;
/**
* Get the default system prompt
* Uses the default prompt from constants
*/
protected getDefaultSystemPrompt(systemPrompt?: string): string {
return systemPrompt || DEFAULT_SYSTEM_PROMPT;
}
/**
* Clean context content - common method with standard HTML cleaning
* Provider-specific formatters can override for custom behavior
*/
cleanContextContent(content: string): string {
if (!content) return '';
try {
// First fix any encoding issues
const fixedContent = this.fixEncodingIssues(content);
// Convert HTML to markdown for better readability
const cleaned = sanitizeHtml(fixedContent, {
allowedTags: ['b', 'i', 'em', 'strong', 'a', 'p', 'br', 'ul', 'ol', 'li', 'h1', 'h2', 'h3', 'h4', 'h5', 'code', 'pre'],
allowedAttributes: {
'a': ['href']
},
transformTags: {
'h1': 'h2',
'h2': 'h3',
'div': 'p',
'span': 'span'
}
});
// Process inline elements to markdown
let markdown = cleaned
.replace(/<h1[^>]*>(.*?)<\/h1>/gi, '# $1\n')
.replace(/<h2[^>]*>(.*?)<\/h2>/gi, '## $1\n')
.replace(/<h3[^>]*>(.*?)<\/h3>/gi, '### $1\n')
.replace(/<h4[^>]*>(.*?)<\/h4>/gi, '#### $1\n')
.replace(/<h5[^>]*>(.*?)<\/h5>/gi, '##### $1\n')
.replace(/<p[^>]*>(.*?)<\/p>/gi, '$1\n\n')
.replace(/<br[^>]*>/gi, '\n')
.replace(/<a[^>]*href=["'](.*?)["'][^>]*>(.*?)<\/a>/gi, '[$2]($1)')
.replace(/<strong[^>]*>(.*?)<\/strong>/gi, '**$1**')
.replace(/<b[^>]*>(.*?)<\/b>/gi, '**$1**')
.replace(/<em[^>]*>(.*?)<\/em>/gi, '*$1*')
.replace(/<i[^>]*>(.*?)<\/i>/gi, '*$1*')
.replace(/<code[^>]*>(.*?)<\/code>/gi, '`$1`')
.replace(/<pre[^>]*>(.*?)<\/pre>/gi, '```\n$1\n```')
// Clean up any remaining HTML tags
.replace(/<[^>]*>/g, '')
// Clean up excessive newlines
.replace(/\n{3,}/g, '\n\n');
// Process list items
markdown = this.processListItems(markdown);
// Fix common HTML entities
markdown = markdown
.replace(/&nbsp;/g, ' ')
.replace(/&lt;/g, '<')
.replace(/&gt;/g, '>')
.replace(/&amp;/g, '&')
.replace(/&quot;/g, '"')
.replace(/&#39;/g, "'")
.replace(/&ldquo;/g, '"')
.replace(/&rdquo;/g, '"')
.replace(/&lsquo;/g, "'")
.replace(/&rsquo;/g, "'")
.replace(/&mdash;/g, '—')
.replace(/&ndash;/g, '–')
.replace(/&hellip;/g, '…');
return markdown.trim();
} catch (error) {
console.error("Error cleaning context content:", error);
return content; // Return original if cleaning fails
}
}
/**
* Process HTML list items in markdown conversion
* This is a helper method that safely processes HTML list items
*/
protected processListItems(content: string): string {
// Process unordered lists
let result = content.replace(/<ul[^>]*>([\s\S]*?)<\/ul>/gi, (match: string, listContent: string) => {
return listContent.replace(/<li[^>]*>([\s\S]*?)<\/li>/gi, '- $1\n');
});
// Process ordered lists
result = result.replace(/<ol[^>]*>([\s\S]*?)<\/ol>/gi, (match: string, listContent: string) => {
let index = 1;
return listContent.replace(/<li[^>]*>([\s\S]*?)<\/li>/gi, (itemMatch: string, item: string) => {
return `${index++}. ${item}\n`;
});
});
return result;
}
/**
* Fix common encoding issues in content
* This fixes issues like broken quote characters and other encoding problems
*
* @param content The content to fix encoding issues in
* @returns Content with encoding issues fixed
*/
protected fixEncodingIssues(content: string): string {
if (!content) return '';
try {
// Fix common encoding issues
return content
// Fix broken quote characters
.replace(/Γ\u00c2[\u00a3\u00a5]/g, '"')
// Fix other common broken unicode
.replace(/[\u{0080}-\u{FFFF}]/gu, (match) => {
// Some common replacements
const replacements: Record<string, string> = {
'\u00A0': ' ', // Non-breaking space
'\u2018': "'", // Left single quote
'\u2019': "'", // Right single quote
'\u201C': '"', // Left double quote
'\u201D': '"', // Right double quote
'\u2013': '-', // En dash
'\u2014': '--', // Em dash
'\u2022': '*', // Bullet
'\u2026': '...' // Ellipsis
};
return replacements[match] || match;
});
} catch (error) {
console.error('Error fixing encoding issues:', error);
return content; // Return original if fixing fails
}
}
}
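A small before/after for the shared cleaning pipeline; the subclass here is a stub only so the abstract base can be instantiated, and the expected output is approximate:

```ts
import type { Message } from './ai_interface.js';
import { BaseMessageFormatter } from './formatters/base_formatter.js';

// Stub subclass purely for demonstration.
class DemoFormatter extends BaseMessageFormatter {
    formatMessages(messages: Message[]): Message[] { return messages; }
    getMaxContextLength(): number { return 8000; }
}

const cleaned = new DemoFormatter().cleanContextContent(
    '<h3>Ideas</h3><p>See <a href="https://example.com">docs</a> &amp; more&hellip;</p>'
);
// cleaned ≈ "### Ideas\nSee [docs](https://example.com) & more…"
```

One ordering quirk worth noting: the catch-all `<[^>]*>` strip runs inside the main replace chain, before `processListItems` is called, so `<ul>`/`<ol>` markup appears to be removed before the list helper can convert it.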

View File

@@ -0,0 +1,120 @@
import type { Message } from '../ai_interface.js';
import { BaseMessageFormatter } from './base_formatter.js';
import sanitizeHtml from 'sanitize-html';
/**
* Ollama-specific message formatter
* Handles the unique requirements of the Ollama API
*/
export class OllamaMessageFormatter extends BaseMessageFormatter {
/**
* Maximum recommended context length for Ollama
* Smaller than other providers due to Ollama's handling of context
*/
private static MAX_CONTEXT_LENGTH = 4000;
/**
* Format messages for the Ollama API
*/
formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[] {
const formattedMessages: Message[] = [];
// First identify user and system messages
const systemMessages = messages.filter(msg => msg.role === 'system');
const userMessages = messages.filter(msg => msg.role === 'user' || msg.role === 'assistant');
// Create base system message with instructions
const basePrompt = this.getDefaultSystemPrompt(systemPrompt);
// Always add a system message with the base prompt
formattedMessages.push({
role: 'system',
content: basePrompt
});
// If we have context, inject it into the first user message
if (context && userMessages.length > 0) {
let injectedContext = false;
for (let i = 0; i < userMessages.length; i++) {
const msg = userMessages[i];
if (msg.role === 'user' && !injectedContext) {
// Simple context injection directly in the user's message
const cleanedContext = this.cleanContextContent(context);
const formattedContext =
"Here's information from my notes to help answer the question:\n\n" +
cleanedContext +
"\n\nBased on this information, please answer: " + msg.content;
formattedMessages.push({
role: 'user',
content: formattedContext
});
injectedContext = true;
} else {
formattedMessages.push(msg);
}
}
} else {
// No context, just add all messages as-is
for (const msg of userMessages) {
formattedMessages.push(msg);
}
}
console.log(`Ollama formatter processed ${messages.length} messages into ${formattedMessages.length} messages`);
return formattedMessages;
}
/**
* Clean up HTML and other problematic content before sending to Ollama
* Ollama needs a more aggressive cleaning than other models
*/
override cleanContextContent(content: string): string {
if (!content) return '';
try {
// First use the parent class to do standard cleaning
let sanitized = super.cleanContextContent(content);
// Then apply Ollama-specific aggressive cleaning
// Remove any remaining HTML using sanitizeHtml
let plaintext = sanitizeHtml(sanitized, {
allowedTags: [],
allowedAttributes: {},
textFilter: (text) => text
});
// Then aggressively sanitize to plain ASCII and simple formatting
plaintext = plaintext
// Replace common problematic quotes with simple ASCII quotes
.replace(/[""]/g, '"')
.replace(/['']/g, "'")
// Replace other common Unicode characters
.replace(/[–—]/g, '-')
.replace(/[•]/g, '*')
.replace(/[…]/g, '...')
// Strip all non-ASCII characters
.replace(/[^\x00-\x7F]/g, '')
// Normalize whitespace
.replace(/\s+/g, ' ')
.replace(/\n\s+/g, '\n')
.trim();
return plaintext;
} catch (error) {
console.error("Error cleaning context content for Ollama:", error);
return content; // Return original if cleaning fails
}
}
/**
* Get the maximum recommended context length for Ollama
*/
getMaxContextLength(): number {
return OllamaMessageFormatter.MAX_CONTEXT_LENGTH;
}
}
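For a single question plus context, the Ollama formatter therefore yields exactly two messages, sketched below (import path assumed; output comments paraphrased):

```ts
import type { Message } from './ai_interface.js';
import { OllamaMessageFormatter } from './formatters/ollama_formatter.js';

const messages: Message[] = [{ role: 'user', content: 'Summarize my project notes' }];
const out = new OllamaMessageFormatter().formatMessages(
    messages,
    undefined,
    '<p>Project: ship v1 by May</p>'
);
// out[0] (system): the default system prompt
// out[1] (user):   "Here's information from my notes to help answer the question:\n\n"
//                  + "Project: ship v1 by May"
//                  + "\n\nBased on this information, please answer: Summarize my project notes"
```

Also worth noting: incoming system messages are filtered into `systemMessages` but never re-emitted, so the single system message Ollama sees is always the base prompt. That matches the stated goal of avoiding multiple system messages, though any caller-provided system content is dropped.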

View File

@@ -0,0 +1,152 @@
import sanitizeHtml from 'sanitize-html';
import type { Message } from '../ai_interface.js';
import { BaseMessageFormatter } from './base_formatter.js';
/**
* OpenAI-specific message formatter
* Optimized for OpenAI's API requirements and preferences
*/
export class OpenAIMessageFormatter extends BaseMessageFormatter {
/**
* Maximum recommended context length for OpenAI
* Based on GPT-4 context window size
*/
private static MAX_CONTEXT_LENGTH = 16000;
/**
* Format messages for the OpenAI API
*/
formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[] {
const formattedMessages: Message[] = [];
// Check if we already have a system message
const hasSystemMessage = messages.some(msg => msg.role === 'system');
const userAssistantMessages = messages.filter(msg => msg.role === 'user' || msg.role === 'assistant');
// If we have explicit context, format it properly
if (context) {
// For OpenAI, it's best to put context in the system message
const formattedContext =
"You are an AI assistant integrated into TriliumNext Notes. " +
"Use the following information from the user's notes to answer their questions:\n\n" +
this.cleanContextContent(context) +
"\n\nFocus on relevant information from these notes when answering. " +
"Be concise and informative in your responses.";
// Add as system message
formattedMessages.push({
role: 'system',
content: formattedContext
});
}
// If we don't have explicit context but have a system prompt
else if (!hasSystemMessage && systemPrompt) {
formattedMessages.push({
role: 'system',
content: systemPrompt
});
}
// If neither context nor system prompt is provided, use default system prompt
else if (!hasSystemMessage) {
formattedMessages.push({
role: 'system',
content: this.getDefaultSystemPrompt(systemPrompt)
});
}
// Otherwise if there are existing system messages, keep them
else if (hasSystemMessage) {
// Keep any existing system messages
const systemMessages = messages.filter(msg => msg.role === 'system');
for (const msg of systemMessages) {
formattedMessages.push({
role: 'system',
content: this.cleanContextContent(msg.content)
});
}
}
// Add all user and assistant messages
for (const msg of userAssistantMessages) {
formattedMessages.push({
role: msg.role,
content: msg.content
});
}
console.log(`OpenAI formatter: ${messages.length} messages → ${formattedMessages.length} messages`);
return formattedMessages;
}
/**
* Clean context content for OpenAI
* OpenAI handles HTML better than Ollama but still benefits from some cleaning
*/
cleanContextContent(content: string): string {
if (!content) return '';
try {
// Convert HTML to Markdown for better readability
const cleaned = sanitizeHtml(content, {
allowedTags: ['b', 'i', 'em', 'strong', 'a', 'p', 'br', 'ul', 'ol', 'li', 'h1', 'h2', 'h3', 'h4', 'h5', 'code', 'pre'],
allowedAttributes: {
'a': ['href']
},
transformTags: {
'h1': 'h2',
'h2': 'h3',
'div': 'p',
'span': 'span'
}
});
// Process inline elements to markdown with simpler approach
let markdown = cleaned
.replace(/<h1[^>]*>(.*?)<\/h1>/gi, '# $1\n')
.replace(/<h2[^>]*>(.*?)<\/h2>/gi, '## $1\n')
.replace(/<h3[^>]*>(.*?)<\/h3>/gi, '### $1\n')
.replace(/<h4[^>]*>(.*?)<\/h4>/gi, '#### $1\n')
.replace(/<h5[^>]*>(.*?)<\/h5>/gi, '##### $1\n')
.replace(/<p[^>]*>(.*?)<\/p>/gi, '$1\n\n')
.replace(/<br[^>]*>/gi, '\n')
.replace(/<a[^>]*href=["'](.*?)["'][^>]*>(.*?)<\/a>/gi, '[$2]($1)')
.replace(/<strong[^>]*>(.*?)<\/strong>/gi, '**$1**')
.replace(/<b[^>]*>(.*?)<\/b>/gi, '**$1**')
.replace(/<em[^>]*>(.*?)<\/em>/gi, '*$1*')
.replace(/<i[^>]*>(.*?)<\/i>/gi, '*$1*')
.replace(/<code[^>]*>(.*?)<\/code>/gi, '`$1`')
.replace(/<pre[^>]*>(.*?)<\/pre>/gi, '```\n$1\n```')
// Clean up any remaining HTML tags
.replace(/<[^>]*>/g, '')
// Clean up excessive newlines
.replace(/\n{3,}/g, '\n\n');
// Fix common HTML entities
markdown = markdown
.replace(/&nbsp;/g, ' ')
.replace(/&lt;/g, '<')
.replace(/&gt;/g, '>')
.replace(/&amp;/g, '&')
.replace(/&quot;/g, '"')
.replace(/&#39;/g, "'")
.replace(/&ldquo;/g, '"')
.replace(/&rdquo;/g, '"')
.replace(/&lsquo;/g, "'")
.replace(/&rsquo;/g, "'")
.replace(/&mdash;/g, '—')
.replace(/&ndash;/g, '–')
.replace(/&hellip;/g, '…');
return markdown.trim();
} catch (error) {
console.error("Error cleaning content for OpenAI:", error);
return content; // Return original if cleaning fails
}
}
/**
* Get the maximum recommended context length for OpenAI
*/
getMaxContextLength(): number {
return OpenAIMessageFormatter.MAX_CONTEXT_LENGTH;
}
}
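The OpenAI path is the simplest of the three: context collapses into one leading system message and the conversation follows untouched (import path assumed; output comments paraphrased):

```ts
import type { Message } from './ai_interface.js';
import { OpenAIMessageFormatter } from './formatters/openai_formatter.js';

const messages: Message[] = [{ role: 'user', content: 'Any notes on Kubernetes?' }];
const out = new OpenAIMessageFormatter().formatMessages(
    messages,
    undefined,
    '<p>k8s: one namespace per team</p>'
);
// out[0] (system): "You are an AI assistant integrated into TriliumNext Notes. ..."
//                  including the cleaned note text "k8s: one namespace per team"
// out[1] (user):   'Any notes on Kubernetes?'
```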

View File

@@ -0,0 +1,92 @@
import type { Message } from "../ai_interface.js";
// These imports need to be added for the factory to work
import { OpenAIMessageFormatter } from "../formatters/openai_formatter.js";
import { AnthropicMessageFormatter } from "../formatters/anthropic_formatter.js";
import { OllamaMessageFormatter } from "../formatters/ollama_formatter.js";
/**
* Interface for provider-specific message formatters
* This allows each provider to have custom formatting logic while maintaining a consistent interface
*/
export interface MessageFormatter {
/**
* Format messages for a specific LLM provider
*
* @param messages Array of messages to format
* @param systemPrompt Optional system prompt to include
* @param context Optional context to incorporate into messages
* @returns Formatted messages ready to send to the provider
*/
formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[];
/**
* Clean context content to prepare it for this specific provider
*
* @param content The raw context content
* @returns Cleaned and formatted context content
*/
cleanContextContent(content: string): string;
/**
* Get the maximum recommended context length for this provider
*
* @returns Maximum context length in characters
*/
getMaxContextLength(): number;
}
/**
* Factory to get the appropriate message formatter for a provider
*/
export class MessageFormatterFactory {
// Cache formatters for reuse
private static formatters: Record<string, MessageFormatter> = {};
/**
* Get the appropriate message formatter for a provider
*
* @param providerName Name of the LLM provider (e.g., 'openai', 'anthropic', 'ollama')
* @returns MessageFormatter instance for the specified provider
*/
static getFormatter(providerName: string): MessageFormatter {
// Normalize provider name and handle variations
let providerKey: string;
// Normalize provider name from various forms (constructor.name, etc.)
if (providerName.toLowerCase().includes('openai')) {
providerKey = 'openai';
} else if (providerName.toLowerCase().includes('anthropic') ||
providerName.toLowerCase().includes('claude')) {
providerKey = 'anthropic';
} else if (providerName.toLowerCase().includes('ollama')) {
providerKey = 'ollama';
} else {
// Default to lowercase of whatever name we got
providerKey = providerName.toLowerCase();
}
// Return cached formatter if available
if (this.formatters[providerKey]) {
return this.formatters[providerKey];
}
// Create and cache new formatter
switch (providerKey) {
case 'openai':
this.formatters[providerKey] = new OpenAIMessageFormatter();
break;
case 'anthropic':
this.formatters[providerKey] = new AnthropicMessageFormatter();
break;
case 'ollama':
this.formatters[providerKey] = new OllamaMessageFormatter();
break;
default:
// Default to OpenAI formatter for unknown providers
console.warn(`No specific formatter for provider: ${providerName}. Using OpenAI formatter as default.`);
this.formatters[providerKey] = new OpenAIMessageFormatter();
}
return this.formatters[providerKey];
}
}
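A quick usage sketch of the factory's substring matching; each call returns a cached instance per normalized key:

```ts
import { MessageFormatterFactory } from './interfaces/message_formatter.js';

MessageFormatterFactory.getFormatter('OllamaService'); // OllamaMessageFormatter ("ollamaservice" contains "ollama")
MessageFormatterFactory.getFormatter('claude');        // AnthropicMessageFormatter (matched via "claude")
MessageFormatterFactory.getFormatter('OpenAI');        // OpenAIMessageFormatter
MessageFormatterFactory.getFormatter('mistral');       // warns, falls back to the OpenAI formatter
```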

View File

@@ -1,53 +1,59 @@
 import options from '../../options.js';
 import { BaseAIService } from '../base_ai_service.js';
-import type { ChatCompletionOptions, ChatResponse, Message } from '../ai_interface.js';
-import { PROVIDER_CONSTANTS } from '../constants/provider_constants.js';
+import type { Message, ChatCompletionOptions, ChatResponse } from '../ai_interface.js';
+import sanitizeHtml from 'sanitize-html';
+import { OllamaMessageFormatter } from '../formatters/ollama_formatter.js';

 interface OllamaMessage {
     role: string;
     content: string;
 }

+interface OllamaResponse {
+    model: string;
+    created_at: string;
+    message: OllamaMessage;
+    done: boolean;
+    total_duration: number;
+    load_duration: number;
+    prompt_eval_count: number;
+    prompt_eval_duration: number;
+    eval_count: number;
+    eval_duration: number;
+}
+
 export class OllamaService extends BaseAIService {
+    private formatter: OllamaMessageFormatter;
+
     constructor() {
         super('Ollama');
+        this.formatter = new OllamaMessageFormatter();
     }

     isAvailable(): boolean {
-        return super.isAvailable() &&
-            options.getOption('ollamaEnabled') === 'true' &&
-            !!options.getOption('ollamaBaseUrl');
+        return super.isAvailable() && !!options.getOption('ollamaBaseUrl');
     }

     async generateChatCompletion(messages: Message[], opts: ChatCompletionOptions = {}): Promise<ChatResponse> {
         if (!this.isAvailable()) {
-            throw new Error('Ollama service is not available. Check Ollama settings.');
+            throw new Error('Ollama service is not available. Check API URL in settings.');
         }

-        const baseUrl = options.getOption('ollamaBaseUrl') || PROVIDER_CONSTANTS.OLLAMA.BASE_URL;
-        const model = opts.model || options.getOption('ollamaDefaultModel') || PROVIDER_CONSTANTS.OLLAMA.DEFAULT_MODEL;
+        const apiBase = options.getOption('ollamaBaseUrl');
+        const model = opts.model || options.getOption('ollamaDefaultModel') || 'llama3';
         const temperature = opts.temperature !== undefined
             ? opts.temperature
             : parseFloat(options.getOption('aiTemperature') || '0.7');
         const systemPrompt = this.getSystemPrompt(opts.systemPrompt || options.getOption('aiSystemPrompt'));

-        // Format messages for Ollama
-        const formattedMessages = this.formatMessages(messages, systemPrompt);
-
-        // Log the formatted messages for debugging
-        console.log('Input messages for formatting:', messages);
-        console.log('Formatted messages for Ollama:', formattedMessages);
-
         try {
-            const endpoint = `${baseUrl.replace(/\/+$/, '')}/api/chat`;
-
-            // Determine if we should stream the response
-            const shouldStream = opts.stream === true;
-
-            if (shouldStream) {
-                // Handle streaming response
-                const response = await fetch(endpoint, {
+            // Use the formatter to prepare messages
+            const formattedMessages = this.formatter.formatMessages(messages, systemPrompt);
+
+            console.log(`Sending to Ollama with formatted messages:`, JSON.stringify(formattedMessages, null, 2));
+
+            const response = await fetch(`${apiBase}/api/chat`, {
                 method: 'POST',
                 headers: {
                     'Content-Type': 'application/json'
@@ -55,393 +61,36 @@ export class OllamaService extends BaseAIService {
                 body: JSON.stringify({
                     model,
                     messages: formattedMessages,
-                    stream: true,
                     options: {
-                        temperature,
-                    }
+                        temperature
+                    },
+                    stream: false
                 })
             });

             if (!response.ok) {
                 const errorBody = await response.text();
-                throw new Error(`Ollama API error: ${response.status} ${response.statusText} - ${errorBody}`);
+                console.error(`Ollama API error: ${response.status} ${response.statusText}`, errorBody);
+                throw new Error(`Ollama API error: ${response.status} ${response.statusText}`);
             }

-            // For streaming, we return an object that has a callback for handling the stream
-            return {
-                text: "", // Initial empty text that will be built up
-                model: model,
-                provider: this.getName(),
-                usage: {
-                    promptTokens: 0,
-                    completionTokens: 0,
-                    totalTokens: 0
-                },
-                stream: async (callback) => {
-                    if (!response.body) {
-                        throw new Error("No response body from Ollama");
-                    }
-
-                    const reader = response.body.getReader();
-                    let fullText = "";
-                    let partialLine = "";
-                    let receivedAnyContent = false;
-
-                    try {
-                        while (true) {
-                            const { done, value } = await reader.read();
-                            if (done) break;
-
-                            // Convert the chunk to text
-                            const chunk = new TextDecoder().decode(value);
-                            partialLine += chunk;
-
-                            // Split by lines and process each complete JSON object
-                            const lines = partialLine.split('\n');
-
-                            // Process all complete lines except the last one (which might be incomplete)
-                            for (let i = 0; i < lines.length - 1; i++) {
-                                const line = lines[i].trim();
-                                if (!line) continue;
-
-                                try {
-                                    const data = JSON.parse(line);
-                                    console.log("Streaming chunk received:", data);
-
-                                    if (data.message && data.message.content) {
-                                        // Extract just the new content
-                                        const newContent = data.message.content;
-                                        // Add to full text
-                                        fullText += newContent;
-                                        receivedAnyContent = true;
-
-                                        // Call the callback with the new content
-                                        await callback({
-                                            text: newContent,
-                                            done: false
-                                        });
-                                    }
-
-                                    if (data.done) {
-                                        // If we received an empty response with done=true,
-                                        // generate a fallback response
-                                        if (!receivedAnyContent && fullText.trim() === "") {
-                                            // Generate a fallback response
-                                            const fallbackText = "I've processed your request but don't have a specific response for you at this time.";
-                                            await callback({
-                                                text: fallbackText,
-                                                done: false
-                                            });
-                                            fullText = fallbackText;
-                                        }
-
-                                        // Final message in the stream
-                                        await callback({
-                                            text: "",
-                                            done: true,
-                                            usage: {
-                                                promptTokens: data.prompt_eval_count || 0,
-                                                completionTokens: data.eval_count || 0,
-                                                totalTokens: (data.prompt_eval_count || 0) + (data.eval_count || 0)
-                                            }
-                                        });
-                                    }
-                                } catch (err) {
-                                    console.error("Error parsing JSON from Ollama stream:", err, "Line:", line);
-                                }
-                            }
-
-                            // Keep the potentially incomplete last line for the next iteration
-                            partialLine = lines[lines.length - 1];
-                        }
-
-                        // Handle any remaining content in partialLine
-                        if (partialLine.trim()) {
-                            try {
-                                const data = JSON.parse(partialLine.trim());
-                                if (data.message && data.message.content) {
-                                    fullText += data.message.content;
-                                    receivedAnyContent = true;
-                                    await callback({
-                                        text: data.message.content,
-                                        done: false
-                                    });
-                                }
-
-                                if (data.done) {
-                                    // Check for empty responses
-                                    if (!receivedAnyContent && fullText.trim() === "") {
-                                        // Generate a fallback response
-                                        const fallbackText = "I've processed your request but don't have a specific response for you at this time.";
-                                        await callback({
-                                            text: fallbackText,
-                                            done: false
-                                        });
-                                        fullText = fallbackText;
-                                    }
-
-                                    await callback({
-                                        text: "",
-                                        done: true,
-                                        usage: {
-                                            promptTokens: data.prompt_eval_count || 0,
-                                            completionTokens: data.eval_count || 0,
-                                            totalTokens: (data.prompt_eval_count || 0) + (data.eval_count || 0)
-                                        }
-                                    });
-                                }
-                            } catch (err) {
-                                console.error("Error parsing JSON from last line:", err, "Line:", partialLine);
-                            }
-                        }
-
-                        // If we reached the end without a done message and without any content
-                        if (!receivedAnyContent && fullText.trim() === "") {
-                            // Generate a fallback response
-                            const fallbackText = "I've processed your request but don't have a specific response for you at this time.";
-                            await callback({
-                                text: fallbackText,
-                                done: false
-                            });
-
-                            // Final message
-                            await callback({
-                                text: "",
-                                done: true,
-                                usage: {
-                                    promptTokens: 0,
-                                    completionTokens: 0,
-                                    totalTokens: 0
-                                }
-                            });
-                        }
-
-                        return fullText;
-                    } catch (err) {
-                        console.error("Error processing Ollama stream:", err);
-                        throw err;
-                    }
-                }
-            };
-            } else {
-                // Non-streaming response - explicitly request JSON format
-                console.log("Sending to Ollama with formatted messages:", JSON.stringify(formattedMessages, null, 2));
-
-                const response = await fetch(endpoint, {
-                    method: 'POST',
-                    headers: {
-                        'Content-Type': 'application/json'
-                    },
-                    body: JSON.stringify({
-                        model,
-                        messages: formattedMessages,
-                        stream: false,
-                        options: {
-                            temperature,
-                        }
-                    })
-                });
-
-                if (!response.ok) {
-                    const errorBody = await response.text();
-                    throw new Error(`Ollama API error: ${response.status} ${response.statusText} - ${errorBody}`);
-                }
-
-                const rawResponseText = await response.text();
-                console.log("Raw response from Ollama:", rawResponseText);
-
-                let data;
-                try {
-                    data = JSON.parse(rawResponseText);
-                    console.log("Parsed Ollama response:", JSON.stringify(data, null, 2));
-                } catch (err: any) {
-                    console.error("Error parsing JSON response from Ollama:", err);
-                    console.error("Raw response:", rawResponseText);
-                    throw new Error(`Failed to parse Ollama response as JSON: ${err.message}`);
-                }
-
-                // Check for empty or JSON object responses
-                const content = data.message?.content || '';
-                let finalResponseText = content;
-
-                if (content === '{}' || content === '{ }' || content === '{ }') {
-                    finalResponseText = "I don't have information about that in my notes.";
-                } else if (!content.trim()) {
-                    finalResponseText = "No response was generated. Please try asking a different question.";
-                }
-
-                return {
-                    text: finalResponseText,
-                    model: data.model || model,
-                    provider: this.getName(),
-                    usage: {
-                        promptTokens: data.prompt_eval_count || 0,
-                        completionTokens: data.eval_count || 0,
-                        totalTokens: (data.prompt_eval_count || 0) + (data.eval_count || 0)
-                    }
-                };
-            }
-        } catch (error: any) {
-            console.error("Ollama service error:", error);
-            throw new Error(`Ollama service error: ${error.message}`);
+            const data: OllamaResponse = await response.json();
+            console.log('Raw response from Ollama:', JSON.stringify(data, null, 2));
+            console.log('Parsed Ollama response:', JSON.stringify(data, null, 2));
+
+            return {
+                text: data.message.content,
+                model: data.model,
+                provider: this.getName(),
+                usage: {
+                    promptTokens: data.prompt_eval_count,
+                    completionTokens: data.eval_count,
+                    totalTokens: data.prompt_eval_count + data.eval_count
+                }
+            };
+        } catch (error) {
+            console.error('Ollama service error:', error);
+            throw error;
         }
     }
-
-    /**
-     * Clean up HTML and other problematic content before sending to Ollama
-     */
-    private cleanContextContent(content: string): string {
-        if (!content) return '';
-
-        try {
-            // First fix potential encoding issues
-            let sanitized = content
-                // Fix common encoding issues with quotes and special characters
-                .replace(/Γ\u00c2[\u00a3\u00a5]/g, '"') // Fix broken quote chars
-                .replace(/[\u00A0-\u9999]/g, match => {
-                    try {
-                        return encodeURIComponent(match).replace(/%/g, '');
-                    } catch (e) {
-                        return '';
-                    }
-                });
-
-            // Replace common HTML tags with markdown or plain text equivalents
-            sanitized = sanitized
-                // Remove HTML divs, spans, etc.
-                .replace(/<\/?div[^>]*>/g, '')
-                .replace(/<\/?span[^>]*>/g, '')
-                .replace(/<\/?p[^>]*>/g, '\n')
-                // Convert headers
-                .replace(/<h1[^>]*>(.*?)<\/h1>/gi, '# $1\n')
-                .replace(/<h2[^>]*>(.*?)<\/h2>/gi, '## $1\n')
-                .replace(/<h3[^>]*>(.*?)<\/h3>/gi, '### $1\n')
-                // Convert lists
-                .replace(/<\/?ul[^>]*>/g, '')
-                .replace(/<\/?ol[^>]*>/g, '')
-                .replace(/<li[^>]*>(.*?)<\/li>/gi, '- $1\n')
-                // Convert links
-                .replace(/<a[^>]*href=["'](.*?)["'][^>]*>(.*?)<\/a>/gi, '[$2]($1)')
-                // Convert code blocks
-                .replace(/<pre[^>]*><code[^>]*>(.*?)<\/code><\/pre>/gis, '```\n$1\n```')
-                .replace(/<code[^>]*>(.*?)<\/code>/gi, '`$1`')
-                // Convert emphasis
-                .replace(/<\/?strong[^>]*>/g, '**')
-                .replace(/<\/?em[^>]*>/g, '*')
-                // Remove figure tags
-                .replace(/<\/?figure[^>]*>/g, '')
-                // Remove all other HTML tags
-                .replace(/<[^>]*>/g, '')
-                // Fix double line breaks
-                .replace(/\n\s*\n\s*\n/g, '\n\n')
-                // Fix HTML entities
-                .replace(/&nbsp;/g, ' ')
-                .replace(/&lt;/g, '<')
-                .replace(/&gt;/g, '>')
-                .replace(/&amp;/g, '&')
-                .replace(/&quot;/g, '"')
-                // Final clean whitespace
-                .replace(/\s+/g, ' ')
-                .replace(/\n\s+/g, '\n')
-                .trim();
-
-            return sanitized;
-        } catch (error) {
-            console.error("Error cleaning context content:", error);
-            return content; // Return original if cleaning fails
-        }
-    }
-
-    /**
-     * Format messages for the Ollama API
-     */
-    private formatMessages(messages: Message[], systemPrompt: string): OllamaMessage[] {
-        const formattedMessages: OllamaMessage[] = [];
-        const MAX_SYSTEM_CONTENT_LENGTH = 4000;
-
-        // First identify user and system messages
-        const systemMessages = messages.filter(msg => msg.role === 'system');
-        const userMessages = messages.filter(msg => msg.role === 'user' || msg.role === 'assistant');
-
-        // In the case of Ollama, we need to ensure context is properly integrated
-        // The key insight is that simply including it in a system message doesn't work well
-
-        // Check if we have context (typically in the first system message)
-        let hasContext = false;
-        let contextContent = '';
-
-        if (systemMessages.length > 0) {
-            const potentialContext = systemMessages[0].content;
-            if (potentialContext && potentialContext.includes('# Context for your query')) {
-                hasContext = true;
-                contextContent = this.cleanContextContent(potentialContext);
-            }
-        }
-
-        // Create base system message with instructions
-        let basePrompt = systemPrompt ||
-            "You are an AI assistant integrated into TriliumNext Notes. " +
-            "Focus on helping users find information in their notes and answering questions based on their knowledge base. " +
-            "Be concise, informative, and direct when responding to queries.";
-
-        // If we have context, inject it differently - prepend it to the user's first question
-        if (hasContext && userMessages.length > 0) {
-            // Create initial system message with just the base prompt
-            formattedMessages.push({
-                role: 'system',
-                content: basePrompt
-            });
-
-            // For user messages, inject context into the first user message
-            let injectedContext = false;
-            for (let i = 0; i < userMessages.length; i++) {
-                const msg = userMessages[i];
-
-                if (msg.role === 'user' && !injectedContext) {
-                    // Format the context in a way Ollama can't ignore
-                    const formattedContext =
-                        "I need you to answer based on the following information from my notes:\n\n" +
-                        "-----BEGIN MY NOTES-----\n" +
-                        contextContent +
-                        "\n-----END MY NOTES-----\n\n" +
-                        "Based on these notes, please answer: " + msg.content;
-
-                    formattedMessages.push({
-                        role: 'user',
-                        content: formattedContext
-                    });
-
-                    injectedContext = true;
-                } else {
-                    formattedMessages.push({
-                        role: msg.role,
-                        content: msg.content
-                    });
-                }
-            }
-        } else {
-            // No context or empty context case
-            // Add system message (with system prompt)
-            if (systemPrompt) {
-                formattedMessages.push({
-                    role: 'system',
-                    content: systemPrompt
-                });
-            }
-
-            // Add all user and assistant messages as-is
-            for (const msg of userMessages) {
-                formattedMessages.push({
-                    role: msg.role,
-                    content: msg.content
-                });
-            }
-        }
-
-        console.log(`Formatted ${messages.length} messages into ${formattedMessages.length} messages for Ollama`);
-        console.log(`Context detected: ${hasContext ? 'Yes' : 'No'}`);
-
-        return formattedMessages;
-    }
 }
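The net effect of the rewrite is that the service now issues one non-streaming request. A sketch of the request body it builds (values illustrative, field names as used in the new code above):

```ts
// Illustrative payload for POST `${apiBase}/api/chat`
const body = {
    model: 'llama3',               // opts.model || ollamaDefaultModel || 'llama3'
    messages: [
        { role: 'system', content: '<base system prompt>' },
        { role: 'user', content: '<question with notes context injected by the formatter>' }
    ],
    options: { temperature: 0.7 }, // opts.temperature, falling back to the aiTemperature option
    stream: false
};
// The response's message.content, prompt_eval_count, and eval_count map onto
// the ChatResponse text and usage fields returned to callers.
```

Since `stream: false` is now hard-coded, callers that previously passed `opts.stream === true` no longer receive incremental output; the streaming path was removed wholesale in this commit.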