do a wayyy better job at building the messages with context

This commit is contained in:
perf3ct
2025-03-28 22:50:15 +00:00
parent ea4d3ac800
commit 72c380b6f4
9 changed files with 856 additions and 468 deletions

View File

@@ -956,8 +956,8 @@ async function sendMessage(req: Request, res: Response) {
     log.info(`Context ends with: "...${context.substring(context.length - 200)}"`);
     log.info(`Number of notes included: ${sourceNotes.length}`);

-    // Get messages with context properly formatted for the specific LLM provider
-    const aiMessages = contextService.buildMessagesWithContext(
+    // Format messages for the LLM using the proper context
+    const aiMessages = await contextService.buildMessagesWithContext(
         session.messages.slice(-LLM_CONSTANTS.SESSION.MAX_SESSION_MESSAGES).map(msg => ({
             role: msg.role,
             content: msg.content
@@ -1104,7 +1104,7 @@ async function sendMessage(req: Request, res: Response) {
     const context = buildContextFromNotes(relevantNotes, messageContent);

     // Get messages with context properly formatted for the specific LLM provider
-    const aiMessages = contextService.buildMessagesWithContext(
+    const aiMessages = await contextService.buildMessagesWithContext(
         session.messages.slice(-LLM_CONSTANTS.SESSION.MAX_SESSION_MESSAGES).map(msg => ({
             role: msg.role,
             content: msg.content

View File

@@ -265,10 +265,10 @@ export class ChatService {
     );

     // Create messages array with context using the improved method
-    const messagesWithContext = contextService.buildMessagesWithContext(
+    const messagesWithContext = await contextService.buildMessagesWithContext(
         session.messages,
         enhancedContext,
-        aiServiceManager.getService() // Get the default service
+        aiServiceManager.getService()
     );

     // Generate AI response

View File

@@ -11,6 +11,7 @@ import { ContextExtractor } from './context/index.js';
 import type { NoteSearchResult } from './interfaces/context_interfaces.js';
 import type { Message } from './ai_interface.js';
 import type { LLMServiceInterface } from './interfaces/agent_tool_interfaces.js';
+import { MessageFormatterFactory } from './interfaces/message_formatter.js';

 /**
  * Main Context Service for Trilium Notes
@@ -189,72 +190,62 @@ class TriliumContextService {
     }

     /**
-     * Build messages with proper context for an LLM-enhanced chat
-     * This takes a set of messages and adds context in the appropriate format for each LLM provider
-     *
-     * @param messages Array of messages to enhance with context
-     * @param context The context to add (built from relevant notes)
-     * @param llmService The LLM service to format messages for
-     * @returns Promise resolving to the messages array with context properly integrated
+     * Builds messages with context for LLM service
      */
-    buildMessagesWithContext(messages: Message[], context: string, llmService: LLMServiceInterface): Message[] {
-        // For simple conversations just add context to the system message
+    async buildMessagesWithContext(
+        messages: Message[],
+        context: string,
+        llmService: LLMServiceInterface
+    ): Promise<Message[]> {
         try {
             if (!messages || messages.length === 0) {
-                return [{ role: 'system', content: context }];
+                log.info('No messages provided to buildMessagesWithContext');
+                return [];
             }

-            const result: Message[] = [];
-            let hasSystemMessage = false;
-
-            // First pass: identify if there's a system message
-            for (const msg of messages) {
-                if (msg.role === 'system') {
-                    hasSystemMessage = true;
-                    break;
-                }
-            }
-
-            // If we have a system message, prepend context to it
-            // Otherwise create a new system message with the context
-            if (hasSystemMessage) {
-                for (const msg of messages) {
-                    if (msg.role === 'system') {
-                        // For Ollama, use a cleaner approach with just one system message
-                        if (llmService.constructor.name === 'OllamaService') {
-                            // If this is the first system message we've seen,
-                            // add context to it, otherwise skip (Ollama handles multiple
-                            // system messages poorly)
-                            if (result.findIndex(m => m.role === 'system') === -1) {
-                                result.push({
-                                    role: 'system',
-                                    content: `${context}\n\n${msg.content}`
-                                });
-                            }
-                        } else {
-                            // For other providers, include all system messages
-                            result.push({
-                                role: 'system',
-                                content: msg.content.includes(context) ?
-                                    msg.content : // Avoid duplicate context
-                                    `${context}\n\n${msg.content}`
-                            });
-                        }
-                    } else {
-                        result.push(msg);
-                    }
-                }
-            } else {
-                // No system message found, prepend one with the context
-                result.push({ role: 'system', content: context });
-                // Add all the original messages
-                result.push(...messages);
-            }
-
-            return result;
+            if (!context || context.trim() === '') {
+                log.info('No context provided to buildMessagesWithContext, returning original messages');
+                return messages;
+            }
+
+            // Get the provider name, handling service classes and raw provider names
+            let providerName: string;
+            if (typeof llmService === 'string') {
+                // If llmService is a string, assume it's the provider name
+                providerName = llmService;
+            } else if (llmService.constructor && llmService.constructor.name) {
+                // Extract provider name from service class name (e.g., OllamaService -> ollama)
+                providerName = llmService.constructor.name.replace('Service', '').toLowerCase();
+            } else {
+                // Fallback to default
+                providerName = 'default';
+            }
+
+            log.info(`Using formatter for provider: ${providerName}`);
+
+            // Get the appropriate formatter for this provider
+            const formatter = MessageFormatterFactory.getFormatter(providerName);
+
+            // Format messages with context using the provider-specific formatter
+            const formattedMessages = formatter.formatMessages(
+                messages,
+                undefined, // No system prompt override - use what's in the messages
+                context
+            );
+
+            log.info(`Formatted ${messages.length} messages into ${formattedMessages.length} messages for ${providerName}`);
+
+            return formattedMessages;
         } catch (error) {
             log.error(`Error building messages with context: ${error}`);
-
-            // Fallback: prepend a system message with context
-            const safeMessages = Array.isArray(messages) ? messages : [];
-            return [
-                { role: 'system', content: context },
-                ...safeMessages.filter(msg => msg.role !== 'system')
-            ];
+            // Fallback to original messages in case of error
+            return messages;
         }
     }
 }
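Since the whole refactor hinges on deriving a provider key from the service instance, here is a minimal sketch of that derivation in isolation. The service class names mirror the ones this commit uses; `providerNameFor` itself is a hypothetical standalone helper, not code from the commit:

```ts
// Hypothetical helper mirroring the providerName logic above.
class OllamaService {}
class OpenAIService {}

function providerNameFor(llmService: object | string): string {
    if (typeof llmService === 'string') {
        // A raw provider name passes straight through
        return llmService;
    }
    if (llmService.constructor && llmService.constructor.name) {
        // "OllamaService" -> "ollama", "OpenAIService" -> "openai"
        return llmService.constructor.name.replace('Service', '').toLowerCase();
    }
    return 'default';
}

console.log(providerNameFor(new OllamaService())); // "ollama"
console.log(providerNameFor(new OpenAIService())); // "openai"
console.log(providerNameFor('anthropic'));         // "anthropic"
```

One caveat of keying off `constructor.name`: under minification or class renaming the derived key no longer matches a known provider, and the factory introduced below falls back to its default formatter.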

View File

@@ -0,0 +1,223 @@
import sanitizeHtml from 'sanitize-html';
import type { Message } from '../ai_interface.js';
import { BaseMessageFormatter } from './base_formatter.js';
/**
* Anthropic-specific message formatter
* Optimized for Claude's API and preferences
*/
export class AnthropicMessageFormatter extends BaseMessageFormatter {
/**
* Maximum recommended context length for Anthropic models
* Claude has a very large context window
*/
private static MAX_CONTEXT_LENGTH = 100000;
/**
* Format messages for the Anthropic API
*/
formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[] {
const formattedMessages: Message[] = [];
// For Anthropic, system prompts work best as the first user message with <instructions> XML tags
// First, collect all non-system messages
const userAssistantMessages = messages.filter(msg => msg.role === 'user' || msg.role === 'assistant');
// For Anthropic, we need to handle context differently
// 1. If explicit context is provided, we format it with XML tags
if (context) {
// Build the system message with context
const baseInstructions = this.getDefaultSystemPrompt(systemPrompt);
const formattedContext =
`<instructions>\n${baseInstructions}\n\n` +
`Use the following information from the user's notes to answer their questions:\n\n` +
`<user_notes>\n${this.cleanContextContent(context)}\n</user_notes>\n\n` +
`When responding:\n` +
`- Focus on the most relevant information from the notes\n` +
`- Be concise and direct in your answers\n` +
`- If quoting from notes, mention which note it's from\n` +
`- If the notes don't contain relevant information, say so clearly\n` +
`</instructions>`;
// If there's at least one user message, add the context to the first one
if (userAssistantMessages.length > 0 && userAssistantMessages[0].role === 'user') {
// Add system as a new first message
formattedMessages.push({
role: 'user',
content: formattedContext
});
// Add system response acknowledgment
formattedMessages.push({
role: 'assistant',
content: "I'll help you with your notes based on the context provided."
});
// Add remaining messages
for (const msg of userAssistantMessages) {
formattedMessages.push(msg);
}
}
// If no user messages, create a placeholder
else {
formattedMessages.push({
role: 'user',
content: formattedContext
});
formattedMessages.push({
role: 'assistant',
content: "I'll help you with your notes based on the context provided. What would you like to know?"
});
// Add any existing assistant messages if they exist
const assistantMsgs = userAssistantMessages.filter(msg => msg.role === 'assistant');
for (const msg of assistantMsgs) {
formattedMessages.push(msg);
}
}
}
// 2. If no explicit context but we have system messages, convert them to Claude format
else if (messages.some(msg => msg.role === 'system')) {
// Get system messages
const systemMessages = messages.filter(msg => msg.role === 'system');
// Build system content with XML tags
const systemContent =
`<instructions>\n${systemMessages.map(msg => this.cleanContextContent(msg.content)).join('\n\n')}\n</instructions>`;
// Add as first user message
formattedMessages.push({
role: 'user',
content: systemContent
});
// Add assistant acknowledgment
formattedMessages.push({
role: 'assistant',
content: "I understand. I'll follow those instructions."
});
// Add remaining user/assistant messages
for (const msg of userAssistantMessages) {
formattedMessages.push(msg);
}
}
// 3. Just a system prompt, no context
else if (systemPrompt) {
// Add as first user message with XML tags
formattedMessages.push({
role: 'user',
content: `<instructions>\n${systemPrompt}\n</instructions>`
});
// Add assistant acknowledgment
formattedMessages.push({
role: 'assistant',
content: "I understand. I'll follow those instructions."
});
// Add all other messages
for (const msg of userAssistantMessages) {
formattedMessages.push(msg);
}
}
// 4. No system prompt, use default from constants
else if (userAssistantMessages.length > 0) {
// Add default system prompt with XML tags
formattedMessages.push({
role: 'user',
content: `<instructions>\n${this.getDefaultSystemPrompt()}\n</instructions>`
});
// Add assistant acknowledgment
formattedMessages.push({
role: 'assistant',
content: "I understand. I'll follow those instructions."
});
// Add all user messages
for (const msg of userAssistantMessages) {
formattedMessages.push(msg);
}
}
// 5. No special handling needed
else {
// Just add all messages as-is
for (const msg of userAssistantMessages) {
formattedMessages.push(msg);
}
}
console.log(`Anthropic formatter: ${messages.length} messages → ${formattedMessages.length} messages`);
return formattedMessages;
}
/**
* Clean context content for Anthropic
* Claude works well with XML-structured content
*/
cleanContextContent(content: string): string {
if (!content) return '';
try {
// Convert HTML to a Claude-friendly format
const cleaned = sanitizeHtml(content, {
allowedTags: ['b', 'i', 'em', 'strong', 'a', 'p', 'br', 'ul', 'ol', 'li', 'h1', 'h2', 'h3', 'h4', 'h5', 'code', 'pre'],
allowedAttributes: {
'a': ['href']
}
});
// Convert to markdown but preserve some structure
let markdown = cleaned
.replace(/<h1[^>]*>(.*?)<\/h1>/gi, '# $1\n')
.replace(/<h2[^>]*>(.*?)<\/h2>/gi, '## $1\n')
.replace(/<h3[^>]*>(.*?)<\/h3>/gi, '### $1\n')
.replace(/<h4[^>]*>(.*?)<\/h4>/gi, '#### $1\n')
.replace(/<h5[^>]*>(.*?)<\/h5>/gi, '##### $1\n')
.replace(/<p[^>]*>(.*?)<\/p>/gi, '$1\n\n')
.replace(/<br[^>]*>/gi, '\n')
.replace(/<a[^>]*href=["'](.*?)["'][^>]*>(.*?)<\/a>/gi, '[$2]($1)')
.replace(/<strong[^>]*>(.*?)<\/strong>/gi, '**$1**')
.replace(/<b[^>]*>(.*?)<\/b>/gi, '**$1**')
.replace(/<em[^>]*>(.*?)<\/em>/gi, '*$1*')
.replace(/<i[^>]*>(.*?)<\/i>/gi, '*$1*')
.replace(/<code[^>]*>(.*?)<\/code>/gi, '`$1`')
.replace(/<pre[^>]*>(.*?)<\/pre>/gi, '```\n$1\n```')
// Process lists
.replace(/<ul[^>]*>(.*?)<\/ul>/gs, (match, content) => {
return content.replace(/<li[^>]*>(.*?)<\/li>/gi, '- $1\n');
})
.replace(/<ol[^>]*>(.*?)<\/ol>/gs, (match, content) => {
let index = 1;
return content.replace(/<li[^>]*>(.*?)<\/li>/gi, (m: string, item: string) => {
return `${index++}. ${item}\n`;
});
})
// Clean up any remaining HTML tags
.replace(/<[^>]*>/g, '')
// Clean up excessive newlines
.replace(/\n{3,}/g, '\n\n')
// Fix common HTML entities
.replace(/&nbsp;/g, ' ')
.replace(/&lt;/g, '<')
.replace(/&gt;/g, '>')
.replace(/&amp;/g, '&')
.replace(/&quot;/g, '"');
return markdown.trim();
} catch (error) {
console.error("Error cleaning content for Anthropic:", error);
return content; // Return original if cleaning fails
}
}
/**
* Get the maximum recommended context length for Anthropic
*/
getMaxContextLength(): number {
return AnthropicMessageFormatter.MAX_CONTEXT_LENGTH;
}
}
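To make the branching concrete, this is roughly what the formatter produces for a single user question plus note context. The import path is assumed from this commit's layout, and the output comments paraphrase rather than reproduce the exact strings:

```ts
import type { Message } from './ai_interface.js';
import { AnthropicMessageFormatter } from './formatters/anthropic_formatter.js';

const messages: Message[] = [{ role: 'user', content: 'What did I write about Rust?' }];
const formatted = new AnthropicMessageFormatter().formatMessages(
    messages,
    undefined,
    '<p>Rust notes: ownership, borrowing, lifetimes</p>'
);
// formatted[0] (user):      <instructions> wrapping the default prompt plus
//                           <user_notes>Rust notes: ownership, borrowing, lifetimes</user_notes>
// formatted[1] (assistant): "I'll help you with your notes based on the context provided."
// formatted[2] (user):      the original question, unchanged
```

Note that the first branch pushes the context as a new leading user message rather than merging it into the existing first message, so the original question still appears verbatim afterwards.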

View File

@@ -0,0 +1,161 @@
import sanitizeHtml from 'sanitize-html';
import type { Message } from '../ai_interface.js';
import type { MessageFormatter } from '../interfaces/message_formatter.js';
import { DEFAULT_SYSTEM_PROMPT } from '../constants/llm_prompt_constants.js';
/**
* Base formatter with common functionality for all providers
* Provider-specific formatters should extend this class
*/
export abstract class BaseMessageFormatter implements MessageFormatter {
/**
* Format messages for the LLM API
* Each provider should override this method with its specific formatting logic
*/
abstract formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[];
/**
* Get the maximum recommended context length for this provider
* Each provider should override this with appropriate value
*/
abstract getMaxContextLength(): number;
/**
* Get the default system prompt
* Uses the default prompt from constants
*/
protected getDefaultSystemPrompt(systemPrompt?: string): string {
return systemPrompt || DEFAULT_SYSTEM_PROMPT;
}
/**
* Clean context content - common method with standard HTML cleaning
* Provider-specific formatters can override for custom behavior
*/
cleanContextContent(content: string): string {
if (!content) return '';
try {
// First fix any encoding issues
const fixedContent = this.fixEncodingIssues(content);
// Convert HTML to markdown for better readability
const cleaned = sanitizeHtml(fixedContent, {
allowedTags: ['b', 'i', 'em', 'strong', 'a', 'p', 'br', 'ul', 'ol', 'li', 'h1', 'h2', 'h3', 'h4', 'h5', 'code', 'pre'],
allowedAttributes: {
'a': ['href']
},
transformTags: {
'h1': 'h2',
'h2': 'h3',
'div': 'p',
'span': 'span'
}
});
// Process inline elements to markdown
let markdown = cleaned
.replace(/<h1[^>]*>(.*?)<\/h1>/gi, '# $1\n')
.replace(/<h2[^>]*>(.*?)<\/h2>/gi, '## $1\n')
.replace(/<h3[^>]*>(.*?)<\/h3>/gi, '### $1\n')
.replace(/<h4[^>]*>(.*?)<\/h4>/gi, '#### $1\n')
.replace(/<h5[^>]*>(.*?)<\/h5>/gi, '##### $1\n')
.replace(/<p[^>]*>(.*?)<\/p>/gi, '$1\n\n')
.replace(/<br[^>]*>/gi, '\n')
.replace(/<a[^>]*href=["'](.*?)["'][^>]*>(.*?)<\/a>/gi, '[$2]($1)')
.replace(/<strong[^>]*>(.*?)<\/strong>/gi, '**$1**')
.replace(/<b[^>]*>(.*?)<\/b>/gi, '**$1**')
.replace(/<em[^>]*>(.*?)<\/em>/gi, '*$1*')
.replace(/<i[^>]*>(.*?)<\/i>/gi, '*$1*')
.replace(/<code[^>]*>(.*?)<\/code>/gi, '`$1`')
.replace(/<pre[^>]*>(.*?)<\/pre>/gi, '```\n$1\n```')
// Clean up any remaining HTML tags
.replace(/<[^>]*>/g, '')
// Clean up excessive newlines
.replace(/\n{3,}/g, '\n\n');
// Process list items
markdown = this.processListItems(markdown);
// Fix common HTML entities
markdown = markdown
.replace(/&nbsp;/g, ' ')
.replace(/&lt;/g, '<')
.replace(/&gt;/g, '>')
.replace(/&amp;/g, '&')
.replace(/&quot;/g, '"')
.replace(/&#39;/g, "'")
.replace(/&ldquo;/g, '"')
.replace(/&rdquo;/g, '"')
.replace(/&lsquo;/g, "'")
.replace(/&rsquo;/g, "'")
.replace(/&mdash;/g, '—')
.replace(/&ndash;/g, '–')
.replace(/&hellip;/g, '…');
return markdown.trim();
} catch (error) {
console.error("Error cleaning context content:", error);
return content; // Return original if cleaning fails
}
}
/**
* Process HTML list items in markdown conversion
* This is a helper method that safely processes HTML list items
*/
protected processListItems(content: string): string {
// Process unordered lists
let result = content.replace(/<ul[^>]*>([\s\S]*?)<\/ul>/gi, (match: string, listContent: string) => {
return listContent.replace(/<li[^>]*>([\s\S]*?)<\/li>/gi, '- $1\n');
});
// Process ordered lists
result = result.replace(/<ol[^>]*>([\s\S]*?)<\/ol>/gi, (match: string, listContent: string) => {
let index = 1;
return listContent.replace(/<li[^>]*>([\s\S]*?)<\/li>/gi, (itemMatch: string, item: string) => {
return `${index++}. ${item}\n`;
});
});
return result;
}
/**
* Fix common encoding issues in content
* This fixes issues like broken quote characters and other encoding problems
*
* @param content The content to fix encoding issues in
* @returns Content with encoding issues fixed
*/
protected fixEncodingIssues(content: string): string {
if (!content) return '';
try {
// Fix common encoding issues
return content
// Fix broken quote characters
.replace(/Γ\u00c2[\u00a3\u00a5]/g, '"')
// Fix other common broken unicode
.replace(/[\u{0080}-\u{FFFF}]/gu, (match) => {
// Some common replacements
const replacements: Record<string, string> = {
'\u00A0': ' ', // Non-breaking space
'\u2018': "'", // Left single quote
'\u2019': "'", // Right single quote
'\u201C': '"', // Left double quote
'\u201D': '"', // Right double quote
'\u2013': '-', // En dash
'\u2014': '--', // Em dash
'\u2022': '*', // Bullet
'\u2026': '...' // Ellipsis
};
return replacements[match] || match;
});
} catch (error) {
console.error('Error fixing encoding issues:', error);
return content; // Return original if fixing fails
}
}
}
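A small before/after for the shared cleaning pipeline; the subclass here is a stub only so the abstract base can be instantiated, and the expected output is approximate:

```ts
import type { Message } from './ai_interface.js';
import { BaseMessageFormatter } from './formatters/base_formatter.js';

// Stub subclass purely for demonstration.
class DemoFormatter extends BaseMessageFormatter {
    formatMessages(messages: Message[]): Message[] { return messages; }
    getMaxContextLength(): number { return 8000; }
}

const cleaned = new DemoFormatter().cleanContextContent(
    '<h3>Ideas</h3><p>See <a href="https://example.com">docs</a> &amp; more&hellip;</p>'
);
// cleaned ≈ "### Ideas\nSee [docs](https://example.com) & more…"
```

One ordering quirk worth noting: the catch-all `<[^>]*>` strip runs inside the main replace chain, before `processListItems` is called, so `<ul>`/`<ol>` markup appears to be removed before the list helper can convert it.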

View File

@@ -0,0 +1,120 @@
import type { Message } from '../ai_interface.js';
import { BaseMessageFormatter } from './base_formatter.js';
import sanitizeHtml from 'sanitize-html';
/**
* Ollama-specific message formatter
* Handles the unique requirements of the Ollama API
*/
export class OllamaMessageFormatter extends BaseMessageFormatter {
/**
* Maximum recommended context length for Ollama
* Smaller than other providers due to Ollama's handling of context
*/
private static MAX_CONTEXT_LENGTH = 4000;
/**
* Format messages for the Ollama API
*/
formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[] {
const formattedMessages: Message[] = [];
// First identify user and system messages
const systemMessages = messages.filter(msg => msg.role === 'system');
const userMessages = messages.filter(msg => msg.role === 'user' || msg.role === 'assistant');
// Create base system message with instructions
const basePrompt = this.getDefaultSystemPrompt(systemPrompt);
// Always add a system message with the base prompt
formattedMessages.push({
role: 'system',
content: basePrompt
});
// If we have context, inject it into the first user message
if (context && userMessages.length > 0) {
let injectedContext = false;
for (let i = 0; i < userMessages.length; i++) {
const msg = userMessages[i];
if (msg.role === 'user' && !injectedContext) {
// Simple context injection directly in the user's message
const cleanedContext = this.cleanContextContent(context);
const formattedContext =
"Here's information from my notes to help answer the question:\n\n" +
cleanedContext +
"\n\nBased on this information, please answer: " + msg.content;
formattedMessages.push({
role: 'user',
content: formattedContext
});
injectedContext = true;
} else {
formattedMessages.push(msg);
}
}
} else {
// No context, just add all messages as-is
for (const msg of userMessages) {
formattedMessages.push(msg);
}
}
console.log(`Ollama formatter processed ${messages.length} messages into ${formattedMessages.length} messages`);
return formattedMessages;
}
/**
* Clean up HTML and other problematic content before sending to Ollama
* Ollama needs a more aggressive cleaning than other models
*/
override cleanContextContent(content: string): string {
if (!content) return '';
try {
// First use the parent class to do standard cleaning
let sanitized = super.cleanContextContent(content);
// Then apply Ollama-specific aggressive cleaning
// Remove any remaining HTML using sanitizeHtml
let plaintext = sanitizeHtml(sanitized, {
allowedTags: [],
allowedAttributes: {},
textFilter: (text) => text
});
// Then aggressively sanitize to plain ASCII and simple formatting
plaintext = plaintext
// Replace common problematic quotes with simple ASCII quotes
.replace(/[""]/g, '"')
.replace(/['']/g, "'")
// Replace other common Unicode characters
.replace(/[–—]/g, '-')
.replace(/[•]/g, '*')
.replace(/[…]/g, '...')
// Strip all non-ASCII characters
.replace(/[^\x00-\x7F]/g, '')
// Normalize whitespace
.replace(/\s+/g, ' ')
.replace(/\n\s+/g, '\n')
.trim();
return plaintext;
} catch (error) {
console.error("Error cleaning context content for Ollama:", error);
return content; // Return original if cleaning fails
}
}
/**
* Get the maximum recommended context length for Ollama
*/
getMaxContextLength(): number {
return OllamaMessageFormatter.MAX_CONTEXT_LENGTH;
}
}
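For a single question plus context, the Ollama formatter therefore yields exactly two messages, sketched below (import path assumed; output comments paraphrased):

```ts
import type { Message } from './ai_interface.js';
import { OllamaMessageFormatter } from './formatters/ollama_formatter.js';

const messages: Message[] = [{ role: 'user', content: 'Summarize my project notes' }];
const out = new OllamaMessageFormatter().formatMessages(
    messages,
    undefined,
    '<p>Project: ship v1 by May</p>'
);
// out[0] (system): the default system prompt
// out[1] (user):   "Here's information from my notes to help answer the question:\n\n"
//                  + "Project: ship v1 by May"
//                  + "\n\nBased on this information, please answer: Summarize my project notes"
```

Also worth noting: incoming system messages are filtered into `systemMessages` but never re-emitted, so the single system message Ollama sees is always the base prompt. That matches the stated goal of avoiding multiple system messages, though any caller-provided system content is dropped.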

View File

@@ -0,0 +1,152 @@
import sanitizeHtml from 'sanitize-html';
import type { Message } from '../ai_interface.js';
import { BaseMessageFormatter } from './base_formatter.js';
/**
* OpenAI-specific message formatter
* Optimized for OpenAI's API requirements and preferences
*/
export class OpenAIMessageFormatter extends BaseMessageFormatter {
/**
* Maximum recommended context length for OpenAI
* Based on GPT-4 context window size
*/
private static MAX_CONTEXT_LENGTH = 16000;
/**
* Format messages for the OpenAI API
*/
formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[] {
const formattedMessages: Message[] = [];
// Check if we already have a system message
const hasSystemMessage = messages.some(msg => msg.role === 'system');
const userAssistantMessages = messages.filter(msg => msg.role === 'user' || msg.role === 'assistant');
// If we have explicit context, format it properly
if (context) {
// For OpenAI, it's best to put context in the system message
const formattedContext =
"You are an AI assistant integrated into TriliumNext Notes. " +
"Use the following information from the user's notes to answer their questions:\n\n" +
this.cleanContextContent(context) +
"\n\nFocus on relevant information from these notes when answering. " +
"Be concise and informative in your responses.";
// Add as system message
formattedMessages.push({
role: 'system',
content: formattedContext
});
}
// If we don't have explicit context but have a system prompt
else if (!hasSystemMessage && systemPrompt) {
formattedMessages.push({
role: 'system',
content: systemPrompt
});
}
// If neither context nor system prompt is provided, use default system prompt
else if (!hasSystemMessage) {
formattedMessages.push({
role: 'system',
content: this.getDefaultSystemPrompt(systemPrompt)
});
}
// Otherwise if there are existing system messages, keep them
else if (hasSystemMessage) {
// Keep any existing system messages
const systemMessages = messages.filter(msg => msg.role === 'system');
for (const msg of systemMessages) {
formattedMessages.push({
role: 'system',
content: this.cleanContextContent(msg.content)
});
}
}
// Add all user and assistant messages
for (const msg of userAssistantMessages) {
formattedMessages.push({
role: msg.role,
content: msg.content
});
}
console.log(`OpenAI formatter: ${messages.length} messages → ${formattedMessages.length} messages`);
return formattedMessages;
}
/**
* Clean context content for OpenAI
* OpenAI handles HTML better than Ollama but still benefits from some cleaning
*/
cleanContextContent(content: string): string {
if (!content) return '';
try {
// Convert HTML to Markdown for better readability
const cleaned = sanitizeHtml(content, {
allowedTags: ['b', 'i', 'em', 'strong', 'a', 'p', 'br', 'ul', 'ol', 'li', 'h1', 'h2', 'h3', 'h4', 'h5', 'code', 'pre'],
allowedAttributes: {
'a': ['href']
},
transformTags: {
'h1': 'h2',
'h2': 'h3',
'div': 'p',
'span': 'span'
}
});
// Process inline elements to markdown with simpler approach
let markdown = cleaned
.replace(/<h1[^>]*>(.*?)<\/h1>/gi, '# $1\n')
.replace(/<h2[^>]*>(.*?)<\/h2>/gi, '## $1\n')
.replace(/<h3[^>]*>(.*?)<\/h3>/gi, '### $1\n')
.replace(/<h4[^>]*>(.*?)<\/h4>/gi, '#### $1\n')
.replace(/<h5[^>]*>(.*?)<\/h5>/gi, '##### $1\n')
.replace(/<p[^>]*>(.*?)<\/p>/gi, '$1\n\n')
.replace(/<br[^>]*>/gi, '\n')
.replace(/<a[^>]*href=["'](.*?)["'][^>]*>(.*?)<\/a>/gi, '[$2]($1)')
.replace(/<strong[^>]*>(.*?)<\/strong>/gi, '**$1**')
.replace(/<b[^>]*>(.*?)<\/b>/gi, '**$1**')
.replace(/<em[^>]*>(.*?)<\/em>/gi, '*$1*')
.replace(/<i[^>]*>(.*?)<\/i>/gi, '*$1*')
.replace(/<code[^>]*>(.*?)<\/code>/gi, '`$1`')
.replace(/<pre[^>]*>(.*?)<\/pre>/gi, '```\n$1\n```')
// Clean up any remaining HTML tags
.replace(/<[^>]*>/g, '')
// Clean up excessive newlines
.replace(/\n{3,}/g, '\n\n');
// Fix common HTML entities
markdown = markdown
.replace(/&nbsp;/g, ' ')
.replace(/&lt;/g, '<')
.replace(/&gt;/g, '>')
.replace(/&amp;/g, '&')
.replace(/&quot;/g, '"')
.replace(/&#39;/g, "'")
.replace(/&ldquo;/g, '"')
.replace(/&rdquo;/g, '"')
.replace(/&lsquo;/g, "'")
.replace(/&rsquo;/g, "'")
.replace(/&mdash;/g, '—')
.replace(/&ndash;/g, '–')
.replace(/&hellip;/g, '…');
return markdown.trim();
} catch (error) {
console.error("Error cleaning content for OpenAI:", error);
return content; // Return original if cleaning fails
}
}
/**
* Get the maximum recommended context length for OpenAI
*/
getMaxContextLength(): number {
return OpenAIMessageFormatter.MAX_CONTEXT_LENGTH;
}
}
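The OpenAI path is the simplest of the three: context collapses into one leading system message and the conversation follows untouched (import path assumed; output comments paraphrased):

```ts
import type { Message } from './ai_interface.js';
import { OpenAIMessageFormatter } from './formatters/openai_formatter.js';

const messages: Message[] = [{ role: 'user', content: 'Any notes on Kubernetes?' }];
const out = new OpenAIMessageFormatter().formatMessages(
    messages,
    undefined,
    '<p>k8s: one namespace per team</p>'
);
// out[0] (system): "You are an AI assistant integrated into TriliumNext Notes. ..."
//                  including the cleaned note text "k8s: one namespace per team"
// out[1] (user):   'Any notes on Kubernetes?'
```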

View File

@@ -0,0 +1,92 @@
import type { Message } from "../ai_interface.js";
// These imports need to be added for the factory to work
import { OpenAIMessageFormatter } from "../formatters/openai_formatter.js";
import { AnthropicMessageFormatter } from "../formatters/anthropic_formatter.js";
import { OllamaMessageFormatter } from "../formatters/ollama_formatter.js";
/**
* Interface for provider-specific message formatters
* This allows each provider to have custom formatting logic while maintaining a consistent interface
*/
export interface MessageFormatter {
/**
* Format messages for a specific LLM provider
*
* @param messages Array of messages to format
* @param systemPrompt Optional system prompt to include
* @param context Optional context to incorporate into messages
* @returns Formatted messages ready to send to the provider
*/
formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[];
/**
* Clean context content to prepare it for this specific provider
*
* @param content The raw context content
* @returns Cleaned and formatted context content
*/
cleanContextContent(content: string): string;
/**
* Get the maximum recommended context length for this provider
*
* @returns Maximum context length in characters
*/
getMaxContextLength(): number;
}
/**
* Factory to get the appropriate message formatter for a provider
*/
export class MessageFormatterFactory {
// Cache formatters for reuse
private static formatters: Record<string, MessageFormatter> = {};
/**
* Get the appropriate message formatter for a provider
*
* @param providerName Name of the LLM provider (e.g., 'openai', 'anthropic', 'ollama')
* @returns MessageFormatter instance for the specified provider
*/
static getFormatter(providerName: string): MessageFormatter {
// Normalize provider name and handle variations
let providerKey: string;
// Normalize provider name from various forms (constructor.name, etc.)
if (providerName.toLowerCase().includes('openai')) {
providerKey = 'openai';
} else if (providerName.toLowerCase().includes('anthropic') ||
providerName.toLowerCase().includes('claude')) {
providerKey = 'anthropic';
} else if (providerName.toLowerCase().includes('ollama')) {
providerKey = 'ollama';
} else {
// Default to lowercase of whatever name we got
providerKey = providerName.toLowerCase();
}
// Return cached formatter if available
if (this.formatters[providerKey]) {
return this.formatters[providerKey];
}
// Create and cache new formatter
switch (providerKey) {
case 'openai':
this.formatters[providerKey] = new OpenAIMessageFormatter();
break;
case 'anthropic':
this.formatters[providerKey] = new AnthropicMessageFormatter();
break;
case 'ollama':
this.formatters[providerKey] = new OllamaMessageFormatter();
break;
default:
// Default to OpenAI formatter for unknown providers
console.warn(`No specific formatter for provider: ${providerName}. Using OpenAI formatter as default.`);
this.formatters[providerKey] = new OpenAIMessageFormatter();
}
return this.formatters[providerKey];
}
}
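A quick usage sketch of the factory's substring matching; each call returns a cached instance per normalized key:

```ts
import { MessageFormatterFactory } from './interfaces/message_formatter.js';

MessageFormatterFactory.getFormatter('OllamaService'); // OllamaMessageFormatter ("ollamaservice" contains "ollama")
MessageFormatterFactory.getFormatter('claude');        // AnthropicMessageFormatter (matched via "claude")
MessageFormatterFactory.getFormatter('OpenAI');        // OpenAIMessageFormatter
MessageFormatterFactory.getFormatter('mistral');       // warns, falls back to the OpenAI formatter
```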

View File

@@ -1,53 +1,59 @@
 import options from '../../options.js';
 import { BaseAIService } from '../base_ai_service.js';
-import type { ChatCompletionOptions, ChatResponse, Message } from '../ai_interface.js';
-import { PROVIDER_CONSTANTS } from '../constants/provider_constants.js';
+import type { Message, ChatCompletionOptions, ChatResponse } from '../ai_interface.js';
+import sanitizeHtml from 'sanitize-html';
+import { OllamaMessageFormatter } from '../formatters/ollama_formatter.js';

 interface OllamaMessage {
     role: string;
     content: string;
 }

+interface OllamaResponse {
+    model: string;
+    created_at: string;
+    message: OllamaMessage;
+    done: boolean;
+    total_duration: number;
+    load_duration: number;
+    prompt_eval_count: number;
+    prompt_eval_duration: number;
+    eval_count: number;
+    eval_duration: number;
+}
+
 export class OllamaService extends BaseAIService {
+    private formatter: OllamaMessageFormatter;
+
     constructor() {
         super('Ollama');
+        this.formatter = new OllamaMessageFormatter();
     }

     isAvailable(): boolean {
-        return super.isAvailable() &&
-            options.getOption('ollamaEnabled') === 'true' &&
-            !!options.getOption('ollamaBaseUrl');
+        return super.isAvailable() && !!options.getOption('ollamaBaseUrl');
     }

     async generateChatCompletion(messages: Message[], opts: ChatCompletionOptions = {}): Promise<ChatResponse> {
         if (!this.isAvailable()) {
-            throw new Error('Ollama service is not available. Check Ollama settings.');
+            throw new Error('Ollama service is not available. Check API URL in settings.');
         }

-        const baseUrl = options.getOption('ollamaBaseUrl') || PROVIDER_CONSTANTS.OLLAMA.BASE_URL;
-        const model = opts.model || options.getOption('ollamaDefaultModel') || PROVIDER_CONSTANTS.OLLAMA.DEFAULT_MODEL;
+        const apiBase = options.getOption('ollamaBaseUrl');
+        const model = opts.model || options.getOption('ollamaDefaultModel') || 'llama3';
         const temperature = opts.temperature !== undefined
             ? opts.temperature
             : parseFloat(options.getOption('aiTemperature') || '0.7');
         const systemPrompt = this.getSystemPrompt(opts.systemPrompt || options.getOption('aiSystemPrompt'));

-        // Format messages for Ollama
-        const formattedMessages = this.formatMessages(messages, systemPrompt);
-
-        // Log the formatted messages for debugging
-        console.log('Input messages for formatting:', messages);
-        console.log('Formatted messages for Ollama:', formattedMessages);
-
         try {
-            const endpoint = `${baseUrl.replace(/\/+$/, '')}/api/chat`;
-
-            // Determine if we should stream the response
-            const shouldStream = opts.stream === true;
-
-            if (shouldStream) {
-                // Handle streaming response
-                const response = await fetch(endpoint, {
+            // Use the formatter to prepare messages
+            const formattedMessages = this.formatter.formatMessages(messages, systemPrompt);
+
+            console.log(`Sending to Ollama with formatted messages:`, JSON.stringify(formattedMessages, null, 2));
+
+            const response = await fetch(`${apiBase}/api/chat`, {
                 method: 'POST',
                 headers: {
                     'Content-Type': 'application/json'
@@ -55,393 +61,36 @@ export class OllamaService extends BaseAIService {
                 body: JSON.stringify({
                     model,
                     messages: formattedMessages,
-                    stream: true,
                     options: {
-                        temperature,
-                    }
+                        temperature
+                    },
+                    stream: false
                 })
             });

             if (!response.ok) {
                 const errorBody = await response.text();
-                throw new Error(`Ollama API error: ${response.status} ${response.statusText} - ${errorBody}`);
+                console.error(`Ollama API error: ${response.status} ${response.statusText}`, errorBody);
+                throw new Error(`Ollama API error: ${response.status} ${response.statusText}`);
             }

-            // For streaming, we return an object that has a callback for handling the stream
-            return {
-                text: "", // Initial empty text that will be built up
-                model: model,
-                provider: this.getName(),
-                usage: {
-                    promptTokens: 0,
-                    completionTokens: 0,
-                    totalTokens: 0
-                },
-                stream: async (callback) => {
-                    if (!response.body) {
-                        throw new Error("No response body from Ollama");
-                    }
-
-                    const reader = response.body.getReader();
-                    let fullText = "";
-                    let partialLine = "";
-                    let receivedAnyContent = false;
-
-                    try {
-                        while (true) {
-                            const { done, value } = await reader.read();
-                            if (done) break;
-
-                            // Convert the chunk to text
-                            const chunk = new TextDecoder().decode(value);
-                            partialLine += chunk;
-
-                            // Split by lines and process each complete JSON object
-                            const lines = partialLine.split('\n');
-
-                            // Process all complete lines except the last one (which might be incomplete)
-                            for (let i = 0; i < lines.length - 1; i++) {
-                                const line = lines[i].trim();
-                                if (!line) continue;
-
-                                try {
-                                    const data = JSON.parse(line);
-                                    console.log("Streaming chunk received:", data);
-
-                                    if (data.message && data.message.content) {
-                                        // Extract just the new content
-                                        const newContent = data.message.content;
-                                        // Add to full text
-                                        fullText += newContent;
-                                        receivedAnyContent = true;
-
-                                        // Call the callback with the new content
-                                        await callback({
-                                            text: newContent,
-                                            done: false
-                                        });
-                                    }
-
-                                    if (data.done) {
-                                        // If we received an empty response with done=true,
-                                        // generate a fallback response
-                                        if (!receivedAnyContent && fullText.trim() === "") {
-                                            // Generate a fallback response
-                                            const fallbackText = "I've processed your request but don't have a specific response for you at this time.";
-                                            await callback({
-                                                text: fallbackText,
-                                                done: false
-                                            });
-                                            fullText = fallbackText;
-                                        }
-
-                                        // Final message in the stream
-                                        await callback({
-                                            text: "",
-                                            done: true,
-                                            usage: {
-                                                promptTokens: data.prompt_eval_count || 0,
-                                                completionTokens: data.eval_count || 0,
-                                                totalTokens: (data.prompt_eval_count || 0) + (data.eval_count || 0)
-                                            }
-                                        });
-                                    }
-                                } catch (err) {
-                                    console.error("Error parsing JSON from Ollama stream:", err, "Line:", line);
-                                }
-                            }
-
-                            // Keep the potentially incomplete last line for the next iteration
-                            partialLine = lines[lines.length - 1];
-                        }
-
-                        // Handle any remaining content in partialLine
-                        if (partialLine.trim()) {
-                            try {
-                                const data = JSON.parse(partialLine.trim());
-                                if (data.message && data.message.content) {
-                                    fullText += data.message.content;
-                                    receivedAnyContent = true;
-                                    await callback({
-                                        text: data.message.content,
-                                        done: false
-                                    });
-                                }
-
-                                if (data.done) {
-                                    // Check for empty responses
-                                    if (!receivedAnyContent && fullText.trim() === "") {
-                                        // Generate a fallback response
-                                        const fallbackText = "I've processed your request but don't have a specific response for you at this time.";
-                                        await callback({
-                                            text: fallbackText,
-                                            done: false
-                                        });
-                                        fullText = fallbackText;
-                                    }
-
-                                    await callback({
-                                        text: "",
-                                        done: true,
-                                        usage: {
-                                            promptTokens: data.prompt_eval_count || 0,
-                                            completionTokens: data.eval_count || 0,
-                                            totalTokens: (data.prompt_eval_count || 0) + (data.eval_count || 0)
-                                        }
-                                    });
-                                }
-                            } catch (err) {
-                                console.error("Error parsing JSON from last line:", err, "Line:", partialLine);
-                            }
-                        }
-
-                        // If we reached the end without a done message and without any content
-                        if (!receivedAnyContent && fullText.trim() === "") {
-                            // Generate a fallback response
-                            const fallbackText = "I've processed your request but don't have a specific response for you at this time.";
-                            await callback({
-                                text: fallbackText,
-                                done: false
-                            });
-
-                            // Final message
-                            await callback({
-                                text: "",
-                                done: true,
-                                usage: {
-                                    promptTokens: 0,
-                                    completionTokens: 0,
-                                    totalTokens: 0
-                                }
-                            });
-                        }
-
-                        return fullText;
-                    } catch (err) {
-                        console.error("Error processing Ollama stream:", err);
-                        throw err;
-                    }
-                }
-            };
-            } else {
-                // Non-streaming response - explicitly request JSON format
-                console.log("Sending to Ollama with formatted messages:", JSON.stringify(formattedMessages, null, 2));
-
-                const response = await fetch(endpoint, {
-                    method: 'POST',
-                    headers: {
-                        'Content-Type': 'application/json'
-                    },
-                    body: JSON.stringify({
-                        model,
-                        messages: formattedMessages,
-                        stream: false,
-                        options: {
-                            temperature,
-                        }
-                    })
-                });
-
-                if (!response.ok) {
-                    const errorBody = await response.text();
-                    throw new Error(`Ollama API error: ${response.status} ${response.statusText} - ${errorBody}`);
-                }
-
-                const rawResponseText = await response.text();
-                console.log("Raw response from Ollama:", rawResponseText);
-
-                let data;
-                try {
-                    data = JSON.parse(rawResponseText);
-                    console.log("Parsed Ollama response:", JSON.stringify(data, null, 2));
-                } catch (err: any) {
-                    console.error("Error parsing JSON response from Ollama:", err);
-                    console.error("Raw response:", rawResponseText);
-                    throw new Error(`Failed to parse Ollama response as JSON: ${err.message}`);
-                }
-
-                // Check for empty or JSON object responses
-                const content = data.message?.content || '';
-                let finalResponseText = content;
-
-                if (content === '{}' || content === '{ }' || content === '{ }') {
-                    finalResponseText = "I don't have information about that in my notes.";
-                } else if (!content.trim()) {
-                    finalResponseText = "No response was generated. Please try asking a different question.";
-                }
-
-                return {
-                    text: finalResponseText,
-                    model: data.model || model,
-                    provider: this.getName(),
-                    usage: {
-                        promptTokens: data.prompt_eval_count || 0,
-                        completionTokens: data.eval_count || 0,
-                        totalTokens: (data.prompt_eval_count || 0) + (data.eval_count || 0)
-                    }
-                };
-            }
-        } catch (error: any) {
-            console.error("Ollama service error:", error);
-            throw new Error(`Ollama service error: ${error.message}`);
+            const data: OllamaResponse = await response.json();
+            console.log('Raw response from Ollama:', JSON.stringify(data, null, 2));
+            console.log('Parsed Ollama response:', JSON.stringify(data, null, 2));
+
+            return {
+                text: data.message.content,
+                model: data.model,
+                provider: this.getName(),
+                usage: {
+                    promptTokens: data.prompt_eval_count,
+                    completionTokens: data.eval_count,
+                    totalTokens: data.prompt_eval_count + data.eval_count
+                }
+            };
+        } catch (error) {
+            console.error('Ollama service error:', error);
+            throw error;
         }
     }
-
-    /**
-     * Clean up HTML and other problematic content before sending to Ollama
-     */
-    private cleanContextContent(content: string): string {
-        if (!content) return '';
-
-        try {
-            // First fix potential encoding issues
-            let sanitized = content
-                // Fix common encoding issues with quotes and special characters
-                .replace(/Γ\u00c2[\u00a3\u00a5]/g, '"') // Fix broken quote chars
-                .replace(/[\u00A0-\u9999]/g, match => {
-                    try {
-                        return encodeURIComponent(match).replace(/%/g, '');
-                    } catch (e) {
-                        return '';
-                    }
-                });
-
-            // Replace common HTML tags with markdown or plain text equivalents
-            sanitized = sanitized
-                // Remove HTML divs, spans, etc.
-                .replace(/<\/?div[^>]*>/g, '')
-                .replace(/<\/?span[^>]*>/g, '')
-                .replace(/<\/?p[^>]*>/g, '\n')
-                // Convert headers
-                .replace(/<h1[^>]*>(.*?)<\/h1>/gi, '# $1\n')
-                .replace(/<h2[^>]*>(.*?)<\/h2>/gi, '## $1\n')
-                .replace(/<h3[^>]*>(.*?)<\/h3>/gi, '### $1\n')
-                // Convert lists
-                .replace(/<\/?ul[^>]*>/g, '')
-                .replace(/<\/?ol[^>]*>/g, '')
-                .replace(/<li[^>]*>(.*?)<\/li>/gi, '- $1\n')
-                // Convert links
-                .replace(/<a[^>]*href=["'](.*?)["'][^>]*>(.*?)<\/a>/gi, '[$2]($1)')
-                // Convert code blocks
-                .replace(/<pre[^>]*><code[^>]*>(.*?)<\/code><\/pre>/gis, '```\n$1\n```')
-                .replace(/<code[^>]*>(.*?)<\/code>/gi, '`$1`')
-                // Convert emphasis
-                .replace(/<\/?strong[^>]*>/g, '**')
-                .replace(/<\/?em[^>]*>/g, '*')
-                // Remove figure tags
-                .replace(/<\/?figure[^>]*>/g, '')
-                // Remove all other HTML tags
-                .replace(/<[^>]*>/g, '')
-                // Fix double line breaks
-                .replace(/\n\s*\n\s*\n/g, '\n\n')
-                // Fix HTML entities
-                .replace(/&nbsp;/g, ' ')
-                .replace(/&lt;/g, '<')
-                .replace(/&gt;/g, '>')
-                .replace(/&amp;/g, '&')
-                .replace(/&quot;/g, '"')
-                // Final clean whitespace
-                .replace(/\s+/g, ' ')
-                .replace(/\n\s+/g, '\n')
-                .trim();
-
-            return sanitized;
-        } catch (error) {
-            console.error("Error cleaning context content:", error);
-            return content; // Return original if cleaning fails
-        }
-    }
-
-    /**
-     * Format messages for the Ollama API
-     */
-    private formatMessages(messages: Message[], systemPrompt: string): OllamaMessage[] {
-        const formattedMessages: OllamaMessage[] = [];
-        const MAX_SYSTEM_CONTENT_LENGTH = 4000;
-
-        // First identify user and system messages
-        const systemMessages = messages.filter(msg => msg.role === 'system');
-        const userMessages = messages.filter(msg => msg.role === 'user' || msg.role === 'assistant');
-
-        // In the case of Ollama, we need to ensure context is properly integrated
-        // The key insight is that simply including it in a system message doesn't work well
-
-        // Check if we have context (typically in the first system message)
-        let hasContext = false;
-        let contextContent = '';
-
-        if (systemMessages.length > 0) {
-            const potentialContext = systemMessages[0].content;
-            if (potentialContext && potentialContext.includes('# Context for your query')) {
-                hasContext = true;
-                contextContent = this.cleanContextContent(potentialContext);
-            }
-        }
-
-        // Create base system message with instructions
-        let basePrompt = systemPrompt ||
-            "You are an AI assistant integrated into TriliumNext Notes. " +
-            "Focus on helping users find information in their notes and answering questions based on their knowledge base. " +
-            "Be concise, informative, and direct when responding to queries.";
-
-        // If we have context, inject it differently - prepend it to the user's first question
-        if (hasContext && userMessages.length > 0) {
-            // Create initial system message with just the base prompt
-            formattedMessages.push({
-                role: 'system',
-                content: basePrompt
-            });
-
-            // For user messages, inject context into the first user message
-            let injectedContext = false;
-            for (let i = 0; i < userMessages.length; i++) {
-                const msg = userMessages[i];
-
-                if (msg.role === 'user' && !injectedContext) {
-                    // Format the context in a way Ollama can't ignore
-                    const formattedContext =
-                        "I need you to answer based on the following information from my notes:\n\n" +
-                        "-----BEGIN MY NOTES-----\n" +
-                        contextContent +
-                        "\n-----END MY NOTES-----\n\n" +
-                        "Based on these notes, please answer: " + msg.content;
-
-                    formattedMessages.push({
-                        role: 'user',
-                        content: formattedContext
-                    });
-
-                    injectedContext = true;
-                } else {
-                    formattedMessages.push({
-                        role: msg.role,
-                        content: msg.content
-                    });
-                }
-            }
-        } else {
-            // No context or empty context case
-            // Add system message (with system prompt)
-            if (systemPrompt) {
-                formattedMessages.push({
-                    role: 'system',
-                    content: systemPrompt
-                });
-            }
-
-            // Add all user and assistant messages as-is
-            for (const msg of userMessages) {
-                formattedMessages.push({
-                    role: msg.role,
-                    content: msg.content
-                });
-            }
-        }
-
-        console.log(`Formatted ${messages.length} messages into ${formattedMessages.length} messages for Ollama`);
-        console.log(`Context detected: ${hasContext ? 'Yes' : 'No'}`);
-
-        return formattedMessages;
-    }
 }
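The net effect of the rewrite is that the service now issues one non-streaming request. A sketch of the request body it builds (values illustrative, field names as used in the new code above):

```ts
// Illustrative payload for POST `${apiBase}/api/chat`
const body = {
    model: 'llama3',               // opts.model || ollamaDefaultModel || 'llama3'
    messages: [
        { role: 'system', content: '<base system prompt>' },
        { role: 'user', content: '<question with notes context injected by the formatter>' }
    ],
    options: { temperature: 0.7 }, // opts.temperature, falling back to the aiTemperature option
    stream: false
};
// The response's message.content, prompt_eval_count, and eval_count map onto
// the ChatResponse text and usage fields returned to callers.
```

Since `stream: false` is now hard-coded, callers that previously passed `opts.stream === true` no longer receive incremental output; the streaming path was removed wholesale in this commit.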