feat(llm): enable cache control in Anthropic

2026-06-27 17:27:26 +02:00 · 2026-03-30 18:26:49 +03:00
parent d28318005d
commit 5feccae2a0
1 changed files with 29 additions and 7 deletions
--- a/apps/server/src/services/llm/providers/anthropic.ts
+++ b/apps/server/src/services/llm/providers/anthropic.ts
@@ -133,11 +133,34 @@ export class AnthropicProvider implements LlmProvider {
            }
        }

-        // Convert to AI SDK message format
-        const coreMessages: CoreMessage[] = chatMessages.map(m => ({
-            role: m.role as "user" | "assistant",
-            content: m.content
-        }));
+        // Convert to AI SDK message format with cache control breakpoints.
+        // The system prompt and conversation history (all but the last user message)
+        // are stable across turns, so we mark them for caching to reduce costs.
+        const CACHE_CONTROL = { anthropic: { cacheControl: { type: "ephemeral" as const } } };
+
+        const coreMessages: CoreMessage[] = [];
+
+        // System prompt as a cacheable message
+        if (systemPrompt) {
+            coreMessages.push({
+                role: "system",
+                content: systemPrompt,
+                providerOptions: CACHE_CONTROL
+            });
+        }
+
+        // Conversation messages
+        for (let i = 0; i < chatMessages.length; i++) {
+            const m = chatMessages[i];
+            const isLastBeforeNewTurn = i === chatMessages.length - 2;
+            coreMessages.push({
+                role: m.role as "user" | "assistant",
+                content: m.content,
+                // Cache breakpoint on the second-to-last message:
+                // everything up to here is identical across consecutive turns.
+                ...(isLastBeforeNewTurn && { providerOptions: CACHE_CONTROL })
+            });
+        }

        const model = this.anthropic(config.model || DEFAULT_MODEL);

@@ -145,8 +168,7 @@ export class AnthropicProvider implements LlmProvider {
        const streamOptions: Parameters<typeof streamText>[0] = {
            model,
            messages: coreMessages,
-            maxOutputTokens: config.maxTokens || DEFAULT_MAX_TOKENS,
-            system: systemPrompt
+            maxOutputTokens: config.maxTokens || DEFAULT_MAX_TOKENS
        };

        // Enable extended thinking for deeper reasoning