diff --git a/apps/server/src/app.ts b/apps/server/src/app.ts index 3c5f4bdb72..bb9097ba63 100644 --- a/apps/server/src/app.ts +++ b/apps/server/src/app.ts @@ -55,7 +55,16 @@ export default async function buildApp() { }); if (!utils.isElectron) { - app.use(compression()); // HTTP compression + app.use(compression({ + // Skip compression for SSE endpoints to enable real-time streaming + filter: (req, res) => { + // Skip compression for LLM chat streaming endpoint + if (req.path === "/api/llm-chat/stream") { + return false; + } + return compression.filter(req, res); + } + })); } let resourcePolicy = config["Network"]["corsResourcePolicy"] as 'same-origin' | 'same-site' | 'cross-origin' | undefined; diff --git a/apps/server/src/routes/api/llm_chat.ts b/apps/server/src/routes/api/llm_chat.ts index b8293d02e0..672df99a4c 100644 --- a/apps/server/src/routes/api/llm_chat.ts +++ b/apps/server/src/routes/api/llm_chat.ts @@ -25,21 +25,29 @@ async function streamChat(req: Request, res: Response) { return; } - // Set up SSE headers + // Set up SSE headers - disable compression and buffering for real-time streaming res.setHeader("Content-Type", "text/event-stream"); - res.setHeader("Cache-Control", "no-cache"); + res.setHeader("Cache-Control", "no-cache, no-transform"); res.setHeader("Connection", "keep-alive"); res.setHeader("X-Accel-Buffering", "no"); // Disable nginx buffering + res.setHeader("Content-Encoding", "none"); // Disable compression res.flushHeaders(); // Mark response as handled to prevent double-handling by apiResultHandler (res as any).triliumResponseHandled = true; + // Type assertion for flush method (available when compression is used) + const flushableRes = res as Response & { flush?: () => void }; + try { const provider = getProvider(config.provider || "anthropic"); for await (const chunk of provider.streamCompletion(messages, config)) { res.write(`data: ${JSON.stringify(chunk)}\n\n`); + // Flush immediately to ensure real-time streaming + if (typeof flushableRes.flush === "function") { + flushableRes.flush(); + } } } catch (error) { const errorMessage = error instanceof Error ? error.message : "Unknown error";