From d79d2e9ad29f30acb0e965356f5f71ccec3db92d Mon Sep 17 00:00:00 2001
From: Elian Doran <contact@eliandoran.me>
Date: Thu, 2 Apr 2026 20:58:11 +0300
Subject: [PATCH] fix(ocr): too many blob queries in search

---
 .../search/expressions/ocr_content.ts         | 57 +++++++++++--------
 1 file changed, 34 insertions(+), 23 deletions(-)
diff --git a/apps/server/src/services/search/expressions/ocr_content.ts b/apps/server/src/services/search/expressions/ocr_content.ts
index 277df2cfa8..2b3472b1bb 100644
--- a/apps/server/src/services/search/expressions/ocr_content.ts
+++ b/apps/server/src/services/search/expressions/ocr_content.ts
@@ -1,4 +1,5 @@
 import becca from "../../../becca/becca.js";
+import sql from "../../sql.js";
 import NoteSet from "../note_set.js";
 import type SearchContext from "../search_context.js";
 import Expression from "./expression.js";
@@ -6,6 +7,9 @@ import Expression from "./expression.js";
 /**
  * Search expression for finding text within OCR-extracted content (textRepresentation)
  * from image notes and their attachments.
+ *
+ * Uses two SQL queries (one over notes, one over attachments) to find all noteIds whose
+ * own blob or attachment blobs contain matching text, then intersects with the input note set.
  */
 export default class OCRContentExpression extends Expression {
     private tokens: string[];
@@ -17,9 +21,11 @@ export default class OCRContentExpression extends Expression {
 
     execute(inputNoteSet: NoteSet, executionContext: object, searchContext: SearchContext): NoteSet {
         const resultNoteSet = new NoteSet();
+        const matchingNoteIds = this.findNoteIdsWithMatchingOCR();
 
-        for (const note of inputNoteSet.notes) {
-            if (this.noteMatchesOCR(note.noteId)) {
+        for (const noteId of matchingNoteIds) {
+            const note = becca.notes[noteId];
+            if (note && inputNoteSet.hasNoteId(noteId)) {
                 resultNoteSet.add(note);
             }
         }
@@ -35,32 +41,37 @@ export default class OCRContentExpression extends Expression {
     }
 
     /**
-     * Check if a note (or its attachments) has OCR text matching all tokens.
+     * Find every noteId whose own blob, or one of whose attachment blobs, has OCR text containing all tokens
+     * (all tokens in the same blob). Runs two parameterized queries; returns an empty set when there are no tokens.
      */
-    private noteMatchesOCR(noteId: string): boolean {
-        const note = becca.notes[noteId];
-        if (!note) return false;
+    private findNoteIdsWithMatchingOCR(): Set<string> {
+        if (this.tokens.length === 0) return new Set();
 
-        // Collect all textRepresentation values for this note
-        const texts: string[] = [];
+        // One LIKE condition per token, ANDed together; `\`, `%` and `_` in a token are escaped so they match literally.
+        const likeConditions = this.tokens.map(() => `b.textRepresentation LIKE ? ESCAPE '\\'`).join(' AND ');
+        const params = this.tokens.map(token => `%${token.replace(/[\\%_]/g, "\\$&")}%`);
 
-        const noteBlob = becca.getBlob({ blobId: note.blobId });
-        if (noteBlob?.textRepresentation) {
-            texts.push(noteBlob.textRepresentation.toLowerCase());
-        }
+        // Non-deleted notes whose own blob matches. NOTE(review): LIKE case-folding may be ASCII-only — confirm for non-ASCII text.
+        const noteIds = sql.getColumn<string>(`
+            SELECT n.noteId
+            FROM notes n
+            JOIN blobs b ON n.blobId = b.blobId
+            WHERE b.textRepresentation IS NOT NULL
+              AND n.isDeleted = 0
+              AND ${likeConditions}
+        `, params);
 
-        for (const attachment of note.getAttachments()) {
-            const blob = becca.getBlob({ blobId: attachment.blobId });
-            if (blob?.textRepresentation) {
-                texts.push(blob.textRepresentation.toLowerCase());
-            }
-        }
+        // Owners of non-deleted attachments whose blob matches; ownerId values are returned as-is.
+        const attachmentOwnerIds = sql.getColumn<string>(`
+            SELECT a.ownerId
+            FROM attachments a
+            JOIN blobs b ON a.blobId = b.blobId
+            WHERE b.textRepresentation IS NOT NULL
+              AND a.isDeleted = 0
+              AND ${likeConditions}
+        `, params);
 
-        if (texts.length === 0) return false;
-
-        // All tokens must appear in at least one of the text representations
-        const combined = texts.join(" ");
-        return this.tokens.every(token => combined.includes(token.toLowerCase()));
+        return new Set([...noteIds, ...attachmentOwnerIds]);
     }
 
     toString(): string {