diff --git a/apps/client/src/services/content_renderer.ts b/apps/client/src/services/content_renderer.ts index 95f186c602..f554eb55ac 100644 --- a/apps/client/src/services/content_renderer.ts +++ b/apps/client/src/services/content_renderer.ts @@ -32,7 +32,7 @@ export interface RenderOptions { includeArchivedNotes?: boolean; /** Set of note IDs that have already been seen during rendering to prevent infinite recursion. */ seenNoteIds?: Set; - showOcrText?: boolean; + showTextRepresentation?: boolean; } const CODE_MIME_TYPES = new Set(["application/json"]); @@ -181,7 +181,7 @@ async function renderImage(entity: FNote | FAttachment, $renderedContent: JQuery imageContextMenuService.setupContextMenu($img); // Add OCR text display for image notes - if (entity instanceof FNote && options.showOcrText) { + if (entity instanceof FNote && options.showTextRepresentation) { await addOCRTextIfAvailable(entity, $renderedContent); } } @@ -252,7 +252,7 @@ async function renderFile(entity: FNote | FAttachment, type: string, $renderedCo } // Add OCR text display for file notes - if (entity instanceof FNote && options.showOcrText) { + if (entity instanceof FNote && options.showTextRepresentation) { await addOCRTextIfAvailable(entity, $content); } diff --git a/apps/server/src/assets/db/schema.sql b/apps/server/src/assets/db/schema.sql index a985e8a837..e513dd123f 100644 --- a/apps/server/src/assets/db/schema.sql +++ b/apps/server/src/assets/db/schema.sql @@ -107,7 +107,7 @@ CREATE TABLE IF NOT EXISTS "recent_notes" CREATE TABLE IF NOT EXISTS "blobs" ( `blobId` TEXT NOT NULL, `content` TEXT NULL DEFAULT NULL, - `ocr_text` TEXT DEFAULT NULL, + `textRepresentation` TEXT DEFAULT NULL, `ocr_last_processed` TEXT DEFAULT NULL, `dateModified` TEXT NOT NULL, `utcDateModified` TEXT NOT NULL, diff --git a/apps/server/src/becca/entities/bblob.ts b/apps/server/src/becca/entities/bblob.ts index a4dbd712f7..c9365b9167 100644 --- a/apps/server/src/becca/entities/bblob.ts +++ b/apps/server/src/becca/entities/bblob.ts @@ -10,12 +10,12 @@ class BBlob extends AbstractBeccaEntity { return "blobId"; } static get hashedProperties() { - return ["blobId", "content", "ocr_text"]; + return ["blobId", "content", "textRepresentation"]; } content!: string | Buffer; contentLength!: number; - ocr_text?: string | null; + textRepresentation?: string | null; constructor(row: BlobRow) { super(); @@ -26,7 +26,7 @@ class BBlob extends AbstractBeccaEntity { this.blobId = row.blobId; this.content = row.content; this.contentLength = row.contentLength; - this.ocr_text = row.ocr_text; + this.textRepresentation = row.textRepresentation; this.dateModified = row.dateModified; this.utcDateModified = row.utcDateModified; } @@ -36,7 +36,7 @@ class BBlob extends AbstractBeccaEntity { blobId: this.blobId, content: this.content || null, contentLength: this.contentLength, - ocr_text: this.ocr_text || null, + textRepresentation: this.textRepresentation || null, dateModified: this.dateModified, utcDateModified: this.utcDateModified }; diff --git a/apps/server/src/migrations/migrations.ts b/apps/server/src/migrations/migrations.ts index 7eff7d3ec1..70f99d584c 100644 --- a/apps/server/src/migrations/migrations.ts +++ b/apps/server/src/migrations/migrations.ts @@ -6,19 +6,19 @@ // Migrations should be kept in descending order, so the latest migration is first. const MIGRATIONS: (SqlMigration | JsMigration)[] = [ - // Add OCR text column and last processed timestamp to blobs table + // Add text representation column and last processed timestamp to blobs table { version: 236, sql: /*sql*/`\ - -- Add OCR text column to blobs table - ALTER TABLE blobs ADD COLUMN ocr_text TEXT DEFAULT NULL; + -- Add text representation column to blobs table + ALTER TABLE blobs ADD COLUMN textRepresentation TEXT DEFAULT NULL; -- Add OCR last processed timestamp to blobs table ALTER TABLE blobs ADD COLUMN ocr_last_processed TEXT DEFAULT NULL; - -- Create index for OCR text searches - CREATE INDEX IF NOT EXISTS idx_blobs_ocr_text - ON blobs (ocr_text); + -- Create index for text representation searches + CREATE INDEX IF NOT EXISTS idx_blobs_textRepresentation + ON blobs (textRepresentation); -- Create index for OCR last processed timestamp CREATE INDEX IF NOT EXISTS idx_blobs_ocr_last_processed diff --git a/apps/server/src/routes/api/ocr.ts b/apps/server/src/routes/api/ocr.ts index a44da203e6..6a32be8615 100644 --- a/apps/server/src/routes/api/ocr.ts +++ b/apps/server/src/routes/api/ocr.ts @@ -569,16 +569,16 @@ async function getNoteOCRText(req: Request, res: Response) { if (note.blobId) { const result = sql.getRow<{ - ocr_text: string | null; + textRepresentation: string | null; ocr_last_processed: string | null; }>(` - SELECT ocr_text, ocr_last_processed + SELECT textRepresentation, ocr_last_processed FROM blobs WHERE blobId = ? `, [note.blobId]); if (result) { - ocrText = result.ocr_text; + ocrText = result.textRepresentation; extractedAt = result.ocr_last_processed; } } diff --git a/apps/server/src/services/ocr/ocr_service.spec.ts b/apps/server/src/services/ocr/ocr_service.spec.ts index 6313ce99f3..a53a44e178 100644 --- a/apps/server/src/services/ocr/ocr_service.spec.ts +++ b/apps/server/src/services/ocr/ocr_service.spec.ts @@ -251,7 +251,7 @@ describe('OCRService', () => { await ocrService.storeOCRResult('blob123', ocrResult); expect(mockSql.execute).toHaveBeenCalledWith( - expect.stringContaining('UPDATE blobs SET ocr_text = ?'), + expect.stringContaining('UPDATE blobs SET textRepresentation = ?'), ['Sample text', 'blob123'] ); }); @@ -331,7 +331,7 @@ describe('OCRService', () => { it('should return existing OCR result if forceReprocess is false', async () => { const existingResult = { - ocr_text: 'Existing text' + textRepresentation: 'Existing text' }; mockSql.getRow.mockReturnValue(existingResult); @@ -348,7 +348,7 @@ describe('OCRService', () => { it('should reprocess if forceReprocess is true', async () => { const existingResult = { - ocr_text: 'Existing text' + textRepresentation: 'Existing text' }; mockSql.getRow.mockResolvedValue(existingResult); @@ -445,7 +445,7 @@ describe('OCRService', () => { const mockResults = [ { blobId: 'blob1', - ocr_text: 'Sample search text' + textRepresentation: 'Sample search text' } ]; mockSql.getRows.mockReturnValue(mockResults); @@ -457,7 +457,7 @@ describe('OCRService', () => { text: 'Sample search text' }]); expect(mockSql.getRows).toHaveBeenCalledWith( - expect.stringContaining('WHERE ocr_text LIKE ?'), + expect.stringContaining('WHERE textRepresentation LIKE ?'), ['%search%'] ); }); @@ -851,7 +851,7 @@ describe('OCRService', () => { ocrService.deleteOCRResult('blob123'); expect(mockSql.execute).toHaveBeenCalledWith( - expect.stringContaining('UPDATE blobs SET ocr_text = NULL'), + expect.stringContaining('UPDATE blobs SET textRepresentation = NULL'), ['blob123'] ); expect(mockLog.info).toHaveBeenCalledWith('Deleted OCR result for blob blob123'); diff --git a/apps/server/src/services/ocr/ocr_service.ts b/apps/server/src/services/ocr/ocr_service.ts index 1e47b7e650..e1f0c96e06 100644 --- a/apps/server/src/services/ocr/ocr_service.ts +++ b/apps/server/src/services/ocr/ocr_service.ts @@ -26,7 +26,7 @@ export interface OCRProcessingOptions { interface OCRBlobRow { blobId: string; - ocr_text: string; + textRepresentation: string; ocr_last_processed?: string; } @@ -235,7 +235,7 @@ class OCRService { // Store OCR text and timestamp in blobs table sql.execute(` UPDATE blobs SET - ocr_text = ?, + textRepresentation = ?, ocr_last_processed = ? WHERE blobId = ? `, [ @@ -261,14 +261,14 @@ class OCRService { try { const row = sql.getRow<{ - ocr_text: string | null; + textRepresentation: string | null; }>(` - SELECT ocr_text + SELECT textRepresentation FROM blobs WHERE blobId = ? `, [blobId]); - if (!row || !row.ocr_text) { + if (!row || !row.textRepresentation) { return null; } @@ -276,7 +276,7 @@ class OCRService { // Note: we lose confidence, language, and extractedAt metadata // but gain simplicity by storing directly in blob return { - text: row.ocr_text, + text: row.textRepresentation, confidence: 0.95, // Default high confidence for existing OCR extractedAt: new Date().toISOString(), language: 'eng' @@ -293,10 +293,10 @@ class OCRService { searchOCRResults(searchText: string): Array<{ blobId: string; text: string }> { try { const query = ` - SELECT blobId, ocr_text + SELECT blobId, textRepresentation FROM blobs - WHERE ocr_text LIKE ? - AND ocr_text IS NOT NULL + WHERE textRepresentation LIKE ? + AND textRepresentation IS NOT NULL `; const params = [`%${searchText}%`]; @@ -304,7 +304,7 @@ class OCRService { return rows.map(row => ({ blobId: row.blobId, - text: row.ocr_text + text: row.textRepresentation })); } catch (error) { log.error(`Failed to search OCR results: ${error}`); @@ -318,7 +318,7 @@ class OCRService { deleteOCRResult(blobId: string): void { try { sql.execute(` - UPDATE blobs SET ocr_text = NULL + UPDATE blobs SET textRepresentation = NULL WHERE blobId = ? `, [blobId]); @@ -346,7 +346,7 @@ class OCRService { }>(` SELECT COUNT(*) as total_processed FROM blobs - WHERE ocr_text IS NOT NULL AND ocr_text != '' + WHERE textRepresentation IS NOT NULL AND textRepresentation != '' `); // Count image notes with OCR @@ -358,7 +358,7 @@ class OCRService { JOIN blobs b ON n.blobId = b.blobId WHERE n.type = 'image' AND n.isDeleted = 0 - AND b.ocr_text IS NOT NULL AND b.ocr_text != '' + AND b.textRepresentation IS NOT NULL AND b.textRepresentation != '' `); // Count image attachments with OCR @@ -370,7 +370,7 @@ class OCRService { JOIN blobs b ON a.blobId = b.blobId WHERE a.role = 'image' AND a.isDeleted = 0 - AND b.ocr_text IS NOT NULL AND b.ocr_text != '' + AND b.textRepresentation IS NOT NULL AND b.textRepresentation != '' `); return { @@ -591,7 +591,7 @@ class OCRService { } /** - * Invalidate OCR results for a blob (clear ocr_text and ocr_last_processed) + * Invalidate OCR results for a blob (clear textRepresentation and ocr_last_processed) */ invalidateOCRResult(blobId: string): void { if (!blobId) { @@ -601,7 +601,7 @@ class OCRService { try { sql.execute(` UPDATE blobs SET - ocr_text = NULL, + textRepresentation = NULL, ocr_last_processed = NULL WHERE blobId = ? `, [blobId]); diff --git a/apps/server/src/services/search/expressions/ocr_content.ts b/apps/server/src/services/search/expressions/ocr_content.ts index 8da5e589e5..9401ada0b5 100644 --- a/apps/server/src/services/search/expressions/ocr_content.ts +++ b/apps/server/src/services/search/expressions/ocr_content.ts @@ -72,23 +72,23 @@ export default class OCRContentExpression extends Expression { private searchOCRContent(searchText: string): Array<{ blobId: string; - ocr_text: string; + textRepresentation: string; }> { try { // Search in blobs table for OCR text const query = ` - SELECT blobId, ocr_text + SELECT blobId, textRepresentation FROM blobs - WHERE ocr_text LIKE ? - AND ocr_text IS NOT NULL - AND ocr_text != '' + WHERE textRepresentation LIKE ? + AND textRepresentation IS NOT NULL + AND textRepresentation != '' LIMIT 50 `; const params = [`%${searchText}%`]; return sql.getRows<{ blobId: string; - ocr_text: string; + textRepresentation: string; }>(query, params); } catch (error) { console.error('Error searching OCR content:', error); diff --git a/apps/server/src/services/search/search_result.ts b/apps/server/src/services/search/search_result.ts index fd25b74ece..18ee711879 100644 --- a/apps/server/src/services/search/search_result.ts +++ b/apps/server/src/services/search/search_result.ts @@ -140,10 +140,10 @@ class SearchResult { // Search for OCR results for this note and its attachments const ocrResults = sql.getRows(` - SELECT b.ocr_text + SELECT b.textRepresentation FROM blobs b - WHERE b.ocr_text IS NOT NULL - AND b.ocr_text != '' + WHERE b.textRepresentation IS NOT NULL + AND b.textRepresentation != '' AND ( b.blobId = (SELECT blobId FROM notes WHERE noteId = ? AND isDeleted = 0) OR b.blobId IN ( @@ -152,9 +152,9 @@ class SearchResult { ) `, [this.noteId, this.noteId]); - for (const ocrResult of ocrResults as Array<{ocr_text: string}>) { + for (const ocrResult of ocrResults as Array<{textRepresentation: string}>) { // Add score for OCR text matches - this.addScoreForStrings(tokens, ocrResult.ocr_text, factor); + this.addScoreForStrings(tokens, ocrResult.textRepresentation, factor); } } catch (error) { // Silently fail if OCR service is not available diff --git a/packages/commons/src/lib/rows.ts b/packages/commons/src/lib/rows.ts index dc23f0d695..200b567023 100644 --- a/packages/commons/src/lib/rows.ts +++ b/packages/commons/src/lib/rows.ts @@ -72,7 +72,7 @@ export interface BlobRow { blobId: string; content: string | Buffer; contentLength: number; - ocr_text?: string | null; + textRepresentation?: string | null; dateModified: string; utcDateModified: string; }