diff --git a/apps/server/src/becca/entities/bblob.ts b/apps/server/src/becca/entities/bblob.ts index c9365b9167..839eca153a 100644 --- a/apps/server/src/becca/entities/bblob.ts +++ b/apps/server/src/becca/entities/bblob.ts @@ -10,7 +10,7 @@ class BBlob extends AbstractBeccaEntity { return "blobId"; } static get hashedProperties() { - return ["blobId", "content", "textRepresentation"]; + return ["blobId", "content"]; } content!: string | Buffer; @@ -41,6 +41,11 @@ class BBlob extends AbstractBeccaEntity { utcDateModified: this.utcDateModified }; } + + protected getPojoToSave() { + const { contentLength: _, ...pojo } = this.getPojo(); + return pojo; + } } export default BBlob; diff --git a/apps/server/src/services/blob-interface.ts b/apps/server/src/services/blob-interface.ts index a0e6052785..154a3048e4 100644 --- a/apps/server/src/services/blob-interface.ts +++ b/apps/server/src/services/blob-interface.ts @@ -1,5 +1,6 @@ export interface Blob { blobId: string; content: string | Buffer; + textRepresentation?: string | null; utcDateModified: string; } diff --git a/apps/server/src/services/blob.ts b/apps/server/src/services/blob.ts index c4684c2ae5..fa8bbd2b61 100644 --- a/apps/server/src/services/blob.ts +++ b/apps/server/src/services/blob.ts @@ -50,8 +50,8 @@ function processContent(content: Buffer | string | null, isProtected: boolean, i } } -function calculateContentHash({ blobId, content }: Blob) { - return hash(`${blobId}|${content.toString()}`); +function calculateContentHash({ blobId, content, textRepresentation }: Blob) { + return hash(`${blobId}|${content.toString()}|${textRepresentation ?? ""}`); } export default { diff --git a/apps/server/src/services/entity_changes.ts b/apps/server/src/services/entity_changes.ts index c0a97c7d6b..cba9584d2c 100644 --- a/apps/server/src/services/entity_changes.ts +++ b/apps/server/src/services/entity_changes.ts @@ -146,7 +146,7 @@ function fillEntityChanges(entityName: string, entityPrimaryKey: string, conditi }; if (entityName === "blobs") { - const blob = sql.getRow("SELECT blobId, content, utcDateModified FROM blobs WHERE blobId = ?", [entityId]); + const blob = sql.getRow("SELECT blobId, content, textRepresentation, utcDateModified FROM blobs WHERE blobId = ?", [entityId]); ec.hash = blobService.calculateContentHash(blob); ec.utcDateChanged = blob.utcDateModified; ec.isSynced = true; // blobs are always synced diff --git a/apps/server/src/services/ocr/ocr_service.ts b/apps/server/src/services/ocr/ocr_service.ts index 842ee00196..3e39ed6e9c 100644 --- a/apps/server/src/services/ocr/ocr_service.ts +++ b/apps/server/src/services/ocr/ocr_service.ts @@ -2,6 +2,8 @@ import { getTesseractCode } from '@triliumnext/commons'; import Tesseract from 'tesseract.js'; import becca from '../../becca/becca.js'; +import blobService from '../blob.js'; +import entityChangesService from '../entity_changes.js'; import log from '../log.js'; import options from '../options.js'; import sql from '../sql.js'; @@ -277,17 +279,14 @@ class OCRService { } try { - // Store OCR text and timestamp in blobs table sql.execute(` UPDATE blobs SET textRepresentation = ?, textExtractionLastProcessed = ? WHERE blobId = ? - `, [ - ocrResult.text, - new Date().toISOString(), - blobId - ]); + `, [ocrResult.text, new Date().toISOString(), blobId]); + + this.putBlobEntityChange(blobId); log.info(`Stored OCR result for blob ${blobId}`); } catch (error) { @@ -363,10 +362,12 @@ class OCRService { deleteOCRResult(blobId: string): void { try { sql.execute(` - UPDATE blobs SET textRepresentation = NULL + UPDATE blobs SET textRepresentation = NULL, textExtractionLastProcessed = NULL WHERE blobId = ? `, [blobId]); + this.putBlobEntityChange(blobId); + log.info(`Deleted OCR result for blob ${blobId}`); } catch (error) { log.error(`Failed to delete OCR result for blob ${blobId}: ${error}`); @@ -558,6 +559,29 @@ class OCRService { /** * Get processor for a given MIME type */ + /** + * Notifies the sync system that a blob has changed, without modifying the blob's identity. + */ + private putBlobEntityChange(blobId: string): void { + const blob = becca.getBlob({ blobId }); + if (!blob || !blob.blobId) return; + + const hash = blobService.calculateContentHash({ + blobId: blob.blobId, + content: blob.content, + textRepresentation: blob.textRepresentation, + utcDateModified: blob.utcDateModified! + }); + entityChangesService.putEntityChange({ + entityName: "blobs", + entityId: blobId, + hash, + isErased: false, + utcDateChanged: blob.utcDateModified, + isSynced: true + }); + } + private getProcessorForMimeType(mimeType: string): FileProcessor | null { for (const processor of this.processors.values()) { if (processor.canProcess(mimeType)) { @@ -641,12 +665,12 @@ class OCRService { try { sql.execute(` - UPDATE blobs SET - textRepresentation = NULL, - textExtractionLastProcessed = NULL + UPDATE blobs SET textRepresentation = NULL, textExtractionLastProcessed = NULL WHERE blobId = ? `, [blobId]); + this.putBlobEntityChange(blobId); + log.info(`Invalidated OCR result for blob ${blobId}`); } catch (error) { log.error(`Failed to invalidate OCR result for blob ${blobId}: ${error}`);