feat(ocr): not well integrated with sync

This commit is contained in:
Elian Doran
2026-04-02 11:43:19 +03:00
parent 650b700415
commit b4e5d9dbc2
5 changed files with 44 additions and 14 deletions

View File

@@ -10,7 +10,7 @@ class BBlob extends AbstractBeccaEntity<BBlob> {
return "blobId";
}
/**
 * Returns the list of property names declared as "hashed" for this entity.
 * NOTE(review): the name suggests these feed the entity hash computed by the
 * base class; the consumer is not visible here — confirm.
 */
static get hashedProperties() {
// NOTE(review): this text is a diff rendering without +/- markers. The first
// return is presumably the pre-change line and the second its replacement
// (i.e. "textRepresentation" is dropped from the list) — confirm against the
// actual file before relying on either line.
return ["blobId", "content", "textRepresentation"];
return ["blobId", "content"];
}
content!: string | Buffer;
@@ -41,6 +41,11 @@ class BBlob extends AbstractBeccaEntity<BBlob> {
utcDateModified: this.utcDateModified
};
}
/**
 * Builds the object that gets saved: everything returned by `getPojo()`
 * except the `contentLength` property, which is stripped out.
 */
protected getPojoToSave() {
    const fullPojo = this.getPojo();
    // Pull `contentLength` out so that only the remaining fields are returned.
    const { contentLength: _contentLength, ...persisted } = fullPojo;
    return persisted;
}
}
export default BBlob;

View File

@@ -1,5 +1,6 @@
/**
 * Shape of a row read from the `blobs` table, as consumed by the content-hash
 * and entity-change code.
 */
export interface Blob {
/** Identifier of the blob; used as the key in `WHERE blobId = ?` lookups. */
blobId: string;
/** Blob payload, either as text or as a binary buffer. */
content: string | Buffer;
/**
 * Text extracted from the content (the OCR service stores its result in this
 * column and resets it to NULL on delete/invalidate). Absent or null when
 * there is no extracted text.
 */
textRepresentation?: string | null;
/** Modification timestamp. NOTE(review): presumably a UTC ISO-8601 string — confirm. */
utcDateModified: string;
}

View File

@@ -50,8 +50,8 @@ function processContent(content: Buffer | string | null, isProtected: boolean, i
}
}
// NOTE(review): this text is a diff rendering without +/- markers. The next two
// lines are presumably the pre-change signature and body; the two after them
// are their replacement, closed by the single brace below.
function calculateContentHash({ blobId, content }: Blob) {
return hash(`${blobId}|${content.toString()}`);
/**
 * Computes the hash of a blob from its ID, its content (stringified) and its
 * text representation, joined with `|`. A missing or null text representation
 * is hashed as an empty string.
 *
 * NOTE(review): for a blob without text the hashed string is now
 * `blobId|content|` (trailing separator), which differs from the previous
 * `blobId|content` input — confirm that hashes stored before this change are
 * not expected to match newly computed ones.
 */
function calculateContentHash({ blobId, content, textRepresentation }: Blob) {
return hash(`${blobId}|${content.toString()}|${textRepresentation ?? ""}`);
}
export default {

View File

@@ -146,7 +146,7 @@ function fillEntityChanges(entityName: string, entityPrimaryKey: string, conditi
};
if (entityName === "blobs") {
const blob = sql.getRow<Blob>("SELECT blobId, content, utcDateModified FROM blobs WHERE blobId = ?", [entityId]);
const blob = sql.getRow<Blob>("SELECT blobId, content, textRepresentation, utcDateModified FROM blobs WHERE blobId = ?", [entityId]);
ec.hash = blobService.calculateContentHash(blob);
ec.utcDateChanged = blob.utcDateModified;
ec.isSynced = true; // blobs are always synced

View File

@@ -2,6 +2,8 @@ import { getTesseractCode } from '@triliumnext/commons';
import Tesseract from 'tesseract.js';
import becca from '../../becca/becca.js';
import blobService from '../blob.js';
import entityChangesService from '../entity_changes.js';
import log from '../log.js';
import options from '../options.js';
import sql from '../sql.js';
@@ -277,17 +279,14 @@ class OCRService {
}
try {
// Store OCR text and timestamp in blobs table
sql.execute(`
UPDATE blobs SET
textRepresentation = ?,
textExtractionLastProcessed = ?
WHERE blobId = ?
`, [
ocrResult.text,
new Date().toISOString(),
blobId
]);
`, [ocrResult.text, new Date().toISOString(), blobId]);
this.putBlobEntityChange(blobId);
log.info(`Stored OCR result for blob ${blobId}`);
} catch (error) {
@@ -363,10 +362,12 @@ class OCRService {
deleteOCRResult(blobId: string): void {
try {
sql.execute(`
UPDATE blobs SET textRepresentation = NULL
UPDATE blobs SET textRepresentation = NULL, textExtractionLastProcessed = NULL
WHERE blobId = ?
`, [blobId]);
this.putBlobEntityChange(blobId);
log.info(`Deleted OCR result for blob ${blobId}`);
} catch (error) {
log.error(`Failed to delete OCR result for blob ${blobId}: ${error}`);
@@ -558,6 +559,29 @@ class OCRService {
/**
 * Notifies the sync system that a blob has changed, without modifying the blob's identity.
 *
 * Looks the blob up through becca, recomputes its content hash (which includes
 * `textRepresentation`) and records an entity change for the `blobs` entity
 * with `isSynced: true` and `isErased: false`. It is called right after the
 * `UPDATE blobs ...` statements that store, delete or invalidate OCR text.
 *
 * Does nothing when the blob cannot be found or has no `blobId`.
 *
 * @param blobId - ID of the blob whose row was just updated.
 */
private putBlobEntityChange(blobId: string): void {
    // NOTE(review): assumes becca.getBlob() reflects the UPDATE the caller has
    // just executed (i.e. is not served from a stale cache) — confirm.
    const blob = becca.getBlob({ blobId });
    if (!blob || !blob.blobId) return;

    // utcDateModified is passed only to satisfy the Blob shape; the hash itself
    // is derived from blobId, content and textRepresentation.
    const hash = blobService.calculateContentHash({
        blobId: blob.blobId,
        content: blob.content,
        textRepresentation: blob.textRepresentation,
        utcDateModified: blob.utcDateModified!
    });

    // NOTE(review): utcDateChanged is taken from the blob's utcDateModified, and
    // the UPDATE statements that precede calls to this method do not touch that
    // column, so the timestamp stays unchanged while the hash changes — confirm
    // that sync consumers accept this.
    entityChangesService.putEntityChange({
        entityName: "blobs",
        entityId: blobId,
        hash,
        isErased: false,
        utcDateChanged: blob.utcDateModified,
        isSynced: true
    });
}

/**
 * Get processor for a given MIME type
 */
private getProcessorForMimeType(mimeType: string): FileProcessor | null {
for (const processor of this.processors.values()) {
if (processor.canProcess(mimeType)) {
@@ -641,12 +665,12 @@ class OCRService {
try {
sql.execute(`
UPDATE blobs SET
textRepresentation = NULL,
textExtractionLastProcessed = NULL
UPDATE blobs SET textRepresentation = NULL, textExtractionLastProcessed = NULL
WHERE blobId = ?
`, [blobId]);
this.putBlobEntityChange(blobId);
log.info(`Invalidated OCR result for blob ${blobId}`);
} catch (error) {
log.error(`Failed to invalidate OCR result for blob ${blobId}: ${error}`);