mirror of
https://github.com/zadam/trilium.git
synced 2026-07-04 12:57:31 +02:00
feat(ocr): not well integrated with sync
This commit is contained in:
@@ -10,7 +10,7 @@ class BBlob extends AbstractBeccaEntity<BBlob> {
|
||||
return "blobId";
|
||||
}
|
||||
static get hashedProperties() {
|
||||
return ["blobId", "content", "textRepresentation"];
|
||||
return ["blobId", "content"];
|
||||
}
|
||||
|
||||
content!: string | Buffer;
|
||||
@@ -41,6 +41,11 @@ class BBlob extends AbstractBeccaEntity<BBlob> {
|
||||
utcDateModified: this.utcDateModified
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Returns the result of `getPojo()` without its `contentLength` property;
 * every other property is passed through unchanged.
 *
 * NOTE(review): presumably `contentLength` is a derived value rather than a
 * stored column, which is why it is dropped here — confirm against the schema.
 */
protected getPojoToSave() {
    const { contentLength: _contentLength, ...persistable } = this.getPojo();

    return persistable;
}
|
||||
}
|
||||
|
||||
export default BBlob;
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
/**
 * Shape of a blob row as passed to hashing and entity-change code.
 */
export interface Blob {
    /** Identifier of the blob. */
    blobId: string;

    /** Blob payload, either as text or as a raw buffer. */
    content: string | Buffer;

    /** Optional text associated with the blob; may be absent or explicitly `null`. */
    textRepresentation?: string | null;

    /** Modification timestamp of the blob, stored as a string. */
    utcDateModified: string;
}
|
||||
|
||||
@@ -50,8 +50,8 @@ function processContent(content: Buffer | string | null, isProtected: boolean, i
|
||||
}
|
||||
}
|
||||
|
||||
function calculateContentHash({ blobId, content }: Blob) {
|
||||
return hash(`${blobId}|${content.toString()}`);
|
||||
/**
 * Computes the hash of a blob from its ID, its content and its text representation.
 *
 * The three values are joined with `|` before hashing; a `null` or missing
 * `textRepresentation` contributes an empty string.
 */
function calculateContentHash({ blobId, content, textRepresentation }: Blob) {
    const textPart = textRepresentation ?? "";
    const serialized = `${blobId}|${content.toString()}|${textPart}`;

    return hash(serialized);
}
|
||||
|
||||
export default {
|
||||
|
||||
@@ -146,7 +146,7 @@ function fillEntityChanges(entityName: string, entityPrimaryKey: string, conditi
|
||||
};
|
||||
|
||||
if (entityName === "blobs") {
|
||||
const blob = sql.getRow<Blob>("SELECT blobId, content, utcDateModified FROM blobs WHERE blobId = ?", [entityId]);
|
||||
const blob = sql.getRow<Blob>("SELECT blobId, content, textRepresentation, utcDateModified FROM blobs WHERE blobId = ?", [entityId]);
|
||||
ec.hash = blobService.calculateContentHash(blob);
|
||||
ec.utcDateChanged = blob.utcDateModified;
|
||||
ec.isSynced = true; // blobs are always synced
|
||||
|
||||
@@ -2,6 +2,8 @@ import { getTesseractCode } from '@triliumnext/commons';
|
||||
import Tesseract from 'tesseract.js';
|
||||
|
||||
import becca from '../../becca/becca.js';
|
||||
import blobService from '../blob.js';
|
||||
import entityChangesService from '../entity_changes.js';
|
||||
import log from '../log.js';
|
||||
import options from '../options.js';
|
||||
import sql from '../sql.js';
|
||||
@@ -277,17 +279,14 @@ class OCRService {
|
||||
}
|
||||
|
||||
try {
|
||||
// Store OCR text and timestamp in blobs table
|
||||
sql.execute(`
|
||||
UPDATE blobs SET
|
||||
textRepresentation = ?,
|
||||
textExtractionLastProcessed = ?
|
||||
WHERE blobId = ?
|
||||
`, [
|
||||
ocrResult.text,
|
||||
new Date().toISOString(),
|
||||
blobId
|
||||
]);
|
||||
`, [ocrResult.text, new Date().toISOString(), blobId]);
|
||||
|
||||
this.putBlobEntityChange(blobId);
|
||||
|
||||
log.info(`Stored OCR result for blob ${blobId}`);
|
||||
} catch (error) {
|
||||
@@ -363,10 +362,12 @@ class OCRService {
|
||||
deleteOCRResult(blobId: string): void {
|
||||
try {
|
||||
sql.execute(`
|
||||
UPDATE blobs SET textRepresentation = NULL
|
||||
UPDATE blobs SET textRepresentation = NULL, textExtractionLastProcessed = NULL
|
||||
WHERE blobId = ?
|
||||
`, [blobId]);
|
||||
|
||||
this.putBlobEntityChange(blobId);
|
||||
|
||||
log.info(`Deleted OCR result for blob ${blobId}`);
|
||||
} catch (error) {
|
||||
log.error(`Failed to delete OCR result for blob ${blobId}: ${error}`);
|
||||
@@ -558,6 +559,29 @@ class OCRService {
|
||||
/**
 * Notifies the sync system that a blob has changed, without modifying the blob's identity.
 *
 * Looks the blob up via `becca.getBlob`, computes its content hash from
 * `blobId`, `content` and `textRepresentation`, and records a "blobs" entity
 * change carrying that hash. Does nothing when the blob is not found or has
 * no `blobId`.
 *
 * NOTE(review): `utcDateChanged` is taken from the blob's existing
 * `utcDateModified`, which the OCR UPDATE statements in this service do not
 * modify — confirm that sync treats a changed hash with an unchanged
 * timestamp as an update.
 *
 * @param blobId - ID of the blob whose entity change should be recorded.
 */
private putBlobEntityChange(blobId: string): void {
    const blob = becca.getBlob({ blobId });
    if (!blob || !blob.blobId) {
        return;
    }

    const hash = blobService.calculateContentHash({
        blobId: blob.blobId,
        content: blob.content,
        textRepresentation: blob.textRepresentation,
        utcDateModified: blob.utcDateModified!
    });

    entityChangesService.putEntityChange({
        entityName: "blobs",
        entityId: blobId,
        hash,
        isErased: false,
        utcDateChanged: blob.utcDateModified,
        isSynced: true
    });
}

/**
 * Get processor for a given MIME type
 */
|
||||
|
||||
private getProcessorForMimeType(mimeType: string): FileProcessor | null {
|
||||
for (const processor of this.processors.values()) {
|
||||
if (processor.canProcess(mimeType)) {
|
||||
@@ -641,12 +665,12 @@ class OCRService {
|
||||
|
||||
try {
|
||||
sql.execute(`
|
||||
UPDATE blobs SET
|
||||
textRepresentation = NULL,
|
||||
textExtractionLastProcessed = NULL
|
||||
UPDATE blobs SET textRepresentation = NULL, textExtractionLastProcessed = NULL
|
||||
WHERE blobId = ?
|
||||
`, [blobId]);
|
||||
|
||||
this.putBlobEntityChange(blobId);
|
||||
|
||||
log.info(`Invalidated OCR result for blob ${blobId}`);
|
||||
} catch (error) {
|
||||
log.error(`Failed to invalidate OCR result for blob ${blobId}: ${error}`);
|
||||
|
||||
Reference in New Issue
Block a user