feat(ocr): automatic processing of attachments

This commit is contained in:
Elian Doran
2026-04-02 20:00:55 +03:00
parent f163cacddc
commit 2d852c38ec
2 changed files with 37 additions and 21 deletions

View File

@@ -144,23 +144,31 @@ eventService.subscribe(eventService.ENTITY_CREATED, ({ entityName, entity }) =>
// Note: OCR processing for images is now handled in image.ts during image processing
// OCR processing for files remains here since they don't go through image processing
if (entity.type === 'file' && optionService.getOptionBool("ocrAutoProcessImages")) {
// Check if the file MIME type is supported by any OCR processor
const supportedMimeTypes = ocrService.getAllSupportedMimeTypes();
if (entity.mime && supportedMimeTypes.includes(entity.mime)) {
// Process OCR asynchronously to avoid blocking note creation
ocrService.processNoteOCR(entity.noteId).then(result => {
if (result) {
log.info(`Automatically processed OCR for file note ${entity.noteId} with MIME type ${entity.mime}`);
}
}).catch(error => {
log.error(`Failed to automatically process OCR for file note ${entity.noteId}: ${error}`);
});
}
autoProcessOCR(entity.mime, () => ocrService.processNoteOCR(entity.noteId), `file note ${entity.noteId}`);
}
} else if (entityName === "attachments") {
// Image attachments are handled in image.ts after async image processing sets the real MIME type.
// Only handle non-image (file) attachments here.
if (entity.role === "file" && optionService.getOptionBool("ocrAutoProcessImages")) {
autoProcessOCR(entity.mime, () => ocrService.processAttachmentOCR(entity.attachmentId), `attachment ${entity.attachmentId}`);
}
}
});
/**
 * Starts OCR for an entity in the background when its MIME type is one of the
 * types reported by `ocrService.getAllSupportedMimeTypes()`.
 *
 * The function never waits for the OCR work and never throws because of it:
 * both a rejected promise and a synchronous throw from `process` are written
 * to the error log instead of propagating to the caller.
 *
 * @param mime MIME type of the entity; an empty value is treated as unsupported.
 * @param process callback that triggers the actual OCR run. A truthy resolved
 *                value is logged as a successful automatic run.
 * @param entityDescription human-readable label of the entity used in log messages.
 */
function autoProcessOCR(mime: string, process: () => Promise<unknown>, entityDescription: string) {
    const supportedMimeTypes = ocrService.getAllSupportedMimeTypes();
    if (!mime || !supportedMimeTypes.includes(mime)) {
        return;
    }

    // `process` is still invoked synchronously (an async function body runs
    // up to its first `await`), but a synchronous throw now becomes a caught
    // error rather than escaping into the event handler that called us.
    void (async () => {
        try {
            const result = await process();
            if (result) {
                log.info(`Automatically processed OCR for ${entityDescription} with MIME type ${mime}`);
            }
        } catch (error) {
            log.error(`Failed to automatically process OCR for ${entityDescription}: ${error}`);
        }
    })();
}
// On every CHILD_NOTE_CREATED event, hand the parent note, the
// "runOnChildNoteCreation" name and the newly created child note over to
// runAttachedRelations.
eventService.subscribe(eventService.CHILD_NOTE_CREATED, (event) => {
    const { parentNote, childNote } = event;
    runAttachedRelations(parentNote, "runOnChildNoteCreation", childNote);
});

View File

@@ -13,7 +13,12 @@ import optionService from "./options.js";
import protectedSessionService from "./protected_session.js";
import sql from "./sql.js";
async function processImage(uploadBuffer: Buffer, originalName: string, shrinkImageSwitch: boolean, noteId?: string) {
/**
 * Identifies the entity whose OCR should be scheduled by `processImage`.
 *
 * `processImage` checks `noteId` first and only falls back to `attachmentId`
 * when `noteId` is not set; if neither is set, no OCR call is made.
 */
interface ProcessImageOCRTarget {
// ID of the note to pass to `ocrService.processNoteOCR`.
noteId?: string;
// ID of the attachment to pass to `ocrService.processAttachmentOCR`.
attachmentId?: string;
}
async function processImage(uploadBuffer: Buffer, originalName: string, shrinkImageSwitch: boolean, ocrTarget?: ProcessImageOCRTarget) {
const compressImages = optionService.getOptionBool("compressImages");
const origImageFormat = await getImageType(uploadBuffer);
@@ -25,17 +30,20 @@ async function processImage(uploadBuffer: Buffer, originalName: string, shrinkIm
}
// Schedule OCR processing in the background for best quality
if (noteId && optionService.getOptionBool("ocrAutoProcessImages") && origImageFormat) {
if (ocrTarget && optionService.getOptionBool("ocrAutoProcessImages") && origImageFormat) {
const imageMime = getImageMimeFromExtension(origImageFormat.ext);
const supportedMimeTypes = ocrService.getAllSupportedMimeTypes();
if (supportedMimeTypes.includes(imageMime)) {
// Process OCR asynchronously without blocking image creation
setImmediate(async () => {
try {
await ocrService.processNoteOCR(noteId);
if (ocrTarget.noteId) {
await ocrService.processNoteOCR(ocrTarget.noteId);
} else if (ocrTarget.attachmentId) {
await ocrService.processAttachmentOCR(ocrTarget.attachmentId);
}
} catch (error) {
log.error(`Failed to process OCR for image ${noteId}: ${error}`);
log.error(`Failed to process OCR for image ${ocrTarget.noteId || ocrTarget.attachmentId}: ${error}`);
}
});
}
@@ -88,7 +96,7 @@ function updateImage(noteId: string, uploadBuffer: Buffer, originalName: string)
note.setLabel("originalFileName", originalName);
// resizing images asynchronously since JIMP does not support sync operation
processImage(uploadBuffer, originalName, true, noteId).then(({ buffer, imageFormat }) => {
processImage(uploadBuffer, originalName, true, { noteId }).then(({ buffer, imageFormat }) => {
sql.transactional(() => {
note.mime = getImageMimeFromExtension(imageFormat.ext);
note.save();
@@ -124,7 +132,7 @@ function saveImage(parentNoteId: string, uploadBuffer: Buffer, originalName: str
note.addLabel("originalFileName", originalName);
// resizing images asynchronously since JIMP does not support sync operation
processImage(uploadBuffer, originalName, shrinkImageSwitch, note.noteId).then(({ buffer, imageFormat }) => {
processImage(uploadBuffer, originalName, shrinkImageSwitch, { noteId: note.noteId }).then(({ buffer, imageFormat }) => {
sql.transactional(() => {
note.mime = getImageMimeFromExtension(imageFormat.ext);
@@ -175,7 +183,7 @@ function saveImageToAttachment(noteId: string, uploadBuffer: Buffer, originalNam
}, 5000);
// resizing images asynchronously since JIMP does not support sync operation
processImage(uploadBuffer, originalName, !!shrinkImageSwitch, attachment.attachmentId).then(({ buffer, imageFormat }) => {
processImage(uploadBuffer, originalName, !!shrinkImageSwitch, { attachmentId: attachment.attachmentId }).then(({ buffer, imageFormat }) => {
sql.transactional(() => {
// re-read, might be changed in the meantime
if (!attachment.attachmentId) {