diff --git a/apps/server/src/services/ocr/ocr_service.ts b/apps/server/src/services/ocr/ocr_service.ts index f82ec686b0..4baec68b3c 100644 --- a/apps/server/src/services/ocr/ocr_service.ts +++ b/apps/server/src/services/ocr/ocr_service.ts @@ -334,13 +334,6 @@ class OCRService { } } - /** - * Process OCR for all files that don't have OCR results yet or need reprocessing - */ - async processAllImages(): Promise { - return this.processAllBlobsNeedingOCR(); - } - /** * Get OCR statistics */ @@ -428,25 +421,23 @@ class OCRService { } try { - // Count total blobs needing OCR processing const blobsNeedingOCR = this.getBlobsNeedingOCR(); - const totalCount = blobsNeedingOCR.length; - if (totalCount === 0) { + if (blobsNeedingOCR.length === 0) { return { success: false, message: 'No images found that need OCR processing' }; } - // Initialize batch processing state this.batchProcessingState = { inProgress: true, - total: totalCount, + total: blobsNeedingOCR.length, processed: 0, startTime: new Date() }; // Start processing in background - this.processBatchInBackground(blobsNeedingOCR).catch(error => { + this.processBlobs(blobsNeedingOCR).catch(error => { log.error(`Batch processing failed: ${error instanceof Error ? error.message : String(error)}`); + }).finally(() => { this.batchProcessingState.inProgress = false; }); @@ -468,43 +459,6 @@ class OCRService { return result; } - /** - * Process batch OCR in background with progress tracking - */ - private async processBatchInBackground(blobsToProcess: Array<{ blobId: string; mimeType: string; entityType: 'note' | 'attachment'; entityId: string }>): Promise { - try { - log.info('Starting batch OCR processing...'); - - for (const blobInfo of blobsToProcess) { - if (!this.batchProcessingState.inProgress) { - break; // Stop if processing was cancelled - } - - try { - if (blobInfo.entityType === 'note') { - await this.processNoteOCR(blobInfo.entityId); - } else { - await this.processAttachmentOCR(blobInfo.entityId); - } - this.batchProcessingState.processed++; - // Add small delay to prevent overwhelming the system - await new Promise(resolve => setTimeout(resolve, 500)); - } catch (error) { - log.error(`Failed to process OCR for ${blobInfo.entityType} ${blobInfo.entityId}: ${error}`); - this.batchProcessingState.processed++; // Count as processed even if failed - } - } - - // Mark as completed - this.batchProcessingState.inProgress = false; - log.info(`Batch OCR processing completed. Processed ${this.batchProcessingState.processed} files.`); - } catch (error) { - log.error(`Batch OCR processing failed: ${error}`); - this.batchProcessingState.inProgress = false; - throw error; - } - } - /** * Cancel batch processing */ @@ -515,6 +469,43 @@ class OCRService { } } + /** + * Process a list of blobs sequentially, updating batch progress. + */ + private async processBlobs(blobs: Array<{ entityType: 'note' | 'attachment'; entityId: string }>): Promise { + log.info(`Starting batch OCR processing of ${blobs.length} items...`); + + for (const blob of blobs) { + if (!this.batchProcessingState.inProgress) { + break; + } + + try { + await this.processOcrEntity(blob); + } catch (error) { + log.error(`Failed to process OCR for ${blob.entityType} ${blob.entityId}: ${error}`); + } + + this.batchProcessingState.processed++; + + // Small delay to prevent overwhelming the system + await new Promise(resolve => setTimeout(resolve, 500)); + } + + log.info(`Batch OCR processing completed. Processed ${this.batchProcessingState.processed} files.`); + } + + /** + * Process OCR for a single entity (note or attachment) by type. + */ + private async processOcrEntity(entity: { entityType: 'note' | 'attachment'; entityId: string }): Promise { + if (entity.entityType === 'note') { + await this.processNoteOCR(entity.entityId); + } else { + await this.processAttachmentOCR(entity.entityId); + } + } + /** * Get processor for a given MIME type */ @@ -616,41 +607,6 @@ class OCRService { } } - /** - * Process OCR for all blobs that need it (auto-processing) - */ - async processAllBlobsNeedingOCR(): Promise { - if (!options.getOptionBool('ocrAutoProcessImages')) { - log.info('OCR auto-processing is disabled, skipping'); - return; - } - - const blobsNeedingOCR = this.getBlobsNeedingOCR(); - if (blobsNeedingOCR.length === 0) { - log.info('No blobs need OCR processing'); - return; - } - - log.info(`Auto-processing OCR for ${blobsNeedingOCR.length} blobs...`); - - for (const blobInfo of blobsNeedingOCR) { - try { - if (blobInfo.entityType === 'note') { - await this.processNoteOCR(blobInfo.entityId); - } else { - await this.processAttachmentOCR(blobInfo.entityId); - } - - // Add small delay to prevent overwhelming the system - await new Promise(resolve => setTimeout(resolve, 100)); - } catch (error) { - log.error(`Failed to auto-process OCR for ${blobInfo.entityType} ${blobInfo.entityId}: ${error}`); - // Continue with other blobs - } - } - - log.info('Auto-processing OCR completed'); - } } export default new OCRService();