From 44a5dccd61e09e325cb7d66cfd461cbd7a1cff93 Mon Sep 17 00:00:00 2001 From: Elian Doran Date: Thu, 2 Apr 2026 10:22:29 +0300 Subject: [PATCH] chore(ocr): remove master switch --- .../src/translations/en/translation.json | 2 -- .../widgets/type_widgets/options/media.tsx | 11 ------- apps/server/src/routes/api/options.ts | 2 -- apps/server/src/services/handlers.ts | 3 +- apps/server/src/services/image.ts | 33 +++++++++---------- apps/server/src/services/ocr/ocr_service.ts | 20 ++--------- apps/server/src/services/options_init.ts | 1 - .../search/expressions/ocr_content.ts | 13 -------- .../src/services/search/search_result.ts | 6 ---- 9 files changed, 18 insertions(+), 73 deletions(-) diff --git a/apps/client/src/translations/en/translation.json b/apps/client/src/translations/en/translation.json index 388081cb1c..2d799bf2fb 100644 --- a/apps/client/src/translations/en/translation.json +++ b/apps/client/src/translations/en/translation.json @@ -1265,8 +1265,6 @@ "jpeg_quality": "JPEG quality", "jpeg_quality_description": "Recommended range is 50–85. Lower values reduce file size, higher values preserve detail.", "ocr_section_title": "Text Extraction (OCR)", - "enable_ocr": "Enable text extraction", - "ocr_description": "Extract searchable text from images, PDFs, and Office documents (Word, Excel, PowerPoint).", "ocr_auto_process": "Auto-process new files", "ocr_auto_process_description": "Automatically extract text from newly uploaded or pasted files.", "ocr_min_confidence": "Minimum confidence", diff --git a/apps/client/src/widgets/type_widgets/options/media.tsx b/apps/client/src/widgets/type_widgets/options/media.tsx index ebdb179289..0b2aa8127d 100644 --- a/apps/client/src/widgets/type_widgets/options/media.tsx +++ b/apps/client/src/widgets/type_widgets/options/media.tsx @@ -64,33 +64,22 @@ function ImageSettings() { } function OcrSettings() { - const [ ocrEnabled, setOcrEnabled ] = useTriliumOptionBool("ocrEnabled"); const [ ocrAutoProcess, setOcrAutoProcess ] = useTriliumOptionBool("ocrAutoProcessImages"); const [ ocrMinConfidence, setOcrMinConfidence ] = useTriliumOption("ocrMinConfidence"); return ( - - - - ([ "llmProviders", // OCR options - "ocrEnabled", - "ocrLanguage", "ocrAutoProcessImages", "ocrMinConfidence" ]); diff --git a/apps/server/src/services/handlers.ts b/apps/server/src/services/handlers.ts index 1526e5019f..7eb0c05950 100644 --- a/apps/server/src/services/handlers.ts +++ b/apps/server/src/services/handlers.ts @@ -143,8 +143,7 @@ eventService.subscribe(eventService.ENTITY_CREATED, ({ entityName, entity }) => // Note: OCR processing for images is now handled in image.ts during image processing // OCR processing for files remains here since they don't go through image processing - // Only auto-process if both OCR is enabled and auto-processing is enabled - if (entity.type === 'file' && ocrService.isOCREnabled() && optionService.getOptionBool("ocrAutoProcessImages")) { + if (entity.type === 'file' && optionService.getOptionBool("ocrAutoProcessImages")) { // Check if the file MIME type is supported by any OCR processor const supportedMimeTypes = ocrService.getAllSupportedMimeTypes(); diff --git a/apps/server/src/services/image.ts b/apps/server/src/services/image.ts index d5fe58c3fb..e94fae2e7e 100644 --- a/apps/server/src/services/image.ts +++ b/apps/server/src/services/image.ts @@ -1,18 +1,17 @@ -"use strict"; +import imageType from "image-type"; +import isAnimated from "is-animated"; +import isSvg from "is-svg"; +import { Jimp } from "jimp"; +import sanitizeFilename from "sanitize-filename"; import becca from "../becca/becca.js"; -import log from "./log.js"; -import protectedSessionService from "./protected_session.js"; -import noteService from "./notes.js"; -import optionService from "./options.js"; -import sql from "./sql.js"; -import { Jimp } from "jimp"; -import imageType from "image-type"; -import sanitizeFilename from "sanitize-filename"; -import isSvg from "is-svg"; -import isAnimated from "is-animated"; import htmlSanitizer from "./html_sanitizer.js"; -import ocrService, { type OCRResult } from "./ocr/ocr_service.js"; +import log from "./log.js"; +import noteService from "./notes.js"; +import ocrService from "./ocr/ocr_service.js"; +import optionService from "./options.js"; +import protectedSessionService from "./protected_session.js"; +import sql from "./sql.js"; async function processImage(uploadBuffer: Buffer, originalName: string, shrinkImageSwitch: boolean, noteId?: string) { const compressImages = optionService.getOptionBool("compressImages"); @@ -26,8 +25,7 @@ async function processImage(uploadBuffer: Buffer, originalName: string, shrinkIm } // Schedule OCR processing in the background for best quality - // Only auto-process if both OCR is enabled and auto-processing is enabled - if (noteId && ocrService.isOCREnabled() && optionService.getOptionBool("ocrAutoProcessImages") && origImageFormat) { + if (noteId && optionService.getOptionBool("ocrAutoProcessImages") && origImageFormat) { const imageMime = getImageMimeFromExtension(origImageFormat.ext); const supportedMimeTypes = ocrService.getAllSupportedMimeTypes(); @@ -41,14 +39,14 @@ async function processImage(uploadBuffer: Buffer, originalName: string, shrinkIm // noteId could be either a note ID or attachment ID const note = becca.getNote(noteId); const attachment = becca.getAttachment(noteId); - + let blobId: string | undefined; if (note && note.blobId) { blobId = note.blobId; } else if (attachment && attachment.blobId) { blobId = attachment.blobId; } - + if (blobId) { await ocrService.storeOCRResult(blobId, ocrResult); log.info(`Successfully processed OCR for image ${noteId} (${originalName})`); @@ -83,9 +81,8 @@ async function processImage(uploadBuffer: Buffer, originalName: string, shrinkIm async function getImageType(buffer: Buffer) { if (isSvg(buffer.toString())) { return { ext: "svg" }; - } else { - return (await imageType(buffer)) || { ext: "jpg" }; // optimistic JPG default } + return (await imageType(buffer)) || { ext: "jpg" }; // optimistic JPG default } function getImageMimeFromExtension(ext: string) { diff --git a/apps/server/src/services/ocr/ocr_service.ts b/apps/server/src/services/ocr/ocr_service.ts index 9fd6e28904..f4d0fc1e68 100644 --- a/apps/server/src/services/ocr/ocr_service.ts +++ b/apps/server/src/services/ocr/ocr_service.ts @@ -49,18 +49,6 @@ class OCRService { this.processors.set('office', new OfficeProcessor()); } - /** - * Check if OCR is enabled in settings - */ - isOCREnabled(): boolean { - try { - return options.getOptionBool('ocrEnabled'); - } catch (error) { - log.error(`Failed to check OCR enabled status: ${error}`); - return false; - } - } - /** * Resolves the Tesseract language code(s) for OCR processing. * @@ -479,10 +467,6 @@ class OCRService { return { success: false, message: 'Batch processing already in progress' }; } - if (!this.isOCREnabled()) { - return { success: false, message: 'OCR is disabled' }; - } - try { // Count total blobs needing OCR processing const blobsNeedingOCR = this.getBlobsNeedingOCR(); @@ -773,8 +757,8 @@ class OCRService { * Process OCR for all blobs that need it (auto-processing) */ async processAllBlobsNeedingOCR(): Promise { - if (!this.isOCREnabled()) { - log.info('OCR is disabled, skipping auto-processing'); + if (!options.getOptionBool('ocrAutoProcessImages')) { + log.info('OCR auto-processing is disabled, skipping'); return; } diff --git a/apps/server/src/services/options_init.ts b/apps/server/src/services/options_init.ts index d398a252a2..2cec0cb768 100644 --- a/apps/server/src/services/options_init.ts +++ b/apps/server/src/services/options_init.ts @@ -215,7 +215,6 @@ const defaultOptions: DefaultOption[] = [ { name: "llmProviders", value: "[]", isSynced: false }, // OCR options - { name: "ocrEnabled", value: "false", isSynced: true }, { name: "ocrAutoProcessImages", value: "true", isSynced: true }, { name: "ocrMinConfidence", value: "0.55", isSynced: true }, ]; diff --git a/apps/server/src/services/search/expressions/ocr_content.ts b/apps/server/src/services/search/expressions/ocr_content.ts index 9401ada0b5..4b72f28282 100644 --- a/apps/server/src/services/search/expressions/ocr_content.ts +++ b/apps/server/src/services/search/expressions/ocr_content.ts @@ -16,11 +16,6 @@ export default class OCRContentExpression extends Expression { } execute(inputNoteSet: NoteSet, executionContext: object, searchContext: SearchContext): NoteSet { - // Don't search OCR content if it's not enabled - if (!this.isOCRSearchEnabled()) { - return new NoteSet(); - } - const resultNoteSet = new NoteSet(); const ocrResults = this.searchOCRContent(this.searchText); @@ -61,14 +56,6 @@ export default class OCRContentExpression extends Expression { return resultNoteSet; } - private isOCRSearchEnabled(): boolean { - try { - const optionService = require('../../options.js').default; - return optionService.getOptionBool('ocrEnabled'); - } catch { - return false; - } - } private searchOCRContent(searchText: string): Array<{ blobId: string; diff --git a/apps/server/src/services/search/search_result.ts b/apps/server/src/services/search/search_result.ts index 18ee711879..e40eb64dae 100644 --- a/apps/server/src/services/search/search_result.ts +++ b/apps/server/src/services/search/search_result.ts @@ -1,6 +1,5 @@ import becca from "../../becca/becca.js"; import beccaService from "../../becca/becca_service.js"; -import options from "../options.js"; import sql from "../sql.js"; import { calculateOptimizedEditDistance, @@ -133,11 +132,6 @@ class SearchResult { addOCRScore(tokens: string[], factor: number) { try { - // Check if OCR is enabled - if (!options.getOptionBool('ocrEnabled')) { - return; - } - // Search for OCR results for this note and its attachments const ocrResults = sql.getRows(` SELECT b.textRepresentation