mirror of
https://github.com/zadam/trilium.git
synced 2026-06-26 20:09:59 +02:00
chore(ocr): remove master switch
This commit is contained in:
@@ -1265,8 +1265,6 @@
|
||||
"jpeg_quality": "JPEG quality",
|
||||
"jpeg_quality_description": "Recommended range is 50–85. Lower values reduce file size, higher values preserve detail.",
|
||||
"ocr_section_title": "Text Extraction (OCR)",
|
||||
"enable_ocr": "Enable text extraction",
|
||||
"ocr_description": "Extract searchable text from images, PDFs, and Office documents (Word, Excel, PowerPoint).",
|
||||
"ocr_auto_process": "Auto-process new files",
|
||||
"ocr_auto_process_description": "Automatically extract text from newly uploaded or pasted files.",
|
||||
"ocr_min_confidence": "Minimum confidence",
|
||||
|
||||
@@ -64,33 +64,22 @@ function ImageSettings() {
|
||||
}
|
||||
|
||||
function OcrSettings() {
|
||||
const [ ocrEnabled, setOcrEnabled ] = useTriliumOptionBool("ocrEnabled");
|
||||
const [ ocrAutoProcess, setOcrAutoProcess ] = useTriliumOptionBool("ocrAutoProcessImages");
|
||||
const [ ocrMinConfidence, setOcrMinConfidence ] = useTriliumOption("ocrMinConfidence");
|
||||
|
||||
return (
|
||||
<OptionsSection title={t("images.ocr_section_title")}>
|
||||
<OptionsRow name="ocr-enabled" label={t("images.enable_ocr")} description={t("images.ocr_description")}>
|
||||
<FormToggle
|
||||
switchOnName="" switchOffName=""
|
||||
currentValue={ocrEnabled}
|
||||
onChange={setOcrEnabled}
|
||||
/>
|
||||
</OptionsRow>
|
||||
|
||||
<OptionsRow name="ocr-auto-process" label={t("images.ocr_auto_process")} description={t("images.ocr_auto_process_description")}>
|
||||
<FormToggle
|
||||
switchOnName="" switchOffName=""
|
||||
currentValue={ocrAutoProcess}
|
||||
onChange={setOcrAutoProcess}
|
||||
disabled={!ocrEnabled}
|
||||
/>
|
||||
</OptionsRow>
|
||||
|
||||
<OptionsRow name="ocr-min-confidence" label={t("images.ocr_min_confidence")} description={t("images.ocr_confidence_description")}>
|
||||
<FormTextBoxWithUnit
|
||||
type="number" min="0" max="1" step="0.05"
|
||||
disabled={!ocrEnabled}
|
||||
unit={t("images.ocr_confidence_unit")}
|
||||
currentValue={ocrMinConfidence}
|
||||
onChange={setOcrMinConfidence}
|
||||
|
||||
@@ -108,8 +108,6 @@ const ALLOWED_OPTIONS = new Set<OptionNames>([
|
||||
"llmProviders",
|
||||
|
||||
// OCR options
|
||||
"ocrEnabled",
|
||||
"ocrLanguage",
|
||||
"ocrAutoProcessImages",
|
||||
"ocrMinConfidence"
|
||||
]);
|
||||
|
||||
@@ -143,8 +143,7 @@ eventService.subscribe(eventService.ENTITY_CREATED, ({ entityName, entity }) =>
|
||||
|
||||
// Note: OCR processing for images is now handled in image.ts during image processing
|
||||
// OCR processing for files remains here since they don't go through image processing
|
||||
// Only auto-process if both OCR is enabled and auto-processing is enabled
|
||||
if (entity.type === 'file' && ocrService.isOCREnabled() && optionService.getOptionBool("ocrAutoProcessImages")) {
|
||||
if (entity.type === 'file' && optionService.getOptionBool("ocrAutoProcessImages")) {
|
||||
// Check if the file MIME type is supported by any OCR processor
|
||||
const supportedMimeTypes = ocrService.getAllSupportedMimeTypes();
|
||||
|
||||
|
||||
@@ -1,18 +1,17 @@
|
||||
"use strict";
|
||||
import imageType from "image-type";
|
||||
import isAnimated from "is-animated";
|
||||
import isSvg from "is-svg";
|
||||
import { Jimp } from "jimp";
|
||||
import sanitizeFilename from "sanitize-filename";
|
||||
|
||||
import becca from "../becca/becca.js";
|
||||
import log from "./log.js";
|
||||
import protectedSessionService from "./protected_session.js";
|
||||
import noteService from "./notes.js";
|
||||
import optionService from "./options.js";
|
||||
import sql from "./sql.js";
|
||||
import { Jimp } from "jimp";
|
||||
import imageType from "image-type";
|
||||
import sanitizeFilename from "sanitize-filename";
|
||||
import isSvg from "is-svg";
|
||||
import isAnimated from "is-animated";
|
||||
import htmlSanitizer from "./html_sanitizer.js";
|
||||
import ocrService, { type OCRResult } from "./ocr/ocr_service.js";
|
||||
import log from "./log.js";
|
||||
import noteService from "./notes.js";
|
||||
import ocrService from "./ocr/ocr_service.js";
|
||||
import optionService from "./options.js";
|
||||
import protectedSessionService from "./protected_session.js";
|
||||
import sql from "./sql.js";
|
||||
|
||||
async function processImage(uploadBuffer: Buffer, originalName: string, shrinkImageSwitch: boolean, noteId?: string) {
|
||||
const compressImages = optionService.getOptionBool("compressImages");
|
||||
@@ -26,8 +25,7 @@ async function processImage(uploadBuffer: Buffer, originalName: string, shrinkIm
|
||||
}
|
||||
|
||||
// Schedule OCR processing in the background for best quality
|
||||
// Only auto-process if both OCR is enabled and auto-processing is enabled
|
||||
if (noteId && ocrService.isOCREnabled() && optionService.getOptionBool("ocrAutoProcessImages") && origImageFormat) {
|
||||
if (noteId && optionService.getOptionBool("ocrAutoProcessImages") && origImageFormat) {
|
||||
const imageMime = getImageMimeFromExtension(origImageFormat.ext);
|
||||
const supportedMimeTypes = ocrService.getAllSupportedMimeTypes();
|
||||
|
||||
@@ -41,14 +39,14 @@ async function processImage(uploadBuffer: Buffer, originalName: string, shrinkIm
|
||||
// noteId could be either a note ID or attachment ID
|
||||
const note = becca.getNote(noteId);
|
||||
const attachment = becca.getAttachment(noteId);
|
||||
|
||||
|
||||
let blobId: string | undefined;
|
||||
if (note && note.blobId) {
|
||||
blobId = note.blobId;
|
||||
} else if (attachment && attachment.blobId) {
|
||||
blobId = attachment.blobId;
|
||||
}
|
||||
|
||||
|
||||
if (blobId) {
|
||||
await ocrService.storeOCRResult(blobId, ocrResult);
|
||||
log.info(`Successfully processed OCR for image ${noteId} (${originalName})`);
|
||||
@@ -83,9 +81,8 @@ async function processImage(uploadBuffer: Buffer, originalName: string, shrinkIm
|
||||
async function getImageType(buffer: Buffer) {
|
||||
if (isSvg(buffer.toString())) {
|
||||
return { ext: "svg" };
|
||||
} else {
|
||||
return (await imageType(buffer)) || { ext: "jpg" }; // optimistic JPG default
|
||||
}
|
||||
return (await imageType(buffer)) || { ext: "jpg" }; // optimistic JPG default
|
||||
}
|
||||
|
||||
function getImageMimeFromExtension(ext: string) {
|
||||
|
||||
@@ -49,18 +49,6 @@ class OCRService {
|
||||
this.processors.set('office', new OfficeProcessor());
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if OCR is enabled in settings
|
||||
*/
|
||||
isOCREnabled(): boolean {
|
||||
try {
|
||||
return options.getOptionBool('ocrEnabled');
|
||||
} catch (error) {
|
||||
log.error(`Failed to check OCR enabled status: ${error}`);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolves the Tesseract language code(s) for OCR processing.
|
||||
*
|
||||
@@ -479,10 +467,6 @@ class OCRService {
|
||||
return { success: false, message: 'Batch processing already in progress' };
|
||||
}
|
||||
|
||||
if (!this.isOCREnabled()) {
|
||||
return { success: false, message: 'OCR is disabled' };
|
||||
}
|
||||
|
||||
try {
|
||||
// Count total blobs needing OCR processing
|
||||
const blobsNeedingOCR = this.getBlobsNeedingOCR();
|
||||
@@ -773,8 +757,8 @@ class OCRService {
|
||||
* Process OCR for all blobs that need it (auto-processing)
|
||||
*/
|
||||
async processAllBlobsNeedingOCR(): Promise<void> {
|
||||
if (!this.isOCREnabled()) {
|
||||
log.info('OCR is disabled, skipping auto-processing');
|
||||
if (!options.getOptionBool('ocrAutoProcessImages')) {
|
||||
log.info('OCR auto-processing is disabled, skipping');
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -215,7 +215,6 @@ const defaultOptions: DefaultOption[] = [
|
||||
{ name: "llmProviders", value: "[]", isSynced: false },
|
||||
|
||||
// OCR options
|
||||
{ name: "ocrEnabled", value: "false", isSynced: true },
|
||||
{ name: "ocrAutoProcessImages", value: "true", isSynced: true },
|
||||
{ name: "ocrMinConfidence", value: "0.55", isSynced: true },
|
||||
];
|
||||
|
||||
@@ -16,11 +16,6 @@ export default class OCRContentExpression extends Expression {
|
||||
}
|
||||
|
||||
execute(inputNoteSet: NoteSet, executionContext: object, searchContext: SearchContext): NoteSet {
|
||||
// Don't search OCR content if it's not enabled
|
||||
if (!this.isOCRSearchEnabled()) {
|
||||
return new NoteSet();
|
||||
}
|
||||
|
||||
const resultNoteSet = new NoteSet();
|
||||
const ocrResults = this.searchOCRContent(this.searchText);
|
||||
|
||||
@@ -61,14 +56,6 @@ export default class OCRContentExpression extends Expression {
|
||||
return resultNoteSet;
|
||||
}
|
||||
|
||||
private isOCRSearchEnabled(): boolean {
|
||||
try {
|
||||
const optionService = require('../../options.js').default;
|
||||
return optionService.getOptionBool('ocrEnabled');
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private searchOCRContent(searchText: string): Array<{
|
||||
blobId: string;
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import becca from "../../becca/becca.js";
|
||||
import beccaService from "../../becca/becca_service.js";
|
||||
import options from "../options.js";
|
||||
import sql from "../sql.js";
|
||||
import {
|
||||
calculateOptimizedEditDistance,
|
||||
@@ -133,11 +132,6 @@ class SearchResult {
|
||||
|
||||
addOCRScore(tokens: string[], factor: number) {
|
||||
try {
|
||||
// Check if OCR is enabled
|
||||
if (!options.getOptionBool('ocrEnabled')) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Search for OCR results for this note and its attachments
|
||||
const ocrResults = sql.getRows(`
|
||||
SELECT b.textRepresentation
|
||||
|
||||
Reference in New Issue
Block a user