diff --git a/apps/server/src/services/data_dir.ts b/apps/server/src/services/data_dir.ts index d0f34f01df..4bd8ec7ccb 100644 --- a/apps/server/src/services/data_dir.ts +++ b/apps/server/src/services/data_dir.ts @@ -1,5 +1,3 @@ -"use strict"; - /* * This file resolves trilium data path in this order of priority: * - case A) if TRILIUM_DATA_DIR environment variable exists, then its value is used as the path @@ -8,8 +6,8 @@ * - case D) as a fallback if the previous step fails, we'll use home dir */ -import os from "os"; import fs from "fs"; +import os from "os"; import { join as pathJoin } from "path"; const DIR_NAME = "trilium-data"; @@ -43,13 +41,14 @@ export function getTriliumDataDir(dataDirName: string) { export function getDataDirs(TRILIUM_DATA_DIR: string) { const dataDirs = { - TRILIUM_DATA_DIR: TRILIUM_DATA_DIR, + TRILIUM_DATA_DIR, DOCUMENT_PATH: process.env.TRILIUM_DOCUMENT_PATH || pathJoin(TRILIUM_DATA_DIR, "document.db"), BACKUP_DIR: process.env.TRILIUM_BACKUP_DIR || pathJoin(TRILIUM_DATA_DIR, "backup"), LOG_DIR: process.env.TRILIUM_LOG_DIR || pathJoin(TRILIUM_DATA_DIR, "log"), TMP_DIR: process.env.TRILIUM_TMP_DIR || pathJoin(TRILIUM_DATA_DIR, "tmp"), ANONYMIZED_DB_DIR: process.env.TRILIUM_ANONYMIZED_DB_DIR || pathJoin(TRILIUM_DATA_DIR, "anonymized-db"), - CONFIG_INI_PATH: process.env.TRILIUM_CONFIG_INI_PATH || pathJoin(TRILIUM_DATA_DIR, "config.ini") + CONFIG_INI_PATH: process.env.TRILIUM_CONFIG_INI_PATH || pathJoin(TRILIUM_DATA_DIR, "config.ini"), + OCR_CACHE_DIR: pathJoin(TRILIUM_DATA_DIR, "ocr-cache") } as const; createDirIfNotExisting(dataDirs.TMP_DIR); diff --git a/apps/server/src/services/ocr/processors/image_processor.ts b/apps/server/src/services/ocr/processors/image_processor.ts index 82c48bc8f8..bade5633f9 100644 --- a/apps/server/src/services/ocr/processors/image_processor.ts +++ b/apps/server/src/services/ocr/processors/image_processor.ts @@ -1,5 +1,7 @@ +import fs from 'fs'; import Tesseract from 'tesseract.js'; +import dataDirs from '../../data_dir.js'; import log from '../../log.js'; import options from '../../options.js'; import { OCRProcessingOptions,OCRResult } from '../ocr_service.js'; @@ -55,6 +57,7 @@ export class ImageProcessor extends FileProcessor { await this.worker.terminate(); log.info(`Initializing Tesseract worker for language(s): ${language}`); this.worker = await Tesseract.createWorker(language, 1, { + cachePath: dataDirs.OCR_CACHE_DIR, logger: (m: { status: string; progress: number }) => { if (m.status === 'recognizing text') { log.info(`Image OCR progress (${language}): ${Math.round(m.progress * 100)}%`); @@ -97,8 +100,9 @@ export class ImageProcessor extends FileProcessor { try { log.info('Initializing image OCR processor with Tesseract.js...'); + fs.mkdirSync(dataDirs.OCR_CACHE_DIR, { recursive: true }); + // Configure proper paths for Node.js environment - const tesseractDir = require.resolve('tesseract.js').replace('/src/index.js', ''); const workerPath = require.resolve('tesseract.js/src/worker-script/node/index.js'); const corePath = require.resolve('tesseract.js-core/tesseract-core.wasm.js'); @@ -108,6 +112,7 @@ export class ImageProcessor extends FileProcessor { this.worker = await Tesseract.createWorker("eng", 1, { workerPath, corePath, + cachePath: dataDirs.OCR_CACHE_DIR, logger: (m: { status: string; progress: number }) => { if (m.status === 'recognizing text') { log.info(`Image OCR progress: ${Math.round(m.progress * 100)}%`);