mirror of
https://github.com/zadam/trilium.git
synced 2026-06-26 20:09:59 +02:00
feat(ocr): handle cache dir properly
This commit is contained in:
@@ -1,5 +1,3 @@
|
||||
"use strict";
|
||||
|
||||
/*
|
||||
* This file resolves trilium data path in this order of priority:
|
||||
* - case A) if TRILIUM_DATA_DIR environment variable exists, then its value is used as the path
|
||||
@@ -8,8 +6,8 @@
|
||||
* - case D) as a fallback if the previous step fails, we'll use home dir
|
||||
*/
|
||||
|
||||
import os from "os";
|
||||
import fs from "fs";
|
||||
import os from "os";
|
||||
import { join as pathJoin } from "path";
|
||||
|
||||
const DIR_NAME = "trilium-data";
|
||||
@@ -43,13 +41,14 @@ export function getTriliumDataDir(dataDirName: string) {
|
||||
|
||||
export function getDataDirs(TRILIUM_DATA_DIR: string) {
|
||||
const dataDirs = {
|
||||
TRILIUM_DATA_DIR: TRILIUM_DATA_DIR,
|
||||
TRILIUM_DATA_DIR,
|
||||
DOCUMENT_PATH: process.env.TRILIUM_DOCUMENT_PATH || pathJoin(TRILIUM_DATA_DIR, "document.db"),
|
||||
BACKUP_DIR: process.env.TRILIUM_BACKUP_DIR || pathJoin(TRILIUM_DATA_DIR, "backup"),
|
||||
LOG_DIR: process.env.TRILIUM_LOG_DIR || pathJoin(TRILIUM_DATA_DIR, "log"),
|
||||
TMP_DIR: process.env.TRILIUM_TMP_DIR || pathJoin(TRILIUM_DATA_DIR, "tmp"),
|
||||
ANONYMIZED_DB_DIR: process.env.TRILIUM_ANONYMIZED_DB_DIR || pathJoin(TRILIUM_DATA_DIR, "anonymized-db"),
|
||||
CONFIG_INI_PATH: process.env.TRILIUM_CONFIG_INI_PATH || pathJoin(TRILIUM_DATA_DIR, "config.ini")
|
||||
CONFIG_INI_PATH: process.env.TRILIUM_CONFIG_INI_PATH || pathJoin(TRILIUM_DATA_DIR, "config.ini"),
|
||||
OCR_CACHE_DIR: pathJoin(TRILIUM_DATA_DIR, "ocr-cache")
|
||||
} as const;
|
||||
|
||||
createDirIfNotExisting(dataDirs.TMP_DIR);
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import fs from 'fs';
|
||||
import Tesseract from 'tesseract.js';
|
||||
|
||||
import dataDirs from '../../data_dir.js';
|
||||
import log from '../../log.js';
|
||||
import options from '../../options.js';
|
||||
import { OCRProcessingOptions,OCRResult } from '../ocr_service.js';
|
||||
@@ -55,6 +57,7 @@ export class ImageProcessor extends FileProcessor {
|
||||
await this.worker.terminate();
|
||||
log.info(`Initializing Tesseract worker for language(s): ${language}`);
|
||||
this.worker = await Tesseract.createWorker(language, 1, {
|
||||
cachePath: dataDirs.OCR_CACHE_DIR,
|
||||
logger: (m: { status: string; progress: number }) => {
|
||||
if (m.status === 'recognizing text') {
|
||||
log.info(`Image OCR progress (${language}): ${Math.round(m.progress * 100)}%`);
|
||||
@@ -97,8 +100,9 @@ export class ImageProcessor extends FileProcessor {
|
||||
try {
|
||||
log.info('Initializing image OCR processor with Tesseract.js...');
|
||||
|
||||
fs.mkdirSync(dataDirs.OCR_CACHE_DIR, { recursive: true });
|
||||
|
||||
// Configure proper paths for Node.js environment
|
||||
const tesseractDir = require.resolve('tesseract.js').replace('/src/index.js', '');
|
||||
const workerPath = require.resolve('tesseract.js/src/worker-script/node/index.js');
|
||||
const corePath = require.resolve('tesseract.js-core/tesseract-core.wasm.js');
|
||||
|
||||
@@ -108,6 +112,7 @@ export class ImageProcessor extends FileProcessor {
|
||||
this.worker = await Tesseract.createWorker("eng", 1, {
|
||||
workerPath,
|
||||
corePath,
|
||||
cachePath: dataDirs.OCR_CACHE_DIR,
|
||||
logger: (m: { status: string; progress: number }) => {
|
||||
if (m.status === 'recognizing text') {
|
||||
log.info(`Image OCR progress: ${Math.round(m.progress * 100)}%`);
|
||||
|
||||
Reference in New Issue
Block a user