chore(ocr): expose needed dependencies

This commit is contained in:
Elian Doran
2026-04-05 22:14:01 +03:00
parent 61dcc8db47
commit baa93cb371
2 changed files with 21 additions and 2 deletions

View File

@@ -12,6 +12,10 @@ async function main() {
// Copy node modules dependencies
build.copyNodeModules([ "better-sqlite3", "bindings", "file-uri-to-path" ]);
// The Tesseract.js worker runs in a separate worker thread (worker_threads)
// and needs its source files (+ WASM core) on disk — they cannot be bundled.
build.copyNodeModules([ "tesseract.js", "tesseract.js-core", "wasm-feature-detect" ]);
build.copy("/node_modules/ckeditor5/dist/ckeditor5-content.css", "ckeditor5-content.css");
build.buildFrontend();

View File

@@ -1,9 +1,11 @@
import fs from 'fs';
import path from 'path';
import Tesseract from 'tesseract.js';
import dataDirs from '../../data_dir.js';
import log from '../../log.js';
import options from '../../options.js';
import { getResourceDir, isDev } from '../../utils.js';
import { OCRProcessingOptions,OCRResult } from '../ocr_service.js';
import { FileProcessor } from './file_processor.js';
@@ -79,14 +81,27 @@ export class ImageProcessor extends FileProcessor {
fs.mkdirSync(dataDirs.OCR_CACHE_DIR, { recursive: true });
log.info(`Initializing Tesseract worker for language(s): ${language}`);
this.worker = await Tesseract.createWorker(language, 1, {
const workerOptions: Record<string, unknown> = {
cachePath: dataDirs.OCR_CACHE_DIR,
logger: (m: { status: string; progress: number }) => {
if (m.status === 'recognizing text') {
log.info(`Image OCR progress (${language}): ${Math.round(m.progress * 100)}%`);
}
}
});
};
// In production the server is bundled, so tesseract.js's default
// __dirname-based worker path is wrong. Point it at the copy we
// place in dist/node_modules during the build step.
if (!isDev) {
workerOptions.workerPath = path.join(
getResourceDir(),
'node_modules', 'tesseract.js', 'src', 'worker-script', 'node', 'index.js'
);
}
this.worker = await Tesseract.createWorker(language, 1, workerOptions);
this.currentLanguage = language;
}