diff --git a/apps/server/scripts/build.ts b/apps/server/scripts/build.ts index 73a3aeaee3..14401080e2 100644 --- a/apps/server/scripts/build.ts +++ b/apps/server/scripts/build.ts @@ -14,8 +14,11 @@ async function main() { build.copyNodeModules([ "better-sqlite3", "bindings", "file-uri-to-path" ]); // Tesseract.js worker runs in a separate worker_thread and needs its - // source files (+ WASM core) on disk — they cannot be bundled. - build.copyNodeModules([ "tesseract.js", "tesseract.js-core", "wasm-feature-detect" ]); + // source files (+ WASM core + transitive deps) on disk — they cannot be bundled. + build.copyNodeModules([ + "tesseract.js", "tesseract.js-core", "wasm-feature-detect", + "regenerator-runtime", "is-url", "bmp-js" + ]); build.copy("/node_modules/ckeditor5/dist/ckeditor5-content.css", "ckeditor5-content.css"); build.buildFrontend(); diff --git a/apps/server/src/services/ocr/processors/image_processor.ts b/apps/server/src/services/ocr/processors/image_processor.ts index a7a09acd85..15bfe78085 100644 --- a/apps/server/src/services/ocr/processors/image_processor.ts +++ b/apps/server/src/services/ocr/processors/image_processor.ts @@ -1,11 +1,9 @@ import fs from 'fs'; -import path from 'path'; import Tesseract from 'tesseract.js'; import dataDirs from '../../data_dir.js'; import log from '../../log.js'; import options from '../../options.js'; -import { getResourceDir, isDev } from '../../utils.js'; import { OCRProcessingOptions,OCRResult } from '../ocr_service.js'; import { FileProcessor } from './file_processor.js'; @@ -81,27 +79,14 @@ export class ImageProcessor extends FileProcessor { fs.mkdirSync(dataDirs.OCR_CACHE_DIR, { recursive: true }); log.info(`Initializing Tesseract worker for language(s): ${language}`); - - const workerOptions: Record = { + this.worker = await Tesseract.createWorker(language, 1, { cachePath: dataDirs.OCR_CACHE_DIR, logger: (m: { status: string; progress: number }) => { if (m.status === 'recognizing text') { log.info(`Image OCR progress (${language}): ${Math.round(m.progress * 100)}%`); } } - }; - - // In production the server is bundled, so tesseract.js's default - // __dirname-based worker path is wrong. Point it at the copy we - // place in dist/node_modules during the build step. - if (!isDev) { - workerOptions.workerPath = path.join( - getResourceDir(), - 'node_modules', 'tesseract.js', 'src', 'worker-script', 'node', 'index.js' - ); - } - - this.worker = await Tesseract.createWorker(language, 1, workerOptions); + }); this.currentLanguage = language; } diff --git a/scripts/build-utils.ts b/scripts/build-utils.ts index 07a93a256e..7349cecb2b 100644 --- a/scripts/build-utils.ts +++ b/scripts/build-utils.ts @@ -53,7 +53,8 @@ export default class BuildHelper { "better-sqlite3", "pdfjs-dist", "./xhr-sync-worker.js", - "vite" + "vite", + "tesseract.js" ], metafile: true, splitting: false,