From e539b117186667bfb13302e18a33aa108039714a Mon Sep 17 00:00:00 2001 From: Elian Doran Date: Fri, 3 Apr 2026 11:11:53 +0300 Subject: [PATCH] chore(ocr): upgrade to officeprocessor v6 to avoid pdfjs issues --- apps/desktop/scripts/build.ts | 2 +- apps/server/package.json | 2 +- apps/server/scripts/build.ts | 2 +- apps/server/src/services/ocr/ocr_service.ts | 3 +- .../ocr/processors/office_processor.ts | 59 +++--- pnpm-lock.yaml | 190 +++++------------- 6 files changed, 82 insertions(+), 176 deletions(-) diff --git a/apps/desktop/scripts/build.ts b/apps/desktop/scripts/build.ts index b4d68d7d46..2e5f82c506 100644 --- a/apps/desktop/scripts/build.ts +++ b/apps/desktop/scripts/build.ts @@ -16,7 +16,7 @@ async function main() { build.copy("/packages/share-theme/src/templates", "share-theme/templates/"); // Copy node modules dependencies - build.copyNodeModules([ "better-sqlite3", "bindings", "file-uri-to-path", "pdfjs-dist", "@electron/remote" ]); + build.copyNodeModules([ "better-sqlite3", "bindings", "file-uri-to-path", "@electron/remote" ]); build.copy("/node_modules/ckeditor5/dist/ckeditor5-content.css", "ckeditor5-content.css"); build.buildFrontend(); diff --git a/apps/server/package.json b/apps/server/package.json index 20e87efdba..ac7847baa3 100644 --- a/apps/server/package.json +++ b/apps/server/package.json @@ -116,7 +116,7 @@ "mime-types": "3.0.2", "multer": "2.1.1", "normalize-strings": "1.1.1", - "officeparser": "5.2.0", + "officeparser": "6.0.7", "rand-token": "1.0.1", "safe-compare": "1.1.4", "sanitize-filename": "1.6.4", diff --git a/apps/server/scripts/build.ts b/apps/server/scripts/build.ts index 81985db51d..9fa1f9cb83 100644 --- a/apps/server/scripts/build.ts +++ b/apps/server/scripts/build.ts @@ -11,7 +11,7 @@ async function main() { build.copy("/packages/share-theme/src/templates", "share-theme/templates/"); // Copy node modules dependencies - build.copyNodeModules([ "better-sqlite3", "bindings", "file-uri-to-path", "pdfjs-dist" ]); + build.copyNodeModules([ "better-sqlite3", "bindings", "file-uri-to-path" ]); build.copy("/node_modules/ckeditor5/dist/ckeditor5-content.css", "ckeditor5-content.css"); build.buildFrontend(); diff --git a/apps/server/src/services/ocr/ocr_service.ts b/apps/server/src/services/ocr/ocr_service.ts index 583e38e08c..f59ff8e609 100644 --- a/apps/server/src/services/ocr/ocr_service.ts +++ b/apps/server/src/services/ocr/ocr_service.ts @@ -24,6 +24,7 @@ export interface OCRProcessingOptions { forceReprocess?: boolean; confidence?: number; enablePDFTextExtraction?: boolean; + mimeType?: string; } /** @@ -114,7 +115,7 @@ class OCRService { throw new Error(`No processor found for MIME type: ${mimeType}`); } - const result = await processor.extractText(fileBuffer, options); + const result = await processor.extractText(fileBuffer, { ...options, mimeType }); log.info(`OCR extraction completed. Confidence: ${Math.round(result.confidence * 100)}%, Text length: ${result.text.length}`); return result; diff --git a/apps/server/src/services/ocr/processors/office_processor.ts b/apps/server/src/services/ocr/processors/office_processor.ts index 1435ecc371..f2ee7e8ebe 100644 --- a/apps/server/src/services/ocr/processors/office_processor.ts +++ b/apps/server/src/services/ocr/processors/office_processor.ts @@ -1,53 +1,58 @@ -import * as officeParser from 'officeparser'; +import { parseExcel } from 'officeparser/dist/parsers/ExcelParser.js'; +import { parseOpenOffice } from 'officeparser/dist/parsers/OpenOfficeParser.js'; +import { parsePowerPoint } from 'officeparser/dist/parsers/PowerPointParser.js'; +import { parseWord } from 'officeparser/dist/parsers/WordParser.js'; +import type { OfficeParserConfig } from 'officeparser/dist/types.js'; import log from '../../log.js'; import { OCRProcessingOptions, OCRResult } from '../ocr_service.js'; import { FileProcessor } from './file_processor.js'; -// officeparser depends on pdfjs-dist which expects DOMMatrix at the -// top level. Provide a minimal stub so it doesn't crash in Node.js -// environments that lack it (e.g. Alpine Linux). -if (!globalThis.DOMMatrix) { - globalThis.DOMMatrix = class DOMMatrix { - a = 1; b = 0; c = 0; d = 1; e = 0; f = 0; - } as unknown as typeof globalThis.DOMMatrix; -} +type Parser = (buffer: Buffer, config: OfficeParserConfig) => Promise<{ toText(): string }>; -const SUPPORTED_TYPES = [ +const PARSER_BY_MIME: Record = { // Office Open XML - 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', // DOCX - 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', // XLSX - 'application/vnd.openxmlformats-officedocument.presentationml.presentation', // PPTX + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': parseWord, + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': parseExcel, + 'application/vnd.openxmlformats-officedocument.presentationml.presentation': parsePowerPoint, // OpenDocument - 'application/vnd.oasis.opendocument.text', // ODT - 'application/vnd.oasis.opendocument.spreadsheet', // ODS - 'application/vnd.oasis.opendocument.presentation' // ODP -]; + 'application/vnd.oasis.opendocument.text': parseOpenOffice, + 'application/vnd.oasis.opendocument.spreadsheet': parseOpenOffice, + 'application/vnd.oasis.opendocument.presentation': parseOpenOffice +}; + +const PARSER_CONFIG: OfficeParserConfig = { + outputErrorToConsole: false, + newlineDelimiter: '\n', + ignoreNotes: false, + putNotesAtLast: false +}; /** * Office document processor for extracting text from DOCX/XLSX/PPTX and ODT/ODS/ODP files. + * Uses individual parsers from officeparser v6 to avoid pulling in pdfjs-dist. */ export class OfficeProcessor extends FileProcessor { canProcess(mimeType: string): boolean { - return SUPPORTED_TYPES.includes(mimeType); + return mimeType in PARSER_BY_MIME; } getSupportedMimeTypes(): string[] { - return [...SUPPORTED_TYPES]; + return Object.keys(PARSER_BY_MIME); } async extractText(buffer: Buffer, options: OCRProcessingOptions = {}): Promise { - log.info('Starting Office document text extraction...'); + const mimeType = options.mimeType; + if (!mimeType || !(mimeType in PARSER_BY_MIME)) { + throw new Error(`Unsupported MIME type for Office processor: ${mimeType}`); + } - const text = await officeParser.parseOfficeAsync(buffer, { - outputErrorToConsole: false, - newlineDelimiter: '\n', - ignoreNotes: false, - putNotesAtLast: false - }); + log.info(`Starting Office document text extraction for ${mimeType}...`); - const trimmed = (text || '').trim(); + const parse = PARSER_BY_MIME[mimeType]; + const ast = await parse(buffer, PARSER_CONFIG); + const trimmed = ast.toText().trim(); return { text: trimmed, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 87f74ca7b0..a4e4ff0a0d 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -810,8 +810,8 @@ importers: specifier: 1.1.1 version: 1.1.1 officeparser: - specifier: 5.2.0 - version: 5.2.0 + specifier: 6.0.7 + version: 6.0.7(encoding@0.1.13) rand-token: specifier: 1.0.1 version: 1.0.1 @@ -4028,61 +4028,30 @@ packages: resolution: {integrity: sha512-wK+5pLK5XFmgtH3aQ2YVvA3HohS3xqV/OxuVOdNx9Wpnz7VE/fnC+e1A7ln6LFYeck7gOJ/dsZV6OLplOtAJ2w==} engines: {node: '>=18'} - '@napi-rs/canvas-android-arm64@0.1.73': - resolution: {integrity: sha512-s8dMhfYIHVv7gz8BXg3Nb6cFi950Y0xH5R/sotNZzUVvU9EVqHfkqiGJ4UIqu+15UhqguT6mI3Bv1mhpRkmMQw==} - engines: {node: '>= 10'} - cpu: [arm64] - os: [android] - '@napi-rs/canvas-android-arm64@0.1.96': resolution: {integrity: sha512-ew1sPrN3dGdZ3L4FoohPfnjq0f9/Jk7o+wP7HkQZokcXgIUD6FIyICEWGhMYzv53j63wUcPvZeAwgewX58/egg==} engines: {node: '>= 10'} cpu: [arm64] os: [android] - '@napi-rs/canvas-darwin-arm64@0.1.73': - resolution: {integrity: sha512-bLPCq8Yyq1vMdVdIpQAqmgf6VGUknk8e7NdSZXJJFOA9gxkJ1RGcHOwoXo7h0gzhHxSorg71hIxyxtwXpq10Rw==} - engines: {node: '>= 10'} - cpu: [arm64] - os: [darwin] - '@napi-rs/canvas-darwin-arm64@0.1.96': resolution: {integrity: sha512-Q/wOXZ5PzTqpdmA5eUOcegCf4Go/zz3aZ5DlzSeDpOjFmfwMKh8EzLAoweQ+mJVagcHQyzoJhaTEnrO68TNyNg==} engines: {node: '>= 10'} cpu: [arm64] os: [darwin] - '@napi-rs/canvas-darwin-x64@0.1.73': - resolution: {integrity: sha512-GR1CcehDjdNYXN3bj8PIXcXfYLUUOQANjQpM+KNnmpRo7ojsuqPjT7ZVH+6zoG/aqRJWhiSo+ChQMRazZlRU9g==} - engines: {node: '>= 10'} - cpu: [x64] - os: [darwin] - '@napi-rs/canvas-darwin-x64@0.1.96': resolution: {integrity: sha512-UrXiQz28tQEvGM1qvyptewOAfmUrrd5+wvi6Rzjj2VprZI8iZ2KIvBD2lTTG1bVF95AbeDeG7PJA0D9sLKaOFA==} engines: {node: '>= 10'} cpu: [x64] os: [darwin] - '@napi-rs/canvas-linux-arm-gnueabihf@0.1.73': - resolution: {integrity: sha512-cM7F0kBJVFio0+U2iKSW4fWSfYQ8CPg4/DRZodSum/GcIyfB8+UPJSRM1BvvlcWinKLfX1zUYOwonZX9IFRRcw==} - engines: {node: '>= 10'} - cpu: [arm] - os: [linux] - '@napi-rs/canvas-linux-arm-gnueabihf@0.1.96': resolution: {integrity: sha512-I90ODxweD8aEP6XKU/NU+biso95MwCtQ2F46dUvhec1HesFi0tq/tAJkYic/1aBSiO/1kGKmSeD1B0duOHhEHQ==} engines: {node: '>= 10'} cpu: [arm] os: [linux] - '@napi-rs/canvas-linux-arm64-gnu@0.1.73': - resolution: {integrity: sha512-PMWNrMON9uz9klz1B8ZY/RXepQSC5dxxHQTowfw93Tb3fLtWO5oNX2k9utw7OM4ypT9BUZUWJnDQ5bfuXc/EUQ==} - engines: {node: '>= 10'} - cpu: [arm64] - os: [linux] - libc: [glibc] - '@napi-rs/canvas-linux-arm64-gnu@0.1.96': resolution: {integrity: sha512-Dx/0+RFV++w3PcRy+4xNXkghhXjA5d0Mw1bs95emn5Llinp1vihMaA6WJt3oYv2LAHc36+gnrhIBsPhUyI2SGw==} engines: {node: '>= 10'} @@ -4090,13 +4059,6 @@ packages: os: [linux] libc: [glibc] - '@napi-rs/canvas-linux-arm64-musl@0.1.73': - resolution: {integrity: sha512-lX0z2bNmnk1PGZ+0a9OZwI2lPPvWjRYzPqvEitXX7lspyLFrOzh2kcQiLL7bhyODN23QvfriqwYqp5GreSzVvA==} - engines: {node: '>= 10'} - cpu: [arm64] - os: [linux] - libc: [musl] - '@napi-rs/canvas-linux-arm64-musl@0.1.96': resolution: {integrity: sha512-UvOi7fii3IE2KDfEfhh8m+LpzSRvhGK7o1eho99M2M0HTik11k3GX+2qgVx9EtujN3/bhFFS1kSO3+vPMaJ0Mg==} engines: {node: '>= 10'} @@ -4104,13 +4066,6 @@ packages: os: [linux] libc: [musl] - '@napi-rs/canvas-linux-riscv64-gnu@0.1.73': - resolution: {integrity: sha512-QDQgMElwxAoADsSR3UYvdTTQk5XOyD9J5kq15Z8XpGwpZOZsSE0zZ/X1JaOtS2x+HEZL6z1S6MF/1uhZFZb5ig==} - engines: {node: '>= 10'} - cpu: [riscv64] - os: [linux] - libc: [glibc] - '@napi-rs/canvas-linux-riscv64-gnu@0.1.96': resolution: {integrity: sha512-MBSukhGCQ5nRtf9NbFYWOU080yqkZU1PbuH4o1ROvB4CbPl12fchDR35tU83Wz8gWIM9JTn99lBn9DenPIv7Ig==} engines: {node: '>= 10'} @@ -4118,13 +4073,6 @@ packages: os: [linux] libc: [glibc] - '@napi-rs/canvas-linux-x64-gnu@0.1.73': - resolution: {integrity: sha512-wbzLJrTalQrpyrU1YRrO6w6pdr5vcebbJa+Aut5QfTaW9eEmMb1WFG6l1V+cCa5LdHmRr8bsvl0nJDU/IYDsmw==} - engines: {node: '>= 10'} - cpu: [x64] - os: [linux] - libc: [glibc] - '@napi-rs/canvas-linux-x64-gnu@0.1.96': resolution: {integrity: sha512-I/ccu2SstyKiV3HIeVzyBIWfrJo8cN7+MSQZPnabewWV6hfJ2nY7Df2WqOHmobBRUw84uGR6zfQHsUEio/m5Vg==} engines: {node: '>= 10'} @@ -4132,13 +4080,6 @@ packages: os: [linux] libc: [glibc] - '@napi-rs/canvas-linux-x64-musl@0.1.73': - resolution: {integrity: sha512-xbfhYrUufoTAKvsEx2ZUN4jvACabIF0h1F5Ik1Rk4e/kQq6c+Dwa5QF0bGrfLhceLpzHT0pCMGMDeQKQrcUIyA==} - engines: {node: '>= 10'} - cpu: [x64] - os: [linux] - libc: [musl] - '@napi-rs/canvas-linux-x64-musl@0.1.96': resolution: {integrity: sha512-H3uov7qnTl73GDT4h52lAqpJPsl1tIUyNPWJyhQ6gHakohNqqRq3uf80+NEpzcytKGEOENP1wX3yGwZxhjiWEQ==} engines: {node: '>= 10'} @@ -4152,22 +4093,12 @@ packages: cpu: [arm64] os: [win32] - '@napi-rs/canvas-win32-x64-msvc@0.1.73': - resolution: {integrity: sha512-YQmHXBufFBdWqhx+ympeTPkMfs3RNxaOgWm59vyjpsub7Us07BwCcmu1N5kildhO8Fm0syoI2kHnzGkJBLSvsg==} - engines: {node: '>= 10'} - cpu: [x64] - os: [win32] - '@napi-rs/canvas-win32-x64-msvc@0.1.96': resolution: {integrity: sha512-UYGdTltVd+Z8mcIuoqGmAXXUvwH5CLf2M6mIB5B0/JmX5J041jETjqtSYl7gN+aj3k1by/SG6sS0hAwCqyK7zw==} engines: {node: '>= 10'} cpu: [x64] os: [win32] - '@napi-rs/canvas@0.1.73': - resolution: {integrity: sha512-9iwPZrNlCK4rG+vWyDvyvGeYjck9MoP0NVQP6N60gqJNFA1GsN0imG05pzNsqfCvFxUxgiTYlR8ff0HC1HXJiw==} - engines: {node: '>= 10'} - '@napi-rs/canvas@0.1.96': resolution: {integrity: sha512-6NNmNxvoJKeucVjxaaRUt3La2i5jShgiAbaY3G/72s1Vp3U06XPrAIxkAjBxpDcamEn/t+WJ4OOlGmvILo4/Ew==} engines: {node: '>= 10'} @@ -7343,6 +7274,10 @@ packages: engines: {node: '>=10.0.0'} deprecated: this version has critical issues, please update to the latest version + '@xmldom/xmldom@0.8.12': + resolution: {integrity: sha512-9k/gHF6n/pAi/9tqr3m3aqkuiNosYTurLLUtc7xQ9sxB/wm7WPygCv8GYa6mS0fLJEHhqMC1ATYhz++U/lRHqg==} + engines: {node: '>=10.0.0'} + '@xtuc/ieee754@1.2.0': resolution: {integrity: sha512-DX8nKgqcGwsc0eJSqYt5lwP4DH5FlHnmuWWBRy7X0NcaGR0ZtuyeESgMwTYVEtxmsNGY+qit4QYT/MIYTOTPeA==} @@ -12253,9 +12188,6 @@ packages: engines: {node: '>=10.5.0'} deprecated: Use your platform's native DOMException instead - node-ensure@0.0.0: - resolution: {integrity: sha512-DRI60hzo2oKN1ma0ckc6nQWlHU69RH6xN0sjQTjMpChPfTYvKZdcQFfdYK2RWbJcKyUizSIy/l8OTGxMAM1QDw==} - node-fetch-h2@2.3.0: resolution: {integrity: sha512-ofRW94Ab0T4AOh5Fk8t0h8OBWrmjb0SSB20xh1H8YnPV9EJ+f5AMoYSUQ2zgJ4Iq2HAK0I2l5/Nequ8YzFS3Hg==} engines: {node: 4.x || >=6.0.0} @@ -12484,8 +12416,9 @@ packages: ofetch@1.5.1: resolution: {integrity: sha512-2W4oUZlVaqAPAil6FUg/difl6YhqhUR7x2eZY4bQCko22UXg3hptq9KLQdqFClV+Wu85UX7hNtdGTngi/1BxcA==} - officeparser@5.2.0: - resolution: {integrity: sha512-EGdHj4RgP5FtyTHsqgDz2ZXkV2q2o2Ktwk4ogHpVcRT1+udwb3pRLfmlNO9ZMDZtDhJz5qNIUAs/+ItrUWoHiQ==} + officeparser@6.0.7: + resolution: {integrity: sha512-MkNHyWIfEZRDtB8c0fgJHdb4Ui0I/WztBjlUjlPiEbTO6dIYaJMt+llS5p5Foj13guUZgGxkkM9VwsVRthHNAA==} + engines: {node: '>=18.0.0'} hasBin: true ohash@2.0.11: @@ -12812,10 +12745,6 @@ packages: resolution: {integrity: sha512-XDF38WCH3z5OV/OVa8GKUNtLAyneuzbCisx7QUCF8Q6Nutx0WnJrQe5O+kOtBlLfRNUws98Y58Lblp+NJG5T4Q==} hasBin: true - pdfjs-dist@5.3.93: - resolution: {integrity: sha512-w3fQKVL1oGn8FRyx5JUG5tnbblggDqyx2XzA5brsJ5hSuS+I0NdnJANhmeWKLjotdbPQucLBug5t0MeWr0AAdg==} - engines: {node: '>=20.16.0 || >=22.3.0'} - pdfjs-dist@5.5.207: resolution: {integrity: sha512-WMqqw06w1vUt9ZfT0gOFhMf3wHsWhaCrxGrckGs5Cci6ybDW87IvPaOd2pnBwT6BJuP/CzXDZxjFgmSULLdsdw==} engines: {node: '>=20.19.0 || >=22.13.0 || >=24'} @@ -14862,9 +14791,15 @@ packages: tesseract.js-core@6.0.0: resolution: {integrity: sha512-1Qncm/9oKM7xgrQXZXNB+NRh19qiXGhxlrR8EwFbK5SaUbPZnS5OMtP/ghtqfd23hsr1ZvZbZjeuAGcMxd/ooA==} + tesseract.js-core@7.0.0: + resolution: {integrity: sha512-WnNH518NzmbSq9zgTPeoF8c+xmilS8rFIl1YKbk/ptuuc7p6cLNELNuPAzcmsYw450ca6bLa8j3t0VAtq435Vw==} + tesseract.js@6.0.1: resolution: {integrity: sha512-/sPvMvrCtgxnNRCjbTYbr7BRu0yfWDsMZQ2a/T5aN/L1t8wUQN6tTWv6p6FwzpoEBA0jrN2UD2SX4QQFRdoDbA==} + tesseract.js@7.0.0: + resolution: {integrity: sha512-exPBkd+z+wM1BuMkx/Bjv43OeLBxhL5kKWsz/9JY+DXcXdiBjiAch0V49QR3oAJqCaL5qURE0vx9Eo+G5YE7mA==} + text-decoder@1.2.3: resolution: {integrity: sha512-3/o9z3X0X0fTupwsYvR03pJ/DjWuqqrfwBgTQzdWDiQSm9KitAyz/9WqsT2JQW7KV2m+bC2ol/zqpW37NHxLaA==} @@ -17171,8 +17106,6 @@ snapshots: '@ckeditor/ckeditor5-utils': 47.6.1 '@ckeditor/ckeditor5-widget': 47.6.1 es-toolkit: 1.39.5 - transitivePeerDependencies: - - supports-color '@ckeditor/ckeditor5-cloud-services@47.6.1': dependencies: @@ -17478,6 +17411,8 @@ snapshots: '@ckeditor/ckeditor5-ui': 47.6.1 '@ckeditor/ckeditor5-utils': 47.6.1 ckeditor5: 47.6.1 + transitivePeerDependencies: + - supports-color '@ckeditor/ckeditor5-export-word@47.6.1': dependencies: @@ -17579,8 +17514,6 @@ snapshots: '@ckeditor/ckeditor5-utils': 47.6.1 '@ckeditor/ckeditor5-widget': 47.6.1 ckeditor5: 47.6.1 - transitivePeerDependencies: - - supports-color '@ckeditor/ckeditor5-html-embed@47.6.1': dependencies: @@ -17590,8 +17523,6 @@ snapshots: '@ckeditor/ckeditor5-utils': 47.6.1 '@ckeditor/ckeditor5-widget': 47.6.1 ckeditor5: 47.6.1 - transitivePeerDependencies: - - supports-color '@ckeditor/ckeditor5-html-support@47.6.1': dependencies: @@ -17636,6 +17567,8 @@ snapshots: '@ckeditor/ckeditor5-ui': 47.6.1 '@ckeditor/ckeditor5-utils': 47.6.1 ckeditor5: 47.6.1 + transitivePeerDependencies: + - supports-color '@ckeditor/ckeditor5-indent@47.6.1': dependencies: @@ -17647,8 +17580,6 @@ snapshots: '@ckeditor/ckeditor5-ui': 47.6.1 '@ckeditor/ckeditor5-utils': 47.6.1 ckeditor5: 47.6.1 - transitivePeerDependencies: - - supports-color '@ckeditor/ckeditor5-inspector@5.0.0': {} @@ -17659,8 +17590,6 @@ snapshots: '@ckeditor/ckeditor5-ui': 47.6.1 '@ckeditor/ckeditor5-utils': 47.6.1 ckeditor5: 47.6.1 - transitivePeerDependencies: - - supports-color '@ckeditor/ckeditor5-line-height@47.6.1': dependencies: @@ -20145,83 +20074,39 @@ snapshots: strict-event-emitter: 0.5.1 optional: true - '@napi-rs/canvas-android-arm64@0.1.73': - optional: true - '@napi-rs/canvas-android-arm64@0.1.96': optional: true - '@napi-rs/canvas-darwin-arm64@0.1.73': - optional: true - '@napi-rs/canvas-darwin-arm64@0.1.96': optional: true - '@napi-rs/canvas-darwin-x64@0.1.73': - optional: true - '@napi-rs/canvas-darwin-x64@0.1.96': optional: true - '@napi-rs/canvas-linux-arm-gnueabihf@0.1.73': - optional: true - '@napi-rs/canvas-linux-arm-gnueabihf@0.1.96': optional: true - '@napi-rs/canvas-linux-arm64-gnu@0.1.73': - optional: true - '@napi-rs/canvas-linux-arm64-gnu@0.1.96': optional: true - '@napi-rs/canvas-linux-arm64-musl@0.1.73': - optional: true - '@napi-rs/canvas-linux-arm64-musl@0.1.96': optional: true - '@napi-rs/canvas-linux-riscv64-gnu@0.1.73': - optional: true - '@napi-rs/canvas-linux-riscv64-gnu@0.1.96': optional: true - '@napi-rs/canvas-linux-x64-gnu@0.1.73': - optional: true - '@napi-rs/canvas-linux-x64-gnu@0.1.96': optional: true - '@napi-rs/canvas-linux-x64-musl@0.1.73': - optional: true - '@napi-rs/canvas-linux-x64-musl@0.1.96': optional: true '@napi-rs/canvas-win32-arm64-msvc@0.1.96': optional: true - '@napi-rs/canvas-win32-x64-msvc@0.1.73': - optional: true - '@napi-rs/canvas-win32-x64-msvc@0.1.96': optional: true - '@napi-rs/canvas@0.1.73': - optionalDependencies: - '@napi-rs/canvas-android-arm64': 0.1.73 - '@napi-rs/canvas-darwin-arm64': 0.1.73 - '@napi-rs/canvas-darwin-x64': 0.1.73 - '@napi-rs/canvas-linux-arm-gnueabihf': 0.1.73 - '@napi-rs/canvas-linux-arm64-gnu': 0.1.73 - '@napi-rs/canvas-linux-arm64-musl': 0.1.73 - '@napi-rs/canvas-linux-riscv64-gnu': 0.1.73 - '@napi-rs/canvas-linux-x64-gnu': 0.1.73 - '@napi-rs/canvas-linux-x64-musl': 0.1.73 - '@napi-rs/canvas-win32-x64-msvc': 0.1.73 - optional: true - '@napi-rs/canvas@0.1.96': optionalDependencies: '@napi-rs/canvas-android-arm64': 0.1.96 @@ -24675,6 +24560,8 @@ snapshots: '@xmldom/xmldom@0.8.10': {} + '@xmldom/xmldom@0.8.12': {} + '@xtuc/ieee754@1.2.0': {} '@xtuc/long@4.2.2': {} @@ -30726,8 +30613,6 @@ snapshots: node-domexception@1.0.0: {} - node-ensure@0.0.0: {} - node-fetch-h2@2.3.0: dependencies: http2-client: 1.3.5 @@ -31022,14 +30907,17 @@ snapshots: node-fetch-native: 1.6.7 ufo: 1.6.1 - officeparser@5.2.0: + officeparser@6.0.7(encoding@0.1.13): dependencies: - '@xmldom/xmldom': 0.8.10 + '@xmldom/xmldom': 0.8.12 concat-stream: 2.0.0 - file-type: 16.5.4 - node-ensure: 0.0.0 - pdfjs-dist: 5.3.93 + file-type: 21.3.4 + pdfjs-dist: 5.5.207 + tesseract.js: 7.0.0(encoding@0.1.13) yauzl: 3.2.1 + transitivePeerDependencies: + - encoding + - supports-color ohash@2.0.11: {} @@ -31392,10 +31280,6 @@ snapshots: ieee754: 1.2.1 resolve-protobuf-schema: 2.1.0 - pdfjs-dist@5.3.93: - optionalDependencies: - '@napi-rs/canvas': 0.1.73 - pdfjs-dist@5.5.207: optionalDependencies: '@napi-rs/canvas': 0.1.96 @@ -33815,6 +33699,8 @@ snapshots: tesseract.js-core@6.0.0: {} + tesseract.js-core@7.0.0: {} + tesseract.js@6.0.1(encoding@0.1.13): dependencies: bmp-js: 0.1.0 @@ -33829,6 +33715,20 @@ snapshots: transitivePeerDependencies: - encoding + tesseract.js@7.0.0(encoding@0.1.13): + dependencies: + bmp-js: 0.1.0 + idb-keyval: 6.2.2 + is-url: 1.2.4 + node-fetch: 2.7.0(encoding@0.1.13) + opencollective-postinstall: 2.0.3 + regenerator-runtime: 0.13.11 + tesseract.js-core: 7.0.0 + wasm-feature-detect: 1.8.0 + zlibjs: 0.3.1 + transitivePeerDependencies: + - encoding + text-decoder@1.2.3: dependencies: b4a: 1.6.7