fix(ocr): use correct officeparser v6.1.0 API

v6.1.0 renamed parseOfficeAsync to OfficeParser.parseOffice (a static method), which now returns an AST object with a toText() method instead of a plain string.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-07-09 05:33:17 +02:00 · 2026-04-19 12:10:08 +03:00
parent b2bcccb4c7
commit f9baac34cc
1 changed file with 3 additions and 4 deletions
--- a/apps/server/src/services/ocr/processors/office_processor.ts
+++ b/apps/server/src/services/ocr/processors/office_processor.ts
@@ -1,5 +1,4 @@
-import officeparser from 'officeparser';
-import type { OfficeParserConfig } from 'officeparser';
+import { OfficeParser, type OfficeParserConfig } from 'officeparser';

 import log from '../../log.js';
 import { OCRProcessingOptions, OCRResult } from '../ocr_service.js';
@@ -45,8 +44,8 @@ export class OfficeProcessor extends FileProcessor {

        log.info(`Starting Office document text extraction for ${mimeType}...`);

-        const text = await officeparser.parseOfficeAsync(buffer, PARSER_CONFIG);
-        const trimmed = text.trim();
+        const ast = await OfficeParser.parseOffice(buffer, PARSER_CONFIG);
+        const trimmed = ast.toText().trim();

        return {
            text: trimmed,