mirror of
https://github.com/zadam/trilium.git
synced 2026-05-06 12:37:04 +02:00
chore(deps): update dependency officeparser to v6.1.0 (#9482)
This commit is contained in:
@@ -114,7 +114,7 @@
|
||||
"mime-types": "3.0.2",
|
||||
"multer": "2.1.1",
|
||||
"normalize-strings": "1.1.1",
|
||||
"officeparser": "6.0.7",
|
||||
"officeparser": "6.1.0",
|
||||
"rand-token": "1.0.1",
|
||||
"safe-compare": "1.1.4",
|
||||
"sanitize-filename": "1.6.4",
|
||||
|
||||
@@ -1,25 +1,19 @@
|
||||
import { parseExcel } from 'officeparser/dist/parsers/ExcelParser.js';
|
||||
import { parseOpenOffice } from 'officeparser/dist/parsers/OpenOfficeParser.js';
|
||||
import { parsePowerPoint } from 'officeparser/dist/parsers/PowerPointParser.js';
|
||||
import { parseWord } from 'officeparser/dist/parsers/WordParser.js';
|
||||
import type { OfficeParserConfig } from 'officeparser/dist/types.js';
|
||||
import { OfficeParser, type OfficeParserConfig } from 'officeparser';
|
||||
|
||||
import log from '../../log.js';
|
||||
import { OCRProcessingOptions, OCRResult } from '../ocr_service.js';
|
||||
import { FileProcessor } from './file_processor.js';
|
||||
|
||||
type Parser = (buffer: Buffer, config: OfficeParserConfig) => Promise<{ toText(): string }>;
|
||||
|
||||
const PARSER_BY_MIME: Record<string, Parser> = {
|
||||
const SUPPORTED_MIME_TYPES = new Set([
|
||||
// Office Open XML
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.document': parseWord,
|
||||
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': parseExcel,
|
||||
'application/vnd.openxmlformats-officedocument.presentationml.presentation': parsePowerPoint,
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||||
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
||||
'application/vnd.openxmlformats-officedocument.presentationml.presentation',
|
||||
// OpenDocument
|
||||
'application/vnd.oasis.opendocument.text': parseOpenOffice,
|
||||
'application/vnd.oasis.opendocument.spreadsheet': parseOpenOffice,
|
||||
'application/vnd.oasis.opendocument.presentation': parseOpenOffice
|
||||
};
|
||||
'application/vnd.oasis.opendocument.text',
|
||||
'application/vnd.oasis.opendocument.spreadsheet',
|
||||
'application/vnd.oasis.opendocument.presentation'
|
||||
]);
|
||||
|
||||
const PARSER_CONFIG: OfficeParserConfig = {
|
||||
outputErrorToConsole: false,
|
||||
@@ -30,28 +24,27 @@ const PARSER_CONFIG: OfficeParserConfig = {
|
||||
|
||||
/**
|
||||
* Office document processor for extracting text from DOCX/XLSX/PPTX and ODT/ODS/ODP files.
|
||||
* Uses individual parsers from officeparser v6 to avoid pulling in pdfjs-dist.
|
||||
* Uses officeparser's main API, which auto-detects the format from the buffer's magic bytes.
|
||||
*/
|
||||
export class OfficeProcessor extends FileProcessor {
|
||||
|
||||
canProcess(mimeType: string): boolean {
|
||||
return mimeType in PARSER_BY_MIME;
|
||||
return SUPPORTED_MIME_TYPES.has(mimeType);
|
||||
}
|
||||
|
||||
getSupportedMimeTypes(): string[] {
|
||||
return Object.keys(PARSER_BY_MIME);
|
||||
return [...SUPPORTED_MIME_TYPES];
|
||||
}
|
||||
|
||||
async extractText(buffer: Buffer, options: OCRProcessingOptions = {}): Promise<OCRResult> {
|
||||
const mimeType = options.mimeType;
|
||||
if (!mimeType || !(mimeType in PARSER_BY_MIME)) {
|
||||
if (!mimeType || !SUPPORTED_MIME_TYPES.has(mimeType)) {
|
||||
throw new Error(`Unsupported MIME type for Office processor: ${mimeType}`);
|
||||
}
|
||||
|
||||
log.info(`Starting Office document text extraction for ${mimeType}...`);
|
||||
|
||||
const parse = PARSER_BY_MIME[mimeType];
|
||||
const ast = await parse(buffer, PARSER_CONFIG);
|
||||
const ast = await OfficeParser.parseOffice(buffer, PARSER_CONFIG);
|
||||
const trimmed = ast.toText().trim();
|
||||
|
||||
return {
|
||||
|
||||
42
pnpm-lock.yaml
generated
42
pnpm-lock.yaml
generated
@@ -819,8 +819,8 @@ importers:
|
||||
specifier: 1.1.1
|
||||
version: 1.1.1
|
||||
officeparser:
|
||||
specifier: 6.0.7
|
||||
version: 6.0.7(encoding@0.1.13)
|
||||
specifier: 6.1.0
|
||||
version: 6.1.0(encoding@0.1.13)
|
||||
rand-token:
|
||||
specifier: 1.0.1
|
||||
version: 1.0.1
|
||||
@@ -6680,6 +6680,10 @@ packages:
|
||||
resolution: {integrity: sha512-9k/gHF6n/pAi/9tqr3m3aqkuiNosYTurLLUtc7xQ9sxB/wm7WPygCv8GYa6mS0fLJEHhqMC1ATYhz++U/lRHqg==}
|
||||
engines: {node: '>=10.0.0'}
|
||||
|
||||
'@xmldom/xmldom@0.9.9':
|
||||
resolution: {integrity: sha512-qycIHAucxy/LXAYIjmLmtQ8q9GPnMbnjG1KXhWm9o5sCr6pOYDATkMPiTNa6/v8eELyqOQ2FsEqeoFYmgv/gJg==}
|
||||
engines: {node: '>=14.6'}
|
||||
|
||||
'@xtuc/ieee754@1.2.0':
|
||||
resolution: {integrity: sha512-DX8nKgqcGwsc0eJSqYt5lwP4DH5FlHnmuWWBRy7X0NcaGR0ZtuyeESgMwTYVEtxmsNGY+qit4QYT/MIYTOTPeA==}
|
||||
|
||||
@@ -8827,6 +8831,10 @@ packages:
|
||||
resolution: {integrity: sha512-Ievi/yy8DS3ygGvT47PjSfdFoX+2isQueoYP1cntFW1JLYAuS4GD7NUPGg4zv2iZfV52uDyk5w5Z0TdpRS6Q1g==}
|
||||
engines: {node: '>=20'}
|
||||
|
||||
file-type@22.0.1:
|
||||
resolution: {integrity: sha512-ww5Mhre0EE+jmBvOXTmXAbEMuZE7uX4a3+oRCQFNj8w++g3ev913N6tXQz0XTXbueQ5TWQfm6BdaViEHHn8bhA==}
|
||||
engines: {node: '>=22'}
|
||||
|
||||
file-uri-to-path@1.0.0:
|
||||
resolution: {integrity: sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==}
|
||||
|
||||
@@ -11199,8 +11207,8 @@ packages:
|
||||
ofetch@1.5.1:
|
||||
resolution: {integrity: sha512-2W4oUZlVaqAPAil6FUg/difl6YhqhUR7x2eZY4bQCko22UXg3hptq9KLQdqFClV+Wu85UX7hNtdGTngi/1BxcA==}
|
||||
|
||||
officeparser@6.0.7:
|
||||
resolution: {integrity: sha512-MkNHyWIfEZRDtB8c0fgJHdb4Ui0I/WztBjlUjlPiEbTO6dIYaJMt+llS5p5Foj13guUZgGxkkM9VwsVRthHNAA==}
|
||||
officeparser@6.1.0:
|
||||
resolution: {integrity: sha512-S/dMjUyhbeyDNUjnuGKsmuDx3IoOTcyy6uFzZ6321paaF5NVQVS+Ht8SkQEzEQ85DJ256LnTroMTP2PVKebX1Q==}
|
||||
engines: {node: '>=18.0.0'}
|
||||
hasBin: true
|
||||
|
||||
@@ -13427,6 +13435,10 @@ packages:
|
||||
resolution: {integrity: sha512-ZPtzy0hu4cZjv3z5NW9gfKnNLjoz4y6uv4HlelAjDK7sY/xOkKZv9xK/WQpcsBB3jEybChz9DPC2U/+cusjJVQ==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
uint8array-extras@1.5.0:
|
||||
resolution: {integrity: sha512-rvKSBiC5zqCCiDZ9kAOszZcDvdAHwwIKJG33Ykj43OKcWsnmcBRL09YTU4nOeHZ8Y2a7l1MgTd08SBe9A8Qj6A==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
ulid@2.4.0:
|
||||
resolution: {integrity: sha512-fIRiVTJNcSRmXKPZtGzFQv9WRrZ3M9eoptl/teFJvjOzmpU+/K/JH6HZ8deBfb5vMEpicJcLn7JmvdknlMq7Zg==}
|
||||
hasBin: true
|
||||
@@ -21603,6 +21615,8 @@ snapshots:
|
||||
|
||||
'@xmldom/xmldom@0.8.12': {}
|
||||
|
||||
'@xmldom/xmldom@0.9.9': {}
|
||||
|
||||
'@xtuc/ieee754@1.2.0': {}
|
||||
|
||||
'@xtuc/long@4.2.2': {}
|
||||
@@ -24297,6 +24311,15 @@ snapshots:
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
file-type@22.0.1:
|
||||
dependencies:
|
||||
'@tokenizer/inflate': 0.4.1
|
||||
strtok3: 10.3.5
|
||||
token-types: 6.1.2
|
||||
uint8array-extras: 1.5.0
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
file-uri-to-path@1.0.0: {}
|
||||
|
||||
file-uri-to-path@2.0.0: {}
|
||||
@@ -27078,14 +27101,13 @@ snapshots:
|
||||
node-fetch-native: 1.6.7
|
||||
ufo: 1.6.1
|
||||
|
||||
officeparser@6.0.7(encoding@0.1.13):
|
||||
officeparser@6.1.0(encoding@0.1.13):
|
||||
dependencies:
|
||||
'@xmldom/xmldom': 0.8.12
|
||||
concat-stream: 2.0.0
|
||||
file-type: 21.3.4
|
||||
'@xmldom/xmldom': 0.9.9
|
||||
fflate: 0.8.2
|
||||
file-type: 22.0.1
|
||||
pdfjs-dist: 5.6.205
|
||||
tesseract.js: 7.0.0(encoding@0.1.13)
|
||||
yauzl: 3.3.0
|
||||
transitivePeerDependencies:
|
||||
- encoding
|
||||
- supports-color
|
||||
@@ -29640,6 +29662,8 @@ snapshots:
|
||||
|
||||
uint8array-extras@1.4.0: {}
|
||||
|
||||
uint8array-extras@1.5.0: {}
|
||||
|
||||
ulid@2.4.0: {}
|
||||
|
||||
ulid@3.0.2: {}
|
||||
|
||||
Reference in New Issue
Block a user