added image OCR and parsing text from PDF (and OCR of PDF images)

This commit is contained in:
zadam
2023-01-26 20:32:27 +01:00
parent 63c62df787
commit ad887c4b12
13 changed files with 380 additions and 189 deletions

12
src-build/fix_pdfjs.js Normal file
View File

@@ -0,0 +1,12 @@
const fs = require("fs");
// Build helper: rewrites the "main" entry of the installed pdfjs-dist
// package.json so that it points at the legacy build, then saves the
// manifest back to the same path with 2-space indentation.
const PACKAGE_JSON_PATH = './node_modules/pdfjs-dist/package.json';

// Load the manifest as text first, then parse it into an object.
const manifestText = fs.readFileSync(PACKAGE_JSON_PATH).toString();
const manifest = JSON.parse(manifestText);

// non-legacy build doesn't work on node 16 at least
manifest.main = "legacy/build/pdf.js";

// Serialize and overwrite the original file in place.
const patchedText = JSON.stringify(manifest, null, 2);
fs.writeFileSync(PACKAGE_JSON_PATH, patchedText);