Added image OCR and text parsing from PDFs (and OCR of PDF images)

2025-11-07 13:56:11 +01:00 · 2023-01-26 20:32:27 +01:00
parent 63c62df787
commit ad887c4b12
13 changed files with 380 additions and 189 deletions
--- a/src/services/image.js
+++ b/src/services/image.js
@@ -65,24 +65,6 @@ function getImageMimeFromExtension(ext) {
    return `image/${ext === 'svg' ? 'svg+xml' : ext}`;
 }

-function runOcr(note, buffer) {
-    if (!optionService.getOptionBool('ocrImages')) {
-        return;
-    }
-
-    const start = Date.now();
-    const img = new Canvas.Image();
-    img.src = buffer;
-    const canvas = new Canvas.createCanvas(img.width, img.height);
-    const ctx = canvas.getContext('2d');
-    ctx.drawImage(img, 0, 0, img.width, img.height);
-    const plainText = OCRAD(canvas);
-
-    log.info(`OCR of ${buffer.byteLength} image bytes into ${plainText.length} chars of text took ${Date.now() - start}ms`);
-
-    note.saveNoteAttachment('plainText', 'text/plain', plainText);
-}
-
 function updateImage(noteId, uploadBuffer, originalName) {
    log.info(`Updating image ${noteId}: ${originalName}`);