added image OCR and parsing text from PDF (and OCR of PDF images)

This commit is contained in:
zadam
2023-01-26 20:32:27 +01:00
parent 63c62df787
commit ad887c4b12
13 changed files with 380 additions and 189 deletions

View File

@@ -65,24 +65,6 @@ function getImageMimeFromExtension(ext) {
return `image/${ext === 'svg' ? 'svg+xml' : ext}`;
}
/**
 * Runs OCR over an image and stores the recognized text on the note as a
 * 'plainText' attachment with MIME type 'text/plain'.
 *
 * Returns immediately, without touching the note, when the 'ocrImages'
 * option is disabled. The duration and the input/output sizes are logged.
 *
 * @param {object} note - must provide saveNoteAttachment(name, mime, content)
 * @param {Buffer} buffer - encoded image bytes (presumably a Node Buffer — confirm against callers)
 * @returns {void}
 */
function runOcr(note, buffer) {
    if (!optionService.getOptionBool('ocrImages')) {
        return;
    }

    const start = Date.now();

    // Assigning the raw bytes to `src` is what populates the image dimensions read below.
    const img = new Canvas.Image();
    img.src = buffer;

    const { width, height } = img;

    // NOTE(review): assumes Canvas is the node-canvas module, where createCanvas
    // is a plain factory function — so it is called without `new`.
    const canvas = Canvas.createCanvas(width, height);
    const ctx = canvas.getContext('2d');
    ctx.drawImage(img, 0, 0, width, height);

    const plainText = OCRAD(canvas);

    log.info(`OCR of ${buffer.byteLength} image bytes into ${plainText.length} chars of text took ${Date.now() - start}ms`);

    note.saveNoteAttachment('plainText', 'text/plain', plainText);
}
function updateImage(noteId, uploadBuffer, originalName) {
log.info(`Updating image ${noteId}: ${originalName}`);