mirror of
https://github.com/zadam/trilium.git
synced 2025-11-08 14:25:51 +01:00
ocr wip
This commit is contained in:
@@ -1,7 +1,8 @@
|
||||
const Canvas = require("canvas");
|
||||
const OCRAD = require("ocrad.js");
|
||||
const log = require("./log.js");
|
||||
const optionService = require("./options.js");
|
||||
const log = require("./log");
|
||||
const optionService = require("./options");
|
||||
const cls = require("./cls");
|
||||
|
||||
function ocrFromByteArray(img) {
|
||||
// byte array contains raw uncompressed pixel data
|
||||
@@ -85,7 +86,7 @@ async function extractTextFromPdf(note, buffer) {
|
||||
content.items.forEach(({str}) => strings.push(str));
|
||||
|
||||
try {
|
||||
if (optionService.getOptionBool('ocrImages')) {
|
||||
if (optionService.getOptionBool('ocrImages') && !cls.isOcrDisabled()) {
|
||||
await ocrTextFromPdfImages(pdfjsLib, page, strings);
|
||||
}
|
||||
}
|
||||
@@ -117,13 +118,37 @@ async function ocrTextFromBuffer(buffer) {
|
||||
const canvas = new Canvas.createCanvas(img.width, img.height);
|
||||
const ctx = canvas.getContext('2d');
|
||||
ctx.drawImage(img, 0, 0, img.width, img.height);
|
||||
|
||||
const plainText = OCRAD(canvas);
|
||||
|
||||
log.info(`OCR of ${buffer.byteLength} image bytes into ${plainText.length} chars of text took ${Date.now() - start}ms`);
|
||||
return plainText;
|
||||
}
|
||||
|
||||
/**
 * Runs OCR over the content of an image note and stores the recognized text
 * as the note's 'plainText' ancillary (mime 'text/plain').
 *
 * The call is a no-op when the note is not an image, the 'ocrImages' option is
 * off, OCR is disabled according to cls.isOcrDisabled(), or the buffer is
 * missing or empty. Errors raised during OCR or saving are logged via log.error
 * and not rethrown.
 *
 * @param {object} note - note entity; isImage(), saveNoteAncillary() and noteId are used
 * @param {Buffer} buffer - image content (presumably a Node Buffer - confirm against callers)
 * @returns {Promise<void>}
 */
async function runOcr(note, buffer) {
    // Cheap checks first; a missing buffer is treated the same as an empty one
    // so that callers without content do not trigger a TypeError.
    if (!note.isImage()
        || !optionService.getOptionBool('ocrImages')
        || cls.isOcrDisabled()
        || !buffer
        || buffer.length === 0
    ) {
        return;
    }

    try {
        const plainText = await ocrTextFromBuffer(buffer);

        // The recognized text is deliberately not echoed to stdout: it is note content.
        note.saveNoteAncillary('plainText', 'text/plain', plainText);
    }
    catch (e) {
        log.error(`OCR on note '${note.noteId}' failed with error '${e.message}', stack ${e.stack}`);
    }
}
|
||||
|
||||
module.exports = {
|
||||
ocrTextFromBuffer,
|
||||
runOcr,
|
||||
extractTextFromPdf
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user