mirror of
https://github.com/zadam/trilium.git
synced 2026-06-27 19:09:37 +02:00
317 lines
8.5 KiB
TypeScript
317 lines
8.5 KiB
TypeScript
import type { Request } from "express";
|
|
|
|
import becca from "../../becca/becca.js";
|
|
import ocrService from "../../services/ocr/ocr_service.js";
|
|
import sql from "../../services/sql.js";
|
|
|
|
/**
|
|
* @swagger
|
|
* /api/ocr/process-note/{noteId}:
|
|
* post:
|
|
* summary: Process OCR for a specific note
|
|
* operationId: ocr-process-note
|
|
* parameters:
|
|
* - name: noteId
|
|
* in: path
|
|
* required: true
|
|
* schema:
|
|
* type: string
|
|
* description: ID of the note to process
|
|
* requestBody:
|
|
* required: false
|
|
* content:
|
|
* application/json:
|
|
* schema:
|
|
* type: object
|
|
* properties:
|
|
* language:
|
|
* type: string
|
|
* description: OCR language code (e.g. 'eng', 'fra', 'deu')
|
|
* default: 'eng'
|
|
* forceReprocess:
|
|
* type: boolean
|
|
* description: Force reprocessing even if OCR already exists
|
|
* default: false
|
|
* responses:
|
|
* '200':
|
|
* description: OCR processing completed successfully
|
|
* '400':
|
|
* description: Bad request - OCR disabled or unsupported file type
|
|
* '404':
|
|
* description: Note not found
|
|
* '500':
|
|
* description: Internal server error
|
|
* security:
|
|
* - session: []
|
|
* tags: ["ocr"]
|
|
*/
|
|
/**
 * Route handler: runs OCR for a single note.
 *
 * Reads `noteId` from the path and the optional `language` / `forceReprocess`
 * options from the JSON body, then delegates to `ocrService.processNoteOCR`.
 *
 * @param req - Request carrying `params.noteId` and an optional JSON body.
 * @returns `{ success: true, result }` when the service yields a result; otherwise a
 *   `[status, payload]` tuple: `404` when the note is unknown, `400` when the options
 *   have the wrong type or the service returns a falsy result.
 *   NOTE(review): presumably the route wrapper maps the tuple to an HTTP status - confirm.
 */
async function processNoteOCR(req: Request<{ noteId: string }>) {
    const { noteId } = req.params;
    const { language: rawLanguage, forceReprocess: rawForceReprocess } = req.body ?? {};

    const note = becca.getNote(noteId);
    if (!note) {
        return [404, { success: false, message: 'Note not found' }];
    }

    // Unlike a destructuring default, `??` also maps an explicit JSON `null` to the default.
    const language = rawLanguage ?? 'eng';
    const forceReprocess = rawForceReprocess ?? false;

    // The body is client-supplied: reject wrong types instead of forwarding them to the
    // OCR service (e.g. the string "false" is truthy and would force a reprocess).
    if (typeof language !== 'string' || typeof forceReprocess !== 'boolean') {
        return [400, {
            success: false,
            message: "Invalid request body: 'language' must be a string and 'forceReprocess' must be a boolean"
        }];
    }

    const result = await ocrService.processNoteOCR(noteId, { language, forceReprocess });
    if (!result) {
        return [400, { success: false, message: 'Note is not an image or has unsupported format' }];
    }

    return { success: true, result };
}
|
|
|
|
/**
|
|
* @swagger
|
|
* /api/ocr/process-attachment/{attachmentId}:
|
|
* post:
|
|
* summary: Process OCR for a specific attachment
|
|
* operationId: ocr-process-attachment
|
|
* parameters:
|
|
* - name: attachmentId
|
|
* in: path
|
|
* required: true
|
|
* schema:
|
|
* type: string
|
|
* description: ID of the attachment to process
|
|
* requestBody:
|
|
* required: false
|
|
* content:
|
|
* application/json:
|
|
* schema:
|
|
* type: object
|
|
* properties:
|
|
* language:
|
|
* type: string
|
|
* description: OCR language code (e.g. 'eng', 'fra', 'deu')
|
|
* default: 'eng'
|
|
* forceReprocess:
|
|
* type: boolean
|
|
* description: Force reprocessing even if OCR already exists
|
|
* default: false
|
|
* responses:
|
|
* '200':
|
|
* description: OCR processing completed successfully
|
|
* '400':
|
|
* description: Bad request - OCR disabled or unsupported file type
|
|
* '404':
|
|
* description: Attachment not found
|
|
* '500':
|
|
* description: Internal server error
|
|
* security:
|
|
* - session: []
|
|
* tags: ["ocr"]
|
|
*/
|
|
/**
 * Route handler: runs OCR for a single attachment.
 *
 * Reads `attachmentId` from the path and the optional `language` / `forceReprocess`
 * options from the JSON body, then delegates to `ocrService.processAttachmentOCR`.
 *
 * @param req - Request carrying `params.attachmentId` and an optional JSON body.
 * @returns `{ success: true, result }` when the service yields a result; otherwise a
 *   `[status, payload]` tuple: `404` when the attachment is unknown, `400` when the
 *   options have the wrong type or the service returns a falsy result.
 *   NOTE(review): presumably the route wrapper maps the tuple to an HTTP status - confirm.
 */
async function processAttachmentOCR(req: Request<{ attachmentId: string }>) {
    const { attachmentId } = req.params;
    const { language: rawLanguage, forceReprocess: rawForceReprocess } = req.body ?? {};

    const attachment = becca.getAttachment(attachmentId);
    if (!attachment) {
        return [404, { success: false, message: 'Attachment not found' }];
    }

    // Unlike a destructuring default, `??` also maps an explicit JSON `null` to the default.
    const language = rawLanguage ?? 'eng';
    const forceReprocess = rawForceReprocess ?? false;

    // The body is client-supplied: reject wrong types instead of forwarding them to the
    // OCR service (e.g. the string "false" is truthy and would force a reprocess).
    if (typeof language !== 'string' || typeof forceReprocess !== 'boolean') {
        return [400, {
            success: false,
            message: "Invalid request body: 'language' must be a string and 'forceReprocess' must be a boolean"
        }];
    }

    const result = await ocrService.processAttachmentOCR(attachmentId, { language, forceReprocess });
    if (!result) {
        return [400, { success: false, message: 'Attachment is not an image or has unsupported format' }];
    }

    return { success: true, result };
}
|
|
|
|
/**
|
|
* @swagger
|
|
* /api/ocr/search:
|
|
* get:
|
|
* summary: Search for text in OCR results
|
|
* operationId: ocr-search
|
|
* parameters:
|
|
* - name: q
|
|
* in: query
|
|
* required: true
|
|
* schema:
|
|
* type: string
|
|
* description: Search query text
|
|
* responses:
|
|
* '200':
|
|
* description: Search results
|
|
* '400':
|
|
* description: Bad request - missing search query
|
|
* '500':
|
|
* description: Internal server error
|
|
* security:
|
|
* - session: []
|
|
* tags: ["ocr"]
|
|
*/
|
|
/**
 * Route handler: looks up text in stored OCR results.
 *
 * @param req - Request whose `query.q` holds the search text.
 * @returns `{ success: true, results }` with whatever `ocrService.searchOCRResults`
 *   returns, or `[400, payload]` when `q` is missing, empty, or not a plain string
 *   (for example a repeated query parameter parsed into an array).
 */
async function searchOCR(req: Request) {
    const query = req.query.q;

    // Only a non-empty string is a usable query.
    if (typeof query === 'string' && query !== '') {
        return { success: true, results: ocrService.searchOCRResults(query) };
    }

    return [400, { success: false, message: 'Search query is required' }];
}
|
|
|
|
/**
|
|
* @swagger
|
|
* /api/ocr/batch-process:
|
|
* post:
|
|
* summary: Process OCR for all images without existing OCR results
|
|
* operationId: ocr-batch-process
|
|
* responses:
|
|
* '200':
|
|
* description: Batch processing initiated successfully
|
|
* '400':
|
|
* description: Bad request - OCR disabled or already processing
|
|
* '500':
|
|
* description: Internal server error
|
|
* security:
|
|
* - session: []
|
|
* tags: ["ocr"]
|
|
*/
|
|
/**
 * Route handler: starts batch OCR processing via `ocrService.startBatchProcessing`.
 *
 * @returns The service outcome as-is when its `success` flag is truthy, otherwise
 *   the same outcome wrapped in a `[400, outcome]` tuple.
 */
async function batchProcessOCR() {
    const outcome = await ocrService.startBatchProcessing();

    // The payload is identical on both paths; only the status wrapper differs.
    return outcome.success ? outcome : [400, outcome];
}
|
|
|
|
/**
|
|
* @swagger
|
|
* /api/ocr/batch-progress:
|
|
* get:
|
|
* summary: Get batch OCR processing progress
|
|
* operationId: ocr-batch-progress
|
|
* responses:
|
|
* '200':
|
|
* description: Batch processing progress information
|
|
* '500':
|
|
* description: Internal server error
|
|
* security:
|
|
* - session: []
|
|
* tags: ["ocr"]
|
|
*/
|
|
/**
 * Route handler: reports batch OCR progress.
 *
 * @returns Whatever `ocrService.getBatchProgress()` returns, unmodified.
 */
async function getBatchProgress() {
    const progress = ocrService.getBatchProgress();
    return progress;
}
|
|
|
|
/**
|
|
* @swagger
|
|
* /api/ocr/stats:
|
|
* get:
|
|
* summary: Get OCR processing statistics
|
|
* operationId: ocr-get-stats
|
|
* responses:
|
|
* '200':
|
|
* description: OCR statistics
|
|
* '500':
|
|
* description: Internal server error
|
|
* security:
|
|
* - session: []
|
|
* tags: ["ocr"]
|
|
*/
|
|
/**
 * Route handler: returns OCR statistics.
 *
 * @returns `{ success: true, stats }` where `stats` is the value of
 *   `ocrService.getOCRStats()`.
 */
async function getOCRStats() {
    const stats = ocrService.getOCRStats();
    return { success: true, stats };
}
|
|
|
|
/**
|
|
* @swagger
|
|
* /api/ocr/delete/{blobId}:
|
|
* delete:
|
|
* summary: Delete OCR results for a specific blob
|
|
* operationId: ocr-delete-results
|
|
* parameters:
|
|
* - name: blobId
|
|
* in: path
|
|
* required: true
|
|
* schema:
|
|
* type: string
|
|
* description: ID of the blob
|
|
* responses:
|
|
* '200':
|
|
* description: OCR results deleted successfully
|
|
* '400':
|
|
* description: Bad request - invalid parameters
|
|
* '500':
|
|
* description: Internal server error
|
|
* security:
|
|
* - session: []
|
|
* tags: ["ocr"]
|
|
*/
|
|
/**
 * Route handler: deletes the OCR result stored for a blob.
 *
 * Calls `ocrService.deleteOCRResult` with the `blobId` path parameter and always
 * answers with a success payload; no existence check is performed here.
 *
 * @param req - Request carrying `params.blobId`.
 * @returns `{ success: true, message }` naming the blob ID that was passed in.
 */
async function deleteOCRResults(req: Request<{ blobId: string }>) {
    const blobId = req.params.blobId;

    ocrService.deleteOCRResult(blobId);

    const message = `OCR results deleted for blob ${blobId}`;
    return { success: true, message };
}
|
|
|
|
/**
|
|
* @swagger
|
|
* /api/ocr/notes/{noteId}/text:
|
|
* get:
|
|
* summary: Get OCR text for a specific note
|
|
* operationId: ocr-get-note-text
|
|
* parameters:
|
|
* - name: noteId
|
|
* in: path
|
|
* required: true
|
|
* schema:
|
|
* type: string
|
|
* description: Note ID to get OCR text for
|
|
* responses:
|
|
* '200':
|
|
* description: OCR text retrieved successfully
|
|
* '404':
|
|
* description: Note not found
|
|
* tags: ["ocr"]
|
|
*/
|
|
/**
 * Route handler: returns the OCR text stored for a note's blob.
 *
 * Looks the note up in becca, then reads `textRepresentation` and
 * `textExtractionLastProcessed` from the `blobs` table for the note's `blobId`.
 *
 * @param req - Request carrying `params.noteId`.
 * @returns `[404, payload]` when the note is unknown; otherwise
 *   `{ success, text, hasOcr, extractedAt }`, where `text` is `''` and `hasOcr` is
 *   `false` if no text is stored, and `extractedAt` is `null` if the note has no blob
 *   or no matching row.
 */
async function getNoteOCRText(req: Request<{ noteId: string }>) {
    type BlobTextRow = {
        textRepresentation: string | null;
        textExtractionLastProcessed: string | null;
    };

    const note = becca.getNote(req.params.noteId);
    if (!note) {
        return [404, { success: false, message: 'Note not found' }];
    }

    // Skip the query entirely for notes without a blob.
    const row = note.blobId
        ? sql.getRow<BlobTextRow>(`
            SELECT textRepresentation, textExtractionLastProcessed
            FROM blobs
            WHERE blobId = ?
        `, [note.blobId])
        : null;

    const ocrText = row ? row.textRepresentation : null;
    const extractedAt = row ? row.textExtractionLastProcessed : null;

    return {
        success: true,
        text: ocrText || '',
        hasOcr: Boolean(ocrText),
        extractedAt
    };
}
|
|
|
|
// OCR route handlers exposed as the module's default export.
const ocrRouteHandlers = {
    processNoteOCR,
    processAttachmentOCR,
    searchOCR,
    batchProcessOCR,
    getBatchProgress,
    getOCRStats,
    deleteOCRResults,
    getNoteOCRText
};

export default ocrRouteHandlers;
|