mirror of
https://github.com/zadam/trilium.git
synced 2026-05-07 08:25:47 +02:00
refactor(ocr): rename ocr_text to textRepresentation
This commit is contained in:
@@ -32,7 +32,7 @@ export interface RenderOptions {
|
||||
includeArchivedNotes?: boolean;
|
||||
/** Set of note IDs that have already been seen during rendering to prevent infinite recursion. */
|
||||
seenNoteIds?: Set<string>;
|
||||
showOcrText?: boolean;
|
||||
showTextRepresentation?: boolean;
|
||||
}
|
||||
|
||||
const CODE_MIME_TYPES = new Set(["application/json"]);
|
||||
@@ -181,7 +181,7 @@ async function renderImage(entity: FNote | FAttachment, $renderedContent: JQuery
|
||||
imageContextMenuService.setupContextMenu($img);
|
||||
|
||||
// Add OCR text display for image notes
|
||||
if (entity instanceof FNote && options.showOcrText) {
|
||||
if (entity instanceof FNote && options.showTextRepresentation) {
|
||||
await addOCRTextIfAvailable(entity, $renderedContent);
|
||||
}
|
||||
}
|
||||
@@ -252,7 +252,7 @@ async function renderFile(entity: FNote | FAttachment, type: string, $renderedCo
|
||||
}
|
||||
|
||||
// Add OCR text display for file notes
|
||||
if (entity instanceof FNote && options.showOcrText) {
|
||||
if (entity instanceof FNote && options.showTextRepresentation) {
|
||||
await addOCRTextIfAvailable(entity, $content);
|
||||
}
|
||||
|
||||
|
||||
@@ -107,7 +107,7 @@ CREATE TABLE IF NOT EXISTS "recent_notes"
|
||||
CREATE TABLE IF NOT EXISTS "blobs" (
|
||||
`blobId` TEXT NOT NULL,
|
||||
`content` TEXT NULL DEFAULT NULL,
|
||||
`ocr_text` TEXT DEFAULT NULL,
|
||||
`textRepresentation` TEXT DEFAULT NULL,
|
||||
`ocr_last_processed` TEXT DEFAULT NULL,
|
||||
`dateModified` TEXT NOT NULL,
|
||||
`utcDateModified` TEXT NOT NULL,
|
||||
|
||||
@@ -10,12 +10,12 @@ class BBlob extends AbstractBeccaEntity<BBlob> {
|
||||
return "blobId";
|
||||
}
|
||||
static get hashedProperties() {
|
||||
return ["blobId", "content", "ocr_text"];
|
||||
return ["blobId", "content", "textRepresentation"];
|
||||
}
|
||||
|
||||
content!: string | Buffer;
|
||||
contentLength!: number;
|
||||
ocr_text?: string | null;
|
||||
textRepresentation?: string | null;
|
||||
|
||||
constructor(row: BlobRow) {
|
||||
super();
|
||||
@@ -26,7 +26,7 @@ class BBlob extends AbstractBeccaEntity<BBlob> {
|
||||
this.blobId = row.blobId;
|
||||
this.content = row.content;
|
||||
this.contentLength = row.contentLength;
|
||||
this.ocr_text = row.ocr_text;
|
||||
this.textRepresentation = row.textRepresentation;
|
||||
this.dateModified = row.dateModified;
|
||||
this.utcDateModified = row.utcDateModified;
|
||||
}
|
||||
@@ -36,7 +36,7 @@ class BBlob extends AbstractBeccaEntity<BBlob> {
|
||||
blobId: this.blobId,
|
||||
content: this.content || null,
|
||||
contentLength: this.contentLength,
|
||||
ocr_text: this.ocr_text || null,
|
||||
textRepresentation: this.textRepresentation || null,
|
||||
dateModified: this.dateModified,
|
||||
utcDateModified: this.utcDateModified
|
||||
};
|
||||
|
||||
@@ -6,19 +6,19 @@
|
||||
|
||||
// Migrations should be kept in descending order, so the latest migration is first.
|
||||
const MIGRATIONS: (SqlMigration | JsMigration)[] = [
|
||||
// Add OCR text column and last processed timestamp to blobs table
|
||||
// Add text representation column and last processed timestamp to blobs table
|
||||
{
|
||||
version: 236,
|
||||
sql: /*sql*/`\
|
||||
-- Add OCR text column to blobs table
|
||||
ALTER TABLE blobs ADD COLUMN ocr_text TEXT DEFAULT NULL;
|
||||
-- Add text representation column to blobs table
|
||||
ALTER TABLE blobs ADD COLUMN textRepresentation TEXT DEFAULT NULL;
|
||||
|
||||
-- Add OCR last processed timestamp to blobs table
|
||||
ALTER TABLE blobs ADD COLUMN ocr_last_processed TEXT DEFAULT NULL;
|
||||
|
||||
-- Create index for OCR text searches
|
||||
CREATE INDEX IF NOT EXISTS idx_blobs_ocr_text
|
||||
ON blobs (ocr_text);
|
||||
-- Create index for text representation searches
|
||||
CREATE INDEX IF NOT EXISTS idx_blobs_textRepresentation
|
||||
ON blobs (textRepresentation);
|
||||
|
||||
-- Create index for OCR last processed timestamp
|
||||
CREATE INDEX IF NOT EXISTS idx_blobs_ocr_last_processed
|
||||
|
||||
@@ -569,16 +569,16 @@ async function getNoteOCRText(req: Request, res: Response) {
|
||||
|
||||
if (note.blobId) {
|
||||
const result = sql.getRow<{
|
||||
ocr_text: string | null;
|
||||
textRepresentation: string | null;
|
||||
ocr_last_processed: string | null;
|
||||
}>(`
|
||||
SELECT ocr_text, ocr_last_processed
|
||||
SELECT textRepresentation, ocr_last_processed
|
||||
FROM blobs
|
||||
WHERE blobId = ?
|
||||
`, [note.blobId]);
|
||||
|
||||
if (result) {
|
||||
ocrText = result.ocr_text;
|
||||
ocrText = result.textRepresentation;
|
||||
extractedAt = result.ocr_last_processed;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -251,7 +251,7 @@ describe('OCRService', () => {
|
||||
await ocrService.storeOCRResult('blob123', ocrResult);
|
||||
|
||||
expect(mockSql.execute).toHaveBeenCalledWith(
|
||||
expect.stringContaining('UPDATE blobs SET ocr_text = ?'),
|
||||
expect.stringContaining('UPDATE blobs SET textRepresentation = ?'),
|
||||
['Sample text', 'blob123']
|
||||
);
|
||||
});
|
||||
@@ -331,7 +331,7 @@ describe('OCRService', () => {
|
||||
|
||||
it('should return existing OCR result if forceReprocess is false', async () => {
|
||||
const existingResult = {
|
||||
ocr_text: 'Existing text'
|
||||
textRepresentation: 'Existing text'
|
||||
};
|
||||
mockSql.getRow.mockReturnValue(existingResult);
|
||||
|
||||
@@ -348,7 +348,7 @@ describe('OCRService', () => {
|
||||
|
||||
it('should reprocess if forceReprocess is true', async () => {
|
||||
const existingResult = {
|
||||
ocr_text: 'Existing text'
|
||||
textRepresentation: 'Existing text'
|
||||
};
|
||||
mockSql.getRow.mockResolvedValue(existingResult);
|
||||
|
||||
@@ -445,7 +445,7 @@ describe('OCRService', () => {
|
||||
const mockResults = [
|
||||
{
|
||||
blobId: 'blob1',
|
||||
ocr_text: 'Sample search text'
|
||||
textRepresentation: 'Sample search text'
|
||||
}
|
||||
];
|
||||
mockSql.getRows.mockReturnValue(mockResults);
|
||||
@@ -457,7 +457,7 @@ describe('OCRService', () => {
|
||||
text: 'Sample search text'
|
||||
}]);
|
||||
expect(mockSql.getRows).toHaveBeenCalledWith(
|
||||
expect.stringContaining('WHERE ocr_text LIKE ?'),
|
||||
expect.stringContaining('WHERE textRepresentation LIKE ?'),
|
||||
['%search%']
|
||||
);
|
||||
});
|
||||
@@ -851,7 +851,7 @@ describe('OCRService', () => {
|
||||
ocrService.deleteOCRResult('blob123');
|
||||
|
||||
expect(mockSql.execute).toHaveBeenCalledWith(
|
||||
expect.stringContaining('UPDATE blobs SET ocr_text = NULL'),
|
||||
expect.stringContaining('UPDATE blobs SET textRepresentation = NULL'),
|
||||
['blob123']
|
||||
);
|
||||
expect(mockLog.info).toHaveBeenCalledWith('Deleted OCR result for blob blob123');
|
||||
|
||||
@@ -26,7 +26,7 @@ export interface OCRProcessingOptions {
|
||||
|
||||
interface OCRBlobRow {
|
||||
blobId: string;
|
||||
ocr_text: string;
|
||||
textRepresentation: string;
|
||||
ocr_last_processed?: string;
|
||||
}
|
||||
|
||||
@@ -235,7 +235,7 @@ class OCRService {
|
||||
// Store OCR text and timestamp in blobs table
|
||||
sql.execute(`
|
||||
UPDATE blobs SET
|
||||
ocr_text = ?,
|
||||
textRepresentation = ?,
|
||||
ocr_last_processed = ?
|
||||
WHERE blobId = ?
|
||||
`, [
|
||||
@@ -261,14 +261,14 @@ class OCRService {
|
||||
|
||||
try {
|
||||
const row = sql.getRow<{
|
||||
ocr_text: string | null;
|
||||
textRepresentation: string | null;
|
||||
}>(`
|
||||
SELECT ocr_text
|
||||
SELECT textRepresentation
|
||||
FROM blobs
|
||||
WHERE blobId = ?
|
||||
`, [blobId]);
|
||||
|
||||
if (!row || !row.ocr_text) {
|
||||
if (!row || !row.textRepresentation) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@@ -276,7 +276,7 @@ class OCRService {
|
||||
// Note: we lose confidence, language, and extractedAt metadata
|
||||
// but gain simplicity by storing directly in blob
|
||||
return {
|
||||
text: row.ocr_text,
|
||||
text: row.textRepresentation,
|
||||
confidence: 0.95, // Default high confidence for existing OCR
|
||||
extractedAt: new Date().toISOString(),
|
||||
language: 'eng'
|
||||
@@ -293,10 +293,10 @@ class OCRService {
|
||||
searchOCRResults(searchText: string): Array<{ blobId: string; text: string }> {
|
||||
try {
|
||||
const query = `
|
||||
SELECT blobId, ocr_text
|
||||
SELECT blobId, textRepresentation
|
||||
FROM blobs
|
||||
WHERE ocr_text LIKE ?
|
||||
AND ocr_text IS NOT NULL
|
||||
WHERE textRepresentation LIKE ?
|
||||
AND textRepresentation IS NOT NULL
|
||||
`;
|
||||
const params = [`%${searchText}%`];
|
||||
|
||||
@@ -304,7 +304,7 @@ class OCRService {
|
||||
|
||||
return rows.map(row => ({
|
||||
blobId: row.blobId,
|
||||
text: row.ocr_text
|
||||
text: row.textRepresentation
|
||||
}));
|
||||
} catch (error) {
|
||||
log.error(`Failed to search OCR results: ${error}`);
|
||||
@@ -318,7 +318,7 @@ class OCRService {
|
||||
deleteOCRResult(blobId: string): void {
|
||||
try {
|
||||
sql.execute(`
|
||||
UPDATE blobs SET ocr_text = NULL
|
||||
UPDATE blobs SET textRepresentation = NULL
|
||||
WHERE blobId = ?
|
||||
`, [blobId]);
|
||||
|
||||
@@ -346,7 +346,7 @@ class OCRService {
|
||||
}>(`
|
||||
SELECT COUNT(*) as total_processed
|
||||
FROM blobs
|
||||
WHERE ocr_text IS NOT NULL AND ocr_text != ''
|
||||
WHERE textRepresentation IS NOT NULL AND textRepresentation != ''
|
||||
`);
|
||||
|
||||
// Count image notes with OCR
|
||||
@@ -358,7 +358,7 @@ class OCRService {
|
||||
JOIN blobs b ON n.blobId = b.blobId
|
||||
WHERE n.type = 'image'
|
||||
AND n.isDeleted = 0
|
||||
AND b.ocr_text IS NOT NULL AND b.ocr_text != ''
|
||||
AND b.textRepresentation IS NOT NULL AND b.textRepresentation != ''
|
||||
`);
|
||||
|
||||
// Count image attachments with OCR
|
||||
@@ -370,7 +370,7 @@ class OCRService {
|
||||
JOIN blobs b ON a.blobId = b.blobId
|
||||
WHERE a.role = 'image'
|
||||
AND a.isDeleted = 0
|
||||
AND b.ocr_text IS NOT NULL AND b.ocr_text != ''
|
||||
AND b.textRepresentation IS NOT NULL AND b.textRepresentation != ''
|
||||
`);
|
||||
|
||||
return {
|
||||
@@ -591,7 +591,7 @@ class OCRService {
|
||||
}
|
||||
|
||||
/**
|
||||
* Invalidate OCR results for a blob (clear ocr_text and ocr_last_processed)
|
||||
* Invalidate OCR results for a blob (clear textRepresentation and ocr_last_processed)
|
||||
*/
|
||||
invalidateOCRResult(blobId: string): void {
|
||||
if (!blobId) {
|
||||
@@ -601,7 +601,7 @@ class OCRService {
|
||||
try {
|
||||
sql.execute(`
|
||||
UPDATE blobs SET
|
||||
ocr_text = NULL,
|
||||
textRepresentation = NULL,
|
||||
ocr_last_processed = NULL
|
||||
WHERE blobId = ?
|
||||
`, [blobId]);
|
||||
|
||||
@@ -72,23 +72,23 @@ export default class OCRContentExpression extends Expression {
|
||||
|
||||
private searchOCRContent(searchText: string): Array<{
|
||||
blobId: string;
|
||||
ocr_text: string;
|
||||
textRepresentation: string;
|
||||
}> {
|
||||
try {
|
||||
// Search in blobs table for OCR text
|
||||
const query = `
|
||||
SELECT blobId, ocr_text
|
||||
SELECT blobId, textRepresentation
|
||||
FROM blobs
|
||||
WHERE ocr_text LIKE ?
|
||||
AND ocr_text IS NOT NULL
|
||||
AND ocr_text != ''
|
||||
WHERE textRepresentation LIKE ?
|
||||
AND textRepresentation IS NOT NULL
|
||||
AND textRepresentation != ''
|
||||
LIMIT 50
|
||||
`;
|
||||
const params = [`%${searchText}%`];
|
||||
|
||||
return sql.getRows<{
|
||||
blobId: string;
|
||||
ocr_text: string;
|
||||
textRepresentation: string;
|
||||
}>(query, params);
|
||||
} catch (error) {
|
||||
console.error('Error searching OCR content:', error);
|
||||
|
||||
@@ -140,10 +140,10 @@ class SearchResult {
|
||||
|
||||
// Search for OCR results for this note and its attachments
|
||||
const ocrResults = sql.getRows(`
|
||||
SELECT b.ocr_text
|
||||
SELECT b.textRepresentation
|
||||
FROM blobs b
|
||||
WHERE b.ocr_text IS NOT NULL
|
||||
AND b.ocr_text != ''
|
||||
WHERE b.textRepresentation IS NOT NULL
|
||||
AND b.textRepresentation != ''
|
||||
AND (
|
||||
b.blobId = (SELECT blobId FROM notes WHERE noteId = ? AND isDeleted = 0)
|
||||
OR b.blobId IN (
|
||||
@@ -152,9 +152,9 @@ class SearchResult {
|
||||
)
|
||||
`, [this.noteId, this.noteId]);
|
||||
|
||||
for (const ocrResult of ocrResults as Array<{ocr_text: string}>) {
|
||||
for (const ocrResult of ocrResults as Array<{textRepresentation: string}>) {
|
||||
// Add score for OCR text matches
|
||||
this.addScoreForStrings(tokens, ocrResult.ocr_text, factor);
|
||||
this.addScoreForStrings(tokens, ocrResult.textRepresentation, factor);
|
||||
}
|
||||
} catch (error) {
|
||||
// Silently fail if OCR service is not available
|
||||
|
||||
@@ -72,7 +72,7 @@ export interface BlobRow {
|
||||
blobId: string;
|
||||
content: string | Buffer;
|
||||
contentLength: number;
|
||||
ocr_text?: string | null;
|
||||
textRepresentation?: string | null;
|
||||
dateModified: string;
|
||||
utcDateModified: string;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user