refactor(ocr): rename ocr_text to textRepresentation

This commit is contained in:
Elian Doran
2026-04-01 16:14:08 +03:00
parent 9462d6109c
commit 5846df7d02
10 changed files with 51 additions and 51 deletions

View File

@@ -32,7 +32,7 @@ export interface RenderOptions {
includeArchivedNotes?: boolean;
/** Set of note IDs that have already been seen during rendering to prevent infinite recursion. */
seenNoteIds?: Set<string>;
showOcrText?: boolean;
showTextRepresentation?: boolean;
}
const CODE_MIME_TYPES = new Set(["application/json"]);
@@ -181,7 +181,7 @@ async function renderImage(entity: FNote | FAttachment, $renderedContent: JQuery
imageContextMenuService.setupContextMenu($img);
// Add OCR text display for image notes
if (entity instanceof FNote && options.showOcrText) {
if (entity instanceof FNote && options.showTextRepresentation) {
await addOCRTextIfAvailable(entity, $renderedContent);
}
}
@@ -252,7 +252,7 @@ async function renderFile(entity: FNote | FAttachment, type: string, $renderedCo
}
// Add OCR text display for file notes
if (entity instanceof FNote && options.showOcrText) {
if (entity instanceof FNote && options.showTextRepresentation) {
await addOCRTextIfAvailable(entity, $content);
}

View File

@@ -107,7 +107,7 @@ CREATE TABLE IF NOT EXISTS "recent_notes"
CREATE TABLE IF NOT EXISTS "blobs" (
`blobId` TEXT NOT NULL,
`content` TEXT NULL DEFAULT NULL,
`ocr_text` TEXT DEFAULT NULL,
`textRepresentation` TEXT DEFAULT NULL,
`ocr_last_processed` TEXT DEFAULT NULL,
`dateModified` TEXT NOT NULL,
`utcDateModified` TEXT NOT NULL,

View File

@@ -10,12 +10,12 @@ class BBlob extends AbstractBeccaEntity<BBlob> {
return "blobId";
}
static get hashedProperties() {
return ["blobId", "content", "ocr_text"];
return ["blobId", "content", "textRepresentation"];
}
content!: string | Buffer;
contentLength!: number;
ocr_text?: string | null;
textRepresentation?: string | null;
constructor(row: BlobRow) {
super();
@@ -26,7 +26,7 @@ class BBlob extends AbstractBeccaEntity<BBlob> {
this.blobId = row.blobId;
this.content = row.content;
this.contentLength = row.contentLength;
this.ocr_text = row.ocr_text;
this.textRepresentation = row.textRepresentation;
this.dateModified = row.dateModified;
this.utcDateModified = row.utcDateModified;
}
@@ -36,7 +36,7 @@ class BBlob extends AbstractBeccaEntity<BBlob> {
blobId: this.blobId,
content: this.content || null,
contentLength: this.contentLength,
ocr_text: this.ocr_text || null,
textRepresentation: this.textRepresentation || null,
dateModified: this.dateModified,
utcDateModified: this.utcDateModified
};

View File

@@ -6,19 +6,19 @@
// Migrations should be kept in descending order, so the latest migration is first.
const MIGRATIONS: (SqlMigration | JsMigration)[] = [
// Add OCR text column and last processed timestamp to blobs table
// Add text representation column and last processed timestamp to blobs table
{
version: 236,
sql: /*sql*/`\
-- Add OCR text column to blobs table
ALTER TABLE blobs ADD COLUMN ocr_text TEXT DEFAULT NULL;
-- Add text representation column to blobs table
ALTER TABLE blobs ADD COLUMN textRepresentation TEXT DEFAULT NULL;
-- Add OCR last processed timestamp to blobs table
ALTER TABLE blobs ADD COLUMN ocr_last_processed TEXT DEFAULT NULL;
-- Create index for OCR text searches
CREATE INDEX IF NOT EXISTS idx_blobs_ocr_text
ON blobs (ocr_text);
-- Create index for text representation searches
CREATE INDEX IF NOT EXISTS idx_blobs_textRepresentation
ON blobs (textRepresentation);
-- Create index for OCR last processed timestamp
CREATE INDEX IF NOT EXISTS idx_blobs_ocr_last_processed

View File

@@ -569,16 +569,16 @@ async function getNoteOCRText(req: Request, res: Response) {
if (note.blobId) {
const result = sql.getRow<{
ocr_text: string | null;
textRepresentation: string | null;
ocr_last_processed: string | null;
}>(`
SELECT ocr_text, ocr_last_processed
SELECT textRepresentation, ocr_last_processed
FROM blobs
WHERE blobId = ?
`, [note.blobId]);
if (result) {
ocrText = result.ocr_text;
ocrText = result.textRepresentation;
extractedAt = result.ocr_last_processed;
}
}

View File

@@ -251,7 +251,7 @@ describe('OCRService', () => {
await ocrService.storeOCRResult('blob123', ocrResult);
expect(mockSql.execute).toHaveBeenCalledWith(
expect.stringContaining('UPDATE blobs SET ocr_text = ?'),
expect.stringContaining('UPDATE blobs SET textRepresentation = ?'),
['Sample text', 'blob123']
);
});
@@ -331,7 +331,7 @@ describe('OCRService', () => {
it('should return existing OCR result if forceReprocess is false', async () => {
const existingResult = {
ocr_text: 'Existing text'
textRepresentation: 'Existing text'
};
mockSql.getRow.mockReturnValue(existingResult);
@@ -348,7 +348,7 @@ describe('OCRService', () => {
it('should reprocess if forceReprocess is true', async () => {
const existingResult = {
ocr_text: 'Existing text'
textRepresentation: 'Existing text'
};
mockSql.getRow.mockResolvedValue(existingResult);
@@ -445,7 +445,7 @@ describe('OCRService', () => {
const mockResults = [
{
blobId: 'blob1',
ocr_text: 'Sample search text'
textRepresentation: 'Sample search text'
}
];
mockSql.getRows.mockReturnValue(mockResults);
@@ -457,7 +457,7 @@ describe('OCRService', () => {
text: 'Sample search text'
}]);
expect(mockSql.getRows).toHaveBeenCalledWith(
expect.stringContaining('WHERE ocr_text LIKE ?'),
expect.stringContaining('WHERE textRepresentation LIKE ?'),
['%search%']
);
});
@@ -851,7 +851,7 @@ describe('OCRService', () => {
ocrService.deleteOCRResult('blob123');
expect(mockSql.execute).toHaveBeenCalledWith(
expect.stringContaining('UPDATE blobs SET ocr_text = NULL'),
expect.stringContaining('UPDATE blobs SET textRepresentation = NULL'),
['blob123']
);
expect(mockLog.info).toHaveBeenCalledWith('Deleted OCR result for blob blob123');

View File

@@ -26,7 +26,7 @@ export interface OCRProcessingOptions {
interface OCRBlobRow {
blobId: string;
ocr_text: string;
textRepresentation: string;
ocr_last_processed?: string;
}
@@ -235,7 +235,7 @@ class OCRService {
// Store OCR text and timestamp in blobs table
sql.execute(`
UPDATE blobs SET
ocr_text = ?,
textRepresentation = ?,
ocr_last_processed = ?
WHERE blobId = ?
`, [
@@ -261,14 +261,14 @@ class OCRService {
try {
const row = sql.getRow<{
ocr_text: string | null;
textRepresentation: string | null;
}>(`
SELECT ocr_text
SELECT textRepresentation
FROM blobs
WHERE blobId = ?
`, [blobId]);
if (!row || !row.ocr_text) {
if (!row || !row.textRepresentation) {
return null;
}
@@ -276,7 +276,7 @@ class OCRService {
// Note: we lose confidence, language, and extractedAt metadata
// but gain simplicity by storing directly in blob
return {
text: row.ocr_text,
text: row.textRepresentation,
confidence: 0.95, // Default high confidence for existing OCR
extractedAt: new Date().toISOString(),
language: 'eng'
@@ -293,10 +293,10 @@ class OCRService {
searchOCRResults(searchText: string): Array<{ blobId: string; text: string }> {
try {
const query = `
SELECT blobId, ocr_text
SELECT blobId, textRepresentation
FROM blobs
WHERE ocr_text LIKE ?
AND ocr_text IS NOT NULL
WHERE textRepresentation LIKE ?
AND textRepresentation IS NOT NULL
`;
const params = [`%${searchText}%`];
@@ -304,7 +304,7 @@ class OCRService {
return rows.map(row => ({
blobId: row.blobId,
text: row.ocr_text
text: row.textRepresentation
}));
} catch (error) {
log.error(`Failed to search OCR results: ${error}`);
@@ -318,7 +318,7 @@ class OCRService {
deleteOCRResult(blobId: string): void {
try {
sql.execute(`
UPDATE blobs SET ocr_text = NULL
UPDATE blobs SET textRepresentation = NULL
WHERE blobId = ?
`, [blobId]);
@@ -346,7 +346,7 @@ class OCRService {
}>(`
SELECT COUNT(*) as total_processed
FROM blobs
WHERE ocr_text IS NOT NULL AND ocr_text != ''
WHERE textRepresentation IS NOT NULL AND textRepresentation != ''
`);
// Count image notes with OCR
@@ -358,7 +358,7 @@ class OCRService {
JOIN blobs b ON n.blobId = b.blobId
WHERE n.type = 'image'
AND n.isDeleted = 0
AND b.ocr_text IS NOT NULL AND b.ocr_text != ''
AND b.textRepresentation IS NOT NULL AND b.textRepresentation != ''
`);
// Count image attachments with OCR
@@ -370,7 +370,7 @@ class OCRService {
JOIN blobs b ON a.blobId = b.blobId
WHERE a.role = 'image'
AND a.isDeleted = 0
AND b.ocr_text IS NOT NULL AND b.ocr_text != ''
AND b.textRepresentation IS NOT NULL AND b.textRepresentation != ''
`);
return {
@@ -591,7 +591,7 @@ class OCRService {
}
/**
* Invalidate OCR results for a blob (clear ocr_text and ocr_last_processed)
* Invalidate OCR results for a blob (clear textRepresentation and ocr_last_processed)
*/
invalidateOCRResult(blobId: string): void {
if (!blobId) {
@@ -601,7 +601,7 @@ class OCRService {
try {
sql.execute(`
UPDATE blobs SET
ocr_text = NULL,
textRepresentation = NULL,
ocr_last_processed = NULL
WHERE blobId = ?
`, [blobId]);

View File

@@ -72,23 +72,23 @@ export default class OCRContentExpression extends Expression {
private searchOCRContent(searchText: string): Array<{
blobId: string;
ocr_text: string;
textRepresentation: string;
}> {
try {
// Search in blobs table for OCR text
const query = `
SELECT blobId, ocr_text
SELECT blobId, textRepresentation
FROM blobs
WHERE ocr_text LIKE ?
AND ocr_text IS NOT NULL
AND ocr_text != ''
WHERE textRepresentation LIKE ?
AND textRepresentation IS NOT NULL
AND textRepresentation != ''
LIMIT 50
`;
const params = [`%${searchText}%`];
return sql.getRows<{
blobId: string;
ocr_text: string;
textRepresentation: string;
}>(query, params);
} catch (error) {
console.error('Error searching OCR content:', error);

View File

@@ -140,10 +140,10 @@ class SearchResult {
// Search for OCR results for this note and its attachments
const ocrResults = sql.getRows(`
SELECT b.ocr_text
SELECT b.textRepresentation
FROM blobs b
WHERE b.ocr_text IS NOT NULL
AND b.ocr_text != ''
WHERE b.textRepresentation IS NOT NULL
AND b.textRepresentation != ''
AND (
b.blobId = (SELECT blobId FROM notes WHERE noteId = ? AND isDeleted = 0)
OR b.blobId IN (
@@ -152,9 +152,9 @@ class SearchResult {
)
`, [this.noteId, this.noteId]);
for (const ocrResult of ocrResults as Array<{ocr_text: string}>) {
for (const ocrResult of ocrResults as Array<{textRepresentation: string}>) {
// Add score for OCR text matches
this.addScoreForStrings(tokens, ocrResult.ocr_text, factor);
this.addScoreForStrings(tokens, ocrResult.textRepresentation, factor);
}
} catch (error) {
// Silently fail if OCR service is not available

View File

@@ -72,7 +72,7 @@ export interface BlobRow {
blobId: string;
content: string | Buffer;
contentLength: number;
ocr_text?: string | null;
textRepresentation?: string | null;
dateModified: string;
utcDateModified: string;
}