mirror of
https://github.com/zadam/trilium.git
synced 2026-05-07 12:25:37 +02:00
feat(ocr): add OCR (#5834)
This commit is contained in:
@@ -302,6 +302,7 @@ export type CommandMappings = {
|
||||
ninthTab: CommandData;
|
||||
lastTab: CommandData;
|
||||
showNoteSource: CommandData;
|
||||
showNoteOCRText: CommandData;
|
||||
showSQLConsole: CommandData;
|
||||
showBackendLog: CommandData;
|
||||
showCheatsheet: CommandData;
|
||||
|
||||
@@ -148,6 +148,19 @@ export default class RootCommandExecutor extends Component {
|
||||
}
|
||||
}
|
||||
|
||||
async showNoteOCRTextCommand() {
|
||||
const notePath = appContext.tabManager.getActiveContextNotePath();
|
||||
|
||||
if (notePath) {
|
||||
await appContext.tabManager.openTabWithNoteWithHoisting(notePath, {
|
||||
activate: true,
|
||||
viewScope: {
|
||||
viewMode: "ocr"
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
async showAttachmentsCommand() {
|
||||
const notePath = appContext.tabManager.getActiveContextNotePath();
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import "./content_renderer.css";
|
||||
|
||||
import { normalizeMimeTypeForCKEditor } from "@triliumnext/commons";
|
||||
import { normalizeMimeTypeForCKEditor, type TextRepresentationResponse } from "@triliumnext/commons";
|
||||
import { h, render } from "preact";
|
||||
import WheelZoom from 'vanilla-js-wheel-zoom';
|
||||
|
||||
@@ -15,6 +15,7 @@ import openService from "./open.js";
|
||||
import protectedSessionService from "./protected_session.js";
|
||||
import protectedSessionHolder from "./protected_session_holder.js";
|
||||
import renderService from "./render.js";
|
||||
import server from "./server.js";
|
||||
import { applySingleBlockSyntaxHighlight } from "./syntax_highlight.js";
|
||||
import utils, { getErrorMessage } from "./utils.js";
|
||||
|
||||
@@ -32,6 +33,7 @@ export interface RenderOptions {
|
||||
includeArchivedNotes?: boolean;
|
||||
/** Set of note IDs that have already been seen during rendering to prevent infinite recursion. */
|
||||
seenNoteIds?: Set<string>;
|
||||
showTextRepresentation?: boolean;
|
||||
}
|
||||
|
||||
const CODE_MIME_TYPES = new Set(["application/json"]);
|
||||
@@ -55,9 +57,9 @@ export async function getRenderedContent(this: {} | { ctx: string }, entity: FNo
|
||||
} else if (type === "code") {
|
||||
await renderCode(entity, $renderedContent);
|
||||
} else if (["image", "canvas", "mindMap", "spreadsheet"].includes(type)) {
|
||||
renderImage(entity, $renderedContent, options);
|
||||
await renderImage(entity, $renderedContent, options);
|
||||
} else if (!options.tooltip && ["file", "pdf", "audio", "video"].includes(type)) {
|
||||
await renderFile(entity, type, $renderedContent);
|
||||
await renderFile(entity, type, $renderedContent, options);
|
||||
} else if (type === "mermaid") {
|
||||
await renderMermaid(entity, $renderedContent);
|
||||
} else if (type === "render" && entity instanceof FNote) {
|
||||
@@ -138,7 +140,7 @@ async function renderCode(note: FNote | FAttachment, $renderedContent: JQuery<HT
|
||||
await applySingleBlockSyntaxHighlight($codeBlock, normalizeMimeTypeForCKEditor(note.mime));
|
||||
}
|
||||
|
||||
function renderImage(entity: FNote | FAttachment, $renderedContent: JQuery<HTMLElement>, options: RenderOptions = {}) {
|
||||
async function renderImage(entity: FNote | FAttachment, $renderedContent: JQuery<HTMLElement>, options: RenderOptions = {}) {
|
||||
const encodedTitle = encodeURIComponent(entity.title);
|
||||
|
||||
let url;
|
||||
@@ -146,13 +148,14 @@ function renderImage(entity: FNote | FAttachment, $renderedContent: JQuery<HTMLE
|
||||
if (entity instanceof FNote) {
|
||||
url = `api/images/${entity.noteId}/${encodedTitle}?${Math.random()}`;
|
||||
} else if (entity instanceof FAttachment) {
|
||||
url = `api/attachments/${entity.attachmentId}/image/${encodedTitle}?${entity.utcDateModified}">`;
|
||||
url = `api/attachments/${entity.attachmentId}/image/${encodedTitle}?${entity.utcDateModified}`;
|
||||
}
|
||||
|
||||
$renderedContent // styles needed for the zoom to work well
|
||||
.css("display", "flex")
|
||||
.css("align-items", "center")
|
||||
.css("justify-content", "center");
|
||||
.css("justify-content", "center")
|
||||
.css("flex-direction", "column"); // OCR text is displayed below the image.
|
||||
|
||||
const $img = $("<img>")
|
||||
.attr("src", url || "")
|
||||
@@ -178,9 +181,35 @@ function renderImage(entity: FNote | FAttachment, $renderedContent: JQuery<HTMLE
|
||||
}
|
||||
|
||||
imageContextMenuService.setupContextMenu($img);
|
||||
|
||||
if (entity instanceof FNote && options.showTextRepresentation) {
|
||||
await addOCRTextIfAvailable(entity, $renderedContent);
|
||||
}
|
||||
}
|
||||
|
||||
async function renderFile(entity: FNote | FAttachment, type: string, $renderedContent: JQuery<HTMLElement>) {
|
||||
async function addOCRTextIfAvailable(note: FNote, $content: JQuery<HTMLElement>) {
|
||||
try {
|
||||
const data = await server.get<TextRepresentationResponse>(`ocr/notes/${note.noteId}/text`);
|
||||
if (data.success && data.hasOcr && data.text) {
|
||||
const $ocrSection = $(`
|
||||
<div class="ocr-text-section">
|
||||
<div class="ocr-header">
|
||||
<span class="bx bx-text"></span> ${t("ocr.extracted_text")}
|
||||
</div>
|
||||
<div class="ocr-content"></div>
|
||||
</div>
|
||||
`);
|
||||
|
||||
$ocrSection.find('.ocr-content').text(data.text);
|
||||
$content.append($ocrSection);
|
||||
}
|
||||
} catch (error) {
|
||||
// Silently fail if OCR API is not available
|
||||
console.debug('Failed to fetch OCR text:', error);
|
||||
}
|
||||
}
|
||||
|
||||
async function renderFile(entity: FNote | FAttachment, type: string, $renderedContent: JQuery<HTMLElement>, options: RenderOptions = {}) {
|
||||
let entityType, entityId;
|
||||
|
||||
if (entity instanceof FNote) {
|
||||
@@ -220,6 +249,10 @@ async function renderFile(entity: FNote | FAttachment, type: string, $renderedCo
|
||||
$content.append($videoPreview);
|
||||
}
|
||||
|
||||
if (entity instanceof FNote && options.showTextRepresentation) {
|
||||
await addOCRTextIfAvailable(entity, $content);
|
||||
}
|
||||
|
||||
if (entityType === "notes" && "noteId" in entity) {
|
||||
// TODO: we should make this available also for attachments, but there's a problem with "Open externally" support
|
||||
// in attachment list
|
||||
|
||||
@@ -28,7 +28,7 @@ async function getLinkIcon(noteId: string, viewMode: ViewMode | undefined) {
|
||||
return icon;
|
||||
}
|
||||
|
||||
export type ViewMode = "default" | "source" | "attachments" | "contextual-help" | "note-map";
|
||||
export type ViewMode = "default" | "source" | "attachments" | "contextual-help" | "note-map" | "ocr";
|
||||
|
||||
export interface ViewScope {
|
||||
/**
|
||||
|
||||
@@ -270,7 +270,11 @@ function ajax(url: string, method: string, data: unknown, headers: Headers, opts
|
||||
} else if (opts.silentInternalServerError && jqXhr.status === 500) {
|
||||
// report nothing
|
||||
} else {
|
||||
await reportError(method, url, jqXhr.status, jqXhr.responseText);
|
||||
try {
|
||||
await reportError(method, url, jqXhr.status, jqXhr.responseText);
|
||||
} catch {
|
||||
// reportError may throw (e.g. ValidationError); ensure rej() is still called below.
|
||||
}
|
||||
}
|
||||
|
||||
rej(jqXhr.responseText);
|
||||
|
||||
@@ -2641,3 +2641,26 @@ iframe.print-iframe {
|
||||
min-height: 50px;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
.ocr-text-section {
|
||||
padding: 10px;
|
||||
background: var(--accented-background-color);
|
||||
border-left: 3px solid var(--main-border-color);
|
||||
text-align: left;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.ocr-header {
|
||||
font-weight: bold;
|
||||
margin-bottom: 8px;
|
||||
font-size: 0.9em;
|
||||
color: var(--muted-text-color);
|
||||
}
|
||||
|
||||
.ocr-content {
|
||||
max-height: 150px;
|
||||
overflow-y: auto;
|
||||
font-size: 0.9em;
|
||||
line-height: 1.4;
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
||||
@@ -691,6 +691,7 @@
|
||||
"search_in_note": "Search in note",
|
||||
"note_source": "Note source",
|
||||
"note_attachments": "Note attachments",
|
||||
"view_ocr_text": "View OCR text",
|
||||
"open_note_externally": "Open note externally",
|
||||
"open_note_externally_title": "File will be open in an external application and watched for changes. You'll then be able to upload the modified version back to Trilium.",
|
||||
"open_note_custom": "Open note custom",
|
||||
@@ -1254,12 +1255,28 @@
|
||||
},
|
||||
"images": {
|
||||
"images_section_title": "Images",
|
||||
"download_images_automatically": "Download images automatically for offline use.",
|
||||
"download_images_description": "Pasted HTML can contain references to online images, Trilium will find those references and download the images so that they are available offline.",
|
||||
"enable_image_compression": "Enable image compression",
|
||||
"max_image_dimensions": "Max width / height of an image (image will be resized if it exceeds this setting).",
|
||||
"download_images_automatically": "Download images automatically",
|
||||
"download_images_description": "Download referenced online images from pasted HTML so they are available offline.",
|
||||
"enable_image_compression": "Image compression",
|
||||
"enable_image_compression_description": "Compress and resize images when they are uploaded or pasted.",
|
||||
"max_image_dimensions": "Max image dimensions",
|
||||
"max_image_dimensions_description": "Images exceeding this size will be resized automatically.",
|
||||
"max_image_dimensions_unit": "pixels",
|
||||
"jpeg_quality_description": "JPEG quality (10 - worst quality, 100 - best quality, 50 - 85 is recommended)"
|
||||
"jpeg_quality": "JPEG quality",
|
||||
"jpeg_quality_description": "Recommended range is 50–85. Lower values reduce file size, higher values preserve detail.",
|
||||
"ocr_section_title": "Text Extraction (OCR)",
|
||||
"ocr_related_content_languages": "Content languages (used for text extraction)",
|
||||
"ocr_auto_process": "Auto-process new files",
|
||||
"ocr_auto_process_description": "Automatically extract text from newly uploaded or pasted files.",
|
||||
"ocr_min_confidence": "Minimum confidence",
|
||||
"ocr_confidence_description": "Only extract text above this confidence threshold. Lower values include more text but may be less accurate.",
|
||||
"batch_ocr_title": "Process Existing Files",
|
||||
"batch_ocr_description": "Extract text from all existing images, PDFs, and Office documents in your notes. This may take some time depending on the number of files.",
|
||||
"batch_ocr_start": "Start Batch Processing",
|
||||
"batch_ocr_starting": "Starting batch processing...",
|
||||
"batch_ocr_progress": "Processing {{processed}} of {{total}} files...",
|
||||
"batch_ocr_completed": "Batch processing completed! Processed {{processed}} files.",
|
||||
"batch_ocr_error": "Error during batch processing: {{error}}"
|
||||
},
|
||||
"attachment_erasure_timeout": {
|
||||
"attachment_erasure_timeout": "Attachment Erasure Timeout",
|
||||
@@ -1967,7 +1984,7 @@
|
||||
},
|
||||
"content_language": {
|
||||
"title": "Content languages",
|
||||
"description": "Select one or more languages that should appear in the language selection in the Basic Properties section of a read-only or editable text note. This will allow features such as spell-checking or right-to-left support."
|
||||
"description": "Select one or more languages that should appear in the language selection in the Basic Properties section of a read-only or editable text note. This will allow features such as spell-checking, right-to-left support and text extraction (OCR)."
|
||||
},
|
||||
"switch_layout_button": {
|
||||
"title_vertical": "Move editing pane to the bottom",
|
||||
@@ -2067,6 +2084,19 @@
|
||||
"calendar_view": {
|
||||
"delete_note": "Delete note..."
|
||||
},
|
||||
"ocr": {
|
||||
"extracted_text": "Extracted Text (OCR)",
|
||||
"extracted_text_title": "Extracted Text (OCR)",
|
||||
"loading_text": "Loading OCR text...",
|
||||
"no_text_available": "No OCR text available",
|
||||
"no_text_explanation": "This note has not been processed for OCR text extraction or no text was found.",
|
||||
"failed_to_load": "Failed to load OCR text",
|
||||
"process_now": "Process OCR",
|
||||
"processing": "Processing...",
|
||||
"processing_started": "OCR processing has been started. Please wait a moment and refresh.",
|
||||
"processing_failed": "Failed to start OCR processing",
|
||||
"view_extracted_text": "View extracted text (OCR)"
|
||||
},
|
||||
"command_palette": {
|
||||
"tree-action-name": "Tree: {{name}}",
|
||||
"export_note_title": "Export Note",
|
||||
|
||||
@@ -336,6 +336,8 @@ export async function getExtendedWidgetType(note: FNote | null | undefined, note
|
||||
|
||||
if (noteContext?.viewScope?.viewMode === "source") {
|
||||
resultingType = "readOnlyCode";
|
||||
} else if (noteContext.viewScope?.viewMode === "ocr") {
|
||||
resultingType = "readOnlyOCRText";
|
||||
} else if (noteContext.viewScope?.viewMode === "attachments") {
|
||||
resultingType = noteContext.viewScope.attachmentId ? "attachmentDetail" : "attachmentList";
|
||||
} else if (noteContext.viewScope?.viewMode === "note-map") {
|
||||
|
||||
@@ -25,6 +25,7 @@ interface NoteListProps {
|
||||
viewType: ViewTypeOptions | undefined;
|
||||
onReady?: (data: PrintReport) => void;
|
||||
onProgressChanged?(progress: number): void;
|
||||
showTextRepresentation?: boolean;
|
||||
}
|
||||
|
||||
type LazyLoadedComponent = ((props: ViewModeProps<any>) => VNode<any> | undefined);
|
||||
@@ -67,7 +68,7 @@ export default function NoteList(props: Pick<NoteListProps, "displayOnlyCollecti
|
||||
|
||||
export function SearchNoteList(props: Omit<NoteListProps, "isEnabled" | "viewType">) {
|
||||
const viewType = useNoteViewType(props.note);
|
||||
return <CustomNoteList {...props} isEnabled={true} viewType={viewType} />;
|
||||
return <CustomNoteList {...props} isEnabled={true} viewType={viewType} showTextRepresentation />;
|
||||
}
|
||||
|
||||
export function CustomNoteList({ note, viewType, isEnabled: shouldEnable, notePath, highlightedTokens, displayOnlyCollections, ntxId, onReady, onProgressChanged, ...restProps }: NoteListProps) {
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
import { it, describe, expect } from "vitest";
|
||||
import { buildNote } from "../../../test/easy-froca";
|
||||
import { getBoardData } from "./data";
|
||||
import { describe, expect,it } from "vitest";
|
||||
|
||||
import FBranch from "../../../entities/fbranch";
|
||||
import froca from "../../../services/froca";
|
||||
import { buildNote } from "../../../test/easy-froca";
|
||||
import { getBoardData } from "./data";
|
||||
|
||||
describe("Board data", () => {
|
||||
it("deduplicates cloned notes", async () => {
|
||||
|
||||
@@ -21,4 +21,5 @@ export interface ViewModeProps<T extends object> {
|
||||
media: ViewModeMedia;
|
||||
onReady(data: PrintReport): void;
|
||||
onProgressChanged?: ProgressChangedFn;
|
||||
showTextRepresentation?: boolean;
|
||||
}
|
||||
|
||||
@@ -23,7 +23,7 @@ import { ComponentChildren, TargetedMouseEvent } from "preact";
|
||||
|
||||
const contentSizeObserver = new ResizeObserver(onContentResized);
|
||||
|
||||
export function ListView({ note, noteIds: unfilteredNoteIds, highlightedTokens }: ViewModeProps<{}>) {
|
||||
export function ListView({ note, noteIds: unfilteredNoteIds, highlightedTokens, showTextRepresentation }: ViewModeProps<{}>) {
|
||||
const expandDepth = useExpansionDepth(note);
|
||||
const noteIds = useFilteredNoteIds(note, unfilteredNoteIds);
|
||||
const { pageNotes, ...pagination } = usePagination(note, noteIds);
|
||||
@@ -37,13 +37,14 @@ export function ListView({ note, noteIds: unfilteredNoteIds, highlightedTokens }
|
||||
key={childNote.noteId}
|
||||
note={childNote} parentNote={note}
|
||||
expandDepth={expandDepth} highlightedTokens={highlightedTokens}
|
||||
currentLevel={1} includeArchived={includeArchived} />
|
||||
currentLevel={1} includeArchived={includeArchived}
|
||||
showTextRepresentation={showTextRepresentation} />
|
||||
))}
|
||||
</Card>
|
||||
</NoteList>;
|
||||
}
|
||||
|
||||
export function GridView({ note, noteIds: unfilteredNoteIds, highlightedTokens }: ViewModeProps<{}>) {
|
||||
export function GridView({ note, noteIds: unfilteredNoteIds, highlightedTokens, showTextRepresentation }: ViewModeProps<{}>) {
|
||||
const noteIds = useFilteredNoteIds(note, unfilteredNoteIds);
|
||||
const { pageNotes, ...pagination } = usePagination(note, noteIds);
|
||||
const [ includeArchived ] = useNoteLabelBoolean(note, "includeArchived");
|
||||
@@ -56,7 +57,8 @@ export function GridView({ note, noteIds: unfilteredNoteIds, highlightedTokens }
|
||||
note={childNote}
|
||||
parentNote={note}
|
||||
highlightedTokens={highlightedTokens}
|
||||
includeArchived={includeArchived} />
|
||||
includeArchived={includeArchived}
|
||||
showTextRepresentation={showTextRepresentation} />
|
||||
))}
|
||||
</div>
|
||||
</NoteList>
|
||||
@@ -91,13 +93,14 @@ function NoteList(props: NoteListProps) {
|
||||
</div>
|
||||
}
|
||||
|
||||
function ListNoteCard({ note, parentNote, highlightedTokens, currentLevel, expandDepth, includeArchived }: {
|
||||
function ListNoteCard({ note, parentNote, highlightedTokens, currentLevel, expandDepth, includeArchived, showTextRepresentation }: {
|
||||
note: FNote,
|
||||
parentNote: FNote,
|
||||
currentLevel: number,
|
||||
expandDepth: number,
|
||||
highlightedTokens: string[] | null | undefined;
|
||||
includeArchived: boolean;
|
||||
showTextRepresentation?: boolean;
|
||||
}) {
|
||||
|
||||
const [ isExpanded, setExpanded ] = useState(currentLevel <= expandDepth);
|
||||
@@ -113,7 +116,8 @@ function ListNoteCard({ note, parentNote, highlightedTokens, currentLevel, expan
|
||||
<NoteContent note={note}
|
||||
highlightedTokens={highlightedTokens}
|
||||
noChildrenList
|
||||
includeArchivedNotes={includeArchived} />
|
||||
includeArchivedNotes={includeArchived}
|
||||
showTextRepresentation={showTextRepresentation} />
|
||||
</CardSection>
|
||||
|
||||
<NoteChildren note={note}
|
||||
@@ -157,6 +161,7 @@ interface GridNoteCardProps {
|
||||
parentNote: FNote;
|
||||
highlightedTokens: string[] | null | undefined;
|
||||
includeArchived: boolean;
|
||||
showTextRepresentation?: boolean;
|
||||
}
|
||||
|
||||
function GridNoteCard(props: GridNoteCardProps) {
|
||||
@@ -185,6 +190,7 @@ function GridNoteCard(props: GridNoteCardProps) {
|
||||
trim
|
||||
highlightedTokens={props.highlightedTokens}
|
||||
includeArchivedNotes={props.includeArchived}
|
||||
showTextRepresentation={props.showTextRepresentation}
|
||||
/>
|
||||
</CardFrame>
|
||||
);
|
||||
@@ -201,12 +207,13 @@ function NoteAttributes({ note }: { note: FNote }) {
|
||||
return <span className="note-list-attributes" ref={ref} />;
|
||||
}
|
||||
|
||||
export function NoteContent({ note, trim, noChildrenList, highlightedTokens, includeArchivedNotes }: {
|
||||
export function NoteContent({ note, trim, noChildrenList, highlightedTokens, includeArchivedNotes, showTextRepresentation }: {
|
||||
note: FNote;
|
||||
trim?: boolean;
|
||||
noChildrenList?: boolean;
|
||||
highlightedTokens: string[] | null | undefined;
|
||||
includeArchivedNotes: boolean;
|
||||
showTextRepresentation?: boolean;
|
||||
}) {
|
||||
const contentRef = useRef<HTMLDivElement>(null);
|
||||
const highlightSearch = useImperativeSearchHighlighlighting(highlightedTokens);
|
||||
@@ -230,7 +237,8 @@ export function NoteContent({ note, trim, noChildrenList, highlightedTokens, inc
|
||||
trim,
|
||||
noChildrenList,
|
||||
noIncludedNotes: true,
|
||||
includeArchivedNotes
|
||||
includeArchivedNotes,
|
||||
showTextRepresentation
|
||||
})
|
||||
.then(({ $renderedContent, type }) => {
|
||||
if (!contentRef.current) return;
|
||||
|
||||
@@ -27,6 +27,7 @@ const VIEW_MODE_ICON_MAPPINGS: Record<Exclude<ViewMode, "default">, string> = {
|
||||
"contextual-help": "bx bx-help-circle",
|
||||
"note-map": "bx bxs-network-chart",
|
||||
attachments: "bx bx-paperclip",
|
||||
ocr: "bx bx-text"
|
||||
};
|
||||
|
||||
export default function TabSwitcher() {
|
||||
|
||||
@@ -12,7 +12,7 @@ import { TypeWidgetProps } from "./type_widgets/type_widget";
|
||||
* A `NoteType` altered by the note detail widget, taking into consideration whether the note is editable or not and adding special note types such as an empty one,
|
||||
* for protected session or attachment information.
|
||||
*/
|
||||
export type ExtendedNoteType = Exclude<NoteType, "launcher" | "text" | "code" | "llmChat"> | "empty" | "readOnlyCode" | "readOnlyText" | "editableText" | "editableCode" | "attachmentDetail" | "attachmentList" | "protectedSession" | "sqlConsole" | "llmChat";
|
||||
export type ExtendedNoteType = Exclude<NoteType, "launcher" | "text" | "code" | "llmChat"> | "empty" | "readOnlyCode" | "readOnlyText" | "readOnlyOCRText" | "editableText" | "editableCode" | "attachmentDetail" | "attachmentList" | "protectedSession" | "sqlConsole" | "llmChat";
|
||||
|
||||
export type TypeWidget = ((props: TypeWidgetProps) => VNode | JSX.Element | undefined);
|
||||
type NoteTypeView = () => (Promise<{ default: TypeWidget } | TypeWidget> | TypeWidget);
|
||||
@@ -78,6 +78,11 @@ export const TYPE_MAPPINGS: Record<ExtendedNoteType, NoteTypeMapping> = {
|
||||
className: "note-detail-readonly-code",
|
||||
printable: true
|
||||
},
|
||||
readOnlyOCRText: {
|
||||
view: () => import("./type_widgets/ReadOnlyTextRepresentation"),
|
||||
className: "note-detail-ocr-text",
|
||||
printable: true
|
||||
},
|
||||
editableCode: {
|
||||
view: async () => (await import("./type_widgets/code/Code")).EditableCode,
|
||||
className: "note-detail-code",
|
||||
|
||||
@@ -3,6 +3,7 @@ interface SliderProps {
|
||||
onChange(newValue: number);
|
||||
min?: number;
|
||||
max?: number;
|
||||
step?: number;
|
||||
title?: string;
|
||||
}
|
||||
|
||||
|
||||
@@ -162,6 +162,7 @@ export function NoteContextMenu({ note, noteContext, itemsAtStart, itemsNearNote
|
||||
<CommandItem command="openNoteExternally" icon="bx bx-file-find" disabled={isSearchOrBook || !isElectron} text={t("note_actions.open_note_externally")} title={t("note_actions.open_note_externally_title")} />
|
||||
<CommandItem command="openNoteCustom" icon="bx bx-customize" disabled={isSearchOrBook || isMac || !isElectron} text={t("note_actions.open_note_custom")} />
|
||||
<CommandItem command="showNoteSource" icon="bx bx-code" disabled={!hasSource} text={t("note_actions.note_source")} />
|
||||
<CommandItem command="showNoteOCRText" icon="bx bx-text" disabled={!["image", "file"].includes(noteType)} text={t("note_actions.view_ocr_text")} />
|
||||
{(syncServerHost && isElectron) &&
|
||||
<CommandItem command="openNoteOnServer" icon="bx bx-world" disabled={!syncServerHost} text={t("note_actions.open_note_on_server")} />
|
||||
}
|
||||
|
||||
@@ -27,8 +27,10 @@ import { FormDropdownDivider, FormListItem } from "../react/FormList";
|
||||
import HelpButton from "../react/HelpButton";
|
||||
import { useTriliumEvent } from "../react/hooks";
|
||||
import Icon from "../react/Icon";
|
||||
import Modal from "../react/Modal";
|
||||
import NoteLink from "../react/NoteLink";
|
||||
import { ParentComponent, refToJQuerySelector } from "../react/react_utils";
|
||||
import { TextRepresentation } from "./ReadOnlyTextRepresentation";
|
||||
import { TypeWidgetProps } from "./type_widget";
|
||||
|
||||
/**
|
||||
@@ -141,6 +143,8 @@ export function AttachmentDetail({ note, viewScope }: TypeWidgetProps) {
|
||||
|
||||
function AttachmentInfo({ attachment, isFullDetail }: { attachment: FAttachment, isFullDetail?: boolean }) {
|
||||
const contentWrapper = useRef<HTMLDivElement>(null);
|
||||
const [ ocrModalShown, setOcrModalShown ] = useState(false);
|
||||
const supportsOcr = attachment.role === "image" || attachment.role === "file";
|
||||
|
||||
function refresh() {
|
||||
content_renderer.getRenderedContent(attachment, { imageHasZoom: isFullDetail })
|
||||
@@ -181,7 +185,11 @@ function AttachmentInfo({ attachment, isFullDetail }: { attachment: FAttachment,
|
||||
<div className="attachment-detail-widget">
|
||||
<div className={`attachment-detail-wrapper ${isFullDetail ? "full-detail" : "list-view"} ${attachment.utcDateScheduledForErasureSince ? "scheduled-for-deletion" : ""}`}>
|
||||
<div className="attachment-title-line">
|
||||
<AttachmentActions attachment={attachment} copyAttachmentLinkToClipboard={copyAttachmentLinkToClipboard} />
|
||||
<AttachmentActions
|
||||
attachment={attachment}
|
||||
copyAttachmentLinkToClipboard={copyAttachmentLinkToClipboard}
|
||||
onShowOcr={supportsOcr ? () => setOcrModalShown(true) : undefined}
|
||||
/>
|
||||
<h4 className="attachment-title">
|
||||
{!isFullDetail ? (
|
||||
<NoteLink
|
||||
@@ -207,6 +215,22 @@ function AttachmentInfo({ attachment, isFullDetail }: { attachment: FAttachment,
|
||||
{attachment.utcDateScheduledForErasureSince && <DeletionAlert utcDateScheduledForErasureSince={attachment.utcDateScheduledForErasureSince} />}
|
||||
<div ref={contentWrapper} className="attachment-content-wrapper" />
|
||||
</div>
|
||||
|
||||
{supportsOcr && (
|
||||
<Modal
|
||||
className="ocr-text-modal"
|
||||
title={t("ocr.extracted_text_title")}
|
||||
show={ocrModalShown}
|
||||
onHidden={() => setOcrModalShown(false)}
|
||||
size="lg"
|
||||
scrollable
|
||||
>
|
||||
<TextRepresentation
|
||||
textUrl={`ocr/attachments/${attachment.attachmentId}/text`}
|
||||
processUrl={`ocr/process-attachment/${attachment.attachmentId}`}
|
||||
/>
|
||||
</Modal>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -228,7 +252,7 @@ function DeletionAlert({ utcDateScheduledForErasureSince }: { utcDateScheduledFo
|
||||
);
|
||||
}
|
||||
|
||||
function AttachmentActions({ attachment, copyAttachmentLinkToClipboard }: { attachment: FAttachment, copyAttachmentLinkToClipboard: () => void }) {
|
||||
function AttachmentActions({ attachment, copyAttachmentLinkToClipboard, onShowOcr }: { attachment: FAttachment, copyAttachmentLinkToClipboard: () => void, onShowOcr?: () => void }) {
|
||||
const isElectron = utils.isElectron();
|
||||
const fileUploadRef = useRef<HTMLInputElement>(null);
|
||||
|
||||
@@ -262,6 +286,12 @@ function AttachmentActions({ attachment, copyAttachmentLinkToClipboard }: { atta
|
||||
icon="bx bx-link"
|
||||
onClick={copyAttachmentLinkToClipboard}
|
||||
>{t("attachments_actions.copy_link_to_clipboard")}</FormListItem>
|
||||
{onShowOcr && (
|
||||
<FormListItem
|
||||
icon="bx bx-text"
|
||||
onClick={onShowOcr}
|
||||
>{t("ocr.view_extracted_text")}</FormListItem>
|
||||
)}
|
||||
<FormDropdownDivider />
|
||||
|
||||
<FormListItem
|
||||
|
||||
@@ -4,7 +4,7 @@ import AppearanceSettings from "./options/appearance";
|
||||
import ShortcutSettings from "./options/shortcuts";
|
||||
import TextNoteSettings from "./options/text_notes";
|
||||
import CodeNoteSettings from "./options/code_notes";
|
||||
import ImageSettings from "./options/images";
|
||||
import MediaSettings from "./options/media";
|
||||
import SpellcheckSettings from "./options/spellcheck";
|
||||
import PasswordSettings from "./options/password";
|
||||
import MultiFactorAuthenticationSettings from "./options/multi_factor_authentication";
|
||||
@@ -19,14 +19,14 @@ import "./ContentWidget.css";
|
||||
import { t } from "../../services/i18n";
|
||||
import BackendLog from "./code/BackendLog";
|
||||
|
||||
export type OptionPages = "_optionsAppearance" | "_optionsShortcuts" | "_optionsTextNotes" | "_optionsCodeNotes" | "_optionsImages" | "_optionsSpellcheck" | "_optionsPassword" | "_optionsMFA" | "_optionsEtapi" | "_optionsBackup" | "_optionsSync" | "_optionsOther" | "_optionsLocalization" | "_optionsAdvanced" | "_optionsLlm";
|
||||
export type OptionPages = "_optionsAppearance" | "_optionsShortcuts" | "_optionsTextNotes" | "_optionsCodeNotes" | "_optionsMedia" | "_optionsSpellcheck" | "_optionsPassword" | "_optionsMFA" | "_optionsEtapi" | "_optionsBackup" | "_optionsSync" | "_optionsOther" | "_optionsLocalization" | "_optionsAdvanced" | "_optionsLlm";
|
||||
|
||||
const CONTENT_WIDGETS: Record<OptionPages | "_backendLog", (props: TypeWidgetProps) => JSX.Element> = {
|
||||
_optionsAppearance: AppearanceSettings,
|
||||
_optionsShortcuts: ShortcutSettings,
|
||||
_optionsTextNotes: TextNoteSettings,
|
||||
_optionsCodeNotes: CodeNoteSettings,
|
||||
_optionsImages: ImageSettings,
|
||||
_optionsMedia: MediaSettings,
|
||||
_optionsSpellcheck: SpellcheckSettings,
|
||||
_optionsPassword: PasswordSettings,
|
||||
_optionsMFA: MultiFactorAuthenticationSettings,
|
||||
|
||||
@@ -0,0 +1,56 @@
|
||||
.text-representation {
|
||||
padding: 10px;
|
||||
}
|
||||
|
||||
.text-representation-header {
|
||||
margin-bottom: 10px;
|
||||
padding: 8px 12px;
|
||||
background-color: var(--main-background-color);
|
||||
border: 1px solid var(--main-border-color);
|
||||
border-radius: 4px;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.text-representation-loading {
|
||||
text-align: center;
|
||||
padding: 30px;
|
||||
color: var(--muted-text-color);
|
||||
}
|
||||
|
||||
.text-representation-content {
|
||||
white-space: pre-wrap;
|
||||
line-height: 1.6;
|
||||
border: 1px solid var(--main-border-color);
|
||||
border-radius: 4px;
|
||||
padding: 15px;
|
||||
background-color: var(--accented-background-color);
|
||||
min-height: 100px;
|
||||
user-select: text;
|
||||
}
|
||||
|
||||
.text-representation-meta {
|
||||
font-size: 0.9em;
|
||||
color: var(--muted-text-color);
|
||||
margin-top: 10px;
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
.text-representation-empty {
|
||||
color: var(--muted-text-color);
|
||||
font-style: italic;
|
||||
text-align: center;
|
||||
padding: 30px;
|
||||
}
|
||||
|
||||
.text-representation-process-btn {
|
||||
margin-top: 15px;
|
||||
}
|
||||
|
||||
.text-representation-error {
|
||||
color: var(--error-color);
|
||||
background-color: var(--error-background-color);
|
||||
border: 1px solid var(--error-border-color);
|
||||
padding: 10px;
|
||||
border-radius: 4px;
|
||||
margin-top: 10px;
|
||||
}
|
||||
@@ -0,0 +1,131 @@
|
||||
import "./ReadOnlyTextRepresentation.css";
|
||||
|
||||
import type { TextRepresentationResponse } from "@triliumnext/commons";
|
||||
import { useEffect, useState } from "preact/hooks";
|
||||
|
||||
import { t } from "../../services/i18n";
|
||||
import server from "../../services/server";
|
||||
import toast from "../../services/toast";
|
||||
import { TypeWidgetProps } from "./type_widget";
|
||||
|
||||
type State =
|
||||
| { kind: "loading" }
|
||||
| { kind: "loaded"; text: string }
|
||||
| { kind: "empty" }
|
||||
| { kind: "error"; message: string };
|
||||
|
||||
interface TextRepresentationProps {
|
||||
/** The API path to fetch OCR text from (e.g. `ocr/notes/{id}/text`). */
|
||||
textUrl: string;
|
||||
/** The API path to trigger OCR processing (e.g. `ocr/process-note/{id}`). */
|
||||
processUrl: string;
|
||||
}
|
||||
|
||||
export default function ReadOnlyTextRepresentation({ note }: TypeWidgetProps) {
|
||||
return (
|
||||
<TextRepresentation
|
||||
textUrl={`ocr/notes/${note.noteId}/text`}
|
||||
processUrl={`ocr/process-note/${note.noteId}`}
|
||||
/>
|
||||
);
|
||||
}
|
||||
|
||||
export function TextRepresentation({ textUrl, processUrl }: TextRepresentationProps) {
|
||||
const [ state, setState ] = useState<State>({ kind: "loading" });
|
||||
const [ processing, setProcessing ] = useState(false);
|
||||
|
||||
async function fetchText() {
|
||||
setState({ kind: "loading" });
|
||||
|
||||
try {
|
||||
const response = await server.get<TextRepresentationResponse>(textUrl);
|
||||
|
||||
if (!response.success) {
|
||||
setState({ kind: "error", message: response.message || t("ocr.failed_to_load") });
|
||||
return;
|
||||
}
|
||||
|
||||
if (!response.hasOcr || !response.text) {
|
||||
setState({ kind: "empty" });
|
||||
return;
|
||||
}
|
||||
|
||||
setState({ kind: "loaded", text: response.text });
|
||||
} catch (error: any) {
|
||||
console.error("Error loading text representation:", error);
|
||||
setState({ kind: "error", message: error.message || t("ocr.failed_to_load") });
|
||||
}
|
||||
}
|
||||
|
||||
useEffect(() => { fetchText(); }, [ textUrl ]);
|
||||
|
||||
async function processOCR() {
|
||||
setProcessing(true);
|
||||
try {
|
||||
const response = await server.post<{ success: boolean; message?: string }>(processUrl, { forceReprocess: true });
|
||||
if (response.success) {
|
||||
toast.showMessage(t("ocr.processing_started"));
|
||||
setTimeout(fetchText, 2000);
|
||||
} else {
|
||||
toast.showError(response.message || t("ocr.processing_failed"));
|
||||
}
|
||||
} catch {
|
||||
// Server errors (4xx/5xx) are already shown as toasts by server.ts.
|
||||
} finally {
|
||||
setProcessing(false);
|
||||
}
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="text-representation note-detail-printable">
|
||||
<div className="text-representation-header">
|
||||
<span className="bx bx-text" />{" "}{t("ocr.extracted_text_title")}
|
||||
</div>
|
||||
|
||||
{state.kind === "loading" && (
|
||||
<div className="text-representation-loading">
|
||||
<span className="bx bx-loader-alt bx-spin" />{" "}{t("ocr.loading_text")}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{state.kind === "loaded" && (
|
||||
<>
|
||||
<div className="text-representation-content">
|
||||
{state.text}
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
|
||||
{state.kind === "empty" && (
|
||||
<>
|
||||
<div className="text-representation-empty">
|
||||
<span className="bx bx-info-circle" />{" "}{t("ocr.no_text_available")}
|
||||
</div>
|
||||
<div className="text-representation-meta">
|
||||
{t("ocr.no_text_explanation")}
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
|
||||
{state.kind === "error" && (
|
||||
<div className="text-representation-error">
|
||||
<span className="bx bx-error" />{" "}{state.message}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{state.kind !== "loading" && (
|
||||
<button
|
||||
type="button"
|
||||
className="btn btn-secondary text-representation-process-btn"
|
||||
disabled={processing}
|
||||
onClick={processOCR}
|
||||
>
|
||||
{processing
|
||||
? <><span className="bx bx-loader-alt bx-spin" />{" "}{t("ocr.processing")}</>
|
||||
: <><span className="bx bx-play" />{" "}{t("ocr.process_now")}</>
|
||||
}
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -1,30 +1,41 @@
|
||||
.option-row {
|
||||
border-bottom: 1px solid var(--main-border-color);
|
||||
display: flex;
|
||||
align-items: center;
|
||||
flex-direction: column;
|
||||
padding: 0.5em 0;
|
||||
}
|
||||
|
||||
.option-row > label {
|
||||
width: 40%;
|
||||
.option-row-main {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
.option-row-main > label {
|
||||
width: 45%;
|
||||
margin-bottom: 0 !important;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
.option-row > select,
|
||||
.option-row > .dropdown {
|
||||
.option-row-main > select,
|
||||
.option-row-main > .dropdown {
|
||||
width: 60%;
|
||||
}
|
||||
|
||||
.option-row > .dropdown button {
|
||||
.option-row-main > .dropdown button {
|
||||
width: 100%;
|
||||
text-align: start;
|
||||
}
|
||||
|
||||
.option-row-description {
|
||||
line-height: 1.3;
|
||||
margin-top: 0.25em;
|
||||
color: var(--muted-text-color);
|
||||
}
|
||||
|
||||
.option-row:last-of-type {
|
||||
border-bottom: unset;
|
||||
}
|
||||
|
||||
.option-row.centered {
|
||||
.option-row.centered .option-row-main {
|
||||
justify-content: center;
|
||||
}
|
||||
|
||||
@@ -5,18 +5,22 @@ import { useUniqueName } from "../../../react/hooks";
|
||||
interface OptionsRowProps {
|
||||
name: string;
|
||||
label?: string;
|
||||
description?: string;
|
||||
children: VNode;
|
||||
centered?: boolean;
|
||||
}
|
||||
|
||||
export default function OptionsRow({ name, label, children, centered }: OptionsRowProps) {
|
||||
export default function OptionsRow({ name, label, description, children, centered }: OptionsRowProps) {
|
||||
const id = useUniqueName(name);
|
||||
const childWithId = cloneElement(children, { id });
|
||||
|
||||
return (
|
||||
<div className={`option-row ${centered ? "centered" : ""}`}>
|
||||
{label && <label for={id}>{label}</label>}
|
||||
{childWithId}
|
||||
<div className="option-row-main">
|
||||
{label && <label for={id}>{label}</label>}
|
||||
{childWithId}
|
||||
</div>
|
||||
{description && <small className="option-row-description">{description}</small>}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -1,48 +0,0 @@
|
||||
import { t } from "../../../services/i18n";
|
||||
import FormCheckbox from "../../react/FormCheckbox";
|
||||
import FormGroup from "../../react/FormGroup";
|
||||
import { FormTextBoxWithUnit } from "../../react/FormTextBox";
|
||||
import { useTriliumOption, useTriliumOptionBool } from "../../react/hooks";
|
||||
import OptionsSection from "./components/OptionsSection";
|
||||
|
||||
export default function ImageSettings() {
|
||||
const [ downloadImagesAutomatically, setDownloadImagesAutomatically ] = useTriliumOptionBool("downloadImagesAutomatically");
|
||||
const [ compressImages, setCompressImages ] = useTriliumOptionBool("compressImages");
|
||||
const [ imageMaxWidthHeight, setImageMaxWidthHeight ] = useTriliumOption("imageMaxWidthHeight");
|
||||
const [ imageJpegQuality, setImageJpegQuality ] = useTriliumOption("imageJpegQuality");
|
||||
|
||||
return (
|
||||
<OptionsSection title={t("images.images_section_title")}>
|
||||
<FormGroup name="download-images-automatically" description={t("images.download_images_description")}>
|
||||
<FormCheckbox
|
||||
label={t("images.download_images_automatically")}
|
||||
currentValue={downloadImagesAutomatically} onChange={setDownloadImagesAutomatically}
|
||||
/>
|
||||
</FormGroup>
|
||||
|
||||
<hr/>
|
||||
|
||||
<FormCheckbox
|
||||
name="image-compression-enabled"
|
||||
label={t("images.enable_image_compression")}
|
||||
currentValue={compressImages} onChange={setCompressImages}
|
||||
/>
|
||||
|
||||
<FormGroup name="image-max-width-height" label={t("images.max_image_dimensions")} disabled={!compressImages}>
|
||||
<FormTextBoxWithUnit
|
||||
type="number" min="1"
|
||||
unit={t("images.max_image_dimensions_unit")}
|
||||
currentValue={imageMaxWidthHeight} onChange={setImageMaxWidthHeight}
|
||||
/>
|
||||
</FormGroup>
|
||||
|
||||
<FormGroup name="image-jpeg-quality" label={t("images.jpeg_quality_description")} disabled={!compressImages}>
|
||||
<FormTextBoxWithUnit
|
||||
min="10" max="100" type="number"
|
||||
unit={t("units.percentage")}
|
||||
currentValue={imageJpegQuality} onChange={setImageJpegQuality}
|
||||
/>
|
||||
</FormGroup>
|
||||
</OptionsSection>
|
||||
);
|
||||
}
|
||||
176
apps/client/src/widgets/type_widgets/options/media.tsx
Normal file
176
apps/client/src/widgets/type_widgets/options/media.tsx
Normal file
@@ -0,0 +1,176 @@
|
||||
import { useCallback, useEffect, useRef, useState } from "preact/hooks";
|
||||
|
||||
import { t } from "../../../services/i18n";
|
||||
import server from "../../../services/server";
|
||||
import toast from "../../../services/toast";
|
||||
import { FormTextBoxWithUnit } from "../../react/FormTextBox";
|
||||
import FormToggle from "../../react/FormToggle";
|
||||
import { useTriliumOption, useTriliumOptionBool } from "../../react/hooks";
|
||||
import Slider from "../../react/Slider";
|
||||
import OptionsRow from "./components/OptionsRow";
|
||||
import OptionsSection from "./components/OptionsSection";
|
||||
import RelatedSettings from "./components/RelatedSettings";
|
||||
|
||||
/**
 * "Media" options page — groups the image handling settings together with
 * the OCR settings.
 */
export default function MediaSettings() {
    return (
        <>
            <ImageSettings />
            <OcrSettings />
        </>
    );
}
|
||||
|
||||
function ImageSettings() {
|
||||
const [ downloadImagesAutomatically, setDownloadImagesAutomatically ] = useTriliumOptionBool("downloadImagesAutomatically");
|
||||
const [ compressImages, setCompressImages ] = useTriliumOptionBool("compressImages");
|
||||
const [ imageMaxWidthHeight, setImageMaxWidthHeight ] = useTriliumOption("imageMaxWidthHeight");
|
||||
const [ imageJpegQuality, setImageJpegQuality ] = useTriliumOption("imageJpegQuality");
|
||||
|
||||
return (
|
||||
<OptionsSection title={t("images.images_section_title")}>
|
||||
<OptionsRow name="download-images-automatically" label={t("images.download_images_automatically")} description={t("images.download_images_description")}>
|
||||
<FormToggle
|
||||
switchOnName="" switchOffName=""
|
||||
currentValue={downloadImagesAutomatically}
|
||||
onChange={setDownloadImagesAutomatically}
|
||||
/>
|
||||
</OptionsRow>
|
||||
|
||||
<OptionsRow name="image-compression-enabled" label={t("images.enable_image_compression")} description={t("images.enable_image_compression_description")}>
|
||||
<FormToggle
|
||||
switchOnName="" switchOffName=""
|
||||
currentValue={compressImages}
|
||||
onChange={setCompressImages}
|
||||
/>
|
||||
</OptionsRow>
|
||||
|
||||
<OptionsRow name="image-max-width-height" label={t("images.max_image_dimensions")} description={t("images.max_image_dimensions_description")}>
|
||||
<FormTextBoxWithUnit
|
||||
type="number" min="1"
|
||||
disabled={!compressImages}
|
||||
unit={t("images.max_image_dimensions_unit")}
|
||||
currentValue={imageMaxWidthHeight} onChange={setImageMaxWidthHeight}
|
||||
/>
|
||||
</OptionsRow>
|
||||
|
||||
<OptionsRow name="image-jpeg-quality" label={`${t("images.jpeg_quality")} (${imageJpegQuality ?? 75}%)`} description={t("images.jpeg_quality_description")}>
|
||||
<Slider
|
||||
min={10} max={100} step={5}
|
||||
value={parseInt(imageJpegQuality ?? "75", 10)}
|
||||
onChange={(v) => setImageJpegQuality(String(v))}
|
||||
/>
|
||||
</OptionsRow>
|
||||
</OptionsSection>
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * OCR-related options: automatic processing of new images, the minimum
 * confidence threshold, and the manual batch-processing trigger.
 */
function OcrSettings() {
    const [ ocrAutoProcess, setOcrAutoProcess ] = useTriliumOptionBool("ocrAutoProcessImages");
    const [ ocrMinConfidence, setOcrMinConfidence ] = useTriliumOption("ocrMinConfidence");

    // The option is stored as a 0..1 float string; the slider works in whole percents.
    const confidencePercent = Math.round(parseFloat(ocrMinConfidence ?? "0.75") * 100);

    return (
        <>
            <OptionsSection title={t("images.ocr_section_title")}>
                <OptionsRow name="ocr-auto-process" label={t("images.ocr_auto_process")} description={t("images.ocr_auto_process_description")}>
                    <FormToggle
                        switchOnName="" switchOffName=""
                        currentValue={ocrAutoProcess}
                        onChange={setOcrAutoProcess}
                    />
                </OptionsRow>

                <OptionsRow name="ocr-min-confidence" label={`${t("images.ocr_min_confidence")} (${confidencePercent}%)`} description={t("images.ocr_confidence_description")}>
                    <Slider
                        min={0} max={100} step={5}
                        value={confidencePercent}
                        onChange={(v) => setOcrMinConfidence(String(v / 100))}
                    />
                </OptionsRow>

                <BatchProcessing />
            </OptionsSection>

            <RelatedSettings items={[
                {
                    title: t("images.ocr_related_content_languages"),
                    targetPage: "_optionsLocalization"
                }
            ]} />
        </>
    );
}
|
||||
|
||||
interface BatchProgress {
|
||||
inProgress: boolean;
|
||||
total: number;
|
||||
processed: number;
|
||||
percentage?: number;
|
||||
}
|
||||
|
||||
function BatchProcessing() {
|
||||
const [ progress, setProgress ] = useState<BatchProgress | null>(null);
|
||||
const pollingRef = useRef<ReturnType<typeof setInterval>>(null);
|
||||
|
||||
const pollProgress = useCallback(() => {
|
||||
server.get<BatchProgress>("ocr/batch-progress").then((data) => {
|
||||
setProgress(data);
|
||||
if (!data.inProgress && pollingRef.current) {
|
||||
clearInterval(pollingRef.current);
|
||||
pollingRef.current = null;
|
||||
toast.showMessage(t("images.batch_ocr_completed", { processed: data.processed }));
|
||||
}
|
||||
});
|
||||
}, []);
|
||||
|
||||
// Clean up polling on unmount.
|
||||
useEffect(() => {
|
||||
return () => {
|
||||
if (pollingRef.current) {
|
||||
clearInterval(pollingRef.current);
|
||||
}
|
||||
};
|
||||
}, []);
|
||||
|
||||
async function startBatch() {
|
||||
try {
|
||||
const result = await server.post<{ success: boolean; message?: string }>("ocr/batch-process");
|
||||
if (result.success) {
|
||||
toast.showMessage(t("images.batch_ocr_starting"));
|
||||
pollingRef.current = setInterval(pollProgress, 2000);
|
||||
pollProgress();
|
||||
} else {
|
||||
toast.showError(result.message || t("images.batch_ocr_error", { error: "Unknown" }));
|
||||
}
|
||||
} catch {
|
||||
// Server errors are already shown as toasts by server.ts.
|
||||
}
|
||||
}
|
||||
|
||||
const isRunning = progress?.inProgress ?? false;
|
||||
|
||||
return (
|
||||
<OptionsRow name="batch-ocr" label={t("images.batch_ocr_title")} description={t("images.batch_ocr_description")}>
|
||||
{isRunning ? (
|
||||
<div style={{ width: "100%" }}>
|
||||
<div className="progress" style={{ height: "24px" }}>
|
||||
<div
|
||||
className="progress-bar progress-bar-striped progress-bar-animated"
|
||||
role="progressbar"
|
||||
style={{ width: `${progress?.percentage ?? 0}%` }}
|
||||
>
|
||||
{t("images.batch_ocr_progress", { processed: progress?.processed ?? 0, total: progress?.total ?? 0 })}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
) : (
|
||||
<button
|
||||
type="button"
|
||||
className="btn btn-secondary"
|
||||
onClick={startBatch}
|
||||
>
|
||||
<span className="bx bx-play" />{" "}{t("images.batch_ocr_start")}
|
||||
</button>
|
||||
)}
|
||||
</OptionsRow>
|
||||
);
|
||||
}
|
||||
@@ -37,7 +37,8 @@
|
||||
"better-sqlite3": "12.8.0",
|
||||
"html-to-text": "9.0.5",
|
||||
"node-html-parser": "7.1.0",
|
||||
"sucrase": "3.35.1"
|
||||
"sucrase": "3.35.1",
|
||||
"unpdf": "1.4.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@braintree/sanitize-url": "7.1.2",
|
||||
@@ -115,6 +116,7 @@
|
||||
"mime-types": "3.0.2",
|
||||
"multer": "2.1.1",
|
||||
"normalize-strings": "1.1.1",
|
||||
"officeparser": "6.0.7",
|
||||
"rand-token": "1.0.1",
|
||||
"safe-compare": "1.1.4",
|
||||
"sanitize-filename": "1.6.4",
|
||||
@@ -126,6 +128,7 @@
|
||||
"striptags": "3.2.0",
|
||||
"supertest": "7.2.2",
|
||||
"swagger-jsdoc": "6.2.8",
|
||||
"tesseract.js": "6.0.1",
|
||||
"time2fa": "1.4.2",
|
||||
"tmp": "0.2.5",
|
||||
"turnish": "1.8.0",
|
||||
|
||||
Binary file not shown.
@@ -107,6 +107,7 @@ CREATE TABLE IF NOT EXISTS "recent_notes"
|
||||
CREATE TABLE IF NOT EXISTS "blobs" (
|
||||
`blobId` TEXT NOT NULL,
|
||||
`content` TEXT NULL DEFAULT NULL,
|
||||
`textRepresentation` TEXT DEFAULT NULL,
|
||||
`dateModified` TEXT NOT NULL,
|
||||
`utcDateModified` TEXT NOT NULL,
|
||||
PRIMARY KEY(`blobId`)
|
||||
|
||||
@@ -344,7 +344,7 @@
|
||||
"shortcuts-title": "Shortcuts",
|
||||
"text-notes": "Text Notes",
|
||||
"code-notes-title": "Code Notes",
|
||||
"images-title": "Images",
|
||||
"images-title": "Media",
|
||||
"spellcheck-title": "Spellcheck",
|
||||
"password-title": "Password",
|
||||
"multi-factor-authentication-title": "MFA",
|
||||
|
||||
@@ -15,6 +15,7 @@ class BBlob extends AbstractBeccaEntity<BBlob> {
|
||||
|
||||
content!: string | Buffer;
|
||||
contentLength!: number;
|
||||
textRepresentation?: string | null;
|
||||
|
||||
constructor(row: BlobRow) {
|
||||
super();
|
||||
@@ -25,6 +26,7 @@ class BBlob extends AbstractBeccaEntity<BBlob> {
|
||||
this.blobId = row.blobId;
|
||||
this.content = row.content;
|
||||
this.contentLength = row.contentLength;
|
||||
this.textRepresentation = row.textRepresentation;
|
||||
this.dateModified = row.dateModified;
|
||||
this.utcDateModified = row.utcDateModified;
|
||||
}
|
||||
@@ -34,10 +36,16 @@ class BBlob extends AbstractBeccaEntity<BBlob> {
|
||||
blobId: this.blobId,
|
||||
content: this.content || null,
|
||||
contentLength: this.contentLength,
|
||||
textRepresentation: this.textRepresentation || null,
|
||||
dateModified: this.dateModified,
|
||||
utcDateModified: this.utcDateModified
|
||||
};
|
||||
}
|
||||
|
||||
protected getPojoToSave() {
|
||||
const { contentLength: _, ...pojo } = this.getPojo();
|
||||
return pojo;
|
||||
}
|
||||
}
|
||||
|
||||
export default BBlob;
|
||||
|
||||
@@ -6,6 +6,13 @@
|
||||
|
||||
// Migrations should be kept in descending order, so the latest migration is first.
|
||||
const MIGRATIONS: (SqlMigration | JsMigration)[] = [
|
||||
// Add text representation column to blobs table
|
||||
{
|
||||
version: 236,
|
||||
sql: /*sql*/`\
|
||||
ALTER TABLE blobs ADD COLUMN textRepresentation TEXT DEFAULT NULL;
|
||||
`
|
||||
},
|
||||
// Add missing database indices for query performance
|
||||
{
|
||||
version: 235,
|
||||
|
||||
56
apps/server/src/routes/api/ocr.spec.ts
Normal file
56
apps/server/src/routes/api/ocr.spec.ts
Normal file
@@ -0,0 +1,56 @@
|
||||
import { describe, expect, it, vi, beforeEach } from "vitest";
|
||||
import ocrRoutes from "./ocr.js";
|
||||
|
||||
// Mock the OCR service
|
||||
vi.mock("../../services/ocr/ocr_service.js", () => ({
|
||||
default: {
|
||||
startBatchProcessing: vi.fn(() => Promise.resolve({ success: true })),
|
||||
getBatchProgress: vi.fn(() => ({ inProgress: false, total: 0, processed: 0 }))
|
||||
}
|
||||
}));
|
||||
|
||||
// Mock becca
|
||||
vi.mock("../../becca/becca.js", () => ({
|
||||
default: {}
|
||||
}));
|
||||
|
||||
// Mock sql
|
||||
vi.mock("../../services/sql.js", () => ({
|
||||
default: {
|
||||
getRow: vi.fn()
|
||||
}
|
||||
}));
|
||||
|
||||
// Mock log
|
||||
vi.mock("../../services/log.js", () => ({
|
||||
default: {
|
||||
error: vi.fn()
|
||||
}
|
||||
}));
|
||||
|
||||
describe("OCR API", () => {
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
it("should return success for batch processing", async () => {
|
||||
const result = await ocrRoutes.batchProcessOCR();
|
||||
expect(result).toEqual({ success: true });
|
||||
});
|
||||
|
||||
it("should return batch progress", async () => {
|
||||
const result = await ocrRoutes.getBatchProgress();
|
||||
expect(result).toEqual({ inProgress: false, total: 0, processed: 0 });
|
||||
});
|
||||
|
||||
it("should return 400 when batch processing fails", async () => {
|
||||
const ocrService = await import("../../services/ocr/ocr_service.js");
|
||||
vi.mocked(ocrService.default.startBatchProcessing).mockResolvedValueOnce({
|
||||
success: false,
|
||||
message: "No images found that need OCR processing"
|
||||
});
|
||||
|
||||
const result = await ocrRoutes.batchProcessOCR();
|
||||
expect(result).toEqual([400, { success: false, message: "No images found that need OCR processing" }]);
|
||||
});
|
||||
});
|
||||
241
apps/server/src/routes/api/ocr.ts
Normal file
241
apps/server/src/routes/api/ocr.ts
Normal file
@@ -0,0 +1,241 @@
|
||||
import { TextRepresentationResponse } from "@triliumnext/commons";
|
||||
import type { Request } from "express";
|
||||
|
||||
import becca from "../../becca/becca.js";
|
||||
import ocrService from "../../services/ocr/ocr_service.js";
|
||||
import sql from "../../services/sql.js";
|
||||
|
||||
/**
|
||||
* @swagger
|
||||
* /api/ocr/process-note/{noteId}:
|
||||
* post:
|
||||
* summary: Process OCR for a specific note
|
||||
* operationId: ocr-process-note
|
||||
* parameters:
|
||||
* - name: noteId
|
||||
* in: path
|
||||
* required: true
|
||||
* schema:
|
||||
* type: string
|
||||
* description: ID of the note to process
|
||||
* requestBody:
|
||||
* required: false
|
||||
* content:
|
||||
* application/json:
|
||||
* schema:
|
||||
* type: object
|
||||
* properties:
|
||||
* language:
|
||||
* type: string
|
||||
* description: >
|
||||
* Tesseract language code to use (e.g. 'eng', 'fra', 'deu', 'eng+fra').
|
||||
* If omitted, the language is resolved automatically from the note's language label,
|
||||
* the enabled content languages, or the UI locale.
|
||||
* forceReprocess:
|
||||
* type: boolean
|
||||
* description: Force reprocessing even if OCR already exists
|
||||
* default: false
|
||||
* responses:
|
||||
* '200':
|
||||
* description: OCR processing completed successfully
|
||||
* '400':
|
||||
* description: Bad request - unsupported file type
|
||||
* '404':
|
||||
* description: Note not found
|
||||
* '500':
|
||||
* description: Internal server error
|
||||
* security:
|
||||
* - session: []
|
||||
* tags: ["ocr"]
|
||||
*/
|
||||
async function processNoteOCR(req: Request<{ noteId: string }>) {
|
||||
const { noteId } = req.params;
|
||||
const { language, forceReprocess = false } = req.body || {};
|
||||
|
||||
const note = becca.getNote(noteId);
|
||||
if (!note) {
|
||||
return [404, { success: false, message: 'Note not found' }];
|
||||
}
|
||||
|
||||
const result = await ocrService.processNoteOCR(noteId, { language, forceReprocess });
|
||||
if (!result) {
|
||||
return [400, { success: false, message: 'Note is not an image or has unsupported format' }];
|
||||
}
|
||||
|
||||
return { success: true, result };
|
||||
}
|
||||
|
||||
/**
|
||||
* @swagger
|
||||
* /api/ocr/process-attachment/{attachmentId}:
|
||||
* post:
|
||||
* summary: Process OCR for a specific attachment
|
||||
* operationId: ocr-process-attachment
|
||||
* parameters:
|
||||
* - name: attachmentId
|
||||
* in: path
|
||||
* required: true
|
||||
* schema:
|
||||
* type: string
|
||||
* description: ID of the attachment to process
|
||||
* requestBody:
|
||||
* required: false
|
||||
* content:
|
||||
* application/json:
|
||||
* schema:
|
||||
* type: object
|
||||
* properties:
|
||||
* language:
|
||||
* type: string
|
||||
* description: >
|
||||
* Tesseract language code to use (e.g. 'eng', 'fra', 'deu', 'eng+fra').
|
||||
* If omitted, the language is resolved automatically from the owner note's language label,
|
||||
* the enabled content languages, or the UI locale.
|
||||
* forceReprocess:
|
||||
* type: boolean
|
||||
* description: Force reprocessing even if OCR already exists
|
||||
* default: false
|
||||
* responses:
|
||||
* '200':
|
||||
* description: OCR processing completed successfully
|
||||
* '400':
|
||||
* description: Bad request - unsupported file type
|
||||
* '404':
|
||||
* description: Attachment not found
|
||||
* '500':
|
||||
* description: Internal server error
|
||||
* security:
|
||||
* - session: []
|
||||
* tags: ["ocr"]
|
||||
*/
|
||||
async function processAttachmentOCR(req: Request<{ attachmentId: string }>) {
|
||||
const { attachmentId } = req.params;
|
||||
const { language, forceReprocess = false } = req.body || {};
|
||||
|
||||
const attachment = becca.getAttachment(attachmentId);
|
||||
if (!attachment) {
|
||||
return [404, { success: false, message: 'Attachment not found' }];
|
||||
}
|
||||
|
||||
const result = await ocrService.processAttachmentOCR(attachmentId, { language, forceReprocess });
|
||||
if (!result) {
|
||||
return [400, { success: false, message: 'Attachment is not an image or has unsupported format' }];
|
||||
}
|
||||
|
||||
return { success: true, result };
|
||||
}
|
||||
|
||||
/**
|
||||
* @swagger
|
||||
* /api/ocr/batch-process:
|
||||
* post:
|
||||
* summary: Process OCR for all images without existing OCR results
|
||||
* operationId: ocr-batch-process
|
||||
* responses:
|
||||
* '200':
|
||||
* description: Batch processing initiated successfully
|
||||
* '400':
|
||||
* description: Bad request - OCR disabled or already processing
|
||||
* '500':
|
||||
* description: Internal server error
|
||||
* security:
|
||||
* - session: []
|
||||
* tags: ["ocr"]
|
||||
*/
|
||||
async function batchProcessOCR() {
|
||||
const result = await ocrService.startBatchProcessing();
|
||||
if (!result.success) {
|
||||
return [400, result];
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* @swagger
|
||||
* /api/ocr/batch-progress:
|
||||
* get:
|
||||
* summary: Get batch OCR processing progress
|
||||
* operationId: ocr-batch-progress
|
||||
* responses:
|
||||
* '200':
|
||||
* description: Batch processing progress information
|
||||
* '500':
|
||||
* description: Internal server error
|
||||
* security:
|
||||
* - session: []
|
||||
* tags: ["ocr"]
|
||||
*/
|
||||
async function getBatchProgress() {
|
||||
return ocrService.getBatchProgress();
|
||||
}
|
||||
|
||||
/**
|
||||
* @swagger
|
||||
* /api/ocr/notes/{noteId}/text:
|
||||
* get:
|
||||
* summary: Get OCR text for a specific note
|
||||
* operationId: ocr-get-note-text
|
||||
* parameters:
|
||||
* - name: noteId
|
||||
* in: path
|
||||
* required: true
|
||||
* schema:
|
||||
* type: string
|
||||
* description: Note ID to get OCR text for
|
||||
* responses:
|
||||
* 200:
|
||||
* description: OCR text retrieved successfully
|
||||
* 404:
|
||||
* description: Note not found
|
||||
* tags: ["ocr"]
|
||||
*/
|
||||
function getTextRepresentation(blobId: string | undefined): TextRepresentationResponse {
|
||||
let ocrText: string | null = null;
|
||||
|
||||
if (blobId) {
|
||||
const result = sql.getRow<{
|
||||
textRepresentation: string | null;
|
||||
}>(`
|
||||
SELECT textRepresentation
|
||||
FROM blobs
|
||||
WHERE blobId = ?
|
||||
`, [blobId]);
|
||||
|
||||
if (result) {
|
||||
ocrText = result.textRepresentation;
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
success: true,
|
||||
text: ocrText || '',
|
||||
hasOcr: !!ocrText
|
||||
};
|
||||
}
|
||||
|
||||
async function getNoteOCRText(req: Request<{ noteId: string }>) {
|
||||
const note = becca.getNote(req.params.noteId);
|
||||
if (!note) {
|
||||
return [404, { success: false, message: 'Note not found' }];
|
||||
}
|
||||
|
||||
return getTextRepresentation(note.blobId);
|
||||
}
|
||||
|
||||
async function getAttachmentOCRText(req: Request<{ attachmentId: string }>) {
|
||||
const attachment = becca.getAttachment(req.params.attachmentId);
|
||||
if (!attachment) {
|
||||
return [404, { success: false, message: 'Attachment not found' }];
|
||||
}
|
||||
|
||||
return getTextRepresentation(attachment.blobId);
|
||||
}
|
||||
|
||||
export default {
|
||||
processNoteOCR,
|
||||
processAttachmentOCR,
|
||||
batchProcessOCR,
|
||||
getBatchProgress,
|
||||
getNoteOCRText,
|
||||
getAttachmentOCRText
|
||||
};
|
||||
@@ -105,7 +105,11 @@ const ALLOWED_OPTIONS = new Set<OptionNames>([
|
||||
"newLayout",
|
||||
"mfaEnabled",
|
||||
"mfaMethod",
|
||||
"llmProviders"
|
||||
"llmProviders",
|
||||
|
||||
// OCR options
|
||||
"ocrAutoProcessImages",
|
||||
"ocrMinConfidence"
|
||||
]);
|
||||
|
||||
function getOptions() {
|
||||
|
||||
@@ -39,6 +39,7 @@ import loginApiRoute from "./api/login.js";
|
||||
import metricsRoute from "./api/metrics.js";
|
||||
import noteMapRoute from "./api/note_map.js";
|
||||
import notesApiRoute from "./api/notes.js";
|
||||
import ocrRoute from "./api/ocr.js";
|
||||
import optionsApiRoute from "./api/options.js";
|
||||
import otherRoute from "./api/other.js";
|
||||
import passwordApiRoute from "./api/password.js";
|
||||
@@ -376,6 +377,14 @@ function register(app: express.Application) {
|
||||
etapiBackupRoute.register(router);
|
||||
etapiMetricsRoute.register(router);
|
||||
|
||||
// OCR API
|
||||
asyncApiRoute(PST, "/api/ocr/process-note/:noteId", ocrRoute.processNoteOCR);
|
||||
asyncApiRoute(PST, "/api/ocr/process-attachment/:attachmentId", ocrRoute.processAttachmentOCR);
|
||||
asyncApiRoute(PST, "/api/ocr/batch-process", ocrRoute.batchProcessOCR);
|
||||
asyncApiRoute(GET, "/api/ocr/batch-progress", ocrRoute.getBatchProgress);
|
||||
asyncApiRoute(GET, "/api/ocr/notes/:noteId/text", ocrRoute.getNoteOCRText);
|
||||
asyncApiRoute(GET, "/api/ocr/attachments/:attachmentId/text", ocrRoute.getAttachmentOCRText);
|
||||
|
||||
app.use("", router);
|
||||
}
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ import packageJson from "../../package.json" with { type: "json" };
|
||||
import build from "./build.js";
|
||||
import dataDir from "./data_dir.js";
|
||||
|
||||
const APP_DB_VERSION = 235;
|
||||
const APP_DB_VERSION = 236;
|
||||
const SYNC_VERSION = 37;
|
||||
const CLIPPER_PROTOCOL_VERSION = "1.0";
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
export interface Blob {
|
||||
blobId: string;
|
||||
content: string | Buffer;
|
||||
textRepresentation?: string | null;
|
||||
utcDateModified: string;
|
||||
}
|
||||
|
||||
@@ -50,8 +50,8 @@ function processContent(content: Buffer | string | null, isProtected: boolean, i
|
||||
}
|
||||
}
|
||||
|
||||
function calculateContentHash({ blobId, content }: Blob) {
|
||||
return hash(`${blobId}|${content.toString()}`);
|
||||
function calculateContentHash({ blobId, content, textRepresentation }: Blob) {
|
||||
return hash(`${blobId}|${content.toString()}|${textRepresentation ?? ""}`);
|
||||
}
|
||||
|
||||
export default {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { describe, it, expect, beforeEach, vi } from "vitest";
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
import type { getTriliumDataDir as getTriliumDataDirType, getDataDirs as getDataDirsType, getPlatformAppDataDir as getPlatformAppDataDirType } from "./data_dir.js";
|
||||
import type { getDataDirs as getDataDirsType, getPlatformAppDataDir as getPlatformAppDataDirType,getTriliumDataDir as getTriliumDataDirType } from "./data_dir.js";
|
||||
|
||||
describe("data_dir.ts unit tests", async () => {
|
||||
let getTriliumDataDir: typeof getTriliumDataDirType;
|
||||
@@ -277,7 +277,7 @@ describe("data_dir.ts unit tests", async () => {
|
||||
});
|
||||
|
||||
describe("#getDataDirs()", () => {
|
||||
const envKeys: Omit<keyof ReturnType<typeof getDataDirs>, "TRILIUM_DATA_DIR">[] = [ "DOCUMENT_PATH", "BACKUP_DIR", "LOG_DIR", "ANONYMIZED_DB_DIR", "CONFIG_INI_PATH", "TMP_DIR" ];
|
||||
const envKeys: Omit<keyof ReturnType<typeof getDataDirs>, "TRILIUM_DATA_DIR">[] = [ "DOCUMENT_PATH", "BACKUP_DIR", "LOG_DIR", "ANONYMIZED_DB_DIR", "CONFIG_INI_PATH", "TMP_DIR", "OCR_CACHE_DIR" ];
|
||||
|
||||
const setMockedEnv = (prefix: string | null) => {
|
||||
envKeys.forEach((key) => {
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
"use strict";
|
||||
|
||||
/*
|
||||
* This file resolves trilium data path in this order of priority:
|
||||
* - case A) if TRILIUM_DATA_DIR environment variable exists, then its value is used as the path
|
||||
@@ -8,8 +6,8 @@
|
||||
* - case D) as a fallback if the previous step fails, we'll use home dir
|
||||
*/
|
||||
|
||||
import os from "os";
|
||||
import fs from "fs";
|
||||
import os from "os";
|
||||
import { join as pathJoin } from "path";
|
||||
|
||||
const DIR_NAME = "trilium-data";
|
||||
@@ -43,13 +41,14 @@ export function getTriliumDataDir(dataDirName: string) {
|
||||
|
||||
export function getDataDirs(TRILIUM_DATA_DIR: string) {
|
||||
const dataDirs = {
|
||||
TRILIUM_DATA_DIR: TRILIUM_DATA_DIR,
|
||||
TRILIUM_DATA_DIR,
|
||||
DOCUMENT_PATH: process.env.TRILIUM_DOCUMENT_PATH || pathJoin(TRILIUM_DATA_DIR, "document.db"),
|
||||
BACKUP_DIR: process.env.TRILIUM_BACKUP_DIR || pathJoin(TRILIUM_DATA_DIR, "backup"),
|
||||
LOG_DIR: process.env.TRILIUM_LOG_DIR || pathJoin(TRILIUM_DATA_DIR, "log"),
|
||||
TMP_DIR: process.env.TRILIUM_TMP_DIR || pathJoin(TRILIUM_DATA_DIR, "tmp"),
|
||||
ANONYMIZED_DB_DIR: process.env.TRILIUM_ANONYMIZED_DB_DIR || pathJoin(TRILIUM_DATA_DIR, "anonymized-db"),
|
||||
CONFIG_INI_PATH: process.env.TRILIUM_CONFIG_INI_PATH || pathJoin(TRILIUM_DATA_DIR, "config.ini")
|
||||
CONFIG_INI_PATH: process.env.TRILIUM_CONFIG_INI_PATH || pathJoin(TRILIUM_DATA_DIR, "config.ini"),
|
||||
OCR_CACHE_DIR: process.env.TRILIUM_OCR_CACHE_DIR || pathJoin(TRILIUM_DATA_DIR, "ocr-cache")
|
||||
} as const;
|
||||
|
||||
createDirIfNotExisting(dataDirs.TMP_DIR);
|
||||
|
||||
@@ -146,7 +146,7 @@ function fillEntityChanges(entityName: string, entityPrimaryKey: string, conditi
|
||||
};
|
||||
|
||||
if (entityName === "blobs") {
|
||||
const blob = sql.getRow<Blob>("SELECT blobId, content, utcDateModified FROM blobs WHERE blobId = ?", [entityId]);
|
||||
const blob = sql.getRow<Blob>("SELECT blobId, content, textRepresentation, utcDateModified FROM blobs WHERE blobId = ?", [entityId]);
|
||||
ec.hash = blobService.calculateContentHash(blob);
|
||||
ec.utcDateChanged = blob.utcDateModified;
|
||||
ec.isSynced = true; // blobs are always synced
|
||||
|
||||
@@ -6,6 +6,9 @@ import becca from "../becca/becca.js";
|
||||
import BAttribute from "../becca/entities/battribute.js";
|
||||
import hiddenSubtreeService from "./hidden_subtree.js";
|
||||
import oneTimeTimer from "./one_time_timer.js";
|
||||
import ocrService from "./ocr/ocr_service.js";
|
||||
import optionService from "./options.js";
|
||||
import log from "./log.js";
|
||||
import type BNote from "../becca/entities/bnote.js";
|
||||
import type AbstractBeccaEntity from "../becca/entities/abstract_becca_entity.js";
|
||||
import type { DefinitionObject } from "./promoted_attribute_definition_interface.js";
|
||||
@@ -137,9 +140,35 @@ eventService.subscribe(eventService.ENTITY_CREATED, ({ entityName, entity }) =>
|
||||
}
|
||||
} else if (entityName === "notes") {
|
||||
runAttachedRelations(entity, "runOnNoteCreation", entity);
|
||||
|
||||
// Note: OCR processing for images is now handled in image.ts during image processing
|
||||
// OCR processing for files remains here since they don't go through image processing
|
||||
if (entity.type === 'file' && optionService.getOptionBool("ocrAutoProcessImages")) {
|
||||
autoProcessOCR(entity.mime, () => ocrService.processNoteOCR(entity.noteId), `file note ${entity.noteId}`);
|
||||
}
|
||||
} else if (entityName === "attachments") {
|
||||
// Image attachments are handled in image.ts after async image processing sets the real MIME type.
|
||||
// Only handle non-image (file) attachments here.
|
||||
if (entity.role === "file" && optionService.getOptionBool("ocrAutoProcessImages")) {
|
||||
autoProcessOCR(entity.mime, () => ocrService.processAttachmentOCR(entity.attachmentId), `attachment ${entity.attachmentId}`);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
function autoProcessOCR(mime: string, process: () => Promise<unknown>, entityDescription: string) {
|
||||
const supportedMimeTypes = ocrService.getAllSupportedMimeTypes();
|
||||
|
||||
if (mime && supportedMimeTypes.includes(mime)) {
|
||||
process().then(result => {
|
||||
if (result) {
|
||||
log.info(`Automatically processed OCR for ${entityDescription} with MIME type ${mime}`);
|
||||
}
|
||||
}).catch(error => {
|
||||
log.error(`Failed to automatically process OCR for ${entityDescription}: ${error}`);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
eventService.subscribe(eventService.CHILD_NOTE_CREATED, ({ parentNote, childNote }) => {
|
||||
runAttachedRelations(parentNote, "runOnChildNoteCreation", childNote);
|
||||
});
|
||||
|
||||
@@ -246,7 +246,8 @@ function buildHiddenSubtreeDefinition(helpSubtree: HiddenSubtreeItem[]): HiddenS
|
||||
{ id: "_optionsShortcuts", title: t("hidden-subtree.shortcuts-title"), type: "contentWidget", icon: "bxs-keyboard" },
|
||||
{ id: "_optionsTextNotes", title: t("hidden-subtree.text-notes"), type: "contentWidget", icon: "bx-text" },
|
||||
{ id: "_optionsCodeNotes", title: t("hidden-subtree.code-notes-title"), type: "contentWidget", icon: "bx-code" },
|
||||
{ id: "_optionsImages", title: t("hidden-subtree.images-title"), type: "contentWidget", icon: "bx-image" },
|
||||
{ id: "_optionsImages", title: "Images", type: "contentWidget", enforceDeleted: true },
|
||||
{ id: "_optionsMedia", title: t("hidden-subtree.images-title"), type: "contentWidget", icon: "bx-image" },
|
||||
{ id: "_optionsSpellcheck", title: t("hidden-subtree.spellcheck-title"), type: "contentWidget", icon: "bx-check-double" },
|
||||
{ id: "_optionsPassword", title: t("hidden-subtree.password-title"), type: "contentWidget", icon: "bx-lock" },
|
||||
{ id: '_optionsMFA', title: t('hidden-subtree.multi-factor-authentication-title'), type: 'contentWidget', icon: 'bx-lock ' },
|
||||
|
||||
@@ -1,17 +1,17 @@
|
||||
"use strict";
|
||||
import imageType from "image-type";
|
||||
import isAnimated from "is-animated";
|
||||
import isSvg from "is-svg";
|
||||
import { Jimp } from "jimp";
|
||||
import sanitizeFilename from "sanitize-filename";
|
||||
|
||||
import becca from "../becca/becca.js";
|
||||
import log from "./log.js";
|
||||
import protectedSessionService from "./protected_session.js";
|
||||
import noteService from "./notes.js";
|
||||
import optionService from "./options.js";
|
||||
import sql from "./sql.js";
|
||||
import { Jimp } from "jimp";
|
||||
import imageType from "image-type";
|
||||
import sanitizeFilename from "sanitize-filename";
|
||||
import isSvg from "is-svg";
|
||||
import isAnimated from "is-animated";
|
||||
import htmlSanitizer from "./html_sanitizer.js";
|
||||
import log from "./log.js";
|
||||
import noteService from "./notes.js";
|
||||
import ocrService from "./ocr/ocr_service.js";
|
||||
import optionService from "./options.js";
|
||||
import protectedSessionService from "./protected_session.js";
|
||||
import sql from "./sql.js";
|
||||
|
||||
async function processImage(uploadBuffer: Buffer, originalName: string, shrinkImageSwitch: boolean) {
|
||||
const compressImages = optionService.getOptionBool("compressImages");
|
||||
@@ -46,9 +46,8 @@ async function processImage(uploadBuffer: Buffer, originalName: string, shrinkIm
|
||||
async function getImageType(buffer: Buffer) {
|
||||
if (isSvg(buffer.toString())) {
|
||||
return { ext: "svg" };
|
||||
} else {
|
||||
return (await imageType(buffer)) || { ext: "jpg" }; // optimistic JPG default
|
||||
}
|
||||
return (await imageType(buffer)) || { ext: "jpg" }; // optimistic JPG default
|
||||
}
|
||||
|
||||
function getImageMimeFromExtension(ext: string) {
|
||||
@@ -79,6 +78,8 @@ function updateImage(noteId: string, uploadBuffer: Buffer, originalName: string)
|
||||
|
||||
note.setContent(buffer);
|
||||
});
|
||||
|
||||
scheduleOcrForNote(noteId);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -121,6 +122,8 @@ function saveImage(parentNoteId: string, uploadBuffer: Buffer, originalName: str
|
||||
|
||||
note.setContent(buffer, { forceSave: true });
|
||||
});
|
||||
|
||||
scheduleOcrForNote(note.noteId);
|
||||
});
|
||||
|
||||
return {
|
||||
@@ -159,13 +162,14 @@ function saveImageToAttachment(noteId: string, uploadBuffer: Buffer, originalNam
|
||||
}, 5000);
|
||||
|
||||
// resizing images asynchronously since JIMP does not support sync operation
|
||||
const attachmentId = attachment.attachmentId;
|
||||
processImage(uploadBuffer, originalName, !!shrinkImageSwitch).then(({ buffer, imageFormat }) => {
|
||||
sql.transactional(() => {
|
||||
// re-read, might be changed in the meantime
|
||||
if (!attachment.attachmentId) {
|
||||
if (!attachmentId) {
|
||||
throw new Error("Missing attachment ID.");
|
||||
}
|
||||
attachment = becca.getAttachmentOrThrow(attachment.attachmentId);
|
||||
attachment = becca.getAttachmentOrThrow(attachmentId);
|
||||
|
||||
attachment.mime = getImageMimeFromExtension(imageFormat.ext);
|
||||
|
||||
@@ -176,11 +180,37 @@ function saveImageToAttachment(noteId: string, uploadBuffer: Buffer, originalNam
|
||||
|
||||
attachment.setContent(buffer, { forceSave: true });
|
||||
});
|
||||
|
||||
scheduleOcrForAttachment(attachmentId);
|
||||
});
|
||||
|
||||
return attachment;
|
||||
}
|
||||
|
||||
function scheduleOcrForNote(noteId: string) {
|
||||
if (optionService.getOptionBool("ocrAutoProcessImages")) {
|
||||
setImmediate(async () => {
|
||||
try {
|
||||
await ocrService.processNoteOCR(noteId);
|
||||
} catch (error) {
|
||||
log.error(`Failed to process OCR for note ${noteId}: ${error}`);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
function scheduleOcrForAttachment(attachmentId: string | undefined) {
|
||||
if (attachmentId && optionService.getOptionBool("ocrAutoProcessImages")) {
|
||||
setImmediate(async () => {
|
||||
try {
|
||||
await ocrService.processAttachmentOCR(attachmentId);
|
||||
} catch (error) {
|
||||
log.error(`Failed to process OCR for attachment ${attachmentId}: ${error}`);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
async function shrinkImage(buffer: Buffer, originalName: string) {
|
||||
let jpegQuality = optionService.getOptionInt("imageJpegQuality", 0);
|
||||
|
||||
|
||||
@@ -16,9 +16,14 @@ import searchService from "../../search/services/search.js";
|
||||
* Convert note content to a format suitable for LLM consumption.
|
||||
* Text notes are converted from HTML to Markdown to reduce token usage.
|
||||
*/
|
||||
export function getNoteContentForLlm(note: { type: string; getContent: () => string | Buffer }) {
|
||||
export function getNoteContentForLlm(note: { type: string; blobId?: string; getContent: () => string | Buffer }) {
|
||||
const content = note.getContent();
|
||||
if (typeof content !== "string") {
|
||||
// For binary content (images, files), use extracted text if available.
|
||||
const blob = note.blobId ? becca.getBlob({ blobId: note.blobId }) : null;
|
||||
if (blob?.textRepresentation) {
|
||||
return `[extracted text from ${note.type}]\n${blob.textRepresentation}`;
|
||||
}
|
||||
return "[binary content]";
|
||||
}
|
||||
if (note.type === "text") {
|
||||
|
||||
450
apps/server/src/services/ocr/ocr_service.spec.ts
Normal file
450
apps/server/src/services/ocr/ocr_service.spec.ts
Normal file
@@ -0,0 +1,450 @@
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
|
||||
// Mock Tesseract.js
|
||||
const mockWorker = {
|
||||
recognize: vi.fn(),
|
||||
terminate: vi.fn(),
|
||||
reinitialize: vi.fn()
|
||||
};
|
||||
|
||||
const mockTesseract = {
|
||||
createWorker: vi.fn().mockResolvedValue(mockWorker)
|
||||
};
|
||||
|
||||
vi.mock('tesseract.js', () => ({
|
||||
default: mockTesseract
|
||||
}));
|
||||
|
||||
// Mock dependencies
|
||||
const mockOptions = {
|
||||
getOptionBool: vi.fn(),
|
||||
getOption: vi.fn()
|
||||
};
|
||||
|
||||
const mockLog = {
|
||||
info: vi.fn(),
|
||||
error: vi.fn()
|
||||
};
|
||||
|
||||
const mockSql = {
|
||||
execute: vi.fn(),
|
||||
getRow: vi.fn(),
|
||||
getRows: vi.fn(),
|
||||
getColumn: vi.fn()
|
||||
};
|
||||
|
||||
const mockBecca = {
|
||||
getNote: vi.fn(),
|
||||
getAttachment: vi.fn(),
|
||||
getBlob: vi.fn()
|
||||
};
|
||||
|
||||
const mockBlobService = {
|
||||
calculateContentHash: vi.fn().mockReturnValue('hash123')
|
||||
};
|
||||
|
||||
const mockEntityChangesService = {
|
||||
putEntityChange: vi.fn()
|
||||
};
|
||||
|
||||
vi.mock('../options.js', () => ({
|
||||
default: mockOptions
|
||||
}));
|
||||
|
||||
vi.mock('../log.js', () => ({
|
||||
default: mockLog
|
||||
}));
|
||||
|
||||
vi.mock('../sql.js', () => ({
|
||||
default: mockSql
|
||||
}));
|
||||
|
||||
vi.mock('../../becca/becca.js', () => ({
|
||||
default: mockBecca
|
||||
}));
|
||||
|
||||
vi.mock('../blob.js', () => ({
|
||||
default: mockBlobService
|
||||
}));
|
||||
|
||||
vi.mock('../entity_changes.js', () => ({
|
||||
default: mockEntityChangesService
|
||||
}));
|
||||
|
||||
// Import the service after mocking
|
||||
let ocrService: typeof import('./ocr_service.js').default;
|
||||
|
||||
beforeEach(async () => {
|
||||
vi.clearAllMocks();
|
||||
|
||||
// Reset mock implementations
|
||||
mockOptions.getOptionBool.mockReturnValue(true);
|
||||
mockOptions.getOption.mockImplementation((name: string) => {
|
||||
if (name === 'ocrMinConfidence') return '0';
|
||||
return 'eng';
|
||||
});
|
||||
mockSql.execute.mockImplementation(() => ({ lastInsertRowid: 1 }));
|
||||
mockSql.getRow.mockReturnValue(null);
|
||||
mockSql.getRows.mockReturnValue([]);
|
||||
mockSql.getColumn.mockReturnValue([]);
|
||||
|
||||
// Mock getBlob for putBlobEntityChange
|
||||
mockBecca.getBlob.mockReturnValue({
|
||||
blobId: 'blob123',
|
||||
content: Buffer.from('data'),
|
||||
textRepresentation: null,
|
||||
utcDateModified: '2025-01-01'
|
||||
});
|
||||
|
||||
mockTesseract.createWorker.mockImplementation(async () => {
|
||||
return mockWorker;
|
||||
});
|
||||
|
||||
// Dynamically import the service to ensure mocks are applied
|
||||
const module = await import('./ocr_service.js');
|
||||
ocrService = module.default;
|
||||
|
||||
// Reset the OCR service state
|
||||
(ocrService as any).batchProcessingState = {
|
||||
inProgress: false,
|
||||
total: 0,
|
||||
processed: 0
|
||||
};
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
vi.restoreAllMocks();
|
||||
});
|
||||
|
||||
describe('OCRService', () => {
|
||||
describe('extractTextFromFile', () => {
|
||||
const mockImageBuffer = Buffer.from('fake-image-data');
|
||||
|
||||
it('should extract text successfully with default options', async () => {
|
||||
const mockResult = {
|
||||
data: {
|
||||
text: 'Extracted text from image',
|
||||
confidence: 95,
|
||||
words: [{ text: 'Extracted', confidence: 95 }, { text: 'text', confidence: 95 }, { text: 'from', confidence: 95 }, { text: 'image', confidence: 95 }]
|
||||
}
|
||||
};
|
||||
mockWorker.recognize.mockResolvedValue(mockResult);
|
||||
|
||||
const result = await ocrService.extractTextFromFile(mockImageBuffer, 'image/jpeg');
|
||||
|
||||
expect(result).toBeDefined();
|
||||
expect(result.text).toBe('Extracted text from image');
|
||||
expect(result.extractedAt).toEqual(expect.any(String));
|
||||
});
|
||||
|
||||
it('should handle OCR recognition errors', async () => {
|
||||
const error = new Error('OCR recognition failed');
|
||||
mockWorker.recognize.mockRejectedValue(error);
|
||||
|
||||
await expect(ocrService.extractTextFromFile(mockImageBuffer, 'image/jpeg')).rejects.toThrow('OCR recognition failed');
|
||||
expect(mockLog.error).toHaveBeenCalledWith('Image OCR text extraction failed: Error: OCR recognition failed');
|
||||
});
|
||||
});
|
||||
|
||||
describe('storeOCRResult', () => {
|
||||
it('should store OCR result in blob successfully', () => {
|
||||
const ocrResult = {
|
||||
text: 'Sample text',
|
||||
confidence: 0.95,
|
||||
extractedAt: '2025-06-10T10:00:00.000Z',
|
||||
language: 'eng'
|
||||
};
|
||||
|
||||
ocrService.storeOCRResult('blob123', ocrResult);
|
||||
|
||||
expect(mockSql.execute).toHaveBeenCalledWith(
|
||||
expect.stringContaining('UPDATE blobs SET textRepresentation = ?'),
|
||||
['Sample text', 'blob123']
|
||||
);
|
||||
});
|
||||
|
||||
it('should handle undefined blobId gracefully', () => {
|
||||
const ocrResult = {
|
||||
text: 'Sample text',
|
||||
confidence: 0.95,
|
||||
extractedAt: '2025-06-10T10:00:00.000Z',
|
||||
language: 'eng'
|
||||
};
|
||||
|
||||
ocrService.storeOCRResult(undefined, ocrResult);
|
||||
|
||||
expect(mockSql.execute).not.toHaveBeenCalled();
|
||||
expect(mockLog.error).toHaveBeenCalledWith('Cannot store OCR result: blobId is undefined');
|
||||
});
|
||||
|
||||
it('should handle database update errors', () => {
|
||||
const error = new Error('Database error');
|
||||
mockSql.execute.mockImplementation(() => {
|
||||
throw error;
|
||||
});
|
||||
|
||||
const ocrResult = {
|
||||
text: 'Sample text',
|
||||
confidence: 0.95,
|
||||
extractedAt: '2025-06-10T10:00:00.000Z',
|
||||
language: 'eng'
|
||||
};
|
||||
|
||||
expect(() => ocrService.storeOCRResult('blob123', ocrResult)).toThrow('Database error');
|
||||
expect(mockLog.error).toHaveBeenCalledWith('Failed to store OCR result for blob blob123: Error: Database error');
|
||||
});
|
||||
});
|
||||
|
||||
describe('processNoteOCR', () => {
|
||||
const mockNote = {
|
||||
noteId: 'note123',
|
||||
type: 'image',
|
||||
mime: 'image/jpeg',
|
||||
blobId: 'blob123',
|
||||
getContent: vi.fn(),
|
||||
getLabelValue: vi.fn().mockReturnValue(null)
|
||||
};
|
||||
|
||||
beforeEach(() => {
|
||||
mockBecca.getNote.mockReturnValue(mockNote);
|
||||
mockNote.getContent.mockReturnValue(Buffer.from('fake-image-data'));
|
||||
mockNote.mime = 'image/jpeg';
|
||||
});
|
||||
|
||||
it('should process note OCR successfully', async () => {
|
||||
mockSql.getRow.mockReturnValue(null);
|
||||
|
||||
const mockOCRResult = {
|
||||
data: {
|
||||
text: 'Note image text',
|
||||
confidence: 90,
|
||||
words: [{ text: 'Note', confidence: 90 }, { text: 'image', confidence: 90 }, { text: 'text', confidence: 90 }]
|
||||
}
|
||||
};
|
||||
mockWorker.recognize.mockResolvedValue(mockOCRResult);
|
||||
|
||||
const result = await ocrService.processNoteOCR('note123');
|
||||
|
||||
expect(result).toBeDefined();
|
||||
expect(result!.text).toBe('Note image text');
|
||||
expect(mockBecca.getNote).toHaveBeenCalledWith('note123');
|
||||
expect(mockNote.getContent).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should skip processing if OCR already exists and forceReprocess is false', async () => {
|
||||
mockSql.getRow.mockReturnValue({ textRepresentation: 'Existing text' });
|
||||
|
||||
const result = await ocrService.processNoteOCR('note123');
|
||||
|
||||
expect(result).toBeNull();
|
||||
expect(mockNote.getContent).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should reprocess if forceReprocess is true', async () => {
|
||||
mockSql.getRow.mockReturnValue({ textRepresentation: 'Existing text' });
|
||||
|
||||
const mockOCRResult = {
|
||||
data: {
|
||||
text: 'New processed text',
|
||||
confidence: 95,
|
||||
words: [{ text: 'New', confidence: 95 }, { text: 'processed', confidence: 95 }, { text: 'text', confidence: 95 }]
|
||||
}
|
||||
};
|
||||
mockWorker.recognize.mockResolvedValue(mockOCRResult);
|
||||
|
||||
const result = await ocrService.processNoteOCR('note123', { forceReprocess: true });
|
||||
|
||||
expect(result?.text).toBe('New processed text');
|
||||
expect(mockNote.getContent).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should return null for non-existent note', async () => {
|
||||
mockBecca.getNote.mockReturnValue(null);
|
||||
|
||||
const result = await ocrService.processNoteOCR('nonexistent');
|
||||
|
||||
expect(result).toBe(null);
|
||||
expect(mockLog.error).toHaveBeenCalledWith('Note nonexistent not found');
|
||||
});
|
||||
|
||||
it('should return null for unsupported MIME type', async () => {
|
||||
mockNote.mime = 'text/plain';
|
||||
|
||||
const result = await ocrService.processNoteOCR('note123');
|
||||
|
||||
expect(result).toBe(null);
|
||||
expect(mockLog.info).toHaveBeenCalledWith('note note123 has unsupported MIME type text/plain for text extraction, skipping');
|
||||
});
|
||||
});
|
||||
|
||||
describe('processAttachmentOCR', () => {
|
||||
const mockAttachment = {
|
||||
attachmentId: 'attach123',
|
||||
ownerId: 'note123',
|
||||
role: 'image',
|
||||
mime: 'image/png',
|
||||
blobId: 'blob456',
|
||||
getContent: vi.fn()
|
||||
};
|
||||
|
||||
beforeEach(() => {
|
||||
mockBecca.getAttachment.mockReturnValue(mockAttachment);
|
||||
mockBecca.getNote.mockReturnValue({ getLabelValue: vi.fn().mockReturnValue(null) });
|
||||
mockAttachment.getContent.mockReturnValue(Buffer.from('fake-image-data'));
|
||||
});
|
||||
|
||||
it('should process attachment OCR successfully', async () => {
|
||||
mockSql.getRow.mockReturnValue(null);
|
||||
|
||||
const mockOCRResult = {
|
||||
data: {
|
||||
text: 'Attachment image text',
|
||||
confidence: 92,
|
||||
words: [{ text: 'Attachment', confidence: 92 }, { text: 'image', confidence: 92 }, { text: 'text', confidence: 92 }]
|
||||
}
|
||||
};
|
||||
mockWorker.recognize.mockResolvedValue(mockOCRResult);
|
||||
|
||||
const result = await ocrService.processAttachmentOCR('attach123');
|
||||
|
||||
expect(result).toBeDefined();
|
||||
expect(result!.text).toBe('Attachment image text');
|
||||
expect(mockBecca.getAttachment).toHaveBeenCalledWith('attach123');
|
||||
});
|
||||
|
||||
it('should return null for non-existent attachment', async () => {
|
||||
mockBecca.getAttachment.mockReturnValue(null);
|
||||
|
||||
const result = await ocrService.processAttachmentOCR('nonexistent');
|
||||
|
||||
expect(result).toBe(null);
|
||||
expect(mockLog.error).toHaveBeenCalledWith('Attachment nonexistent not found');
|
||||
});
|
||||
});
|
||||
|
||||
describe('Batch Processing', () => {
|
||||
// Helper to mock getBlobsNeedingOCR to return entities
|
||||
function mockBlobsNeedingOCR(notes: Array<{ entityId: string; mimeType: string }>, attachments: Array<{ entityId: string; mimeType: string }> = []) {
|
||||
const noteRows = notes.map(n => ({ blobId: `blob_${n.entityId}`, mimeType: n.mimeType, entityId: n.entityId }));
|
||||
const attachmentRows = attachments.map(a => ({ blobId: `blob_${a.entityId}`, mimeType: a.mimeType, entityId: a.entityId }));
|
||||
mockSql.getRows.mockReturnValueOnce(noteRows);
|
||||
mockSql.getRows.mockReturnValueOnce(attachmentRows);
|
||||
}
|
||||
|
||||
describe('startBatchProcessing', () => {
|
||||
beforeEach(() => {
|
||||
ocrService.cancelBatchProcessing();
|
||||
});
|
||||
|
||||
it('should start batch processing when items are available', async () => {
|
||||
mockBlobsNeedingOCR(
|
||||
[{ entityId: 'note1', mimeType: 'image/jpeg' }]
|
||||
);
|
||||
|
||||
const result = await ocrService.startBatchProcessing();
|
||||
|
||||
expect(result).toEqual({ success: true });
|
||||
});
|
||||
|
||||
it('should return error if batch processing already in progress', async () => {
|
||||
// First call: items for starting
|
||||
mockBlobsNeedingOCR(
|
||||
[{ entityId: 'note1', mimeType: 'image/jpeg' }]
|
||||
);
|
||||
// Mock note for background processing
|
||||
mockBecca.getNote.mockReturnValue({
|
||||
noteId: 'note1', type: 'image', mime: 'image/jpeg', blobId: 'blob1',
|
||||
getContent: vi.fn().mockReturnValue(Buffer.from('data')),
|
||||
getLabelValue: vi.fn().mockReturnValue(null)
|
||||
});
|
||||
mockWorker.recognize.mockResolvedValue({ data: { text: 'text', confidence: 90, words: [] } });
|
||||
|
||||
ocrService.startBatchProcessing();
|
||||
|
||||
const result = await ocrService.startBatchProcessing();
|
||||
|
||||
expect(result).toEqual({
|
||||
success: false,
|
||||
message: 'Batch processing already in progress'
|
||||
});
|
||||
});
|
||||
|
||||
it('should return error if no items need processing', async () => {
|
||||
mockBlobsNeedingOCR([], []);
|
||||
|
||||
const result = await ocrService.startBatchProcessing();
|
||||
|
||||
expect(result).toEqual({
|
||||
success: false,
|
||||
message: 'No images found that need OCR processing'
|
||||
});
|
||||
});
|
||||
|
||||
it('should handle database errors gracefully', async () => {
|
||||
mockSql.getRows.mockImplementation(() => {
|
||||
throw new Error('Database connection failed');
|
||||
});
|
||||
|
||||
const result = await ocrService.startBatchProcessing();
|
||||
|
||||
// getBlobsNeedingOCR catches DB errors and returns [], so startBatchProcessing sees no items
|
||||
expect(result).toEqual({
|
||||
success: false,
|
||||
message: 'No images found that need OCR processing'
|
||||
});
|
||||
expect(mockLog.error).toHaveBeenCalledWith(
|
||||
expect.stringContaining('Failed to get blobs needing OCR')
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('getBatchProgress', () => {
|
||||
it('should return initial progress state', () => {
|
||||
const progress = ocrService.getBatchProgress();
|
||||
|
||||
expect(progress.inProgress).toBe(false);
|
||||
expect(progress.total).toBe(0);
|
||||
expect(progress.processed).toBe(0);
|
||||
});
|
||||
|
||||
it('should return progress with percentage when total > 0', async () => {
|
||||
mockBlobsNeedingOCR(
|
||||
Array.from({ length: 10 }, (_, i) => ({ entityId: `note${i}`, mimeType: 'image/jpeg' }))
|
||||
);
|
||||
|
||||
ocrService.startBatchProcessing();
|
||||
|
||||
const progress = ocrService.getBatchProgress();
|
||||
|
||||
expect(progress.inProgress).toBe(true);
|
||||
expect(progress.total).toBe(10);
|
||||
expect(progress.processed).toBe(0);
|
||||
expect(progress.percentage).toBe(0);
|
||||
expect(progress.startTime).toBeInstanceOf(Date);
|
||||
});
|
||||
});
|
||||
|
||||
describe('cancelBatchProcessing', () => {
|
||||
it('should cancel ongoing batch processing', async () => {
|
||||
mockBlobsNeedingOCR(
|
||||
[{ entityId: 'note1', mimeType: 'image/jpeg' }]
|
||||
);
|
||||
|
||||
ocrService.startBatchProcessing();
|
||||
|
||||
expect(ocrService.getBatchProgress().inProgress).toBe(true);
|
||||
|
||||
ocrService.cancelBatchProcessing();
|
||||
|
||||
expect(ocrService.getBatchProgress().inProgress).toBe(false);
|
||||
expect(mockLog.info).toHaveBeenCalledWith('Batch OCR processing cancelled');
|
||||
});
|
||||
|
||||
it('should do nothing if no batch processing is running', () => {
|
||||
ocrService.cancelBatchProcessing();
|
||||
|
||||
expect(mockLog.info).not.toHaveBeenCalledWith('Batch OCR processing cancelled');
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
462
apps/server/src/services/ocr/ocr_service.ts
Normal file
462
apps/server/src/services/ocr/ocr_service.ts
Normal file
@@ -0,0 +1,462 @@
|
||||
import { getTesseractCode } from '@triliumnext/commons';
|
||||
|
||||
import becca from '../../becca/becca.js';
|
||||
import blobService from '../blob.js';
|
||||
import entityChangesService from '../entity_changes.js';
|
||||
import log from '../log.js';
|
||||
import options from '../options.js';
|
||||
import sql from '../sql.js';
|
||||
import { FileProcessor } from './processors/file_processor.js';
|
||||
import { ImageProcessor } from './processors/image_processor.js';
|
||||
import { OfficeProcessor } from './processors/office_processor.js';
|
||||
import { PDFProcessor } from './processors/pdf_processor.js';
|
||||
|
||||
export interface OCRResult {
|
||||
text: string;
|
||||
confidence: number;
|
||||
extractedAt: string;
|
||||
language?: string;
|
||||
pageCount?: number;
|
||||
}
|
||||
|
||||
export interface OCRProcessingOptions {
|
||||
language?: string;
|
||||
forceReprocess?: boolean;
|
||||
confidence?: number;
|
||||
enablePDFTextExtraction?: boolean;
|
||||
mimeType?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* OCR Service for extracting text from images and other OCR-able objects
|
||||
* Uses Tesseract.js for text recognition
|
||||
*/
|
||||
class OCRService {
|
||||
private processors: Map<string, FileProcessor> = new Map();
|
||||
|
||||
constructor() {
|
||||
this.processors.set('image', new ImageProcessor());
|
||||
this.processors.set('pdf', new PDFProcessor());
|
||||
this.processors.set('office', new OfficeProcessor());
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolves the Tesseract language code(s) for OCR processing.
|
||||
*
|
||||
* Priority:
|
||||
* 1. Explicitly passed `language` option (e.g. from API call)
|
||||
* 2. The note's `language` label (mapped via {@link getTesseractCode})
|
||||
* 3. All enabled content languages joined with `+`
|
||||
* 4. The UI locale
|
||||
* 5. Fallback to `eng`
|
||||
*/
|
||||
resolveOcrLanguage(noteId?: string, explicitLanguage?: string): string {
|
||||
// 1. Explicit language from caller
|
||||
if (explicitLanguage) {
|
||||
return explicitLanguage;
|
||||
}
|
||||
|
||||
// 2. Note's language label
|
||||
if (noteId) {
|
||||
const note = becca.getNote(noteId);
|
||||
const noteLanguage = note?.getLabelValue("language");
|
||||
if (noteLanguage) {
|
||||
const code = getTesseractCode(noteLanguage);
|
||||
if (code) {
|
||||
return code;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 3. All enabled content languages
|
||||
try {
|
||||
const languagesJson = options.getOption("languages");
|
||||
const enabledLanguages = JSON.parse(languagesJson || "[]") as string[];
|
||||
if (enabledLanguages.length > 0) {
|
||||
const codes = enabledLanguages
|
||||
.map((id) => getTesseractCode(id))
|
||||
.filter((code): code is string => code !== null);
|
||||
// Deduplicate (e.g. en + en-GB both map to eng)
|
||||
const unique = [...new Set(codes)];
|
||||
if (unique.length > 0) {
|
||||
return unique.join("+");
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// Fall through
|
||||
}
|
||||
|
||||
// 4. UI locale
|
||||
try {
|
||||
const uiLocale = options.getOption("locale");
|
||||
if (uiLocale) {
|
||||
const code = getTesseractCode(uiLocale);
|
||||
if (code) {
|
||||
return code;
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// Fall through
|
||||
}
|
||||
|
||||
// 5. Fallback
|
||||
return "eng";
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Extract text from file buffer using appropriate processor
|
||||
*/
|
||||
async extractTextFromFile(fileBuffer: Buffer, mimeType: string, options: OCRProcessingOptions = {}): Promise<OCRResult> {
|
||||
log.info(`Starting OCR text extraction for MIME type: ${mimeType} with language: ${options.language || "eng"}`);
|
||||
|
||||
const processor = this.getProcessorForMimeType(mimeType);
|
||||
if (!processor) {
|
||||
throw new Error(`No processor found for MIME type: ${mimeType}`);
|
||||
}
|
||||
|
||||
const result = await processor.extractText(fileBuffer, { ...options, mimeType });
|
||||
|
||||
log.info(`OCR extraction completed. Confidence: ${Math.round(result.confidence * 100)}%, Text length: ${result.text.length}`);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Process OCR for a note (image type)
|
||||
*/
|
||||
async processNoteOCR(noteId: string, options: OCRProcessingOptions = {}): Promise<OCRResult | null> {
|
||||
const note = becca.getNote(noteId);
|
||||
if (!note) {
|
||||
log.error(`Note ${noteId} not found`);
|
||||
return null;
|
||||
}
|
||||
|
||||
return this.processEntityOCR({
|
||||
entityId: noteId,
|
||||
entityType: 'note',
|
||||
category: note.type,
|
||||
mime: note.mime,
|
||||
blobId: note.blobId,
|
||||
languageNoteId: noteId,
|
||||
getContent: () => note.getContent()
|
||||
}, options);
|
||||
}
|
||||
|
||||
/**
|
||||
* Process OCR for an attachment
|
||||
*/
|
||||
async processAttachmentOCR(attachmentId: string, options: OCRProcessingOptions = {}): Promise<OCRResult | null> {
|
||||
const attachment = becca.getAttachment(attachmentId);
|
||||
if (!attachment) {
|
||||
log.error(`Attachment ${attachmentId} not found`);
|
||||
return null;
|
||||
}
|
||||
|
||||
return this.processEntityOCR({
|
||||
entityId: attachmentId,
|
||||
entityType: 'attachment',
|
||||
category: attachment.role,
|
||||
mime: attachment.mime,
|
||||
blobId: attachment.blobId,
|
||||
languageNoteId: attachment.ownerId,
|
||||
getContent: () => attachment.getContent()
|
||||
}, options);
|
||||
}
|
||||
|
||||
/**
 * Shared OCR processing logic for both notes and attachments.
 *
 * Runs cheap pre-checks first (entity category, MIME support, an already
 * stored result) before doing the expensive extraction, then persists the
 * extracted text on the entity's blob via storeOCRResult().
 *
 * @param entity adapter describing the note or attachment to process
 * @param options OCR options; `forceReprocess` bypasses the stored-result check
 * @returns the OCR result, or null when the entity was skipped
 * @throws when content cannot be read or extraction/storage fails
 */
private async processEntityOCR(entity: {
    entityId: string;
    entityType: string;
    category: string;
    mime: string;
    blobId: string | undefined;
    languageNoteId: string;
    getContent: () => string | Buffer;
}, options: OCRProcessingOptions = {}): Promise<OCRResult | null> {
    const { entityId, entityType, category, mime, blobId, languageNoteId } = entity;

    // Only image/file entities (note type or attachment role) are OCR candidates.
    if (!['image', 'file'].includes(category)) {
        log.info(`${entityType} ${entityId} is not an image or file, skipping OCR`);
        return null;
    }

    // No registered processor can handle this MIME type.
    if (!this.getProcessorForMimeType(mime)) {
        log.info(`${entityType} ${entityId} has unsupported MIME type ${mime} for text extraction, skipping`);
        return null;
    }

    // Skip entities whose blob already carries a text representation,
    // unless the caller explicitly asked for reprocessing.
    if (!options.forceReprocess && this.hasStoredOCRResult(blobId)) {
        log.info(`OCR already exists for ${entityType} ${entityId}, skipping`);
        return null;
    }

    try {
        const content = entity.getContent();
        // Extraction needs raw bytes; a string (or empty) result means
        // there is no binary content to OCR.
        if (!content || !(content instanceof Buffer)) {
            throw new Error(`Cannot get content for ${entityType} ${entityId}`);
        }

        const language = this.resolveOcrLanguage(languageNoteId, options.language);
        const ocrResult = await this.extractTextFromFile(content, mime, { ...options, language });

        this.storeOCRResult(blobId, ocrResult);

        return ocrResult;
    } catch (error) {
        // Log with entity context, then rethrow so callers can react.
        log.error(`Failed to process OCR for ${entityType} ${entityId}: ${error}`);
        throw error;
    }
}
|
||||
|
||||
/**
 * Store OCR result in blob.
 *
 * Writes the extracted text into `blobs.textRepresentation` and then emits
 * an entity change so the update propagates through sync.
 *
 * @param blobId target blob; an undefined ID is logged as an error and ignored
 * @param ocrResult extraction result whose `text` field is persisted
 * @throws when the SQL update or the entity-change emission fails
 */
storeOCRResult(blobId: string | undefined, ocrResult: OCRResult): void {
    if (!blobId) {
        log.error('Cannot store OCR result: blobId is undefined');
        return;
    }

    try {
        sql.execute(`
            UPDATE blobs SET textRepresentation = ?
            WHERE blobId = ?
        `, [ocrResult.text, blobId]);

        // Notify the sync subsystem about the changed blob content hash.
        this.putBlobEntityChange(blobId);

        log.info(`Stored OCR result for blob ${blobId}`);
    } catch (error) {
        log.error(`Failed to store OCR result for blob ${blobId}: ${error}`);
        throw error;
    }
}
|
||||
|
||||
/**
|
||||
* Check whether a blob already has a stored text representation.
|
||||
*/
|
||||
private hasStoredOCRResult(blobId: string | undefined): boolean {
|
||||
if (!blobId) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const row = sql.getRow<{ textRepresentation: string | null }>(
|
||||
`SELECT textRepresentation FROM blobs WHERE blobId = ?`,
|
||||
[blobId]
|
||||
);
|
||||
|
||||
return !!row?.textRepresentation;
|
||||
}
|
||||
|
||||
// Batch processing state
// Shared mutable tracker for the background batch OCR run; read by
// getBatchProgress() and cleared by cancelBatchProcessing() to cancel.
private batchProcessingState: {
    /** true while a batch run is active; setting it false cancels the run */
    inProgress: boolean;
    /** total number of items queued for the current run */
    total: number;
    /** number of items handled so far (failed items are counted too) */
    processed: number;
    /** when the current run started; unset before the first run */
    startTime?: Date;
} = {
    inProgress: false,
    total: 0,
    processed: 0
};
|
||||
|
||||
/**
|
||||
* Start batch OCR processing with progress tracking
|
||||
*/
|
||||
async startBatchProcessing(): Promise<{ success: boolean; message?: string }> {
|
||||
if (this.batchProcessingState.inProgress) {
|
||||
return { success: false, message: 'Batch processing already in progress' };
|
||||
}
|
||||
|
||||
try {
|
||||
const blobsNeedingOCR = this.getBlobsNeedingOCR();
|
||||
|
||||
if (blobsNeedingOCR.length === 0) {
|
||||
return { success: false, message: 'No images found that need OCR processing' };
|
||||
}
|
||||
|
||||
this.batchProcessingState = {
|
||||
inProgress: true,
|
||||
total: blobsNeedingOCR.length,
|
||||
processed: 0,
|
||||
startTime: new Date()
|
||||
};
|
||||
|
||||
// Start processing in background
|
||||
this.processBlobs(blobsNeedingOCR).catch(error => {
|
||||
log.error(`Batch processing failed: ${error instanceof Error ? error.message : String(error)}`);
|
||||
}).finally(() => {
|
||||
this.batchProcessingState.inProgress = false;
|
||||
});
|
||||
|
||||
return { success: true };
|
||||
} catch (error) {
|
||||
log.error(`Failed to start batch processing: ${error instanceof Error ? error.message : String(error)}`);
|
||||
return { success: false, message: error instanceof Error ? error.message : String(error) };
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Get batch processing progress.
 *
 * @returns a snapshot of the batch state; `percentage` is present only
 *          once a run with a non-zero total has been started
 */
getBatchProgress(): { inProgress: boolean; total: number; processed: number; percentage?: number; startTime?: Date } {
    // Copy so callers cannot mutate the live state object.
    const snapshot: { inProgress: boolean; total: number; processed: number; percentage?: number; startTime?: Date } = {
        ...this.batchProcessingState
    };

    if (snapshot.total > 0) {
        snapshot.percentage = (snapshot.processed / snapshot.total) * 100;
    }

    return snapshot;
}
|
||||
|
||||
/**
|
||||
* Cancel batch processing
|
||||
*/
|
||||
cancelBatchProcessing(): void {
|
||||
if (this.batchProcessingState.inProgress) {
|
||||
this.batchProcessingState.inProgress = false;
|
||||
log.info('Batch OCR processing cancelled');
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Process a list of blobs sequentially, updating batch progress.
|
||||
*/
|
||||
private async processBlobs(blobs: Array<{ entityType: 'note' | 'attachment'; entityId: string }>): Promise<void> {
|
||||
log.info(`Starting batch OCR processing of ${blobs.length} items...`);
|
||||
|
||||
for (const blob of blobs) {
|
||||
if (!this.batchProcessingState.inProgress) {
|
||||
break;
|
||||
}
|
||||
|
||||
try {
|
||||
await this.processOcrEntity(blob);
|
||||
} catch (error) {
|
||||
log.error(`Failed to process OCR for ${blob.entityType} ${blob.entityId}: ${error}`);
|
||||
}
|
||||
|
||||
this.batchProcessingState.processed++;
|
||||
|
||||
// Small delay to prevent overwhelming the system
|
||||
await new Promise(resolve => setTimeout(resolve, 500));
|
||||
}
|
||||
|
||||
log.info(`Batch OCR processing completed. Processed ${this.batchProcessingState.processed} files.`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Process OCR for a single entity (note or attachment) by type.
|
||||
*/
|
||||
private async processOcrEntity(entity: { entityType: 'note' | 'attachment'; entityId: string }): Promise<void> {
|
||||
if (entity.entityType === 'note') {
|
||||
await this.processNoteOCR(entity.entityId);
|
||||
} else {
|
||||
await this.processAttachmentOCR(entity.entityId);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get processor for a given MIME type
|
||||
*/
|
||||
/**
|
||||
* Notifies the sync system that a blob has changed, without modifying the blob's identity.
|
||||
*/
|
||||
private putBlobEntityChange(blobId: string): void {
|
||||
const blob = becca.getBlob({ blobId });
|
||||
if (!blob || !blob.blobId) return;
|
||||
|
||||
const hash = blobService.calculateContentHash({
|
||||
blobId: blob.blobId,
|
||||
content: blob.content,
|
||||
textRepresentation: blob.textRepresentation,
|
||||
utcDateModified: blob.utcDateModified!
|
||||
});
|
||||
entityChangesService.putEntityChange({
|
||||
entityName: "blobs",
|
||||
entityId: blobId,
|
||||
hash,
|
||||
isErased: false,
|
||||
utcDateChanged: blob.utcDateModified,
|
||||
isSynced: true
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Get the first registered processor that can handle the given MIME type.
 *
 * @returns the matching processor, or null when the MIME type is unsupported
 */
private getProcessorForMimeType(mimeType: string): FileProcessor | null {
    for (const processor of this.processors.values()) {
        if (processor.canProcess(mimeType)) {
            return processor;
        }
    }
    return null;
}
|
||||
|
||||
/**
 * Get all MIME types supported by all registered processors.
 *
 * @returns deduplicated list of every processor's supported MIME types
 */
getAllSupportedMimeTypes(): string[] {
    const mimeTypes = new Set<string>();

    // Union of every processor's types; the Set removes duplicates.
    for (const processor of this.processors.values()) {
        for (const mimeType of processor.getSupportedMimeTypes()) {
            mimeTypes.add(mimeType);
        }
    }

    return [...mimeTypes];
}
|
||||
|
||||
|
||||
/**
|
||||
* Get blobs that need OCR processing (those without text representation)
|
||||
*/
|
||||
getBlobsNeedingOCR(): Array<{ blobId: string; mimeType: string; entityType: 'note' | 'attachment'; entityId: string }> {
|
||||
try {
|
||||
const supportedMimes = this.getAllSupportedMimeTypes();
|
||||
const placeholders = supportedMimes.map(() => '?').join(', ');
|
||||
|
||||
const noteBlobs = sql.getRows<{
|
||||
blobId: string;
|
||||
mimeType: string;
|
||||
entityId: string;
|
||||
}>(`
|
||||
SELECT n.blobId, n.mime as mimeType, n.noteId as entityId
|
||||
FROM notes n
|
||||
JOIN blobs b ON n.blobId = b.blobId
|
||||
WHERE (n.type = 'image' OR (n.type = 'file' AND n.mime IN (${placeholders})))
|
||||
AND n.isDeleted = 0
|
||||
AND n.blobId IS NOT NULL
|
||||
AND b.textRepresentation IS NULL
|
||||
`, supportedMimes);
|
||||
|
||||
const attachmentBlobs = sql.getRows<{
|
||||
blobId: string;
|
||||
mimeType: string;
|
||||
entityId: string;
|
||||
}>(`
|
||||
SELECT a.blobId, a.mime as mimeType, a.attachmentId as entityId
|
||||
FROM attachments a
|
||||
JOIN blobs b ON a.blobId = b.blobId
|
||||
WHERE (a.role = 'image' OR (a.role = 'file' AND a.mime IN (${placeholders})))
|
||||
AND a.isDeleted = 0
|
||||
AND a.blobId IS NOT NULL
|
||||
AND b.textRepresentation IS NULL
|
||||
`, supportedMimes);
|
||||
|
||||
// Combine results
|
||||
const result = [
|
||||
...noteBlobs.map(blob => ({ ...blob, entityType: 'note' as const })),
|
||||
...attachmentBlobs.map(blob => ({ ...blob, entityType: 'attachment' as const }))
|
||||
];
|
||||
|
||||
// Return all results (no need to filter by MIME type as we already did in the query)
|
||||
return result;
|
||||
} catch (error) {
|
||||
log.error(`Failed to get blobs needing OCR: ${error}`);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
export default new OCRService();
|
||||
26
apps/server/src/services/ocr/processors/file_processor.ts
Normal file
26
apps/server/src/services/ocr/processors/file_processor.ts
Normal file
@@ -0,0 +1,26 @@
|
||||
import { OCRResult, OCRProcessingOptions } from '../ocr_service.js';
|
||||
|
||||
/**
 * Base class for file processors that extract text from different file types.
 *
 * Concrete implementations (image, PDF, office) are registered with the OCR
 * service, which dispatches each blob to the first processor whose
 * canProcess() accepts the blob's MIME type.
 */
export abstract class FileProcessor {
    /**
     * Check if this processor can handle the given MIME type
     */
    abstract canProcess(mimeType: string): boolean;

    /**
     * Extract text from the given file buffer
     *
     * @param buffer raw file bytes
     * @param options processing options (language, MIME type, …)
     */
    abstract extractText(buffer: Buffer, options: OCRProcessingOptions): Promise<OCRResult>;

    /**
     * Get the processing type identifier (e.g. 'image', 'pdf', 'office')
     */
    abstract getProcessingType(): string;

    /**
     * Get list of MIME types supported by this processor
     */
    abstract getSupportedMimeTypes(): string[];
}
|
||||
160
apps/server/src/services/ocr/processors/image_processor.ts
Normal file
160
apps/server/src/services/ocr/processors/image_processor.ts
Normal file
@@ -0,0 +1,160 @@
|
||||
import fs from 'fs';
|
||||
import Tesseract from 'tesseract.js';
|
||||
|
||||
import dataDirs from '../../data_dir.js';
|
||||
import log from '../../log.js';
|
||||
import options from '../../options.js';
|
||||
import { OCRProcessingOptions,OCRResult } from '../ocr_service.js';
|
||||
import { FileProcessor } from './file_processor.js';
|
||||
|
||||
/**
|
||||
* Image processor for extracting text from image files using Tesseract
|
||||
*/
|
||||
export class ImageProcessor extends FileProcessor {
|
||||
private worker: Tesseract.Worker | null = null;
|
||||
private currentLanguage: string | null = null;
|
||||
private readonly supportedTypes = [
|
||||
'image/jpeg',
|
||||
'image/jpg',
|
||||
'image/png',
|
||||
'image/gif',
|
||||
'image/bmp',
|
||||
'image/tiff',
|
||||
'image/webp'
|
||||
];
|
||||
|
||||
canProcess(mimeType: string): boolean {
|
||||
return this.supportedTypes.includes(mimeType.toLowerCase());
|
||||
}
|
||||
|
||||
getSupportedMimeTypes(): string[] {
|
||||
return [...this.supportedTypes];
|
||||
}
|
||||
|
||||
async extractText(buffer: Buffer, options: OCRProcessingOptions = {}): Promise<OCRResult> {
|
||||
const language = options.language || "eng";
|
||||
await this.ensureWorker(language);
|
||||
|
||||
try {
|
||||
log.info(`Starting image OCR text extraction (language: ${language})...`);
|
||||
|
||||
const result = await this.worker!.recognize(buffer);
|
||||
|
||||
// Filter text based on minimum confidence threshold
|
||||
const { filteredText, overallConfidence } = this.filterTextByConfidence(result.data);
|
||||
|
||||
const ocrResult: OCRResult = {
|
||||
text: filteredText,
|
||||
confidence: overallConfidence,
|
||||
extractedAt: new Date().toISOString(),
|
||||
language,
|
||||
pageCount: 1
|
||||
};
|
||||
|
||||
return ocrResult;
|
||||
|
||||
} catch (error) {
|
||||
log.error(`Image OCR text extraction failed: ${error}`);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
getProcessingType(): string {
|
||||
return 'image';
|
||||
}
|
||||
|
||||
/**
|
||||
* Ensures a Tesseract worker is ready for the given language.
|
||||
* Creates a new worker if none exists or if the language has changed.
|
||||
*/
|
||||
private async ensureWorker(language: string): Promise<void> {
|
||||
if (this.worker && this.currentLanguage === language) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (this.worker) {
|
||||
await this.worker.terminate();
|
||||
}
|
||||
|
||||
fs.mkdirSync(dataDirs.OCR_CACHE_DIR, { recursive: true });
|
||||
|
||||
log.info(`Initializing Tesseract worker for language(s): ${language}`);
|
||||
this.worker = await Tesseract.createWorker(language, 1, {
|
||||
cachePath: dataDirs.OCR_CACHE_DIR,
|
||||
logger: (m: { status: string; progress: number }) => {
|
||||
if (m.status === 'recognizing text') {
|
||||
log.info(`Image OCR progress (${language}): ${Math.round(m.progress * 100)}%`);
|
||||
}
|
||||
}
|
||||
});
|
||||
this.currentLanguage = language;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Filter text based on minimum confidence threshold
|
||||
*/
|
||||
private filterTextByConfidence(data: any): { filteredText: string; overallConfidence: number } {
|
||||
const minConfidence = this.getMinConfidenceThreshold();
|
||||
|
||||
// If no minimum confidence set, return original text
|
||||
if (minConfidence <= 0) {
|
||||
return {
|
||||
filteredText: data.text.trim(),
|
||||
overallConfidence: data.confidence / 100
|
||||
};
|
||||
}
|
||||
|
||||
const filteredWords: string[] = [];
|
||||
const validConfidences: number[] = [];
|
||||
|
||||
// Tesseract provides word-level data
|
||||
if (data.words && Array.isArray(data.words)) {
|
||||
for (const word of data.words) {
|
||||
const wordConfidence = word.confidence / 100; // Convert to decimal
|
||||
|
||||
if (wordConfidence >= minConfidence) {
|
||||
filteredWords.push(word.text);
|
||||
validConfidences.push(wordConfidence);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Fallback: if word-level data not available, use overall confidence
|
||||
const overallConfidence = data.confidence / 100;
|
||||
if (overallConfidence >= minConfidence) {
|
||||
return {
|
||||
filteredText: data.text.trim(),
|
||||
overallConfidence
|
||||
};
|
||||
}
|
||||
log.info(`Entire text filtered out due to low confidence ${overallConfidence} (below threshold ${minConfidence})`);
|
||||
return {
|
||||
filteredText: '',
|
||||
overallConfidence
|
||||
};
|
||||
}
|
||||
|
||||
// Calculate average confidence of accepted words
|
||||
const averageConfidence = validConfidences.length > 0
|
||||
? validConfidences.reduce((sum, conf) => sum + conf, 0) / validConfidences.length
|
||||
: 0;
|
||||
|
||||
const filteredText = filteredWords.join(' ').trim();
|
||||
|
||||
log.info(`Filtered OCR text: ${filteredWords.length} words kept out of ${data.words?.length || 0} total words (min confidence: ${minConfidence})`);
|
||||
|
||||
return {
|
||||
filteredText,
|
||||
overallConfidence: averageConfidence
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Get minimum confidence threshold from options
|
||||
*/
|
||||
private getMinConfidenceThreshold(): number {
|
||||
const minConfidence = options.getOption('ocrMinConfidence') ?? 0;
|
||||
return parseFloat(minConfidence);
|
||||
}
|
||||
|
||||
}
|
||||
70
apps/server/src/services/ocr/processors/office_processor.ts
Normal file
70
apps/server/src/services/ocr/processors/office_processor.ts
Normal file
@@ -0,0 +1,70 @@
|
||||
import { parseExcel } from 'officeparser/dist/parsers/ExcelParser.js';
|
||||
import { parseOpenOffice } from 'officeparser/dist/parsers/OpenOfficeParser.js';
|
||||
import { parsePowerPoint } from 'officeparser/dist/parsers/PowerPointParser.js';
|
||||
import { parseWord } from 'officeparser/dist/parsers/WordParser.js';
|
||||
import type { OfficeParserConfig } from 'officeparser/dist/types.js';
|
||||
|
||||
import log from '../../log.js';
|
||||
import { OCRProcessingOptions, OCRResult } from '../ocr_service.js';
|
||||
import { FileProcessor } from './file_processor.js';
|
||||
|
||||
/** Shape shared by the individual officeparser entry points. */
type Parser = (buffer: Buffer, config: OfficeParserConfig) => Promise<{ toText(): string }>;

// Dispatch table: document MIME type → officeparser parser function.
// Keys are all lowercase; lookups assume normalized MIME types.
const PARSER_BY_MIME: Record<string, Parser> = {
    // Office Open XML
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document': parseWord,
    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': parseExcel,
    'application/vnd.openxmlformats-officedocument.presentationml.presentation': parsePowerPoint,
    // OpenDocument
    'application/vnd.oasis.opendocument.text': parseOpenOffice,
    'application/vnd.oasis.opendocument.spreadsheet': parseOpenOffice,
    'application/vnd.oasis.opendocument.presentation': parseOpenOffice
};

// Settings passed to every parser invocation: keep notes inline in
// document order and join extracted fragments with newlines.
const PARSER_CONFIG: OfficeParserConfig = {
    outputErrorToConsole: false,
    newlineDelimiter: '\n',
    ignoreNotes: false,
    putNotesAtLast: false
};
|
||||
|
||||
/**
|
||||
* Office document processor for extracting text from DOCX/XLSX/PPTX and ODT/ODS/ODP files.
|
||||
* Uses individual parsers from officeparser v6 to avoid pulling in pdfjs-dist.
|
||||
*/
|
||||
export class OfficeProcessor extends FileProcessor {
|
||||
|
||||
canProcess(mimeType: string): boolean {
|
||||
return mimeType in PARSER_BY_MIME;
|
||||
}
|
||||
|
||||
getSupportedMimeTypes(): string[] {
|
||||
return Object.keys(PARSER_BY_MIME);
|
||||
}
|
||||
|
||||
async extractText(buffer: Buffer, options: OCRProcessingOptions = {}): Promise<OCRResult> {
|
||||
const mimeType = options.mimeType;
|
||||
if (!mimeType || !(mimeType in PARSER_BY_MIME)) {
|
||||
throw new Error(`Unsupported MIME type for Office processor: ${mimeType}`);
|
||||
}
|
||||
|
||||
log.info(`Starting Office document text extraction for ${mimeType}...`);
|
||||
|
||||
const parse = PARSER_BY_MIME[mimeType];
|
||||
const ast = await parse(buffer, PARSER_CONFIG);
|
||||
const trimmed = ast.toText().trim();
|
||||
|
||||
return {
|
||||
text: trimmed,
|
||||
confidence: trimmed.length > 0 ? 0.99 : 0,
|
||||
extractedAt: new Date().toISOString(),
|
||||
language: options.language || "eng",
|
||||
pageCount: 1
|
||||
};
|
||||
}
|
||||
|
||||
getProcessingType(): string {
|
||||
return 'office';
|
||||
}
|
||||
|
||||
}
|
||||
39
apps/server/src/services/ocr/processors/pdf_processor.ts
Normal file
39
apps/server/src/services/ocr/processors/pdf_processor.ts
Normal file
@@ -0,0 +1,39 @@
|
||||
import { extractText, getDocumentProxy } from 'unpdf';
|
||||
|
||||
import log from '../../log.js';
|
||||
import { OCRProcessingOptions, OCRResult } from '../ocr_service.js';
|
||||
import { FileProcessor } from './file_processor.js';
|
||||
|
||||
/**
|
||||
* PDF processor for extracting embedded text from PDF files using unpdf.
|
||||
*/
|
||||
export class PDFProcessor extends FileProcessor {
|
||||
|
||||
canProcess(mimeType: string): boolean {
|
||||
return mimeType.toLowerCase() === 'application/pdf';
|
||||
}
|
||||
|
||||
getSupportedMimeTypes(): string[] {
|
||||
return ['application/pdf'];
|
||||
}
|
||||
|
||||
async extractText(buffer: Buffer, options: OCRProcessingOptions = {}): Promise<OCRResult> {
|
||||
log.info('Starting PDF text extraction...');
|
||||
|
||||
const pdf = await getDocumentProxy(new Uint8Array(buffer));
|
||||
const { totalPages, text } = await extractText(pdf, { mergePages: true });
|
||||
|
||||
return {
|
||||
text: text.trim(),
|
||||
confidence: 0.99,
|
||||
extractedAt: new Date().toISOString(),
|
||||
language: options.language || "eng",
|
||||
pageCount: totalPages
|
||||
};
|
||||
}
|
||||
|
||||
getProcessingType(): string {
|
||||
return 'pdf';
|
||||
}
|
||||
|
||||
}
|
||||
@@ -212,7 +212,11 @@ const defaultOptions: DefaultOption[] = [
|
||||
{ name: "experimentalFeatures", value: "[]", isSynced: true },
|
||||
|
||||
// AI / LLM
|
||||
{ name: "llmProviders", value: "[]", isSynced: false }
|
||||
{ name: "llmProviders", value: "[]", isSynced: false },
|
||||
|
||||
// OCR options
|
||||
{ name: "ocrAutoProcessImages", value: "false", isSynced: true },
|
||||
{ name: "ocrMinConfidence", value: "0.75", isSynced: true },
|
||||
];
|
||||
|
||||
/**
|
||||
|
||||
80
apps/server/src/services/search/expressions/ocr_content.ts
Normal file
80
apps/server/src/services/search/expressions/ocr_content.ts
Normal file
@@ -0,0 +1,80 @@
|
||||
import becca from "../../../becca/becca.js";
|
||||
import sql from "../../sql.js";
|
||||
import NoteSet from "../note_set.js";
|
||||
import type SearchContext from "../search_context.js";
|
||||
import Expression from "./expression.js";
|
||||
|
||||
/**
|
||||
* Search expression for finding text within OCR-extracted content (textRepresentation)
|
||||
* from image notes and their attachments.
|
||||
*
|
||||
* Uses a single SQL query to find all noteIds whose own blob or attachment blobs
|
||||
* contain matching text, then intersects with the input note set.
|
||||
*/
|
||||
export default class OCRContentExpression extends Expression {
|
||||
private tokens: string[];
|
||||
|
||||
constructor(tokens: string[]) {
|
||||
super();
|
||||
this.tokens = tokens;
|
||||
}
|
||||
|
||||
execute(inputNoteSet: NoteSet, executionContext: object, searchContext: SearchContext): NoteSet {
|
||||
const resultNoteSet = new NoteSet();
|
||||
const matchingNoteIds = this.findNoteIdsWithMatchingOCR();
|
||||
|
||||
for (const noteId of matchingNoteIds) {
|
||||
const note = becca.notes[noteId];
|
||||
if (note && inputNoteSet.hasNoteId(noteId)) {
|
||||
resultNoteSet.add(note);
|
||||
}
|
||||
}
|
||||
|
||||
if (resultNoteSet.notes.length > 0) {
|
||||
const highlightTokens = this.tokens
|
||||
.filter(token => token.length > 2)
|
||||
.map(token => token.toLowerCase());
|
||||
searchContext.highlightedTokens.push(...highlightTokens);
|
||||
}
|
||||
|
||||
return resultNoteSet;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find all noteIds that have OCR text matching all tokens, in a single query.
|
||||
* Checks both the note's own blob and its attachment blobs.
|
||||
*/
|
||||
private findNoteIdsWithMatchingOCR(): Set<string> {
|
||||
if (this.tokens.length === 0) return new Set();
|
||||
|
||||
// Build WHERE conditions: all tokens must appear in textRepresentation
|
||||
const likeConditions = this.tokens.map(() => `b.textRepresentation LIKE ?`).join(' AND ');
|
||||
const params = this.tokens.map(token => `%${token}%`);
|
||||
|
||||
// Find notes whose own blob matches
|
||||
const noteIds = sql.getColumn<string>(`
|
||||
SELECT n.noteId
|
||||
FROM notes n
|
||||
JOIN blobs b ON n.blobId = b.blobId
|
||||
WHERE b.textRepresentation IS NOT NULL
|
||||
AND n.isDeleted = 0
|
||||
AND ${likeConditions}
|
||||
`, params);
|
||||
|
||||
// Find notes that own attachments whose blob matches
|
||||
const attachmentOwnerIds = sql.getColumn<string>(`
|
||||
SELECT a.ownerId
|
||||
FROM attachments a
|
||||
JOIN blobs b ON a.blobId = b.blobId
|
||||
WHERE b.textRepresentation IS NOT NULL
|
||||
AND a.isDeleted = 0
|
||||
AND ${likeConditions}
|
||||
`, params);
|
||||
|
||||
return new Set([...noteIds, ...attachmentOwnerIds]);
|
||||
}
|
||||
|
||||
toString(): string {
|
||||
return `OCRContent('${this.tokens.join("', '")}')`;
|
||||
}
|
||||
}
|
||||
@@ -1,12 +1,9 @@
|
||||
"use strict";
|
||||
|
||||
import beccaService from "../../becca/becca_service.js";
|
||||
import becca from "../../becca/becca.js";
|
||||
import {
|
||||
normalizeSearchText,
|
||||
calculateOptimizedEditDistance,
|
||||
FUZZY_SEARCH_CONFIG
|
||||
} from "./utils/text_utils.js";
|
||||
import beccaService from "../../becca/becca_service.js";
|
||||
import {
|
||||
calculateOptimizedEditDistance,
|
||||
FUZZY_SEARCH_CONFIG,
|
||||
normalizeSearchText} from "./utils/text_utils.js";
|
||||
|
||||
// Scoring constants for better maintainability
|
||||
const SCORE_WEIGHTS = {
|
||||
@@ -98,7 +95,7 @@ class SearchResult {
|
||||
for (const chunk of chunks) {
|
||||
for (const token of tokens) {
|
||||
const normalizedToken = normalizeSearchText(token.toLowerCase());
|
||||
|
||||
|
||||
if (chunk === normalizedToken) {
|
||||
tokenScore += SCORE_WEIGHTS.TOKEN_EXACT_MATCH * token.length * factor;
|
||||
} else if (chunk.startsWith(normalizedToken)) {
|
||||
@@ -108,10 +105,10 @@ class SearchResult {
|
||||
} else {
|
||||
// Try fuzzy matching for individual tokens with caps applied
|
||||
const editDistance = calculateOptimizedEditDistance(chunk, normalizedToken, FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE);
|
||||
if (editDistance <= FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE &&
|
||||
if (editDistance <= FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE &&
|
||||
normalizedToken.length >= FUZZY_SEARCH_CONFIG.MIN_FUZZY_TOKEN_LENGTH &&
|
||||
this.fuzzyScore < SCORE_WEIGHTS.MAX_TOTAL_FUZZY_SCORE) {
|
||||
|
||||
|
||||
const fuzzyWeight = SCORE_WEIGHTS.TOKEN_FUZZY_MATCH * (1 - editDistance / FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE);
|
||||
// Apply caps: limit token length multiplier and per-token contribution
|
||||
const cappedTokenLength = Math.min(token.length, SCORE_WEIGHTS.MAX_FUZZY_TOKEN_LENGTH_MULTIPLIER);
|
||||
@@ -119,7 +116,7 @@ class SearchResult {
|
||||
fuzzyWeight * cappedTokenLength * factor,
|
||||
SCORE_WEIGHTS.MAX_FUZZY_SCORE_PER_TOKEN
|
||||
);
|
||||
|
||||
|
||||
tokenScore += fuzzyTokenScore;
|
||||
this.fuzzyScore += fuzzyTokenScore;
|
||||
}
|
||||
@@ -129,13 +126,12 @@ class SearchResult {
|
||||
this.score += tokenScore;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks if the query matches as a complete word in the text
|
||||
*/
|
||||
private isWordMatch(text: string, query: string): boolean {
|
||||
return text.includes(` ${query} `) ||
|
||||
text.startsWith(`${query} `) ||
|
||||
return text.includes(` ${query} `) ||
|
||||
text.startsWith(`${query} `) ||
|
||||
text.endsWith(` ${query}`);
|
||||
}
|
||||
|
||||
@@ -147,21 +143,21 @@ class SearchResult {
|
||||
if (this.fuzzyScore >= SCORE_WEIGHTS.MAX_TOTAL_FUZZY_SCORE) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
const editDistance = calculateOptimizedEditDistance(title, query, FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE);
|
||||
const maxLen = Math.max(title.length, query.length);
|
||||
|
||||
|
||||
// Only apply fuzzy matching if the query is reasonably long and edit distance is small
|
||||
if (query.length >= FUZZY_SEARCH_CONFIG.MIN_FUZZY_TOKEN_LENGTH &&
|
||||
editDistance <= FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE &&
|
||||
if (query.length >= FUZZY_SEARCH_CONFIG.MIN_FUZZY_TOKEN_LENGTH &&
|
||||
editDistance <= FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE &&
|
||||
editDistance / maxLen <= 0.3) {
|
||||
const similarity = 1 - (editDistance / maxLen);
|
||||
const baseFuzzyScore = SCORE_WEIGHTS.TITLE_WORD_MATCH * similarity * 0.7; // Reduced weight for fuzzy matches
|
||||
|
||||
|
||||
// Apply cap to ensure fuzzy title matches don't exceed reasonable bounds
|
||||
return Math.min(baseFuzzyScore, SCORE_WEIGHTS.MAX_TOTAL_FUZZY_SCORE * 0.3);
|
||||
}
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
149
apps/server/src/services/search/search_result_ocr.spec.ts
Normal file
149
apps/server/src/services/search/search_result_ocr.spec.ts
Normal file
@@ -0,0 +1,149 @@
|
||||
import { beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
|
||||
// In-memory stand-ins for becca and becca_service so SearchResult can be
// exercised without a database.
const mockBecca = {
    notes: {} as Record<string, any>,
    getNote: vi.fn()
};

const mockBeccaService = {
    getNoteTitleForPath: vi.fn()
};

vi.mock('../../becca/becca.js', () => ({
    default: mockBecca
}));

vi.mock('../../becca/becca_service.js', () => ({
    default: mockBeccaService
}));

// Resolved lazily in beforeEach so the vi.mock() factories above are
// registered before the module under test is imported.
let SearchResult: any;

beforeEach(async () => {
    vi.clearAllMocks();

    mockBeccaService.getNoteTitleForPath.mockReturnValue('Test Note Title');

    // Baseline note fixture used by most cases; individual tests override fields.
    mockBecca.notes['test123'] = {
        noteId: 'test123',
        title: 'Test Note',
        isInHiddenSubtree: vi.fn().mockReturnValue(false)
    };

    const module = await import('./search_result.js');
    SearchResult = module.default;
});

describe('SearchResult', () => {
    describe('constructor', () => {
        it('should initialize with note path array', () => {
            const searchResult = new SearchResult(['root', 'folder', 'test123']);

            expect(searchResult.notePathArray).toEqual(['root', 'folder', 'test123']);
            expect(searchResult.noteId).toBe('test123');
            expect(searchResult.notePath).toBe('root/folder/test123');
            expect(searchResult.score).toBe(0);
            expect(mockBeccaService.getNoteTitleForPath).toHaveBeenCalledWith(['root', 'folder', 'test123']);
        });
    });

    describe('computeScore', () => {
        let searchResult: any;

        beforeEach(() => {
            searchResult = new SearchResult(['root', 'test123']);
        });

        describe('basic scoring', () => {
            it('should give highest score for exact note ID match', () => {
                searchResult.computeScore('test123', ['test123']);
                expect(searchResult.score).toBeGreaterThanOrEqual(1000);
            });

            it('should give high score for exact title match', () => {
                searchResult.computeScore('test note', ['test', 'note']);
                expect(searchResult.score).toBeGreaterThan(2000);
            });

            it('should give medium score for title prefix match', () => {
                searchResult.computeScore('test', ['test']);
                expect(searchResult.score).toBeGreaterThan(500);
            });

            it('should give lower score for title word match', () => {
                mockBecca.notes['test123'].title = 'This is a test note';
                searchResult.computeScore('test', ['test']);
                expect(searchResult.score).toBeGreaterThan(300);
            });
        });

        describe('hidden notes penalty', () => {
            it('should apply penalty for hidden notes', () => {
                mockBecca.notes['test123'].isInHiddenSubtree.mockReturnValue(true);

                searchResult.computeScore('test', ['test']);
                const hiddenScore = searchResult.score;

                // Re-score the same query with the note visible and compare.
                mockBecca.notes['test123'].isInHiddenSubtree.mockReturnValue(false);
                searchResult.score = 0;
                searchResult.computeScore('test', ['test']);
                const normalScore = searchResult.score;

                expect(normalScore).toBeGreaterThan(hiddenScore);
                expect(hiddenScore).toBe(normalScore / 3);
            });
        });
    });

    describe('addScoreForStrings', () => {
        let searchResult: any;

        beforeEach(() => {
            searchResult = new SearchResult(['root', 'test123']);
        });

        it('should give highest score for exact token match', () => {
            searchResult.addScoreForStrings(['sample'], 'sample text', 1.0);
            const exactScore = searchResult.score;

            searchResult.score = 0;
            searchResult.addScoreForStrings(['sample'], 'sampling text', 1.0);
            const prefixScore = searchResult.score;

            searchResult.score = 0;
            searchResult.addScoreForStrings(['sample'], 'text sample text', 1.0);
            const partialScore = searchResult.score;

            expect(exactScore).toBeGreaterThan(prefixScore);
            expect(exactScore).toBeGreaterThanOrEqual(partialScore);
        });

        it('should apply factor multiplier correctly', () => {
            searchResult.addScoreForStrings(['sample'], 'sample text', 2.0);
            const doubleFactorScore = searchResult.score;

            searchResult.score = 0;
            searchResult.addScoreForStrings(['sample'], 'sample text', 1.0);
            const singleFactorScore = searchResult.score;

            expect(doubleFactorScore).toBe(singleFactorScore * 2);
        });

        it('should handle multiple tokens', () => {
            searchResult.addScoreForStrings(['hello', 'world'], 'hello world test', 1.0);
            expect(searchResult.score).toBeGreaterThan(0);
        });

        it('should be case insensitive', () => {
            searchResult.addScoreForStrings(['sample'], 'sample text', 1.0);
            const lowerCaseScore = searchResult.score;

            searchResult.score = 0;
            searchResult.addScoreForStrings(['sample'], 'SAMPLE text', 1.0);
            const upperCaseScore = searchResult.score;

            expect(upperCaseScore).toEqual(lowerCaseScore);
            expect(upperCaseScore).toBeGreaterThan(0);
        });
    });
});
|
||||
@@ -1,28 +1,30 @@
|
||||
"use strict";
|
||||
|
||||
|
||||
import { dayjs } from "@triliumnext/commons";
|
||||
|
||||
import { removeDiacritic } from "../../utils.js";
|
||||
import AncestorExp from "../expressions/ancestor.js";
|
||||
import AndExp from "../expressions/and.js";
|
||||
import OrExp from "../expressions/or.js";
|
||||
import NotExp from "../expressions/not.js";
|
||||
import AttributeExistsExp from "../expressions/attribute_exists.js";
|
||||
import ChildOfExp from "../expressions/child_of.js";
|
||||
import DescendantOfExp from "../expressions/descendant_of.js";
|
||||
import ParentOfExp from "../expressions/parent_of.js";
|
||||
import RelationWhereExp from "../expressions/relation_where.js";
|
||||
import PropertyComparisonExp from "../expressions/property_comparison.js";
|
||||
import AttributeExistsExp from "../expressions/attribute_exists.js";
|
||||
import LabelComparisonExp from "../expressions/label_comparison.js";
|
||||
import NoteFlatTextExp from "../expressions/note_flat_text.js";
|
||||
import NoteContentFulltextExp from "../expressions/note_content_fulltext.js";
|
||||
import OrderByAndLimitExp from "../expressions/order_by_and_limit.js";
|
||||
import AncestorExp from "../expressions/ancestor.js";
|
||||
import buildComparator from "./build_comparator.js";
|
||||
import ValueExtractor from "../value_extractor.js";
|
||||
import { removeDiacritic } from "../../utils.js";
|
||||
import TrueExp from "../expressions/true.js";
|
||||
import IsHiddenExp from "../expressions/is_hidden.js";
|
||||
import type SearchContext from "../search_context.js";
|
||||
import type { TokenData, TokenStructure } from "./types.js";
|
||||
import type Expression from "../expressions/expression.js";
|
||||
import IsHiddenExp from "../expressions/is_hidden.js";
|
||||
import LabelComparisonExp from "../expressions/label_comparison.js";
|
||||
import NotExp from "../expressions/not.js";
|
||||
import NoteContentFulltextExp from "../expressions/note_content_fulltext.js";
|
||||
import NoteFlatTextExp from "../expressions/note_flat_text.js";
|
||||
import OCRContentExpression from "../expressions/ocr_content.js";
|
||||
import OrExp from "../expressions/or.js";
|
||||
import OrderByAndLimitExp from "../expressions/order_by_and_limit.js";
|
||||
import ParentOfExp from "../expressions/parent_of.js";
|
||||
import PropertyComparisonExp from "../expressions/property_comparison.js";
|
||||
import RelationWhereExp from "../expressions/relation_where.js";
|
||||
import TrueExp from "../expressions/true.js";
|
||||
import type SearchContext from "../search_context.js";
|
||||
import ValueExtractor from "../value_extractor.js";
|
||||
import buildComparator from "./build_comparator.js";
|
||||
import type { TokenData, TokenStructure } from "./types.js";
|
||||
|
||||
function getFulltext(_tokens: TokenData[], searchContext: SearchContext, leadingOperator?: string) {
|
||||
const tokens: string[] = _tokens.map((t) => removeDiacritic(t.token));
|
||||
@@ -42,16 +44,26 @@ function getFulltext(_tokens: TokenData[], searchContext: SearchContext, leading
|
||||
// Exact match on title OR exact match on content OR exact match in flat text (includes attributes)
|
||||
// For multi-word, join tokens with space to form exact phrase
|
||||
const titleSearchValue = tokens.join(" ");
|
||||
return new OrExp([
|
||||
const exactMatchExpressions: Expression[] = [
|
||||
new PropertyComparisonExp(searchContext, "title", "=", titleSearchValue),
|
||||
new NoteContentFulltextExp("=", { tokens, flatText: false }),
|
||||
new NoteContentFulltextExp("=", { tokens, flatText: true })
|
||||
]);
|
||||
];
|
||||
|
||||
exactMatchExpressions.push(new OCRContentExpression(tokens));
|
||||
|
||||
return new OrExp(exactMatchExpressions);
|
||||
}
|
||||
return new OrExp([new NoteFlatTextExp(tokens), new NoteContentFulltextExp(operator, { tokens, flatText: true })]);
|
||||
} else {
|
||||
return new NoteFlatTextExp(tokens);
|
||||
|
||||
const searchExpressions: Expression[] = [
|
||||
new NoteFlatTextExp(tokens),
|
||||
new NoteContentFulltextExp(operator, { tokens, flatText: true }),
|
||||
new OCRContentExpression(tokens)
|
||||
];
|
||||
|
||||
return new OrExp(searchExpressions);
|
||||
}
|
||||
return new NoteFlatTextExp(tokens);
|
||||
}
|
||||
|
||||
const OPERATORS = new Set(["=", "!=", "*=*", "*=", "=*", ">", ">=", "<", "<=", "%=", "~=", "~*"]);
|
||||
@@ -298,9 +310,9 @@ function getExpression(tokens: TokenData[], searchContext: SearchContext, level
|
||||
searchContext.addError(`Relation can be compared only with property, e.g. ~relation.title=hello in ${context(i)}`);
|
||||
|
||||
return null;
|
||||
} else {
|
||||
return new AttributeExistsExp("relation", relationName, searchContext.fuzzyAttributeSearch);
|
||||
}
|
||||
return new AttributeExistsExp("relation", relationName, searchContext.fuzzyAttributeSearch);
|
||||
|
||||
}
|
||||
|
||||
function parseOrderByAndLimit() {
|
||||
@@ -308,7 +320,7 @@ function getExpression(tokens: TokenData[], searchContext: SearchContext, level
|
||||
valueExtractor: ValueExtractor;
|
||||
direction: string;
|
||||
}[] = [];
|
||||
let limit: number | undefined = undefined;
|
||||
let limit: number | undefined;
|
||||
|
||||
if (tokens[i].token === "orderby") {
|
||||
do {
|
||||
@@ -354,9 +366,9 @@ function getExpression(tokens: TokenData[], searchContext: SearchContext, level
|
||||
return AndExp.of(expressions);
|
||||
} else if (op === "or") {
|
||||
return OrExp.of(expressions);
|
||||
} else {
|
||||
throw new Error(`Unrecognized op=${op}`);
|
||||
}
|
||||
throw new Error(`Unrecognized op=${op}`);
|
||||
|
||||
}
|
||||
|
||||
for (i = 0; i < tokens.length; i++) {
|
||||
@@ -423,7 +435,7 @@ function getExpression(tokens: TokenData[], searchContext: SearchContext, level
|
||||
} else if (op !== token) {
|
||||
searchContext.addError("Mixed usage of AND/OR - always use parenthesis to group AND/OR expressions.");
|
||||
}
|
||||
} else if (isOperator({ token: token })) {
|
||||
} else if (isOperator({ token })) {
|
||||
searchContext.addError(`Misplaced or incomplete expression "${token}"`);
|
||||
} else {
|
||||
searchContext.addError(`Unrecognized expression "${token}"`);
|
||||
@@ -493,9 +505,9 @@ function getAncestorExp({ ancestorNoteId, ancestorDepth, includeHiddenNotes }: S
|
||||
return new AncestorExp(ancestorNoteId, ancestorDepth);
|
||||
} else if (!includeHiddenNotes) {
|
||||
return new NotExp(new IsHiddenExp());
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
return null;
|
||||
|
||||
}
|
||||
|
||||
export default parse;
|
||||
|
||||
@@ -435,21 +435,46 @@ function findFirstNoteWithQuery(query: string, searchContext: SearchContext): BN
|
||||
return searchResults.length > 0 ? becca.notes[searchResults[0].noteId] : null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the first non-empty textRepresentation for a note's own blob
|
||||
* or any of its attachment blobs.
|
||||
*/
|
||||
function getTextRepresentationForNote(note: BNote): string | null {
|
||||
// Query only textRepresentation to avoid loading large binary content into memory.
|
||||
const row = sql.getRow<{ textRepresentation: string | null }>(`
|
||||
SELECT b.textRepresentation FROM blobs b
|
||||
WHERE b.textRepresentation IS NOT NULL
|
||||
AND b.textRepresentation != ''
|
||||
AND (
|
||||
b.blobId = ?
|
||||
OR b.blobId IN (SELECT blobId FROM attachments WHERE ownerId = ? AND isDeleted = 0)
|
||||
)
|
||||
LIMIT 1
|
||||
`, [note.blobId, note.noteId]);
|
||||
|
||||
return row?.textRepresentation ?? null;
|
||||
}
|
||||
|
||||
function extractContentSnippet(noteId: string, searchTokens: string[], maxLength: number = 200): string {
|
||||
const note = becca.notes[noteId];
|
||||
if (!note) {
|
||||
return "";
|
||||
}
|
||||
|
||||
// Only extract content for text-based notes
|
||||
if (!["text", "code", "mermaid", "canvas", "mindMap"].includes(note.type)) {
|
||||
return "";
|
||||
}
|
||||
|
||||
try {
|
||||
let content = note.getContent();
|
||||
|
||||
if (!content || typeof content !== "string") {
|
||||
let content: string | undefined;
|
||||
|
||||
if (["text", "code", "mermaid", "canvas", "mindMap"].includes(note.type)) {
|
||||
const raw = note.getContent();
|
||||
if (raw && typeof raw === "string") {
|
||||
content = raw;
|
||||
}
|
||||
} else {
|
||||
// For non-text notes (image, file), use OCR text representation
|
||||
content = getTextRepresentationForNote(note) || undefined;
|
||||
}
|
||||
|
||||
if (!content) {
|
||||
return "";
|
||||
}
|
||||
|
||||
|
||||
@@ -36,7 +36,7 @@
|
||||
"test:all": "pnpm test:parallel && pnpm test:sequential",
|
||||
"test:parallel": "pnpm --filter=!server --filter=!ckeditor5-mermaid --filter=!ckeditor5-math --parallel test",
|
||||
"test:sequential": "pnpm --filter=server --filter=ckeditor5-mermaid --filter=ckeditor5-math --sequential test",
|
||||
"typecheck": "tsc --build",
|
||||
"typecheck": "tsx scripts/filter-tsc-output.mts",
|
||||
"dev:format-check": "eslint -c eslint.format.config.mjs .",
|
||||
"dev:format-fix": "eslint -c eslint.format.config.mjs . --fix",
|
||||
"dev:linter-check": "cross-env NODE_OPTIONS=--max_old_space_size=4096 eslint .",
|
||||
|
||||
@@ -9,27 +9,29 @@ export interface Locale {
|
||||
devOnly?: boolean;
|
||||
/** The value to pass to `--lang` for the Electron instance in order to set it as a locale. Not setting it will hide it from the list of supported locales. */
|
||||
electronLocale?: "en" | "de" | "es" | "fr" | "zh_CN" | "zh_TW" | "ro" | "af" | "am" | "ar" | "bg" | "bn" | "ca" | "cs" | "da" | "el" | "en_GB" | "es_419" | "et" | "fa" | "fi" | "fil" | "gu" | "he" | "hi" | "hr" | "hu" | "id" | "it" | "ja" | "kn" | "ko" | "lt" | "lv" | "ml" | "mr" | "ms" | "nb" | "nl" | "pl" | "pt_BR" | "pt_PT" | "ru" | "sk" | "sl" | "sr" | "sv" | "sw" | "ta" | "te" | "th" | "tr" | "uk" | "ur" | "vi";
|
||||
/** The Tesseract OCR language code for this locale (e.g. "eng", "fra", "deu"). See https://tesseract-ocr.github.io/tessdoc/Data-Files-in-different-versions.html */
|
||||
tesseractCode?: "eng" | "deu" | "spa" | "fra" | "gle" | "ita" | "hin" | "jpn" | "por" | "pol" | "ron" | "rus" | "chi_sim" | "chi_tra" | "ukr" | "ara" | "heb" | "kur" | "fas" | "kor";
|
||||
}
|
||||
|
||||
// When adding a new locale, prefer the version with hyphen instead of underscore.
|
||||
const UNSORTED_LOCALES = [
|
||||
{ id: "cn", name: "简体中文", electronLocale: "zh_CN" },
|
||||
{ id: "de", name: "Deutsch", electronLocale: "de" },
|
||||
{ id: "en", name: "English (United States)", electronLocale: "en" },
|
||||
{ id: "en-GB", name: "English (United Kingdom)", electronLocale: "en_GB" },
|
||||
{ id: "es", name: "Español", electronLocale: "es" },
|
||||
{ id: "fr", name: "Français", electronLocale: "fr" },
|
||||
{ id: "ga", name: "Gaeilge", electronLocale: "en" },
|
||||
{ id: "it", name: "Italiano", electronLocale: "it" },
|
||||
{ id: "hi", name: "हिन्दी", electronLocale: "hi" },
|
||||
{ id: "ja", name: "日本語", electronLocale: "ja" },
|
||||
{ id: "pt_br", name: "Português (Brasil)", electronLocale: "pt_BR" },
|
||||
{ id: "pt", name: "Português (Portugal)", electronLocale: "pt_PT" },
|
||||
{ id: "pl", name: "Polski", electronLocale: "pl" },
|
||||
{ id: "ro", name: "Română", electronLocale: "ro" },
|
||||
{ id: "ru", name: "Русский", electronLocale: "ru" },
|
||||
{ id: "tw", name: "繁體中文", electronLocale: "zh_TW" },
|
||||
{ id: "uk", name: "Українська", electronLocale: "uk" },
|
||||
{ id: "cn", name: "简体中文", electronLocale: "zh_CN", tesseractCode: "chi_sim" },
|
||||
{ id: "de", name: "Deutsch", electronLocale: "de", tesseractCode: "deu" },
|
||||
{ id: "en", name: "English (United States)", electronLocale: "en", tesseractCode: "eng" },
|
||||
{ id: "en-GB", name: "English (United Kingdom)", electronLocale: "en_GB", tesseractCode: "eng" },
|
||||
{ id: "es", name: "Español", electronLocale: "es", tesseractCode: "spa" },
|
||||
{ id: "fr", name: "Français", electronLocale: "fr", tesseractCode: "fra" },
|
||||
{ id: "ga", name: "Gaeilge", electronLocale: "en", tesseractCode: "gle" },
|
||||
{ id: "it", name: "Italiano", electronLocale: "it", tesseractCode: "ita" },
|
||||
{ id: "hi", name: "हिन्दी", electronLocale: "hi", tesseractCode: "hin" },
|
||||
{ id: "ja", name: "日本語", electronLocale: "ja", tesseractCode: "jpn" },
|
||||
{ id: "pt_br", name: "Português (Brasil)", electronLocale: "pt_BR", tesseractCode: "por" },
|
||||
{ id: "pt", name: "Português (Portugal)", electronLocale: "pt_PT", tesseractCode: "por" },
|
||||
{ id: "pl", name: "Polski", electronLocale: "pl", tesseractCode: "pol" },
|
||||
{ id: "ro", name: "Română", electronLocale: "ro", tesseractCode: "ron" },
|
||||
{ id: "ru", name: "Русский", electronLocale: "ru", tesseractCode: "rus" },
|
||||
{ id: "tw", name: "繁體中文", electronLocale: "zh_TW", tesseractCode: "chi_tra" },
|
||||
{ id: "uk", name: "Українська", electronLocale: "uk", tesseractCode: "ukr" },
|
||||
|
||||
/**
|
||||
* Development-only languages.
|
||||
@@ -53,25 +55,29 @@ const UNSORTED_LOCALES = [
|
||||
id: "ar",
|
||||
name: "اَلْعَرَبِيَّةُ",
|
||||
rtl: true,
|
||||
electronLocale: "ar"
|
||||
electronLocale: "ar",
|
||||
tesseractCode: "ara"
|
||||
},
|
||||
{ // Hebrew
|
||||
id: "he",
|
||||
name: "עברית",
|
||||
rtl: true,
|
||||
contentOnly: true
|
||||
contentOnly: true,
|
||||
tesseractCode: "heb"
|
||||
},
|
||||
{ // Kurdish
|
||||
id: "ku",
|
||||
name: "کوردی",
|
||||
rtl: true,
|
||||
contentOnly: true
|
||||
contentOnly: true,
|
||||
tesseractCode: "kur"
|
||||
},
|
||||
{ // Persian
|
||||
id: "fa",
|
||||
name: "فارسی",
|
||||
rtl: true,
|
||||
contentOnly: true
|
||||
contentOnly: true,
|
||||
tesseractCode: "fas"
|
||||
}
|
||||
] as const;
|
||||
|
||||
@@ -82,3 +88,10 @@ export const LOCALES: Locale[] = Array.from(UNSORTED_LOCALES)
|
||||
export type LOCALE_IDS = typeof UNSORTED_LOCALES[number]["id"];
|
||||
/** A type containing a string union of all the supported locales that are not content-only (i.e. can be used as the UI language). */
|
||||
export type DISPLAYABLE_LOCALE_IDS = Exclude<typeof UNSORTED_LOCALES[number], { contentOnly: true }>["id"];
|
||||
|
||||
/**
|
||||
* Returns the Tesseract OCR language code for the given locale ID, or `null` if not mapped.
|
||||
*/
|
||||
export function getTesseractCode(localeId: string): string | null {
|
||||
return LOCALES.find((l) => l.id === localeId)?.tesseractCode ?? null;
|
||||
}
|
||||
|
||||
@@ -144,6 +144,12 @@ export interface OptionDefinitions extends KeyboardShortcutsOptions<KeyboardActi
|
||||
// AI / LLM
|
||||
/** JSON array of configured LLM providers with their API keys */
|
||||
llmProviders: string;
|
||||
|
||||
// OCR options
|
||||
ocrEnabled: boolean;
|
||||
ocrLanguage: string;
|
||||
ocrAutoProcessImages: boolean;
|
||||
ocrMinConfidence: string;
|
||||
}
|
||||
|
||||
export type OptionNames = keyof OptionDefinitions;
|
||||
|
||||
@@ -72,6 +72,7 @@ export interface BlobRow {
|
||||
blobId: string;
|
||||
content: string | Buffer;
|
||||
contentLength: number;
|
||||
textRepresentation?: string | null;
|
||||
dateModified: string;
|
||||
utcDateModified: string;
|
||||
}
|
||||
|
||||
@@ -288,6 +288,13 @@ export interface ToMarkdownResponse {
|
||||
markdownContent: string;
|
||||
}
|
||||
|
||||
export interface TextRepresentationResponse {
|
||||
success: boolean;
|
||||
text: string;
|
||||
hasOcr: boolean;
|
||||
message?: string;
|
||||
}
|
||||
|
||||
export interface IconRegistry {
|
||||
sources: {
|
||||
prefix: string;
|
||||
|
||||
182
pnpm-lock.yaml
generated
182
pnpm-lock.yaml
generated
@@ -580,6 +580,9 @@ importers:
|
||||
sucrase:
|
||||
specifier: 3.35.1
|
||||
version: 3.35.1
|
||||
unpdf:
|
||||
specifier: 1.4.0
|
||||
version: 1.4.0(@napi-rs/canvas@0.1.96)
|
||||
devDependencies:
|
||||
'@braintree/sanitize-url':
|
||||
specifier: 7.1.2
|
||||
@@ -806,6 +809,9 @@ importers:
|
||||
normalize-strings:
|
||||
specifier: 1.1.1
|
||||
version: 1.1.1
|
||||
officeparser:
|
||||
specifier: 6.0.7
|
||||
version: 6.0.7(encoding@0.1.13)
|
||||
rand-token:
|
||||
specifier: 1.0.1
|
||||
version: 1.0.1
|
||||
@@ -839,6 +845,9 @@ importers:
|
||||
swagger-jsdoc:
|
||||
specifier: 6.2.8
|
||||
version: 6.2.8(openapi-types@12.1.3)
|
||||
tesseract.js:
|
||||
specifier: 6.0.1
|
||||
version: 6.0.1(encoding@0.1.13)
|
||||
time2fa:
|
||||
specifier: 1.4.2
|
||||
version: 1.4.2
|
||||
@@ -2572,9 +2581,6 @@ packages:
|
||||
'@emnapi/core@1.9.0':
|
||||
resolution: {integrity: sha512-0DQ98G9ZQZOxfUcQn1waV2yS8aWdZ6kJMbYCJB3oUBecjWYO1fqJ+a1DRfPF3O5JEkwqwP1A9QEN/9mYm2Yd0w==}
|
||||
|
||||
'@emnapi/runtime@1.8.1':
|
||||
resolution: {integrity: sha512-mehfKSMWjjNol8659Z8KxEMrdSJDDot5SXMq00dM8BN4o+CLNXQ0xH2V7EchNHV4RmbZLmmPdEaXZc5H2FXmDg==}
|
||||
|
||||
'@emnapi/runtime@1.9.0':
|
||||
resolution: {integrity: sha512-QN75eB0IH2ywSpRpNddCRfQIhmJYBCJ1x5Lb3IscKAL8bMnVAKnRg8dCoXbHzVLLH7P38N2Z3mtulB7W0J0FKw==}
|
||||
|
||||
@@ -7268,6 +7274,10 @@ packages:
|
||||
engines: {node: '>=10.0.0'}
|
||||
deprecated: this version has critical issues, please update to the latest version
|
||||
|
||||
'@xmldom/xmldom@0.8.12':
|
||||
resolution: {integrity: sha512-9k/gHF6n/pAi/9tqr3m3aqkuiNosYTurLLUtc7xQ9sxB/wm7WPygCv8GYa6mS0fLJEHhqMC1ATYhz++U/lRHqg==}
|
||||
engines: {node: '>=10.0.0'}
|
||||
|
||||
'@xtuc/ieee754@1.2.0':
|
||||
resolution: {integrity: sha512-DX8nKgqcGwsc0eJSqYt5lwP4DH5FlHnmuWWBRy7X0NcaGR0ZtuyeESgMwTYVEtxmsNGY+qit4QYT/MIYTOTPeA==}
|
||||
|
||||
@@ -7766,6 +7776,9 @@ packages:
|
||||
blurhash@2.0.5:
|
||||
resolution: {integrity: sha512-cRygWd7kGBQO3VEhPiTgq4Wc43ctsM+o46urrmPOiuAe+07fzlSB9OJVdpgDL0jPqXUVQ9ht7aq7kxOeJHRK+w==}
|
||||
|
||||
bmp-js@0.1.0:
|
||||
resolution: {integrity: sha512-vHdS19CnY3hwiNdkaqk93DvjVLfbEcI8mys4UjuWrlX1haDmroo8o4xCzh4wD6DGV6HxRCyauwhHRqMTfERtjw==}
|
||||
|
||||
bmp-ts@1.0.9:
|
||||
resolution: {integrity: sha512-cTEHk2jLrPyi+12M3dhpEbnnPOsaZuq7C45ylbbQIiWgDFZq4UVYPEY5mlqjvsj/6gJv9qX5sa+ebDzLXT28Vw==}
|
||||
|
||||
@@ -10479,6 +10492,9 @@ packages:
|
||||
peerDependencies:
|
||||
postcss: ^8.1.0
|
||||
|
||||
idb-keyval@6.2.2:
|
||||
resolution: {integrity: sha512-yjD9nARJ/jb1g+CvD0tlhUHOrJ9Sy0P8T9MF3YaLlHnSRpwPfpTX0XIvpmw3gAJUmEu3FiICLBDPXVwyEvrleg==}
|
||||
|
||||
ieee754@1.2.1:
|
||||
resolution: {integrity: sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==}
|
||||
|
||||
@@ -10876,6 +10892,9 @@ packages:
|
||||
resolution: {integrity: sha512-mE00Gnza5EEB3Ds0HfMyllZzbBrmLOX3vfWoj9A9PEnTfratQ/BcaJOuMhnkhjXvb2+FkY3VuHqtAGpTPmglFQ==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
is-url@1.2.4:
|
||||
resolution: {integrity: sha512-ITvGim8FhRiYe4IQ5uHSkj7pVaPDrCTkNd3yq3cV7iZAcJdHTUMPMEHcqSOy9xZ9qFenQCvi+2wjH9a1nXqHww==}
|
||||
|
||||
is-weakmap@2.0.2:
|
||||
resolution: {integrity: sha512-K5pXYOm9wqY1RgjpL3YTkF39tni1XajUIkawTLUo9EZEVUFga5gSQJF8nNS7ZwJQ02y+1YCNYcMh+HIf1ZqE+w==}
|
||||
engines: {node: '>= 0.4'}
|
||||
@@ -12400,6 +12419,11 @@ packages:
|
||||
ofetch@1.5.1:
|
||||
resolution: {integrity: sha512-2W4oUZlVaqAPAil6FUg/difl6YhqhUR7x2eZY4bQCko22UXg3hptq9KLQdqFClV+Wu85UX7hNtdGTngi/1BxcA==}
|
||||
|
||||
officeparser@6.0.7:
|
||||
resolution: {integrity: sha512-MkNHyWIfEZRDtB8c0fgJHdb4Ui0I/WztBjlUjlPiEbTO6dIYaJMt+llS5p5Foj13guUZgGxkkM9VwsVRthHNAA==}
|
||||
engines: {node: '>=18.0.0'}
|
||||
hasBin: true
|
||||
|
||||
ohash@2.0.11:
|
||||
resolution: {integrity: sha512-RdR9FQrFwNBNXAr4GixM8YaRZRJ5PUWbKYbE5eOsrwAjJW0q2REGcf79oYPsLyskQCZG1PLN+S/K1V00joZAoQ==}
|
||||
|
||||
@@ -12458,6 +12482,10 @@ packages:
|
||||
openapi-types@12.1.3:
|
||||
resolution: {integrity: sha512-N4YtSYJqghVu4iek2ZUvcN/0aqH1kRDuNqzcycDxhOUpg7GdvLa2F3DgS6yBNhInhv2r/6I0Flkn7CqL8+nIcw==}
|
||||
|
||||
opencollective-postinstall@2.0.3:
|
||||
resolution: {integrity: sha512-8AV/sCtuzUeTo8gQK5qDZzARrulB3egtLzFgteqB2tcT4Mw7B8Kt7JcDHmltjz6FOAHsvTevk70gZEbhM4ZS9Q==}
|
||||
hasBin: true
|
||||
|
||||
opener@1.5.2:
|
||||
resolution: {integrity: sha512-ur5UIdyw5Y7yEj9wLzhqXiy6GZ3Mwx0yGI+5sMn2r0N0v3cKJvUmFH5yPP+WXh9e0xfyzyJX95D8l088DNFj7A==}
|
||||
hasBin: true
|
||||
@@ -13545,6 +13573,9 @@ packages:
|
||||
regenerate@1.4.2:
|
||||
resolution: {integrity: sha512-zrceR/XhGYU/d/opr2EKO7aRHUeiBI8qjtfHqADTwZd6Szfy16la6kqD0MIUs5z5hx6AaKa+PixpPrR289+I0A==}
|
||||
|
||||
regenerator-runtime@0.13.11:
|
||||
resolution: {integrity: sha512-kY1AZVr2Ra+t+piVaJ4gxaFaReZVH40AKNo7UCX6W+dEwBo/2oZJzqfuN1qLq1oL45o56cPaTXELwrTh8Fpggg==}
|
||||
|
||||
regexp-util@2.0.3:
|
||||
resolution: {integrity: sha512-GP6h9OgJmhAZpb3dbNbXTfRWVnGcoMhWRZv/HxgM4/qCVqs1P9ukQdYxaUhjWBSAs9oJ/uPXUUvGT1VMe0Bs0Q==}
|
||||
engines: {node: '>=16'}
|
||||
@@ -14760,6 +14791,18 @@ packages:
|
||||
engines: {node: '>=10'}
|
||||
hasBin: true
|
||||
|
||||
tesseract.js-core@6.0.0:
|
||||
resolution: {integrity: sha512-1Qncm/9oKM7xgrQXZXNB+NRh19qiXGhxlrR8EwFbK5SaUbPZnS5OMtP/ghtqfd23hsr1ZvZbZjeuAGcMxd/ooA==}
|
||||
|
||||
tesseract.js-core@7.0.0:
|
||||
resolution: {integrity: sha512-WnNH518NzmbSq9zgTPeoF8c+xmilS8rFIl1YKbk/ptuuc7p6cLNELNuPAzcmsYw450ca6bLa8j3t0VAtq435Vw==}
|
||||
|
||||
tesseract.js@6.0.1:
|
||||
resolution: {integrity: sha512-/sPvMvrCtgxnNRCjbTYbr7BRu0yfWDsMZQ2a/T5aN/L1t8wUQN6tTWv6p6FwzpoEBA0jrN2UD2SX4QQFRdoDbA==}
|
||||
|
||||
tesseract.js@7.0.0:
|
||||
resolution: {integrity: sha512-exPBkd+z+wM1BuMkx/Bjv43OeLBxhL5kKWsz/9JY+DXcXdiBjiAch0V49QR3oAJqCaL5qURE0vx9Eo+G5YE7mA==}
|
||||
|
||||
text-decoder@1.2.3:
|
||||
resolution: {integrity: sha512-3/o9z3X0X0fTupwsYvR03pJ/DjWuqqrfwBgTQzdWDiQSm9KitAyz/9WqsT2JQW7KV2m+bC2ol/zqpW37NHxLaA==}
|
||||
|
||||
@@ -15239,6 +15282,14 @@ packages:
|
||||
resolution: {integrity: sha512-b2/KCUlYZUeA7JFUuRJZPUtr4gZvBh7tavtv4fvk4+KV9pfGiR6CQAQAWl49ZpR3ts2dk4FYkP7EIgDJoiOLDA==}
|
||||
engines: {node: '>= 0.4.0'}
|
||||
|
||||
unpdf@1.4.0:
|
||||
resolution: {integrity: sha512-TahIk0xdH/4jh/MxfclzU79g40OyxtP00VnEUZdEkJoYtXAHWLiir6t3FC6z3vDqQTzc2ZHcla6uEiVTNjejuA==}
|
||||
peerDependencies:
|
||||
'@napi-rs/canvas': ^0.1.69
|
||||
peerDependenciesMeta:
|
||||
'@napi-rs/canvas':
|
||||
optional: true
|
||||
|
||||
unpipe@1.0.0:
|
||||
resolution: {integrity: sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==}
|
||||
engines: {node: '>= 0.8'}
|
||||
@@ -15601,6 +15652,9 @@ packages:
|
||||
warning@4.0.3:
|
||||
resolution: {integrity: sha512-rpJyN222KWIvHJ/F53XSZv0Zl/accqHR8et1kpaMTD/fLCRxtV8iX8czMzY7sVZupTI3zcUTg8eycS2kNF9l6w==}
|
||||
|
||||
wasm-feature-detect@1.8.0:
|
||||
resolution: {integrity: sha512-zksaLKM2fVlnB5jQQDqKXXwYHLQUVH9es+5TOOHwGOVJOCeRBCiPjwSg+3tN2AdTCzjgli4jijCH290kXb/zWQ==}
|
||||
|
||||
watchpack@2.4.4:
|
||||
resolution: {integrity: sha512-c5EGNOiyxxV5qmTtAB7rbiXxi1ooX1pQKMLX/MIabJjRA0SJBQOjKF+KSVfHkr9U1cADPon0mRiVe/riyaiDUA==}
|
||||
engines: {node: '>=10.13.0'}
|
||||
@@ -16036,6 +16090,9 @@ packages:
|
||||
resolution: {integrity: sha512-zK7YHHz4ZXpW89AHXUPbQVGKI7uvkd3hzusTdotCg1UxyaVtg0zFJSTfW/Dq5f7OBBVnq6cZIaC8Ti4hb6dtCA==}
|
||||
engines: {node: '>= 14'}
|
||||
|
||||
zlibjs@0.3.1:
|
||||
resolution: {integrity: sha512-+J9RrgTKOmlxFSDHo0pI1xM6BLVUv+o0ZT9ANtCxGkjIVCCUdx9alUF8Gm+dGLKbkkkidWIHFDZHDMpfITt4+w==}
|
||||
|
||||
zod@4.1.12:
|
||||
resolution: {integrity: sha512-JInaHOamG8pt5+Ey8kGmdcAcg3OL9reK8ltczgHTAwNhMys/6ThXHityHxVV2p3fkw/c+MAvBHFVYHFZDmjMCQ==}
|
||||
|
||||
@@ -16909,6 +16966,8 @@ snapshots:
|
||||
'@ckeditor/ckeditor5-core': 47.6.1
|
||||
'@ckeditor/ckeditor5-upload': 47.6.1
|
||||
ckeditor5: 47.6.1
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@ckeditor/ckeditor5-ai@47.6.1(bufferutil@4.0.9)(utf-8-validate@6.0.5)':
|
||||
dependencies:
|
||||
@@ -17069,8 +17128,6 @@ snapshots:
|
||||
'@ckeditor/ckeditor5-ui': 47.6.1
|
||||
'@ckeditor/ckeditor5-utils': 47.6.1
|
||||
ckeditor5: 47.6.1
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@ckeditor/ckeditor5-collaboration-core@47.6.1':
|
||||
dependencies:
|
||||
@@ -17244,6 +17301,8 @@ snapshots:
|
||||
'@ckeditor/ckeditor5-utils': 47.6.1
|
||||
ckeditor5: 47.6.1
|
||||
es-toolkit: 1.39.5
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@ckeditor/ckeditor5-editor-classic@47.6.1':
|
||||
dependencies:
|
||||
@@ -17253,6 +17312,8 @@ snapshots:
|
||||
'@ckeditor/ckeditor5-utils': 47.6.1
|
||||
ckeditor5: 47.6.1
|
||||
es-toolkit: 1.39.5
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@ckeditor/ckeditor5-editor-decoupled@47.6.1':
|
||||
dependencies:
|
||||
@@ -17262,6 +17323,8 @@ snapshots:
|
||||
'@ckeditor/ckeditor5-utils': 47.6.1
|
||||
ckeditor5: 47.6.1
|
||||
es-toolkit: 1.39.5
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@ckeditor/ckeditor5-editor-inline@47.6.1':
|
||||
dependencies:
|
||||
@@ -17271,6 +17334,8 @@ snapshots:
|
||||
'@ckeditor/ckeditor5-utils': 47.6.1
|
||||
ckeditor5: 47.6.1
|
||||
es-toolkit: 1.39.5
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@ckeditor/ckeditor5-editor-multi-root@47.6.1':
|
||||
dependencies:
|
||||
@@ -17377,6 +17442,8 @@ snapshots:
|
||||
'@ckeditor/ckeditor5-utils': 47.6.1
|
||||
ckeditor5: 47.6.1
|
||||
es-toolkit: 1.39.5
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@ckeditor/ckeditor5-font@47.6.1':
|
||||
dependencies:
|
||||
@@ -17461,8 +17528,6 @@ snapshots:
|
||||
'@ckeditor/ckeditor5-utils': 47.6.1
|
||||
'@ckeditor/ckeditor5-widget': 47.6.1
|
||||
ckeditor5: 47.6.1
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@ckeditor/ckeditor5-html-support@47.6.1':
|
||||
dependencies:
|
||||
@@ -17478,8 +17543,6 @@ snapshots:
|
||||
'@ckeditor/ckeditor5-widget': 47.6.1
|
||||
ckeditor5: 47.6.1
|
||||
es-toolkit: 1.39.5
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@ckeditor/ckeditor5-icons@47.6.1': {}
|
||||
|
||||
@@ -17524,8 +17587,6 @@ snapshots:
|
||||
'@ckeditor/ckeditor5-ui': 47.6.1
|
||||
'@ckeditor/ckeditor5-utils': 47.6.1
|
||||
ckeditor5: 47.6.1
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@ckeditor/ckeditor5-inspector@5.0.0': {}
|
||||
|
||||
@@ -17536,8 +17597,6 @@ snapshots:
|
||||
'@ckeditor/ckeditor5-ui': 47.6.1
|
||||
'@ckeditor/ckeditor5-utils': 47.6.1
|
||||
ckeditor5: 47.6.1
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@ckeditor/ckeditor5-line-height@47.6.1':
|
||||
dependencies:
|
||||
@@ -17562,8 +17621,6 @@ snapshots:
|
||||
'@ckeditor/ckeditor5-widget': 47.6.1
|
||||
ckeditor5: 47.6.1
|
||||
es-toolkit: 1.39.5
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@ckeditor/ckeditor5-list-multi-level@47.6.1':
|
||||
dependencies:
|
||||
@@ -17588,8 +17645,6 @@ snapshots:
|
||||
'@ckeditor/ckeditor5-utils': 47.6.1
|
||||
ckeditor5: 47.6.1
|
||||
es-toolkit: 1.39.5
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@ckeditor/ckeditor5-markdown-gfm@47.6.1':
|
||||
dependencies:
|
||||
@@ -17627,8 +17682,6 @@ snapshots:
|
||||
'@ckeditor/ckeditor5-utils': 47.6.1
|
||||
'@ckeditor/ckeditor5-widget': 47.6.1
|
||||
ckeditor5: 47.6.1
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@ckeditor/ckeditor5-mention@47.6.1(patch_hash=5981fb59ba35829e4dff1d39cf771000f8a8fdfa7a34b51d8af9549541f2d62d)':
|
||||
dependencies:
|
||||
@@ -17662,8 +17715,6 @@ snapshots:
|
||||
'@ckeditor/ckeditor5-ui': 47.6.1
|
||||
'@ckeditor/ckeditor5-utils': 47.6.1
|
||||
ckeditor5: 47.6.1
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@ckeditor/ckeditor5-operations-compressor@47.6.1':
|
||||
dependencies:
|
||||
@@ -17716,8 +17767,6 @@ snapshots:
|
||||
'@ckeditor/ckeditor5-utils': 47.6.1
|
||||
'@ckeditor/ckeditor5-widget': 47.6.1
|
||||
ckeditor5: 47.6.1
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@ckeditor/ckeditor5-pagination@47.6.1':
|
||||
dependencies:
|
||||
@@ -17781,8 +17830,6 @@ snapshots:
|
||||
'@ckeditor/ckeditor5-ui': 47.6.1
|
||||
'@ckeditor/ckeditor5-utils': 47.6.1
|
||||
ckeditor5: 47.6.1
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@ckeditor/ckeditor5-restricted-editing@47.6.1':
|
||||
dependencies:
|
||||
@@ -17827,8 +17874,6 @@ snapshots:
|
||||
'@ckeditor/ckeditor5-ui': 47.6.1
|
||||
'@ckeditor/ckeditor5-utils': 47.6.1
|
||||
ckeditor5: 47.6.1
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@ckeditor/ckeditor5-slash-command@47.6.1':
|
||||
dependencies:
|
||||
@@ -17841,8 +17886,6 @@ snapshots:
|
||||
'@ckeditor/ckeditor5-ui': 47.6.1
|
||||
'@ckeditor/ckeditor5-utils': 47.6.1
|
||||
ckeditor5: 47.6.1
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@ckeditor/ckeditor5-source-editing-enhanced@47.6.1':
|
||||
dependencies:
|
||||
@@ -17890,8 +17933,6 @@ snapshots:
|
||||
'@ckeditor/ckeditor5-utils': 47.6.1
|
||||
ckeditor5: 47.6.1
|
||||
es-toolkit: 1.39.5
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@ckeditor/ckeditor5-table@47.6.1':
|
||||
dependencies:
|
||||
@@ -17904,8 +17945,6 @@ snapshots:
|
||||
'@ckeditor/ckeditor5-widget': 47.6.1
|
||||
ckeditor5: 47.6.1
|
||||
es-toolkit: 1.39.5
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@ckeditor/ckeditor5-template@47.6.1':
|
||||
dependencies:
|
||||
@@ -18015,8 +18054,6 @@ snapshots:
|
||||
'@ckeditor/ckeditor5-engine': 47.6.1
|
||||
'@ckeditor/ckeditor5-utils': 47.6.1
|
||||
es-toolkit: 1.39.5
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@ckeditor/ckeditor5-widget@47.6.1':
|
||||
dependencies:
|
||||
@@ -18036,8 +18073,6 @@ snapshots:
|
||||
'@ckeditor/ckeditor5-utils': 47.6.1
|
||||
ckeditor5: 47.6.1
|
||||
es-toolkit: 1.39.5
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@codemirror/autocomplete@6.18.6':
|
||||
dependencies:
|
||||
@@ -18695,11 +18730,6 @@ snapshots:
|
||||
tslib: 2.8.1
|
||||
optional: true
|
||||
|
||||
'@emnapi/runtime@1.8.1':
|
||||
dependencies:
|
||||
tslib: 2.8.1
|
||||
optional: true
|
||||
|
||||
'@emnapi/runtime@1.9.0':
|
||||
dependencies:
|
||||
tslib: 2.8.1
|
||||
@@ -19437,7 +19467,7 @@ snapshots:
|
||||
|
||||
'@img/sharp-wasm32@0.34.5':
|
||||
dependencies:
|
||||
'@emnapi/runtime': 1.8.1
|
||||
'@emnapi/runtime': 1.9.0
|
||||
optional: true
|
||||
|
||||
'@img/sharp-win32-arm64@0.34.5':
|
||||
@@ -24541,6 +24571,8 @@ snapshots:
|
||||
|
||||
'@xmldom/xmldom@0.8.10': {}
|
||||
|
||||
'@xmldom/xmldom@0.8.12': {}
|
||||
|
||||
'@xtuc/ieee754@1.2.0': {}
|
||||
|
||||
'@xtuc/long@4.2.2': {}
|
||||
@@ -25039,6 +25071,8 @@ snapshots:
|
||||
|
||||
blurhash@2.0.5: {}
|
||||
|
||||
bmp-js@0.1.0: {}
|
||||
|
||||
bmp-ts@1.0.9: {}
|
||||
|
||||
body-parser@1.20.3:
|
||||
@@ -25477,8 +25511,6 @@ snapshots:
|
||||
ckeditor5-collaboration@47.6.1:
|
||||
dependencies:
|
||||
'@ckeditor/ckeditor5-collaboration-core': 47.6.1
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
ckeditor5-premium-features@47.6.1(bufferutil@4.0.9)(ckeditor5@47.6.1)(utf-8-validate@6.0.5):
|
||||
dependencies:
|
||||
@@ -28629,6 +28661,8 @@ snapshots:
|
||||
dependencies:
|
||||
postcss: 8.5.8
|
||||
|
||||
idb-keyval@6.2.2: {}
|
||||
|
||||
ieee754@1.2.1: {}
|
||||
|
||||
ignore-walk@8.0.0:
|
||||
@@ -28948,6 +28982,8 @@ snapshots:
|
||||
|
||||
is-unicode-supported@2.1.0: {}
|
||||
|
||||
is-url@1.2.4: {}
|
||||
|
||||
is-weakmap@2.0.2: {}
|
||||
|
||||
is-weakref@1.1.1:
|
||||
@@ -30884,6 +30920,18 @@ snapshots:
|
||||
node-fetch-native: 1.6.7
|
||||
ufo: 1.6.1
|
||||
|
||||
officeparser@6.0.7(encoding@0.1.13):
|
||||
dependencies:
|
||||
'@xmldom/xmldom': 0.8.12
|
||||
concat-stream: 2.0.0
|
||||
file-type: 21.3.4
|
||||
pdfjs-dist: 5.5.207
|
||||
tesseract.js: 7.0.0(encoding@0.1.13)
|
||||
yauzl: 3.2.1
|
||||
transitivePeerDependencies:
|
||||
- encoding
|
||||
- supports-color
|
||||
|
||||
ohash@2.0.11: {}
|
||||
|
||||
oidc-token-hash@5.1.0: {}
|
||||
@@ -30946,6 +30994,8 @@ snapshots:
|
||||
|
||||
openapi-types@12.1.3: {}
|
||||
|
||||
opencollective-postinstall@2.0.3: {}
|
||||
|
||||
opener@1.5.2: {}
|
||||
|
||||
openid-client@4.9.1:
|
||||
@@ -32109,6 +32159,8 @@ snapshots:
|
||||
|
||||
regenerate@1.4.2: {}
|
||||
|
||||
regenerator-runtime@0.13.11: {}
|
||||
|
||||
regexp-util@2.0.3: {}
|
||||
|
||||
regexp.prototype.flags@1.5.4:
|
||||
@@ -33658,6 +33710,38 @@ snapshots:
|
||||
commander: 2.20.3
|
||||
source-map-support: 0.5.21
|
||||
|
||||
tesseract.js-core@6.0.0: {}
|
||||
|
||||
tesseract.js-core@7.0.0: {}
|
||||
|
||||
tesseract.js@6.0.1(encoding@0.1.13):
|
||||
dependencies:
|
||||
bmp-js: 0.1.0
|
||||
idb-keyval: 6.2.2
|
||||
is-url: 1.2.4
|
||||
node-fetch: 2.7.0(encoding@0.1.13)
|
||||
opencollective-postinstall: 2.0.3
|
||||
regenerator-runtime: 0.13.11
|
||||
tesseract.js-core: 6.0.0
|
||||
wasm-feature-detect: 1.8.0
|
||||
zlibjs: 0.3.1
|
||||
transitivePeerDependencies:
|
||||
- encoding
|
||||
|
||||
tesseract.js@7.0.0(encoding@0.1.13):
|
||||
dependencies:
|
||||
bmp-js: 0.1.0
|
||||
idb-keyval: 6.2.2
|
||||
is-url: 1.2.4
|
||||
node-fetch: 2.7.0(encoding@0.1.13)
|
||||
opencollective-postinstall: 2.0.3
|
||||
regenerator-runtime: 0.13.11
|
||||
tesseract.js-core: 7.0.0
|
||||
wasm-feature-detect: 1.8.0
|
||||
zlibjs: 0.3.1
|
||||
transitivePeerDependencies:
|
||||
- encoding
|
||||
|
||||
text-decoder@1.2.3:
|
||||
dependencies:
|
||||
b4a: 1.6.7
|
||||
@@ -34180,6 +34264,10 @@ snapshots:
|
||||
unorm@1.6.0:
|
||||
optional: true
|
||||
|
||||
unpdf@1.4.0(@napi-rs/canvas@0.1.96):
|
||||
optionalDependencies:
|
||||
'@napi-rs/canvas': 0.1.96
|
||||
|
||||
unpipe@1.0.0: {}
|
||||
|
||||
unplugin-utils@0.3.1:
|
||||
@@ -34516,6 +34604,8 @@ snapshots:
|
||||
dependencies:
|
||||
loose-envify: 1.4.0
|
||||
|
||||
wasm-feature-detect@1.8.0: {}
|
||||
|
||||
watchpack@2.4.4:
|
||||
dependencies:
|
||||
glob-to-regexp: 0.4.1
|
||||
@@ -35135,6 +35225,8 @@ snapshots:
|
||||
compress-commons: 6.0.2
|
||||
readable-stream: 4.7.0
|
||||
|
||||
zlibjs@0.3.1: {}
|
||||
|
||||
zod@4.1.12: {}
|
||||
|
||||
zod@4.3.6: {}
|
||||
|
||||
@@ -51,6 +51,7 @@ export default class BuildHelper {
|
||||
"electron",
|
||||
"@electron/remote",
|
||||
"better-sqlite3",
|
||||
"pdfjs-dist",
|
||||
"./xhr-sync-worker.js",
|
||||
"vite"
|
||||
],
|
||||
|
||||
56
scripts/filter-tsc-output.mts
Normal file
56
scripts/filter-tsc-output.mts
Normal file
@@ -0,0 +1,56 @@
|
||||
/**
|
||||
* Runs `tsc --build` and filters out noisy cascade errors (TS6305).
|
||||
* Numbers each remaining error and prints a summary at the end.
|
||||
*/
|
||||
|
||||
import { execSync } from "child_process";
|
||||
|
||||
const SUPPRESSED_CODES = [ "TS6305" ];
|
||||
const ERROR_LINE_PATTERN = /^.+\(\d+,\d+\): error TS\d+:/;
|
||||
|
||||
let output: string;
|
||||
try {
|
||||
output = execSync("tsc --build", {
|
||||
encoding: "utf-8",
|
||||
stdio: [ "inherit", "pipe", "pipe" ]
|
||||
});
|
||||
} catch (err: unknown) {
|
||||
const execErr = err as { stdout?: string; stderr?: string };
|
||||
output = (execErr.stdout ?? "") + (execErr.stderr ?? "");
|
||||
}
|
||||
|
||||
const lines = output.split(/\r?\n/);
|
||||
const filtered = lines.filter(
|
||||
(line) => !SUPPRESSED_CODES.some((code) => line.includes(code))
|
||||
);
|
||||
|
||||
let errorIndex = 0;
|
||||
const numbered: string[] = [];
|
||||
const seen = new Set<string>();
|
||||
let skipContinuation = false;
|
||||
|
||||
for (const line of filtered) {
|
||||
if (ERROR_LINE_PATTERN.test(line)) {
|
||||
if (seen.has(line)) {
|
||||
skipContinuation = true;
|
||||
continue;
|
||||
}
|
||||
seen.add(line);
|
||||
skipContinuation = false;
|
||||
errorIndex++;
|
||||
numbered.push(`[${errorIndex}] ${line}`);
|
||||
} else if (line.trim()) {
|
||||
// Continuation line (indented context for multi-line errors)
|
||||
if (!skipContinuation) {
|
||||
numbered.push(line);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (errorIndex > 0) {
|
||||
console.log(numbered.join("\n"));
|
||||
console.log(`\n${errorIndex} error(s) found.`);
|
||||
process.exit(1);
|
||||
} else {
|
||||
console.log("No errors found.");
|
||||
}
|
||||
Reference in New Issue
Block a user