diff --git a/apps/server/src/services/search/services/search.ts b/apps/server/src/services/search/services/search.ts index 0523aeb98f..97bfc457a4 100644 --- a/apps/server/src/services/search/services/search.ts +++ b/apps/server/src/services/search/services/search.ts @@ -8,6 +8,7 @@ import SearchContext from "../search_context.js"; import becca from "../../../becca/becca.js"; import beccaService from "../../../becca/becca_service.js"; import { normalize, removeDiacritic, escapeHtml, escapeRegExp } from "../../utils.js"; +import { stripHtmlTags } from "../utils/text_utils.js"; import log from "../../log.js"; import hoistedNoteService from "../../hoisted_note.js"; import type BNote from "../../../becca/entities/bnote.js"; @@ -494,10 +495,9 @@ function extractContentSnippet(noteId: string, searchTokens: string[], maxLength return ""; // Protected but no session available } - // Strip HTML tags for text notes — use fast regex for snippet extraction - // (striptags library is ~18x slower and not needed for search snippets) + // Strip HTML tags for text notes if (note.type === "text") { - content = content.replace(/<[^>]*>/g, ""); + content = stripHtmlTags(content); } if (!content) { diff --git a/apps/server/src/services/search/utils/text_utils.spec.ts b/apps/server/src/services/search/utils/text_utils.spec.ts index a5f1da129d..146f5cc0fe 100644 --- a/apps/server/src/services/search/utils/text_utils.spec.ts +++ b/apps/server/src/services/search/utils/text_utils.spec.ts @@ -1,5 +1,5 @@ import { describe, it, expect } from "vitest"; -import { calculateOptimizedEditDistance, validateFuzzySearchTokens, fuzzyMatchWord } from './text_utils.js'; +import { calculateOptimizedEditDistance, validateFuzzySearchTokens, fuzzyMatchWord, stripHtmlTags } from './text_utils.js'; describe('Fuzzy Search Core', () => { describe('calculateOptimizedEditDistance', () => { @@ -62,4 +62,69 @@ describe('Fuzzy Search Core', () => { expect(fuzzyMatchWord('a', 'b')).toBe(false); // Very short tokens }); }); + + describe('stripHtmlTags', () => { + it('strips simple HTML tags', () => { + expect(stripHtmlTags('
Hello
')).toBe('Hello'); + expect(stripHtmlTags('
')).toBe('Image: ');
+ });
+
+ it('handles tags with attributes', () => {
+ expect(stripHtmlTags('Link')).toBe('Link');
+ expect(stripHtmlTags('