feat(markdown): support full feature set (math, admonitions)

This commit is contained in:
Elian Doran
2026-04-17 07:32:08 +03:00
parent 11b0bec47d
commit 822e7ffbab
6 changed files with 325 additions and 245 deletions

View File

@@ -43,9 +43,8 @@ describe("renderWithSourceLines", () => {
});
it("renders standard markdown constructs inside the wrappers", () => {
const html = renderWithSourceLines("# Heading\n\n- item\n");
expect(html).toContain("<h1");
expect(html).toContain("Heading");
const html = renderWithSourceLines("## Heading\n\n- item\n");
expect(html).toContain("<h2>Heading</h2>");
expect(html).toContain("<ul>");
expect(html).toContain("<li>item</li>");
});
@@ -60,4 +59,19 @@ describe("renderWithSourceLines", () => {
const html = renderWithSourceLines(src);
expect(html).toContain('href="https://example.com"');
});
it("normalizes fenced code languages to CKEditor MIME identifiers for syntax highlighting", () => {
const html = renderWithSourceLines("```javascript\nconst x = 1;\n```");
expect(html).toMatch(/class="language-application-javascript-env-(backend|frontend)"/);
});
it("produces CKEditor admonition markup for GFM callouts", () => {
const html = renderWithSourceLines("> [!NOTE]\n> heads up");
expect(html).toContain('<aside class="admonition note">');
});
it("produces math-tex spans for inline math", () => {
const html = renderWithSourceLines("Energy: $e=mc^2$.");
expect(html).toContain('<span class="math-tex">');
});
});

View File

@@ -1,8 +1,9 @@
import "./Markdown.css";
import VanillaCodeMirror from "@triliumnext/codemirror";
import { renderToHtml } from "@triliumnext/commons";
import DOMPurify from "dompurify";
import { Marked, type TokensList } from "marked";
import { Marked } from "marked";
import { RefObject } from "preact";
import { useEffect, useMemo, useRef, useState } from "preact/hooks";
@@ -14,7 +15,7 @@ const marked = new Marked({ breaks: true, gfm: true });
export default function Markdown(props: TypeWidgetProps) {
const [ content, setContent ] = useState("");
const html = useMemo(() => DOMPurify.sanitize(renderWithSourceLines(content), { ADD_ATTR: [ "data-source-line" ] }), [ content ]);
const html = useMemo(() => renderWithSourceLines(content), [ content ]);
const previewRef = useRef<HTMLDivElement>(null);
const editorRef = useRef<VanillaCodeMirror>(null);
@@ -126,22 +127,39 @@ function useSyncedHighlight(editorRef: RefObject<VanillaCodeMirror>, previewRef:
}, [ editorRef, previewRef, html ]);
}
/** Token types the parser emits but which don't produce top-level block HTML. */
const NON_RENDERED_TOKENS = new Set([ "space", "def" ]);
/**
* Render markdown and tag each top-level block with its 1-indexed source line,
* so the preview can be scrolled to match the editor. Marked does not emit
* source positions (markedjs/marked#1267) so we count newlines in `raw` ourselves.
* so the preview can be scrolled to match the editor. Uses the shared
* `renderToHtml` pipeline (admonitions, math, tables, etc.) with DOMPurify for
* sanitization, then walks the rendered DOM and pairs each top-level child
* with the matching lexer token's start line. Marked does not emit source
* positions (markedjs/marked#1267) so we count newlines in `raw` ourselves.
*/
export function renderWithSourceLines(src: string): string {
// Compute the start line of each renderable top-level token in source order.
const tokens = marked.lexer(src);
const lines: number[] = [];
let line = 1;
const parts: string[] = [];
for (const token of tokens) {
const startLine = line;
line += (token.raw.match(/\n/g) ?? []).length;
if (token.type === "space") continue;
const sub = [ token ] as unknown as TokensList;
(sub as TokensList).links = tokens.links;
parts.push(`<div data-source-line="${startLine}">${marked.parser(sub)}</div>`);
if (!NON_RENDERED_TOKENS.has(token.type)) lines.push(startLine);
}
const html = renderToHtml(src, "", { sanitize: (h) => DOMPurify.sanitize(h) });
if (!html) return "";
const container = document.createElement("div");
container.innerHTML = html;
const parts: string[] = [];
const children = Array.from(container.children);
for (let i = 0; i < children.length; i++) {
const sourceLine = lines[i] ?? lines[lines.length - 1] ?? 1;
parts.push(`<div data-source-line="${sourceLine}">${children[i].outerHTML}</div>`);
}
return parts.join("");
}

View File

@@ -1,16 +1,10 @@
import { ADMONITION_TYPE_MAPPINGS } from "@triliumnext/commons";
import { gfm } from "@triliumnext/turndown-plugin-gfm";
import Turnish, { type Rule } from "turnish";
let instance: Turnish | null = null;
// TODO: Move this to a dedicated file someday.
export const ADMONITION_TYPE_MAPPINGS: Record<string, string> = {
note: "NOTE",
tip: "TIP",
important: "IMPORTANT",
caution: "CAUTION",
warning: "WARNING"
};
export { ADMONITION_TYPE_MAPPINGS };
export const DEFAULT_ADMONITION_TYPE = ADMONITION_TYPE_MAPPINGS.note;

View File

@@ -1,233 +1,11 @@
import { renderToHtml as renderToHtmlShared } from "@triliumnext/commons";
import { getMimeTypeFromMarkdownName, MIME_TYPE_AUTO, normalizeMimeTypeForCKEditor, transclusionExtension, wikiLinkExtension } from "@triliumnext/commons";
import { parse, Renderer, type Tokens, use } from "marked";
import { ADMONITION_TYPE_MAPPINGS } from "../export/markdown.js";
import htmlSanitizer from "../html_sanitizer.js";
import utils from "../utils.js";
import importUtils from "./utils.js";
const escape = utils.escapeHtml;
/**
* Keep renderer code up to date with https://github.com/markedjs/marked/blob/master/src/Renderer.ts.
*/
class CustomMarkdownRenderer extends Renderer {
override heading(data: Tokens.Heading): string {
// Treat h1 as raw text.
if (data.depth === 1) {
return `<h1>${data.text}</h1>`;
}
return super.heading(data).trimEnd();
}
override paragraph(data: Tokens.Paragraph): string {
return super.paragraph(data).trimEnd();
}
override code({ text, lang }: Tokens.Code): string {
if (!text) {
return "";
}
// Escape the HTML.
text = escape(text);
// Unescape &quot
text = text.replace(/&quot;/g, '"');
const ckEditorLanguage = getNormalizedMimeFromMarkdownLanguage(lang);
return `<pre><code class="language-${ckEditorLanguage}">${text}</code></pre>`;
}
override list(token: Tokens.List): string {
let result = super.list(token)
.replace("\n", "") // we replace the first one only.
.trimEnd();
// Handle todo-list in the CKEditor format.
if (token.items.some(item => item.task)) {
result = result.replace(/^<ul>/, "<ul class=\"todo-list\">");
}
return result;
}
override checkbox({ checked }: Tokens.Checkbox): string {
return `<input type="checkbox"${
checked ? 'checked="checked" ' : ''
}disabled="disabled">`;
}
override listitem(item: Tokens.ListItem): string {
// Handle todo-list in the CKEditor format.
if (item.task) {
let itemBody = '';
const checkbox = this.checkbox({ checked: !!item.checked, raw: "- [ ]", type: "checkbox" });
if (item.loose) {
if (item.tokens[0]?.type === 'paragraph') {
item.tokens[0].text = checkbox + item.tokens[0].text;
if (item.tokens[0].tokens && item.tokens[0].tokens.length > 0 && item.tokens[0].tokens[0].type === 'text') {
item.tokens[0].tokens[0].text = checkbox + escape(item.tokens[0].tokens[0].text);
item.tokens[0].tokens[0].escaped = true;
}
} else {
item.tokens.unshift({
type: 'text',
raw: checkbox,
text: checkbox,
escaped: true,
});
}
} else {
itemBody += checkbox;
}
itemBody += `<span class="todo-list__label__description">${this.parser.parse(item.tokens.filter(t => t.type !== "checkbox"))}</span>`;
return `<li><label class="todo-list__label">${itemBody}</label></li>`;
}
return super.listitem(item).trimEnd();
}
override image(token: Tokens.Image): string {
return super.image(token)
.replace(` alt=""`, "");
}
override blockquote({ tokens }: Tokens.Blockquote): string {
const body = renderer.parser.parse(tokens);
const admonitionMatch = /^<p>\[\!([A-Z]+)\]/.exec(body);
if (Array.isArray(admonitionMatch) && admonitionMatch.length === 2) {
const type = admonitionMatch[1].toLowerCase();
if (ADMONITION_TYPE_MAPPINGS[type]) {
const bodyWithoutHeader = body
.replace(/^<p>\[\!([A-Z]+)\]\s*/, "<p>")
.replace(/^<p><\/p>/, ""); // Having a heading will generate an empty paragraph that we need to remove.
return `<aside class="admonition ${type}">${bodyWithoutHeader.trim()}</aside>`;
}
}
return `<blockquote>${body}</blockquote>`;
}
codespan({ text }: Tokens.Codespan): string {
return `<code spellcheck="false">${escape(text)}</code>`;
}
function renderToHtml(content: string, title: string): string {
return renderToHtmlShared(content, title, { sanitize: htmlSanitizer.sanitize });
}
function renderToHtml(content: string, title: string) {
// Double-escape slashes in math expression because they are otherwise consumed by the parser somewhere.
content = content.replaceAll("\\$", "\\\\$");
// Extract formulas and replace them with placeholders to prevent interference from Markdown rendering
const { processedText, placeholderMap: formulaMap } = extractFormulas(content);
use({
// Order is important, especially for wikilinks.
extensions: [
transclusionExtension,
wikiLinkExtension
]
});
let html = parse(processedText, {
async: false,
renderer
}) as string;
// After rendering, replace placeholders back with the formula HTML
html = restoreFromMap(html, formulaMap);
// h1 handling needs to come before sanitization
html = importUtils.handleH1(html, title);
html = htmlSanitizer.sanitize(html);
// Add a trailing semicolon to CSS styles.
html = html.replaceAll(/(<(img|figure|col).*?style=".*?)"/g, "$1;\"");
// Remove slash for self-closing tags to match CKEditor's approach.
html = html.replace(/<(\w+)([^>]*)\s+\/>/g, "<$1$2>");
// Normalize non-breaking spaces to entity.
html = html.replaceAll("\u00a0", "&nbsp;");
return html;
}
function getNormalizedMimeFromMarkdownLanguage(language: string | undefined) {
if (language) {
const mimeDefinition = getMimeTypeFromMarkdownName(language);
if (mimeDefinition) {
return normalizeMimeTypeForCKEditor(mimeDefinition.mime);
}
}
return MIME_TYPE_AUTO;
}
function extractCodeBlocks(text: string): { processedText: string, placeholderMap: Map<string, string> } {
const codeMap = new Map<string, string>();
let id = 0;
const timestamp = Date.now();
// Multi-line code block and Inline code
text = text.replace(/```[\s\S]*?```/g, (m) => {
const key = `<!--CODE_BLOCK_${timestamp}_${id++}-->`;
codeMap.set(key, m);
return key;
}).replace(/`[^`\n]+`/g, (m) => {
const key = `<!--INLINE_CODE_${timestamp}_${id++}-->`;
codeMap.set(key, m);
return key;
});
return { processedText: text, placeholderMap: codeMap };
}
function extractFormulas(text: string): { processedText: string, placeholderMap: Map<string, string> } {
// Protect the $ signs inside code blocks from being recognized as formulas.
const { processedText: noCodeText, placeholderMap: codeMap } = extractCodeBlocks(text);
const formulaMap = new Map<string, string>();
let id = 0;
const timestamp = Date.now();
// Display math and Inline math
let processedText = noCodeText.replace(/(?<!\\)\$\$((?:(?!\n{2,})[\s\S])+?)\$\$/g, (_, formula) => {
const key = `<!--FORMULA_BLOCK_${timestamp}_${id++}-->`;
const rendered = `<span class="math-tex">\\[${formula}\\]</span>`;
formulaMap.set(key, rendered);
return key;
}).replace(/(?<!\\)\$(.+?)\$/g, (_, formula) => {
const key = `<!--FORMULA_INLINE_${timestamp}_${id++}-->`;
const rendered = `<span class="math-tex">\\(${formula}\\)</span>`;
formulaMap.set(key, rendered);
return key;
});
processedText = restoreFromMap(processedText, codeMap);
return { processedText, placeholderMap: formulaMap };
}
function restoreFromMap(text: string, map: Map<string, string>): string {
if (map.size === 0) return text;
const pattern = [...map.keys()]
.map(k => k.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
.join('|');
return text.replace(new RegExp(pattern, 'g'), match => map.get(match) ?? match);
}
const renderer = new CustomMarkdownRenderer({ async: false });
export default {
renderToHtml
};

View File

@@ -18,3 +18,4 @@ export { default as BUILTIN_ATTRIBUTES } from "./lib/builtin_attributes.js";
export * from "./lib/spreadsheet/render_to_html.js";
export * from "./lib/llm_api.js";
export * from "./lib/marked_extensions.js";
export * from "./lib/markdown_renderer.js";

View File

@@ -0,0 +1,275 @@
import { Marked, Renderer, type Tokens } from "marked";
import { getMimeTypeFromMarkdownName, MIME_TYPE_AUTO, normalizeMimeTypeForCKEditor } from "./mime_type.js";
import { transclusionExtension, wikiLinkExtension } from "./marked_extensions.js";
/**
 * Supported admonition types (the same five callouts GitHub supports).
 *
 * Each key is the lower-case id that ends up in the rendered
 * `<aside class="admonition …">` markup; each value is the upper-case
 * spelling of the same keyword, i.e. the form accepted by the renderer's
 * `[!TYPE]` marker check.
 */
export const ADMONITION_TYPE_MAPPINGS: Record<string, string> = Object.fromEntries(
    [ "note", "tip", "important", "caution", "warning" ]
        .map((id): [ string, string ] => [ id, id.toUpperCase() ])
);
/**
 * Options for {@link renderToHtml}.
 *
 * The renderer itself is environment-agnostic; anything that differs between
 * Node and the browser is injected through this object.
 */
export interface RenderToHtmlOptions {
    /**
     * HTML sanitizer. Required — each environment plugs in its own
     * (for example the server's HTML sanitizer, or a wrapper around
     * `DOMPurify.sanitize` on the client).
     *
     * `renderToHtml` calls it exactly once, on the HTML produced after the
     * `<h1>` handling step, and applies its final string normalizations to
     * whatever this function returns.
     */
    sanitize: (dirtyHtml: string) => string;
}
/**
 * Escape the five HTML-significant characters (`&`, `<`, `>`, `"`, `'`) so
 * the string can be embedded in element content or a quoted attribute.
 *
 * @param str raw text.
 * @returns the text with each special character replaced by its entity.
 */
function escapeHtml(str: string): string {
    return str.replace(/[&<>"']/g, (ch) => {
        switch (ch) {
            case "&":
                return "&amp;";
            case "<":
                return "&lt;";
            case ">":
                return "&gt;";
            case '"':
                return "&quot;";
            default:
                // Only the apostrophe is left in the character class.
                return "&#39;";
        }
    });
}
/** The small set of named character references that {@link unescapeHtml} decodes. */
const NAMED_ENTITIES: Record<string, string> = {
    amp: "&",
    lt: "<",
    gt: ">",
    quot: '"',
    apos: "'",
    nbsp: "\u00a0"
};

/**
 * Decode HTML character references in a string.
 *
 * Handles decimal (`&#65;`) and hexadecimal (`&#x41;` / `&#X41;`) numeric
 * references plus the names listed in `NAMED_ENTITIES`. Anything that cannot
 * be decoded — an unknown name, or a numeric reference beyond U+10FFFF — is
 * left in the text exactly as written instead of throwing.
 *
 * @param str text possibly containing character references.
 * @returns the text with every recognised reference replaced by its character.
 */
function unescapeHtml(str: string): string {
    return str.replace(/&(#\d+|#[xX][0-9a-fA-F]+|\w+);/g, (match, entity: string) => {
        if (entity.startsWith("#")) {
            const isHex = entity[1] === "x" || entity[1] === "X";
            const codePoint = isHex
                ? parseInt(entity.slice(2), 16)
                : parseInt(entity.slice(1), 10);
            // String.fromCodePoint throws RangeError above the Unicode maximum.
            return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
        }

        // Own-property check: a plain lookup would resolve names such as
        // "constructor" or "toString" through Object.prototype.
        return Object.prototype.hasOwnProperty.call(NAMED_ENTITIES, entity)
            ? (NAMED_ENTITIES[entity] ?? match)
            : match;
    });
}
/**
 * Translate the language tag of a fenced code block into the MIME-based
 * identifier used in the `language-…` class of the rendered `<code>` element.
 *
 * @param language the info string of the fence, if any.
 * @returns the normalized MIME identifier, or `MIME_TYPE_AUTO` when no
 *          language was given or it is not a known markdown language name.
 */
function getNormalizedMimeFromMarkdownLanguage(language: string | undefined): string {
    if (!language) {
        return MIME_TYPE_AUTO;
    }

    const definition = getMimeTypeFromMarkdownName(language);
    return definition
        ? normalizeMimeTypeForCKEditor(definition.mime)
        : MIME_TYPE_AUTO;
}
/**
 * Demote every text-only `<h1>` to `<h2>`, and drop the very first one when
 * its (entity-decoded, trimmed) text equals the trimmed `title`.
 *
 * Only headings whose content contains no `<` are matched, so an `<h1>` with
 * nested markup is left untouched.
 *
 * @param content HTML to post-process.
 * @param title title to compare the first heading against.
 * @returns the HTML with the headings rewritten.
 */
function handleH1(content: string, title: string): string {
    let firstSeen = false;

    return content.replace(/<h1[^>]*>([^<]*)<\/h1>/gi, (_whole, inner: string) => {
        const heading = unescapeHtml(inner);
        const isFirst = !firstSeen;
        firstSeen = true;

        // Only the first matched heading may be removed as a duplicate of the title.
        if (isFirst && title.trim() === heading.trim()) {
            return "";
        }
        return `<h2>${heading}</h2>`;
    });
}
/**
 * Swap every triple-backtick fenced block and every single-line inline code
 * span for a unique HTML-comment placeholder.
 *
 * @param text markdown source.
 * @returns the text with placeholders, and a map from placeholder to the
 *          original code text (including its backticks).
 */
function extractCodeBlocks(text: string): { processedText: string; placeholderMap: Map<string, string> } {
    const codeMap = new Map<string, string>();
    let id = 0;
    const timestamp = Date.now();

    // Fenced blocks first, so backticks inside them are not seen as inline code.
    text = text
        .replace(/```[\s\S]*?```/g, (m) => {
            const key = `<!--CODE_BLOCK_${timestamp}_${id++}-->`;
            codeMap.set(key, m);
            return key;
        })
        .replace(/`[^`\n]+`/g, (m) => {
            const key = `<!--INLINE_CODE_${timestamp}_${id++}-->`;
            codeMap.set(key, m);
            return key;
        });

    return { processedText: text, placeholderMap: codeMap };
}

/**
 * Replace display math (`$$…$$`) and inline math (`$…$`) with placeholders so
 * the markdown parser does not touch the formula text.
 *
 * Code is hidden first so `$` signs inside code are not treated as math
 * delimiters; a `$` preceded by a backslash never opens a formula. The
 * returned text has the code restored and only the formula placeholders left.
 *
 * @param text markdown source.
 * @returns the text with formula placeholders, and a map from placeholder to
 *          the final `<span class="math-tex">` HTML for that formula.
 */
function extractFormulas(text: string): { processedText: string; placeholderMap: Map<string, string> } {
    const { processedText: noCodeText, placeholderMap: codeMap } = extractCodeBlocks(text);
    const formulaMap = new Map<string, string>();
    let id = 0;
    const timestamp = Date.now();

    // A formula may span a code placeholder (e.g. "$a `c` b$"). That placeholder
    // is about to leave the text and live inside the formula HTML, so put the
    // code back right here — otherwise it survives only as a stray HTML comment.
    const withCode = (formula: string): string => restoreFromMap(formula, codeMap);

    let processedText = noCodeText
        .replace(/(?<!\\)\$\$((?:(?!\n{2,})[\s\S])+?)\$\$/g, (_, formula: string) => {
            const key = `<!--FORMULA_BLOCK_${timestamp}_${id++}-->`;
            formulaMap.set(key, `<span class="math-tex">\\[${withCode(formula)}\\]</span>`);
            return key;
        })
        .replace(/(?<!\\)\$(.+?)\$/g, (_, formula: string) => {
            const key = `<!--FORMULA_INLINE_${timestamp}_${id++}-->`;
            formulaMap.set(key, `<span class="math-tex">\\(${withCode(formula)}\\)</span>`);
            return key;
        });

    processedText = restoreFromMap(processedText, codeMap);
    return { processedText, placeholderMap: formulaMap };
}

/**
 * Replace every occurrence of each key of `map` in `text` with its value.
 *
 * Keys are matched literally (regex metacharacters are escaped) and values
 * are inserted verbatim (no `$1`-style expansion).
 *
 * @param text text containing placeholders.
 * @param map placeholder → replacement.
 * @returns the text with all placeholders substituted.
 */
function restoreFromMap(text: string, map: Map<string, string>): string {
    if (map.size === 0) return text;

    const pattern = [ ...map.keys() ]
        .map((k) => k.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
        .join("|");
    return text.replace(new RegExp(pattern, "g"), (match) => map.get(match) ?? match);
}
/**
 * Marked renderer that emits the HTML shapes this pipeline expects:
 * MIME-based `language-…` classes on code blocks, `todo-list` markup for task
 * lists, `<aside class="admonition …">` for `[!TYPE]` blockquotes, and output
 * without the trailing newlines marked normally appends.
 *
 * Keep renderer code up to date with https://github.com/markedjs/marked/blob/master/src/Renderer.ts.
 */
class CustomMarkdownRenderer extends Renderer {

    /**
     * Level-1 headings are emitted with the token's raw `text` (no inline
     * rendering); all other levels use marked's default, minus trailing whitespace.
     */
    override heading(data: Tokens.Heading): string {
        if (data.depth === 1) {
            return `<h1>${data.text}</h1>`;
        }
        return super.heading(data).trimEnd();
    }

    /** Default paragraph rendering without the trailing newline. */
    override paragraph(data: Tokens.Paragraph): string {
        return super.paragraph(data).trimEnd();
    }

    /**
     * Render a code block as `<pre><code class="language-…">`. The text is
     * HTML-escaped except for double quotes, which are kept literal. Empty
     * code blocks produce no output.
     */
    override code({ text, lang }: Tokens.Code): string {
        if (!text) return "";

        text = escapeHtml(text).replace(/&quot;/g, '"');
        const ckEditorLanguage = getNormalizedMimeFromMarkdownLanguage(lang);
        return `<pre><code class="language-${ckEditorLanguage}">${text}</code></pre>`;
    }

    /**
     * Default list rendering with the first newline and trailing whitespace
     * removed. An unordered list containing at least one task item gets
     * `class="todo-list"`.
     */
    override list(token: Tokens.List): string {
        let result = super.list(token)
            .replace("\n", "") // String pattern: only the first newline is removed.
            .trimEnd();

        if (token.items.some((item) => item.task)) {
            result = result.replace(/^<ul>/, '<ul class="todo-list">');
        }
        return result;
    }

    /** Render a disabled checkbox, with `checked="checked"` when ticked. */
    override checkbox({ checked }: Tokens.Checkbox): string {
        // Attributes must be separated by whitespace to form well-formed HTML.
        return `<input type="checkbox" ${
            checked ? 'checked="checked" ' : ""
        }disabled="disabled">`;
    }

    /**
     * Task items are wrapped in `todo-list__label` / `todo-list__label__description`
     * markup; every other item uses the default rendering without trailing whitespace.
     */
    override listitem(item: Tokens.ListItem): string {
        if (item.task) {
            let itemBody = "";
            const checkbox = this.checkbox({ checked: !!item.checked, raw: "- [ ]", type: "checkbox" });
            if (item.loose) {
                // Loose items: the checkbox is injected into the item's own tokens
                // so that it is rendered as part of the item content.
                if (item.tokens[0]?.type === "paragraph") {
                    item.tokens[0].text = checkbox + item.tokens[0].text;
                    if (item.tokens[0].tokens && item.tokens[0].tokens.length > 0 && item.tokens[0].tokens[0].type === "text") {
                        // The checkbox is raw HTML, so the user's text is escaped
                        // here and the token marked as already escaped.
                        item.tokens[0].tokens[0].text = checkbox + escapeHtml(item.tokens[0].tokens[0].text);
                        item.tokens[0].tokens[0].escaped = true;
                    }
                } else {
                    item.tokens.unshift({
                        type: "text",
                        raw: checkbox,
                        text: checkbox,
                        escaped: true
                    });
                }
            } else {
                itemBody += checkbox;
            }

            // "checkbox" tokens are filtered out: the checkbox was already emitted above.
            itemBody += `<span class="todo-list__label__description">${this.parser.parse(item.tokens.filter((t) => t.type !== "checkbox"))}</span>`;
            return `<li><label class="todo-list__label">${itemBody}</label></li>`;
        }

        return super.listitem(item).trimEnd();
    }

    /** Default image rendering with an empty `alt=""` attribute removed. */
    override image(token: Tokens.Image): string {
        return super.image(token).replace(` alt=""`, "");
    }

    /**
     * A blockquote whose first paragraph starts with an upper-case `[!TYPE]`
     * marker naming a key of `ADMONITION_TYPE_MAPPINGS` is rendered as
     * `<aside class="admonition type">` with the marker removed; anything else
     * is rendered as a regular `<blockquote>`.
     */
    override blockquote({ tokens }: Tokens.Blockquote): string {
        const body = this.parser.parse(tokens);

        const admonitionMatch = /^<p>\[\!([A-Z]+)\]/.exec(body);
        if (admonitionMatch) {
            const type = admonitionMatch[1].toLowerCase();

            // Own-property check: a plain truthiness lookup would also accept
            // inherited names such as "constructor" or "tostring".
            if (Object.prototype.hasOwnProperty.call(ADMONITION_TYPE_MAPPINGS, type)) {
                const bodyWithoutHeader = body
                    .replace(/^<p>\[\!([A-Z]+)\]\s*/, "<p>")
                    .replace(/^<p><\/p>/, ""); // A marker on its own leaves an empty paragraph behind.

                return `<aside class="admonition ${type}">${bodyWithoutHeader.trim()}</aside>`;
            }
        }

        return `<blockquote>${body}</blockquote>`;
    }

    /** Inline code, HTML-escaped, with spell checking disabled. */
    override codespan({ text }: Tokens.Codespan): string {
        return `<code spellcheck="false">${escapeHtml(text)}</code>`;
    }

}
/**
 * Render markdown to CKEditor-compatible HTML. Produces the same output the
 * server-side `/api/other/render-markdown` endpoint emits, but sanitization is
 * delegated to the caller so this works in both Node and the browser.
 *
 * Steps: protect math formulas behind placeholders, parse the markdown with
 * the custom renderer and the transclusion / wiki-link extensions, restore
 * the formulas, post-process `<h1>` elements against `title`, sanitize, then
 * apply a few string normalizations to the sanitized HTML.
 *
 * @param content markdown source.
 * @param title title that the first `<h1>` is compared against.
 * @param options environment-specific hooks; see {@link RenderToHtmlOptions}.
 * @returns the rendered, sanitized HTML.
 */
export function renderToHtml(content: string, title: string, options: RenderToHtmlOptions): string {
    // Double-escape slashes in math expressions — otherwise the parser consumes them.
    const escapedSource = content.replaceAll("\\$", "\\\\$");
    const formulas = extractFormulas(escapedSource);

    // A fresh parser per call keeps the extensions off marked's global instance.
    const parser = new Marked({ async: false });
    // Order is important, especially for wikilinks.
    parser.use({ extensions: [ transclusionExtension, wikiLinkExtension ] });

    const rendered = parser.parse(formulas.processedText, {
        async: false,
        renderer: new CustomMarkdownRenderer({ async: false })
    }) as string;

    const withFormulas = restoreFromMap(rendered, formulas.placeholderMap);

    // h1 handling needs to come before sanitization.
    const sanitized = options.sanitize(handleH1(withFormulas, title));

    return sanitized
        // Add a trailing semicolon to CSS styles.
        .replaceAll(/(<(img|figure|col).*?style=".*?)"/g, '$1;"')
        // Remove slash for self-closing tags to match CKEditor's approach.
        .replace(/<(\w+)([^>]*)\s+\/>/g, "<$1$2>")
        // Normalize non-breaking spaces to entity.
        .replaceAll("\u00a0", "&nbsp;");
}