From 441c64d7bd8893b2f4e48660a8be3a7472e14291 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E1=B4=8A=E1=B4=8F=E1=B4=87=20=E1=B4=84=CA=9C=E1=B4=87?= =?UTF-8?q?=C9=B4?= Date: Tue, 10 Feb 2026 22:26:31 -0500 Subject: [PATCH] markup: restrict data URI scheme to safe image MIME types (#8174) Co-authored-by: Claude Opus 4.6 --- internal/markup/sanitizer.go | 20 ++++++++++++++++++-- internal/markup/sanitizer_test.go | 14 ++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/internal/markup/sanitizer.go b/internal/markup/sanitizer.go index bb9b2000e..9cad25b8d 100644 --- a/internal/markup/sanitizer.go +++ b/internal/markup/sanitizer.go @@ -1,6 +1,8 @@ package markup import ( + "net/url" + "strings" "sync" "github.com/microcosm-cc/bluemonday" @@ -32,14 +34,28 @@ func NewSanitizer() { sanitizer.policy.AllowAttrs("type").Matching(lazyregexp.New(`^checkbox$`).Regexp()).OnElements("input") sanitizer.policy.AllowAttrs("checked", "disabled").OnElements("input") - // Data URLs - sanitizer.policy.AllowURLSchemes("data") + // Only allow data URIs with safe image MIME types to prevent XSS via + // "data:text/html" payloads. + sanitizer.policy.AllowURLSchemeWithCustomPolicy("data", isSafeDataURI) // Custom URL-Schemes sanitizer.policy.AllowURLSchemes(conf.Markdown.CustomURLSchemes...) }) } +// isSafeDataURI returns whether the given data URI uses a safe image MIME type. +func isSafeDataURI(u *url.URL) bool { + // The opaque data of a data URI has the form "mediatype;base64,data" or + // "mediatype,data". We only allow common image MIME types. + mediatype, _, _ := strings.Cut(u.Opaque, ";") + mediatype, _, _ = strings.Cut(mediatype, ",") + switch strings.TrimSpace(strings.ToLower(mediatype)) { + case "image/png", "image/jpeg", "image/gif", "image/webp", "image/x-icon": + return true + } + return false +} + // Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist. func Sanitize(s string) string { return sanitizer.policy.Sanitize(s) diff --git a/internal/markup/sanitizer_test.go b/internal/markup/sanitizer_test.go index 83253bfdf..897f4f98f 100644 --- a/internal/markup/sanitizer_test.go +++ b/internal/markup/sanitizer_test.go @@ -26,6 +26,20 @@ func Test_Sanitizer(t *testing.T) { {input: ``, expVal: ``}, {input: ``, expVal: ``}, {input: ``, expVal: ``}, + + // Data URIs: safe image types should be allowed + {input: ``, expVal: ``}, + {input: ``, expVal: ``}, + {input: ``, expVal: ``}, + {input: ``, expVal: ``}, + + // Data URIs: text/html must be stripped to prevent XSS (GHSA-xrcr-gmf5-2r8j) + {input: `Click`, expVal: `Click`}, + {input: `XSS`, expVal: `XSS`}, + {input: ``, expVal: ``}, + + // Data URIs: SVG must be stripped (can contain embedded JavaScript) + {input: ``, expVal: ``}, } for _, test := range tests { t.Run(test.input, func(t *testing.T) {