markup: restrict data URI scheme to safe image MIME types (#8174)

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
ᴊᴏᴇ ᴄʜᴇɴ
2026-02-10 22:26:31 -05:00
committed by GitHub
parent 5c67d47512
commit 441c64d7bd
2 changed files with 32 additions and 2 deletions

View File

@@ -1,6 +1,8 @@
package markup
import (
"net/url"
"strings"
"sync"
"github.com/microcosm-cc/bluemonday"
@@ -32,14 +34,28 @@ func NewSanitizer() {
sanitizer.policy.AllowAttrs("type").Matching(lazyregexp.New(`^checkbox$`).Regexp()).OnElements("input")
sanitizer.policy.AllowAttrs("checked", "disabled").OnElements("input")
// Data URLs
sanitizer.policy.AllowURLSchemes("data")
// Only allow data URIs with safe image MIME types to prevent XSS via
// "data:text/html" payloads.
sanitizer.policy.AllowURLSchemeWithCustomPolicy("data", isSafeDataURI)
// Custom URL-Schemes
sanitizer.policy.AllowURLSchemes(conf.Markdown.CustomURLSchemes...)
})
}
// isSafeDataURI reports whether the data URI u declares one of the image
// media types that are permitted to pass through the sanitizer. Every other
// media type (including an absent one) is rejected.
func isSafeDataURI(u *url.URL) bool {
	// The opaque part of a data URI looks like "mediatype;base64,data" or
	// "mediatype,data", so the media type is whatever precedes the first
	// ';' or ',' separator. With no separator the whole opaque part is used.
	mime := u.Opaque
	if end := strings.IndexAny(mime, ";,"); end >= 0 {
		mime = mime[:end]
	}

	// Compare case-insensitively and ignore surrounding whitespace.
	mime = strings.TrimSpace(strings.ToLower(mime))

	// Allow-list of common image media types.
	for _, allowed := range [...]string{
		"image/png",
		"image/jpeg",
		"image/gif",
		"image/webp",
		"image/x-icon",
	} {
		if mime == allowed {
			return true
		}
	}
	return false
}
// Sanitize takes a string that contains an HTML fragment or document and applies the policy whitelist.
func Sanitize(s string) string {
return sanitizer.policy.Sanitize(s)

View File

@@ -26,6 +26,20 @@ func Test_Sanitizer(t *testing.T) {
{input: `<input type="hidden">`, expVal: ``},
{input: `<input type="checkbox">`, expVal: `<input type="checkbox">`},
{input: `<input checked disabled autofocus>`, expVal: `<input checked="" disabled="">`},
// Data URIs: safe image types should be allowed
{input: `<img src="">`, expVal: `<img src="">`},
{input: `<img src="">`, expVal: `<img src="">`},
{input: `<img src="">`, expVal: `<img src="">`},
{input: `<img src="">`, expVal: `<img src="">`},
// Data URIs: text/html must be stripped to prevent XSS (GHSA-xrcr-gmf5-2r8j)
{input: `<a href="data:text/html;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">Click</a>`, expVal: `Click`},
{input: `<a href="data:text/html,<script>alert(1)</script>">XSS</a>`, expVal: `XSS`},
{input: `<img src="data:text/html;base64,abc">`, expVal: ``},
// Data URIs: SVG must be stripped (can contain embedded JavaScript)
{input: `<img src="">`, expVal: ``},
}
for _, test := range tests {
t.Run(test.input, func(t *testing.T) {