// isSafeDataURI reports whether the given data URI declares one of the
// allow-listed image MIME types.
//
// It serves as the custom policy for the "data" URL scheme: inline images keep
// working, while payloads such as "data:text/html,..." (XSS) or
// "data:image/svg+xml,..." (SVG can embed JavaScript) are rejected.
//
// A nil URL is rejected instead of dereferenced, so the check fails closed.
func isSafeDataURI(u *url.URL) bool {
	if u == nil {
		return false
	}

	// The opaque part of a data URI has the form "mediatype;base64,data" or
	// "mediatype,data", so the media type ends at the first ';' or ','. When
	// neither separator is present the whole opaque part is the media type.
	mediatype := u.Opaque
	if i := strings.IndexAny(mediatype, ";,"); i >= 0 {
		mediatype = mediatype[:i]
	}

	// MIME types are case-insensitive; surrounding whitespace is ignored.
	switch strings.TrimSpace(strings.ToLower(mediatype)) {
	case "image/png", "image/jpeg", "image/gif", "image/webp", "image/x-icon":
		return true
	default:
		return false
	}
}
func Sanitize(s string) string { return sanitizer.policy.Sanitize(s) diff --git a/internal/markup/sanitizer_test.go b/internal/markup/sanitizer_test.go index 83253bfdf..897f4f98f 100644 --- a/internal/markup/sanitizer_test.go +++ b/internal/markup/sanitizer_test.go @@ -26,6 +26,20 @@ func Test_Sanitizer(t *testing.T) { {input: ``, expVal: ``}, {input: ``, expVal: ``}, {input: ``, expVal: ``}, + + // Data URIs: safe image types should be allowed + {input: ``, expVal: ``}, + {input: ``, expVal: ``}, + {input: ``, expVal: ``}, + {input: ``, expVal: ``}, + + // Data URIs: text/html must be stripped to prevent XSS (GHSA-xrcr-gmf5-2r8j) + {input: `Click`, expVal: `Click`}, + {input: `XSS`, expVal: `XSS`}, + {input: ``, expVal: ``}, + + // Data URIs: SVG must be stripped (can contain embedded JavaScript) + {input: ``, expVal: ``}, } for _, test := range tests { t.Run(test.input, func(t *testing.T) {