diff --git a/internal/markup/sanitizer.go b/internal/markup/sanitizer.go
index bb9b2000e..9cad25b8d 100644
--- a/internal/markup/sanitizer.go
+++ b/internal/markup/sanitizer.go
@@ -1,6 +1,8 @@
package markup
import (
+ "net/url"
+ "strings"
"sync"
"github.com/microcosm-cc/bluemonday"
@@ -32,14 +34,28 @@ func NewSanitizer() {
sanitizer.policy.AllowAttrs("type").Matching(lazyregexp.New(`^checkbox$`).Regexp()).OnElements("input")
sanitizer.policy.AllowAttrs("checked", "disabled").OnElements("input")
- // Data URLs
- sanitizer.policy.AllowURLSchemes("data")
+ // Only allow data URIs with safe image MIME types to prevent XSS via
+ // "data:text/html" payloads.
+ sanitizer.policy.AllowURLSchemeWithCustomPolicy("data", isSafeDataURI)
// Custom URL-Schemes
sanitizer.policy.AllowURLSchemes(conf.Markdown.CustomURLSchemes...)
})
}
+// isSafeDataURI reports whether the data URI u declares one of the allowlisted image MIME types (image/png, image/jpeg, image/gif, image/webp, image/x-icon); any other media type, including an empty one, is rejected.
+func isSafeDataURI(u *url.URL) bool {
+ // The opaque part of a data URI has the form "mediatype;base64,data" or "mediatype,data", so cutting at the first ";"
+ // and then at the first "," leaves only the media type, which is matched case-insensitively after trimming surrounding whitespace.
+ mediatype, _, _ := strings.Cut(u.Opaque, ";")
+ mediatype, _, _ = strings.Cut(mediatype, ",")
+ switch strings.TrimSpace(strings.ToLower(mediatype)) {
+ case "image/png", "image/jpeg", "image/gif", "image/webp", "image/x-icon":
+ return true
+ }
+ return false
+}
+
// Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist.
func Sanitize(s string) string {
return sanitizer.policy.Sanitize(s)
diff --git a/internal/markup/sanitizer_test.go b/internal/markup/sanitizer_test.go
index 83253bfdf..897f4f98f 100644
--- a/internal/markup/sanitizer_test.go
+++ b/internal/markup/sanitizer_test.go
@@ -26,6 +26,20 @@ func Test_Sanitizer(t *testing.T) {
{input: ``, expVal: ``},
{input: ``, expVal: ``},
{input: ``, expVal: ``},
+
+ // Data URIs: safe image types should be allowed
+ {input: `
`, expVal: `
`},
+ {input: `
`, expVal: `
`},
+ {input: `
`, expVal: `
`},
+ {input: `
`, expVal: `
`},
+
+ // Data URIs: text/html must be stripped to prevent XSS (GHSA-xrcr-gmf5-2r8j)
+ {input: `Click`, expVal: `Click`},
+ {input: `XSS`, expVal: `XSS`},
+ {input: `
`, expVal: ``},
+
+ // Data URIs: SVG must be stripped (can contain embedded JavaScript)
+ {input: `
`, expVal: ``},
}
for _, test := range tests {
t.Run(test.input, func(t *testing.T) {