mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-10-31 19:06:18 +01:00 
			
		
		
		
	Markdown: Sanitizer Configuration (#9075)
* Support custom sanitization policy Allowing the gitea administrator to configure sanitization policy allows them to couple external renders and custom templates to support more markup. In particular, the `pandoc` renderer allows generating KaTeX annotations, wrapping them in `<span>` elements with class `math` and either `inline` or `display` (depending on whether or not inline or block mode was requested). This iteration gives the administrator whitelisting powers; carefully crafted regexes will thus let through only the desired attributes necessary to support their custom markup. Resolves: #9054 Signed-off-by: Alexander Scheel <alexander.m.scheel@gmail.com> * Document new sanitization configuration - Adds basic documentation to app.ini.sample, - Adds an example to the Configuration Cheat Sheet, and - Adds extended information to External Renderers section. Signed-off-by: Alexander Scheel <alexander.m.scheel@gmail.com> * Drop extraneous length check in newMarkupSanitizer(...) Signed-off-by: Alexander Scheel <alexander.m.scheel@gmail.com> * Fix plural ELEMENT and ALLOW_ATTR in docs These were left over from their initial names. Make them singular to conform with the current expectations. Signed-off-by: Alexander Scheel <alexander.m.scheel@gmail.com>
This commit is contained in:
		
				
					committed by
					
						 techknowlogick
						techknowlogick
					
				
			
			
				
	
			
			
			
						parent
						
							cecc31951c
						
					
				
				
					commit
					ee7df7ba8c
				
			| @@ -877,6 +877,12 @@ SHOW_FOOTER_VERSION = true | |||||||
| ; Show template execution time in the footer | ; Show template execution time in the footer | ||||||
| SHOW_FOOTER_TEMPLATE_LOAD_TIME = true | SHOW_FOOTER_TEMPLATE_LOAD_TIME = true | ||||||
|  |  | ||||||
|  | [markup.sanitizer] | ||||||
|  | ; The following keys can be used multiple times to define sanitization policy rules. | ||||||
|  | ;ELEMENT = span | ||||||
|  | ;ALLOW_ATTR = class | ||||||
|  | ;REGEXP = ^(info|warning|error)$ | ||||||
|  |  | ||||||
| [markup.asciidoc] | [markup.asciidoc] | ||||||
| ENABLED = false | ENABLED = false | ||||||
| ; List of file extensions that should be rendered by an external command | ; List of file extensions that should be rendered by an external command | ||||||
|   | |||||||
| @@ -578,6 +578,24 @@ Two special environment variables are passed to the render command: | |||||||
| - `GITEA_PREFIX_SRC`, which contains the current URL prefix in the `src` path tree. To be used as prefix for links. | - `GITEA_PREFIX_SRC`, which contains the current URL prefix in the `src` path tree. To be used as prefix for links. | ||||||
| - `GITEA_PREFIX_RAW`, which contains the current URL prefix in the `raw` path tree. To be used as prefix for image paths. | - `GITEA_PREFIX_RAW`, which contains the current URL prefix in the `raw` path tree. To be used as prefix for image paths. | ||||||
|  |  | ||||||
|  |  | ||||||
|  | Gitea supports customizing the sanitization policy for rendered HTML. The example below will support KaTeX output from pandoc. | ||||||
|  |  | ||||||
|  | ```ini | ||||||
|  | [markup.sanitizer] | ||||||
|  | ; Pandoc renders TeX segments as <span>s with the "math" class, optionally | ||||||
|  | ; with "inline" or "display" classes depending on context. | ||||||
|  | ELEMENT = span | ||||||
|  | ALLOW_ATTR = class | ||||||
|  | REGEXP = ^\s*((math(\s+|$)|inline(\s+|$)|display(\s+|$)))+ | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  |  - `ELEMENT`: The element this policy applies to. Must be non-empty. | ||||||
|  |  - `ALLOW_ATTR`: The attribute this policy allows. Must be non-empty. | ||||||
|  |  - `REGEXP`: A regex to match the contents of the attribute against. Must be present but may be empty for unconditional whitelisting of this attribute. | ||||||
|  |  | ||||||
|  | You may redefine `ELEMENT`, `ALLOW_ATTR`, and `REGEXP` multiple times; each time all three are defined is a single policy entry. | ||||||
|  |  | ||||||
| ## Time (`time`) | ## Time (`time`) | ||||||
|  |  | ||||||
| - `FORMAT`: Time format to display on the UI, e.g. RFC1123 or 2006-01-02 15:04:05 | - `FORMAT`: Time format to display on the UI, e.g. RFC1123 or 2006-01-02 15:04:05 | ||||||
|   | |||||||
| @@ -68,4 +68,22 @@ RENDER_COMMAND = rst2html.py | |||||||
| IS_INPUT_FILE = false | IS_INPUT_FILE = false | ||||||
| ``` | ``` | ||||||
|  |  | ||||||
|  | If your external markup relies on additional classes and attributes on the generated HTML elements, you might need to enable custom sanitizer policies. Gitea uses the [`bluemonday`](https://godoc.org/github.com/microcosm-cc/bluemonday) package as our HTML sanitizer. The example below will support [KaTeX](https://katex.org/) output from [`pandoc`](https://pandoc.org/). | ||||||
|  |  | ||||||
|  | ```ini | ||||||
|  | [markup.sanitizer] | ||||||
|  | ; Pandoc renders TeX segments as <span>s with the "math" class, optionally | ||||||
|  | ; with "inline" or "display" classes depending on context. | ||||||
|  | ELEMENT = span | ||||||
|  | ALLOW_ATTR = class | ||||||
|  | REGEXP = ^\s*((math(\s+|$)|inline(\s+|$)|display(\s+|$)))+ | ||||||
|  |  | ||||||
|  | [markup.markdown] | ||||||
|  | ENABLED         = true | ||||||
|  | FILE_EXTENSIONS = .md,.markdown | ||||||
|  | RENDER_COMMAND  = pandoc -f markdown -t html --katex | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  | You may redefine `ELEMENT`, `ALLOW_ATTR`, and `REGEXP` multiple times; each time all three are defined is a single policy entry. All three must be defined, but `REGEXP` may be blank to allow unconditional whitelisting of that attribute. | ||||||
|  |  | ||||||
| Once your configuration changes have been made, restart Gitea to have changes take effect. | Once your configuration changes have been made, restart Gitea to have changes take effect. | ||||||
|   | |||||||
| @@ -50,6 +50,15 @@ func ReplaceSanitizer() { | |||||||
|  |  | ||||||
| 	// Allow <kbd> tags for keyboard shortcut styling | 	// Allow <kbd> tags for keyboard shortcut styling | ||||||
| 	sanitizer.policy.AllowElements("kbd") | 	sanitizer.policy.AllowElements("kbd") | ||||||
|  |  | ||||||
|  | 	// Custom keyword markup | ||||||
|  | 	for _, rule := range setting.ExternalSanitizerRules { | ||||||
|  | 		if rule.Regexp != nil { | ||||||
|  | 			sanitizer.policy.AllowAttrs(rule.AllowAttr).Matching(rule.Regexp).OnElements(rule.Element) | ||||||
|  | 		} else { | ||||||
|  | 			sanitizer.policy.AllowAttrs(rule.AllowAttr).OnElements(rule.Element) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
| // Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist. | // Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist. | ||||||
|   | |||||||
| @@ -9,11 +9,14 @@ import ( | |||||||
| 	"strings" | 	"strings" | ||||||
|  |  | ||||||
| 	"code.gitea.io/gitea/modules/log" | 	"code.gitea.io/gitea/modules/log" | ||||||
|  |  | ||||||
|  | 	"gopkg.in/ini.v1" | ||||||
| ) | ) | ||||||
|  |  | ||||||
| // ExternalMarkupParsers represents the external markup parsers | // ExternalMarkupParsers represents the external markup parsers | ||||||
| var ( | var ( | ||||||
| 	ExternalMarkupParsers []MarkupParser | 	ExternalMarkupParsers  []MarkupParser | ||||||
|  | 	ExternalSanitizerRules []MarkupSanitizerRule | ||||||
| ) | ) | ||||||
|  |  | ||||||
| // MarkupParser defines the external parser configured in ini | // MarkupParser defines the external parser configured in ini | ||||||
| @@ -25,8 +28,15 @@ type MarkupParser struct { | |||||||
| 	IsInputFile    bool | 	IsInputFile    bool | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // MarkupSanitizerRule defines the policy for whitelisting attributes on | ||||||
|  | // certain elements. | ||||||
|  | type MarkupSanitizerRule struct { | ||||||
|  | 	Element   string | ||||||
|  | 	AllowAttr string | ||||||
|  | 	Regexp    *regexp.Regexp | ||||||
|  | } | ||||||
|  |  | ||||||
| func newMarkup() { | func newMarkup() { | ||||||
| 	extensionReg := regexp.MustCompile(`\.\w`) |  | ||||||
| 	for _, sec := range Cfg.Section("markup").ChildSections() { | 	for _, sec := range Cfg.Section("markup").ChildSections() { | ||||||
| 		name := strings.TrimPrefix(sec.Name(), "markup.") | 		name := strings.TrimPrefix(sec.Name(), "markup.") | ||||||
| 		if name == "" { | 		if name == "" { | ||||||
| @@ -34,33 +44,98 @@ func newMarkup() { | |||||||
| 			continue | 			continue | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| 		extensions := sec.Key("FILE_EXTENSIONS").Strings(",") | 		if name == "sanitizer" { | ||||||
| 		var exts = make([]string, 0, len(extensions)) | 			newMarkupSanitizer(name, sec) | ||||||
| 		for _, extension := range extensions { | 		} else { | ||||||
| 			if !extensionReg.MatchString(extension) { | 			newMarkupRenderer(name, sec) | ||||||
| 				log.Warn(sec.Name() + " file extension " + extension + " is invalid. Extension ignored") |  | ||||||
| 			} else { |  | ||||||
| 				exts = append(exts, extension) |  | ||||||
| 			} |  | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| 		if len(exts) == 0 { |  | ||||||
| 			log.Warn(sec.Name() + " file extension is empty, markup " + name + " ignored") |  | ||||||
| 			continue |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		command := sec.Key("RENDER_COMMAND").MustString("") |  | ||||||
| 		if command == "" { |  | ||||||
| 			log.Warn(" RENDER_COMMAND is empty, markup " + name + " ignored") |  | ||||||
| 			continue |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		ExternalMarkupParsers = append(ExternalMarkupParsers, MarkupParser{ |  | ||||||
| 			Enabled:        sec.Key("ENABLED").MustBool(false), |  | ||||||
| 			MarkupName:     name, |  | ||||||
| 			FileExtensions: exts, |  | ||||||
| 			Command:        command, |  | ||||||
| 			IsInputFile:    sec.Key("IS_INPUT_FILE").MustBool(false), |  | ||||||
| 		}) |  | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func newMarkupSanitizer(name string, sec *ini.Section) { | ||||||
|  | 	haveElement := sec.HasKey("ELEMENT") | ||||||
|  | 	haveAttr := sec.HasKey("ALLOW_ATTR") | ||||||
|  | 	haveRegexp := sec.HasKey("REGEXP") | ||||||
|  |  | ||||||
|  | 	if !haveElement && !haveAttr && !haveRegexp { | ||||||
|  | 		log.Warn("Skipping empty section: markup.%s.", name) | ||||||
|  | 		return | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if !haveElement || !haveAttr || !haveRegexp { | ||||||
|  | 		log.Error("Missing required keys from markup.%s. Must have all three of ELEMENT, ALLOW_ATTR, and REGEXP defined!", name) | ||||||
|  | 		return | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	elements := sec.Key("ELEMENT").ValueWithShadows() | ||||||
|  | 	allowAttrs := sec.Key("ALLOW_ATTR").ValueWithShadows() | ||||||
|  | 	regexps := sec.Key("REGEXP").ValueWithShadows() | ||||||
|  |  | ||||||
|  | 	if len(elements) != len(allowAttrs) || | ||||||
|  | 		len(elements) != len(regexps) { | ||||||
|  | 		log.Error("All three keys in markup.%s (ELEMENT, ALLOW_ATTR, REGEXP) must be defined the same number of times! Got %d, %d, and %d respectively.", name, len(elements), len(allowAttrs), len(regexps)) | ||||||
|  | 		return | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	ExternalSanitizerRules = make([]MarkupSanitizerRule, 0, len(elements)) | ||||||
|  |  | ||||||
|  | 	for index, pattern := range regexps { | ||||||
|  | 		if pattern == "" { | ||||||
|  | 			rule := MarkupSanitizerRule{ | ||||||
|  | 				Element:   elements[index], | ||||||
|  | 				AllowAttr: allowAttrs[index], | ||||||
|  | 				Regexp:    nil, | ||||||
|  | 			} | ||||||
|  | 			ExternalSanitizerRules = append(ExternalSanitizerRules, rule) | ||||||
|  | 			continue | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		// Validate when parsing the config that this is a valid regular | ||||||
|  | 		// expression. Then we can use regexp.MustCompile(...) later. | ||||||
|  | 		compiled, err := regexp.Compile(pattern) | ||||||
|  | 		if err != nil { | ||||||
|  | 			log.Error("In module.%s: REGEXP at definition %d failed to compile: %v", name, index+1, err) | ||||||
|  | 			continue | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		rule := MarkupSanitizerRule{ | ||||||
|  | 			Element:   elements[index], | ||||||
|  | 			AllowAttr: allowAttrs[index], | ||||||
|  | 			Regexp:    compiled, | ||||||
|  | 		} | ||||||
|  | 		ExternalSanitizerRules = append(ExternalSanitizerRules, rule) | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func newMarkupRenderer(name string, sec *ini.Section) { | ||||||
|  | 	extensionReg := regexp.MustCompile(`\.\w`) | ||||||
|  |  | ||||||
|  | 	extensions := sec.Key("FILE_EXTENSIONS").Strings(",") | ||||||
|  | 	var exts = make([]string, 0, len(extensions)) | ||||||
|  | 	for _, extension := range extensions { | ||||||
|  | 		if !extensionReg.MatchString(extension) { | ||||||
|  | 			log.Warn(sec.Name() + " file extension " + extension + " is invalid. Extension ignored") | ||||||
|  | 		} else { | ||||||
|  | 			exts = append(exts, extension) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if len(exts) == 0 { | ||||||
|  | 		log.Warn(sec.Name() + " file extension is empty, markup " + name + " ignored") | ||||||
|  | 		return | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	command := sec.Key("RENDER_COMMAND").MustString("") | ||||||
|  | 	if command == "" { | ||||||
|  | 		log.Warn(" RENDER_COMMAND is empty, markup " + name + " ignored") | ||||||
|  | 		return | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	ExternalMarkupParsers = append(ExternalMarkupParsers, MarkupParser{ | ||||||
|  | 		Enabled:        sec.Key("ENABLED").MustBool(false), | ||||||
|  | 		MarkupName:     name, | ||||||
|  | 		FileExtensions: exts, | ||||||
|  | 		Command:        command, | ||||||
|  | 		IsInputFile:    sec.Key("IS_INPUT_FILE").MustBool(false), | ||||||
|  | 	}) | ||||||
|  | } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user