1// Copyright 2017 The Gitea Authors. All rights reserved. 2// Copyright 2017 The Gogs Authors. All rights reserved. 3// Use of this source code is governed by a MIT-style 4// license that can be found in the LICENSE file. 5 6package markup 7 8import ( 9 "io" 10 "regexp" 11 "sync" 12 13 "code.gitea.io/gitea/modules/setting" 14 15 "github.com/microcosm-cc/bluemonday" 16) 17 18// Sanitizer is a protection wrapper of *bluemonday.Policy which does not allow 19// any modification to the underlying policies once it's been created. 20type Sanitizer struct { 21 defaultPolicy *bluemonday.Policy 22 rendererPolicies map[string]*bluemonday.Policy 23 init sync.Once 24} 25 26var sanitizer = &Sanitizer{} 27 28// NewSanitizer initializes sanitizer with allowed attributes based on settings. 29// Multiple calls to this function will only create one instance of Sanitizer during 30// entire application lifecycle. 31func NewSanitizer() { 32 sanitizer.init.Do(func() { 33 InitializeSanitizer() 34 }) 35} 36 37// InitializeSanitizer (re)initializes the current sanitizer to account for changes in settings 38func InitializeSanitizer() { 39 sanitizer.rendererPolicies = map[string]*bluemonday.Policy{} 40 sanitizer.defaultPolicy = createDefaultPolicy() 41 42 for name, renderer := range renderers { 43 sanitizerRules := renderer.SanitizerRules() 44 if len(sanitizerRules) > 0 { 45 policy := createDefaultPolicy() 46 addSanitizerRules(policy, sanitizerRules) 47 sanitizer.rendererPolicies[name] = policy 48 } 49 } 50} 51 52func createDefaultPolicy() *bluemonday.Policy { 53 policy := bluemonday.UGCPolicy() 54 55 // For JS code copy and Mermaid loading state 56 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^code-block( is-loading)?$`)).OnElements("pre") 57 58 // For Chroma markdown plugin 59 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(chroma )?language-[\w-]+$`)).OnElements("code") 60 61 // Checkboxes 62 policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input") 63 policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input") 64 65 // Custom URL-Schemes 66 if len(setting.Markdown.CustomURLSchemes) > 0 { 67 policy.AllowURLSchemes(setting.Markdown.CustomURLSchemes...) 68 } 69 70 // Allow classes for anchors 71 policy.AllowAttrs("class").Matching(regexp.MustCompile(`ref-issue( ref-external-issue)?`)).OnElements("a") 72 73 // Allow classes for task lists 74 policy.AllowAttrs("class").Matching(regexp.MustCompile(`task-list-item`)).OnElements("li") 75 76 // Allow icons 77 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^icon(\s+[\p{L}\p{N}_-]+)+$`)).OnElements("i") 78 79 // Allow unlabelled labels 80 policy.AllowNoAttrs().OnElements("label") 81 82 // Allow classes for emojis 83 policy.AllowAttrs("class").Matching(regexp.MustCompile(`emoji`)).OnElements("img") 84 85 // Allow icons, emojis, chroma syntax and keyword markup on span 86 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(emoji))$|^([a-z][a-z0-9]{0,2})$|^` + keywordClass + `$`)).OnElements("span") 87 88 // Allow generally safe attributes 89 generalSafeAttrs := []string{"abbr", "accept", "accept-charset", 90 "accesskey", "action", "align", "alt", 91 "aria-describedby", "aria-hidden", "aria-label", "aria-labelledby", 92 "axis", "border", "cellpadding", "cellspacing", "char", 93 "charoff", "charset", "checked", 94 "clear", "cols", "colspan", "color", 95 "compact", "coords", "datetime", "dir", 96 "disabled", "enctype", "for", "frame", 97 "headers", "height", "hreflang", 98 "hspace", "ismap", "label", "lang", 99 "maxlength", "media", "method", 100 "multiple", "name", "nohref", "noshade", 101 "nowrap", "open", "prompt", "readonly", "rel", "rev", 102 "rows", "rowspan", "rules", "scope", 103 "selected", "shape", "size", "span", 104 "start", "summary", "tabindex", "target", 105 "title", "type", "usemap", "valign", "value", 106 "vspace", "width", "itemprop", 107 } 108 109 generalSafeElements := []string{ 110 "h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "br", "b", "i", "strong", "em", "a", "pre", "code", "img", "tt", 111 "div", "ins", "del", "sup", "sub", "p", "ol", "ul", "table", "thead", "tbody", "tfoot", "blockquote", 112 "dl", "dt", "dd", "kbd", "q", "samp", "var", "hr", "ruby", "rt", "rp", "li", "tr", "td", "th", "s", "strike", "summary", 113 "details", "caption", "figure", "figcaption", 114 "abbr", "bdo", "cite", "dfn", "mark", "small", "span", "time", "wbr", 115 } 116 117 policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...) 118 119 policy.AllowAttrs("itemscope", "itemtype").OnElements("div") 120 121 // FIXME: Need to handle longdesc in img but there is no easy way to do it 122 123 // Custom keyword markup 124 addSanitizerRules(policy, setting.ExternalSanitizerRules) 125 126 return policy 127} 128 129func addSanitizerRules(policy *bluemonday.Policy, rules []setting.MarkupSanitizerRule) { 130 for _, rule := range rules { 131 if rule.AllowDataURIImages { 132 policy.AllowDataURIImages() 133 } 134 if rule.Element != "" { 135 if rule.Regexp != nil { 136 policy.AllowAttrs(rule.AllowAttr).Matching(rule.Regexp).OnElements(rule.Element) 137 } else { 138 policy.AllowAttrs(rule.AllowAttr).OnElements(rule.Element) 139 } 140 } 141 } 142} 143 144// Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist. 145func Sanitize(s string) string { 146 NewSanitizer() 147 return sanitizer.defaultPolicy.Sanitize(s) 148} 149 150// SanitizeReader sanitizes a Reader 151func SanitizeReader(r io.Reader, renderer string, w io.Writer) error { 152 NewSanitizer() 153 policy, exist := sanitizer.rendererPolicies[renderer] 154 if !exist { 155 policy = sanitizer.defaultPolicy 156 } 157 return policy.SanitizeReaderToWriter(r, w) 158} 159