1// Copyright (c) 2014, David Kitchen <david@buro9.com> 2// 3// All rights reserved. 4// 5// Redistribution and use in source and binary forms, with or without 6// modification, are permitted provided that the following conditions are met: 7// 8// * Redistributions of source code must retain the above copyright notice, this 9// list of conditions and the following disclaimer. 10// 11// * Redistributions in binary form must reproduce the above copyright notice, 12// this list of conditions and the following disclaimer in the documentation 13// and/or other materials provided with the distribution. 14// 15// * Neither the name of the organisation (Microcosm) nor the names of its 16// contributors may be used to endorse or promote products derived from 17// this software without specific prior written permission. 18// 19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 30package bluemonday 31 32import ( 33 "regexp" 34) 35 36// StrictPolicy returns an empty policy, which will effectively strip all HTML 37// elements and their attributes from a document. 38func StrictPolicy() *Policy { 39 return NewPolicy() 40} 41 42// StripTagsPolicy is DEPRECATED. Use StrictPolicy instead. 43func StripTagsPolicy() *Policy { 44 return StrictPolicy() 45} 46 47// UGCPolicy returns a policy aimed at user generated content that is a result 48// of HTML WYSIWYG tools and Markdown conversions. 49// 50// This is expected to be a fairly rich document where as much markup as 51// possible should be retained. Markdown permits raw HTML so we are basically 52// providing a policy to sanitise HTML5 documents safely but with the 53// least intrusion on the formatting expectations of the user. 54func UGCPolicy() *Policy { 55 56 p := NewPolicy() 57 58 /////////////////////// 59 // Global attributes // 60 /////////////////////// 61 62 // "class" is not permitted as we are not allowing users to style their own 63 // content 64 65 p.AllowStandardAttributes() 66 67 ////////////////////////////// 68 // Global URL format policy // 69 ////////////////////////////// 70 71 p.AllowStandardURLs() 72 73 //////////////////////////////// 74 // Declarations and structure // 75 //////////////////////////////// 76 77 // "xml" "xslt" "DOCTYPE" "html" "head" are not permitted as we are 78 // expecting user generated content to be a fragment of HTML and not a full 79 // document. 80 81 ////////////////////////// 82 // Sectioning root tags // 83 ////////////////////////// 84 85 // "article" and "aside" are permitted and takes no attributes 86 p.AllowElements("article", "aside") 87 88 // "body" is not permitted as we are expecting user generated content to be a fragment 89 // of HTML and not a full document. 90 91 // "details" is permitted, including the "open" attribute which can either 92 // be blank or the value "open". 93 p.AllowAttrs( 94 "open", 95 ).Matching(regexp.MustCompile(`(?i)^(|open)$`)).OnElements("details") 96 97 // "fieldset" is not permitted as we are not allowing forms to be created. 98 99 // "figure" is permitted and takes no attributes 100 p.AllowElements("figure") 101 102 // "nav" is not permitted as it is assumed that the site (and not the user) 103 // has defined navigation elements 104 105 // "section" is permitted and takes no attributes 106 p.AllowElements("section") 107 108 // "summary" is permitted and takes no attributes 109 p.AllowElements("summary") 110 111 ////////////////////////// 112 // Headings and footers // 113 ////////////////////////// 114 115 // "footer" is not permitted as we expect user content to be a fragment and 116 // not structural to this extent 117 118 // "h1" through "h6" are permitted and take no attributes 119 p.AllowElements("h1", "h2", "h3", "h4", "h5", "h6") 120 121 // "header" is not permitted as we expect user content to be a fragment and 122 // not structural to this extent 123 124 // "hgroup" is permitted and takes no attributes 125 p.AllowElements("hgroup") 126 127 ///////////////////////////////////// 128 // Content grouping and separating // 129 ///////////////////////////////////// 130 131 // "blockquote" is permitted, including the "cite" attribute which must be 132 // a standard URL. 133 p.AllowAttrs("cite").OnElements("blockquote") 134 135 // "br" "div" "hr" "p" "span" "wbr" are permitted and take no attributes 136 p.AllowElements("br", "div", "hr", "p", "span", "wbr") 137 138 /////////// 139 // Links // 140 /////////// 141 142 // "a" is permitted 143 p.AllowAttrs("href").OnElements("a") 144 145 // "area" is permitted along with the attributes that map image maps work 146 p.AllowAttrs("name").Matching( 147 regexp.MustCompile(`^([\p{L}\p{N}_-]+)$`), 148 ).OnElements("map") 149 p.AllowAttrs("alt").Matching(Paragraph).OnElements("area") 150 p.AllowAttrs("coords").Matching( 151 regexp.MustCompile(`^([0-9]+,)+[0-9]+$`), 152 ).OnElements("area") 153 p.AllowAttrs("href").OnElements("area") 154 p.AllowAttrs("rel").Matching(SpaceSeparatedTokens).OnElements("area") 155 p.AllowAttrs("shape").Matching( 156 regexp.MustCompile(`(?i)^(default|circle|rect|poly)$`), 157 ).OnElements("area") 158 p.AllowAttrs("usemap").Matching( 159 regexp.MustCompile(`(?i)^#[\p{L}\p{N}_-]+$`), 160 ).OnElements("img") 161 162 // "link" is not permitted 163 164 ///////////////////// 165 // Phrase elements // 166 ///////////////////// 167 168 // The following are all inline phrasing elements 169 p.AllowElements("abbr", "acronym", "cite", "code", "dfn", "em", 170 "figcaption", "mark", "s", "samp", "strong", "sub", "sup", "var") 171 172 // "q" is permitted and "cite" is a URL and handled by URL policies 173 p.AllowAttrs("cite").OnElements("q") 174 175 // "time" is permitted 176 p.AllowAttrs("datetime").Matching(ISO8601).OnElements("time") 177 178 //////////////////// 179 // Style elements // 180 //////////////////// 181 182 // block and inline elements that impart no semantic meaning but style the 183 // document 184 p.AllowElements("b", "i", "pre", "small", "strike", "tt", "u") 185 186 // "style" is not permitted as we are not yet sanitising CSS and it is an 187 // XSS attack vector 188 189 ////////////////////// 190 // HTML5 Formatting // 191 ////////////////////// 192 193 // "bdi" "bdo" are permitted 194 p.AllowAttrs("dir").Matching(Direction).OnElements("bdi", "bdo") 195 196 // "rp" "rt" "ruby" are permitted 197 p.AllowElements("rp", "rt", "ruby") 198 199 /////////////////////////// 200 // HTML5 Change tracking // 201 /////////////////////////// 202 203 // "del" "ins" are permitted 204 p.AllowAttrs("cite").Matching(Paragraph).OnElements("del", "ins") 205 p.AllowAttrs("datetime").Matching(ISO8601).OnElements("del", "ins") 206 207 /////////// 208 // Lists // 209 /////////// 210 211 p.AllowLists() 212 213 //////////// 214 // Tables // 215 //////////// 216 217 p.AllowTables() 218 219 /////////// 220 // Forms // 221 /////////// 222 223 // By and large, forms are not permitted. However there are some form 224 // elements that can be used to present data, and we do permit those 225 // 226 // "button" "fieldset" "input" "keygen" "label" "output" "select" "datalist" 227 // "textarea" "optgroup" "option" are all not permitted 228 229 // "meter" is permitted 230 p.AllowAttrs( 231 "value", 232 "min", 233 "max", 234 "low", 235 "high", 236 "optimum", 237 ).Matching(Number).OnElements("meter") 238 239 // "progress" is permitted 240 p.AllowAttrs("value", "max").Matching(Number).OnElements("progress") 241 242 ////////////////////// 243 // Embedded content // 244 ////////////////////// 245 246 // Vast majority not permitted 247 // "audio" "canvas" "embed" "iframe" "object" "param" "source" "svg" "track" 248 // "video" are all not permitted 249 250 p.AllowImages() 251 252 return p 253} 254