1// Copyright 2011 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package template 6 7import ( 8 "bytes" 9 "fmt" 10 "strings" 11 "unicode/utf8" 12) 13 14// htmlNospaceEscaper escapes for inclusion in unquoted attribute values. 15func htmlNospaceEscaper(args ...interface{}) string { 16 s, t := stringify(args...) 17 if t == contentTypeHTML { 18 return htmlReplacer(stripTags(s), htmlNospaceNormReplacementTable, false) 19 } 20 return htmlReplacer(s, htmlNospaceReplacementTable, false) 21} 22 23// attrEscaper escapes for inclusion in quoted attribute values. 24func attrEscaper(args ...interface{}) string { 25 s, t := stringify(args...) 26 if t == contentTypeHTML { 27 return htmlReplacer(stripTags(s), htmlNormReplacementTable, true) 28 } 29 return htmlReplacer(s, htmlReplacementTable, true) 30} 31 32// rcdataEscaper escapes for inclusion in an RCDATA element body. 33func rcdataEscaper(args ...interface{}) string { 34 s, t := stringify(args...) 35 if t == contentTypeHTML { 36 return htmlReplacer(s, htmlNormReplacementTable, true) 37 } 38 return htmlReplacer(s, htmlReplacementTable, true) 39} 40 41// htmlEscaper escapes for inclusion in HTML text. 42func htmlEscaper(args ...interface{}) string { 43 s, t := stringify(args...) 44 if t == contentTypeHTML { 45 return s 46 } 47 return htmlReplacer(s, htmlReplacementTable, true) 48} 49 50// htmlReplacementTable contains the runes that need to be escaped 51// inside a quoted attribute value or in a text node. 52var htmlReplacementTable = []string{ 53 // http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state 54 // U+0000 NULL Parse error. Append a U+FFFD REPLACEMENT 55 // CHARACTER character to the current attribute's value. 56 // " 57 // and similarly 58 // http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state 59 0: "\uFFFD", 60 '"': """, 61 '&': "&", 62 '\'': "'", 63 '+': "+", 64 '<': "<", 65 '>': ">", 66} 67 68// htmlNormReplacementTable is like htmlReplacementTable but without '&' to 69// avoid over-encoding existing entities. 70var htmlNormReplacementTable = []string{ 71 0: "\uFFFD", 72 '"': """, 73 '\'': "'", 74 '+': "+", 75 '<': "<", 76 '>': ">", 77} 78 79// htmlNospaceReplacementTable contains the runes that need to be escaped 80// inside an unquoted attribute value. 81// The set of runes escaped is the union of the HTML specials and 82// those determined by running the JS below in browsers: 83// <div id=d></div> 84// <script>(function () { 85// var a = [], d = document.getElementById("d"), i, c, s; 86// for (i = 0; i < 0x10000; ++i) { 87// c = String.fromCharCode(i); 88// d.innerHTML = "<span title=" + c + "lt" + c + "></span>" 89// s = d.getElementsByTagName("SPAN")[0]; 90// if (!s || s.title !== c + "lt" + c) { a.push(i.toString(16)); } 91// } 92// document.write(a.join(", ")); 93// })()</script> 94var htmlNospaceReplacementTable = []string{ 95 0: "�", 96 '\t': "	", 97 '\n': " ", 98 '\v': "", 99 '\f': "", 100 '\r': " ", 101 ' ': " ", 102 '"': """, 103 '&': "&", 104 '\'': "'", 105 '+': "+", 106 '<': "<", 107 '=': "=", 108 '>': ">", 109 // A parse error in the attribute value (unquoted) and 110 // before attribute value states. 111 // Treated as a quoting character by IE. 112 '`': "`", 113} 114 115// htmlNospaceNormReplacementTable is like htmlNospaceReplacementTable but 116// without '&' to avoid over-encoding existing entities. 117var htmlNospaceNormReplacementTable = []string{ 118 0: "�", 119 '\t': "	", 120 '\n': " ", 121 '\v': "", 122 '\f': "", 123 '\r': " ", 124 ' ': " ", 125 '"': """, 126 '\'': "'", 127 '+': "+", 128 '<': "<", 129 '=': "=", 130 '>': ">", 131 // A parse error in the attribute value (unquoted) and 132 // before attribute value states. 133 // Treated as a quoting character by IE. 134 '`': "`", 135} 136 137// htmlReplacer returns s with runes replaced according to replacementTable 138// and when badRunes is true, certain bad runes are allowed through unescaped. 139func htmlReplacer(s string, replacementTable []string, badRunes bool) string { 140 written, b := 0, new(bytes.Buffer) 141 r, w := rune(0), 0 142 for i := 0; i < len(s); i += w { 143 // Cannot use 'for range s' because we need to preserve the width 144 // of the runes in the input. If we see a decoding error, the input 145 // width will not be utf8.Runelen(r) and we will overrun the buffer. 146 r, w = utf8.DecodeRuneInString(s[i:]) 147 if int(r) < len(replacementTable) { 148 if repl := replacementTable[r]; len(repl) != 0 { 149 b.WriteString(s[written:i]) 150 b.WriteString(repl) 151 written = i + w 152 } 153 } else if badRunes { 154 // No-op. 155 // IE does not allow these ranges in unquoted attrs. 156 } else if 0xfdd0 <= r && r <= 0xfdef || 0xfff0 <= r && r <= 0xffff { 157 fmt.Fprintf(b, "%s&#x%x;", s[written:i], r) 158 written = i + w 159 } 160 } 161 if written == 0 { 162 return s 163 } 164 b.WriteString(s[written:]) 165 return b.String() 166} 167 168// stripTags takes a snippet of HTML and returns only the text content. 169// For example, `<b>¡Hi!</b> <script>...</script>` -> `¡Hi! `. 170func stripTags(html string) string { 171 var b bytes.Buffer 172 s, c, i, allText := []byte(html), context{}, 0, true 173 // Using the transition funcs helps us avoid mangling 174 // `<div title="1>2">` or `I <3 Ponies!`. 175 for i != len(s) { 176 if c.delim == delimNone { 177 st := c.state 178 // Use RCDATA instead of parsing into JS or CSS styles. 179 if c.element != elementNone && !isInTag(st) { 180 st = stateRCDATA 181 } 182 d, nread := transitionFunc[st](c, s[i:]) 183 i1 := i + nread 184 if c.state == stateText || c.state == stateRCDATA { 185 // Emit text up to the start of the tag or comment. 186 j := i1 187 if d.state != c.state { 188 for j1 := j - 1; j1 >= i; j1-- { 189 if s[j1] == '<' { 190 j = j1 191 break 192 } 193 } 194 } 195 b.Write(s[i:j]) 196 } else { 197 allText = false 198 } 199 c, i = d, i1 200 continue 201 } 202 i1 := i + bytes.IndexAny(s[i:], delimEnds[c.delim]) 203 if i1 < i { 204 break 205 } 206 if c.delim != delimSpaceOrTagEnd { 207 // Consume any quote. 208 i1++ 209 } 210 c, i = context{state: stateTag, element: c.element}, i1 211 } 212 if allText { 213 return html 214 } else if c.state == stateText || c.state == stateRCDATA { 215 b.Write(s[i:]) 216 } 217 return b.String() 218} 219 220// htmlNameFilter accepts valid parts of an HTML attribute or tag name or 221// a known-safe HTML attribute. 222func htmlNameFilter(args ...interface{}) string { 223 s, t := stringify(args...) 224 if t == contentTypeHTMLAttr { 225 return s 226 } 227 if len(s) == 0 { 228 // Avoid violation of structure preservation. 229 // <input checked {{.K}}={{.V}}>. 230 // Without this, if .K is empty then .V is the value of 231 // checked, but otherwise .V is the value of the attribute 232 // named .K. 233 return filterFailsafe 234 } 235 s = strings.ToLower(s) 236 if t := attrType(s); t != contentTypePlain { 237 // TODO: Split attr and element name part filters so we can whitelist 238 // attributes. 239 return filterFailsafe 240 } 241 for _, r := range s { 242 switch { 243 case '0' <= r && r <= '9': 244 case 'a' <= r && r <= 'z': 245 default: 246 return filterFailsafe 247 } 248 } 249 return s 250} 251 252// commentEscaper returns the empty string regardless of input. 253// Comment content does not correspond to any parsed structure or 254// human-readable content, so the simplest and most secure policy is to drop 255// content interpolated into comments. 256// This approach is equally valid whether or not static comment content is 257// removed from the template. 258func commentEscaper(args ...interface{}) string { 259 return "" 260} 261