1// Copyright 2011 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package template 6 7import ( 8 "bytes" 9 "encoding/json" 10 "fmt" 11 "reflect" 12 "strings" 13 "unicode/utf8" 14) 15 16// nextJSCtx returns the context that determines whether a slash after the 17// given run of tokens starts a regular expression instead of a division 18// operator: / or /=. 19// 20// This assumes that the token run does not include any string tokens, comment 21// tokens, regular expression literal tokens, or division operators. 22// 23// This fails on some valid but nonsensical JavaScript programs like 24// "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to 25// fail on any known useful programs. It is based on the draft 26// JavaScript 2.0 lexical grammar and requires one token of lookbehind: 27// http://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html 28func nextJSCtx(s []byte, preceding jsCtx) jsCtx { 29 s = bytes.TrimRight(s, "\t\n\f\r \u2028\u2029") 30 if len(s) == 0 { 31 return preceding 32 } 33 34 // All cases below are in the single-byte UTF-8 group. 35 switch c, n := s[len(s)-1], len(s); c { 36 case '+', '-': 37 // ++ and -- are not regexp preceders, but + and - are whether 38 // they are used as infix or prefix operators. 39 start := n - 1 40 // Count the number of adjacent dashes or pluses. 41 for start > 0 && s[start-1] == c { 42 start-- 43 } 44 if (n-start)&1 == 1 { 45 // Reached for trailing minus signs since "---" is the 46 // same as "-- -". 47 return jsCtxRegexp 48 } 49 return jsCtxDivOp 50 case '.': 51 // Handle "42." 52 if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' { 53 return jsCtxDivOp 54 } 55 return jsCtxRegexp 56 // Suffixes for all punctuators from section 7.7 of the language spec 57 // that only end binary operators not handled above. 
58 case ',', '<', '>', '=', '*', '%', '&', '|', '^', '?': 59 return jsCtxRegexp 60 // Suffixes for all punctuators from section 7.7 of the language spec 61 // that are prefix operators not handled above. 62 case '!', '~': 63 return jsCtxRegexp 64 // Matches all the punctuators from section 7.7 of the language spec 65 // that are open brackets not handled above. 66 case '(', '[': 67 return jsCtxRegexp 68 // Matches all the punctuators from section 7.7 of the language spec 69 // that precede expression starts. 70 case ':', ';', '{': 71 return jsCtxRegexp 72 // CAVEAT: the close punctuators ('}', ']', ')') precede div ops and 73 // are handled in the default except for '}' which can precede a 74 // division op as in 75 // ({ valueOf: function () { return 42 } } / 2 76 // which is valid, but, in practice, developers don't divide object 77 // literals, so our heuristic works well for code like 78 // function () { ... } /foo/.test(x) && sideEffect(); 79 // The ')' punctuator can precede a regular expression as in 80 // if (b) /foo/.test(x) && ... 81 // but this is much less likely than 82 // (a + b) / c 83 case '}': 84 return jsCtxRegexp 85 default: 86 // Look for an IdentifierName and see if it is a keyword that 87 // can precede a regular expression. 88 j := n 89 for j > 0 && isJSIdentPart(rune(s[j-1])) { 90 j-- 91 } 92 if regexpPrecederKeywords[string(s[j:])] { 93 return jsCtxRegexp 94 } 95 } 96 // Otherwise is a punctuator not listed above, or 97 // a string which precedes a div op, or an identifier 98 // which precedes a div op. 99 return jsCtxDivOp 100} 101 102// regexpPrecederKeywords is a set of reserved JS keywords that can precede a 103// regular expression in JS source. 
104var regexpPrecederKeywords = map[string]bool{ 105 "break": true, 106 "case": true, 107 "continue": true, 108 "delete": true, 109 "do": true, 110 "else": true, 111 "finally": true, 112 "in": true, 113 "instanceof": true, 114 "return": true, 115 "throw": true, 116 "try": true, 117 "typeof": true, 118 "void": true, 119} 120 121var jsonMarshalType = reflect.TypeOf((*json.Marshaler)(nil)).Elem() 122 123// indirectToJSONMarshaler returns the value, after dereferencing as many times 124// as necessary to reach the base type (or nil) or an implementation of json.Marshal. 125func indirectToJSONMarshaler(a interface{}) interface{} { 126 v := reflect.ValueOf(a) 127 for !v.Type().Implements(jsonMarshalType) && v.Kind() == reflect.Ptr && !v.IsNil() { 128 v = v.Elem() 129 } 130 return v.Interface() 131} 132 133// jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has 134// neither side-effects nor free variables outside (NaN, Infinity). 135func jsValEscaper(args ...interface{}) string { 136 var a interface{} 137 if len(args) == 1 { 138 a = indirectToJSONMarshaler(args[0]) 139 switch t := a.(type) { 140 case JS: 141 return string(t) 142 case JSStr: 143 // TODO: normalize quotes. 144 return `"` + string(t) + `"` 145 case json.Marshaler: 146 // Do not treat as a Stringer. 147 case fmt.Stringer: 148 a = t.String() 149 } 150 } else { 151 for i, arg := range args { 152 args[i] = indirectToJSONMarshaler(arg) 153 } 154 a = fmt.Sprint(args...) 155 } 156 // TODO: detect cycles before calling Marshal which loops infinitely on 157 // cyclic data. This may be an unacceptable DoS risk. 
158 159 b, err := json.Marshal(a) 160 if err != nil { 161 // Put a space before comment so that if it is flush against 162 // a division operator it is not turned into a line comment: 163 // x/{{y}} 164 // turning into 165 // x//* error marshaling y: 166 // second line of error message */null 167 return fmt.Sprintf(" /* %s */null ", strings.Replace(err.Error(), "*/", "* /", -1)) 168 } 169 170 // TODO: maybe post-process output to prevent it from containing 171 // "<!--", "-->", "<![CDATA[", "]]>", or "</script" 172 // in case custom marshalers produce output containing those. 173 174 // TODO: Maybe abbreviate \u00ab to \xab to produce more compact output. 175 if len(b) == 0 { 176 // In, `x=y/{{.}}*z` a json.Marshaler that produces "" should 177 // not cause the output `x=y/*z`. 178 return " null " 179 } 180 first, _ := utf8.DecodeRune(b) 181 last, _ := utf8.DecodeLastRune(b) 182 var buf bytes.Buffer 183 // Prevent IdentifierNames and NumericLiterals from running into 184 // keywords: in, instanceof, typeof, void 185 pad := isJSIdentPart(first) || isJSIdentPart(last) 186 if pad { 187 buf.WriteByte(' ') 188 } 189 written := 0 190 // Make sure that json.Marshal escapes codepoints U+2028 & U+2029 191 // so it falls within the subset of JSON which is valid JS. 192 for i := 0; i < len(b); { 193 rune, n := utf8.DecodeRune(b[i:]) 194 repl := "" 195 if rune == 0x2028 { 196 repl = `\u2028` 197 } else if rune == 0x2029 { 198 repl = `\u2029` 199 } 200 if repl != "" { 201 buf.Write(b[written:i]) 202 buf.WriteString(repl) 203 written = i + n 204 } 205 i += n 206 } 207 if buf.Len() != 0 { 208 buf.Write(b[written:]) 209 if pad { 210 buf.WriteByte(' ') 211 } 212 b = buf.Bytes() 213 } 214 return string(b) 215} 216 217// jsStrEscaper produces a string that can be included between quotes in 218// JavaScript source, in JavaScript embedded in an HTML5 <script> element, 219// or in an HTML5 event handler attribute such as onclick. 
220func jsStrEscaper(args ...interface{}) string { 221 s, t := stringify(args...) 222 if t == contentTypeJSStr { 223 return replace(s, jsStrNormReplacementTable) 224 } 225 return replace(s, jsStrReplacementTable) 226} 227 228// jsRegexpEscaper behaves like jsStrEscaper but escapes regular expression 229// specials so the result is treated literally when included in a regular 230// expression literal. /foo{{.X}}bar/ matches the string "foo" followed by 231// the literal text of {{.X}} followed by the string "bar". 232func jsRegexpEscaper(args ...interface{}) string { 233 s, _ := stringify(args...) 234 s = replace(s, jsRegexpReplacementTable) 235 if s == "" { 236 // /{{.X}}/ should not produce a line comment when .X == "". 237 return "(?:)" 238 } 239 return s 240} 241 242// replace replaces each rune r of s with replacementTable[r], provided that 243// r < len(replacementTable). If replacementTable[r] is the empty string then 244// no replacement is made. 245// It also replaces runes U+2028 and U+2029 with the raw strings `\u2028` and 246// `\u2029`. 247func replace(s string, replacementTable []string) string { 248 var b bytes.Buffer 249 r, w, written := rune(0), 0, 0 250 for i := 0; i < len(s); i += w { 251 // See comment in htmlEscaper. 252 r, w = utf8.DecodeRuneInString(s[i:]) 253 var repl string 254 switch { 255 case int(r) < len(replacementTable) && replacementTable[r] != "": 256 repl = replacementTable[r] 257 case r == '\u2028': 258 repl = `\u2028` 259 case r == '\u2029': 260 repl = `\u2029` 261 default: 262 continue 263 } 264 b.WriteString(s[written:i]) 265 b.WriteString(repl) 266 written = i + w 267 } 268 if written == 0 { 269 return s 270 } 271 b.WriteString(s[written:]) 272 return b.String() 273} 274 275var jsStrReplacementTable = []string{ 276 0: `\0`, 277 '\t': `\t`, 278 '\n': `\n`, 279 '\v': `\x0b`, // "\v" == "v" on IE 6. 
280 '\f': `\f`, 281 '\r': `\r`, 282 // Encode HTML specials as hex so the output can be embedded 283 // in HTML attributes without further encoding. 284 '"': `\x22`, 285 '&': `\x26`, 286 '\'': `\x27`, 287 '+': `\x2b`, 288 '/': `\/`, 289 '<': `\x3c`, 290 '>': `\x3e`, 291 '\\': `\\`, 292} 293 294// jsStrNormReplacementTable is like jsStrReplacementTable but does not 295// overencode existing escapes since this table has no entry for `\`. 296var jsStrNormReplacementTable = []string{ 297 0: `\0`, 298 '\t': `\t`, 299 '\n': `\n`, 300 '\v': `\x0b`, // "\v" == "v" on IE 6. 301 '\f': `\f`, 302 '\r': `\r`, 303 // Encode HTML specials as hex so the output can be embedded 304 // in HTML attributes without further encoding. 305 '"': `\x22`, 306 '&': `\x26`, 307 '\'': `\x27`, 308 '+': `\x2b`, 309 '/': `\/`, 310 '<': `\x3c`, 311 '>': `\x3e`, 312} 313 314var jsRegexpReplacementTable = []string{ 315 0: `\0`, 316 '\t': `\t`, 317 '\n': `\n`, 318 '\v': `\x0b`, // "\v" == "v" on IE 6. 319 '\f': `\f`, 320 '\r': `\r`, 321 // Encode HTML specials as hex so the output can be embedded 322 // in HTML attributes without further encoding. 323 '"': `\x22`, 324 '$': `\$`, 325 '&': `\x26`, 326 '\'': `\x27`, 327 '(': `\(`, 328 ')': `\)`, 329 '*': `\*`, 330 '+': `\x2b`, 331 '-': `\-`, 332 '.': `\.`, 333 '/': `\/`, 334 '<': `\x3c`, 335 '>': `\x3e`, 336 '?': `\?`, 337 '[': `\[`, 338 '\\': `\\`, 339 ']': `\]`, 340 '^': `\^`, 341 '{': `\{`, 342 '|': `\|`, 343 '}': `\}`, 344} 345 346// isJSIdentPart reports whether the given rune is a JS identifier part. 347// It does not handle all the non-Latin letters, joiners, and combining marks, 348// but it does handle every codepoint that can occur in a numeric literal or 349// a keyword. 
func isJSIdentPart(r rune) bool {
	// Only the ASCII identifier characters are recognized; any other rune
	// reports false.
	switch {
	case r == '$':
		return true
	case '0' <= r && r <= '9':
		return true
	case 'A' <= r && r <= 'Z':
		return true
	case r == '_':
		return true
	case 'a' <= r && r <= 'z':
		return true
	}
	return false
}

// isJSType reports whether the given MIME type should be considered JavaScript.
//
// It is used to determine whether a script tag with a type attribute is a javascript container.
// The comparison ignores letter case, surrounding whitespace, and any
// parameters that follow a ';'. Besides the JavaScript and JSON MIME types it
// accepts the special value "module", which marks a JavaScript module script,
// and "application/ld+json", which carries JSON.
func isJSType(mimeType string) bool {
	// per
	//   https://www.w3.org/TR/html5/scripting-1.html#attr-script-type
	//   https://tools.ietf.org/html/rfc7231#section-3.1.1
	//   https://tools.ietf.org/html/rfc4329#section-3
	//   https://www.ietf.org/rfc/rfc4627.txt
	mimeType = strings.ToLower(mimeType)
	// discard parameters
	if i := strings.Index(mimeType, ";"); i >= 0 {
		mimeType = mimeType[:i]
	}
	mimeType = strings.TrimSpace(mimeType)
	switch mimeType {
	case
		"application/ecmascript",
		"application/javascript",
		"application/json",
		"application/ld+json",
		"application/x-ecmascript",
		"application/x-javascript",
		"module",
		"text/ecmascript",
		"text/javascript",
		"text/javascript1.0",
		"text/javascript1.1",
		"text/javascript1.2",
		"text/javascript1.3",
		"text/javascript1.4",
		"text/javascript1.5",
		"text/jscript",
		"text/livescript",
		"text/x-ecmascript",
		"text/x-javascript":
		return true
	default:
		return false
	}
}