1// Copyright 2011 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package template 6 7import ( 8 "bytes" 9 "encoding/json" 10 "fmt" 11 "reflect" 12 "strings" 13 "unicode/utf8" 14) 15 16// nextJSCtx returns the context that determines whether a slash after the 17// given run of tokens starts a regular expression instead of a division 18// operator: / or /=. 19// 20// This assumes that the token run does not include any string tokens, comment 21// tokens, regular expression literal tokens, or division operators. 22// 23// This fails on some valid but nonsensical JavaScript programs like 24// "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to 25// fail on any known useful programs. It is based on the draft 26// JavaScript 2.0 lexical grammar and requires one token of lookbehind: 27// https://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html 28func nextJSCtx(s []byte, preceding jsCtx) jsCtx { 29 s = bytes.TrimRight(s, "\t\n\f\r \u2028\u2029") 30 if len(s) == 0 { 31 return preceding 32 } 33 34 // All cases below are in the single-byte UTF-8 group. 35 switch c, n := s[len(s)-1], len(s); c { 36 case '+', '-': 37 // ++ and -- are not regexp preceders, but + and - are whether 38 // they are used as infix or prefix operators. 39 start := n - 1 40 // Count the number of adjacent dashes or pluses. 41 for start > 0 && s[start-1] == c { 42 start-- 43 } 44 if (n-start)&1 == 1 { 45 // Reached for trailing minus signs since "---" is the 46 // same as "-- -". 47 return jsCtxRegexp 48 } 49 return jsCtxDivOp 50 case '.': 51 // Handle "42." 52 if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' { 53 return jsCtxDivOp 54 } 55 return jsCtxRegexp 56 // Suffixes for all punctuators from section 7.7 of the language spec 57 // that only end binary operators not handled above. 
58 case ',', '<', '>', '=', '*', '%', '&', '|', '^', '?': 59 return jsCtxRegexp 60 // Suffixes for all punctuators from section 7.7 of the language spec 61 // that are prefix operators not handled above. 62 case '!', '~': 63 return jsCtxRegexp 64 // Matches all the punctuators from section 7.7 of the language spec 65 // that are open brackets not handled above. 66 case '(', '[': 67 return jsCtxRegexp 68 // Matches all the punctuators from section 7.7 of the language spec 69 // that precede expression starts. 70 case ':', ';', '{': 71 return jsCtxRegexp 72 // CAVEAT: the close punctuators ('}', ']', ')') precede div ops and 73 // are handled in the default except for '}' which can precede a 74 // division op as in 75 // ({ valueOf: function () { return 42 } } / 2 76 // which is valid, but, in practice, developers don't divide object 77 // literals, so our heuristic works well for code like 78 // function () { ... } /foo/.test(x) && sideEffect(); 79 // The ')' punctuator can precede a regular expression as in 80 // if (b) /foo/.test(x) && ... 81 // but this is much less likely than 82 // (a + b) / c 83 case '}': 84 return jsCtxRegexp 85 default: 86 // Look for an IdentifierName and see if it is a keyword that 87 // can precede a regular expression. 88 j := n 89 for j > 0 && isJSIdentPart(rune(s[j-1])) { 90 j-- 91 } 92 if regexpPrecederKeywords[string(s[j:])] { 93 return jsCtxRegexp 94 } 95 } 96 // Otherwise is a punctuator not listed above, or 97 // a string which precedes a div op, or an identifier 98 // which precedes a div op. 99 return jsCtxDivOp 100} 101 102// regexpPrecederKeywords is a set of reserved JS keywords that can precede a 103// regular expression in JS source. 
104var regexpPrecederKeywords = map[string]bool{ 105 "break": true, 106 "case": true, 107 "continue": true, 108 "delete": true, 109 "do": true, 110 "else": true, 111 "finally": true, 112 "in": true, 113 "instanceof": true, 114 "return": true, 115 "throw": true, 116 "try": true, 117 "typeof": true, 118 "void": true, 119} 120 121var jsonMarshalType = reflect.TypeOf((*json.Marshaler)(nil)).Elem() 122 123// indirectToJSONMarshaler returns the value, after dereferencing as many times 124// as necessary to reach the base type (or nil) or an implementation of json.Marshal. 125func indirectToJSONMarshaler(a interface{}) interface{} { 126 // text/template now supports passing untyped nil as a func call 127 // argument, so we must support it. Otherwise we'd panic below, as one 128 // cannot call the Type or Interface methods on an invalid 129 // reflect.Value. See golang.org/issue/18716. 130 if a == nil { 131 return nil 132 } 133 134 v := reflect.ValueOf(a) 135 for !v.Type().Implements(jsonMarshalType) && v.Kind() == reflect.Ptr && !v.IsNil() { 136 v = v.Elem() 137 } 138 return v.Interface() 139} 140 141// jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has 142// neither side-effects nor free variables outside (NaN, Infinity). 143func jsValEscaper(args ...interface{}) string { 144 var a interface{} 145 if len(args) == 1 { 146 a = indirectToJSONMarshaler(args[0]) 147 switch t := a.(type) { 148 case JS: 149 return string(t) 150 case JSStr: 151 // TODO: normalize quotes. 152 return `"` + string(t) + `"` 153 case json.Marshaler: 154 // Do not treat as a Stringer. 155 case fmt.Stringer: 156 a = t.String() 157 } 158 } else { 159 for i, arg := range args { 160 args[i] = indirectToJSONMarshaler(arg) 161 } 162 a = fmt.Sprint(args...) 163 } 164 // TODO: detect cycles before calling Marshal which loops infinitely on 165 // cyclic data. This may be an unacceptable DoS risk. 
166 167 b, err := json.Marshal(a) 168 if err != nil { 169 // Put a space before comment so that if it is flush against 170 // a division operator it is not turned into a line comment: 171 // x/{{y}} 172 // turning into 173 // x//* error marshaling y: 174 // second line of error message */null 175 return fmt.Sprintf(" /* %s */null ", strings.ReplaceAll(err.Error(), "*/", "* /")) 176 } 177 178 // TODO: maybe post-process output to prevent it from containing 179 // "<!--", "-->", "<![CDATA[", "]]>", or "</script" 180 // in case custom marshalers produce output containing those. 181 182 // TODO: Maybe abbreviate \u00ab to \xab to produce more compact output. 183 if len(b) == 0 { 184 // In, `x=y/{{.}}*z` a json.Marshaler that produces "" should 185 // not cause the output `x=y/*z`. 186 return " null " 187 } 188 first, _ := utf8.DecodeRune(b) 189 last, _ := utf8.DecodeLastRune(b) 190 var buf strings.Builder 191 // Prevent IdentifierNames and NumericLiterals from running into 192 // keywords: in, instanceof, typeof, void 193 pad := isJSIdentPart(first) || isJSIdentPart(last) 194 if pad { 195 buf.WriteByte(' ') 196 } 197 written := 0 198 // Make sure that json.Marshal escapes codepoints U+2028 & U+2029 199 // so it falls within the subset of JSON which is valid JS. 200 for i := 0; i < len(b); { 201 rune, n := utf8.DecodeRune(b[i:]) 202 repl := "" 203 if rune == 0x2028 { 204 repl = `\u2028` 205 } else if rune == 0x2029 { 206 repl = `\u2029` 207 } 208 if repl != "" { 209 buf.Write(b[written:i]) 210 buf.WriteString(repl) 211 written = i + n 212 } 213 i += n 214 } 215 if buf.Len() != 0 { 216 buf.Write(b[written:]) 217 if pad { 218 buf.WriteByte(' ') 219 } 220 return buf.String() 221 } 222 return string(b) 223} 224 225// jsStrEscaper produces a string that can be included between quotes in 226// JavaScript source, in JavaScript embedded in an HTML5 <script> element, 227// or in an HTML5 event handler attribute such as onclick. 
228func jsStrEscaper(args ...interface{}) string { 229 s, t := stringify(args...) 230 if t == contentTypeJSStr { 231 return replace(s, jsStrNormReplacementTable) 232 } 233 return replace(s, jsStrReplacementTable) 234} 235 236// jsRegexpEscaper behaves like jsStrEscaper but escapes regular expression 237// specials so the result is treated literally when included in a regular 238// expression literal. /foo{{.X}}bar/ matches the string "foo" followed by 239// the literal text of {{.X}} followed by the string "bar". 240func jsRegexpEscaper(args ...interface{}) string { 241 s, _ := stringify(args...) 242 s = replace(s, jsRegexpReplacementTable) 243 if s == "" { 244 // /{{.X}}/ should not produce a line comment when .X == "". 245 return "(?:)" 246 } 247 return s 248} 249 250// replace replaces each rune r of s with replacementTable[r], provided that 251// r < len(replacementTable). If replacementTable[r] is the empty string then 252// no replacement is made. 253// It also replaces runes U+2028 and U+2029 with the raw strings `\u2028` and 254// `\u2029`. 255func replace(s string, replacementTable []string) string { 256 var b strings.Builder 257 r, w, written := rune(0), 0, 0 258 for i := 0; i < len(s); i += w { 259 // See comment in htmlEscaper. 260 r, w = utf8.DecodeRuneInString(s[i:]) 261 var repl string 262 switch { 263 case int(r) < len(replacementTable) && replacementTable[r] != "": 264 repl = replacementTable[r] 265 case r == '\u2028': 266 repl = `\u2028` 267 case r == '\u2029': 268 repl = `\u2029` 269 default: 270 continue 271 } 272 if written == 0 { 273 b.Grow(len(s)) 274 } 275 b.WriteString(s[written:i]) 276 b.WriteString(repl) 277 written = i + w 278 } 279 if written == 0 { 280 return s 281 } 282 b.WriteString(s[written:]) 283 return b.String() 284} 285 286var jsStrReplacementTable = []string{ 287 0: `\0`, 288 '\t': `\t`, 289 '\n': `\n`, 290 '\v': `\x0b`, // "\v" == "v" on IE 6. 
291 '\f': `\f`, 292 '\r': `\r`, 293 // Encode HTML specials as hex so the output can be embedded 294 // in HTML attributes without further encoding. 295 '"': `\x22`, 296 '&': `\x26`, 297 '\'': `\x27`, 298 '+': `\x2b`, 299 '/': `\/`, 300 '<': `\x3c`, 301 '>': `\x3e`, 302 '\\': `\\`, 303} 304 305// jsStrNormReplacementTable is like jsStrReplacementTable but does not 306// overencode existing escapes since this table has no entry for `\`. 307var jsStrNormReplacementTable = []string{ 308 0: `\0`, 309 '\t': `\t`, 310 '\n': `\n`, 311 '\v': `\x0b`, // "\v" == "v" on IE 6. 312 '\f': `\f`, 313 '\r': `\r`, 314 // Encode HTML specials as hex so the output can be embedded 315 // in HTML attributes without further encoding. 316 '"': `\x22`, 317 '&': `\x26`, 318 '\'': `\x27`, 319 '+': `\x2b`, 320 '/': `\/`, 321 '<': `\x3c`, 322 '>': `\x3e`, 323} 324 325var jsRegexpReplacementTable = []string{ 326 0: `\0`, 327 '\t': `\t`, 328 '\n': `\n`, 329 '\v': `\x0b`, // "\v" == "v" on IE 6. 330 '\f': `\f`, 331 '\r': `\r`, 332 // Encode HTML specials as hex so the output can be embedded 333 // in HTML attributes without further encoding. 334 '"': `\x22`, 335 '$': `\$`, 336 '&': `\x26`, 337 '\'': `\x27`, 338 '(': `\(`, 339 ')': `\)`, 340 '*': `\*`, 341 '+': `\x2b`, 342 '-': `\-`, 343 '.': `\.`, 344 '/': `\/`, 345 '<': `\x3c`, 346 '>': `\x3e`, 347 '?': `\?`, 348 '[': `\[`, 349 '\\': `\\`, 350 ']': `\]`, 351 '^': `\^`, 352 '{': `\{`, 353 '|': `\|`, 354 '}': `\}`, 355} 356 357// isJSIdentPart reports whether the given rune is a JS identifier part. 358// It does not handle all the non-Latin letters, joiners, and combining marks, 359// but it does handle every codepoint that can occur in a numeric literal or 360// a keyword. 
func isJSIdentPart(r rune) bool {
	// Only the ASCII subset is recognised: '$', '_', digits and Latin
	// letters.
	switch {
	case r == '$', r == '_':
		return true
	case '0' <= r && r <= '9':
		return true
	case 'A' <= r && r <= 'Z', 'a' <= r && r <= 'z':
		return true
	}
	return false
}

// isJSType reports whether the given MIME type should be considered JavaScript.
//
// It is used to determine whether a script tag with a type attribute is a javascript container.
// Any parameters after the first ';' are ignored, and the comparison is
// case-insensitive and ignores surrounding whitespace.
func isJSType(mimeType string) bool {
	// per
	//   https://www.w3.org/TR/html5/scripting-1.html#attr-script-type
	//   https://tools.ietf.org/html/rfc7231#section-3.1.1
	//   https://tools.ietf.org/html/rfc4329#section-3
	//   https://www.ietf.org/rfc/rfc4627.txt
	// discard parameters
	if i := strings.Index(mimeType, ";"); i >= 0 {
		mimeType = mimeType[:i]
	}
	mimeType = strings.ToLower(mimeType)
	mimeType = strings.TrimSpace(mimeType)
	switch mimeType {
	case
		"application/ecmascript",
		"application/javascript",
		"application/json",
		"application/ld+json",
		"application/x-ecmascript",
		"application/x-javascript",
		"module",
		"text/ecmascript",
		"text/javascript",
		"text/javascript1.0",
		"text/javascript1.1",
		"text/javascript1.2",
		"text/javascript1.3",
		"text/javascript1.4",
		"text/javascript1.5",
		"text/jscript",
		"text/livescript",
		"text/x-ecmascript",
		"text/x-javascript":
		return true
	default:
		return false
	}
}