1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package template
6
7import (
8	"bytes"
9	"encoding/json"
10	"fmt"
11	"reflect"
12	"strings"
13	"unicode/utf8"
14)
15
16// nextJSCtx returns the context that determines whether a slash after the
17// given run of tokens starts a regular expression instead of a division
18// operator: / or /=.
19//
20// This assumes that the token run does not include any string tokens, comment
21// tokens, regular expression literal tokens, or division operators.
22//
23// This fails on some valid but nonsensical JavaScript programs like
24// "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to
25// fail on any known useful programs. It is based on the draft
26// JavaScript 2.0 lexical grammar and requires one token of lookbehind:
27// https://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html
28func nextJSCtx(s []byte, preceding jsCtx) jsCtx {
29	s = bytes.TrimRight(s, "\t\n\f\r \u2028\u2029")
30	if len(s) == 0 {
31		return preceding
32	}
33
34	// All cases below are in the single-byte UTF-8 group.
35	switch c, n := s[len(s)-1], len(s); c {
36	case '+', '-':
37		// ++ and -- are not regexp preceders, but + and - are whether
38		// they are used as infix or prefix operators.
39		start := n - 1
40		// Count the number of adjacent dashes or pluses.
41		for start > 0 && s[start-1] == c {
42			start--
43		}
44		if (n-start)&1 == 1 {
45			// Reached for trailing minus signs since "---" is the
46			// same as "-- -".
47			return jsCtxRegexp
48		}
49		return jsCtxDivOp
50	case '.':
51		// Handle "42."
52		if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' {
53			return jsCtxDivOp
54		}
55		return jsCtxRegexp
56	// Suffixes for all punctuators from section 7.7 of the language spec
57	// that only end binary operators not handled above.
58	case ',', '<', '>', '=', '*', '%', '&', '|', '^', '?':
59		return jsCtxRegexp
60	// Suffixes for all punctuators from section 7.7 of the language spec
61	// that are prefix operators not handled above.
62	case '!', '~':
63		return jsCtxRegexp
64	// Matches all the punctuators from section 7.7 of the language spec
65	// that are open brackets not handled above.
66	case '(', '[':
67		return jsCtxRegexp
68	// Matches all the punctuators from section 7.7 of the language spec
69	// that precede expression starts.
70	case ':', ';', '{':
71		return jsCtxRegexp
72	// CAVEAT: the close punctuators ('}', ']', ')') precede div ops and
73	// are handled in the default except for '}' which can precede a
74	// division op as in
75	//    ({ valueOf: function () { return 42 } } / 2
76	// which is valid, but, in practice, developers don't divide object
77	// literals, so our heuristic works well for code like
78	//    function () { ... }  /foo/.test(x) && sideEffect();
79	// The ')' punctuator can precede a regular expression as in
80	//     if (b) /foo/.test(x) && ...
81	// but this is much less likely than
82	//     (a + b) / c
83	case '}':
84		return jsCtxRegexp
85	default:
86		// Look for an IdentifierName and see if it is a keyword that
87		// can precede a regular expression.
88		j := n
89		for j > 0 && isJSIdentPart(rune(s[j-1])) {
90			j--
91		}
92		if regexpPrecederKeywords[string(s[j:])] {
93			return jsCtxRegexp
94		}
95	}
96	// Otherwise is a punctuator not listed above, or
97	// a string which precedes a div op, or an identifier
98	// which precedes a div op.
99	return jsCtxDivOp
100}
101
// regexpPrecederKeywords is the set of reserved JavaScript words after which
// a slash is taken to begin a regular expression literal rather than a
// division operator. Words that are not in the set look up as false.
var regexpPrecederKeywords = map[string]bool{
	// Statement and clause keywords.
	"break": true, "case": true, "continue": true, "do": true, "else": true,
	"finally": true, "return": true, "throw": true, "try": true,

	// Keyword operators.
	"delete": true, "in": true, "instanceof": true, "typeof": true, "void": true,
}
120
// jsonMarshalType is the reflect.Type of the json.Marshaler interface.
var jsonMarshalType = reflect.TypeOf((*json.Marshaler)(nil)).Elem()

// indirectToJSONMarshaler returns a after following pointers until it reaches
// a value whose type implements json.Marshaler, a nil pointer, or a value
// that is not a pointer. An untyped nil argument yields nil.
func indirectToJSONMarshaler(a interface{}) interface{} {
	// text/template can pass an untyped nil as a function argument.
	// reflect.ValueOf(nil) is the invalid Value, on which Type and
	// Interface panic, so handle it up front.
	// See golang.org/issue/18716.
	if a == nil {
		return nil
	}

	val := reflect.ValueOf(a)
	for {
		if val.Type().Implements(jsonMarshalType) {
			// Stop at the outermost marshaler, even if it is a pointer.
			break
		}
		if val.Kind() != reflect.Ptr || val.IsNil() {
			// Nothing further to dereference.
			break
		}
		val = val.Elem()
	}
	return val.Interface()
}
140
141// jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has
142// neither side-effects nor free variables outside (NaN, Infinity).
143func jsValEscaper(args ...interface{}) string {
144	var a interface{}
145	if len(args) == 1 {
146		a = indirectToJSONMarshaler(args[0])
147		switch t := a.(type) {
148		case JS:
149			return string(t)
150		case JSStr:
151			// TODO: normalize quotes.
152			return `"` + string(t) + `"`
153		case json.Marshaler:
154			// Do not treat as a Stringer.
155		case fmt.Stringer:
156			a = t.String()
157		}
158	} else {
159		for i, arg := range args {
160			args[i] = indirectToJSONMarshaler(arg)
161		}
162		a = fmt.Sprint(args...)
163	}
164	// TODO: detect cycles before calling Marshal which loops infinitely on
165	// cyclic data. This may be an unacceptable DoS risk.
166
167	b, err := json.Marshal(a)
168	if err != nil {
169		// Put a space before comment so that if it is flush against
170		// a division operator it is not turned into a line comment:
171		//     x/{{y}}
172		// turning into
173		//     x//* error marshaling y:
174		//          second line of error message */null
175		return fmt.Sprintf(" /* %s */null ", strings.ReplaceAll(err.Error(), "*/", "* /"))
176	}
177
178	// TODO: maybe post-process output to prevent it from containing
179	// "<!--", "-->", "<![CDATA[", "]]>", or "</script"
180	// in case custom marshalers produce output containing those.
181
182	// TODO: Maybe abbreviate \u00ab to \xab to produce more compact output.
183	if len(b) == 0 {
184		// In, `x=y/{{.}}*z` a json.Marshaler that produces "" should
185		// not cause the output `x=y/*z`.
186		return " null "
187	}
188	first, _ := utf8.DecodeRune(b)
189	last, _ := utf8.DecodeLastRune(b)
190	var buf strings.Builder
191	// Prevent IdentifierNames and NumericLiterals from running into
192	// keywords: in, instanceof, typeof, void
193	pad := isJSIdentPart(first) || isJSIdentPart(last)
194	if pad {
195		buf.WriteByte(' ')
196	}
197	written := 0
198	// Make sure that json.Marshal escapes codepoints U+2028 & U+2029
199	// so it falls within the subset of JSON which is valid JS.
200	for i := 0; i < len(b); {
201		rune, n := utf8.DecodeRune(b[i:])
202		repl := ""
203		if rune == 0x2028 {
204			repl = `\u2028`
205		} else if rune == 0x2029 {
206			repl = `\u2029`
207		}
208		if repl != "" {
209			buf.Write(b[written:i])
210			buf.WriteString(repl)
211			written = i + n
212		}
213		i += n
214	}
215	if buf.Len() != 0 {
216		buf.Write(b[written:])
217		if pad {
218			buf.WriteByte(' ')
219		}
220		return buf.String()
221	}
222	return string(b)
223}
224
225// jsStrEscaper produces a string that can be included between quotes in
226// JavaScript source, in JavaScript embedded in an HTML5 <script> element,
227// or in an HTML5 event handler attribute such as onclick.
228func jsStrEscaper(args ...interface{}) string {
229	s, t := stringify(args...)
230	if t == contentTypeJSStr {
231		return replace(s, jsStrNormReplacementTable)
232	}
233	return replace(s, jsStrReplacementTable)
234}
235
236// jsRegexpEscaper behaves like jsStrEscaper but escapes regular expression
237// specials so the result is treated literally when included in a regular
238// expression literal. /foo{{.X}}bar/ matches the string "foo" followed by
239// the literal text of {{.X}} followed by the string "bar".
240func jsRegexpEscaper(args ...interface{}) string {
241	s, _ := stringify(args...)
242	s = replace(s, jsRegexpReplacementTable)
243	if s == "" {
244		// /{{.X}}/ should not produce a line comment when .X == "".
245		return "(?:)"
246	}
247	return s
248}
249
// replace returns s with each rune r substituted by replacementTable[r]
// whenever r indexes into the table and the entry there is non-empty.
// Runes U+2028 and U+2029 that have no table entry are substituted by the
// raw strings `\u2028` and `\u2029`. Everything else, including bytes that
// are not valid UTF-8, is copied through unchanged. When nothing needs
// substituting, s itself is returned.
func replace(s string, replacementTable []string) string {
	var out strings.Builder
	copied := 0 // index in s of the first byte not yet copied to out
	for pos := 0; pos < len(s); {
		r, size := utf8.DecodeRuneInString(s[pos:])
		start := pos
		pos += size

		sub := ""
		if int(r) < len(replacementTable) {
			sub = replacementTable[r]
		}
		if sub == "" {
			switch r {
			case '\u2028':
				sub = `\u2028`
			case '\u2029':
				sub = `\u2029`
			default:
				continue
			}
		}

		if copied == 0 {
			out.Grow(len(s))
		}
		out.WriteString(s[copied:start])
		out.WriteString(sub)
		copied = pos
	}
	if copied == 0 {
		return s
	}
	out.WriteString(s[copied:])
	return out.String()
}
285
// jsStrReplacementTable maps a rune, used as the index, to the escape
// sequence that replaces it inside a quoted JavaScript string. Runes with an
// empty entry, or beyond the end of the table, have no replacement here.
//
// Numeric escapes use the \uNNNN form. `\0` would fuse with a following
// digit into a legacy octal escape (the escaped text for NUL followed by "1"
// would read as `\01`), and \xNN escapes are not valid JSON, which script
// elements may also contain.
var jsStrReplacementTable = []string{
	0:    `\u0000`,
	'\t': `\t`,
	'\n': `\n`,
	'\v': `\u000b`, // "\v" == "v" on IE 6.
	'\f': `\f`,
	'\r': `\r`,
	// Encode HTML specials as hex so the output can be embedded
	// in HTML attributes without further encoding.
	'"':  `\u0022`,
	'&':  `\u0026`,
	'\'': `\u0027`,
	'+':  `\u002b`,
	'/':  `\/`,
	'<':  `\u003c`,
	'>':  `\u003e`,
	'\\': `\\`,
}
304
// jsStrNormReplacementTable is like jsStrReplacementTable but does not
// overencode existing escapes since this table has no entry for `\`.
//
// Numeric escapes use the \uNNNN form. `\0` would fuse with a following
// digit into a legacy octal escape (the escaped text for NUL followed by "1"
// would read as `\01`), and \xNN escapes are not valid JSON, which script
// elements may also contain.
var jsStrNormReplacementTable = []string{
	0:    `\u0000`,
	'\t': `\t`,
	'\n': `\n`,
	'\v': `\u000b`, // "\v" == "v" on IE 6.
	'\f': `\f`,
	'\r': `\r`,
	// Encode HTML specials as hex so the output can be embedded
	// in HTML attributes without further encoding.
	'"':  `\u0022`,
	'&':  `\u0026`,
	'\'': `\u0027`,
	'+':  `\u002b`,
	'/':  `\/`,
	'<':  `\u003c`,
	'>':  `\u003e`,
}
324
// jsRegexpReplacementTable maps a rune, used as the index, to the escape
// sequence that replaces it inside a JavaScript regular expression literal,
// so that the escaped text is matched literally. Runes with an empty entry,
// or beyond the end of the table, have no replacement here.
//
// Numeric escapes use the \uNNNN form; in particular NUL is not written as
// `\0`, which would fuse with a following digit into a legacy octal escape.
var jsRegexpReplacementTable = []string{
	0:    `\u0000`,
	'\t': `\t`,
	'\n': `\n`,
	'\v': `\u000b`, // "\v" == "v" on IE 6.
	'\f': `\f`,
	'\r': `\r`,
	// Encode HTML specials as hex so the output can be embedded
	// in HTML attributes without further encoding.
	'"':  `\u0022`,
	'$':  `\$`,
	'&':  `\u0026`,
	'\'': `\u0027`,
	'(':  `\(`,
	')':  `\)`,
	'*':  `\*`,
	'+':  `\u002b`,
	'-':  `\-`,
	'.':  `\.`,
	'/':  `\/`,
	'<':  `\u003c`,
	'>':  `\u003e`,
	'?':  `\?`,
	'[':  `\[`,
	'\\': `\\`,
	']':  `\]`,
	'^':  `\^`,
	'{':  `\{`,
	'|':  `\|`,
	'}':  `\}`,
}
356
// isJSIdentPart reports whether r may appear inside a JS identifier.
// Only '$', '_', the ASCII digits and the ASCII letters are recognized:
// non-Latin letters, joiners, and combining marks are not handled, but every
// codepoint that can occur in a numeric literal or a keyword is.
func isJSIdentPart(r rune) bool {
	if r == '$' || r == '_' {
		return true
	}
	isDigit := '0' <= r && r <= '9'
	isUpper := 'A' <= r && r <= 'Z'
	isLower := 'a' <= r && r <= 'z'
	return isDigit || isUpper || isLower
}
376
// isJSType reports whether the given MIME type should be considered JavaScript.
//
// It is used to determine whether a script tag with a type attribute is a javascript container.
//
// Anything from the first ';' onward is discarded, and the remainder is
// lower-cased and stripped of surrounding whitespace before being compared
// against the list of recognized types.
func isJSType(mimeType string) bool {
	// per
	//   https://www.w3.org/TR/html5/scripting-1.html#attr-script-type
	//   https://tools.ietf.org/html/rfc7231#section-3.1.1
	//   https://tools.ietf.org/html/rfc4329#section-3
	//   https://www.ietf.org/rfc/rfc4627.txt
	if semi := strings.IndexByte(mimeType, ';'); semi != -1 {
		// Drop parameters such as "; charset=utf-8".
		mimeType = mimeType[:semi]
	}
	switch strings.TrimSpace(strings.ToLower(mimeType)) {
	case "module":
		return true
	case "application/ecmascript",
		"application/javascript",
		"application/json",
		"application/ld+json",
		"application/x-ecmascript",
		"application/x-javascript":
		return true
	case "text/ecmascript",
		"text/javascript",
		"text/javascript1.0",
		"text/javascript1.1",
		"text/javascript1.2",
		"text/javascript1.3",
		"text/javascript1.4",
		"text/javascript1.5",
		"text/jscript",
		"text/livescript",
		"text/x-ecmascript",
		"text/x-javascript":
		return true
	}
	return false
}
418