1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package template
6
7import (
8	"bytes"
9	"encoding/json"
10	"fmt"
11	"reflect"
12	"strings"
13	"unicode/utf8"
14)
15
16// nextJSCtx returns the context that determines whether a slash after the
17// given run of tokens starts a regular expression instead of a division
18// operator: / or /=.
19//
20// This assumes that the token run does not include any string tokens, comment
21// tokens, regular expression literal tokens, or division operators.
22//
23// This fails on some valid but nonsensical JavaScript programs like
24// "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to
25// fail on any known useful programs. It is based on the draft
26// JavaScript 2.0 lexical grammar and requires one token of lookbehind:
27// https://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html
28func nextJSCtx(s []byte, preceding jsCtx) jsCtx {
29	s = bytes.TrimRight(s, "\t\n\f\r \u2028\u2029")
30	if len(s) == 0 {
31		return preceding
32	}
33
34	// All cases below are in the single-byte UTF-8 group.
35	switch c, n := s[len(s)-1], len(s); c {
36	case '+', '-':
37		// ++ and -- are not regexp preceders, but + and - are whether
38		// they are used as infix or prefix operators.
39		start := n - 1
40		// Count the number of adjacent dashes or pluses.
41		for start > 0 && s[start-1] == c {
42			start--
43		}
44		if (n-start)&1 == 1 {
45			// Reached for trailing minus signs since "---" is the
46			// same as "-- -".
47			return jsCtxRegexp
48		}
49		return jsCtxDivOp
50	case '.':
51		// Handle "42."
52		if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' {
53			return jsCtxDivOp
54		}
55		return jsCtxRegexp
56	// Suffixes for all punctuators from section 7.7 of the language spec
57	// that only end binary operators not handled above.
58	case ',', '<', '>', '=', '*', '%', '&', '|', '^', '?':
59		return jsCtxRegexp
60	// Suffixes for all punctuators from section 7.7 of the language spec
61	// that are prefix operators not handled above.
62	case '!', '~':
63		return jsCtxRegexp
64	// Matches all the punctuators from section 7.7 of the language spec
65	// that are open brackets not handled above.
66	case '(', '[':
67		return jsCtxRegexp
68	// Matches all the punctuators from section 7.7 of the language spec
69	// that precede expression starts.
70	case ':', ';', '{':
71		return jsCtxRegexp
72	// CAVEAT: the close punctuators ('}', ']', ')') precede div ops and
73	// are handled in the default except for '}' which can precede a
74	// division op as in
75	//    ({ valueOf: function () { return 42 } } / 2
76	// which is valid, but, in practice, developers don't divide object
77	// literals, so our heuristic works well for code like
78	//    function () { ... }  /foo/.test(x) && sideEffect();
79	// The ')' punctuator can precede a regular expression as in
80	//     if (b) /foo/.test(x) && ...
81	// but this is much less likely than
82	//     (a + b) / c
83	case '}':
84		return jsCtxRegexp
85	default:
86		// Look for an IdentifierName and see if it is a keyword that
87		// can precede a regular expression.
88		j := n
89		for j > 0 && isJSIdentPart(rune(s[j-1])) {
90			j--
91		}
92		if regexpPrecederKeywords[string(s[j:])] {
93			return jsCtxRegexp
94		}
95	}
96	// Otherwise is a punctuator not listed above, or
97	// a string which precedes a div op, or an identifier
98	// which precedes a div op.
99	return jsCtxDivOp
100}
101
// regexpPrecederKeywords is the set of reserved JS keywords after which a
// slash begins a regular expression literal rather than a division operator.
// Lookups for any other identifier yield false.
var regexpPrecederKeywords = map[string]bool{
	// Keywords that begin or end a statement or clause.
	"break":    true,
	"case":     true,
	"continue": true,
	"do":       true,
	"else":     true,
	"finally":  true,
	"return":   true,
	"throw":    true,
	"try":      true,
	// Keyword operators, which are followed by an operand.
	"delete":     true,
	"in":         true,
	"instanceof": true,
	"typeof":     true,
	"void":       true,
}
120
// jsonMarshalType is the reflect.Type of the json.Marshaler interface, used to
// test whether a value's type implements it.
var jsonMarshalType = reflect.TypeOf((*json.Marshaler)(nil)).Elem()

// indirectToJSONMarshaler follows pointers starting at a and returns the first
// value that either implements json.Marshaler, is not a pointer, or is a nil
// pointer. An untyped nil argument is returned as nil.
func indirectToJSONMarshaler(a interface{}) interface{} {
	// text/template now supports passing untyped nil as a func call
	// argument, so it must be accepted here. reflect.ValueOf(nil) is the
	// invalid Value, on which Type and Interface would panic.
	// See golang.org/issue/18716.
	if a == nil {
		return nil
	}

	val := reflect.ValueOf(a)
	for {
		if val.Type().Implements(jsonMarshalType) {
			// Stop at the outermost json.Marshaler so that its
			// MarshalJSON method is the one that gets used.
			break
		}
		if val.Kind() != reflect.Ptr || val.IsNil() {
			break
		}
		val = val.Elem()
	}
	return val.Interface()
}
140
141// jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has
142// neither side-effects nor free variables outside (NaN, Infinity).
143func jsValEscaper(args ...interface{}) string {
144	var a interface{}
145	if len(args) == 1 {
146		a = indirectToJSONMarshaler(args[0])
147		switch t := a.(type) {
148		case JS:
149			return string(t)
150		case JSStr:
151			// TODO: normalize quotes.
152			return `"` + string(t) + `"`
153		case json.Marshaler:
154			// Do not treat as a Stringer.
155		case fmt.Stringer:
156			a = t.String()
157		}
158	} else {
159		for i, arg := range args {
160			args[i] = indirectToJSONMarshaler(arg)
161		}
162		a = fmt.Sprint(args...)
163	}
164	// TODO: detect cycles before calling Marshal which loops infinitely on
165	// cyclic data. This may be an unacceptable DoS risk.
166	b, err := json.Marshal(a)
167	if err != nil {
168		// Put a space before comment so that if it is flush against
169		// a division operator it is not turned into a line comment:
170		//     x/{{y}}
171		// turning into
172		//     x//* error marshaling y:
173		//          second line of error message */null
174		return fmt.Sprintf(" /* %s */null ", strings.ReplaceAll(err.Error(), "*/", "* /"))
175	}
176
177	// TODO: maybe post-process output to prevent it from containing
178	// "<!--", "-->", "<![CDATA[", "]]>", or "</script"
179	// in case custom marshalers produce output containing those.
180	// Note: Do not use \x escaping to save bytes because it is not JSON compatible and this escaper
181	// supports ld+json content-type.
182	if len(b) == 0 {
183		// In, `x=y/{{.}}*z` a json.Marshaler that produces "" should
184		// not cause the output `x=y/*z`.
185		return " null "
186	}
187	first, _ := utf8.DecodeRune(b)
188	last, _ := utf8.DecodeLastRune(b)
189	var buf strings.Builder
190	// Prevent IdentifierNames and NumericLiterals from running into
191	// keywords: in, instanceof, typeof, void
192	pad := isJSIdentPart(first) || isJSIdentPart(last)
193	if pad {
194		buf.WriteByte(' ')
195	}
196	written := 0
197	// Make sure that json.Marshal escapes codepoints U+2028 & U+2029
198	// so it falls within the subset of JSON which is valid JS.
199	for i := 0; i < len(b); {
200		rune, n := utf8.DecodeRune(b[i:])
201		repl := ""
202		if rune == 0x2028 {
203			repl = `\u2028`
204		} else if rune == 0x2029 {
205			repl = `\u2029`
206		}
207		if repl != "" {
208			buf.Write(b[written:i])
209			buf.WriteString(repl)
210			written = i + n
211		}
212		i += n
213	}
214	if buf.Len() != 0 {
215		buf.Write(b[written:])
216		if pad {
217			buf.WriteByte(' ')
218		}
219		return buf.String()
220	}
221	return string(b)
222}
223
224// jsStrEscaper produces a string that can be included between quotes in
225// JavaScript source, in JavaScript embedded in an HTML5 <script> element,
226// or in an HTML5 event handler attribute such as onclick.
227func jsStrEscaper(args ...interface{}) string {
228	s, t := stringify(args...)
229	if t == contentTypeJSStr {
230		return replace(s, jsStrNormReplacementTable)
231	}
232	return replace(s, jsStrReplacementTable)
233}
234
235// jsRegexpEscaper behaves like jsStrEscaper but escapes regular expression
236// specials so the result is treated literally when included in a regular
237// expression literal. /foo{{.X}}bar/ matches the string "foo" followed by
238// the literal text of {{.X}} followed by the string "bar".
239func jsRegexpEscaper(args ...interface{}) string {
240	s, _ := stringify(args...)
241	s = replace(s, jsRegexpReplacementTable)
242	if s == "" {
243		// /{{.X}}/ should not produce a line comment when .X == "".
244		return "(?:)"
245	}
246	return s
247}
248
// replace returns s with each rune that needs escaping swapped for its
// replacement text. For every rune r of s, in order of precedence:
//   - if r < len(lowUnicodeReplacementTable), lowUnicodeReplacementTable[r]
//     is used;
//   - if r < len(replacementTable) and replacementTable[r] is not the empty
//     string, replacementTable[r] is used;
//   - U+2028 and U+2029 become the raw strings `\u2028` and `\u2029`;
//   - any other rune is kept as is.
//
// If nothing is replaced, s itself is returned.
func replace(s string, replacementTable []string) string {
	var out strings.Builder
	copied := 0 // s[:copied] has already been emitted to out
	for pos := 0; pos < len(s); {
		// See comment in htmlEscaper.
		r, size := utf8.DecodeRuneInString(s[pos:])
		start := pos
		pos += size

		var repl string
		if idx := int(r); idx < len(lowUnicodeReplacementTable) {
			repl = lowUnicodeReplacementTable[idx]
		} else if idx < len(replacementTable) && replacementTable[idx] != "" {
			repl = replacementTable[idx]
		} else if r == '\u2028' {
			repl = `\u2028`
		} else if r == '\u2029' {
			repl = `\u2029`
		} else {
			continue
		}

		if copied == 0 {
			// First replacement: reserve room for the whole input.
			out.Grow(len(s))
		}
		out.WriteString(s[copied:start])
		out.WriteString(repl)
		copied = pos
	}
	if copied == 0 {
		return s
	}
	out.WriteString(s[copied:])
	return out.String()
}

// lowUnicodeReplacementTable holds the escape for every code point below
// U+0020, indexed by code point. replace consults it before the table supplied
// by its caller.
var lowUnicodeReplacementTable = []string{
	0: `\u0000`, 1: `\u0001`, 2: `\u0002`, 3: `\u0003`, 4: `\u0004`, 5: `\u0005`, 6: `\u0006`,
	'\a': `\u0007`,
	'\b': `\u0008`,
	'\t': `\t`,
	'\n': `\n`,
	'\v': `\u000b`, // "\v" == "v" on IE 6.
	'\f': `\f`,
	'\r': `\r`,
	0xe:  `\u000e`, 0xf: `\u000f`, 0x10: `\u0010`, 0x11: `\u0011`, 0x12: `\u0012`, 0x13: `\u0013`,
	0x14: `\u0014`, 0x15: `\u0015`, 0x16: `\u0016`, 0x17: `\u0017`, 0x18: `\u0018`, 0x19: `\u0019`,
	0x1a: `\u001a`, 0x1b: `\u001b`, 0x1c: `\u001c`, 0x1d: `\u001d`, 0x1e: `\u001e`, 0x1f: `\u001f`,
}
300
301var jsStrReplacementTable = []string{
302	0:    `\u0000`,
303	'\t': `\t`,
304	'\n': `\n`,
305	'\v': `\u000b`, // "\v" == "v" on IE 6.
306	'\f': `\f`,
307	'\r': `\r`,
308	// Encode HTML specials as hex so the output can be embedded
309	// in HTML attributes without further encoding.
310	'"':  `\u0022`,
311	'&':  `\u0026`,
312	'\'': `\u0027`,
313	'+':  `\u002b`,
314	'/':  `\/`,
315	'<':  `\u003c`,
316	'>':  `\u003e`,
317	'\\': `\\`,
318}
319
320// jsStrNormReplacementTable is like jsStrReplacementTable but does not
321// overencode existing escapes since this table has no entry for `\`.
322var jsStrNormReplacementTable = []string{
323	0:    `\u0000`,
324	'\t': `\t`,
325	'\n': `\n`,
326	'\v': `\u000b`, // "\v" == "v" on IE 6.
327	'\f': `\f`,
328	'\r': `\r`,
329	// Encode HTML specials as hex so the output can be embedded
330	// in HTML attributes without further encoding.
331	'"':  `\u0022`,
332	'&':  `\u0026`,
333	'\'': `\u0027`,
334	'+':  `\u002b`,
335	'/':  `\/`,
336	'<':  `\u003c`,
337	'>':  `\u003e`,
338}
// jsRegexpReplacementTable maps a code point, used as the index, to the text
// that replaces it inside a JS regular expression literal. An empty entry
// means the code point is left alone. jsRegexpEscaper passes this table to
// replace.
var jsRegexpReplacementTable = []string{
	// Control characters.
	0:    `\u0000`,
	'\t': `\t`,
	'\n': `\n`,
	'\v': `\u000b`, // "\v" == "v" on IE 6.
	'\f': `\f`,
	'\r': `\r`,
	// Encode HTML specials as hex so the output can be embedded
	// in HTML attributes without further encoding.
	'"':  `\u0022`,
	'&':  `\u0026`,
	'\'': `\u0027`,
	'+':  `\u002b`,
	'/':  `\/`,
	'<':  `\u003c`,
	'>':  `\u003e`,
	// Backslash-escape the remaining regular expression specials so
	// that they match themselves.
	'$':  `\$`,
	'(':  `\(`,
	')':  `\)`,
	'*':  `\*`,
	'-':  `\-`,
	'.':  `\.`,
	'?':  `\?`,
	'[':  `\[`,
	'\\': `\\`,
	']':  `\]`,
	'^':  `\^`,
	'{':  `\{`,
	'|':  `\|`,
	'}':  `\}`,
}
370
// isJSIdentPart reports whether the given rune is a JS identifier part.
// Only '$', '_', the ASCII digits, and the ASCII letters are recognized: the
// non-Latin letters, joiners, and combining marks are not handled, but every
// codepoint that can occur in a numeric literal or a keyword is.
func isJSIdentPart(r rune) bool {
	if r == '$' || r == '_' {
		return true
	}
	isDigit := '0' <= r && r <= '9'
	isUpper := 'A' <= r && r <= 'Z'
	isLower := 'a' <= r && r <= 'z'
	return isDigit || isUpper || isLower
}
390
// isJSType reports whether the given MIME type should be considered JavaScript.
//
// It is used to determine whether a script tag with a type attribute is a javascript container.
// Any parameters (everything from the first ';') are dropped, and the rest is
// compared case-insensitively with surrounding white space ignored.
func isJSType(mimeType string) bool {
	// per
	//   https://www.w3.org/TR/html5/scripting-1.html#attr-script-type
	//   https://tools.ietf.org/html/rfc7231#section-3.1.1
	//   https://tools.ietf.org/html/rfc4329#section-3
	//   https://www.ietf.org/rfc/rfc4627.txt
	essence := mimeType
	if semi := strings.IndexByte(essence, ';'); semi >= 0 {
		// discard parameters
		essence = essence[:semi]
	}
	switch strings.TrimSpace(strings.ToLower(essence)) {
	case "application/ecmascript", "application/javascript",
		"application/json", "application/ld+json",
		"application/x-ecmascript", "application/x-javascript":
		return true
	case "module":
		return true
	case "text/ecmascript", "text/javascript",
		"text/javascript1.0", "text/javascript1.1", "text/javascript1.2",
		"text/javascript1.3", "text/javascript1.4", "text/javascript1.5",
		"text/jscript", "text/livescript",
		"text/x-ecmascript", "text/x-javascript":
		return true
	}
	return false
}
432