1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package template
6
7import (
8	"bytes"
9	"encoding/json"
10	"fmt"
11	"reflect"
12	"strings"
13	"unicode/utf8"
14)
15
16// nextJSCtx returns the context that determines whether a slash after the
17// given run of tokens starts a regular expression instead of a division
18// operator: / or /=.
19//
20// This assumes that the token run does not include any string tokens, comment
21// tokens, regular expression literal tokens, or division operators.
22//
23// This fails on some valid but nonsensical JavaScript programs like
24// "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to
25// fail on any known useful programs. It is based on the draft
26// JavaScript 2.0 lexical grammar and requires one token of lookbehind:
27// http://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html
28func nextJSCtx(s []byte, preceding jsCtx) jsCtx {
29	s = bytes.TrimRight(s, "\t\n\f\r \u2028\u2029")
30	if len(s) == 0 {
31		return preceding
32	}
33
34	// All cases below are in the single-byte UTF-8 group.
35	switch c, n := s[len(s)-1], len(s); c {
36	case '+', '-':
37		// ++ and -- are not regexp preceders, but + and - are whether
38		// they are used as infix or prefix operators.
39		start := n - 1
40		// Count the number of adjacent dashes or pluses.
41		for start > 0 && s[start-1] == c {
42			start--
43		}
44		if (n-start)&1 == 1 {
45			// Reached for trailing minus signs since "---" is the
46			// same as "-- -".
47			return jsCtxRegexp
48		}
49		return jsCtxDivOp
50	case '.':
51		// Handle "42."
52		if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' {
53			return jsCtxDivOp
54		}
55		return jsCtxRegexp
56	// Suffixes for all punctuators from section 7.7 of the language spec
57	// that only end binary operators not handled above.
58	case ',', '<', '>', '=', '*', '%', '&', '|', '^', '?':
59		return jsCtxRegexp
60	// Suffixes for all punctuators from section 7.7 of the language spec
61	// that are prefix operators not handled above.
62	case '!', '~':
63		return jsCtxRegexp
64	// Matches all the punctuators from section 7.7 of the language spec
65	// that are open brackets not handled above.
66	case '(', '[':
67		return jsCtxRegexp
68	// Matches all the punctuators from section 7.7 of the language spec
69	// that precede expression starts.
70	case ':', ';', '{':
71		return jsCtxRegexp
72	// CAVEAT: the close punctuators ('}', ']', ')') precede div ops and
73	// are handled in the default except for '}' which can precede a
74	// division op as in
75	//    ({ valueOf: function () { return 42 } } / 2
76	// which is valid, but, in practice, developers don't divide object
77	// literals, so our heuristic works well for code like
78	//    function () { ... }  /foo/.test(x) && sideEffect();
79	// The ')' punctuator can precede a regular expression as in
80	//     if (b) /foo/.test(x) && ...
81	// but this is much less likely than
82	//     (a + b) / c
83	case '}':
84		return jsCtxRegexp
85	default:
86		// Look for an IdentifierName and see if it is a keyword that
87		// can precede a regular expression.
88		j := n
89		for j > 0 && isJSIdentPart(rune(s[j-1])) {
90			j--
91		}
92		if regexpPrecederKeywords[string(s[j:])] {
93			return jsCtxRegexp
94		}
95	}
96	// Otherwise is a punctuator not listed above, or
97	// a string which precedes a div op, or an identifier
98	// which precedes a div op.
99	return jsCtxDivOp
100}
101
// regexpPrecederKeywords is a set of reserved JS keywords that can precede a
// regular expression in JS source.
//
// nextJSCtx looks up the identifier that ends a token run in this set: a hit
// means a following slash starts a regular expression literal, a miss (the
// zero value false) means it is treated as a division operator.
var regexpPrecederKeywords = map[string]bool{
	"break":      true,
	"case":       true,
	"continue":   true,
	"delete":     true,
	"do":         true,
	"else":       true,
	"finally":    true,
	"in":         true,
	"instanceof": true,
	"return":     true,
	"throw":      true,
	"try":        true,
	"typeof":     true,
	"void":       true,
}
120
// jsonMarshalType is the reflect.Type of the json.Marshaler interface, used
// to test whether a value's type implements it.
var jsonMarshalType = reflect.TypeOf((*json.Marshaler)(nil)).Elem()

// indirectToJSONMarshaler returns the value, after dereferencing as many times
// as necessary to reach the base type (or nil) or an implementation of
// json.Marshaler.
//
// An untyped nil argument is returned as nil.
func indirectToJSONMarshaler(a interface{}) interface{} {
	// reflect.ValueOf(nil) is the invalid zero Value, and calling Type or
	// Interface on it panics, so an untyped nil has to be answered before
	// any reflection takes place.
	if a == nil {
		return nil
	}

	v := reflect.ValueOf(a)
	for !v.Type().Implements(jsonMarshalType) && v.Kind() == reflect.Ptr && !v.IsNil() {
		v = v.Elem()
	}
	return v.Interface()
}
132
133// jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has
134// neither side-effects nor free variables outside (NaN, Infinity).
135func jsValEscaper(args ...interface{}) string {
136	var a interface{}
137	if len(args) == 1 {
138		a = indirectToJSONMarshaler(args[0])
139		switch t := a.(type) {
140		case JS:
141			return string(t)
142		case JSStr:
143			// TODO: normalize quotes.
144			return `"` + string(t) + `"`
145		case json.Marshaler:
146			// Do not treat as a Stringer.
147		case fmt.Stringer:
148			a = t.String()
149		}
150	} else {
151		for i, arg := range args {
152			args[i] = indirectToJSONMarshaler(arg)
153		}
154		a = fmt.Sprint(args...)
155	}
156	// TODO: detect cycles before calling Marshal which loops infinitely on
157	// cyclic data. This may be an unacceptable DoS risk.
158
159	b, err := json.Marshal(a)
160	if err != nil {
161		// Put a space before comment so that if it is flush against
162		// a division operator it is not turned into a line comment:
163		//     x/{{y}}
164		// turning into
165		//     x//* error marshaling y:
166		//          second line of error message */null
167		return fmt.Sprintf(" /* %s */null ", strings.Replace(err.Error(), "*/", "* /", -1))
168	}
169
170	// TODO: maybe post-process output to prevent it from containing
171	// "<!--", "-->", "<![CDATA[", "]]>", or "</script"
172	// in case custom marshalers produce output containing those.
173
174	// TODO: Maybe abbreviate \u00ab to \xab to produce more compact output.
175	if len(b) == 0 {
176		// In, `x=y/{{.}}*z` a json.Marshaler that produces "" should
177		// not cause the output `x=y/*z`.
178		return " null "
179	}
180	first, _ := utf8.DecodeRune(b)
181	last, _ := utf8.DecodeLastRune(b)
182	var buf bytes.Buffer
183	// Prevent IdentifierNames and NumericLiterals from running into
184	// keywords: in, instanceof, typeof, void
185	pad := isJSIdentPart(first) || isJSIdentPart(last)
186	if pad {
187		buf.WriteByte(' ')
188	}
189	written := 0
190	// Make sure that json.Marshal escapes codepoints U+2028 & U+2029
191	// so it falls within the subset of JSON which is valid JS.
192	for i := 0; i < len(b); {
193		rune, n := utf8.DecodeRune(b[i:])
194		repl := ""
195		if rune == 0x2028 {
196			repl = `\u2028`
197		} else if rune == 0x2029 {
198			repl = `\u2029`
199		}
200		if repl != "" {
201			buf.Write(b[written:i])
202			buf.WriteString(repl)
203			written = i + n
204		}
205		i += n
206	}
207	if buf.Len() != 0 {
208		buf.Write(b[written:])
209		if pad {
210			buf.WriteByte(' ')
211		}
212		b = buf.Bytes()
213	}
214	return string(b)
215}
216
217// jsStrEscaper produces a string that can be included between quotes in
218// JavaScript source, in JavaScript embedded in an HTML5 <script> element,
219// or in an HTML5 event handler attribute such as onclick.
220func jsStrEscaper(args ...interface{}) string {
221	s, t := stringify(args...)
222	if t == contentTypeJSStr {
223		return replace(s, jsStrNormReplacementTable)
224	}
225	return replace(s, jsStrReplacementTable)
226}
227
228// jsRegexpEscaper behaves like jsStrEscaper but escapes regular expression
229// specials so the result is treated literally when included in a regular
230// expression literal. /foo{{.X}}bar/ matches the string "foo" followed by
231// the literal text of {{.X}} followed by the string "bar".
232func jsRegexpEscaper(args ...interface{}) string {
233	s, _ := stringify(args...)
234	s = replace(s, jsRegexpReplacementTable)
235	if s == "" {
236		// /{{.X}}/ should not produce a line comment when .X == "".
237		return "(?:)"
238	}
239	return s
240}
241
// replace returns s with every rune r for which replacementTable holds a
// non-empty entry (that is, r < len(replacementTable) and
// replacementTable[r] != "") substituted by that entry.
// Runes U+2028 and U+2029 that the table does not cover are substituted by
// the raw strings `\u2028` and `\u2029`.
// All other input is copied through unchanged, and s itself is returned when
// nothing needed replacing.
func replace(s string, replacementTable []string) string {
	var out bytes.Buffer
	// flushed is the offset in s just past the last byte that has already
	// been accounted for in out.
	flushed := 0
	for pos := 0; pos < len(s); {
		// See comment in htmlEscaper.
		r, size := utf8.DecodeRuneInString(s[pos:])
		start := pos
		pos += size

		repl := ""
		if int(r) < len(replacementTable) {
			repl = replacementTable[r]
		}
		if repl == "" {
			switch r {
			case '\u2028':
				repl = `\u2028`
			case '\u2029':
				repl = `\u2029`
			default:
				continue
			}
		}

		out.WriteString(s[flushed:start])
		out.WriteString(repl)
		flushed = pos
	}
	if flushed == 0 {
		// No rune was replaced; avoid copying.
		return s
	}
	out.WriteString(s[flushed:])
	return out.String()
}
274
// jsStrReplacementTable maps runes that need escaping inside a JS string
// literal to their escape sequences. It is indexed by rune value; replace
// leaves alone any rune whose entry is empty or that lies beyond the end of
// the table. jsStrEscaper uses this table for content that is not already
// typed as a JS string.
var jsStrReplacementTable = []string{
	0:    `\0`,
	'\t': `\t`,
	'\n': `\n`,
	'\v': `\x0b`, // "\v" == "v" on IE 6.
	'\f': `\f`,
	'\r': `\r`,
	// Encode HTML specials as hex so the output can be embedded
	// in HTML attributes without further encoding.
	'"':  `\x22`,
	'&':  `\x26`,
	'\'': `\x27`,
	'+':  `\x2b`,
	'/':  `\/`,
	'<':  `\x3c`,
	'>':  `\x3e`,
	'\\': `\\`,
}
293
// jsStrNormReplacementTable is like jsStrReplacementTable but does not
// overencode existing escapes since this table has no entry for `\`.
// jsStrEscaper selects it when stringify reports the content type
// contentTypeJSStr.
var jsStrNormReplacementTable = []string{
	0:    `\0`,
	'\t': `\t`,
	'\n': `\n`,
	'\v': `\x0b`, // "\v" == "v" on IE 6.
	'\f': `\f`,
	'\r': `\r`,
	// Encode HTML specials as hex so the output can be embedded
	// in HTML attributes without further encoding.
	'"':  `\x22`,
	'&':  `\x26`,
	'\'': `\x27`,
	'+':  `\x2b`,
	'/':  `\/`,
	'<':  `\x3c`,
	'>':  `\x3e`,
}
313
// jsRegexpReplacementTable is the table jsRegexpEscaper passes to replace.
// It contains the same entries as jsStrReplacementTable and additionally
// backslash-escapes the regular expression specials
// $ ( ) * - . ? [ ] ^ { | } so that they are treated literally.
var jsRegexpReplacementTable = []string{
	0:    `\0`,
	'\t': `\t`,
	'\n': `\n`,
	'\v': `\x0b`, // "\v" == "v" on IE 6.
	'\f': `\f`,
	'\r': `\r`,
	// Encode HTML specials as hex so the output can be embedded
	// in HTML attributes without further encoding.
	'"':  `\x22`,
	'$':  `\$`,
	'&':  `\x26`,
	'\'': `\x27`,
	'(':  `\(`,
	')':  `\)`,
	'*':  `\*`,
	'+':  `\x2b`,
	'-':  `\-`,
	'.':  `\.`,
	'/':  `\/`,
	'<':  `\x3c`,
	'>':  `\x3e`,
	'?':  `\?`,
	'[':  `\[`,
	'\\': `\\`,
	']':  `\]`,
	'^':  `\^`,
	'{':  `\{`,
	'|':  `\|`,
	'}':  `\}`,
}
345
// isJSIdentPart reports whether r can appear inside a JS identifier.
// Only '$', '_', the ASCII digits and the ASCII letters are recognized:
// non-Latin letters, joiners, and combining marks are not handled, but every
// codepoint that can occur in a numeric literal or a keyword is.
func isJSIdentPart(r rune) bool {
	if r == '$' || r == '_' {
		return true
	}
	isDigit := '0' <= r && r <= '9'
	isUpper := 'A' <= r && r <= 'Z'
	isLower := 'a' <= r && r <= 'z'
	return isDigit || isUpper || isLower
}
365
// isJSType reports whether the given MIME type should be considered JavaScript.
//
// It is used to determine whether a script tag with a type attribute is a javascript container.
//
// Any parameters (everything from the first ';' on) are discarded, and the
// remaining type is compared case-insensitively with surrounding whitespace
// ignored. Besides the JavaScript and JSON MIME types, the value "module"
// (used by <script type="module">) and "application/ld+json" are accepted.
func isJSType(mimeType string) bool {
	// per
	//   https://www.w3.org/TR/html5/scripting-1.html#attr-script-type
	//   https://tools.ietf.org/html/rfc7231#section-3.1.1
	//   https://tools.ietf.org/html/rfc4329#section-3
	//   https://www.ietf.org/rfc/rfc4627.txt
	// discard parameters
	if i := strings.Index(mimeType, ";"); i >= 0 {
		mimeType = mimeType[:i]
	}
	mimeType = strings.ToLower(strings.TrimSpace(mimeType))
	switch mimeType {
	case
		"application/ecmascript",
		"application/javascript",
		"application/json",
		"application/ld+json",
		"application/x-ecmascript",
		"application/x-javascript",
		"module",
		"text/ecmascript",
		"text/javascript",
		"text/javascript1.0",
		"text/javascript1.1",
		"text/javascript1.2",
		"text/javascript1.3",
		"text/javascript1.4",
		"text/javascript1.5",
		"text/jscript",
		"text/livescript",
		"text/x-ecmascript",
		"text/x-javascript":
		return true
	default:
		return false
	}
}
405