1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package template
6
7import (
8	"bytes"
9	"fmt"
10	"unicode"
11	"unicode/utf8"
12)
13
14// endsWithCSSKeyword reports whether b ends with an ident that
15// case-insensitively matches the lower-case kw.
16func endsWithCSSKeyword(b []byte, kw string) bool {
17	i := len(b) - len(kw)
18	if i < 0 {
19		// Too short.
20		return false
21	}
22	if i != 0 {
23		r, _ := utf8.DecodeLastRune(b[:i])
24		if isCSSNmchar(r) {
25			// Too long.
26			return false
27		}
28	}
29	// Many CSS keywords, such as "!important" can have characters encoded,
30	// but the URI production does not allow that according to
31	// http://www.w3.org/TR/css3-syntax/#TOK-URI
32	// This does not attempt to recognize encoded keywords. For example,
33	// given "\75\72\6c" and "url" this return false.
34	return string(bytes.ToLower(b[i:])) == kw
35}
36
// isCSSNmchar reports whether r may appear anywhere in a CSS identifier.
//
// It follows the CSS3 nmchar production but does not account for multi-rune
// escape sequences.
// http://www.w3.org/TR/css3-syntax/#SUBTOK-nmchar
func isCSSNmchar(r rune) bool {
	switch {
	case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z', '0' <= r && r <= '9':
		// ASCII letters and digits.
		return true
	case r == '-', r == '_':
		return true
	case 0x80 <= r && r <= 0xd7ff, 0xe000 <= r && r <= 0xfffd, 0x10000 <= r && r <= 0x10ffff:
		// Non-ASCII ranges; surrogates, 0xfffe and 0xffff are excluded.
		return true
	}
	return false
}
52
53// decodeCSS decodes CSS3 escapes given a sequence of stringchars.
54// If there is no change, it returns the input, otherwise it returns a slice
55// backed by a new array.
56// http://www.w3.org/TR/css3-syntax/#SUBTOK-stringchar defines stringchar.
57func decodeCSS(s []byte) []byte {
58	i := bytes.IndexByte(s, '\\')
59	if i == -1 {
60		return s
61	}
62	// The UTF-8 sequence for a codepoint is never longer than 1 + the
63	// number hex digits need to represent that codepoint, so len(s) is an
64	// upper bound on the output length.
65	b := make([]byte, 0, len(s))
66	for len(s) != 0 {
67		i := bytes.IndexByte(s, '\\')
68		if i == -1 {
69			i = len(s)
70		}
71		b, s = append(b, s[:i]...), s[i:]
72		if len(s) < 2 {
73			break
74		}
75		// http://www.w3.org/TR/css3-syntax/#SUBTOK-escape
76		// escape ::= unicode | '\' [#x20-#x7E#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
77		if isHex(s[1]) {
78			// http://www.w3.org/TR/css3-syntax/#SUBTOK-unicode
79			//   unicode ::= '\' [0-9a-fA-F]{1,6} wc?
80			j := 2
81			for j < len(s) && j < 7 && isHex(s[j]) {
82				j++
83			}
84			r := hexDecode(s[1:j])
85			if r > unicode.MaxRune {
86				r, j = r/16, j-1
87			}
88			n := utf8.EncodeRune(b[len(b):cap(b)], r)
89			// The optional space at the end allows a hex
90			// sequence to be followed by a literal hex.
91			// string(decodeCSS([]byte(`\A B`))) == "\nB"
92			b, s = b[:len(b)+n], skipCSSSpace(s[j:])
93		} else {
94			// `\\` decodes to `\` and `\"` to `"`.
95			_, n := utf8.DecodeRune(s[1:])
96			b, s = append(b, s[1:1+n]...), s[1+n:]
97		}
98	}
99	return b
100}
101
// isHex reports whether c is an ASCII hexadecimal digit of either case.
func isHex(c byte) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'f':
		return true
	case 'A' <= c && c <= 'F':
		return true
	}
	return false
}
106
// hexDecode converts a short run of hex digits to its value: "10" -> 16.
// It panics if s contains a byte that is not a hex digit.
func hexDecode(s []byte) rune {
	var value rune
	for _, c := range s {
		var digit rune
		switch {
		case '0' <= c && c <= '9':
			digit = rune(c - '0')
		case 'a' <= c && c <= 'f':
			digit = rune(c-'a') + 10
		case 'A' <= c && c <= 'F':
			digit = rune(c-'A') + 10
		default:
			panic(fmt.Sprintf("Bad hex digit in %q", s))
		}
		// Make room for the next nibble, then merge it in.
		value = value<<4 | digit
	}
	return value
}
125
// skipCSSSpace returns c with at most one leading CSS space removed.
// A CRLF pair counts as a single space.
func skipCSSSpace(c []byte) []byte {
	if len(c) == 0 {
		return c
	}
	// wc ::= #x9 | #xA | #xC | #xD | #x20
	first := c[0]
	if first == '\r' {
		// This differs from CSS3's wc production because it contains a
		// probable spec error whereby wc contains all the single byte
		// sequences in nl (newline) but not CRLF.
		if len(c) >= 2 && c[1] == '\n' {
			return c[2:]
		}
		return c[1:]
	}
	if first == '\t' || first == '\n' || first == '\f' || first == ' ' {
		return c[1:]
	}
	return c
}
146
// isCSSSpace reports whether b is one of the CSS whitespace bytes listed in
// the wc production: tab, line feed, form feed, carriage return or space.
func isCSSSpace(b byte) bool {
	return b == '\t' || b == '\n' || b == '\f' || b == '\r' || b == ' '
}
155
156// cssEscaper escapes HTML and CSS special characters using \<hex>+ escapes.
157func cssEscaper(args ...interface{}) string {
158	s, _ := stringify(args...)
159	var b bytes.Buffer
160	written := 0
161	for i, r := range s {
162		var repl string
163		switch r {
164		case 0:
165			repl = `\0`
166		case '\t':
167			repl = `\9`
168		case '\n':
169			repl = `\a`
170		case '\f':
171			repl = `\c`
172		case '\r':
173			repl = `\d`
174		// Encode HTML specials as hex so the output can be embedded
175		// in HTML attributes without further encoding.
176		case '"':
177			repl = `\22`
178		case '&':
179			repl = `\26`
180		case '\'':
181			repl = `\27`
182		case '(':
183			repl = `\28`
184		case ')':
185			repl = `\29`
186		case '+':
187			repl = `\2b`
188		case '/':
189			repl = `\2f`
190		case ':':
191			repl = `\3a`
192		case ';':
193			repl = `\3b`
194		case '<':
195			repl = `\3c`
196		case '>':
197			repl = `\3e`
198		case '\\':
199			repl = `\\`
200		case '{':
201			repl = `\7b`
202		case '}':
203			repl = `\7d`
204		default:
205			continue
206		}
207		b.WriteString(s[written:i])
208		b.WriteString(repl)
209		written = i + utf8.RuneLen(r)
210		if repl != `\\` && (written == len(s) || isHex(s[written]) || isCSSSpace(s[written])) {
211			b.WriteByte(' ')
212		}
213	}
214	if written == 0 {
215		return s
216	}
217	b.WriteString(s[written:])
218	return b.String()
219}
220
// Substrings that cssValueFilter searches for in the lower-cased identifier
// characters it collects from a value; a match makes the value unsafe.
var (
	expressionBytes = []byte("expression")
	mozBindingBytes = []byte("mozbinding")
)
223
224// cssValueFilter allows innocuous CSS values in the output including CSS
225// quantities (10px or 25%), ID or class literals (#foo, .bar), keyword values
226// (inherit, blue), and colors (#888).
227// It filters out unsafe values, such as those that affect token boundaries,
228// and anything that might execute scripts.
229func cssValueFilter(args ...interface{}) string {
230	s, t := stringify(args...)
231	if t == contentTypeCSS {
232		return s
233	}
234	b, id := decodeCSS([]byte(s)), make([]byte, 0, 64)
235
236	// CSS3 error handling is specified as honoring string boundaries per
237	// http://www.w3.org/TR/css3-syntax/#error-handling :
238	//     Malformed declarations. User agents must handle unexpected
239	//     tokens encountered while parsing a declaration by reading until
240	//     the end of the declaration, while observing the rules for
241	//     matching pairs of (), [], {}, "", and '', and correctly handling
242	//     escapes. For example, a malformed declaration may be missing a
243	//     property, colon (:) or value.
244	// So we need to make sure that values do not have mismatched bracket
245	// or quote characters to prevent the browser from restarting parsing
246	// inside a string that might embed JavaScript source.
247	for i, c := range b {
248		switch c {
249		case 0, '"', '\'', '(', ')', '/', ';', '@', '[', '\\', ']', '`', '{', '}':
250			return filterFailsafe
251		case '-':
252			// Disallow <!-- or -->.
253			// -- should not appear in valid identifiers.
254			if i != 0 && b[i-1] == '-' {
255				return filterFailsafe
256			}
257		default:
258			if c < 0x80 && isCSSNmchar(rune(c)) {
259				id = append(id, c)
260			}
261		}
262	}
263	id = bytes.ToLower(id)
264	if bytes.Index(id, expressionBytes) != -1 || bytes.Index(id, mozBindingBytes) != -1 {
265		return filterFailsafe
266	}
267	return string(b)
268}
269