1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package template
6
7import (
8	"bytes"
9	"fmt"
10	"unicode"
11	"unicode/utf8"
12)
13
14// endsWithCSSKeyword reports whether b ends with an ident that
15// case-insensitively matches the lower-case kw.
16func endsWithCSSKeyword(b []byte, kw string) bool {
17	i := len(b) - len(kw)
18	if i < 0 {
19		// Too short.
20		return false
21	}
22	if i != 0 {
23		r, _ := utf8.DecodeLastRune(b[:i])
24		if isCSSNmchar(r) {
25			// Too long.
26			return false
27		}
28	}
29	// Many CSS keywords, such as "!important" can have characters encoded,
30	// but the URI production does not allow that according to
31	// http://www.w3.org/TR/css3-syntax/#TOK-URI
32	// This does not attempt to recognize encoded keywords. For example,
33	// given "\75\72\6c" and "url" this return false.
34	return string(bytes.ToLower(b[i:])) == kw
35}
36
// isCSSNmchar reports whether r may appear anywhere in a CSS identifier.
//
// It follows the CSS3 nmchar production but ignores multi-rune escape
// sequences.
// http://www.w3.org/TR/css3-syntax/#SUBTOK-nmchar
func isCSSNmchar(r rune) bool {
	switch {
	case 'a' <= r && r <= 'z',
		'A' <= r && r <= 'Z',
		'0' <= r && r <= '9':
		// ASCII letters and digits.
		return true
	case r == '-', r == '_':
		return true
	case 0x80 <= r && r <= 0xd7ff,
		0xe000 <= r && r <= 0xfffd,
		0x10000 <= r && r <= 0x10ffff:
		// Non-ASCII ranges.
		return true
	}
	return false
}
52
53// decodeCSS decodes CSS3 escapes given a sequence of stringchars.
54// If there is no change, it returns the input, otherwise it returns a slice
55// backed by a new array.
56// http://www.w3.org/TR/css3-syntax/#SUBTOK-stringchar defines stringchar.
57func decodeCSS(s []byte) []byte {
58	i := bytes.IndexByte(s, '\\')
59	if i == -1 {
60		return s
61	}
62	// The UTF-8 sequence for a codepoint is never longer than 1 + the
63	// number hex digits need to represent that codepoint, so len(s) is an
64	// upper bound on the output length.
65	b := make([]byte, 0, len(s))
66	for len(s) != 0 {
67		i := bytes.IndexByte(s, '\\')
68		if i == -1 {
69			i = len(s)
70		}
71		b, s = append(b, s[:i]...), s[i:]
72		if len(s) < 2 {
73			break
74		}
75		// http://www.w3.org/TR/css3-syntax/#SUBTOK-escape
76		// escape ::= unicode | '\' [#x20-#x7E#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
77		if isHex(s[1]) {
78			// http://www.w3.org/TR/css3-syntax/#SUBTOK-unicode
79			//   unicode ::= '\' [0-9a-fA-F]{1,6} wc?
80			j := 2
81			for j < len(s) && j < 7 && isHex(s[j]) {
82				j++
83			}
84			r := hexDecode(s[1:j])
85			if r > unicode.MaxRune {
86				r, j = r/16, j-1
87			}
88			n := utf8.EncodeRune(b[len(b):cap(b)], r)
89			// The optional space at the end allows a hex
90			// sequence to be followed by a literal hex.
91			// string(decodeCSS([]byte(`\A B`))) == "\nB"
92			b, s = b[:len(b)+n], skipCSSSpace(s[j:])
93		} else {
94			// `\\` decodes to `\` and `\"` to `"`.
95			_, n := utf8.DecodeRune(s[1:])
96			b, s = append(b, s[1:1+n]...), s[1+n:]
97		}
98	}
99	return b
100}
101
// isHex reports whether c is an ASCII hexadecimal digit (0-9, a-f or A-F).
func isHex(c byte) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'f':
		return true
	case 'A' <= c && c <= 'F':
		return true
	}
	return false
}
106
// hexDecode returns the value of the short hex digit sequence s, for example
// "10" -> 16. It panics if s contains a byte that is not a hex digit.
func hexDecode(s []byte) rune {
	var value rune
	for _, c := range s {
		var digit rune
		switch {
		case '0' <= c && c <= '9':
			digit = rune(c - '0')
		case 'a' <= c && c <= 'f':
			digit = rune(c-'a') + 10
		case 'A' <= c && c <= 'F':
			digit = rune(c-'A') + 10
		default:
			panic(fmt.Sprintf("Bad hex digit in %q", s))
		}
		// Make room for one more nibble and place the digit in it.
		value = value<<4 | digit
	}
	return value
}
125
// skipCSSSpace returns the suffix of c that remains after dropping at most
// one leading CSS space. A CRLF pair counts as a single space.
func skipCSSSpace(c []byte) []byte {
	skip := 0
	if len(c) > 0 {
		// wc ::= #x9 | #xA | #xC | #xD | #x20
		switch c[0] {
		case '\r':
			// This deviates from the CSS3 wc production, which looks
			// like a spec error: wc lists every single-byte sequence
			// in nl (newline) but omits CRLF.
			skip = 1
			if len(c) > 1 && c[1] == '\n' {
				skip = 2
			}
		case ' ', '\t', '\n', '\f':
			skip = 1
		}
	}
	return c[skip:]
}
146
// isCSSSpace reports whether b is one of the CSS space characters of the wc
// production: tab, newline, form feed, carriage return or space.
func isCSSSpace(b byte) bool {
	return b == ' ' || b == '\t' || b == '\n' || b == '\f' || b == '\r'
}
155
156// cssEscaper escapes HTML and CSS special characters using \<hex>+ escapes.
157func cssEscaper(args ...interface{}) string {
158	s, _ := stringify(args...)
159	var b bytes.Buffer
160	r, w, written := rune(0), 0, 0
161	for i := 0; i < len(s); i += w {
162		// See comment in htmlEscaper.
163		r, w = utf8.DecodeRuneInString(s[i:])
164		var repl string
165		switch {
166		case int(r) < len(cssReplacementTable) && cssReplacementTable[r] != "":
167			repl = cssReplacementTable[r]
168		default:
169			continue
170		}
171		b.WriteString(s[written:i])
172		b.WriteString(repl)
173		written = i + w
174		if repl != `\\` && (written == len(s) || isHex(s[written]) || isCSSSpace(s[written])) {
175			b.WriteByte(' ')
176		}
177	}
178	if written == 0 {
179		return s
180	}
181	b.WriteString(s[written:])
182	return b.String()
183}
184
// cssReplacementTable is indexed by code point and holds the CSS escape that
// cssEscaper emits for that code point. An empty entry means the character
// is passed through unescaped.
var cssReplacementTable = []string{
	// NUL and the ASCII whitespace control characters.
	0:    `\0`,
	'\t': `\9`,
	'\n': `\a`,
	'\f': `\c`,
	'\r': `\d`,
	// Backslash is escaped as itself rather than as a hex sequence.
	'\\': `\\`,
	// HTML specials are encoded as hex so the output can be embedded
	// in HTML attributes without further encoding.
	'"':  `\22`,
	'&':  `\26`,
	'\'': `\27`,
	'<':  `\3c`,
	'>':  `\3e`,
	// Remaining punctuation, likewise encoded as hex.
	'(': `\28`,
	')': `\29`,
	'+': `\2b`,
	'/': `\2f`,
	':': `\3a`,
	';': `\3b`,
	'{': `\7b`,
	'}': `\7d`,
}
208
// Lower-case byte sequences that cssValueFilter searches for in the
// identifier characters of a value; a value containing either is rejected.
var (
	expressionBytes = []byte("expression")
	mozBindingBytes = []byte("mozbinding")
)
211
212// cssValueFilter allows innocuous CSS values in the output including CSS
213// quantities (10px or 25%), ID or class literals (#foo, .bar), keyword values
214// (inherit, blue), and colors (#888).
215// It filters out unsafe values, such as those that affect token boundaries,
216// and anything that might execute scripts.
217func cssValueFilter(args ...interface{}) string {
218	s, t := stringify(args...)
219	if t == contentTypeCSS {
220		return s
221	}
222	b, id := decodeCSS([]byte(s)), make([]byte, 0, 64)
223
224	// CSS3 error handling is specified as honoring string boundaries per
225	// http://www.w3.org/TR/css3-syntax/#error-handling :
226	//     Malformed declarations. User agents must handle unexpected
227	//     tokens encountered while parsing a declaration by reading until
228	//     the end of the declaration, while observing the rules for
229	//     matching pairs of (), [], {}, "", and '', and correctly handling
230	//     escapes. For example, a malformed declaration may be missing a
231	//     property, colon (:) or value.
232	// So we need to make sure that values do not have mismatched bracket
233	// or quote characters to prevent the browser from restarting parsing
234	// inside a string that might embed JavaScript source.
235	for i, c := range b {
236		switch c {
237		case 0, '"', '\'', '(', ')', '/', ';', '@', '[', '\\', ']', '`', '{', '}':
238			return filterFailsafe
239		case '-':
240			// Disallow <!-- or -->.
241			// -- should not appear in valid identifiers.
242			if i != 0 && b[i-1] == '-' {
243				return filterFailsafe
244			}
245		default:
246			if c < 0x80 && isCSSNmchar(rune(c)) {
247				id = append(id, c)
248			}
249		}
250	}
251	id = bytes.ToLower(id)
252	if bytes.Index(id, expressionBytes) != -1 || bytes.Index(id, mozBindingBytes) != -1 {
253		return filterFailsafe
254	}
255	return string(b)
256}
257