1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package template
6
7import (
8	"bytes"
9	"fmt"
10	"strings"
11)
12
13// urlFilter returns its input unless it contains an unsafe scheme in which
14// case it defangs the entire URL.
15//
16// Schemes that cause unintended side effects that are irreversible without user
17// interaction are considered unsafe. For example, clicking on a "javascript:"
18// link can immediately trigger JavaScript code execution.
19//
20// This filter conservatively assumes that all schemes other than the following
21// are unsafe:
22//    * http:   Navigates to a new website, and may open a new window or tab.
23//              These side effects can be reversed by navigating back to the
24//              previous website, or closing the window or tab. No irreversible
25//              changes will take place without further user interaction with
26//              the new website.
27//    * https:  Same as http.
28//    * mailto: Opens an email program and starts a new draft. This side effect
29//              is not irreversible until the user explicitly clicks send; it
30//              can be undone by closing the email program.
31//
32// To allow URLs containing other schemes to bypass this filter, developers must
33// explicitly indicate that such a URL is expected and safe by encapsulating it
34// in a template.URL value.
35func urlFilter(args ...interface{}) string {
36	s, t := stringify(args...)
37	if t == contentTypeURL {
38		return s
39	}
40	if !isSafeUrl(s) {
41		return "#" + filterFailsafe
42	}
43	return s
44}
45
// isSafeUrl reports whether s is a relative URL or an absolute URL whose
// scheme is one of http, https or mailto (compared case-insensitively).
func isSafeUrl(s string) bool {
	colon := strings.IndexByte(s, ':')
	if colon < 0 || strings.Contains(s[:colon], "/") {
		// No scheme: either there is no ':' at all, or the first ':'
		// appears after a '/', so it cannot terminate a scheme.
		return true
	}
	scheme := s[:colon]
	for _, safe := range [...]string{"http", "https", "mailto"} {
		if strings.EqualFold(scheme, safe) {
			return true
		}
	}
	return false
}
58
59// urlEscaper produces an output that can be embedded in a URL query.
60// The output can be embedded in an HTML attribute without further escaping.
61func urlEscaper(args ...interface{}) string {
62	return urlProcessor(false, args...)
63}
64
65// urlNormalizer normalizes URL content so it can be embedded in a quote-delimited
66// string or parenthesis delimited url(...).
67// The normalizer does not encode all HTML specials. Specifically, it does not
68// encode '&' so correct embedding in an HTML attribute requires escaping of
69// '&' to '&'.
70func urlNormalizer(args ...interface{}) string {
71	return urlProcessor(true, args...)
72}
73
74// urlProcessor normalizes (when norm is true) or escapes its input to produce
75// a valid hierarchical or opaque URL part.
76func urlProcessor(norm bool, args ...interface{}) string {
77	s, t := stringify(args...)
78	if t == contentTypeURL {
79		norm = true
80	}
81	var b bytes.Buffer
82	if processUrlOnto(s, norm, &b) {
83		return b.String()
84	}
85	return s
86}
87
88// processUrlOnto appends a normalized URL corresponding to its input to b
89// and returns true if the appended content differs from s.
90func processUrlOnto(s string, norm bool, b *bytes.Buffer) bool {
91	b.Grow(b.Cap() + len(s) + 16)
92	written := 0
93	// The byte loop below assumes that all URLs use UTF-8 as the
94	// content-encoding. This is similar to the URI to IRI encoding scheme
95	// defined in section 3.1 of  RFC 3987, and behaves the same as the
96	// EcmaScript builtin encodeURIComponent.
97	// It should not cause any misencoding of URLs in pages with
98	// Content-type: text/html;charset=UTF-8.
99	for i, n := 0, len(s); i < n; i++ {
100		c := s[i]
101		switch c {
102		// Single quote and parens are sub-delims in RFC 3986, but we
103		// escape them so the output can be embedded in single
104		// quoted attributes and unquoted CSS url(...) constructs.
105		// Single quotes are reserved in URLs, but are only used in
106		// the obsolete "mark" rule in an appendix in RFC 3986
107		// so can be safely encoded.
108		case '!', '#', '$', '&', '*', '+', ',', '/', ':', ';', '=', '?', '@', '[', ']':
109			if norm {
110				continue
111			}
112		// Unreserved according to RFC 3986 sec 2.3
113		// "For consistency, percent-encoded octets in the ranges of
114		// ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D),
115		// period (%2E), underscore (%5F), or tilde (%7E) should not be
116		// created by URI producers
117		case '-', '.', '_', '~':
118			continue
119		case '%':
120			// When normalizing do not re-encode valid escapes.
121			if norm && i+2 < len(s) && isHex(s[i+1]) && isHex(s[i+2]) {
122				continue
123			}
124		default:
125			// Unreserved according to RFC 3986 sec 2.3
126			if 'a' <= c && c <= 'z' {
127				continue
128			}
129			if 'A' <= c && c <= 'Z' {
130				continue
131			}
132			if '0' <= c && c <= '9' {
133				continue
134			}
135		}
136		b.WriteString(s[written:i])
137		fmt.Fprintf(b, "%%%02x", c)
138		written = i + 1
139	}
140	b.WriteString(s[written:])
141	return written != 0
142}
143
144// Filters and normalizes srcset values which are comma separated
145// URLs followed by metadata.
146func srcsetFilterAndEscaper(args ...interface{}) string {
147	s, t := stringify(args...)
148	switch t {
149	case contentTypeSrcset:
150		return s
151	case contentTypeURL:
152		// Normalizing gets rid of all HTML whitespace
153		// which separate the image URL from its metadata.
154		var b bytes.Buffer
155		if processUrlOnto(s, true, &b) {
156			s = b.String()
157		}
158		// Additionally, commas separate one source from another.
159		return strings.Replace(s, ",", "%2c", -1)
160	}
161
162	var b bytes.Buffer
163	written := 0
164	for i := 0; i < len(s); i++ {
165		if s[i] == ',' {
166			filterSrcsetElement(s, written, i, &b)
167			b.WriteString(",")
168			written = i + 1
169		}
170	}
171	filterSrcsetElement(s, written, len(s), &b)
172	return b.String()
173}
174
// Derived from https://play.golang.org/p/Dhmj7FORT5
//
// A 128-bit set packed into 16 bytes: bit (c & 0x7) of byte (c >> 3) is set
// iff byte value c is an ASCII whitespace character or an ASCII alphanumeric.
// Byte 1 (0x36) covers TAB, LF, FF and CR; byte 4 (0x01) covers SPACE; bytes
// 6-15 cover '0'-'9', 'A'-'Z' and 'a'-'z'.
const htmlSpaceAndAsciiAlnumBytes = "\x00\x36\x00\x00\x01\x00\xff\x03\xfe\xff\xff\x07\xfe\xff\xff\x07"

// isHtmlSpace is true iff c is a whitespace character per
// https://infra.spec.whatwg.org/#ascii-whitespace
func isHtmlSpace(c byte) bool {
	// The c <= 0x20 guard restricts lookups to the whitespace portion of
	// the table; alphanumerics (all >= 0x30) can never match here.
	return (c <= 0x20) && 0 != (htmlSpaceAndAsciiAlnumBytes[c>>3]&(1<<uint(c&0x7)))
}

// isHtmlSpaceOrAsciiAlnum is true iff c is an HTML whitespace character or an
// ASCII letter or digit, i.e. iff c's bit is set in the table above.
func isHtmlSpaceOrAsciiAlnum(c byte) bool {
	// c < 0x80 bounds the table index: the bitmask covers only ASCII.
	return (c < 0x80) && 0 != (htmlSpaceAndAsciiAlnumBytes[c>>3]&(1<<uint(c&0x7)))
}
187
188func filterSrcsetElement(s string, left int, right int, b *bytes.Buffer) {
189	start := left
190	for start < right && isHtmlSpace(s[start]) {
191		start += 1
192	}
193	end := right
194	for i := start; i < right; i++ {
195		if isHtmlSpace(s[i]) {
196			end = i
197			break
198		}
199	}
200	if url := s[start:end]; isSafeUrl(url) {
201		// If image metadata is only spaces or alnums then
202		// we don't need to URL normalize it.
203		metadataOk := true
204		for i := end; i < right; i++ {
205			if !isHtmlSpaceOrAsciiAlnum(s[i]) {
206				metadataOk = false
207				break
208			}
209		}
210		if metadataOk {
211			b.WriteString(s[left:start])
212			processUrlOnto(url, true, b)
213			b.WriteString(s[end:right])
214			return
215		}
216	}
217	b.WriteString("#")
218	b.WriteString(filterFailsafe)
219}
220