// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package httpproxy provides support for HTTP proxy determination
// based on environment variables, as provided by net/http's
// ProxyFromEnvironment function.
//
// The API is not subject to the Go 1 compatibility promise and may change at
// any time.
package httpproxy

import (
	"errors"
	"fmt"
	"net"
	"net/url"
	"os"
	"strings"
	"unicode/utf8"

	"golang.org/x/net/idna"
)

// Config holds configuration for HTTP proxy settings. See
// FromEnvironment for details.
type Config struct {
	// HTTPProxy represents the value of the HTTP_PROXY or
	// http_proxy environment variable. It will be used as the proxy
	// URL for HTTP requests and HTTPS requests unless overridden by
	// HTTPSProxy or NoProxy.
	HTTPProxy string

	// HTTPSProxy represents the HTTPS_PROXY or https_proxy
	// environment variable. It will be used as the proxy URL for
	// HTTPS requests unless overridden by NoProxy.
	HTTPSProxy string

	// NoProxy represents the NO_PROXY or no_proxy environment
	// variable. It specifies a string that contains comma-separated values
	// specifying hosts that should be excluded from proxying. Each value is
	// represented by an IP address prefix (1.2.3.4), an IP address prefix in
	// CIDR notation (1.2.3.4/8), a domain name, or a special DNS label (*).
	// An IP address prefix and domain name can also include a literal port
	// number (1.2.3.4:80).
	// A domain name matches that name and all subdomains. A domain name with
	// a leading "." matches subdomains only. For example "foo.com" matches
	// "foo.com" and "bar.foo.com"; ".y.com" matches "x.y.com" but not "y.com".
	// A single asterisk (*) indicates that no proxying should be done.
	// A best effort is made to parse the string and errors are
	// ignored.
	//
	// Independently of NoProxy, requests to "localhost" and to loopback
	// IP addresses are never proxied.
	NoProxy string

	// CGI holds whether the current process is running
	// as a CGI handler (FromEnvironment infers this from the
	// presence of a REQUEST_METHOD environment variable).
	// When this is set, ProxyForURL will return an error
	// when HTTPProxy applies, because a client could be
	// setting HTTP_PROXY maliciously. See https://golang.org/s/cgihttpproxy.
	CGI bool
}

63// config holds the parsed configuration for HTTP proxy settings.
64type config struct {
65	// Config represents the original configuration as defined above.
66	Config
67
68	// httpsProxy is the parsed URL of the HTTPSProxy if defined.
69	httpsProxy *url.URL
70
71	// httpProxy is the parsed URL of the HTTPProxy if defined.
72	httpProxy *url.URL
73
74	// ipMatchers represent all values in the NoProxy that are IP address
75	// prefixes or an IP address in CIDR notation.
76	ipMatchers []matcher
77
78	// domainMatchers represent all values in the NoProxy that are a domain
79	// name or hostname & domain name
80	domainMatchers []matcher
81}
82
83// FromEnvironment returns a Config instance populated from the
84// environment variables HTTP_PROXY, HTTPS_PROXY and NO_PROXY (or the
85// lowercase versions thereof). HTTPS_PROXY takes precedence over
86// HTTP_PROXY for https requests.
87//
88// The environment values may be either a complete URL or a
89// "host[:port]", in which case the "http" scheme is assumed. An error
90// is returned if the value is a different form.
91func FromEnvironment() *Config {
92	return &Config{
93		HTTPProxy:  getEnvAny("HTTP_PROXY", "http_proxy"),
94		HTTPSProxy: getEnvAny("HTTPS_PROXY", "https_proxy"),
95		NoProxy:    getEnvAny("NO_PROXY", "no_proxy"),
96		CGI:        os.Getenv("REQUEST_METHOD") != "",
97	}
98}
99
// getEnvAny returns the value of the first environment variable in names
// that is set to a non-empty string. It returns "" when none of them is
// set (or when names is empty).
func getEnvAny(names ...string) string {
	for _, n := range names {
		if val := os.Getenv(n); val != "" {
			return val
		}
	}
	return ""
}

109// ProxyFunc returns a function that determines the proxy URL to use for
110// a given request URL. Changing the contents of cfg will not affect
111// proxy functions created earlier.
112//
113// A nil URL and nil error are returned if no proxy is defined in the
114// environment, or a proxy should not be used for the given request, as
115// defined by NO_PROXY.
116//
117// As a special case, if req.URL.Host is "localhost" (with or without a
118// port number), then a nil URL and nil error will be returned.
119func (cfg *Config) ProxyFunc() func(reqURL *url.URL) (*url.URL, error) {
120	// Preprocess the Config settings for more efficient evaluation.
121	cfg1 := &config{
122		Config: *cfg,
123	}
124	cfg1.init()
125	return cfg1.proxyForURL
126}
127
128func (cfg *config) proxyForURL(reqURL *url.URL) (*url.URL, error) {
129	var proxy *url.URL
130	if reqURL.Scheme == "https" {
131		proxy = cfg.httpsProxy
132	}
133	if proxy == nil {
134		proxy = cfg.httpProxy
135		if proxy != nil && cfg.CGI {
136			return nil, errors.New("refusing to use HTTP_PROXY value in CGI environment; see golang.org/s/cgihttpproxy")
137		}
138	}
139	if proxy == nil {
140		return nil, nil
141	}
142	if !cfg.useProxy(canonicalAddr(reqURL)) {
143		return nil, nil
144	}
145
146	return proxy, nil
147}
148
// parseProxy parses a proxy setting into a URL.
//
// proxy may be a complete URL of any scheme ("http://host:port",
// "socks5h://host:port", ...) or a bare "host[:port]", in which case the
// "http" scheme is assumed. An empty string yields (nil, nil). An error is
// returned only when the value parses neither as given nor with "http://"
// prepended.
func parseProxy(proxy string) (*url.URL, error) {
	if proxy == "" {
		return nil, nil
	}

	proxyURL, err := url.Parse(proxy)
	if err != nil || proxyURL.Scheme == "" || proxyURL.Host == "" {
		// proxy was not a complete URL: a bare "host:port" either fails
		// to parse or is read as scheme "host" with no host part. Try
		// prepending "http://" to it and see if that parses correctly.
		// If not, we fall through and complain about the original one.
		if withScheme, perr := url.Parse("http://" + proxy); perr == nil {
			return withScheme, nil
		}
	}
	if err != nil {
		return nil, fmt.Errorf("invalid proxy address %q: %v", proxy, err)
	}
	return proxyURL, nil
}

172// useProxy reports whether requests to addr should use a proxy,
173// according to the NO_PROXY or no_proxy environment variable.
174// addr is always a canonicalAddr with a host and port.
175func (cfg *config) useProxy(addr string) bool {
176	if len(addr) == 0 {
177		return true
178	}
179	host, port, err := net.SplitHostPort(addr)
180	if err != nil {
181		return false
182	}
183	if host == "localhost" {
184		return false
185	}
186	ip := net.ParseIP(host)
187	if ip != nil {
188		if ip.IsLoopback() {
189			return false
190		}
191	}
192
193	addr = strings.ToLower(strings.TrimSpace(host))
194
195	if ip != nil {
196		for _, m := range cfg.ipMatchers {
197			if m.match(addr, port, ip) {
198				return false
199			}
200		}
201	}
202	for _, m := range cfg.domainMatchers {
203		if m.match(addr, port, ip) {
204			return false
205		}
206	}
207	return true
208}
209
210func (c *config) init() {
211	if parsed, err := parseProxy(c.HTTPProxy); err == nil {
212		c.httpProxy = parsed
213	}
214	if parsed, err := parseProxy(c.HTTPSProxy); err == nil {
215		c.httpsProxy = parsed
216	}
217
218	for _, p := range strings.Split(c.NoProxy, ",") {
219		p = strings.ToLower(strings.TrimSpace(p))
220		if len(p) == 0 {
221			continue
222		}
223
224		if p == "*" {
225			c.ipMatchers = []matcher{allMatch{}}
226			c.domainMatchers = []matcher{allMatch{}}
227			return
228		}
229
230		// IPv4/CIDR, IPv6/CIDR
231		if _, pnet, err := net.ParseCIDR(p); err == nil {
232			c.ipMatchers = append(c.ipMatchers, cidrMatch{cidr: pnet})
233			continue
234		}
235
236		// IPv4:port, [IPv6]:port
237		phost, pport, err := net.SplitHostPort(p)
238		if err == nil {
239			if len(phost) == 0 {
240				// There is no host part, likely the entry is malformed; ignore.
241				continue
242			}
243			if phost[0] == '[' && phost[len(phost)-1] == ']' {
244				phost = phost[1 : len(phost)-1]
245			}
246		} else {
247			phost = p
248		}
249		// IPv4, IPv6
250		if pip := net.ParseIP(phost); pip != nil {
251			c.ipMatchers = append(c.ipMatchers, ipMatch{ip: pip, port: pport})
252			continue
253		}
254
255		if len(phost) == 0 {
256			// There is no host part, likely the entry is malformed; ignore.
257			continue
258		}
259
260		// domain.com or domain.com:80
261		// foo.com matches bar.foo.com
262		// .domain.com or .domain.com:port
263		// *.domain.com or *.domain.com:port
264		if strings.HasPrefix(phost, "*.") {
265			phost = phost[1:]
266		}
267		matchHost := false
268		if phost[0] != '.' {
269			matchHost = true
270			phost = "." + phost
271		}
272		c.domainMatchers = append(c.domainMatchers, domainMatch{host: phost, port: pport, matchHost: matchHost})
273	}
274}
275
// portMap maps a URL scheme to the port that canonicalAddr assumes
// when the URL does not specify one.
var portMap = map[string]string{
	"http":   "80",
	"https":  "443",
	"socks5": "1080",
}

282// canonicalAddr returns url.Host but always with a ":port" suffix
283func canonicalAddr(url *url.URL) string {
284	addr := url.Hostname()
285	if v, err := idnaASCII(addr); err == nil {
286		addr = v
287	}
288	port := url.Port()
289	if port == "" {
290		port = portMap[url.Scheme]
291	}
292	return net.JoinHostPort(addr, port)
293}
294
// hasPort reports whether s, a string of the form "host", "host:port", or
// "[ipv6::address]:port", includes a port. It does so by checking that the
// last ':' comes after the last ']' (if any).
func hasPort(s string) bool {
	return strings.LastIndexByte(s, ':') > strings.LastIndexByte(s, ']')
}

299func idnaASCII(v string) (string, error) {
300	// TODO: Consider removing this check after verifying performance is okay.
301	// Right now punycode verification, length checks, context checks, and the
302	// permissible character tests are all omitted. It also prevents the ToASCII
303	// call from salvaging an invalid IDN, when possible. As a result it may be
304	// possible to have two IDNs that appear identical to the user where the
305	// ASCII-only version causes an error downstream whereas the non-ASCII
306	// version does not.
307	// Note that for correct ASCII IDNs ToASCII will only do considerably more
308	// work, but it will not cause an allocation.
309	if isASCII(v) {
310		return v, nil
311	}
312	return idna.Lookup.ToASCII(v)
313}
314
// isASCII reports whether s consists only of bytes below utf8.RuneSelf,
// i.e. whether every byte of s is a 7-bit ASCII character. The empty
// string is considered ASCII.
func isASCII(s string) bool {
	for i := 0; i < len(s); i++ {
		if s[i] >= utf8.RuneSelf {
			return false
		}
	}
	return true
}

// matcher represents the matching rule for a given value in the NO_PROXY list.
type matcher interface {
	// match reports whether the rule applies to the given request target,
	// i.e. whether requests to it should bypass the proxy. host is the
	// lowercased request host, port its port, and ip the parsed form of
	// host when host is an IP address (nil otherwise).
	match(host, port string, ip net.IP) bool
}

// allMatch matches on all possible inputs. It is the matcher installed
// for a NO_PROXY entry of "*".
type allMatch struct{}

// match always returns true, regardless of host, port and ip.
func (a allMatch) match(host, port string, ip net.IP) bool {
	return true
}

// cidrMatch matches IP addresses that fall inside a CIDR block.
type cidrMatch struct {
	cidr *net.IPNet
}

// match reports whether ip is contained in the CIDR block. host and port
// are ignored, so the rule applies to every port.
func (m cidrMatch) match(host, port string, ip net.IP) bool {
	return m.cidr.Contains(ip)
}

// ipMatch matches a single IP address, optionally restricted to one port.
type ipMatch struct {
	ip   net.IP
	port string // empty means any port
}

// match reports whether ip equals the configured address and, when a port
// is configured, whether port equals it as well. host is ignored.
func (m ipMatch) match(host, port string, ip net.IP) bool {
	if m.ip.Equal(ip) {
		return m.port == "" || m.port == port
	}
	return false
}

// domainMatch matches a domain name and its subdomains, optionally
// restricted to one port.
type domainMatch struct {
	// host is the domain with a leading ".", e.g. ".foo.com".
	host string
	// port restricts the match to one port; empty means any port.
	port string

	// matchHost reports whether the bare domain (host without its
	// leading ".") matches too, in addition to its subdomains.
	matchHost bool
}

// match reports whether host is a subdomain of the configured domain, or
// the domain itself when matchHost is set, and, when a port is configured,
// whether port equals it as well. ip is ignored.
func (m domainMatch) match(host, port string, ip net.IP) bool {
	if strings.HasSuffix(host, m.host) || (m.matchHost && host == m.host[1:]) {
		return m.port == "" || m.port == port
	}
	return false
}
371