1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package url parses URLs and implements query escaping.
6package url
7
8// See RFC 3986. This package generally follows RFC 3986, except where
9// it deviates for compatibility reasons. When sending changes, first
10// search old issues for history on decisions. Unit tests should also
11// contain references to issue numbers with details.
12
13import (
14	"bytes"
15	"errors"
16	"fmt"
17	"sort"
18	"strconv"
19	"strings"
20)
21
// Error reports an error and the operation and URL that caused it.
type Error struct {
	Op  string // operation being performed, e.g. "parse"
	URL string // URL string the operation was applied to
	Err error  // underlying error
}

// Error formats the error as "<Op> <URL>: <Err>".
// Err must be non-nil.
func (e *Error) Error() string { return e.Op + " " + e.URL + ": " + e.Err.Error() }

// Unwrap returns the underlying error so that errors.Is and errors.As
// can inspect the cause wrapped by e.
func (e *Error) Unwrap() error { return e.Err }

// timeout is implemented by errors that can report whether they were
// caused by a timeout.
type timeout interface {
	Timeout() bool
}

// Timeout reports whether the underlying error implements a
// Timeout method and that method returns true.
func (e *Error) Timeout() bool {
	t, ok := e.Err.(timeout)
	return ok && t.Timeout()
}

// temporary is implemented by errors that can report whether they are
// temporary.
type temporary interface {
	Temporary() bool
}

// Temporary reports whether the underlying error implements a
// Temporary method and that method returns true.
func (e *Error) Temporary() bool {
	t, ok := e.Err.(temporary)
	return ok && t.Temporary()
}
48
// ishex reports whether c is an ASCII hexadecimal digit
// (0-9, a-f or A-F).
func ishex(c byte) bool {
	isDigit := '0' <= c && c <= '9'
	isLower := 'a' <= c && c <= 'f'
	isUpper := 'A' <= c && c <= 'F'
	return isDigit || isLower || isUpper
}
60
// unhex returns the numeric value (0-15) of the ASCII hexadecimal
// digit c. It returns 0 for any byte that is not a hexadecimal digit.
func unhex(c byte) byte {
	if '0' <= c && c <= '9' {
		return c - '0'
	}
	if 'a' <= c && c <= 'f' {
		return 10 + (c - 'a')
	}
	if 'A' <= c && c <= 'F' {
		return 10 + (c - 'A')
	}
	return 0
}
72
// encoding selects the URL component whose escaping rules apply when
// escaping or unescaping a string.
type encoding int

// The escaping modes. Values start at 1, so the zero value of encoding
// is not a valid mode.
const (
	encodePath encoding = iota + 1
	encodePathSegment
	encodeHost
	encodeZone
	encodeUserPassword
	encodeQueryComponent
	encodeFragment
)
84
// EscapeError is the offending text of a malformed percent-escape
// sequence found while unescaping.
type EscapeError string

// Error returns a message that quotes the invalid escape sequence.
func (e EscapeError) Error() string {
	quoted := strconv.Quote(string(e))
	return "invalid URL escape " + quoted
}
90
// InvalidHostError is the text of a character that is not permitted
// in a host name.
type InvalidHostError string

// Error returns a message that quotes the invalid character.
func (e InvalidHostError) Error() string {
	quoted := strconv.Quote(string(e))
	return "invalid character " + quoted + " in host name"
}
96
97// Return true if the specified character should be escaped when
98// appearing in a URL string, according to RFC 3986.
99//
100// Please be informed that for now shouldEscape does not check all
101// reserved characters correctly. See golang.org/issue/5684.
102func shouldEscape(c byte, mode encoding) bool {
103	// §2.3 Unreserved characters (alphanum)
104	if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' {
105		return false
106	}
107
108	if mode == encodeHost || mode == encodeZone {
109		// §3.2.2 Host allows
110		//	sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
111		// as part of reg-name.
112		// We add : because we include :port as part of host.
113		// We add [ ] because we include [ipv6]:port as part of host.
114		// We add < > because they're the only characters left that
115		// we could possibly allow, and Parse will reject them if we
116		// escape them (because hosts can't use %-encoding for
117		// ASCII bytes).
118		switch c {
119		case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']', '<', '>', '"':
120			return false
121		}
122	}
123
124	switch c {
125	case '-', '_', '.', '~': // §2.3 Unreserved characters (mark)
126		return false
127
128	case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved)
129		// Different sections of the URL allow a few of
130		// the reserved characters to appear unescaped.
131		switch mode {
132		case encodePath: // §3.3
133			// The RFC allows : @ & = + $ but saves / ; , for assigning
134			// meaning to individual path segments. This package
135			// only manipulates the path as a whole, so we allow those
136			// last three as well. That leaves only ? to escape.
137			return c == '?'
138
139		case encodePathSegment: // §3.3
140			// The RFC allows : @ & = + $ but saves / ; , for assigning
141			// meaning to individual path segments.
142			return c == '/' || c == ';' || c == ',' || c == '?'
143
144		case encodeUserPassword: // §3.2.1
145			// The RFC allows ';', ':', '&', '=', '+', '$', and ',' in
146			// userinfo, so we must escape only '@', '/', and '?'.
147			// The parsing of userinfo treats ':' as special so we must escape
148			// that too.
149			return c == '@' || c == '/' || c == '?' || c == ':'
150
151		case encodeQueryComponent: // §3.4
152			// The RFC reserves (so we must escape) everything.
153			return true
154
155		case encodeFragment: // §4.1
156			// The RFC text is silent but the grammar allows
157			// everything, so escape nothing.
158			return false
159		}
160	}
161
162	// Everything else must be escaped.
163	return true
164}
165
166// QueryUnescape does the inverse transformation of QueryEscape,
167// converting each 3-byte encoded substring of the form "%AB" into the
168// hex-decoded byte 0xAB. It also converts '+' into ' ' (space).
169// It returns an error if any % is not followed by two hexadecimal
170// digits.
171func QueryUnescape(s string) (string, error) {
172	return unescape(s, encodeQueryComponent)
173}
174
175// PathUnescape does the inverse transformation of PathEscape,
176// converting each 3-byte encoded substring of the form "%AB" into the
177// hex-decoded byte 0xAB. It also converts '+' into ' ' (space).
178// It returns an error if any % is not followed by two hexadecimal
179// digits.
180//
181// PathUnescape is identical to QueryUnescape except that it does not
182// unescape '+' to ' ' (space).
183func PathUnescape(s string) (string, error) {
184	return unescape(s, encodePathSegment)
185}
186
187// unescape unescapes a string; the mode specifies
188// which section of the URL string is being unescaped.
189func unescape(s string, mode encoding) (string, error) {
190	// Count %, check that they're well-formed.
191	n := 0
192	hasPlus := false
193	for i := 0; i < len(s); {
194		switch s[i] {
195		case '%':
196			n++
197			if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
198				s = s[i:]
199				if len(s) > 3 {
200					s = s[:3]
201				}
202				return "", EscapeError(s)
203			}
204			// Per https://tools.ietf.org/html/rfc3986#page-21
205			// in the host component %-encoding can only be used
206			// for non-ASCII bytes.
207			// But https://tools.ietf.org/html/rfc6874#section-2
208			// introduces %25 being allowed to escape a percent sign
209			// in IPv6 scoped-address literals. Yay.
210			if mode == encodeHost && unhex(s[i+1]) < 8 && s[i:i+3] != "%25" {
211				return "", EscapeError(s[i : i+3])
212			}
213			if mode == encodeZone {
214				// RFC 6874 says basically "anything goes" for zone identifiers
215				// and that even non-ASCII can be redundantly escaped,
216				// but it seems prudent to restrict %-escaped bytes here to those
217				// that are valid host name bytes in their unescaped form.
218				// That is, you can use escaping in the zone identifier but not
219				// to introduce bytes you couldn't just write directly.
220				// But Windows puts spaces here! Yay.
221				v := unhex(s[i+1])<<4 | unhex(s[i+2])
222				if s[i:i+3] != "%25" && v != ' ' && shouldEscape(v, encodeHost) {
223					return "", EscapeError(s[i : i+3])
224				}
225			}
226			i += 3
227		case '+':
228			hasPlus = mode == encodeQueryComponent
229			i++
230		default:
231			if (mode == encodeHost || mode == encodeZone) && s[i] < 0x80 && shouldEscape(s[i], mode) {
232				return "", InvalidHostError(s[i : i+1])
233			}
234			i++
235		}
236	}
237
238	if n == 0 && !hasPlus {
239		return s, nil
240	}
241
242	t := make([]byte, len(s)-2*n)
243	j := 0
244	for i := 0; i < len(s); {
245		switch s[i] {
246		case '%':
247			t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
248			j++
249			i += 3
250		case '+':
251			if mode == encodeQueryComponent {
252				t[j] = ' '
253			} else {
254				t[j] = '+'
255			}
256			j++
257			i++
258		default:
259			t[j] = s[i]
260			j++
261			i++
262		}
263	}
264	return string(t), nil
265}
266
267// QueryEscape escapes the string so it can be safely placed
268// inside a URL query.
269func QueryEscape(s string) string {
270	return escape(s, encodeQueryComponent)
271}
272
273// PathEscape escapes the string so it can be safely placed
274// inside a URL path segment.
275func PathEscape(s string) string {
276	return escape(s, encodePathSegment)
277}
278
279func escape(s string, mode encoding) string {
280	spaceCount, hexCount := 0, 0
281	for i := 0; i < len(s); i++ {
282		c := s[i]
283		if shouldEscape(c, mode) {
284			if c == ' ' && mode == encodeQueryComponent {
285				spaceCount++
286			} else {
287				hexCount++
288			}
289		}
290	}
291
292	if spaceCount == 0 && hexCount == 0 {
293		return s
294	}
295
296	t := make([]byte, len(s)+2*hexCount)
297	j := 0
298	for i := 0; i < len(s); i++ {
299		switch c := s[i]; {
300		case c == ' ' && mode == encodeQueryComponent:
301			t[j] = '+'
302			j++
303		case shouldEscape(c, mode):
304			t[j] = '%'
305			t[j+1] = "0123456789ABCDEF"[c>>4]
306			t[j+2] = "0123456789ABCDEF"[c&15]
307			j += 3
308		default:
309			t[j] = s[i]
310			j++
311		}
312	}
313	return string(t)
314}
315
316// A URL represents a parsed URL (technically, a URI reference).
317//
318// The general form represented is:
319//
320//	[scheme:][//[userinfo@]host][/]path[?query][#fragment]
321//
322// URLs that do not start with a slash after the scheme are interpreted as:
323//
324//	scheme:opaque[?query][#fragment]
325//
326// Note that the Path field is stored in decoded form: /%47%6f%2f becomes /Go/.
327// A consequence is that it is impossible to tell which slashes in the Path were
328// slashes in the raw URL and which were %2f. This distinction is rarely important,
329// but when it is, code must not use Path directly.
330// The Parse function sets both Path and RawPath in the URL it returns,
331// and URL's String method uses RawPath if it is a valid encoding of Path,
332// by calling the EscapedPath method.
333type URL struct {
334	Scheme     string
335	Opaque     string    // encoded opaque data
336	User       *Userinfo // username and password information
337	Host       string    // host or host:port
338	Path       string    // path (relative paths may omit leading slash)
339	RawPath    string    // encoded path hint (see EscapedPath method)
340	ForceQuery bool      // append a query ('?') even if RawQuery is empty
341	RawQuery   string    // encoded query values, without '?'
342	Fragment   string    // fragment for references, without '#'
343}
344
// User returns a Userinfo holding the given username and no password.
func User(username string) *Userinfo {
	return &Userinfo{username: username}
}

// UserPassword returns a Userinfo holding the given username and
// password.
//
// This functionality should only be used with legacy web sites.
// RFC 2396 warns that interpreting Userinfo this way
// "is NOT RECOMMENDED, because the passing of authentication
// information in clear text (such as URI) has proven to be a
// security risk in almost every case where it has been used."
func UserPassword(username, password string) *Userinfo {
	return &Userinfo{
		username:    username,
		password:    password,
		passwordSet: true,
	}
}

// The Userinfo type is an immutable encapsulation of the username and
// password details of a URL. An existing Userinfo value always has a
// username (possibly empty, as allowed by RFC 2396) and may have a
// password.
type Userinfo struct {
	username    string
	password    string
	passwordSet bool // distinguishes "no password" from an empty one
}

// Username returns the username. It returns "" for a nil Userinfo.
func (u *Userinfo) Username() string {
	if u != nil {
		return u.username
	}
	return ""
}

// Password returns the password and whether one is set.
// It returns "", false for a nil Userinfo.
func (u *Userinfo) Password() (string, bool) {
	if u != nil {
		return u.password, u.passwordSet
	}
	return "", false
}
388
389// String returns the encoded userinfo information in the standard form
390// of "username[:password]".
391func (u *Userinfo) String() string {
392	if u == nil {
393		return ""
394	}
395	s := escape(u.username, encodeUserPassword)
396	if u.passwordSet {
397		s += ":" + escape(u.password, encodeUserPassword)
398	}
399	return s
400}
401
// getscheme splits rawurl at the colon ending a leading scheme, if
// there is one. A scheme must match [a-zA-Z][a-zA-Z0-9+-.]*.
// If rawurl has the form scheme:path, it returns scheme, path.
// Otherwise it returns "", rawurl. A rawurl that begins with ':'
// yields an error.
func getscheme(rawurl string) (scheme, path string, err error) {
	for i := 0; i < len(rawurl); i++ {
		c := rawurl[i]

		if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
			// Letters are valid anywhere in a scheme.
			continue
		}
		if '0' <= c && c <= '9' || c == '+' || c == '-' || c == '.' {
			// Valid in a scheme, but not as its first character.
			if i == 0 {
				return "", rawurl, nil
			}
			continue
		}
		if c != ':' {
			// An invalid character was found before any colon, so
			// there is no valid scheme.
			return "", rawurl, nil
		}
		if i == 0 {
			return "", "", errors.New("missing protocol scheme")
		}
		return rawurl[:i], rawurl[i+1:], nil
	}
	return "", rawurl, nil
}
428
// split cuts s at the first occurrence of the separator c.
// If c occurs in s, split returns the text before it and the text from
// the separator onward; when cutc is true the separator itself is
// dropped from the second result.
// If c does not occur in s, split returns s, "".
func split(s string, c string, cutc bool) (string, string) {
	idx := strings.Index(s, c)
	switch {
	case idx < 0:
		return s, ""
	case cutc:
		return s[:idx], s[idx+len(c):]
	default:
		return s[:idx], s[idx:]
	}
}
442
443// Parse parses rawurl into a URL structure.
444//
445// The rawurl may be relative (a path, without a host) or absolute
446// (starting with a scheme). Trying to parse a hostname and path
447// without a scheme is invalid but may not necessarily return an
448// error, due to parsing ambiguities.
449func Parse(rawurl string) (*URL, error) {
450	// Cut off #frag
451	u, frag := split(rawurl, "#", true)
452	url, err := parse(u, false)
453	if err != nil {
454		return nil, &Error{"parse", u, err}
455	}
456	if frag == "" {
457		return url, nil
458	}
459	if url.Fragment, err = unescape(frag, encodeFragment); err != nil {
460		return nil, &Error{"parse", rawurl, err}
461	}
462	return url, nil
463}
464
465// ParseRequestURI parses rawurl into a URL structure. It assumes that
466// rawurl was received in an HTTP request, so the rawurl is interpreted
467// only as an absolute URI or an absolute path.
468// The string rawurl is assumed not to have a #fragment suffix.
469// (Web browsers strip #fragment before sending the URL to a web server.)
470func ParseRequestURI(rawurl string) (*URL, error) {
471	url, err := parse(rawurl, true)
472	if err != nil {
473		return nil, &Error{"parse", rawurl, err}
474	}
475	return url, nil
476}
477
478// parse parses a URL from a string in one of two contexts. If
479// viaRequest is true, the URL is assumed to have arrived via an HTTP request,
480// in which case only absolute URLs or path-absolute relative URLs are allowed.
481// If viaRequest is false, all forms of relative URLs are allowed.
482func parse(rawurl string, viaRequest bool) (*URL, error) {
483	var rest string
484	var err error
485
486	if rawurl == "" && viaRequest {
487		return nil, errors.New("empty url")
488	}
489	url := new(URL)
490
491	if rawurl == "*" {
492		url.Path = "*"
493		return url, nil
494	}
495
496	// Split off possible leading "http:", "mailto:", etc.
497	// Cannot contain escaped characters.
498	if url.Scheme, rest, err = getscheme(rawurl); err != nil {
499		return nil, err
500	}
501	url.Scheme = strings.ToLower(url.Scheme)
502
503	if strings.HasSuffix(rest, "?") && strings.Count(rest, "?") == 1 {
504		url.ForceQuery = true
505		rest = rest[:len(rest)-1]
506	} else {
507		rest, url.RawQuery = split(rest, "?", true)
508	}
509
510	if !strings.HasPrefix(rest, "/") {
511		if url.Scheme != "" {
512			// We consider rootless paths per RFC 3986 as opaque.
513			url.Opaque = rest
514			return url, nil
515		}
516		if viaRequest {
517			return nil, errors.New("invalid URI for request")
518		}
519
520		// Avoid confusion with malformed schemes, like cache_object:foo/bar.
521		// See golang.org/issue/16822.
522		//
523		// RFC 3986, §3.3:
524		// In addition, a URI reference (Section 4.1) may be a relative-path reference,
525		// in which case the first path segment cannot contain a colon (":") character.
526		colon := strings.Index(rest, ":")
527		slash := strings.Index(rest, "/")
528		if colon >= 0 && (slash < 0 || colon < slash) {
529			// First path segment has colon. Not allowed in relative URL.
530			return nil, errors.New("first path segment in URL cannot contain colon")
531		}
532	}
533
534	if (url.Scheme != "" || !viaRequest && !strings.HasPrefix(rest, "///")) && strings.HasPrefix(rest, "//") {
535		var authority string
536		authority, rest = split(rest[2:], "/", false)
537		url.User, url.Host, err = parseAuthority(authority)
538		if err != nil {
539			return nil, err
540		}
541	}
542	// Set Path and, optionally, RawPath.
543	// RawPath is a hint of the encoding of Path. We don't want to set it if
544	// the default escaping of Path is equivalent, to help make sure that people
545	// don't rely on it in general.
546	if err := url.setPath(rest); err != nil {
547		return nil, err
548	}
549	return url, nil
550}
551
552func parseAuthority(authority string) (user *Userinfo, host string, err error) {
553	i := strings.LastIndex(authority, "@")
554	if i < 0 {
555		host, err = parseHost(authority)
556	} else {
557		host, err = parseHost(authority[i+1:])
558	}
559	if err != nil {
560		return nil, "", err
561	}
562	if i < 0 {
563		return nil, host, nil
564	}
565	userinfo := authority[:i]
566	if !validUserinfo(userinfo) {
567		return nil, "", errors.New("net/url: invalid userinfo")
568	}
569	if !strings.Contains(userinfo, ":") {
570		if userinfo, err = unescape(userinfo, encodeUserPassword); err != nil {
571			return nil, "", err
572		}
573		user = User(userinfo)
574	} else {
575		username, password := split(userinfo, ":", true)
576		if username, err = unescape(username, encodeUserPassword); err != nil {
577			return nil, "", err
578		}
579		if password, err = unescape(password, encodeUserPassword); err != nil {
580			return nil, "", err
581		}
582		user = UserPassword(username, password)
583	}
584	return user, host, nil
585}
586
587// parseHost parses host as an authority without user
588// information. That is, as host[:port].
589func parseHost(host string) (string, error) {
590	if strings.HasPrefix(host, "[") {
591		// Parse an IP-Literal in RFC 3986 and RFC 6874.
592		// E.g., "[fe80::1]", "[fe80::1%25en0]", "[fe80::1]:80".
593		i := strings.LastIndex(host, "]")
594		if i < 0 {
595			return "", errors.New("missing ']' in host")
596		}
597		colonPort := host[i+1:]
598		if !validOptionalPort(colonPort) {
599			return "", fmt.Errorf("invalid port %q after host", colonPort)
600		}
601
602		// RFC 6874 defines that %25 (%-encoded percent) introduces
603		// the zone identifier, and the zone identifier can use basically
604		// any %-encoding it likes. That's different from the host, which
605		// can only %-encode non-ASCII bytes.
606		// We do impose some restrictions on the zone, to avoid stupidity
607		// like newlines.
608		zone := strings.Index(host[:i], "%25")
609		if zone >= 0 {
610			host1, err := unescape(host[:zone], encodeHost)
611			if err != nil {
612				return "", err
613			}
614			host2, err := unescape(host[zone:i], encodeZone)
615			if err != nil {
616				return "", err
617			}
618			host3, err := unescape(host[i:], encodeHost)
619			if err != nil {
620				return "", err
621			}
622			return host1 + host2 + host3, nil
623		}
624	}
625
626	var err error
627	if host, err = unescape(host, encodeHost); err != nil {
628		return "", err
629	}
630	return host, nil
631}
632
633// setPath sets the Path and RawPath fields of the URL based on the provided
634// escaped path p. It maintains the invariant that RawPath is only specified
635// when it differs from the default encoding of the path.
636// For example:
637// - setPath("/foo/bar")   will set Path="/foo/bar" and RawPath=""
638// - setPath("/foo%2fbar") will set Path="/foo/bar" and RawPath="/foo%2fbar"
639// setPath will return an error only if the provided path contains an invalid
640// escaping.
641func (u *URL) setPath(p string) error {
642	path, err := unescape(p, encodePath)
643	if err != nil {
644		return err
645	}
646	u.Path = path
647	if escp := escape(path, encodePath); p == escp {
648		// Default encoding is fine.
649		u.RawPath = ""
650	} else {
651		u.RawPath = p
652	}
653	return nil
654}
655
656// EscapedPath returns the escaped form of u.Path.
657// In general there are multiple possible escaped forms of any path.
658// EscapedPath returns u.RawPath when it is a valid escaping of u.Path.
659// Otherwise EscapedPath ignores u.RawPath and computes an escaped
660// form on its own.
661// The String and RequestURI methods use EscapedPath to construct
662// their results.
663// In general, code should call EscapedPath instead of
664// reading u.RawPath directly.
665func (u *URL) EscapedPath() string {
666	if u.RawPath != "" && validEncodedPath(u.RawPath) {
667		p, err := unescape(u.RawPath, encodePath)
668		if err == nil && p == u.Path {
669			return u.RawPath
670		}
671	}
672	if u.Path == "*" {
673		return "*" // don't escape (Issue 11202)
674	}
675	return escape(u.Path, encodePath)
676}
677
678// validEncodedPath reports whether s is a valid encoded path.
679// It must not contain any bytes that require escaping during path encoding.
680func validEncodedPath(s string) bool {
681	for i := 0; i < len(s); i++ {
682		// RFC 3986, Appendix A.
683		// pchar = unreserved / pct-encoded / sub-delims / ":" / "@".
684		// shouldEscape is not quite compliant with the RFC,
685		// so we check the sub-delims ourselves and let
686		// shouldEscape handle the others.
687		switch s[i] {
688		case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '@':
689			// ok
690		case '[', ']':
691			// ok - not specified in RFC 3986 but left alone by modern browsers
692		case '%':
693			// ok - percent encoded, will decode
694		default:
695			if shouldEscape(s[i], encodePath) {
696				return false
697			}
698		}
699	}
700	return true
701}
702
// validOptionalPort reports whether port is either an empty string
// or matches /^:\d*$/
func validOptionalPort(port string) bool {
	if port == "" {
		return true
	}
	if port[0] != ':' {
		return false
	}
	// Everything after the colon must be an ASCII digit.
	for i := 1; i < len(port); i++ {
		if c := port[i]; c < '0' || c > '9' {
			return false
		}
	}
	return true
}
719
720// String reassembles the URL into a valid URL string.
721// The general form of the result is one of:
722//
723//	scheme:opaque?query#fragment
724//	scheme://userinfo@host/path?query#fragment
725//
726// If u.Opaque is non-empty, String uses the first form;
727// otherwise it uses the second form.
728// To obtain the path, String uses u.EscapedPath().
729//
730// In the second form, the following rules apply:
731//	- if u.Scheme is empty, scheme: is omitted.
732//	- if u.User is nil, userinfo@ is omitted.
733//	- if u.Host is empty, host/ is omitted.
734//	- if u.Scheme and u.Host are empty and u.User is nil,
735//	   the entire scheme://userinfo@host/ is omitted.
736//	- if u.Host is non-empty and u.Path begins with a /,
737//	   the form host/path does not add its own /.
738//	- if u.RawQuery is empty, ?query is omitted.
739//	- if u.Fragment is empty, #fragment is omitted.
740func (u *URL) String() string {
741	var buf bytes.Buffer
742	if u.Scheme != "" {
743		buf.WriteString(u.Scheme)
744		buf.WriteByte(':')
745	}
746	if u.Opaque != "" {
747		buf.WriteString(u.Opaque)
748	} else {
749		if u.Scheme != "" || u.Host != "" || u.User != nil {
750			if u.Host != "" || u.Path != "" || u.User != nil {
751				buf.WriteString("//")
752			}
753			if ui := u.User; ui != nil {
754				buf.WriteString(ui.String())
755				buf.WriteByte('@')
756			}
757			if h := u.Host; h != "" {
758				buf.WriteString(escape(h, encodeHost))
759			}
760		}
761		path := u.EscapedPath()
762		if path != "" && path[0] != '/' && u.Host != "" {
763			buf.WriteByte('/')
764		}
765		if buf.Len() == 0 {
766			// RFC 3986 §4.2
767			// A path segment that contains a colon character (e.g., "this:that")
768			// cannot be used as the first segment of a relative-path reference, as
769			// it would be mistaken for a scheme name. Such a segment must be
770			// preceded by a dot-segment (e.g., "./this:that") to make a relative-
771			// path reference.
772			if i := strings.IndexByte(path, ':'); i > -1 && strings.IndexByte(path[:i], '/') == -1 {
773				buf.WriteString("./")
774			}
775		}
776		buf.WriteString(path)
777	}
778	if u.ForceQuery || u.RawQuery != "" {
779		buf.WriteByte('?')
780		buf.WriteString(u.RawQuery)
781	}
782	if u.Fragment != "" {
783		buf.WriteByte('#')
784		buf.WriteString(escape(u.Fragment, encodeFragment))
785	}
786	return buf.String()
787}
788
// Values maps a string key to a list of values.
// It is typically used for query parameters and form values.
// Unlike in the http.Header map, the keys in a Values map
// are case-sensitive.
type Values map[string][]string

// Get returns the first value associated with the given key, or the
// empty string if the key has no values. To access multiple values,
// use the map directly. Get is safe to call on a nil Values.
func (v Values) Get(key string) string {
	if vs := v[key]; len(vs) > 0 {
		return vs[0]
	}
	return ""
}

// Set sets the key to value, replacing any existing values.
func (v Values) Set(key, value string) {
	v[key] = []string{value}
}

// Add appends value to the values already associated with key.
func (v Values) Add(key, value string) {
	existing := v[key]
	v[key] = append(existing, value)
}

// Del deletes the values associated with key.
func (v Values) Del(key string) {
	delete(v, key)
}
826
827// ParseQuery parses the URL-encoded query string and returns
828// a map listing the values specified for each key.
829// ParseQuery always returns a non-nil map containing all the
830// valid query parameters found; err describes the first decoding error
831// encountered, if any.
832//
833// Query is expected to be a list of key=value settings separated by
834// ampersands or semicolons. A setting without an equals sign is
835// interpreted as a key set to an empty value.
836func ParseQuery(query string) (Values, error) {
837	m := make(Values)
838	err := parseQuery(m, query)
839	return m, err
840}
841
842func parseQuery(m Values, query string) (err error) {
843	for query != "" {
844		key := query
845		if i := strings.IndexAny(key, "&;"); i >= 0 {
846			key, query = key[:i], key[i+1:]
847		} else {
848			query = ""
849		}
850		if key == "" {
851			continue
852		}
853		value := ""
854		if i := strings.Index(key, "="); i >= 0 {
855			key, value = key[:i], key[i+1:]
856		}
857		key, err1 := QueryUnescape(key)
858		if err1 != nil {
859			if err == nil {
860				err = err1
861			}
862			continue
863		}
864		value, err1 = QueryUnescape(value)
865		if err1 != nil {
866			if err == nil {
867				err = err1
868			}
869			continue
870		}
871		m[key] = append(m[key], value)
872	}
873	return err
874}
875
876// Encode encodes the values into ``URL encoded'' form
877// ("bar=baz&foo=quux") sorted by key.
878func (v Values) Encode() string {
879	if v == nil {
880		return ""
881	}
882	var buf bytes.Buffer
883	keys := make([]string, 0, len(v))
884	for k := range v {
885		keys = append(keys, k)
886	}
887	sort.Strings(keys)
888	for _, k := range keys {
889		vs := v[k]
890		prefix := QueryEscape(k) + "="
891		for _, v := range vs {
892			if buf.Len() > 0 {
893				buf.WriteByte('&')
894			}
895			buf.WriteString(prefix)
896			buf.WriteString(QueryEscape(v))
897		}
898	}
899	return buf.String()
900}
901
// resolvePath resolves ref against base and removes the special "."
// and ".." path segments from the result, per RFC 3986.
// A ref that is empty resolves to base; a ref starting with '/'
// replaces base; any other ref replaces the last segment of base.
// The result is "" when the combined path is empty, and otherwise
// always begins with a slash.
func resolvePath(base, ref string) string {
	full := ref
	switch {
	case ref == "":
		full = base
	case ref[0] != '/':
		// Relative reference: keep base up to and including its last slash.
		full = base[:strings.LastIndex(base, "/")+1] + ref
	}
	if full == "" {
		return ""
	}

	segments := strings.Split(full, "/")
	var kept []string
	for _, seg := range segments {
		if seg == "." {
			continue
		}
		if seg == ".." {
			// Go up one level, but never above the root.
			if n := len(kept); n > 0 {
				kept = kept[:n-1]
			}
			continue
		}
		kept = append(kept, seg)
	}
	switch segments[len(segments)-1] {
	case ".", "..":
		// A trailing dot segment denotes a directory: add a final
		// slash to the joined path.
		kept = append(kept, "")
	}
	return "/" + strings.TrimPrefix(strings.Join(kept, "/"), "/")
}
937
938// IsAbs reports whether the URL is absolute.
939// Absolute means that it has a non-empty scheme.
940func (u *URL) IsAbs() bool {
941	return u.Scheme != ""
942}
943
944// Parse parses a URL in the context of the receiver. The provided URL
945// may be relative or absolute. Parse returns nil, err on parse
946// failure, otherwise its return value is the same as ResolveReference.
947func (u *URL) Parse(ref string) (*URL, error) {
948	refurl, err := Parse(ref)
949	if err != nil {
950		return nil, err
951	}
952	return u.ResolveReference(refurl), nil
953}
954
955// ResolveReference resolves a URI reference to an absolute URI from
956// an absolute base URI, per RFC 3986 Section 5.2.  The URI reference
957// may be relative or absolute. ResolveReference always returns a new
958// URL instance, even if the returned URL is identical to either the
959// base or reference. If ref is an absolute URL, then ResolveReference
960// ignores base and returns a copy of ref.
961func (u *URL) ResolveReference(ref *URL) *URL {
962	url := *ref
963	if ref.Scheme == "" {
964		url.Scheme = u.Scheme
965	}
966	if ref.Scheme != "" || ref.Host != "" || ref.User != nil {
967		// The "absoluteURI" or "net_path" cases.
968		// We can ignore the error from setPath since we know we provided a
969		// validly-escaped path.
970		url.setPath(resolvePath(ref.EscapedPath(), ""))
971		return &url
972	}
973	if ref.Opaque != "" {
974		url.User = nil
975		url.Host = ""
976		url.Path = ""
977		return &url
978	}
979	if ref.Path == "" && ref.RawQuery == "" {
980		url.RawQuery = u.RawQuery
981		if ref.Fragment == "" {
982			url.Fragment = u.Fragment
983		}
984	}
985	// The "abs_path" or "rel_path" cases.
986	url.Host = u.Host
987	url.User = u.User
988	url.setPath(resolvePath(u.EscapedPath(), ref.EscapedPath()))
989	return &url
990}
991
992// Query parses RawQuery and returns the corresponding values.
993// It silently discards malformed value pairs.
994// To check errors use ParseQuery.
995func (u *URL) Query() Values {
996	v, _ := ParseQuery(u.RawQuery)
997	return v
998}
999
1000// RequestURI returns the encoded path?query or opaque?query
1001// string that would be used in an HTTP request for u.
1002func (u *URL) RequestURI() string {
1003	result := u.Opaque
1004	if result == "" {
1005		result = u.EscapedPath()
1006		if result == "" {
1007			result = "/"
1008		}
1009	} else {
1010		if strings.HasPrefix(result, "//") {
1011			result = u.Scheme + ":" + result
1012		}
1013	}
1014	if u.ForceQuery || u.RawQuery != "" {
1015		result += "?" + u.RawQuery
1016	}
1017	return result
1018}
1019
1020// Hostname returns u.Host, without any port number.
1021//
1022// If Host is an IPv6 literal with a port number, Hostname returns the
1023// IPv6 literal without the square brackets. IPv6 literals may include
1024// a zone identifier.
1025func (u *URL) Hostname() string {
1026	return stripPort(u.Host)
1027}
1028
1029// Port returns the port part of u.Host, without the leading colon.
1030// If u.Host doesn't contain a port, Port returns an empty string.
1031func (u *URL) Port() string {
1032	return portOnly(u.Host)
1033}
1034
// stripPort returns hostport without its port. When hostport contains
// no colon it is returned unchanged. When it contains both a colon and
// a ']', the text before the first ']' is returned with a leading '['
// removed. Otherwise the text before the first colon is returned.
func stripPort(hostport string) string {
	firstColon := strings.IndexByte(hostport, ':')
	if firstColon < 0 {
		return hostport
	}
	if closing := strings.IndexByte(hostport, ']'); closing >= 0 {
		return strings.TrimPrefix(hostport[:closing], "[")
	}
	return hostport[:firstColon]
}
1045
// portOnly returns the port part of hostport, without the leading
// colon. It returns "" when hostport contains no colon, or when it
// contains a ']' that is not immediately followed by a colon.
func portOnly(hostport string) string {
	const bracketColon = "]:"

	firstColon := strings.IndexByte(hostport, ':')
	if firstColon < 0 {
		return ""
	}
	if at := strings.Index(hostport, bracketColon); at >= 0 {
		// Bracketed literal followed by a port.
		return hostport[at+len(bracketColon):]
	}
	if strings.IndexByte(hostport, ']') >= 0 {
		// Bracketed literal without a port; the colons belong to it.
		return ""
	}
	return hostport[firstColon+1:]
}
1059
1060// Marshaling interface implementations.
1061// Would like to implement MarshalText/UnmarshalText but that will change the JSON representation of URLs.
1062
1063func (u *URL) MarshalBinary() (text []byte, err error) {
1064	return []byte(u.String()), nil
1065}
1066
1067func (u *URL) UnmarshalBinary(text []byte) error {
1068	u1, err := Parse(string(text))
1069	if err != nil {
1070		return err
1071	}
1072	*u = *u1
1073	return nil
1074}
1075
// validUserinfo reports whether s is a valid userinfo string per RFC 3986
// Section 3.2.1:
//     userinfo    = *( unreserved / pct-encoded / sub-delims / ":" )
//     unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
//     sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
//                   / "*" / "+" / "," / ";" / "="
//
// In addition to that grammar, '@' is accepted. The '%' of a
// pct-encoded sequence is accepted without checking the digits that
// follow; the caller validates those via func unescape.
func validUserinfo(s string) bool {
	for _, r := range s {
		switch r {
		case '-', '.', '_', ':', '~', '!', '$', '&', '\'',
			'(', ')', '*', '+', ',', ';', '=', '%', '@':
			continue
		}
		isUpper := 'A' <= r && r <= 'Z'
		isLower := 'a' <= r && r <= 'z'
		isDigit := '0' <= r && r <= '9'
		if !isUpper && !isLower && !isDigit {
			return false
		}
	}
	return true
}
1105