1// Copyright 2009 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5// Package urlesc implements query escaping as per RFC 3986. 6// It contains some parts of the net/url package, modified so as to allow 7// some reserved characters incorrectly escaped by net/url. 8// See https://github.com/golang/go/issues/5684 9package urlesc 10 11import ( 12 "bytes" 13 "net/url" 14 "strings" 15) 16 17type encoding int 18 19const ( 20 encodePath encoding = 1 + iota 21 encodeUserPassword 22 encodeQueryComponent 23 encodeFragment 24) 25 26// Return true if the specified character should be escaped when 27// appearing in a URL string, according to RFC 3986. 28func shouldEscape(c byte, mode encoding) bool { 29 // §2.3 Unreserved characters (alphanum) 30 if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' { 31 return false 32 } 33 34 switch c { 35 case '-', '.', '_', '~': // §2.3 Unreserved characters (mark) 36 return false 37 38 // §2.2 Reserved characters (reserved) 39 case ':', '/', '?', '#', '[', ']', '@', // gen-delims 40 '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=': // sub-delims 41 // Different sections of the URL allow a few of 42 // the reserved characters to appear unescaped. 43 switch mode { 44 case encodePath: // §3.3 45 // The RFC allows sub-delims and : @. 46 // '/', '[' and ']' can be used to assign meaning to individual path 47 // segments. This package only manipulates the path as a whole, 48 // so we allow those as well. That leaves only ? and # to escape. 49 return c == '?' || c == '#' 50 51 case encodeUserPassword: // §3.2.1 52 // The RFC allows : and sub-delims in 53 // userinfo. The parsing of userinfo treats ':' as special so we must escape 54 // all the gen-delims. 55 return c == ':' || c == '/' || c == '?' || c == '#' || c == '[' || c == ']' || c == '@' 56 57 case encodeQueryComponent: // §3.4 58 // The RFC allows / and ?. 59 return c != '/' && c != '?' 60 61 case encodeFragment: // §4.1 62 // The RFC text is silent but the grammar allows 63 // everything, so escape nothing but # 64 return c == '#' 65 } 66 } 67 68 // Everything else must be escaped. 69 return true 70} 71 72// QueryEscape escapes the string so it can be safely placed 73// inside a URL query. 74func QueryEscape(s string) string { 75 return escape(s, encodeQueryComponent) 76} 77 78func escape(s string, mode encoding) string { 79 spaceCount, hexCount := 0, 0 80 for i := 0; i < len(s); i++ { 81 c := s[i] 82 if shouldEscape(c, mode) { 83 if c == ' ' && mode == encodeQueryComponent { 84 spaceCount++ 85 } else { 86 hexCount++ 87 } 88 } 89 } 90 91 if spaceCount == 0 && hexCount == 0 { 92 return s 93 } 94 95 t := make([]byte, len(s)+2*hexCount) 96 j := 0 97 for i := 0; i < len(s); i++ { 98 switch c := s[i]; { 99 case c == ' ' && mode == encodeQueryComponent: 100 t[j] = '+' 101 j++ 102 case shouldEscape(c, mode): 103 t[j] = '%' 104 t[j+1] = "0123456789ABCDEF"[c>>4] 105 t[j+2] = "0123456789ABCDEF"[c&15] 106 j += 3 107 default: 108 t[j] = s[i] 109 j++ 110 } 111 } 112 return string(t) 113} 114 115var uiReplacer = strings.NewReplacer( 116 "%21", "!", 117 "%27", "'", 118 "%28", "(", 119 "%29", ")", 120 "%2A", "*", 121) 122 123// unescapeUserinfo unescapes some characters that need not to be escaped as per RFC3986. 124func unescapeUserinfo(s string) string { 125 return uiReplacer.Replace(s) 126} 127 128// Escape reassembles the URL into a valid URL string. 129// The general form of the result is one of: 130// 131// scheme:opaque 132// scheme://userinfo@host/path?query#fragment 133// 134// If u.Opaque is non-empty, String uses the first form; 135// otherwise it uses the second form. 136// 137// In the second form, the following rules apply: 138// - if u.Scheme is empty, scheme: is omitted. 139// - if u.User is nil, userinfo@ is omitted. 140// - if u.Host is empty, host/ is omitted. 141// - if u.Scheme and u.Host are empty and u.User is nil, 142// the entire scheme://userinfo@host/ is omitted. 143// - if u.Host is non-empty and u.Path begins with a /, 144// the form host/path does not add its own /. 145// - if u.RawQuery is empty, ?query is omitted. 146// - if u.Fragment is empty, #fragment is omitted. 147func Escape(u *url.URL) string { 148 var buf bytes.Buffer 149 if u.Scheme != "" { 150 buf.WriteString(u.Scheme) 151 buf.WriteByte(':') 152 } 153 if u.Opaque != "" { 154 buf.WriteString(u.Opaque) 155 } else { 156 if u.Scheme != "" || u.Host != "" || u.User != nil { 157 buf.WriteString("//") 158 if ui := u.User; ui != nil { 159 buf.WriteString(unescapeUserinfo(ui.String())) 160 buf.WriteByte('@') 161 } 162 if h := u.Host; h != "" { 163 buf.WriteString(h) 164 } 165 } 166 if u.Path != "" && u.Path[0] != '/' && u.Host != "" { 167 buf.WriteByte('/') 168 } 169 buf.WriteString(escape(u.Path, encodePath)) 170 } 171 if u.RawQuery != "" { 172 buf.WriteByte('?') 173 buf.WriteString(u.RawQuery) 174 } 175 if u.Fragment != "" { 176 buf.WriteByte('#') 177 buf.WriteString(escape(u.Fragment, encodeFragment)) 178 } 179 return buf.String() 180} 181