1// Copyright 2019 Google LLC 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package proxy 16 17import ( 18 "bytes" 19 "io" 20 "io/ioutil" 21 "mime" 22 "mime/multipart" 23 "net/http" 24 "net/url" 25 "regexp" 26 "strings" 27) 28 29// A Converter converts HTTP requests and responses to the Request and Response types 30// of this package, while removing or redacting information. 31type Converter struct { 32 // These all apply to both headers and trailers. 33 ClearHeaders []tRegexp // replace matching headers with "CLEARED" 34 RemoveRequestHeaders []tRegexp // remove matching headers in requests 35 RemoveResponseHeaders []tRegexp // remove matching headers in responses 36 ClearParams []tRegexp // replace matching query params with "CLEARED" 37 RemoveParams []tRegexp // remove matching query params 38} 39 40// A regexp that can be marshaled to and from text. 41type tRegexp struct { 42 *regexp.Regexp 43} 44 45func (r tRegexp) MarshalText() ([]byte, error) { 46 return []byte(r.String()), nil 47} 48 49func (r *tRegexp) UnmarshalText(b []byte) error { 50 var err error 51 r.Regexp, err = regexp.Compile(string(b)) 52 return err 53} 54 55func (c *Converter) registerRemoveRequestHeaders(pat string) { 56 c.RemoveRequestHeaders = append(c.RemoveRequestHeaders, pattern(pat)) 57} 58 59func (c *Converter) registerClearHeaders(pat string) { 60 c.ClearHeaders = append(c.ClearHeaders, pattern(pat)) 61} 62 63func (c *Converter) registerRemoveParams(pat string) { 64 c.RemoveParams = append(c.RemoveParams, pattern(pat)) 65} 66 67func (c *Converter) registerClearParams(pat string) { 68 c.ClearParams = append(c.ClearParams, pattern(pat)) 69} 70 71var ( 72 defaultRemoveRequestHeaders = []string{ 73 "Authorization", // not only is it secret, but it is probably missing on replay 74 "Proxy-Authorization", 75 "Connection", 76 "Content-Type", // because it may contain a random multipart boundary 77 "Date", 78 "Host", 79 "Transfer-Encoding", 80 "Via", 81 "X-Forwarded-*", 82 // Google-specific 83 "X-Cloud-Trace-Context", // OpenCensus traces have a random ID 84 "X-Goog-Api-Client", // can differ for, e.g., different Go versions 85 } 86 87 defaultRemoveBothHeaders = []string{ 88 // Google-specific 89 // GFEs scrub X-Google- and X-GFE- headers from requests and responses. 90 // Drop them from recordings made by users inside Google. 91 // http://g3doc/gfe/g3doc/gfe3/design/http_filters/google_header_filter 92 // (internal Google documentation). 93 "X-Google-*", 94 "X-Gfe-*", 95 } 96 97 defaultClearHeaders = []string{ 98 // Google-specific 99 // Used by Cloud Storage for customer-supplied encryption. 100 "X-Goog-*Encryption-Key", 101 } 102) 103 104func defaultConverter() *Converter { 105 c := &Converter{} 106 for _, h := range defaultClearHeaders { 107 c.registerClearHeaders(h) 108 } 109 for _, h := range defaultRemoveRequestHeaders { 110 c.registerRemoveRequestHeaders(h) 111 } 112 for _, h := range defaultRemoveBothHeaders { 113 c.registerRemoveRequestHeaders(h) 114 c.RemoveResponseHeaders = append(c.RemoveResponseHeaders, pattern(h)) 115 } 116 return c 117} 118 119// Convert a pattern into a regexp. 120// A pattern is like a literal regexp anchored on both ends, with only one 121// non-literal character: "*", which matches zero or more characters. 122func pattern(p string) tRegexp { 123 q := regexp.QuoteMeta(p) 124 q = "^" + strings.Replace(q, `\*`, `.*`, -1) + "$" 125 // q must be a legal regexp. 126 return tRegexp{regexp.MustCompile(q)} 127} 128 129func (c *Converter) convertRequest(req *http.Request) (*Request, error) { 130 body, err := snapshotBody(&req.Body) 131 if err != nil { 132 return nil, err 133 } 134 // If the body is empty, set it to nil to make sure the proxy sends a 135 // Content-Length header. 136 if len(body) == 0 { 137 req.Body = nil 138 } 139 mediaType, parts, err := parseRequestBody(req.Header.Get("Content-Type"), body) 140 if err != nil { 141 return nil, err 142 } 143 url2 := *req.URL 144 url2.RawQuery = scrubQuery(url2.RawQuery, c.ClearParams, c.RemoveParams) 145 return &Request{ 146 Method: req.Method, 147 URL: url2.String(), 148 Header: scrubHeaders(req.Header, c.ClearHeaders, c.RemoveRequestHeaders), 149 MediaType: mediaType, 150 BodyParts: parts, 151 Trailer: scrubHeaders(req.Trailer, c.ClearHeaders, c.RemoveRequestHeaders), 152 }, nil 153} 154 155// parseRequestBody parses the Content-Type header, reads the body, and splits it into 156// parts if necessary. It returns the media type and the body parts. 157func parseRequestBody(contentType string, body []byte) (string, [][]byte, error) { 158 if contentType == "" { 159 // No content-type header. Treat the body as a single part. 160 return "", [][]byte{body}, nil 161 } 162 mediaType, params, err := mime.ParseMediaType(contentType) 163 if err != nil { 164 return "", nil, err 165 } 166 var parts [][]byte 167 if strings.HasPrefix(mediaType, "multipart/") { 168 mr := multipart.NewReader(bytes.NewReader(body), params["boundary"]) 169 for { 170 p, err := mr.NextPart() 171 if err == io.EOF { 172 break 173 } 174 if err != nil { 175 return "", nil, err 176 } 177 part, err := ioutil.ReadAll(p) 178 if err != nil { 179 return "", nil, err 180 } 181 // TODO(jba): care about part headers? 182 parts = append(parts, part) 183 } 184 } else { 185 parts = [][]byte{body} 186 } 187 return mediaType, parts, nil 188} 189 190func (c *Converter) convertResponse(res *http.Response) (*Response, error) { 191 data, err := snapshotBody(&res.Body) 192 if err != nil { 193 return nil, err 194 } 195 return &Response{ 196 StatusCode: res.StatusCode, 197 Proto: res.Proto, 198 ProtoMajor: res.ProtoMajor, 199 ProtoMinor: res.ProtoMinor, 200 Header: scrubHeaders(res.Header, c.ClearHeaders, c.RemoveResponseHeaders), 201 Body: data, 202 Trailer: scrubHeaders(res.Trailer, c.ClearHeaders, c.RemoveResponseHeaders), 203 }, nil 204} 205 206func snapshotBody(body *io.ReadCloser) ([]byte, error) { 207 data, err := ioutil.ReadAll(*body) 208 if err != nil { 209 return nil, err 210 } 211 (*body).Close() 212 *body = ioutil.NopCloser(bytes.NewReader(data)) 213 return data, nil 214} 215 216// Copy headers, clearing some and removing others. 217func scrubHeaders(hs http.Header, clear, remove []tRegexp) http.Header { 218 rh := http.Header{} 219 for k, v := range hs { 220 switch { 221 case match(k, clear): 222 rh.Set(k, "CLEARED") 223 case match(k, remove): 224 // skip 225 default: 226 rh[k] = v 227 } 228 } 229 return rh 230} 231 232// Copy the query string, clearing some query params and removing others. 233// Preserve the order of the string. 234func scrubQuery(query string, clear, remove []tRegexp) string { 235 // We can't use url.ParseQuery because it doesn't preserve order. 236 var buf bytes.Buffer 237 for { 238 if i := strings.IndexAny(query, "&;"); i >= 0 { 239 scrubParam(&buf, query[:i], query[i], clear, remove) 240 query = query[i+1:] 241 } else { 242 scrubParam(&buf, query, 0, clear, remove) 243 break 244 } 245 } 246 s := buf.String() 247 if strings.HasSuffix(s, "&") { 248 return s[:len(s)-1] 249 } 250 return s 251} 252 253func scrubParam(buf *bytes.Buffer, param string, sep byte, clear, remove []tRegexp) { 254 if param == "" { 255 return 256 } 257 key := param 258 value := "" 259 if i := strings.Index(param, "="); i >= 0 { 260 key, value = key[:i], key[i+1:] 261 } 262 ukey, err := url.QueryUnescape(key) 263 // If the key is bad, just pass it and the value through. 264 if err != nil { 265 buf.WriteString(param) 266 if sep != 0 { 267 buf.WriteByte(sep) 268 } 269 return 270 } 271 if match(ukey, remove) { 272 return 273 } 274 if match(ukey, clear) && value != "" { 275 value = "CLEARED" 276 } 277 buf.WriteString(key) 278 buf.WriteByte('=') 279 buf.WriteString(value) 280 if sep != 0 { 281 buf.WriteByte(sep) 282 } 283} 284 285func match(s string, res []tRegexp) bool { 286 for _, re := range res { 287 if re.MatchString(s) { 288 return true 289 } 290 } 291 return false 292} 293