1// Copyright 2019 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//      http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package proxy
16
17import (
18	"bytes"
19	"io"
20	"io/ioutil"
21	"mime"
22	"mime/multipart"
23	"net/http"
24	"net/url"
25	"regexp"
26	"strings"
27)
28
29// A Converter converts HTTP requests and responses to the Request and Response types
30// of this package, while removing or redacting information.
31type Converter struct {
32	// These all apply to both headers and trailers.
33	ClearHeaders          []tRegexp // replace matching headers with "CLEARED"
34	RemoveRequestHeaders  []tRegexp // remove matching headers in requests
35	RemoveResponseHeaders []tRegexp // remove matching headers in responses
36	ClearParams           []tRegexp // replace matching query params with "CLEARED"
37	RemoveParams          []tRegexp // remove matching query params
38}
39
// tRegexp is a regular expression that can be marshaled to and from text.
// Its textual form is the source of the expression.
type tRegexp struct {
	*regexp.Regexp
}

// MarshalText returns the source text of the regular expression.
// The embedded Regexp must be non-nil.
func (r tRegexp) MarshalText() ([]byte, error) {
	return []byte(r.String()), nil
}

// UnmarshalText compiles b as a regular expression and stores the result in r.
// If b does not compile, the compile error is returned and r keeps whatever
// expression it held before the call, rather than being reset to nil.
func (r *tRegexp) UnmarshalText(b []byte) error {
	re, err := regexp.Compile(string(b))
	if err != nil {
		return err
	}
	r.Regexp = re
	return nil
}
54
55func (c *Converter) registerRemoveRequestHeaders(pat string) {
56	c.RemoveRequestHeaders = append(c.RemoveRequestHeaders, pattern(pat))
57}
58
59func (c *Converter) registerClearHeaders(pat string) {
60	c.ClearHeaders = append(c.ClearHeaders, pattern(pat))
61}
62
63func (c *Converter) registerRemoveParams(pat string) {
64	c.RemoveParams = append(c.RemoveParams, pattern(pat))
65}
66
67func (c *Converter) registerClearParams(pat string) {
68	c.ClearParams = append(c.ClearParams, pattern(pat))
69}
70
// Header-name patterns installed by defaultConverter. A "*" in a pattern
// matches zero or more characters.
var (
	// defaultRemoveRequestHeaders are dropped from requests only.
	defaultRemoveRequestHeaders = []string{
		// Secret, and probably missing on replay anyway.
		"Authorization",
		"Proxy-Authorization",
		"Connection",
		// May contain a random multipart boundary.
		"Content-Type",
		"Date",
		"Host",
		"Transfer-Encoding",
		"Via",
		"X-Forwarded-*",
		// Google-specific:
		// OpenCensus traces have a random ID.
		"X-Cloud-Trace-Context",
		// Can differ for, e.g., different Go versions.
		"X-Goog-Api-Client",
	}

	// defaultRemoveBothHeaders are dropped from requests and from responses.
	defaultRemoveBothHeaders = []string{
		// Google-specific:
		// GFEs scrub X-Google- and X-GFE- headers from requests and responses,
		// so drop them from recordings made by users inside Google. See
		// http://g3doc/gfe/g3doc/gfe3/design/http_filters/google_header_filter
		// (internal Google documentation).
		"X-Google-*",
		"X-Gfe-*",
	}

	// defaultClearHeaders are kept, with their value replaced by "CLEARED".
	defaultClearHeaders = []string{
		// Google-specific:
		// Used by Cloud Storage for customer-supplied encryption.
		"X-Goog-*Encryption-Key",
	}
)
103
104func defaultConverter() *Converter {
105	c := &Converter{}
106	for _, h := range defaultClearHeaders {
107		c.registerClearHeaders(h)
108	}
109	for _, h := range defaultRemoveRequestHeaders {
110		c.registerRemoveRequestHeaders(h)
111	}
112	for _, h := range defaultRemoveBothHeaders {
113		c.registerRemoveRequestHeaders(h)
114		c.RemoveResponseHeaders = append(c.RemoveResponseHeaders, pattern(h))
115	}
116	return c
117}
118
119// Convert a pattern into a regexp.
120// A pattern is like a literal regexp anchored on both ends, with only one
121// non-literal character: "*", which matches zero or more characters.
122func pattern(p string) tRegexp {
123	q := regexp.QuoteMeta(p)
124	q = "^" + strings.Replace(q, `\*`, `.*`, -1) + "$"
125	// q must be a legal regexp.
126	return tRegexp{regexp.MustCompile(q)}
127}
128
129func (c *Converter) convertRequest(req *http.Request) (*Request, error) {
130	body, err := snapshotBody(&req.Body)
131	if err != nil {
132		return nil, err
133	}
134	// If the body is empty, set it to nil to make sure the proxy sends a
135	// Content-Length header.
136	if len(body) == 0 {
137		req.Body = nil
138	}
139	mediaType, parts, err := parseRequestBody(req.Header.Get("Content-Type"), body)
140	if err != nil {
141		return nil, err
142	}
143	url2 := *req.URL
144	url2.RawQuery = scrubQuery(url2.RawQuery, c.ClearParams, c.RemoveParams)
145	return &Request{
146		Method:    req.Method,
147		URL:       url2.String(),
148		Header:    scrubHeaders(req.Header, c.ClearHeaders, c.RemoveRequestHeaders),
149		MediaType: mediaType,
150		BodyParts: parts,
151		Trailer:   scrubHeaders(req.Trailer, c.ClearHeaders, c.RemoveRequestHeaders),
152	}, nil
153}
154
// parseRequestBody interprets body according to contentType, the value of the
// Content-Type header. It returns the media type (without its parameters) and
// the body parts:
//   - with an empty contentType, the media type is "" and the whole body is
//     the single part;
//   - for a "multipart/..." media type, there is one part per multipart
//     section (section headers are discarded);
//   - for any other media type, the whole body is the single part.
//
// An error is returned if contentType cannot be parsed or the multipart body
// is malformed.
func parseRequestBody(contentType string, body []byte) (string, [][]byte, error) {
	if contentType == "" {
		return "", [][]byte{body}, nil
	}
	mediaType, params, err := mime.ParseMediaType(contentType)
	if err != nil {
		return "", nil, err
	}
	if !strings.HasPrefix(mediaType, "multipart/") {
		return mediaType, [][]byte{body}, nil
	}

	var sections [][]byte
	reader := multipart.NewReader(bytes.NewReader(body), params["boundary"])
	for {
		section, err := reader.NextPart()
		switch {
		case err == io.EOF:
			// No more sections.
			return mediaType, sections, nil
		case err != nil:
			return "", nil, err
		}
		content, err := ioutil.ReadAll(section)
		if err != nil {
			return "", nil, err
		}
		// TODO(jba): care about part headers?
		sections = append(sections, content)
	}
}
189
190func (c *Converter) convertResponse(res *http.Response) (*Response, error) {
191	data, err := snapshotBody(&res.Body)
192	if err != nil {
193		return nil, err
194	}
195	return &Response{
196		StatusCode: res.StatusCode,
197		Proto:      res.Proto,
198		ProtoMajor: res.ProtoMajor,
199		ProtoMinor: res.ProtoMinor,
200		Header:     scrubHeaders(res.Header, c.ClearHeaders, c.RemoveResponseHeaders),
201		Body:       data,
202		Trailer:    scrubHeaders(res.Trailer, c.ClearHeaders, c.RemoveResponseHeaders),
203	}, nil
204}
205
// snapshotBody reads *body to the end, closes it, and replaces *body with a
// fresh reader over the same bytes so that the body can be consumed again.
// It returns the bytes that were read.
//
// A nil *body is treated as an empty body: *body is left nil and an empty
// slice is returned. If reading fails, the error is returned and *body is
// left as it was, without being closed.
func snapshotBody(body *io.ReadCloser) ([]byte, error) {
	if *body == nil {
		// Reading from a nil interface would panic. A request body can be
		// nil; convertRequest itself sets an empty request body to nil.
		return []byte{}, nil
	}
	data, err := ioutil.ReadAll(*body)
	if err != nil {
		return nil, err
	}
	// The content has already been captured, so a failure to close the
	// original body is deliberately ignored.
	(*body).Close()
	*body = ioutil.NopCloser(bytes.NewReader(data))
	return data, nil
}
215
216// Copy headers, clearing some and removing others.
217func scrubHeaders(hs http.Header, clear, remove []tRegexp) http.Header {
218	rh := http.Header{}
219	for k, v := range hs {
220		switch {
221		case match(k, clear):
222			rh.Set(k, "CLEARED")
223		case match(k, remove):
224			// skip
225		default:
226			rh[k] = v
227		}
228	}
229	return rh
230}
231
232// Copy the query string, clearing some query params and removing others.
233// Preserve the order of the string.
234func scrubQuery(query string, clear, remove []tRegexp) string {
235	// We can't use url.ParseQuery because it doesn't preserve order.
236	var buf bytes.Buffer
237	for {
238		if i := strings.IndexAny(query, "&;"); i >= 0 {
239			scrubParam(&buf, query[:i], query[i], clear, remove)
240			query = query[i+1:]
241		} else {
242			scrubParam(&buf, query, 0, clear, remove)
243			break
244		}
245	}
246	s := buf.String()
247	if strings.HasSuffix(s, "&") {
248		return s[:len(s)-1]
249	}
250	return s
251}
252
253func scrubParam(buf *bytes.Buffer, param string, sep byte, clear, remove []tRegexp) {
254	if param == "" {
255		return
256	}
257	key := param
258	value := ""
259	if i := strings.Index(param, "="); i >= 0 {
260		key, value = key[:i], key[i+1:]
261	}
262	ukey, err := url.QueryUnescape(key)
263	// If the key is bad, just pass it and the value through.
264	if err != nil {
265		buf.WriteString(param)
266		if sep != 0 {
267			buf.WriteByte(sep)
268		}
269		return
270	}
271	if match(ukey, remove) {
272		return
273	}
274	if match(ukey, clear) && value != "" {
275		value = "CLEARED"
276	}
277	buf.WriteString(key)
278	buf.WriteByte('=')
279	buf.WriteString(value)
280	if sep != 0 {
281		buf.WriteByte(sep)
282	}
283}
284
285func match(s string, res []tRegexp) bool {
286	for _, re := range res {
287		if re.MatchString(s) {
288			return true
289		}
290	}
291	return false
292}
293