1// Copyright 2018 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//      http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package proxy
16
17import (
18	"bytes"
19	"fmt"
20	"io/ioutil"
21	"net/http"
22	"strconv"
23	"sync"
24
25	"github.com/google/martian"
26)
27
28// Replacement for the HAR logging that comes with martian. HAR is not designed for
29// replay. In particular, response bodies are interpreted (e.g. decompressed), and we
30// just want them to be stored literally. This isn't something we can fix in martian: it
31// is required in the HAR spec (http://www.softwareishard.com/blog/har-12-spec/#content).
32
// LogVersion is the current version of the log format. It can be used to
// support changes to the format over time, so newer code can read older files.
// newLogger stores it in the Version field of the Log it creates.
const LogVersion = "0.2"
36
// A Log is a record of HTTP interactions, suitable for replay. It can be serialized to JSON.
//
// Version is set to LogVersion by newLogger. Converter is the converter the
// Logger uses to turn HTTP requests and responses into their logged form.
// Entries holds one Entry per logged request, in the order in which
// Logger.ModifyRequest appended them.
type Log struct {
	Initial   []byte // initial data for replay
	Version   string // version of this log format
	Converter *Converter
	Entries   []*Entry
}
44
// An Entry is a single request-response pair.
//
// Logger.ModifyRequest creates the Entry with ID and Request set. Response
// stays nil until Logger.ModifyResponse sees a response with the same ID.
type Entry struct {
	ID       string // unique ID
	Request  *Request
	Response *Response
}
51
// A Request represents an http.Request in the log.
//
// Each field comment names the http.Request field it mirrors. The body is kept
// as a list of parts rather than a single byte slice; see the note on
// MediaType and BodyParts below. Trailer is left out of the JSON encoding
// when it is empty.
type Request struct {
	Method string      // http.Request.Method
	URL    string      // http.Request.URL, as a string
	Header http.Header // http.Request.Header
	// We need to understand multipart bodies because the boundaries are
	// generated randomly, so we can't just compare the entire bodies for equality.
	MediaType string      // the media type part of the Content-Type header
	BodyParts [][]byte    // http.Request.Body, read to completion and split for multipart
	Trailer   http.Header `json:",omitempty"` // http.Request.Trailer
}
63
// A Response represents an http.Response in the log.
//
// Each field comment names the http.Response field it mirrors. Body holds the
// complete response body as a single byte slice. Trailer is left out of the
// JSON encoding when it is empty.
type Response struct {
	StatusCode int         // http.Response.StatusCode
	Proto      string      // http.Response.Proto
	ProtoMajor int         // http.Response.ProtoMajor
	ProtoMinor int         // http.Response.ProtoMinor
	Header     http.Header // http.Response.Header
	Body       []byte      // http.Response.Body, read to completion
	Trailer    http.Header `json:",omitempty"` // http.Response.Trailer
}
74
// A Logger maintains a request-response log.
//
// mu is held whenever entries or log.Entries is read or updated. entries
// indexes, by ID, the same *Entry values that are appended to log.Entries, so
// that ModifyResponse can find the entry for a response. Extract sets both
// log and entries to nil.
type Logger struct {
	mu      sync.Mutex
	entries map[string]*Entry // from ID
	log     *Log
}
81
82// newLogger creates a new logger.
83func newLogger() *Logger {
84	return &Logger{
85		log: &Log{
86			Version:   LogVersion,
87			Converter: defaultConverter(),
88		},
89		entries: map[string]*Entry{},
90	}
91}
92
93// ModifyRequest logs requests.
94func (l *Logger) ModifyRequest(req *http.Request) error {
95	if req.Method == "CONNECT" {
96		return nil
97	}
98	ctx := martian.NewContext(req)
99	if ctx.SkippingLogging() {
100		return nil
101	}
102	lreq, err := l.log.Converter.convertRequest(req)
103	if err != nil {
104		return err
105	}
106	id := ctx.ID()
107	entry := &Entry{ID: id, Request: lreq}
108
109	l.mu.Lock()
110	defer l.mu.Unlock()
111
112	if _, ok := l.entries[id]; ok {
113		panic(fmt.Sprintf("proxy: duplicate request ID: %s", id))
114	}
115	l.entries[id] = entry
116	l.log.Entries = append(l.log.Entries, entry)
117	return nil
118}
119
120// ModifyResponse logs responses.
121func (l *Logger) ModifyResponse(res *http.Response) error {
122	ctx := martian.NewContext(res.Request)
123	if ctx.SkippingLogging() {
124		return nil
125	}
126	id := ctx.ID()
127	lres, err := l.log.Converter.convertResponse(res)
128	if err != nil {
129		return err
130	}
131
132	l.mu.Lock()
133	defer l.mu.Unlock()
134
135	if e, ok := l.entries[id]; ok {
136		e.Response = lres
137	}
138	// Ignore the response if we haven't seen the request.
139	return nil
140}
141
142// Extract returns the Log and removes it. The Logger is not usable
143// after this call.
144func (l *Logger) Extract() *Log {
145	l.mu.Lock()
146	defer l.mu.Unlock()
147	r := l.log
148	l.log = nil
149	l.entries = nil
150	return r
151}
152
153func toHTTPResponse(lr *Response, req *http.Request) *http.Response {
154	res := &http.Response{
155		StatusCode:    lr.StatusCode,
156		Proto:         lr.Proto,
157		ProtoMajor:    lr.ProtoMajor,
158		ProtoMinor:    lr.ProtoMinor,
159		Header:        lr.Header,
160		Body:          ioutil.NopCloser(bytes.NewReader(lr.Body)),
161		ContentLength: int64(len(lr.Body)),
162	}
163	res.Request = req
164	// For HEAD, set ContentLength to the value of the Content-Length header, or -1
165	// if there isn't one.
166	if req.Method == "HEAD" {
167		res.ContentLength = -1
168		if c := res.Header["Content-Length"]; len(c) == 1 {
169			if c64, err := strconv.ParseInt(c[0], 10, 64); err == nil {
170				res.ContentLength = c64
171			}
172		}
173	}
174	return res
175}
176