1package stats
2
3import (
4	"bytes"
5	"compress/gzip"
6	"context"
7	"errors"
8	"fmt"
9	"io"
10	"io/ioutil"
11	"net/http"
12	"os"
13	"strings"
14	"time"
15
16	"gitlab.com/gitlab-org/gitaly/v14/internal/git/pktline"
17)
18
// Clone drives a Git HTTP clone against URL and holds the statistics gathered
// for the two requests involved, exposed through the embedded Get and Post
// values.
//
// Get and Post both define ResponseHeader, HTTPStatus and ResponseBody, so
// those methods must be selected explicitly, e.g. cl.Get.HTTPStatus().
type Clone struct {
	// URL is the repository URL; the request paths "/info/refs?service=git-upload-pack"
	// and "/git-upload-pack" are appended to it verbatim.
	URL         string
	// Interactive enables printing of request details and progress to stdout.
	Interactive bool
	// User and Password are sent as HTTP basic auth credentials when User is
	// not empty.
	User        string
	Password    string

	// wants holds the object IDs of all advertised refs under refs/heads/ and
	// refs/tags/; it is filled in by doGet and consumed by buildPost.
	wants []string // all branch and tag pointers
	Get
	Post
}
29
30func (cl *Clone) RefsWanted() int { return len(cl.wants) }
31
32// Perform does a Git HTTP clone, discarding cloned data to /dev/null.
33func (cl *Clone) Perform(ctx context.Context) error {
34	if err := cl.doGet(ctx); err != nil {
35		return ctxErr(ctx, err)
36	}
37
38	if err := cl.doPost(ctx); err != nil {
39		return ctxErr(ctx, err)
40	}
41
42	return nil
43}
44
// ctxErr picks the error to report for a failed step: if ctx has been
// cancelled or has timed out, the context's error is returned; in every other
// case err is returned unchanged.
func ctxErr(ctx context.Context, err error) error {
	if cause := ctx.Err(); cause != nil {
		return cause
	}
	return err
}
51
// Get holds the statistics of the reference discovery (GET) request. The
// embedded ReferenceDiscovery is declared elsewhere in the package; this file
// relies on its Parse method and its Refs, FirstPacket and LastPacket fields.
type Get struct {
	// start is taken in doGet right before the request is sent.
	start          time.Time
	// responseHeader is the time elapsed since start once the HTTP client has
	// returned the response and its status has been accepted.
	responseHeader time.Duration
	// httpStatus is the status code of the accepted response.
	httpStatus     int
	ReferenceDiscovery
}
58
59func (g *Get) ResponseHeader() time.Duration { return g.responseHeader }
60func (g *Get) HTTPStatus() int               { return g.httpStatus }
61func (g *Get) FirstGitPacket() time.Duration { return g.FirstPacket.Sub(g.start) }
62func (g *Get) ResponseBody() time.Duration   { return g.LastPacket.Sub(g.start) }
63
64func (cl *Clone) doGet(ctx context.Context) error {
65	req, err := http.NewRequest("GET", cl.URL+"/info/refs?service=git-upload-pack", nil)
66	if err != nil {
67		return err
68	}
69
70	req = req.WithContext(ctx)
71	if cl.User != "" {
72		req.SetBasicAuth(cl.User, cl.Password)
73	}
74
75	for k, v := range map[string]string{
76		"User-Agent":      "gitaly-debug",
77		"Accept":          "*/*",
78		"Accept-Encoding": "deflate, gzip",
79		"Pragma":          "no-cache",
80	} {
81		req.Header.Set(k, v)
82	}
83
84	cl.Get.start = time.Now()
85	cl.printInteractive("---")
86	cl.printInteractive("--- GET %v", req.URL)
87	cl.printInteractive("---")
88
89	resp, err := http.DefaultClient.Do(req)
90	if err != nil {
91		return err
92	}
93	defer func() {
94		io.Copy(ioutil.Discard, resp.Body)
95		resp.Body.Close()
96	}()
97
98	if code := resp.StatusCode; code < 200 || code >= 400 {
99		return fmt.Errorf("git http get: unexpected http status: %d", code)
100	}
101
102	cl.Get.responseHeader = time.Since(cl.Get.start)
103	cl.Get.httpStatus = resp.StatusCode
104	cl.printInteractive("response code: %d", resp.StatusCode)
105	cl.printInteractive("response header: %v", resp.Header)
106
107	body := resp.Body
108	if resp.Header.Get("Content-Encoding") == "gzip" {
109		body, err = gzip.NewReader(body)
110		if err != nil {
111			return err
112		}
113	}
114
115	if err := cl.Get.Parse(body); err != nil {
116		return err
117	}
118
119	for _, ref := range cl.Get.Refs {
120		if strings.HasPrefix(ref.Name, "refs/heads/") || strings.HasPrefix(ref.Name, "refs/tags/") {
121			cl.wants = append(cl.wants, ref.Oid)
122		}
123	}
124
125	return nil
126}
127
// Post holds the statistics of the upload-pack (POST) request. All durations
// are measured relative to start.
type Post struct {
	// start is taken in doPost right before the request is sent.
	start             time.Time
	// responseHeader is the time until the HTTP client returned the response
	// and its status was accepted.
	responseHeader    time.Duration
	// httpStatus is the status code of the accepted response.
	httpStatus        int
	// nak is the time until the initial "NAK\n" packet was read.
	nak               time.Duration
	// multiband maps a band name (see Bands) to the statistics of that band.
	// It is nil until doPost has started reading the response.
	multiband         map[string]*bandInfo
	// responseBody is the time until the whole response was consumed
	// successfully.
	responseBody      time.Duration
	// packets counts every packet scanned, including the NAK and the flush.
	packets           int
	// largestPacketSize is the maximum length of the raw bytes returned by
	// the packet scanner for a single packet.
	largestPacketSize int
}
138
139func (p *Post) ResponseHeader() time.Duration { return p.responseHeader }
140func (p *Post) HTTPStatus() int               { return p.httpStatus }
141func (p *Post) NAK() time.Duration            { return p.nak }
142func (p *Post) ResponseBody() time.Duration   { return p.responseBody }
143func (p *Post) Packets() int                  { return p.packets }
144func (p *Post) LargestPacketSize() int        { return p.largestPacketSize }
145
146func (p *Post) BandPackets(b string) int               { return p.multiband[b].packets }
147func (p *Post) BandPayloadSize(b string) int64         { return p.multiband[b].size }
148func (p *Post) BandFirstPacket(b string) time.Duration { return p.multiband[b].firstPacket }
149
// bandInfo accumulates statistics for a single side-band.
type bandInfo struct {
	// firstPacket is the time between the request start and the first packet
	// consumed on this band.
	firstPacket time.Duration
	// size is the total number of bytes consumed on this band.
	size int64
	// packets is the number of packets consumed on this band.
	packets int
}

// consume accounts for one packet carrying data. The first call additionally
// records the time elapsed since start as the band's first-packet latency.
func (bi *bandInfo) consume(start time.Time, data []byte) {
	isFirst := bi.packets == 0

	bi.packets++
	bi.size += int64(len(data))

	if isFirst {
		bi.firstPacket = time.Since(start)
	}
}
163
164// See
165// https://github.com/git/git/blob/v2.25.0/Documentation/technical/http-protocol.txt#L351
166// for background information.
167func (cl *Clone) buildPost(ctx context.Context) (*http.Request, error) {
168	reqBodyRaw := &bytes.Buffer{}
169	reqBodyGzip := gzip.NewWriter(reqBodyRaw)
170	for i, oid := range cl.wants {
171		if i == 0 {
172			oid += " multi_ack_detailed no-done side-band-64k thin-pack ofs-delta deepen-since deepen-not agent=git/2.21.0"
173		}
174		if _, err := pktline.WriteString(reqBodyGzip, "want "+oid+"\n"); err != nil {
175			return nil, err
176		}
177	}
178	if err := pktline.WriteFlush(reqBodyGzip); err != nil {
179		return nil, err
180	}
181	if _, err := pktline.WriteString(reqBodyGzip, "done\n"); err != nil {
182		return nil, err
183	}
184	if err := reqBodyGzip.Close(); err != nil {
185		return nil, err
186	}
187
188	req, err := http.NewRequest("POST", cl.URL+"/git-upload-pack", reqBodyRaw)
189	if err != nil {
190		return nil, err
191	}
192
193	req = req.WithContext(ctx)
194	if cl.User != "" {
195		req.SetBasicAuth(cl.User, cl.Password)
196	}
197
198	for k, v := range map[string]string{
199		"User-Agent":       "gitaly-debug",
200		"Content-Type":     "application/x-git-upload-pack-request",
201		"Accept":           "application/x-git-upload-pack-result",
202		"Content-Encoding": "gzip",
203	} {
204		req.Header.Set(k, v)
205	}
206
207	return req, nil
208}
209
// doPost sends the upload-pack request built by buildPost and reads the
// response packet by packet, recording timing and size statistics in cl.Post.
// The payload itself is not kept. In interactive mode the payload of
// progress-band packets is written to stdout, and a dot is printed whenever
// the packet counter is a multiple of 500 and the current packet is on the
// pack band.
//
// It returns an error if the request cannot be built or sent, if the HTTP
// status is outside [200, 400), if the first packet is not "NAK\n", if a
// packet follows the flush packet, if a non-flush packet has no payload, if
// the band byte is invalid, if writing to stdout fails, if the scanner reports
// an error, or if the response does not end with a flush packet.
func (cl *Clone) doPost(ctx context.Context) error {
	req, err := cl.buildPost(ctx)
	if err != nil {
		return err
	}

	cl.Post.start = time.Now()
	cl.printInteractive("---")
	cl.printInteractive("--- POST %v", req.URL)
	cl.printInteractive("---")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if code := resp.StatusCode; code < 200 || code >= 400 {
		return fmt.Errorf("git http post: unexpected http status: %d", code)
	}

	cl.Post.responseHeader = time.Since(cl.Post.start)
	cl.Post.httpStatus = resp.StatusCode
	cl.printInteractive("response code: %d", resp.StatusCode)
	cl.printInteractive("response header: %v", resp.Header)

	// Expected response:
	// - "NAK\n"
	// - "<side band byte><pack or progress or error data>"
	// - ...
	// - FLUSH
	//

	// Pre-populate an entry for every known band so that the lookup in the
	// loop below never yields a nil *bandInfo.
	cl.Post.multiband = make(map[string]*bandInfo)
	for _, band := range Bands() {
		cl.Post.multiband[band] = &bandInfo{}
	}

	seenFlush := false

	scanner := pktline.NewScanner(resp.Body)
	// The post statement also runs after each `continue`, so the NAK and the
	// flush packet are included in cl.Post.packets.
	for ; scanner.Scan(); cl.Post.packets++ {
		if seenFlush {
			return errors.New("received extra packet after flush")
		}

		// The size is taken from the raw scanner bytes, not from the payload
		// extracted below.
		if n := len(scanner.Bytes()); n > cl.Post.largestPacketSize {
			cl.Post.largestPacketSize = n
		}

		data := pktline.Data(scanner.Bytes())

		if cl.Post.packets == 0 {
			// We're now looking at the first git packet sent by the server. The
			// server must conclude the ref negotiation. Because we have not sent any
			// "have" messages there is nothing to negotiate and the server should
			// send a single NAK.
			if !bytes.Equal([]byte("NAK\n"), data) {
				return fmt.Errorf("expected NAK, got %q", data)
			}
			cl.Post.nak = time.Since(cl.Post.start)
			continue
		}

		if pktline.IsFlush(scanner.Bytes()) {
			seenFlush = true
			continue
		}

		if len(data) == 0 {
			return errors.New("empty packet in PACK data")
		}

		// The first payload byte selects the band; the rest is the band's data.
		band, err := bandToHuman(data[0])
		if err != nil {
			return err
		}

		cl.Post.multiband[band].consume(cl.Post.start, data[1:])

		// Print progress data as-is
		if cl.Interactive && band == bandProgress {
			if _, err := os.Stdout.Write(data[1:]); err != nil {
				return err
			}
		}

		// cl.Post.packets is always > 0 at this point because the first packet
		// is handled (and skipped) above.
		if cl.Interactive && cl.Post.packets%500 == 0 && cl.Post.packets > 0 && band == bandPack {
			// Print dots to have some sort of progress meter for the user in
			// interactive mode. It's not accurate progress, but it shows that
			// something is happening.
			if _, err := fmt.Print("."); err != nil {
				return err
			}
		}
	}

	if cl.Interactive {
		// Trailing newline for progress dots.
		if _, err := fmt.Println(""); err != nil {
			return err
		}
	}

	if err := scanner.Err(); err != nil {
		return err
	}
	if !seenFlush {
		return errors.New("POST response did not end in flush")
	}

	// Only a complete, well-formed response gets a body duration recorded.
	cl.Post.responseBody = time.Since(cl.Post.start)
	return nil
}
324
325func (cl *Clone) printInteractive(format string, a ...interface{}) error {
326	if !cl.Interactive {
327		return nil
328	}
329
330	if _, err := fmt.Println(fmt.Sprintf(format, a...)); err != nil {
331		return err
332	}
333
334	return nil
335}
336
// Human-readable names of the side-bands, used as keys for per-band
// statistics.
const (
	bandPack     = "pack"
	bandProgress = "progress"
	bandError    = "error"
)

// Bands returns the names of the bands git uses to multiplex different kinds
// of data over one connection, ordered by their band index. A new slice is
// returned on every call. See
// https://git-scm.com/docs/protocol-capabilities/2.24.0#_side_band_side_band_64k
// for more information about the different bands.
func Bands() []string {
	return []string{
		bandPack,
		bandProgress,
		bandError,
	}
}

// bandToHuman translates the band byte found at the start of a side-band
// packet into the band's name. It returns an error for bytes that do not
// correspond to a known band.
func bandToHuman(b byte) (string, error) {
	names := Bands()

	// Band bytes start counting at 1, slice indices at 0.
	idx := int(b) - 1
	if idx < 0 || idx >= len(names) {
		return "", fmt.Errorf("invalid band index: %d", b)
	}

	return names[idx], nil
}
359