1package stats 2 3import ( 4 "bytes" 5 "compress/gzip" 6 "context" 7 "errors" 8 "fmt" 9 "io" 10 "io/ioutil" 11 "net/http" 12 "os" 13 "strings" 14 "time" 15 16 "gitlab.com/gitlab-org/gitaly/v14/internal/git/pktline" 17) 18 19type Clone struct { 20 URL string 21 Interactive bool 22 User string 23 Password string 24 25 wants []string // all branch and tag pointers 26 Get 27 Post 28} 29 30func (cl *Clone) RefsWanted() int { return len(cl.wants) } 31 32// Perform does a Git HTTP clone, discarding cloned data to /dev/null. 33func (cl *Clone) Perform(ctx context.Context) error { 34 if err := cl.doGet(ctx); err != nil { 35 return ctxErr(ctx, err) 36 } 37 38 if err := cl.doPost(ctx); err != nil { 39 return ctxErr(ctx, err) 40 } 41 42 return nil 43} 44 45func ctxErr(ctx context.Context, err error) error { 46 if ctx.Err() != nil { 47 return ctx.Err() 48 } 49 return err 50} 51 52type Get struct { 53 start time.Time 54 responseHeader time.Duration 55 httpStatus int 56 ReferenceDiscovery 57} 58 59func (g *Get) ResponseHeader() time.Duration { return g.responseHeader } 60func (g *Get) HTTPStatus() int { return g.httpStatus } 61func (g *Get) FirstGitPacket() time.Duration { return g.FirstPacket.Sub(g.start) } 62func (g *Get) ResponseBody() time.Duration { return g.LastPacket.Sub(g.start) } 63 64func (cl *Clone) doGet(ctx context.Context) error { 65 req, err := http.NewRequest("GET", cl.URL+"/info/refs?service=git-upload-pack", nil) 66 if err != nil { 67 return err 68 } 69 70 req = req.WithContext(ctx) 71 if cl.User != "" { 72 req.SetBasicAuth(cl.User, cl.Password) 73 } 74 75 for k, v := range map[string]string{ 76 "User-Agent": "gitaly-debug", 77 "Accept": "*/*", 78 "Accept-Encoding": "deflate, gzip", 79 "Pragma": "no-cache", 80 } { 81 req.Header.Set(k, v) 82 } 83 84 cl.Get.start = time.Now() 85 cl.printInteractive("---") 86 cl.printInteractive("--- GET %v", req.URL) 87 cl.printInteractive("---") 88 89 resp, err := http.DefaultClient.Do(req) 90 if err != nil { 91 return err 92 } 93 defer func() { 94 io.Copy(ioutil.Discard, resp.Body) 95 resp.Body.Close() 96 }() 97 98 if code := resp.StatusCode; code < 200 || code >= 400 { 99 return fmt.Errorf("git http get: unexpected http status: %d", code) 100 } 101 102 cl.Get.responseHeader = time.Since(cl.Get.start) 103 cl.Get.httpStatus = resp.StatusCode 104 cl.printInteractive("response code: %d", resp.StatusCode) 105 cl.printInteractive("response header: %v", resp.Header) 106 107 body := resp.Body 108 if resp.Header.Get("Content-Encoding") == "gzip" { 109 body, err = gzip.NewReader(body) 110 if err != nil { 111 return err 112 } 113 } 114 115 if err := cl.Get.Parse(body); err != nil { 116 return err 117 } 118 119 for _, ref := range cl.Get.Refs { 120 if strings.HasPrefix(ref.Name, "refs/heads/") || strings.HasPrefix(ref.Name, "refs/tags/") { 121 cl.wants = append(cl.wants, ref.Oid) 122 } 123 } 124 125 return nil 126} 127 128type Post struct { 129 start time.Time 130 responseHeader time.Duration 131 httpStatus int 132 nak time.Duration 133 multiband map[string]*bandInfo 134 responseBody time.Duration 135 packets int 136 largestPacketSize int 137} 138 139func (p *Post) ResponseHeader() time.Duration { return p.responseHeader } 140func (p *Post) HTTPStatus() int { return p.httpStatus } 141func (p *Post) NAK() time.Duration { return p.nak } 142func (p *Post) ResponseBody() time.Duration { return p.responseBody } 143func (p *Post) Packets() int { return p.packets } 144func (p *Post) LargestPacketSize() int { return p.largestPacketSize } 145 146func (p *Post) BandPackets(b string) int { return p.multiband[b].packets } 147func (p *Post) BandPayloadSize(b string) int64 { return p.multiband[b].size } 148func (p *Post) BandFirstPacket(b string) time.Duration { return p.multiband[b].firstPacket } 149 150type bandInfo struct { 151 firstPacket time.Duration 152 size int64 153 packets int 154} 155 156func (bi *bandInfo) consume(start time.Time, data []byte) { 157 if bi.packets == 0 { 158 bi.firstPacket = time.Since(start) 159 } 160 bi.size += int64(len(data)) 161 bi.packets++ 162} 163 164// See 165// https://github.com/git/git/blob/v2.25.0/Documentation/technical/http-protocol.txt#L351 166// for background information. 167func (cl *Clone) buildPost(ctx context.Context) (*http.Request, error) { 168 reqBodyRaw := &bytes.Buffer{} 169 reqBodyGzip := gzip.NewWriter(reqBodyRaw) 170 for i, oid := range cl.wants { 171 if i == 0 { 172 oid += " multi_ack_detailed no-done side-band-64k thin-pack ofs-delta deepen-since deepen-not agent=git/2.21.0" 173 } 174 if _, err := pktline.WriteString(reqBodyGzip, "want "+oid+"\n"); err != nil { 175 return nil, err 176 } 177 } 178 if err := pktline.WriteFlush(reqBodyGzip); err != nil { 179 return nil, err 180 } 181 if _, err := pktline.WriteString(reqBodyGzip, "done\n"); err != nil { 182 return nil, err 183 } 184 if err := reqBodyGzip.Close(); err != nil { 185 return nil, err 186 } 187 188 req, err := http.NewRequest("POST", cl.URL+"/git-upload-pack", reqBodyRaw) 189 if err != nil { 190 return nil, err 191 } 192 193 req = req.WithContext(ctx) 194 if cl.User != "" { 195 req.SetBasicAuth(cl.User, cl.Password) 196 } 197 198 for k, v := range map[string]string{ 199 "User-Agent": "gitaly-debug", 200 "Content-Type": "application/x-git-upload-pack-request", 201 "Accept": "application/x-git-upload-pack-result", 202 "Content-Encoding": "gzip", 203 } { 204 req.Header.Set(k, v) 205 } 206 207 return req, nil 208} 209 210func (cl *Clone) doPost(ctx context.Context) error { 211 req, err := cl.buildPost(ctx) 212 if err != nil { 213 return err 214 } 215 216 cl.Post.start = time.Now() 217 cl.printInteractive("---") 218 cl.printInteractive("--- POST %v", req.URL) 219 cl.printInteractive("---") 220 221 resp, err := http.DefaultClient.Do(req) 222 if err != nil { 223 return err 224 } 225 defer resp.Body.Close() 226 227 if code := resp.StatusCode; code < 200 || code >= 400 { 228 return fmt.Errorf("git http post: unexpected http status: %d", code) 229 } 230 231 cl.Post.responseHeader = time.Since(cl.Post.start) 232 cl.Post.httpStatus = resp.StatusCode 233 cl.printInteractive("response code: %d", resp.StatusCode) 234 cl.printInteractive("response header: %v", resp.Header) 235 236 // Expected response: 237 // - "NAK\n" 238 // - "<side band byte><pack or progress or error data> 239 // - ... 240 // - FLUSH 241 // 242 243 cl.Post.multiband = make(map[string]*bandInfo) 244 for _, band := range Bands() { 245 cl.Post.multiband[band] = &bandInfo{} 246 } 247 248 seenFlush := false 249 250 scanner := pktline.NewScanner(resp.Body) 251 for ; scanner.Scan(); cl.Post.packets++ { 252 if seenFlush { 253 return errors.New("received extra packet after flush") 254 } 255 256 if n := len(scanner.Bytes()); n > cl.Post.largestPacketSize { 257 cl.Post.largestPacketSize = n 258 } 259 260 data := pktline.Data(scanner.Bytes()) 261 262 if cl.Post.packets == 0 { 263 // We're now looking at the first git packet sent by the server. The 264 // server must conclude the ref negotiation. Because we have not sent any 265 // "have" messages there is nothing to negotiate and the server should 266 // send a single NAK. 267 if !bytes.Equal([]byte("NAK\n"), data) { 268 return fmt.Errorf("expected NAK, got %q", data) 269 } 270 cl.Post.nak = time.Since(cl.Post.start) 271 continue 272 } 273 274 if pktline.IsFlush(scanner.Bytes()) { 275 seenFlush = true 276 continue 277 } 278 279 if len(data) == 0 { 280 return errors.New("empty packet in PACK data") 281 } 282 283 band, err := bandToHuman(data[0]) 284 if err != nil { 285 return err 286 } 287 288 cl.Post.multiband[band].consume(cl.Post.start, data[1:]) 289 290 // Print progress data as-is 291 if cl.Interactive && band == bandProgress { 292 if _, err := os.Stdout.Write(data[1:]); err != nil { 293 return err 294 } 295 } 296 297 if cl.Interactive && cl.Post.packets%500 == 0 && cl.Post.packets > 0 && band == bandPack { 298 // Print dots to have some sort of progress meter for the user in 299 // interactive mode. It's not accurate progress, but it shows that 300 // something is happening. 301 if _, err := fmt.Print("."); err != nil { 302 return err 303 } 304 } 305 } 306 307 if cl.Interactive { 308 // Trailing newline for progress dots. 309 if _, err := fmt.Println(""); err != nil { 310 return err 311 } 312 } 313 314 if err := scanner.Err(); err != nil { 315 return err 316 } 317 if !seenFlush { 318 return errors.New("POST response did not end in flush") 319 } 320 321 cl.Post.responseBody = time.Since(cl.Post.start) 322 return nil 323} 324 325func (cl *Clone) printInteractive(format string, a ...interface{}) error { 326 if !cl.Interactive { 327 return nil 328 } 329 330 if _, err := fmt.Println(fmt.Sprintf(format, a...)); err != nil { 331 return err 332 } 333 334 return nil 335} 336 337const ( 338 bandPack = "pack" 339 bandProgress = "progress" 340 bandError = "error" 341) 342 343// Bands returns the slice of bands which git uses to transport different kinds 344// of data in a multiplexed way. See 345// https://git-scm.com/docs/protocol-capabilities/2.24.0#_side_band_side_band_64k 346// for more information about the different bands. 347func Bands() []string { return []string{bandPack, bandProgress, bandError} } 348 349func bandToHuman(b byte) (string, error) { 350 bands := Bands() 351 352 // Band index bytes are 1-indexed. 353 if b < 1 || int(b) > len(bands) { 354 return "", fmt.Errorf("invalid band index: %d", b) 355 } 356 357 return bands[b-1], nil 358} 359