package packfile

import (
	"bufio"
	"bytes"
	"compress/zlib"
	"fmt"
	"hash"
	"hash/crc32"
	"io"
	stdioutil "io/ioutil"
	"sync"

	"github.com/go-git/go-git/v5/plumbing"
	"github.com/go-git/go-git/v5/utils/binary"
	"github.com/go-git/go-git/v5/utils/ioutil"
)

// Sentinel errors reported while scanning a packfile.
var (
	// ErrEmptyPackfile is returned by Scanner.Header when the reader reports
	// io.EOF while the packfile signature is being read.
	ErrEmptyPackfile = NewError("empty packfile")
	// ErrBadSignature is returned by Scanner.Header when the signature in the
	// packfile is incorrect.
	ErrBadSignature = NewError("malformed pack file signature")
	// ErrUnsupportedVersion is returned by Scanner.Header when the packfile
	// version is different from VersionSupported.
	ErrUnsupportedVersion = NewError("unsupported packfile version")
	// ErrSeekNotSupported is returned when a seek is requested but the
	// underlying reader does not implement io.ReadSeeker.
	ErrSeekNotSupported = NewError("not seek support")
)

// ObjectHeader contains the information related to an object; this information
// is collected from the bytes that precede the content of the object.
33type ObjectHeader struct { 34 Type plumbing.ObjectType 35 Offset int64 36 Length int64 37 Reference plumbing.Hash 38 OffsetReference int64 39} 40 41type Scanner struct { 42 r *scannerReader 43 crc hash.Hash32 44 45 // pendingObject is used to detect if an object has been read, or still 46 // is waiting to be read 47 pendingObject *ObjectHeader 48 version, objects uint32 49 50 // lsSeekable says if this scanner can do Seek or not, to have a Scanner 51 // seekable a r implementing io.Seeker is required 52 IsSeekable bool 53} 54 55// NewScanner returns a new Scanner based on a reader, if the given reader 56// implements io.ReadSeeker the Scanner will be also Seekable 57func NewScanner(r io.Reader) *Scanner { 58 _, ok := r.(io.ReadSeeker) 59 60 crc := crc32.NewIEEE() 61 return &Scanner{ 62 r: newScannerReader(r, crc), 63 crc: crc, 64 IsSeekable: ok, 65 } 66} 67 68func (s *Scanner) Reset(r io.Reader) { 69 _, ok := r.(io.ReadSeeker) 70 71 s.r.Reset(r) 72 s.crc.Reset() 73 s.IsSeekable = ok 74 s.pendingObject = nil 75 s.version = 0 76 s.objects = 0 77} 78 79// Header reads the whole packfile header (signature, version and object count). 80// It returns the version and the object count and performs checks on the 81// validity of the signature and the version fields. 82func (s *Scanner) Header() (version, objects uint32, err error) { 83 if s.version != 0 { 84 return s.version, s.objects, nil 85 } 86 87 sig, err := s.readSignature() 88 if err != nil { 89 if err == io.EOF { 90 err = ErrEmptyPackfile 91 } 92 93 return 94 } 95 96 if !s.isValidSignature(sig) { 97 err = ErrBadSignature 98 return 99 } 100 101 version, err = s.readVersion() 102 s.version = version 103 if err != nil { 104 return 105 } 106 107 if !s.isSupportedVersion(version) { 108 err = ErrUnsupportedVersion.AddDetails("%d", version) 109 return 110 } 111 112 objects, err = s.readCount() 113 s.objects = objects 114 return 115} 116 117// readSignature reads an returns the signature field in the packfile. 
118func (s *Scanner) readSignature() ([]byte, error) { 119 var sig = make([]byte, 4) 120 if _, err := io.ReadFull(s.r, sig); err != nil { 121 return []byte{}, err 122 } 123 124 return sig, nil 125} 126 127// isValidSignature returns if sig is a valid packfile signature. 128func (s *Scanner) isValidSignature(sig []byte) bool { 129 return bytes.Equal(sig, signature) 130} 131 132// readVersion reads and returns the version field of a packfile. 133func (s *Scanner) readVersion() (uint32, error) { 134 return binary.ReadUint32(s.r) 135} 136 137// isSupportedVersion returns whether version v is supported by the parser. 138// The current supported version is VersionSupported, defined above. 139func (s *Scanner) isSupportedVersion(v uint32) bool { 140 return v == VersionSupported 141} 142 143// readCount reads and returns the count of objects field of a packfile. 144func (s *Scanner) readCount() (uint32, error) { 145 return binary.ReadUint32(s.r) 146} 147 148// SeekObjectHeader seeks to specified offset and returns the ObjectHeader 149// for the next object in the reader 150func (s *Scanner) SeekObjectHeader(offset int64) (*ObjectHeader, error) { 151 // if seeking we assume that you are not interested in the header 152 if s.version == 0 { 153 s.version = VersionSupported 154 } 155 156 if _, err := s.r.Seek(offset, io.SeekStart); err != nil { 157 return nil, err 158 } 159 160 h, err := s.nextObjectHeader() 161 if err != nil { 162 return nil, err 163 } 164 165 h.Offset = offset 166 return h, nil 167} 168 169// NextObjectHeader returns the ObjectHeader for the next object in the reader 170func (s *Scanner) NextObjectHeader() (*ObjectHeader, error) { 171 if err := s.doPending(); err != nil { 172 return nil, err 173 } 174 175 offset, err := s.r.Seek(0, io.SeekCurrent) 176 if err != nil { 177 return nil, err 178 } 179 180 h, err := s.nextObjectHeader() 181 if err != nil { 182 return nil, err 183 } 184 185 h.Offset = offset 186 return h, nil 187} 188 189// nextObjectHeader returns 
the ObjectHeader for the next object in the reader 190// without the Offset field 191func (s *Scanner) nextObjectHeader() (*ObjectHeader, error) { 192 s.r.Flush() 193 s.crc.Reset() 194 195 h := &ObjectHeader{} 196 s.pendingObject = h 197 198 var err error 199 h.Offset, err = s.r.Seek(0, io.SeekCurrent) 200 if err != nil { 201 return nil, err 202 } 203 204 h.Type, h.Length, err = s.readObjectTypeAndLength() 205 if err != nil { 206 return nil, err 207 } 208 209 switch h.Type { 210 case plumbing.OFSDeltaObject: 211 no, err := binary.ReadVariableWidthInt(s.r) 212 if err != nil { 213 return nil, err 214 } 215 216 h.OffsetReference = h.Offset - no 217 case plumbing.REFDeltaObject: 218 var err error 219 h.Reference, err = binary.ReadHash(s.r) 220 if err != nil { 221 return nil, err 222 } 223 } 224 225 return h, nil 226} 227 228func (s *Scanner) doPending() error { 229 if s.version == 0 { 230 var err error 231 s.version, s.objects, err = s.Header() 232 if err != nil { 233 return err 234 } 235 } 236 237 return s.discardObjectIfNeeded() 238} 239 240func (s *Scanner) discardObjectIfNeeded() error { 241 if s.pendingObject == nil { 242 return nil 243 } 244 245 h := s.pendingObject 246 n, _, err := s.NextObject(stdioutil.Discard) 247 if err != nil { 248 return err 249 } 250 251 if n != h.Length { 252 return fmt.Errorf( 253 "error discarding object, discarded %d, expected %d", 254 n, h.Length, 255 ) 256 } 257 258 return nil 259} 260 261// ReadObjectTypeAndLength reads and returns the object type and the 262// length field from an object entry in a packfile. 
263func (s *Scanner) readObjectTypeAndLength() (plumbing.ObjectType, int64, error) { 264 t, c, err := s.readType() 265 if err != nil { 266 return t, 0, err 267 } 268 269 l, err := s.readLength(c) 270 271 return t, l, err 272} 273 274func (s *Scanner) readType() (plumbing.ObjectType, byte, error) { 275 var c byte 276 var err error 277 if c, err = s.r.ReadByte(); err != nil { 278 return plumbing.ObjectType(0), 0, err 279 } 280 281 typ := parseType(c) 282 283 return typ, c, nil 284} 285 286func parseType(b byte) plumbing.ObjectType { 287 return plumbing.ObjectType((b & maskType) >> firstLengthBits) 288} 289 290// the length is codified in the last 4 bits of the first byte and in 291// the last 7 bits of subsequent bytes. Last byte has a 0 MSB. 292func (s *Scanner) readLength(first byte) (int64, error) { 293 length := int64(first & maskFirstLength) 294 295 c := first 296 shift := firstLengthBits 297 var err error 298 for c&maskContinue > 0 { 299 if c, err = s.r.ReadByte(); err != nil { 300 return 0, err 301 } 302 303 length += int64(c&maskLength) << shift 304 shift += lengthBits 305 } 306 307 return length, nil 308} 309 310// NextObject writes the content of the next object into the reader, returns 311// the number of bytes written, the CRC32 of the content and an error, if any 312func (s *Scanner) NextObject(w io.Writer) (written int64, crc32 uint32, err error) { 313 s.pendingObject = nil 314 written, err = s.copyObject(w) 315 316 s.r.Flush() 317 crc32 = s.crc.Sum32() 318 s.crc.Reset() 319 320 return 321} 322 323// ReadObject returns a reader for the object content and an error 324func (s *Scanner) ReadObject() (io.ReadCloser, error) { 325 s.pendingObject = nil 326 zr := zlibReaderPool.Get().(io.ReadCloser) 327 328 if err := zr.(zlib.Resetter).Reset(s.r, nil); err != nil { 329 return nil, fmt.Errorf("zlib reset error: %s", err) 330 } 331 332 return ioutil.NewReadCloserWithCloser(zr, func() error { 333 zlibReaderPool.Put(zr) 334 return nil 335 }), nil 336} 337 338// 
ReadRegularObject reads and write a non-deltified object 339// from it zlib stream in an object entry in the packfile. 340func (s *Scanner) copyObject(w io.Writer) (n int64, err error) { 341 zr := zlibReaderPool.Get().(io.ReadCloser) 342 defer zlibReaderPool.Put(zr) 343 344 if err = zr.(zlib.Resetter).Reset(s.r, nil); err != nil { 345 return 0, fmt.Errorf("zlib reset error: %s", err) 346 } 347 348 defer ioutil.CheckClose(zr, &err) 349 buf := byteSlicePool.Get().([]byte) 350 n, err = io.CopyBuffer(w, zr, buf) 351 byteSlicePool.Put(buf) 352 return 353} 354 355var byteSlicePool = sync.Pool{ 356 New: func() interface{} { 357 return make([]byte, 32*1024) 358 }, 359} 360 361// SeekFromStart sets a new offset from start, returns the old position before 362// the change. 363func (s *Scanner) SeekFromStart(offset int64) (previous int64, err error) { 364 // if seeking we assume that you are not interested in the header 365 if s.version == 0 { 366 s.version = VersionSupported 367 } 368 369 previous, err = s.r.Seek(0, io.SeekCurrent) 370 if err != nil { 371 return -1, err 372 } 373 374 _, err = s.r.Seek(offset, io.SeekStart) 375 return previous, err 376} 377 378// Checksum returns the checksum of the packfile 379func (s *Scanner) Checksum() (plumbing.Hash, error) { 380 err := s.discardObjectIfNeeded() 381 if err != nil { 382 return plumbing.ZeroHash, err 383 } 384 385 return binary.ReadHash(s.r) 386} 387 388// Close reads the reader until io.EOF 389func (s *Scanner) Close() error { 390 buf := byteSlicePool.Get().([]byte) 391 _, err := io.CopyBuffer(stdioutil.Discard, s.r, buf) 392 byteSlicePool.Put(buf) 393 return err 394} 395 396// Flush is a no-op (deprecated) 397func (s *Scanner) Flush() error { 398 return nil 399} 400 401// scannerReader has the following characteristics: 402// - Provides an io.SeekReader impl for bufio.Reader, when the underlying 403// reader supports it. 
//   - Keeps track of the current read position, for when the underlying reader
//     isn't an io.ReadSeeker, but we still want to know the current offset.
//   - Writes to the hash writer what it reads, with the aid of a smaller buffer.
//     The buffer helps avoid a performance penalty for performing small writes
//     to the crc32 hash writer.
type scannerReader struct {
	reader io.Reader     // source handed to Reset
	crc    io.Writer     // receives a copy of every byte read
	rbuf   *bufio.Reader // read buffer on top of reader
	wbuf   *bufio.Writer // small write buffer in front of crc
	offset int64         // logical position of the next byte to be read
}

// newScannerReader returns a scannerReader that reads from r and copies
// everything it reads into h through a 64-byte write buffer.
func newScannerReader(r io.Reader, h io.Writer) *scannerReader {
	sr := &scannerReader{
		rbuf: bufio.NewReader(nil),
		wbuf: bufio.NewWriterSize(nil, 64),
		crc:  h,
	}
	sr.Reset(r)

	return sr
}

// Reset switches to a new source, dropping whatever is held in the read and
// write buffers. The offset starts at the current position of the reader
// when it is an io.ReadSeeker that can report it, and at 0 otherwise.
func (r *scannerReader) Reset(reader io.Reader) {
	r.reader = reader
	r.rbuf.Reset(r.reader)
	r.wbuf.Reset(r.crc)

	r.offset = 0
	if seeker, ok := r.reader.(io.ReadSeeker); ok {
		// Best effort: a seeker that cannot report its position must not
		// leave a bogus value (e.g. -1) behind as the starting offset.
		if pos, err := seeker.Seek(0, io.SeekCurrent); err == nil {
			r.offset = pos
		}
	}
}

// Read implements io.Reader. The bytes read advance the offset and are
// copied to the hash writer; a failure of that copy is returned as the error.
func (r *scannerReader) Read(p []byte) (n int, err error) {
	n, err = r.rbuf.Read(p)

	r.offset += int64(n)
	if _, werr := r.wbuf.Write(p[:n]); werr != nil {
		return n, werr
	}
	return n, err
}

// ReadByte implements io.ByteReader with the same bookkeeping as Read.
func (r *scannerReader) ReadByte() (b byte, err error) {
	b, err = r.rbuf.ReadByte()
	if err != nil {
		return b, err
	}

	r.offset++
	return b, r.wbuf.WriteByte(b)
}

// Flush pushes the bytes held in the write buffer into the hash writer.
func (r *scannerReader) Flush() error {
	return r.wbuf.Flush()
}

// Seek seeks to a location. If the underlying reader is not an io.ReadSeeker,
// then only querying the current position (offset 0 with whence=io.SeekCurrent)
// is supported, any other operation fails.
464func (r *scannerReader) Seek(offset int64, whence int) (int64, error) { 465 var err error 466 467 if seeker, ok := r.reader.(io.ReadSeeker); !ok { 468 if whence != io.SeekCurrent || offset != 0 { 469 return -1, ErrSeekNotSupported 470 } 471 } else { 472 if whence == io.SeekCurrent && offset == 0 { 473 return r.offset, nil 474 } 475 476 r.offset, err = seeker.Seek(offset, whence) 477 r.rbuf.Reset(r.reader) 478 } 479 480 return r.offset, err 481} 482