1package packfile 2 3import ( 4 "bytes" 5 "errors" 6 "io" 7 stdioutil "io/ioutil" 8 9 "github.com/go-git/go-git/v5/plumbing" 10 "github.com/go-git/go-git/v5/plumbing/cache" 11 "github.com/go-git/go-git/v5/plumbing/storer" 12 "github.com/go-git/go-git/v5/utils/ioutil" 13) 14 15var ( 16 // ErrReferenceDeltaNotFound is returned when the reference delta is not 17 // found. 18 ErrReferenceDeltaNotFound = errors.New("reference delta not found") 19 20 // ErrNotSeekableSource is returned when the source for the parser is not 21 // seekable and a storage was not provided, so it can't be parsed. 22 ErrNotSeekableSource = errors.New("parser source is not seekable and storage was not provided") 23 24 // ErrDeltaNotCached is returned when the delta could not be found in cache. 25 ErrDeltaNotCached = errors.New("delta could not be found in cache") 26) 27 28// Observer interface is implemented by index encoders. 29type Observer interface { 30 // OnHeader is called when a new packfile is opened. 31 OnHeader(count uint32) error 32 // OnInflatedObjectHeader is called for each object header read. 33 OnInflatedObjectHeader(t plumbing.ObjectType, objSize int64, pos int64) error 34 // OnInflatedObjectContent is called for each decoded object. 35 OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32, content []byte) error 36 // OnFooter is called when decoding is done. 37 OnFooter(h plumbing.Hash) error 38} 39 40// Parser decodes a packfile and calls any observer associated to it. Is used 41// to generate indexes. 42type Parser struct { 43 storage storer.EncodedObjectStorer 44 scanner *Scanner 45 count uint32 46 oi []*objectInfo 47 oiByHash map[plumbing.Hash]*objectInfo 48 oiByOffset map[int64]*objectInfo 49 hashOffset map[plumbing.Hash]int64 50 checksum plumbing.Hash 51 52 cache *cache.BufferLRU 53 // delta content by offset, only used if source is not seekable 54 deltas map[int64][]byte 55 56 ob []Observer 57} 58 59// NewParser creates a new Parser. The Scanner source must be seekable. 60// If it's not, NewParserWithStorage should be used instead. 61func NewParser(scanner *Scanner, ob ...Observer) (*Parser, error) { 62 return NewParserWithStorage(scanner, nil, ob...) 63} 64 65// NewParserWithStorage creates a new Parser. The scanner source must either 66// be seekable or a storage must be provided. 67func NewParserWithStorage( 68 scanner *Scanner, 69 storage storer.EncodedObjectStorer, 70 ob ...Observer, 71) (*Parser, error) { 72 if !scanner.IsSeekable && storage == nil { 73 return nil, ErrNotSeekableSource 74 } 75 76 var deltas map[int64][]byte 77 if !scanner.IsSeekable { 78 deltas = make(map[int64][]byte) 79 } 80 81 return &Parser{ 82 storage: storage, 83 scanner: scanner, 84 ob: ob, 85 count: 0, 86 cache: cache.NewBufferLRUDefault(), 87 deltas: deltas, 88 }, nil 89} 90 91func (p *Parser) forEachObserver(f func(o Observer) error) error { 92 for _, o := range p.ob { 93 if err := f(o); err != nil { 94 return err 95 } 96 } 97 return nil 98} 99 100func (p *Parser) onHeader(count uint32) error { 101 return p.forEachObserver(func(o Observer) error { 102 return o.OnHeader(count) 103 }) 104} 105 106func (p *Parser) onInflatedObjectHeader( 107 t plumbing.ObjectType, 108 objSize int64, 109 pos int64, 110) error { 111 return p.forEachObserver(func(o Observer) error { 112 return o.OnInflatedObjectHeader(t, objSize, pos) 113 }) 114} 115 116func (p *Parser) onInflatedObjectContent( 117 h plumbing.Hash, 118 pos int64, 119 crc uint32, 120 content []byte, 121) error { 122 return p.forEachObserver(func(o Observer) error { 123 return o.OnInflatedObjectContent(h, pos, crc, content) 124 }) 125} 126 127func (p *Parser) onFooter(h plumbing.Hash) error { 128 return p.forEachObserver(func(o Observer) error { 129 return o.OnFooter(h) 130 }) 131} 132 133// Parse start decoding phase of the packfile. 134func (p *Parser) Parse() (plumbing.Hash, error) { 135 if err := p.init(); err != nil { 136 return plumbing.ZeroHash, err 137 } 138 139 if err := p.indexObjects(); err != nil { 140 return plumbing.ZeroHash, err 141 } 142 143 var err error 144 p.checksum, err = p.scanner.Checksum() 145 if err != nil && err != io.EOF { 146 return plumbing.ZeroHash, err 147 } 148 149 if err := p.resolveDeltas(); err != nil { 150 return plumbing.ZeroHash, err 151 } 152 153 if err := p.onFooter(p.checksum); err != nil { 154 return plumbing.ZeroHash, err 155 } 156 157 return p.checksum, nil 158} 159 160func (p *Parser) init() error { 161 _, c, err := p.scanner.Header() 162 if err != nil { 163 return err 164 } 165 166 if err := p.onHeader(c); err != nil { 167 return err 168 } 169 170 p.count = c 171 p.oiByHash = make(map[plumbing.Hash]*objectInfo, p.count) 172 p.oiByOffset = make(map[int64]*objectInfo, p.count) 173 p.oi = make([]*objectInfo, p.count) 174 175 return nil 176} 177 178func (p *Parser) indexObjects() error { 179 buf := new(bytes.Buffer) 180 181 for i := uint32(0); i < p.count; i++ { 182 buf.Reset() 183 184 oh, err := p.scanner.NextObjectHeader() 185 if err != nil { 186 return err 187 } 188 189 delta := false 190 var ota *objectInfo 191 switch t := oh.Type; t { 192 case plumbing.OFSDeltaObject: 193 delta = true 194 195 parent, ok := p.oiByOffset[oh.OffsetReference] 196 if !ok { 197 return plumbing.ErrObjectNotFound 198 } 199 200 ota = newDeltaObject(oh.Offset, oh.Length, t, parent) 201 parent.Children = append(parent.Children, ota) 202 case plumbing.REFDeltaObject: 203 delta = true 204 parent, ok := p.oiByHash[oh.Reference] 205 if !ok { 206 // can't find referenced object in this pack file 207 // this must be a "thin" pack. 208 parent = &objectInfo{ //Placeholder parent 209 SHA1: oh.Reference, 210 ExternalRef: true, // mark as an external reference that must be resolved 211 Type: plumbing.AnyObject, 212 DiskType: plumbing.AnyObject, 213 } 214 p.oiByHash[oh.Reference] = parent 215 } 216 ota = newDeltaObject(oh.Offset, oh.Length, t, parent) 217 parent.Children = append(parent.Children, ota) 218 219 default: 220 ota = newBaseObject(oh.Offset, oh.Length, t) 221 } 222 223 _, crc, err := p.scanner.NextObject(buf) 224 if err != nil { 225 return err 226 } 227 228 ota.Crc32 = crc 229 ota.Length = oh.Length 230 231 data := buf.Bytes() 232 if !delta { 233 sha1, err := getSHA1(ota.Type, data) 234 if err != nil { 235 return err 236 } 237 238 ota.SHA1 = sha1 239 p.oiByHash[ota.SHA1] = ota 240 } 241 242 if p.storage != nil && !delta { 243 obj := new(plumbing.MemoryObject) 244 obj.SetSize(oh.Length) 245 obj.SetType(oh.Type) 246 if _, err := obj.Write(data); err != nil { 247 return err 248 } 249 250 if _, err := p.storage.SetEncodedObject(obj); err != nil { 251 return err 252 } 253 } 254 255 if delta && !p.scanner.IsSeekable { 256 p.deltas[oh.Offset] = make([]byte, len(data)) 257 copy(p.deltas[oh.Offset], data) 258 } 259 260 p.oiByOffset[oh.Offset] = ota 261 p.oi[i] = ota 262 } 263 264 return nil 265} 266 267func (p *Parser) resolveDeltas() error { 268 buf := &bytes.Buffer{} 269 for _, obj := range p.oi { 270 buf.Reset() 271 err := p.get(obj, buf) 272 if err != nil { 273 return err 274 } 275 content := buf.Bytes() 276 277 if err := p.onInflatedObjectHeader(obj.Type, obj.Length, obj.Offset); err != nil { 278 return err 279 } 280 281 if err := p.onInflatedObjectContent(obj.SHA1, obj.Offset, obj.Crc32, content); err != nil { 282 return err 283 } 284 285 if !obj.IsDelta() && len(obj.Children) > 0 { 286 for _, child := range obj.Children { 287 if err := p.resolveObject(stdioutil.Discard, child, content); err != nil { 288 return err 289 } 290 } 291 292 // Remove the delta from the cache. 293 if obj.DiskType.IsDelta() && !p.scanner.IsSeekable { 294 delete(p.deltas, obj.Offset) 295 } 296 } 297 } 298 299 return nil 300} 301 302func (p *Parser) get(o *objectInfo, buf *bytes.Buffer) (err error) { 303 if !o.ExternalRef { // skip cache check for placeholder parents 304 b, ok := p.cache.Get(o.Offset) 305 if ok { 306 _, err := buf.Write(b) 307 return err 308 } 309 } 310 311 // If it's not on the cache and is not a delta we can try to find it in the 312 // storage, if there's one. External refs must enter here. 313 if p.storage != nil && !o.Type.IsDelta() { 314 var e plumbing.EncodedObject 315 e, err = p.storage.EncodedObject(plumbing.AnyObject, o.SHA1) 316 if err != nil { 317 return err 318 } 319 o.Type = e.Type() 320 321 var r io.ReadCloser 322 r, err = e.Reader() 323 if err != nil { 324 return err 325 } 326 327 defer ioutil.CheckClose(r, &err) 328 329 _, err = buf.ReadFrom(io.LimitReader(r, e.Size())) 330 return err 331 } 332 333 if o.ExternalRef { 334 // we were not able to resolve a ref in a thin pack 335 return ErrReferenceDeltaNotFound 336 } 337 338 if o.DiskType.IsDelta() { 339 b := bufPool.Get().(*bytes.Buffer) 340 defer bufPool.Put(b) 341 b.Reset() 342 err := p.get(o.Parent, b) 343 if err != nil { 344 return err 345 } 346 base := b.Bytes() 347 348 err = p.resolveObject(buf, o, base) 349 if err != nil { 350 return err 351 } 352 } else { 353 err := p.readData(buf, o) 354 if err != nil { 355 return err 356 } 357 } 358 359 if len(o.Children) > 0 { 360 data := make([]byte, buf.Len()) 361 copy(data, buf.Bytes()) 362 p.cache.Put(o.Offset, data) 363 } 364 return nil 365} 366 367func (p *Parser) resolveObject( 368 w io.Writer, 369 o *objectInfo, 370 base []byte, 371) error { 372 if !o.DiskType.IsDelta() { 373 return nil 374 } 375 buf := bufPool.Get().(*bytes.Buffer) 376 defer bufPool.Put(buf) 377 buf.Reset() 378 err := p.readData(buf, o) 379 if err != nil { 380 return err 381 } 382 data := buf.Bytes() 383 384 data, err = applyPatchBase(o, data, base) 385 if err != nil { 386 return err 387 } 388 389 if p.storage != nil { 390 obj := new(plumbing.MemoryObject) 391 obj.SetSize(o.Size()) 392 obj.SetType(o.Type) 393 if _, err := obj.Write(data); err != nil { 394 return err 395 } 396 397 if _, err := p.storage.SetEncodedObject(obj); err != nil { 398 return err 399 } 400 } 401 _, err = w.Write(data) 402 return err 403} 404 405func (p *Parser) readData(w io.Writer, o *objectInfo) error { 406 if !p.scanner.IsSeekable && o.DiskType.IsDelta() { 407 data, ok := p.deltas[o.Offset] 408 if !ok { 409 return ErrDeltaNotCached 410 } 411 _, err := w.Write(data) 412 return err 413 } 414 415 if _, err := p.scanner.SeekObjectHeader(o.Offset); err != nil { 416 return err 417 } 418 419 if _, _, err := p.scanner.NextObject(w); err != nil { 420 return err 421 } 422 return nil 423} 424 425func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) { 426 patched, err := PatchDelta(base, data) 427 if err != nil { 428 return nil, err 429 } 430 431 if ota.SHA1 == plumbing.ZeroHash { 432 ota.Type = ota.Parent.Type 433 sha1, err := getSHA1(ota.Type, patched) 434 if err != nil { 435 return nil, err 436 } 437 438 ota.SHA1 = sha1 439 ota.Length = int64(len(patched)) 440 } 441 442 return patched, nil 443} 444 445func getSHA1(t plumbing.ObjectType, data []byte) (plumbing.Hash, error) { 446 hasher := plumbing.NewHasher(t, int64(len(data))) 447 if _, err := hasher.Write(data); err != nil { 448 return plumbing.ZeroHash, err 449 } 450 451 return hasher.Sum(), nil 452} 453 454type objectInfo struct { 455 Offset int64 456 Length int64 457 Type plumbing.ObjectType 458 DiskType plumbing.ObjectType 459 ExternalRef bool // indicates this is an external reference in a thin pack file 460 461 Crc32 uint32 462 463 Parent *objectInfo 464 Children []*objectInfo 465 SHA1 plumbing.Hash 466} 467 468func newBaseObject(offset, length int64, t plumbing.ObjectType) *objectInfo { 469 return newDeltaObject(offset, length, t, nil) 470} 471 472func newDeltaObject( 473 offset, length int64, 474 t plumbing.ObjectType, 475 parent *objectInfo, 476) *objectInfo { 477 obj := &objectInfo{ 478 Offset: offset, 479 Length: length, 480 Type: t, 481 DiskType: t, 482 Crc32: 0, 483 Parent: parent, 484 } 485 486 return obj 487} 488 489func (o *objectInfo) IsDelta() bool { 490 return o.Type.IsDelta() 491} 492 493func (o *objectInfo) Size() int64 { 494 return o.Length 495} 496