1package packfile 2 3import ( 4 "bytes" 5 6 "gopkg.in/src-d/go-git.v4/plumbing" 7 "gopkg.in/src-d/go-git.v4/plumbing/cache" 8 "gopkg.in/src-d/go-git.v4/plumbing/storer" 9) 10 11// Format specifies if the packfile uses ref-deltas or ofs-deltas. 12type Format int 13 14// Possible values of the Format type. 15const ( 16 UnknownFormat Format = iota 17 OFSDeltaFormat 18 REFDeltaFormat 19) 20 21var ( 22 // ErrMaxObjectsLimitReached is returned by Decode when the number 23 // of objects in the packfile is higher than 24 // Decoder.MaxObjectsLimit. 25 ErrMaxObjectsLimitReached = NewError("max. objects limit reached") 26 // ErrInvalidObject is returned by Decode when an invalid object is 27 // found in the packfile. 28 ErrInvalidObject = NewError("invalid git object") 29 // ErrPackEntryNotFound is returned by Decode when a reference in 30 // the packfile references and unknown object. 31 ErrPackEntryNotFound = NewError("can't find a pack entry") 32 // ErrZLib is returned by Decode when there was an error unzipping 33 // the packfile contents. 34 ErrZLib = NewError("zlib reading error") 35 // ErrCannotRecall is returned by RecallByOffset or RecallByHash if the object 36 // to recall cannot be returned. 37 ErrCannotRecall = NewError("cannot recall object") 38 // ErrResolveDeltasNotSupported is returned if a NewDecoder is used with a 39 // non-seekable scanner and without a plumbing.ObjectStorage 40 ErrResolveDeltasNotSupported = NewError("resolve delta is not supported") 41 // ErrNonSeekable is returned if a ReadObjectAt method is called without a 42 // seekable scanner 43 ErrNonSeekable = NewError("non-seekable scanner") 44 // ErrRollback error making Rollback over a transaction after an error 45 ErrRollback = NewError("rollback error, during set error") 46 // ErrAlreadyDecoded is returned if NewDecoder is called for a second time 47 ErrAlreadyDecoded = NewError("packfile was already decoded") 48) 49 50// Decoder reads and decodes packfiles from an input Scanner, if an ObjectStorer 51// was provided the decoded objects are store there. If not the decode object 52// is destroyed. The Offsets and CRCs are calculated whether an 53// ObjectStorer was provided or not. 54type Decoder struct { 55 DeltaBaseCache cache.Object 56 57 s *Scanner 58 o storer.EncodedObjectStorer 59 tx storer.Transaction 60 61 isDecoded bool 62 63 // hasBuiltIndex indicates if the index is fully built or not. If it is not, 64 // will be built incrementally while decoding. 65 hasBuiltIndex bool 66 idx *Index 67 68 offsetToType map[int64]plumbing.ObjectType 69 decoderType plumbing.ObjectType 70} 71 72// NewDecoder returns a new Decoder that decodes a Packfile using the given 73// Scanner and stores the objects in the provided EncodedObjectStorer. ObjectStorer can be nil, in this 74// If the passed EncodedObjectStorer is nil, objects are not stored, but 75// offsets on the Packfile and CRCs are calculated. 76// 77// If EncodedObjectStorer is nil and the Scanner is not Seekable, ErrNonSeekable is 78// returned. 79// 80// If the ObjectStorer implements storer.Transactioner, a transaction is created 81// during the Decode execution. If anything fails, Rollback is called 82func NewDecoder(s *Scanner, o storer.EncodedObjectStorer) (*Decoder, error) { 83 return NewDecoderForType(s, o, plumbing.AnyObject) 84} 85 86// NewDecoderForType returns a new Decoder but in this case for a specific object type. 87// When an object is read using this Decoder instance and it is not of the same type of 88// the specified one, nil will be returned. This is intended to avoid the content 89// deserialization of all the objects 90func NewDecoderForType(s *Scanner, o storer.EncodedObjectStorer, 91 t plumbing.ObjectType) (*Decoder, error) { 92 93 if t == plumbing.OFSDeltaObject || 94 t == plumbing.REFDeltaObject || 95 t == plumbing.InvalidObject { 96 return nil, plumbing.ErrInvalidType 97 } 98 99 if !canResolveDeltas(s, o) { 100 return nil, ErrResolveDeltasNotSupported 101 } 102 103 return &Decoder{ 104 s: s, 105 o: o, 106 107 idx: NewIndex(0), 108 offsetToType: make(map[int64]plumbing.ObjectType, 0), 109 decoderType: t, 110 }, nil 111} 112 113func canResolveDeltas(s *Scanner, o storer.EncodedObjectStorer) bool { 114 return s.IsSeekable || o != nil 115} 116 117// Decode reads a packfile and stores it in the value pointed to by s. The 118// offsets and the CRCs are calculated by this method 119func (d *Decoder) Decode() (checksum plumbing.Hash, err error) { 120 defer func() { d.isDecoded = true }() 121 122 if d.isDecoded { 123 return plumbing.ZeroHash, ErrAlreadyDecoded 124 } 125 126 if err := d.doDecode(); err != nil { 127 return plumbing.ZeroHash, err 128 } 129 130 return d.s.Checksum() 131} 132 133func (d *Decoder) doDecode() error { 134 _, count, err := d.s.Header() 135 if err != nil { 136 return err 137 } 138 139 if !d.hasBuiltIndex { 140 d.idx = NewIndex(int(count)) 141 } 142 defer func() { d.hasBuiltIndex = true }() 143 144 _, isTxStorer := d.o.(storer.Transactioner) 145 switch { 146 case d.o == nil: 147 return d.decodeObjects(int(count)) 148 case isTxStorer: 149 return d.decodeObjectsWithObjectStorerTx(int(count)) 150 default: 151 return d.decodeObjectsWithObjectStorer(int(count)) 152 } 153} 154 155func (d *Decoder) decodeObjects(count int) error { 156 for i := 0; i < count; i++ { 157 if _, err := d.DecodeObject(); err != nil { 158 return err 159 } 160 } 161 162 return nil 163} 164 165func (d *Decoder) decodeObjectsWithObjectStorer(count int) error { 166 for i := 0; i < count; i++ { 167 obj, err := d.DecodeObject() 168 if err != nil { 169 return err 170 } 171 172 if _, err := d.o.SetEncodedObject(obj); err != nil { 173 return err 174 } 175 } 176 177 return nil 178} 179 180func (d *Decoder) decodeObjectsWithObjectStorerTx(count int) error { 181 d.tx = d.o.(storer.Transactioner).Begin() 182 183 for i := 0; i < count; i++ { 184 obj, err := d.DecodeObject() 185 if err != nil { 186 return err 187 } 188 189 if _, err := d.tx.SetEncodedObject(obj); err != nil { 190 if rerr := d.tx.Rollback(); rerr != nil { 191 return ErrRollback.AddDetails( 192 "error: %s, during tx.Set error: %s", rerr, err, 193 ) 194 } 195 196 return err 197 } 198 199 } 200 201 return d.tx.Commit() 202} 203 204// DecodeObject reads the next object from the scanner and returns it. This 205// method can be used in replacement of the Decode method, to work in a 206// interactive way. If you created a new decoder instance using NewDecoderForType 207// constructor, if the object decoded is not equals to the specified one, nil will 208// be returned 209func (d *Decoder) DecodeObject() (plumbing.EncodedObject, error) { 210 h, err := d.s.NextObjectHeader() 211 if err != nil { 212 return nil, err 213 } 214 215 if d.decoderType == plumbing.AnyObject { 216 return d.decodeByHeader(h) 217 } 218 219 return d.decodeIfSpecificType(h) 220} 221 222func (d *Decoder) decodeIfSpecificType(h *ObjectHeader) (plumbing.EncodedObject, error) { 223 var ( 224 obj plumbing.EncodedObject 225 realType plumbing.ObjectType 226 err error 227 ) 228 switch h.Type { 229 case plumbing.OFSDeltaObject: 230 realType, err = d.ofsDeltaType(h.OffsetReference) 231 case plumbing.REFDeltaObject: 232 realType, err = d.refDeltaType(h.Reference) 233 if err == plumbing.ErrObjectNotFound { 234 obj, err = d.decodeByHeader(h) 235 if err != nil { 236 realType = obj.Type() 237 } 238 } 239 default: 240 realType = h.Type 241 } 242 243 if err != nil { 244 return nil, err 245 } 246 247 d.offsetToType[h.Offset] = realType 248 249 if d.decoderType == realType { 250 if obj != nil { 251 return obj, nil 252 } 253 254 return d.decodeByHeader(h) 255 } 256 257 return nil, nil 258} 259 260func (d *Decoder) ofsDeltaType(offset int64) (plumbing.ObjectType, error) { 261 t, ok := d.offsetToType[offset] 262 if !ok { 263 return plumbing.InvalidObject, plumbing.ErrObjectNotFound 264 } 265 266 return t, nil 267} 268 269func (d *Decoder) refDeltaType(ref plumbing.Hash) (plumbing.ObjectType, error) { 270 e, ok := d.idx.LookupHash(ref) 271 if !ok { 272 return plumbing.InvalidObject, plumbing.ErrObjectNotFound 273 } 274 275 return d.ofsDeltaType(int64(e.Offset)) 276} 277 278func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error) { 279 obj := d.newObject() 280 obj.SetSize(h.Length) 281 obj.SetType(h.Type) 282 var crc uint32 283 var err error 284 switch h.Type { 285 case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject: 286 crc, err = d.fillRegularObjectContent(obj) 287 case plumbing.REFDeltaObject: 288 crc, err = d.fillREFDeltaObjectContent(obj, h.Reference) 289 case plumbing.OFSDeltaObject: 290 crc, err = d.fillOFSDeltaObjectContent(obj, h.OffsetReference) 291 default: 292 err = ErrInvalidObject.AddDetails("type %q", h.Type) 293 } 294 295 if err != nil { 296 return obj, err 297 } 298 299 if !d.hasBuiltIndex { 300 d.idx.Add(obj.Hash(), uint64(h.Offset), crc) 301 } 302 303 return obj, nil 304} 305 306func (d *Decoder) newObject() plumbing.EncodedObject { 307 if d.o == nil { 308 return &plumbing.MemoryObject{} 309 } 310 311 return d.o.NewEncodedObject() 312} 313 314// DecodeObjectAt reads an object at the given location. Every EncodedObject 315// returned is added into a internal index. This is intended to be able to regenerate 316// objects from deltas (offset deltas or reference deltas) without an package index 317// (.idx file). If Decode wasn't called previously objects offset should provided 318// using the SetOffsets method. 319func (d *Decoder) DecodeObjectAt(offset int64) (plumbing.EncodedObject, error) { 320 if !d.s.IsSeekable { 321 return nil, ErrNonSeekable 322 } 323 324 beforeJump, err := d.s.SeekFromStart(offset) 325 if err != nil { 326 return nil, err 327 } 328 329 defer func() { 330 _, seekErr := d.s.SeekFromStart(beforeJump) 331 if err == nil { 332 err = seekErr 333 } 334 }() 335 336 return d.DecodeObject() 337} 338 339func (d *Decoder) fillRegularObjectContent(obj plumbing.EncodedObject) (uint32, error) { 340 w, err := obj.Writer() 341 if err != nil { 342 return 0, err 343 } 344 345 _, crc, err := d.s.NextObject(w) 346 return crc, err 347} 348 349func (d *Decoder) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plumbing.Hash) (uint32, error) { 350 buf := bytes.NewBuffer(nil) 351 _, crc, err := d.s.NextObject(buf) 352 if err != nil { 353 return 0, err 354 } 355 356 base, ok := d.cacheGet(ref) 357 if !ok { 358 base, err = d.recallByHash(ref) 359 if err != nil { 360 return 0, err 361 } 362 } 363 364 obj.SetType(base.Type()) 365 err = ApplyDelta(obj, base, buf.Bytes()) 366 d.cachePut(obj) 367 368 return crc, err 369} 370 371func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset int64) (uint32, error) { 372 buf := bytes.NewBuffer(nil) 373 _, crc, err := d.s.NextObject(buf) 374 if err != nil { 375 return 0, err 376 } 377 378 e, ok := d.idx.LookupOffset(uint64(offset)) 379 var base plumbing.EncodedObject 380 if ok { 381 base, ok = d.cacheGet(e.Hash) 382 } 383 384 if !ok { 385 base, err = d.recallByOffset(offset) 386 if err != nil { 387 return 0, err 388 } 389 } 390 391 obj.SetType(base.Type()) 392 err = ApplyDelta(obj, base, buf.Bytes()) 393 d.cachePut(obj) 394 395 return crc, err 396} 397 398func (d *Decoder) cacheGet(h plumbing.Hash) (plumbing.EncodedObject, bool) { 399 if d.DeltaBaseCache == nil { 400 return nil, false 401 } 402 403 return d.DeltaBaseCache.Get(h) 404} 405 406func (d *Decoder) cachePut(obj plumbing.EncodedObject) { 407 if d.DeltaBaseCache == nil { 408 return 409 } 410 411 d.DeltaBaseCache.Put(obj) 412} 413 414func (d *Decoder) recallByOffset(o int64) (plumbing.EncodedObject, error) { 415 if d.s.IsSeekable { 416 return d.DecodeObjectAt(o) 417 } 418 419 if e, ok := d.idx.LookupOffset(uint64(o)); ok { 420 return d.recallByHashNonSeekable(e.Hash) 421 } 422 423 return nil, plumbing.ErrObjectNotFound 424} 425 426func (d *Decoder) recallByHash(h plumbing.Hash) (plumbing.EncodedObject, error) { 427 if d.s.IsSeekable { 428 if e, ok := d.idx.LookupHash(h); ok { 429 return d.DecodeObjectAt(int64(e.Offset)) 430 } 431 } 432 433 return d.recallByHashNonSeekable(h) 434} 435 436// recallByHashNonSeekable if we are in a transaction the objects are read from 437// the transaction, if not are directly read from the ObjectStorer 438func (d *Decoder) recallByHashNonSeekable(h plumbing.Hash) (obj plumbing.EncodedObject, err error) { 439 if d.tx != nil { 440 obj, err = d.tx.EncodedObject(plumbing.AnyObject, h) 441 } else { 442 obj, err = d.o.EncodedObject(plumbing.AnyObject, h) 443 } 444 445 if err != plumbing.ErrObjectNotFound { 446 return obj, err 447 } 448 449 return nil, plumbing.ErrObjectNotFound 450} 451 452// SetIndex sets an index for the packfile. It is recommended to set this. 453// The index might be read from a file or reused from a previous Decoder usage 454// (see Index function). 455func (d *Decoder) SetIndex(idx *Index) { 456 d.hasBuiltIndex = true 457 d.idx = idx 458} 459 460// Index returns the index for the packfile. If index was set with SetIndex, 461// Index will return it. Otherwise, it will return an index that is built while 462// decoding. If neither SetIndex was called with a full index or Decode called 463// for the whole packfile, then the returned index will be incomplete. 464func (d *Decoder) Index() *Index { 465 return d.idx 466} 467 468// Close closes the Scanner. usually this mean that the whole reader is read and 469// discarded 470func (d *Decoder) Close() error { 471 return d.s.Close() 472} 473