1package packfile
2
3import (
4	"bytes"
5
6	"gopkg.in/src-d/go-git.v4/plumbing"
7	"gopkg.in/src-d/go-git.v4/plumbing/cache"
8	"gopkg.in/src-d/go-git.v4/plumbing/storer"
9)
10
// Format specifies whether the packfile uses ref-deltas or ofs-deltas.
type Format int
13
// Possible values of the Format type.
const (
	// UnknownFormat is the zero value of Format: the delta encoding is not
	// known.
	UnknownFormat Format = iota
	// OFSDeltaFormat identifies a packfile that uses ofs-deltas.
	OFSDeltaFormat
	// REFDeltaFormat identifies a packfile that uses ref-deltas.
	REFDeltaFormat
)
20
var (
	// ErrMaxObjectsLimitReached is returned by Decode when the number
	// of objects in the packfile is higher than
	// Decoder.MaxObjectsLimit.
	// NOTE(review): the Decoder declared in this file has no MaxObjectsLimit
	// field and nothing here returns this error; confirm it is still in use.
	ErrMaxObjectsLimitReached = NewError("max. objects limit reached")
	// ErrInvalidObject is returned while decoding when an object header
	// carries a type that is neither a commit, tree, blob, tag nor a delta.
	ErrInvalidObject = NewError("invalid git object")
	// ErrPackEntryNotFound is returned by Decode when a reference in
	// the packfile references an unknown object.
	ErrPackEntryNotFound = NewError("can't find a pack entry")
	// ErrZLib is returned by Decode when there was an error unzipping
	// the packfile contents.
	ErrZLib = NewError("zlib reading error")
	// ErrCannotRecall is returned when an object that has to be recalled
	// (by offset or by hash) cannot be returned.
	// NOTE(review): no code in this file returns it; confirm it is still used.
	ErrCannotRecall = NewError("cannot recall object")
	// ErrResolveDeltasNotSupported is returned by NewDecoder and
	// NewDecoderForType when the scanner is not seekable and no
	// storer.EncodedObjectStorer was given, since deltas could not be resolved.
	ErrResolveDeltasNotSupported = NewError("resolve delta is not supported")
	// ErrNonSeekable is returned by DecodeObjectAt when the scanner is not
	// seekable.
	ErrNonSeekable = NewError("non-seekable scanner")
	// ErrRollback is returned when rolling back a transaction fails after a
	// previous error; the details carry both errors.
	ErrRollback = NewError("rollback error, during set error")
	// ErrAlreadyDecoded is returned by Decode when it is called more than once
	// on the same Decoder.
	ErrAlreadyDecoded = NewError("packfile was already decoded")
)
49
// Decoder reads and decodes packfiles from an input Scanner. If an
// EncodedObjectStorer was provided, the decoded objects are stored there;
// otherwise the decoded objects are discarded by Decode. The offsets and CRCs
// are calculated whether an EncodedObjectStorer was provided or not.
type Decoder struct {
	// DeltaBaseCache is an optional cache. When set, it is consulted for
	// delta bases and receives the objects rebuilt from deltas.
	DeltaBaseCache cache.Object

	// s is the scanner the packfile is read from.
	s  *Scanner
	// o is the optional storer that receives the decoded objects; may be nil.
	o  storer.EncodedObjectStorer
	// tx is the transaction opened on o when o implements
	// storer.Transactioner; nil otherwise.
	tx storer.Transaction

	// isDecoded is set once Decode has been called, so that a second call can
	// be rejected with ErrAlreadyDecoded.
	isDecoded bool

	// hasBuiltIndex indicates if the index is fully built or not. If it is not,
	// will be built incrementally while decoding.
	hasBuiltIndex bool
	idx           *Index

	// offsetToType records, per object offset, the resolved object type found
	// while decoding with a type filter.
	offsetToType map[int64]plumbing.ObjectType
	// decoderType is the type filter; plumbing.AnyObject disables filtering.
	decoderType  plumbing.ObjectType
}
71
// NewDecoder returns a new Decoder that decodes a Packfile using the given
// Scanner and stores the objects in the provided EncodedObjectStorer.
// If the passed EncodedObjectStorer is nil, objects are not stored, but
// offsets on the Packfile and CRCs are calculated.
//
// If EncodedObjectStorer is nil and the Scanner is not Seekable,
// ErrResolveDeltasNotSupported is returned.
//
// If the EncodedObjectStorer implements storer.Transactioner, a transaction is
// created during the Decode execution; it is committed once every object has
// been stored and rolled back if storing an object fails.
func NewDecoder(s *Scanner, o storer.EncodedObjectStorer) (*Decoder, error) {
	return NewDecoderForType(s, o, plumbing.AnyObject)
}
85
86// NewDecoderForType returns a new Decoder but in this case for a specific object type.
87// When an object is read using this Decoder instance and it is not of the same type of
88// the specified one, nil will be returned. This is intended to avoid the content
89// deserialization of all the objects
90func NewDecoderForType(s *Scanner, o storer.EncodedObjectStorer,
91	t plumbing.ObjectType) (*Decoder, error) {
92
93	if t == plumbing.OFSDeltaObject ||
94		t == plumbing.REFDeltaObject ||
95		t == plumbing.InvalidObject {
96		return nil, plumbing.ErrInvalidType
97	}
98
99	if !canResolveDeltas(s, o) {
100		return nil, ErrResolveDeltasNotSupported
101	}
102
103	return &Decoder{
104		s: s,
105		o: o,
106
107		idx:          NewIndex(0),
108		offsetToType: make(map[int64]plumbing.ObjectType, 0),
109		decoderType:  t,
110	}, nil
111}
112
113func canResolveDeltas(s *Scanner, o storer.EncodedObjectStorer) bool {
114	return s.IsSeekable || o != nil
115}
116
117// Decode reads a packfile and stores it in the value pointed to by s. The
118// offsets and the CRCs are calculated by this method
119func (d *Decoder) Decode() (checksum plumbing.Hash, err error) {
120	defer func() { d.isDecoded = true }()
121
122	if d.isDecoded {
123		return plumbing.ZeroHash, ErrAlreadyDecoded
124	}
125
126	if err := d.doDecode(); err != nil {
127		return plumbing.ZeroHash, err
128	}
129
130	return d.s.Checksum()
131}
132
133func (d *Decoder) doDecode() error {
134	_, count, err := d.s.Header()
135	if err != nil {
136		return err
137	}
138
139	if !d.hasBuiltIndex {
140		d.idx = NewIndex(int(count))
141	}
142	defer func() { d.hasBuiltIndex = true }()
143
144	_, isTxStorer := d.o.(storer.Transactioner)
145	switch {
146	case d.o == nil:
147		return d.decodeObjects(int(count))
148	case isTxStorer:
149		return d.decodeObjectsWithObjectStorerTx(int(count))
150	default:
151		return d.decodeObjectsWithObjectStorer(int(count))
152	}
153}
154
155func (d *Decoder) decodeObjects(count int) error {
156	for i := 0; i < count; i++ {
157		if _, err := d.DecodeObject(); err != nil {
158			return err
159		}
160	}
161
162	return nil
163}
164
165func (d *Decoder) decodeObjectsWithObjectStorer(count int) error {
166	for i := 0; i < count; i++ {
167		obj, err := d.DecodeObject()
168		if err != nil {
169			return err
170		}
171
172		if _, err := d.o.SetEncodedObject(obj); err != nil {
173			return err
174		}
175	}
176
177	return nil
178}
179
180func (d *Decoder) decodeObjectsWithObjectStorerTx(count int) error {
181	d.tx = d.o.(storer.Transactioner).Begin()
182
183	for i := 0; i < count; i++ {
184		obj, err := d.DecodeObject()
185		if err != nil {
186			return err
187		}
188
189		if _, err := d.tx.SetEncodedObject(obj); err != nil {
190			if rerr := d.tx.Rollback(); rerr != nil {
191				return ErrRollback.AddDetails(
192					"error: %s, during tx.Set error: %s", rerr, err,
193				)
194			}
195
196			return err
197		}
198
199	}
200
201	return d.tx.Commit()
202}
203
204// DecodeObject reads the next object from the scanner and returns it. This
205// method can be used in replacement of the Decode method, to work in a
206// interactive way. If you created a new decoder instance using NewDecoderForType
207// constructor, if the object decoded is not equals to the specified one, nil will
208// be returned
209func (d *Decoder) DecodeObject() (plumbing.EncodedObject, error) {
210	h, err := d.s.NextObjectHeader()
211	if err != nil {
212		return nil, err
213	}
214
215	if d.decoderType == plumbing.AnyObject {
216		return d.decodeByHeader(h)
217	}
218
219	return d.decodeIfSpecificType(h)
220}
221
222func (d *Decoder) decodeIfSpecificType(h *ObjectHeader) (plumbing.EncodedObject, error) {
223	var (
224		obj      plumbing.EncodedObject
225		realType plumbing.ObjectType
226		err      error
227	)
228	switch h.Type {
229	case plumbing.OFSDeltaObject:
230		realType, err = d.ofsDeltaType(h.OffsetReference)
231	case plumbing.REFDeltaObject:
232		realType, err = d.refDeltaType(h.Reference)
233		if err == plumbing.ErrObjectNotFound {
234			obj, err = d.decodeByHeader(h)
235			if err != nil {
236				realType = obj.Type()
237			}
238		}
239	default:
240		realType = h.Type
241	}
242
243	if err != nil {
244		return nil, err
245	}
246
247	d.offsetToType[h.Offset] = realType
248
249	if d.decoderType == realType {
250		if obj != nil {
251			return obj, nil
252		}
253
254		return d.decodeByHeader(h)
255	}
256
257	return nil, nil
258}
259
260func (d *Decoder) ofsDeltaType(offset int64) (plumbing.ObjectType, error) {
261	t, ok := d.offsetToType[offset]
262	if !ok {
263		return plumbing.InvalidObject, plumbing.ErrObjectNotFound
264	}
265
266	return t, nil
267}
268
269func (d *Decoder) refDeltaType(ref plumbing.Hash) (plumbing.ObjectType, error) {
270	e, ok := d.idx.LookupHash(ref)
271	if !ok {
272		return plumbing.InvalidObject, plumbing.ErrObjectNotFound
273	}
274
275	return d.ofsDeltaType(int64(e.Offset))
276}
277
278func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error) {
279	obj := d.newObject()
280	obj.SetSize(h.Length)
281	obj.SetType(h.Type)
282	var crc uint32
283	var err error
284	switch h.Type {
285	case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject:
286		crc, err = d.fillRegularObjectContent(obj)
287	case plumbing.REFDeltaObject:
288		crc, err = d.fillREFDeltaObjectContent(obj, h.Reference)
289	case plumbing.OFSDeltaObject:
290		crc, err = d.fillOFSDeltaObjectContent(obj, h.OffsetReference)
291	default:
292		err = ErrInvalidObject.AddDetails("type %q", h.Type)
293	}
294
295	if err != nil {
296		return obj, err
297	}
298
299	if !d.hasBuiltIndex {
300		d.idx.Add(obj.Hash(), uint64(h.Offset), crc)
301	}
302
303	return obj, nil
304}
305
306func (d *Decoder) newObject() plumbing.EncodedObject {
307	if d.o == nil {
308		return &plumbing.MemoryObject{}
309	}
310
311	return d.o.NewEncodedObject()
312}
313
314// DecodeObjectAt reads an object at the given location. Every EncodedObject
315// returned is added into a internal index. This is intended to be able to regenerate
316// objects from deltas (offset deltas or reference deltas) without an package index
317// (.idx file). If Decode wasn't called previously objects offset should provided
318// using the SetOffsets method.
319func (d *Decoder) DecodeObjectAt(offset int64) (plumbing.EncodedObject, error) {
320	if !d.s.IsSeekable {
321		return nil, ErrNonSeekable
322	}
323
324	beforeJump, err := d.s.SeekFromStart(offset)
325	if err != nil {
326		return nil, err
327	}
328
329	defer func() {
330		_, seekErr := d.s.SeekFromStart(beforeJump)
331		if err == nil {
332			err = seekErr
333		}
334	}()
335
336	return d.DecodeObject()
337}
338
339func (d *Decoder) fillRegularObjectContent(obj plumbing.EncodedObject) (uint32, error) {
340	w, err := obj.Writer()
341	if err != nil {
342		return 0, err
343	}
344
345	_, crc, err := d.s.NextObject(w)
346	return crc, err
347}
348
349func (d *Decoder) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plumbing.Hash) (uint32, error) {
350	buf := bytes.NewBuffer(nil)
351	_, crc, err := d.s.NextObject(buf)
352	if err != nil {
353		return 0, err
354	}
355
356	base, ok := d.cacheGet(ref)
357	if !ok {
358		base, err = d.recallByHash(ref)
359		if err != nil {
360			return 0, err
361		}
362	}
363
364	obj.SetType(base.Type())
365	err = ApplyDelta(obj, base, buf.Bytes())
366	d.cachePut(obj)
367
368	return crc, err
369}
370
371func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset int64) (uint32, error) {
372	buf := bytes.NewBuffer(nil)
373	_, crc, err := d.s.NextObject(buf)
374	if err != nil {
375		return 0, err
376	}
377
378	e, ok := d.idx.LookupOffset(uint64(offset))
379	var base plumbing.EncodedObject
380	if ok {
381		base, ok = d.cacheGet(e.Hash)
382	}
383
384	if !ok {
385		base, err = d.recallByOffset(offset)
386		if err != nil {
387			return 0, err
388		}
389	}
390
391	obj.SetType(base.Type())
392	err = ApplyDelta(obj, base, buf.Bytes())
393	d.cachePut(obj)
394
395	return crc, err
396}
397
398func (d *Decoder) cacheGet(h plumbing.Hash) (plumbing.EncodedObject, bool) {
399	if d.DeltaBaseCache == nil {
400		return nil, false
401	}
402
403	return d.DeltaBaseCache.Get(h)
404}
405
406func (d *Decoder) cachePut(obj plumbing.EncodedObject) {
407	if d.DeltaBaseCache == nil {
408		return
409	}
410
411	d.DeltaBaseCache.Put(obj)
412}
413
414func (d *Decoder) recallByOffset(o int64) (plumbing.EncodedObject, error) {
415	if d.s.IsSeekable {
416		return d.DecodeObjectAt(o)
417	}
418
419	if e, ok := d.idx.LookupOffset(uint64(o)); ok {
420		return d.recallByHashNonSeekable(e.Hash)
421	}
422
423	return nil, plumbing.ErrObjectNotFound
424}
425
426func (d *Decoder) recallByHash(h plumbing.Hash) (plumbing.EncodedObject, error) {
427	if d.s.IsSeekable {
428		if e, ok := d.idx.LookupHash(h); ok {
429			return d.DecodeObjectAt(int64(e.Offset))
430		}
431	}
432
433	return d.recallByHashNonSeekable(h)
434}
435
436// recallByHashNonSeekable if we are in a transaction the objects are read from
437// the transaction, if not are directly read from the ObjectStorer
438func (d *Decoder) recallByHashNonSeekable(h plumbing.Hash) (obj plumbing.EncodedObject, err error) {
439	if d.tx != nil {
440		obj, err = d.tx.EncodedObject(plumbing.AnyObject, h)
441	} else {
442		obj, err = d.o.EncodedObject(plumbing.AnyObject, h)
443	}
444
445	if err != plumbing.ErrObjectNotFound {
446		return obj, err
447	}
448
449	return nil, plumbing.ErrObjectNotFound
450}
451
452// SetIndex sets an index for the packfile. It is recommended to set this.
453// The index might be read from a file or reused from a previous Decoder usage
454// (see Index function).
455func (d *Decoder) SetIndex(idx *Index) {
456	d.hasBuiltIndex = true
457	d.idx = idx
458}
459
// Index returns the index for the packfile. If an index was set with SetIndex,
// Index will return it. Otherwise, it will return the index that is built
// while decoding. If neither SetIndex was called with a full index nor Decode
// was called for the whole packfile, the returned index will be incomplete.
func (d *Decoder) Index() *Index {
	return d.idx
}
467
// Close closes the Scanner and returns the error it reports, if any.
// NOTE(review): the previous comment said this usually means the whole reader
// is read and discarded; that depends on Scanner.Close, confirm there.
func (d *Decoder) Close() error {
	return d.s.Close()
}
473