1package packfile
2
3import (
4	"bytes"
5	"errors"
6	"io"
7	stdioutil "io/ioutil"
8
9	"github.com/go-git/go-git/v5/plumbing"
10	"github.com/go-git/go-git/v5/plumbing/cache"
11	"github.com/go-git/go-git/v5/plumbing/storer"
12	"github.com/go-git/go-git/v5/utils/ioutil"
13)
14
var (
	// ErrReferenceDeltaNotFound is returned when the base object of a
	// reference delta cannot be resolved, i.e. the delta points at an object
	// outside of this packfile (a "thin" pack) that could not be loaded.
	ErrReferenceDeltaNotFound = errors.New("reference delta not found")

	// ErrNotSeekableSource is returned when the source for the parser is not
	// seekable and a storage was not provided, so it can't be parsed.
	ErrNotSeekableSource = errors.New("parser source is not seekable and storage was not provided")

	// ErrDeltaNotCached is returned when the source is not seekable and the
	// raw content of a delta object is missing from the parser's in-memory
	// delta table.
	ErrDeltaNotCached = errors.New("delta could not be found in cache")
)
27
// Observer is notified by a Parser while a packfile is being decoded. It is
// implemented by index encoders. Any error returned by a callback stops the
// notification of the remaining observers and is returned by the parser.
type Observer interface {
	// OnHeader is called once the packfile header has been read; count is
	// the number of objects announced by the header.
	OnHeader(count uint32) error
	// OnInflatedObjectHeader is called for each object with its type, its
	// size and the offset (pos) at which it was found in the packfile.
	OnInflatedObjectHeader(t plumbing.ObjectType, objSize int64, pos int64) error
	// OnInflatedObjectContent is called for each decoded object with its
	// hash, its offset in the packfile, its CRC32 and its content.
	OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32, content []byte) error
	// OnFooter is called when decoding is done; h is the checksum obtained
	// from the scanner.
	OnFooter(h plumbing.Hash) error
}
39
// Parser decodes a packfile and calls any observer associated to it. It is
// used to generate indexes.
type Parser struct {
	// storage, when not nil, receives every decoded object and is consulted
	// when looking up object content. It is mandatory for non-seekable
	// sources.
	storage storer.EncodedObjectStorer
	// scanner is the low-level reader of the packfile.
	scanner *Scanner
	// count is the number of objects announced by the packfile header.
	count uint32
	// oi holds one entry per object, in the order they appear in the packfile.
	oi []*objectInfo
	// oiByHash indexes objects by hash. It also holds the placeholder
	// parents created for references that are not part of this packfile.
	oiByHash map[plumbing.Hash]*objectInfo
	// oiByOffset indexes objects by their offset in the packfile.
	oiByOffset map[int64]*objectInfo
	// NOTE(review): hashOffset is not referenced by the code visible in this
	// file; confirm whether it is still needed.
	hashOffset map[plumbing.Hash]int64
	// checksum is the packfile checksum as reported by the scanner.
	checksum plumbing.Hash

	// cache keeps the resolved content of objects that have children, keyed
	// by offset.
	cache *cache.BufferLRU
	// delta content by offset, only used if source is not seekable
	deltas map[int64][]byte

	// ob is the list of observers notified during parsing.
	ob []Observer
}
58
59// NewParser creates a new Parser. The Scanner source must be seekable.
60// If it's not, NewParserWithStorage should be used instead.
61func NewParser(scanner *Scanner, ob ...Observer) (*Parser, error) {
62	return NewParserWithStorage(scanner, nil, ob...)
63}
64
65// NewParserWithStorage creates a new Parser. The scanner source must either
66// be seekable or a storage must be provided.
67func NewParserWithStorage(
68	scanner *Scanner,
69	storage storer.EncodedObjectStorer,
70	ob ...Observer,
71) (*Parser, error) {
72	if !scanner.IsSeekable && storage == nil {
73		return nil, ErrNotSeekableSource
74	}
75
76	var deltas map[int64][]byte
77	if !scanner.IsSeekable {
78		deltas = make(map[int64][]byte)
79	}
80
81	return &Parser{
82		storage: storage,
83		scanner: scanner,
84		ob:      ob,
85		count:   0,
86		cache:   cache.NewBufferLRUDefault(),
87		deltas:  deltas,
88	}, nil
89}
90
91func (p *Parser) forEachObserver(f func(o Observer) error) error {
92	for _, o := range p.ob {
93		if err := f(o); err != nil {
94			return err
95		}
96	}
97	return nil
98}
99
100func (p *Parser) onHeader(count uint32) error {
101	return p.forEachObserver(func(o Observer) error {
102		return o.OnHeader(count)
103	})
104}
105
106func (p *Parser) onInflatedObjectHeader(
107	t plumbing.ObjectType,
108	objSize int64,
109	pos int64,
110) error {
111	return p.forEachObserver(func(o Observer) error {
112		return o.OnInflatedObjectHeader(t, objSize, pos)
113	})
114}
115
116func (p *Parser) onInflatedObjectContent(
117	h plumbing.Hash,
118	pos int64,
119	crc uint32,
120	content []byte,
121) error {
122	return p.forEachObserver(func(o Observer) error {
123		return o.OnInflatedObjectContent(h, pos, crc, content)
124	})
125}
126
127func (p *Parser) onFooter(h plumbing.Hash) error {
128	return p.forEachObserver(func(o Observer) error {
129		return o.OnFooter(h)
130	})
131}
132
133// Parse start decoding phase of the packfile.
134func (p *Parser) Parse() (plumbing.Hash, error) {
135	if err := p.init(); err != nil {
136		return plumbing.ZeroHash, err
137	}
138
139	if err := p.indexObjects(); err != nil {
140		return plumbing.ZeroHash, err
141	}
142
143	var err error
144	p.checksum, err = p.scanner.Checksum()
145	if err != nil && err != io.EOF {
146		return plumbing.ZeroHash, err
147	}
148
149	if err := p.resolveDeltas(); err != nil {
150		return plumbing.ZeroHash, err
151	}
152
153	if err := p.onFooter(p.checksum); err != nil {
154		return plumbing.ZeroHash, err
155	}
156
157	return p.checksum, nil
158}
159
160func (p *Parser) init() error {
161	_, c, err := p.scanner.Header()
162	if err != nil {
163		return err
164	}
165
166	if err := p.onHeader(c); err != nil {
167		return err
168	}
169
170	p.count = c
171	p.oiByHash = make(map[plumbing.Hash]*objectInfo, p.count)
172	p.oiByOffset = make(map[int64]*objectInfo, p.count)
173	p.oi = make([]*objectInfo, p.count)
174
175	return nil
176}
177
// indexObjects reads the p.count objects announced by the header, in packfile
// order, and records an objectInfo for each of them in p.oi and p.oiByOffset.
//
// Non-delta objects are hashed right away, registered in p.oiByHash and, when
// a storage is configured, written to it. Delta objects are only linked to
// their parent here and their hash is left unset. When the source is not
// seekable, the raw delta content is copied into p.deltas.
//
// It returns plumbing.ErrObjectNotFound when an offset delta refers to an
// offset that has not been indexed, and propagates any error coming from the
// scanner, the hasher or the storage.
func (p *Parser) indexObjects() error {
	// A single buffer is reused for the content of every object.
	buf := new(bytes.Buffer)

	for i := uint32(0); i < p.count; i++ {
		buf.Reset()

		oh, err := p.scanner.NextObjectHeader()
		if err != nil {
			return err
		}

		delta := false
		var ota *objectInfo
		switch t := oh.Type; t {
		case plumbing.OFSDeltaObject:
			delta = true

			// The parent of an offset delta must already have been indexed.
			parent, ok := p.oiByOffset[oh.OffsetReference]
			if !ok {
				return plumbing.ErrObjectNotFound
			}

			ota = newDeltaObject(oh.Offset, oh.Length, t, parent)
			parent.Children = append(parent.Children, ota)
		case plumbing.REFDeltaObject:
			delta = true
			parent, ok := p.oiByHash[oh.Reference]
			if !ok {
				// can't find referenced object in this pack file
				// this must be a "thin" pack.
				parent = &objectInfo{ // placeholder parent
					SHA1:        oh.Reference,
					ExternalRef: true, // mark as an external reference that must be resolved
					Type:        plumbing.AnyObject,
					DiskType:    plumbing.AnyObject,
				}
				// Register the placeholder so later deltas against the same
				// reference share it.
				p.oiByHash[oh.Reference] = parent
			}
			ota = newDeltaObject(oh.Offset, oh.Length, t, parent)
			parent.Children = append(parent.Children, ota)

		default:
			ota = newBaseObject(oh.Offset, oh.Length, t)
		}

		// Read the object content into buf and keep its CRC32.
		_, crc, err := p.scanner.NextObject(buf)
		if err != nil {
			return err
		}

		ota.Crc32 = crc
		// The constructors above already set Length to oh.Length; this
		// assigns the same value again.
		ota.Length = oh.Length

		data := buf.Bytes()
		if !delta {
			// Only non-delta objects can be hashed at this point.
			sha1, err := getSHA1(ota.Type, data)
			if err != nil {
				return err
			}

			ota.SHA1 = sha1
			p.oiByHash[ota.SHA1] = ota
		}

		if p.storage != nil && !delta {
			obj := new(plumbing.MemoryObject)
			obj.SetSize(oh.Length)
			obj.SetType(oh.Type)
			if _, err := obj.Write(data); err != nil {
				return err
			}

			if _, err := p.storage.SetEncodedObject(obj); err != nil {
				return err
			}
		}

		if delta && !p.scanner.IsSeekable {
			// data aliases buf, which is reset on the next iteration, so a
			// private copy is stored.
			p.deltas[oh.Offset] = make([]byte, len(data))
			copy(p.deltas[oh.Offset], data)
		}

		p.oiByOffset[oh.Offset] = ota
		p.oi[i] = ota
	}

	return nil
}
266
// resolveDeltas walks every indexed object in packfile order, obtains its
// content through p.get and reports it to the observers, first with
// onInflatedObjectHeader and then with onInflatedObjectContent.
//
// For every object that has children and whose Type is not a delta type at
// that point, each child is resolved against the object's content with the
// output discarded. The first error encountered is returned.
func (p *Parser) resolveDeltas() error {
	// A single buffer is reused for the content of every object.
	buf := &bytes.Buffer{}
	for _, obj := range p.oi {
		buf.Reset()
		err := p.get(obj, buf)
		if err != nil {
			return err
		}
		content := buf.Bytes()

		if err := p.onInflatedObjectHeader(obj.Type, obj.Length, obj.Offset); err != nil {
			return err
		}

		if err := p.onInflatedObjectContent(obj.SHA1, obj.Offset, obj.Crc32, content); err != nil {
			return err
		}

		if !obj.IsDelta() && len(obj.Children) > 0 {
			for _, child := range obj.Children {
				// The patched content is written to a discarding writer; the
				// call is made for what resolveObject does besides writing.
				if err := p.resolveObject(stdioutil.Discard, child, content); err != nil {
					return err
				}
			}

			// Drop this object's raw delta content from p.deltas, which is
			// only populated when the source is not seekable.
			if obj.DiskType.IsDelta() && !p.scanner.IsSeekable {
				delete(p.deltas, obj.Offset)
			}
		}
	}

	return nil
}
301
// get writes the content of o into buf. The sources are tried in this order:
//
//  1. p.cache, keyed by offset (skipped for external-reference placeholders);
//  2. p.storage, when one is configured and o.Type is not a delta type; in
//     that case o.Type is updated with the type of the stored object;
//  3. the packfile itself: objects stored as deltas are rebuilt by fetching
//     the parent recursively and calling resolveObject, any other object is
//     read with readData.
//
// When step 3 succeeds and o has children, a copy of the content is put in
// p.cache. ErrReferenceDeltaNotFound is returned when o is an external
// reference that was not handled by the storage branch.
func (p *Parser) get(o *objectInfo, buf *bytes.Buffer) (err error) {
	if !o.ExternalRef { // skip cache check for placeholder parents
		b, ok := p.cache.Get(o.Offset)
		if ok {
			_, err := buf.Write(b)
			return err
		}
	}

	// If it's not on the cache and is not a delta we can try to find it in the
	// storage, if there's one. External refs must enter here.
	if p.storage != nil && !o.Type.IsDelta() {
		var e plumbing.EncodedObject
		e, err = p.storage.EncodedObject(plumbing.AnyObject, o.SHA1)
		if err != nil {
			return err
		}
		o.Type = e.Type()

		var r io.ReadCloser
		r, err = e.Reader()
		if err != nil {
			return err
		}

		// CheckClose is handed a pointer to the named result err, presumably
		// so that a failure to close r can be reported to the caller.
		defer ioutil.CheckClose(r, &err)

		// Read at most e.Size() bytes from the stored object.
		_, err = buf.ReadFrom(io.LimitReader(r, e.Size()))
		return err
	}

	if o.ExternalRef {
		// we were not able to resolve a ref in a thin pack
		return ErrReferenceDeltaNotFound
	}

	if o.DiskType.IsDelta() {
		// Borrow a scratch buffer from the pool for the parent's content.
		b := bufPool.Get().(*bytes.Buffer)
		defer bufPool.Put(b)
		b.Reset()
		err := p.get(o.Parent, b)
		if err != nil {
			return err
		}
		base := b.Bytes()

		err = p.resolveObject(buf, o, base)
		if err != nil {
			return err
		}
	} else {
		err := p.readData(buf, o)
		if err != nil {
			return err
		}
	}

	if len(o.Children) > 0 {
		// Cache a private copy: buf is owned by the caller.
		data := make([]byte, buf.Len())
		copy(data, buf.Bytes())
		p.cache.Put(o.Offset, data)
	}
	return nil
}
366
367func (p *Parser) resolveObject(
368	w io.Writer,
369	o *objectInfo,
370	base []byte,
371) error {
372	if !o.DiskType.IsDelta() {
373		return nil
374	}
375	buf := bufPool.Get().(*bytes.Buffer)
376	defer bufPool.Put(buf)
377	buf.Reset()
378	err := p.readData(buf, o)
379	if err != nil {
380		return err
381	}
382	data := buf.Bytes()
383
384	data, err = applyPatchBase(o, data, base)
385	if err != nil {
386		return err
387	}
388
389	if p.storage != nil {
390		obj := new(plumbing.MemoryObject)
391		obj.SetSize(o.Size())
392		obj.SetType(o.Type)
393		if _, err := obj.Write(data); err != nil {
394			return err
395		}
396
397		if _, err := p.storage.SetEncodedObject(obj); err != nil {
398			return err
399		}
400	}
401	_, err = w.Write(data)
402	return err
403}
404
405func (p *Parser) readData(w io.Writer, o *objectInfo) error {
406	if !p.scanner.IsSeekable && o.DiskType.IsDelta() {
407		data, ok := p.deltas[o.Offset]
408		if !ok {
409			return ErrDeltaNotCached
410		}
411		_, err := w.Write(data)
412		return err
413	}
414
415	if _, err := p.scanner.SeekObjectHeader(o.Offset); err != nil {
416		return err
417	}
418
419	if _, _, err := p.scanner.NextObject(w); err != nil {
420		return err
421	}
422	return nil
423}
424
425func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) {
426	patched, err := PatchDelta(base, data)
427	if err != nil {
428		return nil, err
429	}
430
431	if ota.SHA1 == plumbing.ZeroHash {
432		ota.Type = ota.Parent.Type
433		sha1, err := getSHA1(ota.Type, patched)
434		if err != nil {
435			return nil, err
436		}
437
438		ota.SHA1 = sha1
439		ota.Length = int64(len(patched))
440	}
441
442	return patched, nil
443}
444
445func getSHA1(t plumbing.ObjectType, data []byte) (plumbing.Hash, error) {
446	hasher := plumbing.NewHasher(t, int64(len(data)))
447	if _, err := hasher.Write(data); err != nil {
448		return plumbing.ZeroHash, err
449	}
450
451	return hasher.Sum(), nil
452}
453
// objectInfo is the parser's bookkeeping record for one object of the
// packfile, or for a placeholder parent that lives outside of it.
type objectInfo struct {
	// Offset is the position of the object in the packfile.
	Offset int64
	// Length is the object size taken from its header; for deltas it is
	// replaced by the size of the patched content once resolved.
	Length int64
	// Type starts as the type read from the header; for deltas it becomes
	// the parent's type once the delta has been applied.
	Type plumbing.ObjectType
	// DiskType is the type read from the header and is set at construction.
	DiskType    plumbing.ObjectType
	ExternalRef bool // indicates this is an external reference in a thin pack file

	// Crc32 is the checksum reported by the scanner when the object was read.
	Crc32 uint32

	// Parent is the delta base; nil for non-delta objects.
	Parent *objectInfo
	// Children are the deltas that use this object as their base.
	Children []*objectInfo
	// SHA1 is the object hash; it stays zero for deltas until resolved.
	SHA1 plumbing.Hash
}
467
468func newBaseObject(offset, length int64, t plumbing.ObjectType) *objectInfo {
469	return newDeltaObject(offset, length, t, nil)
470}
471
472func newDeltaObject(
473	offset, length int64,
474	t plumbing.ObjectType,
475	parent *objectInfo,
476) *objectInfo {
477	obj := &objectInfo{
478		Offset:   offset,
479		Length:   length,
480		Type:     t,
481		DiskType: t,
482		Crc32:    0,
483		Parent:   parent,
484	}
485
486	return obj
487}
488
489func (o *objectInfo) IsDelta() bool {
490	return o.Type.IsDelta()
491}
492
493func (o *objectInfo) Size() int64 {
494	return o.Length
495}
496