1package packfile
2
3import (
4	"bufio"
5	"bytes"
6	"compress/zlib"
7	"fmt"
8	"hash"
9	"hash/crc32"
10	"io"
11	stdioutil "io/ioutil"
12	"sync"
13
14	"github.com/go-git/go-git/v5/plumbing"
15	"github.com/go-git/go-git/v5/utils/binary"
16	"github.com/go-git/go-git/v5/utils/ioutil"
17)
18
var (
	// ErrEmptyPackfile is returned by Header when no data is found in the
	// packfile.
	ErrEmptyPackfile = NewError("empty packfile")
	// ErrBadSignature is returned by Header when the signature in the
	// packfile is incorrect.
	ErrBadSignature = NewError("malformed pack file signature")
	// ErrUnsupportedVersion is returned by Header when the packfile version
	// is different from VersionSupported.
	ErrUnsupportedVersion = NewError("unsupported packfile version")
	// ErrSeekNotSupported is returned when a seek is requested but the
	// underlying reader does not support seeking.
	ErrSeekNotSupported = NewError("not seek support")
)
30
// ObjectHeader contains the information related to the object, this information
// is collected from the bytes that precede the content of the object.
type ObjectHeader struct {
	// Type is the object type decoded from the first byte of the entry.
	Type plumbing.ObjectType
	// Offset is the position in the packfile at which the entry starts.
	Offset int64
	// Length is the length decoded from the entry header; when an object is
	// discarded the scanner expects its inflated content to be this long.
	Length int64
	// Reference is the hash read from the entry when Type is
	// plumbing.REFDeltaObject.
	Reference plumbing.Hash
	// OffsetReference is Offset minus the distance read from the entry when
	// Type is plumbing.OFSDeltaObject.
	OffsetReference int64
}
40
// Scanner reads a packfile sequentially: first the header, then one object
// entry after another, keeping a CRC32 of the bytes consumed for the entry
// currently being read.
type Scanner struct {
	// r is the buffered, offset-tracking reader that feeds crc.
	r *scannerReader
	// crc is the hash that r writes every consumed byte into.
	crc hash.Hash32

	// pendingObject is used to detect if an object has been read, or still
	// is waiting to be read
	pendingObject *ObjectHeader
	// version and objects cache the values read from the packfile header;
	// a zero version means the header has not been processed yet.
	version, objects uint32

	// IsSeekable says if this scanner can do Seek or not; for a Scanner to be
	// seekable the reader it was given must implement io.ReadSeeker.
	IsSeekable bool
}
54
55// NewScanner returns a new Scanner based on a reader, if the given reader
56// implements io.ReadSeeker the Scanner will be also Seekable
57func NewScanner(r io.Reader) *Scanner {
58	_, ok := r.(io.ReadSeeker)
59
60	crc := crc32.NewIEEE()
61	return &Scanner{
62		r:          newScannerReader(r, crc),
63		crc:        crc,
64		IsSeekable: ok,
65	}
66}
67
68func (s *Scanner) Reset(r io.Reader) {
69	_, ok := r.(io.ReadSeeker)
70
71	s.r.Reset(r)
72	s.crc.Reset()
73	s.IsSeekable = ok
74	s.pendingObject = nil
75	s.version = 0
76	s.objects = 0
77}
78
79// Header reads the whole packfile header (signature, version and object count).
80// It returns the version and the object count and performs checks on the
81// validity of the signature and the version fields.
82func (s *Scanner) Header() (version, objects uint32, err error) {
83	if s.version != 0 {
84		return s.version, s.objects, nil
85	}
86
87	sig, err := s.readSignature()
88	if err != nil {
89		if err == io.EOF {
90			err = ErrEmptyPackfile
91		}
92
93		return
94	}
95
96	if !s.isValidSignature(sig) {
97		err = ErrBadSignature
98		return
99	}
100
101	version, err = s.readVersion()
102	s.version = version
103	if err != nil {
104		return
105	}
106
107	if !s.isSupportedVersion(version) {
108		err = ErrUnsupportedVersion.AddDetails("%d", version)
109		return
110	}
111
112	objects, err = s.readCount()
113	s.objects = objects
114	return
115}
116
117// readSignature reads an returns the signature field in the packfile.
118func (s *Scanner) readSignature() ([]byte, error) {
119	var sig = make([]byte, 4)
120	if _, err := io.ReadFull(s.r, sig); err != nil {
121		return []byte{}, err
122	}
123
124	return sig, nil
125}
126
127// isValidSignature returns if sig is a valid packfile signature.
128func (s *Scanner) isValidSignature(sig []byte) bool {
129	return bytes.Equal(sig, signature)
130}
131
132// readVersion reads and returns the version field of a packfile.
133func (s *Scanner) readVersion() (uint32, error) {
134	return binary.ReadUint32(s.r)
135}
136
137// isSupportedVersion returns whether version v is supported by the parser.
138// The current supported version is VersionSupported, defined above.
139func (s *Scanner) isSupportedVersion(v uint32) bool {
140	return v == VersionSupported
141}
142
143// readCount reads and returns the count of objects field of a packfile.
144func (s *Scanner) readCount() (uint32, error) {
145	return binary.ReadUint32(s.r)
146}
147
148// SeekObjectHeader seeks to specified offset and returns the ObjectHeader
149// for the next object in the reader
150func (s *Scanner) SeekObjectHeader(offset int64) (*ObjectHeader, error) {
151	// if seeking we assume that you are not interested in the header
152	if s.version == 0 {
153		s.version = VersionSupported
154	}
155
156	if _, err := s.r.Seek(offset, io.SeekStart); err != nil {
157		return nil, err
158	}
159
160	h, err := s.nextObjectHeader()
161	if err != nil {
162		return nil, err
163	}
164
165	h.Offset = offset
166	return h, nil
167}
168
169// NextObjectHeader returns the ObjectHeader for the next object in the reader
170func (s *Scanner) NextObjectHeader() (*ObjectHeader, error) {
171	if err := s.doPending(); err != nil {
172		return nil, err
173	}
174
175	offset, err := s.r.Seek(0, io.SeekCurrent)
176	if err != nil {
177		return nil, err
178	}
179
180	h, err := s.nextObjectHeader()
181	if err != nil {
182		return nil, err
183	}
184
185	h.Offset = offset
186	return h, nil
187}
188
189// nextObjectHeader returns the ObjectHeader for the next object in the reader
190// without the Offset field
191func (s *Scanner) nextObjectHeader() (*ObjectHeader, error) {
192	s.r.Flush()
193	s.crc.Reset()
194
195	h := &ObjectHeader{}
196	s.pendingObject = h
197
198	var err error
199	h.Offset, err = s.r.Seek(0, io.SeekCurrent)
200	if err != nil {
201		return nil, err
202	}
203
204	h.Type, h.Length, err = s.readObjectTypeAndLength()
205	if err != nil {
206		return nil, err
207	}
208
209	switch h.Type {
210	case plumbing.OFSDeltaObject:
211		no, err := binary.ReadVariableWidthInt(s.r)
212		if err != nil {
213			return nil, err
214		}
215
216		h.OffsetReference = h.Offset - no
217	case plumbing.REFDeltaObject:
218		var err error
219		h.Reference, err = binary.ReadHash(s.r)
220		if err != nil {
221			return nil, err
222		}
223	}
224
225	return h, nil
226}
227
228func (s *Scanner) doPending() error {
229	if s.version == 0 {
230		var err error
231		s.version, s.objects, err = s.Header()
232		if err != nil {
233			return err
234		}
235	}
236
237	return s.discardObjectIfNeeded()
238}
239
240func (s *Scanner) discardObjectIfNeeded() error {
241	if s.pendingObject == nil {
242		return nil
243	}
244
245	h := s.pendingObject
246	n, _, err := s.NextObject(stdioutil.Discard)
247	if err != nil {
248		return err
249	}
250
251	if n != h.Length {
252		return fmt.Errorf(
253			"error discarding object, discarded %d, expected %d",
254			n, h.Length,
255		)
256	}
257
258	return nil
259}
260
261// ReadObjectTypeAndLength reads and returns the object type and the
262// length field from an object entry in a packfile.
263func (s *Scanner) readObjectTypeAndLength() (plumbing.ObjectType, int64, error) {
264	t, c, err := s.readType()
265	if err != nil {
266		return t, 0, err
267	}
268
269	l, err := s.readLength(c)
270
271	return t, l, err
272}
273
274func (s *Scanner) readType() (plumbing.ObjectType, byte, error) {
275	var c byte
276	var err error
277	if c, err = s.r.ReadByte(); err != nil {
278		return plumbing.ObjectType(0), 0, err
279	}
280
281	typ := parseType(c)
282
283	return typ, c, nil
284}
285
286func parseType(b byte) plumbing.ObjectType {
287	return plumbing.ObjectType((b & maskType) >> firstLengthBits)
288}
289
290// the length is codified in the last 4 bits of the first byte and in
291// the last 7 bits of subsequent bytes.  Last byte has a 0 MSB.
292func (s *Scanner) readLength(first byte) (int64, error) {
293	length := int64(first & maskFirstLength)
294
295	c := first
296	shift := firstLengthBits
297	var err error
298	for c&maskContinue > 0 {
299		if c, err = s.r.ReadByte(); err != nil {
300			return 0, err
301		}
302
303		length += int64(c&maskLength) << shift
304		shift += lengthBits
305	}
306
307	return length, nil
308}
309
310// NextObject writes the content of the next object into the reader, returns
311// the number of bytes written, the CRC32 of the content and an error, if any
312func (s *Scanner) NextObject(w io.Writer) (written int64, crc32 uint32, err error) {
313	s.pendingObject = nil
314	written, err = s.copyObject(w)
315
316	s.r.Flush()
317	crc32 = s.crc.Sum32()
318	s.crc.Reset()
319
320	return
321}
322
323// ReadObject returns a reader for the object content and an error
324func (s *Scanner) ReadObject() (io.ReadCloser, error) {
325	s.pendingObject = nil
326	zr := zlibReaderPool.Get().(io.ReadCloser)
327
328	if err := zr.(zlib.Resetter).Reset(s.r, nil); err != nil {
329		return nil, fmt.Errorf("zlib reset error: %s", err)
330	}
331
332	return ioutil.NewReadCloserWithCloser(zr, func() error {
333		zlibReaderPool.Put(zr)
334		return nil
335	}), nil
336}
337
338// ReadRegularObject reads and write a non-deltified object
339// from it zlib stream in an object entry in the packfile.
340func (s *Scanner) copyObject(w io.Writer) (n int64, err error) {
341	zr := zlibReaderPool.Get().(io.ReadCloser)
342	defer zlibReaderPool.Put(zr)
343
344	if err = zr.(zlib.Resetter).Reset(s.r, nil); err != nil {
345		return 0, fmt.Errorf("zlib reset error: %s", err)
346	}
347
348	defer ioutil.CheckClose(zr, &err)
349	buf := byteSlicePool.Get().([]byte)
350	n, err = io.CopyBuffer(w, zr, buf)
351	byteSlicePool.Put(buf)
352	return
353}
354
// byteSlicePool hands out reusable 32 KiB byte slices that serve as scratch
// buffers for io.CopyBuffer.
var byteSlicePool = sync.Pool{New: func() interface{} {
	const size = 32 << 10

	return make([]byte, size)
}}
360
361// SeekFromStart sets a new offset from start, returns the old position before
362// the change.
363func (s *Scanner) SeekFromStart(offset int64) (previous int64, err error) {
364	// if seeking we assume that you are not interested in the header
365	if s.version == 0 {
366		s.version = VersionSupported
367	}
368
369	previous, err = s.r.Seek(0, io.SeekCurrent)
370	if err != nil {
371		return -1, err
372	}
373
374	_, err = s.r.Seek(offset, io.SeekStart)
375	return previous, err
376}
377
378// Checksum returns the checksum of the packfile
379func (s *Scanner) Checksum() (plumbing.Hash, error) {
380	err := s.discardObjectIfNeeded()
381	if err != nil {
382		return plumbing.ZeroHash, err
383	}
384
385	return binary.ReadHash(s.r)
386}
387
388// Close reads the reader until io.EOF
389func (s *Scanner) Close() error {
390	buf := byteSlicePool.Get().([]byte)
391	_, err := io.CopyBuffer(stdioutil.Discard, s.r, buf)
392	byteSlicePool.Put(buf)
393	return err
394}
395
// Flush does nothing and always returns nil.
//
// Deprecated: Flush is a no-op and does not need to be called.
func (s *Scanner) Flush() error {
	return nil
}
400
// scannerReader has the following characteristics:
//   - Provides an io.ReadSeeker implementation on top of a bufio.Reader, when
//     the underlying reader supports seeking.
//   - Keeps track of the current read position, for when the underlying reader
//     isn't an io.ReadSeeker, but we still want to know the current offset.
//   - Writes to the hash writer what it reads, with the aid of a smaller buffer.
//     The buffer helps avoid a performance penalty for performing small writes
//     to the crc32 hash writer.
type scannerReader struct {
	// reader is the underlying source of bytes.
	reader io.Reader
	// crc receives, through wbuf, a copy of every byte that is read.
	crc io.Writer
	// rbuf is the buffered reader placed in front of reader.
	rbuf *bufio.Reader
	// wbuf is the small buffered writer placed in front of crc.
	wbuf *bufio.Writer
	// offset is the current read position.
	offset int64
}
416
417func newScannerReader(r io.Reader, h io.Writer) *scannerReader {
418	sr := &scannerReader{
419		rbuf: bufio.NewReader(nil),
420		wbuf: bufio.NewWriterSize(nil, 64),
421		crc:  h,
422	}
423	sr.Reset(r)
424
425	return sr
426}
427
428func (r *scannerReader) Reset(reader io.Reader) {
429	r.reader = reader
430	r.rbuf.Reset(r.reader)
431	r.wbuf.Reset(r.crc)
432
433	r.offset = 0
434	if seeker, ok := r.reader.(io.ReadSeeker); ok {
435		r.offset, _ = seeker.Seek(0, io.SeekCurrent)
436	}
437}
438
439func (r *scannerReader) Read(p []byte) (n int, err error) {
440	n, err = r.rbuf.Read(p)
441
442	r.offset += int64(n)
443	if _, err := r.wbuf.Write(p[:n]); err != nil {
444		return n, err
445	}
446	return
447}
448
449func (r *scannerReader) ReadByte() (b byte, err error) {
450	b, err = r.rbuf.ReadByte()
451	if err == nil {
452		r.offset++
453		return b, r.wbuf.WriteByte(b)
454	}
455	return
456}
457
458func (r *scannerReader) Flush() error {
459	return r.wbuf.Flush()
460}
461
462// Seek seeks to a location. If the underlying reader is not an io.ReadSeeker,
463// then only whence=io.SeekCurrent is supported, any other operation fails.
464func (r *scannerReader) Seek(offset int64, whence int) (int64, error) {
465	var err error
466
467	if seeker, ok := r.reader.(io.ReadSeeker); !ok {
468		if whence != io.SeekCurrent || offset != 0 {
469			return -1, ErrSeekNotSupported
470		}
471	} else {
472		if whence == io.SeekCurrent && offset == 0 {
473			return r.offset, nil
474		}
475
476		r.offset, err = seeker.Seek(offset, whence)
477		r.rbuf.Reset(r.reader)
478	}
479
480	return r.offset, err
481}
482