1package idxfile
2
3import (
4	"bytes"
5	"io"
6	"sort"
7
8	encbin "encoding/binary"
9
10	"github.com/go-git/go-git/v5/plumbing"
11)
12
13const (
14	// VersionSupported is the only idx version supported.
15	VersionSupported = 2
16
17	noMapping = -1
18)
19
20var (
21	idxHeader = []byte{255, 't', 'O', 'c'}
22)
23
24// Index represents an index of a packfile.
25type Index interface {
26	// Contains checks whether the given hash is in the index.
27	Contains(h plumbing.Hash) (bool, error)
28	// FindOffset finds the offset in the packfile for the object with
29	// the given hash.
30	FindOffset(h plumbing.Hash) (int64, error)
31	// FindCRC32 finds the CRC32 of the object with the given hash.
32	FindCRC32(h plumbing.Hash) (uint32, error)
33	// FindHash finds the hash for the object with the given offset.
34	FindHash(o int64) (plumbing.Hash, error)
35	// Count returns the number of entries in the index.
36	Count() (int64, error)
37	// Entries returns an iterator to retrieve all index entries.
38	Entries() (EntryIter, error)
39	// EntriesByOffset returns an iterator to retrieve all index entries ordered
40	// by offset.
41	EntriesByOffset() (EntryIter, error)
42}
43
44// MemoryIndex is the in memory representation of an idx file.
45type MemoryIndex struct {
46	Version uint32
47	Fanout  [256]uint32
48	// FanoutMapping maps the position in the fanout table to the position
49	// in the Names, Offset32 and CRC32 slices. This improves the memory
50	// usage by not needing an array with unnecessary empty slots.
51	FanoutMapping    [256]int
52	Names            [][]byte
53	Offset32         [][]byte
54	CRC32            [][]byte
55	Offset64         []byte
56	PackfileChecksum [20]byte
57	IdxChecksum      [20]byte
58
59	offsetHash       map[int64]plumbing.Hash
60	offsetHashIsFull bool
61}
62
63var _ Index = (*MemoryIndex)(nil)
64
65// NewMemoryIndex returns an instance of a new MemoryIndex.
66func NewMemoryIndex() *MemoryIndex {
67	return &MemoryIndex{}
68}
69
70func (idx *MemoryIndex) findHashIndex(h plumbing.Hash) (int, bool) {
71	k := idx.FanoutMapping[h[0]]
72	if k == noMapping {
73		return 0, false
74	}
75
76	if len(idx.Names) <= k {
77		return 0, false
78	}
79
80	data := idx.Names[k]
81	high := uint64(len(idx.Offset32[k])) >> 2
82	if high == 0 {
83		return 0, false
84	}
85
86	low := uint64(0)
87	for {
88		mid := (low + high) >> 1
89		offset := mid * objectIDLength
90
91		cmp := bytes.Compare(h[:], data[offset:offset+objectIDLength])
92		if cmp < 0 {
93			high = mid
94		} else if cmp == 0 {
95			return int(mid), true
96		} else {
97			low = mid + 1
98		}
99
100		if low >= high {
101			break
102		}
103	}
104
105	return 0, false
106}
107
108// Contains implements the Index interface.
109func (idx *MemoryIndex) Contains(h plumbing.Hash) (bool, error) {
110	_, ok := idx.findHashIndex(h)
111	return ok, nil
112}
113
114// FindOffset implements the Index interface.
115func (idx *MemoryIndex) FindOffset(h plumbing.Hash) (int64, error) {
116	if len(idx.FanoutMapping) <= int(h[0]) {
117		return 0, plumbing.ErrObjectNotFound
118	}
119
120	k := idx.FanoutMapping[h[0]]
121	i, ok := idx.findHashIndex(h)
122	if !ok {
123		return 0, plumbing.ErrObjectNotFound
124	}
125
126	offset := idx.getOffset(k, i)
127
128	if !idx.offsetHashIsFull {
129		// Save the offset for reverse lookup
130		if idx.offsetHash == nil {
131			idx.offsetHash = make(map[int64]plumbing.Hash)
132		}
133		idx.offsetHash[int64(offset)] = h
134	}
135
136	return int64(offset), nil
137}
138
139const isO64Mask = uint64(1) << 31
140
141func (idx *MemoryIndex) getOffset(firstLevel, secondLevel int) uint64 {
142	offset := secondLevel << 2
143	ofs := encbin.BigEndian.Uint32(idx.Offset32[firstLevel][offset : offset+4])
144
145	if (uint64(ofs) & isO64Mask) != 0 {
146		offset := 8 * (uint64(ofs) & ^isO64Mask)
147		n := encbin.BigEndian.Uint64(idx.Offset64[offset : offset+8])
148		return n
149	}
150
151	return uint64(ofs)
152}
153
154// FindCRC32 implements the Index interface.
155func (idx *MemoryIndex) FindCRC32(h plumbing.Hash) (uint32, error) {
156	k := idx.FanoutMapping[h[0]]
157	i, ok := idx.findHashIndex(h)
158	if !ok {
159		return 0, plumbing.ErrObjectNotFound
160	}
161
162	return idx.getCRC32(k, i), nil
163}
164
165func (idx *MemoryIndex) getCRC32(firstLevel, secondLevel int) uint32 {
166	offset := secondLevel << 2
167	return encbin.BigEndian.Uint32(idx.CRC32[firstLevel][offset : offset+4])
168}
169
170// FindHash implements the Index interface.
171func (idx *MemoryIndex) FindHash(o int64) (plumbing.Hash, error) {
172	var hash plumbing.Hash
173	var ok bool
174
175	if idx.offsetHash != nil {
176		if hash, ok = idx.offsetHash[o]; ok {
177			return hash, nil
178		}
179	}
180
181	// Lazily generate the reverse offset/hash map if required.
182	if !idx.offsetHashIsFull || idx.offsetHash == nil {
183		if err := idx.genOffsetHash(); err != nil {
184			return plumbing.ZeroHash, err
185		}
186
187		hash, ok = idx.offsetHash[o]
188	}
189
190	if !ok {
191		return plumbing.ZeroHash, plumbing.ErrObjectNotFound
192	}
193
194	return hash, nil
195}
196
197// genOffsetHash generates the offset/hash mapping for reverse search.
198func (idx *MemoryIndex) genOffsetHash() error {
199	count, err := idx.Count()
200	if err != nil {
201		return err
202	}
203
204	idx.offsetHash = make(map[int64]plumbing.Hash, count)
205	idx.offsetHashIsFull = true
206
207	var hash plumbing.Hash
208	i := uint32(0)
209	for firstLevel, fanoutValue := range idx.Fanout {
210		mappedFirstLevel := idx.FanoutMapping[firstLevel]
211		for secondLevel := uint32(0); i < fanoutValue; i++ {
212			copy(hash[:], idx.Names[mappedFirstLevel][secondLevel*objectIDLength:])
213			offset := int64(idx.getOffset(mappedFirstLevel, int(secondLevel)))
214			idx.offsetHash[offset] = hash
215			secondLevel++
216		}
217	}
218
219	return nil
220}
221
222// Count implements the Index interface.
223func (idx *MemoryIndex) Count() (int64, error) {
224	return int64(idx.Fanout[fanout-1]), nil
225}
226
227// Entries implements the Index interface.
228func (idx *MemoryIndex) Entries() (EntryIter, error) {
229	return &idxfileEntryIter{idx, 0, 0, 0}, nil
230}
231
232// EntriesByOffset implements the Index interface.
233func (idx *MemoryIndex) EntriesByOffset() (EntryIter, error) {
234	count, err := idx.Count()
235	if err != nil {
236		return nil, err
237	}
238
239	iter := &idxfileEntryOffsetIter{
240		entries: make(entriesByOffset, count),
241	}
242
243	entries, err := idx.Entries()
244	if err != nil {
245		return nil, err
246	}
247
248	for pos := 0; int64(pos) < count; pos++ {
249		entry, err := entries.Next()
250		if err != nil {
251			return nil, err
252		}
253
254		iter.entries[pos] = entry
255	}
256
257	sort.Sort(iter.entries)
258
259	return iter, nil
260}
261
262// EntryIter is an iterator that will return the entries in a packfile index.
263type EntryIter interface {
264	// Next returns the next entry in the packfile index.
265	Next() (*Entry, error)
266	// Close closes the iterator.
267	Close() error
268}
269
270type idxfileEntryIter struct {
271	idx                     *MemoryIndex
272	total                   int
273	firstLevel, secondLevel int
274}
275
276func (i *idxfileEntryIter) Next() (*Entry, error) {
277	for {
278		if i.firstLevel >= fanout {
279			return nil, io.EOF
280		}
281
282		if i.total >= int(i.idx.Fanout[i.firstLevel]) {
283			i.firstLevel++
284			i.secondLevel = 0
285			continue
286		}
287
288		mappedFirstLevel := i.idx.FanoutMapping[i.firstLevel]
289		entry := new(Entry)
290		copy(entry.Hash[:], i.idx.Names[mappedFirstLevel][i.secondLevel*objectIDLength:])
291		entry.Offset = i.idx.getOffset(mappedFirstLevel, i.secondLevel)
292		entry.CRC32 = i.idx.getCRC32(mappedFirstLevel, i.secondLevel)
293
294		i.secondLevel++
295		i.total++
296
297		return entry, nil
298	}
299}
300
301func (i *idxfileEntryIter) Close() error {
302	i.firstLevel = fanout
303	return nil
304}
305
306// Entry is the in memory representation of an object entry in the idx file.
307type Entry struct {
308	Hash   plumbing.Hash
309	CRC32  uint32
310	Offset uint64
311}
312
313type idxfileEntryOffsetIter struct {
314	entries entriesByOffset
315	pos     int
316}
317
318func (i *idxfileEntryOffsetIter) Next() (*Entry, error) {
319	if i.pos >= len(i.entries) {
320		return nil, io.EOF
321	}
322
323	entry := i.entries[i.pos]
324	i.pos++
325
326	return entry, nil
327}
328
329func (i *idxfileEntryOffsetIter) Close() error {
330	i.pos = len(i.entries) + 1
331	return nil
332}
333
334type entriesByOffset []*Entry
335
336func (o entriesByOffset) Len() int {
337	return len(o)
338}
339
340func (o entriesByOffset) Less(i int, j int) bool {
341	return o[i].Offset < o[j].Offset
342}
343
344func (o entriesByOffset) Swap(i int, j int) {
345	o[i], o[j] = o[j], o[i]
346}
347