1package idxfile 2 3import ( 4 "bytes" 5 "io" 6 "sort" 7 8 encbin "encoding/binary" 9 10 "github.com/go-git/go-git/v5/plumbing" 11) 12 13const ( 14 // VersionSupported is the only idx version supported. 15 VersionSupported = 2 16 17 noMapping = -1 18) 19 20var ( 21 idxHeader = []byte{255, 't', 'O', 'c'} 22) 23 24// Index represents an index of a packfile. 25type Index interface { 26 // Contains checks whether the given hash is in the index. 27 Contains(h plumbing.Hash) (bool, error) 28 // FindOffset finds the offset in the packfile for the object with 29 // the given hash. 30 FindOffset(h plumbing.Hash) (int64, error) 31 // FindCRC32 finds the CRC32 of the object with the given hash. 32 FindCRC32(h plumbing.Hash) (uint32, error) 33 // FindHash finds the hash for the object with the given offset. 34 FindHash(o int64) (plumbing.Hash, error) 35 // Count returns the number of entries in the index. 36 Count() (int64, error) 37 // Entries returns an iterator to retrieve all index entries. 38 Entries() (EntryIter, error) 39 // EntriesByOffset returns an iterator to retrieve all index entries ordered 40 // by offset. 41 EntriesByOffset() (EntryIter, error) 42} 43 44// MemoryIndex is the in memory representation of an idx file. 45type MemoryIndex struct { 46 Version uint32 47 Fanout [256]uint32 48 // FanoutMapping maps the position in the fanout table to the position 49 // in the Names, Offset32 and CRC32 slices. This improves the memory 50 // usage by not needing an array with unnecessary empty slots. 51 FanoutMapping [256]int 52 Names [][]byte 53 Offset32 [][]byte 54 CRC32 [][]byte 55 Offset64 []byte 56 PackfileChecksum [20]byte 57 IdxChecksum [20]byte 58 59 offsetHash map[int64]plumbing.Hash 60 offsetHashIsFull bool 61} 62 63var _ Index = (*MemoryIndex)(nil) 64 65// NewMemoryIndex returns an instance of a new MemoryIndex. 66func NewMemoryIndex() *MemoryIndex { 67 return &MemoryIndex{} 68} 69 70func (idx *MemoryIndex) findHashIndex(h plumbing.Hash) (int, bool) { 71 k := idx.FanoutMapping[h[0]] 72 if k == noMapping { 73 return 0, false 74 } 75 76 if len(idx.Names) <= k { 77 return 0, false 78 } 79 80 data := idx.Names[k] 81 high := uint64(len(idx.Offset32[k])) >> 2 82 if high == 0 { 83 return 0, false 84 } 85 86 low := uint64(0) 87 for { 88 mid := (low + high) >> 1 89 offset := mid * objectIDLength 90 91 cmp := bytes.Compare(h[:], data[offset:offset+objectIDLength]) 92 if cmp < 0 { 93 high = mid 94 } else if cmp == 0 { 95 return int(mid), true 96 } else { 97 low = mid + 1 98 } 99 100 if low >= high { 101 break 102 } 103 } 104 105 return 0, false 106} 107 108// Contains implements the Index interface. 109func (idx *MemoryIndex) Contains(h plumbing.Hash) (bool, error) { 110 _, ok := idx.findHashIndex(h) 111 return ok, nil 112} 113 114// FindOffset implements the Index interface. 115func (idx *MemoryIndex) FindOffset(h plumbing.Hash) (int64, error) { 116 if len(idx.FanoutMapping) <= int(h[0]) { 117 return 0, plumbing.ErrObjectNotFound 118 } 119 120 k := idx.FanoutMapping[h[0]] 121 i, ok := idx.findHashIndex(h) 122 if !ok { 123 return 0, plumbing.ErrObjectNotFound 124 } 125 126 offset := idx.getOffset(k, i) 127 128 if !idx.offsetHashIsFull { 129 // Save the offset for reverse lookup 130 if idx.offsetHash == nil { 131 idx.offsetHash = make(map[int64]plumbing.Hash) 132 } 133 idx.offsetHash[int64(offset)] = h 134 } 135 136 return int64(offset), nil 137} 138 139const isO64Mask = uint64(1) << 31 140 141func (idx *MemoryIndex) getOffset(firstLevel, secondLevel int) uint64 { 142 offset := secondLevel << 2 143 ofs := encbin.BigEndian.Uint32(idx.Offset32[firstLevel][offset : offset+4]) 144 145 if (uint64(ofs) & isO64Mask) != 0 { 146 offset := 8 * (uint64(ofs) & ^isO64Mask) 147 n := encbin.BigEndian.Uint64(idx.Offset64[offset : offset+8]) 148 return n 149 } 150 151 return uint64(ofs) 152} 153 154// FindCRC32 implements the Index interface. 155func (idx *MemoryIndex) FindCRC32(h plumbing.Hash) (uint32, error) { 156 k := idx.FanoutMapping[h[0]] 157 i, ok := idx.findHashIndex(h) 158 if !ok { 159 return 0, plumbing.ErrObjectNotFound 160 } 161 162 return idx.getCRC32(k, i), nil 163} 164 165func (idx *MemoryIndex) getCRC32(firstLevel, secondLevel int) uint32 { 166 offset := secondLevel << 2 167 return encbin.BigEndian.Uint32(idx.CRC32[firstLevel][offset : offset+4]) 168} 169 170// FindHash implements the Index interface. 171func (idx *MemoryIndex) FindHash(o int64) (plumbing.Hash, error) { 172 var hash plumbing.Hash 173 var ok bool 174 175 if idx.offsetHash != nil { 176 if hash, ok = idx.offsetHash[o]; ok { 177 return hash, nil 178 } 179 } 180 181 // Lazily generate the reverse offset/hash map if required. 182 if !idx.offsetHashIsFull || idx.offsetHash == nil { 183 if err := idx.genOffsetHash(); err != nil { 184 return plumbing.ZeroHash, err 185 } 186 187 hash, ok = idx.offsetHash[o] 188 } 189 190 if !ok { 191 return plumbing.ZeroHash, plumbing.ErrObjectNotFound 192 } 193 194 return hash, nil 195} 196 197// genOffsetHash generates the offset/hash mapping for reverse search. 198func (idx *MemoryIndex) genOffsetHash() error { 199 count, err := idx.Count() 200 if err != nil { 201 return err 202 } 203 204 idx.offsetHash = make(map[int64]plumbing.Hash, count) 205 idx.offsetHashIsFull = true 206 207 var hash plumbing.Hash 208 i := uint32(0) 209 for firstLevel, fanoutValue := range idx.Fanout { 210 mappedFirstLevel := idx.FanoutMapping[firstLevel] 211 for secondLevel := uint32(0); i < fanoutValue; i++ { 212 copy(hash[:], idx.Names[mappedFirstLevel][secondLevel*objectIDLength:]) 213 offset := int64(idx.getOffset(mappedFirstLevel, int(secondLevel))) 214 idx.offsetHash[offset] = hash 215 secondLevel++ 216 } 217 } 218 219 return nil 220} 221 222// Count implements the Index interface. 223func (idx *MemoryIndex) Count() (int64, error) { 224 return int64(idx.Fanout[fanout-1]), nil 225} 226 227// Entries implements the Index interface. 228func (idx *MemoryIndex) Entries() (EntryIter, error) { 229 return &idxfileEntryIter{idx, 0, 0, 0}, nil 230} 231 232// EntriesByOffset implements the Index interface. 233func (idx *MemoryIndex) EntriesByOffset() (EntryIter, error) { 234 count, err := idx.Count() 235 if err != nil { 236 return nil, err 237 } 238 239 iter := &idxfileEntryOffsetIter{ 240 entries: make(entriesByOffset, count), 241 } 242 243 entries, err := idx.Entries() 244 if err != nil { 245 return nil, err 246 } 247 248 for pos := 0; int64(pos) < count; pos++ { 249 entry, err := entries.Next() 250 if err != nil { 251 return nil, err 252 } 253 254 iter.entries[pos] = entry 255 } 256 257 sort.Sort(iter.entries) 258 259 return iter, nil 260} 261 262// EntryIter is an iterator that will return the entries in a packfile index. 263type EntryIter interface { 264 // Next returns the next entry in the packfile index. 265 Next() (*Entry, error) 266 // Close closes the iterator. 267 Close() error 268} 269 270type idxfileEntryIter struct { 271 idx *MemoryIndex 272 total int 273 firstLevel, secondLevel int 274} 275 276func (i *idxfileEntryIter) Next() (*Entry, error) { 277 for { 278 if i.firstLevel >= fanout { 279 return nil, io.EOF 280 } 281 282 if i.total >= int(i.idx.Fanout[i.firstLevel]) { 283 i.firstLevel++ 284 i.secondLevel = 0 285 continue 286 } 287 288 mappedFirstLevel := i.idx.FanoutMapping[i.firstLevel] 289 entry := new(Entry) 290 copy(entry.Hash[:], i.idx.Names[mappedFirstLevel][i.secondLevel*objectIDLength:]) 291 entry.Offset = i.idx.getOffset(mappedFirstLevel, i.secondLevel) 292 entry.CRC32 = i.idx.getCRC32(mappedFirstLevel, i.secondLevel) 293 294 i.secondLevel++ 295 i.total++ 296 297 return entry, nil 298 } 299} 300 301func (i *idxfileEntryIter) Close() error { 302 i.firstLevel = fanout 303 return nil 304} 305 306// Entry is the in memory representation of an object entry in the idx file. 307type Entry struct { 308 Hash plumbing.Hash 309 CRC32 uint32 310 Offset uint64 311} 312 313type idxfileEntryOffsetIter struct { 314 entries entriesByOffset 315 pos int 316} 317 318func (i *idxfileEntryOffsetIter) Next() (*Entry, error) { 319 if i.pos >= len(i.entries) { 320 return nil, io.EOF 321 } 322 323 entry := i.entries[i.pos] 324 i.pos++ 325 326 return entry, nil 327} 328 329func (i *idxfileEntryOffsetIter) Close() error { 330 i.pos = len(i.entries) + 1 331 return nil 332} 333 334type entriesByOffset []*Entry 335 336func (o entriesByOffset) Len() int { 337 return len(o) 338} 339 340func (o entriesByOffset) Less(i int, j int) bool { 341 return o[i].Offset < o[j].Offset 342} 343 344func (o entriesByOffset) Swap(i int, j int) { 345 o[i], o[j] = o[j], o[i] 346} 347