1package packfile 2 3import ( 4 "bufio" 5 "bytes" 6 "encoding/binary" 7 "encoding/hex" 8 "fmt" 9 "io" 10 "math" 11 "math/big" 12 "os" 13 14 "gitlab.com/gitlab-org/gitaly/v14/internal/git/gitio" 15) 16 17// IndexBitmap is the in-memory representation of a .bitmap file. 18type IndexBitmap struct { 19 Commits *Bitmap 20 Trees *Bitmap 21 Blobs *Bitmap 22 Tags *Bitmap 23 bitmapCommits []*BitmapCommit 24 flags int 25} 26 27// BitmapCommit represents a bitmapped commit, i.e. a commit in the 28// packfile plus a bitmap indicating which objects are reachable from 29// that commit. 30type BitmapCommit struct { 31 OID string 32 *Bitmap 33 xorOffset byte 34 flags byte 35} 36 37// LoadBitmap opens the .bitmap file corresponding to idx and loads it 38// into memory. Returns an error if there is no .bitmap. 39func (idx *Index) LoadBitmap() error { 40 if idx.IndexBitmap != nil { 41 return nil 42 } 43 44 f, err := os.Open(idx.packBase + ".bitmap") 45 if err != nil { 46 return err 47 } 48 defer f.Close() 49 50 r := bufio.NewReader(gitio.NewHashfileReader(f)) 51 52 ib := &IndexBitmap{} 53 if err := ib.parseIndexBitmapHeader(r, idx); err != nil { 54 return err 55 } 56 57 for _, ptr := range []**Bitmap{&ib.Commits, &ib.Trees, &ib.Blobs, &ib.Tags} { 58 *ptr, err = ReadEWAH(r) 59 if err != nil { 60 return err 61 } 62 63 if err := (*ptr).Unpack(); err != nil { 64 return err 65 } 66 } 67 68 for i := range ib.bitmapCommits { 69 header, err := readN(r, 6) 70 if err != nil { 71 return err 72 } 73 74 bc := &BitmapCommit{ 75 OID: idx.Objects[binary.BigEndian.Uint32(header[:4])].OID, 76 xorOffset: header[4], 77 flags: header[5], 78 } 79 80 if bc.Bitmap, err = ReadEWAH(r); err != nil { 81 return err 82 } 83 84 ib.bitmapCommits[i] = bc 85 } 86 87 if ib.flags&bitmapOptHashCache > 0 { 88 // Discard bitmap hash cache 89 for range idx.Objects { 90 if _, err := r.Discard(4); err != nil { 91 return err 92 } 93 } 94 } 95 96 if _, err := r.Peek(1); err != io.EOF { 97 return fmt.Errorf("expected EOF, got %v", err) 98 } 99 100 idx.IndexBitmap = ib 101 return nil 102} 103 104const ( 105 bitmapOptFullDAG = 1 // BITMAP_OPT_FULL_DAG 106 bitmapOptHashCache = 4 // BITMAP_OPT_HASH_CACHE 107) 108 109func (ib *IndexBitmap) parseIndexBitmapHeader(r io.Reader, idx *Index) error { 110 const headerLen = 32 111 header, err := readN(r, headerLen) 112 if err != nil { 113 return err 114 } 115 116 const sig = "BITM\x00\x01" 117 if actualSig := string(header[:len(sig)]); actualSig != sig { 118 return fmt.Errorf("unexpected signature %q", actualSig) 119 } 120 header = header[len(sig):] 121 122 const flagLen = 2 123 ib.flags = int(binary.BigEndian.Uint16(header[:flagLen])) 124 header = header[flagLen:] 125 126 const knownFlags = bitmapOptFullDAG | bitmapOptHashCache 127 if ib.flags&^knownFlags != 0 || (ib.flags&bitmapOptFullDAG == 0) { 128 return fmt.Errorf("invalid flags %x", ib.flags) 129 } 130 131 const countLen = 4 132 count := binary.BigEndian.Uint32(header[:countLen]) 133 header = header[countLen:] 134 ib.bitmapCommits = make([]*BitmapCommit, count) 135 136 if s := hex.EncodeToString(header); s != idx.ID { 137 return fmt.Errorf("unexpected pack ID in bitmap header: %s", s) 138 } 139 140 return nil 141} 142 143// NumBitmapCommits returns the number of indexed commits in the .bitmap file. 144func (ib *IndexBitmap) NumBitmapCommits() int { return len(ib.bitmapCommits) } 145 146// BitmapCommit retrieves a bitmap commit, along with its bitmap. If the 147// bitmap is XOR-compressed this will decompress it. 148func (ib *IndexBitmap) BitmapCommit(i int) (*BitmapCommit, error) { 149 if i >= ib.NumBitmapCommits() { 150 return nil, fmt.Errorf("bitmap commit index %d out of range", i) 151 } 152 153 // This is wasteful but correct: bitmap commit i may depend, via XOR, on 154 // a chain of preceding commits j_0,..., j_m < i. Instead of finding that 155 // chain we just build and XOR all commits up to and including i. 156 for j, bc := range ib.bitmapCommits[:i+1] { 157 if bc.Bitmap.bm != nil { 158 continue 159 } 160 161 if err := bc.Bitmap.Unpack(); err != nil { 162 return nil, err 163 } 164 165 if k := int(bc.xorOffset); k > 0 { 166 bm := bc.Bitmap.bm 167 bm.Xor(bm, ib.bitmapCommits[j-k].Bitmap.bm) 168 } 169 } 170 171 return ib.bitmapCommits[i], nil 172} 173 174// Bitmap represents a bitmap as used in a packfile .bitmap file. 175type Bitmap struct { 176 bits int 177 words int 178 raw []byte 179 bm *big.Int 180} 181 182// ReadEWAH parses an EWAH-compressed bitmap into a *Bitmap. 183func ReadEWAH(r io.Reader) (*Bitmap, error) { 184 header := make([]byte, 8) 185 if _, err := io.ReadFull(r, header); err != nil { 186 return nil, err 187 } 188 189 e := &Bitmap{} 190 191 uBits := binary.BigEndian.Uint32(header[:4]) 192 if uBits > math.MaxInt32 { 193 return nil, fmt.Errorf("too many bits in bitmap: %d", uBits) 194 } 195 e.bits = int(uBits) 196 197 uWords := binary.BigEndian.Uint32(header[4:]) 198 if uWords > math.MaxInt32 { 199 return nil, fmt.Errorf("too many words in bitmap: %d", uWords) 200 } 201 e.words = int(uWords) 202 203 const ewahTrailerLen = 4 204 rawSize := int64(e.words)*8 + ewahTrailerLen 205 if rawSize > math.MaxInt32 { 206 return nil, fmt.Errorf("bitmap does not fit in Go slice") 207 } 208 209 e.raw = make([]byte, int(rawSize)) 210 211 if _, err := io.ReadFull(r, e.raw); err != nil { 212 return nil, err 213 } 214 215 return e, nil 216} 217 218// Unpack expands e.raw, which is EWAH-compressed, into an uncompressed *big.Int. 219func (e *Bitmap) Unpack() error { 220 if e.bm != nil { 221 return nil 222 } 223 224 const ( 225 wordSize = 8 226 wordBits = 8 * wordSize 227 ) 228 229 nUnpackedWords := e.bits / wordBits 230 if e.bits%wordBits > 0 { 231 nUnpackedWords++ 232 } 233 234 buf := make([]byte, nUnpackedWords*wordSize) 235 bufPos := len(buf) 236 237 fillOnes := bytes.Repeat([]byte{0xff}, wordSize) 238 239 for i := 0; i < e.words; { 240 header := binary.BigEndian.Uint64(e.raw[wordSize*i : wordSize*(i+1)]) 241 i++ 242 243 cleanBit := int(header & 1) 244 nClean := uint32(header >> 1) 245 nDirty := uint32(header >> 33) 246 247 for ; nClean > 0; nClean-- { 248 // If cleanBit == 0 we don't have to do anything, because each byte in 249 // buf is initially zero. 250 if cleanBit == 1 { 251 copy( 252 buf[bufPos-wordSize:bufPos], 253 fillOnes, 254 ) 255 } 256 257 bufPos -= wordSize 258 } 259 260 for ; nDirty > 0; nDirty-- { 261 copy( 262 buf[bufPos-wordSize:bufPos], 263 e.raw[wordSize*i:wordSize*(i+1)], 264 ) 265 bufPos -= wordSize 266 i++ 267 } 268 } 269 270 e.bm = big.NewInt(0) 271 e.bm.SetBytes(buf) 272 273 return nil 274} 275 276// Scan traverses the bitmap and calls f for each bit which is 1. 277func (e *Bitmap) Scan(f func(int) error) error { 278 for i := 0; i < e.bits; i++ { 279 if e.bm.Bit(i) == 1 { 280 if err := f(i); err != nil { 281 return err 282 } 283 } 284 } 285 286 return nil 287} 288