1package packfile
2
3import (
4	"bufio"
5	"bytes"
6	"encoding/binary"
7	"encoding/hex"
8	"fmt"
9	"io"
10	"math"
11	"math/big"
12	"os"
13
14	"gitlab.com/gitlab-org/gitaly/v14/internal/git/gitio"
15)
16
17// IndexBitmap is the in-memory representation of a .bitmap file.
18type IndexBitmap struct {
19	Commits       *Bitmap
20	Trees         *Bitmap
21	Blobs         *Bitmap
22	Tags          *Bitmap
23	bitmapCommits []*BitmapCommit
24	flags         int
25}
26
27// BitmapCommit represents a bitmapped commit, i.e. a commit in the
28// packfile plus a bitmap indicating which objects are reachable from
29// that commit.
30type BitmapCommit struct {
31	OID string
32	*Bitmap
33	xorOffset byte
34	flags     byte
35}
36
37// LoadBitmap opens the .bitmap file corresponding to idx and loads it
38// into memory. Returns an error if there is no .bitmap.
39func (idx *Index) LoadBitmap() error {
40	if idx.IndexBitmap != nil {
41		return nil
42	}
43
44	f, err := os.Open(idx.packBase + ".bitmap")
45	if err != nil {
46		return err
47	}
48	defer f.Close()
49
50	r := bufio.NewReader(gitio.NewHashfileReader(f))
51
52	ib := &IndexBitmap{}
53	if err := ib.parseIndexBitmapHeader(r, idx); err != nil {
54		return err
55	}
56
57	for _, ptr := range []**Bitmap{&ib.Commits, &ib.Trees, &ib.Blobs, &ib.Tags} {
58		*ptr, err = ReadEWAH(r)
59		if err != nil {
60			return err
61		}
62
63		if err := (*ptr).Unpack(); err != nil {
64			return err
65		}
66	}
67
68	for i := range ib.bitmapCommits {
69		header, err := readN(r, 6)
70		if err != nil {
71			return err
72		}
73
74		bc := &BitmapCommit{
75			OID:       idx.Objects[binary.BigEndian.Uint32(header[:4])].OID,
76			xorOffset: header[4],
77			flags:     header[5],
78		}
79
80		if bc.Bitmap, err = ReadEWAH(r); err != nil {
81			return err
82		}
83
84		ib.bitmapCommits[i] = bc
85	}
86
87	if ib.flags&bitmapOptHashCache > 0 {
88		// Discard bitmap hash cache
89		for range idx.Objects {
90			if _, err := r.Discard(4); err != nil {
91				return err
92			}
93		}
94	}
95
96	if _, err := r.Peek(1); err != io.EOF {
97		return fmt.Errorf("expected EOF, got %v", err)
98	}
99
100	idx.IndexBitmap = ib
101	return nil
102}
103
104const (
105	bitmapOptFullDAG   = 1 // BITMAP_OPT_FULL_DAG
106	bitmapOptHashCache = 4 // BITMAP_OPT_HASH_CACHE
107)
108
109func (ib *IndexBitmap) parseIndexBitmapHeader(r io.Reader, idx *Index) error {
110	const headerLen = 32
111	header, err := readN(r, headerLen)
112	if err != nil {
113		return err
114	}
115
116	const sig = "BITM\x00\x01"
117	if actualSig := string(header[:len(sig)]); actualSig != sig {
118		return fmt.Errorf("unexpected signature %q", actualSig)
119	}
120	header = header[len(sig):]
121
122	const flagLen = 2
123	ib.flags = int(binary.BigEndian.Uint16(header[:flagLen]))
124	header = header[flagLen:]
125
126	const knownFlags = bitmapOptFullDAG | bitmapOptHashCache
127	if ib.flags&^knownFlags != 0 || (ib.flags&bitmapOptFullDAG == 0) {
128		return fmt.Errorf("invalid flags %x", ib.flags)
129	}
130
131	const countLen = 4
132	count := binary.BigEndian.Uint32(header[:countLen])
133	header = header[countLen:]
134	ib.bitmapCommits = make([]*BitmapCommit, count)
135
136	if s := hex.EncodeToString(header); s != idx.ID {
137		return fmt.Errorf("unexpected pack ID in bitmap header: %s", s)
138	}
139
140	return nil
141}
142
143// NumBitmapCommits returns the number of indexed commits in the .bitmap file.
144func (ib *IndexBitmap) NumBitmapCommits() int { return len(ib.bitmapCommits) }
145
146// BitmapCommit retrieves a bitmap commit, along with its bitmap. If the
147// bitmap is XOR-compressed this will decompress it.
148func (ib *IndexBitmap) BitmapCommit(i int) (*BitmapCommit, error) {
149	if i >= ib.NumBitmapCommits() {
150		return nil, fmt.Errorf("bitmap commit index %d out of range", i)
151	}
152
153	// This is wasteful but correct: bitmap commit i may depend, via XOR, on
154	// a chain of preceding commits j_0,..., j_m < i. Instead of finding that
155	// chain we just build and XOR all commits up to and including i.
156	for j, bc := range ib.bitmapCommits[:i+1] {
157		if bc.Bitmap.bm != nil {
158			continue
159		}
160
161		if err := bc.Bitmap.Unpack(); err != nil {
162			return nil, err
163		}
164
165		if k := int(bc.xorOffset); k > 0 {
166			bm := bc.Bitmap.bm
167			bm.Xor(bm, ib.bitmapCommits[j-k].Bitmap.bm)
168		}
169	}
170
171	return ib.bitmapCommits[i], nil
172}
173
174// Bitmap represents a bitmap as used in a packfile .bitmap file.
175type Bitmap struct {
176	bits  int
177	words int
178	raw   []byte
179	bm    *big.Int
180}
181
182// ReadEWAH parses an EWAH-compressed bitmap into a *Bitmap.
183func ReadEWAH(r io.Reader) (*Bitmap, error) {
184	header := make([]byte, 8)
185	if _, err := io.ReadFull(r, header); err != nil {
186		return nil, err
187	}
188
189	e := &Bitmap{}
190
191	uBits := binary.BigEndian.Uint32(header[:4])
192	if uBits > math.MaxInt32 {
193		return nil, fmt.Errorf("too many bits in bitmap: %d", uBits)
194	}
195	e.bits = int(uBits)
196
197	uWords := binary.BigEndian.Uint32(header[4:])
198	if uWords > math.MaxInt32 {
199		return nil, fmt.Errorf("too many words in bitmap: %d", uWords)
200	}
201	e.words = int(uWords)
202
203	const ewahTrailerLen = 4
204	rawSize := int64(e.words)*8 + ewahTrailerLen
205	if rawSize > math.MaxInt32 {
206		return nil, fmt.Errorf("bitmap does not fit in Go slice")
207	}
208
209	e.raw = make([]byte, int(rawSize))
210
211	if _, err := io.ReadFull(r, e.raw); err != nil {
212		return nil, err
213	}
214
215	return e, nil
216}
217
218// Unpack expands e.raw, which is EWAH-compressed, into an uncompressed *big.Int.
219func (e *Bitmap) Unpack() error {
220	if e.bm != nil {
221		return nil
222	}
223
224	const (
225		wordSize = 8
226		wordBits = 8 * wordSize
227	)
228
229	nUnpackedWords := e.bits / wordBits
230	if e.bits%wordBits > 0 {
231		nUnpackedWords++
232	}
233
234	buf := make([]byte, nUnpackedWords*wordSize)
235	bufPos := len(buf)
236
237	fillOnes := bytes.Repeat([]byte{0xff}, wordSize)
238
239	for i := 0; i < e.words; {
240		header := binary.BigEndian.Uint64(e.raw[wordSize*i : wordSize*(i+1)])
241		i++
242
243		cleanBit := int(header & 1)
244		nClean := uint32(header >> 1)
245		nDirty := uint32(header >> 33)
246
247		for ; nClean > 0; nClean-- {
248			// If cleanBit == 0 we don't have to do anything, because each byte in
249			// buf is initially zero.
250			if cleanBit == 1 {
251				copy(
252					buf[bufPos-wordSize:bufPos],
253					fillOnes,
254				)
255			}
256
257			bufPos -= wordSize
258		}
259
260		for ; nDirty > 0; nDirty-- {
261			copy(
262				buf[bufPos-wordSize:bufPos],
263				e.raw[wordSize*i:wordSize*(i+1)],
264			)
265			bufPos -= wordSize
266			i++
267		}
268	}
269
270	e.bm = big.NewInt(0)
271	e.bm.SetBytes(buf)
272
273	return nil
274}
275
276// Scan traverses the bitmap and calls f for each bit which is 1.
277func (e *Bitmap) Scan(f func(int) error) error {
278	for i := 0; i < e.bits; i++ {
279		if e.bm.Bit(i) == 1 {
280			if err := f(i); err != nil {
281				return err
282			}
283		}
284	}
285
286	return nil
287}
288