1// Licensed to the Apache Software Foundation (ASF) under one 2// or more contributor license agreements. See the NOTICE file 3// distributed with this work for additional information 4// regarding copyright ownership. The ASF licenses this file 5// to you under the Apache License, Version 2.0 (the 6// "License"); you may not use this file except in compliance 7// with the License. You may obtain a copy of the License at 8// 9// http://www.apache.org/licenses/LICENSE-2.0 10// 11// Unless required by applicable law or agreed to in writing, software 12// distributed under the License is distributed on an "AS IS" BASIS, 13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14// See the License for the specific language governing permissions and 15// limitations under the License. 16 17package bitutil_test 18 19import ( 20 "fmt" 21 "math/rand" 22 "strconv" 23 "testing" 24 25 "github.com/apache/arrow/go/v6/arrow/bitutil" 26 "github.com/stretchr/testify/assert" 27) 28 29func bitmapFromSlice(vals []int, bitOffset int) []byte { 30 out := make([]byte, int(bitutil.BytesForBits(int64(len(vals)+bitOffset)))) 31 writer := bitutil.NewBitmapWriter(out, bitOffset, len(vals)) 32 for _, val := range vals { 33 if val == 1 { 34 writer.Set() 35 } else { 36 writer.Clear() 37 } 38 writer.Next() 39 } 40 writer.Finish() 41 42 return out 43} 44 45func assertReaderVals(t *testing.T, reader *bitutil.BitmapReader, vals []bool) { 46 for _, v := range vals { 47 if v { 48 assert.True(t, reader.Set()) 49 assert.False(t, reader.NotSet()) 50 } else { 51 assert.True(t, reader.NotSet()) 52 assert.False(t, reader.Set()) 53 } 54 reader.Next() 55 } 56} 57 58func TestNormalOperation(t *testing.T) { 59 for _, offset := range []int{0, 1, 3, 5, 7, 8, 12, 13, 21, 38, 75, 120} { 60 buf := bitmapFromSlice([]int{0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1}, offset) 61 62 reader := bitutil.NewBitmapReader(buf, offset, 14) 63 assertReaderVals(t, reader, []bool{false, true, true, true, false, false, false, true, false, true, false, true, false, true}) 64 } 65} 66 67func TestDoesNotReadOutOfBounds(t *testing.T) { 68 var bitmap [16]byte 69 const length = 128 70 71 reader := bitutil.NewBitmapReader(bitmap[:], 0, length) 72 assert.EqualValues(t, length, reader.Len()) 73 assert.NotPanics(t, func() { 74 for i := 0; i < length; i++ { 75 assert.True(t, reader.NotSet()) 76 reader.Next() 77 } 78 }) 79 assert.EqualValues(t, length, reader.Pos()) 80 81 reader = bitutil.NewBitmapReader(bitmap[:], 5, length-5) 82 assert.EqualValues(t, length-5, reader.Len()) 83 assert.NotPanics(t, func() { 84 for i := 0; i < length-5; i++ { 85 assert.True(t, reader.NotSet()) 86 reader.Next() 87 } 88 }) 89 assert.EqualValues(t, length-5, reader.Pos()) 90 91 assert.NotPanics(t, func() { 92 reader = bitutil.NewBitmapReader(nil, 0, 0) 93 }) 94} 95 96func writeToWriter(vals []int, wr *bitutil.BitmapWriter) { 97 for _, v := range vals { 98 if v != 0 { 99 wr.Set() 100 } else { 101 wr.Clear() 102 } 103 wr.Next() 104 } 105 wr.Finish() 106} 107 108func TestBitmapWriter(t *testing.T) { 109 for _, fillByte := range []byte{0x00, 0xFF} { 110 { 111 bitmap := []byte{fillByte, fillByte, fillByte, fillByte} 112 wr := bitutil.NewBitmapWriter(bitmap, 0, 12) 113 writeToWriter([]int{0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1}, wr) 114 // {0b00110110, 0b....1010, ........, ........} 115 assert.Equal(t, []byte{0x36, (0x0A | (fillByte & 0xF0)), fillByte, fillByte}, bitmap) 116 } 117 { 118 bitmap := []byte{fillByte, fillByte, fillByte, fillByte} 119 wr := bitutil.NewBitmapWriter(bitmap, 0, 12) 120 wr.AppendBools([]bool{false, true, true, false, true, true, false, false, false, true, false, true}) 121 assert.Equal(t, []byte{0x36, (0x0A | (fillByte & 0xF0)), fillByte, fillByte}, bitmap) 122 } 123 { 124 bitmap := []byte{fillByte, fillByte, fillByte, fillByte} 125 wr := bitutil.NewBitmapWriter(bitmap, 3, 12) 126 writeToWriter([]int{0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1}, wr) 127 // {0b10110..., 0b.1010001, ........, ........} 128 assert.Equal(t, []byte{0xb0 | (fillByte & 0x07), 0x51 | (fillByte & 0x80), fillByte, fillByte}, bitmap) 129 } 130 { 131 bitmap := []byte{fillByte, fillByte, fillByte, fillByte} 132 wr := bitutil.NewBitmapWriter(bitmap, 3, 12) 133 wr.AppendBools([]bool{false, true, true, false}) 134 wr.AppendBools([]bool{true, true, false, false}) 135 wr.AppendBools([]bool{false, true, false, true}) 136 assert.Equal(t, []byte{0xb0 | (fillByte & 0x07), 0x51 | (fillByte & 0x80), fillByte, fillByte}, bitmap) 137 } 138 { 139 bitmap := []byte{fillByte, fillByte, fillByte, fillByte} 140 wr := bitutil.NewBitmapWriter(bitmap, 20, 12) 141 writeToWriter([]int{0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1}, wr) 142 // {........, ........, 0b0110...., 0b10100011} 143 assert.Equal(t, []byte{fillByte, fillByte, 0x60 | (fillByte & 0x0f), 0xa3}, bitmap) 144 } 145 } 146} 147 148func TestBitmapReader(t *testing.T) { 149 assertReaderVals := func(vals []int, rdr *bitutil.BitmapReader) { 150 for _, v := range vals { 151 if v != 0 { 152 assert.True(t, rdr.Set()) 153 assert.False(t, rdr.NotSet()) 154 } else { 155 assert.False(t, rdr.Set()) 156 assert.True(t, rdr.NotSet()) 157 } 158 rdr.Next() 159 } 160 } 161 162 vals := []int{0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1} 163 164 for _, offset := range []int{0, 1, 3, 5, 7, 8, 12, 13, 21, 38, 75, 120} { 165 bm := make([]byte, bitutil.BytesForBits(int64(len(vals)+offset))) 166 wr := bitutil.NewBitmapWriter(bm, offset, len(vals)) 167 writeToWriter(vals, wr) 168 169 rdr := bitutil.NewBitmapReader(bm, offset, 14) 170 assertReaderVals(vals, rdr) 171 } 172} 173 174func TestCopyBitmap(t *testing.T) { 175 const bufsize = 1000 176 lengths := []int{bufsize*8 - 4, bufsize * 8} 177 offsets := []int{0, 12, 16, 32, 37, 63, 64, 128} 178 179 buffer := make([]byte, bufsize) 180 181 // random bytes 182 r := rand.New(rand.NewSource(0)) 183 r.Read(buffer) 184 185 // add 16 byte padding 186 otherBuffer := make([]byte, bufsize+32) 187 r.Read(otherBuffer) 188 189 for _, nbits := range lengths { 190 for _, offset := range offsets { 191 for _, destOffset := range offsets { 192 t.Run(fmt.Sprintf("bits %d off %d dst %d", nbits, offset, destOffset), func(t *testing.T) { 193 copyLen := nbits - offset 194 195 bmCopy := make([]byte, len(otherBuffer)) 196 copy(bmCopy, otherBuffer) 197 198 bitutil.CopyBitmap(buffer, offset, copyLen, bmCopy, destOffset) 199 200 for i := 0; i < int(destOffset); i++ { 201 assert.Equalf(t, bitutil.BitIsSet(otherBuffer, i), bitutil.BitIsSet(bmCopy, i), "bit index: %d", i) 202 } 203 for i := 0; i < int(copyLen); i++ { 204 assert.Equalf(t, bitutil.BitIsSet(buffer, i+int(offset)), bitutil.BitIsSet(bmCopy, i+int(destOffset)), "bit index: %d", i) 205 } 206 for i := int(destOffset + copyLen); i < len(otherBuffer); i++ { 207 assert.Equalf(t, bitutil.BitIsSet(otherBuffer, i), bitutil.BitIsSet(bmCopy, i), "bit index: %d", i) 208 } 209 }) 210 } 211 } 212 } 213} 214 215func benchmarkCopyBitmapN(b *testing.B, offsetSrc, offsetDest, n int) { 216 nbits := n * 8 217 // random bytes 218 r := rand.New(rand.NewSource(0)) 219 src := make([]byte, n) 220 r.Read(src) 221 222 length := nbits - offsetSrc 223 224 dest := make([]byte, bitutil.BytesForBits(int64(length+offsetDest))) 225 226 b.ResetTimer() 227 b.SetBytes(int64(n)) 228 for i := 0; i < b.N; i++ { 229 bitutil.CopyBitmap(src, offsetSrc, length, dest, offsetDest) 230 } 231} 232 233// Fast path which is just a memcopy 234func BenchmarkCopyBitmapWithoutOffset(b *testing.B) { 235 for _, sz := range []int{32, 128, 1000, 1024} { 236 b.Run(strconv.Itoa(sz), func(b *testing.B) { 237 benchmarkCopyBitmapN(b, 0, 0, sz) 238 }) 239 } 240} 241 242// slow path where the source buffer is not byte aligned 243func BenchmarkCopyBitmapWithOffset(b *testing.B) { 244 for _, sz := range []int{32, 128, 1000, 1024} { 245 b.Run(strconv.Itoa(sz), func(b *testing.B) { 246 benchmarkCopyBitmapN(b, 4, 0, sz) 247 }) 248 } 249} 250 251// slow path where both source and dest are not byte aligned 252func BenchmarkCopyBitmapWithOffsetBoth(b *testing.B) { 253 for _, sz := range []int{32, 128, 1000, 1024} { 254 b.Run(strconv.Itoa(sz), func(b *testing.B) { 255 benchmarkCopyBitmapN(b, 3, 7, sz) 256 }) 257 } 258} 259 260const bufferSize = 1024 * 8 261 262// a naive bitmap reader for a baseline 263 264type NaiveBitmapReader struct { 265 bitmap []byte 266 pos int 267} 268 269func (n *NaiveBitmapReader) IsSet() bool { return bitutil.BitIsSet(n.bitmap, n.pos) } 270func (n *NaiveBitmapReader) IsNotSet() bool { return !n.IsSet() } 271func (n *NaiveBitmapReader) Next() { n.pos++ } 272 273// naive bitmap writer for a baseline 274 275type NaiveBitmapWriter struct { 276 bitmap []byte 277 pos int 278} 279 280func (n *NaiveBitmapWriter) Set() { 281 byteOffset := n.pos / 8 282 bitOffset := n.pos % 8 283 bitSetMask := uint8(1 << bitOffset) 284 n.bitmap[byteOffset] |= bitSetMask 285} 286 287func (n *NaiveBitmapWriter) Clear() { 288 byteOffset := n.pos / 8 289 bitOffset := n.pos % 8 290 bitClearMask := uint8(0xFF ^ (1 << bitOffset)) 291 n.bitmap[byteOffset] &= bitClearMask 292} 293 294func (n *NaiveBitmapWriter) Next() { n.pos++ } 295func (n *NaiveBitmapWriter) Finish() {} 296 297func randomBuffer(nbytes int64) []byte { 298 buf := make([]byte, nbytes) 299 r := rand.New(rand.NewSource(0)) 300 r.Read(buf) 301 return buf 302} 303 304func BenchmarkBitmapReader(b *testing.B) { 305 buf := randomBuffer(bufferSize) 306 nbits := bufferSize * 8 307 308 b.Run("naive baseline", func(b *testing.B) { 309 b.SetBytes(2 * bufferSize) 310 for i := 0; i < b.N; i++ { 311 { 312 total := 0 313 rdr := NaiveBitmapReader{buf, 0} 314 for j := 0; j < nbits; j++ { 315 if rdr.IsSet() { 316 total++ 317 } 318 rdr.Next() 319 } 320 } 321 { 322 total := 0 323 rdr := NaiveBitmapReader{buf, 0} 324 for j := 0; j < nbits; j++ { 325 if rdr.IsSet() { 326 total++ 327 } 328 rdr.Next() 329 } 330 } 331 } 332 }) 333 b.Run("bitmap reader", func(b *testing.B) { 334 b.SetBytes(2 * bufferSize) 335 for i := 0; i < b.N; i++ { 336 { 337 total := 0 338 rdr := bitutil.NewBitmapReader(buf, 0, nbits) 339 for j := 0; j < nbits; j++ { 340 if rdr.Set() { 341 total++ 342 } 343 rdr.Next() 344 } 345 } 346 { 347 total := 0 348 rdr := bitutil.NewBitmapReader(buf, 0, nbits) 349 for j := 0; j < nbits; j++ { 350 if rdr.Set() { 351 total++ 352 } 353 rdr.Next() 354 } 355 } 356 } 357 }) 358} 359