1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #pragma once
19 
20 #if defined(_MSC_VER)
21 #include <intrin.h>  // IWYU pragma: keep
22 #include <nmmintrin.h>
23 #pragma intrinsic(_BitScanReverse)
24 #pragma intrinsic(_BitScanForward)
25 #define ARROW_POPCOUNT64 __popcnt64
26 #define ARROW_POPCOUNT32 __popcnt
27 #else
28 #define ARROW_POPCOUNT64 __builtin_popcountll
29 #define ARROW_POPCOUNT32 __builtin_popcount
30 #endif
31 
32 #include <cstdint>
33 #include <type_traits>
34 
35 #include "arrow/util/macros.h"
36 #include "arrow/util/visibility.h"
37 
38 namespace arrow {
39 namespace detail {
40 
41 template <typename Integer>
as_unsigned(Integer x)42 typename std::make_unsigned<Integer>::type as_unsigned(Integer x) {
43   return static_cast<typename std::make_unsigned<Integer>::type>(x);
44 }
45 
46 }  // namespace detail
47 
48 namespace BitUtil {
49 
50 // The number of set bits in a given unsigned byte value, pre-computed
51 //
52 // Generated with the following Python code
53 // output = 'static constexpr uint8_t kBytePopcount[] = {{{0}}};'
54 // popcounts = [str(bin(i).count('1')) for i in range(0, 256)]
55 // print(output.format(', '.join(popcounts)))
56 static constexpr uint8_t kBytePopcount[] = {
57     0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3,
58     4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4,
59     4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4,
60     5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5,
61     4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2,
62     3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5,
63     5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4,
64     5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6,
65     4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
66 
PopCount(uint64_t bitmap)67 static inline uint64_t PopCount(uint64_t bitmap) { return ARROW_POPCOUNT64(bitmap); }
PopCount(uint32_t bitmap)68 static inline uint32_t PopCount(uint32_t bitmap) { return ARROW_POPCOUNT32(bitmap); }
69 
70 //
71 // Bit-related computations on integer values
72 //
73 
74 // Returns the ceil of value/divisor
CeilDiv(int64_t value,int64_t divisor)75 constexpr int64_t CeilDiv(int64_t value, int64_t divisor) {
76   return (value == 0) ? 0 : 1 + (value - 1) / divisor;
77 }
78 
79 // Return the number of bytes needed to fit the given number of bits
BytesForBits(int64_t bits)80 constexpr int64_t BytesForBits(int64_t bits) {
81   // This formula avoids integer overflow on very large `bits`
82   return (bits >> 3) + ((bits & 7) != 0);
83 }
84 
IsPowerOf2(int64_t value)85 constexpr bool IsPowerOf2(int64_t value) {
86   return value > 0 && (value & (value - 1)) == 0;
87 }
88 
IsPowerOf2(uint64_t value)89 constexpr bool IsPowerOf2(uint64_t value) {
90   return value > 0 && (value & (value - 1)) == 0;
91 }
92 
93 // Returns the smallest power of two that contains v.  If v is already a
94 // power of two, it is returned as is.
NextPower2(int64_t n)95 static inline int64_t NextPower2(int64_t n) {
96   // Taken from
97   // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
98   n--;
99   n |= n >> 1;
100   n |= n >> 2;
101   n |= n >> 4;
102   n |= n >> 8;
103   n |= n >> 16;
104   n |= n >> 32;
105   n++;
106   return n;
107 }
108 
IsMultipleOf64(int64_t n)109 constexpr bool IsMultipleOf64(int64_t n) { return (n & 63) == 0; }
110 
IsMultipleOf8(int64_t n)111 constexpr bool IsMultipleOf8(int64_t n) { return (n & 7) == 0; }
112 
113 // Returns a mask for the bit_index lower order bits.
114 // Only valid for bit_index in the range [0, 64).
LeastSignificantBitMask(int64_t bit_index)115 constexpr uint64_t LeastSignificantBitMask(int64_t bit_index) {
116   return (static_cast<uint64_t>(1) << bit_index) - 1;
117 }
118 
119 // Returns 'value' rounded up to the nearest multiple of 'factor'
RoundUp(int64_t value,int64_t factor)120 constexpr int64_t RoundUp(int64_t value, int64_t factor) {
121   return CeilDiv(value, factor) * factor;
122 }
123 
124 // Returns 'value' rounded down to the nearest multiple of 'factor'
RoundDown(int64_t value,int64_t factor)125 constexpr int64_t RoundDown(int64_t value, int64_t factor) {
126   return (value / factor) * factor;
127 }
128 
129 // Returns 'value' rounded up to the nearest multiple of 'factor' when factor
130 // is a power of two.
131 // The result is undefined on overflow, i.e. if `value > 2**64 - factor`,
132 // since we cannot return the correct result which would be 2**64.
RoundUpToPowerOf2(int64_t value,int64_t factor)133 constexpr int64_t RoundUpToPowerOf2(int64_t value, int64_t factor) {
134   // DCHECK(value >= 0);
135   // DCHECK(IsPowerOf2(factor));
136   return (value + (factor - 1)) & ~(factor - 1);
137 }
138 
RoundUpToPowerOf2(uint64_t value,uint64_t factor)139 constexpr uint64_t RoundUpToPowerOf2(uint64_t value, uint64_t factor) {
140   // DCHECK(IsPowerOf2(factor));
141   return (value + (factor - 1)) & ~(factor - 1);
142 }
143 
RoundUpToMultipleOf8(int64_t num)144 constexpr int64_t RoundUpToMultipleOf8(int64_t num) { return RoundUpToPowerOf2(num, 8); }
145 
RoundUpToMultipleOf64(int64_t num)146 constexpr int64_t RoundUpToMultipleOf64(int64_t num) {
147   return RoundUpToPowerOf2(num, 64);
148 }
149 
150 // Returns the number of bytes covering a sliced bitmap. Find the length
151 // rounded to cover full bytes on both extremities.
152 //
153 // The following example represents a slice (offset=10, length=9)
154 //
155 // 0       8       16     24
156 // |-------|-------|------|
157 //           [       ]          (slice)
158 //         [             ]      (same slice aligned to bytes bounds, length=16)
159 //
160 // The covering bytes is the length (in bytes) of this new aligned slice.
CoveringBytes(int64_t offset,int64_t length)161 constexpr int64_t CoveringBytes(int64_t offset, int64_t length) {
162   return (BitUtil::RoundUp(length + offset, 8) - BitUtil::RoundDown(offset, 8)) / 8;
163 }
164 
165 // Returns the 'num_bits' least-significant bits of 'v'.
TrailingBits(uint64_t v,int num_bits)166 static inline uint64_t TrailingBits(uint64_t v, int num_bits) {
167   if (ARROW_PREDICT_FALSE(num_bits == 0)) return 0;
168   if (ARROW_PREDICT_FALSE(num_bits >= 64)) return v;
169   int n = 64 - num_bits;
170   return (v << n) >> n;
171 }
172 
173 /// \brief Count the number of leading zeros in an unsigned integer.
CountLeadingZeros(uint32_t value)174 static inline int CountLeadingZeros(uint32_t value) {
175 #if defined(__clang__) || defined(__GNUC__)
176   if (value == 0) return 32;
177   return static_cast<int>(__builtin_clz(value));
178 #elif defined(_MSC_VER)
179   unsigned long index;                                               // NOLINT
180   if (_BitScanReverse(&index, static_cast<unsigned long>(value))) {  // NOLINT
181     return 31 - static_cast<int>(index);
182   } else {
183     return 32;
184   }
185 #else
186   int bitpos = 0;
187   while (value != 0) {
188     value >>= 1;
189     ++bitpos;
190   }
191   return 32 - bitpos;
192 #endif
193 }
194 
CountLeadingZeros(uint64_t value)195 static inline int CountLeadingZeros(uint64_t value) {
196 #if defined(__clang__) || defined(__GNUC__)
197   if (value == 0) return 64;
198   return static_cast<int>(__builtin_clzll(value));
199 #elif defined(_MSC_VER)
200   unsigned long index;                     // NOLINT
201   if (_BitScanReverse64(&index, value)) {  // NOLINT
202     return 63 - static_cast<int>(index);
203   } else {
204     return 64;
205   }
206 #else
207   int bitpos = 0;
208   while (value != 0) {
209     value >>= 1;
210     ++bitpos;
211   }
212   return 64 - bitpos;
213 #endif
214 }
215 
CountTrailingZeros(uint32_t value)216 static inline int CountTrailingZeros(uint32_t value) {
217 #if defined(__clang__) || defined(__GNUC__)
218   if (value == 0) return 32;
219   return static_cast<int>(__builtin_ctzl(value));
220 #elif defined(_MSC_VER)
221   unsigned long index;  // NOLINT
222   if (_BitScanForward(&index, value)) {
223     return static_cast<int>(index);
224   } else {
225     return 32;
226   }
227 #else
228   int bitpos = 0;
229   if (value) {
230     while (value & 1 == 0) {
231       value >>= 1;
232       ++bitpos;
233     }
234   } else {
235     bitpos = 32;
236   }
237   return bitpos;
238 #endif
239 }
240 
CountTrailingZeros(uint64_t value)241 static inline int CountTrailingZeros(uint64_t value) {
242 #if defined(__clang__) || defined(__GNUC__)
243   if (value == 0) return 64;
244   return static_cast<int>(__builtin_ctzll(value));
245 #elif defined(_MSC_VER)
246   unsigned long index;  // NOLINT
247   if (_BitScanForward64(&index, value)) {
248     return static_cast<int>(index);
249   } else {
250     return 64;
251   }
252 #else
253   int bitpos = 0;
254   if (value) {
255     while (value & 1 == 0) {
256       value >>= 1;
257       ++bitpos;
258     }
259   } else {
260     bitpos = 64;
261   }
262   return bitpos;
263 #endif
264 }
265 
266 // Returns the minimum number of bits needed to represent an unsigned value
NumRequiredBits(uint64_t x)267 static inline int NumRequiredBits(uint64_t x) { return 64 - CountLeadingZeros(x); }
268 
269 // Returns ceil(log2(x)).
Log2(uint64_t x)270 static inline int Log2(uint64_t x) {
271   // DCHECK_GT(x, 0);
272   return NumRequiredBits(x - 1);
273 }
274 
275 //
276 // Utilities for reading and writing individual bits by their index
277 // in a memory area.
278 //
279 
280 // Bitmask selecting the k-th bit in a byte
281 static constexpr uint8_t kBitmask[] = {1, 2, 4, 8, 16, 32, 64, 128};
282 
283 // the bitwise complement version of kBitmask
284 static constexpr uint8_t kFlippedBitmask[] = {254, 253, 251, 247, 239, 223, 191, 127};
285 
286 // Bitmask selecting the (k - 1) preceding bits in a byte
287 static constexpr uint8_t kPrecedingBitmask[] = {0, 1, 3, 7, 15, 31, 63, 127};
288 static constexpr uint8_t kPrecedingWrappingBitmask[] = {255, 1, 3, 7, 15, 31, 63, 127};
289 
290 // the bitwise complement version of kPrecedingBitmask
291 static constexpr uint8_t kTrailingBitmask[] = {255, 254, 252, 248, 240, 224, 192, 128};
292 
GetBit(const uint8_t * bits,uint64_t i)293 static constexpr bool GetBit(const uint8_t* bits, uint64_t i) {
294   return (bits[i >> 3] >> (i & 0x07)) & 1;
295 }
296 
297 // Gets the i-th bit from a byte. Should only be used with i <= 7.
GetBitFromByte(uint8_t byte,uint8_t i)298 static constexpr bool GetBitFromByte(uint8_t byte, uint8_t i) {
299   return byte & kBitmask[i];
300 }
301 
ClearBit(uint8_t * bits,int64_t i)302 static inline void ClearBit(uint8_t* bits, int64_t i) {
303   bits[i / 8] &= kFlippedBitmask[i % 8];
304 }
305 
SetBit(uint8_t * bits,int64_t i)306 static inline void SetBit(uint8_t* bits, int64_t i) { bits[i / 8] |= kBitmask[i % 8]; }
307 
SetBitTo(uint8_t * bits,int64_t i,bool bit_is_set)308 static inline void SetBitTo(uint8_t* bits, int64_t i, bool bit_is_set) {
309   // https://graphics.stanford.edu/~seander/bithacks.html
310   // "Conditionally set or clear bits without branching"
311   // NOTE: this seems to confuse Valgrind as it reads from potentially
312   // uninitialized memory
313   bits[i / 8] ^= static_cast<uint8_t>(-static_cast<uint8_t>(bit_is_set) ^ bits[i / 8]) &
314                  kBitmask[i % 8];
315 }
316 
317 /// \brief set or clear a range of bits quickly
318 ARROW_EXPORT
319 void SetBitsTo(uint8_t* bits, int64_t start_offset, int64_t length, bool bits_are_set);
320 
321 /// \brief Sets all bits in the bitmap to true
322 ARROW_EXPORT
323 void SetBitmap(uint8_t* data, int64_t offset, int64_t length);
324 
325 /// \brief Clears all bits in the bitmap (set to false)
326 ARROW_EXPORT
327 void ClearBitmap(uint8_t* data, int64_t offset, int64_t length);
328 
329 /// Returns a mask with lower i bits set to 1. If i >= sizeof(Word)*8, all-ones will be
330 /// returned
331 /// ex:
332 /// ref: https://stackoverflow.com/a/59523400
333 template <typename Word>
PrecedingWordBitmask(unsigned int const i)334 constexpr Word PrecedingWordBitmask(unsigned int const i) {
335   return (static_cast<Word>(i < sizeof(Word) * 8) << (i & (sizeof(Word) * 8 - 1))) - 1;
336 }
337 static_assert(PrecedingWordBitmask<uint8_t>(0) == 0x00, "");
338 static_assert(PrecedingWordBitmask<uint8_t>(4) == 0x0f, "");
339 static_assert(PrecedingWordBitmask<uint8_t>(8) == 0xff, "");
340 static_assert(PrecedingWordBitmask<uint16_t>(8) == 0x00ff, "");
341 
342 /// \brief Create a word with low `n` bits from `low` and high `sizeof(Word)-n` bits
343 /// from `high`.
344 /// Word ret
345 /// for (i = 0; i < sizeof(Word)*8; i++){
346 ///     ret[i]= i < n ? low[i]: high[i];
347 /// }
348 template <typename Word>
SpliceWord(int n,Word low,Word high)349 constexpr Word SpliceWord(int n, Word low, Word high) {
350   return (high & ~PrecedingWordBitmask<Word>(n)) | (low & PrecedingWordBitmask<Word>(n));
351 }
352 
353 }  // namespace BitUtil
354 }  // namespace arrow
355