1 #ifndef BMUTIL__H__INCLUDED__
2 #define BMUTIL__H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17
18 For more information please visit: http://bitmagic.io
19 */
20
21 /*! \file bmutil.h
22 \brief Bit manipulation primitives (internal)
23 */
24
25 #include "bmdef.h"
26 #include "bmconst.h"
27
#if defined(_M_AMD64) || defined(_M_X64)
#include <intrin.h>
#elif defined(BMSSE2OPT) || defined(BMSSE42OPT)
#include <emmintrin.h>
#elif defined(BMAVX2OPT)
#include <emmintrin.h>
#include <immintrin.h> // umbrella header: <avx2intrin.h> may not be included on its own (GCC/Clang emit #error)
#include <avx2intrin.h>
#endif
36
37 #ifdef __GNUG__
38 #pragma GCC diagnostic push
39 #pragma GCC diagnostic ignored "-Wconversion"
40 #endif
41
42 #ifdef _MSC_VER
43 #pragma warning( push )
44 #pragma warning( disable : 4146)
45 #endif
46
47
48 namespace bm
49 {
50
/**
    bit-block array wrapped into union for correct interpretation of
    32-bit vs 64-bit access vs SIMD
    @internal
*/
struct bit_block_t
{
    /// Overlapped views over the same aligned storage:
    /// plain 32-bit words, 64-bit words and (build-dependent) SIMD registers
    union bunion_t
    {
        bm::word_t BM_VECT_ALIGN w32[bm::set_block_size] BM_VECT_ALIGN_ATTR;
        bm::id64_t BM_VECT_ALIGN w64[bm::set_block_size / 2] BM_VECT_ALIGN_ATTR;

#if defined(BMAVX512OPT)
        __m512i BM_VECT_ALIGN w512[bm::set_block_size / 16] BM_VECT_ALIGN_ATTR;
#endif
#if defined(BMAVX2OPT)
        __m256i BM_VECT_ALIGN w256[bm::set_block_size / 8] BM_VECT_ALIGN_ATTR;
#endif
#if defined(BMSSE2OPT) || defined(BMSSE42OPT)
        __m128i BM_VECT_ALIGN w128[bm::set_block_size / 4] BM_VECT_ALIGN_ATTR;
#endif
    } b_;

    // implicit conversion to the canonical 32-bit word view
    operator bm::word_t*() { return &(b_.w32[0]); }
    operator const bm::word_t*() const { return &(b_.w32[0]); }
    // wider views require an explicit cast to avoid accidental reinterpretation
    explicit operator bm::id64_t*() { return &b_.w64[0]; }
    explicit operator const bm::id64_t*() const { return &b_.w64[0]; }
#ifdef BMAVX512OPT
    explicit operator __m512i*() { return &b_.w512[0]; }
    explicit operator const __m512i*() const { return &b_.w512[0]; }
#endif
#ifdef BMAVX2OPT
    explicit operator __m256i*() { return &b_.w256[0]; }
    explicit operator const __m256i*() const { return &b_.w256[0]; }
#endif
#if defined(BMSSE2OPT) || defined(BMSSE42OPT)
    explicit operator __m128i*() { return &b_.w128[0]; }
    explicit operator const __m128i*() const { return &b_.w128[0]; }
#endif

    // STL-style iteration over the 32-bit word view
    const bm::word_t* begin() const { return (b_.w32 + 0); }
    bm::word_t* begin() { return (b_.w32 + 0); }
    const bm::word_t* end() const { return (b_.w32 + bm::set_block_size); }
    bm::word_t* end() { return (b_.w32 + bm::set_block_size); }
};
96
97 /**
98 Get minimum of 2 values
99 */
100 template<typename T>
min_value(T v1,T v2)101 BMFORCEINLINE T min_value(T v1, T v2) BMNOEXCEPT
102 {
103 return v1 < v2 ? v1 : v2;
104 }
105
/**
    \brief ad-hoc conditional expressions
    (compile-time boolean selector exposed as a runtime call)
    \internal
*/
template <bool b> struct conditional
{
    // primary template is only ever instantiated with b == true
    static bool test() { return b; }
};
template <> struct conditional<false>
{
    static bool test() { return false; }
};
118
119
/**
    Fast loop-less function to find LOG2
    (unrolled branch ladder computing floor(log2(x)); returns 0 for x <= 1)
*/
template<typename T>
BMFORCEINLINE T ilog2(T x) BMNOEXCEPT
{
    unsigned int l = 0;

    // binary search for the highest set bit, halving the window each step
    if (x >= 1<<16) { x = (T)(x >> 16); l |= 16; }
    if (x >= 1<<8) { x = (T)(x >> 8); l |= 8; }
    if (x >= 1<<4) { x = (T)(x >> 4); l |= 4; }
    if (x >= 1<<2) { x = (T)(x >> 2); l |= 2; }
    if (x >= 1<<1) l |=1;
    return (T)l;
}
135
// 16-bit (gap_word_t) specialization: the ladder starts at 8 because
// the value cannot have bits above bit 15
template<>
BMFORCEINLINE
bm::gap_word_t ilog2(gap_word_t x) BMNOEXCEPT
{
    unsigned int l = 0;
    if (x >= 1<<8) { x = (bm::gap_word_t)(x >> 8); l |= 8; }
    if (x >= 1<<4) { x = (bm::gap_word_t)(x >> 4); l |= 4; }
    if (x >= 1<<2) { x = (bm::gap_word_t)(x >> 2); l |= 2; }
    if (x >= 1<<1) l |=1;
    return (bm::gap_word_t)l;
}
147
148 /**
149 Mini auto-pointer for internal memory management
150 @internal
151 */
152 template<class T>
153 class ptr_guard
154 {
155 public:
156 ptr_guard(T* p) BMNOEXCEPT : ptr_(p) {}
157 ~ptr_guard() { delete ptr_; }
158 private:
159 ptr_guard(const ptr_guard<T>& p);
160 ptr_guard& operator=(const ptr_guard<T>& p);
161 private:
162 T* ptr_;
163 };
164
165 /**
166 Portable LZCNT with (uses minimal LUT)
167 @ingroup bitfunc
168 @internal
169 */
170 BMFORCEINLINE
171 unsigned count_leading_zeros(unsigned x) BMNOEXCEPT
172 {
173 unsigned n =
174 (x >= (1U << 16)) ?
175 ((x >= (1U << 24)) ? ((x >= (1 << 28)) ? 28u : 24u) : ((x >= (1U << 20)) ? 20u : 16u))
176 :
177 ((x >= (1U << 8)) ? ((x >= (1U << 12)) ? 12u : 8u) : ((x >= (1U << 4)) ? 4u : 0u));
178 return unsigned(bm::lzcnt_table<true>::_lut[x >> n]) - n;
179 }
180
181 /**
182 Portable TZCNT with (uses 37-LUT)
183 @ingroup bitfunc
184 @internal
185 */
186 BMFORCEINLINE
187 unsigned count_trailing_zeros(unsigned v) BMNOEXCEPT
188 {
189 // (v & -v) isolates the last set bit
190 return unsigned(bm::tzcnt_table<true>::_lut[(-v & v) % 37]);
191 }
192
193 /**
194 Lookup table based integer LOG2
195 */
196 template<typename T>
197 BMFORCEINLINE T ilog2_LUT(T x) BMNOEXCEPT
198 {
199 unsigned l = 0;
200 if (x & 0xffff0000)
201 {
202 l += 16; x >>= 16;
203 }
204
205 if (x & 0xff00)
206 {
207 l += 8; x >>= 8;
208 }
209 return l + T(bm::first_bit_table<true>::_idx[x]);
210 }
211
212 /**
213 Lookup table based short integer LOG2
214 */
215 template<>
216 BMFORCEINLINE bm::gap_word_t ilog2_LUT<bm::gap_word_t>(bm::gap_word_t x) BMNOEXCEPT
217 {
218 if (x & 0xff00)
219 {
220 x = bm::gap_word_t(x >> 8u);
221 return bm::gap_word_t(8u + bm::first_bit_table<true>::_idx[x]);
222 }
223 return bm::gap_word_t(bm::first_bit_table<true>::_idx[x]);
224 }
225
226
// if we are running on x86 CPU we can use inline ASM
//
// bsf_asm32: index of the lowest set bit; bsr_asm32: index of the highest
// set bit. Callers guarantee a non-zero argument (see BM_ASSERT in
// bit_scan_reverse32 / bit_scan_forward32); the x86 BSF/BSR result is
// undefined for a zero input.

#ifdef BM_x86
#ifdef __GNUG__

BMFORCEINLINE
unsigned bsf_asm32(unsigned int v) BMNOEXCEPT
{
    unsigned r;
    asm volatile(" bsfl %1, %0": "=r"(r): "rm"(v) );
    return r;
}

BMFORCEINLINE
unsigned bsr_asm32(unsigned int v) BMNOEXCEPT
{
    unsigned r;
    asm volatile(" bsrl %1, %0": "=r"(r): "rm"(v) );
    return r;
}

#endif  // __GNUG__

#ifdef _MSC_VER

#if defined(_M_AMD64) || defined(_M_X64) // inline assembly not supported
                                         // use compiler intrinsics instead

BMFORCEINLINE
unsigned int bsr_asm32(unsigned int value) BMNOEXCEPT
{
    unsigned long r;
    _BitScanReverse(&r, value);
    return r;
}

BMFORCEINLINE
unsigned int bsf_asm32(unsigned int value) BMNOEXCEPT
{
    unsigned long r;
    _BitScanForward(&r, value);
    return r;
}

#else

// 32-bit MSVC inline assembly: the scan result is left in EAX, which
// MSVC uses as the return register (no explicit return statement)

BMFORCEINLINE
unsigned int bsr_asm32(unsigned int value) BMNOEXCEPT
{
    __asm bsr eax, value
}

BMFORCEINLINE
unsigned int bsf_asm32(unsigned int value) BMNOEXCEPT
{
    __asm bsf eax, value
}

#endif

#endif // _MSC_VER

#endif // BM_x86
289
290
// From:
// http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.37.8562
//
// De Bruijn multiplication: (v & -v) isolates the lowest set bit, the
// multiply/shift then maps each power of two to a unique 5-bit table index.
// NOTE(review): valid for 32-bit operands; wider T would overflow the
// 32-entry table — confirm callers only pass 32-bit values.
template<typename T>
BMFORCEINLINE T bit_scan_fwd(T v) BMNOEXCEPT
{
    return
        DeBruijn_bit_position<true>::_multiply[(((v & -v) * 0x077CB531U)) >> 27];
}
300
/// 32-bit bit-scan reverse: index of the highest set bit.
/// @param w - input, must be non-zero (asserted)
/// Dispatch: GCC builtin where available, x86 inline asm, portable LUT otherwise.
BMFORCEINLINE
unsigned bit_scan_reverse32(unsigned w) BMNOEXCEPT
{
    BM_ASSERT(w);
#if defined(BM_USE_GCC_BUILD) || (defined(__GNUG__) && (defined(__arm__) || defined(__aarch64__)))
    return (unsigned) (31 - __builtin_clz(w));
#else
# if defined(BM_x86) && (defined(__GNUG__) || defined(_MSC_VER))
    return bm::bsr_asm32(w);
# else
    return bm::ilog2_LUT<unsigned int>(w);
# endif
#endif
}
315
316 BMFORCEINLINE
317 unsigned bit_scan_forward32(unsigned w) BMNOEXCEPT
318 {
319 BM_ASSERT(w);
320 #if defined(BM_USE_GCC_BUILD) || (defined(__GNUG__) && (defined(__arm__) || defined(__aarch64__)))
321 return (unsigned) __builtin_ctz(w);
322 #else
323 # if defined(BM_x86) && (defined(__GNUG__) || defined(_MSC_VER))
324 return bm::bsf_asm32(w);
325 # else
326 return bit_scan_fwd(w);
327 # endif
328 #endif
329 }
330
331
/// BMI1 BLSR equivalent: reset (clear) the lowest set bit of w.
/// Uses the _blsr_u64 intrinsic on AVX2/AVX-512 builds,
/// portable bit-trick w & (w - 1) otherwise.
/// @internal
BMFORCEINLINE
unsigned long long bmi_bslr_u64(unsigned long long w) BMNOEXCEPT
{
#if defined(BMAVX2OPT) || defined (BMAVX512OPT)
    return _blsr_u64(w);
#else
    return w & (w - 1);
#endif
}
341
342 BMFORCEINLINE
343 unsigned long long bmi_blsi_u64(unsigned long long w)
344 {
345 #if defined(BMAVX2OPT) || defined (BMAVX512OPT)
346 return _blsi_u64(w);
347 #else
348 return w & (-w);
349 #endif
350 }
351
/// 32-bit bit-scan reverse
/// Returns the count of leading (most significant) zero bits in w.
/// @param w - input, must be non-zero (asserted)
inline
unsigned count_leading_zeros_u32(unsigned w) BMNOEXCEPT
{
    BM_ASSERT(w);
#if defined(BMAVX2OPT) || defined (BMAVX512OPT)
    return (unsigned)_lzcnt_u32(w);
#else
#if defined(BM_USE_GCC_BUILD) || defined(__GNUG__)
    return (unsigned) __builtin_clz(w);
#else
    return bm::count_leading_zeros(w); // portable
#endif
#endif
}
367
368
/// 64-bit bit-scan reverse
/// Returns the count of leading zero bits in the 64-bit word w.
/// @param w - input, must be non-zero (asserted)
inline
unsigned count_leading_zeros_u64(bm::id64_t w) BMNOEXCEPT
{
    BM_ASSERT(w);
#if defined(BMAVX2OPT) || defined (BMAVX512OPT)
    return (unsigned)_lzcnt_u64(w);
#else
#if defined(BM_USE_GCC_BUILD) || (defined(__GNUG__) && (defined(__arm__) || defined(__aarch64__)))
    return (unsigned) __builtin_clzll(w);
#else
    // portable fallback: scan the high 32-bit half; when it is empty,
    // all 32 of its bits are leading zeros plus those of the low half
    unsigned z;
    unsigned w1 = unsigned(w >> 32);
    if (!w1)
    {
        z = 32;
        w1 = unsigned(w);
        z += 31 - bm::bit_scan_reverse32(w1);
    }
    else
    {
        z = 31 - bm::bit_scan_reverse32(w1);
    }
    return z;
#endif
#endif
}
396
/// 32-bit bit-scan fwd
/// Returns the count of trailing (least significant) zero bits in w.
/// @param w - input, must be non-zero (asserted)
inline
unsigned count_trailing_zeros_u32(unsigned w) BMNOEXCEPT
{
    BM_ASSERT(w);

#if defined(BMAVX2OPT) || defined (BMAVX512OPT)
    return (unsigned)_tzcnt_u32(w);
#else
#if defined(BM_USE_GCC_BUILD) || (defined(__GNUG__) && (defined(__arm__) || defined(__aarch64__)))
    return (unsigned) __builtin_ctz(w);
#else
    return bm::bit_scan_forward32(w);
#endif
#endif
}
413
414
/// 64-bit bit-scan fwd
/// Returns the count of trailing zero bits in the 64-bit word w.
/// @param w - input, must be non-zero (asserted)
inline
unsigned count_trailing_zeros_u64(bm::id64_t w) BMNOEXCEPT
{
    BM_ASSERT(w);

#if defined(BMAVX2OPT) || defined (BMAVX512OPT)
    return (unsigned)_tzcnt_u64(w);
#else
#if defined(BM_USE_GCC_BUILD) || (defined(__GNUG__) && (defined(__arm__) || defined(__aarch64__)))
    return (unsigned) __builtin_ctzll(w);
#else
    // portable fallback: scan the low 32-bit half; when it is empty,
    // all 32 of its bits are trailing zeros plus those of the high half
    unsigned z;
    unsigned w1 = unsigned(w);
    if (!w1)
    {
        z = 32;
        w1 = unsigned(w >> 32);
        z += bm::bit_scan_forward32(w1);
    }
    else
    {
        z = bm::bit_scan_forward32(w1);
    }
    return z;
#endif
#endif
}
443
444
445
/*!
    Returns BSR value (index of the highest set bit)
    For 8-byte types the upper 32-bit half is inspected first; when it is
    empty (or for narrower types) the scan falls through to the low half.
    @param value - input, must be non-zero (asserted)
    @ingroup bitfunc
*/
template <class T>
unsigned bit_scan_reverse(T value) BMNOEXCEPT
{
    BM_ASSERT(value);

    if (bm::conditional<sizeof(T)==8>::test())
    {
#if defined(BM_USE_GCC_BUILD) || (defined(__GNUG__) && (defined(__arm__) || defined(__aarch64__)))
        return (unsigned) (63 - __builtin_clzll(value));
#else
        bm::id64_t v8 = value;
        v8 >>= 32;
        unsigned v = (unsigned)v8;
        if (v)
        {
            v = bm::bit_scan_reverse32(v);
            return v + 32; // bit found in the high half
        }
#endif
    }
    return bm::bit_scan_reverse32((unsigned)value);
}
472
473 /*! \brief and functor
474 \internal
475 */
476 struct and_func
477 {
478 static
479 BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2
480 { return v1 & v2; }
481 };
482 /*! \brief xor functor
483 \internal
484 */
485 struct xor_func
486 {
487 static
488 BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2
489 { return v1 ^ v2; }
490 };
491 /*! \brief or functor
492 \internal
493 */
494 struct or_func
495 {
496 static
497 BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2
498 { return v1 | v2; }
499 };
500 /*! \brief sub functor
501 \internal
502 */
503 struct sub_func
504 {
505 static
506 BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2
507 { return v1 & ~v2; }
508 };
509
510 BMFORCEINLINE
511 unsigned mask_r_u32(unsigned nbit) BMNOEXCEPT
512 {
513 BM_ASSERT(nbit < 32);
514 unsigned m = (~0u << nbit);
515 BM_ASSERT(m == block_set_table<true>::_right[nbit]);
516 return m;
517 }
518
519 BMFORCEINLINE
520 unsigned mask_l_u32(unsigned nbit) BMNOEXCEPT
521 {
522 BM_ASSERT(nbit < 32);
523 unsigned m = ~0u >> (31 - nbit);
524 BM_ASSERT(m == block_set_table<true>::_left[nbit]);
525 return m;
526 }
527
528 /// XOR swap two variables
529 ///
530 /// @internal
531 template<typename W>
532 BMFORCEINLINE void xor_swap(W& x, W& y) BMNOEXCEPT
533 {
534 BM_ASSERT(&x != &y);
535 x ^= y; y ^= x; x ^= y;
536 }
537
538
539 #ifdef __GNUG__
540 #pragma GCC diagnostic pop
541 #endif
542 #ifdef _MSC_VER
543 #pragma warning( pop )
544 #endif
545
/**
    Compute the mask of byte presence in a 64-bit word:
    bit i of the result is set when byte i of w is non-zero.

    @param w - [in] input 64-bit word
    @return 8-bit presence mask
    @internal
*/
inline
unsigned compute_h64_mask(unsigned long long w)
{
    unsigned h_mask = 0;
    // walk the bytes from least to most significant; the word itself
    // acts as the loop bound (becomes 0 after at most 8 shifts)
    for (unsigned bit = 1u; w; bit <<= 1, w >>= 8)
    {
        if (w & 0xFFull)
            h_mask |= bit;
    } // for
    return h_mask;
}
564
565
566
567 } // bm
568
569 #endif
570