1 #ifndef BMUTIL__H__INCLUDED__
2 #define BMUTIL__H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10     http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit:  http://bitmagic.io
19 */
20 
21 /*! \file bmutil.h
22     \brief Bit manipulation primitives (internal)
23 */
24 
25 #include "bmdef.h"
26 #include "bmconst.h"
27 
28 #if defined(_M_AMD64) || defined(_M_X64)
29 #include <intrin.h>
30 #elif defined(BMSSE2OPT) || defined(BMSSE42OPT)
31 #include <emmintrin.h>
32 #elif defined(BMAVX2OPT)
33 #include <emmintrin.h>
34 #include <avx2intrin.h>
35 #endif
36 
37 #ifdef __GNUG__
38 #pragma GCC diagnostic push
39 #pragma GCC diagnostic ignored "-Wconversion"
40 #endif
41 
42 #ifdef _MSC_VER
43 #pragma warning( push )
44 #pragma warning( disable : 4146)
45 #endif
46 
47 
48 namespace bm
49 {
50 
51         /**
52         bit-block array wrapped into union for correct interpretation of
53         32-bit vs 64-bit access vs SIMD
54         @internal
55         */
56         struct bit_block_t
57         {
58             union bunion_t
59             {
60                 bm::word_t BM_VECT_ALIGN w32[bm::set_block_size] BM_VECT_ALIGN_ATTR;
61                 bm::id64_t BM_VECT_ALIGN w64[bm::set_block_size / 2] BM_VECT_ALIGN_ATTR;
62 
63 #if defined(BMAVX512OPT)
64                 __m512i  BM_VECT_ALIGN w512[bm::set_block_size / 16] BM_VECT_ALIGN_ATTR;
65 #endif
66 #if defined(BMAVX2OPT)
67                 __m256i  BM_VECT_ALIGN w256[bm::set_block_size / 8] BM_VECT_ALIGN_ATTR;
68 #endif
69 #if defined(BMSSE2OPT) || defined(BMSSE42OPT)
70                 __m128i  BM_VECT_ALIGN w128[bm::set_block_size / 4] BM_VECT_ALIGN_ATTR;
71 #endif
72             } b_;
73 
74             operator bm::word_t*() { return &(b_.w32[0]); }
75             operator const bm::word_t*() const { return &(b_.w32[0]); }
76             explicit operator bm::id64_t*() { return &b_.w64[0]; }
77             explicit operator const bm::id64_t*() const { return &b_.w64[0]; }
78 #ifdef BMAVX512OPT
79             explicit operator __m512i*() { return &b_.w512[0]; }
80             explicit operator const __m512i*() const { return &b_.w512[0]; }
81 #endif
82 #ifdef BMAVX2OPT
83             explicit operator __m256i*() { return &b_.w256[0]; }
84             explicit operator const __m256i*() const { return &b_.w256[0]; }
85 #endif
86 #if defined(BMSSE2OPT) || defined(BMSSE42OPT)
87             explicit operator __m128i*() { return &b_.w128[0]; }
88             explicit operator const __m128i*() const { return &b_.w128[0]; }
89 #endif
90 
beginbit_block_t91             const bm::word_t* begin() const { return (b_.w32 + 0); }
beginbit_block_t92             bm::word_t* begin() { return (b_.w32 + 0); }
endbit_block_t93             const bm::word_t* end() const { return (b_.w32 + bm::set_block_size); }
endbit_block_t94             bm::word_t* end() { return (b_.w32 + bm::set_block_size); }
95         };
96 
97 /**
98     Get minimum of 2 values
99 */
100 template<typename T>
min_value(T v1,T v2)101 BMFORCEINLINE T min_value(T v1, T v2) BMNOEXCEPT
102 {
103     return v1 < v2 ? v1 : v2;
104 }
105 
106 /**
107     \brief ad-hoc conditional expressions
108     \internal
109 */
110 template <bool b> struct conditional
111 {
testconditional112     static bool test() { return true; }
113 };
114 template <> struct conditional<false>
115 {
116     static bool test() { return false; }
117 };
118 
119 
120 /**
121     Fast loop-less function to find LOG2
122 */
123 template<typename T>
124 BMFORCEINLINE T ilog2(T x) BMNOEXCEPT
125 {
126     unsigned int l = 0;
127 
128     if (x >= 1<<16) { x = (T)(x >> 16); l |= 16; }
129     if (x >= 1<<8)  { x = (T)(x >> 8);  l |= 8; }
130     if (x >= 1<<4)  { x = (T)(x >> 4);  l |= 4; }
131     if (x >= 1<<2)  { x = (T)(x >> 2);  l |= 2; }
132     if (x >= 1<<1)  l |=1;
133     return (T)l;
134 }
135 
136 template<>
137 BMFORCEINLINE
138 bm::gap_word_t ilog2(gap_word_t x) BMNOEXCEPT
139 {
140     unsigned int l = 0;
141     if (x >= 1<<8)  { x = (bm::gap_word_t)(x >> 8); l |= 8; }
142     if (x >= 1<<4)  { x = (bm::gap_word_t)(x >> 4); l |= 4; }
143     if (x >= 1<<2)  { x = (bm::gap_word_t)(x >> 2); l |= 2; }
144     if (x >= 1<<1)  l |=1;
145     return (bm::gap_word_t)l;
146 }
147 
148 /**
149      Mini auto-pointer for internal memory management
150      @internal
151 */
152 template<class T>
153 class ptr_guard
154 {
155 public:
156     ptr_guard(T* p) BMNOEXCEPT : ptr_(p) {}
157     ~ptr_guard() { delete ptr_; }
158 private:
159     ptr_guard(const ptr_guard<T>& p);
160     ptr_guard& operator=(const ptr_guard<T>& p);
161 private:
162     T* ptr_;
163 };
164 
165 /**
166     Portable LZCNT with (uses minimal LUT)
167     @ingroup bitfunc
168     @internal
169 */
170 BMFORCEINLINE
171 unsigned count_leading_zeros(unsigned x) BMNOEXCEPT
172 {
173     unsigned n =
174         (x >= (1U << 16)) ?
175         ((x >= (1U << 24)) ? ((x >= (1 << 28)) ? 28u : 24u) : ((x >= (1U << 20)) ? 20u : 16u))
176         :
177         ((x >= (1U << 8)) ? ((x >= (1U << 12)) ? 12u : 8u) : ((x >= (1U << 4)) ? 4u : 0u));
178     return unsigned(bm::lzcnt_table<true>::_lut[x >> n]) - n;
179 }
180 
181 /**
182     Portable TZCNT with (uses 37-LUT)
183     @ingroup bitfunc
184     @internal
185 */
186 BMFORCEINLINE
187 unsigned count_trailing_zeros(unsigned v) BMNOEXCEPT
188 {
189     // (v & -v) isolates the last set bit
190     return unsigned(bm::tzcnt_table<true>::_lut[(-v & v) % 37]);
191 }
192 
193 /**
194     Lookup table based integer LOG2
195 */
196 template<typename T>
197 BMFORCEINLINE T ilog2_LUT(T x) BMNOEXCEPT
198 {
199     unsigned l = 0;
200     if (x & 0xffff0000)
201     {
202         l += 16; x >>= 16;
203     }
204 
205     if (x & 0xff00)
206     {
207         l += 8; x >>= 8;
208     }
209     return l + T(bm::first_bit_table<true>::_idx[x]);
210 }
211 
212 /**
213     Lookup table based short integer LOG2
214 */
215 template<>
216 BMFORCEINLINE bm::gap_word_t ilog2_LUT<bm::gap_word_t>(bm::gap_word_t x) BMNOEXCEPT
217 {
218     if (x & 0xff00)
219     {
220         x = bm::gap_word_t(x >> 8u);
221         return bm::gap_word_t(8u + bm::first_bit_table<true>::_idx[x]);
222     }
223     return bm::gap_word_t(bm::first_bit_table<true>::_idx[x]);
224 }
225 
226 
// if we are running on x86 CPU we can use inline ASM

#ifdef BM_x86
#ifdef __GNUG__

// bit-scan-forward (index of the lowest set bit) via the x86 BSF instruction.
// NOTE(review): BSF leaves the destination undefined when v == 0 — callers
// are expected to guarantee v != 0 (see BM_ASSERT in the call sites).
BMFORCEINLINE
unsigned bsf_asm32(unsigned int v) BMNOEXCEPT
{
    unsigned r;
    asm volatile(" bsfl %1, %0": "=r"(r): "rm"(v) );
    return r;
}

// bit-scan-reverse (index of the highest set bit) via the x86 BSR instruction.
// Same v != 0 precondition as bsf_asm32.
BMFORCEINLINE
unsigned bsr_asm32(unsigned int v) BMNOEXCEPT
{
    unsigned r;
    asm volatile(" bsrl %1, %0": "=r"(r): "rm"(v) );
    return r;
}

#endif  // __GNUG__

#ifdef _MSC_VER

#if defined(_M_AMD64) || defined(_M_X64) // inline assembly not supported

// MSVC x64: use the _BitScanReverse intrinsic instead of inline asm.
// The intrinsic's found/not-found return flag is ignored here, so the
// result is meaningful only for value != 0.
BMFORCEINLINE
unsigned int bsr_asm32(unsigned int value) BMNOEXCEPT
{
    unsigned long r;
    _BitScanReverse(&r, value);
    return r;
}

// MSVC x64: _BitScanForward intrinsic; same value != 0 precondition.
BMFORCEINLINE
unsigned int bsf_asm32(unsigned int value) BMNOEXCEPT
{
    unsigned long r;
    _BitScanForward(&r, value);
    return r;
}

#else

// MSVC x86 (32-bit): classic inline asm. The result is left in EAX,
// which MSVC uses as the integer return register (no explicit return).
BMFORCEINLINE
unsigned int bsr_asm32(unsigned int value) BMNOEXCEPT
{
  __asm  bsr  eax, value
}

BMFORCEINLINE
unsigned int bsf_asm32(unsigned int value) BMNOEXCEPT
{
  __asm  bsf  eax, value
}

#endif

#endif // _MSC_VER

#endif // BM_x86
289 
290 
// From:
// http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.37.8562
//
// De Bruijn multiplication: (v & -v) isolates the lowest set bit, the
// multiply + top-5-bit shift perfect-hashes that power of two into the
// 32-entry position table. Portable bit-scan-forward with no branches.
template<typename T>
BMFORCEINLINE T bit_scan_fwd(T v) BMNOEXCEPT
{
    return
        DeBruijn_bit_position<true>::_multiply[(((v & -v) * 0x077CB531U)) >> 27];
}
300 
/// 32-bit bit-scan-reverse: index (0..31) of the highest set bit.
/// @pre w != 0 (asserted; the underlying primitives are undefined for 0)
BMFORCEINLINE
unsigned bit_scan_reverse32(unsigned w) BMNOEXCEPT
{
    BM_ASSERT(w);
#if defined(BM_USE_GCC_BUILD) || (defined(__GNUG__) && (defined(__arm__) || defined(__aarch64__)))
    return (unsigned) (31 - __builtin_clz(w)); // clz -> highest-bit index
#else
# if defined(BM_x86) && (defined(__GNUG__) || defined(_MSC_VER))
    return bm::bsr_asm32(w); // x86 BSR instruction / intrinsic
# else
    return bm::ilog2_LUT<unsigned int>(w); // portable LUT fallback
# endif
#endif
}
315 
/// 32-bit bit-scan-forward: index (0..31) of the lowest set bit.
/// @pre w != 0 (asserted; the underlying primitives are undefined for 0)
BMFORCEINLINE
unsigned bit_scan_forward32(unsigned w) BMNOEXCEPT
{
    BM_ASSERT(w);
#if defined(BM_USE_GCC_BUILD) || (defined(__GNUG__) && (defined(__arm__) || defined(__aarch64__)))
    return (unsigned) __builtin_ctz(w); // trailing zero count == lowest-bit index
#else
# if defined(BM_x86) && (defined(__GNUG__) || defined(_MSC_VER))
    return bm::bsf_asm32(w); // x86 BSF instruction / intrinsic
# else
        return bit_scan_fwd(w); // portable De Bruijn fallback
# endif
#endif
}
330 
331 
/// Clear the lowest set bit (BMI1 BLSR semantics):
/// uses the _blsr_u64 intrinsic under AVX2/AVX-512 builds,
/// otherwise the equivalent portable expression w & (w - 1).
BMFORCEINLINE
unsigned long long bmi_bslr_u64(unsigned long long w) BMNOEXCEPT
{
#if defined(BMAVX2OPT) || defined (BMAVX512OPT)
    return _blsr_u64(w);
#else
    return w & (w - 1);
#endif
}
341 
/// Isolate the lowest set bit (BMI1 BLSI semantics):
/// uses the _blsi_u64 intrinsic under AVX2/AVX-512 builds,
/// otherwise the equivalent portable expression w & (-w).
/// NOTE(review): unlike the sibling bmi_bslr_u64 this is not marked
/// BMNOEXCEPT — consider adding it for consistency.
BMFORCEINLINE
unsigned long long bmi_blsi_u64(unsigned long long w)
{
#if defined(BMAVX2OPT) || defined (BMAVX512OPT)
    return _blsi_u64(w);
#else
    return w & (-w);
#endif
}
351 
/// 32-bit leading zero count (LZCNT).
/// @pre w != 0 (asserted; __builtin_clz is undefined for 0)
/// NOTE(review): the __GNUG__ branch here is not restricted to arm/aarch64
/// like in the sibling scan functions — confirm this asymmetry is intended.
inline
unsigned count_leading_zeros_u32(unsigned w) BMNOEXCEPT
{
    BM_ASSERT(w);
#if defined(BMAVX2OPT) || defined (BMAVX512OPT)
    return (unsigned)_lzcnt_u32(w); // hardware LZCNT
#else
    #if defined(BM_USE_GCC_BUILD) || defined(__GNUG__)
        return (unsigned) __builtin_clz(w);
    #else
        return bm::count_leading_zeros(w); // portable
    #endif
#endif
}
367 
368 
/// 64-bit leading zero count (LZCNT).
/// @pre w != 0 (asserted; the builtins/fallback are undefined for 0)
inline
unsigned count_leading_zeros_u64(bm::id64_t w) BMNOEXCEPT
{
    BM_ASSERT(w);
#if defined(BMAVX2OPT) || defined (BMAVX512OPT)
    return (unsigned)_lzcnt_u64(w); // hardware LZCNT
#else
    #if defined(BM_USE_GCC_BUILD) || (defined(__GNUG__) && (defined(__arm__) || defined(__aarch64__)))
        return (unsigned) __builtin_clzll(w);
    #else
        // portable: scan the high 32-bit half first; if it is empty,
        // the answer is 32 + clz(low half), computed via bit-scan-reverse
        unsigned z;
        unsigned w1 = unsigned(w >> 32);
        if (!w1)
        {
            z = 32;
            w1 = unsigned(w);
            z += 31 - bm::bit_scan_reverse32(w1);
        }
        else
        {
            z = 31 - bm::bit_scan_reverse32(w1);
        }
        return z;
    #endif
#endif
}
396 
/// 32-bit trailing zero count (TZCNT).
/// @pre w != 0 (asserted; __builtin_ctz is undefined for 0)
inline
unsigned count_trailing_zeros_u32(unsigned w) BMNOEXCEPT
{
    BM_ASSERT(w);

#if defined(BMAVX2OPT) || defined (BMAVX512OPT)
    return (unsigned)_tzcnt_u32(w); // hardware TZCNT
#else
    #if defined(BM_USE_GCC_BUILD) || (defined(__GNUG__) && (defined(__arm__) || defined(__aarch64__)))
        return (unsigned) __builtin_ctz(w);
    #else
        return bm::bit_scan_forward32(w); // portable fallback
    #endif
#endif
}
413 
414 
/// 64-bit trailing zero count (TZCNT).
/// @pre w != 0 (asserted; the builtins/fallback are undefined for 0)
inline
unsigned count_trailing_zeros_u64(bm::id64_t w) BMNOEXCEPT
{
    BM_ASSERT(w);

#if defined(BMAVX2OPT) || defined (BMAVX512OPT)
    return (unsigned)_tzcnt_u64(w); // hardware TZCNT
#else
    #if defined(BM_USE_GCC_BUILD) || (defined(__GNUG__) && (defined(__arm__) || defined(__aarch64__)))
        return (unsigned) __builtin_ctzll(w);
    #else
        // portable: scan the low 32-bit half first; if it is empty,
        // the answer is 32 + ctz(high half)
        unsigned z;
        unsigned w1 = unsigned(w);
        if (!w1)
        {
            z = 32;
            w1 = unsigned(w >> 32);
            z += bm::bit_scan_forward32(w1);
        }
        else
        {
            z = bm::bit_scan_forward32(w1);
        }
        return z;
    #endif
#endif
}
443 
444 
445 
446 /*!
447     Returns BSR value
448     @ingroup bitfunc
449 */
450 template <class T>
451 unsigned bit_scan_reverse(T value) BMNOEXCEPT
452 {
453     BM_ASSERT(value);
454 
455     if (bm::conditional<sizeof(T)==8>::test())
456     {
457     #if defined(BM_USE_GCC_BUILD) || (defined(__GNUG__) && (defined(__arm__) || defined(__aarch64__)))
458         return (unsigned) (63 - __builtin_clzll(value));
459     #else
460         bm::id64_t v8 = value;
461         v8 >>= 32;
462         unsigned v = (unsigned)v8;
463         if (v)
464         {
465             v = bm::bit_scan_reverse32(v);
466             return v + 32;
467         }
468     #endif
469     }
470     return bm::bit_scan_reverse32((unsigned)value);
471 }
472 
473 /*! \brief and functor
474     \internal
475  */
476 struct and_func
477 {
478     static
479     BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2
480         { return v1 & v2; }
481 };
482 /*! \brief xor functor
483     \internal
484  */
485 struct xor_func
486 {
487     static
488     BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2
489         { return v1 ^ v2; }
490 };
491 /*! \brief or functor
492     \internal
493  */
494 struct or_func
495 {
496     static
497     BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2
498         { return v1 | v2; }
499 };
500 /*! \brief sub functor
501     \internal
502  */
503 struct sub_func
504 {
505     static
506     BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2
507         { return v1 & ~v2; }
508 };
509 
510 BMFORCEINLINE
511 unsigned mask_r_u32(unsigned nbit) BMNOEXCEPT
512 {
513     BM_ASSERT(nbit < 32);
514     unsigned m = (~0u << nbit);
515     BM_ASSERT(m == block_set_table<true>::_right[nbit]);
516     return m;
517 }
518 
519 BMFORCEINLINE
520 unsigned mask_l_u32(unsigned nbit) BMNOEXCEPT
521 {
522     BM_ASSERT(nbit < 32);
523     unsigned m = ~0u >> (31 - nbit);
524     BM_ASSERT(m == block_set_table<true>::_left[nbit]);
525     return m;
526 }
527 
528 /// XOR swap two variables
529 ///
530 /// @internal
531 template<typename W>
532 BMFORCEINLINE void xor_swap(W& x, W& y) BMNOEXCEPT
533 {
534     BM_ASSERT(&x != &y);
535     x ^= y; y ^= x; x ^= y;
536 }
537 
538 
539 #ifdef __GNUG__
540 #pragma GCC diagnostic pop
541 #endif
542 #ifdef _MSC_VER
543 #pragma warning( pop )
544 #endif
545 
546 /**
547     Сompute mask of bytes presense in 64-bit word
548 
549     @param w - [in] input 64-bit word
550     @return mask with 8 bits
551     @internal
552  */
553 inline
554 unsigned compute_h64_mask(unsigned long long w)
555 {
556     unsigned h_mask = 0;
557     for (unsigned i = 0; w && (i < 8); ++i, w >>= 8)
558     {
559         if ((unsigned char) w)
560             h_mask |= (1u<<i);
561     } // for
562     return h_mask;
563 }
564 
565 
566 
567 } // bm
568 
569 #endif
570