1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 //                        Kokkos v. 3.0
6 //       Copyright (2020) National Technology & Engineering
7 //               Solutions of Sandia, LLC (NTESS).
8 //
9 // Under the terms of Contract DE-NA0003525 with NTESS,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions are
14 // met:
15 //
16 // 1. Redistributions of source code must retain the above copyright
17 // notice, this list of conditions and the following disclaimer.
18 //
19 // 2. Redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution.
22 //
23 // 3. Neither the name of the Corporation nor the names of the
24 // contributors may be used to endorse or promote products derived from
25 // this software without specific prior written permission.
26 //
27 // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 //
39 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40 //
41 // ************************************************************************
42 //@HEADER
43 */
44 
45 #ifndef KOKKOS_BITOPS_HPP
46 #define KOKKOS_BITOPS_HPP
47 
48 #include <Kokkos_Macros.hpp>
49 #include <cstdint>
50 #include <climits>
51 
52 #ifdef KOKKOS_COMPILER_INTEL
53 #include <immintrin.h>
54 #endif
55 
56 namespace Kokkos {
57 
/// \brief Integer base-2 logarithm: position of the most significant set bit.
///
/// \param i  Value to inspect.
/// \return   floor(log2(i)) for i > 0. For i == 0 every host branch returns
///           0. The CUDA/HIP device branch returns `shift - __clz(i)` for
///           every input, zero included, and is not special-cased here.
KOKKOS_FORCEINLINE_FUNCTION
int log2(unsigned i) {
  // Index of the top bit of an unsigned (31 when unsigned is 32 bits wide).
  enum : int { shift = sizeof(unsigned) * CHAR_BIT - 1 };
#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
  return shift - __clz(i);
#elif defined(KOKKOS_COMPILER_INTEL)
  // The bit-scan intrinsic has no defined result for 0; answer 0 like the
  // Cray and portable branches do.
  return i ? _bit_scan_reverse(i) : 0;
#elif defined(KOKKOS_COMPILER_CRAYC)
  return i ? shift - _leadz32(i) : 0;
#elif defined(__GNUC__) || defined(__GNUG__)
  // __builtin_clz(0) is undefined, so zero must never reach it.
  return i ? shift - __builtin_clz(i) : 0;
#else
  // Portable fallback: walk down from the top bit until a set bit is found.
  int offset = 0;
  if (i) {
    // Unsigned literal: at offset == shift a signed 1 would be shifted into
    // the sign bit.
    for (offset = shift; (i & (1u << offset)) == 0; --offset)
      ;
  }
  return offset;
#endif
}
78 
79 namespace Impl {
80 
/**\brief  Find first zero bit.
 *
 *  Scans from the least significant bit upwards.
 *
 *  \param i  Bit pattern to search.
 *  \return   Zero-based index of the lowest clear bit of \c i, or -1 when
 *            every bit of \c i is set.
 */
KOKKOS_FORCEINLINE_FUNCTION
int bit_first_zero(unsigned i) noexcept {
  // All-ones pattern: the only input that has no zero bit.
  enum : unsigned { full = ~0u };

#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
  return full != i ? __ffs(~i) - 1 : -1;
#elif defined(KOKKOS_COMPILER_INTEL)
  return full != i ? _bit_scan_forward(~i) : -1;
#elif defined(KOKKOS_COMPILER_CRAYC)
  // i ^ (i + 1) sets exactly the bits from 0 up to and including the first
  // zero bit of i, so its population count is that bit's index plus one.
  return full != i ? _popcnt(i ^ (i + 1)) - 1 : -1;
#elif defined(KOKKOS_COMPILER_GNU) || defined(__GNUC__) || defined(__GNUG__)
  // The first zero of i is the first one of ~i; ffs is one-based.
  return full != i ? __builtin_ffs(~i) - 1 : -1;
#else
  // Portable fallback: step upwards past every set bit.
  int offset = -1;
  if (full != i) {
    // Unsigned literal: the probe can reach the top bit (e.g. i == ~0u >> 1),
    // where a signed 1 would be shifted into the sign bit.
    for (offset = 0; i & (1u << offset); ++offset)
      ;
  }
  return offset;
#endif
}
106 
/// \brief Find the lowest set bit.
///
/// \param i  Bit pattern to search.
/// \return   Zero-based index of the least significant set bit of \c i, or
///           -1 when \c i is zero. The ffs-based branches obtain the -1 from
///           ffs reporting 0 for a zero argument.
KOKKOS_FORCEINLINE_FUNCTION
int bit_scan_forward(unsigned i) {
#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
  return __ffs(i) - 1;
#elif defined(KOKKOS_COMPILER_INTEL)
  // The bit-scan intrinsic has no defined result for 0; report -1 like every
  // other branch.
  return i ? _bit_scan_forward(i) : -1;
#elif defined(KOKKOS_COMPILER_CRAYC)
  // ~i & (i - 1) keeps only the bits below the lowest set bit of i; counting
  // them yields that bit's index.
  return i ? _popcnt(~i & (i - 1)) : -1;
#elif defined(KOKKOS_COMPILER_GNU) || defined(__GNUC__) || defined(__GNUG__)
  return __builtin_ffs(i) - 1;
#else
  // Portable fallback: step upwards past every clear bit.
  int offset = -1;
  if (i) {
    // Unsigned literal: the probe reaches the top bit when only that bit is
    // set, where a signed 1 would be shifted into the sign bit.
    for (offset = 0; (i & (1u << offset)) == 0; ++offset)
      ;
  }
  return offset;
#endif
}
126 
/// \brief Population count.
///
/// \param i  Bit pattern to inspect.
/// \return   Number of bits of \c i that are set.
KOKKOS_FORCEINLINE_FUNCTION
int bit_count(unsigned i) {
#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
  return __popc(i);
#elif defined(__INTEL_COMPILER)
  return _popcnt32(i);
#elif defined(KOKKOS_COMPILER_CRAYC)
  return _popcnt(i);
#elif defined(__GNUC__) || defined(__GNUG__)
  return __builtin_popcount(i);
#else
  // Portable fallback: the width-generic parallel bit count described under
  // "Counting bits set, in parallel" at
  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  // The masks are derived from the all-ones value so they scale with the
  // width of unsigned.
  const unsigned all_ones    = ~0u;
  const unsigned pair_mask   = all_ones / 3u;          // 0101... pattern
  const unsigned nibble_mask = all_ones / 15u * 3u;    // 0011... pattern
  const unsigned byte_mask   = all_ones / 255u * 15u;  // 00001111... pattern
  const unsigned byte_ones   = all_ones / 255u;        // 1 in every byte

  // Fold neighbouring fields together: 1-bit counts into 2-bit sums, then
  // 4-bit sums, then one partial count per byte.
  unsigned sums = i - ((i >> 1) & pair_mask);
  sums          = (sums & nibble_mask) + ((sums >> 2) & nibble_mask);
  sums          = (sums + (sums >> 4)) & byte_mask;

  // The multiply accumulates all per-byte counts into the top byte, which the
  // shift then moves down to the low end.
  return static_cast<int>((sums * byte_ones) >>
                          ((sizeof(unsigned) - 1) * CHAR_BIT));
#endif
}
148 
149 KOKKOS_INLINE_FUNCTION
integral_power_of_two_that_contains(const unsigned N)150 unsigned integral_power_of_two_that_contains(const unsigned N) {
151   const unsigned i = Kokkos::log2(N);
152   return ((1u << i) < N) ? i + 1 : i;
153 }
154 
155 }  // namespace Impl
156 }  // namespace Kokkos
157 
158 #endif  // KOKKOS_BITOPS_HPP
159