1 /*
2  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
3  *                         University Research and Technology
4  *                         Corporation.  All rights reserved.
5  * Copyright (c) 2004-2005 The University of Tennessee and The University
6  *                         of Tennessee Research Foundation.  All rights
7  *                         reserved.
8  * Copyright (c) 2004-2011 High Performance Computing Center Stuttgart,
9  *                         University of Stuttgart.  All rights reserved.
10  * Copyright (c) 2004-2005 The Regents of the University of California.
11  *                         All rights reserved.
12  * $COPYRIGHT$
13  *
14  * Additional copyrights may follow
15  *
16  * $HEADER$
17  */
18 
19 #ifndef OPAL_BIT_OPS_H
20 #define OPAL_BIT_OPS_H
21 
22 #include "opal/prefetch.h"
23 
24 /**
25  * Calculates the highest bit in an integer
26  *
27  * @param value The integer value to examine
28  * @param start Position to start looking
29  *
30  * @returns pos Position of highest-set integer or -1 if none are set.
31  *
32  * Look at the integer "value" starting at position "start", and move
33  * to the right.  Return the index of the highest bit that is set to
34  * 1.
35  *
36  * WARNING: *NO* error checking is performed.  This is meant to be a
37  * fast inline function.
38  * Using __builtin_clz (count-leading-zeros) uses 3 cycles instead
39  * of 17 cycles (on average value, with start=32)
40  * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
41  */
opal_hibit(int value,int start)42 static inline int opal_hibit(int value, int start)
43 {
44     unsigned int mask;
45 
46 #if OPAL_C_HAVE_BUILTIN_CLZ
47     /* Only look at the part that the caller wanted looking at */
48     mask = value & ((1 << start) - 1);
49 
50     if (OPAL_UNLIKELY (0 == mask)) {
51         return -1;
52     }
53 
54     start = (8*sizeof(int)-1) - __builtin_clz(mask);
55 #else
56     --start;
57     mask = 1 << start;
58 
59     for (; start >= 0; --start, mask >>= 1) {
60         if (value & mask) {
61             break;
62         }
63     }
64 #endif
65 
66     return start;
67 }
68 
69 
70 /**
71  * Returns the cube dimension of a given value.
72  *
73  * @param value The integer value to examine
74  *
75  * @returns cubedim The smallest cube dimension containing that value
76  *
77  * Look at the integer "value" and calculate the smallest power of two
78  * dimension that contains that value.
79  *
80  * WARNING: *NO* error checking is performed.  This is meant to be a
81  * fast inline function.
82  * Using __builtin_clz (count-leading-zeros) uses 3 cycles instead of 50 cycles
83  * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
84  */
opal_cube_dim(int value)85 static inline int opal_cube_dim(int value)
86 {
87     int dim, size;
88 
89 #if OPAL_C_HAVE_BUILTIN_CLZ
90     if (OPAL_UNLIKELY (1 >= value)) {
91         return 0;
92     }
93     size = 8 * sizeof(int);
94     dim = size - __builtin_clz(value-1);
95 #else
96     for (dim = 0, size = 1; size < value; ++dim, size <<= 1) /* empty */;
97 #endif
98 
99     return dim;
100 }
101 
102 
103 /**
104  * @brief Returns next power-of-two of the given value.
105  *
106  * @param value The integer value to return power of 2
107  *
108  * @returns The next power of two
109  *
110  * WARNING: *NO* error checking is performed.  This is meant to be a
111  * fast inline function.
112  * Using __builtin_clz (count-leading-zeros) uses 4 cycles instead of 77
113  * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
114  */
opal_next_poweroftwo(int value)115 static inline int opal_next_poweroftwo(int value)
116 {
117     int power2;
118 
119 #if OPAL_C_HAVE_BUILTIN_CLZ
120     if (OPAL_UNLIKELY (0 == value)) {
121         return 1;
122     }
123     power2 = 1 << (8 * sizeof (int) - __builtin_clz(value));
124 #else
125     for (power2 = 1; value > 0; value >>= 1, power2 <<= 1) /* empty */;
126 #endif
127 
128     return power2;
129 }
130 
131 
132 /**
133  * @brief Returns next power-of-two of the given value (and the value itselve if already power-of-two).
134  *
135  * @param value The integer value to return power of 2
136  *
137  * @returns The next power of two (inclusive)
138  *
139  * WARNING: *NO* error checking is performed.  This is meant to be a
140  * fast inline function.
141  * Using __builtin_clz (count-leading-zeros) uses 4 cycles instead of 56
142  * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
143  */
opal_next_poweroftwo_inclusive(int value)144 static inline int opal_next_poweroftwo_inclusive(int value)
145 {
146     int power2;
147 
148 #if OPAL_C_HAVE_BUILTIN_CLZ
149     if (OPAL_UNLIKELY (1 >= value)) {
150         return 1;
151     }
152     power2 = 1 << (8 * sizeof (int) - __builtin_clz(value - 1));
153 #else
154     for (power2 = 1 ; power2 < value; power2 <<= 1) /* empty */;
155 #endif
156 
157     return power2;
158 }
159 
160 
161 #endif /* OPAL_BIT_OPS_H */
162 
163