1 /*
2 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
3 * University Research and Technology
4 * Corporation. All rights reserved.
5 * Copyright (c) 2004-2005 The University of Tennessee and The University
6 * of Tennessee Research Foundation. All rights
7 * reserved.
8 * Copyright (c) 2004-2011 High Performance Computing Center Stuttgart,
9 * University of Stuttgart. All rights reserved.
10 * Copyright (c) 2004-2005 The Regents of the University of California.
11 * All rights reserved.
12 * $COPYRIGHT$
13 *
14 * Additional copyrights may follow
15 *
16 * $HEADER$
17 */
18
19 #ifndef OPAL_BIT_OPS_H
20 #define OPAL_BIT_OPS_H
21
22 #include "opal/prefetch.h"
23
24 /**
25 * Calculates the highest bit in an integer
26 *
27 * @param value The integer value to examine
28 * @param start Position to start looking
29 *
30 * @returns pos Position of highest-set integer or -1 if none are set.
31 *
32 * Look at the integer "value" starting at position "start", and move
33 * to the right. Return the index of the highest bit that is set to
34 * 1.
35 *
36 * WARNING: *NO* error checking is performed. This is meant to be a
37 * fast inline function.
38 * Using __builtin_clz (count-leading-zeros) uses 3 cycles instead
39 * of 17 cycles (on average value, with start=32)
40 * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
41 */
opal_hibit(int value,int start)42 static inline int opal_hibit(int value, int start)
43 {
44 unsigned int mask;
45
46 #if OPAL_C_HAVE_BUILTIN_CLZ
47 /* Only look at the part that the caller wanted looking at */
48 mask = value & ((1 << start) - 1);
49
50 if (OPAL_UNLIKELY (0 == mask)) {
51 return -1;
52 }
53
54 start = (8*sizeof(int)-1) - __builtin_clz(mask);
55 #else
56 --start;
57 mask = 1 << start;
58
59 for (; start >= 0; --start, mask >>= 1) {
60 if (value & mask) {
61 break;
62 }
63 }
64 #endif
65
66 return start;
67 }
68
69
70 /**
71 * Returns the cube dimension of a given value.
72 *
73 * @param value The integer value to examine
74 *
75 * @returns cubedim The smallest cube dimension containing that value
76 *
77 * Look at the integer "value" and calculate the smallest power of two
78 * dimension that contains that value.
79 *
80 * WARNING: *NO* error checking is performed. This is meant to be a
81 * fast inline function.
82 * Using __builtin_clz (count-leading-zeros) uses 3 cycles instead of 50 cycles
83 * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
84 */
opal_cube_dim(int value)85 static inline int opal_cube_dim(int value)
86 {
87 int dim, size;
88
89 #if OPAL_C_HAVE_BUILTIN_CLZ
90 if (OPAL_UNLIKELY (1 >= value)) {
91 return 0;
92 }
93 size = 8 * sizeof(int);
94 dim = size - __builtin_clz(value-1);
95 #else
96 for (dim = 0, size = 1; size < value; ++dim, size <<= 1) /* empty */;
97 #endif
98
99 return dim;
100 }
101
102
103 /**
104 * @brief Returns next power-of-two of the given value.
105 *
106 * @param value The integer value to return power of 2
107 *
108 * @returns The next power of two
109 *
110 * WARNING: *NO* error checking is performed. This is meant to be a
111 * fast inline function.
112 * Using __builtin_clz (count-leading-zeros) uses 4 cycles instead of 77
113 * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
114 */
opal_next_poweroftwo(int value)115 static inline int opal_next_poweroftwo(int value)
116 {
117 int power2;
118
119 #if OPAL_C_HAVE_BUILTIN_CLZ
120 if (OPAL_UNLIKELY (0 == value)) {
121 return 1;
122 }
123 power2 = 1 << (8 * sizeof (int) - __builtin_clz(value));
124 #else
125 for (power2 = 1; value > 0; value >>= 1, power2 <<= 1) /* empty */;
126 #endif
127
128 return power2;
129 }
130
131
132 /**
133 * @brief Returns next power-of-two of the given value (and the value itselve if already power-of-two).
134 *
135 * @param value The integer value to return power of 2
136 *
137 * @returns The next power of two (inclusive)
138 *
139 * WARNING: *NO* error checking is performed. This is meant to be a
140 * fast inline function.
141 * Using __builtin_clz (count-leading-zeros) uses 4 cycles instead of 56
142 * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
143 */
opal_next_poweroftwo_inclusive(int value)144 static inline int opal_next_poweroftwo_inclusive(int value)
145 {
146 int power2;
147
148 #if OPAL_C_HAVE_BUILTIN_CLZ
149 if (OPAL_UNLIKELY (1 >= value)) {
150 return 1;
151 }
152 power2 = 1 << (8 * sizeof (int) - __builtin_clz(value - 1));
153 #else
154 for (power2 = 1 ; power2 < value; power2 <<= 1) /* empty */;
155 #endif
156
157 return power2;
158 }
159
160
161 #endif /* OPAL_BIT_OPS_H */
162
163