1 /*-------------------------------------------------------------------------
2 *
3 * pg_bitutils.c
4 * Miscellaneous functions for bit-wise operations.
5 *
6 * Copyright (c) 2019-2020, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * src/port/pg_bitutils.c
10 *
11 *-------------------------------------------------------------------------
12 */
13 #include "c.h"
14
15 #ifdef HAVE__GET_CPUID
16 #include <cpuid.h>
17 #endif
18 #ifdef HAVE__CPUID
19 #include <intrin.h>
20 #endif
21
22 #include "port/pg_bitutils.h"
23
24
25 /*
26 * Array giving the position of the left-most set bit for each possible
27 * byte value. We count the right-most position as the 0th bit, and the
28 * left-most the 7th bit. The 0th entry of the array should not be used.
29 *
30 * Note: this is not used by the functions in pg_bitutils.h when
31 * HAVE__BUILTIN_CLZ is defined, but we provide it anyway, so that
32 * extensions possibly compiled with a different compiler can use it.
33 */
34 const uint8 pg_leftmost_one_pos[256] = {
35 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
36 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
37 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
38 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
39 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
40 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
41 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
42 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
43 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
44 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
45 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
46 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
47 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
48 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
49 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
50 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
51 };
52
53 /*
54 * Array giving the position of the right-most set bit for each possible
55 * byte value. We count the right-most position as the 0th bit, and the
56 * left-most the 7th bit. The 0th entry of the array should not be used.
57 *
58 * Note: this is not used by the functions in pg_bitutils.h when
59 * HAVE__BUILTIN_CTZ is defined, but we provide it anyway, so that
60 * extensions possibly compiled with a different compiler can use it.
61 */
62 const uint8 pg_rightmost_one_pos[256] = {
63 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
64 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
65 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
66 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
67 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
68 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
69 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
70 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
71 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
72 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
73 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
74 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
75 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
76 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
77 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
78 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
79 };
80
81 /*
82 * Array giving the number of 1-bits in each possible byte value.
83 *
84 * Note: we export this for use by functions in which explicit use
85 * of the popcount functions seems unlikely to be a win.
86 */
87 const uint8 pg_number_of_ones[256] = {
88 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
89 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
90 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
91 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
92 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
93 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
94 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
95 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
96 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
97 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
98 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
99 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
100 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
101 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
102 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
103 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
104 };
105
106 /*
107 * On x86_64, we can use the hardware popcount instruction, but only if
108 * we can verify that the CPU supports it via the cpuid instruction.
109 *
110 * Otherwise, we fall back to __builtin_popcount if the compiler has that,
111 * or a hand-rolled implementation if not.
112 */
113 #ifdef HAVE_X86_64_POPCNTQ
114 #if defined(HAVE__GET_CPUID) || defined(HAVE__CPUID)
115 #define USE_POPCNT_ASM 1
116 #endif
117 #endif
118
119 static int pg_popcount32_slow(uint32 word);
120 static int pg_popcount64_slow(uint64 word);
121
122 #ifdef USE_POPCNT_ASM
123 static bool pg_popcount_available(void);
124 static int pg_popcount32_choose(uint32 word);
125 static int pg_popcount64_choose(uint64 word);
126 static int pg_popcount32_asm(uint32 word);
127 static int pg_popcount64_asm(uint64 word);
128
129 int (*pg_popcount32) (uint32 word) = pg_popcount32_choose;
130 int (*pg_popcount64) (uint64 word) = pg_popcount64_choose;
131 #else
132 int (*pg_popcount32) (uint32 word) = pg_popcount32_slow;
133 int (*pg_popcount64) (uint64 word) = pg_popcount64_slow;
134 #endif /* USE_POPCNT_ASM */
135
136 #ifdef USE_POPCNT_ASM
137
138 /*
139 * Return true if CPUID indicates that the POPCNT instruction is available.
140 */
141 static bool
pg_popcount_available(void)142 pg_popcount_available(void)
143 {
144 unsigned int exx[4] = {0, 0, 0, 0};
145
146 #if defined(HAVE__GET_CPUID)
147 __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
148 #elif defined(HAVE__CPUID)
149 __cpuid(exx, 1);
150 #else
151 #error cpuid instruction not available
152 #endif
153
154 return (exx[2] & (1 << 23)) != 0; /* POPCNT */
155 }
156
157 /*
158 * These functions get called on the first call to pg_popcount32 etc.
159 * They detect whether we can use the asm implementations, and replace
160 * the function pointers so that subsequent calls are routed directly to
161 * the chosen implementation.
162 */
163 static int
pg_popcount32_choose(uint32 word)164 pg_popcount32_choose(uint32 word)
165 {
166 if (pg_popcount_available())
167 {
168 pg_popcount32 = pg_popcount32_asm;
169 pg_popcount64 = pg_popcount64_asm;
170 }
171 else
172 {
173 pg_popcount32 = pg_popcount32_slow;
174 pg_popcount64 = pg_popcount64_slow;
175 }
176
177 return pg_popcount32(word);
178 }
179
180 static int
pg_popcount64_choose(uint64 word)181 pg_popcount64_choose(uint64 word)
182 {
183 if (pg_popcount_available())
184 {
185 pg_popcount32 = pg_popcount32_asm;
186 pg_popcount64 = pg_popcount64_asm;
187 }
188 else
189 {
190 pg_popcount32 = pg_popcount32_slow;
191 pg_popcount64 = pg_popcount64_slow;
192 }
193
194 return pg_popcount64(word);
195 }
196
197 /*
198 * pg_popcount32_asm
199 * Return the number of 1 bits set in word
200 */
201 static int
pg_popcount32_asm(uint32 word)202 pg_popcount32_asm(uint32 word)
203 {
204 uint32 res;
205
206 __asm__ __volatile__(" popcntl %1,%0\n":"=q"(res):"rm"(word):"cc");
207 return (int) res;
208 }
209
210 /*
211 * pg_popcount64_asm
212 * Return the number of 1 bits set in word
213 */
214 static int
pg_popcount64_asm(uint64 word)215 pg_popcount64_asm(uint64 word)
216 {
217 uint64 res;
218
219 __asm__ __volatile__(" popcntq %1,%0\n":"=q"(res):"rm"(word):"cc");
220 return (int) res;
221 }
222
223 #endif /* USE_POPCNT_ASM */
224
225
226 /*
227 * pg_popcount32_slow
228 * Return the number of 1 bits set in word
229 */
230 static int
pg_popcount32_slow(uint32 word)231 pg_popcount32_slow(uint32 word)
232 {
233 #ifdef HAVE__BUILTIN_POPCOUNT
234 return __builtin_popcount(word);
235 #else /* !HAVE__BUILTIN_POPCOUNT */
236 int result = 0;
237
238 while (word != 0)
239 {
240 result += pg_number_of_ones[word & 255];
241 word >>= 8;
242 }
243
244 return result;
245 #endif /* HAVE__BUILTIN_POPCOUNT */
246 }
247
248 /*
249 * pg_popcount64_slow
250 * Return the number of 1 bits set in word
251 */
252 static int
pg_popcount64_slow(uint64 word)253 pg_popcount64_slow(uint64 word)
254 {
255 #ifdef HAVE__BUILTIN_POPCOUNT
256 #if defined(HAVE_LONG_INT_64)
257 return __builtin_popcountl(word);
258 #elif defined(HAVE_LONG_LONG_INT_64)
259 return __builtin_popcountll(word);
260 #else
261 #error must have a working 64-bit integer datatype
262 #endif
263 #else /* !HAVE__BUILTIN_POPCOUNT */
264 int result = 0;
265
266 while (word != 0)
267 {
268 result += pg_number_of_ones[word & 255];
269 word >>= 8;
270 }
271
272 return result;
273 #endif /* HAVE__BUILTIN_POPCOUNT */
274 }
275
276
277 /*
278 * pg_popcount
279 * Returns the number of 1-bits in buf
280 */
281 uint64
pg_popcount(const char * buf,int bytes)282 pg_popcount(const char *buf, int bytes)
283 {
284 uint64 popcnt = 0;
285
286 #if SIZEOF_VOID_P >= 8
287 /* Process in 64-bit chunks if the buffer is aligned. */
288 if (buf == (const char *) TYPEALIGN(8, buf))
289 {
290 const uint64 *words = (const uint64 *) buf;
291
292 while (bytes >= 8)
293 {
294 popcnt += pg_popcount64(*words++);
295 bytes -= 8;
296 }
297
298 buf = (const char *) words;
299 }
300 #else
301 /* Process in 32-bit chunks if the buffer is aligned. */
302 if (buf == (const char *) TYPEALIGN(4, buf))
303 {
304 const uint32 *words = (const uint32 *) buf;
305
306 while (bytes >= 4)
307 {
308 popcnt += pg_popcount32(*words++);
309 bytes -= 4;
310 }
311
312 buf = (const char *) words;
313 }
314 #endif
315
316 /* Process any remaining bytes */
317 while (bytes--)
318 popcnt += pg_number_of_ones[(unsigned char) *buf++];
319
320 return popcnt;
321 }
322