1 /*-------------------------------------------------------------------------
2  *
3  * pg_bitutils.c
4  *	  Miscellaneous functions for bit-wise operations.
5  *
6  * Copyright (c) 2019, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  *	  src/port/pg_bitutils.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "c.h"
14 
15 #ifdef HAVE__GET_CPUID
16 #include <cpuid.h>
17 #endif
18 #ifdef HAVE__CPUID
19 #include <intrin.h>
20 #endif
21 
22 #include "port/pg_bitutils.h"
23 
24 
25 /*
26  * Array giving the position of the left-most set bit for each possible
27  * byte value.  We count the right-most position as the 0th bit, and the
28  * left-most the 7th bit.  The 0th entry of the array should not be used.
29  *
30  * Note: this is not used by the functions in pg_bitutils.h when
31  * HAVE__BUILTIN_CLZ is defined, but we provide it anyway, so that
32  * extensions possibly compiled with a different compiler can use it.
33  */
34 const uint8 pg_leftmost_one_pos[256] = {
35 	0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
36 	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
37 	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
38 	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
39 	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
40 	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
41 	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
42 	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
43 	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
44 	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
45 	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
46 	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
47 	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
48 	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
49 	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
50 	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
51 };
52 
53 /*
54  * Array giving the position of the right-most set bit for each possible
55  * byte value.  We count the right-most position as the 0th bit, and the
56  * left-most the 7th bit.  The 0th entry of the array should not be used.
57  *
58  * Note: this is not used by the functions in pg_bitutils.h when
59  * HAVE__BUILTIN_CTZ is defined, but we provide it anyway, so that
60  * extensions possibly compiled with a different compiler can use it.
61  */
62 const uint8 pg_rightmost_one_pos[256] = {
63 	0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
64 	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
65 	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
66 	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
67 	6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
68 	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
69 	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
70 	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
71 	7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
72 	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
73 	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
74 	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
75 	6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
76 	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
77 	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
78 	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
79 };
80 
81 /*
82  * Array giving the number of 1-bits in each possible byte value.
83  *
84  * Note: we export this for use by functions in which explicit use
85  * of the popcount functions seems unlikely to be a win.
86  */
87 const uint8 pg_number_of_ones[256] = {
88 	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
89 	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
90 	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
91 	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
92 	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
93 	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
94 	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
95 	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
96 	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
97 	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
98 	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
99 	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
100 	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
101 	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
102 	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
103 	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
104 };
105 
106 /*
107  * On x86_64, we can use the hardware popcount instruction, but only if
108  * we can verify that the CPU supports it via the cpuid instruction.
109  *
110  * Otherwise, we fall back to __builtin_popcount if the compiler has that,
111  * or a hand-rolled implementation if not.
112  */
113 #ifdef HAVE_X86_64_POPCNTQ
114 #if defined(HAVE__GET_CPUID) || defined(HAVE__CPUID)
115 #define USE_POPCNT_ASM 1
116 #endif
117 #endif
118 
119 static int	pg_popcount32_slow(uint32 word);
120 static int	pg_popcount64_slow(uint64 word);
121 
122 #ifdef USE_POPCNT_ASM
123 static bool pg_popcount_available(void);
124 static int	pg_popcount32_choose(uint32 word);
125 static int	pg_popcount64_choose(uint64 word);
126 static int	pg_popcount32_asm(uint32 word);
127 static int	pg_popcount64_asm(uint64 word);
128 
129 int			(*pg_popcount32) (uint32 word) = pg_popcount32_choose;
130 int			(*pg_popcount64) (uint64 word) = pg_popcount64_choose;
131 #else
132 int			(*pg_popcount32) (uint32 word) = pg_popcount32_slow;
133 int			(*pg_popcount64) (uint64 word) = pg_popcount64_slow;
134 #endif							/* USE_POPCNT_ASM */
135 
136 #ifdef USE_POPCNT_ASM
137 
138 /*
139  * Return true if CPUID indicates that the POPCNT instruction is available.
140  */
141 static bool
pg_popcount_available(void)142 pg_popcount_available(void)
143 {
144 	unsigned int exx[4] = {0, 0, 0, 0};
145 
146 #if defined(HAVE__GET_CPUID)
147 	__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
148 #elif defined(HAVE__CPUID)
149 	__cpuid(exx, 1);
150 #else
151 #error cpuid instruction not available
152 #endif
153 
154 	return (exx[2] & (1 << 23)) != 0;	/* POPCNT */
155 }
156 
157 /*
158  * These functions get called on the first call to pg_popcount32 etc.
159  * They detect whether we can use the asm implementations, and replace
160  * the function pointers so that subsequent calls are routed directly to
161  * the chosen implementation.
162  */
163 static int
pg_popcount32_choose(uint32 word)164 pg_popcount32_choose(uint32 word)
165 {
166 	if (pg_popcount_available())
167 	{
168 		pg_popcount32 = pg_popcount32_asm;
169 		pg_popcount64 = pg_popcount64_asm;
170 	}
171 	else
172 	{
173 		pg_popcount32 = pg_popcount32_slow;
174 		pg_popcount64 = pg_popcount64_slow;
175 	}
176 
177 	return pg_popcount32(word);
178 }
179 
180 static int
pg_popcount64_choose(uint64 word)181 pg_popcount64_choose(uint64 word)
182 {
183 	if (pg_popcount_available())
184 	{
185 		pg_popcount32 = pg_popcount32_asm;
186 		pg_popcount64 = pg_popcount64_asm;
187 	}
188 	else
189 	{
190 		pg_popcount32 = pg_popcount32_slow;
191 		pg_popcount64 = pg_popcount64_slow;
192 	}
193 
194 	return pg_popcount64(word);
195 }
196 
197 /*
198  * pg_popcount32_asm
199  *		Return the number of 1 bits set in word
200  */
201 static int
pg_popcount32_asm(uint32 word)202 pg_popcount32_asm(uint32 word)
203 {
204 	uint32		res;
205 
206 __asm__ __volatile__(" popcntl %1,%0\n":"=q"(res):"rm"(word):"cc");
207 	return (int) res;
208 }
209 
210 /*
211  * pg_popcount64_asm
212  *		Return the number of 1 bits set in word
213  */
214 static int
pg_popcount64_asm(uint64 word)215 pg_popcount64_asm(uint64 word)
216 {
217 	uint64		res;
218 
219 __asm__ __volatile__(" popcntq %1,%0\n":"=q"(res):"rm"(word):"cc");
220 	return (int) res;
221 }
222 
223 #endif							/* USE_POPCNT_ASM */
224 
225 
226 /*
227  * pg_popcount32_slow
228  *		Return the number of 1 bits set in word
229  */
230 static int
pg_popcount32_slow(uint32 word)231 pg_popcount32_slow(uint32 word)
232 {
233 #ifdef HAVE__BUILTIN_POPCOUNT
234 	return __builtin_popcount(word);
235 #else							/* !HAVE__BUILTIN_POPCOUNT */
236 	int			result = 0;
237 
238 	while (word != 0)
239 	{
240 		result += pg_number_of_ones[word & 255];
241 		word >>= 8;
242 	}
243 
244 	return result;
245 #endif							/* HAVE__BUILTIN_POPCOUNT */
246 }
247 
248 /*
249  * pg_popcount64_slow
250  *		Return the number of 1 bits set in word
251  */
252 static int
pg_popcount64_slow(uint64 word)253 pg_popcount64_slow(uint64 word)
254 {
255 #ifdef HAVE__BUILTIN_POPCOUNT
256 #if defined(HAVE_LONG_INT_64)
257 	return __builtin_popcountl(word);
258 #elif defined(HAVE_LONG_LONG_INT_64)
259 	return __builtin_popcountll(word);
260 #else
261 #error must have a working 64-bit integer datatype
262 #endif
263 #else							/* !HAVE__BUILTIN_POPCOUNT */
264 	int			result = 0;
265 
266 	while (word != 0)
267 	{
268 		result += pg_number_of_ones[word & 255];
269 		word >>= 8;
270 	}
271 
272 	return result;
273 #endif							/* HAVE__BUILTIN_POPCOUNT */
274 }
275 
276 
277 /*
278  * pg_popcount
279  *		Returns the number of 1-bits in buf
280  */
281 uint64
pg_popcount(const char * buf,int bytes)282 pg_popcount(const char *buf, int bytes)
283 {
284 	uint64		popcnt = 0;
285 
286 #if SIZEOF_VOID_P >= 8
287 	/* Process in 64-bit chunks if the buffer is aligned. */
288 	if (buf == (const char *) TYPEALIGN(8, buf))
289 	{
290 		const uint64 *words = (const uint64 *) buf;
291 
292 		while (bytes >= 8)
293 		{
294 			popcnt += pg_popcount64(*words++);
295 			bytes -= 8;
296 		}
297 
298 		buf = (const char *) words;
299 	}
300 #else
301 	/* Process in 32-bit chunks if the buffer is aligned. */
302 	if (buf == (const char *) TYPEALIGN(4, buf))
303 	{
304 		const uint32 *words = (const uint32 *) buf;
305 
306 		while (bytes >= 4)
307 		{
308 			popcnt += pg_popcount32(*words++);
309 			bytes -= 4;
310 		}
311 
312 		buf = (const char *) words;
313 	}
314 #endif
315 
316 	/* Process any remaining bytes */
317 	while (bytes--)
318 		popcnt += pg_number_of_ones[(unsigned char) *buf++];
319 
320 	return popcnt;
321 }
322