1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file tuklib_integer.h
4 /// \brief Various integer and bit operations
5 ///
6 /// This file provides macros or functions to do some basic integer and bit
7 /// operations.
8 ///
9 /// Endianness related integer operations (XX = 16, 32, or 64; Y = b or l):
10 /// - Byte swapping: bswapXX(num)
11 /// - Byte order conversions to/from native: convXXYe(num)
12 /// - Aligned reads: readXXYe(ptr)
13 /// - Aligned writes: writeXXYe(ptr, num)
14 /// - Unaligned reads (16/32-bit only): unaligned_readXXYe(ptr)
15 /// - Unaligned writes (16/32-bit only): unaligned_writeXXYe(ptr, num)
16 ///
17 /// Since they can macros, the arguments should have no side effects since
18 /// they may be evaluated more than once.
19 ///
20 /// \todo PowerPC and possibly some other architectures support
21 /// byte swapping load and store instructions. This file
22 /// doesn't take advantage of those instructions.
23 ///
24 /// Bit scan operations for non-zero 32-bit integers:
25 /// - Bit scan reverse (find highest non-zero bit): bsr32(num)
26 /// - Count leading zeros: clz32(num)
27 /// - Count trailing zeros: ctz32(num)
28 /// - Bit scan forward (simply an alias for ctz32()): bsf32(num)
29 ///
30 /// The above bit scan operations return 0-31. If num is zero,
31 /// the result is undefined.
32 //
33 // Authors: Lasse Collin
34 // Joachim Henke
35 //
36 // This file has been put into the public domain.
37 // You can do whatever you want with this file.
38 //
39 ///////////////////////////////////////////////////////////////////////////////
40
41 #ifndef TUKLIB_INTEGER_H
42 #define TUKLIB_INTEGER_H
43
44 #include "tuklib_common.h"
45
46
47 ////////////////////////////////////////
48 // Operating system specific features //
49 ////////////////////////////////////////
50
51 #if defined(HAVE_BYTESWAP_H)
52 // glibc, uClibc, dietlibc
53 # include <byteswap.h>
54 # ifdef HAVE_BSWAP_16
55 # define bswap16(num) bswap_16(num)
56 # endif
57 # ifdef HAVE_BSWAP_32
58 # define bswap32(num) bswap_32(num)
59 # endif
60 # ifdef HAVE_BSWAP_64
61 # define bswap64(num) bswap_64(num)
62 # endif
63
64 #elif defined(HAVE_SYS_ENDIAN_H)
65 // *BSDs and Darwin
66 # include <sys/endian.h>
67
68 #elif defined(HAVE_SYS_BYTEORDER_H)
69 // Solaris
70 # include <sys/byteorder.h>
71 # ifdef BSWAP_16
72 # define bswap16(num) BSWAP_16(num)
73 # endif
74 # ifdef BSWAP_32
75 # define bswap32(num) BSWAP_32(num)
76 # endif
77 # ifdef BSWAP_64
78 # define bswap64(num) BSWAP_64(num)
79 # endif
80 # ifdef BE_16
81 # define conv16be(num) BE_16(num)
82 # endif
83 # ifdef BE_32
84 # define conv32be(num) BE_32(num)
85 # endif
86 # ifdef BE_64
87 # define conv64be(num) BE_64(num)
88 # endif
89 # ifdef LE_16
90 # define conv16le(num) LE_16(num)
91 # endif
92 # ifdef LE_32
93 # define conv32le(num) LE_32(num)
94 # endif
95 # ifdef LE_64
96 # define conv64le(num) LE_64(num)
97 # endif
98 #endif
99
100 #ifdef _MSC_VER
101 #include <Windows.h>
102 #endif
103
104
105 ////////////////////////////////
106 // Compiler-specific features //
107 ////////////////////////////////
108
109 // Newer Intel C compilers require immintrin.h for _bit_scan_reverse()
110 // and such functions.
111 #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1500)
112 # include <immintrin.h>
113 #endif
114
115
116 ///////////////////
117 // Byte swapping //
118 ///////////////////
119
120 #ifndef bswap16
121 # define bswap16(num) \
122 (((uint16_t)(num) << 8) | ((uint16_t)(num) >> 8))
123 #endif
124
125 #ifndef bswap32
126 # define bswap32(num) \
127 ( (((uint32_t)(num) << 24) ) \
128 | (((uint32_t)(num) << 8) & UINT32_C(0x00FF0000)) \
129 | (((uint32_t)(num) >> 8) & UINT32_C(0x0000FF00)) \
130 | (((uint32_t)(num) >> 24) ) )
131 #endif
132
133 #ifndef bswap64
134 # define bswap64(num) \
135 ( (((uint64_t)(num) << 56) ) \
136 | (((uint64_t)(num) << 40) & UINT64_C(0x00FF000000000000)) \
137 | (((uint64_t)(num) << 24) & UINT64_C(0x0000FF0000000000)) \
138 | (((uint64_t)(num) << 8) & UINT64_C(0x000000FF00000000)) \
139 | (((uint64_t)(num) >> 8) & UINT64_C(0x00000000FF000000)) \
140 | (((uint64_t)(num) >> 24) & UINT64_C(0x0000000000FF0000)) \
141 | (((uint64_t)(num) >> 40) & UINT64_C(0x000000000000FF00)) \
142 | (((uint64_t)(num) >> 56) ) )
143 #endif
144
145 // Define conversion macros using the basic byte swapping macros.
146 #ifdef WORDS_BIGENDIAN
147 # ifndef conv16be
148 # define conv16be(num) ((uint16_t)(num))
149 # endif
150 # ifndef conv32be
151 # define conv32be(num) ((uint32_t)(num))
152 # endif
153 # ifndef conv64be
154 # define conv64be(num) ((uint64_t)(num))
155 # endif
156 # ifndef conv16le
157 # define conv16le(num) bswap16(num)
158 # endif
159 # ifndef conv32le
160 # define conv32le(num) bswap32(num)
161 # endif
162 # ifndef conv64le
163 # define conv64le(num) bswap64(num)
164 # endif
165 #else
166 # ifndef conv16be
167 # define conv16be(num) bswap16(num)
168 # endif
169 # ifndef conv32be
170 # define conv32be(num) bswap32(num)
171 # endif
172 # ifndef conv64be
173 # define conv64be(num) bswap64(num)
174 # endif
175 # ifndef conv16le
176 # define conv16le(num) ((uint16_t)(num))
177 # endif
178 # ifndef conv32le
179 # define conv32le(num) ((uint32_t)(num))
180 # endif
181 # ifndef conv64le
182 # define conv64le(num) ((uint64_t)(num))
183 # endif
184 #endif
185
186
187 //////////////////////////////
188 // Aligned reads and writes //
189 //////////////////////////////
190
191 static inline uint16_t
read16be(const uint8_t * buf)192 read16be(const uint8_t *buf)
193 {
194 uint16_t num = *(const uint16_t *)buf;
195 return conv16be(num);
196 }
197
198
199 static inline uint16_t
read16le(const uint8_t * buf)200 read16le(const uint8_t *buf)
201 {
202 uint16_t num = *(const uint16_t *)buf;
203 return conv16le(num);
204 }
205
206
207 static inline uint32_t
read32be(const uint8_t * buf)208 read32be(const uint8_t *buf)
209 {
210 uint32_t num = *(const uint32_t *)buf;
211 return conv32be(num);
212 }
213
214
215 static inline uint32_t
read32le(const uint8_t * buf)216 read32le(const uint8_t *buf)
217 {
218 uint32_t num = *(const uint32_t *)buf;
219 return conv32le(num);
220 }
221
222
223 static inline uint64_t
read64be(const uint8_t * buf)224 read64be(const uint8_t *buf)
225 {
226 uint64_t num = *(const uint64_t *)buf;
227 return conv64be(num);
228 }
229
230
231 static inline uint64_t
read64le(const uint8_t * buf)232 read64le(const uint8_t *buf)
233 {
234 uint64_t num = *(const uint64_t *)buf;
235 return conv64le(num);
236 }
237
238
239 // NOTE: Possible byte swapping must be done in a macro to allow GCC
240 // to optimize byte swapping of constants when using glibc's or *BSD's
241 // byte swapping macros. The actual write is done in an inline function
242 // to make type checking of the buf pointer possible similarly to readXXYe()
243 // functions.
244
245 #define write16be(buf, num) write16ne((buf), conv16be(num))
246 #define write16le(buf, num) write16ne((buf), conv16le(num))
247 #define write32be(buf, num) write32ne((buf), conv32be(num))
248 #define write32le(buf, num) write32ne((buf), conv32le(num))
249 #define write64be(buf, num) write64ne((buf), conv64be(num))
250 #define write64le(buf, num) write64ne((buf), conv64le(num))
251
252
253 static inline void
write16ne(uint8_t * buf,uint16_t num)254 write16ne(uint8_t *buf, uint16_t num)
255 {
256 *(uint16_t *)buf = num;
257 return;
258 }
259
260
261 static inline void
write32ne(uint8_t * buf,uint32_t num)262 write32ne(uint8_t *buf, uint32_t num)
263 {
264 *(uint32_t *)buf = num;
265 return;
266 }
267
268
269 static inline void
write64ne(uint8_t * buf,uint64_t num)270 write64ne(uint8_t *buf, uint64_t num)
271 {
272 *(uint64_t *)buf = num;
273 return;
274 }
275
276
277 ////////////////////////////////
278 // Unaligned reads and writes //
279 ////////////////////////////////
280
281 // NOTE: TUKLIB_FAST_UNALIGNED_ACCESS indicates only support for 16-bit and
282 // 32-bit unaligned integer loads and stores. It's possible that 64-bit
283 // unaligned access doesn't work or is slower than byte-by-byte access.
284 // Since unaligned 64-bit is probably not needed as often as 16-bit or
285 // 32-bit, we simply don't support 64-bit unaligned access for now.
286 #ifdef TUKLIB_FAST_UNALIGNED_ACCESS
287 # define unaligned_read16be read16be
288 # define unaligned_read16le read16le
289 # define unaligned_read32be read32be
290 # define unaligned_read32le read32le
291 # define unaligned_write16be write16be
292 # define unaligned_write16le write16le
293 # define unaligned_write32be write32be
294 # define unaligned_write32le write32le
295
296 #else
297
298 static inline uint16_t
unaligned_read16be(const uint8_t * buf)299 unaligned_read16be(const uint8_t *buf)
300 {
301 uint16_t num = ((uint16_t)buf[0] << 8) | (uint16_t)buf[1];
302 return num;
303 }
304
305
306 static inline uint16_t
unaligned_read16le(const uint8_t * buf)307 unaligned_read16le(const uint8_t *buf)
308 {
309 uint16_t num = ((uint16_t)buf[0]) | ((uint16_t)buf[1] << 8);
310 return num;
311 }
312
313
314 static inline uint32_t
unaligned_read32be(const uint8_t * buf)315 unaligned_read32be(const uint8_t *buf)
316 {
317 uint32_t num = (uint32_t)buf[0] << 24;
318 num |= (uint32_t)buf[1] << 16;
319 num |= (uint32_t)buf[2] << 8;
320 num |= (uint32_t)buf[3];
321 return num;
322 }
323
324
325 static inline uint32_t
unaligned_read32le(const uint8_t * buf)326 unaligned_read32le(const uint8_t *buf)
327 {
328 uint32_t num = (uint32_t)buf[0];
329 num |= (uint32_t)buf[1] << 8;
330 num |= (uint32_t)buf[2] << 16;
331 num |= (uint32_t)buf[3] << 24;
332 return num;
333 }
334
335
336 static inline void
unaligned_write16be(uint8_t * buf,uint16_t num)337 unaligned_write16be(uint8_t *buf, uint16_t num)
338 {
339 buf[0] = (uint8_t)(num >> 8);
340 buf[1] = (uint8_t)num;
341 return;
342 }
343
344
345 static inline void
unaligned_write16le(uint8_t * buf,uint16_t num)346 unaligned_write16le(uint8_t *buf, uint16_t num)
347 {
348 buf[0] = (uint8_t)num;
349 buf[1] = (uint8_t)(num >> 8);
350 return;
351 }
352
353
354 static inline void
unaligned_write32be(uint8_t * buf,uint32_t num)355 unaligned_write32be(uint8_t *buf, uint32_t num)
356 {
357 buf[0] = (uint8_t)(num >> 24);
358 buf[1] = (uint8_t)(num >> 16);
359 buf[2] = (uint8_t)(num >> 8);
360 buf[3] = (uint8_t)num;
361 return;
362 }
363
364
365 static inline void
unaligned_write32le(uint8_t * buf,uint32_t num)366 unaligned_write32le(uint8_t *buf, uint32_t num)
367 {
368 buf[0] = (uint8_t)num;
369 buf[1] = (uint8_t)(num >> 8);
370 buf[2] = (uint8_t)(num >> 16);
371 buf[3] = (uint8_t)(num >> 24);
372 return;
373 }
374
375 #endif
376
377
378 static inline uint32_t
bsr32(uint32_t n)379 bsr32(uint32_t n)
380 {
381 // Check for ICC first, since it tends to define __GNUC__ too.
382 #if defined(__INTEL_COMPILER)
383 return _bit_scan_reverse(n);
384
385 #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
386 // GCC >= 3.4 has __builtin_clz(), which gives good results on
387 // multiple architectures. On x86, __builtin_clz() ^ 31U becomes
388 // either plain BSR (so the XOR gets optimized away) or LZCNT and
389 // XOR (if -march indicates that SSE4a instructions are supported).
390 return __builtin_clz(n) ^ 31U;
391
392 #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
393 uint32_t i;
394 __asm__("bsrl %1, %0" : "=r" (i) : "rm" (n));
395 return i;
396
397 #elif defined(_MSC_VER) && _MSC_VER >= 1400
398 // MSVC isn't supported by tuklib, but since this code exists,
399 // it doesn't hurt to have it here anyway.
400 uint32_t i;
401 _BitScanReverse((DWORD *)&i, n);
402 return i;
403
404 #else
405 uint32_t i = 31;
406
407 if ((n & UINT32_C(0xFFFF0000)) == 0) {
408 n <<= 16;
409 i = 15;
410 }
411
412 if ((n & UINT32_C(0xFF000000)) == 0) {
413 n <<= 8;
414 i -= 8;
415 }
416
417 if ((n & UINT32_C(0xF0000000)) == 0) {
418 n <<= 4;
419 i -= 4;
420 }
421
422 if ((n & UINT32_C(0xC0000000)) == 0) {
423 n <<= 2;
424 i -= 2;
425 }
426
427 if ((n & UINT32_C(0x80000000)) == 0)
428 --i;
429
430 return i;
431 #endif
432 }
433
434
435 static inline uint32_t
clz32(uint32_t n)436 clz32(uint32_t n)
437 {
438 #if defined(__INTEL_COMPILER)
439 return _bit_scan_reverse(n) ^ 31U;
440
441 #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
442 return __builtin_clz(n);
443
444 #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
445 uint32_t i;
446 __asm__("bsrl %1, %0\n\t"
447 "xorl $31, %0"
448 : "=r" (i) : "rm" (n));
449 return i;
450
451 #elif defined(_MSC_VER) && _MSC_VER >= 1400
452 uint32_t i;
453 _BitScanReverse((DWORD *)&i, n);
454 return i ^ 31U;
455
456 #else
457 uint32_t i = 0;
458
459 if ((n & UINT32_C(0xFFFF0000)) == 0) {
460 n <<= 16;
461 i = 16;
462 }
463
464 if ((n & UINT32_C(0xFF000000)) == 0) {
465 n <<= 8;
466 i += 8;
467 }
468
469 if ((n & UINT32_C(0xF0000000)) == 0) {
470 n <<= 4;
471 i += 4;
472 }
473
474 if ((n & UINT32_C(0xC0000000)) == 0) {
475 n <<= 2;
476 i += 2;
477 }
478
479 if ((n & UINT32_C(0x80000000)) == 0)
480 ++i;
481
482 return i;
483 #endif
484 }
485
486
487 static inline uint32_t
ctz32(uint32_t n)488 ctz32(uint32_t n)
489 {
490 #if defined(__INTEL_COMPILER)
491 return _bit_scan_forward(n);
492
493 #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX >= UINT32_MAX
494 return __builtin_ctz(n);
495
496 #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
497 uint32_t i;
498 __asm__("bsfl %1, %0" : "=r" (i) : "rm" (n));
499 return i;
500
501 #elif defined(_MSC_VER) && _MSC_VER >= 1400
502 uint32_t i;
503 _BitScanForward((DWORD *)&i, n);
504 return i;
505
506 #else
507 uint32_t i = 0;
508
509 if ((n & UINT32_C(0x0000FFFF)) == 0) {
510 n >>= 16;
511 i = 16;
512 }
513
514 if ((n & UINT32_C(0x000000FF)) == 0) {
515 n >>= 8;
516 i += 8;
517 }
518
519 if ((n & UINT32_C(0x0000000F)) == 0) {
520 n >>= 4;
521 i += 4;
522 }
523
524 if ((n & UINT32_C(0x00000003)) == 0) {
525 n >>= 2;
526 i += 2;
527 }
528
529 if ((n & UINT32_C(0x00000001)) == 0)
530 ++i;
531
532 return i;
533 #endif
534 }
535
536 #define bsf32 ctz32
537
538 #endif
539