1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       tuklib_integer.h
4 /// \brief      Various integer and bit operations
5 ///
6 /// This file provides macros or functions to do some basic integer and bit
7 /// operations.
8 ///
9 /// Endianness related integer operations (XX = 16, 32, or 64; Y = b or l):
10 ///   - Byte swapping: bswapXX(num)
11 ///   - Byte order conversions to/from native: convXXYe(num)
12 ///   - Aligned reads: readXXYe(ptr)
13 ///   - Aligned writes: writeXXYe(ptr, num)
14 ///   - Unaligned reads (16/32-bit only): unaligned_readXXYe(ptr)
15 ///   - Unaligned writes (16/32-bit only): unaligned_writeXXYe(ptr, num)
16 ///
17 /// Since they can macros, the arguments should have no side effects since
18 /// they may be evaluated more than once.
19 ///
20 /// \todo       PowerPC and possibly some other architectures support
21 ///             byte swapping load and store instructions. This file
22 ///             doesn't take advantage of those instructions.
23 ///
24 /// Bit scan operations for non-zero 32-bit integers:
25 ///   - Bit scan reverse (find highest non-zero bit): bsr32(num)
26 ///   - Count leading zeros: clz32(num)
27 ///   - Count trailing zeros: ctz32(num)
28 ///   - Bit scan forward (simply an alias for ctz32()): bsf32(num)
29 ///
30 /// The above bit scan operations return 0-31. If num is zero,
31 /// the result is undefined.
32 //
33 //  Authors:    Lasse Collin
34 //              Joachim Henke
35 //
36 //  This file has been put into the public domain.
37 //  You can do whatever you want with this file.
38 //
39 ///////////////////////////////////////////////////////////////////////////////
40 
41 #ifndef TUKLIB_INTEGER_H
42 #define TUKLIB_INTEGER_H
43 
44 #include "tuklib_common.h"
45 
46 
47 ////////////////////////////////////////
48 // Operating system specific features //
49 ////////////////////////////////////////
50 
51 #if defined(HAVE_BYTESWAP_H)
52 	// glibc, uClibc, dietlibc
53 #	include <byteswap.h>
54 #	ifdef HAVE_BSWAP_16
55 #		define bswap16(num) bswap_16(num)
56 #	endif
57 #	ifdef HAVE_BSWAP_32
58 #		define bswap32(num) bswap_32(num)
59 #	endif
60 #	ifdef HAVE_BSWAP_64
61 #		define bswap64(num) bswap_64(num)
62 #	endif
63 
64 #elif defined(HAVE_SYS_ENDIAN_H)
65 	// *BSDs and Darwin
66 #	include <sys/endian.h>
67 
68 #elif defined(HAVE_SYS_BYTEORDER_H)
69 	// Solaris
70 #	include <sys/byteorder.h>
71 #	ifdef BSWAP_16
72 #		define bswap16(num) BSWAP_16(num)
73 #	endif
74 #	ifdef BSWAP_32
75 #		define bswap32(num) BSWAP_32(num)
76 #	endif
77 #	ifdef BSWAP_64
78 #		define bswap64(num) BSWAP_64(num)
79 #	endif
80 #	ifdef BE_16
81 #		define conv16be(num) BE_16(num)
82 #	endif
83 #	ifdef BE_32
84 #		define conv32be(num) BE_32(num)
85 #	endif
86 #	ifdef BE_64
87 #		define conv64be(num) BE_64(num)
88 #	endif
89 #	ifdef LE_16
90 #		define conv16le(num) LE_16(num)
91 #	endif
92 #	ifdef LE_32
93 #		define conv32le(num) LE_32(num)
94 #	endif
95 #	ifdef LE_64
96 #		define conv64le(num) LE_64(num)
97 #	endif
98 #endif
99 
100 
101 ///////////////////
102 // Byte swapping //
103 ///////////////////
104 
105 #ifndef bswap16
106 #	define bswap16(num) \
107 		(((uint16_t)(num) << 8) | ((uint16_t)(num) >> 8))
108 #endif
109 
110 #ifndef bswap32
111 #	define bswap32(num) \
112 		( (((uint32_t)(num) << 24)                       ) \
113 		| (((uint32_t)(num) <<  8) & UINT32_C(0x00FF0000)) \
114 		| (((uint32_t)(num) >>  8) & UINT32_C(0x0000FF00)) \
115 		| (((uint32_t)(num) >> 24)                       ) )
116 #endif
117 
118 #ifndef bswap64
119 #	define bswap64(num) \
120 		( (((uint64_t)(num) << 56)                               ) \
121 		| (((uint64_t)(num) << 40) & UINT64_C(0x00FF000000000000)) \
122 		| (((uint64_t)(num) << 24) & UINT64_C(0x0000FF0000000000)) \
123 		| (((uint64_t)(num) <<  8) & UINT64_C(0x000000FF00000000)) \
124 		| (((uint64_t)(num) >>  8) & UINT64_C(0x00000000FF000000)) \
125 		| (((uint64_t)(num) >> 24) & UINT64_C(0x0000000000FF0000)) \
126 		| (((uint64_t)(num) >> 40) & UINT64_C(0x000000000000FF00)) \
127 		| (((uint64_t)(num) >> 56)                               ) )
128 #endif
129 
130 // Define conversion macros using the basic byte swapping macros.
131 #ifdef WORDS_BIGENDIAN
132 #	ifndef conv16be
133 #		define conv16be(num) ((uint16_t)(num))
134 #	endif
135 #	ifndef conv32be
136 #		define conv32be(num) ((uint32_t)(num))
137 #	endif
138 #	ifndef conv64be
139 #		define conv64be(num) ((uint64_t)(num))
140 #	endif
141 #	ifndef conv16le
142 #		define conv16le(num) bswap16(num)
143 #	endif
144 #	ifndef conv32le
145 #		define conv32le(num) bswap32(num)
146 #	endif
147 #	ifndef conv64le
148 #		define conv64le(num) bswap64(num)
149 #	endif
150 #else
151 #	ifndef conv16be
152 #		define conv16be(num) bswap16(num)
153 #	endif
154 #	ifndef conv32be
155 #		define conv32be(num) bswap32(num)
156 #	endif
157 #	ifndef conv64be
158 #		define conv64be(num) bswap64(num)
159 #	endif
160 #	ifndef conv16le
161 #		define conv16le(num) ((uint16_t)(num))
162 #	endif
163 #	ifndef conv32le
164 #		define conv32le(num) ((uint32_t)(num))
165 #	endif
166 #	ifndef conv64le
167 #		define conv64le(num) ((uint64_t)(num))
168 #	endif
169 #endif
170 
171 
172 //////////////////////////////
173 // Aligned reads and writes //
174 //////////////////////////////
175 
176 static inline uint16_t
read16be(const uint8_t * buf)177 read16be(const uint8_t *buf)
178 {
179 	uint16_t num = *(const uint16_t *)buf;
180 	return conv16be(num);
181 }
182 
183 
184 static inline uint16_t
read16le(const uint8_t * buf)185 read16le(const uint8_t *buf)
186 {
187 	uint16_t num = *(const uint16_t *)buf;
188 	return conv16le(num);
189 }
190 
191 
192 static inline uint32_t
read32be(const uint8_t * buf)193 read32be(const uint8_t *buf)
194 {
195 	uint32_t num = *(const uint32_t *)buf;
196 	return conv32be(num);
197 }
198 
199 
200 static inline uint32_t
read32le(const uint8_t * buf)201 read32le(const uint8_t *buf)
202 {
203 	uint32_t num = *(const uint32_t *)buf;
204 	return conv32le(num);
205 }
206 
207 
208 static inline uint64_t
read64be(const uint8_t * buf)209 read64be(const uint8_t *buf)
210 {
211 	uint64_t num = *(const uint64_t *)buf;
212 	return conv64be(num);
213 }
214 
215 
216 static inline uint64_t
read64le(const uint8_t * buf)217 read64le(const uint8_t *buf)
218 {
219 	uint64_t num = *(const uint64_t *)buf;
220 	return conv64le(num);
221 }
222 
223 
224 // NOTE: Possible byte swapping must be done in a macro to allow GCC
225 // to optimize byte swapping of constants when using glibc's or *BSD's
226 // byte swapping macros. The actual write is done in an inline function
227 // to make type checking of the buf pointer possible similarly to readXXYe()
228 // functions.
229 
230 #define write16be(buf, num) write16ne((buf), conv16be(num))
231 #define write16le(buf, num) write16ne((buf), conv16le(num))
232 #define write32be(buf, num) write32ne((buf), conv32be(num))
233 #define write32le(buf, num) write32ne((buf), conv32le(num))
234 #define write64be(buf, num) write64ne((buf), conv64be(num))
235 #define write64le(buf, num) write64ne((buf), conv64le(num))
236 
237 
238 static inline void
write16ne(uint8_t * buf,uint16_t num)239 write16ne(uint8_t *buf, uint16_t num)
240 {
241 	*(uint16_t *)buf = num;
242 	return;
243 }
244 
245 
246 static inline void
write32ne(uint8_t * buf,uint32_t num)247 write32ne(uint8_t *buf, uint32_t num)
248 {
249 	*(uint32_t *)buf = num;
250 	return;
251 }
252 
253 
254 static inline void
write64ne(uint8_t * buf,uint64_t num)255 write64ne(uint8_t *buf, uint64_t num)
256 {
257 	*(uint64_t *)buf = num;
258 	return;
259 }
260 
261 
262 ////////////////////////////////
263 // Unaligned reads and writes //
264 ////////////////////////////////
265 
266 // NOTE: TUKLIB_FAST_UNALIGNED_ACCESS indicates only support for 16-bit and
267 // 32-bit unaligned integer loads and stores. It's possible that 64-bit
268 // unaligned access doesn't work or is slower than byte-by-byte access.
269 // Since unaligned 64-bit is probably not needed as often as 16-bit or
270 // 32-bit, we simply don't support 64-bit unaligned access for now.
271 #ifdef TUKLIB_FAST_UNALIGNED_ACCESS
272 #	define unaligned_read16be read16be
273 #	define unaligned_read16le read16le
274 #	define unaligned_read32be read32be
275 #	define unaligned_read32le read32le
276 #	define unaligned_write16be write16be
277 #	define unaligned_write16le write16le
278 #	define unaligned_write32be write32be
279 #	define unaligned_write32le write32le
280 
281 #else
282 
283 static inline uint16_t
unaligned_read16be(const uint8_t * buf)284 unaligned_read16be(const uint8_t *buf)
285 {
286 	uint16_t num = ((uint16_t)buf[0] << 8) | (uint16_t)buf[1];
287 	return num;
288 }
289 
290 
291 static inline uint16_t
unaligned_read16le(const uint8_t * buf)292 unaligned_read16le(const uint8_t *buf)
293 {
294 	uint16_t num = ((uint16_t)buf[0]) | ((uint16_t)buf[1] << 8);
295 	return num;
296 }
297 
298 
299 static inline uint32_t
unaligned_read32be(const uint8_t * buf)300 unaligned_read32be(const uint8_t *buf)
301 {
302 	uint32_t num = (uint32_t)buf[0] << 24;
303 	num |= (uint32_t)buf[1] << 16;
304 	num |= (uint32_t)buf[2] << 8;
305 	num |= (uint32_t)buf[3];
306 	return num;
307 }
308 
309 
310 static inline uint32_t
unaligned_read32le(const uint8_t * buf)311 unaligned_read32le(const uint8_t *buf)
312 {
313 	uint32_t num = (uint32_t)buf[0];
314 	num |= (uint32_t)buf[1] << 8;
315 	num |= (uint32_t)buf[2] << 16;
316 	num |= (uint32_t)buf[3] << 24;
317 	return num;
318 }
319 
320 
321 static inline void
unaligned_write16be(uint8_t * buf,uint16_t num)322 unaligned_write16be(uint8_t *buf, uint16_t num)
323 {
324 	buf[0] = (uint8_t)(num >> 8);
325 	buf[1] = (uint8_t)num;
326 	return;
327 }
328 
329 
330 static inline void
unaligned_write16le(uint8_t * buf,uint16_t num)331 unaligned_write16le(uint8_t *buf, uint16_t num)
332 {
333 	buf[0] = (uint8_t)num;
334 	buf[1] = (uint8_t)(num >> 8);
335 	return;
336 }
337 
338 
339 static inline void
unaligned_write32be(uint8_t * buf,uint32_t num)340 unaligned_write32be(uint8_t *buf, uint32_t num)
341 {
342 	buf[0] = (uint8_t)(num >> 24);
343 	buf[1] = (uint8_t)(num >> 16);
344 	buf[2] = (uint8_t)(num >> 8);
345 	buf[3] = (uint8_t)num;
346 	return;
347 }
348 
349 
350 static inline void
unaligned_write32le(uint8_t * buf,uint32_t num)351 unaligned_write32le(uint8_t *buf, uint32_t num)
352 {
353 	buf[0] = (uint8_t)num;
354 	buf[1] = (uint8_t)(num >> 8);
355 	buf[2] = (uint8_t)(num >> 16);
356 	buf[3] = (uint8_t)(num >> 24);
357 	return;
358 }
359 
360 #endif
361 
362 
363 static inline uint32_t
bsr32(uint32_t n)364 bsr32(uint32_t n)
365 {
366 	// Check for ICC first, since it tends to define __GNUC__ too.
367 #if defined(__INTEL_COMPILER)
368 	return _bit_scan_reverse(n);
369 
370 #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
371 	// GCC >= 3.4 has __builtin_clz(), which gives good results on
372 	// multiple architectures. On x86, __builtin_clz() ^ 31U becomes
373 	// either plain BSR (so the XOR gets optimized away) or LZCNT and
374 	// XOR (if -march indicates that SSE4a instructions are supported).
375 	return __builtin_clz(n) ^ 31U;
376 
377 #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
378 	uint32_t i;
379 	__asm__("bsrl %1, %0" : "=r" (i) : "rm" (n));
380 	return i;
381 
382 #elif defined(_MSC_VER) && _MSC_VER >= 1400
383 	// MSVC isn't supported by tuklib, but since this code exists,
384 	// it doesn't hurt to have it here anyway.
385 	uint32_t i;
386 	_BitScanReverse((DWORD *)&i, n);
387 	return i;
388 
389 #else
390 	uint32_t i = 31;
391 
392 	if ((n & UINT32_C(0xFFFF0000)) == 0) {
393 		n <<= 16;
394 		i = 15;
395 	}
396 
397 	if ((n & UINT32_C(0xFF000000)) == 0) {
398 		n <<= 8;
399 		i -= 8;
400 	}
401 
402 	if ((n & UINT32_C(0xF0000000)) == 0) {
403 		n <<= 4;
404 		i -= 4;
405 	}
406 
407 	if ((n & UINT32_C(0xC0000000)) == 0) {
408 		n <<= 2;
409 		i -= 2;
410 	}
411 
412 	if ((n & UINT32_C(0x80000000)) == 0)
413 		--i;
414 
415 	return i;
416 #endif
417 }
418 
419 
420 static inline uint32_t
clz32(uint32_t n)421 clz32(uint32_t n)
422 {
423 #if defined(__INTEL_COMPILER)
424 	return _bit_scan_reverse(n) ^ 31U;
425 
426 #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
427 	return __builtin_clz(n);
428 
429 #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
430 	uint32_t i;
431 	__asm__("bsrl %1, %0\n\t"
432 		"xorl $31, %0"
433 		: "=r" (i) : "rm" (n));
434 	return i;
435 
436 #elif defined(_MSC_VER) && _MSC_VER >= 1400
437 	uint32_t i;
438 	_BitScanReverse((DWORD *)&i, n);
439 	return i ^ 31U;
440 
441 #else
442 	uint32_t i = 0;
443 
444 	if ((n & UINT32_C(0xFFFF0000)) == 0) {
445 		n <<= 16;
446 		i = 16;
447 	}
448 
449 	if ((n & UINT32_C(0xFF000000)) == 0) {
450 		n <<= 8;
451 		i += 8;
452 	}
453 
454 	if ((n & UINT32_C(0xF0000000)) == 0) {
455 		n <<= 4;
456 		i += 4;
457 	}
458 
459 	if ((n & UINT32_C(0xC0000000)) == 0) {
460 		n <<= 2;
461 		i += 2;
462 	}
463 
464 	if ((n & UINT32_C(0x80000000)) == 0)
465 		++i;
466 
467 	return i;
468 #endif
469 }
470 
471 
472 static inline uint32_t
ctz32(uint32_t n)473 ctz32(uint32_t n)
474 {
475 #if defined(__INTEL_COMPILER)
476 	return _bit_scan_forward(n);
477 
478 #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX >= UINT32_MAX
479 	return __builtin_ctz(n);
480 
481 #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
482 	uint32_t i;
483 	__asm__("bsfl %1, %0" : "=r" (i) : "rm" (n));
484 	return i;
485 
486 #elif defined(_MSC_VER) && _MSC_VER >= 1400
487 	uint32_t i;
488 	_BitScanForward((DWORD *)&i, n);
489 	return i;
490 
491 #else
492 	uint32_t i = 0;
493 
494 	if ((n & UINT32_C(0x0000FFFF)) == 0) {
495 		n >>= 16;
496 		i = 16;
497 	}
498 
499 	if ((n & UINT32_C(0x000000FF)) == 0) {
500 		n >>= 8;
501 		i += 8;
502 	}
503 
504 	if ((n & UINT32_C(0x0000000F)) == 0) {
505 		n >>= 4;
506 		i += 4;
507 	}
508 
509 	if ((n & UINT32_C(0x00000003)) == 0) {
510 		n >>= 2;
511 		i += 2;
512 	}
513 
514 	if ((n & UINT32_C(0x00000001)) == 0)
515 		++i;
516 
517 	return i;
518 #endif
519 }
520 
521 #define bsf32 ctz32
522 
523 #endif
524