1 /* ===-------- Intrin.h ---------------------------------------------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 24 /* Only include this if we're compiling for the windows platform. */ 25 #ifndef _MSC_VER 26 #include_next <Intrin.h> 27 #else 28 29 #ifndef __INTRIN_H 30 #define __INTRIN_H 31 32 /* First include the standard intrinsics. */ 33 #include <x86intrin.h> 34 35 #ifdef __cplusplus 36 extern "C" { 37 #endif 38 39 /* And the random ones that aren't in those files. */ 40 __m64 _m_from_float(float); 41 __m64 _m_from_int(int _l); 42 void _m_prefetch(void *); 43 float _m_to_float(__m64); 44 int _m_to_int(__m64 _M); 45 46 /* Other assorted instruction intrinsics. */ 47 void __addfsbyte(unsigned long, unsigned char); 48 void __addfsdword(unsigned long, unsigned long); 49 void __addfsword(unsigned long, unsigned short); 50 void __code_seg(const char *); 51 void __cpuid(int[4], int); 52 void __cpuidex(int[4], int, int); 53 void __debugbreak(void); 54 __int64 __emul(int, int); 55 unsigned __int64 __emulu(unsigned int, unsigned int); 56 void __cdecl __fastfail(unsigned int); 57 unsigned int __getcallerseflags(void); 58 void __halt(void); 59 unsigned char __inbyte(unsigned short); 60 void __inbytestring(unsigned short, unsigned char *, unsigned long); 61 void __incfsbyte(unsigned long); 62 void __incfsdword(unsigned long); 63 void __incfsword(unsigned long); 64 unsigned long __indword(unsigned short); 65 void __indwordstring(unsigned short, unsigned long *, unsigned long); 66 void __int2c(void); 67 void __invlpg(void *); 68 unsigned short __inword(unsigned short); 69 void __inwordstring(unsigned short, unsigned short *, unsigned long); 70 void __lidt(void *); 71 unsigned __int64 __ll_lshift(unsigned __int64, int); 72 __int64 __ll_rshift(__int64, int); 73 void __llwpcb(void *); 74 unsigned char __lwpins32(unsigned int, unsigned int, unsigned int); 75 void __lwpval32(unsigned int, unsigned int, unsigned int); 76 unsigned int __lzcnt(unsigned int); 77 unsigned short __lzcnt16(unsigned short); 78 void __movsb(unsigned char *, unsigned char const *, size_t); 79 void __movsd(unsigned long *, unsigned long const *, size_t); 80 void __movsw(unsigned short *, unsigned short const *, size_t); 81 void __nop(void); 82 void __nvreg_restore_fence(void); 83 void __nvreg_save_fence(void); 84 void __outbyte(unsigned short, unsigned char); 85 void __outbytestring(unsigned short, unsigned char *, unsigned long); 86 void __outdword(unsigned short, unsigned long); 87 void __outdwordstring(unsigned short, unsigned long *, unsigned long); 88 void __outword(unsigned short, unsigned short); 89 void __outwordstring(unsigned short, unsigned short *, unsigned long); 90 static __inline__ 91 unsigned int __popcnt(unsigned int); 92 static __inline__ 93 unsigned short __popcnt16(unsigned short); 94 unsigned __int64 __rdtsc(void); 95 unsigned __int64 __rdtscp(unsigned int *); 96 unsigned long __readcr0(void); 97 unsigned long __readcr2(void); 98 unsigned long __readcr3(void); 99 unsigned long __readcr5(void); 100 unsigned long __readcr8(void); 101 unsigned int __readdr(unsigned int); 102 unsigned int __readeflags(void); 103 unsigned char __readfsbyte(unsigned long); 104 unsigned long __readfsdword(unsigned long); 105 unsigned __int64 __readfsqword(unsigned long); 106 unsigned short __readfsword(unsigned long); 107 unsigned __int64 __readmsr(unsigned long); 108 unsigned __int64 __readpmc(unsigned long); 109 unsigned long __segmentlimit(unsigned long); 110 void __sidt(void *); 111 void *__slwpcb(void); 112 void __stosb(unsigned char *, unsigned char, size_t); 113 void __stosd(unsigned long *, unsigned long, size_t); 114 void __stosw(unsigned short *, unsigned short, size_t); 115 void __svm_clgi(void); 116 void __svm_invlpga(void *, int); 117 void __svm_skinit(int); 118 void __svm_stgi(void); 119 void __svm_vmload(size_t); 120 void __svm_vmrun(size_t); 121 void __svm_vmsave(size_t); 122 void __ud2(void); 123 unsigned __int64 __ull_rshift(unsigned __int64, int); 124 void __vmx_off(void); 125 void __vmx_vmptrst(unsigned __int64 *); 126 void __wbinvd(void); 127 void __writecr0(unsigned int); 128 void __writecr3(unsigned int); 129 void __writecr4(unsigned int); 130 void __writecr8(unsigned int); 131 void __writedr(unsigned int, unsigned int); 132 void __writeeflags(unsigned int); 133 void __writefsbyte(unsigned long, unsigned char); 134 void __writefsdword(unsigned long, unsigned long); 135 void __writefsqword(unsigned long, unsigned __int64); 136 void __writefsword(unsigned long, unsigned short); 137 void __writemsr(unsigned long, unsigned __int64); 138 static __inline__ 139 void *_AddressOfReturnAddress(void); 140 unsigned int _andn_u32(unsigned int, unsigned int); 141 unsigned int _bextr_u32(unsigned int, unsigned int, unsigned int); 142 unsigned int _bextr_u32(unsigned int, unsigned int, unsigned int); 143 unsigned int _bextri_u32(unsigned int, unsigned int); 144 static __inline__ 145 unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask); 146 static __inline__ 147 unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask); 148 static __inline__ 149 unsigned char _bittest(long const *, long); 150 static __inline__ 151 unsigned char _bittestandcomplement(long *, long); 152 static __inline__ 153 unsigned char _bittestandreset(long *, long); 154 static __inline__ 155 unsigned char _bittestandset(long *, long); 156 unsigned int _blcfill_u32(unsigned int); 157 unsigned int _blci_u32(unsigned int); 158 unsigned int _blcic_u32(unsigned int); 159 unsigned int _blcmsk_u32(unsigned int); 160 unsigned int _blcs_u32(unsigned int); 161 unsigned int _blsfill_u32(unsigned int); 162 unsigned int _blsi_u32(unsigned int); 163 unsigned int _blsic_u32(unsigned int); 164 unsigned int _blsmsk_u32(unsigned int); 165 unsigned int _blsmsk_u32(unsigned int); 166 unsigned int _blsr_u32(unsigned int); 167 unsigned int _blsr_u32(unsigned int); 168 unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64); 169 unsigned long __cdecl _byteswap_ulong(unsigned long); 170 unsigned short __cdecl _byteswap_ushort(unsigned short); 171 unsigned _bzhi_u32(unsigned int, unsigned int); 172 void __cdecl _disable(void); 173 void __cdecl _enable(void); 174 void __cdecl _fxrstor(void const *); 175 void __cdecl _fxsave(void *); 176 long _InterlockedAddLargeStatistic(__int64 volatile *_Addend, long _Value); 177 static __inline__ 178 long _InterlockedAnd(long volatile *_Value, long _Mask); 179 static __inline__ 180 short _InterlockedAnd16(short volatile *_Value, short _Mask); 181 static __inline__ 182 char _InterlockedAnd8(char volatile *_Value, char _Mask); 183 unsigned char _interlockedbittestandreset(long volatile *, long); 184 unsigned char _interlockedbittestandset(long volatile *, long); 185 static __inline__ 186 long __cdecl _InterlockedCompareExchange(long volatile *_Destination, 187 long _Exchange, long _Comparand); 188 long _InterlockedCompareExchange_HLEAcquire(long volatile *, long, long); 189 long _InterlockedCompareExchange_HLERelease(long volatile *, long, long); 190 static __inline__ 191 short _InterlockedCompareExchange16(short volatile *_Destination, 192 short _Exchange, short _Comparand); 193 static __inline__ 194 __int64 _InterlockedCompareExchange64(__int64 volatile *_Destination, 195 __int64 _Exchange, __int64 _Comparand); 196 __int64 _InterlockedcompareExchange64_HLEAcquire(__int64 volatile *, __int64, 197 __int64); 198 __int64 _InterlockedCompareExchange64_HLERelease(__int64 volatile *, __int64, 199 __int64); 200 static __inline__ 201 char _InterlockedCompareExchange8(char volatile *_Destination, char _Exchange, 202 char _Comparand); 203 void *_InterlockedCompareExchangePointer_HLEAcquire(void *volatile *, void *, 204 void *); 205 void *_InterlockedCompareExchangePointer_HLERelease(void *volatile *, void *, 206 void *); 207 static __inline__ 208 long __cdecl _InterlockedDecrement(long volatile *_Addend); 209 static __inline__ 210 short _InterlockedDecrement16(short volatile *_Addend); 211 static __inline__ 212 long __cdecl _InterlockedExchange(long volatile *_Target, long _Value); 213 static __inline__ 214 short _InterlockedExchange16(short volatile *_Target, short _Value); 215 static __inline__ 216 char _InterlockedExchange8(char volatile *_Target, char _Value); 217 static __inline__ 218 long __cdecl _InterlockedExchangeAdd(long volatile *_Addend, long _Value); 219 long _InterlockedExchangeAdd_HLEAcquire(long volatile *, long); 220 long _InterlockedExchangeAdd_HLERelease(long volatile *, long); 221 static __inline__ 222 char _InterlockedExchangeAdd8(char volatile *_Addend, char _Value); 223 static __inline__ 224 long __cdecl _InterlockedIncrement(long volatile *_Addend); 225 static __inline__ 226 short _InterlockedIncrement16(short volatile *_Addend); 227 static __inline__ 228 long _InterlockedOr(long volatile *_Value, long _Mask); 229 static __inline__ 230 short _InterlockedOr16(short volatile *_Value, short _Mask); 231 static __inline__ 232 char _InterlockedOr8(char volatile *_Value, char _Mask); 233 static __inline__ 234 long _InterlockedXor(long volatile *_Value, long _Mask); 235 static __inline__ 236 short _InterlockedXor16(short volatile *_Value, short _Mask); 237 static __inline__ 238 char _InterlockedXor8(char volatile *_Value, char _Mask); 239 void __cdecl _invpcid(unsigned int, void *); 240 static __inline__ 241 unsigned long __cdecl _lrotl(unsigned long, int); 242 static __inline__ 243 unsigned long __cdecl _lrotr(unsigned long, int); 244 static __inline__ 245 unsigned int _lzcnt_u32(unsigned int); 246 static __inline__ 247 void _ReadBarrier(void); 248 static __inline__ 249 void _ReadWriteBarrier(void); 250 static __inline__ 251 void *_ReturnAddress(void); 252 unsigned int _rorx_u32(unsigned int, const unsigned int); 253 int __cdecl _rdrand16_step(unsigned short *); 254 int __cdecl _rdrand32_step(unsigned int *); 255 static __inline__ 256 unsigned int __cdecl _rotl(unsigned int _Value, int _Shift); 257 static __inline__ 258 unsigned short _rotl16(unsigned short _Value, unsigned char _Shift); 259 static __inline__ 260 unsigned __int64 __cdecl _rotl64(unsigned __int64 _Value, int _Shift); 261 static __inline__ 262 unsigned char _rotl8(unsigned char _Value, unsigned char _Shift); 263 static __inline__ 264 unsigned int __cdecl _rotr(unsigned int _Value, int _Shift); 265 static __inline__ 266 unsigned short _rotr16(unsigned short _Value, unsigned char _Shift); 267 static __inline__ 268 unsigned __int64 __cdecl _rotr64(unsigned __int64 _Value, int _Shift); 269 static __inline__ 270 unsigned char _rotr8(unsigned char _Value, unsigned char _Shift); 271 int _sarx_i32(int, unsigned int); 272 273 /* FIXME: Need definition for jmp_buf. 274 int __cdecl _setjmp(jmp_buf); */ 275 276 unsigned int _shlx_u32(unsigned int, unsigned int); 277 unsigned int _shrx_u32(unsigned int, unsigned int); 278 void _Store_HLERelease(long volatile *, long); 279 void _Store64_HLERelease(__int64 volatile *, __int64); 280 void _StorePointer_HLERelease(void *volatile *, void *); 281 unsigned int _t1mskc_u32(unsigned int); 282 unsigned int _tzcnt_u32(unsigned int); 283 unsigned int _tzcnt_u32(unsigned int); 284 unsigned int _tzmsk_u32(unsigned int); 285 static __inline__ 286 void _WriteBarrier(void); 287 void _xabort(const unsigned int imm); 288 unsigned __int32 xbegin(void); 289 void _xend(void); 290 unsigned __int64 __cdecl _xgetbv(unsigned int); 291 void __cdecl _xrstor(void const *, unsigned __int64); 292 void __cdecl _xsave(void *, unsigned __int64); 293 void __cdecl _xsaveopt(void *, unsigned __int64); 294 void __cdecl _xsetbv(unsigned int, unsigned __int64); 295 unsigned char _xtest(void); 296 297 /* These additional intrinsics are turned on in x64/amd64/x86_64 mode. */ 298 #ifdef __x86_64__ 299 void __addgsbyte(unsigned long, unsigned char); 300 void __addgsdword(unsigned long, unsigned long); 301 void __addgsqword(unsigned long, unsigned __int64); 302 void __addgsword(unsigned long, unsigned short); 303 void __faststorefence(void); 304 void __incgsbyte(unsigned long); 305 void __incgsdword(unsigned long); 306 void __incgsqword(unsigned long); 307 void __incgsword(unsigned long); 308 unsigned __int64 __popcnt64(unsigned __int64); 309 unsigned __int64 __shiftleft128(unsigned __int64 _LowPart, 310 unsigned __int64 _HighPart, 311 unsigned char _Shift); 312 unsigned __int64 __shiftright128(unsigned __int64 _LowPart, 313 unsigned __int64 _HighPart, 314 unsigned char _Shift); 315 void __stosq(unsigned __int64 *, unsigned __int64, size_t); 316 unsigned __int64 _andn_u64(unsigned __int64, unsigned __int64); 317 unsigned __int64 _bextr_u64(unsigned __int64, unsigned int, unsigned int); 318 unsigned __int64 _bextri_u64(unsigned __int64, unsigned int); 319 static __inline__ 320 unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask); 321 static __inline__ 322 unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask); 323 static __inline__ 324 unsigned char _bittest64(__int64 const *, __int64); 325 static __inline__ 326 unsigned char _bittestandcomplement64(__int64 *, __int64); 327 static __inline__ 328 unsigned char _bittestandreset64(__int64 *, __int64); 329 static __inline__ 330 unsigned char _bittestandset64(__int64 *, __int64); 331 unsigned __int64 _blcfill_u64(unsigned __int64); 332 unsigned __int64 _blci_u64(unsigned __int64); 333 unsigned __int64 _blcic_u64(unsigned __int64); 334 unsigned __int64 _blcmsk_u64(unsigned __int64); 335 unsigned __int64 _blcs_u64(unsigned __int64); 336 unsigned __int64 _blsfill_u64(unsigned __int64); 337 unsigned __int64 _blsi_u64(unsigned __int64); 338 unsigned __int64 _blsic_u64(unsigned __int64); 339 unsigned __int64 _blmsk_u64(unsigned __int64); 340 unsigned __int64 _blsr_u64(unsigned __int64); 341 unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64); 342 unsigned __int64 _bzhi_u64(unsigned __int64, unsigned int); 343 void __cdecl _fxrstor64(void const *); 344 void __cdecl _fxsave64(void *); 345 long _InterlockedAnd_np(long volatile *_Value, long _Mask); 346 short _InterlockedAnd16_np(short volatile *_Value, short _Mask); 347 __int64 _InterlockedAnd64_np(__int64 volatile *_Value, __int64 _Mask); 348 char _InterlockedAnd8_np(char volatile *_Value, char _Mask); 349 unsigned char _interlockedbittestandreset64(__int64 volatile *, __int64); 350 unsigned char _interlockedbittestandset64(__int64 volatile *, __int64); 351 long _InterlockedCompareExchange_np(long volatile *_Destination, long _Exchange, 352 long _Comparand); 353 unsigned char _InterlockedCompareExchange128(__int64 volatile *_Destination, 354 __int64 _ExchangeHigh, 355 __int64 _ExchangeLow, 356 __int64 *_CompareandResult); 357 unsigned char _InterlockedCompareExchange128_np(__int64 volatile *_Destination, 358 __int64 _ExchangeHigh, 359 __int64 _ExchangeLow, 360 __int64 *_ComparandResult); 361 short _InterlockedCompareExchange16_np(short volatile *_Destination, 362 short _Exchange, short _Comparand); 363 __int64 _InterlockedCompareExchange64_np(__int64 volatile *_Destination, 364 __int64 _Exchange, __int64 _Comparand); 365 void *_InterlockedCompareExchangePointer_np(void *volatile *_Destination, 366 void *_Exchange, void *_Comparand); 367 long _InterlockedOr_np(long volatile *_Value, long _Mask); 368 short _InterlockedOr16_np(short volatile *_Value, short _Mask); 369 __int64 _InterlockedOr64_np(__int64 volatile *_Value, __int64 _Mask); 370 char _InterlockedOr8_np(char volatile *_Value, char _Mask); 371 long _InterlockedXor_np(long volatile *_Value, long _Mask); 372 short _InterlockedXor16_np(short volatile *_Value, short _Mask); 373 __int64 _InterlockedXor64_np(__int64 volatile *_Value, __int64 _Mask); 374 char _InterlockedXor8_np(char volatile *_Value, char _Mask); 375 unsigned __int64 _lzcnt_u64(unsigned __int64); 376 __int64 _mul128(__int64 _Multiplier, __int64 _Multiplicand, 377 __int64 *_HighProduct); 378 unsigned int __cdecl _readfsbase_u32(void); 379 unsigned __int64 __cdecl _readfsbase_u64(void); 380 unsigned int __cdecl _readgsbase_u32(void); 381 unsigned __int64 __cdecl _readgsbase_u64(void); 382 unsigned __int64 _rorx_u64(unsigned __int64, const unsigned int); 383 unsigned __int64 _tzcnt_u64(unsigned __int64); 384 unsigned __int64 _tzmsk_u64(unsigned __int64); 385 unsigned __int64 _umul128(unsigned __int64 _Multiplier, 386 unsigned __int64 _Multiplicand, 387 unsigned __int64 *_HighProduct); 388 void __cdecl _writefsbase_u32(unsigned int); 389 void _cdecl _writefsbase_u64(unsigned __int64); 390 void __cdecl _writegsbase_u32(unsigned int); 391 void __cdecl _writegsbase_u64(unsigned __int64); 392 void __cdecl _xrstor64(void const *, unsigned __int64); 393 void __cdecl _xsave64(void *, unsigned __int64); 394 void __cdecl _xsaveopt64(void *, unsigned __int64); 395 396 #endif /* __x86_64__ */ 397 398 /*----------------------------------------------------------------------------*\ 399 |* Bit Twiddling 400 \*----------------------------------------------------------------------------*/ 401 static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 402 _rotl8(unsigned char _Value, unsigned char _Shift) { 403 _Shift &= 0x7; 404 return _Shift ? (_Value << _Shift) | (_Value >> (8 - _Shift)) : _Value; 405 } 406 static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 407 _rotr8(unsigned char _Value, unsigned char _Shift) { 408 _Shift &= 0x7; 409 return _Shift ? (_Value >> _Shift) | (_Value << (8 - _Shift)) : _Value; 410 } 411 static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__)) 412 _rotl16(unsigned short _Value, unsigned char _Shift) { 413 _Shift &= 0xf; 414 return _Shift ? (_Value << _Shift) | (_Value >> (16 - _Shift)) : _Value; 415 } 416 static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__)) 417 _rotr16(unsigned short _Value, unsigned char _Shift) { 418 _Shift &= 0xf; 419 return _Shift ? (_Value >> _Shift) | (_Value << (16 - _Shift)) : _Value; 420 } 421 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) 422 _rotl(unsigned int _Value, int _Shift) { 423 _Shift &= 0x1f; 424 return _Shift ? (_Value << _Shift) | (_Value >> (32 - _Shift)) : _Value; 425 } 426 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) 427 _rotr(unsigned int _Value, int _Shift) { 428 _Shift &= 0x1f; 429 return _Shift ? (_Value >> _Shift) | (_Value << (32 - _Shift)) : _Value; 430 } 431 static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) 432 _lrotl(unsigned long _Value, int _Shift) { 433 _Shift &= 0x1f; 434 return _Shift ? (_Value << _Shift) | (_Value >> (32 - _Shift)) : _Value; 435 } 436 static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) 437 _lrotr(unsigned long _Value, int _Shift) { 438 _Shift &= 0x1f; 439 return _Shift ? (_Value >> _Shift) | (_Value << (32 - _Shift)) : _Value; 440 } 441 static 442 __inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__)) 443 _rotl64(unsigned __int64 _Value, int _Shift) { 444 _Shift &= 0x3f; 445 return _Shift ? (_Value << _Shift) | (_Value >> (64 - _Shift)) : _Value; 446 } 447 static 448 __inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__)) 449 _rotr64(unsigned __int64 _Value, int _Shift) { 450 _Shift &= 0x3f; 451 return _Shift ? (_Value >> _Shift) | (_Value << (64 - _Shift)) : _Value; 452 } 453 /*----------------------------------------------------------------------------*\ 454 |* Bit Counting and Testing 455 \*----------------------------------------------------------------------------*/ 456 static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 457 _BitScanForward(unsigned long *_Index, unsigned long _Mask) { 458 if (!_Mask) 459 return 0; 460 *_Index = __builtin_ctzl(_Mask); 461 return 1; 462 } 463 static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 464 _BitScanReverse(unsigned long *_Index, unsigned long _Mask) { 465 if (!_Mask) 466 return 0; 467 *_Index = 31 - __builtin_clzl(_Mask); 468 return 1; 469 } 470 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) 471 _lzcnt_u32(unsigned int a) { 472 if (!a) 473 return 32; 474 return __builtin_clzl(a); 475 } 476 static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__)) 477 __popcnt16(unsigned short value) { 478 return __builtin_popcount((int)value); 479 } 480 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) 481 __popcnt(unsigned int value) { 482 return __builtin_popcount(value); 483 } 484 static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 485 _bittest(long const *a, long b) { 486 return (*a >> b) & 1; 487 } 488 static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 489 _bittestandcomplement(long *a, long b) { 490 unsigned char x = (*a >> b) & 1; 491 *a = *a ^ (1 << b); 492 return x; 493 } 494 static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 495 _bittestandreset(long *a, long b) { 496 unsigned char x = (*a >> b) & 1; 497 *a = *a & ~(1 << b); 498 return x; 499 } 500 static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 501 _bittestandset(long *a, long b) { 502 unsigned char x = (*a >> b) & 1; 503 *a = *a | (1 << b); 504 return x; 505 } 506 #ifdef __x86_64__ 507 static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 508 _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask) { 509 if (!_Mask) 510 return 0; 511 *_Index = __builtin_ctzll(_Mask); 512 return 1; 513 } 514 static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 515 _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask) { 516 if (!_Mask) 517 return 0; 518 *_Index = 63 - __builtin_clzll(_Mask); 519 return 1; 520 } 521 static 522 __inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__)) 523 _lzcnt_u64(unsigned __int64 a) { 524 if (!a) 525 return 64; 526 return __builtin_clzll(a); 527 } 528 static __inline__ 529 unsigned __int64 __attribute__((__always_inline__, __nodebug__)) 530 __popcnt64(unsigned __int64 value) { 531 return __builtin_popcountll(value); 532 } 533 static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 534 _bittest64(__int64 const *a, __int64 b) { 535 return (*a >> b) & 1; 536 } 537 static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 538 _bittestandcomplement64(__int64 *a, __int64 b) { 539 unsigned char x = (*a >> b) & 1; 540 *a = *a ^ (1ll << b); 541 return x; 542 } 543 static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 544 _bittestandreset64(__int64 *a, __int64 b) { 545 unsigned char x = (*a >> b) & 1; 546 *a = *a & ~(1ll << b); 547 return x; 548 } 549 static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 550 _bittestandset64(__int64 *a, __int64 b) { 551 unsigned char x = (*a >> b) & 1; 552 *a = *a | (1ll << b); 553 return x; 554 } 555 #endif 556 /*----------------------------------------------------------------------------*\ 557 |* Interlocked Exchange Add 558 \*----------------------------------------------------------------------------*/ 559 static __inline__ char __attribute__((__always_inline__, __nodebug__)) 560 _InterlockedExchangeAdd8(char volatile *_Addend, char _Value) { 561 return __atomic_add_fetch(_Addend, _Value, 0) - _Value; 562 } 563 static __inline__ short __attribute__((__always_inline__, __nodebug__)) 564 _InterlockedExchangeAdd16(short volatile *_Addend, short _Value) { 565 return __atomic_add_fetch(_Addend, _Value, 0) - _Value; 566 } 567 static __inline__ long __attribute__((__always_inline__, __nodebug__)) 568 _InterlockedExchangeAdd(long volatile *_Addend, long _Value) { 569 return __atomic_add_fetch(_Addend, _Value, 0) - _Value; 570 } 571 #ifdef __x86_64__ 572 static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) 573 _InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value) { 574 return __atomic_add_fetch(_Addend, _Value, 0) - _Value; 575 } 576 #endif 577 /*----------------------------------------------------------------------------*\ 578 |* Interlocked Exchange Sub 579 \*----------------------------------------------------------------------------*/ 580 static __inline__ char __attribute__((__always_inline__, __nodebug__)) 581 _InterlockedExchangeSub8(char volatile *_Subend, char _Value) { 582 return __atomic_sub_fetch(_Subend, _Value, 0) + _Value; 583 } 584 static __inline__ short __attribute__((__always_inline__, __nodebug__)) 585 _InterlockedExchangeSub16(short volatile *_Subend, short _Value) { 586 return __atomic_sub_fetch(_Subend, _Value, 0) + _Value; 587 } 588 static __inline__ long __attribute__((__always_inline__, __nodebug__)) 589 _InterlockedExchangeSub(long volatile *_Subend, long _Value) { 590 return __atomic_sub_fetch(_Subend, _Value, 0) + _Value; 591 } 592 #ifdef __x86_64__ 593 static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) 594 _InterlockedExchangeSub64(__int64 volatile *_Subend, __int64 _Value) { 595 return __atomic_sub_fetch(_Subend, _Value, 0) + _Value; 596 } 597 #endif 598 /*----------------------------------------------------------------------------*\ 599 |* Interlocked Increment 600 \*----------------------------------------------------------------------------*/ 601 static __inline__ char __attribute__((__always_inline__, __nodebug__)) 602 _InterlockedIncrement16(char volatile *_Value) { 603 return __atomic_add_fetch(_Value, 1, 0); 604 } 605 static __inline__ long __attribute__((__always_inline__, __nodebug__)) 606 _InterlockedIncrement(long volatile *_Value) { 607 return __atomic_add_fetch(_Value, 1, 0); 608 } 609 #ifdef __x86_64__ 610 static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) 611 _InterlockedIncrement64(__int64 volatile *_Value) { 612 return __atomic_add_fetch(_Value, 1, 0); 613 } 614 #endif 615 /*----------------------------------------------------------------------------*\ 616 |* Interlocked Decrement 617 \*----------------------------------------------------------------------------*/ 618 static __inline__ char __attribute__((__always_inline__, __nodebug__)) 619 _InterlockedDecrement16(char volatile *_Value) { 620 return __atomic_sub_fetch(_Value, 1, 0); 621 } 622 static __inline__ long __attribute__((__always_inline__, __nodebug__)) 623 _InterlockedDecrement(long volatile *_Value) { 624 return __atomic_sub_fetch(_Value, 1, 0); 625 } 626 #ifdef __x86_64__ 627 static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) 628 _InterlockedDecrement64(__int64 volatile *_Value) { 629 return __atomic_sub_fetch(_Value, 1, 0); 630 } 631 #endif 632 /*----------------------------------------------------------------------------*\ 633 |* Interlocked And 634 \*----------------------------------------------------------------------------*/ 635 static __inline__ char __attribute__((__always_inline__, __nodebug__)) 636 _InterlockedAnd8(char volatile *_Value, char _Mask) { 637 return __atomic_and_fetch(_Value, _Mask, 0); 638 } 639 static __inline__ short __attribute__((__always_inline__, __nodebug__)) 640 _InterlockedAnd16(short volatile *_Value, short _Mask) { 641 return __atomic_and_fetch(_Value, _Mask, 0); 642 } 643 static __inline__ long __attribute__((__always_inline__, __nodebug__)) 644 _InterlockedAnd(long volatile *_Value, long _Mask) { 645 return __atomic_and_fetch(_Value, _Mask, 0); 646 } 647 #ifdef __x86_64__ 648 static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) 649 _InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask) { 650 return __atomic_and_fetch(_Value, _Mask, 0); 651 } 652 #endif 653 /*----------------------------------------------------------------------------*\ 654 |* Interlocked Or 655 \*----------------------------------------------------------------------------*/ 656 static __inline__ char __attribute__((__always_inline__, __nodebug__)) 657 _InterlockedOr8(char volatile *_Value, char _Mask) { 658 return __atomic_or_fetch(_Value, _Mask, 0); 659 } 660 static __inline__ short __attribute__((__always_inline__, __nodebug__)) 661 _InterlockedOr16(short volatile *_Value, short _Mask) { 662 return __atomic_or_fetch(_Value, _Mask, 0); 663 } 664 static __inline__ long __attribute__((__always_inline__, __nodebug__)) 665 _InterlockedOr(long volatile *_Value, long _Mask) { 666 return __atomic_or_fetch(_Value, _Mask, 0); 667 } 668 #ifdef __x86_64__ 669 static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) 670 _InterlockedOr64(__int64 volatile *_Value, __int64 _Mask) { 671 return __atomic_or_fetch(_Value, _Mask, 0); 672 } 673 #endif 674 /*----------------------------------------------------------------------------*\ 675 |* Interlocked Xor 676 \*----------------------------------------------------------------------------*/ 677 static __inline__ char __attribute__((__always_inline__, __nodebug__)) 678 _InterlockedXor8(char volatile *_Value, char _Mask) { 679 return __atomic_xor_fetch(_Value, _Mask, 0); 680 } 681 static __inline__ short __attribute__((__always_inline__, __nodebug__)) 682 _InterlockedXor16(short volatile *_Value, short _Mask) { 683 return __atomic_xor_fetch(_Value, _Mask, 0); 684 } 685 static __inline__ long __attribute__((__always_inline__, __nodebug__)) 686 _InterlockedXor(long volatile *_Value, long _Mask) { 687 return __atomic_xor_fetch(_Value, _Mask, 0); 688 } 689 #ifdef __x86_64__ 690 static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) 691 _InterlockedXor64(__int64 volatile *_Value, __int64 _Mask) { 692 return __atomic_xor_fetch(_Value, _Mask, 0); 693 } 694 #endif 695 /*----------------------------------------------------------------------------*\ 696 |* Interlocked Exchange 697 \*----------------------------------------------------------------------------*/ 698 static __inline__ char __attribute__((__always_inline__, __nodebug__)) 699 _InterlockedExchange8(char volatile *_Target, char _Value) { 700 __atomic_exchange(_Target, &_Value, &_Value, 0); 701 return _Value; 702 } 703 static __inline__ short __attribute__((__always_inline__, __nodebug__)) 704 _InterlockedExchange16(short volatile *_Target, short _Value) { 705 __atomic_exchange(_Target, &_Value, &_Value, 0); 706 return _Value; 707 } 708 static __inline__ long __attribute__((__always_inline__, __nodebug__)) 709 _InterlockedExchange(long volatile *_Target, long _Value) { 710 __atomic_exchange(_Target, &_Value, &_Value, 0); 711 return _Value; 712 } 713 #ifdef __x86_64__ 714 static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) 715 _InterlockedExchange64(__int64 volatile *_Target, __int64 _Value) { 716 __atomic_exchange(_Target, &_Value, &_Value, 0); 717 return _Value; 718 } 719 #endif 720 /*----------------------------------------------------------------------------*\ 721 |* Interlocked Compare Exchange 722 \*----------------------------------------------------------------------------*/ 723 static __inline__ char __attribute__((__always_inline__, __nodebug__)) 724 _InterlockedCompareExchange8(char volatile *_Destination, 725 char _Exchange, char _Comparand) { 726 __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0); 727 return _Comparand; 728 } 729 static __inline__ short __attribute__((__always_inline__, __nodebug__)) 730 _InterlockedCompareExchange16(short volatile *_Destination, 731 short _Exchange, short _Comparand) { 732 __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0); 733 return _Comparand; 734 } 735 static __inline__ long __attribute__((__always_inline__, __nodebug__)) 736 _InterlockedCompareExchange(long volatile *_Destination, 737 long _Exchange, long _Comparand) { 738 __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0); 739 return _Comparand; 740 } 741 #ifdef __x86_64__ 742 static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) 743 _InterlockedCompareExchange64(__int64 volatile *_Destination, 744 __int64 _Exchange, __int64 _Comparand) { 745 __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0); 746 return _Comparand; 747 } 748 #endif 749 /*----------------------------------------------------------------------------*\ 750 |* Barriers 751 \*----------------------------------------------------------------------------*/ 752 static __inline__ void __attribute__((__always_inline__, __nodebug__)) 753 __attribute__((deprecated("use other intrinsics or C++11 atomics instead"))) 754 _ReadWriteBarrier(void) { 755 __asm__ volatile ("" : : : "memory"); 756 } 757 static __inline__ void __attribute__((__always_inline__, __nodebug__)) 758 __attribute__((deprecated("use other intrinsics or C++11 atomics instead"))) 759 _ReadBarrier(void) { 760 __asm__ volatile ("" : : : "memory"); 761 } 762 static __inline__ void __attribute__((__always_inline__, __nodebug__)) 763 __attribute__((deprecated("use other intrinsics or C++11 atomics instead"))) 764 _WriteBarrier(void) { 765 __asm__ volatile ("" : : : "memory"); 766 } 767 /*----------------------------------------------------------------------------*\ 768 |* Misc 769 \*----------------------------------------------------------------------------*/ 770 static __inline__ void * __attribute__((__always_inline__, __nodebug__)) 771 _AddressOfReturnAddress(void) { 772 return (void*)((char*)__builtin_frame_address(0) + sizeof(void*)); 773 } 774 static __inline__ void * __attribute__((__always_inline__, __nodebug__)) 775 _ReturnAddress(void) { 776 return __builtin_return_address(0); 777 } 778 779 #ifdef __cplusplus 780 } 781 #endif 782 783 #endif /* __INTRIN_H */ 784 #endif /* _MSC_VER */ 785