1 /* 2 Copyright (c) 2005-2017 Intel Corporation 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 16 17 18 19 */ 20 21 #ifndef __TBB_machine_H 22 #define __TBB_machine_H 23 24 /** This header provides basic platform abstraction layer by hooking up appropriate 25 architecture/OS/compiler specific headers from the /include/tbb/machine directory. 26 If a plug-in header does not implement all the required APIs, it must specify 27 the missing ones by setting one or more of the following macros: 28 29 __TBB_USE_GENERIC_PART_WORD_CAS 30 __TBB_USE_GENERIC_PART_WORD_FETCH_ADD 31 __TBB_USE_GENERIC_PART_WORD_FETCH_STORE 32 __TBB_USE_GENERIC_FETCH_ADD 33 __TBB_USE_GENERIC_FETCH_STORE 34 __TBB_USE_GENERIC_DWORD_FETCH_ADD 35 __TBB_USE_GENERIC_DWORD_FETCH_STORE 36 __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 37 __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 38 __TBB_USE_GENERIC_RELAXED_LOAD_STORE 39 __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 40 41 In this case tbb_machine.h will add missing functionality based on a minimal set 42 of APIs that are required to be implemented by all plug-n headers as described 43 further. 44 Note that these generic implementations may be sub-optimal for a particular 45 architecture, and thus should be relied upon only after careful evaluation 46 or as the last resort. 47 48 Additionally __TBB_64BIT_ATOMICS can be set to 0 on a 32-bit architecture to 49 indicate that the port is not going to support double word atomics. It may also 50 be set to 1 explicitly, though normally this is not necessary as tbb_machine.h 51 will set it automatically. 52 53 __TBB_ENDIANNESS macro can be defined by the implementation as well. 54 It is used only if __TBB_USE_GENERIC_PART_WORD_CAS is set (or for testing), 55 and must specify the layout of aligned 16-bit and 32-bit data anywhere within a process 56 (while the details of unaligned 16-bit or 32-bit data or of 64-bit data are irrelevant). 57 The layout must be the same at all relevant memory locations within the current process; 58 in case of page-specific endianness, one endianness must be kept "out of sight". 59 Possible settings, reflecting hardware and possibly O.S. convention, are: 60 - __TBB_ENDIAN_BIG for big-endian data, 61 - __TBB_ENDIAN_LITTLE for little-endian data, 62 - __TBB_ENDIAN_DETECT for run-time detection iff exactly one of the above, 63 - __TBB_ENDIAN_UNSUPPORTED to prevent undefined behavior if none of the above. 64 65 Prerequisites for each architecture port 66 ---------------------------------------- 67 The following functions and macros have no generic implementation. Therefore they must be 68 implemented in each machine architecture specific header either as a conventional 69 function or as a functional macro. 70 71 __TBB_WORDSIZE 72 This is the size of machine word in bytes, i.e. for 32 bit systems it 73 should be defined to 4. 74 75 __TBB_Yield() 76 Signals OS that the current thread is willing to relinquish the remainder 77 of its time quantum. 78 79 __TBB_full_memory_fence() 80 Must prevent all memory operations from being reordered across it (both 81 by hardware and compiler). All such fences must be totally ordered (or 82 sequentially consistent). 83 84 __TBB_machine_cmpswp4( volatile void *ptr, int32_t value, int32_t comparand ) 85 Must be provided if __TBB_USE_FENCED_ATOMICS is not set. 86 87 __TBB_machine_cmpswp8( volatile void *ptr, int32_t value, int64_t comparand ) 88 Must be provided for 64-bit architectures if __TBB_USE_FENCED_ATOMICS is not set, 89 and for 32-bit architectures if __TBB_64BIT_ATOMICS is set 90 91 __TBB_machine_<op><S><fence>(...), where 92 <op> = {cmpswp, fetchadd, fetchstore} 93 <S> = {1, 2, 4, 8} 94 <fence> = {full_fence, acquire, release, relaxed} 95 Must be provided if __TBB_USE_FENCED_ATOMICS is set. 96 97 __TBB_control_consistency_helper() 98 Bridges the memory-semantics gap between architectures providing only 99 implicit C++0x "consume" semantics (like Power Architecture) and those 100 also implicitly obeying control dependencies (like IA-64 architecture). 101 It must be used only in conditional code where the condition is itself 102 data-dependent, and will then make subsequent code behave as if the 103 original data dependency were acquired. 104 It needs only a compiler fence where implied by the architecture 105 either specifically (like IA-64 architecture) or because generally stronger 106 "acquire" semantics are enforced (like x86). 107 It is always valid, though potentially suboptimal, to replace 108 control with acquire on the load and then remove the helper. 109 110 __TBB_acquire_consistency_helper(), __TBB_release_consistency_helper() 111 Must be provided if __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE is set. 112 Enforce acquire and release semantics in generic implementations of fenced 113 store and load operations. Depending on the particular architecture/compiler 114 combination they may be a hardware fence, a compiler fence, both or nothing. 115 **/ 116 117 #include "tbb_stddef.h" 118 119 namespace tbb { 120 namespace internal { //< @cond INTERNAL 121 122 //////////////////////////////////////////////////////////////////////////////// 123 // Overridable helpers declarations 124 // 125 // A machine/*.h file may choose to define these templates, otherwise it must 126 // request default implementation by setting appropriate __TBB_USE_GENERIC_XXX macro(s). 127 // 128 template <typename T, std::size_t S> 129 struct machine_load_store; 130 131 template <typename T, std::size_t S> 132 struct machine_load_store_relaxed; 133 134 template <typename T, std::size_t S> 135 struct machine_load_store_seq_cst; 136 // 137 // End of overridable helpers declarations 138 //////////////////////////////////////////////////////////////////////////////// 139 140 template<size_t S> struct atomic_selector; 141 142 template<> struct atomic_selector<1> { 143 typedef int8_t word; 144 inline static word fetch_store ( volatile void* location, word value ); 145 }; 146 147 template<> struct atomic_selector<2> { 148 typedef int16_t word; 149 inline static word fetch_store ( volatile void* location, word value ); 150 }; 151 152 template<> struct atomic_selector<4> { 153 #if _MSC_VER && !_WIN64 154 // Work-around that avoids spurious /Wp64 warnings 155 typedef intptr_t word; 156 #else 157 typedef int32_t word; 158 #endif 159 inline static word fetch_store ( volatile void* location, word value ); 160 }; 161 162 template<> struct atomic_selector<8> { 163 typedef int64_t word; 164 inline static word fetch_store ( volatile void* location, word value ); 165 }; 166 167 }} //< namespaces internal @endcond, tbb 168 169 #define __TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(M) \ 170 inline void __TBB_machine_generic_store8##M(volatile void *ptr, int64_t value) { \ 171 for(;;) { \ 172 int64_t result = *(volatile int64_t *)ptr; \ 173 if( __TBB_machine_cmpswp8##M(ptr,value,result)==result ) break; \ 174 } \ 175 } \ 176 177 #define __TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(M) \ 178 inline int64_t __TBB_machine_generic_load8##M(const volatile void *ptr) { \ 179 /* Comparand and new value may be anything, they only must be equal, and */ \ 180 /* the value should have a low probability to be actually found in 'location'.*/ \ 181 const int64_t anyvalue = 2305843009213693951LL; \ 182 return __TBB_machine_cmpswp8##M(const_cast<volatile void *>(ptr),anyvalue,anyvalue); \ 183 } \ 184 185 // The set of allowed values for __TBB_ENDIANNESS (see above for details) 186 #define __TBB_ENDIAN_UNSUPPORTED -1 187 #define __TBB_ENDIAN_LITTLE 0 188 #define __TBB_ENDIAN_BIG 1 189 #define __TBB_ENDIAN_DETECT 2 190 191 #if _WIN32||_WIN64 192 193 #ifdef _MANAGED 194 #pragma managed(push, off) 195 #endif 196 197 #if __MINGW64__ || __MINGW32__ 198 extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void ); 199 #define __TBB_Yield() SwitchToThread() 200 #if (TBB_USE_GCC_BUILTINS && __TBB_GCC_BUILTIN_ATOMICS_PRESENT) 201 #include "machine/gcc_generic.h" 202 #elif __MINGW64__ 203 #include "machine/linux_intel64.h" 204 #elif __MINGW32__ 205 #include "machine/linux_ia32.h" 206 #endif 207 #elif (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT) 208 #include "machine/icc_generic.h" 209 #elif defined(_M_IX86) && !defined(__TBB_WIN32_USE_CL_BUILTINS) 210 #include "machine/windows_ia32.h" 211 #elif defined(_M_X64) 212 #include "machine/windows_intel64.h" 213 #elif defined(_M_ARM) || defined(__TBB_WIN32_USE_CL_BUILTINS) 214 #include "machine/msvc_armv7.h" 215 #endif 216 217 #ifdef _MANAGED 218 #pragma managed(pop) 219 #endif 220 221 #elif __TBB_DEFINE_MIC 222 223 #include "machine/mic_common.h" 224 #if (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT) 225 #include "machine/icc_generic.h" 226 #else 227 #include "machine/linux_intel64.h" 228 #endif 229 230 #elif __linux__ || __FreeBSD__ || __NetBSD__ 231 232 #if (TBB_USE_GCC_BUILTINS && __TBB_GCC_BUILTIN_ATOMICS_PRESENT) 233 #include "machine/gcc_generic.h" 234 #elif (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT) 235 #include "machine/icc_generic.h" 236 #elif __i386__ 237 #include "machine/linux_ia32.h" 238 #elif __x86_64__ 239 #include "machine/linux_intel64.h" 240 #elif __ia64__ 241 #include "machine/linux_ia64.h" 242 #elif __powerpc__ 243 #include "machine/mac_ppc.h" 244 #elif __ARM_ARCH_7A__ 245 #include "machine/gcc_armv7.h" 246 #elif __TBB_GCC_BUILTIN_ATOMICS_PRESENT 247 #include "machine/gcc_generic.h" 248 #endif 249 #include "machine/linux_common.h" 250 251 #elif __APPLE__ 252 //TODO: TBB_USE_GCC_BUILTINS is not used for Mac, Sun, Aix 253 #if (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT) 254 #include "machine/icc_generic.h" 255 #elif __TBB_x86_32 256 #include "machine/linux_ia32.h" 257 #elif __TBB_x86_64 258 #include "machine/linux_intel64.h" 259 #elif __POWERPC__ 260 #include "machine/mac_ppc.h" 261 #endif 262 #include "machine/macos_common.h" 263 264 #elif _AIX 265 266 #include "machine/ibm_aix51.h" 267 268 #elif __sun || __SUNPRO_CC 269 270 #define __asm__ asm 271 #define __volatile__ volatile 272 273 #if __i386 || __i386__ 274 #include "machine/linux_ia32.h" 275 #elif __x86_64__ 276 #include "machine/linux_intel64.h" 277 #elif __sparc 278 #include "machine/sunos_sparc.h" 279 #endif 280 #include <sched.h> 281 282 #define __TBB_Yield() sched_yield() 283 284 #endif /* OS selection */ 285 286 #ifndef __TBB_64BIT_ATOMICS 287 #define __TBB_64BIT_ATOMICS 1 288 #endif 289 290 //TODO: replace usage of these functions with usage of tbb::atomic, and then remove them 291 //TODO: map functions with W suffix to use cast to tbb::atomic and according op, i.e. as_atomic().op() 292 // Special atomic functions 293 #if __TBB_USE_FENCED_ATOMICS 294 #define __TBB_machine_cmpswp1 __TBB_machine_cmpswp1full_fence 295 #define __TBB_machine_cmpswp2 __TBB_machine_cmpswp2full_fence 296 #define __TBB_machine_cmpswp4 __TBB_machine_cmpswp4full_fence 297 #define __TBB_machine_cmpswp8 __TBB_machine_cmpswp8full_fence 298 299 #if __TBB_WORDSIZE==8 300 #define __TBB_machine_fetchadd8 __TBB_machine_fetchadd8full_fence 301 #define __TBB_machine_fetchstore8 __TBB_machine_fetchstore8full_fence 302 #define __TBB_FetchAndAddWrelease(P,V) __TBB_machine_fetchadd8release(P,V) 303 #define __TBB_FetchAndIncrementWacquire(P) __TBB_machine_fetchadd8acquire(P,1) 304 #define __TBB_FetchAndDecrementWrelease(P) __TBB_machine_fetchadd8release(P,(-1)) 305 #else 306 #define __TBB_machine_fetchadd4 __TBB_machine_fetchadd4full_fence 307 #define __TBB_machine_fetchstore4 __TBB_machine_fetchstore4full_fence 308 #define __TBB_FetchAndAddWrelease(P,V) __TBB_machine_fetchadd4release(P,V) 309 #define __TBB_FetchAndIncrementWacquire(P) __TBB_machine_fetchadd4acquire(P,1) 310 #define __TBB_FetchAndDecrementWrelease(P) __TBB_machine_fetchadd4release(P,(-1)) 311 #endif /* __TBB_WORDSIZE==4 */ 312 #else /* !__TBB_USE_FENCED_ATOMICS */ 313 #define __TBB_FetchAndAddWrelease(P,V) __TBB_FetchAndAddW(P,V) 314 #define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAddW(P,1) 315 #define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAddW(P,(-1)) 316 #endif /* !__TBB_USE_FENCED_ATOMICS */ 317 318 #if __TBB_WORDSIZE==4 319 #define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp4(P,V,C) 320 #define __TBB_FetchAndAddW(P,V) __TBB_machine_fetchadd4(P,V) 321 #define __TBB_FetchAndStoreW(P,V) __TBB_machine_fetchstore4(P,V) 322 #elif __TBB_WORDSIZE==8 323 #if __TBB_USE_GENERIC_DWORD_LOAD_STORE || __TBB_USE_GENERIC_DWORD_FETCH_ADD || __TBB_USE_GENERIC_DWORD_FETCH_STORE 324 #error These macros should only be used on 32-bit platforms. 325 #endif 326 327 #define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp8(P,V,C) 328 #define __TBB_FetchAndAddW(P,V) __TBB_machine_fetchadd8(P,V) 329 #define __TBB_FetchAndStoreW(P,V) __TBB_machine_fetchstore8(P,V) 330 #else /* __TBB_WORDSIZE != 8 */ 331 #error Unsupported machine word size. 332 #endif /* __TBB_WORDSIZE */ 333 334 #ifndef __TBB_Pause 335 inline void __TBB_Pause(int32_t) { 336 __TBB_Yield(); 337 } 338 #endif 339 340 namespace tbb { 341 342 //! Sequentially consistent full memory fence. 343 inline void atomic_fence () { __TBB_full_memory_fence(); } 344 345 namespace internal { //< @cond INTERNAL 346 347 //! Class that implements exponential backoff. 348 /** See implementation of spin_wait_while_eq for an example. */ 349 class atomic_backoff : no_copy { 350 //! Time delay, in units of "pause" instructions. 351 /** Should be equal to approximately the number of "pause" instructions 352 that take the same time as an context switch. Must be a power of two.*/ 353 static const int32_t LOOPS_BEFORE_YIELD = 16; 354 int32_t count; 355 public: 356 // In many cases, an object of this type is initialized eagerly on hot path, 357 // as in for(atomic_backoff b; ; b.pause()) { /*loop body*/ } 358 // For this reason, the construction cost must be very small! 359 atomic_backoff() : count(1) {} 360 // This constructor pauses immediately; do not use on hot paths! 361 atomic_backoff( bool ) : count(1) { pause(); } 362 363 //! Pause for a while. 364 void pause() { 365 if( count<=LOOPS_BEFORE_YIELD ) { 366 __TBB_Pause(count); 367 // Pause twice as long the next time. 368 count*=2; 369 } else { 370 // Pause is so long that we might as well yield CPU to scheduler. 371 __TBB_Yield(); 372 } 373 } 374 375 //! Pause for a few times and return false if saturated. 376 bool bounded_pause() { 377 __TBB_Pause(count); 378 if( count<LOOPS_BEFORE_YIELD ) { 379 // Pause twice as long the next time. 380 count*=2; 381 return true; 382 } else { 383 return false; 384 } 385 } 386 387 void reset() { 388 count = 1; 389 } 390 }; 391 392 //! Spin WHILE the value of the variable is equal to a given value 393 /** T and U should be comparable types. */ 394 template<typename T, typename U> 395 void spin_wait_while_eq( const volatile T& location, U value ) { 396 atomic_backoff backoff; 397 while( location==value ) backoff.pause(); 398 } 399 400 //! Spin UNTIL the value of the variable is equal to a given value 401 /** T and U should be comparable types. */ 402 template<typename T, typename U> 403 void spin_wait_until_eq( const volatile T& location, const U value ) { 404 atomic_backoff backoff; 405 while( location!=value ) backoff.pause(); 406 } 407 408 template <typename predicate_type> 409 void spin_wait_while(predicate_type condition){ 410 atomic_backoff backoff; 411 while( condition() ) backoff.pause(); 412 } 413 414 //////////////////////////////////////////////////////////////////////////////// 415 // Generic compare-and-swap applied to only a part of a machine word. 416 // 417 #ifndef __TBB_ENDIANNESS 418 #define __TBB_ENDIANNESS __TBB_ENDIAN_DETECT 419 #endif 420 421 #if __TBB_USE_GENERIC_PART_WORD_CAS && __TBB_ENDIANNESS==__TBB_ENDIAN_UNSUPPORTED 422 #error Generic implementation of part-word CAS may not be used with __TBB_ENDIAN_UNSUPPORTED 423 #endif 424 425 #if __TBB_ENDIANNESS!=__TBB_ENDIAN_UNSUPPORTED 426 // 427 // This function is the only use of __TBB_ENDIANNESS. 428 // The following restrictions/limitations apply for this operation: 429 // - T must be an integer type of at most 4 bytes for the casts and calculations to work 430 // - T must also be less than 4 bytes to avoid compiler warnings when computing mask 431 // (and for the operation to be useful at all, so no workaround is applied) 432 // - the architecture must consistently use either little-endian or big-endian (same for all locations) 433 // 434 // TODO: static_assert for the type requirements stated above 435 template<typename T> 436 inline T __TBB_MaskedCompareAndSwap (volatile T * const ptr, const T value, const T comparand ) { 437 struct endianness{ static bool is_big_endian(){ 438 #if __TBB_ENDIANNESS==__TBB_ENDIAN_DETECT 439 const uint32_t probe = 0x03020100; 440 return (((const char*)(&probe))[0]==0x03); 441 #elif __TBB_ENDIANNESS==__TBB_ENDIAN_BIG || __TBB_ENDIANNESS==__TBB_ENDIAN_LITTLE 442 return __TBB_ENDIANNESS==__TBB_ENDIAN_BIG; 443 #else 444 #error Unexpected value of __TBB_ENDIANNESS 445 #endif 446 }}; 447 448 const uint32_t byte_offset = (uint32_t) ((uintptr_t)ptr & 0x3); 449 volatile uint32_t * const aligned_ptr = (uint32_t*)((uintptr_t)ptr - byte_offset ); 450 451 // location of T within uint32_t for a C++ shift operation 452 const uint32_t bits_to_shift = 8*(endianness::is_big_endian() ? (4 - sizeof(T) - (byte_offset)) : byte_offset); 453 const uint32_t mask = (((uint32_t)1<<(sizeof(T)*8)) - 1 )<<bits_to_shift; 454 // for signed T, any sign extension bits in cast value/comparand are immediately clipped by mask 455 const uint32_t shifted_comparand = ((uint32_t)comparand << bits_to_shift)&mask; 456 const uint32_t shifted_value = ((uint32_t)value << bits_to_shift)&mask; 457 458 for( atomic_backoff b;;b.pause() ) { 459 const uint32_t surroundings = *aligned_ptr & ~mask ; // may have changed during the pause 460 const uint32_t big_comparand = surroundings | shifted_comparand ; 461 const uint32_t big_value = surroundings | shifted_value ; 462 // __TBB_machine_cmpswp4 presumed to have full fence. 463 // Cast shuts up /Wp64 warning 464 const uint32_t big_result = (uint32_t)__TBB_machine_cmpswp4( aligned_ptr, big_value, big_comparand ); 465 if( big_result == big_comparand // CAS succeeded 466 || ((big_result ^ big_comparand) & mask) != 0) // CAS failed and the bits of interest have changed 467 { 468 return T((big_result & mask) >> bits_to_shift); 469 } 470 else continue; // CAS failed but the bits of interest were not changed 471 } 472 } 473 #endif // __TBB_ENDIANNESS!=__TBB_ENDIAN_UNSUPPORTED 474 //////////////////////////////////////////////////////////////////////////////// 475 476 template<size_t S, typename T> 477 inline T __TBB_CompareAndSwapGeneric (volatile void *ptr, T value, T comparand ); 478 479 template<> 480 inline int8_t __TBB_CompareAndSwapGeneric <1,int8_t> (volatile void *ptr, int8_t value, int8_t comparand ) { 481 #if __TBB_USE_GENERIC_PART_WORD_CAS 482 return __TBB_MaskedCompareAndSwap<int8_t>((volatile int8_t *)ptr,value,comparand); 483 #else 484 return __TBB_machine_cmpswp1(ptr,value,comparand); 485 #endif 486 } 487 488 template<> 489 inline int16_t __TBB_CompareAndSwapGeneric <2,int16_t> (volatile void *ptr, int16_t value, int16_t comparand ) { 490 #if __TBB_USE_GENERIC_PART_WORD_CAS 491 return __TBB_MaskedCompareAndSwap<int16_t>((volatile int16_t *)ptr,value,comparand); 492 #else 493 return __TBB_machine_cmpswp2(ptr,value,comparand); 494 #endif 495 } 496 497 template<> 498 inline int32_t __TBB_CompareAndSwapGeneric <4,int32_t> (volatile void *ptr, int32_t value, int32_t comparand ) { 499 // Cast shuts up /Wp64 warning 500 return (int32_t)__TBB_machine_cmpswp4(ptr,value,comparand); 501 } 502 503 #if __TBB_64BIT_ATOMICS 504 template<> 505 inline int64_t __TBB_CompareAndSwapGeneric <8,int64_t> (volatile void *ptr, int64_t value, int64_t comparand ) { 506 return __TBB_machine_cmpswp8(ptr,value,comparand); 507 } 508 #endif 509 510 template<size_t S, typename T> 511 inline T __TBB_FetchAndAddGeneric (volatile void *ptr, T addend) { 512 T result; 513 for( atomic_backoff b;;b.pause() ) { 514 result = *reinterpret_cast<volatile T *>(ptr); 515 // __TBB_CompareAndSwapGeneric presumed to have full fence. 516 if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, result+addend, result )==result ) 517 break; 518 } 519 return result; 520 } 521 522 template<size_t S, typename T> 523 inline T __TBB_FetchAndStoreGeneric (volatile void *ptr, T value) { 524 T result; 525 for( atomic_backoff b;;b.pause() ) { 526 result = *reinterpret_cast<volatile T *>(ptr); 527 // __TBB_CompareAndSwapGeneric presumed to have full fence. 528 if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, value, result )==result ) 529 break; 530 } 531 return result; 532 } 533 534 #if __TBB_USE_GENERIC_PART_WORD_CAS 535 #define __TBB_machine_cmpswp1 tbb::internal::__TBB_CompareAndSwapGeneric<1,int8_t> 536 #define __TBB_machine_cmpswp2 tbb::internal::__TBB_CompareAndSwapGeneric<2,int16_t> 537 #endif 538 539 #if __TBB_USE_GENERIC_FETCH_ADD || __TBB_USE_GENERIC_PART_WORD_FETCH_ADD 540 #define __TBB_machine_fetchadd1 tbb::internal::__TBB_FetchAndAddGeneric<1,int8_t> 541 #define __TBB_machine_fetchadd2 tbb::internal::__TBB_FetchAndAddGeneric<2,int16_t> 542 #endif 543 544 #if __TBB_USE_GENERIC_FETCH_ADD 545 #define __TBB_machine_fetchadd4 tbb::internal::__TBB_FetchAndAddGeneric<4,int32_t> 546 #endif 547 548 #if __TBB_USE_GENERIC_FETCH_ADD || __TBB_USE_GENERIC_DWORD_FETCH_ADD 549 #define __TBB_machine_fetchadd8 tbb::internal::__TBB_FetchAndAddGeneric<8,int64_t> 550 #endif 551 552 #if __TBB_USE_GENERIC_FETCH_STORE || __TBB_USE_GENERIC_PART_WORD_FETCH_STORE 553 #define __TBB_machine_fetchstore1 tbb::internal::__TBB_FetchAndStoreGeneric<1,int8_t> 554 #define __TBB_machine_fetchstore2 tbb::internal::__TBB_FetchAndStoreGeneric<2,int16_t> 555 #endif 556 557 #if __TBB_USE_GENERIC_FETCH_STORE 558 #define __TBB_machine_fetchstore4 tbb::internal::__TBB_FetchAndStoreGeneric<4,int32_t> 559 #endif 560 561 #if __TBB_USE_GENERIC_FETCH_STORE || __TBB_USE_GENERIC_DWORD_FETCH_STORE 562 #define __TBB_machine_fetchstore8 tbb::internal::__TBB_FetchAndStoreGeneric<8,int64_t> 563 #endif 564 565 #if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 566 #define __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(S) \ 567 atomic_selector<S>::word atomic_selector<S>::fetch_store ( volatile void* location, word value ) { \ 568 return __TBB_machine_fetchstore##S( location, value ); \ 569 } 570 571 __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(1) 572 __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(2) 573 __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(4) 574 __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(8) 575 576 #undef __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE 577 #endif /* __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */ 578 579 #if __TBB_USE_GENERIC_DWORD_LOAD_STORE 580 /*TODO: find a more elegant way to handle function names difference*/ 581 #if ! __TBB_USE_FENCED_ATOMICS 582 /* This name forwarding is needed for generic implementation of 583 * load8/store8 defined below (via macro) to pick the right CAS function*/ 584 #define __TBB_machine_cmpswp8full_fence __TBB_machine_cmpswp8 585 #endif 586 __TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(full_fence) 587 __TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(full_fence) 588 589 #if ! __TBB_USE_FENCED_ATOMICS 590 #undef __TBB_machine_cmpswp8full_fence 591 #endif 592 593 #define __TBB_machine_store8 tbb::internal::__TBB_machine_generic_store8full_fence 594 #define __TBB_machine_load8 tbb::internal::__TBB_machine_generic_load8full_fence 595 #endif /* __TBB_USE_GENERIC_DWORD_LOAD_STORE */ 596 597 #if __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 598 /** Fenced operations use volatile qualifier to prevent compiler from optimizing 599 them out, and on architectures with weak memory ordering to induce compiler 600 to generate code with appropriate acquire/release semantics. 601 On architectures like IA32, Intel64 (and likely Sparc TSO) volatile has 602 no effect on code gen, and consistency helpers serve as a compiler fence (the 603 latter being true for IA64/gcc as well to fix a bug in some gcc versions). 604 This code assumes that the generated instructions will operate atomically, 605 which typically requires a type that can be moved in a single instruction, 606 cooperation from the compiler for effective use of such an instruction, 607 and appropriate alignment of the data. **/ 608 template <typename T, size_t S> 609 struct machine_load_store { 610 static T load_with_acquire ( const volatile T& location ) { 611 T to_return = location; 612 __TBB_acquire_consistency_helper(); 613 return to_return; 614 } 615 static void store_with_release ( volatile T &location, T value ) { 616 __TBB_release_consistency_helper(); 617 location = value; 618 } 619 }; 620 621 //in general, plain load and store of 32bit compiler is not atomic for 64bit types 622 #if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS 623 template <typename T> 624 struct machine_load_store<T,8> { 625 static T load_with_acquire ( const volatile T& location ) { 626 return (T)__TBB_machine_load8( (const volatile void*)&location ); 627 } 628 static void store_with_release ( volatile T& location, T value ) { 629 __TBB_machine_store8( (volatile void*)&location, (int64_t)value ); 630 } 631 }; 632 #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */ 633 #endif /* __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE */ 634 635 #if __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 636 template <typename T, size_t S> 637 struct machine_load_store_seq_cst { 638 static T load ( const volatile T& location ) { 639 __TBB_full_memory_fence(); 640 return machine_load_store<T,S>::load_with_acquire( location ); 641 } 642 #if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 643 static void store ( volatile T &location, T value ) { 644 atomic_selector<S>::fetch_store( (volatile void*)&location, (typename atomic_selector<S>::word)value ); 645 } 646 #else /* !__TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */ 647 static void store ( volatile T &location, T value ) { 648 machine_load_store<T,S>::store_with_release( location, value ); 649 __TBB_full_memory_fence(); 650 } 651 #endif /* !__TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */ 652 }; 653 654 #if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS 655 /** The implementation does not use functions __TBB_machine_load8/store8 as they 656 are not required to be sequentially consistent. **/ 657 template <typename T> 658 struct machine_load_store_seq_cst<T,8> { 659 static T load ( const volatile T& location ) { 660 // Comparand and new value may be anything, they only must be equal, and 661 // the value should have a low probability to be actually found in 'location'. 662 const int64_t anyvalue = 2305843009213693951LL; 663 return __TBB_machine_cmpswp8( (volatile void*)const_cast<volatile T*>(&location), anyvalue, anyvalue ); 664 } 665 static void store ( volatile T &location, T value ) { 666 #if __TBB_GCC_VERSION >= 40702 667 #pragma GCC diagnostic push 668 #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" 669 #endif 670 // An atomic initialization leads to reading of uninitialized memory 671 int64_t result = (volatile int64_t&)location; 672 #if __TBB_GCC_VERSION >= 40702 673 #pragma GCC diagnostic pop 674 #endif 675 while ( __TBB_machine_cmpswp8((volatile void*)&location, (int64_t)value, result) != result ) 676 result = (volatile int64_t&)location; 677 } 678 }; 679 #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */ 680 #endif /*__TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE */ 681 682 #if __TBB_USE_GENERIC_RELAXED_LOAD_STORE 683 // Relaxed operations add volatile qualifier to prevent compiler from optimizing them out. 684 /** Volatile should not incur any additional cost on IA32, Intel64, and Sparc TSO 685 architectures. However on architectures with weak memory ordering compiler may 686 generate code with acquire/release semantics for operations on volatile data. **/ 687 template <typename T, size_t S> 688 struct machine_load_store_relaxed { 689 static inline T load ( const volatile T& location ) { 690 return location; 691 } 692 static inline void store ( volatile T& location, T value ) { 693 location = value; 694 } 695 }; 696 697 #if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS 698 template <typename T> 699 struct machine_load_store_relaxed<T,8> { 700 static inline T load ( const volatile T& location ) { 701 return (T)__TBB_machine_load8( (const volatile void*)&location ); 702 } 703 static inline void store ( volatile T& location, T value ) { 704 __TBB_machine_store8( (volatile void*)&location, (int64_t)value ); 705 } 706 }; 707 #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */ 708 #endif /* __TBB_USE_GENERIC_RELAXED_LOAD_STORE */ 709 710 #undef __TBB_WORDSIZE //this macro is forbidden to use outside of atomic machinery 711 712 template<typename T> 713 inline T __TBB_load_with_acquire(const volatile T &location) { 714 return machine_load_store<T,sizeof(T)>::load_with_acquire( location ); 715 } 716 template<typename T, typename V> 717 inline void __TBB_store_with_release(volatile T& location, V value) { 718 machine_load_store<T,sizeof(T)>::store_with_release( location, T(value) ); 719 } 720 //! Overload that exists solely to avoid /Wp64 warnings. 721 inline void __TBB_store_with_release(volatile size_t& location, size_t value) { 722 machine_load_store<size_t,sizeof(size_t)>::store_with_release( location, value ); 723 } 724 725 template<typename T> 726 inline T __TBB_load_full_fence(const volatile T &location) { 727 return machine_load_store_seq_cst<T,sizeof(T)>::load( location ); 728 } 729 template<typename T, typename V> 730 inline void __TBB_store_full_fence(volatile T& location, V value) { 731 machine_load_store_seq_cst<T,sizeof(T)>::store( location, T(value) ); 732 } 733 //! Overload that exists solely to avoid /Wp64 warnings. 734 inline void __TBB_store_full_fence(volatile size_t& location, size_t value) { 735 machine_load_store_seq_cst<size_t,sizeof(size_t)>::store( location, value ); 736 } 737 738 template<typename T> 739 inline T __TBB_load_relaxed (const volatile T& location) { 740 return machine_load_store_relaxed<T,sizeof(T)>::load( const_cast<T&>(location) ); 741 } 742 template<typename T, typename V> 743 inline void __TBB_store_relaxed ( volatile T& location, V value ) { 744 machine_load_store_relaxed<T,sizeof(T)>::store( const_cast<T&>(location), T(value) ); 745 } 746 //! Overload that exists solely to avoid /Wp64 warnings. 747 inline void __TBB_store_relaxed ( volatile size_t& location, size_t value ) { 748 machine_load_store_relaxed<size_t,sizeof(size_t)>::store( const_cast<size_t&>(location), value ); 749 } 750 751 // Macro __TBB_TypeWithAlignmentAtLeastAsStrict(T) should be a type with alignment at least as 752 // strict as type T. The type should have a trivial default constructor and destructor, so that 753 // arrays of that type can be declared without initializers. 754 // It is correct (but perhaps a waste of space) if __TBB_TypeWithAlignmentAtLeastAsStrict(T) expands 755 // to a type bigger than T. 756 // The default definition here works on machines where integers are naturally aligned and the 757 // strictest alignment is 64. 758 #ifndef __TBB_TypeWithAlignmentAtLeastAsStrict 759 760 #if __TBB_ALIGNAS_PRESENT 761 762 // Use C++11 keywords alignas and alignof 763 #define __TBB_DefineTypeWithAlignment(PowerOf2) \ 764 struct alignas(PowerOf2) __TBB_machine_type_with_alignment_##PowerOf2 { \ 765 uint32_t member[PowerOf2/sizeof(uint32_t)]; \ 766 }; 767 #define __TBB_alignof(T) alignof(T) 768 769 #elif __TBB_ATTRIBUTE_ALIGNED_PRESENT 770 771 #define __TBB_DefineTypeWithAlignment(PowerOf2) \ 772 struct __TBB_machine_type_with_alignment_##PowerOf2 { \ 773 uint32_t member[PowerOf2/sizeof(uint32_t)]; \ 774 } __attribute__((aligned(PowerOf2))); 775 #define __TBB_alignof(T) __alignof__(T) 776 777 #elif __TBB_DECLSPEC_ALIGN_PRESENT 778 779 #define __TBB_DefineTypeWithAlignment(PowerOf2) \ 780 __declspec(align(PowerOf2)) \ 781 struct __TBB_machine_type_with_alignment_##PowerOf2 { \ 782 uint32_t member[PowerOf2/sizeof(uint32_t)]; \ 783 }; 784 #define __TBB_alignof(T) __alignof(T) 785 786 #else /* A compiler with unknown syntax for data alignment */ 787 #error Must define __TBB_TypeWithAlignmentAtLeastAsStrict(T) 788 #endif 789 790 /* Now declare types aligned to useful powers of two */ 791 // TODO: Is __TBB_DefineTypeWithAlignment(8) needed on 32 bit platforms? 792 __TBB_DefineTypeWithAlignment(16) 793 __TBB_DefineTypeWithAlignment(32) 794 __TBB_DefineTypeWithAlignment(64) 795 796 typedef __TBB_machine_type_with_alignment_64 __TBB_machine_type_with_strictest_alignment; 797 798 // Primary template is a declaration of incomplete type so that it fails with unknown alignments 799 template<size_t N> struct type_with_alignment; 800 801 // Specializations for allowed alignments 802 template<> struct type_with_alignment<1> { char member; }; 803 template<> struct type_with_alignment<2> { uint16_t member; }; 804 template<> struct type_with_alignment<4> { uint32_t member; }; 805 template<> struct type_with_alignment<8> { uint64_t member; }; 806 template<> struct type_with_alignment<16> {__TBB_machine_type_with_alignment_16 member; }; 807 template<> struct type_with_alignment<32> {__TBB_machine_type_with_alignment_32 member; }; 808 template<> struct type_with_alignment<64> {__TBB_machine_type_with_alignment_64 member; }; 809 810 #if __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN 811 //! Work around for bug in GNU 3.2 and MSVC compilers. 812 /** Bug is that compiler sometimes returns 0 for __alignof(T) when T has not yet been instantiated. 813 The work-around forces instantiation by forcing computation of sizeof(T) before __alignof(T). */ 814 template<size_t Size, typename T> 815 struct work_around_alignment_bug { 816 static const size_t alignment = __TBB_alignof(T); 817 }; 818 #define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<tbb::internal::work_around_alignment_bug<sizeof(T),T>::alignment> 819 #else 820 #define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<__TBB_alignof(T)> 821 #endif /* __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN */ 822 823 #endif /* __TBB_TypeWithAlignmentAtLeastAsStrict */ 824 825 // Template class here is to avoid instantiation of the static data for modules that don't use it 826 template<typename T> 827 struct reverse { 828 static const T byte_table[256]; 829 }; 830 // An efficient implementation of the reverse function utilizes a 2^8 lookup table holding the bit-reversed 831 // values of [0..2^8 - 1]. Those values can also be computed on the fly at a slightly higher cost. 832 template<typename T> 833 const T reverse<T>::byte_table[256] = { 834 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0, 835 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, 836 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, 837 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, 838 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, 839 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA, 840 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6, 841 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, 842 0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1, 843 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9, 844 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, 845 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, 846 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, 847 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, 848 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, 849 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF 850 }; 851 852 } // namespace internal @endcond 853 } // namespace tbb 854 855 // Preserving access to legacy APIs 856 using tbb::internal::__TBB_load_with_acquire; 857 using tbb::internal::__TBB_store_with_release; 858 859 // Mapping historically used names to the ones expected by atomic_load_store_traits 860 #define __TBB_load_acquire __TBB_load_with_acquire 861 #define __TBB_store_release __TBB_store_with_release 862 863 #ifndef __TBB_Log2 864 inline intptr_t __TBB_Log2( uintptr_t x ) { 865 if( x==0 ) return -1; 866 intptr_t result = 0; 867 868 #if !defined(_M_ARM) 869 uintptr_t tmp_; 870 if( sizeof(x)>4 && (tmp_ = ((uint64_t)x)>>32) ) { x=tmp_; result += 32; } 871 #endif 872 if( uintptr_t tmp = x>>16 ) { x=tmp; result += 16; } 873 if( uintptr_t tmp = x>>8 ) { x=tmp; result += 8; } 874 if( uintptr_t tmp = x>>4 ) { x=tmp; result += 4; } 875 if( uintptr_t tmp = x>>2 ) { x=tmp; result += 2; } 876 877 return (x&2)? result+1: result; 878 } 879 #endif 880 881 #ifndef __TBB_AtomicOR 882 inline void __TBB_AtomicOR( volatile void *operand, uintptr_t addend ) { 883 for( tbb::internal::atomic_backoff b;;b.pause() ) { 884 uintptr_t tmp = *(volatile uintptr_t *)operand; 885 uintptr_t result = __TBB_CompareAndSwapW(operand, tmp|addend, tmp); 886 if( result==tmp ) break; 887 } 888 } 889 #endif 890 891 #ifndef __TBB_AtomicAND 892 inline void __TBB_AtomicAND( volatile void *operand, uintptr_t addend ) { 893 for( tbb::internal::atomic_backoff b;;b.pause() ) { 894 uintptr_t tmp = *(volatile uintptr_t *)operand; 895 uintptr_t result = __TBB_CompareAndSwapW(operand, tmp&addend, tmp); 896 if( result==tmp ) break; 897 } 898 } 899 #endif 900 901 #if __TBB_PREFETCHING 902 #ifndef __TBB_cl_prefetch 903 #error This platform does not define cache management primitives required for __TBB_PREFETCHING 904 #endif 905 906 #ifndef __TBB_cl_evict 907 #define __TBB_cl_evict(p) 908 #endif 909 #endif 910 911 #ifndef __TBB_Flag 912 typedef unsigned char __TBB_Flag; 913 #endif 914 typedef __TBB_atomic __TBB_Flag __TBB_atomic_flag; 915 916 #ifndef __TBB_TryLockByte 917 inline bool __TBB_TryLockByte( __TBB_atomic_flag &flag ) { 918 return __TBB_machine_cmpswp1(&flag,1,0)==0; 919 } 920 #endif 921 922 #ifndef __TBB_LockByte 923 inline __TBB_Flag __TBB_LockByte( __TBB_atomic_flag& flag ) { 924 tbb::internal::atomic_backoff backoff; 925 while( !__TBB_TryLockByte(flag) ) backoff.pause(); 926 return 0; 927 } 928 #endif 929 930 #ifndef __TBB_UnlockByte 931 #define __TBB_UnlockByte(addr) __TBB_store_with_release((addr),0) 932 #endif 933 934 // lock primitives with Intel(R) Transactional Synchronization Extensions (Intel(R) TSX) 935 #if ( __TBB_x86_32 || __TBB_x86_64 ) /* only on ia32/intel64 */ 936 inline void __TBB_TryLockByteElidedCancel() { __TBB_machine_try_lock_elided_cancel(); } 937 938 inline bool __TBB_TryLockByteElided( __TBB_atomic_flag& flag ) { 939 bool res = __TBB_machine_try_lock_elided( &flag )!=0; 940 // to avoid the "lemming" effect, we need to abort the transaction 941 // if __TBB_machine_try_lock_elided returns false (i.e., someone else 942 // has acquired the mutex non-speculatively). 943 if( !res ) __TBB_TryLockByteElidedCancel(); 944 return res; 945 } 946 947 inline void __TBB_LockByteElided( __TBB_atomic_flag& flag ) 948 { 949 for(;;) { 950 tbb::internal::spin_wait_while_eq( flag, 1 ); 951 if( __TBB_machine_try_lock_elided( &flag ) ) 952 return; 953 // Another thread acquired the lock "for real". 954 // To avoid the "lemming" effect, we abort the transaction. 955 __TBB_TryLockByteElidedCancel(); 956 } 957 } 958 959 inline void __TBB_UnlockByteElided( __TBB_atomic_flag& flag ) { 960 __TBB_machine_unlock_elided( &flag ); 961 } 962 #endif 963 964 #ifndef __TBB_ReverseByte 965 inline unsigned char __TBB_ReverseByte(unsigned char src) { 966 return tbb::internal::reverse<unsigned char>::byte_table[src]; 967 } 968 #endif 969 970 template<typename T> 971 T __TBB_ReverseBits(T src) { 972 T dst; 973 unsigned char *original = (unsigned char *) &src; 974 unsigned char *reversed = (unsigned char *) &dst; 975 976 for( int i = sizeof(T)-1; i >= 0; i-- ) 977 reversed[i] = __TBB_ReverseByte( original[sizeof(T)-i-1] ); 978 979 return dst; 980 } 981 982 #endif /* __TBB_machine_H */ 983