/*
 * Copyright 2010-2019 Branimir Karadzic. All rights reserved.
 * License: https://github.com/bkaradzic/bx#license-bsd-2-clause
 */

#ifndef BX_CPU_H_HEADER_GUARD
#	error "Must be included from bx/cpu.h!"
#endif // BX_CPU_H_HEADER_GUARD

#if BX_COMPILER_MSVC
#	if BX_PLATFORM_WINRT
#		include <windows.h>
#	endif // BX_PLATFORM_WINRT

#	if BX_CPU_X86
#		include <emmintrin.h> // _mm_mfence
#	endif // BX_CPU_X86

extern "C" void _ReadBarrier();
#	pragma intrinsic(_ReadBarrier)

extern "C" void _WriteBarrier();
#	pragma intrinsic(_WriteBarrier)

extern "C" void _ReadWriteBarrier();
#	pragma intrinsic(_ReadWriteBarrier)

extern "C" long _InterlockedExchangeAdd(long volatile* _ptr, long _value);
#	pragma intrinsic(_InterlockedExchangeAdd)

extern "C" int64_t __cdecl _InterlockedExchangeAdd64(int64_t volatile* _ptr, int64_t _value);
//#	pragma intrinsic(_InterlockedExchangeAdd64)

extern "C" long _InterlockedCompareExchange(long volatile* _ptr, long _exchange, long _comparand);
#	pragma intrinsic(_InterlockedCompareExchange)

extern "C" int64_t _InterlockedCompareExchange64(int64_t volatile* _ptr, int64_t _exchange, int64_t _comparand);
#	pragma intrinsic(_InterlockedCompareExchange64)

// VS2013 (_MSC_VER == 1800) x86 only: the _InterlockedExchangePointer intrinsic
// is broken (see FIXED_592562), so synthesize it from _InterlockedExchange.
#	if (_MSC_VER == 1800) && !defined(FIXED_592562) && defined(_M_IX86) && !defined(_M_CEE_PURE)

extern "C" long _InterlockedExchange(long volatile* _ptr, long _value);
#		pragma intrinsic(_InterlockedExchange)

__forceinline static void* _InterlockedExchangePointer_impl(void* volatile* _Target, void* _Value)
{
	return (void*)_InterlockedExchange( (long volatile*)_Target, (long)_Value);
}
#		define _InterlockedExchangePointer(p, v) _InterlockedExchangePointer_impl(p, v)

#	else

extern "C" void* _InterlockedExchangePointer(void* volatile* _ptr, void* _value);
#		pragma intrinsic(_InterlockedExchangePointer)

#	endif // (_MSC_VER == 1800) && !defined(FIXED_592562) && defined(_M_IX86) && !defined(_M_CEE_PURE)

#	if BX_PLATFORM_WINRT
#		define _InterlockedExchangeAdd64 InterlockedExchangeAdd64
#	endif // BX_PLATFORM_WINRT
#endif // BX_COMPILER_MSVC

namespace bx
{
	// Compiler-only barriers: these keep the compiler from reordering loads
	// and/or stores across the call; they emit no fence instruction.
	inline void readBarrier()
	{
#if BX_COMPILER_MSVC
		_ReadBarrier();
#else
		asm volatile("":::"memory");
#endif // BX_COMPILER
	}

	inline void writeBarrier()
	{
#if BX_COMPILER_MSVC
		_WriteBarrier();
#else
		asm volatile("":::"memory");
#endif // BX_COMPILER
	}

	inline void readWriteBarrier()
	{
#if BX_COMPILER_MSVC
		_ReadWriteBarrier();
#else
		asm volatile("":::"memory");
#endif // BX_COMPILER
	}

	// Full hardware memory barrier.
	inline void memoryBarrier()
	{
#if BX_PLATFORM_WINRT
		MemoryBarrier();
#elif BX_COMPILER_MSVC
		_mm_mfence();
#else
		__sync_synchronize();
#endif // BX_COMPILER
	}

	// Compare-and-swap: stores _new at *_ptr only if the current value equals
	// _old. Returns the value observed at *_ptr; the swap succeeded when the
	// returned value equals _old.
	template<>
	inline int32_t atomicCompareAndSwap<int32_t>(volatile int32_t* _ptr, int32_t _old, int32_t _new)
	{
#if BX_COMPILER_MSVC
		return int32_t(_InterlockedCompareExchange( (volatile long*)(_ptr), long(_new), long(_old) ) );
#else
		return __sync_val_compare_and_swap( (volatile int32_t*)_ptr, _old, _new);
#endif // BX_COMPILER
	}

	template<>
	inline uint32_t atomicCompareAndSwap<uint32_t>(volatile uint32_t* _ptr, uint32_t _old, uint32_t _new)
	{
#if BX_COMPILER_MSVC
		return uint32_t(_InterlockedCompareExchange( (volatile long*)(_ptr), long(_new), long(_old) ) );
#else
		return __sync_val_compare_and_swap( (volatile int32_t*)_ptr, _old, _new);
#endif // BX_COMPILER
	}

	template<>
	inline int64_t atomicCompareAndSwap<int64_t>(volatile int64_t* _ptr, int64_t _old, int64_t _new)
	{
#if BX_COMPILER_MSVC
		return _InterlockedCompareExchange64(_ptr, _new, _old);
#else
		return __sync_val_compare_and_swap( (volatile int64_t*)_ptr, _old, _new);
#endif // BX_COMPILER
	}

	template<>
	inline uint64_t atomicCompareAndSwap<uint64_t>(volatile uint64_t* _ptr, uint64_t _old, uint64_t _new)
	{
#if BX_COMPILER_MSVC
		return uint64_t(_InterlockedCompareExchange64( (volatile int64_t*)(_ptr), int64_t(_new), int64_t(_old) ) );
#else
		return __sync_val_compare_and_swap( (volatile int64_t*)_ptr, _old, _new);
#endif // BX_COMPILER
	}
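	// A minimal usage sketch (`s_lock` is a hypothetical variable, not part of
	// this header): a test-and-set spinlock built on atomicCompareAndSwap. Only
	// the thread that observes 0 wins the swap; everyone else spins.
	//
	//	static volatile int32_t s_lock = 0;
	//
	//	while (0 != atomicCompareAndSwap<int32_t>(&s_lock, 0, 1) ) {} // acquire
	//	// ... critical section ...
	//	atomicCompareAndSwap<int32_t>(&s_lock, 1, 0); // release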
	// Fetch-and-add: atomically adds _add to *_ptr and returns the value from
	// *before* the addition.
	template<>
	inline int32_t atomicFetchAndAdd<int32_t>(volatile int32_t* _ptr, int32_t _add)
	{
#if BX_COMPILER_MSVC
		return _InterlockedExchangeAdd( (volatile long*)_ptr, _add);
#else
		return __sync_fetch_and_add(_ptr, _add);
#endif // BX_COMPILER
	}

	template<>
	inline uint32_t atomicFetchAndAdd<uint32_t>(volatile uint32_t* _ptr, uint32_t _add)
	{
		return uint32_t(atomicFetchAndAdd<int32_t>( (volatile int32_t*)_ptr, int32_t(_add) ) );
	}

	template<>
	inline int64_t atomicFetchAndAdd<int64_t>(volatile int64_t* _ptr, int64_t _add)
	{
#if BX_COMPILER_MSVC
#	if _WIN32_WINNT >= 0x600
		return _InterlockedExchangeAdd64( (volatile int64_t*)_ptr, _add);
#	else
		// _InterlockedExchangeAdd64 isn't available; emulate it with a CAS loop.
		int64_t oldVal;
		int64_t newVal = *(int64_t volatile*)_ptr;
		do
		{
			oldVal = newVal;
			newVal = atomicCompareAndSwap<int64_t>(_ptr, oldVal, newVal + _add);

		} while (oldVal != newVal); // retry until the CAS succeeds

		return oldVal;
#	endif // _WIN32_WINNT >= 0x600
#else
		return __sync_fetch_and_add(_ptr, _add);
#endif // BX_COMPILER
	}

	template<>
	inline uint64_t atomicFetchAndAdd<uint64_t>(volatile uint64_t* _ptr, uint64_t _add)
	{
		return uint64_t(atomicFetchAndAdd<int64_t>( (volatile int64_t*)_ptr, int64_t(_add) ) );
	}

	// Add-and-fetch: atomically adds _add to *_ptr and returns the value from
	// *after* the addition.
	template<>
	inline int32_t atomicAddAndFetch<int32_t>(volatile int32_t* _ptr, int32_t _add)
	{
#if BX_COMPILER_MSVC
		return atomicFetchAndAdd(_ptr, _add) + _add;
#else
		return __sync_add_and_fetch(_ptr, _add);
#endif // BX_COMPILER
	}

	template<>
	inline int64_t atomicAddAndFetch<int64_t>(volatile int64_t* _ptr, int64_t _add)
	{
#if BX_COMPILER_MSVC
		return atomicFetchAndAdd(_ptr, _add) + _add;
#else
		return __sync_add_and_fetch(_ptr, _add);
#endif // BX_COMPILER
	}

	template<>
	inline uint32_t atomicAddAndFetch<uint32_t>(volatile uint32_t* _ptr, uint32_t _add)
	{
		return uint32_t(atomicAddAndFetch<int32_t>( (volatile int32_t*)_ptr, int32_t(_add) ) );
	}

	template<>
	inline uint64_t atomicAddAndFetch<uint64_t>(volatile uint64_t* _ptr, uint64_t _add)
	{
		return uint64_t(atomicAddAndFetch<int64_t>( (volatile int64_t*)_ptr, int64_t(_add) ) );
	}

	// Fetch-and-sub: atomically subtracts _sub from *_ptr and returns the value
	// from *before* the subtraction.
	template<>
	inline int32_t atomicFetchAndSub<int32_t>(volatile int32_t* _ptr, int32_t _sub)
	{
#if BX_COMPILER_MSVC
		return atomicFetchAndAdd(_ptr, -_sub);
#else
		return __sync_fetch_and_sub(_ptr, _sub);
#endif // BX_COMPILER
	}

	template<>
	inline int64_t atomicFetchAndSub<int64_t>(volatile int64_t* _ptr, int64_t _sub)
	{
#if BX_COMPILER_MSVC
		return atomicFetchAndAdd(_ptr, -_sub);
#else
		return __sync_fetch_and_sub(_ptr, _sub);
#endif // BX_COMPILER
	}

	template<>
	inline uint32_t atomicFetchAndSub<uint32_t>(volatile uint32_t* _ptr, uint32_t _sub)
	{
		return uint32_t(atomicFetchAndSub<int32_t>( (volatile int32_t*)_ptr, int32_t(_sub) ) );
	}

	template<>
	inline uint64_t atomicFetchAndSub<uint64_t>(volatile uint64_t* _ptr, uint64_t _sub)
	{
		return uint64_t(atomicFetchAndSub<int64_t>( (volatile int64_t*)_ptr, int64_t(_sub) ) );
	}

	// Sub-and-fetch: atomically subtracts _sub from *_ptr and returns the value
	// from *after* the subtraction.
	template<>
	inline int32_t atomicSubAndFetch<int32_t>(volatile int32_t* _ptr, int32_t _sub)
	{
#if BX_COMPILER_MSVC
		return atomicFetchAndAdd(_ptr, -_sub) - _sub;
#else
		return __sync_sub_and_fetch(_ptr, _sub);
#endif // BX_COMPILER
	}

	template<>
	inline int64_t atomicSubAndFetch<int64_t>(volatile int64_t* _ptr, int64_t _sub)
	{
#if BX_COMPILER_MSVC
		return atomicFetchAndAdd(_ptr, -_sub) - _sub;
#else
		return __sync_sub_and_fetch(_ptr, _sub);
#endif // BX_COMPILER
	}

	template<>
	inline uint32_t atomicSubAndFetch<uint32_t>(volatile uint32_t* _ptr, uint32_t _sub)
	{
		return uint32_t(atomicSubAndFetch<int32_t>( (volatile int32_t*)_ptr, int32_t(_sub) ) );
	}

	template<>
	inline uint64_t atomicSubAndFetch<uint64_t>(volatile uint64_t* _ptr, uint64_t _sub)
	{
		return uint64_t(atomicSubAndFetch<int64_t>( (volatile int64_t*)_ptr, int64_t(_sub) ) );
	}
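	// A minimal usage sketch (`s_refCount` and `destroy` are hypothetical):
	// reference counting. The fetchAnd* ops return the value before the update
	// and the *AndFetch ops return the value after it, so the thread that
	// drops the count to zero is the one that does the cleanup.
	//
	//	atomicAddAndFetch<int32_t>(&s_refCount, 1); // retain
	//	if (0 == atomicSubAndFetch<int32_t>(&s_refCount, 1) ) // release
	//	{
	//		destroy();
	//	}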
	// Adds _value while the current value is below _test; values at or above
	// _test are clamped to _test. Returns the previous value.
	template<typename Ty>
	inline Ty atomicFetchTestAndAdd(volatile Ty* _ptr, Ty _test, Ty _value)
	{
		Ty oldVal;
		Ty newVal = *_ptr;
		do
		{
			oldVal = newVal;
			newVal = atomicCompareAndSwap<Ty>(_ptr, oldVal, newVal >= _test ? _test : newVal+_value);

		} while (oldVal != newVal); // retry until the CAS succeeds

		return oldVal;
	}

	// Subtracts _value while the current value is above _test; values at or
	// below _test are clamped to _test. Returns the previous value.
	template<typename Ty>
	inline Ty atomicFetchTestAndSub(volatile Ty* _ptr, Ty _test, Ty _value)
	{
		Ty oldVal;
		Ty newVal = *_ptr;
		do
		{
			oldVal = newVal;
			newVal = atomicCompareAndSwap<Ty>(_ptr, oldVal, newVal <= _test ? _test : newVal-_value);

		} while (oldVal != newVal); // retry until the CAS succeeds

		return oldVal;
	}

	// Saturating add: adds _value without letting the result grow past _max.
	// Returns the previous value.
	template<typename Ty>
	Ty atomicFetchAndAddsat(volatile Ty* _ptr, Ty _value, Ty _max)
	{
		Ty oldVal;
		Ty newVal = *_ptr;
		do
		{
			oldVal = newVal;
			newVal = atomicCompareAndSwap<Ty>(_ptr, oldVal, newVal >= _max ? _max : min(_max, newVal+_value) );

		} while (oldVal != newVal && oldVal != _max); // retry unless saturated

		return oldVal;
	}

	// Saturating subtract: subtracts _value without letting the result drop
	// below _min. Returns the previous value.
	template<typename Ty>
	Ty atomicFetchAndSubsat(volatile Ty* _ptr, Ty _value, Ty _min)
	{
		Ty oldVal;
		Ty newVal = *_ptr;
		do
		{
			oldVal = newVal;
			newVal = atomicCompareAndSwap<Ty>(_ptr, oldVal, newVal <= _min ? _min : max(_min, newVal-_value) );

		} while (oldVal != newVal && oldVal != _min); // retry unless saturated

		return oldVal;
	}

	// Atomically stores _new at *_ptr and returns the previous pointer. Note
	// that the non-MSVC path uses __sync_lock_test_and_set, which is documented
	// as an acquire barrier only, not a full barrier.
	inline void* atomicExchangePtr(void** _ptr, void* _new)
	{
#if BX_COMPILER_MSVC
		return _InterlockedExchangePointer(_ptr, _new);
#else
		return __sync_lock_test_and_set(_ptr, _new);
#endif // BX_COMPILER
	}

} // namespace bx
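// A minimal usage sketch (`s_pending` and `consume` are hypothetical): handing
// a pointer from a producer thread to a consumer with atomicExchangePtr, which
// atomically takes the published value and leaves NULL in its place.
//
//	static void* s_pending = NULL;
//
//	void* ptr = bx::atomicExchangePtr(&s_pending, NULL);
//	if (NULL != ptr)
//	{
//		consume(ptr);
//	}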