#ifndef Py_ATOMIC_H
#define Py_ATOMIC_H
#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
#  error "this header requires Py_BUILD_CORE define"
#endif

#include "dynamic_annotations.h"

#include "pyconfig.h"

#if defined(HAVE_STD_ATOMIC)
#include <stdatomic.h>
#endif


#if defined(_MSC_VER)
#include <intrin.h>
#if defined(_M_IX86) || defined(_M_X64)
#  include <immintrin.h>
#endif
#endif

/* This is modeled after the atomics interface from C1x, according to
 * the draft at
 * http://www.open-std.org/JTC1/SC22/wg14/www/docs/n1425.pdf.
 * Operations and types are named the same except with a _Py_ prefix
 * and have the same semantics.
 *
 * Beware, the implementations here are deep magic.
 */
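
/* Illustrative usage sketch.  The variable below is hypothetical and is
 * shown only to convey the shape of the API; it is not part of CPython:
 *
 *     static _Py_atomic_int _example_flag = {0};
 *
 *     _Py_atomic_store_explicit(&_example_flag, 1, _Py_memory_order_release);
 *     int ready = _Py_atomic_load_explicit(&_example_flag,
 *                                          _Py_memory_order_acquire);
 */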

#if defined(HAVE_STD_ATOMIC)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed = memory_order_relaxed,
    _Py_memory_order_acquire = memory_order_acquire,
    _Py_memory_order_release = memory_order_release,
    _Py_memory_order_acq_rel = memory_order_acq_rel,
    _Py_memory_order_seq_cst = memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    atomic_uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    atomic_int _value;
} _Py_atomic_int;

#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
    atomic_signal_fence(ORDER)

#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
    atomic_thread_fence(ORDER)

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    atomic_store_explicit(&((ATOMIC_VAL)->_value), NEW_VAL, ORDER)

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    atomic_load_explicit(&((ATOMIC_VAL)->_value), ORDER)

/* Use builtin atomic operations in GCC >= 4.7 */
#elif defined(HAVE_BUILTIN_ATOMIC)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed = __ATOMIC_RELAXED,
    _Py_memory_order_acquire = __ATOMIC_ACQUIRE,
    _Py_memory_order_release = __ATOMIC_RELEASE,
    _Py_memory_order_acq_rel = __ATOMIC_ACQ_REL,
    _Py_memory_order_seq_cst = __ATOMIC_SEQ_CST
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;

#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
    __atomic_signal_fence(ORDER)

#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
    __atomic_thread_fence(ORDER)

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    (assert((ORDER) == __ATOMIC_RELAXED                       \
            || (ORDER) == __ATOMIC_SEQ_CST                    \
            || (ORDER) == __ATOMIC_RELEASE),                  \
     __atomic_store_n(&((ATOMIC_VAL)->_value), NEW_VAL, ORDER))

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER)           \
    (assert((ORDER) == __ATOMIC_RELAXED                       \
            || (ORDER) == __ATOMIC_SEQ_CST                    \
            || (ORDER) == __ATOMIC_ACQUIRE                    \
            || (ORDER) == __ATOMIC_CONSUME),                  \
     __atomic_load_n(&((ATOMIC_VAL)->_value), ORDER))

/* Only support GCC (for expression statements) and x86 (for simple
 * atomic semantics) and MSVC x86/x64/ARM */
#elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64))
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;


static __inline__ void
_Py_atomic_signal_fence(_Py_memory_order order)
{
    if (order != _Py_memory_order_relaxed)
        __asm__ volatile("":::"memory");
}

static __inline__ void
_Py_atomic_thread_fence(_Py_memory_order order)
{
    if (order != _Py_memory_order_relaxed)
        __asm__ volatile("mfence":::"memory");
}

/* Tell the race checker about this operation's effects. */
static __inline__ void
_Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
{
    (void)address;              /* shut up -Wunused-parameter */
    switch(order) {
    case _Py_memory_order_release:
    case _Py_memory_order_acq_rel:
    case _Py_memory_order_seq_cst:
        _Py_ANNOTATE_HAPPENS_BEFORE(address);
        break;
    case _Py_memory_order_relaxed:
    case _Py_memory_order_acquire:
        break;
    }
    switch(order) {
    case _Py_memory_order_acquire:
    case _Py_memory_order_acq_rel:
    case _Py_memory_order_seq_cst:
        _Py_ANNOTATE_HAPPENS_AFTER(address);
        break;
    case _Py_memory_order_relaxed:
    case _Py_memory_order_release:
        break;
    }
}

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    __extension__ ({ \
        __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
        __typeof__(atomic_val->_value) new_val = NEW_VAL; \
        volatile __typeof__(new_val) *volatile_data = &atomic_val->_value; \
        _Py_memory_order order = ORDER; \
        _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
        \
        /* Perform the operation. */ \
        _Py_ANNOTATE_IGNORE_WRITES_BEGIN(); \
        switch(order) { \
        case _Py_memory_order_release: \
            _Py_atomic_signal_fence(_Py_memory_order_release); \
            /* fallthrough */ \
        case _Py_memory_order_relaxed: \
            *volatile_data = new_val; \
            break; \
        \
        case _Py_memory_order_acquire: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            __asm__ volatile("xchg %0, %1" \
                             : "+r"(new_val) \
                             : "m"(atomic_val->_value) \
                             : "memory"); \
            break; \
        } \
        _Py_ANNOTATE_IGNORE_WRITES_END(); \
    })

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    __extension__ ({ \
        __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
        __typeof__(atomic_val->_value) result; \
        volatile __typeof__(result) *volatile_data = &atomic_val->_value; \
        _Py_memory_order order = ORDER; \
        _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
        \
        /* Perform the operation. */ \
        _Py_ANNOTATE_IGNORE_READS_BEGIN(); \
        switch(order) { \
        case _Py_memory_order_release: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            /* Loads on x86 are not releases by default, so need a */ \
            /* thread fence. */ \
            _Py_atomic_thread_fence(_Py_memory_order_release); \
            break; \
        default: \
            /* No fence */ \
            break; \
        } \
        result = *volatile_data; \
        switch(order) { \
        case _Py_memory_order_acquire: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            /* Loads on x86 are automatically acquire operations so */ \
            /* can get by with just a compiler fence. */ \
            _Py_atomic_signal_fence(_Py_memory_order_acquire); \
            break; \
        default: \
            /* No fence */ \
            break; \
        } \
        _Py_ANNOTATE_IGNORE_READS_END(); \
        result; \
    })
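
/* Rough sketch of what this GCC/x86 path emits for the seq_cst shortcuts
 * (illustrative only; actual code generation may differ):
 *
 *     _Py_atomic_store(&var, 1);   ->  xchg                (full barrier on x86)
 *     v = _Py_atomic_load(&var);   ->  mfence; plain load; compiler fence only
 */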

#elif defined(_MSC_VER)
/*  _Interlocked* functions provide a full memory barrier and are therefore
    enough for acq_rel and seq_cst.  If the HLE variants aren't available
    in hardware, they fall back to a full memory barrier as well.

    This might affect performance, but likely only in some very specific and
    hard-to-measure scenarios.
*/
#if defined(_M_IX86) || defined(_M_X64)
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;


#if defined(_M_X64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange64_HLEAcquire((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange64_HLERelease((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
    break; \
  default: \
    _InterlockedExchange64((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
    break; \
  }
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange_HLEAcquire((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange_HLERelease((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
    break; \
  default: \
    _InterlockedExchange((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
    break; \
  }

#if defined(_M_X64)
/* This has to be an intptr_t for now.
   gil_created() uses -1 as a sentinel value; if this returned a uintptr_t
   it would do an unsigned compare and crash.
*/
inline intptr_t _Py_atomic_load_64bit_impl(volatile uintptr_t* value, int order) {
    __int64 old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_HLEAcquire((volatile __int64*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_HLERelease((volatile __int64*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64((volatile __int64*)value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_64bit_impl((volatile uintptr_t*)&((ATOMIC_VAL)->_value), (ORDER))

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) ((ATOMIC_VAL)->_value)
#endif

inline int _Py_atomic_load_32bit_impl(volatile int* value, int order) {
    long old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_HLEAcquire((volatile long*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_HLERelease((volatile long*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange((volatile long*)value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_32bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_32bit_impl((volatile int*)&((ATOMIC_VAL)->_value), (ORDER))

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
  if (sizeof((ATOMIC_VAL)->_value) == 8) { \
    _Py_atomic_store_64bit((ATOMIC_VAL), NEW_VAL, ORDER) } else { \
    _Py_atomic_store_32bit((ATOMIC_VAL), NEW_VAL, ORDER) }

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
  ( \
    sizeof((ATOMIC_VAL)->_value) == 8 ? \
    _Py_atomic_load_64bit((ATOMIC_VAL), ORDER) : \
    _Py_atomic_load_32bit((ATOMIC_VAL), ORDER) \
  )
#elif defined(_M_ARM) || defined(_M_ARM64)
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;


#if defined(_M_ARM64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange64_acq((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange64_rel((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
    break; \
  default: \
    _InterlockedExchange64((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
    break; \
  }
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange_acq((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange_rel((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
    break; \
  default: \
    _InterlockedExchange((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
    break; \
  }

#if defined(_M_ARM64)
/* This has to be an intptr_t for now.
   gil_created() uses -1 as a sentinel value; if this returned a uintptr_t
   it would do an unsigned compare and crash.
*/
inline intptr_t _Py_atomic_load_64bit_impl(volatile uintptr_t* value, int order) {
    uintptr_t old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_acq(value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_rel(value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64(value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_64bit_impl((volatile uintptr_t*)&((ATOMIC_VAL)->_value), (ORDER))

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) ((ATOMIC_VAL)->_value)
#endif

inline int _Py_atomic_load_32bit_impl(volatile int* value, int order) {
    int old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_acq(value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_rel(value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange(value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_32bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_32bit_impl((volatile int*)&((ATOMIC_VAL)->_value), (ORDER))

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
  if (sizeof((ATOMIC_VAL)->_value) == 8) { \
    _Py_atomic_store_64bit((ATOMIC_VAL), (NEW_VAL), (ORDER)) } else { \
    _Py_atomic_store_32bit((ATOMIC_VAL), (NEW_VAL), (ORDER)) }

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
  ( \
    sizeof((ATOMIC_VAL)->_value) == 8 ? \
    _Py_atomic_load_64bit((ATOMIC_VAL), (ORDER)) : \
    _Py_atomic_load_32bit((ATOMIC_VAL), (ORDER)) \
  )
#endif
#else  /* !gcc x86  !_msc_ver */
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;
/* Fall back to other compilers and processors by assuming that simple
   volatile accesses are atomic.  This is false, so people should port
   this. */
#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) ((void)0)
#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) ((void)0)
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    ((ATOMIC_VAL)->_value = NEW_VAL)
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    ((ATOMIC_VAL)->_value)
#endif

/* Standardized shortcuts. */
#define _Py_atomic_store(ATOMIC_VAL, NEW_VAL) \
    _Py_atomic_store_explicit((ATOMIC_VAL), (NEW_VAL), _Py_memory_order_seq_cst)
#define _Py_atomic_load(ATOMIC_VAL) \
    _Py_atomic_load_explicit((ATOMIC_VAL), _Py_memory_order_seq_cst)

/* Python-local extensions */

#define _Py_atomic_store_relaxed(ATOMIC_VAL, NEW_VAL) \
    _Py_atomic_store_explicit((ATOMIC_VAL), (NEW_VAL), _Py_memory_order_relaxed)
#define _Py_atomic_load_relaxed(ATOMIC_VAL) \
    _Py_atomic_load_explicit((ATOMIC_VAL), _Py_memory_order_relaxed)
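
/* Illustrative use of the shortcuts above.  The variable is hypothetical
 * and shown only as a sketch:
 *
 *     static _Py_atomic_address _example_ptr;
 *
 *     _Py_atomic_store_relaxed(&_example_ptr, (uintptr_t)0);
 *     uintptr_t p = _Py_atomic_load(&_example_ptr);   // seq_cst load
 */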

#ifdef __cplusplus
}
#endif
#endif  /* Py_ATOMIC_H */