#ifndef Py_ATOMIC_H
#define Py_ATOMIC_H
#ifdef Py_BUILD_CORE

#include "dynamic_annotations.h"

#include "pyconfig.h"

#if defined(HAVE_STD_ATOMIC)
#include <stdatomic.h>
#endif


#if defined(_MSC_VER)
#include <intrin.h>
#include <immintrin.h>
#endif

/* This is modeled after the atomics interface from C1x, according to
 * the draft at
 * http://www.open-std.org/JTC1/SC22/wg14/www/docs/n1425.pdf.
 * Operations and types are named the same except with a _Py_ prefix
 * and have the same semantics.
 *
 * Beware, the implementations here are deep magic.
 */
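
/* Illustrative usage (a minimal sketch, not part of this header; the variable
 * names are hypothetical):
 *
 *     static _Py_atomic_int pending_flag;       // hypothetical flag
 *     static _Py_atomic_address current_state;  // hypothetical pointer slot
 *
 *     // Publish a value with release semantics ...
 *     _Py_atomic_store_explicit(&pending_flag, 1, _Py_memory_order_release);
 *
 *     // ... and observe it elsewhere with acquire semantics.
 *     int seen = _Py_atomic_load_explicit(&pending_flag,
 *                                         _Py_memory_order_acquire);
 *
 *     // Addresses are stored as uintptr_t values.
 *     _Py_atomic_store_explicit(&current_state, (uintptr_t)NULL,
 *                               _Py_memory_order_seq_cst);
 */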

#if defined(HAVE_STD_ATOMIC)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed = memory_order_relaxed,
    _Py_memory_order_acquire = memory_order_acquire,
    _Py_memory_order_release = memory_order_release,
    _Py_memory_order_acq_rel = memory_order_acq_rel,
    _Py_memory_order_seq_cst = memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    atomic_uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    atomic_int _value;
} _Py_atomic_int;

#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
    atomic_signal_fence(ORDER)

#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
    atomic_thread_fence(ORDER)

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    atomic_store_explicit(&(ATOMIC_VAL)->_value, NEW_VAL, ORDER)

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    atomic_load_explicit(&(ATOMIC_VAL)->_value, ORDER)

/* Use builtin atomic operations in GCC >= 4.7 */
#elif defined(HAVE_BUILTIN_ATOMIC)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed = __ATOMIC_RELAXED,
    _Py_memory_order_acquire = __ATOMIC_ACQUIRE,
    _Py_memory_order_release = __ATOMIC_RELEASE,
    _Py_memory_order_acq_rel = __ATOMIC_ACQ_REL,
    _Py_memory_order_seq_cst = __ATOMIC_SEQ_CST
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;

#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
    __atomic_signal_fence(ORDER)

#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
    __atomic_thread_fence(ORDER)

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    (assert((ORDER) == __ATOMIC_RELAXED                       \
            || (ORDER) == __ATOMIC_SEQ_CST                    \
            || (ORDER) == __ATOMIC_RELEASE),                  \
     __atomic_store_n(&(ATOMIC_VAL)->_value, NEW_VAL, ORDER))

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER)           \
    (assert((ORDER) == __ATOMIC_RELAXED                       \
            || (ORDER) == __ATOMIC_SEQ_CST                    \
            || (ORDER) == __ATOMIC_ACQUIRE                    \
            || (ORDER) == __ATOMIC_CONSUME),                  \
     __atomic_load_n(&(ATOMIC_VAL)->_value, ORDER))
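
/* Illustrative expansion under this branch (a sketch; `flag` is a
 * hypothetical _Py_atomic_int):
 *
 *     _Py_atomic_load_explicit(&flag, _Py_memory_order_acquire);
 *
 * textually becomes
 *
 *     (assert((_Py_memory_order_acquire) == __ATOMIC_RELAXED
 *             || (_Py_memory_order_acquire) == __ATOMIC_SEQ_CST
 *             || (_Py_memory_order_acquire) == __ATOMIC_ACQUIRE
 *             || (_Py_memory_order_acquire) == __ATOMIC_CONSUME),
 *      __atomic_load_n(&(&flag)->_value, _Py_memory_order_acquire));
 *
 * The assert holds because each _Py_memory_order enumerator is defined above
 * to have the value of the matching __ATOMIC_* constant.
 */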

/* Only support GCC (for expression statements) and x86 (for simple
 * atomic semantics) and MSVC x86/x64/ARM */
#elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64))
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;


static __inline__ void
_Py_atomic_signal_fence(_Py_memory_order order)
{
    if (order != _Py_memory_order_relaxed)
        __asm__ volatile("":::"memory");
}

static __inline__ void
_Py_atomic_thread_fence(_Py_memory_order order)
{
    if (order != _Py_memory_order_relaxed)
        __asm__ volatile("mfence":::"memory");
}

/* Tell the race checker about this operation's effects. */
static __inline__ void
_Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
{
    (void)address;              /* shut up -Wunused-parameter */
    switch(order) {
    case _Py_memory_order_release:
    case _Py_memory_order_acq_rel:
    case _Py_memory_order_seq_cst:
        _Py_ANNOTATE_HAPPENS_BEFORE(address);
        break;
    case _Py_memory_order_relaxed:
    case _Py_memory_order_acquire:
        break;
    }
    switch(order) {
    case _Py_memory_order_acquire:
    case _Py_memory_order_acq_rel:
    case _Py_memory_order_seq_cst:
        _Py_ANNOTATE_HAPPENS_AFTER(address);
        break;
    case _Py_memory_order_relaxed:
    case _Py_memory_order_release:
        break;
    }
}

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    __extension__ ({ \
        __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
        __typeof__(atomic_val->_value) new_val = NEW_VAL;\
        volatile __typeof__(new_val) *volatile_data = &atomic_val->_value; \
        _Py_memory_order order = ORDER; \
        _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
        \
        /* Perform the operation. */ \
        _Py_ANNOTATE_IGNORE_WRITES_BEGIN(); \
        switch(order) { \
        case _Py_memory_order_release: \
            _Py_atomic_signal_fence(_Py_memory_order_release); \
            /* fallthrough */ \
        case _Py_memory_order_relaxed: \
            *volatile_data = new_val; \
            break; \
        \
        case _Py_memory_order_acquire: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            __asm__ volatile("xchg %0, %1" \
                         : "+r"(new_val) \
                         : "m"(atomic_val->_value) \
                         : "memory"); \
            break; \
        } \
        _Py_ANNOTATE_IGNORE_WRITES_END(); \
    })

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    __extension__ ({  \
        __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
        __typeof__(atomic_val->_value) result; \
        volatile __typeof__(result) *volatile_data = &atomic_val->_value; \
        _Py_memory_order order = ORDER; \
        _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
        \
        /* Perform the operation. */ \
        _Py_ANNOTATE_IGNORE_READS_BEGIN(); \
        switch(order) { \
        case _Py_memory_order_release: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            /* Loads on x86 are not releases by default, so need a */ \
            /* thread fence. */ \
            _Py_atomic_thread_fence(_Py_memory_order_release); \
            break; \
        default: \
            /* No fence */ \
            break; \
        } \
        result = *volatile_data; \
        switch(order) { \
        case _Py_memory_order_acquire: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            /* Loads on x86 are automatically acquire operations so */ \
            /* can get by with just a compiler fence. */ \
            _Py_atomic_signal_fence(_Py_memory_order_acquire); \
            break; \
        default: \
            /* No fence */ \
            break; \
        } \
        _Py_ANNOTATE_IGNORE_READS_END(); \
        result; \
    })
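
/* Summary of the mapping used above (an informal sketch, not exhaustive):
 * on x86/x86-64, ordinary loads already have acquire semantics and ordinary
 * stores already have release semantics, so those orderings only need a
 * compiler barrier; a sequentially consistent store uses XCHG (an implicitly
 * LOCKed, fully serializing instruction) and a sequentially consistent load
 * is preceded by MFENCE via _Py_atomic_thread_fence.  For example, with a
 * hypothetical `_Py_atomic_int flag`:
 *
 *     _Py_atomic_store_explicit(&flag, 1, _Py_memory_order_relaxed);
 *     // compiles to a plain MOV through a volatile pointer
 *
 *     _Py_atomic_store_explicit(&flag, 1, _Py_memory_order_seq_cst);
 *     // compiles to XCHG, which also acts as a full memory barrier
 */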

#elif defined(_MSC_VER)
/*  _Interlocked* functions provide a full memory barrier and are therefore
    enough for acq_rel and seq_cst. If the HLE variants aren't available
    in hardware they will fall back to a full memory barrier as well.

    This might affect performance, but likely only in some very specific and
    hard-to-measure scenarios.
*/
#if defined(_M_IX86) || defined(_M_X64)
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;


#if defined(_M_X64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
      _InterlockedExchange64_HLEAcquire((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
      break; \
    case _Py_memory_order_release: \
      _InterlockedExchange64_HLERelease((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
      break; \
    default: \
      _InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
      break; \
  }
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange_HLEAcquire((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange_HLERelease((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
    break; \
  default: \
    _InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
    break; \
  }

#if defined(_M_X64)
/*  This has to be an intptr_t for now.
    gil_created() uses -1 as a sentinel value; if this returned a uintptr_t
    it would do an unsigned compare and crash.
*/
inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) {
    __int64 old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_HLEAcquire((volatile __int64*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_HLERelease((volatile __int64*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64((volatile __int64*)value, old, old) != old);
      break;
    }
    }
    return old;
}
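
/* Why the signed return type matters (an illustrative sketch, unrelated to
 * the actual GIL bookkeeping):
 *
 *     intptr_t  s = -1;            // the sentinel, kept signed
 *     uintptr_t u = (uintptr_t)-1; // the same bits, unsigned
 *
 *     (s < 0)   // true: the sentinel is detected
 *     (u < 0)   // always false: -1 wraps to UINTPTR_MAX, so an unsigned
 *               // compare can never see the sentinel
 */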

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL
#endif

inline int _Py_atomic_load_32bit(volatile int* value, int order) {
    long old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_HLEAcquire((volatile long*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_HLERelease((volatile long*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange((volatile long*)value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
  if (sizeof(*ATOMIC_VAL._value) == 8) { \
    _Py_atomic_store_64bit((volatile long long*)ATOMIC_VAL._value, NEW_VAL, ORDER) } else { \
    _Py_atomic_store_32bit((volatile long*)ATOMIC_VAL._value, NEW_VAL, ORDER) }

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
  ( \
    sizeof(*(ATOMIC_VAL._value)) == 8 ? \
    _Py_atomic_load_64bit((volatile long long*)ATOMIC_VAL._value, ORDER) : \
    _Py_atomic_load_32bit((volatile long*)ATOMIC_VAL._value, ORDER) \
  )
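
/* Note on the ATOMIC_VAL._value spelling above: these macros rely on being
 * passed the *address* of an atomic struct, so that the textual expansion of
 * `ATOMIC_VAL._value` still parses.  A sketch with a hypothetical
 * _Py_atomic_int named `flag`:
 *
 *     _Py_atomic_store_explicit(&flag, 1, _Py_memory_order_seq_cst);
 *
 * expands `ATOMIC_VAL._value` to `&flag._value` (the `.` binds tighter than
 * the `&`), i.e. a pointer to the member, which is what the 32/64-bit helpers
 * above expect; `sizeof(*ATOMIC_VAL._value)` then selects the right width at
 * compile time.
 */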
#elif defined(_M_ARM) || defined(_M_ARM64)
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;


#if defined(_M_ARM64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
      _InterlockedExchange64_acq((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
      break; \
    case _Py_memory_order_release: \
      _InterlockedExchange64_rel((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
      break; \
    default: \
      _InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
      break; \
  }
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange_acq((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange_rel((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
    break; \
  default: \
    _InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
    break; \
  }

#if defined(_M_ARM64)
/*  This has to be an intptr_t for now.
    gil_created() uses -1 as a sentinel value; if this returned a uintptr_t
    it would do an unsigned compare and crash.
*/
inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) {
    uintptr_t old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_acq(value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_rel(value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64(value, old, old) != old);
      break;
    }
    }
    return old;
}

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL
#endif

inline int _Py_atomic_load_32bit(volatile int* value, int order) {
    int old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_acq(value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_rel(value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange(value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
  if (sizeof(*ATOMIC_VAL._value) == 8) { \
    _Py_atomic_store_64bit(ATOMIC_VAL._value, NEW_VAL, ORDER) } else { \
    _Py_atomic_store_32bit(ATOMIC_VAL._value, NEW_VAL, ORDER) }

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
  ( \
    sizeof(*(ATOMIC_VAL._value)) == 8 ? \
    _Py_atomic_load_64bit(ATOMIC_VAL._value, ORDER) : \
    _Py_atomic_load_32bit(ATOMIC_VAL._value, ORDER) \
  )
#endif
#else  /* !HAVE_STD_ATOMIC, !HAVE_BUILTIN_ATOMIC, !(GCC && x86), !_MSC_VER */
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;
/* Fall back to other compilers and processors by assuming that simple
   volatile accesses are atomic.  This is false, so people should port
   this. */
#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) ((void)0)
#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) ((void)0)
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    ((ATOMIC_VAL)->_value = NEW_VAL)
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    ((ATOMIC_VAL)->_value)
#endif

/* Standardized shortcuts. */
#define _Py_atomic_store(ATOMIC_VAL, NEW_VAL) \
    _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, _Py_memory_order_seq_cst)
#define _Py_atomic_load(ATOMIC_VAL) \
    _Py_atomic_load_explicit(ATOMIC_VAL, _Py_memory_order_seq_cst)

/* Python-local extensions */

#define _Py_atomic_store_relaxed(ATOMIC_VAL, NEW_VAL) \
    _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, _Py_memory_order_relaxed)
#define _Py_atomic_load_relaxed(ATOMIC_VAL) \
    _Py_atomic_load_explicit(ATOMIC_VAL, _Py_memory_order_relaxed)
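
/* Typical use of the shortcuts (an illustrative sketch; the variable and the
 * polling pattern are hypothetical, not code from this repository):
 *
 *     static _Py_atomic_int work_pending;
 *
 *     // Producer: request work with full sequential consistency.
 *     _Py_atomic_store(&work_pending, 1);
 *
 *     // Consumer: poll cheaply with relaxed ordering, then reset.
 *     if (_Py_atomic_load_relaxed(&work_pending)) {
 *         _Py_atomic_store_relaxed(&work_pending, 0);
 *         // ... handle the request ...
 *     }
 */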
#endif  /* Py_BUILD_CORE */
#endif  /* Py_ATOMIC_H */