#ifndef Py_ATOMIC_H
#define Py_ATOMIC_H
#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
#  error "this header requires Py_BUILD_CORE to be defined"
#endif

#include "dynamic_annotations.h"

#include "pyconfig.h"

#if defined(HAVE_STD_ATOMIC)
#include <stdatomic.h>
#endif


#if defined(_MSC_VER)
#include <intrin.h>
#if defined(_M_IX86) || defined(_M_X64)
#  include <immintrin.h>
#endif
#endif

/* This is modeled after the atomics interface from C1x, according to
 * the draft at
 * http://www.open-std.org/JTC1/SC22/wg14/www/docs/n1425.pdf.
 * Operations and types are named the same except with a _Py_ prefix
 * and have the same semantics.
 *
 * Beware, the implementations here are deep magic.
 */
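
/* For illustration only: a minimal sketch of the intended usage, in the
 * spirit of the C11 interface named above.  The variable name `request`
 * is hypothetical and is not defined by this header.
 *
 *     static _Py_atomic_int request;
 *
 *     _Py_atomic_store_explicit(&request, 1, _Py_memory_order_release);
 *     if (_Py_atomic_load_explicit(&request, _Py_memory_order_acquire)) {
 *         ... react to the request ...
 *     }
 */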

#if defined(HAVE_STD_ATOMIC)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed = memory_order_relaxed,
    _Py_memory_order_acquire = memory_order_acquire,
    _Py_memory_order_release = memory_order_release,
    _Py_memory_order_acq_rel = memory_order_acq_rel,
    _Py_memory_order_seq_cst = memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    atomic_uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    atomic_int _value;
} _Py_atomic_int;

#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
    atomic_signal_fence(ORDER)

#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
    atomic_thread_fence(ORDER)

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    atomic_store_explicit(&((ATOMIC_VAL)->_value), NEW_VAL, ORDER)

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    atomic_load_explicit(&((ATOMIC_VAL)->_value), ORDER)

/* Use builtin atomic operations in GCC >= 4.7 */
#elif defined(HAVE_BUILTIN_ATOMIC)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed = __ATOMIC_RELAXED,
    _Py_memory_order_acquire = __ATOMIC_ACQUIRE,
    _Py_memory_order_release = __ATOMIC_RELEASE,
    _Py_memory_order_acq_rel = __ATOMIC_ACQ_REL,
    _Py_memory_order_seq_cst = __ATOMIC_SEQ_CST
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;

#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
    __atomic_signal_fence(ORDER)

#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
    __atomic_thread_fence(ORDER)

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    (assert((ORDER) == __ATOMIC_RELAXED                       \
            || (ORDER) == __ATOMIC_SEQ_CST                    \
            || (ORDER) == __ATOMIC_RELEASE),                  \
     __atomic_store_n(&((ATOMIC_VAL)->_value), NEW_VAL, ORDER))

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER)           \
    (assert((ORDER) == __ATOMIC_RELAXED                       \
            || (ORDER) == __ATOMIC_SEQ_CST                    \
            || (ORDER) == __ATOMIC_ACQUIRE                    \
            || (ORDER) == __ATOMIC_CONSUME),                  \
     __atomic_load_n(&((ATOMIC_VAL)->_value), ORDER))

/* Only support GCC (for expression statements) and x86 (for simple
 * atomic semantics) and MSVC x86/x64/ARM */
#elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64))
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;


static __inline__ void
_Py_atomic_signal_fence(_Py_memory_order order)
{
    if (order != _Py_memory_order_relaxed)
        __asm__ volatile("":::"memory");
}

static __inline__ void
_Py_atomic_thread_fence(_Py_memory_order order)
{
    if (order != _Py_memory_order_relaxed)
        __asm__ volatile("mfence":::"memory");
}

/* Tell the race checker about this operation's effects. */
static __inline__ void
_Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
{
    (void)address;              /* shut up -Wunused-parameter */
    switch(order) {
    case _Py_memory_order_release:
    case _Py_memory_order_acq_rel:
    case _Py_memory_order_seq_cst:
        _Py_ANNOTATE_HAPPENS_BEFORE(address);
        break;
    case _Py_memory_order_relaxed:
    case _Py_memory_order_acquire:
        break;
    }
    switch(order) {
    case _Py_memory_order_acquire:
    case _Py_memory_order_acq_rel:
    case _Py_memory_order_seq_cst:
        _Py_ANNOTATE_HAPPENS_AFTER(address);
        break;
    case _Py_memory_order_relaxed:
    case _Py_memory_order_release:
        break;
    }
}

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    __extension__ ({ \
        __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
        __typeof__(atomic_val->_value) new_val = NEW_VAL; \
        volatile __typeof__(new_val) *volatile_data = &atomic_val->_value; \
        _Py_memory_order order = ORDER; \
        _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
        \
        /* Perform the operation. */ \
        _Py_ANNOTATE_IGNORE_WRITES_BEGIN(); \
        switch(order) { \
        case _Py_memory_order_release: \
            _Py_atomic_signal_fence(_Py_memory_order_release); \
            /* fallthrough */ \
        case _Py_memory_order_relaxed: \
            *volatile_data = new_val; \
            break; \
        \
        case _Py_memory_order_acquire: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            __asm__ volatile("xchg %0, %1" \
                         : "+r"(new_val) \
                         : "m"(atomic_val->_value) \
                         : "memory"); \
            break; \
        } \
        _Py_ANNOTATE_IGNORE_WRITES_END(); \
    })
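
/* Note on the store macro above: on x86, `xchg` with a memory operand has
 * an implicit LOCK prefix and acts as a full memory barrier, which is why
 * the acquire/acq_rel/seq_cst cases use it rather than a plain store
 * followed by an explicit mfence. */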

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    __extension__ ({  \
        __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
        __typeof__(atomic_val->_value) result; \
        volatile __typeof__(result) *volatile_data = &atomic_val->_value; \
        _Py_memory_order order = ORDER; \
        _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
        \
        /* Perform the operation. */ \
        _Py_ANNOTATE_IGNORE_READS_BEGIN(); \
        switch(order) { \
        case _Py_memory_order_release: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            /* Loads on x86 are not releases by default, so need a */ \
            /* thread fence. */ \
            _Py_atomic_thread_fence(_Py_memory_order_release); \
            break; \
        default: \
            /* No fence */ \
            break; \
        } \
        result = *volatile_data; \
        switch(order) { \
        case _Py_memory_order_acquire: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            /* Loads on x86 are automatically acquire operations so */ \
            /* can get by with just a compiler fence. */ \
            _Py_atomic_signal_fence(_Py_memory_order_acquire); \
            break; \
        default: \
            /* No fence */ \
            break; \
        } \
        _Py_ANNOTATE_IGNORE_READS_END(); \
        result; \
    })

#elif defined(_MSC_VER)
/*  _Interlocked* functions provide a full memory barrier and are therefore
    enough for acq_rel and seq_cst. If the HLE variants aren't available
    in hardware they will fall back to a full memory barrier as well.

    This might affect performance, but likely only in some very specific
    and hard-to-measure scenarios.
*/
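
/* For example (illustration only; `var` is a hypothetical _Py_atomic_int,
 * not something defined here), a sequentially consistent store such as
 *
 *     _Py_atomic_store_explicit(&var, 1, _Py_memory_order_seq_cst);
 *
 * ends up, for a 32-bit value, as an _InterlockedExchange call, whose full
 * barrier is at least as strong as seq_cst requires. */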
#if defined(_M_IX86) || defined(_M_X64)
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;


#if defined(_M_X64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
      _InterlockedExchange64_HLEAcquire((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
      break; \
    case _Py_memory_order_release: \
      _InterlockedExchange64_HLERelease((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
      break; \
    default: \
      _InterlockedExchange64((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
      break; \
  }
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange_HLEAcquire((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange_HLERelease((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
    break; \
  default: \
    _InterlockedExchange((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
    break; \
  }

#if defined(_M_X64)
/*  This has to be an intptr_t for now.
    gil_created() uses -1 as a sentinel value; if this returned
    a uintptr_t it would do an unsigned compare and crash.
*/
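/*  Note: for every ordering except relaxed, the loads below are done as a
    compare-exchange of the value with itself.  The exchange succeeds only
    when the value is unchanged, so the returned `old` is an atomically
    observed snapshot, and the interlocked operation supplies the barrier
    that the requested memory order needs; relaxed loads are plain
    volatile reads.
*/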
inline intptr_t _Py_atomic_load_64bit_impl(volatile uintptr_t* value, int order) {
    __int64 old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_HLEAcquire((volatile __int64*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_HLERelease((volatile __int64*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64((volatile __int64*)value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_64bit_impl((volatile uintptr_t*)&((ATOMIC_VAL)->_value), (ORDER))

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) ((ATOMIC_VAL)->_value)
#endif

inline int _Py_atomic_load_32bit_impl(volatile int* value, int order) {
    long old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_HLEAcquire((volatile long*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_HLERelease((volatile long*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange((volatile long*)value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_32bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_32bit_impl((volatile int*)&((ATOMIC_VAL)->_value), (ORDER))

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
  if (sizeof((ATOMIC_VAL)->_value) == 8) { \
    _Py_atomic_store_64bit((ATOMIC_VAL), NEW_VAL, ORDER) } else { \
    _Py_atomic_store_32bit((ATOMIC_VAL), NEW_VAL, ORDER) }

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
  ( \
    sizeof((ATOMIC_VAL)->_value) == 8 ? \
    _Py_atomic_load_64bit((ATOMIC_VAL), ORDER) : \
    _Py_atomic_load_32bit((ATOMIC_VAL), ORDER) \
  )
#elif defined(_M_ARM) || defined(_M_ARM64)
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;


#if defined(_M_ARM64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
      _InterlockedExchange64_acq((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
      break; \
    case _Py_memory_order_release: \
      _InterlockedExchange64_rel((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
      break; \
    default: \
      _InterlockedExchange64((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
      break; \
  }
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange_acq((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange_rel((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
    break; \
  default: \
    _InterlockedExchange((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
    break; \
  }

#if defined(_M_ARM64)
/*  This has to be an intptr_t for now.
    gil_created() uses -1 as a sentinel value; if this returned
    a uintptr_t it would do an unsigned compare and crash.
*/
inline intptr_t _Py_atomic_load_64bit_impl(volatile uintptr_t* value, int order) {
    uintptr_t old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_acq(value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_rel(value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64(value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_64bit_impl((volatile uintptr_t*)&((ATOMIC_VAL)->_value), (ORDER))

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) ((ATOMIC_VAL)->_value)
#endif

inline int _Py_atomic_load_32bit_impl(volatile int* value, int order) {
    int old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_acq(value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_rel(value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange(value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_32bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_32bit_impl((volatile int*)&((ATOMIC_VAL)->_value), (ORDER))

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
  if (sizeof((ATOMIC_VAL)->_value) == 8) { \
    _Py_atomic_store_64bit((ATOMIC_VAL), (NEW_VAL), (ORDER)) } else { \
    _Py_atomic_store_32bit((ATOMIC_VAL), (NEW_VAL), (ORDER)) }

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
  ( \
    sizeof((ATOMIC_VAL)->_value) == 8 ? \
    _Py_atomic_load_64bit((ATOMIC_VAL), (ORDER)) : \
    _Py_atomic_load_32bit((ATOMIC_VAL), (ORDER)) \
  )
#endif
#else  /* !gcc x86  !_msc_ver */
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;
/* Fall back to other compilers and processors by assuming that simple
   volatile accesses are atomic.  This is false, so people should port
   this. */
#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) ((void)0)
#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) ((void)0)
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    ((ATOMIC_VAL)->_value = NEW_VAL)
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    ((ATOMIC_VAL)->_value)
#endif

/* Standardized shortcuts. */
#define _Py_atomic_store(ATOMIC_VAL, NEW_VAL) \
    _Py_atomic_store_explicit((ATOMIC_VAL), (NEW_VAL), _Py_memory_order_seq_cst)
#define _Py_atomic_load(ATOMIC_VAL) \
    _Py_atomic_load_explicit((ATOMIC_VAL), _Py_memory_order_seq_cst)

/* Python-local extensions */

#define _Py_atomic_store_relaxed(ATOMIC_VAL, NEW_VAL) \
    _Py_atomic_store_explicit((ATOMIC_VAL), (NEW_VAL), _Py_memory_order_relaxed)
#define _Py_atomic_load_relaxed(ATOMIC_VAL) \
    _Py_atomic_load_explicit((ATOMIC_VAL), _Py_memory_order_relaxed)
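
/* A minimal usage sketch of the shortcuts above (illustration only; the
 * name `eval_breaker` is hypothetical and merely mirrors how the
 * interpreter core typically consumes these helpers):
 *
 *     static _Py_atomic_int eval_breaker;
 *
 *     _Py_atomic_store_relaxed(&eval_breaker, 1);
 *     if (_Py_atomic_load_relaxed(&eval_breaker)) {
 *         ... handle the pending request ...
 *     }
 */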

#ifdef __cplusplus
}
#endif
#endif  /* Py_ATOMIC_H */