xref: /qemu/include/qemu/atomic.h (revision 814bb12a)
/*
 * Simple interface for atomic operations.
 *
 * Copyright (C) 2013 Red Hat, Inc.
 *
 * Author: Paolo Bonzini <pbonzini@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 * See docs/atomics.txt for discussion about the guarantees each
 * atomic primitive is meant to provide.
 */

#ifndef QEMU_ATOMIC_H
#define QEMU_ATOMIC_H

/* Compiler barrier */
#define barrier()   ({ asm volatile("" ::: "memory"); (void)0; })

/* The variable that receives the old value of an atomically-accessed
 * variable must be non-qualified, because atomic builtins return values
 * through a pointer-type argument as in __atomic_load(&var, &old, MODEL).
 *
 * This macro has to handle types smaller than int manually, because of
 * implicit promotion.  int and larger types, as well as pointers, can be
 * converted to a non-qualified type just by applying a binary operator.
 */
#define typeof_strip_qual(expr)                                                    \
  typeof(                                                                          \
    __builtin_choose_expr(                                                         \
      __builtin_types_compatible_p(typeof(expr), bool) ||                          \
        __builtin_types_compatible_p(typeof(expr), const bool) ||                  \
        __builtin_types_compatible_p(typeof(expr), volatile bool) ||               \
        __builtin_types_compatible_p(typeof(expr), const volatile bool),           \
        (bool)1,                                                                   \
    __builtin_choose_expr(                                                         \
      __builtin_types_compatible_p(typeof(expr), signed char) ||                   \
        __builtin_types_compatible_p(typeof(expr), const signed char) ||           \
        __builtin_types_compatible_p(typeof(expr), volatile signed char) ||        \
        __builtin_types_compatible_p(typeof(expr), const volatile signed char),    \
        (signed char)1,                                                            \
    __builtin_choose_expr(                                                         \
      __builtin_types_compatible_p(typeof(expr), unsigned char) ||                 \
        __builtin_types_compatible_p(typeof(expr), const unsigned char) ||         \
        __builtin_types_compatible_p(typeof(expr), volatile unsigned char) ||      \
        __builtin_types_compatible_p(typeof(expr), const volatile unsigned char),  \
        (unsigned char)1,                                                          \
    __builtin_choose_expr(                                                         \
      __builtin_types_compatible_p(typeof(expr), signed short) ||                  \
        __builtin_types_compatible_p(typeof(expr), const signed short) ||          \
        __builtin_types_compatible_p(typeof(expr), volatile signed short) ||       \
        __builtin_types_compatible_p(typeof(expr), const volatile signed short),   \
        (signed short)1,                                                           \
    __builtin_choose_expr(                                                         \
      __builtin_types_compatible_p(typeof(expr), unsigned short) ||                \
        __builtin_types_compatible_p(typeof(expr), const unsigned short) ||        \
        __builtin_types_compatible_p(typeof(expr), volatile unsigned short) ||     \
        __builtin_types_compatible_p(typeof(expr), const volatile unsigned short), \
        (unsigned short)1,                                                         \
      (expr)+0))))))
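
/* A minimal sketch of what typeof_strip_qual() yields (the variable names
 * below are made up for illustration):
 *
 *   volatile unsigned short count;
 *   typeof_strip_qual(count) old1;    declares "unsigned short old1"
 *
 *   const bool *flagp = ...;
 *   typeof_strip_qual(*flagp) old2;   declares "bool old2", which can then
 *                                     receive a value through a pointer, as
 *                                     in __atomic_load(flagp, &old2, MODEL)
 */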

#ifdef __ATOMIC_RELAXED
/* For C11 atomic ops */

/* Manual memory barriers
 *
 * __atomic_thread_fence does not include a compiler barrier; instead,
 * the barrier is part of __atomic_load/__atomic_store's "volatile-like"
 * semantics. If smp_wmb() is a no-op, absence of the barrier means that
 * the compiler is free to reorder stores on each side of the barrier.
 * Add one here, and similarly in smp_rmb() and smp_read_barrier_depends().
 */

#define smp_mb()                     ({ barrier(); __atomic_thread_fence(__ATOMIC_SEQ_CST); })
#define smp_mb_release()             ({ barrier(); __atomic_thread_fence(__ATOMIC_RELEASE); })
#define smp_mb_acquire()             ({ barrier(); __atomic_thread_fence(__ATOMIC_ACQUIRE); })
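
/* Typical pairing of the fences above (a sketch; "msg", "msg_ready" and the
 * helper functions are illustrative names, and atomic_set()/atomic_read()
 * are defined below):
 *
 *   producer                            consumer
 *   --------                            --------
 *   msg = compute_message();            if (atomic_read(&msg_ready)) {
 *   smp_mb_release();                       smp_mb_acquire();
 *   atomic_set(&msg_ready, true);           consume_message(msg);
 *                                        }
 *
 * The release fence before the flag store and the acquire fence after the
 * flag load guarantee that a consumer that observes msg_ready == true also
 * observes the fully written message.
 */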

/* Most compilers currently treat consume and acquire the same, but really
 * no processors except Alpha need a barrier here.  Leave it in if
 * using Thread Sanitizer to avoid warnings, otherwise optimize it away.
 */
#if defined(__SANITIZE_THREAD__)
#define smp_read_barrier_depends()   ({ barrier(); __atomic_thread_fence(__ATOMIC_CONSUME); })
#elif defined(__alpha__)
#define smp_read_barrier_depends()   asm volatile("mb":::"memory")
#else
#define smp_read_barrier_depends()   barrier()
#endif


/* Weak atomic operations prevent the compiler from moving other
 * loads/stores past the atomic operation's load/store.  However, there is
 * no explicit memory barrier for the processor.
 *
 * The C11 memory model says that accesses to variables shared between
 * different threads should at least be done with __ATOMIC_RELAXED
 * primitives or the result is undefined.  Generally this has little to
 * no effect on the generated code, but not using the atomic primitives
 * will get flagged by sanitizers as a violation.
 */
#define atomic_read__nocheck(ptr) \
    __atomic_load_n(ptr, __ATOMIC_RELAXED)

#define atomic_read(ptr)                              \
    ({                                                \
    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
    atomic_read__nocheck(ptr);                        \
    })

#define atomic_set__nocheck(ptr, i) \
    __atomic_store_n(ptr, i, __ATOMIC_RELAXED)

#define atomic_set(ptr, i)  do {                      \
    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
    atomic_set__nocheck(ptr, i);                      \
} while(0)
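
/* Usage sketch (the counter name is made up for illustration): these are
 * relaxed accesses, so they guarantee atomicity and keep the sanitizers
 * happy, but they impose no ordering with respect to other memory accesses:
 *
 *   static int dirty_pages;
 *
 *   atomic_set(&dirty_pages, 0);             one thread resets the counter
 *   int n = atomic_read(&dirty_pages);       another thread samples it
 */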

/* See above: most compilers currently treat consume and acquire the
 * same, but this slows down atomic_rcu_read unnecessarily.
 */
#ifdef __SANITIZE_THREAD__
#define atomic_rcu_read__nocheck(ptr, valptr)           \
    __atomic_load(ptr, valptr, __ATOMIC_CONSUME);
#else
#define atomic_rcu_read__nocheck(ptr, valptr)           \
    __atomic_load(ptr, valptr, __ATOMIC_RELAXED);       \
    smp_read_barrier_depends();
#endif

#define atomic_rcu_read(ptr)                          \
    ({                                                \
    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
    typeof_strip_qual(*ptr) _val;                     \
    atomic_rcu_read__nocheck(ptr, &_val);             \
    _val;                                             \
    })

#define atomic_rcu_set(ptr, i) do {                   \
    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
    __atomic_store_n(ptr, i, __ATOMIC_RELEASE);       \
} while(0)

#define atomic_load_acquire(ptr)                        \
    ({                                                  \
    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *));   \
    typeof_strip_qual(*ptr) _val;                       \
    __atomic_load(ptr, &_val, __ATOMIC_ACQUIRE);        \
    _val;                                               \
    })

#define atomic_store_release(ptr, i)  do {              \
    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *));   \
    __atomic_store_n(ptr, i, __ATOMIC_RELEASE);         \
} while(0)
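
/* When a single variable carries the synchronization, the acquire/release
 * accesses above can replace the explicit fences.  A sketch, with
 * illustrative names:
 *
 *   producer                                consumer
 *   --------                                --------
 *   data = prepare_data();                  if (atomic_load_acquire(&ready)) {
 *   atomic_store_release(&ready, true);         use_data(data);
 *                                            }
 *
 * Everything written before the release store is visible to a thread whose
 * acquire load observes the stored value.
 */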


/* All the remaining operations are fully sequentially consistent */

#define atomic_xchg__nocheck(ptr, i)    ({                  \
    __atomic_exchange_n(ptr, (i), __ATOMIC_SEQ_CST);        \
})

#define atomic_xchg(ptr, i)    ({                           \
    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *));       \
    atomic_xchg__nocheck(ptr, i);                           \
})
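
/* One way to use atomic_xchg() is a simple test-and-set lock.  This is only
 * an illustrative sketch ("lock_taken" is a made-up name); real QEMU code
 * would normally use QemuMutex or similar locking primitives instead:
 *
 *   static int lock_taken;
 *
 *   while (atomic_xchg(&lock_taken, 1)) {
 *       ;                                    spin until the lock is free
 *   }
 *   ... critical section ...
 *   atomic_store_release(&lock_taken, 0);
 */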

/* Returns the value found in *ptr, whether the exchange succeeded or not */
#define atomic_cmpxchg__nocheck(ptr, old, new)    ({                    \
    typeof_strip_qual(*ptr) _old = (old);                               \
    __atomic_compare_exchange_n(ptr, &_old, new, false,                 \
                              __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);      \
    _old;                                                               \
})

#define atomic_cmpxchg(ptr, old, new)    ({                             \
    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *));                   \
    atomic_cmpxchg__nocheck(ptr, old, new);                             \
})
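
/* A typical compare-and-swap retry loop (a sketch; "counter" and the
 * doubling update are made up for illustration):
 *
 *   int old = atomic_read(&counter);
 *   for (;;) {
 *       int seen = atomic_cmpxchg(&counter, old, old * 2);
 *       if (seen == old) {
 *           break;                   the exchange succeeded
 *       }
 *       old = seen;                  another thread raced with us, retry
 *   }
 */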

/* Provide shorter names for GCC atomic builtins, return old value */
#define atomic_fetch_inc(ptr)  __atomic_fetch_add(ptr, 1, __ATOMIC_SEQ_CST)
#define atomic_fetch_dec(ptr)  __atomic_fetch_sub(ptr, 1, __ATOMIC_SEQ_CST)
#define atomic_fetch_add(ptr, n) __atomic_fetch_add(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_fetch_sub(ptr, n) __atomic_fetch_sub(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_fetch_and(ptr, n) __atomic_fetch_and(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_fetch_or(ptr, n)  __atomic_fetch_or(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_fetch_xor(ptr, n) __atomic_fetch_xor(ptr, n, __ATOMIC_SEQ_CST)

#define atomic_inc_fetch(ptr)    __atomic_add_fetch(ptr, 1, __ATOMIC_SEQ_CST)
#define atomic_dec_fetch(ptr)    __atomic_sub_fetch(ptr, 1, __ATOMIC_SEQ_CST)
#define atomic_add_fetch(ptr, n) __atomic_add_fetch(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_sub_fetch(ptr, n) __atomic_sub_fetch(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_and_fetch(ptr, n) __atomic_and_fetch(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_or_fetch(ptr, n)  __atomic_or_fetch(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_xor_fetch(ptr, n) __atomic_xor_fetch(ptr, n, __ATOMIC_SEQ_CST)

/* And even shorter names that return void.  */
#define atomic_inc(ptr)    ((void) __atomic_fetch_add(ptr, 1, __ATOMIC_SEQ_CST))
#define atomic_dec(ptr)    ((void) __atomic_fetch_sub(ptr, 1, __ATOMIC_SEQ_CST))
#define atomic_add(ptr, n) ((void) __atomic_fetch_add(ptr, n, __ATOMIC_SEQ_CST))
#define atomic_sub(ptr, n) ((void) __atomic_fetch_sub(ptr, n, __ATOMIC_SEQ_CST))
#define atomic_and(ptr, n) ((void) __atomic_fetch_and(ptr, n, __ATOMIC_SEQ_CST))
#define atomic_or(ptr, n)  ((void) __atomic_fetch_or(ptr, n, __ATOMIC_SEQ_CST))
#define atomic_xor(ptr, n) ((void) __atomic_fetch_xor(ptr, n, __ATOMIC_SEQ_CST))
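
/* The difference between the two groups above is only the return value;
 * a sketch with a made-up reference counter:
 *
 *   unsigned refcount = 1;
 *
 *   atomic_fetch_inc(&refcount);             returns 1, refcount is now 2
 *   atomic_inc_fetch(&refcount);             returns 3, refcount is now 3
 *
 *   if (atomic_dec_fetch(&refcount) == 0) {
 *       ... true only once the last reference has been dropped ...
 *   }
 */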

#else /* __ATOMIC_RELAXED */

/*
 * We use the GCC builtin if it's available, as that can use mfence on
 * 32-bit as well, e.g. if built with -march=pentium-m.  However, on
 * i386 the spec is buggy, and the implementation followed it until
 * GCC 4.3 (http://gcc.gnu.org/bugzilla/show_bug.cgi?id=36793).
 */
#if defined(__i386__) || defined(__x86_64__)
#if !QEMU_GNUC_PREREQ(4, 4)
#if defined __x86_64__
#define smp_mb()    ({ asm volatile("mfence" ::: "memory"); (void)0; })
#else
#define smp_mb()    ({ asm volatile("lock; addl $0,0(%%esp) " ::: "memory"); (void)0; })
#endif
#endif
#endif


#ifdef __alpha__
#define smp_read_barrier_depends()   asm volatile("mb":::"memory")
#endif

#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__)

/*
 * Because of the strongly ordered storage model, wmb() and rmb() are nops
 * here (a compiler barrier only).  QEMU doesn't do accesses to write-combining
 * memory or non-temporal load/stores from C code.
 */
#define smp_mb_release()   barrier()
#define smp_mb_acquire()   barrier()

/*
 * __sync_lock_test_and_set() is documented to be an acquire barrier only,
 * but it is a full barrier at the hardware level.  Add a compiler barrier
 * to make it a full barrier also at the compiler level.
 */
#define atomic_xchg(ptr, i)    (barrier(), __sync_lock_test_and_set(ptr, i))

#elif defined(_ARCH_PPC)

/*
 * We use an eieio() for wmb() on powerpc.  This assumes we don't
 * need to order cacheable and non-cacheable stores with respect to
 * each other.
 *
 * smp_mb has the same problem as on x86 for not-very-new GCC
 * (http://patchwork.ozlabs.org/patch/126184/, Nov 2011).
 */
#define smp_wmb()          ({ asm volatile("eieio" ::: "memory"); (void)0; })
#if defined(__powerpc64__)
#define smp_mb_release()   ({ asm volatile("lwsync" ::: "memory"); (void)0; })
#define smp_mb_acquire()   ({ asm volatile("lwsync" ::: "memory"); (void)0; })
#else
#define smp_mb_release()   ({ asm volatile("sync" ::: "memory"); (void)0; })
#define smp_mb_acquire()   ({ asm volatile("sync" ::: "memory"); (void)0; })
#endif
#define smp_mb()           ({ asm volatile("sync" ::: "memory"); (void)0; })

#endif /* _ARCH_PPC */

/*
 * For (host) platforms we don't have explicit barrier definitions
 * for, we use the gcc __sync_synchronize() primitive to generate a
 * full barrier.  This should be safe on all platforms, though it may
 * be overkill for smp_mb_acquire() and smp_mb_release().
 */
#ifndef smp_mb
#define smp_mb()           __sync_synchronize()
#endif

#ifndef smp_mb_acquire
#define smp_mb_acquire()   __sync_synchronize()
#endif

#ifndef smp_mb_release
#define smp_mb_release()   __sync_synchronize()
#endif

#ifndef smp_read_barrier_depends
#define smp_read_barrier_depends()   barrier()
#endif

/* These will only be atomic if the processor does the fetch or store
 * in a single issue memory operation
 */
#define atomic_read__nocheck(p)   (*(__typeof__(*(p)) volatile*) (p))
#define atomic_set__nocheck(p, i) ((*(__typeof__(*(p)) volatile*) (p)) = (i))

#define atomic_read(ptr)       atomic_read__nocheck(ptr)
#define atomic_set(ptr, i)     atomic_set__nocheck(ptr,i)

/**
 * atomic_rcu_read - reads an RCU-protected pointer into a local variable
 * inside an RCU read-side critical section.  The pointer can later be safely
 * dereferenced within the critical section.
 *
 * This ensures that the pointer copy is invariant throughout the whole
 * critical section.
 *
 * Inserts memory barriers on architectures that require them (currently only
 * Alpha) and documents which pointers are protected by RCU.
 *
 * atomic_rcu_read also includes a compiler barrier to ensure that
 * value-speculative optimizations (e.g. VSS: Value Speculation
 * Scheduling) do not perform the data read before the pointer read
 * by speculating the value of the pointer.
 *
 * Should match atomic_rcu_set(), atomic_xchg(), atomic_cmpxchg().
 */
#define atomic_rcu_read(ptr)    ({                \
    typeof(*ptr) _val = atomic_read(ptr);         \
    smp_read_barrier_depends();                   \
    _val;                                         \
})

/**
 * atomic_rcu_set - assigns (publishes) a pointer to a new data structure
 * meant to be read by RCU read-side critical sections.
 *
 * Documents which pointers will be dereferenced by RCU read-side critical
 * sections and adds the required memory barriers on architectures requiring
 * them. It also makes sure the compiler does not reorder the initialization
 * of the data structure after its publication.
 *
 * Should match atomic_rcu_read().
 */
#define atomic_rcu_set(ptr, i)  do {              \
    smp_wmb();                                    \
    atomic_set(ptr, i);                           \
} while (0)
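
/* Typical pairing of atomic_rcu_set() with atomic_rcu_read().  This is only
 * a sketch: "global_conf", "struct conf" and its field are illustrative
 * names, g_new0() is the usual glib allocator, and the reader is assumed to
 * run between rcu_read_lock() and rcu_read_unlock():
 *
 *   updater:
 *       struct conf *c = g_new0(struct conf, 1);
 *       c->limit = new_limit;                   initialize first...
 *       atomic_rcu_set(&global_conf, c);        ...then publish
 *
 *   reader:
 *       struct conf *p = atomic_rcu_read(&global_conf);
 *       do_something(p->limit);                 p remains valid here
 *
 * The write barrier in atomic_rcu_set() ensures the structure is fully
 * initialized before the pointer is published; atomic_rcu_read() makes the
 * dereference safe even on Alpha.
 */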

#define atomic_load_acquire(ptr)    ({      \
    typeof(*ptr) _val = atomic_read(ptr);   \
    smp_mb_acquire();                       \
    _val;                                   \
})

#define atomic_store_release(ptr, i)  do {  \
    smp_mb_release();                       \
    atomic_set(ptr, i);                     \
} while (0)

#ifndef atomic_xchg
#if defined(__clang__)
#define atomic_xchg(ptr, i)    __sync_swap(ptr, i)
#else
/* __sync_lock_test_and_set() is documented to be an acquire barrier only.  */
#define atomic_xchg(ptr, i)    (smp_mb(), __sync_lock_test_and_set(ptr, i))
#endif
#endif
#define atomic_xchg__nocheck  atomic_xchg

/* Provide shorter names for GCC atomic builtins.  */
#define atomic_fetch_inc(ptr)  __sync_fetch_and_add(ptr, 1)
#define atomic_fetch_dec(ptr)  __sync_fetch_and_add(ptr, -1)
#define atomic_fetch_add(ptr, n) __sync_fetch_and_add(ptr, n)
#define atomic_fetch_sub(ptr, n) __sync_fetch_and_sub(ptr, n)
#define atomic_fetch_and(ptr, n) __sync_fetch_and_and(ptr, n)
#define atomic_fetch_or(ptr, n) __sync_fetch_and_or(ptr, n)
#define atomic_fetch_xor(ptr, n) __sync_fetch_and_xor(ptr, n)

#define atomic_inc_fetch(ptr)  __sync_add_and_fetch(ptr, 1)
#define atomic_dec_fetch(ptr)  __sync_add_and_fetch(ptr, -1)
#define atomic_add_fetch(ptr, n) __sync_add_and_fetch(ptr, n)
#define atomic_sub_fetch(ptr, n) __sync_sub_and_fetch(ptr, n)
#define atomic_and_fetch(ptr, n) __sync_and_and_fetch(ptr, n)
#define atomic_or_fetch(ptr, n) __sync_or_and_fetch(ptr, n)
#define atomic_xor_fetch(ptr, n) __sync_xor_and_fetch(ptr, n)

#define atomic_cmpxchg(ptr, old, new) __sync_val_compare_and_swap(ptr, old, new)
#define atomic_cmpxchg__nocheck(ptr, old, new)  atomic_cmpxchg(ptr, old, new)

/* And even shorter names that return void.  */
#define atomic_inc(ptr)        ((void) __sync_fetch_and_add(ptr, 1))
#define atomic_dec(ptr)        ((void) __sync_fetch_and_add(ptr, -1))
#define atomic_add(ptr, n)     ((void) __sync_fetch_and_add(ptr, n))
#define atomic_sub(ptr, n)     ((void) __sync_fetch_and_sub(ptr, n))
#define atomic_and(ptr, n)     ((void) __sync_fetch_and_and(ptr, n))
#define atomic_or(ptr, n)      ((void) __sync_fetch_and_or(ptr, n))
#define atomic_xor(ptr, n)     ((void) __sync_fetch_and_xor(ptr, n))

#endif /* __ATOMIC_RELAXED */

#ifndef smp_wmb
#define smp_wmb()   smp_mb_release()
#endif
#ifndef smp_rmb
#define smp_rmb()   smp_mb_acquire()
#endif

/* This is more efficient than a store plus a fence.  */
#if !defined(__SANITIZE_THREAD__)
#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__)
#define atomic_mb_set(ptr, i)  ((void)atomic_xchg(ptr, i))
#endif
#endif

/* atomic_mb_read/set semantics map to Java volatile variables.  They are
 * less expensive on some platforms (notably POWER) than fully
 * sequentially consistent operations.
 *
 * As long as they are used as paired operations they are safe to
 * use.  See docs/atomics.txt for more discussion.
 */

#ifndef atomic_mb_read
#define atomic_mb_read(ptr)                             \
    atomic_load_acquire(ptr)
#endif

#ifndef atomic_mb_set
#define atomic_mb_set(ptr, i)  do {                     \
    atomic_store_release(ptr, i);                       \
    smp_mb();                                           \
} while(0)
#endif
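
/* A sketch of the kind of pairing that makes atomic_mb_read()/atomic_mb_set()
 * safe ("flag_a" and "flag_b" are illustrative names).  With both variables
 * initially zero, at most one of the two loads can observe 0, so the two
 * threads cannot both enter their critical path:
 *
 *   thread A                             thread B
 *   --------                             --------
 *   atomic_mb_set(&flag_a, 1);           atomic_mb_set(&flag_b, 1);
 *   if (atomic_mb_read(&flag_b) == 0) {  if (atomic_mb_read(&flag_a) == 0) {
 *       ... A runs alone ...                 ... B runs alone ...
 *   }                                    }
 */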

#endif /* QEMU_ATOMIC_H */