/*
 * Simple interface for atomic operations.
 *
 * Copyright (C) 2013 Red Hat, Inc.
 *
 * Author: Paolo Bonzini <pbonzini@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 * See docs/atomics.txt for discussion about the guarantees each
 * atomic primitive is meant to provide.
 */

#ifndef __QEMU_ATOMIC_H
#define __QEMU_ATOMIC_H 1

#include "qemu/compiler.h"


/* Compiler barrier */
#define barrier()   ({ asm volatile("" ::: "memory"); (void)0; })

#ifdef __ATOMIC_RELAXED
/* For C11 atomic ops */

/* Manual memory barriers
 *
 * __atomic_thread_fence does not include a compiler barrier; instead,
 * the barrier is part of __atomic_load/__atomic_store's "volatile-like"
 * semantics. If smp_wmb() is a no-op, absence of the barrier means that
 * the compiler is free to reorder stores on each side of the barrier.
 * Add one here, and similarly in smp_rmb() and smp_read_barrier_depends().
 */

#define smp_mb()    ({ barrier(); __atomic_thread_fence(__ATOMIC_SEQ_CST); barrier(); })
#define smp_wmb()   ({ barrier(); __atomic_thread_fence(__ATOMIC_RELEASE); barrier(); })
#define smp_rmb()   ({ barrier(); __atomic_thread_fence(__ATOMIC_ACQUIRE); barrier(); })

#define smp_read_barrier_depends() ({ barrier(); __atomic_thread_fence(__ATOMIC_CONSUME); barrier(); })

/* Weak atomic operations prevent the compiler from moving other
 * loads/stores past the atomic operation load/store. However there is
 * no explicit memory barrier for the processor.
 */
#define atomic_read(ptr)                          \
    ({                                            \
    typeof(*ptr) _val;                            \
    __atomic_load(ptr, &_val, __ATOMIC_RELAXED);  \
    _val;                                         \
    })

#define atomic_set(ptr, i)  do {                  \
    typeof(*ptr) _val = (i);                      \
    __atomic_store(ptr, &_val, __ATOMIC_RELAXED); \
} while(0)

/* Atomic RCU operations imply weak memory barriers */

#define atomic_rcu_read(ptr)                      \
    ({                                            \
    typeof(*ptr) _val;                            \
    __atomic_load(ptr, &_val, __ATOMIC_CONSUME);  \
    _val;                                         \
    })

#define atomic_rcu_set(ptr, i)  do {              \
    typeof(*ptr) _val = (i);                      \
    __atomic_store(ptr, &_val, __ATOMIC_RELEASE); \
} while(0)

/* atomic_mb_read/set semantics map to Java volatile variables. They are
 * less expensive on some platforms (notably POWER & ARMv7) than fully
 * sequentially consistent operations.
 *
 * As long as they are used as paired operations they are safe to
 * use. See docs/atomics.txt for more discussion.
 */
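/* As an illustration of the pairing rule above, a hypothetical producer
 * and consumer could hand a payload off through a flag.  This is only a
 * sketch; the "Box" type and its fields are invented for the example and
 * are not part of this header.
 *
 *     typedef struct { int payload; bool ready; } Box;
 *
 *     // producer
 *     void publish(Box *b, int v)
 *     {
 *         b->payload = v;
 *         atomic_mb_set(&b->ready, true);    // pairs with atomic_mb_read below
 *     }
 *
 *     // consumer
 *     bool try_consume(Box *b, int *v)
 *     {
 *         if (!atomic_mb_read(&b->ready)) {  // pairs with atomic_mb_set above
 *             return false;
 *         }
 *         *v = b->payload;
 *         return true;
 *     }
 *
 * Because the two operations are paired, a consumer that observes
 * ready == true is also guaranteed to observe the payload store.
 */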
#if defined(_ARCH_PPC)
#define atomic_mb_read(ptr)                       \
    ({                                            \
    typeof(*ptr) _val;                            \
    __atomic_load(ptr, &_val, __ATOMIC_RELAXED);  \
    smp_rmb();                                    \
    _val;                                         \
    })

#define atomic_mb_set(ptr, i)  do {               \
    typeof(*ptr) _val = (i);                      \
    smp_wmb();                                    \
    __atomic_store(ptr, &_val, __ATOMIC_RELAXED); \
    smp_mb();                                     \
} while(0)
#else
#define atomic_mb_read(ptr)                       \
    ({                                            \
    typeof(*ptr) _val;                            \
    __atomic_load(ptr, &_val, __ATOMIC_SEQ_CST);  \
    _val;                                         \
    })

#define atomic_mb_set(ptr, i)  do {               \
    typeof(*ptr) _val = (i);                      \
    __atomic_store(ptr, &_val, __ATOMIC_SEQ_CST); \
} while(0)
#endif


/* All the remaining operations are fully sequentially consistent */

#define atomic_xchg(ptr, i)    ({                           \
    typeof(*ptr) _new = (i), _old;                          \
    __atomic_exchange(ptr, &_new, &_old, __ATOMIC_SEQ_CST); \
    _old;                                                   \
    })

/* Returns the value found in *ptr, which equals "old" iff the exchange
 * succeeded.
 */
#define atomic_cmpxchg(ptr, old, new)                              \
    ({                                                             \
    typeof(*ptr) _old = (old), _new = (new);                       \
    __atomic_compare_exchange(ptr, &_old, &_new, false,            \
                              __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); \
    _old;                                                          \
    })

/* Provide shorter names for GCC atomic builtins, return old value */
#define atomic_fetch_inc(ptr)    __atomic_fetch_add(ptr, 1, __ATOMIC_SEQ_CST)
#define atomic_fetch_dec(ptr)    __atomic_fetch_sub(ptr, 1, __ATOMIC_SEQ_CST)
#define atomic_fetch_add(ptr, n) __atomic_fetch_add(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_fetch_sub(ptr, n) __atomic_fetch_sub(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_fetch_and(ptr, n) __atomic_fetch_and(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_fetch_or(ptr, n)  __atomic_fetch_or(ptr, n, __ATOMIC_SEQ_CST)

/* And even shorter names that return void. */
#define atomic_inc(ptr)    ((void) __atomic_fetch_add(ptr, 1, __ATOMIC_SEQ_CST))
#define atomic_dec(ptr)    ((void) __atomic_fetch_sub(ptr, 1, __ATOMIC_SEQ_CST))
#define atomic_add(ptr, n) ((void) __atomic_fetch_add(ptr, n, __ATOMIC_SEQ_CST))
#define atomic_sub(ptr, n) ((void) __atomic_fetch_sub(ptr, n, __ATOMIC_SEQ_CST))
#define atomic_and(ptr, n) ((void) __atomic_fetch_and(ptr, n, __ATOMIC_SEQ_CST))
#define atomic_or(ptr, n)  ((void) __atomic_fetch_or(ptr, n, __ATOMIC_SEQ_CST))
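/* A typical use of atomic_cmpxchg is a compare-and-swap retry loop, where
 * success is detected by comparing the returned value with the expected one.
 * This is purely illustrative; "counter" and the helper function are invented
 * for this sketch and are not part of this header.
 *
 *     void counter_inc(int *counter)
 *     {
 *         int old, seen;
 *         do {
 *             old  = atomic_read(counter);
 *             seen = atomic_cmpxchg(counter, old, old + 1);
 *         } while (seen != old);    // seen == old means the exchange succeeded
 *     }
 */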

#else /* __ATOMIC_RELAXED */

/*
 * We use GCC builtin if it's available, as that can use mfence on
 * 32-bit as well, e.g. if built with -march=pentium-m. However, on
 * i386 the spec is buggy, and the implementation followed it until
 * 4.3 (http://gcc.gnu.org/bugzilla/show_bug.cgi?id=36793).
 */
#if defined(__i386__) || defined(__x86_64__)
#if !QEMU_GNUC_PREREQ(4, 4)
#if defined __x86_64__
#define smp_mb()    ({ asm volatile("mfence" ::: "memory"); (void)0; })
#else
#define smp_mb()    ({ asm volatile("lock; addl $0,0(%%esp) " ::: "memory"); (void)0; })
#endif
#endif
#endif


#ifdef __alpha__
#define smp_read_barrier_depends()   asm volatile("mb":::"memory")
#endif

#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__)

/*
 * Because of the strongly ordered storage model, wmb() and rmb() are nops
 * here (a compiler barrier only).  QEMU doesn't do accesses to
 * write-combining memory or non-temporal load/stores from C code.
 */
#define smp_wmb()   barrier()
#define smp_rmb()   barrier()

/*
 * __sync_lock_test_and_set() is documented to be an acquire barrier only,
 * but it is a full barrier at the hardware level.  Add a compiler barrier
 * to make it a full barrier also at the compiler level.
 */
#define atomic_xchg(ptr, i)    (barrier(), __sync_lock_test_and_set(ptr, i))

/*
 * Load/store with Java volatile semantics.
 */
#define atomic_mb_set(ptr, i)  ((void)atomic_xchg(ptr, i))

#elif defined(_ARCH_PPC)

/*
 * We use an eieio() for wmb() on powerpc.  This assumes we don't
 * need to order cacheable and non-cacheable stores with respect to
 * each other.
 *
 * smp_mb has the same problem as on x86 for not-very-new GCC
 * (http://patchwork.ozlabs.org/patch/126184/, Nov 2011).
 */
#define smp_wmb()   ({ asm volatile("eieio" ::: "memory"); (void)0; })
#if defined(__powerpc64__)
#define smp_rmb()   ({ asm volatile("lwsync" ::: "memory"); (void)0; })
#else
#define smp_rmb()   ({ asm volatile("sync" ::: "memory"); (void)0; })
#endif
#define smp_mb()    ({ asm volatile("sync" ::: "memory"); (void)0; })

#endif /* _ARCH_PPC */

/*
 * For (host) platforms we don't have explicit barrier definitions
 * for, we use the gcc __sync_synchronize() primitive to generate a
 * full barrier.  This should be safe on all platforms, though it may
 * be overkill for smp_wmb() and smp_rmb().
 */
#ifndef smp_mb
#define smp_mb()    __sync_synchronize()
#endif

#ifndef smp_wmb
#define smp_wmb()   __sync_synchronize()
#endif

#ifndef smp_rmb
#define smp_rmb()   __sync_synchronize()
#endif

#ifndef smp_read_barrier_depends
#define smp_read_barrier_depends()   barrier()
#endif

/* These will only be atomic if the processor does the fetch or store
 * in a single issue memory operation
 */
#define atomic_read(ptr)       (*(__typeof__(*ptr) volatile*) (ptr))
#define atomic_set(ptr, i)     ((*(__typeof__(*ptr) volatile*) (ptr)) = (i))

/**
 * atomic_rcu_read - reads an RCU-protected pointer to a local variable
 * inside an RCU read-side critical section. The pointer can later be safely
 * dereferenced within the critical section.
 *
 * This ensures that the pointer copy is invariant throughout the whole
 * critical section.
 *
 * Inserts memory barriers on architectures that require them (currently only
 * Alpha) and documents which pointers are protected by RCU.
 *
 * atomic_rcu_read also includes a compiler barrier to ensure that
 * value-speculative optimizations (e.g. VSS: Value Speculation
 * Scheduling) do not perform the data read before the pointer read
 * by speculating the value of the pointer.
 *
 * Should match atomic_rcu_set(), atomic_xchg(), atomic_cmpxchg().
 */
#define atomic_rcu_read(ptr)    ({                \
    typeof(*ptr) _val = atomic_read(ptr);         \
    smp_read_barrier_depends();                   \
    _val;                                         \
})

/**
 * atomic_rcu_set - assigns (publicizes) a pointer to a new data structure
 * meant to be read by RCU read-side critical sections.
 *
 * Documents which pointers will be dereferenced by RCU read-side critical
 * sections and adds the required memory barriers on architectures requiring
 * them. It also makes sure the compiler does not reorder code initializing the
 * data structure before its publication.
 *
 * Should match atomic_rcu_read().
 */
#define atomic_rcu_set(ptr, i)  do {              \
    smp_wmb();                                    \
    atomic_set(ptr, i);                           \
} while (0)
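/* A typical publication pattern, purely as a sketch: the "Config" type and
 * the global pointer below are invented for this example and are not part of
 * this header.  A writer fills in a new structure and publishes it with
 * atomic_rcu_set(); readers pick it up with atomic_rcu_read() and may then
 * dereference their copy for the rest of the critical section.
 *
 *     typedef struct { int a, b; } Config;
 *     Config *global_config;
 *
 *     // writer (publication)
 *     void update_config(Config *new_cfg)
 *     {
 *         new_cfg->a = 1;
 *         new_cfg->b = 2;
 *         atomic_rcu_set(&global_config, new_cfg);  // pairs with atomic_rcu_read
 *     }
 *
 *     // reader (inside an RCU read-side critical section)
 *     int read_config_a(void)
 *     {
 *         Config *cfg = atomic_rcu_read(&global_config);
 *         return cfg->a;    // the initialization above is guaranteed visible
 *     }
 *
 * Reclaiming the old structure still requires waiting for readers to finish,
 * which is outside the scope of this header.
 */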
/* These have the same semantics as Java volatile variables.
 * See http://gee.cs.oswego.edu/dl/jmm/cookbook.html:
 * "1. Issue a StoreStore barrier (wmb) before each volatile store."
 *  2. Issue a StoreLoad barrier after each volatile store.
 *     Note that you could instead issue one before each volatile load, but
 *     this would be slower for typical programs using volatiles in which
 *     reads greatly outnumber writes. Alternatively, if available, you
 *     can implement volatile store as an atomic instruction (for example
 *     XCHG on x86) and omit the barrier.  This may be more efficient if
 *     atomic instructions are cheaper than StoreLoad barriers.
 *  3. Issue LoadLoad and LoadStore barriers after each volatile load."
 *
 * If you prefer to think in terms of "pairing" of memory barriers,
 * an atomic_mb_read pairs with an atomic_mb_set.
 *
 * And for the few ia64 lovers that exist, an atomic_mb_read is a ld.acq,
 * while an atomic_mb_set is a st.rel followed by a memory barrier.
 *
 * These are a bit weaker than __atomic_load/store with __ATOMIC_SEQ_CST
 * (see docs/atomics.txt), and I'm not sure that __ATOMIC_ACQ_REL is enough.
 * Just always use the barriers manually by the rules above.
 */
#define atomic_mb_read(ptr)    ({                 \
    typeof(*ptr) _val = atomic_read(ptr);         \
    smp_rmb();                                    \
    _val;                                         \
})

#ifndef atomic_mb_set
#define atomic_mb_set(ptr, i)  do {               \
    smp_wmb();                                    \
    atomic_set(ptr, i);                           \
    smp_mb();                                     \
} while (0)
#endif

#ifndef atomic_xchg
#if defined(__clang__)
#define atomic_xchg(ptr, i)    __sync_swap(ptr, i)
#else
/* __sync_lock_test_and_set() is documented to be an acquire barrier only. */
#define atomic_xchg(ptr, i)    (smp_mb(), __sync_lock_test_and_set(ptr, i))
#endif
#endif

/* Provide shorter names for GCC atomic builtins. */
#define atomic_fetch_inc(ptr)  __sync_fetch_and_add(ptr, 1)
#define atomic_fetch_dec(ptr)  __sync_fetch_and_add(ptr, -1)
#define atomic_fetch_add       __sync_fetch_and_add
#define atomic_fetch_sub       __sync_fetch_and_sub
#define atomic_fetch_and       __sync_fetch_and_and
#define atomic_fetch_or        __sync_fetch_and_or
#define atomic_cmpxchg         __sync_val_compare_and_swap

/* And even shorter names that return void. */
#define atomic_inc(ptr)        ((void) __sync_fetch_and_add(ptr, 1))
#define atomic_dec(ptr)        ((void) __sync_fetch_and_add(ptr, -1))
#define atomic_add(ptr, n)     ((void) __sync_fetch_and_add(ptr, n))
#define atomic_sub(ptr, n)     ((void) __sync_fetch_and_sub(ptr, n))
#define atomic_and(ptr, n)     ((void) __sync_fetch_and_and(ptr, n))
#define atomic_or(ptr, n)      ((void) __sync_fetch_and_or(ptr, n))

#endif /* __ATOMIC_RELAXED */
#endif /* __QEMU_ATOMIC_H */