/*
 * Simple interface for atomic operations.
 *
 * Copyright (C) 2013 Red Hat, Inc.
 *
 * Author: Paolo Bonzini <pbonzini@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 * See docs/atomics.txt for discussion about the guarantees each
 * atomic primitive is meant to provide.
 */

#ifndef QEMU_ATOMIC_H
#define QEMU_ATOMIC_H

/* Compiler barrier */
#define barrier()   ({ asm volatile("" ::: "memory"); (void)0; })

#ifdef __ATOMIC_RELAXED
/* For C11 atomic ops */

/* Manual memory barriers
 *
 * __atomic_thread_fence does not include a compiler barrier; instead,
 * the barrier is part of __atomic_load/__atomic_store's "volatile-like"
 * semantics.  If smp_wmb() is a no-op, absence of the barrier means that
 * the compiler is free to reorder stores on each side of the barrier.
 * Add one here, and similarly in smp_rmb() and smp_read_barrier_depends().
 */

#define smp_mb()    ({ barrier(); __atomic_thread_fence(__ATOMIC_SEQ_CST); barrier(); })
#define smp_wmb()   ({ barrier(); __atomic_thread_fence(__ATOMIC_RELEASE); barrier(); })
#define smp_rmb()   ({ barrier(); __atomic_thread_fence(__ATOMIC_ACQUIRE); barrier(); })

/* Most compilers currently treat consume and acquire the same, but really
 * no processors except Alpha need a barrier here.  Leave it in if
 * using Thread Sanitizer to avoid warnings, otherwise optimize it away.
 */
#if defined(__SANITIZE_THREAD__)
#define smp_read_barrier_depends() ({ barrier(); __atomic_thread_fence(__ATOMIC_CONSUME); barrier(); })
#elif defined(__alpha__)
#define smp_read_barrier_depends()   asm volatile("mb":::"memory")
#else
#define smp_read_barrier_depends()   barrier()
#endif


/* Weak atomic operations prevent the compiler moving other
 * loads/stores past the atomic operation load/store.  However there is
 * no explicit memory barrier for the processor.
 */
#define atomic_read(ptr)                              \
    ({                                                \
    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
    typeof(*ptr) _val;                                \
    __atomic_load(ptr, &_val, __ATOMIC_RELAXED);      \
    _val;                                             \
    })

#define atomic_set(ptr, i)  do {                      \
    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
    typeof(*ptr) _val = (i);                          \
    __atomic_store(ptr, &_val, __ATOMIC_RELAXED);     \
} while(0)

/* See above: most compilers currently treat consume and acquire the
 * same, but this slows down atomic_rcu_read unnecessarily.
 */
#ifdef __SANITIZE_THREAD__
#define atomic_rcu_read__nocheck(ptr, valptr)         \
    __atomic_load(ptr, valptr, __ATOMIC_CONSUME);
#else
#define atomic_rcu_read__nocheck(ptr, valptr)         \
    __atomic_load(ptr, valptr, __ATOMIC_RELAXED);     \
    smp_read_barrier_depends();
#endif

#define atomic_rcu_read(ptr)                          \
    ({                                                \
    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
    typeof(*ptr) _val;                                \
    atomic_rcu_read__nocheck(ptr, &_val);             \
    _val;                                             \
    })

#define atomic_rcu_set(ptr, i) do {                   \
    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
    typeof(*ptr) _val = (i);                          \
    __atomic_store(ptr, &_val, __ATOMIC_RELEASE);     \
} while(0)
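
/* Usage example (an illustrative sketch, not part of this header's API):
 * a writer initializes a structure and then publishes it with
 * atomic_rcu_set(); readers pick it up with atomic_rcu_read() inside an
 * RCU read-side critical section.  "struct foo", "global_foo" and
 * consume() are hypothetical names invented only for this example.
 *
 *     struct foo *f = g_new0(struct foo, 1);
 *     f->a = 42;                          // initialize first...
 *     atomic_rcu_set(&global_foo, f);     // ...then publish (release store)
 *
 *     rcu_read_lock();
 *     struct foo *p = atomic_rcu_read(&global_foo);  // dependency-ordered load
 *     if (p) {
 *         consume(p->a);                  // guaranteed to observe a == 42
 *     }
 *     rcu_read_unlock();
 */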

/* atomic_mb_read/set semantics map to Java volatile variables.  They are
 * less expensive on some platforms (notably POWER & ARMv7) than fully
 * sequentially consistent operations.
 *
 * As long as they are used as paired operations they are safe to
 * use.  See docs/atomics.txt for more discussion.
 */

#if defined(_ARCH_PPC)
#define atomic_mb_read(ptr)                           \
    ({                                                \
    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
    typeof(*ptr) _val;                                \
    __atomic_load(ptr, &_val, __ATOMIC_RELAXED);      \
    smp_rmb();                                        \
    _val;                                             \
    })

#define atomic_mb_set(ptr, i)  do {                   \
    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
    typeof(*ptr) _val = (i);                          \
    smp_wmb();                                        \
    __atomic_store(ptr, &_val, __ATOMIC_RELAXED);     \
    smp_mb();                                         \
} while(0)
#else
#define atomic_mb_read(ptr)                           \
    ({                                                \
    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
    typeof(*ptr) _val;                                \
    __atomic_load(ptr, &_val, __ATOMIC_SEQ_CST);      \
    _val;                                             \
    })

#define atomic_mb_set(ptr, i)  do {                   \
    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
    typeof(*ptr) _val = (i);                          \
    __atomic_store(ptr, &_val, __ATOMIC_SEQ_CST);     \
} while(0)
#endif


/* All the remaining operations are fully sequentially consistent */

#define atomic_xchg(ptr, i)    ({                           \
    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *));       \
    typeof(*ptr) _new = (i), _old;                          \
    __atomic_exchange(ptr, &_new, &_old, __ATOMIC_SEQ_CST); \
    _old;                                                   \
    })

/* Returns the eventual value, failed or not */
#define atomic_cmpxchg(ptr, old, new)                       \
    ({                                                      \
    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *));       \
    typeof(*ptr) _old = (old), _new = (new);                \
    __atomic_compare_exchange(ptr, &_old, &_new, false,     \
                              __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); \
    _old;                                                   \
    })

/* Provide shorter names for GCC atomic builtins, return old value */
#define atomic_fetch_inc(ptr)    __atomic_fetch_add(ptr, 1, __ATOMIC_SEQ_CST)
#define atomic_fetch_dec(ptr)    __atomic_fetch_sub(ptr, 1, __ATOMIC_SEQ_CST)
#define atomic_fetch_add(ptr, n) __atomic_fetch_add(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_fetch_sub(ptr, n) __atomic_fetch_sub(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_fetch_and(ptr, n) __atomic_fetch_and(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_fetch_or(ptr, n)  __atomic_fetch_or(ptr, n, __ATOMIC_SEQ_CST)

/* And even shorter names that return void.  */
#define atomic_inc(ptr)    ((void) __atomic_fetch_add(ptr, 1, __ATOMIC_SEQ_CST))
#define atomic_dec(ptr)    ((void) __atomic_fetch_sub(ptr, 1, __ATOMIC_SEQ_CST))
#define atomic_add(ptr, n) ((void) __atomic_fetch_add(ptr, n, __ATOMIC_SEQ_CST))
#define atomic_sub(ptr, n) ((void) __atomic_fetch_sub(ptr, n, __ATOMIC_SEQ_CST))
#define atomic_and(ptr, n) ((void) __atomic_fetch_and(ptr, n, __ATOMIC_SEQ_CST))
#define atomic_or(ptr, n)  ((void) __atomic_fetch_or(ptr, n, __ATOMIC_SEQ_CST))
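
/* Usage example (an illustrative sketch only): the operations above are
 * fully sequentially consistent, so they can be used directly for simple
 * reference counting or for compare-and-swap loops.  "counter" and "state"
 * are hypothetical variables invented only for this example.
 *
 *     static int counter;
 *     atomic_inc(&counter);                        // counter++, SC ordering
 *     int before = atomic_fetch_add(&counter, 2);  // returns the old value
 *
 *     // CAS loop: atomic_cmpxchg returns the value observed in memory,
 *     // whether or not the exchange succeeded.
 *     static int state;
 *     int old;
 *     do {
 *         old = atomic_read(&state);
 *     } while (atomic_cmpxchg(&state, old, old + 1) != old);
 */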

#else  /* __ATOMIC_RELAXED */

/*
 * We use GCC builtin if it's available, as that can use mfence on
 * 32-bit as well, e.g. if built with -march=pentium-m.  However, on
 * i386 the spec is buggy, and the implementation followed it until
 * 4.3 (http://gcc.gnu.org/bugzilla/show_bug.cgi?id=36793).
 */
#if defined(__i386__) || defined(__x86_64__)
#if !QEMU_GNUC_PREREQ(4, 4)
#if defined __x86_64__
#define smp_mb()    ({ asm volatile("mfence" ::: "memory"); (void)0; })
#else
#define smp_mb()    ({ asm volatile("lock; addl $0,0(%%esp) " ::: "memory"); (void)0; })
#endif
#endif
#endif


#ifdef __alpha__
#define smp_read_barrier_depends()   asm volatile("mb":::"memory")
#endif

#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__)

/*
 * Because of the strongly ordered storage model, wmb() and rmb() are nops
 * here (a compiler barrier only).  QEMU doesn't do accesses to write-combining
 * memory or non-temporal load/stores from C code.
 */
#define smp_wmb()   barrier()
#define smp_rmb()   barrier()

/*
 * __sync_lock_test_and_set() is documented to be an acquire barrier only,
 * but it is a full barrier at the hardware level.  Add a compiler barrier
 * to make it a full barrier also at the compiler level.
 */
#define atomic_xchg(ptr, i)    (barrier(), __sync_lock_test_and_set(ptr, i))

/*
 * Load/store with Java volatile semantics.
 */
#define atomic_mb_set(ptr, i)  ((void)atomic_xchg(ptr, i))

#elif defined(_ARCH_PPC)

/*
 * We use an eieio() for wmb() on powerpc.  This assumes we don't
 * need to order cacheable and non-cacheable stores with respect to
 * each other.
 *
 * smp_mb has the same problem as on x86 for not-very-new GCC
 * (http://patchwork.ozlabs.org/patch/126184/, Nov 2011).
 */
#define smp_wmb()   ({ asm volatile("eieio" ::: "memory"); (void)0; })
#if defined(__powerpc64__)
#define smp_rmb()   ({ asm volatile("lwsync" ::: "memory"); (void)0; })
#else
#define smp_rmb()   ({ asm volatile("sync" ::: "memory"); (void)0; })
#endif
#define smp_mb()    ({ asm volatile("sync" ::: "memory"); (void)0; })

#endif /* _ARCH_PPC */

/*
 * For (host) platforms we don't have explicit barrier definitions
 * for, we use the gcc __sync_synchronize() primitive to generate a
 * full barrier.  This should be safe on all platforms, though it may
 * be overkill for smp_wmb() and smp_rmb().
 */
#ifndef smp_mb
#define smp_mb()    __sync_synchronize()
#endif

#ifndef smp_wmb
#define smp_wmb()   __sync_synchronize()
#endif

#ifndef smp_rmb
#define smp_rmb()   __sync_synchronize()
#endif

#ifndef smp_read_barrier_depends
#define smp_read_barrier_depends()   barrier()
#endif

/* These will only be atomic if the processor does the fetch or store
 * in a single issue memory operation
 */
#define atomic_read(ptr)       (*(__typeof__(*ptr) volatile*) (ptr))
#define atomic_set(ptr, i)     ((*(__typeof__(*ptr) volatile*) (ptr)) = (i))
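
/* Usage example (an illustrative sketch only): atomic_read()/atomic_set()
 * by themselves only avoid torn accesses; any ordering against other memory
 * must come from explicit barriers.  The classic message-passing pattern
 * pairs smp_wmb() on the producer side with smp_rmb() on the consumer side.
 * "data", "ready" and use() are hypothetical names invented for this example.
 *
 *     // producer
 *     data = 123;                // plain store of the payload
 *     smp_wmb();                 // order the payload before the flag
 *     atomic_set(&ready, 1);
 *
 *     // consumer
 *     if (atomic_read(&ready)) {
 *         smp_rmb();             // order the flag read before the payload read
 *         use(data);             // observes 123
 *     }
 */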

/**
 * atomic_rcu_read - reads a RCU-protected pointer to a local variable
 * into a RCU read-side critical section.  The pointer can later be safely
 * dereferenced within the critical section.
 *
 * This ensures that the pointer copy is invariant throughout the whole
 * critical section.
 *
 * Inserts memory barriers on architectures that require them (currently only
 * Alpha) and documents which pointers are protected by RCU.
 *
 * atomic_rcu_read also includes a compiler barrier to ensure that
 * value-speculative optimizations (e.g. VSS: Value Speculation
 * Scheduling) do not perform the data read before the pointer read
 * by speculating the value of the pointer.
 *
 * Should match atomic_rcu_set(), atomic_xchg(), atomic_cmpxchg().
 */
#define atomic_rcu_read(ptr)    ({                \
    typeof(*ptr) _val = atomic_read(ptr);         \
    smp_read_barrier_depends();                   \
    _val;                                         \
})

/**
 * atomic_rcu_set - assigns (publicizes) a pointer to a new data structure
 * meant to be read by RCU read-side critical sections.
 *
 * Documents which pointers will be dereferenced by RCU read-side critical
 * sections and adds the required memory barriers on architectures requiring
 * them.  It also makes sure the compiler does not reorder code initializing
 * the data structure before its publication.
 *
 * Should match atomic_rcu_read().
 */
#define atomic_rcu_set(ptr, i)  do {              \
    smp_wmb();                                    \
    atomic_set(ptr, i);                           \
} while (0)

/* These have the same semantics as Java volatile variables.
 * See http://gee.cs.oswego.edu/dl/jmm/cookbook.html:
 * "1. Issue a StoreStore barrier (wmb) before each volatile store."
 *  2. Issue a StoreLoad barrier after each volatile store.
 *     Note that you could instead issue one before each volatile load, but
 *     this would be slower for typical programs using volatiles in which
 *     reads greatly outnumber writes.  Alternatively, if available, you
 *     can implement volatile store as an atomic instruction (for example
 *     XCHG on x86) and omit the barrier.  This may be more efficient if
 *     atomic instructions are cheaper than StoreLoad barriers.
 *  3. Issue LoadLoad and LoadStore barriers after each volatile load."
 *
 * If you prefer to think in terms of "pairing" of memory barriers,
 * an atomic_mb_read pairs with an atomic_mb_set.
 *
 * And for the few ia64 lovers that exist, an atomic_mb_read is a ld.acq,
 * while an atomic_mb_set is a st.rel followed by a memory barrier.
 *
 * These are a bit weaker than __atomic_load/store with __ATOMIC_SEQ_CST
 * (see docs/atomics.txt), and I'm not sure that __ATOMIC_ACQ_REL is enough.
 * Just always use the barriers manually by the rules above.
 */
#define atomic_mb_read(ptr)    ({                 \
    typeof(*ptr) _val = atomic_read(ptr);         \
    smp_rmb();                                    \
    _val;                                         \
})

#ifndef atomic_mb_set
#define atomic_mb_set(ptr, i)  do {               \
    smp_wmb();                                    \
    atomic_set(ptr, i);                           \
    smp_mb();                                     \
} while (0)
#endif
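
/* Usage example (an illustrative sketch only): atomic_mb_read() and
 * atomic_mb_set() are intended to be used as a pair, like a Java volatile
 * field shared between a producer and a consumer.  "request", "result" and
 * compute()/use() are hypothetical names invented only for this example.
 *
 *     // producer
 *     result = compute();
 *     atomic_mb_set(&request, 1);
 *
 *     // consumer
 *     if (atomic_mb_read(&request)) {
 *         use(result);           // guaranteed to observe compute()'s store
 *     }
 */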

#ifndef atomic_xchg
#if defined(__clang__)
#define atomic_xchg(ptr, i)    __sync_swap(ptr, i)
#else
/* __sync_lock_test_and_set() is documented to be an acquire barrier only.  */
#define atomic_xchg(ptr, i)    (smp_mb(), __sync_lock_test_and_set(ptr, i))
#endif
#endif

/* Provide shorter names for GCC atomic builtins.  */
#define atomic_fetch_inc(ptr)  __sync_fetch_and_add(ptr, 1)
#define atomic_fetch_dec(ptr)  __sync_fetch_and_add(ptr, -1)
#define atomic_fetch_add       __sync_fetch_and_add
#define atomic_fetch_sub       __sync_fetch_and_sub
#define atomic_fetch_and       __sync_fetch_and_and
#define atomic_fetch_or        __sync_fetch_and_or
#define atomic_cmpxchg         __sync_val_compare_and_swap

/* And even shorter names that return void.  */
#define atomic_inc(ptr)        ((void) __sync_fetch_and_add(ptr, 1))
#define atomic_dec(ptr)        ((void) __sync_fetch_and_add(ptr, -1))
#define atomic_add(ptr, n)     ((void) __sync_fetch_and_add(ptr, n))
#define atomic_sub(ptr, n)     ((void) __sync_fetch_and_sub(ptr, n))
#define atomic_and(ptr, n)     ((void) __sync_fetch_and_and(ptr, n))
#define atomic_or(ptr, n)      ((void) __sync_fetch_and_or(ptr, n))

#endif /* __ATOMIC_RELAXED */
#endif /* QEMU_ATOMIC_H */