#ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H
#define JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H

#define ATOMIC_INIT(...) {__VA_ARGS__}

typedef enum {
	atomic_memory_order_relaxed,
	atomic_memory_order_acquire,
	atomic_memory_order_release,
	atomic_memory_order_acq_rel,
	atomic_memory_order_seq_cst
} atomic_memory_order_t;

ATOMIC_INLINE void
atomic_fence(atomic_memory_order_t mo) {
	/* Easy cases first: no barrier, and full barrier. */
	if (mo == atomic_memory_order_relaxed) {
		asm volatile("" ::: "memory");
		return;
	}
	if (mo == atomic_memory_order_seq_cst) {
		asm volatile("" ::: "memory");
		__sync_synchronize();
		asm volatile("" ::: "memory");
		return;
	}
	asm volatile("" ::: "memory");
# if defined(__i386__) || defined(__x86_64__)
	/* This is implicit on x86. */
# elif defined(__ppc64__)
	asm volatile("lwsync");
# elif defined(__ppc__)
	asm volatile("sync");
# elif defined(__sparc__) && defined(__arch64__)
	if (mo == atomic_memory_order_acquire) {
		asm volatile("membar #LoadLoad | #LoadStore");
	} else if (mo == atomic_memory_order_release) {
		asm volatile("membar #LoadStore | #StoreStore");
	} else {
		asm volatile("membar #LoadLoad | #LoadStore | #StoreStore");
	}
# else
	__sync_synchronize();
# endif
	asm volatile("" ::: "memory");
}

/*
 * A correct implementation of seq_cst loads and stores on weakly ordered
 * architectures could do either of the following:
 *   1. store() is weak-fence -> store -> strong-fence, load() is load ->
 *      strong-fence.
 *   2. store() is strong-fence -> store, load() is strong-fence -> load ->
 *      weak-fence.
 * The tricky thing is, load() and store() above can be the load or store
 * portions of a gcc __sync builtin, so we have to follow GCC's lead, which
 * means going with strategy 2.
 * On strongly ordered architectures, the natural strategy is to stick a
 * strong fence after seq_cst stores, and have naked loads.  So we want the
 * strong fences in different places on different architectures.
 * atomic_pre_sc_load_fence and atomic_post_sc_store_fence allow us to
 * accomplish this.
 */
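
/*
 * Illustrative expansion (a reader's sketch, not generated code): with the
 * fences defined here and the macros below, a seq_cst store/load pair comes
 * out roughly as
 *
 *   weakly ordered (e.g. ppc):
 *     store: atomic_fence(release); a->repr = val;  post-store fence is a
 *            compiler barrier only
 *     load:  pre-load fence is a full barrier; r = a->repr;
 *            atomic_fence(acquire)
 *
 *   strongly ordered (x86, SPARC64):
 *     store: compiler barrier; a->repr = val; full barrier
 *     load:  compiler barrier; r = a->repr; compiler barrier
 *
 * i.e. the single full barrier sits before seq_cst loads on weak
 * architectures and after seq_cst stores on strong ones.
 */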

ATOMIC_INLINE void
atomic_pre_sc_load_fence() {
# if defined(__i386__) || defined(__x86_64__) || \
    (defined(__sparc__) && defined(__arch64__))
	atomic_fence(atomic_memory_order_relaxed);
# else
	atomic_fence(atomic_memory_order_seq_cst);
# endif
}

ATOMIC_INLINE void
atomic_post_sc_store_fence() {
# if defined(__i386__) || defined(__x86_64__) || \
    (defined(__sparc__) && defined(__arch64__))
	atomic_fence(atomic_memory_order_seq_cst);
# else
	atomic_fence(atomic_memory_order_relaxed);
# endif
}

#define JEMALLOC_GENERATE_ATOMICS(type, short_type,			\
    /* unused */ lg_size)						\
typedef struct {							\
	type volatile repr;						\
} atomic_##short_type##_t;						\
									\
ATOMIC_INLINE type							\
atomic_load_##short_type(const atomic_##short_type##_t *a,		\
    atomic_memory_order_t mo) {						\
	if (mo == atomic_memory_order_seq_cst) {			\
		atomic_pre_sc_load_fence();				\
	}								\
	type result = a->repr;						\
	if (mo != atomic_memory_order_relaxed) {			\
		atomic_fence(atomic_memory_order_acquire);		\
	}								\
	return result;							\
}									\
									\
ATOMIC_INLINE void							\
atomic_store_##short_type(atomic_##short_type##_t *a,			\
    type val, atomic_memory_order_t mo) {				\
	if (mo != atomic_memory_order_relaxed) {			\
		atomic_fence(atomic_memory_order_release);		\
	}								\
	a->repr = val;							\
	if (mo == atomic_memory_order_seq_cst) {			\
		atomic_post_sc_store_fence();				\
	}								\
}									\
									\
ATOMIC_INLINE type							\
atomic_exchange_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	/*								\
	 * Because of FreeBSD, we care about gcc 4.2, which doesn't	\
	 * have an atomic exchange builtin.  We fake it with a CAS	\
	 * loop.							\
	 */								\
	while (true) {							\
		type old = a->repr;					\
		if (__sync_bool_compare_and_swap(&a->repr, old, val)) {\
			return old;					\
		}							\
	}								\
}									\
									\
ATOMIC_INLINE bool							\
atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a,	\
    type *expected, type desired,					\
    atomic_memory_order_t success_mo,					\
    atomic_memory_order_t failure_mo) {					\
	type prev = __sync_val_compare_and_swap(&a->repr, *expected,	\
	    desired);							\
	if (prev == *expected) {					\
		return true;						\
	} else {							\
		*expected = prev;					\
		return false;						\
	}								\
}									\
									\
ATOMIC_INLINE bool							\
atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a,\
    type *expected, type desired,					\
    atomic_memory_order_t success_mo,					\
    atomic_memory_order_t failure_mo) {					\
	type prev = __sync_val_compare_and_swap(&a->repr, *expected,	\
	    desired);							\
	if (prev == *expected) {					\
		return true;						\
	} else {							\
		*expected = prev;					\
		return false;						\
	}								\
}
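
/*
 * Usage sketch (illustrative only; the real instantiations live in the outer
 * atomic.h wrapper, and the exact argument lists used there may differ):
 *
 *   JEMALLOC_GENERATE_ATOMICS(bool, b, 0)
 *
 * expands to atomic_b_t plus atomic_load_b(), atomic_store_b(),
 * atomic_exchange_b(), atomic_compare_exchange_weak_b(), and
 * atomic_compare_exchange_strong_b(); lg_size is unused by this backend.
 */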

#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type,		\
    /* unused */ lg_size)						\
JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size)	\
									\
ATOMIC_INLINE type							\
atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_add(&a->repr, val);			\
}									\
									\
ATOMIC_INLINE type							\
atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_sub(&a->repr, val);			\
}									\
									\
ATOMIC_INLINE type							\
atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_and(&a->repr, val);			\
}									\
									\
ATOMIC_INLINE type							\
atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_or(&a->repr, val);			\
}									\
									\
ATOMIC_INLINE type							\
atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_xor(&a->repr, val);			\
}

#endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H */
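
/*
 * Example use of the integer variant (hypothetical caller code, not part of
 * this header; the instantiation normally happens in the outer atomic.h):
 *
 *   JEMALLOC_GENERATE_INT_ATOMICS(uint32_t, u32, 2)
 *
 *   atomic_u32_t nrequests = ATOMIC_INIT(0);
 *   atomic_fetch_add_u32(&nrequests, 1, atomic_memory_order_relaxed);
 *   uint32_t cur = atomic_load_u32(&nrequests, atomic_memory_order_acquire);
 */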