1 #ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H 2 #define JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H 3 4 #define ATOMIC_INIT(...) {__VA_ARGS__} 5 6 typedef enum { 7 atomic_memory_order_relaxed, 8 atomic_memory_order_acquire, 9 atomic_memory_order_release, 10 atomic_memory_order_acq_rel, 11 atomic_memory_order_seq_cst 12 } atomic_memory_order_t; 13 14 ATOMIC_INLINE void 15 atomic_fence(atomic_memory_order_t mo) { 16 /* Easy cases first: no barrier, and full barrier. */ 17 if (mo == atomic_memory_order_relaxed) { 18 asm volatile("" ::: "memory"); 19 return; 20 } 21 if (mo == atomic_memory_order_seq_cst) { 22 asm volatile("" ::: "memory"); 23 __sync_synchronize(); 24 asm volatile("" ::: "memory"); 25 return; 26 } 27 asm volatile("" ::: "memory"); 28 # if defined(__i386__) || defined(__x86_64__) 29 /* This is implicit on x86. */ 30 # elif defined(__ppc__) 31 asm volatile("lwsync"); 32 # elif defined(__sparc__) && defined(__arch64__) 33 if (mo == atomic_memory_order_acquire) { 34 asm volatile("membar #LoadLoad | #LoadStore"); 35 } else if (mo == atomic_memory_order_release) { 36 asm volatile("membar #LoadStore | #StoreStore"); 37 } else { 38 asm volatile("membar #LoadLoad | #LoadStore | #StoreStore"); 39 } 40 # else 41 __sync_synchronize(); 42 # endif 43 asm volatile("" ::: "memory"); 44 } 45 46 /* 47 * A correct implementation of seq_cst loads and stores on weakly ordered 48 * architectures could do either of the following: 49 * 1. store() is weak-fence -> store -> strong fence, load() is load -> 50 * strong-fence. 51 * 2. store() is strong-fence -> store, load() is strong-fence -> load -> 52 * weak-fence. 53 * The tricky thing is, load() and store() above can be the load or store 54 * portions of a gcc __sync builtin, so we have to follow GCC's lead, which 55 * means going with strategy 2. 56 * On strongly ordered architectures, the natural strategy is to stick a strong 57 * fence after seq_cst stores, and have naked loads. So we want the strong 58 * fences in different places on different architectures. 59 * atomic_pre_sc_load_fence and atomic_post_sc_store_fence allow us to 60 * accomplish this. 61 */ 62 63 ATOMIC_INLINE void 64 atomic_pre_sc_load_fence() { 65 # if defined(__i386__) || defined(__x86_64__) || \ 66 (defined(__sparc__) && defined(__arch64__)) 67 atomic_fence(atomic_memory_order_relaxed); 68 # else 69 atomic_fence(atomic_memory_order_seq_cst); 70 # endif 71 } 72 73 ATOMIC_INLINE void 74 atomic_post_sc_store_fence() { 75 # if defined(__i386__) || defined(__x86_64__) || \ 76 (defined(__sparc__) && defined(__arch64__)) 77 atomic_fence(atomic_memory_order_seq_cst); 78 # else 79 atomic_fence(atomic_memory_order_relaxed); 80 # endif 81 82 } 83 84 #define JEMALLOC_GENERATE_ATOMICS(type, short_type, \ 85 /* unused */ lg_size) \ 86 typedef struct { \ 87 type volatile repr; \ 88 } atomic_##short_type##_t; \ 89 \ 90 ATOMIC_INLINE type \ 91 atomic_load_##short_type(const atomic_##short_type##_t *a, \ 92 atomic_memory_order_t mo) { \ 93 if (mo == atomic_memory_order_seq_cst) { \ 94 atomic_pre_sc_load_fence(); \ 95 } \ 96 type result = a->repr; \ 97 if (mo != atomic_memory_order_relaxed) { \ 98 atomic_fence(atomic_memory_order_acquire); \ 99 } \ 100 return result; \ 101 } \ 102 \ 103 ATOMIC_INLINE void \ 104 atomic_store_##short_type(atomic_##short_type##_t *a, \ 105 type val, atomic_memory_order_t mo) { \ 106 if (mo != atomic_memory_order_relaxed) { \ 107 atomic_fence(atomic_memory_order_release); \ 108 } \ 109 a->repr = val; \ 110 if (mo == atomic_memory_order_seq_cst) { \ 111 atomic_post_sc_store_fence(); \ 112 } \ 113 } \ 114 \ 115 ATOMIC_INLINE type \ 116 atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \ 117 atomic_memory_order_t mo) { \ 118 /* \ 119 * Because of FreeBSD, we care about gcc 4.2, which doesn't have\ 120 * an atomic exchange builtin. We fake it with a CAS loop. \ 121 */ \ 122 while (true) { \ 123 type old = a->repr; \ 124 if (__sync_bool_compare_and_swap(&a->repr, old, val)) { \ 125 return old; \ 126 } \ 127 } \ 128 } \ 129 \ 130 ATOMIC_INLINE bool \ 131 atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \ 132 type *expected, type desired, atomic_memory_order_t success_mo, \ 133 atomic_memory_order_t failure_mo) { \ 134 type prev = __sync_val_compare_and_swap(&a->repr, *expected, \ 135 desired); \ 136 if (prev == *expected) { \ 137 return true; \ 138 } else { \ 139 *expected = prev; \ 140 return false; \ 141 } \ 142 } \ 143 ATOMIC_INLINE bool \ 144 atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ 145 type *expected, type desired, atomic_memory_order_t success_mo, \ 146 atomic_memory_order_t failure_mo) { \ 147 type prev = __sync_val_compare_and_swap(&a->repr, *expected, \ 148 desired); \ 149 if (prev == *expected) { \ 150 return true; \ 151 } else { \ 152 *expected = prev; \ 153 return false; \ 154 } \ 155 } 156 157 #define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \ 158 /* unused */ lg_size) \ 159 JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \ 160 \ 161 ATOMIC_INLINE type \ 162 atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val, \ 163 atomic_memory_order_t mo) { \ 164 return __sync_fetch_and_add(&a->repr, val); \ 165 } \ 166 \ 167 ATOMIC_INLINE type \ 168 atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val, \ 169 atomic_memory_order_t mo) { \ 170 return __sync_fetch_and_sub(&a->repr, val); \ 171 } \ 172 \ 173 ATOMIC_INLINE type \ 174 atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val, \ 175 atomic_memory_order_t mo) { \ 176 return __sync_fetch_and_and(&a->repr, val); \ 177 } \ 178 \ 179 ATOMIC_INLINE type \ 180 atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val, \ 181 atomic_memory_order_t mo) { \ 182 return __sync_fetch_and_or(&a->repr, val); \ 183 } \ 184 \ 185 ATOMIC_INLINE type \ 186 atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val, \ 187 atomic_memory_order_t mo) { \ 188 return __sync_fetch_and_xor(&a->repr, val); \ 189 } 190 191 #endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H */ 192