1 #ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H
2 #define JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H
3 
4 #define ATOMIC_INIT(...) {__VA_ARGS__}
5 
/*
 * Memory-order constants mirroring C11's memory_order_* values;
 * memory_order_consume has no counterpart here (this backend does not
 * provide it).
 */
typedef enum {
	atomic_memory_order_relaxed,
	atomic_memory_order_acquire,
	atomic_memory_order_release,
	atomic_memory_order_acq_rel,
	atomic_memory_order_seq_cst
} atomic_memory_order_t;
13 
/*
 * Issue a fence of strength mo.  Every path includes a compiler barrier
 * (the empty asm with a "memory" clobber); weakly ordered architectures
 * additionally get a hardware barrier instruction for the non-relaxed
 * orders.
 */
ATOMIC_INLINE void
atomic_fence(atomic_memory_order_t mo) {
	/* Easy cases first: no barrier, and full barrier. */
	if (mo == atomic_memory_order_relaxed) {
		/* Compiler barrier only; no hardware ordering required. */
		asm volatile("" ::: "memory");
		return;
	}
	if (mo == atomic_memory_order_seq_cst) {
		asm volatile("" ::: "memory");
		__sync_synchronize();
		asm volatile("" ::: "memory");
		return;
	}
	/* Remaining orders: acquire, release, acq_rel. */
	asm volatile("" ::: "memory");
#  if defined(__i386__) || defined(__x86_64__)
	/* This is implicit on x86. */
#  elif defined(__ppc__)
	/* lwsync orders everything except StoreLoad, enough for acq/rel. */
	asm volatile("lwsync");
#  elif defined(__sparc__) && defined(__arch64__)
	if (mo == atomic_memory_order_acquire) {
		asm volatile("membar #LoadLoad | #LoadStore");
	} else if (mo == atomic_memory_order_release) {
		asm volatile("membar #LoadStore | #StoreStore");
	} else {
		/* acq_rel: union of the acquire and release membars. */
		asm volatile("membar #LoadLoad | #LoadStore | #StoreStore");
	}
#  else
	/* Unknown architecture: conservatively use a full barrier. */
	__sync_synchronize();
#  endif
	asm volatile("" ::: "memory");
}
45 
46 /*
47  * A correct implementation of seq_cst loads and stores on weakly ordered
48  * architectures could do either of the following:
49  *   1. store() is weak-fence -> store -> strong fence, load() is load ->
50  *      strong-fence.
51  *   2. store() is strong-fence -> store, load() is strong-fence -> load ->
52  *      weak-fence.
53  * The tricky thing is, load() and store() above can be the load or store
54  * portions of a gcc __sync builtin, so we have to follow GCC's lead, which
55  * means going with strategy 2.
56  * On strongly ordered architectures, the natural strategy is to stick a strong
57  * fence after seq_cst stores, and have naked loads.  So we want the strong
58  * fences in different places on different architectures.
59  * atomic_pre_sc_load_fence and atomic_post_sc_store_fence allow us to
60  * accomplish this.
61  */
62 
63 ATOMIC_INLINE void
64 atomic_pre_sc_load_fence() {
65 #  if defined(__i386__) || defined(__x86_64__) ||			\
66     (defined(__sparc__) && defined(__arch64__))
67 	atomic_fence(atomic_memory_order_relaxed);
68 #  else
69 	atomic_fence(atomic_memory_order_seq_cst);
70 #  endif
71 }
72 
73 ATOMIC_INLINE void
74 atomic_post_sc_store_fence() {
75 #  if defined(__i386__) || defined(__x86_64__) ||			\
76     (defined(__sparc__) && defined(__arch64__))
77 	atomic_fence(atomic_memory_order_seq_cst);
78 #  else
79 	atomic_fence(atomic_memory_order_relaxed);
80 #  endif
81 
82 }
83 
/*
 * Generate atomic_<short_type>_t (a struct wrapping a volatile <type>)
 * and its load/store/exchange/compare-exchange operations on top of the
 * gcc __sync builtins.  lg_size is accepted for signature parity with
 * the other atomic backends but unused here.
 */
#define JEMALLOC_GENERATE_ATOMICS(type, short_type,			\
    /* unused */ lg_size)						\
typedef struct {							\
	type volatile repr;						\
} atomic_##short_type##_t;						\
									\
ATOMIC_INLINE type							\
atomic_load_##short_type(const atomic_##short_type##_t *a,		\
    atomic_memory_order_t mo) {						\
	if (mo == atomic_memory_order_seq_cst) {			\
		atomic_pre_sc_load_fence();				\
	}								\
	type result = a->repr;						\
	if (mo != atomic_memory_order_relaxed) {			\
		atomic_fence(atomic_memory_order_acquire);		\
	}								\
	return result;							\
}									\
									\
ATOMIC_INLINE void							\
atomic_store_##short_type(atomic_##short_type##_t *a,			\
    type val, atomic_memory_order_t mo) {				\
	if (mo != atomic_memory_order_relaxed) {			\
		atomic_fence(atomic_memory_order_release);		\
	}								\
	a->repr = val;							\
	if (mo == atomic_memory_order_seq_cst) {			\
		atomic_post_sc_store_fence();				\
	}								\
}									\
									\
ATOMIC_INLINE type							\
atomic_exchange_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	/*								\
	 * Because of FreeBSD, we care about gcc 4.2, which doesn't have\
	 * an atomic exchange builtin.  We fake it with a CAS loop.	\
	 * mo is ignored: the __sync CAS is already a full barrier.	\
	 */								\
	while (true) {							\
		type old = a->repr;					\
		if (__sync_bool_compare_and_swap(&a->repr, old, val)) {	\
			return old;					\
		}							\
	}								\
}									\
									\
ATOMIC_INLINE bool							\
atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a,	\
    type *expected, type desired, atomic_memory_order_t success_mo,	\
    atomic_memory_order_t failure_mo) {					\
	/*								\
	 * success_mo/failure_mo are ignored; the __sync builtins are	\
	 * documented to be full barriers in both cases.		\
	 */								\
	type prev = __sync_val_compare_and_swap(&a->repr, *expected,	\
	    desired);							\
	if (prev == *expected) {					\
		return true;						\
	} else {							\
		/* On failure, report the value actually observed. */	\
		*expected = prev;					\
		return false;						\
	}								\
}									\
/*									\
 * Identical to the weak variant: __sync CAS never fails spuriously,	\
 * so weak and strong compare-exchange coincide in this backend.	\
 */									\
ATOMIC_INLINE bool							\
atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a,	\
    type *expected, type desired, atomic_memory_order_t success_mo,	\
    atomic_memory_order_t failure_mo) {					\
	type prev = __sync_val_compare_and_swap(&a->repr, *expected,	\
	    desired);							\
	if (prev == *expected) {					\
		return true;						\
	} else {							\
		*expected = prev;					\
		return false;						\
	}								\
}
156 
/*
 * Generate the integer-only read-modify-write operations (fetch-add/
 * sub/and/or/xor) in addition to everything from
 * JEMALLOC_GENERATE_ATOMICS.  In each, mo is ignored: the __sync
 * fetch-and-op builtins are documented to be full barriers.
 */
#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type,			\
    /* unused */ lg_size)						\
JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size)	\
									\
ATOMIC_INLINE type							\
atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_add(&a->repr, val);			\
}									\
									\
ATOMIC_INLINE type							\
atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_sub(&a->repr, val);			\
}									\
									\
ATOMIC_INLINE type							\
atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_and(&a->repr, val);			\
}									\
									\
ATOMIC_INLINE type							\
atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_or(&a->repr, val);			\
}									\
									\
ATOMIC_INLINE type							\
atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_xor(&a->repr, val);			\
}
190 
191 #endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H */
192