#ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H
#define JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H

#define ATOMIC_INIT(...) {__VA_ARGS__}
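/*
 * Usage sketch (illustrative; assumes a u32 instantiation of the macros
 * below): static atomic_u32_t counter = ATOMIC_INIT(0);
 */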

typedef enum {
	atomic_memory_order_relaxed,
	atomic_memory_order_acquire,
	atomic_memory_order_release,
	atomic_memory_order_acq_rel,
	atomic_memory_order_seq_cst
} atomic_memory_order_t;
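/*
 * These orderings mirror the C11 memory_order_* values (minus consume).
 * Loads, stores, and fences below honor the requested ordering; the
 * read-modify-write operations are built on gcc __sync builtins, which
 * behave as full barriers regardless of the ordering argument.
 */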

ATOMIC_INLINE void
atomic_fence(atomic_memory_order_t mo) {
	/* Easy cases first: no barrier, and full barrier. */
	if (mo == atomic_memory_order_relaxed) {
		asm volatile("" ::: "memory");
		return;
	}
	if (mo == atomic_memory_order_seq_cst) {
		asm volatile("" ::: "memory");
		__sync_synchronize();
		asm volatile("" ::: "memory");
		return;
	}
	asm volatile("" ::: "memory");
#  if defined(__i386__) || defined(__x86_64__)
	/* This is implicit on x86. */
#  elif defined(__ppc64__)
	asm volatile("lwsync");
#  elif defined(__ppc__)
	asm volatile("sync");
#  elif defined(__sparc__) && defined(__arch64__)
	if (mo == atomic_memory_order_acquire) {
		asm volatile("membar #LoadLoad | #LoadStore");
	} else if (mo == atomic_memory_order_release) {
		asm volatile("membar #LoadStore | #StoreStore");
	} else {
		asm volatile("membar #LoadLoad | #LoadStore | #StoreStore");
	}
#  else
	__sync_synchronize();
#  endif
	asm volatile("" ::: "memory");
}

/*
 * A correct implementation of seq_cst loads and stores on weakly ordered
 * architectures could do either of the following:
 *   1. store() is weak-fence -> store -> strong-fence, load() is load ->
 *      strong-fence.
 *   2. store() is strong-fence -> store, load() is strong-fence -> load ->
 *      weak-fence.
 * The tricky thing is, load() and store() above can be the load or store
 * portions of a gcc __sync builtin, so we have to follow GCC's lead, which
 * means going with strategy 2.
 * On strongly ordered architectures, the natural strategy is to stick a strong
 * fence after seq_cst stores, and have naked loads.  So we want the strong
 * fences in different places on different architectures.
 * atomic_pre_sc_load_fence and atomic_post_sc_store_fence allow us to
 * accomplish this.
 */
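/*
 * Concretely, the seq_cst operations generated below expand to
 *     load:  atomic_pre_sc_load_fence(); plain load; acquire fence;
 *     store: release fence; plain store; atomic_post_sc_store_fence();
 * so the strong fence ends up after the store on x86 and 64-bit SPARC, and
 * before the load everywhere else.
 */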

ATOMIC_INLINE void
atomic_pre_sc_load_fence(void) {
#  if defined(__i386__) || defined(__x86_64__) ||			\
    (defined(__sparc__) && defined(__arch64__))
	atomic_fence(atomic_memory_order_relaxed);
#  else
	atomic_fence(atomic_memory_order_seq_cst);
#  endif
}

ATOMIC_INLINE void
atomic_post_sc_store_fence(void) {
#  if defined(__i386__) || defined(__x86_64__) ||			\
    (defined(__sparc__) && defined(__arch64__))
	atomic_fence(atomic_memory_order_seq_cst);
#  else
	atomic_fence(atomic_memory_order_relaxed);
#  endif
}

#define JEMALLOC_GENERATE_ATOMICS(type, short_type,			\
    /* unused */ lg_size)						\
typedef struct {							\
	type volatile repr;						\
} atomic_##short_type##_t;						\
									\
ATOMIC_INLINE type							\
atomic_load_##short_type(const atomic_##short_type##_t *a,		\
    atomic_memory_order_t mo) {						\
	if (mo == atomic_memory_order_seq_cst) {			\
		atomic_pre_sc_load_fence();				\
	}								\
	type result = a->repr;						\
	if (mo != atomic_memory_order_relaxed) {			\
		atomic_fence(atomic_memory_order_acquire);		\
	}								\
	return result;							\
}									\
									\
ATOMIC_INLINE void							\
atomic_store_##short_type(atomic_##short_type##_t *a,			\
    type val, atomic_memory_order_t mo) {				\
	if (mo != atomic_memory_order_relaxed) {			\
		atomic_fence(atomic_memory_order_release);		\
	}								\
	a->repr = val;							\
	if (mo == atomic_memory_order_seq_cst) {			\
		atomic_post_sc_store_fence();				\
	}								\
}									\
									\
ATOMIC_INLINE type							\
atomic_exchange_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	/*								\
	 * Because of FreeBSD, we care about gcc 4.2, which doesn't have\
	 * an atomic exchange builtin.  We fake it with a CAS loop.	\
	 */								\
	while (true) {							\
		type old = a->repr;					\
		if (__sync_bool_compare_and_swap(&a->repr, old, val)) {	\
			return old;					\
		}							\
	}								\
}									\
									\
ATOMIC_INLINE bool							\
atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a,	\
    type *expected, type desired, atomic_memory_order_t success_mo,	\
    atomic_memory_order_t failure_mo) {					\
	type prev = __sync_val_compare_and_swap(&a->repr, *expected,	\
	    desired);							\
	if (prev == *expected) {					\
		return true;						\
	} else {							\
		*expected = prev;					\
		return false;						\
	}								\
}									\
									\
ATOMIC_INLINE bool							\
atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a,	\
    type *expected, type desired, atomic_memory_order_t success_mo,	\
    atomic_memory_order_t failure_mo) {					\
	type prev = __sync_val_compare_and_swap(&a->repr, *expected,	\
	    desired);							\
	if (prev == *expected) {					\
		return true;						\
	} else {							\
		*expected = prev;					\
		return false;						\
	}								\
}
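/*
 * Illustrative instantiation (the actual ones live in atomic.h); for example,
 *     JEMALLOC_GENERATE_ATOMICS(void *, p, LG_SIZEOF_PTR)
 * would define atomic_p_t plus atomic_load_p(), atomic_store_p(),
 * atomic_exchange_p(), and the compare-exchange variants.
 */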

#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type,			\
    /* unused */ lg_size)						\
JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size)	\
									\
ATOMIC_INLINE type							\
atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_add(&a->repr, val);			\
}									\
									\
ATOMIC_INLINE type							\
atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_sub(&a->repr, val);			\
}									\
									\
ATOMIC_INLINE type							\
atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_and(&a->repr, val);			\
}									\
									\
ATOMIC_INLINE type							\
atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_or(&a->repr, val);			\
}									\
									\
ATOMIC_INLINE type							\
atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_xor(&a->repr, val);			\
}
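/*
 * For example (illustrative; actual instantiations are in atomic.h),
 *     JEMALLOC_GENERATE_INT_ATOMICS(uint32_t, u32, 2)
 * would additionally provide atomic_fetch_add_u32(), atomic_fetch_sub_u32(),
 * atomic_fetch_and_u32(), atomic_fetch_or_u32(), and atomic_fetch_xor_u32().
 */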

#endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H */