1 /*
2 * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved.
3 * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved.
4 * Copyright (c) 2003-2011 Hewlett-Packard Development Company, L.P.
5 *
6 *
7 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
8 * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
9 *
10 * Permission is hereby granted to use or copy this program
11 * for any purpose, provided the above notices are retained on all copies.
12 * Permission to modify the code and to distribute modified code is granted,
13 * provided the above notices are retained, and a notice that the code was
14 * modified is included with the above copyright notice.
15 *
16 */
17
18 /* Memory model documented at http://www-106.ibm.com/developerworks/ */
19 /* eserver/articles/archguide.html and (clearer) */
20 /* http://www-106.ibm.com/developerworks/eserver/articles/powerpc.html. */
21 /* There appears to be no implicit ordering between any kind of */
22 /* independent memory references. */
23
24 /* TODO: Implement double-wide operations if available. */
25
26 #if (AO_GNUC_PREREQ(4, 8) || AO_CLANG_PREREQ(3, 8)) \
27 && !defined(AO_DISABLE_GCC_ATOMICS)
28 /* Probably, it could be enabled even for earlier gcc/clang versions. */
29
30 /* TODO: As of clang-3.8.1, it emits lwsync in AO_load_acquire */
31 /* (i.e., the code is less efficient than the one given below). */
32
33 # include "generic.h"
34
35 #else /* AO_DISABLE_GCC_ATOMICS */
36
37 /* Architecture enforces some ordering based on control dependence. */
38 /* I don't know if that could help. */
39 /* Data-dependent loads are always ordered. */
40 /* Based on the above references, eieio is intended for use on */
41 /* uncached memory, which we don't support. It does not order loads */
42 /* from cached memory. */
43
44 #include "../all_aligned_atomic_load_store.h"
45
46 #include "../test_and_set_t_is_ao_t.h"
47 /* There seems to be no byte equivalent of lwarx, so this */
48 /* may really be what we want, at least in the 32-bit case. */
49
AO_INLINE void
AO_nop_full(void)
{
  /* "sync" (heavyweight sync) is a full barrier, ordering all four    */
  /* load/store combinations including StoreLoad, which lwsync below   */
  /* does not provide.                                                 */
  __asm__ __volatile__("sync" : : : "memory");
}
#define AO_HAVE_nop_full
56
57 /* lwsync apparently works for everything but a StoreLoad barrier. */
AO_INLINE void
AO_lwsync(void)
{
#ifdef __NO_LWSYNC__
  /* Target lacks lwsync; fall back to the full (slower) sync barrier. */
  __asm__ __volatile__("sync" : : : "memory");
#else
  /* Lightweight sync: per the comment above, orders everything except */
  /* StoreLoad, making it suitable for acquire/release fences.         */
  __asm__ __volatile__("lwsync" : : : "memory");
#endif
}
67
68 #define AO_nop_write() AO_lwsync()
69 #define AO_HAVE_nop_write
70
71 #define AO_nop_read() AO_lwsync()
72 #define AO_HAVE_nop_read
73
/* Select the instruction mnemonics matching the width of AO_t:        */
/* doubleword forms on ppc64, word forms on 32-bit ppc.                */
#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
  /* ppc64 uses ld not lwz */
# define AO_PPC_LD      "ld"            /* plain load                  */
# define AO_PPC_LxARX   "ldarx"         /* load and reserve            */
# define AO_PPC_CMPx    "cmpd"          /* compare                     */
# define AO_PPC_STxCXd  "stdcx."        /* store conditional           */
# define AO_PPC_LOAD_CLOBBER "cr0"
#else
# define AO_PPC_LD      "lwz"
# define AO_PPC_LxARX   "lwarx"
# define AO_PPC_CMPx    "cmpw"
# define AO_PPC_STxCXd  "stwcx."
# define AO_PPC_LOAD_CLOBBER "cc"
  /* FIXME: We should get gcc to allocate one of the condition         */
  /* registers.  I always got "impossible constraint" when I           */
  /* tried the "y" constraint.                                         */
# define AO_T_IS_INT    /* AO_t fits in an int on 32-bit targets       */
#endif
92
#ifdef _AIX
  /* The AIX assembler does not support local labels, so branches use  */
  /* fixed byte displacements ("$+N") instead.  The same displacements */
  /* work on ppc64 and 32-bit ppc, since instruction sizes match.      */
# define AO_PPC_L(label) /* empty */
# define AO_PPC_BR_A(labelBF, addr) addr
#else
  /* Elsewhere, emit real local labels and branch to them by name.     */
# define AO_PPC_L(label) label ": "
# define AO_PPC_BR_A(labelBF, addr) labelBF
#endif
102
103 /* We explicitly specify load_acquire, since it is important, and can */
104 /* be implemented relatively cheaply. It could be implemented */
105 /* with an ordinary load followed by a lwsync. But the general wisdom */
106 /* seems to be that a data dependent branch followed by an isync is */
107 /* cheaper. And the documentation is fairly explicit that this also */
108 /* has acquire semantics. */
AO_INLINE AO_t
AO_load_acquire(const volatile AO_t *addr)
{
  AO_t result;

  /* Load, then make a branch that (artificially) depends on the       */
  /* loaded value: "cmpw %0,%0" always sets EQ, so "bne-" is never     */
  /* taken, but the dependent-branch + isync sequence provides the     */
  /* acquire ordering (see the comment block above).                   */
  __asm__ __volatile__ (
    AO_PPC_LD "%U1%X1 %0,%1\n"
    "cmpw %0,%0\n"
    "bne- " AO_PPC_BR_A("1f", "$+4") "\n"
    AO_PPC_L("1") "isync\n"
    : "=r" (result)
    : "m"(*addr) : "memory", AO_PPC_LOAD_CLOBBER);
  return result;
}
#define AO_HAVE_load_acquire
124
125 /* We explicitly specify store_release, since it relies */
126 /* on the fact that lwsync is also a LoadStore barrier. */
AO_INLINE void
AO_store_release(volatile AO_t *addr, AO_t value)
{
  /* lwsync before the store acts as both a StoreStore and a LoadStore */
  /* barrier (see the comment above), which is what release requires.  */
  AO_lwsync();
  *addr = value;
}
#define AO_HAVE_store_release
134
135 #ifndef AO_PREFER_GENERALIZED
136 /* This is similar to the code in the garbage collector. Deleting */
137 /* this and having it synthesized from compare_and_swap would probably */
138 /* only cost us a load immediate instruction. */
AO_INLINE AO_TS_VAL_t
AO_test_and_set(volatile AO_TS_t *addr) {
  /* Completely untested.  And we should be using smaller objects anyway. */
  AO_t oldval;
  AO_t temp = 1; /* locked value */

  /* larx/stcx. retry loop: load *addr with a reservation; if it is    */
  /* already nonzero, return it unchanged (already set); otherwise     */
  /* attempt to store 1, retrying from the load if the reservation     */
  /* was lost.  Returns 0 iff this call performed the set.             */
  __asm__ __volatile__(
               AO_PPC_L("1") AO_PPC_LxARX " %0,0,%1\n"
                                                /* load and reserve    */
               AO_PPC_CMPx "i %0, 0\n"          /* if load is          */
               "bne " AO_PPC_BR_A("2f", "$+12") "\n"
                                        /* non-zero, return already set */
               AO_PPC_STxCXd " %2,0,%1\n"    /* else store conditional */
               "bne- " AO_PPC_BR_A("1b", "$-16") "\n"
                                        /* retry if lost reservation */
               AO_PPC_L("2") "\n"       /* oldval is zero if we set */
              : "=&r"(oldval)
              : "r"(addr), "r"(temp)
              : "memory", "cr0");
  return (AO_TS_VAL_t)oldval;
}
#define AO_HAVE_test_and_set
161
162 AO_INLINE AO_TS_VAL_t
AO_test_and_set_acquire(volatile AO_TS_t * addr)163 AO_test_and_set_acquire(volatile AO_TS_t *addr) {
164 AO_TS_VAL_t result = AO_test_and_set(addr);
165 AO_lwsync();
166 return result;
167 }
168 #define AO_HAVE_test_and_set_acquire
169
170 AO_INLINE AO_TS_VAL_t
AO_test_and_set_release(volatile AO_TS_t * addr)171 AO_test_and_set_release(volatile AO_TS_t *addr) {
172 AO_lwsync();
173 return AO_test_and_set(addr);
174 }
175 #define AO_HAVE_test_and_set_release
176
177 AO_INLINE AO_TS_VAL_t
AO_test_and_set_full(volatile AO_TS_t * addr)178 AO_test_and_set_full(volatile AO_TS_t *addr) {
179 AO_TS_VAL_t result;
180 AO_lwsync();
181 result = AO_test_and_set(addr);
182 AO_lwsync();
183 return result;
184 }
185 #define AO_HAVE_test_and_set_full
186 #endif /* !AO_PREFER_GENERALIZED */
187
188 #ifndef AO_GENERALIZE_ASM_BOOL_CAS
189
AO_INLINE int
AO_compare_and_swap(volatile AO_t *addr, AO_t old, AO_t new_val)
{
  AO_t oldval;
  int result = 0;

  /* Returns nonzero iff *addr contained old and has been atomically   */
  /* replaced with new_val.  No memory barrier is implied.             */
  __asm__ __volatile__(
        AO_PPC_L("1") AO_PPC_LxARX " %0,0,%2\n" /* load and reserve */
        AO_PPC_CMPx " %0, %4\n"         /* if load is not equal to */
        "bne " AO_PPC_BR_A("2f", "$+16") "\n"   /* old, fail */
        AO_PPC_STxCXd " %3,0,%2\n"      /* else store conditional */
        "bne- " AO_PPC_BR_A("1b", "$-16") "\n"
                                        /* retry if lost reservation */
        "li %1,1\n"                     /* result = 1; */
        AO_PPC_L("2") "\n"
        : "=&r"(oldval), "=&r"(result)
        : "r"(addr), "r"(new_val), "r"(old), "1"(result)
        : "memory", "cr0");
  return result;
}
# define AO_HAVE_compare_and_swap
211
212 AO_INLINE int
AO_compare_and_swap_acquire(volatile AO_t * addr,AO_t old,AO_t new_val)213 AO_compare_and_swap_acquire(volatile AO_t *addr, AO_t old, AO_t new_val)
214 {
215 int result = AO_compare_and_swap(addr, old, new_val);
216 AO_lwsync();
217 return result;
218 }
219 # define AO_HAVE_compare_and_swap_acquire
220
221 AO_INLINE int
AO_compare_and_swap_release(volatile AO_t * addr,AO_t old,AO_t new_val)222 AO_compare_and_swap_release(volatile AO_t *addr, AO_t old, AO_t new_val)
223 {
224 AO_lwsync();
225 return AO_compare_and_swap(addr, old, new_val);
226 }
227 # define AO_HAVE_compare_and_swap_release
228
229 AO_INLINE int
AO_compare_and_swap_full(volatile AO_t * addr,AO_t old,AO_t new_val)230 AO_compare_and_swap_full(volatile AO_t *addr, AO_t old, AO_t new_val)
231 {
232 int result;
233 AO_lwsync();
234 result = AO_compare_and_swap(addr, old, new_val);
235 if (result)
236 AO_lwsync();
237 return result;
238 }
239 # define AO_HAVE_compare_and_swap_full
240
241 #endif /* !AO_GENERALIZE_ASM_BOOL_CAS */
242
AO_INLINE AO_t
AO_fetch_compare_and_swap(volatile AO_t *addr, AO_t old_val, AO_t new_val)
{
  AO_t fetched_val;

  /* Returns the value observed at *addr; the store of new_val took    */
  /* place iff that value equals old_val.  No barrier is implied.      */
  __asm__ __volatile__(
        AO_PPC_L("1") AO_PPC_LxARX " %0,0,%1\n" /* load and reserve */
        AO_PPC_CMPx " %0, %3\n"         /* if load is not equal to */
        "bne " AO_PPC_BR_A("2f", "$+12") "\n"   /* old_val, fail */
        AO_PPC_STxCXd " %2,0,%1\n"      /* else store conditional */
        "bne- " AO_PPC_BR_A("1b", "$-16") "\n"
                                        /* retry if lost reservation */
        AO_PPC_L("2") "\n"
        : "=&r"(fetched_val)
        : "r"(addr), "r"(new_val), "r"(old_val)
        : "memory", "cr0");
  return fetched_val;
}
#define AO_HAVE_fetch_compare_and_swap
262
263 AO_INLINE AO_t
AO_fetch_compare_and_swap_acquire(volatile AO_t * addr,AO_t old_val,AO_t new_val)264 AO_fetch_compare_and_swap_acquire(volatile AO_t *addr, AO_t old_val,
265 AO_t new_val)
266 {
267 AO_t result = AO_fetch_compare_and_swap(addr, old_val, new_val);
268 AO_lwsync();
269 return result;
270 }
271 #define AO_HAVE_fetch_compare_and_swap_acquire
272
273 AO_INLINE AO_t
AO_fetch_compare_and_swap_release(volatile AO_t * addr,AO_t old_val,AO_t new_val)274 AO_fetch_compare_and_swap_release(volatile AO_t *addr, AO_t old_val,
275 AO_t new_val)
276 {
277 AO_lwsync();
278 return AO_fetch_compare_and_swap(addr, old_val, new_val);
279 }
280 #define AO_HAVE_fetch_compare_and_swap_release
281
282 AO_INLINE AO_t
AO_fetch_compare_and_swap_full(volatile AO_t * addr,AO_t old_val,AO_t new_val)283 AO_fetch_compare_and_swap_full(volatile AO_t *addr, AO_t old_val,
284 AO_t new_val)
285 {
286 AO_t result;
287 AO_lwsync();
288 result = AO_fetch_compare_and_swap(addr, old_val, new_val);
289 if (result == old_val)
290 AO_lwsync();
291 return result;
292 }
293 #define AO_HAVE_fetch_compare_and_swap_full
294
295 #ifndef AO_PREFER_GENERALIZED
AO_INLINE AO_t
AO_fetch_and_add(volatile AO_t *addr, AO_t incr) {
  AO_t oldval;
  AO_t newval;

  /* Atomically add incr to *addr; returns the value previously held   */
  /* at *addr.  No memory barrier is implied.                          */
  __asm__ __volatile__(
               AO_PPC_L("1") AO_PPC_LxARX " %0,0,%2\n"
                                                /* load and reserve    */
               "add %1,%0,%3\n"                 /* increment           */
               AO_PPC_STxCXd " %1,0,%2\n"       /* store conditional   */
               "bne- " AO_PPC_BR_A("1b", "$-12") "\n"
                                        /* retry if lost reservation */
              : "=&r"(oldval), "=&r"(newval)
              : "r"(addr), "r"(incr)
              : "memory", "cr0");
  return oldval;
}
#define AO_HAVE_fetch_and_add
313
314 AO_INLINE AO_t
AO_fetch_and_add_acquire(volatile AO_t * addr,AO_t incr)315 AO_fetch_and_add_acquire(volatile AO_t *addr, AO_t incr) {
316 AO_t result = AO_fetch_and_add(addr, incr);
317 AO_lwsync();
318 return result;
319 }
320 #define AO_HAVE_fetch_and_add_acquire
321
322 AO_INLINE AO_t
AO_fetch_and_add_release(volatile AO_t * addr,AO_t incr)323 AO_fetch_and_add_release(volatile AO_t *addr, AO_t incr) {
324 AO_lwsync();
325 return AO_fetch_and_add(addr, incr);
326 }
327 #define AO_HAVE_fetch_and_add_release
328
329 AO_INLINE AO_t
AO_fetch_and_add_full(volatile AO_t * addr,AO_t incr)330 AO_fetch_and_add_full(volatile AO_t *addr, AO_t incr) {
331 AO_t result;
332 AO_lwsync();
333 result = AO_fetch_and_add(addr, incr);
334 AO_lwsync();
335 return result;
336 }
337 #define AO_HAVE_fetch_and_add_full
338 #endif /* !AO_PREFER_GENERALIZED */
339
340 #undef AO_PPC_BR_A
341 #undef AO_PPC_CMPx
342 #undef AO_PPC_L
343 #undef AO_PPC_LD
344 #undef AO_PPC_LOAD_CLOBBER
345 #undef AO_PPC_LxARX
346 #undef AO_PPC_STxCXd
347
348 #endif /* AO_DISABLE_GCC_ATOMICS */
349