/*
 * Copyright (c) 1991-1994 by Xerox Corporation.  All rights reserved.
 * Copyright (c) 1996-1999 by Silicon Graphics.  All rights reserved.
 * Copyright (c) 2003-2011 Hewlett-Packard Development Company, L.P.
 *
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose,  provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 *
 */

/* Memory model documented at http://www-106.ibm.com/developerworks/    */
/* eserver/articles/archguide.html and (clearer)                        */
/* http://www-106.ibm.com/developerworks/eserver/articles/powerpc.html. */
/* There appears to be no implicit ordering between any kind of         */
/* independent memory references.                                       */
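
/* For example (a sketch, with x and flag the client's own AO_t         */
/* variables, both initially zero): a writer executing                  */
/*   AO_store(&x, 1); AO_store(&flag, 1);                               */
/* paired with a reader executing                                       */
/*   if (AO_load(&flag) != 0) assert(AO_load(&x) == 1);                 */
/* may fail the assertion here, since neither the two stores nor the    */
/* two loads are ordered with respect to each other; the release and    */
/* acquire primitives below insert the required barriers.               */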

/* TODO: Implement double-wide operations if available. */

#if (AO_GNUC_PREREQ(4, 8) || AO_CLANG_PREREQ(3, 8)) \
    && !defined(AO_DISABLE_GCC_ATOMICS)
  /* It could probably be enabled even for earlier gcc/clang versions. */

  /* TODO: As of clang-3.8.1, it emits lwsync in AO_load_acquire        */
  /* (i.e., the code is less efficient than the one given below).       */

# include "generic.h"
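
  /* For reference: generic.h maps these primitives onto the compiler's */
  /* __atomic intrinsics; e.g., AO_load_acquire essentially becomes     */
  /* __atomic_load_n(addr, __ATOMIC_ACQUIRE).                           */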

#else /* AO_DISABLE_GCC_ATOMICS */

/* The architecture enforces some ordering based on control dependence, */
/* but it is not clear whether that could help here.                    */
/* Data-dependent loads are always ordered.                             */
/* Based on the above references, eieio is intended for use on          */
/* uncached memory, which we don't support.  It does not order loads    */
/* from cached memory.                                                  */

#include "../all_aligned_atomic_load_store.h"

#include "../test_and_set_t_is_ao_t.h"
        /* There seems to be no byte equivalent of lwarx, so this       */
        /* may really be what we want, at least in the 32-bit case.     */

AO_INLINE void
AO_nop_full(void)
{
  __asm__ __volatile__("sync" : : : "memory");
}
#define AO_HAVE_nop_full

/* lwsync apparently works for everything but a StoreLoad barrier,      */
/* i.e. it orders LoadLoad, LoadStore and StoreStore pairs; a full      */
/* StoreLoad barrier still requires sync (see AO_nop_full above).       */
AO_INLINE void
AO_lwsync(void)
{
#ifdef __NO_LWSYNC__
  __asm__ __volatile__("sync" : : : "memory");
#else
  __asm__ __volatile__("lwsync" : : : "memory");
#endif
}

#define AO_nop_write() AO_lwsync()
#define AO_HAVE_nop_write

#define AO_nop_read() AO_lwsync()
#define AO_HAVE_nop_read

#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
  /* ppc64 uses ld, not lwz. */
# define AO_PPC_LD      "ld"
# define AO_PPC_LxARX   "ldarx"
# define AO_PPC_CMPx    "cmpd"
# define AO_PPC_STxCXd  "stdcx."
# define AO_PPC_LOAD_CLOBBER "cr0"
#else
# define AO_PPC_LD      "lwz"
# define AO_PPC_LxARX   "lwarx"
# define AO_PPC_CMPx    "cmpw"
# define AO_PPC_STxCXd  "stwcx."
# define AO_PPC_LOAD_CLOBBER "cc"
        /* FIXME: We should get gcc to allocate one of the condition    */
        /* registers.  I always got "impossible constraint" when I      */
        /* tried the "y" constraint.                                    */
# define AO_T_IS_INT
#endif

#ifdef _AIX
  /* Local labels are not supported by the AIX assembler, so relative   */
  /* branch displacements are hard-coded instead.  Since instructions   */
  /* are 4 bytes on both 32- and 64-bit targets, the same displacements */
  /* work for ppc64 as well.                                            */
# define AO_PPC_L(label) /* empty */
# define AO_PPC_BR_A(labelBF, addr) addr
#else
# define AO_PPC_L(label) label ": "
# define AO_PPC_BR_A(labelBF, addr) labelBF
#endif
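
/* For example, "bne- " AO_PPC_BR_A("1b", "$-16") expands to the        */
/* label-based "bne- 1b" on non-AIX targets, and to the displacement    */
/* form "bne- $-16" on AIX.                                             */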

/* We provide load_acquire explicitly, since it is important and can    */
/* be implemented relatively cheaply.  It could be implemented with an  */
/* ordinary load followed by an lwsync, but the general wisdom seems    */
/* to be that a load followed by a data-dependent branch and an isync   */
/* is cheaper.  And the architecture documentation is fairly explicit   */
/* that this sequence also has acquire semantics.                       */
AO_INLINE AO_t
AO_load_acquire(const volatile AO_t *addr)
{
  AO_t result;

  __asm__ __volatile__ (
    AO_PPC_LD "%U1%X1 %0,%1\n"
    "cmpw %0,%0\n"      /* create a data dependence on the loaded value */
    "bne- " AO_PPC_BR_A("1f", "$+4") "\n"       /* branch never taken   */
    AO_PPC_L("1") "isync\n"
    : "=r" (result)
    : "m"(*addr) : "memory", AO_PPC_LOAD_CLOBBER);
  return result;
}
#define AO_HAVE_load_acquire
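
/* A minimal alternative sketch of the lwsync-based variant mentioned   */
/* above (not used; the name is ours):                                  */
#if 0
  AO_INLINE AO_t
  AO_load_acquire_via_lwsync(const volatile AO_t *addr)
  {
    AO_t result = *addr;        /* ordinary load                        */
    AO_lwsync();        /* keeps subsequent loads and stores after the  */
                        /* load above                                   */
    return result;
  }
#endif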

/* We provide store_release explicitly, since it relies on the fact     */
/* that lwsync is also a LoadStore barrier.                             */
AO_INLINE void
AO_store_release(volatile AO_t *addr, AO_t value)
{
  AO_lwsync();
  *addr = value;
}
#define AO_HAVE_store_release
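
/* Usage sketch (obj and shared_ptr are the client's own, not part of   */
/* this API): a producer can publish fully initialized data with        */
/*   obj->field = ...; AO_store_release(&shared_ptr, (AO_t)obj);        */
/* and a consumer pairs this with AO_load_acquire(&shared_ptr).         */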

#ifndef AO_PREFER_GENERALIZED
/* This is similar to the code in the garbage collector.  Deleting      */
/* this and having it synthesized from compare_and_swap would probably  */
/* only cost us a load immediate instruction (a sketch of such a        */
/* synthesized variant follows the definition below).                   */
AO_INLINE AO_TS_VAL_t
AO_test_and_set(volatile AO_TS_t *addr) {
/* Completely untested.  And we should be using smaller objects anyway. */
  AO_t oldval;
  AO_t temp = 1; /* locked value */

  __asm__ __volatile__(
               AO_PPC_L("1") AO_PPC_LxARX " %0,0,%1\n"
                                                /* load and reserve     */
               AO_PPC_CMPx "i %0, 0\n"          /* if load is           */
               "bne " AO_PPC_BR_A("2f", "$+12") "\n"
                                    /* non-zero, return already set     */
               AO_PPC_STxCXd " %2,0,%1\n"   /* else store conditional   */
               "bne- " AO_PPC_BR_A("1b", "$-16") "\n"
                                    /* retry if lost reservation        */
               AO_PPC_L("2") "\n"   /* oldval is zero if we set         */
              : "=&r"(oldval)
              : "r"(addr), "r"(temp)
              : "memory", "cr0");
  return (AO_TS_VAL_t)oldval;
}
#define AO_HAVE_test_and_set
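
/* A sketch of the synthesized variant mentioned above (not used; the   */
/* name is ours).  AO_TS_t is AO_t here, and a successful CAS from 0    */
/* to 1 means the flag was previously clear:                            */
#if 0
  AO_INLINE AO_TS_VAL_t
  AO_test_and_set_synthesized(volatile AO_TS_t *addr)
  {
    /* li 1 (the "load immediate" mentioned above) + the CAS itself.    */
    return AO_compare_and_swap((volatile AO_t *)addr, 0, 1)
           ? AO_TS_CLEAR : AO_TS_SET;
  }
#endif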

AO_INLINE AO_TS_VAL_t
AO_test_and_set_acquire(volatile AO_TS_t *addr) {
  AO_TS_VAL_t result = AO_test_and_set(addr);
  AO_lwsync();
  return result;
}
#define AO_HAVE_test_and_set_acquire

AO_INLINE AO_TS_VAL_t
AO_test_and_set_release(volatile AO_TS_t *addr) {
  AO_lwsync();
  return AO_test_and_set(addr);
}
#define AO_HAVE_test_and_set_release

AO_INLINE AO_TS_VAL_t
AO_test_and_set_full(volatile AO_TS_t *addr) {
  AO_TS_VAL_t result;
  AO_lwsync();
  result = AO_test_and_set(addr);
  AO_lwsync();
  return result;
}
#define AO_HAVE_test_and_set_full
#endif /* !AO_PREFER_GENERALIZED */

#ifndef AO_GENERALIZE_ASM_BOOL_CAS

  AO_INLINE int
  AO_compare_and_swap(volatile AO_t *addr, AO_t old, AO_t new_val)
  {
    AO_t oldval;
    int result = 0;

    __asm__ __volatile__(
        AO_PPC_L("1") AO_PPC_LxARX " %0,0,%2\n" /* load and reserve */
        AO_PPC_CMPx " %0, %4\n" /* if load is not equal to      */
        "bne " AO_PPC_BR_A("2f", "$+16") "\n"   /*   old, fail  */
        AO_PPC_STxCXd " %3,0,%2\n"  /* else store conditional   */
        "bne- " AO_PPC_BR_A("1b", "$-16") "\n"
                                /* retry if lost reservation    */
        "li %1,1\n"             /* result = 1;                  */
        AO_PPC_L("2") "\n"
        : "=&r"(oldval), "=&r"(result)
        : "r"(addr), "r"(new_val), "r"(old), "1"(result)
        : "memory", "cr0");
    return result;
  }
# define AO_HAVE_compare_and_swap

  AO_INLINE int
  AO_compare_and_swap_acquire(volatile AO_t *addr, AO_t old, AO_t new_val)
  {
    int result = AO_compare_and_swap(addr, old, new_val);
    AO_lwsync();
    return result;
  }
# define AO_HAVE_compare_and_swap_acquire

  AO_INLINE int
  AO_compare_and_swap_release(volatile AO_t *addr, AO_t old, AO_t new_val)
  {
    AO_lwsync();
    return AO_compare_and_swap(addr, old, new_val);
  }
# define AO_HAVE_compare_and_swap_release

  AO_INLINE int
  AO_compare_and_swap_full(volatile AO_t *addr, AO_t old, AO_t new_val)
  {
    int result;
    AO_lwsync();
    result = AO_compare_and_swap(addr, old, new_val);
    if (result)
      AO_lwsync();      /* the trailing barrier is needed only if the   */
                        /* CAS actually succeeded                       */
    return result;
  }
# define AO_HAVE_compare_and_swap_full

#endif /* !AO_GENERALIZE_ASM_BOOL_CAS */

AO_INLINE AO_t
AO_fetch_compare_and_swap(volatile AO_t *addr, AO_t old_val, AO_t new_val)
{
  AO_t fetched_val;

  __asm__ __volatile__(
      AO_PPC_L("1") AO_PPC_LxARX " %0,0,%1\n" /* load and reserve */
      AO_PPC_CMPx " %0, %3\n"   /* if load is not equal to      */
      "bne " AO_PPC_BR_A("2f", "$+12") "\n" /*   old_val, fail  */
      AO_PPC_STxCXd " %2,0,%1\n"    /* else store conditional   */
      "bne- " AO_PPC_BR_A("1b", "$-16") "\n"
                                /* retry if lost reservation    */
      AO_PPC_L("2") "\n"
      : "=&r"(fetched_val)
      : "r"(addr), "r"(new_val), "r"(old_val)
      : "memory", "cr0");
  return fetched_val;
}
#define AO_HAVE_fetch_compare_and_swap
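
/* Usage sketch (the helper name is ours, not part of the API): a       */
/* typical read-modify-write loop built on AO_fetch_compare_and_swap.   */
#if 0
  AO_INLINE void
  AO_example_or(volatile AO_t *addr, AO_t bits)
  {
    AO_t old;

    do {
      old = *addr;
    } while (AO_fetch_compare_and_swap(addr, old, old | bits) != old);
  }
#endif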

AO_INLINE AO_t
AO_fetch_compare_and_swap_acquire(volatile AO_t *addr, AO_t old_val,
                                  AO_t new_val)
{
  AO_t result = AO_fetch_compare_and_swap(addr, old_val, new_val);
  AO_lwsync();
  return result;
}
#define AO_HAVE_fetch_compare_and_swap_acquire

AO_INLINE AO_t
AO_fetch_compare_and_swap_release(volatile AO_t *addr, AO_t old_val,
                                  AO_t new_val)
{
  AO_lwsync();
  return AO_fetch_compare_and_swap(addr, old_val, new_val);
}
#define AO_HAVE_fetch_compare_and_swap_release

AO_INLINE AO_t
AO_fetch_compare_and_swap_full(volatile AO_t *addr, AO_t old_val,
                               AO_t new_val)
{
  AO_t result;
  AO_lwsync();
  result = AO_fetch_compare_and_swap(addr, old_val, new_val);
  if (result == old_val)
    AO_lwsync();        /* the trailing barrier is needed only if the   */
                        /* swap actually took place                     */
  return result;
}
#define AO_HAVE_fetch_compare_and_swap_full

#ifndef AO_PREFER_GENERALIZED
AO_INLINE AO_t
AO_fetch_and_add(volatile AO_t *addr, AO_t incr) {
  AO_t oldval;
  AO_t newval;

  __asm__ __volatile__(
               AO_PPC_L("1") AO_PPC_LxARX " %0,0,%2\n" /* load and reserve */
               "add %1,%0,%3\n"                 /* increment            */
               AO_PPC_STxCXd " %1,0,%2\n"       /* store conditional    */
               "bne- " AO_PPC_BR_A("1b", "$-12") "\n"
                                    /* retry if lost reservation        */
              : "=&r"(oldval), "=&r"(newval)
              : "r"(addr), "r"(incr)
              : "memory", "cr0");
  return oldval;
}
#define AO_HAVE_fetch_and_add
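
/* Usage sketch: AO_fetch_and_add(&counter, 1), with counter an AO_t    */
/* of the client's own, atomically increments counter and returns its   */
/* previous value, with no ordering guarantees; the variants below add  */
/* the barriers.                                                        */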

AO_INLINE AO_t
AO_fetch_and_add_acquire(volatile AO_t *addr, AO_t incr) {
  AO_t result = AO_fetch_and_add(addr, incr);
  AO_lwsync();
  return result;
}
#define AO_HAVE_fetch_and_add_acquire

AO_INLINE AO_t
AO_fetch_and_add_release(volatile AO_t *addr, AO_t incr) {
  AO_lwsync();
  return AO_fetch_and_add(addr, incr);
}
#define AO_HAVE_fetch_and_add_release

AO_INLINE AO_t
AO_fetch_and_add_full(volatile AO_t *addr, AO_t incr) {
  AO_t result;
  AO_lwsync();
  result = AO_fetch_and_add(addr, incr);
  AO_lwsync();
  return result;
}
#define AO_HAVE_fetch_and_add_full
#endif /* !AO_PREFER_GENERALIZED */

#undef AO_PPC_BR_A
#undef AO_PPC_CMPx
#undef AO_PPC_L
#undef AO_PPC_LD
#undef AO_PPC_LOAD_CLOBBER
#undef AO_PPC_LxARX
#undef AO_PPC_STxCXd

#endif /* AO_DISABLE_GCC_ATOMICS */