1 /*
2  * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved.
3  * Copyright (c) 2016, 2019 SAP SE. All rights reserved.
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This code is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License version 2 only, as
8  * published by the Free Software Foundation.
9  *
10  * This code is distributed in the hope that it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13  * version 2 for more details (a copy is included in the LICENSE file that
14  * accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License version
17  * 2 along with this work; if not, write to the Free Software Foundation,
18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19  *
20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21  * or visit www.oracle.com if you need additional information or have any
22  * questions.
23  *
24  */
25 
26 #ifndef OS_CPU_LINUX_S390_ATOMIC_LINUX_S390_HPP
27 #define OS_CPU_LINUX_S390_ATOMIC_LINUX_S390_HPP
28 
29 #include "runtime/atomic.hpp"
30 #include "runtime/os.hpp"
31 #include "runtime/vm_version.hpp"
32 
33 // Note that the compare-and-swap instructions on System z perform
34 // a serialization function before the storage operand is fetched
35 // and again after the operation is completed.
36 //
37 // Used constraint modifiers:
38 // = write-only access: Value on entry to inline-assembler code irrelevant.
39 // + read/write access: Value on entry is used; on exit value is changed.
40 //   read-only  access: Value on entry is used and never changed.
41 // & early-clobber access: Might be modified before all read-only operands
42 //                         have been used.
43 // a address register operand (not GR0).
44 // d general register operand (including GR0)
45 // Q memory operand w/o index register.
46 // 0..9 operand reference (by operand position).
47 //      Used for operands that fill multiple roles. One example would be a
48 //      write-only operand receiving its initial value from a read-only operand.
49 //      Refer to cmpxchg(..) operand #0 and variable cmp_val for a real-life example.
50 //
51 
52 // On System z, all store operations are atomic if the address where the data is stored into
53 // is an integer multiple of the data length. Furthermore, all stores are ordered:
54 // a store which occurs conceptually before another store becomes visible to other CPUs
55 // before the other store becomes visible.
56 
57 //------------
58 // Atomic::add
59 //------------
60 // These methods force the value in memory to be augmented by the passed increment.
// Both the memory value and the increment are treated as signed binary
// integers (32-bit or 64-bit, matching the template specialization).
62 // No overflow exceptions are recognized, and the condition code does not hold
63 // information about the value in memory.
64 //
65 // The value in memory is updated by using a compare-and-swap instruction. The
66 // instruction is retried as often as required.
67 //
68 // The return value of the method is the value that was successfully stored. At the
69 // time the caller receives back control, the value in memory may have changed already.
70 
71 // New atomic operations only include specific-operand-serialization, not full
72 // memory barriers. We can use the Fast-BCR-Serialization Facility for them.
// Emit a fast serialization point using the Fast-BCR-Serialization
// Facility (z196 and newer): "bcr 14,0" serializes like "bcr 15,0"
// but cheaper. The "memory" clobber additionally prevents the compiler
// from reordering memory accesses across this point.
inline void z196_fast_sync() {
  __asm__ __volatile__ ("bcr 14, 0" : : : "memory");
}
76 
// Primary template for the platform add operation. Only the 4- and
// 8-byte specializations below are defined; the AddAndFetch CRTP base
// maps Atomic's public add interface onto add_and_fetch().
template<size_t byte_size>
struct Atomic::PlatformAdd
  : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
{
  // Atomically add add_value to *dest; returns the updated value.
  template<typename I, typename D>
  D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const;
};
84 
// 4-byte atomic add.
// Fast path: the interlocked-access LAA (Load And Add) instruction,
// available when VM_Version::has_LoadAndALUAtomicV1() reports the
// facility. LAA only provides specific-operand serialization, so for
// memory_order_conservative the operation is bracketed by fast BCR
// syncs (see z196_fast_sync() and the note at the top of this file).
// Fallback: a classic LLGF/CS retry loop, which serializes fully by
// itself (no extra barriers required).
template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<4>::add_and_fetch(I inc, D volatile* dest,
                                               atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(I));
  STATIC_ASSERT(4 == sizeof(D));

  D old, upd;  // 'old' is only used by the CS fallback path below.

  if (VM_Version::has_LoadAndALUAtomicV1()) {
    if (order == memory_order_conservative) { z196_fast_sync(); }
    // LAA is hand-encoded as .byte sequences, presumably so the code
    // still assembles with toolchains that predate the z196 instruction
    // set - TODO confirm. Fixed registers r0/r2/r3 are used (and listed
    // as clobbers) because the raw encoding cannot name template operands.
    __asm__ __volatile__ (
      "   LGFR     0,%[inc]                \n\t" // save increment
      "   LA       3,%[mem]                \n\t" // force data address into ARG2
//    "   LAA      %[upd],%[inc],%[mem]    \n\t" // increment and get old value
//    "   LAA      2,0,0(3)                \n\t" // actually coded instruction
      "   .byte    0xeb                    \n\t" // LAA main opcode
      "   .byte    0x20                    \n\t" // R1,R3
      "   .byte    0x30                    \n\t" // R2,disp1
      "   .byte    0x00                    \n\t" // disp2,disp3
      "   .byte    0x00                    \n\t" // disp4,disp5
      "   .byte    0xf8                    \n\t" // LAA minor opcode
      "   AR       2,0                     \n\t" // calc new value in register
      "   LR       %[upd],2                \n\t" // move to result register
      //---<  outputs  >---
      : [upd]  "=&d" (upd)    // write-only, updated counter value
      , [mem]  "+Q"  (*dest)  // read/write, memory to be updated atomically
      //---<  inputs  >---
      : [inc]  "a"   (inc)    // read-only.
      //---<  clobbered  >---
      : "cc", "r0", "r2", "r3", "memory"
    );
    if (order == memory_order_conservative) { z196_fast_sync(); }
  } else {
    // Compare-and-swap retry loop: reload happens implicitly because a
    // failing CS updates %[old] with the current memory contents.
    __asm__ __volatile__ (
      "   LLGF     %[old],%[mem]           \n\t" // get old value
      "0: LA       %[upd],0(%[inc],%[old]) \n\t" // calc result
      "   CS       %[old],%[upd],%[mem]    \n\t" // try to xchg res with mem
      "   JNE      0b                      \n\t" // no success? -> retry
      //---<  outputs  >---
      : [old] "=&a" (old)    // write-only, old counter value
      , [upd] "=&d" (upd)    // write-only, updated counter value
      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
      //---<  inputs  >---
      : [inc] "a"   (inc)    // read-only.
      //---<  clobbered  >---
      : "cc", "memory"
    );
  }

  return upd;
}
137 
138 
// 8-byte atomic add. Mirrors the 4-byte variant above, using the
// 64-bit instruction forms: LAAG (hand-encoded, minor opcode 0xe8)
// on the fast path and an LG/CSG retry loop as fallback.
template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<8>::add_and_fetch(I inc, D volatile* dest,
                                               atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(I));
  STATIC_ASSERT(8 == sizeof(D));

  D old, upd;  // 'old' is only used by the CSG fallback path below.

  if (VM_Version::has_LoadAndALUAtomicV1()) {
    if (order == memory_order_conservative) { z196_fast_sync(); }
    __asm__ __volatile__ (
      "   LGR      0,%[inc]                \n\t" // save increment
      "   LA       3,%[mem]                \n\t" // force data address into ARG2
//    "   LAAG     %[upd],%[inc],%[mem]    \n\t" // increment and get old value
//    "   LAAG     2,0,0(3)                \n\t" // actually coded instruction
      "   .byte    0xeb                    \n\t" // LAAG main opcode
      "   .byte    0x20                    \n\t" // R1,R3
      "   .byte    0x30                    \n\t" // R2,disp1
      "   .byte    0x00                    \n\t" // disp2,disp3
      "   .byte    0x00                    \n\t" // disp4,disp5
      "   .byte    0xe8                    \n\t" // LAAG minor opcode
      "   AGR      2,0                     \n\t" // calc new value in register
      "   LGR      %[upd],2                \n\t" // move to result register
      //---<  outputs  >---
      : [upd]  "=&d" (upd)    // write-only, updated counter value
      , [mem]  "+Q"  (*dest)  // read/write, memory to be updated atomically
      //---<  inputs  >---
      : [inc]  "a"   (inc)    // read-only.
      //---<  clobbered  >---
      : "cc", "r0", "r2", "r3", "memory"
    );
    if (order == memory_order_conservative) { z196_fast_sync(); }
  } else {
    // Compare-and-swap retry loop: a failing CSG refreshes %[old] with
    // the current memory contents, so no explicit reload is needed.
    __asm__ __volatile__ (
      "   LG       %[old],%[mem]           \n\t" // get old value
      "0: LA       %[upd],0(%[inc],%[old]) \n\t" // calc result
      "   CSG      %[old],%[upd],%[mem]    \n\t" // try to xchg res with mem
      "   JNE      0b                      \n\t" // no success? -> retry
      //---<  outputs  >---
      : [old] "=&a" (old)    // write-only, old counter value
      , [upd] "=&d" (upd)    // write-only, updated counter value
      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
      //---<  inputs  >---
      : [inc] "a"   (inc)    // read-only.
      //---<  clobbered  >---
      : "cc", "memory"
    );
  }

  return upd;
}
191 
192 
193 //-------------
194 // Atomic::xchg
195 //-------------
196 // These methods force the value in memory to be replaced by the new value passed
197 // in as argument.
198 //
199 // The value in memory is replaced by using a compare-and-swap instruction. The
200 // instruction is retried as often as required. This makes sure that the new
201 // value can be seen, at least for a very short period of time, by other CPUs.
202 //
203 // If we would use a normal "load(old value) store(new value)" sequence,
204 // the new value could be lost unnoticed, due to a store(new value) from
205 // another thread.
206 //
207 // The return value is the (unchanged) value from memory as it was when the
208 // replacement succeeded.
// 4-byte atomic exchange: load the current value, then retry CS until
// the swap succeeds. Returns the value that was in memory immediately
// before the successful swap. CS serializes before and after the
// operation (see file header), so the ordering argument is ignored.
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
                                             T volatile* dest,
                                             atomic_memory_order unused) const {
  STATIC_ASSERT(4 == sizeof(T));
  T old;

  __asm__ __volatile__ (
    "   LLGF     %[old],%[mem]           \n\t" // get old value
    "0: CS       %[old],%[upd],%[mem]    \n\t" // try to xchg upd with mem
    "   JNE      0b                      \n\t" // no success? -> retry
    //---<  outputs  >---
    : [old] "=&d" (old)      // write-only, prev value irrelevant
    , [mem] "+Q"  (*dest)    // read/write, memory to be updated atomically
    //---<  inputs  >---
    : [upd] "d"   (exchange_value) // read-only, value to be written to memory
    //---<  clobbered  >---
    : "cc", "memory"
  );

  return old;
}
232 
// 8-byte atomic exchange: same retry scheme as the 4-byte variant,
// using the 64-bit LG/CSG instructions. Returns the value that was in
// memory immediately before the successful swap; the ordering argument
// is ignored (CSG serializes by itself, see file header).
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
                                             T volatile* dest,
                                             atomic_memory_order unused) const {
  STATIC_ASSERT(8 == sizeof(T));
  T old;

  __asm__ __volatile__ (
    "   LG       %[old],%[mem]           \n\t" // get old value
    "0: CSG      %[old],%[upd],%[mem]    \n\t" // try to xchg upd with mem
    "   JNE      0b                      \n\t" // no success? -> retry
    //---<  outputs  >---
    : [old] "=&d" (old)      // write-only, init from memory
    , [mem] "+Q"  (*dest)    // read/write, memory to be updated atomically
    //---<  inputs  >---
    : [upd] "d"   (exchange_value) // read-only, value to be written to memory
    //---<  clobbered  >---
    : "cc", "memory"
  );

  return old;
}
256 
257 //----------------
258 // Atomic::cmpxchg
259 //----------------
260 // These methods compare the value in memory with a given compare value.
261 // If both values compare equal, the value in memory is replaced with
262 // the exchange value.
263 //
264 // The value in memory is compared and replaced by using a compare-and-swap
265 // instruction. The instruction is NOT retried (one shot only).
266 //
267 // The return value is the (unchanged) value from memory as it was when the
268 // compare-and-swap instruction completed. A successful exchange operation
269 // is indicated by (return value == compare_value). If unsuccessful, a new
270 // exchange value can be calculated based on the return value which is the
271 // latest contents of the memory location.
272 //
273 // Inspecting the return value is the only way for the caller to determine
274 // if the compare-and-swap instruction was successful:
275 // - If return value and compare value compare equal, the compare-and-swap
276 //   instruction was successful and the value in memory was replaced by the
277 //   exchange value.
278 // - If return value and compare value compare unequal, the compare-and-swap
279 //   instruction was not successful. The value in memory was left unchanged.
280 //
281 // The s390 processors always fence before and after the csg instructions.
282 // Thus we ignore the memory ordering argument. The docu says: "A serialization
283 // function is performed before the operand is fetched and again after the
284 // operation is completed."
285 
// No direct support for 1-byte compare-and-swap on s390; emulate it on
// top of the 4-byte cmpxchg via the shared CmpxchgByteUsingInt helper.
template<>
struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {};
289 
// 4-byte compare-and-swap: a single (non-retried) CS instruction.
// Returns the memory contents as of instruction completion; success is
// indicated by (return value == cmp_val). The ordering argument is
// ignored because CS serializes before and after the operation (see the
// block comment above). Note the "0" input constraint: cmp_val provides
// the initial value of output operand #0 (%[old]), as CS requires.
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<4>::operator()(T xchg_val,
                                                T volatile* dest,
                                                T cmp_val,
                                                atomic_memory_order unused) const {
  STATIC_ASSERT(4 == sizeof(T));
  T old;

  __asm__ __volatile__ (
    "   CS       %[old],%[upd],%[mem]    \n\t" // Try to xchg upd with mem.
    // outputs
    : [old] "=&d" (old)      // Write-only, prev value irrelevant.
    , [mem] "+Q"  (*dest)    // Read/write, memory to be updated atomically.
    // inputs
    : [upd] "d"   (xchg_val)
    ,       "0"   (cmp_val)  // Read-only, initial value for [old] (operand #0).
    // clobbered
    : "cc", "memory"
  );

  return old;
}
313 
// 8-byte compare-and-swap: a single (non-retried) CSG instruction.
// Same contract as the 4-byte variant above: returns the memory
// contents as of instruction completion, success indicated by
// (return value == cmp_val), ordering argument ignored since CSG
// serializes by itself. The "0" constraint ties cmp_val to the initial
// value of output operand #0 (%[old]).
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<8>::operator()(T xchg_val,
                                                T volatile* dest,
                                                T cmp_val,
                                                atomic_memory_order unused) const {
  STATIC_ASSERT(8 == sizeof(T));
  T old;

  __asm__ __volatile__ (
    "   CSG      %[old],%[upd],%[mem]    \n\t" // Try to xchg upd with mem.
    // outputs
    : [old] "=&d" (old)      // Write-only, prev value irrelevant.
    , [mem] "+Q"  (*dest)    // Read/write, memory to be updated atomically.
    // inputs
    : [upd] "d"   (xchg_val)
    ,       "0"   (cmp_val)  // Read-only, initial value for [old] (operand #0).
    // clobbered
    : "cc", "memory"
  );

  return old;
}
337 
338 #endif // OS_CPU_LINUX_S390_ATOMIC_LINUX_S390_HPP
339