/*
 * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2016, 2019 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_LINUX_S390_ATOMIC_LINUX_S390_HPP
#define OS_CPU_LINUX_S390_ATOMIC_LINUX_S390_HPP

#include "runtime/atomic.hpp"
#include "runtime/os.hpp"
#include "runtime/vm_version.hpp"

// Note that the compare-and-swap instructions on System z perform
// a serialization function before the storage operand is fetched
// and again after the operation is completed.
//
// Used constraint modifiers:
//  =    write-only access: Value on entry to inline-assembler code irrelevant.
//  +    read/write access: Value on entry is used; on exit value is changed.
//       read-only access:  Value on entry is used and never changed.
//  &    early-clobber access: Might be modified before all read-only operands
//       have been used.
//  a    address register operand (not GR0).
//  d    general register operand (including GR0).
//  Q    memory operand w/o index register.
//  0..9 operand reference (by operand position).
//       Used for operands that fill multiple roles. One example would be a
//       write-only operand receiving its initial value from a read-only operand.
//       Refer to cmpxchg(..) operand #0 and variable cmp_val for a real-life example.
//

// On System z, all store operations are atomic if the address where the data is stored into
// is an integer multiple of the data length. Furthermore, all stores are ordered:
// a store which occurs conceptually before another store becomes visible to other CPUs
// before the other store becomes visible.
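//
// Illustrative sketch only (not part of this file's interface): because naturally
// aligned stores are atomic and stores become visible in program order as described
// above, a producer that fills a payload and then sets an aligned flag is never
// observed with the flag set while the payload store is still missing. The variable
// names below are hypothetical.
//
//   volatile int payload = 0;   // naturally aligned -> store is atomic
//   volatile int ready   = 0;   // naturally aligned -> store is atomic
//
//   void publish() {
//     payload = 42;             // first store
//     ready   = 1;              // becomes visible only after the payload store
//   }
//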

//------------
// Atomic::add
//------------
// These methods force the value in memory to be augmented by the passed increment.
// Both the memory value and the increment are treated as signed binary integers
// of the operand size (32 or 64 bit). No overflow exceptions are recognized,
// and the condition code does not hold information about the value in memory.
//
// The value in memory is updated by using a compare-and-swap instruction. The
// instruction is retried as often as required.
//
// The return value of the method is the value that was successfully stored. At the
// time the caller receives back control, the value in memory may have changed already.

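// Illustrative usage sketch only: callers go through the generic Atomic::add()
// front end, which dispatches to the PlatformAdd specializations below. The
// counter name is hypothetical; the argument order (increment first, destination
// second) mirrors the platform template signatures in this file.
//
//   static volatile int _request_count = 0;
//
//   // Returns the updated value (add-and-fetch semantics, see add_and_fetch below).
//   int bump_request_count() {
//     return Atomic::add(1, &_request_count);
//   }
//
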
// New atomic operations only include specific-operand-serialization, not full
// memory barriers. We can use the Fast-BCR-Serialization Facility for them.
inline void z196_fast_sync() {
  __asm__ __volatile__ ("bcr 14, 0" : : : "memory");
}

template<size_t byte_size>
struct Atomic::PlatformAdd
  : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
{
  template<typename I, typename D>
  D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const;
};

template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<4>::add_and_fetch(I inc, D volatile* dest,
                                               atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(I));
  STATIC_ASSERT(4 == sizeof(D));

  D old, upd;

  if (VM_Version::has_LoadAndALUAtomicV1()) {
    if (order == memory_order_conservative) { z196_fast_sync(); }
    __asm__ __volatile__ (
      "   LGFR     0,%[inc]                \n\t" // save increment
      "   LA       3,%[mem]                \n\t" // force data address into ARG2
//    "   LAA      %[upd],%[inc],%[mem]    \n\t" // increment and get old value
//    "   LAA      2,0,0(3)                \n\t" // actually coded instruction
      "   .byte    0xeb                    \n\t" // LAA main opcode
      "   .byte    0x20                    \n\t" // R1,R3
      "   .byte    0x30                    \n\t" // R2,disp1
      "   .byte    0x00                    \n\t" // disp2,disp3
      "   .byte    0x00                    \n\t" // disp4,disp5
      "   .byte    0xf8                    \n\t" // LAA minor opcode
      "   AR       2,0                     \n\t" // calc new value in register
      "   LR       %[upd],2                \n\t" // move to result register
      //---<  outputs  >---
      : [upd] "=&d" (upd)    // write-only, updated counter value
      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
      //---<  inputs  >---
      : [inc] "a"   (inc)    // read-only.
      //---<  clobbered  >---
      : "cc", "r0", "r2", "r3", "memory"
    );
    if (order == memory_order_conservative) { z196_fast_sync(); }
  } else {
    __asm__ __volatile__ (
      "   LLGF     %[old],%[mem]           \n\t" // get old value
      "0: LA       %[upd],0(%[inc],%[old]) \n\t" // calc result
      "   CS       %[old],%[upd],%[mem]    \n\t" // try to xchg res with mem
      "   JNE      0b                      \n\t" // no success? -> retry
      //---<  outputs  >---
      : [old] "=&a" (old)    // write-only, old counter value
      , [upd] "=&d" (upd)    // write-only, updated counter value
      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
      //---<  inputs  >---
      : [inc] "a"   (inc)    // read-only.
      //---<  clobbered  >---
      : "cc", "memory"
    );
  }

  return upd;
}


template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<8>::add_and_fetch(I inc, D volatile* dest,
                                               atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(I));
  STATIC_ASSERT(8 == sizeof(D));

  D old, upd;

  if (VM_Version::has_LoadAndALUAtomicV1()) {
    if (order == memory_order_conservative) { z196_fast_sync(); }
    __asm__ __volatile__ (
      "   LGR      0,%[inc]                \n\t" // save increment
      "   LA       3,%[mem]                \n\t" // force data address into ARG2
//    "   LAAG     %[upd],%[inc],%[mem]    \n\t" // increment and get old value
//    "   LAAG     2,0,0(3)                \n\t" // actually coded instruction
      "   .byte    0xeb                    \n\t" // LAAG main opcode
      "   .byte    0x20                    \n\t" // R1,R3
      "   .byte    0x30                    \n\t" // R2,disp1
      "   .byte    0x00                    \n\t" // disp2,disp3
      "   .byte    0x00                    \n\t" // disp4,disp5
      "   .byte    0xe8                    \n\t" // LAAG minor opcode
      "   AGR      2,0                     \n\t" // calc new value in register
      "   LGR      %[upd],2                \n\t" // move to result register
      //---<  outputs  >---
      : [upd] "=&d" (upd)    // write-only, updated counter value
      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
      //---<  inputs  >---
      : [inc] "a"   (inc)    // read-only.
      //---<  clobbered  >---
      : "cc", "r0", "r2", "r3", "memory"
    );
    if (order == memory_order_conservative) { z196_fast_sync(); }
  } else {
    __asm__ __volatile__ (
      "   LG       %[old],%[mem]           \n\t" // get old value
      "0: LA       %[upd],0(%[inc],%[old]) \n\t" // calc result
      "   CSG      %[old],%[upd],%[mem]    \n\t" // try to xchg res with mem
      "   JNE      0b                      \n\t" // no success? -> retry
      //---<  outputs  >---
      : [old] "=&a" (old)    // write-only, old counter value
      , [upd] "=&d" (upd)    // write-only, updated counter value
      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
      //---<  inputs  >---
      : [inc] "a"   (inc)    // read-only.
      //---<  clobbered  >---
      : "cc", "memory"
    );
  }

  return upd;
}


//-------------
// Atomic::xchg
//-------------
// These methods force the value in memory to be replaced by the new value passed
// in as argument.
//
// The value in memory is replaced by using a compare-and-swap instruction. The
// instruction is retried as often as required. This makes sure that the new
// value can be seen, at least for a very short period of time, by other CPUs.
//
// If we used a normal "load(old value), store(new value)" sequence, the new
// value could be lost unnoticed, due to a store(new value) from another thread.
//
// The return value is the (unchanged) value from memory as it was when the
// replacement succeeded.
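//
// Illustrative usage sketch only: callers go through the generic Atomic::xchg()
// front end, which dispatches to the PlatformXchg specializations below. The
// variable names are hypothetical; the argument order (new value first,
// destination second) mirrors the platform template signatures in this file.
//
//   static volatile intptr_t _owner = 0;
//
//   // Install the new owner and return whoever owned the lock word before.
//   intptr_t swap_owner(intptr_t me) {
//     return Atomic::xchg(me, &_owner);
//   }
//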
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
                                             T volatile* dest,
                                             atomic_memory_order unused) const {
  STATIC_ASSERT(4 == sizeof(T));
  T old;

  __asm__ __volatile__ (
    "   LLGF     %[old],%[mem]           \n\t" // get old value
    "0: CS       %[old],%[upd],%[mem]    \n\t" // try to xchg upd with mem
    "   JNE      0b                      \n\t" // no success? -> retry
    //---<  outputs  >---
    : [old] "=&d" (old)            // write-only, prev value irrelevant
    , [mem] "+Q"  (*dest)          // read/write, memory to be updated atomically
    //---<  inputs  >---
    : [upd] "d"   (exchange_value) // read-only, value to be written to memory
    //---<  clobbered  >---
    : "cc", "memory"
  );

  return old;
}

template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
                                             T volatile* dest,
                                             atomic_memory_order unused) const {
  STATIC_ASSERT(8 == sizeof(T));
  T old;

  __asm__ __volatile__ (
    "   LG       %[old],%[mem]           \n\t" // get old value
    "0: CSG      %[old],%[upd],%[mem]    \n\t" // try to xchg upd with mem
    "   JNE      0b                      \n\t" // no success? -> retry
    //---<  outputs  >---
    : [old] "=&d" (old)            // write-only, init from memory
    , [mem] "+Q"  (*dest)          // read/write, memory to be updated atomically
    //---<  inputs  >---
    : [upd] "d"   (exchange_value) // read-only, value to be written to memory
    //---<  clobbered  >---
    : "cc", "memory"
  );

  return old;
}

//----------------
// Atomic::cmpxchg
//----------------
// These methods compare the value in memory with a given compare value.
// If both values compare equal, the value in memory is replaced with
// the exchange value.
//
// The value in memory is compared and replaced by using a compare-and-swap
// instruction. The instruction is NOT retried (one shot only).
//
// The return value is the (unchanged) value from memory as it was when the
// compare-and-swap instruction completed. A successful exchange operation
// is indicated by (return value == compare_value). If unsuccessful, a new
// exchange value can be calculated based on the return value, which is the
// latest contents of the memory location.
//
// Inspecting the return value is the only way for the caller to determine
// if the compare-and-swap instruction was successful:
// - If return value and compare value compare equal, the compare-and-swap
//   instruction was successful and the value in memory was replaced by the
//   exchange value.
// - If return value and compare value compare unequal, the compare-and-swap
//   instruction was not successful. The value in memory was left unchanged.
//
// The s390 processors always fence before and after the CS/CSG instructions.
// Thus we ignore the memory ordering argument. The documentation says:
// "A serialization function is performed before the operand is fetched
// and again after the operation is completed."

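// Illustrative usage sketch only: since the platform cmpxchg is one-shot,
// callers that need to retry build their own loop around it. The function and
// variable names are hypothetical; the argument order (exchange value,
// destination, compare value) mirrors the platform template signatures below.
//
//   static volatile int _max_seen = 0;
//
//   // Atomically raise _max_seen to 'candidate' if it is larger.
//   void update_max(int candidate) {
//     int cur = _max_seen;
//     while (candidate > cur) {
//       int witnessed = Atomic::cmpxchg(candidate, &_max_seen, cur);
//       if (witnessed == cur) break;  // CAS succeeded
//       cur = witnessed;              // lost the race; re-check with latest value
//     }
//   }
//
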
// No direct support for cmpxchg of bytes; emulate using int.
template<>
struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {};
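
// Illustrative sketch only of how a byte CAS can be emulated with a 4-byte CAS.
// The actual shared implementation is Atomic::CmpxchgByteUsingInt in
// runtime/atomic.hpp; the helper below is hypothetical and assumes the
// big-endian byte numbering of s390.
//
//   uint8_t cmpxchg_byte_via_word(uint8_t xchg, volatile uint8_t* dest, uint8_t cmp) {
//     volatile uint32_t* word = (volatile uint32_t*)((uintptr_t)dest & ~(uintptr_t)3);
//     uint32_t shift = 8 * (3 - ((uintptr_t)dest & 3)); // big-endian: byte 0 is MSB
//     uint32_t mask  = 0xffu << shift;
//     uint32_t cur   = *word;                           // aligned 4-byte load is atomic
//     while ((uint8_t)((cur & mask) >> shift) == cmp) {
//       uint32_t upd = (cur & ~mask) | ((uint32_t)xchg << shift);
//       uint32_t witnessed = Atomic::cmpxchg(upd, word, cur);
//       if (witnessed == cur) break;  // target byte swapped successfully
//       cur = witnessed;              // some byte changed; re-check the target byte
//     }
//     return (uint8_t)((cur & mask) >> shift);
//   }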

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<4>::operator()(T xchg_val,
                                                T volatile* dest,
                                                T cmp_val,
                                                atomic_memory_order unused) const {
  STATIC_ASSERT(4 == sizeof(T));
  T old;

  __asm__ __volatile__ (
    "   CS     %[old],%[upd],%[mem]    \n\t" // Try to xchg upd with mem.
    // outputs
    : [old] "=&d" (old)      // Write-only, prev value irrelevant.
    , [mem] "+Q"  (*dest)    // Read/write, memory to be updated atomically.
    // inputs
    : [upd] "d"   (xchg_val)
    ,       "0"   (cmp_val)  // Read-only, initial value for [old] (operand #0).
    // clobbered
    : "cc", "memory"
  );

  return old;
}

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<8>::operator()(T xchg_val,
                                                T volatile* dest,
                                                T cmp_val,
                                                atomic_memory_order unused) const {
  STATIC_ASSERT(8 == sizeof(T));
  T old;

  __asm__ __volatile__ (
    "   CSG    %[old],%[upd],%[mem]    \n\t" // Try to xchg upd with mem.
    // outputs
    : [old] "=&d" (old)      // Write-only, prev value irrelevant.
    , [mem] "+Q"  (*dest)    // Read/write, memory to be updated atomically.
    // inputs
    : [upd] "d"   (xchg_val)
    ,       "0"   (cmp_val)  // Read-only, initial value for [old] (operand #0).
    // clobbered
    : "cc", "memory"
  );

  return old;
}

#endif // OS_CPU_LINUX_S390_ATOMIC_LINUX_S390_HPP