1// Copyright 2014 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5//go:build ppc64 || ppc64le
6
7#include "textflag.h"
8
9// For more details about how various memory models are
10// enforced on POWER, the following paper provides more
11// details about how they enforce C/C++ like models. This
12// gives context about why the strange looking code
13// sequences below work.
14//
15// http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
16
// uint32 ·Load(uint32 volatile* ptr)
// Sequentially consistent 32-bit load: full SYNC barrier, the load,
// then the cmp/bc/isync sequence (see the paper cited above), which
// keeps later memory accesses from being performed before the load
// completes.
TEXT ·Load(SB),NOSPLIT|NOFRAME,$-8-12
	MOVD	ptr+0(FP), R3	// R3 = ptr
	SYNC			// full barrier: order all earlier accesses before the load
	MOVWZ	0(R3), R3	// R3 = *ptr (zero-extended word)
	CMPW	R3, R3, CR7	// compare loaded value with itself: creates a dependency, always EQ
	BC	4, 30, 1(PC) // bne- cr7,0x4
	ISYNC			// never-taken branch + isync: acquire fence on the load
	MOVW	R3, ret+8(FP)
	RET
27
// uint8 ·Load8(uint8 volatile* ptr)
// Sequentially consistent 8-bit load; same SYNC + cmp/bc/isync
// idiom as ·Load above.
TEXT ·Load8(SB),NOSPLIT|NOFRAME,$-8-9
	MOVD	ptr+0(FP), R3	// R3 = ptr
	SYNC			// full barrier before the load
	MOVBZ	0(R3), R3	// R3 = *ptr (zero-extended byte)
	CMP	R3, R3, CR7	// self-compare: dependency on the load, always EQ
	BC	4, 30, 1(PC) // bne- cr7,0x4
	ISYNC			// acquire fence: later accesses wait for the load
	MOVB	R3, ret+8(FP)
	RET
38
// uint64 ·Load64(uint64 volatile* ptr)
// Sequentially consistent 64-bit load; same SYNC + cmp/bc/isync
// idiom as ·Load above.
TEXT ·Load64(SB),NOSPLIT|NOFRAME,$-8-16
	MOVD	ptr+0(FP), R3	// R3 = ptr
	SYNC			// full barrier before the load
	MOVD	0(R3), R3	// R3 = *ptr
	CMP	R3, R3, CR7	// self-compare: dependency on the load, always EQ
	BC	4, 30, 1(PC) // bne- cr7,0x4
	ISYNC			// acquire fence: later accesses wait for the load
	MOVD	R3, ret+8(FP)
	RET
49
// void *·Loadp(void *volatile *ptr)
// Sequentially consistent pointer load; identical code to ·Load64
// (pointers are 64-bit on ppc64/ppc64le).
TEXT ·Loadp(SB),NOSPLIT|NOFRAME,$-8-16
	MOVD	ptr+0(FP), R3	// R3 = ptr
	SYNC			// full barrier before the load
	MOVD	0(R3), R3	// R3 = *ptr
	CMP	R3, R3, CR7	// self-compare: dependency on the load, always EQ
	BC	4, 30, 1(PC) // bne- cr7,0x4
	ISYNC			// acquire fence: later accesses wait for the load
	MOVD	R3, ret+8(FP)
	RET
60
// uint32 ·LoadAcq(uint32 volatile* ptr)
// Load-acquire: unlike ·Load there is no leading SYNC, so earlier
// accesses are not ordered before it; only the cmp/bc/isync acquire
// sequence after the load is used.
TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$-8-12
	MOVD   ptr+0(FP), R3	// R3 = ptr
	MOVWZ  0(R3), R3	// R3 = *ptr (zero-extended word)
	CMPW   R3, R3, CR7	// self-compare: dependency on the load, always EQ
	BC     4, 30, 1(PC) // bne- cr7, 0x4
	ISYNC			// acquire fence: later accesses wait for the load
	MOVW   R3, ret+8(FP)
	RET
70
// uint64 ·LoadAcq64(uint64 volatile* ptr)
// 64-bit load-acquire; same structure as ·LoadAcq (no leading SYNC,
// cmp/bc/isync acquire sequence after the load).
TEXT ·LoadAcq64(SB),NOSPLIT|NOFRAME,$-8-16
	MOVD   ptr+0(FP), R3	// R3 = ptr
	MOVD   0(R3), R3	// R3 = *ptr
	CMP    R3, R3, CR7	// self-compare: dependency on the load, always EQ
	BC     4, 30, 1(PC) // bne- cr7, 0x4
	ISYNC			// acquire fence: later accesses wait for the load
	MOVD   R3, ret+8(FP)
	RET
80
// bool cas(uint32 *ptr, uint32 old, uint32 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else
//		return 0;
//
// Implemented with a lwarx/stwcx. (LWAR/STWCCC) reservation loop,
// bracketed by LWSYNC barriers for full ordering on success.
TEXT ·Cas(SB), NOSPLIT, $0-17
	MOVD	ptr+0(FP), R3	// R3 = ptr
	MOVWZ	old+8(FP), R4	// R4 = old
	MOVWZ	new+12(FP), R5	// R5 = new
	LWSYNC			// barrier: order earlier accesses before the CAS
cas_again:
	LWAR	(R3), R6	// load-reserved: R6 = *ptr
	CMPW	R6, R4
	BNE	cas_fail	// current value != old: fail without storing
	STWCCC	R5, (R3)	// store-conditional: *ptr = new if reservation held
	BNE	cas_again	// reservation lost: retry
	MOVD	$1, R3
	LWSYNC			// barrier: order the successful store before later accesses
	MOVB	R3, ret+16(FP)	// return true
	RET
cas_fail:
	MOVB	R0, ret+16(FP)	// R0 is the always-zero register: return false
	RET
106
// bool	·Cas64(uint64 *ptr, uint64 old, uint64 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else {
//		return 0;
//	}
//
// 64-bit variant of ·Cas: ldarx/stdcx. (LDAR/STDCCC) reservation
// loop bracketed by LWSYNC barriers.
TEXT ·Cas64(SB), NOSPLIT, $0-25
	MOVD	ptr+0(FP), R3	// R3 = ptr
	MOVD	old+8(FP), R4	// R4 = old
	MOVD	new+16(FP), R5	// R5 = new
	LWSYNC			// barrier: order earlier accesses before the CAS
cas64_again:
	LDAR	(R3), R6	// load-reserved: R6 = *ptr
	CMP	R6, R4
	BNE	cas64_fail	// current value != old: fail without storing
	STDCCC	R5, (R3)	// store-conditional: *ptr = new if reservation held
	BNE	cas64_again	// reservation lost: retry
	MOVD	$1, R3
	LWSYNC			// barrier: order the successful store before later accesses
	MOVB	R3, ret+24(FP)	// return true
	RET
cas64_fail:
	MOVB	R0, ret+24(FP)	// R0 is the always-zero register: return false
	RET
133
// bool ·CasRel(uint32 *ptr, uint32 old, uint32 new)
// Compare-and-swap with release semantics only: the LWSYNC before
// the loop orders earlier accesses before the store, and unlike
// ·Cas there is no barrier after a successful store.
TEXT ·CasRel(SB), NOSPLIT, $0-17
	MOVD    ptr+0(FP), R3	// R3 = ptr
	MOVWZ   old+8(FP), R4	// R4 = old
	MOVWZ   new+12(FP), R5	// R5 = new
	LWSYNC			// release barrier
cas_again:
	LWAR    (R3), $0, R6        // 0 = Mutex release hint
	CMPW    R6, R4
	BNE     cas_fail	// current value != old: fail without storing
	STWCCC  R5, (R3)	// store-conditional: *ptr = new if reservation held
	BNE     cas_again	// reservation lost: retry
	MOVD    $1, R3
	MOVB    R3, ret+16(FP)	// return true (no trailing barrier: release-only)
	RET
cas_fail:
	MOVB    R0, ret+16(FP)	// R0 is the always-zero register: return false
	RET
151
// int32 CAS: identical bit pattern to the uint32 version, so tail-jump to ·Cas.
TEXT ·Casint32(SB), NOSPLIT, $0-17
	BR	·Cas(SB)
154
// int64 CAS: identical bit pattern to the uint64 version, so tail-jump to ·Cas64.
TEXT ·Casint64(SB), NOSPLIT, $0-25
	BR	·Cas64(SB)
157
// uintptr CAS: uintptr is 64-bit on ppc64/ppc64le, tail-jump to ·Cas64.
TEXT ·Casuintptr(SB), NOSPLIT, $0-25
	BR	·Cas64(SB)
160
// uintptr load: uintptr is 64-bit on ppc64/ppc64le, tail-jump to ·Load64.
TEXT ·Loaduintptr(SB),  NOSPLIT|NOFRAME, $0-16
	BR	·Load64(SB)
163
// uintptr load-acquire: tail-jump to the 64-bit ·LoadAcq64.
TEXT ·LoadAcquintptr(SB),  NOSPLIT|NOFRAME, $0-16
	BR	·LoadAcq64(SB)
166
// uint load: uint is 64-bit on ppc64/ppc64le, tail-jump to ·Load64.
TEXT ·Loaduint(SB), NOSPLIT|NOFRAME, $0-16
	BR	·Load64(SB)
169
// int32 store: identical bit pattern to the uint32 version, tail-jump to ·Store.
TEXT ·Storeint32(SB), NOSPLIT, $0-12
	BR	·Store(SB)
172
// int64 store: identical bit pattern to the uint64 version, tail-jump to ·Store64.
TEXT ·Storeint64(SB), NOSPLIT, $0-16
	BR	·Store64(SB)
175
// uintptr store: uintptr is 64-bit on ppc64/ppc64le, tail-jump to ·Store64.
TEXT ·Storeuintptr(SB), NOSPLIT, $0-16
	BR	·Store64(SB)
178
// uintptr store-release: tail-jump to the 64-bit ·StoreRel64.
TEXT ·StoreReluintptr(SB), NOSPLIT, $0-16
	BR	·StoreRel64(SB)
181
// uintptr add-and-fetch: uintptr is 64-bit on ppc64/ppc64le, tail-jump to ·Xadd64.
TEXT ·Xadduintptr(SB), NOSPLIT, $0-24
	BR	·Xadd64(SB)
184
// int32 load: identical bit pattern to the uint32 version, tail-jump to ·Load.
TEXT ·Loadint32(SB), NOSPLIT, $0-12
	BR	·Load(SB)
187
// int64 load: identical bit pattern to the uint64 version, tail-jump to ·Load64.
TEXT ·Loadint64(SB), NOSPLIT, $0-16
	BR	·Load64(SB)
190
// int32 add-and-fetch: identical bit pattern to the uint32 version, tail-jump to ·Xadd.
TEXT ·Xaddint32(SB), NOSPLIT, $0-20
	BR	·Xadd(SB)
193
// int64 add-and-fetch: identical bit pattern to the uint64 version, tail-jump to ·Xadd64.
TEXT ·Xaddint64(SB), NOSPLIT, $0-24
	BR	·Xadd64(SB)
196
// bool casp(void **val, void *old, void *new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else
//		return 0;
//
// Pointer CAS without a write barrier; pointers are 64-bit here,
// so tail-jump to ·Cas64.
TEXT ·Casp1(SB), NOSPLIT, $0-25
	BR ·Cas64(SB)
206
// uint32 xadd(uint32 volatile *ptr, int32 delta)
// Atomically:
//	*val += delta;
//	return *val;
//
// Reservation (LWAR/STWCCC) loop; LWSYNC orders earlier accesses
// before the update. Returns the new value.
TEXT ·Xadd(SB), NOSPLIT, $0-20
	MOVD	ptr+0(FP), R4	// R4 = ptr
	MOVW	delta+8(FP), R5	// R5 = delta (sign-extended)
	LWSYNC			// barrier: order earlier accesses before the update
	LWAR	(R4), R3	// load-reserved: R3 = *ptr
	ADD	R5, R3		// R3 = *ptr + delta
	STWCCC	R3, (R4)	// store-conditional
	BNE	-3(PC)		// reservation lost: retry from LWAR
	MOVW	R3, ret+16(FP)	// return the updated value
	RET
221
// uint64 Xadd64(uint64 volatile *val, int64 delta)
// Atomically:
//	*val += delta;
//	return *val;
//
// 64-bit variant of ·Xadd using LDAR/STDCCC.
TEXT ·Xadd64(SB), NOSPLIT, $0-24
	MOVD	ptr+0(FP), R4	// R4 = ptr
	MOVD	delta+8(FP), R5	// R5 = delta
	LWSYNC			// barrier: order earlier accesses before the update
	LDAR	(R4), R3	// load-reserved: R3 = *ptr
	ADD	R5, R3		// R3 = *ptr + delta
	STDCCC	R3, (R4)	// store-conditional
	BNE	-3(PC)		// reservation lost: retry from LDAR
	MOVD	R3, ret+16(FP)	// return the updated value
	RET
236
// uint32 Xchg(ptr *uint32, new uint32)
// Atomically:
//	old := *ptr;
//	*ptr = new;
//	return old;
//
// Reservation loop with LWSYNC before (release) and ISYNC after
// (acquire). Returns the previous value.
TEXT ·Xchg(SB), NOSPLIT, $0-20
	MOVD	ptr+0(FP), R4	// R4 = ptr
	MOVW	new+8(FP), R5	// R5 = new
	LWSYNC			// barrier: order earlier accesses before the exchange
	LWAR	(R4), R3	// load-reserved: R3 = old value
	STWCCC	R5, (R4)	// store-conditional: *ptr = new
	BNE	-2(PC)		// reservation lost: retry from LWAR
	ISYNC			// acquire fence after the successful exchange
	MOVW	R3, ret+16(FP)	// return the old value
	RET
252
// uint64 Xchg64(ptr *uint64, new uint64)
// Atomically:
//	old := *ptr;
//	*ptr = new;
//	return old;
//
// 64-bit variant of ·Xchg using LDAR/STDCCC.
TEXT ·Xchg64(SB), NOSPLIT, $0-24
	MOVD	ptr+0(FP), R4	// R4 = ptr
	MOVD	new+8(FP), R5	// R5 = new
	LWSYNC			// barrier: order earlier accesses before the exchange
	LDAR	(R4), R3	// load-reserved: R3 = old value
	STDCCC	R5, (R4)	// store-conditional: *ptr = new
	BNE	-2(PC)		// reservation lost: retry from LDAR
	ISYNC			// acquire fence after the successful exchange
	MOVD	R3, ret+16(FP)	// return the old value
	RET
268
// int32 exchange: identical bit pattern to the uint32 version, tail-jump to ·Xchg.
TEXT ·Xchgint32(SB), NOSPLIT, $0-20
	BR	·Xchg(SB)
271
// int64 exchange: identical bit pattern to the uint64 version, tail-jump to ·Xchg64.
TEXT ·Xchgint64(SB), NOSPLIT, $0-24
	BR	·Xchg64(SB)
274
// uintptr exchange: uintptr is 64-bit on ppc64/ppc64le, tail-jump to ·Xchg64.
TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
	BR	·Xchg64(SB)
277
// Pointer store without a write barrier; pointers are 64-bit here,
// so tail-jump to ·Store64.
TEXT ·StorepNoWB(SB), NOSPLIT, $0-16
	BR	·Store64(SB)
280
// void ·Store(uint32 volatile* ptr, uint32 val)
// 32-bit store preceded by a full SYNC barrier.
TEXT ·Store(SB), NOSPLIT, $0-12
	MOVD	ptr+0(FP), R3	// R3 = ptr
	MOVW	val+8(FP), R4	// R4 = val
	SYNC			// full barrier: order earlier accesses before the store
	MOVW	R4, 0(R3)	// *ptr = val
	RET
287
// void ·Store8(uint8 volatile* ptr, uint8 val)
// 8-bit store preceded by a full SYNC barrier.
TEXT ·Store8(SB), NOSPLIT, $0-9
	MOVD	ptr+0(FP), R3	// R3 = ptr
	MOVB	val+8(FP), R4	// R4 = val
	SYNC			// full barrier: order earlier accesses before the store
	MOVB	R4, 0(R3)	// *ptr = val
	RET
294
// void ·Store64(uint64 volatile* ptr, uint64 val)
// 64-bit store preceded by a full SYNC barrier.
TEXT ·Store64(SB), NOSPLIT, $0-16
	MOVD	ptr+0(FP), R3	// R3 = ptr
	MOVD	val+8(FP), R4	// R4 = val
	SYNC			// full barrier: order earlier accesses before the store
	MOVD	R4, 0(R3)	// *ptr = val
	RET
301
// void ·StoreRel(uint32 volatile* ptr, uint32 val)
// 32-bit store-release: LWSYNC (lighter than SYNC) orders earlier
// accesses before the store.
TEXT ·StoreRel(SB), NOSPLIT, $0-12
	MOVD	ptr+0(FP), R3	// R3 = ptr
	MOVW	val+8(FP), R4	// R4 = val
	LWSYNC			// release barrier
	MOVW	R4, 0(R3)	// *ptr = val
	RET
308
// void ·StoreRel64(uint64 volatile* ptr, uint64 val)
// 64-bit store-release: LWSYNC orders earlier accesses before the store.
TEXT ·StoreRel64(SB), NOSPLIT, $0-16
	MOVD	ptr+0(FP), R3	// R3 = ptr
	MOVD	val+8(FP), R4	// R4 = val
	LWSYNC			// release barrier
	MOVD	R4, 0(R3)	// *ptr = val
	RET
315
// void ·Or8(byte volatile*, byte);
// Atomic *ptr |= val via a byte reservation (LBAR/STBCCC) loop.
// LWSYNC orders earlier accesses before the update; no return value.
TEXT ·Or8(SB), NOSPLIT, $0-9
	MOVD	ptr+0(FP), R3	// R3 = ptr
	MOVBZ	val+8(FP), R4	// R4 = val (zero-extended)
	LWSYNC			// barrier before the read-modify-write
again:
	LBAR	(R3), R6	// load-reserved byte: R6 = *ptr
	OR	R4, R6		// R6 |= val
	STBCCC	R6, (R3)	// store-conditional
	BNE	again		// reservation lost: retry
	RET
327
// void ·And8(byte volatile*, byte);
// Atomic *ptr &= val via a byte reservation (LBAR/STBCCC) loop.
// LWSYNC orders earlier accesses before the update; no return value.
TEXT ·And8(SB), NOSPLIT, $0-9
	MOVD	ptr+0(FP), R3	// R3 = ptr
	MOVBZ	val+8(FP), R4	// R4 = val (zero-extended)
	LWSYNC			// barrier before the read-modify-write
again:
	LBAR	(R3), R6	// load-reserved byte: R6 = *ptr
	AND	R4, R6		// R6 &= val
	STBCCC	R6, (R3)	// store-conditional
	BNE	again		// reservation lost: retry
	RET
339
// func Or(addr *uint32, v uint32)
// Atomic *addr |= v via a word reservation (LWAR/STWCCC) loop.
// LWSYNC orders earlier accesses before the update; no return value.
TEXT ·Or(SB), NOSPLIT, $0-12
	MOVD	ptr+0(FP), R3	// R3 = addr
	MOVW	val+8(FP), R4	// R4 = v
	LWSYNC			// barrier before the read-modify-write
again:
	LWAR	(R3), R6	// load-reserved word: R6 = *addr
	OR	R4, R6		// R6 |= v
	STWCCC	R6, (R3)	// store-conditional
	BNE	again		// reservation lost: retry
	RET
351
// func And(addr *uint32, v uint32)
// Atomic *addr &= v via a word reservation (LWAR/STWCCC) loop.
// LWSYNC orders earlier accesses before the update; no return value.
TEXT ·And(SB), NOSPLIT, $0-12
	MOVD	ptr+0(FP), R3	// R3 = addr
	MOVW	val+8(FP), R4	// R4 = v
	LWSYNC			// barrier before the read-modify-write
again:
	LWAR	(R3),R6		// load-reserved word: R6 = *addr
	AND	R4, R6		// R6 &= v
	STWCCC	R6, (R3)	// store-conditional
	BNE	again		// reservation lost: retry
	RET
363