1// Copyright 2014 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// RISC-V's atomic operations have two bits, aq ("acquire") and rl ("release"),
6// which may be toggled on and off. Their precise semantics are defined in
7// section 6.3 of the specification, but the basic idea is as follows:
8//
9//   - If neither aq nor rl is set, the CPU may reorder the atomic arbitrarily.
10//     It guarantees only that it will execute atomically.
11//
12//   - If aq is set, the CPU may move the instruction backward, but not forward.
13//
14//   - If rl is set, the CPU may move the instruction forward, but not backward.
15//
16//   - If both are set, the CPU may not reorder the instruction at all.
17//
18// These four modes correspond to other well-known memory models on other CPUs.
19// On ARM, aq corresponds to a dmb ishst, aq+rl corresponds to a dmb ish. On
20// Intel, aq corresponds to an lfence, rl to an sfence, and aq+rl to an mfence
21// (or a lock prefix).
22//
23// Go's memory model requires that
24//   - if a read happens after a write, the read must observe the write, and
25//     that
26//   - if a read happens concurrently with a write, the read may observe the
27//     write.
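// aq is sufficient to guarantee this. (This jibes with ARM, which uses
// dmb ishst.) Note that the AMOWSC and AMODSC macros below are stronger than
// strictly required: their base encodings set both the aq and rl bits. Only
// the hand-encoded amoadd.d.aq in Xaddint64 uses aq alone.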
30
31#include "textflag.h"
32
// Hand-assembled instruction encodings, emitted as raw WORDs.
//
// AMOWSC and AMODSC build an atomic-extension instruction (major opcode 0x2f)
// with both ordering bits (aq, bit 26, and rl, bit 25) set. AMOWSC selects the
// 32-bit width (funct3 = 2) and AMODSC the 64-bit width (funct3 = 3).
//   op  - one of the funct5 selectors defined below
//   rd  - destination register number
//   rs1 - register number holding the memory address
//   rs2 - source register number (0 for LR_)
// Register arguments are plain integer register numbers, e.g. 10 for A0.
#define AMOWSC(op,rd,rs1,rs2) WORD $0x0600202f+rd<<7+rs1<<15+rs2<<20+op<<27
#define AMODSC(op,rd,rs1,rs2) WORD $0x0600302f+rd<<7+rs1<<15+rs2<<20+op<<27

// funct5 selectors accepted as the op argument of AMOWSC/AMODSC.
#define ADD_ 0
#define SWAP_ 1
#define LR_ 2
#define SC_ 3
#define OR_ 8
#define AND_ 12

// FENCE is the raw encoding of a fence with every predecessor and successor
// bit set (fence iorw,iorw).
#define FENCE WORD $0x0ff0000f
42
// Atomically:
//      if(*ptr == old){
//              *ptr = new;
//              return 1;
//      } else {
//              return 0;
//      }
50
// func Cas(ptr *uint32, old, new uint32) bool
// 32-bit compare-and-swap built from an LR/SC retry loop.
TEXT ·Cas(SB), NOSPLIT, $0-17
	MOV	ptr+0(FP), A0
	MOVW	old+8(FP), A1
	MOVW	new+12(FP), A2
cas_again:
	AMOWSC(LR_,13,10,0)	// lr.w.aqrl a3,(a0)
	BNE	A3, A1, cas_fail	// current value differs from old: give up
	AMOWSC(SC_,14,10,12)	// sc.w.aqrl a4,a2,(a0)
	BNE	A4, ZERO, cas_again	// non-zero a4 means the store-conditional failed: retry
	MOV	$1, A0
	MOVB	A0, ret+16(FP)
	RET
cas_fail:
	// ret is a single byte at offset 16 of a 17-byte argument frame, so
	// store exactly one byte (as the success path and Cas64 do) rather
	// than a full 8-byte register.
	MOVB	ZERO, ret+16(FP)
	RET
67
// func Cas64(ptr *uint64, old, new uint64) bool
// Atomically:
//	if *ptr == old {
//		*ptr = new
//		return true
//	}
//	return false
TEXT ·Cas64(SB), NOSPLIT, $0-25
	MOV	ptr+0(FP), A0
	MOV	old+8(FP), A1
	MOV	new+16(FP), A2
cas64_retry:
	AMODSC(LR_,13,10,0)	// lr.d.aqrl a3,(a0)
	BNE	A3, A1, cas64_mismatch
	AMODSC(SC_,14,10,12)	// sc.d.aqrl a4,a2,(a0)
	BNE	A4, ZERO, cas64_retry	// store-conditional failed: start over
	// Swap succeeded: report true.
	MOV	$1, A3
	MOVB	A3, ret+24(FP)
	RET
cas64_mismatch:
	// *ptr did not hold old: report false without writing to *ptr.
	MOVB	ZERO, ret+24(FP)
	RET
84
// func Load(ptr *uint32) uint32
// The load is issued as a load-reserved with aq and rl set; the reservation
// it creates is never consumed.
TEXT ·Load(SB),NOSPLIT|NOFRAME,$0-12
	MOV	ptr+0(FP), A1
	AMOWSC(LR_,10,11,0)	// lr.w.aqrl a0,(a1)
	MOVW	A0, ret+8(FP)
	RET
91
// func Load8(ptr *uint8) uint8
// A plain byte load bracketed by FENCE on both sides.
TEXT ·Load8(SB),NOSPLIT|NOFRAME,$0-9
	MOV	ptr+0(FP), A1
	FENCE
	MOVBU	(A1), A0	// zero-extended byte load
	FENCE
	MOVB	A0, ret+8(FP)
	RET
100
// func Load64(ptr *uint64) uint64
// The load is issued as a 64-bit load-reserved with aq and rl set; the
// reservation it creates is never consumed.
TEXT ·Load64(SB),NOSPLIT|NOFRAME,$0-16
	MOV	ptr+0(FP), A1
	AMODSC(LR_,10,11,0)	// lr.d.aqrl a0,(a1)
	MOV	A0, ret+8(FP)
	RET
107
// func Store(ptr *uint32, val uint32)
// The store is an atomic swap whose previous value is discarded by naming
// register 0 as the destination.
TEXT ·Store(SB), NOSPLIT, $0-12
	MOVW	val+8(FP), A1
	MOV	ptr+0(FP), A0
	AMOWSC(SWAP_,0,10,11)	// amoswap.w.aqrl zero,a1,(a0)
	RET
114
// func Store8(ptr *uint8, val uint8)
// A plain byte store bracketed by FENCE on both sides.
TEXT ·Store8(SB), NOSPLIT, $0-9
	MOVBU	val+8(FP), A1
	MOV	ptr+0(FP), A0
	FENCE
	MOVB	A1, (A0)
	FENCE
	RET
123
// func Store64(ptr *uint64, val uint64)
// The store is a 64-bit atomic swap whose previous value is discarded by
// naming register 0 as the destination.
TEXT ·Store64(SB), NOSPLIT, $0-16
	MOV	val+8(FP), A1
	MOV	ptr+0(FP), A0
	AMODSC(SWAP_,0,10,11)	// amoswap.d.aqrl zero,a1,(a0)
	RET
130
// Casp1 uses the same 25-byte argument frame as Cas64 and is implemented by
// tail-jumping to it.
TEXT ·Casp1(SB), NOSPLIT, $0-25
	JMP	·Cas64(SB)
133
// Casuintptr uses the same 25-byte argument frame as Cas64 and is implemented
// by tail-jumping to it.
TEXT ·Casuintptr(SB), NOSPLIT, $0-25
	JMP	·Cas64(SB)
136
// CasRel uses the same 17-byte argument frame as Cas and is implemented by
// tail-jumping to it; no separate weaker-ordered variant exists in this file.
TEXT ·CasRel(SB), NOSPLIT, $0-17
	JMP	·Cas(SB)
139
// Loaduintptr uses the same 16-byte argument frame as Load64 and is
// implemented by tail-jumping to it.
TEXT ·Loaduintptr(SB), NOSPLIT, $0-16
	JMP	·Load64(SB)
142
// Storeuintptr uses the same 16-byte argument frame as Store64 and is
// implemented by tail-jumping to it.
TEXT ·Storeuintptr(SB), NOSPLIT, $0-16
	JMP	·Store64(SB)
145
// Loaduint is a 64-bit atomic load (16-byte argument frame). It jumps straight
// to Load64, which is where Loaduintptr forwards to as well.
TEXT ·Loaduint(SB), NOSPLIT, $0-16
	JMP	·Load64(SB)
148
// Loadint64 is a 64-bit atomic load (16-byte argument frame). It jumps
// straight to Load64, which is where Loaduintptr forwards to as well.
TEXT ·Loadint64(SB), NOSPLIT, $0-16
	JMP	·Load64(SB)
151
// Xaddint64 atomically adds the 64-bit delta to the 64-bit word at ptr and
// returns the new value. ptr, delta and ret each occupy 8 bytes of the
// 24-byte argument frame.
TEXT ·Xaddint64(SB),NOSPLIT,$0-24
	MOV	ptr+0(FP), A0
	MOV	delta+8(FP), A1
	WORD $0x04b5352f	// amoadd.d.aq a0,a1,(a0)
	ADD	A0, A1, A0	// previous value + delta = value now in memory
	// The result is 64 bits wide; a MOVW here would write only the low
	// 32 bits of the return slot.
	MOV	A0, ret+16(FP)
	RET
159
// LoadAcq uses the same 12-byte argument frame as Load and is implemented by
// tail-jumping to it.
TEXT ·LoadAcq(SB), NOSPLIT|NOFRAME, $0-12
	JMP	·Load(SB)
162
// func Loadp(ptr unsafe.Pointer) unsafe.Pointer
// Pointer-sized load; forwarded to the 64-bit Load64.
TEXT ·Loadp(SB), NOSPLIT, $0-16
	JMP	·Load64(SB)
166
// func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
// Pointer-sized store; forwarded to the 64-bit Store64.
TEXT ·StorepNoWB(SB), NOSPLIT, $0-16
	JMP	·Store64(SB)
170
// StoreRel uses the same 12-byte argument frame as Store and is implemented
// by tail-jumping to it.
TEXT ·StoreRel(SB), NOSPLIT, $0-12
	JMP	·Store(SB)
173
// func Xchg(ptr *uint32, new uint32) uint32
// Atomically stores new into *ptr and returns the value that was there.
TEXT ·Xchg(SB), NOSPLIT, $0-20
	MOVW	new+8(FP), A1
	MOV	ptr+0(FP), A0
	AMOWSC(SWAP_,12,10,11)	// amoswap.w.aqrl a2,a1,(a0)
	MOVW	A2, ret+16(FP)
	RET
181
// func Xchg64(ptr *uint64, new uint64) uint64
// Atomically stores new into *ptr and returns the value that was there.
TEXT ·Xchg64(SB), NOSPLIT, $0-24
	MOV	new+8(FP), A1
	MOV	ptr+0(FP), A0
	AMODSC(SWAP_,12,10,11)	// amoswap.d.aqrl a2,a1,(a0)
	MOV	A2, ret+16(FP)
	RET
189
190// Atomically:
191//      *val += delta;
192//      return *val;
193
// func Xadd(ptr *uint32, delta int32) uint32
// The AMO returns the value held before the add, so delta is added once more
// in a register to produce the new value that is returned.
TEXT ·Xadd(SB), NOSPLIT, $0-20
	MOVW	delta+8(FP), A1
	MOV	ptr+0(FP), A0
	AMOWSC(ADD_,12,10,11)	// amoadd.w.aqrl a2,a1,(a0)
	ADD	A1, A2		// a2 = previous value + delta
	MOVW	A2, ret+16(FP)
	RET
202
// func Xadd64(ptr *uint64, delta int64) uint64
// The AMO returns the value held before the add, so delta is added once more
// in a register to produce the new value that is returned.
TEXT ·Xadd64(SB), NOSPLIT, $0-24
	MOV	delta+8(FP), A1
	MOV	ptr+0(FP), A0
	AMODSC(ADD_,12,10,11)	// amoadd.d.aqrl a2,a1,(a0)
	ADD	A1, A2		// a2 = previous value + delta
	MOV	A2, ret+16(FP)
	RET
211
// func Xadduintptr(ptr *uintptr, delta uintptr) uintptr
// Same 24-byte argument frame as Xadd64; forwarded to it.
TEXT ·Xadduintptr(SB), NOSPLIT, $0-24
	JMP	·Xadd64(SB)
215
// func Xchguintptr(ptr *uintptr, new uintptr) uintptr
// Same 24-byte argument frame as Xchg64; forwarded to it.
TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
	JMP	·Xchg64(SB)
219
// func And8(ptr *uint8, val uint8)
// Performs the byte-wide AND as a 32-bit AMO on the aligned word containing
// *ptr. The operand word is val shifted to the target byte's position with
// every other bit set to 1, so the neighbouring bytes are left unchanged.
TEXT ·And8(SB), NOSPLIT, $0-9
	MOV	ptr+0(FP), A0
	MOVBU	val+8(FP), A1
	XOR	$255, A1	// a1 = val with its 8 bits inverted
	AND	$3, A0, A2	// a2 = offset of the byte within its 4-byte word
	SLL	$3, A2		// a2 = that offset in bits
	AND	$-4, A0		// a0 = address rounded down to a 4-byte boundary
	SLL	A2, A1		// shift the inverted byte into position
	XOR	$-1, A1		// invert everything: val in position, 1s elsewhere
	AMOWSC(AND_,0,10,11)	// amoand.w.aqrl zero,a1,(a0)
	RET
232
// func Or8(ptr *uint8, val uint8)
// Performs the byte-wide OR as a 32-bit AMO on the aligned word containing
// *ptr. The operand word is val shifted to the target byte's position with
// zeros elsewhere, so the neighbouring bytes are left unchanged.
TEXT ·Or8(SB), NOSPLIT, $0-9
	MOVBU	val+8(FP), A1
	MOV	ptr+0(FP), A0
	AND	$3, A0, A2	// a2 = offset of the byte within its 4-byte word
	SLL	$3, A2		// a2 = that offset in bits
	AND	$-4, A0		// a0 = address rounded down to a 4-byte boundary
	SLL	A2, A1		// shift val into position
	AMOWSC(OR_,0,10,11)	// amoor.w.aqrl zero,a1,(a0)
	RET
243