// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build race

#include "go_asm.h"
#include "funcdata.h"
#include "textflag.h"
#include "tls_arm64.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow us to remove the cyclic runtime/race dependency on cmd/go.

// A brief recap of the arm64 calling convention.
// Arguments are passed in R0...R7, the rest are passed on the stack.
// Callee-saved registers are: R19...R28.
// Temporary registers are: R9...R15.
// SP must be 16-byte aligned.
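//
// For example (an illustrative sketch, not a normative ABI description):
// for a Go declaration such as func raceread(addr uintptr), ABI0 passes
// addr on the stack at addr+0(FP); the thunks below load such stack
// arguments into R0...R7, which is where the C-compiled race runtime
// expects them.
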
// When calling racecalladdr, R9 is the call target address.

// The race ctx, ThreadState *thr below, is passed in R0 and loaded in racecalladdr.

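// load_g loads the current g into the g register (R28): MRS_TPIDR_R0
// (from tls_arm64.h) reads the thread pointer into R0, and runtime·tls_g
// resolves to the offset of the g slot relative to that pointer.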
#define load_g \
	MRS_TPIDR_R0 \
	MOVD    runtime·tls_g(SB), R11 \
	ADD     R11, R0 \
	MOVD    0(R0), g

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
TEXT	runtime·raceread(SB), NOSPLIT, $0-8
	MOVD	addr+0(FP), R1
	MOVD	LR, R2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_read(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

// func runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_read_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
TEXT	runtime·racewrite(SB), NOSPLIT, $0-8
	MOVD	addr+0(FP), R1
	MOVD	LR, R2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_write(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// func runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_write_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racereadrange(SB), NOSPLIT, $0-16
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	LR, R3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// func runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start, tsan wants return address.
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racewriterange(SB), NOSPLIT, $0-16
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	LR, R3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// func runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start, tsan wants return address.
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// If addr (R1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. Other arguments are already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R1
	BLT	data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R1
	BLT	call
data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R1
	BLT	ret
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R1
	BGE	ret	// racedataend is exclusive, so >= is out of range
call:
	JMP	racecall<>(SB)
ret:
	RET
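
// In rough pseudocode, the check above is:
//
//	if (racearenastart <= addr && addr < racearenaend) goto call;
//	if (racedatastart <= addr && addr < racedataend) goto call;
//	return;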

// func runtime·racefuncenterfp(fp uintptr)
// Called from instrumented code.
// Like racefuncenter but doesn't pass an arg; it uses the caller pc
// from the first slot on the stack.
TEXT	runtime·racefuncenterfp(SB), NOSPLIT, $0-0
	MOVD	0(RSP), R9
	JMP	racefuncenter<>(SB)

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVD	callpc+0(FP), R9
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter/racefuncenterfp.
// R9 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// goroutine racectx
	MOVD	R9, R1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVD	$__tsan_func_enter(SB), R9
	BL	racecall<>(SB)
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// race context
	// void __tsan_func_exit(ThreadState *thr);
	MOVD	$__tsan_func_exit(SB), R9
	JMP	racecall<>(SB)

// Atomic operations for the sync/atomic package.
// R3 = addr of the arguments passed to this function; in racecallatomic it
// can be fetched at 40(RSP), after the two BLs it takes to get there.
// R0, R1, R2 are set in racecallatomic.

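// Why 40(RSP)? A sketch, assuming the assembler's usual arm64 prologue:
// the stub's incoming args start at 8(RSP) on entry; each of the two
// $0-frame TEXTs on the way into racecallatomic gets a 16-byte frame
// that saves LR, so the args end up at 8+16+16 = 40(RSP).
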
// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic32_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic64_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic32_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic64_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic32_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic64_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

// Add
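// Note: the __tsan_go_atomic*_fetch_add calls return the old value
// (fetch-and-add), while Go's atomic Add returns the new value, so the
// stubs below add the delta to the returned result in place.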
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic32_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVW	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVW	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVW	R0, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic64_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVD	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVD	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVD	R0, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic32_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic64_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// R9 = addr of target function
TEXT	racecallatomic<>(SB), NOSPLIT, $0
	// Set up these registers:
	// R0 = *ThreadState
	// R1 = caller pc
	// R2 = pc
	// R3 = addr of incoming arg list

	// Trigger SIGSEGV early.
	MOVD	40(RSP), R3	// 1st arg is addr; after the two BLs it took to get here, it is at 40(RSP)
	MOVD	(R3), R13	// segv here if addr is bad
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ok
racecallatomic_data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ignore
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R3
	BGE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVD	R9, R20	// remember the original function
	MOVD	$__tsan_go_ignore_sync_begin(SB), R9
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	MOVD	R20, R9	// restore the original function
	// Call the atomic function.
	// racecall will call LLVM race code which might clobber R28 (g).
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3	// arguments
	BL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVD	$__tsan_go_ignore_sync_end(SB), R9
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	RET
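
// In rough pseudocode, racecallatomic is:
//
//	if addr is in a race-tracked range:
//		tsan_atomic_op(racectx, callerpc, pc, args)
//	else:
//		ignore_sync_begin(); tsan_atomic_op(...); ignore_sync_end()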

// func runtime·racecall(void(*f)(...), ...)
// Calls C function f from race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVD	fn+0(FP), R9
	MOVD	arg0+8(FP), R0
	MOVD	arg1+16(FP), R1
	MOVD	arg2+24(FP), R2
	MOVD	arg3+32(FP), R3
	JMP	racecall<>(SB)

// Switches SP to the g0 stack and calls (R9). Arguments are already set.
TEXT	racecall<>(SB), NOSPLIT, $0-0
	MOVD	g_m(g), R10
	// Switch to g0 stack.
	MOVD	RSP, R19	// callee-saved, preserved across the BL
	MOVD	m_g0(R10), R11
	CMP	R11, g
	BEQ	call	// already on g0
	MOVD	(g_sched+gobuf_sp)(R11), R12
	MOVD	R12, RSP
call:
	BL	R9
	MOVD	R19, RSP
	RET
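
// Roughly: saved = RSP; if g != g.m.g0 { RSP = g.m.g0.sched.sp }; f(); RSP = saved.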

// C->Go callback thunk that allows calling runtime·racecallback from C code.
// A direct Go->C race call only switches SP, so finish the g->g0 switch by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
// R0 contains the command code. R1 contains the command-specific context.
// See racecallback for the command codes.
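// In C terms the race runtime invokes this roughly as (a sketch; the
// exact prototype lives on the race runtime side):
//
//	void racecallbackthunk(uintptr_t cmd, void *ctx);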
TEXT	runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0
	// Handle command raceGetProcCmd (0) here.
	// First, code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so will
	// benefit from this fast path.
	CMP	$0, R0
	BNE	rest
	MOVD	g, R13	// save g; load_g clobbers it
	load_g
	MOVD	g_m(g), R0
	MOVD	m_p(R0), R0
	MOVD	p_raceprocctx(R0), R0
	MOVD	R0, (R1)	// *ctx = p.raceprocctx
	MOVD	R13, g	// restore g
	JMP	(LR)
rest:
	// Save callee-saved registers (Go code won't respect that).
	// 8(RSP) and 16(RSP) are for the args passed through racecallback.
	SUB	$112, RSP
	MOVD	LR, 0(RSP)
	STP	(R19, R20), 24(RSP)
	STP	(R21, R22), 40(RSP)
	STP	(R23, R24), 56(RSP)
	STP	(R25, R26), 72(RSP)
	STP	(R27,   g), 88(RSP)
	// Set g = g0.
	// load_g will clobber R0, so save R0.
	MOVD	R0, R13
	load_g
	// Restore R0.
	MOVD	R13, R0
	MOVD	g_m(g), R13
	MOVD	m_g0(R13), R14
	CMP	R14, g
	BEQ	noswitch	// branch if already on g0
	MOVD	R14, g	// switch to g0

	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)

	// Go code may have smashed all registers; reload.
	MOVD	g_m(g), R13
	MOVD	m_curg(R13), g	// g = m->curg
ret:
	// Restore callee-saved registers.
	MOVD	0(RSP), LR
	LDP	24(RSP), (R19, R20)
	LDP	40(RSP), (R21, R22)
	LDP	56(RSP), (R23, R24)
	LDP	72(RSP), (R25, R26)
	LDP	88(RSP), (R27,   g)
	ADD	$112, RSP
	JMP	(LR)

noswitch:
	// Already on g0.
	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)
	JMP	ret

// tls_g: the g value for each thread, stored in TLS.
GLOBL runtime·tls_g+0(SB), TLSBSS+DUPOK, $8