// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build race

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow us to remove the cyclic runtime/race dependency on cmd/go.
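//
// Roughly, each thunk below provides the body of a Go function declared in
// the Go parts of the runtime, e.g. (a sketch of the Go side, not this file):
//	func raceread(addr uintptr)
//	func racewrite(addr uintptr)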

// A brief recap of the amd64 calling convention.
// Arguments are passed in DI, SI, DX, CX, R8, R9; the rest are passed on the stack.
// Callee-saved registers are: BX, BP, R12-R15.
// SP must be 16-byte aligned.
// On Windows:
// Arguments are passed in CX, DX, R8, R9; the rest are passed on the stack.
// Callee-saved registers are: BX, BP, DI, SI, R12-R15.
// SP must be 16-byte aligned. Windows also requires "stack-backing" for the 4 register arguments:
// https://msdn.microsoft.com/en-us/library/ms235286.aspx
// We do not do this, because it seems to be intended for vararg/unprototyped functions.
// The gcc-compiled race runtime does not try to use that space.
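//
// For example (a sketch): a call __tsan_read(thr, addr, pc) expects thr in DI,
// addr in SI and pc in DX on linux/darwin, and the same arguments in CX, DX
// and R8 on Windows; the RARG macros below hide this difference.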

#ifdef GOOS_windows
#define RARG0 CX
#define RARG1 DX
#define RARG2 R8
#define RARG3 R9
#else
#define RARG0 DI
#define RARG1 SI
#define RARG2 DX
#define RARG3 CX
#endif

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
TEXT	runtime·raceread(SB), NOSPLIT, $0-8
	MOVQ	addr+0(FP), RARG1
	MOVQ	(SP), RARG2
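	// This function has no frame of its own, so 0(SP) holds the return
	// address pushed by the caller's CALL; it is used as the pc of the access.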
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_read(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

// void runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
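	// (tsan is assumed to treat pcs as return addresses and subtract 1 when
	// symbolizing, so function start + 1 attributes the access to the function.)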
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_read_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
TEXT	runtime·racewrite(SB), NOSPLIT, $0-8
	MOVQ	addr+0(FP), RARG1
	MOVQ	(SP), RARG2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_write(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// void runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_write_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racereadrange(SB), NOSPLIT, $0-16
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	(SP), RARG3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// void runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racewriterange(SB), NOSPLIT, $0-16
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	(SP), RARG3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// void runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// If addr (RARG1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. The other arguments are already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
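	// In C terms the check below is roughly:
	//	if (!((addr >= racearenastart && addr < racearenaend) ||
	//	      (addr >= racedatastart && addr < racedataend)))
	//		return;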
	CMPQ	RARG1, runtime·racearenastart(SB)
	JB	data
	CMPQ	RARG1, runtime·racearenaend(SB)
	JB	call
data:
	CMPQ	RARG1, runtime·racedatastart(SB)
	JB	ret
	CMPQ	RARG1, runtime·racedataend(SB)
	JAE	ret
call:
	MOVQ	AX, AX		// w/o this 6a miscompiles this function
	JMP	racecall<>(SB)
ret:
	RET

// func runtime·racefuncenterfp(fp uintptr)
// Called from instrumented code.
// Like racefuncenter but passes FP, not PC.
TEXT	runtime·racefuncenterfp(SB), NOSPLIT, $0-8
	MOVQ	fp+0(FP), R11
	MOVQ	-8(R11), R11
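	// (the caller's return address is stored one word below fp)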
	JMP	racefuncenter<>(SB)

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVQ	callpc+0(FP), R11
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter/racefuncenterfp.
// R11 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	MOVQ	DX, R15		// save function entry context (for closures)
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	R11, RARG1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVQ	$__tsan_func_enter(SB), AX
	// racecall<> preserves R15
	CALL	racecall<>(SB)
	MOVQ	R15, DX	// restore function entry context
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// void __tsan_func_exit(ThreadState *thr);
	MOVQ	$__tsan_func_exit(SB), AX
	JMP	racecall<>(SB)

// Atomic operations for the sync/atomic package.
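// When the race detector is enabled, these symbols implement the public
// sync/atomic functions: each forwards to the matching __tsan_go_atomic*
// entry point via racecallatomic<> below.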

// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt64(SB)

// Add
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
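	// __tsan_go_atomic32_fetch_add stores the old value in ret, while Go's
	// AddInt32 must return the new value, so add the addend to ret.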
	MOVL	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDL	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVQ	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDQ	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// AX already contains the target function.
TEXT	racecallatomic<>(SB), NOSPLIT, $0-0
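	// No local frame, so the stack here is assumed to look like:
	//	 0(SP) - return address into the sync/atomic wrapper (used as pc)
	//	 8(SP) - the wrapper's return address in its caller (used as caller pc)
	//	16(SP) - the wrapper's first argument (addr)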
	// Trigger SIGSEGV early.
	MOVQ	16(SP), R12
	MOVL	(R12), R13
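	// (the dummy load above faults on the Go side if addr is bad, rather
	// than later inside the tsan runtime)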
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	R12, runtime·racearenastart(SB)
	JB	racecallatomic_data
	CMPQ	R12, runtime·racearenaend(SB)
	JB	racecallatomic_ok
racecallatomic_data:
	CMPQ	R12, runtime·racedatastart(SB)
	JB	racecallatomic_ignore
	CMPQ	R12, runtime·racedataend(SB)
	JAE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVQ	AX, R15	// remember the original function
	MOVQ	$__tsan_go_ignore_sync_begin(SB), AX
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	CALL	racecall<>(SB)
	MOVQ	R15, AX	// restore the original function
	// Call the atomic function.
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	CALL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVQ	$__tsan_go_ignore_sync_end(SB), AX
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	JMP	racecall<>(SB)

// void runtime·racecall(void(*f)(...), ...)
// Calls C function f from the race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
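// (A sketch of a Go-side call site, assuming the usual declaration
// func racecall(fn *byte, arg0, arg1, arg2, arg3 uintptr):
//	racecall(&__tsan_func_exit, racectx, 0, 0, 0)
// The real call sites live in the Go parts of the runtime.)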
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVQ	fn+0(FP), AX
	MOVQ	arg0+8(FP), RARG0
	MOVQ	arg1+16(FP), RARG1
	MOVQ	arg2+24(FP), RARG2
	MOVQ	arg3+32(FP), RARG3
	JMP	racecall<>(SB)

// Switches SP to the g0 stack and calls (AX). Arguments are already set.
TEXT	racecall<>(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_m(R14), R13
	// Switch to g0 stack.
	MOVQ	SP, R12		// callee-saved, preserved across the CALL
	MOVQ	m_g0(R13), R10
	CMPQ	R10, R14
	JE	call	// already on g0
	MOVQ	(g_sched+gobuf_sp)(R10), SP
call:
	ANDQ	$~15, SP	// alignment for gcc ABI
	CALL	AX
	MOVQ	R12, SP
	RET

// C->Go callback thunk that allows calling runtime·racecallback from C code.
// The direct Go->C race call has only switched SP; finish the g->g0 switch by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
// RARG0 contains the command code. RARG1 contains the command-specific context.
// See racecallback for the command codes.
TEXT	runtime·racecallbackthunk(SB), NOSPLIT, $56-8
	// Handle command raceGetProcCmd (0) here.
	// First, the code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so it will
	// benefit from this fast path.
	CMPQ	RARG0, $0
	JNE	rest
	get_tls(RARG0)
	MOVQ	g(RARG0), RARG0
	MOVQ	g_m(RARG0), RARG0
	MOVQ	m_p(RARG0), RARG0
	MOVQ	p_raceprocctx(RARG0), RARG0
	MOVQ	RARG0, (RARG1)
	RET

rest:
	// Save callee-saved registers (Go code will not preserve them).
	// This is a superset of the darwin/linux/windows callee-saved registers.
	PUSHQ	BX
	PUSHQ	BP
	PUSHQ	DI
	PUSHQ	SI
	PUSHQ	R12
	PUSHQ	R13
	PUSHQ	R14
	PUSHQ	R15
	// Set g = g0.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R14
	MOVQ	m_g0(R14), R15
	CMPQ	R13, R15
	JEQ	noswitch	// branch if already on g0
	MOVQ	R15, g(R12)	// g = m->g0
	PUSHQ	RARG1	// func arg
	PUSHQ	RARG0	// func arg
	CALL	runtime·racecallback(SB)
	POPQ	R12
	POPQ	R12
	// All registers are smashed after Go code, reload.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_curg(R13), R14
	MOVQ	R14, g(R12)	// g = m->curg
ret:
	// Restore callee-saved registers.
	POPQ	R15
	POPQ	R14
	POPQ	R13
	POPQ	R12
	POPQ	SI
	POPQ	DI
	POPQ	BP
	POPQ	BX
	RET

noswitch:
	// already on g0
	PUSHQ	RARG1	// func arg
	PUSHQ	RARG0	// func arg
	CALL	runtime·racecallback(SB)
	POPQ	R12
	POPQ	R12
	JMP	ret