// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build race

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow us to remove the cyclic runtime/race
// dependency on cmd/go.

// A brief recap of the amd64 calling convention.
// Arguments are passed in DI, SI, DX, CX, R8, R9; the rest go on the stack.
// Callee-saved registers are: BX, BP, R12-R15.
// SP must be 16-byte aligned.
// On Windows:
// Arguments are passed in CX, DX, R8, R9; the rest go on the stack.
// Callee-saved registers are: BX, BP, DI, SI, R12-R15.
// SP must be 16-byte aligned. Windows also requires "stack-backing" for the 4 register arguments:
// https://msdn.microsoft.com/en-us/library/ms235286.aspx
// We do not do this, because it seems to be intended for vararg/unprototyped functions.
// The gcc-compiled race runtime does not try to use that space.

#ifdef GOOS_windows
#define RARG0 CX
#define RARG1 DX
#define RARG2 R8
#define RARG3 R9
#else
#define RARG0 DI
#define RARG1 SI
#define RARG2 DX
#define RARG3 CX
#endif

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
TEXT	runtime·raceread(SB), NOSPLIT, $0-8
	MOVQ	addr+0(FP), RARG1
	MOVQ	(SP), RARG2 // caller pc
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_read(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

// void runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_read_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
TEXT	runtime·racewrite(SB), NOSPLIT, $0-8
	MOVQ	addr+0(FP), RARG1
	MOVQ	(SP), RARG2 // caller pc
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_write(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// void runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_write_pc(SB), AX
	JMP	racecalladdr<>(SB)
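// For illustration, the compiler's race instrumentation of a plain Go
// store is roughly equivalent to the explicit calls below (a sketch,
// not the exact generated code; x is a hypothetical variable):
//
//	runtime.racewrite(uintptr(unsafe.Pointer(&x)))
//	x = 1
//
// Because raceread/racewrite recover the reporting pc from (SP), the
// exported RaceRead/RaceWrite wrappers above must reach them via a tail
// call (JMP), never via CALL.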
// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racereadrange(SB), NOSPLIT, $0-16
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	(SP), RARG3 // caller pc
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// void runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racewriterange(SB), NOSPLIT, $0-16
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	(SP), RARG3 // caller pc
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// void runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// If addr (RARG1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. Other arguments are already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	RARG1, runtime·racearenastart(SB)
	JB	data
	CMPQ	RARG1, runtime·racearenaend(SB)
	JB	call
data:
	CMPQ	RARG1, runtime·racedatastart(SB)
	JB	ret
	CMPQ	RARG1, runtime·racedataend(SB)
	JAE	ret
call:
	MOVQ	AX, AX // w/o this 6a miscompiles this function
	JMP	racecall<>(SB)
ret:
	RET
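// In Go-like pseudocode, the filter implemented by racecalladdr<> above
// is roughly the following (a sketch; the bounds are the runtime·race*
// symbols the CMPQs read):
//
//	if (addr >= racearenastart && addr < racearenaend) ||
//		(addr >= racedatastart && addr < racedataend) {
//		racecall() // report the access to tsan
//	}
//	// otherwise the address is neither heap nor data/bss; nothing to check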
// func runtime·racefuncenterfp(fp uintptr)
// Called from instrumented code.
// Like racefuncenter but passes FP, not PC.
TEXT	runtime·racefuncenterfp(SB), NOSPLIT, $0-8
	MOVQ	fp+0(FP), R11
	MOVQ	-8(R11), R11 // load the caller's return address stored just below fp
	JMP	racefuncenter<>(SB)

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVQ	callpc+0(FP), R11
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter/racefuncenterfp.
// R11 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	MOVQ	DX, R15 // save function entry context (for closures)
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	R11, RARG1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVQ	$__tsan_func_enter(SB), AX
	// racecall<> preserves R15
	CALL	racecall<>(SB)
	MOVQ	R15, DX // restore function entry context
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// void __tsan_func_exit(ThreadState *thr);
	MOVQ	$__tsan_func_exit(SB), AX
	JMP	racecall<>(SB)

// Atomic operations for sync/atomic package.

// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt64(SB)

// Add
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVL	add+8(FP), AX // convert fetch_add to add_fetch
	ADDL	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVQ	add+8(FP), AX // convert fetch_add to add_fetch
	ADDQ	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt64(SB)
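// Note on the Add wrappers above: __tsan_go_atomic*_fetch_add follows C
// fetch_add semantics and returns the old value, while sync/atomic.Add*
// must return the new value. The ADDL/ADDQ on ret+16(FP) converts one
// into the other: e.g. with old value 5 and delta 3, tsan stores 5 into
// the result slot and the fixup turns it into 8.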
// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// AX already contains the target function.
TEXT	racecallatomic<>(SB), NOSPLIT, $0-0
	// Trigger SIGSEGV early if the address is bad.
	MOVQ	16(SP), R12
	MOVL	(R12), R13
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	R12, runtime·racearenastart(SB)
	JB	racecallatomic_data
	CMPQ	R12, runtime·racearenaend(SB)
	JB	racecallatomic_ok
racecallatomic_data:
	CMPQ	R12, runtime·racedatastart(SB)
	JB	racecallatomic_ignore
	CMPQ	R12, runtime·racedataend(SB)
	JAE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVQ	AX, R15	// remember the original function
	MOVQ	$__tsan_go_ignore_sync_begin(SB), AX
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	CALL	racecall<>(SB)
	MOVQ	R15, AX	// restore the original function
	// Call the atomic function.
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	CALL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVQ	$__tsan_go_ignore_sync_end(SB), AX
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	JMP	racecall<>(SB)

// void runtime·racecall(void(*f)(...), ...)
// Calls C function f from race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVQ	fn+0(FP), AX
	MOVQ	arg0+8(FP), RARG0
	MOVQ	arg1+16(FP), RARG1
	MOVQ	arg2+24(FP), RARG2
	MOVQ	arg3+32(FP), RARG3
	JMP	racecall<>(SB)

// Switches SP to the g0 stack and calls (AX). Arguments are already set.
TEXT	racecall<>(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_m(R14), R13
	// Switch to g0 stack.
	MOVQ	SP, R12	// callee-saved, preserved across the CALL
	MOVQ	m_g0(R13), R10
	CMPQ	R10, R14
	JE	call	// already on g0
	MOVQ	(g_sched+gobuf_sp)(R10), SP
call:
	ANDQ	$~15, SP	// alignment for gcc ABI
	CALL	AX
	MOVQ	R12, SP
	RET
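// racecall<> plays the same role for these C calls that systemstack
// plays for Go code: the goroutine stack may be small and movable, so
// the original SP is stashed in callee-saved R12, SP is switched to
// g0's stack unless we are already on it, and SP is then rounded down
// to the 16-byte alignment the gcc ABI expects before calling into the
// tsan runtime.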
// C->Go callback thunk that allows calling runtime·racecallback from C code.
// A direct Go->C race call has only switched SP; finish the g->g0 switch by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
// RARG0 contains the command code. RARG1 contains the command-specific context.
// See racecallback for command codes.
TEXT	runtime·racecallbackthunk(SB), NOSPLIT, $56-8
	// Handle command raceGetProcCmd (0) here.
	// First, the code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so it will
	// benefit from this fast path.
	CMPQ	RARG0, $0
	JNE	rest
	get_tls(RARG0)
	MOVQ	g(RARG0), RARG0
	MOVQ	g_m(RARG0), RARG0
	MOVQ	m_p(RARG0), RARG0
	MOVQ	p_raceprocctx(RARG0), RARG0
	MOVQ	RARG0, (RARG1)
	RET

rest:
	// Save callee-saved registers (Go code won't respect that).
	// This is a superset of the darwin/linux/windows callee-saved registers.
	PUSHQ	BX
	PUSHQ	BP
	PUSHQ	DI
	PUSHQ	SI
	PUSHQ	R12
	PUSHQ	R13
	PUSHQ	R14
	PUSHQ	R15
	// Set g = g0.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R14
	MOVQ	m_g0(R14), R15
	CMPQ	R13, R15
	JEQ	noswitch	// branch if already on g0
	MOVQ	R15, g(R12)	// g = m->g0
	PUSHQ	RARG1	// func arg
	PUSHQ	RARG0	// func arg
	CALL	runtime·racecallback(SB)
	POPQ	R12
	POPQ	R12
	// All registers are smashed after Go code, reload.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_curg(R13), R14
	MOVQ	R14, g(R12)	// g = m->curg
ret:
	// Restore callee-saved registers.
	POPQ	R15
	POPQ	R14
	POPQ	R13
	POPQ	R12
	POPQ	SI
	POPQ	DI
	POPQ	BP
	POPQ	BX
	RET

noswitch:
	// already on g0
	PUSHQ	RARG1	// func arg
	PUSHQ	RARG0	// func arg
	CALL	runtime·racecallback(SB)
	POPQ	R12
	POPQ	R12
	JMP	ret
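// For reference, the Go half of this thunk lives in race.go and has
// roughly this shape (a sketch of the contract, not the exact source):
//
//	// racecallback runs on g0; cmd selects the operation and ctx
//	// points at a command-specific argument block.
//	func racecallback(cmd uintptr, ctx unsafe.Pointer)
//
// The race runtime invokes racecallbackthunk for symbolization and
// proc-context queries; command 0 (raceGetProcCmd) is answered on the
// fast path above without switching to g0.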