// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build race

#include "go_asm.h"
#include "funcdata.h"
#include "textflag.h"
#include "tls_arm64.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and scheduler.
// Third, in long-term it will allow to remove cyclic runtime/race dependency on cmd/go.

// A brief recap of the arm64 calling convention.
// Arguments are passed in R0...R7, the rest is on stack.
// Callee-saved registers are: R19...R28.
// Temporary registers are: R9...R15
// SP must be 16-byte aligned.

// When calling racecalladdr, R9 is the call target address.

// The race ctx, ThreadState *thr below, is passed in R0 and loaded in racecalladdr.

#define load_g \
	MRS_TPIDR_R0 \
	MOVD    runtime·tls_g(SB), R11 \
	ADD     R11, R0 \
	MOVD    0(R0), g

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
TEXT	runtime·raceread(SB), NOSPLIT, $0-8
	MOVD	addr+0(FP), R1
	MOVD	LR, R2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_read(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

// func runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_read_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
TEXT	runtime·racewrite(SB), NOSPLIT, $0-8
	MOVD	addr+0(FP), R1
	MOVD	LR, R2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_write(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// func runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_write_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racereadrange(SB), NOSPLIT, $0-16
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	LR, R3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// func runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start, tsan wants return address.
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racewriterange(SB), NOSPLIT, $0-16
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	LR, R3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// func runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start, tsan wants return address.
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// If addr (R1) is out of range, do nothing.
// Otherwise, setup goroutine context and invoke racecall. Other arguments already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R1
	BLT	data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R1
	BLT	call
data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R1
	BLT	ret
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R1
	// The interval is half-open: addr == racedataend is out of range.
	// (BGE, not BGT — matches the equivalent check in racecallatomic below.)
	BGE	ret
call:
	JMP	racecall<>(SB)
ret:
	RET

// func runtime·racefuncenterfp(fp uintptr)
// Called from instrumented code.
// Like racefuncenter but passes no arg; uses the caller pc
// from the first slot on the stack.
TEXT	runtime·racefuncenterfp(SB), NOSPLIT, $0-0
	MOVD	0(RSP), R9
	JMP	racefuncenter<>(SB)

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVD	callpc+0(FP), R9
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter/racefuncenterfp
// R9 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// goroutine racectx
	MOVD	R9, R1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVD	$__tsan_func_enter(SB), R9
	BL	racecall<>(SB)
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// race context
	// void __tsan_func_exit(ThreadState *thr);
	MOVD	$__tsan_func_exit(SB), R9
	JMP	racecall<>(SB)

// Atomic operations for sync/atomic package.
// R3 = addr of arguments passed to this function, it can
// be fetched at 40(RSP) in racecallatomic after two times BL
// R0, R1, R2 set in racecallatomic

// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic32_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic64_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic32_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic64_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic32_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic64_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

// Add
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic32_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVW	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVW	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVW	R0, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic64_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVD	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVD	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVD	R0, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic32_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic64_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// R9 = addr of target function
TEXT	racecallatomic<>(SB), NOSPLIT, $0
	// Set up these registers
	// R0 = *ThreadState
	// R1 = caller pc
	// R2 = pc
	// R3 = addr of incoming arg list

	// Trigger SIGSEGV early.
	MOVD	40(RSP), R3	// 1st arg is addr. after two times BL, get it at 40(RSP)
	MOVD	(R3), R13	// segv here if addr is bad
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ok
racecallatomic_data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ignore
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R3
	BGE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause crash.
	MOVD	R9, R20	// remember the original function
	MOVD	$__tsan_go_ignore_sync_begin(SB), R9
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	MOVD	R20, R9	// restore the original function
	// Call the atomic function.
	// racecall will call LLVM race code which might clobber R28 (g)
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3	// arguments
	BL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVD	$__tsan_go_ignore_sync_end(SB), R9
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	RET

// func runtime·racecall(void(*f)(...), ...)
// Calls C function f from race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVD	fn+0(FP), R9
	MOVD	arg0+8(FP), R0
	MOVD	arg1+16(FP), R1
	MOVD	arg2+24(FP), R2
	MOVD	arg3+32(FP), R3
	JMP	racecall<>(SB)

// Switches SP to g0 stack and calls (R9). Arguments already set.
TEXT	racecall<>(SB), NOSPLIT, $0-0
	MOVD	g_m(g), R10
	// Switch to g0 stack.
	MOVD	RSP, R19	// callee-saved, preserved across the CALL
	MOVD	m_g0(R10), R11
	CMP	R11, g
	BEQ	call	// already on g0
	MOVD	(g_sched+gobuf_sp)(R11), R12
	MOVD	R12, RSP
call:
	BL	R9
	MOVD	R19, RSP
	RET

// C->Go callback thunk that allows to call runtime·racesymbolize from C code.
// Direct Go->C race call has only switched SP, finish g->g0 switch by setting correct g.
// The overall effect of Go->C->Go call chain is similar to that of mcall.
// R0 contains command code. R1 contains command-specific context.
// See racecallback for command codes.
TEXT	runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0
	// Handle command raceGetProcCmd (0) here.
	// First, code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so will
	// benefit from this fast path.
	CMP	$0, R0
	BNE	rest
	MOVD	g, R13
	load_g
	MOVD	g_m(g), R0
	MOVD	m_p(R0), R0
	MOVD	p_raceprocctx(R0), R0
	MOVD	R0, (R1)
	MOVD	R13, g
	JMP	(LR)
rest:
	// Save callee-saved registers (Go code won't respect that).
	// 8(RSP) and 16(RSP) are for args passed through racecallback
	SUB	$112, RSP
	MOVD	LR, 0(RSP)
	STP	(R19, R20), 24(RSP)
	STP	(R21, R22), 40(RSP)
	STP	(R23, R24), 56(RSP)
	STP	(R25, R26), 72(RSP)
	STP	(R27, g), 88(RSP)
	// Set g = g0.
	// load_g will clobber R0, Save R0
	MOVD	R0, R13
	load_g
	// restore R0
	MOVD	R13, R0
	MOVD	g_m(g), R13
	MOVD	m_g0(R13), R14
	CMP	R14, g
	BEQ	noswitch	// branch if already on g0

	MOVD	R14, g

	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)

	// All registers are smashed after Go code, reload.
	MOVD	g_m(g), R13
	MOVD	m_curg(R13), g	// g = m->curg
ret:
	// Restore callee-saved registers.
	MOVD	0(RSP), LR
	LDP	24(RSP), (R19, R20)
	LDP	40(RSP), (R21, R22)
	LDP	56(RSP), (R23, R24)
	LDP	72(RSP), (R25, R26)
	LDP	88(RSP), (R27, g)
	ADD	$112, RSP
	JMP	(LR)

noswitch:
	// already on g0
	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)
	JMP	ret

// tls_g, g value for each thread in TLS
GLOBL runtime·tls_g+0(SB), TLSBSS+DUPOK, $8