// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import (
	"cmd/compile/internal/reflectdata"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/objabi"
	"cmd/internal/src"
	"fmt"
)

// A ZeroRegion records parts of an object which are known to be zero.
// A ZeroRegion only applies to a single memory state.
// Each bit in mask is set if the corresponding pointer-sized word of
// the base object is known to be zero.
// In other words, if mask & (1<<i) != 0, then [base+i*ptrSize, base+(i+1)*ptrSize)
// is known to be zero.
type ZeroRegion struct {
	base *Value
	mask uint64
}

// needwb reports whether we need a write barrier for the store op v.
// v must be a Store/Move/Zero.
// zeroes provides known zero information (keyed by ID of memory-type values).
func needwb(v *Value, zeroes map[ID]ZeroRegion) bool {
	t, ok := v.Aux.(*types.Type)
	if !ok {
		v.Fatalf("store aux is not a type: %s", v.LongString())
	}
	if !t.HasPointers() {
		return false
	}
	if IsStackAddr(v.Args[0]) {
		return false // write on stack doesn't need write barrier
	}
	if v.Op == OpMove && IsReadOnlyGlobalAddr(v.Args[1]) {
		if mem, ok := IsNewObject(v.Args[0]); ok && mem == v.MemoryArg() {
			// Copying data from readonly memory into a fresh object doesn't need a write barrier.
			return false
		}
	}
	if v.Op == OpStore && IsGlobalAddr(v.Args[1]) {
		// Storing pointers to non-heap locations into zeroed memory doesn't need a write barrier.
		ptr := v.Args[0]
		var off int64
		size := v.Aux.(*types.Type).Size()
		for ptr.Op == OpOffPtr {
			off += ptr.AuxInt
			ptr = ptr.Args[0]
		}
		ptrSize := v.Block.Func.Config.PtrSize
		if off%ptrSize != 0 || size%ptrSize != 0 {
			v.Fatalf("unaligned pointer write")
		}
		if off < 0 || off+size > 64*ptrSize {
			// write goes off end of tracked offsets
			return true
		}
		z := zeroes[v.MemoryArg().ID]
		if ptr != z.base {
			return true
		}
		for i := off; i < off+size; i += ptrSize {
			if z.mask>>uint(i/ptrSize)&1 == 0 {
				return true // not known to be zero
			}
		}
		// All written locations are known to be zero - write barrier not needed.
		return false
	}
	return true
}

// The writebarrier pass inserts write barriers for store ops (Store, Move, Zero)
// when necessary (the condition above). It rewrites store ops to branches
// and runtime calls, like
//
//	if writeBarrier.enabled {
//		gcWriteBarrier(ptr, val) // Not a regular Go call
//	} else {
//		*ptr = val
//	}
//
// A sequence of WB stores for many pointer fields of a single type will
// be emitted together, with a single branch.
func writebarrier(f *Func) {
	if !f.fe.UseWriteBarrier() {
		return
	}

	var sb, sp, wbaddr, const0 *Value
	var typedmemmove, typedmemclr, gcWriteBarrier *obj.LSym
	var stores, after []*Value
	var sset *sparseSet
	var storeNumber []int32

	zeroes := f.computeZeroMap()
	for _, b := range f.Blocks { // range loop is safe since the blocks we added contain no stores to expand
		// first, identify all the stores that need to insert a write barrier.
		// mark them with WB ops temporarily. record presence of WB ops.
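		// For example, an OpStore of a pointer value into a heap object
		// becomes OpStoreWB here, and is expanded further below into an
		// explicit "if writeBarrier.enabled" branch.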
		nWBops := 0 // count of temporarily created WB ops remaining to be rewritten in the current block
		for _, v := range b.Values {
			switch v.Op {
			case OpStore, OpMove, OpZero:
				if needwb(v, zeroes) {
					switch v.Op {
					case OpStore:
						v.Op = OpStoreWB
					case OpMove:
						v.Op = OpMoveWB
					case OpZero:
						v.Op = OpZeroWB
					}
					nWBops++
				}
			}
		}
		if nWBops == 0 {
			continue
		}

		if wbaddr == nil {
			// lazily initialize global values for write barrier test and calls
			// find SB and SP values in entry block
			initpos := f.Entry.Pos
			sp, sb = f.spSb()
			wbsym := f.fe.Syslook("writeBarrier")
			wbaddr = f.Entry.NewValue1A(initpos, OpAddr, f.Config.Types.UInt32Ptr, wbsym, sb)
			gcWriteBarrier = f.fe.Syslook("gcWriteBarrier")
			typedmemmove = f.fe.Syslook("typedmemmove")
			typedmemclr = f.fe.Syslook("typedmemclr")
			const0 = f.ConstInt32(f.Config.Types.UInt32, 0)

			// allocate auxiliary data structures for computing store order
			sset = f.newSparseSet(f.NumValues())
			defer f.retSparseSet(sset)
			storeNumber = make([]int32, f.NumValues())
		}

		// order values in store order
		b.Values = storeOrder(b.Values, sset, storeNumber)

		firstSplit := true
	again:
		// find the start and end of the last contiguous WB store sequence.
		// a branch will be inserted there. values after it will be moved
		// to a new block.
		var last *Value
		var start, end int
		values := b.Values
	FindSeq:
		for i := len(values) - 1; i >= 0; i-- {
			w := values[i]
			switch w.Op {
			case OpStoreWB, OpMoveWB, OpZeroWB:
				start = i
				if last == nil {
					last = w
					end = i + 1
				}
			case OpVarDef, OpVarLive, OpVarKill:
				continue
			default:
				if last == nil {
					continue
				}
				break FindSeq
			}
		}
		stores = append(stores[:0], b.Values[start:end]...) // copy to avoid aliasing
		after = append(after[:0], b.Values[end:]...)
		b.Values = b.Values[:start]

		// find the memory before the WB stores
		mem := stores[0].MemoryArg()
		pos := stores[0].Pos
		bThen := f.NewBlock(BlockPlain)
		bElse := f.NewBlock(BlockPlain)
		bEnd := f.NewBlock(b.Kind)
		bThen.Pos = pos
		bElse.Pos = pos
		bEnd.Pos = b.Pos
		b.Pos = pos

		// set up control flow for end block
		bEnd.CopyControls(b)
		bEnd.Likely = b.Likely
		for _, e := range b.Succs {
			bEnd.Succs = append(bEnd.Succs, e)
			e.b.Preds[e.i].b = bEnd
		}

		// set up control flow for write barrier test
		// load word, test word, avoiding partial register write from load byte.
		cfgtypes := &f.Config.Types
		flag := b.NewValue2(pos, OpLoad, cfgtypes.UInt32, wbaddr, mem)
		flag = b.NewValue2(pos, OpNeq32, cfgtypes.Bool, flag, const0)
		b.Kind = BlockIf
		b.SetControl(flag)
		b.Likely = BranchUnlikely
		b.Succs = b.Succs[:0]
		b.AddEdgeTo(bThen)
		b.AddEdgeTo(bElse)
		// TODO: For OpStoreWB and the buffered write barrier,
		// we could move the write out of the write barrier,
		// which would lead to fewer branches. We could do
		// something similar to OpZeroWB, since the runtime
		// could provide just the barrier half and then we
		// could unconditionally do an OpZero (which could
		// also generate better zeroing code). OpMoveWB is
		// trickier and would require changing how
		// cgoCheckMemmove works.
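		// The split being constructed has the following shape, with b now
		// ending in the writeBarrier.enabled test:
		//
		//	b -> bThen (write-barrier versions of the stores)
		//	b -> bElse (plain versions of the stores)
		//	bThen, bElse -> bEnd (memory phi; inherits b's old successors)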
		bThen.AddEdgeTo(bEnd)
		bElse.AddEdgeTo(bEnd)

		// for each write barrier store, append write barrier version to bThen
		// and simple store version to bElse
		memThen := mem
		memElse := mem

		// If the source of a MoveWB is volatile (will be clobbered by a
		// function call), we need to copy it to a temporary location, as
		// marshaling the args of typedmemmove might clobber the value we're
		// trying to move.
		// Look for volatile source, copy it to temporary before we emit any
		// call.
		// It is unlikely to have more than one of them. Just do a linear
		// search instead of using a map.
		type volatileCopy struct {
			src *Value // address of original volatile value
			tmp *Value // address of temporary we've copied the volatile value into
		}
		var volatiles []volatileCopy
	copyLoop:
		for _, w := range stores {
			if w.Op == OpMoveWB {
				val := w.Args[1]
				if isVolatile(val) {
					for _, c := range volatiles {
						if val == c.src {
							continue copyLoop // already copied
						}
					}

					t := val.Type.Elem()
					tmp := f.fe.Auto(w.Pos, t)
					memThen = bThen.NewValue1A(w.Pos, OpVarDef, types.TypeMem, tmp, memThen)
					tmpaddr := bThen.NewValue2A(w.Pos, OpLocalAddr, t.PtrTo(), tmp, sp, memThen)
					siz := t.Size()
					memThen = bThen.NewValue3I(w.Pos, OpMove, types.TypeMem, siz, tmpaddr, val, memThen)
					memThen.Aux = t
					volatiles = append(volatiles, volatileCopy{val, tmpaddr})
				}
			}
		}

		for _, w := range stores {
			ptr := w.Args[0]
			pos := w.Pos

			var fn *obj.LSym
			var typ *obj.LSym
			var val *Value
			switch w.Op {
			case OpStoreWB:
				val = w.Args[1]
				nWBops--
			case OpMoveWB:
				fn = typedmemmove
				val = w.Args[1]
				typ = reflectdata.TypeLinksym(w.Aux.(*types.Type))
				nWBops--
			case OpZeroWB:
				fn = typedmemclr
				typ = reflectdata.TypeLinksym(w.Aux.(*types.Type))
				nWBops--
			case OpVarDef, OpVarLive, OpVarKill:
			}

			// then block: emit write barrier call
			switch w.Op {
			case OpStoreWB, OpMoveWB, OpZeroWB:
				if w.Op == OpStoreWB {
					memThen = bThen.NewValue3A(pos, OpWB, types.TypeMem, gcWriteBarrier, ptr, val, memThen)
				} else {
					srcval := val
					if w.Op == OpMoveWB && isVolatile(srcval) {
						for _, c := range volatiles {
							if srcval == c.src {
								srcval = c.tmp
								break
							}
						}
					}
					memThen = wbcall(pos, bThen, fn, typ, ptr, srcval, memThen, sp, sb)
				}
				// Note that we set up a writebarrier function call.
				f.fe.SetWBPos(pos)
			case OpVarDef, OpVarLive, OpVarKill:
				memThen = bThen.NewValue1A(pos, w.Op, types.TypeMem, w.Aux, memThen)
			}

			// else block: normal store
			switch w.Op {
			case OpStoreWB:
				memElse = bElse.NewValue3A(pos, OpStore, types.TypeMem, w.Aux, ptr, val, memElse)
			case OpMoveWB:
				memElse = bElse.NewValue3I(pos, OpMove, types.TypeMem, w.AuxInt, ptr, val, memElse)
				memElse.Aux = w.Aux
			case OpZeroWB:
				memElse = bElse.NewValue2I(pos, OpZero, types.TypeMem, w.AuxInt, ptr, memElse)
				memElse.Aux = w.Aux
			case OpVarDef, OpVarLive, OpVarKill:
				memElse = bElse.NewValue1A(pos, w.Op, types.TypeMem, w.Aux, memElse)
			}
		}

		// mark volatile temps dead
		for _, c := range volatiles {
			tmpNode := c.tmp.Aux
			memThen = bThen.NewValue1A(memThen.Pos, OpVarKill, types.TypeMem, tmpNode, memThen)
		}
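		// (These temporaries exist because a volatile Move source lives in
		// the outgoing argument area: marshaling the arguments of the
		// typedmemmove call would overwrite it, so it was copied out first.)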
		// merge memory
		// Splice memory Phi into the last memory of the original sequence,
		// which may be used in subsequent blocks. Other memories in the
		// sequence must be dead after this block since there can be only
		// one memory live.
		bEnd.Values = append(bEnd.Values, last)
		last.Block = bEnd
		last.reset(OpPhi)
		last.Pos = last.Pos.WithNotStmt()
		last.Type = types.TypeMem
		last.AddArg(memThen)
		last.AddArg(memElse)
		for _, w := range stores {
			if w != last {
				w.resetArgs()
			}
		}
		for _, w := range stores {
			if w != last {
				f.freeValue(w)
			}
		}

		// put values after the store sequence into the end block
		bEnd.Values = append(bEnd.Values, after...)
		for _, w := range after {
			w.Block = bEnd
		}

		// Preemption is unsafe between loading the write
		// barrier-enabled flag and performing the write
		// because that would allow a GC phase transition,
		// which would invalidate the flag. Remember the
		// conditional block so liveness analysis can disable
		// safe-points. This is somewhat subtle because we're
		// splitting b bottom-up.
		if firstSplit {
			// Add b itself.
			b.Func.WBLoads = append(b.Func.WBLoads, b)
			firstSplit = false
		} else {
			// We've already split b, so we just pushed a
			// write barrier test into bEnd.
			b.Func.WBLoads = append(b.Func.WBLoads, bEnd)
		}

		// if we have more stores in this block, do this block again
		if nWBops > 0 {
			goto again
		}
	}
}

// computeZeroMap returns a map from an ID of a memory value to
// a set of locations that are known to be zeroed at that memory value.
func (f *Func) computeZeroMap() map[ID]ZeroRegion {
	ptrSize := f.Config.PtrSize
	// Keep track of which parts of memory are known to be zero.
	// This helps with removing write barriers for various initialization patterns.
	// This analysis is conservative. We only keep track, for each memory state, of
	// which of the first 64 words of a single object are known to be zero.
	zeroes := map[ID]ZeroRegion{}
	// Find new objects.
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if mem, ok := IsNewObject(v); ok {
				nptr := v.Type.Elem().Size() / ptrSize
				if nptr > 64 {
					nptr = 64
				}
				zeroes[mem.ID] = ZeroRegion{base: v, mask: 1<<uint(nptr) - 1}
			}
		}
	}
	// Find stores to those new objects.
	for {
		changed := false
		for _, b := range f.Blocks {
			// Note: iterating forwards helps convergence, as values are
			// typically (but not always!) in store order.
			for _, v := range b.Values {
				if v.Op != OpStore {
					continue
				}
				z, ok := zeroes[v.MemoryArg().ID]
				if !ok {
					continue
				}
				ptr := v.Args[0]
				var off int64
				size := v.Aux.(*types.Type).Size()
				for ptr.Op == OpOffPtr {
					off += ptr.AuxInt
					ptr = ptr.Args[0]
				}
				if ptr != z.base {
					// Different base object - we don't know anything.
					// We could even be writing to the base object we know
					// about, but through an aliased but offset pointer.
					// So we have to throw all the zero information we have away.
					continue
				}
				// Round to cover any partially written pointer slots.
				// Pointer writes should never be unaligned like this, but non-pointer
				// writes to pointer-containing types will do this.
				if d := off % ptrSize; d != 0 {
					off -= d
					size += d
				}
				if d := size % ptrSize; d != 0 {
					size += ptrSize - d
				}
				// Clip to the 64 words that we track.
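				// (With ptrSize == 8 that is byte offsets [0, 512).)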
				min := off
				max := off + size
				if min < 0 {
					min = 0
				}
				if max > 64*ptrSize {
					max = 64 * ptrSize
				}
				// Clear bits for parts that we are writing (and hence
				// will no longer necessarily be zero).
				for i := min; i < max; i += ptrSize {
					bit := i / ptrSize
					z.mask &^= 1 << uint(bit)
				}
				if z.mask == 0 {
					// No more known zeros - don't bother keeping.
					continue
				}
				// Save updated known zero contents for new store.
				if zeroes[v.ID] != z {
					zeroes[v.ID] = z
					changed = true
				}
			}
		}
		if !changed {
			break
		}
	}
	if f.pass.debug > 0 {
		fmt.Printf("func %s\n", f.Name)
		for mem, z := range zeroes {
			fmt.Printf("  memory=v%d ptr=%v zeromask=%b\n", mem, z.base, z.mask)
		}
	}
	return zeroes
}

// wbcall emits a write barrier runtime call in b and returns the memory.
func wbcall(pos src.XPos, b *Block, fn, typ *obj.LSym, ptr, val, mem, sp, sb *Value) *Value {
	config := b.Func.Config

	var wbargs []*Value
	// TODO (register args) this is a bit of a hack.
	inRegs := b.Func.ABIDefault == b.Func.ABI1 && len(config.intParamRegs) >= 3

	// put arguments on stack
	off := config.ctxt.FixedFrameSize()

	var argTypes []*types.Type
	if typ != nil { // for typedmemmove
		taddr := b.NewValue1A(pos, OpAddr, b.Func.Config.Types.Uintptr, typ, sb)
		argTypes = append(argTypes, b.Func.Config.Types.Uintptr)
		off = round(off, taddr.Type.Alignment())
		if inRegs {
			wbargs = append(wbargs, taddr)
		} else {
			arg := b.NewValue1I(pos, OpOffPtr, taddr.Type.PtrTo(), off, sp)
			mem = b.NewValue3A(pos, OpStore, types.TypeMem, ptr.Type, arg, taddr, mem)
		}
		off += taddr.Type.Size()
	}

	argTypes = append(argTypes, ptr.Type)
	off = round(off, ptr.Type.Alignment())
	if inRegs {
		wbargs = append(wbargs, ptr)
	} else {
		arg := b.NewValue1I(pos, OpOffPtr, ptr.Type.PtrTo(), off, sp)
		mem = b.NewValue3A(pos, OpStore, types.TypeMem, ptr.Type, arg, ptr, mem)
	}
	off += ptr.Type.Size()

	if val != nil {
		argTypes = append(argTypes, val.Type)
		off = round(off, val.Type.Alignment())
		if inRegs {
			wbargs = append(wbargs, val)
		} else {
			arg := b.NewValue1I(pos, OpOffPtr, val.Type.PtrTo(), off, sp)
			mem = b.NewValue3A(pos, OpStore, types.TypeMem, val.Type, arg, val, mem)
		}
		off += val.Type.Size()
	}
	off = round(off, config.PtrSize)
	wbargs = append(wbargs, mem)

	// issue call
	call := b.NewValue0A(pos, OpStaticCall, types.TypeResultMem, StaticAuxCall(fn, b.Func.ABIDefault.ABIAnalyzeTypes(nil, argTypes, nil)))
	call.AddArgs(wbargs...)
	call.AuxInt = off - config.ctxt.FixedFrameSize()
	return b.NewValue1I(pos, OpSelectN, types.TypeMem, 0, call)
}

// round rounds o up to a multiple of r, where r is a power of 2.
func round(o int64, r int64) int64 {
	return (o + r - 1) &^ (r - 1)
}

// IsStackAddr reports whether v is known to be an address of a stack slot.
func IsStackAddr(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr || v.Op == OpPtrIndex || v.Op == OpCopy {
		v = v.Args[0]
	}
	switch v.Op {
	case OpSP, OpLocalAddr, OpSelectNAddr, OpGetCallerSP:
		return true
	}
	return false
}
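// For example, an address chain like OffPtr(LocalAddr(...)) strips down to
// OpLocalAddr above, so IsStackAddr reports true for interior pointers into
// stack objects, not just their bases.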
// IsGlobalAddr reports whether v is known to be an address of a global (or nil).
func IsGlobalAddr(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr || v.Op == OpPtrIndex || v.Op == OpCopy {
		v = v.Args[0]
	}
	if v.Op == OpAddr && v.Args[0].Op == OpSB {
		return true // address of a global
	}
	if v.Op == OpConstNil {
		return true
	}
	if v.Op == OpLoad && IsReadOnlyGlobalAddr(v.Args[0]) {
		return true // loading from a read-only global - the resulting address can't be a heap address.
	}
	return false
}

// IsReadOnlyGlobalAddr reports whether v is known to be an address of a read-only global.
func IsReadOnlyGlobalAddr(v *Value) bool {
	if v.Op == OpConstNil {
		// Nil pointers are read only. See issue 33438.
		return true
	}
	if v.Op == OpAddr && v.Aux.(*obj.LSym).Type == objabi.SRODATA {
		return true
	}
	return false
}

// IsNewObject reports whether v is a pointer to a freshly allocated & zeroed object.
// If so, it also returns the memory state mem at which v is zero.
func IsNewObject(v *Value) (mem *Value, ok bool) {
	f := v.Block.Func
	c := f.Config
	if f.ABIDefault == f.ABI1 && len(c.intParamRegs) >= 1 {
		if v.Op != OpSelectN || v.AuxInt != 0 {
			return nil, false
		}
		// Find the memory
		for _, w := range v.Block.Values {
			if w.Op == OpSelectN && w.AuxInt == 1 && w.Args[0] == v.Args[0] {
				mem = w
				break
			}
		}
		if mem == nil {
			return nil, false
		}
	} else {
		if v.Op != OpLoad {
			return nil, false
		}
		mem = v.MemoryArg()
		if mem.Op != OpSelectN {
			return nil, false
		}
		if mem.Type != types.TypeMem {
			return nil, false
		} // assume it is the right selection if true
	}
	call := mem.Args[0]
	if call.Op != OpStaticCall {
		return nil, false
	}
	if !isSameCall(call.Aux, "runtime.newobject") {
		return nil, false
	}
	if f.ABIDefault == f.ABI1 && len(c.intParamRegs) >= 1 {
		if v.Args[0] == call {
			return mem, true
		}
		return nil, false
	}
	if v.Args[0].Op != OpOffPtr {
		return nil, false
	}
	if v.Args[0].Args[0].Op != OpSP {
		return nil, false
	}
	if v.Args[0].AuxInt != c.ctxt.FixedFrameSize()+c.RegSize { // offset of return value
		return nil, false
	}
	return mem, true
}

// IsSanitizerSafeAddr reports whether v is known to be an address
// that doesn't need instrumentation.
func IsSanitizerSafeAddr(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr || v.Op == OpPtrIndex || v.Op == OpCopy {
		v = v.Args[0]
	}
	switch v.Op {
	case OpSP, OpLocalAddr, OpSelectNAddr:
		// Stack addresses are always safe.
		return true
	case OpITab, OpStringPtr, OpGetClosurePtr:
		// Itabs, string data, and closure fields are
		// read-only once initialized.
		return true
	case OpAddr:
		return v.Aux.(*obj.LSym).Type == objabi.SRODATA || v.Aux.(*obj.LSym).Type == objabi.SLIBFUZZER_EXTRA_COUNTER
	}
	return false
}

// isVolatile reports whether v is a pointer to the argument region on the stack,
// which will be clobbered by a function call.
func isVolatile(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr || v.Op == OpPtrIndex || v.Op == OpCopy || v.Op == OpSelectNAddr {
		v = v.Args[0]
	}
	return v.Op == OpSP
}
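// For example, an address formed as OffPtr(SP) into the outgoing argument
// area strips to OpSP above, so isVolatile reports true; writebarrier copies
// such a Move source to a temporary before emitting any runtime call.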