// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Garbage collector: marking and scanning

package runtime

import (
	"runtime/internal/atomic"
	"runtime/internal/sys"
	"unsafe"
)

const (
	fixedRootFinalizers = iota
	fixedRootFreeGStacks
	fixedRootCount

	// rootBlockBytes is the number of bytes to scan per data or
	// BSS root.
	rootBlockBytes = 256 << 10

	// rootBlockSpans is the number of spans to scan per span
	// root.
	rootBlockSpans = 8 * 1024 // 64MB worth of spans

	// maxObletBytes is the maximum bytes of an object to scan at
	// once. Larger objects will be split up into "oblets" of at
	// most this size. Since we can scan 1–2 MB/ms, 128 KB bounds
	// scan preemption at ~100 µs.
	//
	// This must be > _MaxSmallSize so that the object base is the
	// span base.
	maxObletBytes = 128 << 10

	// drainCheckThreshold specifies how many units of work to do
	// between self-preemption checks in gcDrain. Assuming a scan
	// rate of 1 MB/ms, this is ~100 µs. Lower values have higher
	// overhead in the scan loop (the scheduler check may perform
	// a syscall, so its overhead is nontrivial). Higher values
	// make the system less responsive to incoming work.
	drainCheckThreshold = 100000
)

// gcMarkRootPrepare queues root scanning jobs (stacks, globals, and
// some miscellany) and initializes scanning-related state.
//
// The world must be stopped.
//
//go:nowritebarrier
func gcMarkRootPrepare() {
	work.nFlushCacheRoots = 0

	// Compute how many data and BSS root blocks there are.
	nBlocks := func(bytes uintptr) int {
		return int((bytes + rootBlockBytes - 1) / rootBlockBytes)
	}

	work.nDataRoots = 0
	work.nBSSRoots = 0

	// Scan globals.
	for _, datap := range activeModules() {
		nDataRoots := nBlocks(datap.edata - datap.data)
		if nDataRoots > work.nDataRoots {
			work.nDataRoots = nDataRoots
		}
	}

	for _, datap := range activeModules() {
		nBSSRoots := nBlocks(datap.ebss - datap.bss)
		if nBSSRoots > work.nBSSRoots {
			work.nBSSRoots = nBSSRoots
		}
	}

	// Scan span roots for finalizer specials.
	//
	// We depend on addfinalizer to mark objects that get
	// finalizers after root marking.
	//
	// We're only interested in scanning the in-use spans,
	// which will all be swept at this point. More spans
	// may be added to this list during concurrent GC, but
	// we only care about spans that were allocated before
	// this mark phase.
	work.nSpanRoots = mheap_.sweepSpans[mheap_.sweepgen/2%2].numBlocks()

	// Scan stacks.
	//
	// Gs may be created after this point, but it's okay that we
	// ignore them because they begin life without any roots, so
	// there's nothing to scan, and any roots they create during
	// the concurrent phase will be scanned during mark
	// termination.
	work.nStackRoots = int(atomic.Loaduintptr(&allglen))

	work.markrootNext = 0
	work.markrootJobs = uint32(fixedRootCount + work.nFlushCacheRoots + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nStackRoots)
}

// gcMarkRootCheck checks that all roots have been scanned. It is
// purely for debugging.
func gcMarkRootCheck() {
	if work.markrootNext < work.markrootJobs {
		print(work.markrootNext, " of ", work.markrootJobs, " markroot jobs done\n")
		throw("left over markroot jobs")
	}

	lock(&allglock)
	// Check that stacks have been scanned.
	var gp *g
	for i := 0; i < work.nStackRoots; i++ {
		gp = allgs[i]
		if !gp.gcscandone {
			goto fail
		}
	}
	unlock(&allglock)
	return

fail:
	println("gp", gp, "goid", gp.goid,
		"status", readgstatus(gp),
		"gcscandone", gp.gcscandone)
	unlock(&allglock) // Avoid self-deadlock with traceback.
	throw("scan missed a g")
}

// ptrmask for an allocation containing a single pointer.
var oneptrmask = [...]uint8{1}

// markroot scans the i'th root.
//
// Preemption must be disabled (because this uses a gcWork).
//
// nowritebarrier is only advisory here.
//
//go:nowritebarrier
func markroot(gcw *gcWork, i uint32) {
	// TODO(austin): This is a bit ridiculous. Compute and store
	// the bases in gcMarkRootPrepare instead of the counts.
	baseFlushCache := uint32(fixedRootCount)
	baseData := baseFlushCache + uint32(work.nFlushCacheRoots)
	baseBSS := baseData + uint32(work.nDataRoots)
	baseSpans := baseBSS + uint32(work.nBSSRoots)
	baseStacks := baseSpans + uint32(work.nSpanRoots)
	end := baseStacks + uint32(work.nStackRoots)

	// Note: if you add a case here, please also update heapdump.go:dumproots.
	switch {
	case baseFlushCache <= i && i < baseData:
		flushmcache(int(i - baseFlushCache))

	case baseData <= i && i < baseBSS:
		for _, datap := range activeModules() {
			markrootBlock(datap.data, datap.edata-datap.data, datap.gcdatamask.bytedata, gcw, int(i-baseData))
		}

	case baseBSS <= i && i < baseSpans:
		for _, datap := range activeModules() {
			markrootBlock(datap.bss, datap.ebss-datap.bss, datap.gcbssmask.bytedata, gcw, int(i-baseBSS))
		}

	case i == fixedRootFinalizers:
		for fb := allfin; fb != nil; fb = fb.alllink {
			cnt := uintptr(atomic.Load(&fb.cnt))
			scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), cnt*unsafe.Sizeof(fb.fin[0]), &finptrmask[0], gcw, nil)
		}

	case i == fixedRootFreeGStacks:
		// Switch to the system stack so we can call
		// stackfree.
		systemstack(markrootFreeGStacks)

	case baseSpans <= i && i < baseStacks:
		// mark mspan.specials
		markrootSpans(gcw, int(i-baseSpans))

	default:
		// the rest is scanning goroutine stacks
		var gp *g
		if baseStacks <= i && i < end {
			gp = allgs[i-baseStacks]
		} else {
			throw("markroot: bad index")
		}

		// remember when we've first observed the G blocked
		// needed only to output in traceback
		status := readgstatus(gp) // We are not in a scan state
		if (status == _Gwaiting || status == _Gsyscall) && gp.waitsince == 0 {
			gp.waitsince = work.tstart
		}

		// scanstack must be done on the system stack in case
		// we're trying to scan our own stack.
		systemstack(func() {
			// If this is a self-scan, put the user G in
			// _Gwaiting to prevent self-deadlock. It may
			// already be in _Gwaiting if this is a mark
			// worker or we're in mark termination.
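			//
			// Putting the user G in _Gwaiting lets the suspendG
			// call below treat it as already stopped; otherwise
			// suspendG would wait for the very goroutine that is
			// running this scan to stop, which it never would.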
			userG := getg().m.curg
			selfScan := gp == userG && readgstatus(userG) == _Grunning
			if selfScan {
				casgstatus(userG, _Grunning, _Gwaiting)
				userG.waitreason = waitReasonGarbageCollectionScan
			}

			// TODO: suspendG blocks (and spins) until gp
			// stops, which may take a while for
			// running goroutines. Consider doing this in
			// two phases where the first is non-blocking:
			// we scan the stacks we can and ask running
			// goroutines to scan themselves; and the
			// second blocks.
			stopped := suspendG(gp)
			if stopped.dead {
				gp.gcscandone = true
				return
			}
			if gp.gcscandone {
				throw("g already scanned")
			}
			scanstack(gp, gcw)
			gp.gcscandone = true
			resumeG(stopped)

			if selfScan {
				casgstatus(userG, _Gwaiting, _Grunning)
			}
		})
	}
}

// markrootBlock scans the shard'th shard of the block of memory [b0,
// b0+n0), with the given pointer mask.
//
//go:nowritebarrier
func markrootBlock(b0, n0 uintptr, ptrmask0 *uint8, gcw *gcWork, shard int) {
	if rootBlockBytes%(8*sys.PtrSize) != 0 {
		// This is necessary to pick byte offsets in ptrmask0.
		throw("rootBlockBytes must be a multiple of 8*ptrSize")
	}

	// Note that if b0 is toward the end of the address space,
	// then b0 + rootBlockBytes might wrap around.
	// These tests are written to avoid any possible overflow.
	off := uintptr(shard) * rootBlockBytes
	if off >= n0 {
		return
	}
	b := b0 + off
	ptrmask := (*uint8)(add(unsafe.Pointer(ptrmask0), uintptr(shard)*(rootBlockBytes/(8*sys.PtrSize))))
	n := uintptr(rootBlockBytes)
	if off+n > n0 {
		n = n0 - off
	}

	// Scan this shard.
	scanblock(b, n, ptrmask, gcw, nil)
}

// markrootFreeGStacks frees stacks of dead Gs.
//
// This does not free stacks of dead Gs cached on Ps, but having a few
// cached stacks around isn't a problem.
func markrootFreeGStacks() {
	// Take list of dead Gs with stacks.
	lock(&sched.gFree.lock)
	list := sched.gFree.stack
	sched.gFree.stack = gList{}
	unlock(&sched.gFree.lock)
	if list.empty() {
		return
	}

	// Free stacks.
	q := gQueue{list.head, list.head}
	for gp := list.head.ptr(); gp != nil; gp = gp.schedlink.ptr() {
		stackfree(gp.stack)
		gp.stack.lo = 0
		gp.stack.hi = 0
		// Manipulate the queue directly since the Gs are
		// already all linked the right way.
		q.tail.set(gp)
	}

	// Put Gs back on the free list.
	lock(&sched.gFree.lock)
	sched.gFree.noStack.pushAll(q)
	unlock(&sched.gFree.lock)
}

// markrootSpans marks roots for one shard of work.spans.
//
//go:nowritebarrier
func markrootSpans(gcw *gcWork, shard int) {
	// Objects with finalizers have two GC-related invariants:
	//
	// 1) Everything reachable from the object must be marked.
	// This ensures that when we pass the object to its finalizer,
	// everything the finalizer can reach will be retained.
	//
	// 2) Finalizer specials (which are not in the garbage
	// collected heap) are roots. In practice, this means the fn
	// field must be scanned.
	//
	// TODO(austin): There are several ideas for making this more
	// efficient in issue #11485.

	sg := mheap_.sweepgen
	spans := mheap_.sweepSpans[mheap_.sweepgen/2%2].block(shard)
	// Note that work.spans may not include spans that were
	// allocated between entering the scan phase and now. We may
	// also race with spans being added into sweepSpans when they're
	// just created, and as a result we may see nil pointers in the
	// spans slice. This is okay because any objects with finalizers
	// in those spans must have been allocated and given finalizers
	// after we entered the scan phase, so addfinalizer will have
	// ensured the above invariants for them.
	for i := 0; i < len(spans); i++ {
		// sweepBuf.block requires that we read pointers from the block atomically.
		// It also requires that we ignore nil pointers.
		s := (*mspan)(atomic.Loadp(unsafe.Pointer(&spans[i])))

		// This is racing with spans being initialized, so
		// check the state carefully.
		if s == nil || s.state.get() != mSpanInUse {
			continue
		}
		// Check that this span was swept (it may be cached or uncached).
		if !useCheckmark && !(s.sweepgen == sg || s.sweepgen == sg+3) {
			// sweepgen was updated (+2) during non-checkmark GC pass
			print("sweep ", s.sweepgen, " ", sg, "\n")
			throw("gc: unswept span")
		}

		// Speculatively check if there are any specials
		// without acquiring the span lock. This may race with
		// adding the first special to a span, but in that
		// case addfinalizer will observe that the GC is
		// active (which is globally synchronized) and ensure
		// the above invariants. We may also ensure the
		// invariants, but it's okay to scan an object twice.
		if s.specials == nil {
			continue
		}

		// Lock the specials to prevent a special from being
		// removed from the list while we're traversing it.
		lock(&s.speciallock)

		for sp := s.specials; sp != nil; sp = sp.next {
			if sp.kind != _KindSpecialFinalizer {
				continue
			}
			// don't mark finalized object, but scan it so we
			// retain everything it points to.
			spf := (*specialfinalizer)(unsafe.Pointer(sp))
			// A finalizer can be set for an inner byte of an object, find object beginning.
			p := s.base() + uintptr(spf.special.offset)/s.elemsize*s.elemsize

			// Mark everything that can be reached from
			// the object (but *not* the object itself or
			// we'll never collect it).
			scanobject(p, gcw)

			// The special itself is a root.
			scanblock(uintptr(unsafe.Pointer(&spf.fn)), sys.PtrSize, &oneptrmask[0], gcw, nil)
		}

		unlock(&s.speciallock)
	}
}

// gcAssistAlloc performs GC work to make gp's assist debt positive.
// gp must be the calling user goroutine.
//
// This must be called with preemption enabled.
func gcAssistAlloc(gp *g) {
	// Don't assist in non-preemptible contexts. These are
	// generally fragile and won't allow the assist to block.
	if getg() == gp.m.g0 {
		return
	}
	if mp := getg().m; mp.locks > 0 || mp.preemptoff != "" {
		return
	}

	traced := false
retry:
	// Compute the amount of scan work we need to do to make the
	// balance positive. When the required amount of work is low,
	// we over-assist to build up credit for future allocations
	// and amortize the cost of assisting.
	debtBytes := -gp.gcAssistBytes
	scanWork := int64(gcController.assistWorkPerByte * float64(debtBytes))
	if scanWork < gcOverAssistWork {
		scanWork = gcOverAssistWork
		debtBytes = int64(gcController.assistBytesPerWork * float64(scanWork))
	}

	// Steal as much credit as we can from the background GC's
	// scan credit. This is racy and may drop the background
	// credit below 0 if two mutators steal at the same time. This
	// will just cause steals to fail until credit is accumulated
	// again, so in the long run it doesn't really matter, but we
	// do have to handle the negative credit case.
	bgScanCredit := atomic.Loadint64(&gcController.bgScanCredit)
	stolen := int64(0)
	if bgScanCredit > 0 {
		if bgScanCredit < scanWork {
			stolen = bgScanCredit
			gp.gcAssistBytes += 1 + int64(gcController.assistBytesPerWork*float64(stolen))
		} else {
			stolen = scanWork
			gp.gcAssistBytes += debtBytes
		}
		atomic.Xaddint64(&gcController.bgScanCredit, -stolen)

		scanWork -= stolen

		if scanWork == 0 {
			// We were able to steal all of the credit we
			// needed.
			if traced {
				traceGCMarkAssistDone()
			}
			return
		}
	}

	if trace.enabled && !traced {
		traced = true
		traceGCMarkAssistStart()
	}

	// Perform assist work
	systemstack(func() {
		gcAssistAlloc1(gp, scanWork)
		// The user stack may have moved, so this can't touch
		// anything on it until it returns from systemstack.
	})

	completed := gp.param != nil
	gp.param = nil
	if completed {
		gcMarkDone()
	}

	if gp.gcAssistBytes < 0 {
		// We were unable to steal enough credit or perform
		// enough work to pay off the assist debt. We need to
		// do one of these before letting the mutator allocate
		// more to prevent over-allocation.
		//
		// If this is because we were preempted, reschedule
		// and try some more.
		if gp.preempt {
			Gosched()
			goto retry
		}

		// Add this G to an assist queue and park. When the GC
		// has more background credit, it will satisfy queued
		// assists before flushing to the global credit pool.
		//
		// Note that this does *not* get woken up when more
		// work is added to the work list. The theory is that
		// there wasn't enough work to do anyway, so we might
		// as well let background marking take care of the
		// work that is available.
		if !gcParkAssist() {
			goto retry
		}

		// At this point either background GC has satisfied
		// this G's assist debt, or the GC cycle is over.
	}
	if traced {
		traceGCMarkAssistDone()
	}
}

// gcAssistAlloc1 is the part of gcAssistAlloc that runs on the system
// stack. This is a separate function to make it easier to see that
// we're not capturing anything from the user stack, since the user
// stack may move while we're in this function.
//
// gcAssistAlloc1 indicates whether this assist completed the mark
// phase by setting gp.param to non-nil. This can't be communicated on
// the stack since it may move.
//
//go:systemstack
func gcAssistAlloc1(gp *g, scanWork int64) {
	// Clear the flag indicating that this assist completed the
	// mark phase.
	gp.param = nil

	if atomic.Load(&gcBlackenEnabled) == 0 {
		// The gcBlackenEnabled check in malloc races with the
		// store that clears it but an atomic check in every malloc
		// would be a performance hit.
		// Instead we recheck it here on the non-preemptible system
		// stack to determine if we should perform an assist.

		// GC is done, so ignore any remaining debt.
		gp.gcAssistBytes = 0
		return
	}
	// Track time spent in this assist. Since we're on the
	// system stack, this is non-preemptible, so we can
	// just measure start and end time.
	startTime := nanotime()

	decnwait := atomic.Xadd(&work.nwait, -1)
	if decnwait == work.nproc {
		println("runtime: work.nwait =", decnwait, "work.nproc=", work.nproc)
		throw("nwait > work.nprocs")
	}

	// gcDrainN requires the caller to be preemptible.
	casgstatus(gp, _Grunning, _Gwaiting)
	gp.waitreason = waitReasonGCAssistMarking

	// drain own cached work first in the hopes that it
	// will be more cache friendly.
	gcw := &getg().m.p.ptr().gcw
	workDone := gcDrainN(gcw, scanWork)

	casgstatus(gp, _Gwaiting, _Grunning)

	// Record that we did this much scan work.
	//
	// Back out the number of bytes of assist credit that
	// this scan work counts for. The "1+" is a poor man's
	// round-up, to ensure this adds credit even if
	// assistBytesPerWork is very low.
	gp.gcAssistBytes += 1 + int64(gcController.assistBytesPerWork*float64(workDone))

	// If this is the last worker and we ran out of work,
	// signal a completion point.
	incnwait := atomic.Xadd(&work.nwait, +1)
	if incnwait > work.nproc {
		println("runtime: work.nwait=", incnwait,
			"work.nproc=", work.nproc)
		throw("work.nwait > work.nproc")
	}

	if incnwait == work.nproc && !gcMarkWorkAvailable(nil) {
		// This has reached a background completion point. Set
		// gp.param to a non-nil value to indicate this. It
		// doesn't matter what we set it to (it just has to be
		// a valid pointer).
		gp.param = unsafe.Pointer(gp)
	}
	duration := nanotime() - startTime
	_p_ := gp.m.p.ptr()
	_p_.gcAssistTime += duration
	if _p_.gcAssistTime > gcAssistTimeSlack {
		atomic.Xaddint64(&gcController.assistTime, _p_.gcAssistTime)
		_p_.gcAssistTime = 0
	}
}

// gcWakeAllAssists wakes all currently blocked assists. This is used
// at the end of a GC cycle. gcBlackenEnabled must be false to prevent
// new assists from going to sleep after this point.
func gcWakeAllAssists() {
	lock(&work.assistQueue.lock)
	list := work.assistQueue.q.popList()
	injectglist(&list)
	unlock(&work.assistQueue.lock)
}

// gcParkAssist puts the current goroutine on the assist queue and parks.
//
// gcParkAssist reports whether the assist is now satisfied. If it
// returns false, the caller must retry the assist.
//
//go:nowritebarrier
func gcParkAssist() bool {
	lock(&work.assistQueue.lock)
	// If the GC cycle finished while we were getting the lock,
	// exit the assist. The cycle can't finish while we hold the
	// lock.
	if atomic.Load(&gcBlackenEnabled) == 0 {
		unlock(&work.assistQueue.lock)
		return true
	}

	gp := getg()
	oldList := work.assistQueue.q
	work.assistQueue.q.pushBack(gp)

	// Recheck for background credit now that this G is in
	// the queue, but can still back out. This avoids a
	// race in case background marking has flushed more
	// credit since we checked above.
	if atomic.Loadint64(&gcController.bgScanCredit) > 0 {
		work.assistQueue.q = oldList
		if oldList.tail != 0 {
			oldList.tail.ptr().schedlink.set(nil)
		}
		unlock(&work.assistQueue.lock)
		return false
	}
	// Park.
	goparkunlock(&work.assistQueue.lock, waitReasonGCAssistWait, traceEvGoBlockGC, 2)
	return true
}

// gcFlushBgCredit flushes scanWork units of background scan work
// credit. This first satisfies blocked assists on the
// work.assistQueue and then flushes any remaining credit to
// gcController.bgScanCredit.
//
// Write barriers are disallowed because this is used by gcDrain after
// it has ensured that all work is drained and this must preserve that
// condition.
//
//go:nowritebarrierrec
func gcFlushBgCredit(scanWork int64) {
	if work.assistQueue.q.empty() {
		// Fast path; there are no blocked assists. There's a
		// small window here where an assist may add itself to
		// the blocked queue and park. If that happens, we'll
		// just get it on the next flush.
		atomic.Xaddint64(&gcController.bgScanCredit, scanWork)
		return
	}

	scanBytes := int64(float64(scanWork) * gcController.assistBytesPerWork)

	lock(&work.assistQueue.lock)
	for !work.assistQueue.q.empty() && scanBytes > 0 {
		gp := work.assistQueue.q.pop()
		// Note that gp.gcAssistBytes is negative because gp
		// is in debt. Think carefully about the signs below.
		if scanBytes+gp.gcAssistBytes >= 0 {
			// Satisfy this entire assist debt.
			scanBytes += gp.gcAssistBytes
			gp.gcAssistBytes = 0
			// It's important that we *not* put gp in
			// runnext. Otherwise, it's possible for user
			// code to exploit the GC worker's high
			// scheduler priority to get itself always run
			// before other goroutines and always in the
			// fresh quantum started by GC.
			ready(gp, 0, false)
		} else {
			// Partially satisfy this assist.
			gp.gcAssistBytes += scanBytes
			scanBytes = 0
			// As a heuristic, we move this assist to the
			// back of the queue so that large assists
			// can't clog up the assist queue and
			// substantially delay small assists.
			work.assistQueue.q.pushBack(gp)
			break
		}
	}

	if scanBytes > 0 {
		// Convert from scan bytes back to work.
		scanWork = int64(float64(scanBytes) * gcController.assistWorkPerByte)
		atomic.Xaddint64(&gcController.bgScanCredit, scanWork)
	}
	unlock(&work.assistQueue.lock)
}

// scanstack scans gp's stack, greying all pointers found on the stack.
//
// scanstack will also shrink the stack if it is safe to do so. If it
// is not, it schedules a stack shrink for the next synchronous safe
// point.
//
// scanstack is marked go:systemstack because it must not be preempted
// while using a workbuf.
//
//go:nowritebarrier
//go:systemstack
func scanstack(gp *g, gcw *gcWork) {
	if readgstatus(gp)&_Gscan == 0 {
		print("runtime:scanstack: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", hex(readgstatus(gp)), "\n")
		throw("scanstack - bad status")
	}

	switch readgstatus(gp) &^ _Gscan {
	default:
		print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
		throw("mark - bad status")
	case _Gdead:
		return
	case _Grunning:
		print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
		throw("scanstack: goroutine not stopped")
	case _Grunnable, _Gsyscall, _Gwaiting:
		// ok
	}

	if gp == getg() {
		throw("can't scan our own stack")
	}

	if isShrinkStackSafe(gp) {
		// Shrink the stack if not much of it is being used.
		shrinkstack(gp)
	} else {
		// Otherwise, shrink the stack at the next sync safe point.
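		// (For example, gp may be in a syscall or stopped at an
		// asynchronous preemption point, in which case its stack
		// cannot be safely moved right now.)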
		gp.preemptShrink = true
	}

	var state stackScanState
	state.stack = gp.stack

	if stackTraceDebug {
		println("stack trace goroutine", gp.goid)
	}

	if debugScanConservative && gp.asyncSafePoint {
		print("scanning async preempted goroutine ", gp.goid, " stack [", hex(gp.stack.lo), ",", hex(gp.stack.hi), ")\n")
	}

	// Scan the saved context register. This is effectively a live
	// register that gets moved back and forth between the
	// register and sched.ctxt without a write barrier.
	if gp.sched.ctxt != nil {
		scanblock(uintptr(unsafe.Pointer(&gp.sched.ctxt)), sys.PtrSize, &oneptrmask[0], gcw, &state)
	}

	// Scan the stack. Accumulate a list of stack objects.
	scanframe := func(frame *stkframe, unused unsafe.Pointer) bool {
		scanframeworker(frame, &state, gcw)
		return true
	}
	gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0)

	// Find additional pointers that point into the stack from the heap.
	// Currently this includes defers and panics. See also function copystack.

	// Find and trace all defer arguments.
	tracebackdefers(gp, scanframe, nil)

	// Find and trace other pointers in defer records.
	for d := gp._defer; d != nil; d = d.link {
		if d.fn != nil {
			// tracebackdefers above does not scan the func value, which could
			// be a stack allocated closure. See issue 30453.
			scanblock(uintptr(unsafe.Pointer(&d.fn)), sys.PtrSize, &oneptrmask[0], gcw, &state)
		}
		if d.link != nil {
			// The link field of a stack-allocated defer record might point
			// to a heap-allocated defer record. Keep that heap record live.
			scanblock(uintptr(unsafe.Pointer(&d.link)), sys.PtrSize, &oneptrmask[0], gcw, &state)
		}
		// Retain defer records themselves.
		// Defer records might not be reachable from the G through regular heap
		// tracing because the defer linked list might weave between the stack and the heap.
		if d.heap {
			scanblock(uintptr(unsafe.Pointer(&d)), sys.PtrSize, &oneptrmask[0], gcw, &state)
		}
	}
	if gp._panic != nil {
		// Panics are always stack allocated.
		state.putPtr(uintptr(unsafe.Pointer(gp._panic)), false)
	}

	// Find and scan all reachable stack objects.
	//
	// The state's pointer queue prioritizes precise pointers over
	// conservative pointers so that we'll prefer scanning stack
	// objects precisely.
	state.buildIndex()
	for {
		p, conservative := state.getPtr()
		if p == 0 {
			break
		}
		obj := state.findObject(p)
		if obj == nil {
			continue
		}
		t := obj.typ
		if t == nil {
			// We've already scanned this object.
			continue
		}
		obj.setType(nil) // Don't scan it again.
		if stackTraceDebug {
			printlock()
			print(" live stkobj at ", hex(state.stack.lo+uintptr(obj.off)), " of type ", t.string())
			if conservative {
				print(" (conservative)")
			}
			println()
			printunlock()
		}
		gcdata := t.gcdata
		var s *mspan
		if t.kind&kindGCProg != 0 {
			// This path is pretty unlikely, an object large enough
			// to have a GC program allocated on the stack.
			// We need some space to unpack the program into a straight
			// bitmask, which we allocate/free here.
			// TODO: it would be nice if there were a way to run a GC
			// program without having to store all its bits. We'd have
			// to change from a Lempel-Ziv style program to something else.
			// Or we can forbid putting objects on stacks if they require
			// a gc program (see issue 27447).
			s = materializeGCProg(t.ptrdata, gcdata)
			gcdata = (*byte)(unsafe.Pointer(s.startAddr))
		}

		b := state.stack.lo + uintptr(obj.off)
		if conservative {
			scanConservative(b, t.ptrdata, gcdata, gcw, &state)
		} else {
			scanblock(b, t.ptrdata, gcdata, gcw, &state)
		}

		if s != nil {
			dematerializeGCProg(s)
		}
	}

	// Deallocate object buffers.
	// (Pointer buffers were all deallocated in the loop above.)
	for state.head != nil {
		x := state.head
		state.head = x.next
		if stackTraceDebug {
			for _, obj := range x.obj[:x.nobj] {
				if obj.typ == nil { // reachable
					continue
				}
				println(" dead stkobj at", hex(gp.stack.lo+uintptr(obj.off)), "of type", obj.typ.string())
				// Note: not necessarily really dead - only reachable-from-ptr dead.
			}
		}
		x.nobj = 0
		putempty((*workbuf)(unsafe.Pointer(x)))
	}
	if state.buf != nil || state.cbuf != nil || state.freeBuf != nil {
		throw("remaining pointer buffers")
	}
}

// Scan a stack frame: local variables and function arguments/results.
//go:nowritebarrier
func scanframeworker(frame *stkframe, state *stackScanState, gcw *gcWork) {
	if _DebugGC > 1 && frame.continpc != 0 {
		print("scanframe ", funcname(frame.fn), "\n")
	}

	isAsyncPreempt := frame.fn.valid() && frame.fn.funcID == funcID_asyncPreempt
	if state.conservative || isAsyncPreempt {
		if debugScanConservative {
			println("conservatively scanning function", funcname(frame.fn), "at PC", hex(frame.continpc))
		}

		// Conservatively scan the frame. Unlike the precise
		// case, this includes the outgoing argument space
		// since we may have stopped while this function was
		// setting up a call.
		//
		// TODO: We could narrow this down if the compiler
		// produced a single map per function of stack slots
		// and registers that ever contain a pointer.
		if frame.varp != 0 {
			size := frame.varp - frame.sp
			if size > 0 {
				scanConservative(frame.sp, size, nil, gcw, state)
			}
		}

		// Scan arguments to this frame.
		if frame.arglen != 0 {
			// TODO: We could pass the entry argument map
			// to narrow this down further.
			scanConservative(frame.argp, frame.arglen, nil, gcw, state)
		}

		if isAsyncPreempt {
			// This function's frame contained the
			// registers for the asynchronously stopped
			// parent frame. Scan the parent
			// conservatively.
			state.conservative = true
		} else {
			// We only wanted to scan those two frames
			// conservatively. Clear the flag for future
			// frames.
			state.conservative = false
		}
		return
	}

	locals, args, objs := getStackMap(frame, &state.cache, false)

	// Scan local variables if stack frame has been allocated.
	if locals.n > 0 {
		size := uintptr(locals.n) * sys.PtrSize
		scanblock(frame.varp-size, size, locals.bytedata, gcw, state)
	}

	// Scan arguments.
	if args.n > 0 {
		scanblock(frame.argp, uintptr(args.n)*sys.PtrSize, args.bytedata, gcw, state)
	}

	// Add all stack objects to the stack object list.
	if frame.varp != 0 {
		// varp is 0 for defers, where there are no locals.
		// In that case, there can't be a pointer to its args, either.
		// (And all args would be scanned above anyway.)
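		//
		// A stack object's offset is signed: negative offsets are
		// relative to varp (locals), non-negative offsets are
		// relative to argp (arguments and return values).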
		for _, obj := range objs {
			off := obj.off
			base := frame.varp // locals base pointer
			if off >= 0 {
				base = frame.argp // arguments and return values base pointer
			}
			ptr := base + uintptr(off)
			if ptr < frame.sp {
				// object hasn't been allocated in the frame yet.
				continue
			}
			if stackTraceDebug {
				println("stkobj at", hex(ptr), "of type", obj.typ.string())
			}
			state.addObject(ptr, obj.typ)
		}
	}
}

type gcDrainFlags int

const (
	gcDrainUntilPreempt gcDrainFlags = 1 << iota
	gcDrainFlushBgCredit
	gcDrainIdle
	gcDrainFractional
)

// gcDrain scans roots and objects in work buffers, blackening grey
// objects until it is unable to get more work. It may return before
// GC is done; it's the caller's responsibility to balance work from
// other Ps.
//
// If flags&gcDrainUntilPreempt != 0, gcDrain returns when g.preempt
// is set.
//
// If flags&gcDrainIdle != 0, gcDrain returns when there is other work
// to do.
//
// If flags&gcDrainFractional != 0, gcDrain self-preempts when
// pollFractionalWorkerExit() returns true. This implies
// gcDrainNoBlock.
//
// If flags&gcDrainFlushBgCredit != 0, gcDrain flushes scan work
// credit to gcController.bgScanCredit every gcCreditSlack units of
// scan work.
//
//go:nowritebarrier
func gcDrain(gcw *gcWork, flags gcDrainFlags) {
	if !writeBarrier.needed {
		throw("gcDrain phase incorrect")
	}

	gp := getg().m.curg
	preemptible := flags&gcDrainUntilPreempt != 0
	flushBgCredit := flags&gcDrainFlushBgCredit != 0
	idle := flags&gcDrainIdle != 0

	initScanWork := gcw.scanWork

	// checkWork is the scan work before performing the next
	// self-preempt check.
	checkWork := int64(1<<63 - 1)
	var check func() bool
	if flags&(gcDrainIdle|gcDrainFractional) != 0 {
		checkWork = initScanWork + drainCheckThreshold
		if idle {
			check = pollWork
		} else if flags&gcDrainFractional != 0 {
			check = pollFractionalWorkerExit
		}
	}

	// Drain root marking jobs.
	if work.markrootNext < work.markrootJobs {
		for !(preemptible && gp.preempt) {
			job := atomic.Xadd(&work.markrootNext, +1) - 1
			if job >= work.markrootJobs {
				break
			}
			markroot(gcw, job)
			if check != nil && check() {
				goto done
			}
		}
	}

	// Drain heap marking jobs.
	for !(preemptible && gp.preempt) {
		// Try to keep work available on the global queue. We used to
		// check if there were waiting workers, but it's better to
		// just keep work available than to make workers wait. In the
		// worst case, we'll do O(log(_WorkbufSize)) unnecessary
		// balances.
		if work.full == 0 {
			gcw.balance()
		}

		b := gcw.tryGetFast()
		if b == 0 {
			b = gcw.tryGet()
			if b == 0 {
				// Flush the write barrier
				// buffer; this may create
				// more work.
				wbBufFlush(nil, 0)
				b = gcw.tryGet()
			}
		}
		if b == 0 {
			// Unable to get work.
			break
		}
		scanobject(b, gcw)

		// Flush background scan work credit to the global
		// account if we've accumulated enough locally so
		// mutator assists can draw on it.
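		//
		// gcCreditSlack batches these flushes so the global atomic
		// counters aren't updated on every object. Subtracting
		// initScanWork (and then clearing it) keeps scan work that
		// was already on this gcw when gcDrain started from being
		// flushed as background credit.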
		if gcw.scanWork >= gcCreditSlack {
			atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
			if flushBgCredit {
				gcFlushBgCredit(gcw.scanWork - initScanWork)
				initScanWork = 0
			}
			checkWork -= gcw.scanWork
			gcw.scanWork = 0

			if checkWork <= 0 {
				checkWork += drainCheckThreshold
				if check != nil && check() {
					break
				}
			}
		}
	}

done:
	// Flush remaining scan work credit.
	if gcw.scanWork > 0 {
		atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
		if flushBgCredit {
			gcFlushBgCredit(gcw.scanWork - initScanWork)
		}
		gcw.scanWork = 0
	}
}

// gcDrainN blackens grey objects until it has performed roughly
// scanWork units of scan work or the G is preempted. This is
// best-effort, so it may perform less work if it fails to get a work
// buffer. Otherwise, it will perform at least n units of work, but
// may perform more because scanning is always done in whole object
// increments. It returns the amount of scan work performed.
//
// The caller goroutine must be in a preemptible state (e.g.,
// _Gwaiting) to prevent deadlocks during stack scanning. As a
// consequence, this must be called on the system stack.
//
//go:nowritebarrier
//go:systemstack
func gcDrainN(gcw *gcWork, scanWork int64) int64 {
	if !writeBarrier.needed {
		throw("gcDrainN phase incorrect")
	}

	// There may already be scan work on the gcw, which we don't
	// want to claim was done by this call.
	workFlushed := -gcw.scanWork

	gp := getg().m.curg
	for !gp.preempt && workFlushed+gcw.scanWork < scanWork {
		// See gcDrain comment.
		if work.full == 0 {
			gcw.balance()
		}

		// This might be a good place to add prefetch code...
		// if(wbuf.nobj > 4) {
		//         PREFETCH(wbuf->obj[wbuf.nobj - 3];
		// }
		//
		b := gcw.tryGetFast()
		if b == 0 {
			b = gcw.tryGet()
			if b == 0 {
				// Flush the write barrier buffer;
				// this may create more work.
				wbBufFlush(nil, 0)
				b = gcw.tryGet()
			}
		}

		if b == 0 {
			// Try to do a root job.
			//
			// TODO: Assists should get credit for this
			// work.
			if work.markrootNext < work.markrootJobs {
				job := atomic.Xadd(&work.markrootNext, +1) - 1
				if job < work.markrootJobs {
					markroot(gcw, job)
					continue
				}
			}
			// No heap or root jobs.
			break
		}
		scanobject(b, gcw)

		// Flush background scan work credit.
		if gcw.scanWork >= gcCreditSlack {
			atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
			workFlushed += gcw.scanWork
			gcw.scanWork = 0
		}
	}

	// Unlike gcDrain, there's no need to flush remaining work
	// here because this never flushes to bgScanCredit and
	// gcw.dispose will flush any remaining work to scanWork.

	return workFlushed + gcw.scanWork
}

// scanblock scans b as scanobject would, but using an explicit
// pointer bitmap instead of the heap bitmap.
//
// This is used to scan non-heap roots, so it does not update
// gcw.bytesMarked or gcw.scanWork.
//
// If stk != nil, possible stack pointers are also reported to stk.putPtr.
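//
// ptrmask has one bit per pointer-sized word of the block; a 1 bit
// means the corresponding word may hold a pointer and is checked.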
//go:nowritebarrier
func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork, stk *stackScanState) {
	// Use local copies of original parameters, so that a stack trace
	// due to one of the throws below shows the original block
	// base and extent.
	b := b0
	n := n0

	for i := uintptr(0); i < n; {
		// Find bits for the next word.
		bits := uint32(*addb(ptrmask, i/(sys.PtrSize*8)))
		if bits == 0 {
			i += sys.PtrSize * 8
			continue
		}
		for j := 0; j < 8 && i < n; j++ {
			if bits&1 != 0 {
				// Same work as in scanobject; see comments there.
				p := *(*uintptr)(unsafe.Pointer(b + i))
				if p != 0 {
					if obj, span, objIndex := findObject(p, b, i); obj != 0 {
						greyobject(obj, b, i, span, gcw, objIndex)
					} else if stk != nil && p >= stk.stack.lo && p < stk.stack.hi {
						stk.putPtr(p, false)
					}
				}
			}
			bits >>= 1
			i += sys.PtrSize
		}
	}
}

// scanobject scans the object starting at b, adding pointers to gcw.
// b must point to the beginning of a heap object or an oblet.
// scanobject consults the GC bitmap for the pointer mask and the
// spans for the size of the object.
//
//go:nowritebarrier
func scanobject(b uintptr, gcw *gcWork) {
	// Find the bits for b and the size of the object at b.
	//
	// b is either the beginning of an object, in which case this
	// is the size of the object to scan, or it points to an
	// oblet, in which case we compute the size to scan below.
	hbits := heapBitsForAddr(b)
	s := spanOfUnchecked(b)
	n := s.elemsize
	if n == 0 {
		throw("scanobject n == 0")
	}

	if n > maxObletBytes {
		// Large object. Break into oblets for better
		// parallelism and lower latency.
		if b == s.base() {
			// It's possible this is a noscan object (not
			// from greyobject, but from other code
			// paths), in which case we must *not* enqueue
			// oblets since their bitmaps will be
			// uninitialized.
			if s.spanclass.noscan() {
				// Bypass the whole scan.
				gcw.bytesMarked += uint64(n)
				return
			}

			// Enqueue the other oblets to scan later.
			// Some oblets may be in b's scalar tail, but
			// these will be marked as "no more pointers",
			// so we'll drop out immediately when we go to
			// scan those.
			for oblet := b + maxObletBytes; oblet < s.base()+s.elemsize; oblet += maxObletBytes {
				if !gcw.putFast(oblet) {
					gcw.put(oblet)
				}
			}
		}

		// Compute the size of the oblet. Since this object
		// must be a large object, s.base() is the beginning
		// of the object.
		n = s.base() + s.elemsize - b
		if n > maxObletBytes {
			n = maxObletBytes
		}
	}

	var i uintptr
	for i = 0; i < n; i += sys.PtrSize {
		// Find bits for this word.
		if i != 0 {
			// Avoid needless hbits.next() on last iteration.
			hbits = hbits.next()
		}
		// Load bits once. See CL 22712 and issue 16973 for discussion.
		bits := hbits.bits()
		// During checkmarking, 1-word objects store the checkmark
		// in the type bit for the one word. The only one-word objects
		// are pointers, or else they'd be merged with other non-pointer
		// data into larger allocations.
		if i != 1*sys.PtrSize && bits&bitScan == 0 {
			break // no more pointers in this object
		}
		if bits&bitPointer == 0 {
			continue // not a pointer
		}

		// Work here is duplicated in scanblock and above.
		// If you make changes here, make changes there too.
		obj := *(*uintptr)(unsafe.Pointer(b + i))

		// At this point we have extracted the next potential pointer.
		// Quickly filter out nil and pointers back to the current object.
		if obj != 0 && obj-b >= n {
			// Test if obj points into the Go heap and, if so,
			// mark the object.
			//
			// Note that it's possible for findObject to
			// fail if obj points to a just-allocated heap
			// object because of a race with growing the
			// heap. In this case, we know the object was
			// just allocated and hence will be marked by
			// allocation itself.
			if obj, span, objIndex := findObject(obj, b, i); obj != 0 {
				greyobject(obj, b, i, span, gcw, objIndex)
			}
		}
	}
	gcw.bytesMarked += uint64(n)
	gcw.scanWork += int64(i)
}

// scanConservative scans block [b, b+n) conservatively, treating any
// pointer-like value in the block as a pointer.
//
// If ptrmask != nil, only words that are marked in ptrmask are
// considered as potential pointers.
//
// If state != nil, it's assumed that [b, b+n) is a block in the stack
// and may contain pointers to stack objects.
func scanConservative(b, n uintptr, ptrmask *uint8, gcw *gcWork, state *stackScanState) {
	if debugScanConservative {
		printlock()
		print("conservatively scanning [", hex(b), ",", hex(b+n), ")\n")
		hexdumpWords(b, b+n, func(p uintptr) byte {
			if ptrmask != nil {
				word := (p - b) / sys.PtrSize
				bits := *addb(ptrmask, word/8)
				if (bits>>(word%8))&1 == 0 {
					return '$'
				}
			}

			val := *(*uintptr)(unsafe.Pointer(p))
			if state != nil && state.stack.lo <= val && val < state.stack.hi {
				return '@'
			}

			span := spanOfHeap(val)
			if span == nil {
				return ' '
			}
			idx := span.objIndex(val)
			if span.isFree(idx) {
				return ' '
			}
			return '*'
		})
		printunlock()
	}

	for i := uintptr(0); i < n; i += sys.PtrSize {
		if ptrmask != nil {
			word := i / sys.PtrSize
			bits := *addb(ptrmask, word/8)
			if bits == 0 {
				// Skip 8 words (the loop increment will do the 8th)
				//
				// This must be the first time we've
				// seen this word of ptrmask, so i
				// must be 8-word-aligned, but check
				// our reasoning just in case.
				if i%(sys.PtrSize*8) != 0 {
					throw("misaligned mask")
				}
				i += sys.PtrSize*8 - sys.PtrSize
				continue
			}
			if (bits>>(word%8))&1 == 0 {
				continue
			}
		}

		val := *(*uintptr)(unsafe.Pointer(b + i))

		// Check if val points into the stack.
		if state != nil && state.stack.lo <= val && val < state.stack.hi {
			// val may point to a stack object. This
			// object may be dead from last cycle and
			// hence may contain pointers to unallocated
			// objects, but unlike heap objects we can't
			// tell if it's already dead. Hence, if all
			// pointers to this object are from
			// conservative scanning, we have to scan it
			// defensively, too.
			state.putPtr(val, true)
			continue
		}

		// Check if val points to a heap span.
		span := spanOfHeap(val)
		if span == nil {
			continue
		}

		// Check if val points to an allocated object.
		idx := span.objIndex(val)
		if span.isFree(idx) {
			continue
		}

		// val points to an allocated object. Mark it.
		obj := span.base() + idx*span.elemsize
		greyobject(obj, b, i, span, gcw, idx)
	}
}

// Shade the object if it isn't already.
// The object is not nil and known to be in the heap.
// Preemption must be disabled.
//go:nowritebarrier
func shade(b uintptr) {
	if obj, span, objIndex := findObject(b, 0, 0); obj != 0 {
		gcw := &getg().m.p.ptr().gcw
		greyobject(obj, 0, 0, span, gcw, objIndex)
	}
}

// obj is the start of an object with mark mbits.
// If it isn't already marked, mark it and enqueue into gcw.
// base and off are for debugging only and could be removed.
//
// See also wbBufFlush1, which partially duplicates this logic.
//
//go:nowritebarrierrec
func greyobject(obj, base, off uintptr, span *mspan, gcw *gcWork, objIndex uintptr) {
	// obj should be start of allocation, and so must be at least pointer-aligned.
	if obj&(sys.PtrSize-1) != 0 {
		throw("greyobject: obj not pointer-aligned")
	}
	mbits := span.markBitsForIndex(objIndex)

	if useCheckmark {
		if !mbits.isMarked() {
			printlock()
			print("runtime:greyobject: checkmarks finds unexpected unmarked object obj=", hex(obj), "\n")
			print("runtime: found obj at *(", hex(base), "+", hex(off), ")\n")

			// Dump the source (base) object
			gcDumpObject("base", base, off)

			// Dump the object
			gcDumpObject("obj", obj, ^uintptr(0))

			getg().m.traceback = 2
			throw("checkmark found unmarked object")
		}
		hbits := heapBitsForAddr(obj)
		if hbits.isCheckmarked(span.elemsize) {
			return
		}
		hbits.setCheckmarked(span.elemsize)
		if !hbits.isCheckmarked(span.elemsize) {
			throw("setCheckmarked and isCheckmarked disagree")
		}
	} else {
		if debug.gccheckmark > 0 && span.isFree(objIndex) {
			print("runtime: marking free object ", hex(obj), " found at *(", hex(base), "+", hex(off), ")\n")
			gcDumpObject("base", base, off)
			gcDumpObject("obj", obj, ^uintptr(0))
			getg().m.traceback = 2
			throw("marking free object")
		}

		// If marked we have nothing to do.
		if mbits.isMarked() {
			return
		}
		mbits.setMarked()

		// Mark span.
		arena, pageIdx, pageMask := pageIndexOf(span.base())
		if arena.pageMarks[pageIdx]&pageMask == 0 {
			atomic.Or8(&arena.pageMarks[pageIdx], pageMask)
		}

		// If this is a noscan object, fast-track it to black
		// instead of greying it.
		if span.spanclass.noscan() {
			gcw.bytesMarked += uint64(span.elemsize)
			return
		}
	}

	// Queue the obj for scanning. The PREFETCH(obj) logic has been removed but
	// seems like a nice optimization that can be added back in.
	// There needs to be time between the PREFETCH and the use.
	// Previously we put the obj in an 8 element buffer that is drained at a rate
	// to give the PREFETCH time to do its work.
	// Use of PREFETCHNTA might be more appropriate than PREFETCH
	if !gcw.putFast(obj) {
		gcw.put(obj)
	}
}

// gcDumpObject dumps the contents of obj for debugging and marks the
// field at byte offset off in obj.
func gcDumpObject(label string, obj, off uintptr) {
	s := spanOf(obj)
	print(label, "=", hex(obj))
	if s == nil {
		print(" s=nil\n")
		return
	}
	print(" s.base()=", hex(s.base()), " s.limit=", hex(s.limit), " s.spanclass=", s.spanclass, " s.elemsize=", s.elemsize, " s.state=")
	if state := s.state.get(); 0 <= state && int(state) < len(mSpanStateNames) {
		print(mSpanStateNames[state], "\n")
	} else {
		print("unknown(", state, ")\n")
	}

	skipped := false
	size := s.elemsize
	if s.state.get() == mSpanManual && size == 0 {
		// We're printing something from a stack frame. We
		// don't know how big it is, so just show up to and
		// including off.
		size = off + sys.PtrSize
	}
	for i := uintptr(0); i < size; i += sys.PtrSize {
		// For big objects, just print the beginning (because
		// that usually hints at the object's type) and the
		// fields around off.
		if !(i < 128*sys.PtrSize || off-16*sys.PtrSize < i && i < off+16*sys.PtrSize) {
			skipped = true
			continue
		}
		if skipped {
			print(" ...\n")
			skipped = false
		}
		print(" *(", label, "+", i, ") = ", hex(*(*uintptr)(unsafe.Pointer(obj + i))))
		if i == off {
			print(" <==")
		}
		print("\n")
	}
	if skipped {
		print(" ...\n")
	}
}

// gcmarknewobject marks a newly allocated object black. obj must
// not contain any non-nil pointers.
//
// This is nosplit so it can manipulate a gcWork without preemption.
//
//go:nowritebarrier
//go:nosplit
func gcmarknewobject(obj, size, scanSize uintptr) {
	if useCheckmark { // The world should be stopped so this should not happen.
		throw("gcmarknewobject called while doing checkmark")
	}
	markBitsForAddr(obj).setMarked()
	gcw := &getg().m.p.ptr().gcw
	gcw.bytesMarked += uint64(size)
	gcw.scanWork += int64(scanSize)
}

// gcMarkTinyAllocs greys all active tiny alloc blocks.
//
// The world must be stopped.
func gcMarkTinyAllocs() {
	for _, p := range allp {
		c := p.mcache
		if c == nil || c.tiny == 0 {
			continue
		}
		_, span, objIndex := findObject(c.tiny, 0, 0)
		gcw := &p.gcw
		greyobject(c.tiny, 0, 0, span, gcw, objIndex)
	}
}

// Checkmarking

// To help debug the concurrent GC we remark with the world
// stopped ensuring that any object encountered has its normal
// mark bit set. To do this we use an orthogonal bit
// pattern to indicate the object is marked. The following pattern
// uses the upper two bits in the object's boundary nibble.
// 01: scalar not marked
// 10: pointer not marked
// 11: pointer marked
// 00: scalar marked
// Xoring with 01 will flip the pattern from marked to unmarked and vice versa.
// The higher bit is 1 for pointers and 0 for scalars, whether the object
// is marked or not.
// The first nibble no longer holds the typeDead pattern indicating that
// there are no more pointers in the object. This information is held
// in the second nibble.

// If useCheckmark is true, marking of an object uses the
// checkmark bits (encoding above) instead of the standard
// mark bits.
var useCheckmark = false

//go:nowritebarrier
func initCheckmarks() {
	useCheckmark = true
	for _, s := range mheap_.allspans {
		if s.state.get() == mSpanInUse {
			heapBitsForAddr(s.base()).initCheckmarkSpan(s.layout())
		}
	}
}

func clearCheckmarks() {
	useCheckmark = false
	for _, s := range mheap_.allspans {
		if s.state.get() == mSpanInUse {
			heapBitsForAddr(s.base()).clearCheckmarkSpan(s.layout())
		}
	}
}