1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Garbage collector: marking and scanning
6
7package runtime
8
9import (
10	"runtime/internal/atomic"
11	"runtime/internal/sys"
12	"unsafe"
13)
14
const (
	// fixedRootFinalizers and fixedRootFreeGStacks are the
	// indices of the root-marking jobs that always exist;
	// fixedRootCount is the number of such fixed roots.
	fixedRootFinalizers = iota
	fixedRootFreeGStacks
	fixedRootCount

	// rootBlockBytes is the number of bytes to scan per data or
	// BSS root.
	rootBlockBytes = 256 << 10

	// rootBlockSpans is the number of spans to scan per span
	// root.
	rootBlockSpans = 8 * 1024 // 64MB worth of spans

	// maxObletBytes is the maximum bytes of an object to scan at
	// once. Larger objects will be split up into "oblets" of at
	// most this size. Since we can scan 1–2 MB/ms, 128 KB bounds
	// scan preemption at ~100 µs.
	//
	// This must be > _MaxSmallSize so that the object base is the
	// span base.
	maxObletBytes = 128 << 10

	// drainCheckThreshold specifies how many units of work to do
	// between self-preemption checks in gcDrain. Assuming a scan
	// rate of 1 MB/ms, this is ~100 µs. Lower values have higher
	// overhead in the scan loop (the scheduler check may perform
	// a syscall, so its overhead is nontrivial). Higher values
	// make the system less responsive to incoming work.
	drainCheckThreshold = 100000
)
45
46// gcMarkRootPrepare queues root scanning jobs (stacks, globals, and
47// some miscellany) and initializes scanning-related state.
48//
49// The world must be stopped.
50//
51//go:nowritebarrier
52func gcMarkRootPrepare() {
53	work.nFlushCacheRoots = 0
54
55	// Compute how many data and BSS root blocks there are.
56	nBlocks := func(bytes uintptr) int {
57		return int((bytes + rootBlockBytes - 1) / rootBlockBytes)
58	}
59
60	work.nDataRoots = 0
61	work.nBSSRoots = 0
62
63	// Scan globals.
64	for _, datap := range activeModules() {
65		nDataRoots := nBlocks(datap.edata - datap.data)
66		if nDataRoots > work.nDataRoots {
67			work.nDataRoots = nDataRoots
68		}
69	}
70
71	for _, datap := range activeModules() {
72		nBSSRoots := nBlocks(datap.ebss - datap.bss)
73		if nBSSRoots > work.nBSSRoots {
74			work.nBSSRoots = nBSSRoots
75		}
76	}
77
78	// Scan span roots for finalizer specials.
79	//
80	// We depend on addfinalizer to mark objects that get
81	// finalizers after root marking.
82	//
83	// We're only interested in scanning the in-use spans,
84	// which will all be swept at this point. More spans
85	// may be added to this list during concurrent GC, but
86	// we only care about spans that were allocated before
87	// this mark phase.
88	work.nSpanRoots = mheap_.sweepSpans[mheap_.sweepgen/2%2].numBlocks()
89
90	// Scan stacks.
91	//
92	// Gs may be created after this point, but it's okay that we
93	// ignore them because they begin life without any roots, so
94	// there's nothing to scan, and any roots they create during
95	// the concurrent phase will be scanned during mark
96	// termination.
97	work.nStackRoots = int(atomic.Loaduintptr(&allglen))
98
99	work.markrootNext = 0
100	work.markrootJobs = uint32(fixedRootCount + work.nFlushCacheRoots + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nStackRoots)
101}
102
// gcMarkRootCheck checks that all roots have been scanned. It is
// purely for debugging.
func gcMarkRootCheck() {
	// Every queued markroot job must have been claimed.
	if work.markrootNext < work.markrootJobs {
		print(work.markrootNext, " of ", work.markrootJobs, " markroot jobs done\n")
		throw("left over markroot jobs")
	}

	lock(&allglock)
	// Check that stacks have been scanned.
	var gp *g
	for i := 0; i < work.nStackRoots; i++ {
		gp = allgs[i]
		if !gp.gcscandone {
			goto fail
		}
	}
	unlock(&allglock)
	return

fail:
	// Report the offending G, then release allglock before
	// throwing so the resulting traceback can't deadlock on it.
	println("gp", gp, "goid", gp.goid,
		"status", readgstatus(gp),
		"gcscandone", gp.gcscandone)
	unlock(&allglock) // Avoid self-deadlock with traceback.
	throw("scan missed a g")
}
130
// oneptrmask is the pointer mask ("ptrmask") for an allocation
// containing a single pointer word; used with scanblock to scan
// lone pointer-sized roots.
var oneptrmask = [...]uint8{1}
133
// markroot scans the i'th root.
//
// Preemption must be disabled (because this uses a gcWork).
//
// nowritebarrier is only advisory here.
//
//go:nowritebarrier
func markroot(gcw *gcWork, i uint32) {
	// TODO(austin): This is a bit ridiculous. Compute and store
	// the bases in gcMarkRootPrepare instead of the counts.
	//
	// Root jobs are numbered consecutively: the fixed roots
	// first, then the flush-cache, data, BSS, span, and stack
	// groups. Each base below is the first job index of its group.
	baseFlushCache := uint32(fixedRootCount)
	baseData := baseFlushCache + uint32(work.nFlushCacheRoots)
	baseBSS := baseData + uint32(work.nDataRoots)
	baseSpans := baseBSS + uint32(work.nBSSRoots)
	baseStacks := baseSpans + uint32(work.nSpanRoots)
	end := baseStacks + uint32(work.nStackRoots)

	// Note: if you add a case here, please also update heapdump.go:dumproots.
	switch {
	case baseFlushCache <= i && i < baseData:
		flushmcache(int(i - baseFlushCache))

	case baseData <= i && i < baseBSS:
		// Scan this shard of every module's data segment.
		for _, datap := range activeModules() {
			markrootBlock(datap.data, datap.edata-datap.data, datap.gcdatamask.bytedata, gcw, int(i-baseData))
		}

	case baseBSS <= i && i < baseSpans:
		// Scan this shard of every module's BSS segment.
		for _, datap := range activeModules() {
			markrootBlock(datap.bss, datap.ebss-datap.bss, datap.gcbssmask.bytedata, gcw, int(i-baseBSS))
		}

	case i == fixedRootFinalizers:
		// Scan the entries of every finalizer block; finptrmask
		// describes which words of a finalizer entry are pointers.
		for fb := allfin; fb != nil; fb = fb.alllink {
			cnt := uintptr(atomic.Load(&fb.cnt))
			scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), cnt*unsafe.Sizeof(fb.fin[0]), &finptrmask[0], gcw, nil)
		}

	case i == fixedRootFreeGStacks:
		// Switch to the system stack so we can call
		// stackfree.
		systemstack(markrootFreeGStacks)

	case baseSpans <= i && i < baseStacks:
		// mark mspan.specials
		markrootSpans(gcw, int(i-baseSpans))

	default:
		// the rest is scanning goroutine stacks
		var gp *g
		if baseStacks <= i && i < end {
			gp = allgs[i-baseStacks]
		} else {
			throw("markroot: bad index")
		}

		// remember when we've first observed the G blocked
		// needed only to output in traceback
		status := readgstatus(gp) // We are not in a scan state
		if (status == _Gwaiting || status == _Gsyscall) && gp.waitsince == 0 {
			gp.waitsince = work.tstart
		}

		// scanstack must be done on the system stack in case
		// we're trying to scan our own stack.
		systemstack(func() {
			// If this is a self-scan, put the user G in
			// _Gwaiting to prevent self-deadlock. It may
			// already be in _Gwaiting if this is a mark
			// worker or we're in mark termination.
			userG := getg().m.curg
			selfScan := gp == userG && readgstatus(userG) == _Grunning
			if selfScan {
				casgstatus(userG, _Grunning, _Gwaiting)
				userG.waitreason = waitReasonGarbageCollectionScan
			}

			// TODO: suspendG blocks (and spins) until gp
			// stops, which may take a while for
			// running goroutines. Consider doing this in
			// two phases where the first is non-blocking:
			// we scan the stacks we can and ask running
			// goroutines to scan themselves; and the
			// second blocks.
			stopped := suspendG(gp)
			if stopped.dead {
				// A dead G has no stack to scan; just
				// record that its scan is done.
				gp.gcscandone = true
				return
			}
			if gp.gcscandone {
				throw("g already scanned")
			}
			scanstack(gp, gcw)
			gp.gcscandone = true
			resumeG(stopped)

			if selfScan {
				casgstatus(userG, _Gwaiting, _Grunning)
			}
		})
	}
}
236
237// markrootBlock scans the shard'th shard of the block of memory [b0,
238// b0+n0), with the given pointer mask.
239//
240//go:nowritebarrier
241func markrootBlock(b0, n0 uintptr, ptrmask0 *uint8, gcw *gcWork, shard int) {
242	if rootBlockBytes%(8*sys.PtrSize) != 0 {
243		// This is necessary to pick byte offsets in ptrmask0.
244		throw("rootBlockBytes must be a multiple of 8*ptrSize")
245	}
246
247	// Note that if b0 is toward the end of the address space,
248	// then b0 + rootBlockBytes might wrap around.
249	// These tests are written to avoid any possible overflow.
250	off := uintptr(shard) * rootBlockBytes
251	if off >= n0 {
252		return
253	}
254	b := b0 + off
255	ptrmask := (*uint8)(add(unsafe.Pointer(ptrmask0), uintptr(shard)*(rootBlockBytes/(8*sys.PtrSize))))
256	n := uintptr(rootBlockBytes)
257	if off+n > n0 {
258		n = n0 - off
259	}
260
261	// Scan this shard.
262	scanblock(b, n, ptrmask, gcw, nil)
263}
264
// markrootFreeGStacks frees stacks of dead Gs.
//
// This does not free stacks of dead Gs cached on Ps, but having a few
// cached stacks around isn't a problem.
func markrootFreeGStacks() {
	// Take list of dead Gs with stacks. The list is detached
	// under the lock so the stacks can be freed without holding
	// it.
	lock(&sched.gFree.lock)
	list := sched.gFree.stack
	sched.gFree.stack = gList{}
	unlock(&sched.gFree.lock)
	if list.empty() {
		return
	}

	// Free stacks. q starts as a one-element queue (head ==
	// tail); the loop walks the existing schedlink chain and
	// advances q.tail to the last G.
	q := gQueue{list.head, list.head}
	for gp := list.head.ptr(); gp != nil; gp = gp.schedlink.ptr() {
		stackfree(gp.stack)
		gp.stack.lo = 0
		gp.stack.hi = 0
		// Manipulate the queue directly since the Gs are
		// already all linked the right way.
		q.tail.set(gp)
	}

	// Put Gs back on the free list. They now have no stacks, so
	// they go on the noStack list.
	lock(&sched.gFree.lock)
	sched.gFree.noStack.pushAll(q)
	unlock(&sched.gFree.lock)
}
295
// markrootSpans marks roots for one shard of work.spans.
//
//go:nowritebarrier
func markrootSpans(gcw *gcWork, shard int) {
	// Objects with finalizers have two GC-related invariants:
	//
	// 1) Everything reachable from the object must be marked.
	// This ensures that when we pass the object to its finalizer,
	// everything the finalizer can reach will be retained.
	//
	// 2) Finalizer specials (which are not in the garbage
	// collected heap) are roots. In practice, this means the fn
	// field must be scanned.
	//
	// TODO(austin): There are several ideas for making this more
	// efficient in issue #11485.

	sg := mheap_.sweepgen
	spans := mheap_.sweepSpans[mheap_.sweepgen/2%2].block(shard)
	// Note that work.spans may not include spans that were
	// allocated between entering the scan phase and now. We may
	// also race with spans being added into sweepSpans when they're
	// just created, and as a result we may see nil pointers in the
	// spans slice. This is okay because any objects with finalizers
	// in those spans must have been allocated and given finalizers
	// after we entered the scan phase, so addfinalizer will have
	// ensured the above invariants for them.
	for i := 0; i < len(spans); i++ {
		// sweepBuf.block requires that we read pointers from the block atomically.
		// It also requires that we ignore nil pointers.
		s := (*mspan)(atomic.Loadp(unsafe.Pointer(&spans[i])))

		// This is racing with spans being initialized, so
		// check the state carefully.
		if s == nil || s.state.get() != mSpanInUse {
			continue
		}
		// Check that this span was swept (it may be cached or uncached).
		if !useCheckmark && !(s.sweepgen == sg || s.sweepgen == sg+3) {
			// sweepgen was updated (+2) during non-checkmark GC pass
			print("sweep ", s.sweepgen, " ", sg, "\n")
			throw("gc: unswept span")
		}

		// Speculatively check if there are any specials
		// without acquiring the span lock. This may race with
		// adding the first special to a span, but in that
		// case addfinalizer will observe that the GC is
		// active (which is globally synchronized) and ensure
		// the above invariants. We may also ensure the
		// invariants, but it's okay to scan an object twice.
		if s.specials == nil {
			continue
		}

		// Lock the specials to prevent a special from being
		// removed from the list while we're traversing it.
		lock(&s.speciallock)

		for sp := s.specials; sp != nil; sp = sp.next {
			if sp.kind != _KindSpecialFinalizer {
				continue
			}
			// don't mark finalized object, but scan it so we
			// retain everything it points to.
			spf := (*specialfinalizer)(unsafe.Pointer(sp))
			// A finalizer can be set for an inner byte of an object, find object beginning.
			p := s.base() + uintptr(spf.special.offset)/s.elemsize*s.elemsize

			// Mark everything that can be reached from
			// the object (but *not* the object itself or
			// we'll never collect it).
			scanobject(p, gcw)

			// The special itself is a root (invariant 2
			// above): scan its fn field as a single
			// pointer.
			scanblock(uintptr(unsafe.Pointer(&spf.fn)), sys.PtrSize, &oneptrmask[0], gcw, nil)
		}

		unlock(&s.speciallock)
	}
}
377
// gcAssistAlloc performs GC work to make gp's assist debt positive.
// gp must be the calling user goroutine.
//
// This must be called with preemption enabled.
func gcAssistAlloc(gp *g) {
	// Don't assist in non-preemptible contexts. These are
	// generally fragile and won't allow the assist to block.
	if getg() == gp.m.g0 {
		return
	}
	if mp := getg().m; mp.locks > 0 || mp.preemptoff != "" {
		return
	}

	traced := false
retry:
	// Compute the amount of scan work we need to do to make the
	// balance positive. When the required amount of work is low,
	// we over-assist to build up credit for future allocations
	// and amortize the cost of assisting.
	debtBytes := -gp.gcAssistBytes
	scanWork := int64(gcController.assistWorkPerByte * float64(debtBytes))
	if scanWork < gcOverAssistWork {
		scanWork = gcOverAssistWork
		debtBytes = int64(gcController.assistBytesPerWork * float64(scanWork))
	}

	// Steal as much credit as we can from the background GC's
	// scan credit. This is racy and may drop the background
	// credit below 0 if two mutators steal at the same time. This
	// will just cause steals to fail until credit is accumulated
	// again, so in the long run it doesn't really matter, but we
	// do have to handle the negative credit case.
	bgScanCredit := atomic.Loadint64(&gcController.bgScanCredit)
	stolen := int64(0)
	if bgScanCredit > 0 {
		if bgScanCredit < scanWork {
			// Partial steal. The "1+" rounds up so credit
			// is added even when assistBytesPerWork is
			// very small.
			stolen = bgScanCredit
			gp.gcAssistBytes += 1 + int64(gcController.assistBytesPerWork*float64(stolen))
		} else {
			stolen = scanWork
			gp.gcAssistBytes += debtBytes
		}
		atomic.Xaddint64(&gcController.bgScanCredit, -stolen)

		scanWork -= stolen

		if scanWork == 0 {
			// We were able to steal all of the credit we
			// needed.
			if traced {
				traceGCMarkAssistDone()
			}
			return
		}
	}

	if trace.enabled && !traced {
		traced = true
		traceGCMarkAssistStart()
	}

	// Perform assist work
	systemstack(func() {
		gcAssistAlloc1(gp, scanWork)
		// The user stack may have moved, so this can't touch
		// anything on it until it returns from systemstack.
	})

	// gcAssistAlloc1 signals mark-phase completion by setting
	// gp.param to a non-nil value (it can't use the stack, which
	// may have moved).
	completed := gp.param != nil
	gp.param = nil
	if completed {
		gcMarkDone()
	}

	if gp.gcAssistBytes < 0 {
		// We were unable steal enough credit or perform
		// enough work to pay off the assist debt. We need to
		// do one of these before letting the mutator allocate
		// more to prevent over-allocation.
		//
		// If this is because we were preempted, reschedule
		// and try some more.
		if gp.preempt {
			Gosched()
			goto retry
		}

		// Add this G to an assist queue and park. When the GC
		// has more background credit, it will satisfy queued
		// assists before flushing to the global credit pool.
		//
		// Note that this does *not* get woken up when more
		// work is added to the work list. The theory is that
		// there wasn't enough work to do anyway, so we might
		// as well let background marking take care of the
		// work that is available.
		if !gcParkAssist() {
			goto retry
		}

		// At this point either background GC has satisfied
		// this G's assist debt, or the GC cycle is over.
	}
	if traced {
		traceGCMarkAssistDone()
	}
}
486
// gcAssistAlloc1 is the part of gcAssistAlloc that runs on the system
// stack. This is a separate function to make it easier to see that
// we're not capturing anything from the user stack, since the user
// stack may move while we're in this function.
//
// gcAssistAlloc1 indicates whether this assist completed the mark
// phase by setting gp.param to non-nil. This can't be communicated on
// the stack since it may move.
//
//go:systemstack
func gcAssistAlloc1(gp *g, scanWork int64) {
	// Clear the flag indicating that this assist completed the
	// mark phase.
	gp.param = nil

	if atomic.Load(&gcBlackenEnabled) == 0 {
		// The gcBlackenEnabled check in malloc races with the
		// store that clears it but an atomic check in every malloc
		// would be a performance hit.
		// Instead we recheck it here on the non-preemptable system
		// stack to determine if we should perform an assist.

		// GC is done, so ignore any remaining debt.
		gp.gcAssistBytes = 0
		return
	}
	// Track time spent in this assist. Since we're on the
	// system stack, this is non-preemptible, so we can
	// just measure start and end time.
	startTime := nanotime()

	// Declare this worker active for completion detection: nwait
	// must never equal nproc while an assist is still draining.
	decnwait := atomic.Xadd(&work.nwait, -1)
	if decnwait == work.nproc {
		println("runtime: work.nwait =", decnwait, "work.nproc=", work.nproc)
		throw("nwait > work.nprocs")
	}

	// gcDrainN requires the caller to be preemptible.
	casgstatus(gp, _Grunning, _Gwaiting)
	gp.waitreason = waitReasonGCAssistMarking

	// drain own cached work first in the hopes that it
	// will be more cache friendly.
	gcw := &getg().m.p.ptr().gcw
	workDone := gcDrainN(gcw, scanWork)

	casgstatus(gp, _Gwaiting, _Grunning)

	// Record that we did this much scan work.
	//
	// Back out the number of bytes of assist credit that
	// this scan work counts for. The "1+" is a poor man's
	// round-up, to ensure this adds credit even if
	// assistBytesPerWork is very low.
	gp.gcAssistBytes += 1 + int64(gcController.assistBytesPerWork*float64(workDone))

	// If this is the last worker and we ran out of work,
	// signal a completion point.
	incnwait := atomic.Xadd(&work.nwait, +1)
	if incnwait > work.nproc {
		println("runtime: work.nwait=", incnwait,
			"work.nproc=", work.nproc)
		throw("work.nwait > work.nproc")
	}

	if incnwait == work.nproc && !gcMarkWorkAvailable(nil) {
		// This has reached a background completion point. Set
		// gp.param to a non-nil value to indicate this. It
		// doesn't matter what we set it to (it just has to be
		// a valid pointer).
		gp.param = unsafe.Pointer(gp)
	}
	// Accumulate assist time on the P, flushing to the global
	// controller only once it exceeds gcAssistTimeSlack to limit
	// contention on gcController.assistTime.
	duration := nanotime() - startTime
	_p_ := gp.m.p.ptr()
	_p_.gcAssistTime += duration
	if _p_.gcAssistTime > gcAssistTimeSlack {
		atomic.Xaddint64(&gcController.assistTime, _p_.gcAssistTime)
		_p_.gcAssistTime = 0
	}
}
567
// gcWakeAllAssists wakes all currently blocked assists. This is used
// at the end of a GC cycle. gcBlackenEnabled must be false to prevent
// new assists from going to sleep after this point.
func gcWakeAllAssists() {
	// Pop the entire assist queue under the lock and make every
	// parked assist runnable.
	lock(&work.assistQueue.lock)
	list := work.assistQueue.q.popList()
	injectglist(&list)
	unlock(&work.assistQueue.lock)
}
577
// gcParkAssist puts the current goroutine on the assist queue and parks.
//
// gcParkAssist reports whether the assist is now satisfied. If it
// returns false, the caller must retry the assist.
//
//go:nowritebarrier
func gcParkAssist() bool {
	lock(&work.assistQueue.lock)
	// If the GC cycle finished while we were getting the lock,
	// exit the assist. The cycle can't finish while we hold the
	// lock.
	if atomic.Load(&gcBlackenEnabled) == 0 {
		unlock(&work.assistQueue.lock)
		return true
	}

	// Enqueue this G, remembering the old queue so we can back
	// out below.
	gp := getg()
	oldList := work.assistQueue.q
	work.assistQueue.q.pushBack(gp)

	// Recheck for background credit now that this G is in
	// the queue, but can still back out. This avoids a
	// race in case background marking has flushed more
	// credit since we checked above.
	if atomic.Loadint64(&gcController.bgScanCredit) > 0 {
		// Back out: restore the old queue and detach gp from
		// it (pushBack linked gp after the old tail).
		work.assistQueue.q = oldList
		if oldList.tail != 0 {
			oldList.tail.ptr().schedlink.set(nil)
		}
		unlock(&work.assistQueue.lock)
		return false
	}
	// Park.
	goparkunlock(&work.assistQueue.lock, waitReasonGCAssistWait, traceEvGoBlockGC, 2)
	return true
}
614
// gcFlushBgCredit flushes scanWork units of background scan work
// credit. This first satisfies blocked assists on the
// work.assistQueue and then flushes any remaining credit to
// gcController.bgScanCredit.
//
// Write barriers are disallowed because this is used by gcDrain after
// it has ensured that all work is drained and this must preserve that
// condition.
//
//go:nowritebarrierrec
func gcFlushBgCredit(scanWork int64) {
	if work.assistQueue.q.empty() {
		// Fast path; there are no blocked assists. There's a
		// small window here where an assist may add itself to
		// the blocked queue and park. If that happens, we'll
		// just get it on the next flush.
		atomic.Xaddint64(&gcController.bgScanCredit, scanWork)
		return
	}

	// Convert the work credit into bytes, which is the unit of
	// assist debt (gp.gcAssistBytes).
	scanBytes := int64(float64(scanWork) * gcController.assistBytesPerWork)

	lock(&work.assistQueue.lock)
	for !work.assistQueue.q.empty() && scanBytes > 0 {
		gp := work.assistQueue.q.pop()
		// Note that gp.gcAssistBytes is negative because gp
		// is in debt. Think carefully about the signs below.
		if scanBytes+gp.gcAssistBytes >= 0 {
			// Satisfy this entire assist debt.
			scanBytes += gp.gcAssistBytes
			gp.gcAssistBytes = 0
			// It's important that we *not* put gp in
			// runnext. Otherwise, it's possible for user
			// code to exploit the GC worker's high
			// scheduler priority to get itself always run
			// before other goroutines and always in the
			// fresh quantum started by GC.
			ready(gp, 0, false)
		} else {
			// Partially satisfy this assist.
			gp.gcAssistBytes += scanBytes
			scanBytes = 0
			// As a heuristic, we move this assist to the
			// back of the queue so that large assists
			// can't clog up the assist queue and
			// substantially delay small assists.
			work.assistQueue.q.pushBack(gp)
			break
		}
	}

	if scanBytes > 0 {
		// Convert from scan bytes back to work.
		scanWork = int64(float64(scanBytes) * gcController.assistWorkPerByte)
		atomic.Xaddint64(&gcController.bgScanCredit, scanWork)
	}
	unlock(&work.assistQueue.lock)
}
673
// scanstack scans gp's stack, greying all pointers found on the stack.
//
// scanstack will also shrink the stack if it is safe to do so. If it
// is not, it schedules a stack shrink for the next synchronous safe
// point.
//
// scanstack is marked go:systemstack because it must not be preempted
// while using a workbuf.
//
//go:nowritebarrier
//go:systemstack
func scanstack(gp *g, gcw *gcWork) {
	// The caller must have stopped gp in a _Gscan state.
	if readgstatus(gp)&_Gscan == 0 {
		print("runtime:scanstack: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", hex(readgstatus(gp)), "\n")
		throw("scanstack - bad status")
	}

	switch readgstatus(gp) &^ _Gscan {
	default:
		print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
		throw("mark - bad status")
	case _Gdead:
		// Dead Gs have no stack to scan.
		return
	case _Grunning:
		print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
		throw("scanstack: goroutine not stopped")
	case _Grunnable, _Gsyscall, _Gwaiting:
		// ok
	}

	if gp == getg() {
		throw("can't scan our own stack")
	}

	if isShrinkStackSafe(gp) {
		// Shrink the stack if not much of it is being used.
		shrinkstack(gp)
	} else {
		// Otherwise, shrink the stack at the next sync safe point.
		gp.preemptShrink = true
	}

	var state stackScanState
	state.stack = gp.stack

	if stackTraceDebug {
		println("stack trace goroutine", gp.goid)
	}

	if debugScanConservative && gp.asyncSafePoint {
		print("scanning async preempted goroutine ", gp.goid, " stack [", hex(gp.stack.lo), ",", hex(gp.stack.hi), ")\n")
	}

	// Scan the saved context register. This is effectively a live
	// register that gets moved back and forth between the
	// register and sched.ctxt without a write barrier.
	if gp.sched.ctxt != nil {
		scanblock(uintptr(unsafe.Pointer(&gp.sched.ctxt)), sys.PtrSize, &oneptrmask[0], gcw, &state)
	}

	// Scan the stack. Accumulate a list of stack objects.
	scanframe := func(frame *stkframe, unused unsafe.Pointer) bool {
		scanframeworker(frame, &state, gcw)
		return true
	}
	gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0)

	// Find additional pointers that point into the stack from the heap.
	// Currently this includes defers and panics. See also function copystack.

	// Find and trace all defer arguments.
	tracebackdefers(gp, scanframe, nil)

	// Find and trace other pointers in defer records.
	for d := gp._defer; d != nil; d = d.link {
		if d.fn != nil {
			// tracebackdefers above does not scan the func value, which could
			// be a stack allocated closure. See issue 30453.
			scanblock(uintptr(unsafe.Pointer(&d.fn)), sys.PtrSize, &oneptrmask[0], gcw, &state)
		}
		if d.link != nil {
			// The link field of a stack-allocated defer record might point
			// to a heap-allocated defer record. Keep that heap record live.
			scanblock(uintptr(unsafe.Pointer(&d.link)), sys.PtrSize, &oneptrmask[0], gcw, &state)
		}
		// Retain defers records themselves.
		// Defer records might not be reachable from the G through regular heap
		// tracing because the defer linked list might weave between the stack and the heap.
		if d.heap {
			scanblock(uintptr(unsafe.Pointer(&d)), sys.PtrSize, &oneptrmask[0], gcw, &state)
		}
	}
	if gp._panic != nil {
		// Panics are always stack allocated.
		state.putPtr(uintptr(unsafe.Pointer(gp._panic)), false)
	}

	// Find and scan all reachable stack objects.
	//
	// The state's pointer queue prioritizes precise pointers over
	// conservative pointers so that we'll prefer scanning stack
	// objects precisely.
	state.buildIndex()
	for {
		p, conservative := state.getPtr()
		if p == 0 {
			// Queue drained; all reachable stack objects scanned.
			break
		}
		obj := state.findObject(p)
		if obj == nil {
			continue
		}
		t := obj.typ
		if t == nil {
			// We've already scanned this object.
			continue
		}
		obj.setType(nil) // Don't scan it again.
		if stackTraceDebug {
			printlock()
			print("  live stkobj at", hex(state.stack.lo+uintptr(obj.off)), "of type", t.string())
			if conservative {
				print(" (conservative)")
			}
			println()
			printunlock()
		}
		gcdata := t.gcdata
		var s *mspan
		if t.kind&kindGCProg != 0 {
			// This path is pretty unlikely, an object large enough
			// to have a GC program allocated on the stack.
			// We need some space to unpack the program into a straight
			// bitmask, which we allocate/free here.
			// TODO: it would be nice if there were a way to run a GC
			// program without having to store all its bits. We'd have
			// to change from a Lempel-Ziv style program to something else.
			// Or we can forbid putting objects on stacks if they require
			// a gc program (see issue 27447).
			s = materializeGCProg(t.ptrdata, gcdata)
			gcdata = (*byte)(unsafe.Pointer(s.startAddr))
		}

		b := state.stack.lo + uintptr(obj.off)
		if conservative {
			scanConservative(b, t.ptrdata, gcdata, gcw, &state)
		} else {
			scanblock(b, t.ptrdata, gcdata, gcw, &state)
		}

		if s != nil {
			// Free the temporary bitmask unpacked above.
			dematerializeGCProg(s)
		}
	}

	// Deallocate object buffers.
	// (Pointer buffers were all deallocated in the loop above.)
	for state.head != nil {
		x := state.head
		state.head = x.next
		if stackTraceDebug {
			for _, obj := range x.obj[:x.nobj] {
				if obj.typ == nil { // reachable
					continue
				}
				println("  dead stkobj at", hex(gp.stack.lo+uintptr(obj.off)), "of type", obj.typ.string())
				// Note: not necessarily really dead - only reachable-from-ptr dead.
			}
		}
		x.nobj = 0
		putempty((*workbuf)(unsafe.Pointer(x)))
	}
	// Sanity check: all of the scan state's buffers should have
	// been drained and released by now.
	if state.buf != nil || state.cbuf != nil || state.freeBuf != nil {
		throw("remaining pointer buffers")
	}
}
850
// Scan a stack frame: local variables and function arguments/results.
//
// Frames are scanned precisely using the frame's stack maps unless
// state.conservative is set or the frame belongs to asyncPreempt, in
// which case the frame (and, via state.conservative, its parent) is
// scanned conservatively.
//go:nowritebarrier
func scanframeworker(frame *stkframe, state *stackScanState, gcw *gcWork) {
	if _DebugGC > 1 && frame.continpc != 0 {
		print("scanframe ", funcname(frame.fn), "\n")
	}

	isAsyncPreempt := frame.fn.valid() && frame.fn.funcID == funcID_asyncPreempt
	if state.conservative || isAsyncPreempt {
		if debugScanConservative {
			println("conservatively scanning function", funcname(frame.fn), "at PC", hex(frame.continpc))
		}

		// Conservatively scan the frame. Unlike the precise
		// case, this includes the outgoing argument space
		// since we may have stopped while this function was
		// setting up a call.
		//
		// TODO: We could narrow this down if the compiler
		// produced a single map per function of stack slots
		// and registers that ever contain a pointer.
		if frame.varp != 0 {
			size := frame.varp - frame.sp
			if size > 0 {
				scanConservative(frame.sp, size, nil, gcw, state)
			}
		}

		// Scan arguments to this frame.
		if frame.arglen != 0 {
			// TODO: We could pass the entry argument map
			// to narrow this down further.
			scanConservative(frame.argp, frame.arglen, nil, gcw, state)
		}

		if isAsyncPreempt {
			// This function's frame contained the
			// registers for the asynchronously stopped
			// parent frame. Scan the parent
			// conservatively.
			state.conservative = true
		} else {
			// We only wanted to scan those two frames
			// conservatively. Clear the flag for future
			// frames.
			state.conservative = false
		}
		return
	}

	// Precise scan: look up the frame's pointer bitmaps and stack
	// object records.
	locals, args, objs := getStackMap(frame, &state.cache, false)

	// Scan local variables if stack frame has been allocated.
	if locals.n > 0 {
		size := uintptr(locals.n) * sys.PtrSize
		scanblock(frame.varp-size, size, locals.bytedata, gcw, state)
	}

	// Scan arguments.
	if args.n > 0 {
		scanblock(frame.argp, uintptr(args.n)*sys.PtrSize, args.bytedata, gcw, state)
	}

	// Add all stack objects to the stack object list.
	if frame.varp != 0 {
		// varp is 0 for defers, where there are no locals.
		// In that case, there can't be a pointer to its args, either.
		// (And all args would be scanned above anyway.)
		for _, obj := range objs {
			off := obj.off
			base := frame.varp // locals base pointer
			if off >= 0 {
				base = frame.argp // arguments and return values base pointer
			}
			ptr := base + uintptr(off)
			if ptr < frame.sp {
				// object hasn't been allocated in the frame yet.
				continue
			}
			if stackTraceDebug {
				println("stkobj at", hex(ptr), "of type", obj.typ.string())
			}
			state.addObject(ptr, obj.typ)
		}
	}
}
937
// gcDrainFlags is a bit set controlling gcDrain's behavior; see the
// flag descriptions in gcDrain's doc comment.
type gcDrainFlags int

const (
	gcDrainUntilPreempt gcDrainFlags = 1 << iota
	gcDrainFlushBgCredit
	gcDrainIdle
	gcDrainFractional
)
946
// gcDrain scans roots and objects in work buffers, blackening grey
// objects until it is unable to get more work. It may return before
// GC is done; it's the caller's responsibility to balance work from
// other Ps.
//
// If flags&gcDrainUntilPreempt != 0, gcDrain returns when g.preempt
// is set.
//
// If flags&gcDrainIdle != 0, gcDrain returns when there is other work
// to do.
//
// If flags&gcDrainFractional != 0, gcDrain self-preempts when
// pollFractionalWorkerExit() returns true. This implies
// gcDrainNoBlock.
//
// If flags&gcDrainFlushBgCredit != 0, gcDrain flushes scan work
// credit to gcController.bgScanCredit every gcCreditSlack units of
// scan work.
//
//go:nowritebarrier
func gcDrain(gcw *gcWork, flags gcDrainFlags) {
	if !writeBarrier.needed {
		throw("gcDrain phase incorrect")
	}

	gp := getg().m.curg
	preemptible := flags&gcDrainUntilPreempt != 0
	flushBgCredit := flags&gcDrainFlushBgCredit != 0
	idle := flags&gcDrainIdle != 0

	// Remember any scan work already accumulated on gcw so it is
	// not credited to the background credit pool as work done by
	// this call (see the flushBgCredit paths below).
	initScanWork := gcw.scanWork

	// checkWork is the scan work before performing the next
	// self-preempt check. It starts effectively at "never" and is
	// lowered only when a check function is installed.
	checkWork := int64(1<<63 - 1)
	var check func() bool
	if flags&(gcDrainIdle|gcDrainFractional) != 0 {
		checkWork = initScanWork + drainCheckThreshold
		if idle {
			check = pollWork
		} else if flags&gcDrainFractional != 0 {
			check = pollFractionalWorkerExit
		}
	}

	// Drain root marking jobs.
	if work.markrootNext < work.markrootJobs {
		for !(preemptible && gp.preempt) {
			// Claim the next root job by atomically advancing
			// the shared cursor; other Ps may be racing us.
			job := atomic.Xadd(&work.markrootNext, +1) - 1
			if job >= work.markrootJobs {
				break
			}
			markroot(gcw, job)
			if check != nil && check() {
				goto done
			}
		}
	}

	// Drain heap marking jobs.
	for !(preemptible && gp.preempt) {
		// Try to keep work available on the global queue. We used to
		// check if there were waiting workers, but it's better to
		// just keep work available than to make workers wait. In the
		// worst case, we'll do O(log(_WorkbufSize)) unnecessary
		// balances.
		if work.full == 0 {
			gcw.balance()
		}

		b := gcw.tryGetFast()
		if b == 0 {
			b = gcw.tryGet()
			if b == 0 {
				// Flush the write barrier
				// buffer; this may create
				// more work.
				wbBufFlush(nil, 0)
				b = gcw.tryGet()
			}
		}
		if b == 0 {
			// Unable to get work.
			break
		}
		scanobject(b, gcw)

		// Flush background scan work credit to the global
		// account if we've accumulated enough locally so
		// mutator assists can draw on it.
		if gcw.scanWork >= gcCreditSlack {
			atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
			if flushBgCredit {
				gcFlushBgCredit(gcw.scanWork - initScanWork)
				initScanWork = 0
			}
			checkWork -= gcw.scanWork
			gcw.scanWork = 0

			if checkWork <= 0 {
				checkWork += drainCheckThreshold
				if check != nil && check() {
					break
				}
			}
		}
	}

done:
	// Flush remaining scan work credit.
	if gcw.scanWork > 0 {
		atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
		if flushBgCredit {
			gcFlushBgCredit(gcw.scanWork - initScanWork)
		}
		gcw.scanWork = 0
	}
}
1065
// gcDrainN blackens grey objects until it has performed roughly
// scanWork units of scan work or the G is preempted. This is
// best-effort, so it may perform less work if it fails to get a work
// buffer. Otherwise, it will perform at least n units of work, but
// may perform more because scanning is always done in whole object
// increments. It returns the amount of scan work performed.
//
// The caller goroutine must be in a preemptible state (e.g.,
// _Gwaiting) to prevent deadlocks during stack scanning. As a
// consequence, this must be called on the system stack.
//
//go:nowritebarrier
//go:systemstack
func gcDrainN(gcw *gcWork, scanWork int64) int64 {
	if !writeBarrier.needed {
		throw("gcDrainN phase incorrect")
	}

	// There may already be scan work on the gcw, which we don't
	// want to claim was done by this call. Start the flushed-work
	// counter negative to cancel it out.
	workFlushed := -gcw.scanWork

	gp := getg().m.curg
	for !gp.preempt && workFlushed+gcw.scanWork < scanWork {
		// See gcDrain comment.
		if work.full == 0 {
			gcw.balance()
		}

		// This might be a good place to add prefetch code...
		// if(wbuf.nobj > 4) {
		//         PREFETCH(wbuf->obj[wbuf.nobj - 3];
		//  }
		//
		b := gcw.tryGetFast()
		if b == 0 {
			b = gcw.tryGet()
			if b == 0 {
				// Flush the write barrier buffer;
				// this may create more work.
				wbBufFlush(nil, 0)
				b = gcw.tryGet()
			}
		}

		if b == 0 {
			// No heap marking work; try to do a root job
			// instead.
			//
			// TODO: Assists should get credit for this
			// work.
			if work.markrootNext < work.markrootJobs {
				job := atomic.Xadd(&work.markrootNext, +1) - 1
				if job < work.markrootJobs {
					markroot(gcw, job)
					continue
				}
			}
			// No heap or root jobs.
			break
		}
		scanobject(b, gcw)

		// Flush background scan work credit.
		if gcw.scanWork >= gcCreditSlack {
			atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
			workFlushed += gcw.scanWork
			gcw.scanWork = 0
		}
	}

	// Unlike gcDrain, there's no need to flush remaining work
	// here because this never flushes to bgScanCredit and
	// gcw.dispose will flush any remaining work to scanWork.

	return workFlushed + gcw.scanWork
}
1142
// scanblock scans b as scanobject would, but using an explicit
// pointer bitmap instead of the heap bitmap.
//
// This is used to scan non-heap roots, so it does not update
// gcw.bytesMarked or gcw.scanWork.
//
// If stk != nil, possible stack pointers are also reported to stk.putPtr.
//go:nowritebarrier
func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork, stk *stackScanState) {
	// Use local copies of original parameters, so that a stack trace
	// due to one of the throws below shows the original block
	// base and extent.
	b := b0
	n := n0

	for i := uintptr(0); i < n; {
		// Find bits for the next word. Each byte of ptrmask
		// covers 8 pointer-sized words.
		bits := uint32(*addb(ptrmask, i/(sys.PtrSize*8)))
		if bits == 0 {
			// No pointers anywhere in these 8 words;
			// skip them all at once.
			i += sys.PtrSize * 8
			continue
		}
		// Walk the 8 words covered by this mask byte,
		// consuming one bit per word.
		for j := 0; j < 8 && i < n; j++ {
			if bits&1 != 0 {
				// Same work as in scanobject; see comments there.
				p := *(*uintptr)(unsafe.Pointer(b + i))
				if p != 0 {
					if obj, span, objIndex := findObject(p, b, i); obj != 0 {
						greyobject(obj, b, i, span, gcw, objIndex)
					} else if stk != nil && p >= stk.stack.lo && p < stk.stack.hi {
						// Not a heap pointer, but it points into
						// the stack being scanned; report it as a
						// possible pointer to a stack object.
						stk.putPtr(p, false)
					}
				}
			}
			bits >>= 1
			i += sys.PtrSize
		}
	}
}
1182
// scanobject scans the object starting at b, adding pointers to gcw.
// b must point to the beginning of a heap object or an oblet.
// scanobject consults the GC bitmap for the pointer mask and the
// spans for the size of the object.
//
//go:nowritebarrier
func scanobject(b uintptr, gcw *gcWork) {
	// Find the bits for b and the size of the object at b.
	//
	// b is either the beginning of an object, in which case this
	// is the size of the object to scan, or it points to an
	// oblet, in which case we compute the size to scan below.
	hbits := heapBitsForAddr(b)
	s := spanOfUnchecked(b)
	n := s.elemsize
	if n == 0 {
		throw("scanobject n == 0")
	}

	if n > maxObletBytes {
		// Large object. Break into oblets for better
		// parallelism and lower latency.
		if b == s.base() {
			// It's possible this is a noscan object (not
			// from greyobject, but from other code
			// paths), in which case we must *not* enqueue
			// oblets since their bitmaps will be
			// uninitialized.
			if s.spanclass.noscan() {
				// Bypass the whole scan.
				gcw.bytesMarked += uint64(n)
				return
			}

			// Enqueue the other oblets to scan later.
			// Some oblets may be in b's scalar tail, but
			// these will be marked as "no more pointers",
			// so we'll drop out immediately when we go to
			// scan those.
			for oblet := b + maxObletBytes; oblet < s.base()+s.elemsize; oblet += maxObletBytes {
				if !gcw.putFast(oblet) {
					gcw.put(oblet)
				}
			}
		}

		// Compute the size of the oblet. Since this object
		// must be a large object, s.base() is the beginning
		// of the object.
		n = s.base() + s.elemsize - b
		if n > maxObletBytes {
			n = maxObletBytes
		}
	}

	var i uintptr
	for i = 0; i < n; i += sys.PtrSize {
		// Find bits for this word.
		if i != 0 {
			// Avoid needless hbits.next() on last iteration.
			hbits = hbits.next()
		}
		// Load bits once. See CL 22712 and issue 16973 for discussion.
		bits := hbits.bits()
		// During checkmarking, 1-word objects store the checkmark
		// in the type bit for the one word. The only one-word objects
		// are pointers, or else they'd be merged with other non-pointer
		// data into larger allocations.
		if i != 1*sys.PtrSize && bits&bitScan == 0 {
			break // no more pointers in this object
		}
		if bits&bitPointer == 0 {
			continue // not a pointer
		}

		// Work here is duplicated in scanblock and above.
		// If you make changes here, make changes there too.
		obj := *(*uintptr)(unsafe.Pointer(b + i))

		// At this point we have extracted the next potential pointer.
		// Quickly filter out nil and pointers back to the current object.
		if obj != 0 && obj-b >= n {
			// Test if obj points into the Go heap and, if so,
			// mark the object.
			//
			// Note that it's possible for findObject to
			// fail if obj points to a just-allocated heap
			// object because of a race with growing the
			// heap. In this case, we know the object was
			// just allocated and hence will be marked by
			// allocation itself.
			if obj, span, objIndex := findObject(obj, b, i); obj != 0 {
				greyobject(obj, b, i, span, gcw, objIndex)
			}
		}
	}
	// Credit the whole oblet as marked even if the loop stopped
	// early at the "no more pointers" boundary; scan work counts
	// only the bytes actually examined (i).
	gcw.bytesMarked += uint64(n)
	gcw.scanWork += int64(i)
}
1282
// scanConservative scans block [b, b+n) conservatively, treating any
// pointer-like value in the block as a pointer.
//
// If ptrmask != nil, only words that are marked in ptrmask are
// considered as potential pointers.
//
// If state != nil, it's assumed that [b, b+n) is a block in the stack
// and may contain pointers to stack objects.
func scanConservative(b, n uintptr, ptrmask *uint8, gcw *gcWork, state *stackScanState) {
	if debugScanConservative {
		// Print an annotated hex dump of the block:
		// '$' = masked out by ptrmask, '@' = points into the
		// stack, '*' = points to an allocated heap object,
		// ' ' = not a recognized pointer.
		printlock()
		print("conservatively scanning [", hex(b), ",", hex(b+n), ")\n")
		hexdumpWords(b, b+n, func(p uintptr) byte {
			if ptrmask != nil {
				word := (p - b) / sys.PtrSize
				bits := *addb(ptrmask, word/8)
				if (bits>>(word%8))&1 == 0 {
					return '$'
				}
			}

			val := *(*uintptr)(unsafe.Pointer(p))
			if state != nil && state.stack.lo <= val && val < state.stack.hi {
				return '@'
			}

			span := spanOfHeap(val)
			if span == nil {
				return ' '
			}
			idx := span.objIndex(val)
			if span.isFree(idx) {
				return ' '
			}
			return '*'
		})
		printunlock()
	}

	for i := uintptr(0); i < n; i += sys.PtrSize {
		if ptrmask != nil {
			word := i / sys.PtrSize
			bits := *addb(ptrmask, word/8)
			if bits == 0 {
				// Skip 8 words (the loop increment will do the 8th)
				//
				// This must be the first time we've
				// seen this word of ptrmask, so i
				// must be 8-word-aligned, but check
				// our reasoning just in case.
				if i%(sys.PtrSize*8) != 0 {
					throw("misaligned mask")
				}
				i += sys.PtrSize*8 - sys.PtrSize
				continue
			}
			if (bits>>(word%8))&1 == 0 {
				continue
			}
		}

		val := *(*uintptr)(unsafe.Pointer(b + i))

		// Check if val points into the stack.
		if state != nil && state.stack.lo <= val && val < state.stack.hi {
			// val may point to a stack object. This
			// object may be dead from last cycle and
			// hence may contain pointers to unallocated
			// objects, but unlike heap objects we can't
			// tell if it's already dead. Hence, if all
			// pointers to this object are from
			// conservative scanning, we have to scan it
			// defensively, too.
			state.putPtr(val, true)
			continue
		}

		// Check if val points to a heap span.
		span := spanOfHeap(val)
		if span == nil {
			continue
		}

		// Check if val points to an allocated object.
		idx := span.objIndex(val)
		if span.isFree(idx) {
			continue
		}

		// val points to an allocated object. Mark it.
		obj := span.base() + idx*span.elemsize
		greyobject(obj, b, i, span, gcw, idx)
	}
}
1377
1378// Shade the object if it isn't already.
1379// The object is not nil and known to be in the heap.
1380// Preemption must be disabled.
1381//go:nowritebarrier
1382func shade(b uintptr) {
1383	if obj, span, objIndex := findObject(b, 0, 0); obj != 0 {
1384		gcw := &getg().m.p.ptr().gcw
1385		greyobject(obj, 0, 0, span, gcw, objIndex)
1386	}
1387}
1388
// obj is the start of an object with mark mbits.
// If it isn't already marked, mark it and enqueue into gcw.
// base and off are for debugging only and could be removed.
//
// See also wbBufFlush1, which partially duplicates this logic.
//
//go:nowritebarrierrec
func greyobject(obj, base, off uintptr, span *mspan, gcw *gcWork, objIndex uintptr) {
	// obj should be start of allocation, and so must be at least pointer-aligned.
	if obj&(sys.PtrSize-1) != 0 {
		throw("greyobject: obj not pointer-aligned")
	}
	mbits := span.markBitsForIndex(objIndex)

	if useCheckmark {
		// Checkmark mode: every object reached here should
		// already carry its normal mark bit; we only set the
		// orthogonal checkmark bit.
		if !mbits.isMarked() {
			printlock()
			print("runtime:greyobject: checkmarks finds unexpected unmarked object obj=", hex(obj), "\n")
			print("runtime: found obj at *(", hex(base), "+", hex(off), ")\n")

			// Dump the source (base) object
			gcDumpObject("base", base, off)

			// Dump the object
			gcDumpObject("obj", obj, ^uintptr(0))

			getg().m.traceback = 2
			throw("checkmark found unmarked object")
		}
		hbits := heapBitsForAddr(obj)
		if hbits.isCheckmarked(span.elemsize) {
			return
		}
		hbits.setCheckmarked(span.elemsize)
		if !hbits.isCheckmarked(span.elemsize) {
			throw("setCheckmarked and isCheckmarked disagree")
		}
	} else {
		if debug.gccheckmark > 0 && span.isFree(objIndex) {
			print("runtime: marking free object ", hex(obj), " found at *(", hex(base), "+", hex(off), ")\n")
			gcDumpObject("base", base, off)
			gcDumpObject("obj", obj, ^uintptr(0))
			getg().m.traceback = 2
			throw("marking free object")
		}

		// If marked we have nothing to do.
		if mbits.isMarked() {
			return
		}
		mbits.setMarked()

		// Mark span. The unsynchronized read is an
		// optimization: only do the atomic Or8 if the bit
		// isn't already set.
		arena, pageIdx, pageMask := pageIndexOf(span.base())
		if arena.pageMarks[pageIdx]&pageMask == 0 {
			atomic.Or8(&arena.pageMarks[pageIdx], pageMask)
		}

		// If this is a noscan object, fast-track it to black
		// instead of greying it.
		if span.spanclass.noscan() {
			gcw.bytesMarked += uint64(span.elemsize)
			return
		}
	}

	// Queue the obj for scanning. The PREFETCH(obj) logic has been removed but
	// seems like a nice optimization that can be added back in.
	// There needs to be time between the PREFETCH and the use.
	// Previously we put the obj in an 8 element buffer that is drained at a rate
	// to give the PREFETCH time to do its work.
	// Use of PREFETCHNTA might be more appropriate than PREFETCH
	if !gcw.putFast(obj) {
		gcw.put(obj)
	}
}
1465
// gcDumpObject dumps the contents of obj for debugging and marks the
// field at byte offset off in obj.
func gcDumpObject(label string, obj, off uintptr) {
	s := spanOf(obj)
	print(label, "=", hex(obj))
	if s == nil {
		print(" s=nil\n")
		return
	}
	print(" s.base()=", hex(s.base()), " s.limit=", hex(s.limit), " s.spanclass=", s.spanclass, " s.elemsize=", s.elemsize, " s.state=")
	if state := s.state.get(); 0 <= state && int(state) < len(mSpanStateNames) {
		print(mSpanStateNames[state], "\n")
	} else {
		print("unknown(", state, ")\n")
	}

	skipped := false
	size := s.elemsize
	if s.state.get() == mSpanManual && size == 0 {
		// We're printing something from a stack frame. We
		// don't know how big it is, so just show up to and
		// including off.
		size = off + sys.PtrSize
	}
	for i := uintptr(0); i < size; i += sys.PtrSize {
		// For big objects, just print the beginning (because
		// that usually hints at the object's type) and the
		// fields around off.
		if !(i < 128*sys.PtrSize || off-16*sys.PtrSize < i && i < off+16*sys.PtrSize) {
			skipped = true
			continue
		}
		if skipped {
			print(" ...\n")
			skipped = false
		}
		print(" *(", label, "+", i, ") = ", hex(*(*uintptr)(unsafe.Pointer(obj + i))))
		if i == off {
			// Flag the word the caller asked about.
			print(" <==")
		}
		print("\n")
	}
	if skipped {
		print(" ...\n")
	}
}
1512
1513// gcmarknewobject marks a newly allocated object black. obj must
1514// not contain any non-nil pointers.
1515//
1516// This is nosplit so it can manipulate a gcWork without preemption.
1517//
1518//go:nowritebarrier
1519//go:nosplit
1520func gcmarknewobject(obj, size, scanSize uintptr) {
1521	if useCheckmark { // The world should be stopped so this should not happen.
1522		throw("gcmarknewobject called while doing checkmark")
1523	}
1524	markBitsForAddr(obj).setMarked()
1525	gcw := &getg().m.p.ptr().gcw
1526	gcw.bytesMarked += uint64(size)
1527	gcw.scanWork += int64(scanSize)
1528}
1529
1530// gcMarkTinyAllocs greys all active tiny alloc blocks.
1531//
1532// The world must be stopped.
1533func gcMarkTinyAllocs() {
1534	for _, p := range allp {
1535		c := p.mcache
1536		if c == nil || c.tiny == 0 {
1537			continue
1538		}
1539		_, span, objIndex := findObject(c.tiny, 0, 0)
1540		gcw := &p.gcw
1541		greyobject(c.tiny, 0, 0, span, gcw, objIndex)
1542	}
1543}
1544
1545// Checkmarking
1546
1547// To help debug the concurrent GC we remark with the world
1548// stopped ensuring that any object encountered has their normal
1549// mark bit set. To do this we use an orthogonal bit
1550// pattern to indicate the object is marked. The following pattern
1551// uses the upper two bits in the object's boundary nibble.
1552// 01: scalar  not marked
1553// 10: pointer not marked
1554// 11: pointer     marked
1555// 00: scalar      marked
// Xoring with 01 will flip the pattern from marked to unmarked and vice versa.
1557// The higher bit is 1 for pointers and 0 for scalars, whether the object
1558// is marked or not.
// The first nibble no longer holds the typeDead pattern indicating that
// there are no more pointers in the object. This information is held
1561// in the second nibble.
1562
// If useCheckmark is true, marking of an object uses the
// checkmark bits (encoding above) instead of the standard
// mark bits. It is set by initCheckmarks and cleared by
// clearCheckmarks.
var useCheckmark = false
1567
1568//go:nowritebarrier
1569func initCheckmarks() {
1570	useCheckmark = true
1571	for _, s := range mheap_.allspans {
1572		if s.state.get() == mSpanInUse {
1573			heapBitsForAddr(s.base()).initCheckmarkSpan(s.layout())
1574		}
1575	}
1576}
1577
1578func clearCheckmarks() {
1579	useCheckmark = false
1580	for _, s := range mheap_.allspans {
1581		if s.state.get() == mSpanInUse {
1582			heapBitsForAddr(s.base()).clearCheckmarkSpan(s.layout())
1583		}
1584	}
1585}
1586