// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime

import (
	"runtime/internal/atomic"
	"runtime/internal/sys"
	"unsafe"
)

const (
	_WorkbufSize = 2048 // in bytes; larger values result in less contention

	// workbufAlloc is the number of bytes to allocate at a time
	// for new workbufs. This must be a multiple of pageSize and
	// should be a multiple of _WorkbufSize.
	//
	// Larger values reduce workbuf allocation overhead. Smaller
	// values reduce heap fragmentation.
	workbufAlloc = 32 << 10
)

// throwOnGCWork causes any operations that add pointers to a gcWork
// buffer to throw.
//
// TODO(austin): This is a temporary debugging measure for issue
// #27993. To be removed before release.
var throwOnGCWork bool

func init() {
	if workbufAlloc%pageSize != 0 || workbufAlloc%_WorkbufSize != 0 {
		throw("bad workbufAlloc")
	}
}

// Garbage collector work pool abstraction.
//
// This implements a producer/consumer model for pointers to grey
// objects. A grey object is one that is marked and on a work
// queue. A black object is marked and not on a work queue.
//
// Write barriers, root discovery, stack scanning, and object scanning
// produce pointers to grey objects. Scanning consumes pointers to
// grey objects, thus blackening them, and then scans them,
// potentially producing new pointers to grey objects.
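//
// An illustrative sketch of that cycle, written against the gcWork API
// below (the real drain loop is gcDrain in mgcmark.go; scanGrey is a
// hypothetical stand-in for the object scanner):
//
//	gcw := &getg().m.p.ptr().gcw
//	for {
//		obj := gcw.tryGetFast()
//		if obj == 0 {
//			obj = gcw.tryGet()
//		}
//		if obj == 0 {
//			break // no local or global grey objects left
//		}
//		scanGrey(obj, gcw) // blackens obj; may gcw.put new grey objects
//	}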

// A gcWork provides the interface to produce and consume work for the
// garbage collector.
//
// A gcWork can be used on the stack as follows:
//
//     (preemption must be disabled)
//     gcw := &getg().m.p.ptr().gcw
//     .. call gcw.put() to produce and gcw.tryGet() to consume ..
//
// It's important that any use of gcWork during the mark phase prevent
// the garbage collector from transitioning to mark termination since
// gcWork may locally hold GC work buffers. This can be done by
// disabling preemption (systemstack or acquirem).
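//
// As a concrete, illustrative producer pattern under those rules
// (someObj is an assumed pointer obtained elsewhere):
//
//	mp := acquirem() // disable preemption
//	gcw := &mp.p.ptr().gcw
//	if !gcw.putFast(someObj) {
//		gcw.put(someObj)
//	}
//	releasem(mp)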
type gcWork struct {
	// wbuf1 and wbuf2 are the primary and secondary work buffers.
	//
	// This can be thought of as a stack of both work buffers'
	// pointers concatenated. When we pop the last pointer, we
	// shift the stack up by one work buffer by bringing in a new
	// full buffer and discarding an empty one. When we fill both
	// buffers, we shift the stack down by one work buffer by
	// bringing in a new empty buffer and discarding a full one.
	// This way we have one buffer's worth of hysteresis, which
	// amortizes the cost of getting or putting a work buffer over
	// at least one buffer of work and reduces contention on the
	// global work lists.
	//
	// wbuf1 is always the buffer we're currently pushing to and
	// popping from and wbuf2 is the buffer that will be discarded
	// next.
	//
	// Invariant: Both wbuf1 and wbuf2 are nil or neither are.
	wbuf1, wbuf2 *workbuf

	// Bytes marked (blackened) on this gcWork. This is aggregated
	// into work.bytesMarked by dispose.
	bytesMarked uint64

	// Scan work performed on this gcWork. This is aggregated into
	// gcController by dispose and may also be flushed by callers.
	scanWork int64

	// flushedWork indicates that a non-empty work buffer was
	// flushed to the global work list since the last gcMarkDone
	// termination check. Specifically, this indicates that this
	// gcWork may have communicated work to another gcWork.
	flushedWork bool

	// pauseGen causes put operations to spin while pauseGen ==
	// gcWorkPauseGen if debugCachedWork is true.
	pauseGen uint32
	// putGen is the value of pauseGen observed by the most recent
	// put (recorded by checkPut).
	putGen uint32

	// pauseStack is the stack at which this P was paused if
	// debugCachedWork is true.
	pauseStack [16]uintptr
}

// Most of the methods of gcWork are go:nowritebarrierrec because the
// write barrier itself can invoke gcWork methods but the methods are
// not generally re-entrant. Hence, if a gcWork method invoked the
// write barrier while the gcWork was in an inconsistent state, and
// the write barrier in turn invoked a gcWork method, it could
// permanently corrupt the gcWork.

func (w *gcWork) init() {
	w.wbuf1 = getempty()
	wbuf2 := trygetfull()
	if wbuf2 == nil {
		wbuf2 = getempty()
	}
	w.wbuf2 = wbuf2
}

func (w *gcWork) checkPut(ptr uintptr, ptrs []uintptr) {
	if debugCachedWork {
		alreadyFailed := w.putGen == w.pauseGen
		w.putGen = w.pauseGen
		if !canPreemptM(getg().m) {
			// If we were to spin, the runtime may
			// deadlock. Since we can't be preempted, the
			// spin could prevent gcMarkDone from
			// finishing the ragged barrier, which is what
			// releases us from the spin.
			return
		}
		for atomic.Load(&gcWorkPauseGen) == w.pauseGen {
		}
		if throwOnGCWork {
			printlock()
			if alreadyFailed {
				println("runtime: checkPut already failed at this generation")
			}
			println("runtime: late gcWork put")
			if ptr != 0 {
				gcDumpObject("ptr", ptr, ^uintptr(0))
			}
			for _, ptr := range ptrs {
				gcDumpObject("ptrs", ptr, ^uintptr(0))
			}
			println("runtime: paused at")
			for _, pc := range w.pauseStack {
				if pc == 0 {
					break
				}
				f := findfunc(pc)
				if f.valid() {
					// Obviously this doesn't
					// relate to ancestor
					// tracebacks, but this
					// function prints what we
					// want.
					printAncestorTracebackFuncInfo(f, pc)
				} else {
					println("\tunknown PC ", hex(pc), "\n")
				}
			}
			throw("throwOnGCWork")
		}
	}
}

// put enqueues a pointer for the garbage collector to trace.
// obj must point to the beginning of a heap object or an oblet.
//go:nowritebarrierrec
func (w *gcWork) put(obj uintptr) {
	w.checkPut(obj, nil)

	flushed := false
	wbuf := w.wbuf1
	if wbuf == nil {
		w.init()
		wbuf = w.wbuf1
		// wbuf is empty at this point.
	} else if wbuf.nobj == len(wbuf.obj) {
		w.wbuf1, w.wbuf2 = w.wbuf2, w.wbuf1
		wbuf = w.wbuf1
		if wbuf.nobj == len(wbuf.obj) {
			putfull(wbuf)
			w.flushedWork = true
			wbuf = getempty()
			w.wbuf1 = wbuf
			flushed = true
		}
	}

	wbuf.obj[wbuf.nobj] = obj
	wbuf.nobj++

	// If we put a buffer on full, let the GC controller know so
	// it can encourage more workers to run. We delay this until
	// the end of put so that w is in a consistent state, since
	// enlistWorker may itself manipulate w.
	if flushed && gcphase == _GCmark {
		gcController.enlistWorker()
	}
}

// putFast does a put and reports whether it can be done quickly;
// otherwise it returns false and the caller needs to call put.
//go:nowritebarrierrec
func (w *gcWork) putFast(obj uintptr) bool {
	w.checkPut(obj, nil)

	wbuf := w.wbuf1
	if wbuf == nil {
		return false
	} else if wbuf.nobj == len(wbuf.obj) {
		return false
	}

	wbuf.obj[wbuf.nobj] = obj
	wbuf.nobj++
	return true
}

// putBatch performs a put on every pointer in obj. See put for
// constraints on these pointers.
//
//go:nowritebarrierrec
func (w *gcWork) putBatch(obj []uintptr) {
	if len(obj) == 0 {
		return
	}

	w.checkPut(0, obj)

	flushed := false
	wbuf := w.wbuf1
	if wbuf == nil {
		w.init()
		wbuf = w.wbuf1
	}

	for len(obj) > 0 {
		for wbuf.nobj == len(wbuf.obj) {
			putfull(wbuf)
			w.flushedWork = true
			w.wbuf1, w.wbuf2 = w.wbuf2, getempty()
			wbuf = w.wbuf1
			flushed = true
		}
		n := copy(wbuf.obj[wbuf.nobj:], obj)
		wbuf.nobj += n
		obj = obj[n:]
	}

	if flushed && gcphase == _GCmark {
		gcController.enlistWorker()
	}
}

// tryGet dequeues a pointer for the garbage collector to trace.
//
// If there are no pointers remaining in this gcWork or in the global
// queue, tryGet returns 0. Note that there may still be pointers in
// other gcWork instances or other caches.
//go:nowritebarrierrec
func (w *gcWork) tryGet() uintptr {
	wbuf := w.wbuf1
	if wbuf == nil {
		w.init()
		wbuf = w.wbuf1
		// wbuf is empty at this point.
	}
	if wbuf.nobj == 0 {
		w.wbuf1, w.wbuf2 = w.wbuf2, w.wbuf1
		wbuf = w.wbuf1
		if wbuf.nobj == 0 {
			owbuf := wbuf
			wbuf = trygetfull()
			if wbuf == nil {
				return 0
			}
			putempty(owbuf)
			w.wbuf1 = wbuf
		}
	}

	wbuf.nobj--
	return wbuf.obj[wbuf.nobj]
}

// tryGetFast dequeues a pointer for the garbage collector to trace
// if one is readily available. Otherwise it returns 0 and
// the caller is expected to call tryGet().
//go:nowritebarrierrec
func (w *gcWork) tryGetFast() uintptr {
	wbuf := w.wbuf1
	if wbuf == nil {
		return 0
	}
	if wbuf.nobj == 0 {
		return 0
	}

	wbuf.nobj--
	return wbuf.obj[wbuf.nobj]
}

// dispose returns any cached pointers to the global queue.
// The buffers are being put on the full queue so that the
// write barriers will not simply reacquire them before the
// GC can inspect them. This helps reduce the mutator's
// ability to hide pointers during the concurrent mark phase.
//
//go:nowritebarrierrec
func (w *gcWork) dispose() {
	if wbuf := w.wbuf1; wbuf != nil {
		if wbuf.nobj == 0 {
			putempty(wbuf)
		} else {
			putfull(wbuf)
			w.flushedWork = true
		}
		w.wbuf1 = nil

		wbuf = w.wbuf2
		if wbuf.nobj == 0 {
			putempty(wbuf)
		} else {
			putfull(wbuf)
			w.flushedWork = true
		}
		w.wbuf2 = nil
	}
	if w.bytesMarked != 0 {
		// dispose happens relatively infrequently. If this
		// atomic becomes a problem, we should first try to
		// dispose less and if necessary aggregate in a per-P
		// counter.
		atomic.Xadd64(&work.bytesMarked, int64(w.bytesMarked))
		w.bytesMarked = 0
	}
	if w.scanWork != 0 {
		atomic.Xaddint64(&gcController.scanWork, w.scanWork)
		w.scanWork = 0
	}
}

// balance moves some work that's cached in this gcWork back on the
// global queue.
//go:nowritebarrierrec
func (w *gcWork) balance() {
	if w.wbuf1 == nil {
		return
	}
	if wbuf := w.wbuf2; wbuf.nobj != 0 {
		w.checkPut(0, wbuf.obj[:wbuf.nobj])
		putfull(wbuf)
		w.flushedWork = true
		w.wbuf2 = getempty()
	} else if wbuf := w.wbuf1; wbuf.nobj > 4 {
		w.checkPut(0, wbuf.obj[:wbuf.nobj])
		w.wbuf1 = handoff(wbuf)
		w.flushedWork = true // handoff did putfull
	} else {
		return
	}
	// We flushed a buffer to the full list, so wake a worker.
	if gcphase == _GCmark {
		gcController.enlistWorker()
	}
}

// empty reports whether w has no mark work available.
//go:nowritebarrierrec
func (w *gcWork) empty() bool {
	return w.wbuf1 == nil || (w.wbuf1.nobj == 0 && w.wbuf2.nobj == 0)
}

// Internally, the GC work pool is kept in arrays in work buffers.
// The gcWork interface caches a work buffer until full (or empty) to
// avoid contending on the global work buffer lists.
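//
// An illustrative, simplified life cycle of one buffer through the
// factory routines defined below (p is some grey pointer):
//
//	b := getempty()    // from work.empty, or carved from a fresh span
//	b.obj[b.nobj] = p  // filled locally via a gcWork
//	b.nobj++
//	putfull(b)         // published on work.full for any worker
//	b = trygetfull()   // later reacquired and drained by a consumer
//	putempty(b)        // and finally returned to work.empty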

type workbufhdr struct {
	node lfnode // must be first
	nobj int
}

//go:notinheap
type workbuf struct {
	workbufhdr
	// account for the above fields
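	// (On a 64-bit platform this works out to roughly 253 pointer
	// slots per 2048-byte buffer, assuming the current lfnode and
	// workbufhdr layout.)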
	obj [(_WorkbufSize - unsafe.Sizeof(workbufhdr{})) / sys.PtrSize]uintptr
}

// workbuf factory routines. These funcs are used to manage the
// workbufs.
// If the GC asks for some work, these are the only routines that
// make wbufs available to the GC.

func (b *workbuf) checknonempty() {
	if b.nobj == 0 {
		throw("workbuf is empty")
	}
}

func (b *workbuf) checkempty() {
	if b.nobj != 0 {
		throw("workbuf is not empty")
	}
}

// getempty pops an empty work buffer off the work.empty list,
// allocating new buffers if none are available.
//go:nowritebarrier
func getempty() *workbuf {
	var b *workbuf
	if work.empty != 0 {
		b = (*workbuf)(work.empty.pop())
		if b != nil {
			b.checkempty()
		}
	}
	if b == nil {
		// Allocate more workbufs.
		var s *mspan
		if work.wbufSpans.free.first != nil {
			lock(&work.wbufSpans.lock)
			s = work.wbufSpans.free.first
			if s != nil {
				work.wbufSpans.free.remove(s)
				work.wbufSpans.busy.insert(s)
			}
			unlock(&work.wbufSpans.lock)
		}
		if s == nil {
			systemstack(func() {
				s = mheap_.allocManual(workbufAlloc/pageSize, &memstats.gc_sys)
			})
			if s == nil {
				throw("out of memory")
			}
			// Record the new span in the busy list.
			lock(&work.wbufSpans.lock)
			work.wbufSpans.busy.insert(s)
			unlock(&work.wbufSpans.lock)
		}
		// Slice up the span into new workbufs. Return one and
		// put the rest on the empty list.
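		// (With the current constants, each 32 KiB span yields
		// sixteen 2 KiB workbufs.)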
		for i := uintptr(0); i+_WorkbufSize <= workbufAlloc; i += _WorkbufSize {
			newb := (*workbuf)(unsafe.Pointer(s.base() + i))
			newb.nobj = 0
			lfnodeValidate(&newb.node)
			if i == 0 {
				b = newb
			} else {
				putempty(newb)
			}
		}
	}
	return b
}

// putempty puts a workbuf onto the work.empty list.
// Upon entry this goroutine owns b. The lfstack.push relinquishes ownership.
//go:nowritebarrier
func putempty(b *workbuf) {
	b.checkempty()
	work.empty.push(&b.node)
}

// putfull puts the workbuf on the work.full list for the GC.
// putfull accepts partially full buffers so the GC can avoid competing
// with the mutators for ownership of partially full buffers.
//go:nowritebarrier
func putfull(b *workbuf) {
	b.checknonempty()
	work.full.push(&b.node)
}

// trygetfull tries to get a full or partially empty workbuffer.
// If one is not immediately available, it returns nil.
//go:nowritebarrier
func trygetfull() *workbuf {
	b := (*workbuf)(work.full.pop())
	if b != nil {
		b.checknonempty()
		return b
	}
	return b
}

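// handoff splits b's pointers roughly in half: the newer half is moved
// into a fresh buffer, which is returned for the caller to keep working
// on, while b itself (holding the older half) is published on work.full
// for other workers to steal. For example, with b.nobj == 7 this leaves
// 4 pointers in b and moves 3 into the returned buffer.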
//go:nowritebarrier
func handoff(b *workbuf) *workbuf {
	// Make new buffer with half of b's pointers.
	b1 := getempty()
	n := b.nobj / 2
	b.nobj -= n
	b1.nobj = n
	memmove(unsafe.Pointer(&b1.obj[0]), unsafe.Pointer(&b.obj[b.nobj]), uintptr(n)*unsafe.Sizeof(b1.obj[0]))

	// Put b on full list - let first half of b get stolen.
	putfull(b)
	return b1
}

// prepareFreeWorkbufs moves busy workbuf spans to free list so they
// can be freed to the heap. This must only be called when all
// workbufs are on the empty list.
func prepareFreeWorkbufs() {
	lock(&work.wbufSpans.lock)
	if work.full != 0 {
		throw("cannot free workbufs when work.full != 0")
	}
	// Since all workbufs are on the empty list, we don't care
	// which ones are in which spans. We can wipe the entire empty
	// list and move all workbuf spans to the free list.
	work.empty = 0
	work.wbufSpans.free.takeAll(&work.wbufSpans.busy)
	unlock(&work.wbufSpans.lock)
}

// freeSomeWbufs frees some workbufs back to the heap and returns
// true if it should be called again to free more.
func freeSomeWbufs(preemptible bool) bool {
	const batchSize = 64 // ~1–2 µs per span.
	lock(&work.wbufSpans.lock)
	if gcphase != _GCoff || work.wbufSpans.free.isEmpty() {
		unlock(&work.wbufSpans.lock)
		return false
	}
	systemstack(func() {
		gp := getg().m.curg
		for i := 0; i < batchSize && !(preemptible && gp.preempt); i++ {
			span := work.wbufSpans.free.first
			if span == nil {
				break
			}
			work.wbufSpans.free.remove(span)
			mheap_.freeManual(span, &memstats.gc_sys)
		}
	})
	more := !work.wbufSpans.free.isEmpty()
	unlock(&work.wbufSpans.lock)
	return more
}