// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime

import (
	"runtime/internal/atomic"
	"runtime/internal/sys"
	"unsafe"
)

const (
	_WorkbufSize = 2048 // in bytes; larger values result in less contention

	// workbufAlloc is the number of bytes to allocate at a time
	// for new workbufs. This must be a multiple of pageSize and
	// should be a multiple of _WorkbufSize.
	//
	// Larger values reduce workbuf allocation overhead. Smaller
	// values reduce heap fragmentation.
	workbufAlloc = 32 << 10
)

// throwOnGCWork causes any operations that add pointers to a gcWork
// buffer to throw.
//
// TODO(austin): This is a temporary debugging measure for issue
// #27993. To be removed before release.
var throwOnGCWork bool

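// init verifies that workbufAlloc is a multiple of both pageSize and
// _WorkbufSize, as required by the comment on workbufAlloc.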
func init() {
	if workbufAlloc%pageSize != 0 || workbufAlloc%_WorkbufSize != 0 {
		throw("bad workbufAlloc")
	}
}

// Garbage collector work pool abstraction.
//
// This implements a producer/consumer model for pointers to grey
// objects. A grey object is one that is marked and on a work
// queue. A black object is marked and not on a work queue.
//
// Write barriers, root discovery, stack scanning, and object scanning
// produce pointers to grey objects. Scanning consumes pointers to
// grey objects, thus blackening them, and then scans them,
// potentially producing new pointers to grey objects.

// A gcWork provides the interface to produce and consume work for the
// garbage collector.
//
// A gcWork can be used on the stack as follows:
//
//     (preemption must be disabled)
//     gcw := &getg().m.p.ptr().gcw
//     .. call gcw.put() to produce and gcw.tryGet() to consume ..
//
// It's important that any use of gcWork during the mark phase prevent
// the garbage collector from transitioning to mark termination since
// gcWork may locally hold GC work buffers. This can be done by
// disabling preemption (systemstack or acquirem).
type gcWork struct {
	// wbuf1 and wbuf2 are the primary and secondary work buffers.
	//
	// This can be thought of as a stack of both work buffers'
	// pointers concatenated. When we pop the last pointer, we
	// shift the stack up by one work buffer by bringing in a new
	// full buffer and discarding an empty one. When we fill both
	// buffers, we shift the stack down by one work buffer by
	// bringing in a new empty buffer and discarding a full one.
	// This way we have one buffer's worth of hysteresis, which
	// amortizes the cost of getting or putting a work buffer over
	// at least one buffer of work and reduces contention on the
	// global work lists.
	//
	// wbuf1 is always the buffer we're currently pushing to and
	// popping from and wbuf2 is the buffer that will be discarded
	// next.
	//
	// Invariant: Both wbuf1 and wbuf2 are nil or neither are.
	wbuf1, wbuf2 *workbuf

	// Bytes marked (blackened) on this gcWork. This is aggregated
	// into work.bytesMarked by dispose.
	bytesMarked uint64

	// Scan work performed on this gcWork. This is aggregated into
	// gcController by dispose and may also be flushed by callers.
	scanWork int64

	// flushedWork indicates that a non-empty work buffer was
	// flushed to the global work list since the last gcMarkDone
	// termination check. Specifically, this indicates that this
	// gcWork may have communicated work to another gcWork.
	flushedWork bool

	// pauseGen causes put operations to spin while pauseGen ==
	// gcWorkPauseGen if debugCachedWork is true.
	pauseGen uint32

	// putGen records the pauseGen observed by the last checkPut.
	putGen uint32

	// pauseStack is the stack at which this P was paused if
	// debugCachedWork is true.
	pauseStack [16]location
}

// Most of the methods of gcWork are go:nowritebarrierrec because the
// write barrier itself can invoke gcWork methods but the methods are
// not generally re-entrant. Hence, if a gcWork method invoked the
// write barrier while the gcWork was in an inconsistent state, and
// the write barrier in turn invoked a gcWork method, it could
// permanently corrupt the gcWork.

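// init initializes w's buffer pair. wbuf1 starts out empty, and wbuf2
// is seeded from the global full list when possible so there is work
// to consume immediately, falling back to an empty buffer otherwise.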
func (w *gcWork) init() {
	w.wbuf1 = getempty()
	wbuf2 := trygetfull()
	if wbuf2 == nil {
		wbuf2 = getempty()
	}
	w.wbuf2 = wbuf2
}

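// checkPut is a debugging hook for issue #27993. If debugCachedWork
// is set, it spins while this gcWork is paused (while gcWorkPauseGen
// still equals w.pauseGen) and, if throwOnGCWork is then set, prints
// the pointers being put and the stack at which the P was paused
// before throwing.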
func (w *gcWork) checkPut(ptr uintptr, ptrs []uintptr) {
	if debugCachedWork {
		alreadyFailed := w.putGen == w.pauseGen
		w.putGen = w.pauseGen
		if !canPreemptM(getg().m) {
			// If we were to spin, the runtime may
			// deadlock. Since we can't be preempted, the
			// spin could prevent gcMarkDone from
			// finishing the ragged barrier, which is what
			// releases us from the spin.
			return
		}
		for atomic.Load(&gcWorkPauseGen) == w.pauseGen {
		}
		if throwOnGCWork {
			printlock()
			if alreadyFailed {
				println("runtime: checkPut already failed at this generation")
			}
			println("runtime: late gcWork put")
			if ptr != 0 {
				gcDumpObject("ptr", ptr, ^uintptr(0))
			}
			for _, ptr := range ptrs {
				gcDumpObject("ptrs", ptr, ^uintptr(0))
			}
			println("runtime: paused at")
			for _, loc := range w.pauseStack {
				if loc.pc == 0 {
					break
				}
				if loc.function != "" {
					// Obviously this doesn't
					// relate to ancestor
					// tracebacks, but this
					// function prints what we
					// want.
					printAncestorTracebackFuncInfo(loc.function, loc.filename, loc.lineno, loc.pc)
				} else {
					println("\tunknown PC ", hex(loc.pc), "\n")
				}
			}
			throw("throwOnGCWork")
		}
	}
}

// put enqueues a pointer for the garbage collector to trace.
// obj must point to the beginning of a heap object or an oblet.
//go:nowritebarrierrec
func (w *gcWork) put(obj uintptr) {
	w.checkPut(obj, nil)

	flushed := false
	wbuf := w.wbuf1
	if wbuf == nil {
		w.init()
		wbuf = w.wbuf1
		// wbuf is empty at this point.
	} else if wbuf.nobj == len(wbuf.obj) {
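		// wbuf1 is full. Swap in wbuf2; if that is also full,
		// flush it to the global full list and replace it with
		// a fresh empty buffer.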
		w.wbuf1, w.wbuf2 = w.wbuf2, w.wbuf1
		wbuf = w.wbuf1
		if wbuf.nobj == len(wbuf.obj) {
			putfull(wbuf)
			w.flushedWork = true
			wbuf = getempty()
			w.wbuf1 = wbuf
			flushed = true
		}
	}

	wbuf.obj[wbuf.nobj] = obj
	wbuf.nobj++

	// If we put a buffer on full, let the GC controller know so
	// it can encourage more workers to run. We delay this until
	// the end of put so that w is in a consistent state, since
	// enlistWorker may itself manipulate w.
	if flushed && gcphase == _GCmark {
		gcController.enlistWorker()
	}
}

// putFast does a put and reports whether it can be done quickly.
// Otherwise it returns false and the caller needs to call put.
//go:nowritebarrierrec
func (w *gcWork) putFast(obj uintptr) bool {
	w.checkPut(obj, nil)

	wbuf := w.wbuf1
	if wbuf == nil {
		return false
	} else if wbuf.nobj == len(wbuf.obj) {
		return false
	}

	wbuf.obj[wbuf.nobj] = obj
	wbuf.nobj++
	return true
}

// putBatch performs a put on every pointer in obj. See put for
// constraints on these pointers.
//
//go:nowritebarrierrec
func (w *gcWork) putBatch(obj []uintptr) {
	if len(obj) == 0 {
		return
	}

	w.checkPut(0, obj)

	flushed := false
	wbuf := w.wbuf1
	if wbuf == nil {
		w.init()
		wbuf = w.wbuf1
	}

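	// Copy obj into w's buffers, flushing full buffers to the
	// global full list as needed.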
	for len(obj) > 0 {
		for wbuf.nobj == len(wbuf.obj) {
			putfull(wbuf)
			w.flushedWork = true
			w.wbuf1, w.wbuf2 = w.wbuf2, getempty()
			wbuf = w.wbuf1
			flushed = true
		}
		n := copy(wbuf.obj[wbuf.nobj:], obj)
		wbuf.nobj += n
		obj = obj[n:]
	}

	if flushed && gcphase == _GCmark {
		gcController.enlistWorker()
	}
}

// tryGet dequeues a pointer for the garbage collector to trace.
//
// If there are no pointers remaining in this gcWork or in the global
// queue, tryGet returns 0.  Note that there may still be pointers in
// other gcWork instances or other caches.
//go:nowritebarrierrec
func (w *gcWork) tryGet() uintptr {
	wbuf := w.wbuf1
	if wbuf == nil {
		w.init()
		wbuf = w.wbuf1
		// wbuf is empty at this point.
	}
	if wbuf.nobj == 0 {
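		// wbuf1 is empty. Swap in wbuf2; if that is also empty,
		// try to replace it with a full buffer from the global
		// list and return the empty one to the empty list.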
		w.wbuf1, w.wbuf2 = w.wbuf2, w.wbuf1
		wbuf = w.wbuf1
		if wbuf.nobj == 0 {
			owbuf := wbuf
			wbuf = trygetfull()
			if wbuf == nil {
				return 0
			}
			putempty(owbuf)
			w.wbuf1 = wbuf
		}
	}

	wbuf.nobj--
	return wbuf.obj[wbuf.nobj]
}

// tryGetFast dequeues a pointer for the garbage collector to trace
// if one is readily available. Otherwise it returns 0 and
// the caller is expected to call tryGet().
//go:nowritebarrierrec
func (w *gcWork) tryGetFast() uintptr {
	wbuf := w.wbuf1
	if wbuf == nil {
		return 0
	}
	if wbuf.nobj == 0 {
		return 0
	}

	wbuf.nobj--
	return wbuf.obj[wbuf.nobj]
}

// dispose returns any cached pointers to the global queue.
// The buffers are being put on the full queue so that the
// write barriers will not simply reacquire them before the
// GC can inspect them. This helps reduce the mutator's
// ability to hide pointers during the concurrent mark phase.
//
//go:nowritebarrierrec
func (w *gcWork) dispose() {
	if wbuf := w.wbuf1; wbuf != nil {
		if wbuf.nobj == 0 {
			putempty(wbuf)
		} else {
			putfull(wbuf)
			w.flushedWork = true
		}
		w.wbuf1 = nil

		wbuf = w.wbuf2
		if wbuf.nobj == 0 {
			putempty(wbuf)
		} else {
			putfull(wbuf)
			w.flushedWork = true
		}
		w.wbuf2 = nil
	}
	if w.bytesMarked != 0 {
		// dispose happens relatively infrequently. If this
		// atomic becomes a problem, we should first try to
		// dispose less and if necessary aggregate in a per-P
		// counter.
		atomic.Xadd64(&work.bytesMarked, int64(w.bytesMarked))
		w.bytesMarked = 0
	}
	if w.scanWork != 0 {
		atomic.Xaddint64(&gcController.scanWork, w.scanWork)
		w.scanWork = 0
	}
}

// balance moves some work that's cached in this gcWork back on the
// global queue.
//go:nowritebarrierrec
func (w *gcWork) balance() {
	if w.wbuf1 == nil {
		return
	}
	if wbuf := w.wbuf2; wbuf.nobj != 0 {
		w.checkPut(0, wbuf.obj[:wbuf.nobj])
		putfull(wbuf)
		w.flushedWork = true
		w.wbuf2 = getempty()
	} else if wbuf := w.wbuf1; wbuf.nobj > 4 {
		w.checkPut(0, wbuf.obj[:wbuf.nobj])
		w.wbuf1 = handoff(wbuf)
		w.flushedWork = true // handoff did putfull
	} else {
		return
	}
	// We flushed a buffer to the full list, so wake a worker.
	if gcphase == _GCmark {
		gcController.enlistWorker()
	}
}

// empty reports whether w has no mark work available.
//go:nowritebarrierrec
func (w *gcWork) empty() bool {
	return w.wbuf1 == nil || (w.wbuf1.nobj == 0 && w.wbuf2.nobj == 0)
}

// Internally, the GC work pool is kept in arrays in work buffers.
// The gcWork interface caches a work buffer until full (or empty) to
// avoid contending on the global work buffer lists.

type workbufhdr struct {
	node lfnode // must be first
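	// nobj is the number of valid pointers in the enclosing
	// workbuf's obj array.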
	nobj int
}

//go:notinheap
type workbuf struct {
	workbufhdr
	// account for the above fields
	obj [(_WorkbufSize - unsafe.Sizeof(workbufhdr{})) / sys.PtrSize]uintptr
}

// workbuf factory routines. These funcs are used to manage the
// workbufs.
// If the GC asks for some work, these are the only routines that
// make wbufs available to the GC.

func (b *workbuf) checknonempty() {
	if b.nobj == 0 {
		throw("workbuf is empty")
	}
}

func (b *workbuf) checkempty() {
	if b.nobj != 0 {
		throw("workbuf is not empty")
	}
}

// getempty pops an empty work buffer off the work.empty list,
// allocating new buffers if none are available.
//go:nowritebarrier
func getempty() *workbuf {
	var b *workbuf
	if work.empty != 0 {
		b = (*workbuf)(work.empty.pop())
		if b != nil {
			b.checkempty()
		}
	}
	if b == nil {
		// Allocate more workbufs.
		var s *mspan
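		// Try to reuse a span of workbufs from the free list.
		// The unlocked check of free.first is only a hint to
		// avoid taking the lock; s is re-read and re-checked
		// for nil under the lock.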
		if work.wbufSpans.free.first != nil {
			lock(&work.wbufSpans.lock)
			s = work.wbufSpans.free.first
			if s != nil {
				work.wbufSpans.free.remove(s)
				work.wbufSpans.busy.insert(s)
			}
			unlock(&work.wbufSpans.lock)
		}
		if s == nil {
			systemstack(func() {
				s = mheap_.allocManual(workbufAlloc/pageSize, &memstats.gc_sys)
			})
			if s == nil {
				throw("out of memory")
			}
			// Record the new span in the busy list.
			lock(&work.wbufSpans.lock)
			work.wbufSpans.busy.insert(s)
			unlock(&work.wbufSpans.lock)
		}
		// Slice up the span into new workbufs. Return one and
		// put the rest on the empty list.
		for i := uintptr(0); i+_WorkbufSize <= workbufAlloc; i += _WorkbufSize {
			newb := (*workbuf)(unsafe.Pointer(s.base() + i))
			newb.nobj = 0
			lfnodeValidate(&newb.node)
			if i == 0 {
				b = newb
			} else {
				putempty(newb)
			}
		}
	}
	return b
}

// putempty puts a workbuf onto the work.empty list.
// Upon entry this goroutine owns b. The lfstack.push relinquishes ownership.
//go:nowritebarrier
func putempty(b *workbuf) {
	b.checkempty()
	work.empty.push(&b.node)
}

// putfull puts the workbuf on the work.full list for the GC.
// putfull accepts partially full buffers so the GC can avoid competing
// with the mutators for ownership of partially full buffers.
//go:nowritebarrier
func putfull(b *workbuf) {
	b.checknonempty()
	work.full.push(&b.node)
}

// trygetfull tries to get a full or partially full work buffer.
// If one is not immediately available, it returns nil.
//go:nowritebarrier
func trygetfull() *workbuf {
	b := (*workbuf)(work.full.pop())
	if b != nil {
		b.checknonempty()
		return b
	}
	return b
}

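// handoff splits b, moving the top half of its pointers into a new
// buffer. It puts b, which keeps the bottom half, on the full list so
// other workers can steal from it, and returns the new buffer.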
//go:nowritebarrier
func handoff(b *workbuf) *workbuf {
	// Make new buffer with half of b's pointers.
	b1 := getempty()
	n := b.nobj / 2
	b.nobj -= n
	b1.nobj = n
	memmove(unsafe.Pointer(&b1.obj[0]), unsafe.Pointer(&b.obj[b.nobj]), uintptr(n)*unsafe.Sizeof(b1.obj[0]))

	// Put b on full list - let first half of b get stolen.
	putfull(b)
	return b1
}

// prepareFreeWorkbufs moves busy workbuf spans to free list so they
// can be freed to the heap. This must only be called when all
// workbufs are on the empty list.
func prepareFreeWorkbufs() {
	lock(&work.wbufSpans.lock)
	if work.full != 0 {
		throw("cannot free workbufs when work.full != 0")
	}
	// Since all workbufs are on the empty list, we don't care
	// which ones are in which spans. We can wipe the entire empty
	// list and move all workbuf spans to the free list.
	work.empty = 0
	work.wbufSpans.free.takeAll(&work.wbufSpans.busy)
	unlock(&work.wbufSpans.lock)
}

// freeSomeWbufs frees some workbufs back to the heap and returns
// true if it should be called again to free more.
func freeSomeWbufs(preemptible bool) bool {
	const batchSize = 64 // ~1–2 µs per span.
	lock(&work.wbufSpans.lock)
	if gcphase != _GCoff || work.wbufSpans.free.isEmpty() {
		unlock(&work.wbufSpans.lock)
		return false
	}
	systemstack(func() {
		gp := getg().m.curg
		for i := 0; i < batchSize && !(preemptible && gp.preempt); i++ {
			span := work.wbufSpans.free.first
			if span == nil {
				break
			}
			work.wbufSpans.free.remove(span)
			mheap_.freeManual(span, &memstats.gc_sys)
		}
	})
	more := !work.wbufSpans.free.isEmpty()
	unlock(&work.wbufSpans.lock)
	return more
}