1// Copyright 2017 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// This implements the write barrier buffer. The write barrier itself
6// is gcWriteBarrier and is implemented in assembly.
7//
8// See mbarrier.go for algorithmic details on the write barrier. This
9// file deals only with the buffer.
10//
11// The write barrier has a fast path and a slow path. The fast path
12// simply enqueues to a per-P write barrier buffer. It's written in
13// assembly and doesn't clobber any general purpose registers, so it
14// doesn't have the usual overheads of a Go call.
15//
16// When the buffer fills up, the write barrier invokes the slow path
17// (wbBufFlush) to flush the buffer to the GC work queues. In this
18// path, since the compiler didn't spill registers, we spill *all*
19// registers and disallow any GC safe points that could observe the
20// stack frame (since we don't know the types of the spilled
21// registers).
22
23package runtime
24
25import (
26	"runtime/internal/atomic"
27	"runtime/internal/sys"
28	"unsafe"
29)
30
31// testSmallBuf forces a small write barrier buffer to stress write
32// barrier flushing.
33const testSmallBuf = false
34
35// wbBuf is a per-P buffer of pointers queued by the write barrier.
36// This buffer is flushed to the GC workbufs when it fills up and on
37// various GC transitions.
38//
39// This is closely related to a "sequential store buffer" (SSB),
40// except that SSBs are usually used for maintaining remembered sets,
41// while this is used for marking.
42type wbBuf struct {
43	// next points to the next slot in buf. It must not be a
44	// pointer type because it can point past the end of buf and
45	// must be updated without write barriers.
46	//
47	// This is a pointer rather than an index to optimize the
48	// write barrier assembly.
49	next uintptr
50
51	// end points to just past the end of buf. It must not be a
52	// pointer type because it points past the end of buf and must
53	// be updated without write barriers.
54	end uintptr
55
56	// buf stores a series of pointers to execute write barriers
57	// on. This must be a multiple of wbBufEntryPointers because
58	// the write barrier only checks for overflow once per entry.
59	buf [wbBufEntryPointers * wbBufEntries]uintptr
60
61	// debugGen causes the write barrier buffer to flush after
62	// every write barrier if equal to gcWorkPauseGen. This is for
63	// debugging #27993. This is only set if debugCachedWork is
64	// set.
65	debugGen uint32
66}
67
68const (
69	// wbBufEntries is the number of write barriers between
70	// flushes of the write barrier buffer.
71	//
72	// This trades latency for throughput amortization. Higher
73	// values amortize flushing overhead more, but increase the
74	// latency of flushing. Higher values also increase the cache
75	// footprint of the buffer.
76	//
77	// TODO: What is the latency cost of this? Tune this value.
78	wbBufEntries = 256
79
80	// wbBufEntryPointers is the number of pointers added to the
81	// buffer by each write barrier.
82	wbBufEntryPointers = 2
83)
84
85// reset empties b by resetting its next and end pointers.
86func (b *wbBuf) reset() {
87	start := uintptr(unsafe.Pointer(&b.buf[0]))
88	b.next = start
89	if writeBarrier.cgo || (debugCachedWork && (throwOnGCWork || b.debugGen == atomic.Load(&gcWorkPauseGen))) {
90		// Effectively disable the buffer by forcing a flush
91		// on every barrier.
92		b.end = uintptr(unsafe.Pointer(&b.buf[wbBufEntryPointers]))
93	} else if testSmallBuf {
94		// For testing, allow two barriers in the buffer. If
95		// we only did one, then barriers of non-heap pointers
96		// would be no-ops. This lets us combine a buffered
97		// barrier with a flush at a later time.
98		b.end = uintptr(unsafe.Pointer(&b.buf[2*wbBufEntryPointers]))
99	} else {
100		b.end = start + uintptr(len(b.buf))*unsafe.Sizeof(b.buf[0])
101	}
102
103	if (b.end-b.next)%(wbBufEntryPointers*unsafe.Sizeof(b.buf[0])) != 0 {
104		throw("bad write barrier buffer bounds")
105	}
106}
107
108// discard resets b's next pointer, but not its end pointer.
109//
110// This must be nosplit because it's called by wbBufFlush.
111//
112//go:nosplit
113func (b *wbBuf) discard() {
114	b.next = uintptr(unsafe.Pointer(&b.buf[0]))
115}
116
117// empty reports whether b contains no pointers.
118func (b *wbBuf) empty() bool {
119	return b.next == uintptr(unsafe.Pointer(&b.buf[0]))
120}
121
122// putFast adds old and new to the write barrier buffer and returns
123// false if a flush is necessary. Callers should use this as:
124//
125//     buf := &getg().m.p.ptr().wbBuf
126//     if !buf.putFast(old, new) {
127//         wbBufFlush(...)
128//     }
129//     ... actual memory write ...
130//
131// The arguments to wbBufFlush depend on whether the caller is doing
132// its own cgo pointer checks. If it is, then this can be
133// wbBufFlush(nil, 0). Otherwise, it must pass the slot address and
134// new.
135//
136// The caller must ensure there are no preemption points during the
137// above sequence. There must be no preemption points while buf is in
138// use because it is a per-P resource. There must be no preemption
139// points between the buffer put and the write to memory because this
140// could allow a GC phase change, which could result in missed write
141// barriers.
142//
143// putFast must be nowritebarrierrec to because write barriers here would
144// corrupt the write barrier buffer. It (and everything it calls, if
145// it called anything) has to be nosplit to avoid scheduling on to a
146// different P and a different buffer.
147//
148//go:nowritebarrierrec
149//go:nosplit
150func (b *wbBuf) putFast(old, new uintptr) bool {
151	p := (*[2]uintptr)(unsafe.Pointer(b.next))
152	p[0] = old
153	p[1] = new
154	b.next += 2 * sys.PtrSize
155	return b.next != b.end
156}
157
158// wbBufFlush flushes the current P's write barrier buffer to the GC
159// workbufs. It is passed the slot and value of the write barrier that
160// caused the flush so that it can implement cgocheck.
161//
162// This must not have write barriers because it is part of the write
163// barrier implementation.
164//
165// This and everything it calls must be nosplit because 1) the stack
166// contains untyped slots from gcWriteBarrier and 2) there must not be
167// a GC safe point between the write barrier test in the caller and
168// flushing the buffer.
169//
170// TODO: A "go:nosplitrec" annotation would be perfect for this.
171//
172//go:nowritebarrierrec
173//go:nosplit
174func wbBufFlush(dst *uintptr, src uintptr) {
175	// Note: Every possible return from this function must reset
176	// the buffer's next pointer to prevent buffer overflow.
177
178	// This *must not* modify its arguments because this
179	// function's argument slots do double duty in gcWriteBarrier
180	// as register spill slots. Currently, not modifying the
181	// arguments is sufficient to keep the spill slots unmodified
182	// (which seems unlikely to change since it costs little and
183	// helps with debugging).
184
185	if getg().m.dying > 0 {
186		// We're going down. Not much point in write barriers
187		// and this way we can allow write barriers in the
188		// panic path.
189		getg().m.p.ptr().wbBuf.discard()
190		return
191	}
192
193	if writeBarrier.cgo && dst != nil {
194		// This must be called from the stack that did the
195		// write. It's nosplit all the way down.
196		cgoCheckWriteBarrier(dst, src)
197		if !writeBarrier.needed {
198			// We were only called for cgocheck.
199			getg().m.p.ptr().wbBuf.discard()
200			return
201		}
202	}
203
204	// Switch to the system stack so we don't have to worry about
205	// the untyped stack slots or safe points.
206	systemstack(func() {
207		if debugCachedWork {
208			// For debugging, include the old value of the
209			// slot and some other data in the traceback.
210			wbBuf := &getg().m.p.ptr().wbBuf
211			var old uintptr
212			if dst != nil {
213				// dst may be nil in direct calls to wbBufFlush.
214				old = *dst
215			}
216			wbBufFlush1Debug(old, wbBuf.buf[0], wbBuf.buf[1], &wbBuf.buf[0], wbBuf.next)
217		} else {
218			wbBufFlush1(getg().m.p.ptr())
219		}
220	})
221}
222
223// wbBufFlush1Debug is a temporary function for debugging issue
224// #27993. It exists solely to add some context to the traceback.
225//
226//go:nowritebarrierrec
227//go:systemstack
228//go:noinline
229func wbBufFlush1Debug(old, buf1, buf2 uintptr, start *uintptr, next uintptr) {
230	wbBufFlush1(getg().m.p.ptr())
231}
232
233// wbBufFlush1 flushes p's write barrier buffer to the GC work queue.
234//
235// This must not have write barriers because it is part of the write
236// barrier implementation, so this may lead to infinite loops or
237// buffer corruption.
238//
239// This must be non-preemptible because it uses the P's workbuf.
240//
241//go:nowritebarrierrec
242//go:systemstack
243func wbBufFlush1(_p_ *p) {
244	// Get the buffered pointers.
245	start := uintptr(unsafe.Pointer(&_p_.wbBuf.buf[0]))
246	n := (_p_.wbBuf.next - start) / unsafe.Sizeof(_p_.wbBuf.buf[0])
247	ptrs := _p_.wbBuf.buf[:n]
248
249	// Poison the buffer to make extra sure nothing is enqueued
250	// while we're processing the buffer.
251	_p_.wbBuf.next = 0
252
253	if useCheckmark {
254		// Slow path for checkmark mode.
255		for _, ptr := range ptrs {
256			shade(ptr)
257		}
258		_p_.wbBuf.reset()
259		return
260	}
261
262	// Mark all of the pointers in the buffer and record only the
263	// pointers we greyed. We use the buffer itself to temporarily
264	// record greyed pointers.
265	//
266	// TODO: Should scanobject/scanblock just stuff pointers into
267	// the wbBuf? Then this would become the sole greying path.
268	//
269	// TODO: We could avoid shading any of the "new" pointers in
270	// the buffer if the stack has been shaded, or even avoid
271	// putting them in the buffer at all (which would double its
272	// capacity). This is slightly complicated with the buffer; we
273	// could track whether any un-shaded goroutine has used the
274	// buffer, or just track globally whether there are any
275	// un-shaded stacks and flush after each stack scan.
276	gcw := &_p_.gcw
277	pos := 0
278	for _, ptr := range ptrs {
279		if ptr < minLegalPointer {
280			// nil pointers are very common, especially
281			// for the "old" values. Filter out these and
282			// other "obvious" non-heap pointers ASAP.
283			//
284			// TODO: Should we filter out nils in the fast
285			// path to reduce the rate of flushes?
286			continue
287		}
288		obj, span, objIndex := findObject(ptr, 0, 0, !usestackmaps)
289		if obj == 0 {
290			continue
291		}
292		if span.isFree(objIndex) {
293			// For gccgo, it is possible that we have a write barrier
294			// writing to unintialized stack memory. So we could see
295			// a bad pointer in the write barrier buffer. Don't mark
296			// it in this case.
297			continue
298		}
299		// TODO: Consider making two passes where the first
300		// just prefetches the mark bits.
301		mbits := span.markBitsForIndex(objIndex)
302		if mbits.isMarked() {
303			continue
304		}
305		mbits.setMarked()
306		if span.spanclass.noscan() {
307			gcw.bytesMarked += uint64(span.elemsize)
308			continue
309		}
310		ptrs[pos] = obj
311		pos++
312	}
313
314	// Enqueue the greyed objects.
315	gcw.putBatch(ptrs[:pos])
316
317	_p_.wbBuf.reset()
318}
319