1// Copyright 2015 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Garbage collector: write barriers.
6//
7// For the concurrent garbage collector, the Go compiler implements
8// updates to pointer-valued fields that may be in heap objects by
9// emitting calls to write barriers. This file contains the actual write barrier
10// implementation, gcmarkwb_m, and the various wrappers called by the
11// compiler to implement pointer assignment, slice assignment,
12// typed memmove, and so on.
13
14package runtime
15
16import (
17	"runtime/internal/sys"
18	"unsafe"
19)
20
21// For gccgo, use go:linkname to rename compiler-called functions to
22// themselves, so that the compiler will export them.
23//
24//go:linkname writebarrierptr runtime.writebarrierptr
25//go:linkname typedmemmove runtime.typedmemmove
26//go:linkname typedslicecopy runtime.typedslicecopy
27
28// gcmarkwb_m is the mark-phase write barrier, the only barrier we have.
29// The rest of this file exists only to make calls to this function.
30//
31// This is a hybrid barrier that combines a Yuasa-style deletion
32// barrier—which shades the object whose reference is being
// overwritten—with a Dijkstra-style insertion barrier—which shades the
34// whose reference is being written. The insertion part of the barrier
35// is necessary while the calling goroutine's stack is grey. In
36// pseudocode, the barrier is:
37//
38//     writePointer(slot, ptr):
39//         shade(*slot)
40//         if current stack is grey:
41//             shade(ptr)
42//         *slot = ptr
43//
44// slot is the destination in Go code.
45// ptr is the value that goes into the slot in Go code.
46//
47// Shade indicates that it has seen a white pointer by adding the referent
48// to wbuf as well as marking it.
49//
50// The two shades and the condition work together to prevent a mutator
51// from hiding an object from the garbage collector:
52//
53// 1. shade(*slot) prevents a mutator from hiding an object by moving
54// the sole pointer to it from the heap to its stack. If it attempts
55// to unlink an object from the heap, this will shade it.
56//
57// 2. shade(ptr) prevents a mutator from hiding an object by moving
58// the sole pointer to it from its stack into a black object in the
59// heap. If it attempts to install the pointer into a black object,
60// this will shade it.
61//
62// 3. Once a goroutine's stack is black, the shade(ptr) becomes
63// unnecessary. shade(ptr) prevents hiding an object by moving it from
64// the stack to the heap, but this requires first having a pointer
65// hidden on the stack. Immediately after a stack is scanned, it only
66// points to shaded objects, so it's not hiding anything, and the
67// shade(*slot) prevents it from hiding any other pointers on its
68// stack.
69//
70// For a detailed description of this barrier and proof of
71// correctness, see https://github.com/golang/proposal/blob/master/design/17503-eliminate-rescan.md
72//
73//
74//
75// Dealing with memory ordering:
76//
77// Both the Yuasa and Dijkstra barriers can be made conditional on the
78// color of the object containing the slot. We chose not to make these
79// conditional because the cost of ensuring that the object holding
80// the slot doesn't concurrently change color without the mutator
81// noticing seems prohibitive.
82//
83// Consider the following example where the mutator writes into
84// a slot and then loads the slot's mark bit while the GC thread
85// writes to the slot's mark bit and then as part of scanning reads
86// the slot.
87//
88// Initially both [slot] and [slotmark] are 0 (nil)
89// Mutator thread          GC thread
90// st [slot], ptr          st [slotmark], 1
91//
92// ld r1, [slotmark]       ld r2, [slot]
93//
94// Without an expensive memory barrier between the st and the ld, the final
95// result on most HW (including 386/amd64) can be r1==r2==0. This is a classic
96// example of what can happen when loads are allowed to be reordered with older
97// stores (avoiding such reorderings lies at the heart of the classic
98// Peterson/Dekker algorithms for mutual exclusion). Rather than require memory
99// barriers, which will slow down both the mutator and the GC, we always grey
100// the ptr object regardless of the slot's color.
101//
102// Another place where we intentionally omit memory barriers is when
103// accessing mheap_.arena_used to check if a pointer points into the
104// heap. On relaxed memory machines, it's possible for a mutator to
105// extend the size of the heap by updating arena_used, allocate an
106// object from this new region, and publish a pointer to that object,
107// but for tracing running on another processor to observe the pointer
108// but use the old value of arena_used. In this case, tracing will not
109// mark the object, even though it's reachable. However, the mutator
110// is guaranteed to execute a write barrier when it publishes the
111// pointer, so it will take care of marking the object. A general
112// consequence of this is that the garbage collector may cache the
113// value of mheap_.arena_used. (See issue #9984.)
114//
115//
116// Stack writes:
117//
118// The compiler omits write barriers for writes to the current frame,
119// but if a stack pointer has been passed down the call stack, the
120// compiler will generate a write barrier for writes through that
121// pointer (because it doesn't know it's not a heap pointer).
122//
123// One might be tempted to ignore the write barrier if slot points
// into the stack. Don't do it! Mark termination only re-scans
125// frames that have potentially been active since the concurrent scan,
126// so it depends on write barriers to track changes to pointers in
127// stack frames that have not been active.
128//
129//
130// Global writes:
131//
132// The Go garbage collector requires write barriers when heap pointers
133// are stored in globals. Many garbage collectors ignore writes to
134// globals and instead pick up global -> heap pointers during
135// termination. This increases pause time, so we instead rely on write
136// barriers for writes to globals so that we don't have to rescan
// globals during mark termination.
138//
139//
140// Publication ordering:
141//
142// The write barrier is *pre-publication*, meaning that the write
143// barrier happens prior to the *slot = ptr write that may make ptr
144// reachable by some goroutine that currently cannot reach it.
145//
146//
//go:nowritebarrierrec
//go:systemstack
func gcmarkwb_m(slot *uintptr, ptr uintptr) {
	if writeBarrier.needed {
		// Note: This turns bad pointer writes into bad
		// pointer reads, which could be confusing. We avoid
		// reading from obviously bad pointers, which should
		// take care of the vast majority of these. We could
		// patch this up in the signal handler, or use XCHG to
		// combine the read and the write. Checking inheap is
		// insufficient since we need to track changes to
		// roots outside the heap.
		//
		// Note: profbuf.go omits a barrier during signal handler
		// profile logging; that's safe only because this deletion barrier exists.
		// If we remove the deletion barrier, we'll have to work out
		// a new way to handle the profile logging.
		//
		// Yuasa (deletion) half: shade the pointer currently in
		// the slot. The slot address is first checked against
		// minPhysPageSize so we never dereference an obviously
		// bogus (e.g. nil or small-integer) slot pointer.
		if slot1 := uintptr(unsafe.Pointer(slot)); slot1 >= minPhysPageSize {
			if optr := *slot; optr != 0 {
				shade(optr)
			}
		}
		// Dijkstra (insertion) half: shade the incoming pointer
		// if it points into the heap.
		// TODO: Make this conditional on the caller's stack color.
		if ptr != 0 && inheap(ptr) {
			shade(ptr)
		}
	}
}
175
// writebarrierptr_prewrite1 invokes a write barrier for *dst = src
// prior to the write happening.
//
// Write barrier calls must not happen during critical GC and scheduler
// related operations. In particular there are times when the GC assumes
// that the world is stopped but scheduler related code is still being
// executed, dealing with syscalls, dealing with putting gs on runnable
// queues and so forth. This code cannot execute write barriers because
// the GC might drop them on the floor. Stopping the world involves removing
// the p associated with an m. We use the fact that m.p == nil to indicate
// that we are in one of these critical sections and throw if the write is of
// a pointer to a heap object.
//go:nosplit
func writebarrierptr_prewrite1(dst *uintptr, src uintptr) {
	mp := acquirem()
	if mp.inwb || mp.dying > 0 {
		// mp.inwb means a barrier is already running on this m,
		// so don't recurse. mp.dying > 0: we explicitly allow
		// write barriers in startpanic_m, since we're going down
		// anyway. Ignore them here.
		releasem(mp)
		return
	}
	// Run the barrier on the system stack: gcmarkwb_m is
	// go:systemstack and must not trigger a stack split.
	systemstack(func() {
		if mp.p == 0 && memstats.enablegc && !mp.inwb && inheap(src) {
			throw("writebarrierptr_prewrite1 called with mp.p == nil")
		}
		mp.inwb = true
		gcmarkwb_m(dst, src)
	})
	mp.inwb = false
	releasem(mp)
}
207
// writebarrierptr performs the write *dst = src, running the write
// barrier before the store.
//
// NOTE: Really dst *unsafe.Pointer, src unsafe.Pointer,
// but if we do that, Go inserts a write barrier on *dst = src.
//go:nosplit
func writebarrierptr(dst *uintptr, src uintptr) {
	if writeBarrier.cgo {
		cgoCheckWriteBarrier(dst, src)
	}
	if !writeBarrier.needed {
		// No GC in progress: just do the store.
		*dst = src
		return
	}
	if src != 0 && src < minPhysPageSize {
		// A non-nil value below the minimum physical page size
		// cannot be a real pointer; report it loudly rather than
		// letting the barrier turn a bad write into a bad read.
		systemstack(func() {
			print("runtime: writebarrierptr *", dst, " = ", hex(src), "\n")
			throw("bad pointer in write barrier")
		})
	}
	writebarrierptr_prewrite1(dst, src)
	*dst = src
}
228
// writebarrierptr_prewrite is like writebarrierptr, but the store
// will be performed by the caller after this call. The caller must
// not allow preemption between this call and the write.
//
//go:nosplit
func writebarrierptr_prewrite(dst *uintptr, src uintptr) {
	if writeBarrier.cgo {
		cgoCheckWriteBarrier(dst, src)
	}
	if !writeBarrier.needed {
		// No GC in progress: nothing to do; the caller stores.
		return
	}
	if src != 0 && src < minPhysPageSize {
		// Obviously bogus pointer value; see writebarrierptr.
		systemstack(func() { throw("bad pointer in write barrier") })
	}
	writebarrierptr_prewrite1(dst, src)
}
246
// typedmemmove copies a value of type t to dst from src.
// Must be nosplit, see #16026.
//
// TODO: Perfect for go:nosplitrec since we can't have a safe point
// anywhere in the bulk barrier or memmove.
//
//go:nosplit
func typedmemmove(typ *_type, dst, src unsafe.Pointer) {
	// Run the bulk write barrier first, but only if the type
	// actually contains pointers.
	if typ.kind&kindNoPointers == 0 {
		bulkBarrierPreWrite(uintptr(dst), uintptr(src), typ.size)
	}
	// There's a race here: if some other goroutine can write to
	// src, it may change some pointer in src after we've
	// performed the write barrier but before we perform the
	// memory copy. This safe because the write performed by that
	// other goroutine must also be accompanied by a write
	// barrier, so at worst we've unnecessarily greyed the old
	// pointer that was in src.
	memmove(dst, src, typ.size)
	if writeBarrier.cgo {
		cgoCheckMemmove(typ, dst, src, 0, typ.size)
	}
}
270
271//go:linkname reflect_typedmemmove reflect.typedmemmove
272func reflect_typedmemmove(typ *_type, dst, src unsafe.Pointer) {
273	if raceenabled {
274		raceWriteObjectPC(typ, dst, getcallerpc(), funcPC(reflect_typedmemmove))
275		raceReadObjectPC(typ, src, getcallerpc(), funcPC(reflect_typedmemmove))
276	}
277	if msanenabled {
278		msanwrite(dst, typ.size)
279		msanread(src, typ.size)
280	}
281	typedmemmove(typ, dst, src)
282}
283
284// typedmemmovepartial is like typedmemmove but assumes that
285// dst and src point off bytes into the value and only copies size bytes.
286//go:linkname reflect_typedmemmovepartial reflect.typedmemmovepartial
287func reflect_typedmemmovepartial(typ *_type, dst, src unsafe.Pointer, off, size uintptr) {
288	if writeBarrier.needed && typ.kind&kindNoPointers == 0 && size >= sys.PtrSize {
289		// Pointer-align start address for bulk barrier.
290		adst, asrc, asize := dst, src, size
291		if frag := -off & (sys.PtrSize - 1); frag != 0 {
292			adst = add(dst, frag)
293			asrc = add(src, frag)
294			asize -= frag
295		}
296		bulkBarrierPreWrite(uintptr(adst), uintptr(asrc), asize&^(sys.PtrSize-1))
297	}
298
299	memmove(dst, src, size)
300	if writeBarrier.cgo {
301		cgoCheckMemmove(typ, dst, src, off, size)
302	}
303}
304
// typedslicecopy copies min(dst.len, src.len) elements of type typ
// from src to dst, running the bulk write barrier first, and returns
// the number of elements copied.
//go:nosplit
func typedslicecopy(typ *_type, dst, src slice) int {
	// TODO(rsc): If typedslicecopy becomes faster than calling
	// typedmemmove repeatedly, consider using during func growslice.
	n := dst.len
	if n > src.len {
		n = src.len
	}
	if n == 0 {
		return 0
	}
	dstp := dst.array
	srcp := src.array

	// The compiler emits calls to typedslicecopy before
	// instrumentation runs, so unlike the other copying and
	// assignment operations, it's not instrumented in the calling
	// code and needs its own instrumentation.
	if raceenabled {
		callerpc := getcallerpc()
		pc := funcPC(slicecopy)
		racewriterangepc(dstp, uintptr(n)*typ.size, callerpc, pc)
		racereadrangepc(srcp, uintptr(n)*typ.size, callerpc, pc)
	}
	if msanenabled {
		msanwrite(dstp, uintptr(n)*typ.size)
		msanread(srcp, uintptr(n)*typ.size)
	}

	if writeBarrier.cgo {
		cgoCheckSliceCopy(typ, dst, src, n)
	}

	// Note: No point in checking typ.kind&kindNoPointers here:
	// compiler only emits calls to typedslicecopy for types with pointers,
	// and growslice and reflect_typedslicecopy check for pointers
	// before calling typedslicecopy.
	size := uintptr(n) * typ.size
	if writeBarrier.needed {
		// Barrier must run before the copy; see typedmemmove.
		bulkBarrierPreWrite(uintptr(dstp), uintptr(srcp), size)
	}
	// See typedmemmove for a discussion of the race between the
	// barrier and memmove.
	memmove(dstp, srcp, size)
	return n
}
351
352//go:linkname reflect_typedslicecopy reflect.typedslicecopy
353func reflect_typedslicecopy(elemType *_type, dst, src slice) int {
354	if elemType.kind&kindNoPointers != 0 {
355		n := dst.len
356		if n > src.len {
357			n = src.len
358		}
359		if n == 0 {
360			return 0
361		}
362
363		size := uintptr(n) * elemType.size
364		if raceenabled {
365			callerpc := getcallerpc()
366			pc := funcPC(reflect_typedslicecopy)
367			racewriterangepc(dst.array, size, callerpc, pc)
368			racereadrangepc(src.array, size, callerpc, pc)
369		}
370		if msanenabled {
371			msanwrite(dst.array, size)
372			msanread(src.array, size)
373		}
374
375		memmove(dst.array, src.array, size)
376		return n
377	}
378	return typedslicecopy(elemType, dst, src)
379}
380
// typedmemclr clears the typed memory at ptr with type typ. The
// memory at ptr must already be initialized (and hence in type-safe
// state). If the memory is being initialized for the first time, see
// memclrNoHeapPointers.
//
// If the caller knows that typ has pointers, it can alternatively
// call memclrHasPointers.
//
//go:nosplit
func typedmemclr(typ *_type, ptr unsafe.Pointer) {
	if typ.kind&kindNoPointers == 0 {
		// Shade the pointers being overwritten before clearing;
		// a src of 0 tells the bulk barrier this is a clear, not
		// a copy, so only the deletion half runs.
		bulkBarrierPreWrite(uintptr(ptr), 0, typ.size)
	}
	memclrNoHeapPointers(ptr, typ.size)
}
396
// memclrHasPointers clears n bytes of typed memory starting at ptr.
// The caller must ensure that the type of the object at ptr has
// pointers, usually by checking typ.kind&kindNoPointers. However, ptr
// does not have to point to the start of the allocation.
//
//go:nosplit
func memclrHasPointers(ptr unsafe.Pointer, n uintptr) {
	// src == 0 makes bulkBarrierPreWrite run only the deletion
	// (Yuasa) half of the barrier before the memory is cleared.
	bulkBarrierPreWrite(uintptr(ptr), 0, n)
	memclrNoHeapPointers(ptr, n)
}
407