// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Goroutine preemption
//
// A goroutine can be preempted at any safe-point. Currently, there
// are a few categories of safe-points:
//
// 1. A blocked safe-point occurs for the duration that a goroutine is
//    descheduled, blocked on synchronization, or in a system call.
//
// 2. Synchronous safe-points occur when a running goroutine checks
//    for a preemption request.
//
// 3. Asynchronous safe-points occur at any instruction in user code
//    where the goroutine can be safely paused and a conservative
//    stack and register scan can find stack roots. The runtime can
//    stop a goroutine at an async safe-point using a signal.
//
// At both blocked and synchronous safe-points, a goroutine's CPU
// state is minimal and the garbage collector has complete information
// about its entire stack. This makes it possible to deschedule a
// goroutine with minimal space, and to precisely scan a goroutine's
// stack.
//
// Synchronous safe-points are implemented by overloading the stack
// bound check in function prologues. To preempt a goroutine at the
// next synchronous safe-point, the runtime poisons the goroutine's
// stack bound to a value that will cause the next stack bound check
// to fail and enter the stack growth implementation, which will
// detect that it was actually a preemption and redirect to preemption
// handling.
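//
// In sketch form (pseudocode, not code in this file; the names
// stackguard0 and stackPreempt are the gc toolchain's, used here
// for illustration):
//
//	// compiler-emitted function prologue, roughly:
//	if SP < g.stackguard0 {
//		call morestack
//	}
//
//	// to request preemption at the next prologue check:
//	gp.stackguard0 = stackPreempt // poison: larger than any real SP
//
// The stack growth path then recognizes the poison value and branches
// to preemption handling instead of growing the stack.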
//
// Preemption at asynchronous safe-points is implemented by suspending
// the thread using an OS mechanism (e.g., signals) and inspecting its
// state to determine if the goroutine was at an asynchronous
// safe-point. Because the thread suspension itself is generally
// asynchronous, it also re-checks whether the running goroutine wants
// to be preempted, since this could have changed. If all conditions
// are satisfied, it adjusts the signal context to make it look like
// the signaled thread just called asyncPreempt and resumes the thread.
// asyncPreempt spills all registers and enters the scheduler.
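//
// Roughly, in pseudocode (the handler shown is illustrative; the real
// signal plumbing lives elsewhere in the runtime):
//
//	// in the preemption signal handler:
//	if wantAsyncPreempt(gp) {
//		if ok, newpc := isAsyncSafePoint(gp, pc); ok {
//			// rewrite the signal context so the thread
//			// resumes in asyncPreempt and returns to newpc
//		}
//	}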

// (An alternative would be to preempt in the signal handler itself.
// This would let the OS save and restore the register state and the
// runtime would only need to know how to extract potentially
// pointer-containing registers from the signal context. However, this
// would consume an M for every preempted G, and the scheduler itself
// is not designed to run from a signal handler, as it tends to
// allocate memory and start threads in the preemption path.)

package runtime

import (
	"runtime/internal/atomic"
)

type suspendGState struct {
	g *g

	// dead indicates the goroutine was not suspended because it
	// is dead. This goroutine could be reused after the dead
	// state was observed, so the caller must not assume that it
	// remains dead.
	dead bool

	// stopped indicates that this suspendG transitioned the G to
	// _Gwaiting via g.preemptStop and thus is responsible for
	// readying it when done.
	stopped bool
}

// suspendG suspends goroutine gp at a safe-point and returns the
// state of the suspended goroutine. The caller gets read access to
// the goroutine until it calls resumeG.
//
// It is safe for multiple callers to attempt to suspend the same
// goroutine at the same time. The goroutine may execute between
// subsequent successful suspend operations. The current
// implementation grants exclusive access to the goroutine, and hence
// multiple callers will serialize. However, the intent is to grant
// shared read access, so please don't depend on exclusive access.
//
// This must be called from the system stack and the user goroutine on
// the current M (if any) must be in a preemptible state. This
// prevents deadlocks where two goroutines attempt to suspend each
// other and both are in non-preemptible states. There are other ways
// to resolve this deadlock, but this seems simplest.
//
// TODO(austin): What if we instead required this to be called from a
// user goroutine? Then we could deschedule the goroutine while
// waiting instead of blocking the thread. If two goroutines tried to
// suspend each other, one of them would win and the other wouldn't
// complete the suspend until it was resumed. We would have to be
// careful that they couldn't actually queue up suspend for each other
// and then both be suspended. This would also avoid the need for a
// kernel context switch in the synchronous case because we could just
// directly schedule the waiter. The context switch is unavoidable in
// the signal case.
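//
// A typical use, in sketch form (illustrative only; real callers such
// as the GC's stack scan do their reading between the two calls):
//
//	state := suspendG(gp)
//	if !state.dead {
//		// ... read-only access to gp and its stack ...
//	}
//	resumeG(state)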
//
//go:systemstack
func suspendG(gp *g) suspendGState {
	if mp := getg().m; mp.curg != nil && readgstatus(mp.curg) == _Grunning {
		// Since we're on the system stack of this M, the user
		// G is stuck at an unsafe point. If another goroutine
		// were to try to preempt m.curg, it could deadlock.
		throw("suspendG from non-preemptible goroutine")
	}

	// See https://golang.org/cl/21503 for justification of the yield delay.
	const yieldDelay = 10 * 1000
	var nextYield int64

	// Drive the goroutine to a preemption point.
	stopped := false
	var asyncM *m
	var asyncGen uint32
	var nextPreemptM int64
	for i := 0; ; i++ {
		switch s := readgstatus(gp); s {
		default:
			if s&_Gscan != 0 {
				// Someone else is suspending it. Wait
				// for them to finish.
				//
				// TODO: It would be nicer if we could
				// coalesce suspends.
				break
			}

			dumpgstatus(gp)
			throw("invalid g status")

		case _Gdead:
			// Nothing to suspend.
			//
			// preemptStop may need to be cleared, but
			// doing that here could race with goroutine
			// reuse. Instead, goexit0 clears it.
			return suspendGState{dead: true}

		case _Gcopystack:
			// The stack is being copied. We need to wait
			// until this is done.

		case _Gexitingsyscall:
			// This is a transient state. Try again.

		case _Gpreempted:
			// We (or someone else) suspended the G. Claim
			// ownership of it by transitioning it to
			// _Gwaiting.
			if !casGFromPreempted(gp, _Gpreempted, _Gwaiting) {
				break
			}

			// We stopped the G, so we have to ready it later.
			stopped = true

			s = _Gwaiting
			fallthrough

		case _Grunnable, _Gsyscall, _Gwaiting:
			// Claim goroutine by setting scan bit.
			// This may race with execution or readying of gp.
			// The scan bit keeps it from transitioning state.
			if !castogscanstatus(gp, s, s|_Gscan) {
				break
			}

			// Clear the preemption request. It's safe to
			// reset the stack guard because we hold the
			// _Gscan bit and thus own the stack.
			gp.preemptStop = false
			gp.preempt = false

			// The goroutine was already at a safe-point
			// and we've now locked that in.
			//
			// TODO: It would be much better if we didn't
			// leave it in _Gscan, but instead gently
			// prevented its scheduling until resumption.
			// Maybe we only use this to bump a suspended
			// count and the scheduler skips suspended
			// goroutines? That wouldn't be enough for
			// {_Gsyscall,_Gwaiting} -> _Grunning. Maybe
			// for all those transitions we need to check
			// suspended and deschedule?
			return suspendGState{g: gp, stopped: stopped}

		case _Grunning:
			// Optimization: if there is already a pending preemption request
			// (from the previous loop iteration), don't bother with the atomics.
			if asyncM != nil && gp.preemptStop && gp.preempt && asyncM == gp.m && atomic.Load(&asyncM.preemptGen) == asyncGen {
				break
			}

			// Temporarily block state transitions.
			if !castogscanstatus(gp, _Grunning, _Gscanrunning) {
				break
			}

			// Request synchronous preemption.
			gp.preemptStop = true
			gp.preempt = true

			// Prepare for asynchronous preemption.
			asyncM2 := gp.m
			asyncGen2 := atomic.Load(&asyncM2.preemptGen)
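			// Send a new async request only if gp has moved
			// to a different M, or asyncM has completed a
			// preemption since our last request (preemptGen
			// counts completed async preemptions), meaning
			// the earlier request was consumed.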
			needAsync := asyncM != asyncM2 || asyncGen != asyncGen2
			asyncM = asyncM2
			asyncGen = asyncGen2

			casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning)

			// Send asynchronous preemption. We do this
			// after CASing the G back to _Grunning
			// because preemptM may be synchronous and we
			// don't want to catch the G just spinning on
			// its status.
			if preemptMSupported && debug.asyncpreemptoff == 0 && needAsync {
				// Rate limit preemptM calls. This is
				// particularly important on Windows
				// where preemptM is actually
				// synchronous and the spin loop here
				// can lead to live-lock.
				now := nanotime()
				if now >= nextPreemptM {
					nextPreemptM = now + yieldDelay/2
					preemptM(asyncM)
				}
			}
		}

		// TODO: Don't busy wait. This loop should really only
		// be a simple read/decide/CAS loop that only fails if
		// there's an active race. Once the CAS succeeds, we
		// should queue up the preemption (which will require
		// it to be reliable in the _Grunning case, not
		// best-effort) and then sleep until we're notified
		// that the goroutine is suspended.
		if i == 0 {
			nextYield = nanotime() + yieldDelay
		}
		if nanotime() < nextYield {
			procyield(10)
		} else {
			osyield()
			nextYield = nanotime() + yieldDelay/2
		}
	}
}

// resumeG undoes the effects of suspendG, allowing the suspended
// goroutine to continue from its current safe-point.
func resumeG(state suspendGState) {
	if state.dead {
		// We didn't actually stop anything.
		return
	}

	gp := state.g
	switch s := readgstatus(gp); s {
	default:
		dumpgstatus(gp)
		throw("unexpected g status")

	case _Grunnable | _Gscan,
		_Gwaiting | _Gscan,
		_Gsyscall | _Gscan:
		casfrom_Gscanstatus(gp, s, s&^_Gscan)
	}

	if state.stopped {
		// We stopped it, so we need to re-schedule it.
		ready(gp, 0, true)
	}
}

// canPreemptM reports whether mp is in a state that is safe to preempt.
//
// It is nosplit because it has nosplit callers.
//
//go:nosplit
func canPreemptM(mp *m) bool {
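	// To be preemptible, mp must not hold runtime locks or be in a
	// non-preemptible critical section (mp.locks), must not be in
	// the middle of an allocation (mp.mallocing), must not have
	// preemption explicitly disabled (mp.preemptoff records the
	// reason when it does), and its P must be in _Prunning.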
	return mp.locks == 0 && mp.mallocing == 0 && mp.preemptoff == "" && mp.p.ptr().status == _Prunning
}

//go:generate go run mkpreempt.go

// asyncPreempt saves all user registers and calls asyncPreempt2.
//
// When stack scanning encounters an asyncPreempt frame, it scans that
// frame and its parent frame conservatively.
//
// asyncPreempt is implemented in assembly.
func asyncPreempt()

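// asyncPreempt2 is the Go continuation of asyncPreempt. If a stop was
// requested, it parks the goroutine for the suspender; otherwise it
// yields as for an ordinary preemption.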
//go:nosplit
func asyncPreempt2() {
	gp := getg()
	gp.asyncSafePoint = true
	if gp.preemptStop {
		mcall(preemptPark)
	} else {
		mcall(gopreempt_m)
	}
	gp.asyncSafePoint = false
}

// wantAsyncPreempt returns whether an asynchronous preemption is
// queued for gp.
func wantAsyncPreempt(gp *g) bool {
	// Check both the G and the P.
	return (gp.preempt || gp.m.p != 0 && gp.m.p.ptr().preempt) && readgstatus(gp)&^_Gscan == _Grunning
}

// isAsyncSafePoint reports whether gp at instruction PC is an
// asynchronous safe point. This indicates that:
//
// 1. It's safe to suspend gp and conservatively scan its stack and
// registers. There are no potentially hidden pointer values and it's
// not in the middle of an atomic sequence like a write barrier.
//
// 2. gp has enough stack space to inject the asyncPreempt call.
//
// 3. It's generally safe to interact with the runtime, even if we're
// in a signal handler stopped here. For example, there are no runtime
// locks held, so acquiring a runtime lock won't self-deadlock.
//
// In some cases the PC is safe for asynchronous preemption but the
// resumption PC also needs to be adjusted. The new PC is returned in
// the second result.
func isAsyncSafePoint(gp *g, pc uintptr) (bool, uintptr) {
	mp := gp.m

	// Only user Gs can have safe-points. We check this first
	// because it's extremely common that we'll catch mp in the
	// scheduler processing this G's preemption.
	if mp.curg != gp {
		return false, 0
	}

	// Check M state.
	if mp.p == 0 || !canPreemptM(mp) {
		return false, 0
	}

	// Check if PC is an unsafe-point.
	f := FuncForPC(pc)
	if f == nil {
		// Not Go code.
		return false, 0
	}
	name := f.Name()
	if hasPrefix(name, "runtime.") ||
		hasPrefix(name, "runtime_1internal_1") ||
		hasPrefix(name, "reflect.") {
		// For now we never async preempt the runtime or
		// anything closely tied to the runtime. Known issues
		// include: various points in the scheduler ("don't
		// preempt between here and here"), much of the defer
		// implementation (untyped info on stack), bulk write
		// barriers (write barrier check),
		// reflect.{makeFuncStub,methodValueCall}.
		//
		// TODO(austin): We should improve this, or opt things
		// in incrementally.
		return false, 0
	}
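	// This implementation never needs to adjust the resumption PC,
	// so return the PC unchanged.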
	return true, pc
}