1// Copyright 2014 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package runtime
6
7import (
8	"runtime/internal/atomic"
9	"runtime/internal/sys"
10	"unsafe"
11)
12
13// Functions called by C code.
14//go:linkname main runtime.main
15//go:linkname goparkunlock runtime.goparkunlock
16//go:linkname newextram runtime.newextram
17//go:linkname acquirep runtime.acquirep
18//go:linkname releasep runtime.releasep
19//go:linkname incidlelocked runtime.incidlelocked
20//go:linkname schedinit runtime.schedinit
21//go:linkname ready runtime.ready
22//go:linkname gcprocs runtime.gcprocs
23//go:linkname stopm runtime.stopm
24//go:linkname handoffp runtime.handoffp
25//go:linkname wakep runtime.wakep
26//go:linkname stoplockedm runtime.stoplockedm
27//go:linkname schedule runtime.schedule
28//go:linkname execute runtime.execute
29//go:linkname goexit1 runtime.goexit1
30//go:linkname reentersyscall runtime.reentersyscall
31//go:linkname reentersyscallblock runtime.reentersyscallblock
32//go:linkname exitsyscall runtime.exitsyscall
33//go:linkname gfget runtime.gfget
34//go:linkname helpgc runtime.helpgc
35//go:linkname kickoff runtime.kickoff
36//go:linkname mstart1 runtime.mstart1
37//go:linkname mexit runtime.mexit
38//go:linkname globrunqput runtime.globrunqput
39//go:linkname pidleget runtime.pidleget
40
41// Exported for test (see runtime/testdata/testprogcgo/dropm_stub.go).
42//go:linkname getm runtime.getm
43
44// Function called by misc/cgo/test.
45//go:linkname lockedOSThread runtime.lockedOSThread
46
47// C functions for thread and context management.
48func newosproc(*m)
49
50//go:noescape
51func malg(bool, bool, *unsafe.Pointer, *uintptr) *g
52
53//go:noescape
54func resetNewG(*g, *unsafe.Pointer, *uintptr)
55func gogo(*g)
56func setGContext()
57func makeGContext(*g, unsafe.Pointer, uintptr)
58func getTraceback(me, gp *g)
59func gtraceback(*g)
60func _cgo_notify_runtime_init_done()
61func alreadyInCallers() bool
62func stackfree(*g)
63
64// Functions created by the compiler.
65//extern __go_init_main
66func main_init()
67
68//extern main.main
69func main_main()
70
71var buildVersion = sys.TheVersion
72
73// Goroutine scheduler
74// The scheduler's job is to distribute ready-to-run goroutines over worker threads.
75//
76// The main concepts are:
77// G - goroutine.
78// M - worker thread, or machine.
79// P - processor, a resource that is required to execute Go code.
80//     M must have an associated P to execute Go code, however it can be
81//     blocked or in a syscall w/o an associated P.
82//
83// Design doc at https://golang.org/s/go11sched.
84
85// Worker thread parking/unparking.
86// We need to balance between keeping enough running worker threads to utilize
87// available hardware parallelism and parking excessive running worker threads
88// to conserve CPU resources and power. This is not simple for two reasons:
89// (1) scheduler state is intentionally distributed (in particular, per-P work
90// queues), so it is not possible to compute global predicates on fast paths;
91// (2) for optimal thread management we would need to know the future (don't park
// a worker thread when a new goroutine will be readied in the near future).
93//
94// Three rejected approaches that would work badly:
95// 1. Centralize all scheduler state (would inhibit scalability).
// 2. Direct goroutine handoff. That is, when we ready a new goroutine and there
//    is a spare P, unpark a thread and hand it the P and the goroutine.
//    This would lead to thread state thrashing, as the thread that readied the
//    goroutine can be out of work the very next moment, and we would then need to
//    park it. It would also destroy locality of computation, since we want to keep
//    dependent goroutines on the same thread, and it would introduce additional latency.
102// 3. Unpark an additional thread whenever we ready a goroutine and there is an
103//    idle P, but don't do handoff. This would lead to excessive thread parking/
104//    unparking as the additional threads will instantly park without discovering
105//    any work to do.
106//
107// The current approach:
// We unpark an additional thread when we ready a goroutine if (1) there is an
// idle P and (2) there are no "spinning" worker threads. A worker thread is considered
110// spinning if it is out of local work and did not find work in global run queue/
111// netpoller; the spinning state is denoted in m.spinning and in sched.nmspinning.
112// Threads unparked this way are also considered spinning; we don't do goroutine
113// handoff so such threads are out of work initially. Spinning threads do some
114// spinning looking for work in per-P run queues before parking. If a spinning
115// thread finds work it takes itself out of the spinning state and proceeds to
116// execution. If it does not find work it takes itself out of the spinning state
117// and then parks.
// If there is at least one spinning thread (sched.nmspinning>0), we don't unpark
119// new threads when readying goroutines. To compensate for that, if the last spinning
120// thread finds work and stops spinning, it must unpark a new spinning thread.
121// This approach smooths out unjustified spikes of thread unparking,
122// but at the same time guarantees eventual maximal CPU parallelism utilization.
123//
124// The main implementation complication is that we need to be very careful during
125// spinning->non-spinning thread transition. This transition can race with submission
// of a new goroutine, and one side or the other must unpark an additional worker
// thread. If both fail to do so, we can end up with semi-persistent CPU
128// underutilization. The general pattern for goroutine readying is: submit a goroutine
129// to local work queue, #StoreLoad-style memory barrier, check sched.nmspinning.
130// The general pattern for spinning->non-spinning transition is: decrement nmspinning,
131// #StoreLoad-style memory barrier, check all per-P work queues for new work.
132// Note that all this complexity does not apply to global run queue as we are not
133// sloppy about thread unparking when submitting to global queue. Also see comments
134// for nmspinning manipulation.
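//
// As a concrete illustration of the two racing paths described above, the
// readying side and the spinning->non-spinning side look roughly like the
// sketch below (illustrative only; the real code is in ready/runqput in this
// file and in findrunnable/resetspinning elsewhere in the scheduler, and the
// atomic operations supply the #StoreLoad-style ordering):
//
//	// Readying a goroutine: publish the work, then check for spinners.
//	runqput(_p_, gp, next)
//	if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 {
//		wakep() // nobody is spinning, so unpark a spinning M
//	}
//
//	// Leaving the spinning state: retire first, then re-check for work.
//	atomic.Xadd(&sched.nmspinning, -1)
//	// Re-check all per-P run queues; if work appears now, the last spinner
//	// must call wakep() again so the new work is not stranded.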
135
136var (
137	m0 m
138	g0 g
139)
140
141// main_init_done is a signal used by cgocallbackg that initialization
142// has been completed. It is made before _cgo_notify_runtime_init_done,
143// so all cgo calls can rely on it existing. When main_init is complete,
144// it is closed, meaning cgocallbackg can reliably receive from it.
145var main_init_done chan bool
146
147// mainStarted indicates that the main M has started.
148var mainStarted bool
149
150// runtimeInitTime is the nanotime() at which the runtime started.
151var runtimeInitTime int64
152
153// Value to use for signal mask for newly created M's.
154var initSigmask sigset
155
156// The main goroutine.
157func main() {
158	g := getg()
159
160	// Max stack size is 1 GB on 64-bit, 250 MB on 32-bit.
161	// Using decimal instead of binary GB and MB because
162	// they look nicer in the stack overflow failure message.
163	if sys.PtrSize == 8 {
164		maxstacksize = 1000000000
165	} else {
166		maxstacksize = 250000000
167	}
168
169	// Allow newproc to start new Ms.
170	mainStarted = true
171
172	systemstack(func() {
173		newm(sysmon, nil)
174	})
175
176	// Lock the main goroutine onto this, the main OS thread,
177	// during initialization. Most programs won't care, but a few
178	// do require certain calls to be made by the main thread.
179	// Those can arrange for main.main to run in the main thread
180	// by calling runtime.LockOSThread during initialization
181	// to preserve the lock.
182	lockOSThread()
183
184	if g.m != &m0 {
185		throw("runtime.main not on m0")
186	}
187
188	// Defer unlock so that runtime.Goexit during init does the unlock too.
189	needUnlock := true
190	defer func() {
191		if needUnlock {
192			unlockOSThread()
193		}
194	}()
195
196	// Record when the world started. Must be after runtime_init
197	// because nanotime on some platforms depends on startNano.
198	runtimeInitTime = nanotime()
199
200	main_init_done = make(chan bool)
201	if iscgo {
202		// Start the template thread in case we enter Go from
203		// a C-created thread and need to create a new thread.
204		startTemplateThread()
205		_cgo_notify_runtime_init_done()
206	}
207
208	fn := main_init // make an indirect call, as the linker doesn't know the address of the main package when laying down the runtime
209	fn()
210	close(main_init_done)
211
212	needUnlock = false
213	unlockOSThread()
214
215	// For gccgo we have to wait until after main is initialized
216	// to enable GC, because initializing main registers the GC roots.
217	gcenable()
218
219	if isarchive || islibrary {
220		// A program compiled with -buildmode=c-archive or c-shared
221		// has a main, but it is not executed.
222		return
223	}
224	fn = main_main // make an indirect call, as the linker doesn't know the address of the main package when laying down the runtime
225	fn()
226	if raceenabled {
227		racefini()
228	}
229
230	// Make racy client program work: if panicking on
231	// another goroutine at the same time as main returns,
232	// let the other goroutine finish printing the panic trace.
233	// Once it does, it will exit. See issues 3934 and 20018.
234	if atomic.Load(&runningPanicDefers) != 0 {
235		// Running deferred functions should not take long.
236		for c := 0; c < 1000; c++ {
237			if atomic.Load(&runningPanicDefers) == 0 {
238				break
239			}
240			Gosched()
241		}
242	}
243	if atomic.Load(&panicking) != 0 {
244		gopark(nil, nil, "panicwait", traceEvGoStop, 1)
245	}
246
247	exit(0)
248	for {
249		var x *int32
250		*x = 0
251	}
252}
253
254// os_beforeExit is called from os.Exit(0).
255//go:linkname os_beforeExit os.runtime_beforeExit
256func os_beforeExit() {
257	if raceenabled {
258		racefini()
259	}
260}
261
262// start forcegc helper goroutine
263func init() {
264	expectSystemGoroutine()
265	go forcegchelper()
266}
267
268func forcegchelper() {
269	setSystemGoroutine()
270
271	forcegc.g = getg()
272	for {
273		lock(&forcegc.lock)
274		if forcegc.idle != 0 {
275			throw("forcegc: phase error")
276		}
277		atomic.Store(&forcegc.idle, 1)
278		goparkunlock(&forcegc.lock, "force gc (idle)", traceEvGoBlock, 1)
279		// this goroutine is explicitly resumed by sysmon
280		if debug.gctrace > 0 {
281			println("GC forced")
282		}
283		// Time-triggered, fully concurrent.
284		gcStart(gcBackgroundMode, gcTrigger{kind: gcTriggerTime, now: nanotime()})
285	}
286}
287
// Gosched yields the processor, allowing other goroutines to run. It does not
// suspend the current goroutine, so execution resumes automatically.
//go:nosplit
func Gosched() {
293	mcall(gosched_m)
294}
295
296// goschedguarded yields the processor like gosched, but also checks
297// for forbidden states and opts out of the yield in those cases.
298//go:nosplit
299func goschedguarded() {
300	mcall(goschedguarded_m)
301}
302
303// Puts the current goroutine into a waiting state and calls unlockf.
304// If unlockf returns false, the goroutine is resumed.
305// unlockf must not access this G's stack, as it may be moved between
306// the call to gopark and the call to unlockf.
307func gopark(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason string, traceEv byte, traceskip int) {
308	mp := acquirem()
309	gp := mp.curg
310	status := readgstatus(gp)
311	if status != _Grunning && status != _Gscanrunning {
312		throw("gopark: bad g status")
313	}
314	mp.waitlock = lock
315	mp.waitunlockf = *(*unsafe.Pointer)(unsafe.Pointer(&unlockf))
316	gp.waitreason = reason
317	mp.waittraceev = traceEv
318	mp.waittraceskip = traceskip
319	releasem(mp)
320	// can't do anything that might move the G between Ms here.
321	mcall(park_m)
322}
323
324// Puts the current goroutine into a waiting state and unlocks the lock.
325// The goroutine can be made runnable again by calling goready(gp).
326func goparkunlock(lock *mutex, reason string, traceEv byte, traceskip int) {
327	gopark(parkunlock_c, unsafe.Pointer(lock), reason, traceEv, traceskip)
328}
329
330func goready(gp *g, traceskip int) {
331	systemstack(func() {
332		ready(gp, traceskip, true)
333	})
334}
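
// A typical use of the park/ready pair above looks like the following sketch
// (illustrative only; the channel and semaphore implementations follow this
// shape, with l, a mutex protecting a wait queue, and waiterG, a *g, standing
// in for the caller's own state):
//
//	lock(&l)
//	// ... enqueue getg() on a waiter list guarded by l ...
//	goparkunlock(&l, "example wait", traceEvGoBlock, 1)
//	// when we get here, some other goroutine has made us runnable again
//
// and, on the waking side, while holding l:
//
//	goready(waiterG, 1)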
335
336//go:nosplit
337func acquireSudog() *sudog {
338	// Delicate dance: the semaphore implementation calls
339	// acquireSudog, acquireSudog calls new(sudog),
340	// new calls malloc, malloc can call the garbage collector,
341	// and the garbage collector calls the semaphore implementation
342	// in stopTheWorld.
343	// Break the cycle by doing acquirem/releasem around new(sudog).
344	// The acquirem/releasem increments m.locks during new(sudog),
345	// which keeps the garbage collector from being invoked.
346	mp := acquirem()
347	pp := mp.p.ptr()
348	if len(pp.sudogcache) == 0 {
349		lock(&sched.sudoglock)
350		// First, try to grab a batch from central cache.
351		for len(pp.sudogcache) < cap(pp.sudogcache)/2 && sched.sudogcache != nil {
352			s := sched.sudogcache
353			sched.sudogcache = s.next
354			s.next = nil
355			pp.sudogcache = append(pp.sudogcache, s)
356		}
357		unlock(&sched.sudoglock)
358		// If the central cache is empty, allocate a new one.
359		if len(pp.sudogcache) == 0 {
360			pp.sudogcache = append(pp.sudogcache, new(sudog))
361		}
362	}
363	n := len(pp.sudogcache)
364	s := pp.sudogcache[n-1]
365	pp.sudogcache[n-1] = nil
366	pp.sudogcache = pp.sudogcache[:n-1]
367	if s.elem != nil {
368		throw("acquireSudog: found s.elem != nil in cache")
369	}
370	releasem(mp)
371	return s
372}
373
374//go:nosplit
375func releaseSudog(s *sudog) {
376	if s.elem != nil {
377		throw("runtime: sudog with non-nil elem")
378	}
379	if s.isSelect {
380		throw("runtime: sudog with non-false isSelect")
381	}
382	if s.next != nil {
383		throw("runtime: sudog with non-nil next")
384	}
385	if s.prev != nil {
386		throw("runtime: sudog with non-nil prev")
387	}
388	if s.waitlink != nil {
389		throw("runtime: sudog with non-nil waitlink")
390	}
391	if s.c != nil {
392		throw("runtime: sudog with non-nil c")
393	}
394	gp := getg()
395	if gp.param != nil {
396		throw("runtime: releaseSudog with non-nil gp.param")
397	}
398	mp := acquirem() // avoid rescheduling to another P
399	pp := mp.p.ptr()
400	if len(pp.sudogcache) == cap(pp.sudogcache) {
401		// Transfer half of local cache to the central cache.
402		var first, last *sudog
403		for len(pp.sudogcache) > cap(pp.sudogcache)/2 {
404			n := len(pp.sudogcache)
405			p := pp.sudogcache[n-1]
406			pp.sudogcache[n-1] = nil
407			pp.sudogcache = pp.sudogcache[:n-1]
408			if first == nil {
409				first = p
410			} else {
411				last.next = p
412			}
413			last = p
414		}
415		lock(&sched.sudoglock)
416		last.next = sched.sudogcache
417		sched.sudogcache = first
418		unlock(&sched.sudoglock)
419	}
420	pp.sudogcache = append(pp.sudogcache, s)
421	releasem(mp)
422}
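
// The cache above is consumed by the channel and semaphore code roughly as in
// the sketch below (illustrative; v stands for the caller's own datum, and the
// fields cleared before release are exactly the ones releaseSudog checks):
//
//	sg := acquireSudog()
//	sg.g = getg()
//	sg.elem = unsafe.Pointer(&v) // datum being communicated
//	// ... park via gopark, get woken by the other side ...
//	sg.elem = nil
//	sg.c = nil
//	releaseSudog(sg)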
423
424// funcPC returns the entry PC of the function f.
425// It assumes that f is a func value. Otherwise the behavior is undefined.
426// CAREFUL: In programs with plugins, funcPC can return different values
427// for the same function (because there are actually multiple copies of
428// the same function in the address space). To be safe, don't use the
429// results of this function in any == expression. It is only safe to
430// use the result as an address at which to start executing code.
431//
432// For gccgo note that this differs from the gc implementation; the gc
433// implementation adds sys.PtrSize to the address of the interface
434// value, but GCC's alias analysis decides that that can not be a
435// reference to the second field of the interface, and in some cases
436// it drops the initialization of the second field as a dead store.
437//go:nosplit
438func funcPC(f interface{}) uintptr {
439	i := (*iface)(unsafe.Pointer(&f))
440	return **(**uintptr)(i.data)
441}
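
// For example, the result of funcPC is safe to use as an address at which to
// start executing code, but not in comparisons (sketch):
//
//	pc := funcPC(forcegchelper) // ok: an address at which to start executing
//	// funcPC(f) == funcPC(g)   // not safe in the presence of plugins; avoid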
442
443func lockedOSThread() bool {
444	gp := getg()
445	return gp.lockedm != 0 && gp.m.lockedg != 0
446}
447
448var (
449	allgs    []*g
450	allglock mutex
451)
452
453func allgadd(gp *g) {
454	if readgstatus(gp) == _Gidle {
455		throw("allgadd: bad status Gidle")
456	}
457
458	lock(&allglock)
459	allgs = append(allgs, gp)
460	allglen = uintptr(len(allgs))
461	unlock(&allglock)
462}
463
464const (
	// Number of goroutine ids to grab from sched.goidgen to local per-P cache at once.
	// 16 seems to provide enough amortization, but other than that it's a mostly arbitrary number.
467	_GoidCacheBatch = 16
468)
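
// A sketch of how the goroutine-creation path typically consumes a batch (the
// goidcache/goidcacheend fields live on the P; newproc itself is not shown in
// this section):
//
//	if _p_.goidcache == _p_.goidcacheend {
//		// Refill the cache: claim _GoidCacheBatch ids at once.
//		_p_.goidcache = atomic.Xadd64(&sched.goidgen, _GoidCacheBatch)
//		_p_.goidcache -= _GoidCacheBatch - 1
//		_p_.goidcacheend = _p_.goidcache + _GoidCacheBatch
//	}
//	newg.goid = int64(_p_.goidcache)
//	_p_.goidcache++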
469
470// The bootstrap sequence is:
471//
472//	call osinit
473//	call schedinit
474//	make & queue new G
475//	call runtime·mstart
476//
477// The new G calls runtime·main.
478func schedinit() {
479	_m_ := &m0
480	_g_ := &g0
481	_m_.g0 = _g_
482	_m_.curg = _g_
483	_g_.m = _m_
484	setg(_g_)
485
486	sched.maxmcount = 10000
487
488	mallocinit()
489	mcommoninit(_g_.m)
490	alginit() // maps must not be used before this call
491
492	msigsave(_g_.m)
493	initSigmask = _g_.m.sigmask
494
495	goargs()
496	goenvs()
497	parsedebugvars()
498	gcinit()
499
500	sched.lastpoll = uint64(nanotime())
501	procs := ncpu
502	if n, ok := atoi32(gogetenv("GOMAXPROCS")); ok && n > 0 {
503		procs = n
504	}
505	if procresize(procs) != nil {
506		throw("unknown runnable goroutine during bootstrap")
507	}
508
509	// For cgocheck > 1, we turn on the write barrier at all times
510	// and check all pointer writes. We can't do this until after
511	// procresize because the write barrier needs a P.
512	if debug.cgocheck > 1 {
513		writeBarrier.cgo = true
514		writeBarrier.enabled = true
515		for _, p := range allp {
516			p.wbBuf.reset()
517		}
518	}
519
520	if buildVersion == "" {
521		// Condition should never trigger. This code just serves
522		// to ensure runtime·buildVersion is kept in the resulting binary.
523		buildVersion = "unknown"
524	}
525}
526
527func dumpgstatus(gp *g) {
528	_g_ := getg()
529	print("runtime: gp: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
530	print("runtime:  g:  g=", _g_, ", goid=", _g_.goid, ",  g->atomicstatus=", readgstatus(_g_), "\n")
531}
532
533func checkmcount() {
534	// sched lock is held
535	if mcount() > sched.maxmcount {
536		print("runtime: program exceeds ", sched.maxmcount, "-thread limit\n")
537		throw("thread exhaustion")
538	}
539}
540
541func mcommoninit(mp *m) {
542	_g_ := getg()
543
	// g0 stack won't make sense for user (and is not necessarily unwindable).
545	if _g_ != _g_.m.g0 {
546		callers(1, mp.createstack[:])
547	}
548
549	lock(&sched.lock)
550	if sched.mnext+1 < sched.mnext {
551		throw("runtime: thread ID overflow")
552	}
553	mp.id = sched.mnext
554	sched.mnext++
555	checkmcount()
556
557	mp.fastrand[0] = 1597334677 * uint32(mp.id)
558	mp.fastrand[1] = uint32(cputicks())
559	if mp.fastrand[0]|mp.fastrand[1] == 0 {
560		mp.fastrand[1] = 1
561	}
562
563	mpreinit(mp)
564
565	// Add to allm so garbage collector doesn't free g->m
566	// when it is just in a register or thread-local storage.
567	mp.alllink = allm
568
569	// NumCgoCall() iterates over allm w/o schedlock,
570	// so we need to publish it safely.
571	atomicstorep(unsafe.Pointer(&allm), unsafe.Pointer(mp))
572	unlock(&sched.lock)
573}
574
575// Mark gp ready to run.
576func ready(gp *g, traceskip int, next bool) {
577	if trace.enabled {
578		traceGoUnpark(gp, traceskip)
579	}
580
581	status := readgstatus(gp)
582
583	// Mark runnable.
584	_g_ := getg()
585	_g_.m.locks++ // disable preemption because it can be holding p in a local var
586	if status&^_Gscan != _Gwaiting {
587		dumpgstatus(gp)
588		throw("bad g->status in ready")
589	}
590
591	// status is Gwaiting or Gscanwaiting, make Grunnable and put on runq
592	casgstatus(gp, _Gwaiting, _Grunnable)
593	runqput(_g_.m.p.ptr(), gp, next)
594	if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 {
595		wakep()
596	}
597	_g_.m.locks--
598}
599
600func gcprocs() int32 {
601	// Figure out how many CPUs to use during GC.
602	// Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
603	lock(&sched.lock)
604	n := gomaxprocs
605	if n > ncpu {
606		n = ncpu
607	}
608	if n > _MaxGcproc {
609		n = _MaxGcproc
610	}
611	if n > sched.nmidle+1 { // one M is currently running
612		n = sched.nmidle + 1
613	}
614	unlock(&sched.lock)
615	return n
616}
617
618func needaddgcproc() bool {
619	lock(&sched.lock)
620	n := gomaxprocs
621	if n > ncpu {
622		n = ncpu
623	}
624	if n > _MaxGcproc {
625		n = _MaxGcproc
626	}
627	n -= sched.nmidle + 1 // one M is currently running
628	unlock(&sched.lock)
629	return n > 0
630}
631
632func helpgc(nproc int32) {
633	_g_ := getg()
634	lock(&sched.lock)
635	pos := 0
636	for n := int32(1); n < nproc; n++ { // one M is currently running
637		if allp[pos].mcache == _g_.m.mcache {
638			pos++
639		}
640		mp := mget()
641		if mp == nil {
642			throw("gcprocs inconsistency")
643		}
644		mp.helpgc = n
645		mp.p.set(allp[pos])
646		mp.mcache = allp[pos].mcache
647		pos++
648		notewakeup(&mp.park)
649	}
650	unlock(&sched.lock)
651}
652
653// freezeStopWait is a large value that freezetheworld sets
654// sched.stopwait to in order to request that all Gs permanently stop.
655const freezeStopWait = 0x7fffffff
656
657// freezing is set to non-zero if the runtime is trying to freeze the
658// world.
659var freezing uint32
660
// Similar to stopTheWorld, but best-effort and can be called several times.
// There is no reverse operation; it is used during crashing.
663// This function must not lock any mutexes.
664func freezetheworld() {
665	atomic.Store(&freezing, 1)
666	// stopwait and preemption requests can be lost
667	// due to races with concurrently executing threads,
668	// so try several times
669	for i := 0; i < 5; i++ {
670		// this should tell the scheduler to not start any new goroutines
671		sched.stopwait = freezeStopWait
672		atomic.Store(&sched.gcwaiting, 1)
673		// this should stop running goroutines
674		if !preemptall() {
675			break // no running goroutines
676		}
677		usleep(1000)
678	}
679	// to be sure
680	usleep(1000)
681	preemptall()
682	usleep(1000)
683}
684
685func isscanstatus(status uint32) bool {
686	if status == _Gscan {
687		throw("isscanstatus: Bad status Gscan")
688	}
689	return status&_Gscan == _Gscan
690}
691
// All reads and writes of g's status go through readgstatus, casgstatus,
// castogscanstatus, and casfrom_Gscanstatus.
694//go:nosplit
695func readgstatus(gp *g) uint32 {
696	return atomic.Load(&gp.atomicstatus)
697}
698
699// Ownership of gcscanvalid:
700//
701// If gp is running (meaning status == _Grunning or _Grunning|_Gscan),
702// then gp owns gp.gcscanvalid, and other goroutines must not modify it.
703//
704// Otherwise, a second goroutine can lock the scan state by setting _Gscan
705// in the status bit and then modify gcscanvalid, and then unlock the scan state.
706//
707// Note that the first condition implies an exception to the second:
708// if a second goroutine changes gp's status to _Grunning|_Gscan,
709// that second goroutine still does not have the right to modify gcscanvalid.
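//
// In code, "locking" the scan state follows the pattern used by scang below
// (a sketch; the elided body is whatever inspection the locker needs to do):
//
//	if castogscanstatus(gp, _Grunnable, _Gscanrunnable) {
//		// The _Gscan bit is set, so gp cannot start running;
//		// gp.gcscanvalid may be read or updated here.
//		...
//		casfrom_Gscanstatus(gp, _Gscanrunnable, _Grunnable)
//	}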
710
711// The Gscanstatuses are acting like locks and this releases them.
712// If it proves to be a performance hit we should be able to make these
713// simple atomic stores but for now we are going to throw if
714// we see an inconsistent state.
715func casfrom_Gscanstatus(gp *g, oldval, newval uint32) {
716	success := false
717
718	// Check that transition is valid.
719	switch oldval {
720	default:
721		print("runtime: casfrom_Gscanstatus bad oldval gp=", gp, ", oldval=", hex(oldval), ", newval=", hex(newval), "\n")
722		dumpgstatus(gp)
723		throw("casfrom_Gscanstatus:top gp->status is not in scan state")
724	case _Gscanrunnable,
725		_Gscanwaiting,
726		_Gscanrunning,
727		_Gscansyscall:
728		if newval == oldval&^_Gscan {
729			success = atomic.Cas(&gp.atomicstatus, oldval, newval)
730		}
731	}
732	if !success {
733		print("runtime: casfrom_Gscanstatus failed gp=", gp, ", oldval=", hex(oldval), ", newval=", hex(newval), "\n")
734		dumpgstatus(gp)
735		throw("casfrom_Gscanstatus: gp->status is not in scan state")
736	}
737}
738
739// This will return false if the gp is not in the expected status and the cas fails.
// This acts like a lock acquire while the casfrom_Gscanstatus acts like a lock release.
741func castogscanstatus(gp *g, oldval, newval uint32) bool {
742	switch oldval {
743	case _Grunnable,
744		_Grunning,
745		_Gwaiting,
746		_Gsyscall:
747		if newval == oldval|_Gscan {
748			return atomic.Cas(&gp.atomicstatus, oldval, newval)
749		}
750	}
751	print("runtime: castogscanstatus oldval=", hex(oldval), " newval=", hex(newval), "\n")
752	throw("castogscanstatus")
753	panic("not reached")
754}
755
756// If asked to move to or from a Gscanstatus this will throw. Use the castogscanstatus
757// and casfrom_Gscanstatus instead.
758// casgstatus will loop if the g->atomicstatus is in a Gscan status until the routine that
759// put it in the Gscan state is finished.
760//go:nosplit
761func casgstatus(gp *g, oldval, newval uint32) {
762	if (oldval&_Gscan != 0) || (newval&_Gscan != 0) || oldval == newval {
763		systemstack(func() {
764			print("runtime: casgstatus: oldval=", hex(oldval), " newval=", hex(newval), "\n")
765			throw("casgstatus: bad incoming values")
766		})
767	}
768
769	if oldval == _Grunning && gp.gcscanvalid {
		// If oldval == _Grunning, then the actual status must be
771		// _Grunning or _Grunning|_Gscan; either way,
772		// we own gp.gcscanvalid, so it's safe to read.
773		// gp.gcscanvalid must not be true when we are running.
774		systemstack(func() {
775			print("runtime: casgstatus ", hex(oldval), "->", hex(newval), " gp.status=", hex(gp.atomicstatus), " gp.gcscanvalid=true\n")
776			throw("casgstatus")
777		})
778	}
779
780	// See http://golang.org/cl/21503 for justification of the yield delay.
781	const yieldDelay = 5 * 1000
782	var nextYield int64
783
784	// loop if gp->atomicstatus is in a scan state giving
785	// GC time to finish and change the state to oldval.
786	for i := 0; !atomic.Cas(&gp.atomicstatus, oldval, newval); i++ {
787		if oldval == _Gwaiting && gp.atomicstatus == _Grunnable {
788			systemstack(func() {
789				throw("casgstatus: waiting for Gwaiting but is Grunnable")
790			})
791		}
792		// Help GC if needed.
793		// if gp.preemptscan && !gp.gcworkdone && (oldval == _Grunning || oldval == _Gsyscall) {
794		// 	gp.preemptscan = false
795		// 	systemstack(func() {
796		// 		gcphasework(gp)
797		// 	})
798		// }
799		// But meanwhile just yield.
800		if i == 0 {
801			nextYield = nanotime() + yieldDelay
802		}
803		if nanotime() < nextYield {
804			for x := 0; x < 10 && gp.atomicstatus != oldval; x++ {
805				procyield(1)
806			}
807		} else {
808			osyield()
809			nextYield = nanotime() + yieldDelay/2
810		}
811	}
812	if newval == _Grunning {
813		gp.gcscanvalid = false
814	}
815}
816
817// scang blocks until gp's stack has been scanned.
818// It might be scanned by scang or it might be scanned by the goroutine itself.
819// Either way, the stack scan has completed when scang returns.
820func scang(gp *g, gcw *gcWork) {
	// Invariant: we (the caller, markroot for a specific goroutine) own gp.gcscandone.
822	// Nothing is racing with us now, but gcscandone might be set to true left over
823	// from an earlier round of stack scanning (we scan twice per GC).
824	// We use gcscandone to record whether the scan has been done during this round.
825
826	gp.gcscandone = false
827
828	// See http://golang.org/cl/21503 for justification of the yield delay.
829	const yieldDelay = 10 * 1000
830	var nextYield int64
831
832	// Endeavor to get gcscandone set to true,
833	// either by doing the stack scan ourselves or by coercing gp to scan itself.
834	// gp.gcscandone can transition from false to true when we're not looking
835	// (if we asked for preemption), so any time we lock the status using
836	// castogscanstatus we have to double-check that the scan is still not done.
837loop:
838	for i := 0; !gp.gcscandone; i++ {
839		switch s := readgstatus(gp); s {
840		default:
841			dumpgstatus(gp)
842			throw("stopg: invalid status")
843
844		case _Gdead:
845			// No stack.
846			gp.gcscandone = true
847			break loop
848
849		case _Gcopystack:
850		// Stack being switched. Go around again.
851
852		case _Grunnable, _Gsyscall, _Gwaiting:
853			// Claim goroutine by setting scan bit.
854			// Racing with execution or readying of gp.
855			// The scan bit keeps them from running
856			// the goroutine until we're done.
857			if castogscanstatus(gp, s, s|_Gscan) {
858				if gp.scanningself {
859					// Don't try to scan the stack
860					// if the goroutine is going to do
861					// it itself.
862					restartg(gp)
863					break
864				}
865				if !gp.gcscandone {
866					scanstack(gp, gcw)
867					gp.gcscandone = true
868				}
869				restartg(gp)
870				break loop
871			}
872
873		case _Gscanwaiting:
874			// newstack is doing a scan for us right now. Wait.
875
876		case _Gscanrunning:
877			// checkPreempt is scanning. Wait.
878
879		case _Grunning:
880			// Goroutine running. Try to preempt execution so it can scan itself.
881			// The preemption handler (in newstack) does the actual scan.
882
883			// Optimization: if there is already a pending preemption request
884			// (from the previous loop iteration), don't bother with the atomics.
885			if gp.preemptscan && gp.preempt {
886				break
887			}
888
889			// Ask for preemption and self scan.
890			if castogscanstatus(gp, _Grunning, _Gscanrunning) {
891				if !gp.gcscandone {
892					gp.preemptscan = true
893					gp.preempt = true
894				}
895				casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning)
896			}
897		}
898
899		if i == 0 {
900			nextYield = nanotime() + yieldDelay
901		}
902		if nanotime() < nextYield {
903			procyield(10)
904		} else {
905			osyield()
906			nextYield = nanotime() + yieldDelay/2
907		}
908	}
909
910	gp.preemptscan = false // cancel scan request if no longer needed
911}
912
913// The GC requests that this routine be moved from a scanmumble state to a mumble state.
914func restartg(gp *g) {
915	s := readgstatus(gp)
916	switch s {
917	default:
918		dumpgstatus(gp)
919		throw("restartg: unexpected status")
920
921	case _Gdead:
922	// ok
923
924	case _Gscanrunnable,
925		_Gscanwaiting,
926		_Gscansyscall:
927		casfrom_Gscanstatus(gp, s, s&^_Gscan)
928	}
929}
930
// stopTheWorld stops all P's from executing goroutines, interrupting
// all goroutines at GC safe points and recording reason as the reason
// for the stop. On return, only the current goroutine's P is running.
934// stopTheWorld must not be called from a system stack and the caller
935// must not hold worldsema. The caller must call startTheWorld when
936// other P's should resume execution.
937//
938// stopTheWorld is safe for multiple goroutines to call at the
939// same time. Each will execute its own stop, and the stops will
940// be serialized.
941//
942// This is also used by routines that do stack dumps. If the system is
943// in panic or being exited, this may not reliably stop all
944// goroutines.
945func stopTheWorld(reason string) {
946	semacquire(&worldsema)
947	getg().m.preemptoff = reason
948	systemstack(stopTheWorldWithSema)
949}
950
951// startTheWorld undoes the effects of stopTheWorld.
952func startTheWorld() {
953	systemstack(func() { startTheWorldWithSema(false) })
954	// worldsema must be held over startTheWorldWithSema to ensure
955	// gomaxprocs cannot change while worldsema is held.
956	semrelease(&worldsema)
957	getg().m.preemptoff = ""
958}
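
// A typical caller brackets its work with the pair above, for example (sketch;
// the GOMAXPROCS update and the heap-dump code follow this pattern):
//
//	stopTheWorld("example reason")
//	// only this goroutine's P is running: global scheduler and heap
//	// state may be inspected or mutated safely here
//	startTheWorld()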
959
960// Holding worldsema grants an M the right to try to stop the world
961// and prevents gomaxprocs from changing concurrently.
962var worldsema uint32 = 1
963
964// stopTheWorldWithSema is the core implementation of stopTheWorld.
// The caller is responsible for acquiring worldsema and disabling
// preemption first and then should call stopTheWorldWithSema on the system
967// stack:
968//
969//	semacquire(&worldsema, 0)
970//	m.preemptoff = "reason"
971//	systemstack(stopTheWorldWithSema)
972//
973// When finished, the caller must either call startTheWorld or undo
974// these three operations separately:
975//
976//	m.preemptoff = ""
977//	systemstack(startTheWorldWithSema)
978//	semrelease(&worldsema)
979//
980// It is allowed to acquire worldsema once and then execute multiple
981// startTheWorldWithSema/stopTheWorldWithSema pairs.
982// Other P's are able to execute between successive calls to
983// startTheWorldWithSema and stopTheWorldWithSema.
984// Holding worldsema causes any other goroutines invoking
985// stopTheWorld to block.
986func stopTheWorldWithSema() {
987	_g_ := getg()
988
989	// If we hold a lock, then we won't be able to stop another M
990	// that is blocked trying to acquire the lock.
991	if _g_.m.locks > 0 {
992		throw("stopTheWorld: holding locks")
993	}
994
995	lock(&sched.lock)
996	sched.stopwait = gomaxprocs
997	atomic.Store(&sched.gcwaiting, 1)
998	preemptall()
999	// stop current P
1000	_g_.m.p.ptr().status = _Pgcstop // Pgcstop is only diagnostic.
1001	sched.stopwait--
1002	// try to retake all P's in Psyscall status
1003	for _, p := range allp {
1004		s := p.status
1005		if s == _Psyscall && atomic.Cas(&p.status, s, _Pgcstop) {
1006			if trace.enabled {
1007				traceGoSysBlock(p)
1008				traceProcStop(p)
1009			}
1010			p.syscalltick++
1011			sched.stopwait--
1012		}
1013	}
1014	// stop idle P's
1015	for {
1016		p := pidleget()
1017		if p == nil {
1018			break
1019		}
1020		p.status = _Pgcstop
1021		sched.stopwait--
1022	}
1023	wait := sched.stopwait > 0
1024	unlock(&sched.lock)
1025
1026	// wait for remaining P's to stop voluntarily
1027	if wait {
1028		for {
1029			// wait for 100us, then try to re-preempt in case of any races
1030			if notetsleep(&sched.stopnote, 100*1000) {
1031				noteclear(&sched.stopnote)
1032				break
1033			}
1034			preemptall()
1035		}
1036	}
1037
1038	// sanity checks
1039	bad := ""
1040	if sched.stopwait != 0 {
1041		bad = "stopTheWorld: not stopped (stopwait != 0)"
1042	} else {
1043		for _, p := range allp {
1044			if p.status != _Pgcstop {
1045				bad = "stopTheWorld: not stopped (status != _Pgcstop)"
1046			}
1047		}
1048	}
1049	if atomic.Load(&freezing) != 0 {
1050		// Some other thread is panicking. This can cause the
1051		// sanity checks above to fail if the panic happens in
1052		// the signal handler on a stopped thread. Either way,
1053		// we should halt this thread.
1054		lock(&deadlock)
1055		lock(&deadlock)
1056	}
1057	if bad != "" {
1058		throw(bad)
1059	}
1060}
1061
1062func mhelpgc() {
1063	_g_ := getg()
1064	_g_.m.helpgc = -1
1065}
1066
1067func startTheWorldWithSema(emitTraceEvent bool) int64 {
1068	_g_ := getg()
1069
1070	_g_.m.locks++ // disable preemption because it can be holding p in a local var
1071	if netpollinited() {
1072		gp := netpoll(false) // non-blocking
1073		injectglist(gp)
1074	}
1075	add := needaddgcproc()
1076	lock(&sched.lock)
1077
1078	procs := gomaxprocs
1079	if newprocs != 0 {
1080		procs = newprocs
1081		newprocs = 0
1082	}
1083	p1 := procresize(procs)
1084	sched.gcwaiting = 0
1085	if sched.sysmonwait != 0 {
1086		sched.sysmonwait = 0
1087		notewakeup(&sched.sysmonnote)
1088	}
1089	unlock(&sched.lock)
1090
1091	for p1 != nil {
1092		p := p1
1093		p1 = p1.link.ptr()
1094		if p.m != 0 {
1095			mp := p.m.ptr()
1096			p.m = 0
1097			if mp.nextp != 0 {
1098				throw("startTheWorld: inconsistent mp->nextp")
1099			}
1100			mp.nextp.set(p)
1101			notewakeup(&mp.park)
1102		} else {
1103			// Start M to run P.  Do not start another M below.
1104			newm(nil, p)
1105			add = false
1106		}
1107	}
1108
1109	// Capture start-the-world time before doing clean-up tasks.
1110	startTime := nanotime()
1111	if emitTraceEvent {
1112		traceGCSTWDone()
1113	}
1114
	// Wake up an additional proc in case we have excessive runnable goroutines
1116	// in local queues or in the global queue. If we don't, the proc will park itself.
1117	// If we have lots of excessive work, resetspinning will unpark additional procs as necessary.
1118	if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 {
1119		wakep()
1120	}
1121
1122	if add {
1123		// If GC could have used another helper proc, start one now,
1124		// in the hope that it will be available next time.
1125		// It would have been even better to start it before the collection,
1126		// but doing so requires allocating memory, so it's tricky to
1127		// coordinate. This lazy approach works out in practice:
1128		// we don't mind if the first couple gc rounds don't have quite
1129		// the maximum number of procs.
1130		newm(mhelpgc, nil)
1131	}
1132	_g_.m.locks--
1133
1134	return startTime
1135}
1136
1137// First function run by a new goroutine.
1138// This is passed to makecontext.
1139func kickoff() {
1140	gp := getg()
1141
1142	if gp.traceback != nil {
1143		gtraceback(gp)
1144	}
1145
1146	fv := gp.entry
1147	param := gp.param
1148	gp.entry = nil
1149
1150	// When running on the g0 stack we can wind up here without a p,
1151	// for example from mcall(exitsyscall0) in exitsyscall.
1152	// Setting gp.param = nil will call a write barrier, and if
1153	// there is no p that write barrier will crash. When called from
1154	// mcall the gp.param value will be a *g, which we don't need to
1155	// shade since we know it will be kept alive elsewhere. In that
1156	// case clear the field using uintptr so that the write barrier
1157	// does nothing.
1158	if gp.m.p == 0 {
1159		if gp == gp.m.g0 && gp.param == unsafe.Pointer(gp.m.curg) {
1160			*(*uintptr)(unsafe.Pointer(&gp.param)) = 0
1161		} else {
1162			throw("no p in kickoff")
1163		}
1164	}
1165	gp.param = nil
1166
1167	fv(param)
1168	goexit1()
1169}
1170
1171func mstart1(dummy int32) {
1172	_g_ := getg()
1173
1174	if _g_ != _g_.m.g0 {
1175		throw("bad runtime·mstart")
1176	}
1177
1178	asminit()
1179
1180	// Install signal handlers; after minit so that minit can
1181	// prepare the thread to be able to handle the signals.
1182	// For gccgo minit was called by C code.
1183	if _g_.m == &m0 {
1184		mstartm0()
1185	}
1186
1187	if fn := _g_.m.mstartfn; fn != nil {
1188		fn()
1189	}
1190
1191	if _g_.m.helpgc != 0 {
1192		_g_.m.helpgc = 0
1193		stopm()
1194	} else if _g_.m != &m0 {
1195		acquirep(_g_.m.nextp.ptr())
1196		_g_.m.nextp = 0
1197	}
1198	schedule()
1199}
1200
1201// mstartm0 implements part of mstart1 that only runs on the m0.
1202//
1203// Write barriers are allowed here because we know the GC can't be
1204// running yet, so they'll be no-ops.
1205//
1206//go:yeswritebarrierrec
1207func mstartm0() {
1208	// Create an extra M for callbacks on threads not created by Go.
1209	if iscgo && !cgoHasExtraM {
1210		cgoHasExtraM = true
1211		newextram()
1212	}
1213	initsig(false)
1214}
1215
1216// mexit tears down and exits the current thread.
1217//
1218// Don't call this directly to exit the thread, since it must run at
1219// the top of the thread stack. Instead, use gogo(&_g_.m.g0.sched) to
1220// unwind the stack to the point that exits the thread.
1221//
1222// It is entered with m.p != nil, so write barriers are allowed. It
1223// will release the P before exiting.
1224//
1225//go:yeswritebarrierrec
1226func mexit(osStack bool) {
1227	g := getg()
1228	m := g.m
1229
1230	if m == &m0 {
1231		// This is the main thread. Just wedge it.
1232		//
1233		// On Linux, exiting the main thread puts the process
1234		// into a non-waitable zombie state. On Plan 9,
1235		// exiting the main thread unblocks wait even though
1236		// other threads are still running. On Solaris we can
1237		// neither exitThread nor return from mstart. Other
1238		// bad things probably happen on other platforms.
1239		//
1240		// We could try to clean up this M more before wedging
1241		// it, but that complicates signal handling.
1242		handoffp(releasep())
1243		lock(&sched.lock)
1244		sched.nmfreed++
1245		checkdead()
1246		unlock(&sched.lock)
1247		notesleep(&m.park)
1248		throw("locked m0 woke up")
1249	}
1250
1251	sigblock()
1252	unminit()
1253
1254	// Free the gsignal stack.
1255	if m.gsignal != nil {
1256		stackfree(m.gsignal)
1257	}
1258
1259	// Remove m from allm.
1260	lock(&sched.lock)
1261	for pprev := &allm; *pprev != nil; pprev = &(*pprev).alllink {
1262		if *pprev == m {
1263			*pprev = m.alllink
1264			goto found
1265		}
1266	}
1267	throw("m not found in allm")
1268found:
1269	if !osStack {
1270		// Delay reaping m until it's done with the stack.
1271		//
1272		// If this is using an OS stack, the OS will free it
1273		// so there's no need for reaping.
1274		atomic.Store(&m.freeWait, 1)
1275		// Put m on the free list, though it will not be reaped until
1276		// freeWait is 0. Note that the free list must not be linked
1277		// through alllink because some functions walk allm without
1278		// locking, so may be using alllink.
1279		m.freelink = sched.freem
1280		sched.freem = m
1281	}
1282	unlock(&sched.lock)
1283
1284	// Release the P.
1285	handoffp(releasep())
1286	// After this point we must not have write barriers.
1287
1288	// Invoke the deadlock detector. This must happen after
1289	// handoffp because it may have started a new M to take our
1290	// P's work.
1291	lock(&sched.lock)
1292	sched.nmfreed++
1293	checkdead()
1294	unlock(&sched.lock)
1295
1296	if osStack {
1297		// Return from mstart and let the system thread
1298		// library free the g0 stack and terminate the thread.
1299		return
1300	}
1301
1302	// mstart is the thread's entry point, so there's nothing to
1303	// return to. Exit the thread directly. exitThread will clear
1304	// m.freeWait when it's done with the stack and the m can be
1305	// reaped.
1306	exitThread(&m.freeWait)
1307}
1308
1309// forEachP calls fn(p) for every P p when p reaches a GC safe point.
1310// If a P is currently executing code, this will bring the P to a GC
1311// safe point and execute fn on that P. If the P is not executing code
1312// (it is idle or in a syscall), this will call fn(p) directly while
1313// preventing the P from exiting its state. This does not ensure that
1314// fn will run on every CPU executing Go code, but it acts as a global
1315// memory barrier. GC uses this as a "ragged barrier."
1316//
1317// The caller must hold worldsema.
1318//
1319//go:systemstack
1320func forEachP(fn func(*p)) {
1321	mp := acquirem()
1322	_p_ := getg().m.p.ptr()
1323
1324	lock(&sched.lock)
1325	if sched.safePointWait != 0 {
1326		throw("forEachP: sched.safePointWait != 0")
1327	}
1328	sched.safePointWait = gomaxprocs - 1
1329	sched.safePointFn = fn
1330
1331	// Ask all Ps to run the safe point function.
1332	for _, p := range allp {
1333		if p != _p_ {
1334			atomic.Store(&p.runSafePointFn, 1)
1335		}
1336	}
1337	preemptall()
1338
1339	// Any P entering _Pidle or _Psyscall from now on will observe
1340	// p.runSafePointFn == 1 and will call runSafePointFn when
1341	// changing its status to _Pidle/_Psyscall.
1342
1343	// Run safe point function for all idle Ps. sched.pidle will
1344	// not change because we hold sched.lock.
1345	for p := sched.pidle.ptr(); p != nil; p = p.link.ptr() {
1346		if atomic.Cas(&p.runSafePointFn, 1, 0) {
1347			fn(p)
1348			sched.safePointWait--
1349		}
1350	}
1351
1352	wait := sched.safePointWait > 0
1353	unlock(&sched.lock)
1354
1355	// Run fn for the current P.
1356	fn(_p_)
1357
1358	// Force Ps currently in _Psyscall into _Pidle and hand them
1359	// off to induce safe point function execution.
1360	for _, p := range allp {
1361		s := p.status
1362		if s == _Psyscall && p.runSafePointFn == 1 && atomic.Cas(&p.status, s, _Pidle) {
1363			if trace.enabled {
1364				traceGoSysBlock(p)
1365				traceProcStop(p)
1366			}
1367			p.syscalltick++
1368			handoffp(p)
1369		}
1370	}
1371
1372	// Wait for remaining Ps to run fn.
1373	if wait {
1374		for {
1375			// Wait for 100us, then try to re-preempt in
1376			// case of any races.
1377			//
1378			// Requires system stack.
1379			if notetsleep(&sched.safePointNote, 100*1000) {
1380				noteclear(&sched.safePointNote)
1381				break
1382			}
1383			preemptall()
1384		}
1385	}
1386	if sched.safePointWait != 0 {
1387		throw("forEachP: not done")
1388	}
1389	for _, p := range allp {
1390		if p.runSafePointFn != 0 {
1391			throw("forEachP: P did not run fn")
1392		}
1393	}
1394
1395	lock(&sched.lock)
1396	sched.safePointFn = nil
1397	unlock(&sched.lock)
1398	releasem(mp)
1399}
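
// A sketch of a typical call (the garbage collector uses this form for its
// "ragged barrier"; the body is whatever per-P flushing the caller needs):
//
//	forEachP(func(_p_ *p) {
//		// runs with _p_ guaranteed to be at a GC safe point
//		...
//	})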
1400
1401// runSafePointFn runs the safe point function, if any, for this P.
1402// This should be called like
1403//
1404//     if getg().m.p.runSafePointFn != 0 {
1405//         runSafePointFn()
1406//     }
1407//
1408// runSafePointFn must be checked on any transition in to _Pidle or
1409// _Psyscall to avoid a race where forEachP sees that the P is running
1410// just before the P goes into _Pidle/_Psyscall and neither forEachP
1411// nor the P run the safe-point function.
1412func runSafePointFn() {
1413	p := getg().m.p.ptr()
1414	// Resolve the race between forEachP running the safe-point
1415	// function on this P's behalf and this P running the
1416	// safe-point function directly.
1417	if !atomic.Cas(&p.runSafePointFn, 1, 0) {
1418		return
1419	}
1420	sched.safePointFn(p)
1421	lock(&sched.lock)
1422	sched.safePointWait--
1423	if sched.safePointWait == 0 {
1424		notewakeup(&sched.safePointNote)
1425	}
1426	unlock(&sched.lock)
1427}
1428
1429// Allocate a new m unassociated with any thread.
1430// Can use p for allocation context if needed.
1431// fn is recorded as the new m's m.mstartfn.
1432//
1433// This function is allowed to have write barriers even if the caller
1434// isn't because it borrows _p_.
1435//
1436//go:yeswritebarrierrec
1437func allocm(_p_ *p, fn func(), allocatestack bool) (mp *m, g0Stack unsafe.Pointer, g0StackSize uintptr) {
1438	_g_ := getg()
1439	_g_.m.locks++ // disable GC because it can be called from sysmon
1440	if _g_.m.p == 0 {
1441		acquirep(_p_) // temporarily borrow p for mallocs in this function
1442	}
1443
1444	// Release the free M list. We need to do this somewhere and
1445	// this may free up a stack we can use.
1446	if sched.freem != nil {
1447		lock(&sched.lock)
1448		var newList *m
1449		for freem := sched.freem; freem != nil; {
1450			if freem.freeWait != 0 {
1451				next := freem.freelink
1452				freem.freelink = newList
1453				newList = freem
1454				freem = next
1455				continue
1456			}
1457			stackfree(freem.g0)
1458			freem = freem.freelink
1459		}
1460		sched.freem = newList
1461		unlock(&sched.lock)
1462	}
1463
1464	mp = new(m)
1465	mp.mstartfn = fn
1466	mcommoninit(mp)
1467
1468	mp.g0 = malg(allocatestack, false, &g0Stack, &g0StackSize)
1469	mp.g0.m = mp
1470
1471	if _p_ == _g_.m.p.ptr() {
1472		releasep()
1473	}
1474	_g_.m.locks--
1475
1476	return mp, g0Stack, g0StackSize
1477}
1478
1479// needm is called when a cgo callback happens on a
1480// thread without an m (a thread not created by Go).
1481// In this case, needm is expected to find an m to use
1482// and return with m, g initialized correctly.
1483// Since m and g are not set now (likely nil, but see below)
1484// needm is limited in what routines it can call. In particular
1485// it can only call nosplit functions (textflag 7) and cannot
1486// do any scheduling that requires an m.
1487//
1488// In order to avoid needing heavy lifting here, we adopt
1489// the following strategy: there is a stack of available m's
1490// that can be stolen. Using compare-and-swap
1491// to pop from the stack has ABA races, so we simulate
1492// a lock by doing an exchange (via casp) to steal the stack
1493// head and replace the top pointer with MLOCKED (1).
1494// This serves as a simple spin lock that we can use even
1495// without an m. The thread that locks the stack in this way
1496// unlocks the stack by storing a valid stack head pointer.
1497//
1498// In order to make sure that there is always an m structure
1499// available to be stolen, we maintain the invariant that there
1500// is always one more than needed. At the beginning of the
1501// program (if cgo is in use) the list is seeded with a single m.
1502// If needm finds that it has taken the last m off the list, its job
1503// is - once it has installed its own m so that it can do things like
1504// allocate memory - to create a spare m and put it on the list.
1505//
1506// Each of these extra m's also has a g0 and a curg that are
1507// pressed into service as the scheduling stack and current
1508// goroutine for the duration of the cgo callback.
1509//
1510// When the callback is done with the m, it calls dropm to
1511// put the m back on the list.
1512//go:nosplit
1513func needm(x byte) {
1514	if iscgo && !cgoHasExtraM {
1515		// Can happen if C/C++ code calls Go from a global ctor.
1516		// Can not throw, because scheduler is not initialized yet.
1517		write(2, unsafe.Pointer(&earlycgocallback[0]), int32(len(earlycgocallback)))
1518		exit(1)
1519	}
1520
1521	// Lock extra list, take head, unlock popped list.
1522	// nilokay=false is safe here because of the invariant above,
1523	// that the extra list always contains or will soon contain
1524	// at least one m.
1525	mp := lockextra(false)
1526
1527	// Set needextram when we've just emptied the list,
1528	// so that the eventual call into cgocallbackg will
1529	// allocate a new m for the extra list. We delay the
1530	// allocation until then so that it can be done
1531	// after exitsyscall makes sure it is okay to be
1532	// running at all (that is, there's no garbage collection
1533	// running right now).
1534	mp.needextram = mp.schedlink == 0
1535	extraMCount--
1536	unlockextra(mp.schedlink.ptr())
1537
1538	// Save and block signals before installing g.
1539	// Once g is installed, any incoming signals will try to execute,
1540	// but we won't have the sigaltstack settings and other data
1541	// set up appropriately until the end of minit, which will
1542	// unblock the signals. This is the same dance as when
1543	// starting a new m to run Go code via newosproc.
1544	msigsave(mp)
1545	sigblock()
1546
1547	// Install g (= m->curg).
1548	setg(mp.curg)
1549
1550	// Initialize this thread to use the m.
1551	asminit()
1552	minit()
1553
1554	setGContext()
1555
1556	// mp.curg is now a real goroutine.
1557	casgstatus(mp.curg, _Gdead, _Gsyscall)
1558	atomic.Xadd(&sched.ngsys, -1)
1559}
1560
1561var earlycgocallback = []byte("fatal error: cgo callback before cgo call\n")
1562
1563// newextram allocates m's and puts them on the extra list.
1564// It is called with a working local m, so that it can do things
1565// like call schedlock and allocate.
1566func newextram() {
1567	c := atomic.Xchg(&extraMWaiters, 0)
1568	if c > 0 {
1569		for i := uint32(0); i < c; i++ {
1570			oneNewExtraM()
1571		}
1572	} else {
1573		// Make sure there is at least one extra M.
1574		mp := lockextra(true)
1575		unlockextra(mp)
1576		if mp == nil {
1577			oneNewExtraM()
1578		}
1579	}
1580}
1581
1582// oneNewExtraM allocates an m and puts it on the extra list.
1583func oneNewExtraM() {
1584	// Create extra goroutine locked to extra m.
1585	// The goroutine is the context in which the cgo callback will run.
1586	// The sched.pc will never be returned to, but setting it to
1587	// goexit makes clear to the traceback routines where
1588	// the goroutine stack ends.
1589	mp, g0SP, g0SPSize := allocm(nil, nil, true)
1590	gp := malg(true, false, nil, nil)
1591	gp.gcscanvalid = true
1592	gp.gcscandone = true
1593	// malg returns status as _Gidle. Change to _Gdead before
1594	// adding to allg where GC can see it. We use _Gdead to hide
1595	// this from tracebacks and stack scans since it isn't a
1596	// "real" goroutine until needm grabs it.
1597	casgstatus(gp, _Gidle, _Gdead)
1598	gp.m = mp
1599	mp.curg = gp
1600	mp.lockedInt++
1601	mp.lockedg.set(gp)
1602	gp.lockedm.set(mp)
1603	gp.goid = int64(atomic.Xadd64(&sched.goidgen, 1))
1604	// put on allg for garbage collector
1605	allgadd(gp)
1606
1607	// The context for gp will be set up in needm.
1608	// Here we need to set the context for g0.
1609	makeGContext(mp.g0, g0SP, g0SPSize)
1610
1611	// gp is now on the allg list, but we don't want it to be
1612	// counted by gcount. It would be more "proper" to increment
1613	// sched.ngfree, but that requires locking. Incrementing ngsys
1614	// has the same effect.
1615	atomic.Xadd(&sched.ngsys, +1)
1616
1617	// Add m to the extra list.
1618	mnext := lockextra(true)
1619	mp.schedlink.set(mnext)
1620	extraMCount++
1621	unlockextra(mp)
1622}
1623
1624// dropm is called when a cgo callback has called needm but is now
1625// done with the callback and returning back into the non-Go thread.
1626// It puts the current m back onto the extra list.
1627//
1628// The main expense here is the call to signalstack to release the
1629// m's signal stack, and then the call to needm on the next callback
1630// from this thread. It is tempting to try to save the m for next time,
1631// which would eliminate both these costs, but there might not be
1632// a next time: the current thread (which Go does not control) might exit.
1633// If we saved the m for that thread, there would be an m leak each time
1634// such a thread exited. Instead, we acquire and release an m on each
1635// call. These should typically not be scheduling operations, just a few
1636// atomics, so the cost should be small.
1637//
1638// TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
1639// variable using pthread_key_create. Unlike the pthread keys we already use
1640// on OS X, this dummy key would never be read by Go code. It would exist
// only so that we could register a thread-exit-time destructor.
1642// That destructor would put the m back onto the extra list.
1643// This is purely a performance optimization. The current version,
1644// in which dropm happens on each cgo call, is still correct too.
1645// We may have to keep the current version on systems with cgo
1646// but without pthreads, like Windows.
1647//
1648// CgocallBackDone calls this after releasing p, so no write barriers.
1649//go:nowritebarrierrec
1650func dropm() {
1651	// Clear m and g, and return m to the extra list.
1652	// After the call to setg we can only call nosplit functions
1653	// with no pointer manipulation.
1654	mp := getg().m
1655
1656	// Return mp.curg to dead state.
1657	casgstatus(mp.curg, _Gsyscall, _Gdead)
1658	atomic.Xadd(&sched.ngsys, +1)
1659
1660	// Block signals before unminit.
1661	// Unminit unregisters the signal handling stack (but needs g on some systems).
1662	// Setg(nil) clears g, which is the signal handler's cue not to run Go handlers.
1663	// It's important not to try to handle a signal between those two steps.
1664	sigmask := mp.sigmask
1665	sigblock()
1666	unminit()
1667
1668	// gccgo sets the stack to Gdead here, because the splitstack
1669	// context is not initialized.
1670	atomic.Store(&mp.curg.atomicstatus, _Gdead)
1671	mp.curg.gcstack = 0
1672	mp.curg.gcnextsp = 0
1673
1674	mnext := lockextra(true)
1675	extraMCount++
1676	mp.schedlink.set(mnext)
1677
1678	setg(nil)
1679
1680	// Commit the release of mp.
1681	unlockextra(mp)
1682
1683	msigrestore(sigmask)
1684}
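
// From the point of view of a C-created thread, the extra-M machinery is used
// in this order (a summary of the comments above, not new behavior):
//
//	needm        - borrow an extra M, install g and m, block signals, minit
//	cgocallbackg - run the Go callback on mp.curg
//	dropm        - return the M to the extra list and restore the signal mask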
1685
1686// A helper function for EnsureDropM.
1687func getm() uintptr {
1688	return uintptr(unsafe.Pointer(getg().m))
1689}
1690
1691var extram uintptr
1692var extraMCount uint32 // Protected by lockextra
1693var extraMWaiters uint32
1694
1695// lockextra locks the extra list and returns the list head.
1696// The caller must unlock the list by storing a new list head
1697// to extram. If nilokay is true, then lockextra will
1698// return a nil list head if that's what it finds. If nilokay is false,
1699// lockextra will keep waiting until the list head is no longer nil.
1700//go:nosplit
1701//go:nowritebarrierrec
1702func lockextra(nilokay bool) *m {
1703	const locked = 1
1704
1705	incr := false
1706	for {
1707		old := atomic.Loaduintptr(&extram)
1708		if old == locked {
1709			yield := osyield
1710			yield()
1711			continue
1712		}
1713		if old == 0 && !nilokay {
1714			if !incr {
1715				// Add 1 to the number of threads
1716				// waiting for an M.
1717				// This is cleared by newextram.
1718				atomic.Xadd(&extraMWaiters, 1)
1719				incr = true
1720			}
1721			usleep(1)
1722			continue
1723		}
1724		if atomic.Casuintptr(&extram, old, locked) {
1725			return (*m)(unsafe.Pointer(old))
1726		}
1727		yield := osyield
1728		yield()
1729		continue
1730	}
1731}
1732
1733//go:nosplit
1734//go:nowritebarrierrec
1735func unlockextra(mp *m) {
1736	atomic.Storeuintptr(&extram, uintptr(unsafe.Pointer(mp)))
1737}
1738
1739// execLock serializes exec and clone to avoid bugs or unspecified behaviour
1740// around exec'ing while creating/destroying threads.  See issue #19546.
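// Thread creation below takes the read half (see newm1); the exec path is
// expected to take the write half (execLock.lock/execLock.unlock) around the
// exec system call, in hooks that are not part of this section.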
1741var execLock rwmutex
1742
1743// newmHandoff contains a list of m structures that need new OS threads.
1744// This is used by newm in situations where newm itself can't safely
1745// start an OS thread.
1746var newmHandoff struct {
1747	lock mutex
1748
1749	// newm points to a list of M structures that need new OS
1750	// threads. The list is linked through m.schedlink.
1751	newm muintptr
1752
1753	// waiting indicates that wake needs to be notified when an m
1754	// is put on the list.
1755	waiting bool
1756	wake    note
1757
1758	// haveTemplateThread indicates that the templateThread has
1759	// been started. This is not protected by lock. Use cas to set
1760	// to 1.
1761	haveTemplateThread uint32
1762}
1763
1764// Create a new m. It will start off with a call to fn, or else the scheduler.
1765// fn needs to be static and not a heap allocated closure.
1766// May run with m.p==nil, so write barriers are not allowed.
1767//go:nowritebarrierrec
1768func newm(fn func(), _p_ *p) {
1769	mp, _, _ := allocm(_p_, fn, false)
1770	mp.nextp.set(_p_)
1771	mp.sigmask = initSigmask
1772	if gp := getg(); gp != nil && gp.m != nil && (gp.m.lockedExt != 0 || gp.m.incgo) && GOOS != "plan9" {
1773		// We're on a locked M or a thread that may have been
1774		// started by C. The kernel state of this thread may
1775		// be strange (the user may have locked it for that
1776		// purpose). We don't want to clone that into another
1777		// thread. Instead, ask a known-good thread to create
1778		// the thread for us.
1779		//
1780		// This is disabled on Plan 9. See golang.org/issue/22227.
1781		//
1782		// TODO: This may be unnecessary on Windows, which
1783		// doesn't model thread creation off fork.
1784		lock(&newmHandoff.lock)
1785		if newmHandoff.haveTemplateThread == 0 {
1786			throw("on a locked thread with no template thread")
1787		}
1788		mp.schedlink = newmHandoff.newm
1789		newmHandoff.newm.set(mp)
1790		if newmHandoff.waiting {
1791			newmHandoff.waiting = false
1792			notewakeup(&newmHandoff.wake)
1793		}
1794		unlock(&newmHandoff.lock)
1795		return
1796	}
1797	newm1(mp)
1798}
1799
1800func newm1(mp *m) {
1801	execLock.rlock() // Prevent process clone.
1802	newosproc(mp)
1803	execLock.runlock()
1804}
1805
1806// startTemplateThread starts the template thread if it is not already
1807// running.
1808//
1809// The calling thread must itself be in a known-good state.
1810func startTemplateThread() {
1811	if !atomic.Cas(&newmHandoff.haveTemplateThread, 0, 1) {
1812		return
1813	}
1814	newm(templateThread, nil)
1815}
1816
// templateThread is a thread in a known-good state that exists solely
// to start new threads in known-good states when the calling thread
// may not be in a good state.
1820//
1821// Many programs never need this, so templateThread is started lazily
1822// when we first enter a state that might lead to running on a thread
1823// in an unknown state.
1824//
1825// templateThread runs on an M without a P, so it must not have write
1826// barriers.
1827//
1828//go:nowritebarrierrec
1829func templateThread() {
1830	lock(&sched.lock)
1831	sched.nmsys++
1832	checkdead()
1833	unlock(&sched.lock)
1834
1835	for {
1836		lock(&newmHandoff.lock)
1837		for newmHandoff.newm != 0 {
1838			newm := newmHandoff.newm.ptr()
1839			newmHandoff.newm = 0
1840			unlock(&newmHandoff.lock)
1841			for newm != nil {
1842				next := newm.schedlink.ptr()
1843				newm.schedlink = 0
1844				newm1(newm)
1845				newm = next
1846			}
1847			lock(&newmHandoff.lock)
1848		}
1849		newmHandoff.waiting = true
1850		noteclear(&newmHandoff.wake)
1851		unlock(&newmHandoff.lock)
1852		notesleep(&newmHandoff.wake)
1853	}
1854}
1855
1856// Stops execution of the current m until new work is available.
1857// Returns with acquired P.
1858func stopm() {
1859	_g_ := getg()
1860
1861	if _g_.m.locks != 0 {
1862		throw("stopm holding locks")
1863	}
1864	if _g_.m.p != 0 {
1865		throw("stopm holding p")
1866	}
1867	if _g_.m.spinning {
1868		throw("stopm spinning")
1869	}
1870
1871retry:
1872	lock(&sched.lock)
1873	mput(_g_.m)
1874	unlock(&sched.lock)
1875	notesleep(&_g_.m.park)
1876	noteclear(&_g_.m.park)
1877	if _g_.m.helpgc != 0 {
1878		// helpgc() set _g_.m.p and _g_.m.mcache, so we have a P.
1879		gchelper()
1880		// Undo the effects of helpgc().
1881		_g_.m.helpgc = 0
1882		_g_.m.mcache = nil
1883		_g_.m.p = 0
1884		goto retry
1885	}
1886	acquirep(_g_.m.nextp.ptr())
1887	_g_.m.nextp = 0
1888}
1889
1890func mspinning() {
1891	// startm's caller incremented nmspinning. Set the new M's spinning.
1892	getg().m.spinning = true
1893}
1894
1895// Schedules some M to run the p (creates an M if necessary).
// If p==nil, tries to get an idle P; if there are no idle P's, it does nothing.
1897// May run with m.p==nil, so write barriers are not allowed.
1898// If spinning is set, the caller has incremented nmspinning and startm will
1899// either decrement nmspinning or set m.spinning in the newly started M.
1900//go:nowritebarrierrec
1901func startm(_p_ *p, spinning bool) {
1902	lock(&sched.lock)
1903	if _p_ == nil {
1904		_p_ = pidleget()
1905		if _p_ == nil {
1906			unlock(&sched.lock)
1907			if spinning {
1908				// The caller incremented nmspinning, but there are no idle Ps,
1909				// so it's okay to just undo the increment and give up.
1910				if int32(atomic.Xadd(&sched.nmspinning, -1)) < 0 {
1911					throw("startm: negative nmspinning")
1912				}
1913			}
1914			return
1915		}
1916	}
1917	mp := mget()
1918	unlock(&sched.lock)
1919	if mp == nil {
1920		var fn func()
1921		if spinning {
1922			// The caller incremented nmspinning, so set m.spinning in the new M.
1923			fn = mspinning
1924		}
1925		newm(fn, _p_)
1926		return
1927	}
1928	if mp.spinning {
1929		throw("startm: m is spinning")
1930	}
1931	if mp.nextp != 0 {
1932		throw("startm: m has p")
1933	}
1934	if spinning && !runqempty(_p_) {
1935		throw("startm: p has runnable gs")
1936	}
1937	// The caller incremented nmspinning, so set m.spinning in the new M.
1938	mp.spinning = spinning
1939	mp.nextp.set(_p_)
1940	notewakeup(&mp.park)
1941}
1942
1943// Hands off P from syscall or locked M.
1944// Always runs without a P, so write barriers are not allowed.
1945//go:nowritebarrierrec
1946func handoffp(_p_ *p) {
1947	// handoffp must start an M in any situation where
1948	// findrunnable would return a G to run on _p_.
1949
1950	// if it has local work, start it straight away
1951	if !runqempty(_p_) || sched.runqsize != 0 {
1952		startm(_p_, false)
1953		return
1954	}
1955	// if it has GC work, start it straight away
1956	if gcBlackenEnabled != 0 && gcMarkWorkAvailable(_p_) {
1957		startm(_p_, false)
1958		return
1959	}
1960	// no local work, check that there are no spinning/idle M's,
1961	// otherwise our help is not required
1962	if atomic.Load(&sched.nmspinning)+atomic.Load(&sched.npidle) == 0 && atomic.Cas(&sched.nmspinning, 0, 1) { // TODO: fast atomic
1963		startm(_p_, true)
1964		return
1965	}
1966	lock(&sched.lock)
1967	if sched.gcwaiting != 0 {
1968		_p_.status = _Pgcstop
1969		sched.stopwait--
1970		if sched.stopwait == 0 {
1971			notewakeup(&sched.stopnote)
1972		}
1973		unlock(&sched.lock)
1974		return
1975	}
1976	if _p_.runSafePointFn != 0 && atomic.Cas(&_p_.runSafePointFn, 1, 0) {
1977		sched.safePointFn(_p_)
1978		sched.safePointWait--
1979		if sched.safePointWait == 0 {
1980			notewakeup(&sched.safePointNote)
1981		}
1982	}
1983	if sched.runqsize != 0 {
1984		unlock(&sched.lock)
1985		startm(_p_, false)
1986		return
1987	}
1988	// If this is the last running P and nobody is polling network,
1989	// need to wakeup another M to poll network.
1990	if sched.npidle == uint32(gomaxprocs-1) && atomic.Load64(&sched.lastpoll) != 0 {
1991		unlock(&sched.lock)
1992		startm(_p_, false)
1993		return
1994	}
1995	pidleput(_p_)
1996	unlock(&sched.lock)
1997}
1998
1999// Tries to add one more P to execute G's.
2000// Called when a G is made runnable (newproc, ready).
2001func wakep() {
2002	// be conservative about spinning threads
2003	if !atomic.Cas(&sched.nmspinning, 0, 1) {
2004		return
2005	}
2006	startm(nil, true)
2007}
2008
2009// Stops execution of the current m that is locked to a g until the g is runnable again.
2010// Returns with acquired P.
2011func stoplockedm() {
2012	_g_ := getg()
2013
2014	if _g_.m.lockedg == 0 || _g_.m.lockedg.ptr().lockedm.ptr() != _g_.m {
2015		throw("stoplockedm: inconsistent locking")
2016	}
2017	if _g_.m.p != 0 {
2018		// Schedule another M to run this p.
2019		_p_ := releasep()
2020		handoffp(_p_)
2021	}
2022	incidlelocked(1)
2023	// Wait until another thread schedules lockedg again.
2024	notesleep(&_g_.m.park)
2025	noteclear(&_g_.m.park)
2026	status := readgstatus(_g_.m.lockedg.ptr())
2027	if status&^_Gscan != _Grunnable {
2028		print("runtime:stoplockedm: g is not Grunnable or Gscanrunnable\n")
2029		dumpgstatus(_g_)
2030		throw("stoplockedm: not runnable")
2031	}
2032	acquirep(_g_.m.nextp.ptr())
2033	_g_.m.nextp = 0
2034}
2035
2036// Schedules the locked m to run the locked gp.
2037// May run during STW, so write barriers are not allowed.
2038//go:nowritebarrierrec
2039func startlockedm(gp *g) {
2040	_g_ := getg()
2041
2042	mp := gp.lockedm.ptr()
2043	if mp == _g_.m {
2044		throw("startlockedm: locked to me")
2045	}
2046	if mp.nextp != 0 {
2047		throw("startlockedm: m has p")
2048	}
	// directly hand off the current P to the locked m
2050	incidlelocked(-1)
2051	_p_ := releasep()
2052	mp.nextp.set(_p_)
2053	notewakeup(&mp.park)
2054	stopm()
2055}
2056
2057// Stops the current m for stopTheWorld.
2058// Returns when the world is restarted.
2059func gcstopm() {
2060	_g_ := getg()
2061
2062	if sched.gcwaiting == 0 {
2063		throw("gcstopm: not waiting for gc")
2064	}
2065	if _g_.m.spinning {
2066		_g_.m.spinning = false
2067		// OK to just drop nmspinning here,
2068		// startTheWorld will unpark threads as necessary.
2069		if int32(atomic.Xadd(&sched.nmspinning, -1)) < 0 {
2070			throw("gcstopm: negative nmspinning")
2071		}
2072	}
2073	_p_ := releasep()
2074	lock(&sched.lock)
2075	_p_.status = _Pgcstop
2076	sched.stopwait--
2077	if sched.stopwait == 0 {
2078		notewakeup(&sched.stopnote)
2079	}
2080	unlock(&sched.lock)
2081	stopm()
2082}
2083
2084// Schedules gp to run on the current M.
2085// If inheritTime is true, gp inherits the remaining time in the
2086// current time slice. Otherwise, it starts a new time slice.
2087// Never returns.
2088//
2089// Write barriers are allowed because this is called immediately after
2090// acquiring a P in several places.
2091//
2092//go:yeswritebarrierrec
2093func execute(gp *g, inheritTime bool) {
2094	_g_ := getg()
2095
2096	casgstatus(gp, _Grunnable, _Grunning)
2097	gp.waitsince = 0
2098	gp.preempt = false
2099	if !inheritTime {
2100		_g_.m.p.ptr().schedtick++
2101	}
2102	_g_.m.curg = gp
2103	gp.m = _g_.m
2104
2105	// Check whether the profiler needs to be turned on or off.
2106	hz := sched.profilehz
2107	if _g_.m.profilehz != hz {
2108		setThreadCPUProfiler(hz)
2109	}
2110
2111	if trace.enabled {
2112		// GoSysExit has to happen when we have a P, but before GoStart.
2113		// So we emit it here.
2114		if gp.syscallsp != 0 && gp.sysblocktraced {
2115			traceGoSysExit(gp.sysexitticks)
2116		}
2117		traceGoStart()
2118	}
2119
2120	gogo(gp)
2121}
2122
2123// Finds a runnable goroutine to execute.
2124// Tries to steal from other P's, get g from global queue, poll network.
2125func findrunnable() (gp *g, inheritTime bool) {
2126	_g_ := getg()
2127
2128	// The conditions here and in handoffp must agree: if
2129	// findrunnable would return a G to run, handoffp must start
2130	// an M.
2131
2132top:
2133	_p_ := _g_.m.p.ptr()
2134	if sched.gcwaiting != 0 {
2135		gcstopm()
2136		goto top
2137	}
2138	if _p_.runSafePointFn != 0 {
2139		runSafePointFn()
2140	}
2141	if fingwait && fingwake {
2142		if gp := wakefing(); gp != nil {
2143			ready(gp, 0, true)
2144		}
2145	}
2146	if *cgo_yield != nil {
2147		asmcgocall(*cgo_yield, nil)
2148	}
2149
2150	// local runq
2151	if gp, inheritTime := runqget(_p_); gp != nil {
2152		return gp, inheritTime
2153	}
2154
2155	// global runq
2156	if sched.runqsize != 0 {
2157		lock(&sched.lock)
2158		gp := globrunqget(_p_, 0)
2159		unlock(&sched.lock)
2160		if gp != nil {
2161			return gp, false
2162		}
2163	}
2164
2165	// Poll network.
2166	// This netpoll is only an optimization before we resort to stealing.
2167	// We can safely skip it if there are no waiters or a thread is blocked
2168	// in netpoll already. If there is any kind of logical race with that
2169	// blocked thread (e.g. it has already returned from netpoll, but does
2170	// not set lastpoll yet), this thread will do blocking netpoll below
2171	// anyway.
2172	if netpollinited() && atomic.Load(&netpollWaiters) > 0 && atomic.Load64(&sched.lastpoll) != 0 {
2173		if gp := netpoll(false); gp != nil { // non-blocking
2174			// netpoll returns list of goroutines linked by schedlink.
2175			injectglist(gp.schedlink.ptr())
2176			casgstatus(gp, _Gwaiting, _Grunnable)
2177			if trace.enabled {
2178				traceGoUnpark(gp, 0)
2179			}
2180			return gp, false
2181		}
2182	}
2183
2184	// Steal work from other P's.
2185	procs := uint32(gomaxprocs)
2186	if atomic.Load(&sched.npidle) == procs-1 {
2187		// Either GOMAXPROCS=1 or everybody, except for us, is idle already.
2188		// New work can appear from returning syscall/cgocall, network or timers.
		// None of those submit to local run queues, so there is no point in stealing.
2190		goto stop
2191	}
2192	// If number of spinning M's >= number of busy P's, block.
2193	// This is necessary to prevent excessive CPU consumption
2194	// when GOMAXPROCS>>1 but the program parallelism is low.
2195	if !_g_.m.spinning && 2*atomic.Load(&sched.nmspinning) >= procs-atomic.Load(&sched.npidle) {
2196		goto stop
2197	}
2198	if !_g_.m.spinning {
2199		_g_.m.spinning = true
2200		atomic.Xadd(&sched.nmspinning, 1)
2201	}
2202	for i := 0; i < 4; i++ {
2203		for enum := stealOrder.start(fastrand()); !enum.done(); enum.next() {
2204			if sched.gcwaiting != 0 {
2205				goto top
2206			}
2207			stealRunNextG := i > 2 // first look for ready queues with more than 1 g
2208			if gp := runqsteal(_p_, allp[enum.position()], stealRunNextG); gp != nil {
2209				return gp, false
2210			}
2211		}
2212	}
2213
2214stop:
2215
2216	// We have nothing to do. If we're in the GC mark phase, can
2217	// safely scan and blacken objects, and have work to do, run
2218	// idle-time marking rather than give up the P.
2219	if gcBlackenEnabled != 0 && _p_.gcBgMarkWorker != 0 && gcMarkWorkAvailable(_p_) {
2220		_p_.gcMarkWorkerMode = gcMarkWorkerIdleMode
2221		gp := _p_.gcBgMarkWorker.ptr()
2222		casgstatus(gp, _Gwaiting, _Grunnable)
2223		if trace.enabled {
2224			traceGoUnpark(gp, 0)
2225		}
2226		return gp, false
2227	}
2228
2229	// Before we drop our P, make a snapshot of the allp slice,
2230	// which can change underfoot once we no longer block
2231	// safe-points. We don't need to snapshot the contents because
2232	// everything up to cap(allp) is immutable.
2233	allpSnapshot := allp
2234
2235	// return P and block
2236	lock(&sched.lock)
2237	if sched.gcwaiting != 0 || _p_.runSafePointFn != 0 {
2238		unlock(&sched.lock)
2239		goto top
2240	}
2241	if sched.runqsize != 0 {
2242		gp := globrunqget(_p_, 0)
2243		unlock(&sched.lock)
2244		return gp, false
2245	}
2246	if releasep() != _p_ {
2247		throw("findrunnable: wrong p")
2248	}
2249	pidleput(_p_)
2250	unlock(&sched.lock)
2251
2252	// Delicate dance: thread transitions from spinning to non-spinning state,
2253	// potentially concurrently with submission of new goroutines. We must
2254	// drop nmspinning first and then check all per-P queues again (with
2255	// #StoreLoad memory barrier in between). If we do it the other way around,
2256	// another thread can submit a goroutine after we've checked all run queues
2257	// but before we drop nmspinning; as the result nobody will unpark a thread
2258	// to run the goroutine.
2259	// If we discover new work below, we need to restore m.spinning as a signal
2260	// for resetspinning to unpark a new worker thread (because there can be more
2261	// than one starving goroutine). However, if after discovering new work
2262	// we also observe no idle Ps, it is OK to just park the current thread:
2263	// the system is fully loaded so no spinning threads are required.
2264	// Also see "Worker thread parking/unparking" comment at the top of the file.
2265	wasSpinning := _g_.m.spinning
2266	if _g_.m.spinning {
2267		_g_.m.spinning = false
2268		if int32(atomic.Xadd(&sched.nmspinning, -1)) < 0 {
2269			throw("findrunnable: negative nmspinning")
2270		}
2271	}
2272
2273	// check all runqueues once again
2274	for _, _p_ := range allpSnapshot {
2275		if !runqempty(_p_) {
2276			lock(&sched.lock)
2277			_p_ = pidleget()
2278			unlock(&sched.lock)
2279			if _p_ != nil {
2280				acquirep(_p_)
2281				if wasSpinning {
2282					_g_.m.spinning = true
2283					atomic.Xadd(&sched.nmspinning, 1)
2284				}
2285				goto top
2286			}
2287			break
2288		}
2289	}
2290
2291	// Check for idle-priority GC work again.
2292	if gcBlackenEnabled != 0 && gcMarkWorkAvailable(nil) {
2293		lock(&sched.lock)
2294		_p_ = pidleget()
2295		if _p_ != nil && _p_.gcBgMarkWorker == 0 {
2296			pidleput(_p_)
2297			_p_ = nil
2298		}
2299		unlock(&sched.lock)
2300		if _p_ != nil {
2301			acquirep(_p_)
2302			if wasSpinning {
2303				_g_.m.spinning = true
2304				atomic.Xadd(&sched.nmspinning, 1)
2305			}
2306			// Go back to idle GC check.
2307			goto stop
2308		}
2309	}
2310
2311	// poll network
2312	if netpollinited() && atomic.Load(&netpollWaiters) > 0 && atomic.Xchg64(&sched.lastpoll, 0) != 0 {
2313		if _g_.m.p != 0 {
2314			throw("findrunnable: netpoll with p")
2315		}
2316		if _g_.m.spinning {
2317			throw("findrunnable: netpoll with spinning")
2318		}
2319		gp := netpoll(true) // block until new work is available
2320		atomic.Store64(&sched.lastpoll, uint64(nanotime()))
2321		if gp != nil {
2322			lock(&sched.lock)
2323			_p_ = pidleget()
2324			unlock(&sched.lock)
2325			if _p_ != nil {
2326				acquirep(_p_)
2327				injectglist(gp.schedlink.ptr())
2328				casgstatus(gp, _Gwaiting, _Grunnable)
2329				if trace.enabled {
2330					traceGoUnpark(gp, 0)
2331				}
2332				return gp, false
2333			}
2334			injectglist(gp)
2335		}
2336	}
2337	stopm()
2338	goto top
2339}
2340
2341// pollWork returns true if there is non-background work this P could
2342// be doing. This is a fairly lightweight check to be used for
2343// background work loops, like idle GC. It checks a subset of the
2344// conditions checked by the actual scheduler.
2345func pollWork() bool {
2346	if sched.runqsize != 0 {
2347		return true
2348	}
2349	p := getg().m.p.ptr()
2350	if !runqempty(p) {
2351		return true
2352	}
2353	if netpollinited() && atomic.Load(&netpollWaiters) > 0 && sched.lastpoll != 0 {
2354		if gp := netpoll(false); gp != nil {
2355			injectglist(gp)
2356			return true
2357		}
2358	}
2359	return false
2360}
2361
2362func resetspinning() {
2363	_g_ := getg()
2364	if !_g_.m.spinning {
2365		throw("resetspinning: not a spinning m")
2366	}
2367	_g_.m.spinning = false
2368	nmspinning := atomic.Xadd(&sched.nmspinning, -1)
2369	if int32(nmspinning) < 0 {
2370		throw("findrunnable: negative nmspinning")
2371	}
2372	// M wakeup policy is deliberately somewhat conservative, so check if we
2373	// need to wakeup another P here. See "Worker thread parking/unparking"
2374	// comment at the top of the file for details.
2375	if nmspinning == 0 && atomic.Load(&sched.npidle) > 0 {
2376		wakep()
2377	}
2378}
2379
2380// Injects the list of runnable G's into the scheduler.
2381// Can run concurrently with GC.
2382func injectglist(glist *g) {
2383	if glist == nil {
2384		return
2385	}
2386	if trace.enabled {
2387		for gp := glist; gp != nil; gp = gp.schedlink.ptr() {
2388			traceGoUnpark(gp, 0)
2389		}
2390	}
2391	lock(&sched.lock)
2392	var n int
2393	for n = 0; glist != nil; n++ {
2394		gp := glist
2395		glist = gp.schedlink.ptr()
2396		casgstatus(gp, _Gwaiting, _Grunnable)
2397		globrunqput(gp)
2398	}
2399	unlock(&sched.lock)
2400	for ; n != 0 && sched.npidle != 0; n-- {
2401		startm(nil, false)
2402	}
2403}
2404
2405// One round of scheduler: find a runnable goroutine and execute it.
2406// Never returns.
2407func schedule() {
2408	_g_ := getg()
2409
2410	if _g_.m.locks != 0 {
2411		throw("schedule: holding locks")
2412	}
2413
2414	if _g_.m.lockedg != 0 {
2415		stoplockedm()
2416		execute(_g_.m.lockedg.ptr(), false) // Never returns.
2417	}
2418
2419	// We should not schedule away from a g that is executing a cgo call,
2420	// since the cgo call is using the m's g0 stack.
2421	if _g_.m.incgo {
2422		throw("schedule: in cgo")
2423	}
2424
2425top:
2426	if sched.gcwaiting != 0 {
2427		gcstopm()
2428		goto top
2429	}
2430	if _g_.m.p.ptr().runSafePointFn != 0 {
2431		runSafePointFn()
2432	}
2433
2434	var gp *g
2435	var inheritTime bool
2436	if trace.enabled || trace.shutdown {
2437		gp = traceReader()
2438		if gp != nil {
2439			casgstatus(gp, _Gwaiting, _Grunnable)
2440			traceGoUnpark(gp, 0)
2441		}
2442	}
2443	if gp == nil && gcBlackenEnabled != 0 {
2444		gp = gcController.findRunnableGCWorker(_g_.m.p.ptr())
2445	}
2446	if gp == nil {
2447		// Check the global runnable queue once in a while to ensure fairness.
2448		// Otherwise two goroutines can completely occupy the local runqueue
2449		// by constantly respawning each other.
2450		if _g_.m.p.ptr().schedtick%61 == 0 && sched.runqsize > 0 {
2451			lock(&sched.lock)
2452			gp = globrunqget(_g_.m.p.ptr(), 1)
2453			unlock(&sched.lock)
2454		}
2455	}
2456	if gp == nil {
2457		gp, inheritTime = runqget(_g_.m.p.ptr())
2458		if gp != nil && _g_.m.spinning {
2459			throw("schedule: spinning with local work")
2460		}
2461
2462		// Because gccgo does not implement preemption as a stack check,
2463		// we need to check for preemption here for fairness.
2464		// Otherwise goroutines on the local queue may starve
2465		// goroutines on the global queue.
2466		// Since we preempt by storing the goroutine on the global
2467		// queue, this is the only place we need to check preempt.
2468		// This does not call checkPreempt because gp is not running.
2469		if gp != nil && gp.preempt {
2470			gp.preempt = false
2471			lock(&sched.lock)
2472			globrunqput(gp)
2473			unlock(&sched.lock)
2474			goto top
2475		}
2476	}
2477	if gp == nil {
2478		gp, inheritTime = findrunnable() // blocks until work is available
2479	}
2480
2481	// This thread is going to run a goroutine and is not spinning anymore,
2482	// so if it was marked as spinning we need to reset it now and potentially
2483	// start a new spinning M.
2484	if _g_.m.spinning {
2485		resetspinning()
2486	}
2487
2488	if gp.lockedm != 0 {
2489		// Hands off own p to the locked m,
2490		// then blocks waiting for a new p.
2491		startlockedm(gp)
2492		goto top
2493	}
2494
2495	execute(gp, inheritTime)
2496}
2497
2498// dropg removes the association between m and the current goroutine m->curg (gp for short).
2499// Typically a caller sets gp's status away from Grunning and then
2500// immediately calls dropg to finish the job. The caller is also responsible
2501// for arranging that gp will be restarted using ready at an
2502// appropriate time. After calling dropg and arranging for gp to be
2503// readied later, the caller can do other work but eventually should
2504// call schedule to restart the scheduling of goroutines on this m.
2505func dropg() {
2506	_g_ := getg()
2507
2508	setMNoWB(&_g_.m.curg.m, nil)
2509	setGNoWB(&_g_.m.curg, nil)
2510}
2511
2512func parkunlock_c(gp *g, lock unsafe.Pointer) bool {
2513	unlock((*mutex)(lock))
2514	return true
2515}
2516
2517// park continuation on g0.
2518func park_m(gp *g) {
2519	_g_ := getg()
2520
2521	if trace.enabled {
2522		traceGoPark(_g_.m.waittraceev, _g_.m.waittraceskip)
2523	}
2524
2525	casgstatus(gp, _Grunning, _Gwaiting)
2526	dropg()
2527
2528	if _g_.m.waitunlockf != nil {
2529		fn := *(*func(*g, unsafe.Pointer) bool)(unsafe.Pointer(&_g_.m.waitunlockf))
2530		ok := fn(gp, _g_.m.waitlock)
2531		_g_.m.waitunlockf = nil
2532		_g_.m.waitlock = nil
2533		if !ok {
2534			if trace.enabled {
2535				traceGoUnpark(gp, 2)
2536			}
2537			casgstatus(gp, _Gwaiting, _Grunnable)
2538			execute(gp, true) // Schedule it back, never returns.
2539		}
2540	}
2541	schedule()
2542}
2543
2544func goschedImpl(gp *g) {
2545	status := readgstatus(gp)
2546	if status&^_Gscan != _Grunning {
2547		dumpgstatus(gp)
2548		throw("bad g status")
2549	}
2550	casgstatus(gp, _Grunning, _Grunnable)
2551	dropg()
2552	lock(&sched.lock)
2553	globrunqput(gp)
2554	unlock(&sched.lock)
2555
2556	schedule()
2557}
2558
2559// Gosched continuation on g0.
2560func gosched_m(gp *g) {
2561	if trace.enabled {
2562		traceGoSched()
2563	}
2564	goschedImpl(gp)
2565}
2566
2567// goschedguarded is a forbidden-states-avoided version of gosched_m
2568func goschedguarded_m(gp *g) {
2569
2570	if gp.m.locks != 0 || gp.m.mallocing != 0 || gp.m.preemptoff != "" || gp.m.p.ptr().status != _Prunning {
2571		gogo(gp) // never return
2572	}
2573
2574	if trace.enabled {
2575		traceGoSched()
2576	}
2577	goschedImpl(gp)
2578}
2579
2580func gopreempt_m(gp *g) {
2581	if trace.enabled {
2582		traceGoPreempt()
2583	}
2584	goschedImpl(gp)
2585}
2586
2587// Finishes execution of the current goroutine.
2588func goexit1() {
2589	if trace.enabled {
2590		traceGoEnd()
2591	}
2592	mcall(goexit0)
2593}
2594
2595// goexit continuation on g0.
2596func goexit0(gp *g) {
2597	_g_ := getg()
2598
2599	casgstatus(gp, _Grunning, _Gdead)
2600	if isSystemGoroutine(gp) {
2601		atomic.Xadd(&sched.ngsys, -1)
2602		gp.isSystemGoroutine = false
2603	}
2604	gp.m = nil
2605	locked := gp.lockedm != 0
2606	gp.lockedm = 0
2607	_g_.m.lockedg = 0
2608	gp.entry = nil
2609	gp.paniconfault = false
	gp._defer = nil // should be nil already but just in case.
2611	gp._panic = nil // non-nil for Goexit during panic. points at stack-allocated data.
2612	gp.writebuf = nil
2613	gp.waitreason = ""
2614	gp.param = nil
2615	gp.labels = nil
2616	gp.timer = nil
2617
2618	if gcBlackenEnabled != 0 && gp.gcAssistBytes > 0 {
2619		// Flush assist credit to the global pool. This gives
2620		// better information to pacing if the application is
		// rapidly creating and exiting goroutines.
2622		scanCredit := int64(gcController.assistWorkPerByte * float64(gp.gcAssistBytes))
2623		atomic.Xaddint64(&gcController.bgScanCredit, scanCredit)
2624		gp.gcAssistBytes = 0
2625	}
2626
2627	// Note that gp's stack scan is now "valid" because it has no
2628	// stack.
2629	gp.gcscanvalid = true
2630	dropg()
2631
2632	if _g_.m.lockedInt != 0 {
2633		print("invalid m->lockedInt = ", _g_.m.lockedInt, "\n")
2634		throw("internal lockOSThread error")
2635	}
2636	_g_.m.lockedExt = 0
2637	gfput(_g_.m.p.ptr(), gp)
2638	if locked {
2639		// The goroutine may have locked this thread because
2640		// it put it in an unusual kernel state. Kill it
2641		// rather than returning it to the thread pool.
2642
2643		// Return to mstart, which will release the P and exit
2644		// the thread.
2645		if GOOS != "plan9" { // See golang.org/issue/22227.
2646			_g_.m.exiting = true
2647			gogo(_g_.m.g0)
2648		}
2649	}
2650	schedule()
2651}
2652
2653// The goroutine g is about to enter a system call.
2654// Record that it's not using the cpu anymore.
2655// This is called only from the go syscall library and cgocall,
2656// not from the low-level system calls used by the runtime.
2657//
// The entersyscall function is written in C so that it can save the
// current register context, ensuring that the GC will see those registers.
2660// It calls reentersyscall.
2661//
2662// Syscall tracing:
2663// At the start of a syscall we emit traceGoSysCall to capture the stack trace.
2664// If the syscall does not block, that is it, we do not emit any other events.
2665// If the syscall blocks (that is, P is retaken), retaker emits traceGoSysBlock;
2666// when syscall returns we emit traceGoSysExit and when the goroutine starts running
2667// (potentially instantly, if exitsyscallfast returns true) we emit traceGoStart.
2668// To ensure that traceGoSysExit is emitted strictly after traceGoSysBlock,
2669// we remember current value of syscalltick in m (_g_.m.syscalltick = _g_.m.p.ptr().syscalltick),
2670// whoever emits traceGoSysBlock increments p.syscalltick afterwards;
2671// and we wait for the increment before emitting traceGoSysExit.
2672// Note that the increment is done even if tracing is not enabled,
// because tracing can be enabled in the middle of a syscall. We don't want the wait to hang.
2674//
2675//go:nosplit
2676//go:noinline
2677func reentersyscall(pc, sp uintptr) {
2678	_g_ := getg()
2679
2680	// Disable preemption because during this function g is in Gsyscall status,
2681	// but can have inconsistent g->sched, do not let GC observe it.
2682	_g_.m.locks++
2683
2684	_g_.syscallsp = sp
2685	_g_.syscallpc = pc
2686	casgstatus(_g_, _Grunning, _Gsyscall)
2687
2688	if trace.enabled {
2689		systemstack(traceGoSysCall)
2690	}
2691
2692	if atomic.Load(&sched.sysmonwait) != 0 {
2693		systemstack(entersyscall_sysmon)
2694	}
2695
2696	if _g_.m.p.ptr().runSafePointFn != 0 {
2697		// runSafePointFn may stack split if run on this stack
2698		systemstack(runSafePointFn)
2699	}
2700
2701	_g_.m.syscalltick = _g_.m.p.ptr().syscalltick
2702	_g_.sysblocktraced = true
2703	_g_.m.mcache = nil
2704	_g_.m.p.ptr().m = 0
2705	atomic.Store(&_g_.m.p.ptr().status, _Psyscall)
2706	if sched.gcwaiting != 0 {
2707		systemstack(entersyscall_gcwait)
2708	}
2709
2710	_g_.m.locks--
2711}
2712
2713func entersyscall_sysmon() {
2714	lock(&sched.lock)
2715	if atomic.Load(&sched.sysmonwait) != 0 {
2716		atomic.Store(&sched.sysmonwait, 0)
2717		notewakeup(&sched.sysmonnote)
2718	}
2719	unlock(&sched.lock)
2720}
2721
2722func entersyscall_gcwait() {
2723	_g_ := getg()
2724	_p_ := _g_.m.p.ptr()
2725
2726	lock(&sched.lock)
2727	if sched.stopwait > 0 && atomic.Cas(&_p_.status, _Psyscall, _Pgcstop) {
2728		if trace.enabled {
2729			traceGoSysBlock(_p_)
2730			traceProcStop(_p_)
2731		}
2732		_p_.syscalltick++
2733		if sched.stopwait--; sched.stopwait == 0 {
2734			notewakeup(&sched.stopnote)
2735		}
2736	}
2737	unlock(&sched.lock)
2738}
2739
2740// The same as reentersyscall(), but with a hint that the syscall is blocking.
2741//go:nosplit
2742func reentersyscallblock(pc, sp uintptr) {
2743	_g_ := getg()
2744
2745	_g_.m.locks++ // see comment in entersyscall
2746	_g_.throwsplit = true
2747	_g_.m.syscalltick = _g_.m.p.ptr().syscalltick
2748	_g_.sysblocktraced = true
2749	_g_.m.p.ptr().syscalltick++
2750
2751	// Leave SP around for GC and traceback.
2752	_g_.syscallsp = sp
2753	_g_.syscallpc = pc
2754	casgstatus(_g_, _Grunning, _Gsyscall)
2755	systemstack(entersyscallblock_handoff)
2756
2757	_g_.m.locks--
2758}
2759
2760func entersyscallblock_handoff() {
2761	if trace.enabled {
2762		traceGoSysCall()
2763		traceGoSysBlock(getg().m.p.ptr())
2764	}
2765	handoffp(releasep())
2766}
2767
2768// The goroutine g exited its system call.
2769// Arrange for it to run on a cpu again.
2770// This is called only from the go syscall library, not
2771// from the low-level system calls used by the runtime.
2772//
2773// Write barriers are not allowed because our P may have been stolen.
2774//
2775//go:nosplit
2776//go:nowritebarrierrec
2777func exitsyscall(dummy int32) {
2778	_g_ := getg()
2779
2780	_g_.m.locks++ // see comment in entersyscall
2781
2782	_g_.waitsince = 0
2783	oldp := _g_.m.p.ptr()
2784	if exitsyscallfast() {
2785		if _g_.m.mcache == nil {
2786			systemstack(func() {
2787				throw("lost mcache")
2788			})
2789		}
2790		if trace.enabled {
2791			if oldp != _g_.m.p.ptr() || _g_.m.syscalltick != _g_.m.p.ptr().syscalltick {
2792				systemstack(traceGoStart)
2793			}
2794		}
2795		// There's a cpu for us, so we can run.
2796		_g_.m.p.ptr().syscalltick++
2797		// We need to cas the status and scan before resuming...
2798		casgstatus(_g_, _Gsyscall, _Grunning)
2799
2800		exitsyscallclear(_g_)
2801		_g_.m.locks--
2802		_g_.throwsplit = false
2803
2804		// Check preemption, since unlike gc we don't check on
2805		// every call.
2806		if getg().preempt {
2807			checkPreempt()
2808		}
2809
2810		return
2811	}
2812
2813	_g_.sysexitticks = 0
2814	if trace.enabled {
2815		// Wait till traceGoSysBlock event is emitted.
2816		// This ensures consistency of the trace (the goroutine is started after it is blocked).
2817		for oldp != nil && oldp.syscalltick == _g_.m.syscalltick {
2818			osyield()
2819		}
2820		// We can't trace syscall exit right now because we don't have a P.
2821		// Tracing code can invoke write barriers that cannot run without a P.
2822		// So instead we remember the syscall exit time and emit the event
2823		// in execute when we have a P.
2824		_g_.sysexitticks = cputicks()
2825	}
2826
2827	_g_.m.locks--
2828
2829	// Call the scheduler.
2830	mcall(exitsyscall0)
2831
2832	if _g_.m.mcache == nil {
2833		systemstack(func() {
2834			throw("lost mcache")
2835		})
2836	}
2837
2838	// Scheduler returned, so we're allowed to run now.
2839	// Delete the syscallsp information that we left for
2840	// the garbage collector during the system call.
2841	// Must wait until now because until gosched returns
2842	// we don't know for sure that the garbage collector
2843	// is not running.
2844	exitsyscallclear(_g_)
2845
2846	_g_.m.p.ptr().syscalltick++
2847	_g_.throwsplit = false
2848}
2849
2850//go:nosplit
2851func exitsyscallfast() bool {
2852	_g_ := getg()
2853
2854	// Freezetheworld sets stopwait but does not retake P's.
2855	if sched.stopwait == freezeStopWait {
2856		_g_.m.mcache = nil
2857		_g_.m.p = 0
2858		return false
2859	}
2860
2861	// Try to re-acquire the last P.
2862	if _g_.m.p != 0 && _g_.m.p.ptr().status == _Psyscall && atomic.Cas(&_g_.m.p.ptr().status, _Psyscall, _Prunning) {
2863		// There's a cpu for us, so we can run.
2864		exitsyscallfast_reacquired()
2865		return true
2866	}
2867
2868	// Try to get any other idle P.
2869	oldp := _g_.m.p.ptr()
2870	_g_.m.mcache = nil
2871	_g_.m.p = 0
2872	if sched.pidle != 0 {
2873		var ok bool
2874		systemstack(func() {
2875			ok = exitsyscallfast_pidle()
2876			if ok && trace.enabled {
2877				if oldp != nil {
2878					// Wait till traceGoSysBlock event is emitted.
2879					// This ensures consistency of the trace (the goroutine is started after it is blocked).
2880					for oldp.syscalltick == _g_.m.syscalltick {
2881						osyield()
2882					}
2883				}
2884				traceGoSysExit(0)
2885			}
2886		})
2887		if ok {
2888			return true
2889		}
2890	}
2891	return false
2892}
2893
2894// exitsyscallfast_reacquired is the exitsyscall path on which this G
2895// has successfully reacquired the P it was running on before the
2896// syscall.
2897//
2898// This function is allowed to have write barriers because exitsyscall
2899// has acquired a P at this point.
2900//
2901//go:yeswritebarrierrec
2902//go:nosplit
2903func exitsyscallfast_reacquired() {
2904	_g_ := getg()
2905	_g_.m.mcache = _g_.m.p.ptr().mcache
2906	_g_.m.p.ptr().m.set(_g_.m)
2907	if _g_.m.syscalltick != _g_.m.p.ptr().syscalltick {
2908		if trace.enabled {
			// The p was retaken and then entered a syscall again (since _g_.m.syscalltick has changed).
2910			// traceGoSysBlock for this syscall was already emitted,
2911			// but here we effectively retake the p from the new syscall running on the same p.
2912			systemstack(func() {
2913				// Denote blocking of the new syscall.
2914				traceGoSysBlock(_g_.m.p.ptr())
2915				// Denote completion of the current syscall.
2916				traceGoSysExit(0)
2917			})
2918		}
2919		_g_.m.p.ptr().syscalltick++
2920	}
2921}
2922
2923func exitsyscallfast_pidle() bool {
2924	lock(&sched.lock)
2925	_p_ := pidleget()
2926	if _p_ != nil && atomic.Load(&sched.sysmonwait) != 0 {
2927		atomic.Store(&sched.sysmonwait, 0)
2928		notewakeup(&sched.sysmonnote)
2929	}
2930	unlock(&sched.lock)
2931	if _p_ != nil {
2932		acquirep(_p_)
2933		return true
2934	}
2935	return false
2936}
2937
2938// exitsyscall slow path on g0.
2939// Failed to acquire P, enqueue gp as runnable.
2940//
2941//go:nowritebarrierrec
2942func exitsyscall0(gp *g) {
2943	_g_ := getg()
2944
2945	casgstatus(gp, _Gsyscall, _Grunnable)
2946	dropg()
2947	lock(&sched.lock)
2948	_p_ := pidleget()
2949	if _p_ == nil {
2950		globrunqput(gp)
2951	} else if atomic.Load(&sched.sysmonwait) != 0 {
2952		atomic.Store(&sched.sysmonwait, 0)
2953		notewakeup(&sched.sysmonnote)
2954	}
2955	unlock(&sched.lock)
2956	if _p_ != nil {
2957		acquirep(_p_)
2958		execute(gp, false) // Never returns.
2959	}
2960	if _g_.m.lockedg != 0 {
2961		// Wait until another thread schedules gp and so m again.
2962		stoplockedm()
2963		execute(gp, false) // Never returns.
2964	}
2965	stopm()
2966	schedule() // Never returns.
2967}
2968
2969// exitsyscallclear clears GC-related information that we only track
2970// during a syscall.
2971func exitsyscallclear(gp *g) {
2972	// Garbage collector isn't running (since we are), so okay to
2973	// clear syscallsp.
2974	gp.syscallsp = 0
2975
2976	gp.gcstack = 0
2977	gp.gcnextsp = 0
2978	memclrNoHeapPointers(unsafe.Pointer(&gp.gcregs), unsafe.Sizeof(gp.gcregs))
2979}
2980
2981// Code generated by cgo, and some library code, calls syscall.Entersyscall
2982// and syscall.Exitsyscall.
2983
2984//go:linkname syscall_entersyscall syscall.Entersyscall
2985//go:nosplit
2986func syscall_entersyscall() {
2987	entersyscall(0)
2988}
2989
2990//go:linkname syscall_exitsyscall syscall.Exitsyscall
2991//go:nosplit
2992func syscall_exitsyscall() {
2993	exitsyscall(0)
2994}
2995
2996func beforefork() {
2997	gp := getg().m.curg
2998
2999	// Block signals during a fork, so that the child does not run
3000	// a signal handler before exec if a signal is sent to the process
3001	// group. See issue #18600.
3002	gp.m.locks++
3003	msigsave(gp.m)
3004	sigblock()
3005}
3006
3007// Called from syscall package before fork.
3008//go:linkname syscall_runtime_BeforeFork syscall.runtime_BeforeFork
3009//go:nosplit
3010func syscall_runtime_BeforeFork() {
3011	systemstack(beforefork)
3012}
3013
3014func afterfork() {
3015	gp := getg().m.curg
3016
3017	msigrestore(gp.m.sigmask)
3018
3019	gp.m.locks--
3020}
3021
3022// Called from syscall package after fork in parent.
3023//go:linkname syscall_runtime_AfterFork syscall.runtime_AfterFork
3024//go:nosplit
3025func syscall_runtime_AfterFork() {
3026	systemstack(afterfork)
3027}
3028
3029// inForkedChild is true while manipulating signals in the child process.
3030// This is used to avoid calling libc functions in case we are using vfork.
3031var inForkedChild bool
3032
3033// Called from syscall package after fork in child.
3034// It resets non-sigignored signals to the default handler, and
3035// restores the signal mask in preparation for the exec.
3036//
3037// Because this might be called during a vfork, and therefore may be
3038// temporarily sharing address space with the parent process, this must
// not change any global variables or call into C code that may do so.
3040//
3041//go:linkname syscall_runtime_AfterForkInChild syscall.runtime_AfterForkInChild
3042//go:nosplit
3043//go:nowritebarrierrec
3044func syscall_runtime_AfterForkInChild() {
3045	// It's OK to change the global variable inForkedChild here
3046	// because we are going to change it back. There is no race here,
3047	// because if we are sharing address space with the parent process,
3048	// then the parent process can not be running concurrently.
3049	inForkedChild = true
3050
3051	clearSignalHandlers()
3052
3053	// When we are the child we are the only thread running,
3054	// so we know that nothing else has changed gp.m.sigmask.
3055	msigrestore(getg().m.sigmask)
3056
3057	inForkedChild = false
3058}
3059
3060// Called from syscall package before Exec.
3061//go:linkname syscall_runtime_BeforeExec syscall.runtime_BeforeExec
3062func syscall_runtime_BeforeExec() {
3063	// Prevent thread creation during exec.
3064	execLock.lock()
3065}
3066
3067// Called from syscall package after Exec.
3068//go:linkname syscall_runtime_AfterExec syscall.runtime_AfterExec
3069func syscall_runtime_AfterExec() {
3070	execLock.unlock()
3071}
3072
3073// Create a new g running fn passing arg as the single argument.
3074// Put it on the queue of g's waiting to run.
3075// The compiler turns a go statement into a call to this.
3076//go:linkname newproc __go_go
3077func newproc(fn uintptr, arg unsafe.Pointer) *g {
3078	_g_ := getg()
3079
3080	if fn == 0 {
3081		_g_.m.throwing = -1 // do not dump full stacks
3082		throw("go of nil func value")
3083	}
3084	_g_.m.locks++ // disable preemption because it can be holding p in a local var
3085
3086	_p_ := _g_.m.p.ptr()
3087	newg := gfget(_p_)
3088	var (
3089		sp     unsafe.Pointer
3090		spsize uintptr
3091	)
3092	if newg == nil {
3093		newg = malg(true, false, &sp, &spsize)
3094		casgstatus(newg, _Gidle, _Gdead)
3095		allgadd(newg) // publishes with a g->status of Gdead so GC scanner doesn't look at uninitialized stack.
3096	} else {
3097		resetNewG(newg, &sp, &spsize)
3098	}
3099	newg.traceback = nil
3100
3101	if readgstatus(newg) != _Gdead {
3102		throw("newproc1: new g is not Gdead")
3103	}
3104
3105	// Store the C function pointer into entryfn, take the address
3106	// of entryfn, convert it to a Go function value, and store
3107	// that in entry.
3108	newg.entryfn = fn
3109	var entry func(unsafe.Pointer)
3110	*(*unsafe.Pointer)(unsafe.Pointer(&entry)) = unsafe.Pointer(&newg.entryfn)
3111	newg.entry = entry
3112
3113	newg.param = arg
3114	newg.gopc = getcallerpc()
3115	newg.startpc = fn
3116	if _g_.m.curg != nil {
3117		newg.labels = _g_.m.curg.labels
3118	}
3119	if isSystemGoroutine(newg) {
3120		atomic.Xadd(&sched.ngsys, +1)
3121	}
3122	newg.gcscanvalid = false
3123	casgstatus(newg, _Gdead, _Grunnable)
3124
3125	if _p_.goidcache == _p_.goidcacheend {
3126		// Sched.goidgen is the last allocated id,
3127		// this batch must be [sched.goidgen+1, sched.goidgen+GoidCacheBatch].
3128		// At startup sched.goidgen=0, so main goroutine receives goid=1.
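		// For example, the first refill yields goids 1.._GoidCacheBatch,
		// the next _GoidCacheBatch+1..2*_GoidCacheBatch, and so on.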
3129		_p_.goidcache = atomic.Xadd64(&sched.goidgen, _GoidCacheBatch)
3130		_p_.goidcache -= _GoidCacheBatch - 1
3131		_p_.goidcacheend = _p_.goidcache + _GoidCacheBatch
3132	}
3133	newg.goid = int64(_p_.goidcache)
3134	_p_.goidcache++
3135	if trace.enabled {
3136		traceGoCreate(newg, newg.startpc)
3137	}
3138
3139	makeGContext(newg, sp, spsize)
3140
3141	runqput(_p_, newg, true)
3142
3143	if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 && mainStarted {
3144		wakep()
3145	}
3146	_g_.m.locks--
3147	return newg
3148}
3149
3150// expectedSystemGoroutines counts the number of goroutines expected
3151// to mark themselves as system goroutines. After they mark themselves
3152// by calling setSystemGoroutine, this is decremented. NumGoroutines
3153// uses this to wait for all system goroutines to mark themselves
3154// before it counts them.
3155var expectedSystemGoroutines uint32
3156
3157// expectSystemGoroutine is called when starting a goroutine that will
3158// call setSystemGoroutine. It increments expectedSystemGoroutines.
3159func expectSystemGoroutine() {
3160	atomic.Xadd(&expectedSystemGoroutines, +1)
3161}
3162
3163// waitForSystemGoroutines waits for all currently expected system
3164// goroutines to register themselves.
3165func waitForSystemGoroutines() {
3166	for atomic.Load(&expectedSystemGoroutines) > 0 {
3167		Gosched()
3168		osyield()
3169	}
3170}
3171
3172// setSystemGoroutine marks this goroutine as a "system goroutine".
3173// In the gc toolchain this is done by comparing startpc to a list of
3174// saved special PCs. In gccgo that approach does not work as startpc
3175// is often a thunk that invokes the real function with arguments,
3176// so the thunk address never matches the saved special PCs. Instead,
3177// since there are only a limited number of "system goroutines",
3178// we force each one to mark itself as special.
3179func setSystemGoroutine() {
3180	getg().isSystemGoroutine = true
3181	atomic.Xadd(&sched.ngsys, +1)
3182	atomic.Xadd(&expectedSystemGoroutines, -1)
3183}
3184
3185// Put on gfree list.
3186// If local list is too long, transfer a batch to the global list.
3187func gfput(_p_ *p, gp *g) {
3188	if readgstatus(gp) != _Gdead {
3189		throw("gfput: bad status (not Gdead)")
3190	}
3191
3192	gp.schedlink.set(_p_.gfree)
3193	_p_.gfree = gp
3194	_p_.gfreecnt++
3195	if _p_.gfreecnt >= 64 {
3196		lock(&sched.gflock)
3197		for _p_.gfreecnt >= 32 {
3198			_p_.gfreecnt--
3199			gp = _p_.gfree
3200			_p_.gfree = gp.schedlink.ptr()
3201			gp.schedlink.set(sched.gfree)
3202			sched.gfree = gp
3203			sched.ngfree++
3204		}
3205		unlock(&sched.gflock)
3206	}
3207}
3208
3209// Get from gfree list.
3210// If local list is empty, grab a batch from global list.
3211func gfget(_p_ *p) *g {
3212retry:
3213	gp := _p_.gfree
3214	if gp == nil && sched.gfree != nil {
3215		lock(&sched.gflock)
3216		for _p_.gfreecnt < 32 {
3217			if sched.gfree != nil {
3218				gp = sched.gfree
3219				sched.gfree = gp.schedlink.ptr()
3220			} else {
3221				break
3222			}
3223			_p_.gfreecnt++
3224			sched.ngfree--
3225			gp.schedlink.set(_p_.gfree)
3226			_p_.gfree = gp
3227		}
3228		unlock(&sched.gflock)
3229		goto retry
3230	}
3231	if gp != nil {
3232		_p_.gfree = gp.schedlink.ptr()
3233		_p_.gfreecnt--
3234	}
3235	return gp
3236}
3237
3238// Purge all cached G's from gfree list to the global list.
3239func gfpurge(_p_ *p) {
3240	lock(&sched.gflock)
3241	for _p_.gfreecnt != 0 {
3242		_p_.gfreecnt--
3243		gp := _p_.gfree
3244		_p_.gfree = gp.schedlink.ptr()
3245		gp.schedlink.set(sched.gfree)
3246		sched.gfree = gp
3247		sched.ngfree++
3248	}
3249	unlock(&sched.gflock)
3250}
3251
3252// Breakpoint executes a breakpoint trap.
3253func Breakpoint() {
3254	breakpoint()
3255}
3256
3257// dolockOSThread is called by LockOSThread and lockOSThread below
3258// after they modify m.locked. Do not allow preemption during this call,
3259// or else the m might be different in this function than in the caller.
3260//go:nosplit
3261func dolockOSThread() {
3262	_g_ := getg()
3263	_g_.m.lockedg.set(_g_)
3264	_g_.lockedm.set(_g_.m)
3265}
3266
3267//go:nosplit
3268
3269// LockOSThread wires the calling goroutine to its current operating system thread.
3270// The calling goroutine will always execute in that thread,
3271// and no other goroutine will execute in it,
3272// until the calling goroutine has made as many calls to
3273// UnlockOSThread as to LockOSThread.
3274// If the calling goroutine exits without unlocking the thread,
3275// the thread will be terminated.
3276//
3277// A goroutine should call LockOSThread before calling OS services or
3278// non-Go library functions that depend on per-thread state.
3279func LockOSThread() {
3280	if atomic.Load(&newmHandoff.haveTemplateThread) == 0 && GOOS != "plan9" {
3281		// If we need to start a new thread from the locked
3282		// thread, we need the template thread. Start it now
3283		// while we're in a known-good state.
3284		startTemplateThread()
3285	}
3286	_g_ := getg()
3287	_g_.m.lockedExt++
3288	if _g_.m.lockedExt == 0 {
3289		_g_.m.lockedExt--
3290		panic("LockOSThread nesting overflow")
3291	}
3292	dolockOSThread()
3293}
3294
3295//go:nosplit
3296func lockOSThread() {
3297	getg().m.lockedInt++
3298	dolockOSThread()
3299}
3300
3301// dounlockOSThread is called by UnlockOSThread and unlockOSThread below
3302// after they update m->locked. Do not allow preemption during this call,
// or else the m might be different in this function than in the caller.
3304//go:nosplit
3305func dounlockOSThread() {
3306	_g_ := getg()
3307	if _g_.m.lockedInt != 0 || _g_.m.lockedExt != 0 {
3308		return
3309	}
3310	_g_.m.lockedg = 0
3311	_g_.lockedm = 0
3312}
3313
3314//go:nosplit
3315
3316// UnlockOSThread undoes an earlier call to LockOSThread.
3317// If this drops the number of active LockOSThread calls on the
3318// calling goroutine to zero, it unwires the calling goroutine from
3319// its fixed operating system thread.
3320// If there are no active LockOSThread calls, this is a no-op.
3321//
3322// Before calling UnlockOSThread, the caller must ensure that the OS
3323// thread is suitable for running other goroutines. If the caller made
3324// any permanent changes to the state of the thread that would affect
3325// other goroutines, it should not call this function and thus leave
3326// the goroutine locked to the OS thread until the goroutine (and
3327// hence the thread) exits.
3328func UnlockOSThread() {
3329	_g_ := getg()
3330	if _g_.m.lockedExt == 0 {
3331		return
3332	}
3333	_g_.m.lockedExt--
3334	dounlockOSThread()
3335}
3336
3337//go:nosplit
3338func unlockOSThread() {
3339	_g_ := getg()
3340	if _g_.m.lockedInt == 0 {
3341		systemstack(badunlockosthread)
3342	}
3343	_g_.m.lockedInt--
3344	dounlockOSThread()
3345}
3346
3347func badunlockosthread() {
3348	throw("runtime: internal error: misuse of lockOSThread/unlockOSThread")
3349}
3350
3351func gcount() int32 {
3352	n := int32(allglen) - sched.ngfree - int32(atomic.Load(&sched.ngsys))
3353	for _, _p_ := range allp {
3354		n -= _p_.gfreecnt
3355	}
3356
3357	// All these variables can be changed concurrently, so the result can be inconsistent.
3358	// But at least the current goroutine is running.
3359	if n < 1 {
3360		n = 1
3361	}
3362	return n
3363}
3364
3365func mcount() int32 {
3366	return int32(sched.mnext - sched.nmfreed)
3367}
3368
3369var prof struct {
3370	signalLock uint32
3371	hz         int32
3372}
3373
3374func _System()                    { _System() }
3375func _ExternalCode()              { _ExternalCode() }
3376func _LostExternalCode()          { _LostExternalCode() }
3377func _GC()                        { _GC() }
3378func _LostSIGPROFDuringAtomic64() { _LostSIGPROFDuringAtomic64() }
3379
3380// Counts SIGPROFs received while in atomic64 critical section, on mips{,le}
3381var lostAtomic64Count uint64
3382
3383var _SystemPC = funcPC(_System)
3384var _ExternalCodePC = funcPC(_ExternalCode)
3385var _LostExternalCodePC = funcPC(_LostExternalCode)
3386var _GCPC = funcPC(_GC)
3387var _LostSIGPROFDuringAtomic64PC = funcPC(_LostSIGPROFDuringAtomic64)
3388
3389// Called if we receive a SIGPROF signal.
3390// Called by the signal handler, may run during STW.
3391//go:nowritebarrierrec
3392func sigprof(pc uintptr, gp *g, mp *m) {
3393	if prof.hz == 0 {
3394		return
3395	}
3396
3397	// Profiling runs concurrently with GC, so it must not allocate.
3398	// Set a trap in case the code does allocate.
3399	// Note that on windows, one thread takes profiles of all the
3400	// other threads, so mp is usually not getg().m.
3401	// In fact mp may not even be stopped.
3402	// See golang.org/issue/17165.
3403	getg().m.mallocing++
3404
3405	traceback := true
3406
3407	// If SIGPROF arrived while already fetching runtime callers
3408	// we can have trouble on older systems because the unwind
3409	// library calls dl_iterate_phdr which was not reentrant in
3410	// the past. alreadyInCallers checks for that.
3411	if gp == nil || alreadyInCallers() {
3412		traceback = false
3413	}
3414
3415	var stk [maxCPUProfStack]uintptr
3416	n := 0
3417	if traceback {
3418		var stklocs [maxCPUProfStack]location
3419		n = callers(0, stklocs[:])
3420
3421		for i := 0; i < n; i++ {
3422			stk[i] = stklocs[i].pc
3423		}
3424	}
3425
3426	if n <= 0 {
3427		// Normal traceback is impossible or has failed.
3428		// Account it against abstract "System" or "GC".
3429		n = 2
3430		stk[0] = pc
3431		if mp.preemptoff != "" || mp.helpgc != 0 {
3432			stk[1] = _GCPC + sys.PCQuantum
3433		} else {
3434			stk[1] = _SystemPC + sys.PCQuantum
3435		}
3436	}
3437
3438	if prof.hz != 0 {
3439		if (GOARCH == "mips" || GOARCH == "mipsle") && lostAtomic64Count > 0 {
3440			cpuprof.addLostAtomic64(lostAtomic64Count)
3441			lostAtomic64Count = 0
3442		}
3443		cpuprof.add(gp, stk[:n])
3444	}
3445	getg().m.mallocing--
3446}
3447
3448// Use global arrays rather than using up lots of stack space in the
3449// signal handler. This is safe since while we are executing a SIGPROF
3450// signal other SIGPROF signals are blocked.
3451var nonprofGoStklocs [maxCPUProfStack]location
3452var nonprofGoStk [maxCPUProfStack]uintptr
3453
3454// sigprofNonGo is called if we receive a SIGPROF signal on a non-Go thread,
3455// and the signal handler collected a stack trace in sigprofCallers.
3456// When this is called, sigprofCallersUse will be non-zero.
3457// g is nil, and what we can do is very limited.
3458//go:nosplit
3459//go:nowritebarrierrec
3460func sigprofNonGo(pc uintptr) {
3461	if prof.hz != 0 {
3462		n := callers(0, nonprofGoStklocs[:])
3463
3464		for i := 0; i < n; i++ {
3465			nonprofGoStk[i] = nonprofGoStklocs[i].pc
3466		}
3467
3468		if n <= 0 {
3469			n = 2
3470			nonprofGoStk[0] = pc
3471			nonprofGoStk[1] = _ExternalCodePC + sys.PCQuantum
3472		}
3473
3474		cpuprof.addNonGo(nonprofGoStk[:n])
3475	}
3476}
3477
3478// sigprofNonGoPC is called when a profiling signal arrived on a
3479// non-Go thread and we have a single PC value, not a stack trace.
3480// g is nil, and what we can do is very limited.
3481//go:nosplit
3482//go:nowritebarrierrec
3483func sigprofNonGoPC(pc uintptr) {
3484	if prof.hz != 0 {
3485		stk := []uintptr{
3486			pc,
3487			_ExternalCodePC + sys.PCQuantum,
3488		}
3489		cpuprof.addNonGo(stk)
3490	}
3491}
3492
3493// setcpuprofilerate sets the CPU profiling rate to hz times per second.
3494// If hz <= 0, setcpuprofilerate turns off CPU profiling.
3495func setcpuprofilerate(hz int32) {
3496	// Force sane arguments.
3497	if hz < 0 {
3498		hz = 0
3499	}
3500
3501	// Disable preemption, otherwise we can be rescheduled to another thread
3502	// that has profiling enabled.
3503	_g_ := getg()
3504	_g_.m.locks++
3505
3506	// Stop profiler on this thread so that it is safe to lock prof.
3507	// if a profiling signal came in while we had prof locked,
3508	// it would deadlock.
3509	setThreadCPUProfiler(0)
3510
3511	for !atomic.Cas(&prof.signalLock, 0, 1) {
3512		osyield()
3513	}
3514	if prof.hz != hz {
3515		setProcessCPUProfiler(hz)
3516		prof.hz = hz
3517	}
3518	atomic.Store(&prof.signalLock, 0)
3519
3520	lock(&sched.lock)
3521	sched.profilehz = hz
3522	unlock(&sched.lock)
3523
3524	if hz != 0 {
3525		setThreadCPUProfiler(hz)
3526	}
3527
3528	_g_.m.locks--
3529}
3530
3531// Change number of processors. The world is stopped, sched is locked.
3532// gcworkbufs are not being modified by either the GC or
3533// the write barrier code.
3534// Returns list of Ps with local work, they need to be scheduled by the caller.
3535func procresize(nprocs int32) *p {
3536	old := gomaxprocs
3537	if old < 0 || nprocs <= 0 {
3538		throw("procresize: invalid arg")
3539	}
3540	if trace.enabled {
3541		traceGomaxprocs(nprocs)
3542	}
3543
3544	// update statistics
3545	now := nanotime()
3546	if sched.procresizetime != 0 {
3547		sched.totaltime += int64(old) * (now - sched.procresizetime)
3548	}
3549	sched.procresizetime = now
3550
3551	// Grow allp if necessary.
3552	if nprocs > int32(len(allp)) {
3553		// Synchronize with retake, which could be running
3554		// concurrently since it doesn't run on a P.
3555		lock(&allpLock)
3556		if nprocs <= int32(cap(allp)) {
3557			allp = allp[:nprocs]
3558		} else {
3559			nallp := make([]*p, nprocs)
3560			// Copy everything up to allp's cap so we
3561			// never lose old allocated Ps.
3562			copy(nallp, allp[:cap(allp)])
3563			allp = nallp
3564		}
3565		unlock(&allpLock)
3566	}
3567
3568	// initialize new P's
3569	for i := int32(0); i < nprocs; i++ {
3570		pp := allp[i]
3571		if pp == nil {
3572			pp = new(p)
3573			pp.id = i
3574			pp.status = _Pgcstop
3575			pp.sudogcache = pp.sudogbuf[:0]
3576			pp.deferpool = pp.deferpoolbuf[:0]
3577			pp.wbBuf.reset()
3578			atomicstorep(unsafe.Pointer(&allp[i]), unsafe.Pointer(pp))
3579		}
3580		if pp.mcache == nil {
3581			if old == 0 && i == 0 {
3582				if getg().m.mcache == nil {
3583					throw("missing mcache?")
3584				}
3585				pp.mcache = getg().m.mcache // bootstrap
3586			} else {
3587				pp.mcache = allocmcache()
3588			}
3589		}
3590	}
3591
3592	// free unused P's
3593	for i := nprocs; i < old; i++ {
3594		p := allp[i]
3595		if trace.enabled && p == getg().m.p.ptr() {
3596			// moving to p[0], pretend that we were descheduled
3597			// and then scheduled again to keep the trace sane.
3598			traceGoSched()
3599			traceProcStop(p)
3600		}
3601		// move all runnable goroutines to the global queue
3602		for p.runqhead != p.runqtail {
3603			// pop from tail of local queue
3604			p.runqtail--
3605			gp := p.runq[p.runqtail%uint32(len(p.runq))].ptr()
3606			// push onto head of global queue
3607			globrunqputhead(gp)
3608		}
3609		if p.runnext != 0 {
3610			globrunqputhead(p.runnext.ptr())
3611			p.runnext = 0
3612		}
3613		// if there's a background worker, make it runnable and put
3614		// it on the global queue so it can clean itself up
3615		if gp := p.gcBgMarkWorker.ptr(); gp != nil {
3616			casgstatus(gp, _Gwaiting, _Grunnable)
3617			if trace.enabled {
3618				traceGoUnpark(gp, 0)
3619			}
3620			globrunqput(gp)
3621			// This assignment doesn't race because the
3622			// world is stopped.
3623			p.gcBgMarkWorker.set(nil)
3624		}
3625		// Flush p's write barrier buffer.
3626		if gcphase != _GCoff {
3627			wbBufFlush1(p)
3628			p.gcw.dispose()
3629		}
3630		for i := range p.sudogbuf {
3631			p.sudogbuf[i] = nil
3632		}
3633		p.sudogcache = p.sudogbuf[:0]
3634		for i := range p.deferpoolbuf {
3635			p.deferpoolbuf[i] = nil
3636		}
3637		p.deferpool = p.deferpoolbuf[:0]
3638		freemcache(p.mcache)
3639		p.mcache = nil
3640		gfpurge(p)
3641		traceProcFree(p)
3642		p.gcAssistTime = 0
3643		p.status = _Pdead
3644		// can't free P itself because it can be referenced by an M in syscall
3645	}
3646
3647	// Trim allp.
3648	if int32(len(allp)) != nprocs {
3649		lock(&allpLock)
3650		allp = allp[:nprocs]
3651		unlock(&allpLock)
3652	}
3653
3654	_g_ := getg()
3655	if _g_.m.p != 0 && _g_.m.p.ptr().id < nprocs {
3656		// continue to use the current P
3657		_g_.m.p.ptr().status = _Prunning
3658	} else {
3659		// release the current P and acquire allp[0]
3660		if _g_.m.p != 0 {
3661			_g_.m.p.ptr().m = 0
3662		}
3663		_g_.m.p = 0
3664		_g_.m.mcache = nil
3665		p := allp[0]
3666		p.m = 0
3667		p.status = _Pidle
3668		acquirep(p)
3669		if trace.enabled {
3670			traceGoStart()
3671		}
3672	}
3673	var runnablePs *p
3674	for i := nprocs - 1; i >= 0; i-- {
3675		p := allp[i]
3676		if _g_.m.p.ptr() == p {
3677			continue
3678		}
3679		p.status = _Pidle
3680		if runqempty(p) {
3681			pidleput(p)
3682		} else {
3683			p.m.set(mget())
3684			p.link.set(runnablePs)
3685			runnablePs = p
3686		}
3687	}
3688	stealOrder.reset(uint32(nprocs))
3689	var int32p *int32 = &gomaxprocs // make compiler check that gomaxprocs is an int32
3690	atomic.Store((*uint32)(unsafe.Pointer(int32p)), uint32(nprocs))
3691	return runnablePs
3692}
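
// For illustration only: procresize is normally reached from a
// runtime.GOMAXPROCS call (or at startup, from the GOMAXPROCS
// environment variable); that caller stops and restarts the world
// around the resize, e.g.:
//
//	prev := runtime.GOMAXPROCS(4) // stop the world, procresize(4), start the world
//	_ = prev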
3693
3694// Associate p and the current m.
3695//
3696// This function is allowed to have write barriers even if the caller
3697// isn't because it immediately acquires _p_.
3698//
3699//go:yeswritebarrierrec
3700func acquirep(_p_ *p) {
3701	// Do the part that isn't allowed to have write barriers.
3702	acquirep1(_p_)
3703
3704	// have p; write barriers now allowed
3705	_g_ := getg()
3706	_g_.m.mcache = _p_.mcache
3707
3708	if trace.enabled {
3709		traceProcStart()
3710	}
3711}
3712
3713// acquirep1 is the first step of acquirep, which actually acquires
3714// _p_. This is broken out so we can disallow write barriers for this
3715// part, since we don't yet have a P.
3716//
3717//go:nowritebarrierrec
3718func acquirep1(_p_ *p) {
3719	_g_ := getg()
3720
3721	if _g_.m.p != 0 || _g_.m.mcache != nil {
3722		throw("acquirep: already in go")
3723	}
3724	if _p_.m != 0 || _p_.status != _Pidle {
3725		id := int64(0)
3726		if _p_.m != 0 {
3727			id = _p_.m.ptr().id
3728		}
3729		print("acquirep: p->m=", _p_.m, "(", id, ") p->status=", _p_.status, "\n")
3730		throw("acquirep: invalid p state")
3731	}
3732	_g_.m.p.set(_p_)
3733	_p_.m.set(_g_.m)
3734	_p_.status = _Prunning
3735}
3736
3737// Disassociate p and the current m.
3738func releasep() *p {
3739	_g_ := getg()
3740
3741	if _g_.m.p == 0 || _g_.m.mcache == nil {
3742		throw("releasep: invalid arg")
3743	}
3744	_p_ := _g_.m.p.ptr()
3745	if _p_.m.ptr() != _g_.m || _p_.mcache != _g_.m.mcache || _p_.status != _Prunning {
3746		print("releasep: m=", _g_.m, " m->p=", _g_.m.p.ptr(), " p->m=", _p_.m, " m->mcache=", _g_.m.mcache, " p->mcache=", _p_.mcache, " p->status=", _p_.status, "\n")
3747		throw("releasep: invalid p state")
3748	}
3749	if trace.enabled {
3750		traceProcStop(_g_.m.p.ptr())
3751	}
3752	_g_.m.p = 0
3753	_g_.m.mcache = nil
3754	_p_.m = 0
3755	_p_.status = _Pidle
3756	return _p_
3757}
3758
3759func incidlelocked(v int32) {
3760	lock(&sched.lock)
3761	sched.nmidlelocked += v
3762	if v > 0 {
3763		checkdead()
3764	}
3765	unlock(&sched.lock)
3766}
3767
3768// Check for a deadlock situation.
3769// The check is based on the number of running M's; if that number is 0, we have a deadlock.
3770// sched.lock must be held.
3771func checkdead() {
3772	// For -buildmode=c-shared or -buildmode=c-archive it's OK if
3773	// there are no running goroutines. The calling program is
3774	// assumed to be running.
3775	if islibrary || isarchive {
3776		return
3777	}
3778
3779	// If we are dying because of a signal caught on an already idle thread,
3780	// freezetheworld will cause all running threads to block.
3781	// And runtime will essentially enter into deadlock state,
3782	// except that there is a thread that will call exit soon.
3783	if panicking > 0 {
3784		return
3785	}
3786
3787	run := mcount() - sched.nmidle - sched.nmidlelocked - sched.nmsys
3788	if run > 0 {
3789		return
3790	}
3791	if run < 0 {
3792		print("runtime: checkdead: nmidle=", sched.nmidle, " nmidlelocked=", sched.nmidlelocked, " mcount=", mcount(), " nmsys=", sched.nmsys, "\n")
3793		throw("checkdead: inconsistent counts")
3794	}
3795
3796	grunning := 0
3797	lock(&allglock)
3798	for i := 0; i < len(allgs); i++ {
3799		gp := allgs[i]
3800		if isSystemGoroutine(gp) {
3801			continue
3802		}
3803		s := readgstatus(gp)
3804		switch s &^ _Gscan {
3805		case _Gwaiting:
3806			grunning++
3807		case _Grunnable,
3808			_Grunning,
3809			_Gsyscall:
3810			unlock(&allglock)
3811			print("runtime: checkdead: find g ", gp.goid, " in status ", s, "\n")
3812			throw("checkdead: runnable g")
3813		}
3814	}
3815	unlock(&allglock)
3816	if grunning == 0 { // possible if main goroutine calls runtime·Goexit()
3817		throw("no goroutines (main called runtime.Goexit) - deadlock!")
3818	}
3819
3820	// Maybe jump time forward for playground.
3821	gp := timejump()
3822	if gp != nil {
3823		casgstatus(gp, _Gwaiting, _Grunnable)
3824		globrunqput(gp)
3825		_p_ := pidleget()
3826		if _p_ == nil {
3827			throw("checkdead: no p for timer")
3828		}
3829		mp := mget()
3830		if mp == nil {
3831			// There should always be a free M since
3832			// nothing is running.
3833			throw("checkdead: no m for timer")
3834		}
3835		mp.nextp.set(_p_)
3836		notewakeup(&mp.park)
3837		return
3838	}
3839
3840	getg().m.throwing = -1 // do not dump full stacks
3841	throw("all goroutines are asleep - deadlock!")
3842}
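
// For illustration only: the classic way to reach the final throw above
// is a program whose main goroutine blocks forever while no other
// goroutine can make progress, e.g.:
//
//	func main() {
//		ch := make(chan int)
//		<-ch // no sender exists, so checkdead reports
//		     // "all goroutines are asleep - deadlock!"
//	}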
3843
3844// forcegcperiod is the maximum time in nanoseconds between garbage
3845// collections. If we go this long without a garbage collection, one
3846// is forced to run.
3847//
3848// This is a variable for testing purposes. It normally doesn't change.
3849var forcegcperiod int64 = 2 * 60 * 1e9
3850
3851// Always runs without a P, so write barriers are not allowed.
3852//
3853//go:nowritebarrierrec
3854func sysmon() {
3855	lock(&sched.lock)
3856	sched.nmsys++
3857	checkdead()
3858	unlock(&sched.lock)
3859
3860	// If a heap span goes unused for 5 minutes after a garbage collection,
3861	// we hand it back to the operating system.
3862	scavengelimit := int64(5 * 60 * 1e9)
3863
3864	if debug.scavenge > 0 {
3865		// Scavenge-a-lot for testing.
3866		forcegcperiod = 10 * 1e6
3867		scavengelimit = 20 * 1e6
3868	}
3869
3870	lastscavenge := nanotime()
3871	nscavenge := 0
3872
3873	lasttrace := int64(0)
3874	idle := 0 // how many cycles in succession we have not woken somebody up
3875	delay := uint32(0)
3876	for {
3877		if idle == 0 { // start with 20us sleep...
3878			delay = 20
3879		} else if idle > 50 { // start doubling the sleep after 1ms...
3880			delay *= 2
3881		}
3882		if delay > 10*1000 { // up to 10ms
3883			delay = 10 * 1000
3884		}
3885		usleep(delay)
3886		if debug.schedtrace <= 0 && (sched.gcwaiting != 0 || atomic.Load(&sched.npidle) == uint32(gomaxprocs)) {
3887			lock(&sched.lock)
3888			if atomic.Load(&sched.gcwaiting) != 0 || atomic.Load(&sched.npidle) == uint32(gomaxprocs) {
3889				atomic.Store(&sched.sysmonwait, 1)
3890				unlock(&sched.lock)
3891				// Make wake-up period small enough
3892				// for the sampling to be correct.
3893				maxsleep := forcegcperiod / 2
3894				if scavengelimit < forcegcperiod {
3895					maxsleep = scavengelimit / 2
3896				}
3897				shouldRelax := true
3898				if osRelaxMinNS > 0 {
3899					next := timeSleepUntil()
3900					now := nanotime()
3901					if next-now < osRelaxMinNS {
3902						shouldRelax = false
3903					}
3904				}
3905				if shouldRelax {
3906					osRelax(true)
3907				}
3908				notetsleep(&sched.sysmonnote, maxsleep)
3909				if shouldRelax {
3910					osRelax(false)
3911				}
3912				lock(&sched.lock)
3913				atomic.Store(&sched.sysmonwait, 0)
3914				noteclear(&sched.sysmonnote)
3915				idle = 0
3916				delay = 20
3917			}
3918			unlock(&sched.lock)
3919		}
3920		// trigger libc interceptors if needed
3921		if *cgo_yield != nil {
3922			asmcgocall(*cgo_yield, nil)
3923		}
3924		// poll network if not polled for more than 10ms
3925		lastpoll := int64(atomic.Load64(&sched.lastpoll))
3926		now := nanotime()
3927		if netpollinited() && lastpoll != 0 && lastpoll+10*1000*1000 < now {
3928			atomic.Cas64(&sched.lastpoll, uint64(lastpoll), uint64(now))
3929			gp := netpoll(false) // non-blocking - returns list of goroutines
3930			if gp != nil {
3931				// Need to decrement number of idle locked M's
3932				// (pretending that one more is running) before injectglist.
3933				// Otherwise it can lead to the following situation:
3934				// injectglist grabs all P's but before it starts M's to run the P's,
3935				// another M returns from syscall, finishes running its G,
3936				// observes that there is no work to do and no other running M's
3937				// and reports deadlock.
3938				incidlelocked(-1)
3939				injectglist(gp)
3940				incidlelocked(1)
3941			}
3942		}
3943		// retake P's blocked in syscalls
3944		// and preempt long running G's
3945		if retake(now) != 0 {
3946			idle = 0
3947		} else {
3948			idle++
3949		}
3950		// check if we need to force a GC
3951		if t := (gcTrigger{kind: gcTriggerTime, now: now}); t.test() && atomic.Load(&forcegc.idle) != 0 {
3952			lock(&forcegc.lock)
3953			forcegc.idle = 0
3954			forcegc.g.schedlink = 0
3955			injectglist(forcegc.g)
3956			unlock(&forcegc.lock)
3957		}
3958		// scavenge heap once in a while
3959		if lastscavenge+scavengelimit/2 < now {
3960			mheap_.scavenge(int32(nscavenge), uint64(now), uint64(scavengelimit))
3961			lastscavenge = now
3962			nscavenge++
3963		}
3964		if debug.schedtrace > 0 && lasttrace+int64(debug.schedtrace)*1000000 <= now {
3965			lasttrace = now
3966			schedtrace(debug.scheddetail > 0)
3967		}
3968	}
3969}
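
// A worked example of the sysmon back-off above: while sysmon keeps
// finding work, delay stays at 20us. After 50 consecutive idle cycles
// (roughly 1ms of sleeping) the delay doubles on every further idle
// cycle: 20us, 40us, 80us, ... until it is capped at 10ms, so a fully
// idle sysmon wakes up at most ~100 times per second instead of ~50000.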
3970
3971type sysmontick struct {
3972	schedtick   uint32
3973	schedwhen   int64
3974	syscalltick uint32
3975	syscallwhen int64
3976}
3977
3978// forcePreemptNS is the time slice given to a G before it is
3979// preempted.
3980const forcePreemptNS = 10 * 1000 * 1000 // 10ms
3981
3982func retake(now int64) uint32 {
3983	n := 0
3984	// Prevent allp slice changes. This lock will be completely
3985	// uncontended unless we're already stopping the world.
3986	lock(&allpLock)
3987	// We can't use a range loop over allp because we may
3988	// temporarily drop the allpLock. Hence, we need to re-fetch
3989	// allp each time around the loop.
3990	for i := 0; i < len(allp); i++ {
3991		_p_ := allp[i]
3992		if _p_ == nil {
3993			// This can happen if procresize has grown
3994			// allp but not yet created new Ps.
3995			continue
3996		}
3997		pd := &_p_.sysmontick
3998		s := _p_.status
3999		if s == _Psyscall {
4000			// Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
4001			t := int64(_p_.syscalltick)
4002			if int64(pd.syscalltick) != t {
4003				pd.syscalltick = uint32(t)
4004				pd.syscallwhen = now
4005				continue
4006			}
4007			// On the one hand we don't want to retake Ps if there is no other work to do,
4008			// but on the other hand we want to retake them eventually
4009			// because they can prevent the sysmon thread from deep sleep.
4010			if runqempty(_p_) && atomic.Load(&sched.nmspinning)+atomic.Load(&sched.npidle) > 0 && pd.syscallwhen+10*1000*1000 > now {
4011				continue
4012			}
4013			// Drop allpLock so we can take sched.lock.
4014			unlock(&allpLock)
4015			// Need to decrement number of idle locked M's
4016			// (pretending that one more is running) before the CAS.
4017			// Otherwise the M from which we retake can exit the syscall,
4018			// increment nmidle and report deadlock.
4019			incidlelocked(-1)
4020			if atomic.Cas(&_p_.status, s, _Pidle) {
4021				if trace.enabled {
4022					traceGoSysBlock(_p_)
4023					traceProcStop(_p_)
4024				}
4025				n++
4026				_p_.syscalltick++
4027				handoffp(_p_)
4028			}
4029			incidlelocked(1)
4030			lock(&allpLock)
4031		} else if s == _Prunning {
4032			// Preempt G if it's running for too long.
4033			t := int64(_p_.schedtick)
4034			if int64(pd.schedtick) != t {
4035				pd.schedtick = uint32(t)
4036				pd.schedwhen = now
4037				continue
4038			}
4039			if pd.schedwhen+forcePreemptNS > now {
4040				continue
4041			}
4042			preemptone(_p_)
4043		}
4044	}
4045	unlock(&allpLock)
4046	return uint32(n)
4047}
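
// A worked example of the retake rules above: a P that has stayed in
// _Psyscall for at least one full sysmon tick is handed off if it has
// local work, if there is no spinning M or idle P left to absorb work,
// or if it has been in the syscall for 10ms or more. A P in _Prunning
// whose current G has run for more than forcePreemptNS (10ms) gets a
// preemption request via preemptone.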
4048
4049// Tell all goroutines that they have been preempted and they should stop.
4050// This function is purely best-effort. It can fail to inform a goroutine if a
4051// processor just started running it.
4052// No locks need to be held.
4053// Returns true if preemption request was issued to at least one goroutine.
4054func preemptall() bool {
4055	res := false
4056	for _, _p_ := range allp {
4057		if _p_.status != _Prunning {
4058			continue
4059		}
4060		if preemptone(_p_) {
4061			res = true
4062		}
4063	}
4064	return res
4065}
4066
4067// Tell the goroutine running on processor P to stop.
4068// This function is purely best-effort. It can incorrectly fail to inform the
4069// goroutine. It can inform the wrong goroutine. Even if it informs the
4070// correct goroutine, that goroutine might ignore the request if it is
4071// simultaneously executing newstack.
4072// No lock needs to be held.
4073// Returns true if a preemption request was issued.
4074// The actual preemption will happen at some point in the future
4075// and will be indicated by the gp->status no longer being
4076// Grunning.
4077func preemptone(_p_ *p) bool {
4078	mp := _p_.m.ptr()
4079	if mp == nil || mp == getg().m {
4080		return false
4081	}
4082	gp := mp.curg
4083	if gp == nil || gp == mp.g0 {
4084		return false
4085	}
4086
4087	gp.preempt = true
4088
4089	// At this point the gc implementation sets gp.stackguard0 to
4090	// a value that causes the goroutine to suspend itself.
4091	// gccgo has no support for this, and it's hard to support.
4092	// The split stack code reads a value from its TCB.
4093	// We have no way to set a value in the TCB of a different thread.
4094	// And, of course, not all systems support split stack anyhow.
4095	// Checking the field in the g is expensive, since it requires
4096	// loading the g from TLS.  The best mechanism is likely to be
4097	// setting a global variable and figuring out a way to efficiently
4098	// check that global variable.
4099	//
4100	// For now we check gp.preempt in schedule, mallocgc, selectgo,
4101	// and a few other places, which is at least better than doing
4102	// nothing at all.
4103
4104	return true
4105}
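
// For illustration only: because gccgo checks gp.preempt only at the
// cooperative points mentioned above (schedule, mallocgc, selectgo, ...),
// a tight loop that never calls into the runtime cannot be preempted
// and holds on to its P until it blocks or makes such a call, e.g.:
//
//	for x := 0; ; x++ {
//		// no runtime calls here, so gp.preempt is never observed
//	}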
4106
4107var starttime int64
4108
4109func schedtrace(detailed bool) {
4110	now := nanotime()
4111	if starttime == 0 {
4112		starttime = now
4113	}
4114
4115	lock(&sched.lock)
4116	print("SCHED ", (now-starttime)/1e6, "ms: gomaxprocs=", gomaxprocs, " idleprocs=", sched.npidle, " threads=", mcount(), " spinningthreads=", sched.nmspinning, " idlethreads=", sched.nmidle, " runqueue=", sched.runqsize)
4117	if detailed {
4118		print(" gcwaiting=", sched.gcwaiting, " nmidlelocked=", sched.nmidlelocked, " stopwait=", sched.stopwait, " sysmonwait=", sched.sysmonwait, "\n")
4119	}
4120	// We must be careful while reading data from P's, M's and G's.
4121	// Even if we hold schedlock, most data can be changed concurrently.
4122	// E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
4123	for i, _p_ := range allp {
4124		mp := _p_.m.ptr()
4125		h := atomic.Load(&_p_.runqhead)
4126		t := atomic.Load(&_p_.runqtail)
4127		if detailed {
4128			id := int64(-1)
4129			if mp != nil {
4130				id = mp.id
4131			}
4132			print("  P", i, ": status=", _p_.status, " schedtick=", _p_.schedtick, " syscalltick=", _p_.syscalltick, " m=", id, " runqsize=", t-h, " gfreecnt=", _p_.gfreecnt, "\n")
4133		} else {
4134			// In non-detailed mode, format the lengths of the per-P run queues as:
4135			// [len1 len2 len3 len4]
4136			print(" ")
4137			if i == 0 {
4138				print("[")
4139			}
4140			print(t - h)
4141			if i == len(allp)-1 {
4142				print("]\n")
4143			}
4144		}
4145	}
4146
4147	if !detailed {
4148		unlock(&sched.lock)
4149		return
4150	}
4151
4152	for mp := allm; mp != nil; mp = mp.alllink {
4153		_p_ := mp.p.ptr()
4154		gp := mp.curg
4155		lockedg := mp.lockedg.ptr()
4156		id1 := int32(-1)
4157		if _p_ != nil {
4158			id1 = _p_.id
4159		}
4160		id2 := int64(-1)
4161		if gp != nil {
4162			id2 = gp.goid
4163		}
4164		id3 := int64(-1)
4165		if lockedg != nil {
4166			id3 = lockedg.goid
4167		}
4168		print("  M", mp.id, ": p=", id1, " curg=", id2, " mallocing=", mp.mallocing, " throwing=", mp.throwing, " preemptoff=", mp.preemptoff, " locks=", mp.locks, " dying=", mp.dying, " helpgc=", mp.helpgc, " spinning=", mp.spinning, " blocked=", mp.blocked, " lockedg=", id3, "\n")
4169	}
4170
4171	lock(&allglock)
4172	for gi := 0; gi < len(allgs); gi++ {
4173		gp := allgs[gi]
4174		mp := gp.m
4175		lockedm := gp.lockedm.ptr()
4176		id1 := int64(-1)
4177		if mp != nil {
4178			id1 = mp.id
4179		}
4180		id2 := int64(-1)
4181		if lockedm != nil {
4182			id2 = lockedm.id
4183		}
4184		print("  G", gp.goid, ": status=", readgstatus(gp), "(", gp.waitreason, ") m=", id1, " lockedm=", id2, "\n")
4185	}
4186	unlock(&allglock)
4187	unlock(&sched.lock)
4188}
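
// For illustration only: schedtrace output is normally requested from
// the environment rather than by calling this function directly, e.g.:
//
//	GODEBUG=schedtrace=1000 ./prog               // summary line every 1000ms
//	GODEBUG=schedtrace=1000,scheddetail=1 ./prog // plus per-P, per-M and per-G detail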
4189
4190// Put mp on midle list.
4191// Sched must be locked.
4192// May run during STW, so write barriers are not allowed.
4193//go:nowritebarrierrec
4194func mput(mp *m) {
4195	mp.schedlink = sched.midle
4196	sched.midle.set(mp)
4197	sched.nmidle++
4198	checkdead()
4199}
4200
4201// Try to get an m from midle list.
4202// Sched must be locked.
4203// May run during STW, so write barriers are not allowed.
4204//go:nowritebarrierrec
4205func mget() *m {
4206	mp := sched.midle.ptr()
4207	if mp != nil {
4208		sched.midle = mp.schedlink
4209		sched.nmidle--
4210	}
4211	return mp
4212}
4213
4214// Put gp on the global runnable queue.
4215// Sched must be locked.
4216// May run during STW, so write barriers are not allowed.
4217//go:nowritebarrierrec
4218func globrunqput(gp *g) {
4219	gp.schedlink = 0
4220	if sched.runqtail != 0 {
4221		sched.runqtail.ptr().schedlink.set(gp)
4222	} else {
4223		sched.runqhead.set(gp)
4224	}
4225	sched.runqtail.set(gp)
4226	sched.runqsize++
4227}
4228
4229// Put gp at the head of the global runnable queue.
4230// Sched must be locked.
4231// May run during STW, so write barriers are not allowed.
4232//go:nowritebarrierrec
4233func globrunqputhead(gp *g) {
4234	gp.schedlink = sched.runqhead
4235	sched.runqhead.set(gp)
4236	if sched.runqtail == 0 {
4237		sched.runqtail.set(gp)
4238	}
4239	sched.runqsize++
4240}
4241
4242// Put a batch of runnable goroutines on the global runnable queue.
4243// Sched must be locked.
4244func globrunqputbatch(ghead *g, gtail *g, n int32) {
4245	gtail.schedlink = 0
4246	if sched.runqtail != 0 {
4247		sched.runqtail.ptr().schedlink.set(ghead)
4248	} else {
4249		sched.runqhead.set(ghead)
4250	}
4251	sched.runqtail.set(gtail)
4252	sched.runqsize += n
4253}
4254
4255// Try to get a batch of G's from the global runnable queue.
4256// Sched must be locked.
4257func globrunqget(_p_ *p, max int32) *g {
4258	if sched.runqsize == 0 {
4259		return nil
4260	}
4261
4262	n := sched.runqsize/gomaxprocs + 1
4263	if n > sched.runqsize {
4264		n = sched.runqsize
4265	}
4266	if max > 0 && n > max {
4267		n = max
4268	}
4269	if n > int32(len(_p_.runq))/2 {
4270		n = int32(len(_p_.runq)) / 2
4271	}
4272
4273	sched.runqsize -= n
4274	if sched.runqsize == 0 {
4275		sched.runqtail = 0
4276	}
4277
4278	gp := sched.runqhead.ptr()
4279	sched.runqhead = gp.schedlink
4280	n--
4281	for ; n > 0; n-- {
4282		gp1 := sched.runqhead.ptr()
4283		sched.runqhead = gp1.schedlink
4284		runqput(_p_, gp1, false)
4285	}
4286	return gp
4287}
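
// A worked example of the batch size above: with runqsize=100 and
// gomaxprocs=4, n = 100/4 + 1 = 26, so the caller gets one G returned
// and the remaining 25 are placed on its local run queue. The caps keep
// n from exceeding max and from exceeding half of the 256-entry local
// queue (128).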
4288
4289// Put p on the _Pidle list.
4290// Sched must be locked.
4291// May run during STW, so write barriers are not allowed.
4292//go:nowritebarrierrec
4293func pidleput(_p_ *p) {
4294	if !runqempty(_p_) {
4295		throw("pidleput: P has non-empty run queue")
4296	}
4297	_p_.link = sched.pidle
4298	sched.pidle.set(_p_)
4299	atomic.Xadd(&sched.npidle, 1) // TODO: fast atomic
4300}
4301
4302// Try to get a p from the _Pidle list.
4303// Sched must be locked.
4304// May run during STW, so write barriers are not allowed.
4305//go:nowritebarrierrec
4306func pidleget() *p {
4307	_p_ := sched.pidle.ptr()
4308	if _p_ != nil {
4309		sched.pidle = _p_.link
4310		atomic.Xadd(&sched.npidle, -1) // TODO: fast atomic
4311	}
4312	return _p_
4313}
4314
4315// runqempty returns true if _p_ has no Gs on its local run queue.
4316// It never returns true spuriously.
4317func runqempty(_p_ *p) bool {
4318	// Defend against a race where 1) _p_ has G1 in runnext but runqhead == runqtail,
4319	// 2) runqput on _p_ kicks G1 to the runq, 3) runqget on _p_ empties runnext.
4320	// Simply observing that runqhead == runqtail and then observing that runqnext == nil
4321	// does not mean the queue is empty.
4322	for {
4323		head := atomic.Load(&_p_.runqhead)
4324		tail := atomic.Load(&_p_.runqtail)
4325		runnext := atomic.Loaduintptr((*uintptr)(unsafe.Pointer(&_p_.runnext)))
4326		if tail == atomic.Load(&_p_.runqtail) {
4327			return head == tail && runnext == 0
4328		}
4329	}
4330}
4331
4332// To shake out latent assumptions about scheduling order,
4333// we introduce some randomness into scheduling decisions
4334// when running with the race detector.
4335// The need for this was made obvious by changing the
4336// (deterministic) scheduling order in Go 1.5 and breaking
4337// many poorly-written tests.
4338// With the randomness here, as long as the tests pass
4339// consistently with -race, they shouldn't have latent scheduling
4340// assumptions.
4341const randomizeScheduler = raceenabled
4342
4343// runqput tries to put g on the local runnable queue.
4344// If next is false, runqput adds g to the tail of the runnable queue.
4345// If next is true, runqput puts g in the _p_.runnext slot.
4346// If the run queue is full, runqput puts g on the global queue.
4347// Executed only by the owner P.
4348func runqput(_p_ *p, gp *g, next bool) {
4349	if randomizeScheduler && next && fastrand()%2 == 0 {
4350		next = false
4351	}
4352
4353	if next {
4354	retryNext:
4355		oldnext := _p_.runnext
4356		if !_p_.runnext.cas(oldnext, guintptr(unsafe.Pointer(gp))) {
4357			goto retryNext
4358		}
4359		if oldnext == 0 {
4360			return
4361		}
4362		// Kick the old runnext out to the regular run queue.
4363		gp = oldnext.ptr()
4364	}
4365
4366retry:
4367	h := atomic.Load(&_p_.runqhead) // load-acquire, synchronize with consumers
4368	t := _p_.runqtail
4369	if t-h < uint32(len(_p_.runq)) {
4370		_p_.runq[t%uint32(len(_p_.runq))].set(gp)
4371		atomic.Store(&_p_.runqtail, t+1) // store-release, makes the item available for consumption
4372		return
4373	}
4374	if runqputslow(_p_, gp, h, t) {
4375		return
4376	}
4377	// the queue is not full, now the put above must succeed
4378	goto retry
4379}
4380
4381// Put g and a batch of work from local runnable queue on global queue.
4382// Executed only by the owner P.
4383func runqputslow(_p_ *p, gp *g, h, t uint32) bool {
4384	var batch [len(_p_.runq)/2 + 1]*g
4385
4386	// First, grab a batch from local queue.
4387	n := t - h
4388	n = n / 2
4389	if n != uint32(len(_p_.runq)/2) {
4390		throw("runqputslow: queue is not full")
4391	}
4392	for i := uint32(0); i < n; i++ {
4393		batch[i] = _p_.runq[(h+i)%uint32(len(_p_.runq))].ptr()
4394	}
4395	if !atomic.Cas(&_p_.runqhead, h, h+n) { // cas-release, commits consume
4396		return false
4397	}
4398	batch[n] = gp
4399
4400	if randomizeScheduler {
4401		for i := uint32(1); i <= n; i++ {
4402			j := fastrandn(i + 1)
4403			batch[i], batch[j] = batch[j], batch[i]
4404		}
4405	}
4406
4407	// Link the goroutines.
4408	for i := uint32(0); i < n; i++ {
4409		batch[i].schedlink.set(batch[i+1])
4410	}
4411
4412	// Now put the batch on global queue.
4413	lock(&sched.lock)
4414	globrunqputbatch(batch[0], batch[n], int32(n+1))
4415	unlock(&sched.lock)
4416	return true
4417}
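
// A worked example of the slow path above: the local run queue holds
// 256 entries, so runqputslow is called with a full queue, grabs half
// of it (n = 128), appends gp as entry 129, shuffles the batch when the
// race detector is enabled, links the Gs through schedlink, and moves
// them all to the global queue under a single sched.lock acquisition.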
4418
4419// Get g from local runnable queue.
4420// If inheritTime is true, gp should inherit the remaining time in the
4421// current time slice. Otherwise, it should start a new time slice.
4422// Executed only by the owner P.
4423func runqget(_p_ *p) (gp *g, inheritTime bool) {
4424	// If there's a runnext, it's the next G to run.
4425	for {
4426		next := _p_.runnext
4427		if next == 0 {
4428			break
4429		}
4430		if _p_.runnext.cas(next, 0) {
4431			return next.ptr(), true
4432		}
4433	}
4434
4435	for {
4436		h := atomic.Load(&_p_.runqhead) // load-acquire, synchronize with other consumers
4437		t := _p_.runqtail
4438		if t == h {
4439			return nil, false
4440		}
4441		gp := _p_.runq[h%uint32(len(_p_.runq))].ptr()
4442		if atomic.Cas(&_p_.runqhead, h, h+1) { // cas-release, commits consume
4443			return gp, false
4444		}
4445	}
4446}
4447
4448// Grabs a batch of goroutines from _p_'s runnable queue into batch.
4449// Batch is a ring buffer starting at batchHead.
4450// Returns number of grabbed goroutines.
4451// Can be executed by any P.
4452func runqgrab(_p_ *p, batch *[256]guintptr, batchHead uint32, stealRunNextG bool) uint32 {
4453	for {
4454		h := atomic.Load(&_p_.runqhead) // load-acquire, synchronize with other consumers
4455		t := atomic.Load(&_p_.runqtail) // load-acquire, synchronize with the producer
4456		n := t - h
4457		n = n - n/2
4458		if n == 0 {
4459			if stealRunNextG {
4460				// Try to steal from _p_.runnext.
4461				if next := _p_.runnext; next != 0 {
4462					if _p_.status == _Prunning {
4463						// Sleep to ensure that _p_ isn't about to run the g
4464						// we are about to steal.
4465						// The important use case here is when the g running
4466						// on _p_ ready()s another g and then almost
4467						// immediately blocks. Instead of stealing runnext
4468						// in this window, back off to give _p_ a chance to
4469						// schedule runnext. This will avoid thrashing gs
4470						// between different Ps.
4471						// A sync chan send/recv takes ~50ns as of time of
4472						// writing, so 3us gives ~50x overshoot.
4473						if GOOS != "windows" {
4474							usleep(3)
4475						} else {
4476							// On windows system timer granularity is
4477							// 1-15ms, which is way too much for this
4478							// optimization. So just yield.
4479							osyield()
4480						}
4481					}
4482					if !_p_.runnext.cas(next, 0) {
4483						continue
4484					}
4485					batch[batchHead%uint32(len(batch))] = next
4486					return 1
4487				}
4488			}
4489			return 0
4490		}
4491		if n > uint32(len(_p_.runq)/2) { // read inconsistent h and t
4492			continue
4493		}
4494		for i := uint32(0); i < n; i++ {
4495			g := _p_.runq[(h+i)%uint32(len(_p_.runq))]
4496			batch[(batchHead+i)%uint32(len(batch))] = g
4497		}
4498		if atomic.Cas(&_p_.runqhead, h, h+n) { // cas-release, commits consume
4499			return n
4500		}
4501	}
4502}
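
// A worked example of the "steal half" rule above: n = t - h followed
// by n = n - n/2 rounds up, so a victim with 8 queued Gs loses 4 and a
// victim with 7 also loses 4. A victim with an empty queue can still
// lose its runnext (after the short back-off) when stealRunNextG is set.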
4503
4504// Steal half of the elements from the local runnable queue of p2
4505// and put them onto the local runnable queue of _p_.
4506// Returns one of the stolen elements (or nil if the steal failed).
4507func runqsteal(_p_, p2 *p, stealRunNextG bool) *g {
4508	t := _p_.runqtail
4509	n := runqgrab(p2, &_p_.runq, t, stealRunNextG)
4510	if n == 0 {
4511		return nil
4512	}
4513	n--
4514	gp := _p_.runq[(t+n)%uint32(len(_p_.runq))].ptr()
4515	if n == 0 {
4516		return gp
4517	}
4518	h := atomic.Load(&_p_.runqhead) // load-acquire, synchronize with consumers
4519	if t-h+n >= uint32(len(_p_.runq)) {
4520		throw("runqsteal: runq overflow")
4521	}
4522	atomic.Store(&_p_.runqtail, t+n) // store-release, makes the item available for consumption
4523	return gp
4524}
4525
4526//go:linkname setMaxThreads runtime_debug.setMaxThreads
4527func setMaxThreads(in int) (out int) {
4528	lock(&sched.lock)
4529	out = int(sched.maxmcount)
4530	if in > 0x7fffffff { // MaxInt32
4531		sched.maxmcount = 0x7fffffff
4532	} else {
4533		sched.maxmcount = int32(in)
4534	}
4535	checkmcount()
4536	unlock(&sched.lock)
4537	return
4538}
4539
4540//go:nosplit
4541func procPin() int {
4542	_g_ := getg()
4543	mp := _g_.m
4544
4545	mp.locks++
4546	return int(mp.p.ptr().id)
4547}
4548
4549//go:nosplit
4550func procUnpin() {
4551	_g_ := getg()
4552	_g_.m.locks--
4553}
4554
4555//go:linkname sync_runtime_procPin sync.runtime_procPin
4556//go:nosplit
4557func sync_runtime_procPin() int {
4558	return procPin()
4559}
4560
4561//go:linkname sync_runtime_procUnpin sync.runtime_procUnpin
4562//go:nosplit
4563func sync_runtime_procUnpin() {
4564	procUnpin()
4565}
4566
4567//go:linkname sync_atomic_runtime_procPin sync_atomic.runtime_procPin
4568//go:nosplit
4569func sync_atomic_runtime_procPin() int {
4570	return procPin()
4571}
4572
4573//go:linkname sync_atomic_runtime_procUnpin sync_atomic.runtime_procUnpin
4574//go:nosplit
4575func sync_atomic_runtime_procUnpin() {
4576	procUnpin()
4577}
4578
4579// Active spinning for sync.Mutex.
4580//go:linkname sync_runtime_canSpin sync.runtime_canSpin
4581//go:nosplit
4582func sync_runtime_canSpin(i int) bool {
4583	// sync.Mutex is cooperative, so we are conservative with spinning.
4584	// Spin only a few times and only if running on a multicore machine and
4585	// GOMAXPROCS>1 and there is at least one other running P and local runq is empty.
4586	// As opposed to runtime mutexes, we don't do passive spinning here,
4587	// because there can be work on the global runq or on other Ps.
4588	if i >= active_spin || ncpu <= 1 || gomaxprocs <= int32(sched.npidle+sched.nmspinning)+1 {
4589		return false
4590	}
4591	if p := getg().m.p.ptr(); !runqempty(p) {
4592		return false
4593	}
4594	return true
4595}
4596
4597//go:linkname sync_runtime_doSpin sync.runtime_doSpin
4598//go:nosplit
4599func sync_runtime_doSpin() {
4600	procyield(active_spin_cnt)
4601}
4602
4603var stealOrder randomOrder
4604
4605// randomOrder/randomEnum are helper types for randomized work stealing.
4606// They allow enumerating all Ps in different pseudo-random orders without repetition.
4607// The algorithm is based on the fact that if we have X such that X and GOMAXPROCS
4608// are coprime, then the sequence (i + X) % GOMAXPROCS gives the required enumeration.
4609type randomOrder struct {
4610	count    uint32
4611	coprimes []uint32
4612}
4613
4614type randomEnum struct {
4615	i     uint32
4616	count uint32
4617	pos   uint32
4618	inc   uint32
4619}
4620
4621func (ord *randomOrder) reset(count uint32) {
4622	ord.count = count
4623	ord.coprimes = ord.coprimes[:0]
4624	for i := uint32(1); i <= count; i++ {
4625		if gcd(i, count) == 1 {
4626			ord.coprimes = append(ord.coprimes, i)
4627		}
4628	}
4629}
4630
4631func (ord *randomOrder) start(i uint32) randomEnum {
4632	return randomEnum{
4633		count: ord.count,
4634		pos:   i % ord.count,
4635		inc:   ord.coprimes[i%uint32(len(ord.coprimes))],
4636	}
4637}
4638
4639func (enum *randomEnum) done() bool {
4640	return enum.i == enum.count
4641}
4642
4643func (enum *randomEnum) next() {
4644	enum.i++
4645	enum.pos = (enum.pos + enum.inc) % enum.count
4646}
4647
4648func (enum *randomEnum) position() uint32 {
4649	return enum.pos
4650}
4651
4652func gcd(a, b uint32) uint32 {
4653	for b != 0 {
4654		a, b = b, a%b
4655	}
4656	return a
4657}
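
// A worked example of the enumeration above: with count=6 the coprimes
// are {1, 5}. An enumeration starting at pos=2 with inc=5 visits
// 2, 1, 0, 5, 4, 3, covering every P exactly once, because stepping by
// a value coprime to count cycles through all residues mod count.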
4658