1// Copyright 2014 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package runtime
6
7import (
8	"internal/cpu"
9	"runtime/internal/atomic"
10	"runtime/internal/sys"
11	"unsafe"
12)
13
14var buildVersion = sys.TheVersion
15
16// set using cmd/go/internal/modload.ModInfoProg
17var modinfo string
18
19// Goroutine scheduler
20// The scheduler's job is to distribute ready-to-run goroutines over worker threads.
21//
22// The main concepts are:
23// G - goroutine.
24// M - worker thread, or machine.
25// P - processor, a resource that is required to execute Go code.
26//     M must have an associated P to execute Go code; however, it can be
27//     blocked or in a syscall without an associated P.
28//
29// Design doc at https://golang.org/s/go11sched.
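//
// As a rough, illustrative sketch of the relationship (not part of the design
// doc above): the number of Ps equals GOMAXPROCS, a running M holds exactly
// one P, and an M that blocks in a syscall releases its P so another M can
// keep running Go code:
//
//	G (goroutine) --- runs on ---> M (OS thread) --- which holds ---> P (processor)
//	len(allp) == GOMAXPROCS  (see procresize)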
30
31// Worker thread parking/unparking.
32// We need to balance between keeping enough running worker threads to utilize
33// available hardware parallelism and parking excessive running worker threads
34// to conserve CPU resources and power. This is not simple for two reasons:
35// (1) scheduler state is intentionally distributed (in particular, per-P work
36// queues), so it is not possible to compute global predicates on fast paths;
37// (2) for optimal thread management we would need to know the future (don't park
38// a worker thread when a new goroutine will be readied in near future).
39//
40// Three rejected approaches that would work badly:
41// 1. Centralize all scheduler state (would inhibit scalability).
42// 2. Direct goroutine handoff. That is, when we ready a new goroutine and there
43//    is a spare P, unpark a thread and hand it the P and the goroutine.
44//    This would lead to thread state thrashing, as the thread that readied the
45//    goroutine can be out of work the very next moment, and we will need to park it.
46//    Also, it would destroy locality of computation as we want to preserve
47//    dependent goroutines on the same thread; and introduce additional latency.
48// 3. Unpark an additional thread whenever we ready a goroutine and there is an
49//    idle P, but don't do handoff. This would lead to excessive thread parking/
50//    unparking as the additional threads will instantly park without discovering
51//    any work to do.
52//
53// The current approach:
54// We unpark an additional thread when we ready a goroutine if (1) there is an
55// idle P and (2) there are no "spinning" worker threads. A worker thread is considered
56// spinning if it is out of local work and did not find work in the global run queue/
57// netpoller; the spinning state is denoted in m.spinning and in sched.nmspinning.
58// Threads unparked this way are also considered spinning; we don't do goroutine
59// handoff so such threads are out of work initially. Spinning threads do some
60// spinning looking for work in per-P run queues before parking. If a spinning
61// thread finds work it takes itself out of the spinning state and proceeds to
62// execution. If it does not find work it takes itself out of the spinning state
63// and then parks.
64// If there is at least one spinning thread (sched.nmspinning>0), we don't unpark
65// new threads when readying goroutines. To compensate for that, if the last spinning
66// thread finds work and stops spinning, it must unpark a new spinning thread.
67// This approach smooths out unjustified spikes of thread unparking,
68// but at the same time guarantees eventual maximal CPU parallelism utilization.
69//
70// The main implementation complication is that we need to be very careful during
71// spinning->non-spinning thread transition. This transition can race with submission
72// of a new goroutine, and one side or the other must unpark another worker
73// thread. If they both fail to do that, we can end up with semi-persistent CPU
74// underutilization. The general pattern for goroutine readying is: submit a goroutine
75// to local work queue, #StoreLoad-style memory barrier, check sched.nmspinning.
76// The general pattern for spinning->non-spinning transition is: decrement nmspinning,
77// #StoreLoad-style memory barrier, check all per-P work queues for new work.
78// Note that all this complexity does not apply to global run queue as we are not
79// sloppy about thread unparking when submitting to global queue. Also see comments
80// for nmspinning manipulation.
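//
// As an illustrative sketch (not a literal excerpt; the real code is in
// ready/wakep and in the scheduler's spinning handling), the two racing paths
// described above look roughly like:
//
//	// Readying a goroutine:
//	runqput(pp, gp, next)
//	// StoreLoad-style barrier
//	if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 {
//		wakep()
//	}
//
//	// Spinning -> non-spinning transition:
//	atomic.Xadd(&sched.nmspinning, -1)
//	// StoreLoad-style barrier
//	// re-check all per-P run queues; if work is found, unpark another
//	// thread (or become spinning again) instead of parking.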
81
82var (
83	m0           m
84	g0           g
85	raceprocctx0 uintptr
86)
87
88//go:linkname runtime_inittask runtime..inittask
89var runtime_inittask initTask
90
91//go:linkname main_inittask main..inittask
92var main_inittask initTask
93
94// main_init_done is a signal used by cgocallbackg that initialization
95// has been completed. It is made before _cgo_notify_runtime_init_done,
96// so all cgo calls can rely on it existing. When main_init is complete,
97// it is closed, meaning cgocallbackg can reliably receive from it.
98var main_init_done chan bool
99
100//go:linkname main_main main.main
101func main_main()
102
103// mainStarted indicates that the main M has started.
104var mainStarted bool
105
106// runtimeInitTime is the nanotime() at which the runtime started.
107var runtimeInitTime int64
108
109// Value to use for signal mask for newly created M's.
110var initSigmask sigset
111
112// The main goroutine.
113func main() {
114	g := getg()
115
116	// Racectx of m0->g0 is used only as the parent of the main goroutine.
117	// It must not be used for anything else.
118	g.m.g0.racectx = 0
119
120	// Max stack size is 1 GB on 64-bit, 250 MB on 32-bit.
121	// Using decimal instead of binary GB and MB because
122	// they look nicer in the stack overflow failure message.
123	if sys.PtrSize == 8 {
124		maxstacksize = 1000000000
125	} else {
126		maxstacksize = 250000000
127	}
128
129	// Allow newproc to start new Ms.
130	mainStarted = true
131
132	if GOARCH != "wasm" { // no threads on wasm yet, so no sysmon
133		systemstack(func() {
134			newm(sysmon, nil)
135		})
136	}
137
138	// Lock the main goroutine onto this, the main OS thread,
139	// during initialization. Most programs won't care, but a few
140	// do require certain calls to be made by the main thread.
141	// Those can arrange for main.main to run in the main thread
142	// by calling runtime.LockOSThread during initialization
143	// to preserve the lock.
144	lockOSThread()
145
146	if g.m != &m0 {
147		throw("runtime.main not on m0")
148	}
149
150	doInit(&runtime_inittask) // must be before defer
151	if nanotime() == 0 {
152		throw("nanotime returning zero")
153	}
154
155	// Defer unlock so that runtime.Goexit during init does the unlock too.
156	needUnlock := true
157	defer func() {
158		if needUnlock {
159			unlockOSThread()
160		}
161	}()
162
163	// Record when the world started.
164	runtimeInitTime = nanotime()
165
166	gcenable()
167
168	main_init_done = make(chan bool)
169	if iscgo {
170		if _cgo_thread_start == nil {
171			throw("_cgo_thread_start missing")
172		}
173		if GOOS != "windows" {
174			if _cgo_setenv == nil {
175				throw("_cgo_setenv missing")
176			}
177			if _cgo_unsetenv == nil {
178				throw("_cgo_unsetenv missing")
179			}
180		}
181		if _cgo_notify_runtime_init_done == nil {
182			throw("_cgo_notify_runtime_init_done missing")
183		}
184		// Start the template thread in case we enter Go from
185		// a C-created thread and need to create a new thread.
186		startTemplateThread()
187		cgocall(_cgo_notify_runtime_init_done, nil)
188	}
189
190	doInit(&main_inittask)
191
192	close(main_init_done)
193
194	needUnlock = false
195	unlockOSThread()
196
197	if isarchive || islibrary {
198		// A program compiled with -buildmode=c-archive or c-shared
199		// has a main, but it is not executed.
200		return
201	}
202	fn := main_main // make an indirect call, as the linker doesn't know the address of the main package when laying down the runtime
203	fn()
204	if raceenabled {
205		racefini()
206	}
207
208	// Make racy client program work: if panicking on
209	// another goroutine at the same time as main returns,
210	// let the other goroutine finish printing the panic trace.
211	// Once it does, it will exit. See issues 3934 and 20018.
212	if atomic.Load(&runningPanicDefers) != 0 {
213		// Running deferred functions should not take long.
214		for c := 0; c < 1000; c++ {
215			if atomic.Load(&runningPanicDefers) == 0 {
216				break
217			}
218			Gosched()
219		}
220	}
221	if atomic.Load(&panicking) != 0 {
222		gopark(nil, nil, waitReasonPanicWait, traceEvGoStop, 1)
223	}
224
225	exit(0)
226	for {
227		var x *int32
228		*x = 0
229	}
230}
231
232// os_beforeExit is called from os.Exit(0).
233//go:linkname os_beforeExit os.runtime_beforeExit
234func os_beforeExit() {
235	if raceenabled {
236		racefini()
237	}
238}
239
240// start forcegc helper goroutine
241func init() {
242	go forcegchelper()
243}
244
245func forcegchelper() {
246	forcegc.g = getg()
247	for {
248		lock(&forcegc.lock)
249		if forcegc.idle != 0 {
250			throw("forcegc: phase error")
251		}
252		atomic.Store(&forcegc.idle, 1)
253		goparkunlock(&forcegc.lock, waitReasonForceGGIdle, traceEvGoBlock, 1)
254		// this goroutine is explicitly resumed by sysmon
255		if debug.gctrace > 0 {
256			println("GC forced")
257		}
258		// Time-triggered, fully concurrent.
259		gcStart(gcTrigger{kind: gcTriggerTime, now: nanotime()})
260	}
261}
262
263//go:nosplit
264
265// Gosched yields the processor, allowing other goroutines to run. It does not
266// suspend the current goroutine, so execution resumes automatically.
267func Gosched() {
268	checkTimeouts()
269	mcall(gosched_m)
270}
271
272// goschedguarded yields the processor like gosched, but also checks
273// for forbidden states and opts out of the yield in those cases.
274//go:nosplit
275func goschedguarded() {
276	mcall(goschedguarded_m)
277}
278
279// Puts the current goroutine into a waiting state and calls unlockf.
280// If unlockf returns false, the goroutine is resumed.
281// unlockf must not access this G's stack, as it may be moved between
282// the call to gopark and the call to unlockf.
283// Reason explains why the goroutine has been parked.
284// It is displayed in stack traces and heap dumps.
285// Reasons should be unique and descriptive.
286// Do not re-use reasons, add new ones.
287func gopark(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason waitReason, traceEv byte, traceskip int) {
288	if reason != waitReasonSleep {
289		checkTimeouts() // timeouts may expire while two goroutines keep the scheduler busy
290	}
291	mp := acquirem()
292	gp := mp.curg
293	status := readgstatus(gp)
294	if status != _Grunning && status != _Gscanrunning {
295		throw("gopark: bad g status")
296	}
297	mp.waitlock = lock
298	mp.waitunlockf = unlockf
299	gp.waitreason = reason
300	mp.waittraceev = traceEv
301	mp.waittraceskip = traceskip
302	releasem(mp)
303	// can't do anything that might move the G between Ms here.
304	mcall(park_m)
305}
306
307// Puts the current goroutine into a waiting state and unlocks the lock.
308// The goroutine can be made runnable again by calling goready(gp).
309func goparkunlock(lock *mutex, reason waitReason, traceEv byte, traceskip int) {
310	gopark(parkunlock_c, unsafe.Pointer(lock), reason, traceEv, traceskip)
311}
312
313func goready(gp *g, traceskip int) {
314	systemstack(func() {
315		ready(gp, traceskip, true)
316	})
317}
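
// As an illustrative sketch only (l, cond, and waiter are hypothetical
// caller-owned state, not runtime API), gopark/goparkunlock and goready are
// typically paired like this:
//
//	// Waiter: block until cond becomes true.
//	lock(&l)
//	for !cond {
//		waiter = getg()
//		goparkunlock(&l, waitReasonZero, traceEvGoBlock, 1)
//		lock(&l)
//	}
//	unlock(&l)
//
//	// Waker: make cond true, then ready the parked goroutine.
//	lock(&l)
//	cond = true
//	gp := waiter
//	unlock(&l)
//	goready(gp, 1)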
318
319//go:nosplit
320func acquireSudog() *sudog {
321	// Delicate dance: the semaphore implementation calls
322	// acquireSudog, acquireSudog calls new(sudog),
323	// new calls malloc, malloc can call the garbage collector,
324	// and the garbage collector calls the semaphore implementation
325	// in stopTheWorld.
326	// Break the cycle by doing acquirem/releasem around new(sudog).
327	// The acquirem/releasem increments m.locks during new(sudog),
328	// which keeps the garbage collector from being invoked.
329	mp := acquirem()
330	pp := mp.p.ptr()
331	if len(pp.sudogcache) == 0 {
332		lock(&sched.sudoglock)
333		// First, try to grab a batch from central cache.
334		for len(pp.sudogcache) < cap(pp.sudogcache)/2 && sched.sudogcache != nil {
335			s := sched.sudogcache
336			sched.sudogcache = s.next
337			s.next = nil
338			pp.sudogcache = append(pp.sudogcache, s)
339		}
340		unlock(&sched.sudoglock)
341		// If the central cache is empty, allocate a new one.
342		if len(pp.sudogcache) == 0 {
343			pp.sudogcache = append(pp.sudogcache, new(sudog))
344		}
345	}
346	n := len(pp.sudogcache)
347	s := pp.sudogcache[n-1]
348	pp.sudogcache[n-1] = nil
349	pp.sudogcache = pp.sudogcache[:n-1]
350	if s.elem != nil {
351		throw("acquireSudog: found s.elem != nil in cache")
352	}
353	releasem(mp)
354	return s
355}
356
357//go:nosplit
358func releaseSudog(s *sudog) {
359	if s.elem != nil {
360		throw("runtime: sudog with non-nil elem")
361	}
362	if s.isSelect {
363		throw("runtime: sudog with non-false isSelect")
364	}
365	if s.next != nil {
366		throw("runtime: sudog with non-nil next")
367	}
368	if s.prev != nil {
369		throw("runtime: sudog with non-nil prev")
370	}
371	if s.waitlink != nil {
372		throw("runtime: sudog with non-nil waitlink")
373	}
374	if s.c != nil {
375		throw("runtime: sudog with non-nil c")
376	}
377	gp := getg()
378	if gp.param != nil {
379		throw("runtime: releaseSudog with non-nil gp.param")
380	}
381	mp := acquirem() // avoid rescheduling to another P
382	pp := mp.p.ptr()
383	if len(pp.sudogcache) == cap(pp.sudogcache) {
384		// Transfer half of local cache to the central cache.
385		var first, last *sudog
386		for len(pp.sudogcache) > cap(pp.sudogcache)/2 {
387			n := len(pp.sudogcache)
388			p := pp.sudogcache[n-1]
389			pp.sudogcache[n-1] = nil
390			pp.sudogcache = pp.sudogcache[:n-1]
391			if first == nil {
392				first = p
393			} else {
394				last.next = p
395			}
396			last = p
397		}
398		lock(&sched.sudoglock)
399		last.next = sched.sudogcache
400		sched.sudogcache = first
401		unlock(&sched.sudoglock)
402	}
403	pp.sudogcache = append(pp.sudogcache, s)
404	releasem(mp)
405}
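
// To summarize the two-level cache used by acquireSudog and releaseSudog
// above (a descriptive sketch, not additional API):
//
//	pp.sudogcache    []*sudog  per-P cache, accessed without locks while
//	                           preemption is disabled via acquirem
//	sched.sudogcache *sudog    central free list, linked through s.next and
//	                           guarded by sched.sudoglock
//
// acquireSudog refills up to half of the per-P cache from the central list
// when the local cache is empty; releaseSudog flushes half of the local
// cache back to the central list when it is full.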
406
407// funcPC returns the entry PC of the function f.
408// It assumes that f is a func value. Otherwise the behavior is undefined.
409// CAREFUL: In programs with plugins, funcPC can return different values
410// for the same function (because there are actually multiple copies of
411// the same function in the address space). To be safe, don't use the
412// results of this function in any == expression. It is only safe to
413// use the result as an address at which to start executing code.
414//go:nosplit
415func funcPC(f interface{}) uintptr {
416	return *(*uintptr)(efaceOf(&f).data)
417}
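
// For example, oneNewExtraM below uses the result purely as a code address:
//
//	gp.sched.pc = funcPC(goexit) + sys.PCQuantum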
418
419// called from assembly
420func badmcall(fn func(*g)) {
421	throw("runtime: mcall called on m->g0 stack")
422}
423
424func badmcall2(fn func(*g)) {
425	throw("runtime: mcall function returned")
426}
427
428func badreflectcall() {
429	panic(plainError("arg size to reflect.call more than 1GB"))
430}
431
432var badmorestackg0Msg = "fatal: morestack on g0\n"
433
434//go:nosplit
435//go:nowritebarrierrec
436func badmorestackg0() {
437	sp := stringStructOf(&badmorestackg0Msg)
438	write(2, sp.str, int32(sp.len))
439}
440
441var badmorestackgsignalMsg = "fatal: morestack on gsignal\n"
442
443//go:nosplit
444//go:nowritebarrierrec
445func badmorestackgsignal() {
446	sp := stringStructOf(&badmorestackgsignalMsg)
447	write(2, sp.str, int32(sp.len))
448}
449
450//go:nosplit
451func badctxt() {
452	throw("ctxt != 0")
453}
454
455func lockedOSThread() bool {
456	gp := getg()
457	return gp.lockedm != 0 && gp.m.lockedg != 0
458}
459
460var (
461	allgs    []*g
462	allglock mutex
463)
464
465func allgadd(gp *g) {
466	if readgstatus(gp) == _Gidle {
467		throw("allgadd: bad status Gidle")
468	}
469
470	lock(&allglock)
471	allgs = append(allgs, gp)
472	allglen = uintptr(len(allgs))
473	unlock(&allglock)
474}
475
476const (
477	// Number of goroutine ids to grab from sched.goidgen to local per-P cache at once.
478	// 16 seems to provide enough amortization, but other than that it's mostly an arbitrary number.
479	_GoidCacheBatch = 16
480)
481
482// cpuinit extracts the environment variable GODEBUG from the environment on
483// Unix-like operating systems and calls internal/cpu.Initialize.
484func cpuinit() {
485	const prefix = "GODEBUG="
486	var env string
487
488	switch GOOS {
489	case "aix", "darwin", "dragonfly", "freebsd", "netbsd", "openbsd", "illumos", "solaris", "linux":
490		cpu.DebugOptions = true
491
492		// Similar to goenv_unix but extracts the environment value for
493		// GODEBUG directly.
494		// TODO(moehrmann): remove when general goenvs() can be called before cpuinit()
495		n := int32(0)
496		for argv_index(argv, argc+1+n) != nil {
497			n++
498		}
499
500		for i := int32(0); i < n; i++ {
501			p := argv_index(argv, argc+1+i)
502			s := *(*string)(unsafe.Pointer(&stringStruct{unsafe.Pointer(p), findnull(p)}))
503
504			if hasPrefix(s, prefix) {
505				env = gostring(p)[len(prefix):]
506				break
507			}
508		}
509	}
510
511	cpu.Initialize(env)
512
513	// The CPU feature support variables below are used in code generated by the compiler
514	// to guard execution of instructions that cannot be assumed to always be supported.
515	x86HasPOPCNT = cpu.X86.HasPOPCNT
516	x86HasSSE41 = cpu.X86.HasSSE41
517	x86HasFMA = cpu.X86.HasFMA
518
519	armHasVFPv4 = cpu.ARM.HasVFPv4
520
521	arm64HasATOMICS = cpu.ARM64.HasATOMICS
522}
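
// As an illustrative sketch of how these variables are consumed (the actual
// guard is emitted by the compiler, not written by hand), generated code can
// branch on them like:
//
//	if x86HasPOPCNT {
//		// use the POPCNT instruction
//	} else {
//		// generic fallback
//	}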
523
524// The bootstrap sequence is:
525//
526//	call osinit
527//	call schedinit
528//	make & queue new G
529//	call runtime·mstart
530//
531// The new G calls runtime·main.
532func schedinit() {
533	// raceinit must be the first call to race detector.
534	// In particular, it must be done before mallocinit below calls racemapshadow.
535	_g_ := getg()
536	if raceenabled {
537		_g_.racectx, raceprocctx0 = raceinit()
538	}
539
540	sched.maxmcount = 10000
541
542	tracebackinit()
543	moduledataverify()
544	stackinit()
545	mallocinit()
546	fastrandinit() // must run before mcommoninit
547	mcommoninit(_g_.m)
548	cpuinit()       // must run before alginit
549	alginit()       // maps must not be used before this call
550	modulesinit()   // provides activeModules
551	typelinksinit() // uses maps, activeModules
552	itabsinit()     // uses activeModules
553
554	msigsave(_g_.m)
555	initSigmask = _g_.m.sigmask
556
557	goargs()
558	goenvs()
559	parsedebugvars()
560	gcinit()
561
562	sched.lastpoll = uint64(nanotime())
563	procs := ncpu
564	if n, ok := atoi32(gogetenv("GOMAXPROCS")); ok && n > 0 {
565		procs = n
566	}
567	if procresize(procs) != nil {
568		throw("unknown runnable goroutine during bootstrap")
569	}
570
571	// For cgocheck > 1, we turn on the write barrier at all times
572	// and check all pointer writes. We can't do this until after
573	// procresize because the write barrier needs a P.
574	if debug.cgocheck > 1 {
575		writeBarrier.cgo = true
576		writeBarrier.enabled = true
577		for _, p := range allp {
578			p.wbBuf.reset()
579		}
580	}
581
582	if buildVersion == "" {
583		// Condition should never trigger. This code just serves
584		// to ensure runtime·buildVersion is kept in the resulting binary.
585		buildVersion = "unknown"
586	}
587	if len(modinfo) == 1 {
588		// Condition should never trigger. This code just serves
589		// to ensure runtime·modinfo is kept in the resulting binary.
590		modinfo = ""
591	}
592}
593
594func dumpgstatus(gp *g) {
595	_g_ := getg()
596	print("runtime: gp: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
597	print("runtime:  g:  g=", _g_, ", goid=", _g_.goid, ",  g->atomicstatus=", readgstatus(_g_), "\n")
598}
599
600func checkmcount() {
601	// sched lock is held
602	if mcount() > sched.maxmcount {
603		print("runtime: program exceeds ", sched.maxmcount, "-thread limit\n")
604		throw("thread exhaustion")
605	}
606}
607
608func mcommoninit(mp *m) {
609	_g_ := getg()
610
611	// g0 stack won't make sense for user (and is not necessarily unwindable).
612	if _g_ != _g_.m.g0 {
613		callers(1, mp.createstack[:])
614	}
615
616	lock(&sched.lock)
617	if sched.mnext+1 < sched.mnext {
618		throw("runtime: thread ID overflow")
619	}
620	mp.id = sched.mnext
621	sched.mnext++
622	checkmcount()
623
624	mp.fastrand[0] = uint32(int64Hash(uint64(mp.id), fastrandseed))
625	mp.fastrand[1] = uint32(int64Hash(uint64(cputicks()), ^fastrandseed))
626	if mp.fastrand[0]|mp.fastrand[1] == 0 {
627		mp.fastrand[1] = 1
628	}
629
630	mpreinit(mp)
631	if mp.gsignal != nil {
632		mp.gsignal.stackguard1 = mp.gsignal.stack.lo + _StackGuard
633	}
634
635	// Add to allm so garbage collector doesn't free g->m
636	// when it is just in a register or thread-local storage.
637	mp.alllink = allm
638
639	// NumCgoCall() iterates over allm w/o schedlock,
640	// so we need to publish it safely.
641	atomicstorep(unsafe.Pointer(&allm), unsafe.Pointer(mp))
642	unlock(&sched.lock)
643
644	// Allocate memory to hold a cgo traceback if the cgo call crashes.
645	if iscgo || GOOS == "solaris" || GOOS == "illumos" || GOOS == "windows" {
646		mp.cgoCallers = new(cgoCallers)
647	}
648}
649
650var fastrandseed uintptr
651
652func fastrandinit() {
653	s := (*[unsafe.Sizeof(fastrandseed)]byte)(unsafe.Pointer(&fastrandseed))[:]
654	getRandomData(s)
655}
656
657// Mark gp ready to run.
658func ready(gp *g, traceskip int, next bool) {
659	if trace.enabled {
660		traceGoUnpark(gp, traceskip)
661	}
662
663	status := readgstatus(gp)
664
665	// Mark runnable.
666	_g_ := getg()
667	mp := acquirem() // disable preemption because it can be holding p in a local var
668	if status&^_Gscan != _Gwaiting {
669		dumpgstatus(gp)
670		throw("bad g->status in ready")
671	}
672
673	// status is Gwaiting or Gscanwaiting, make Grunnable and put on runq
674	casgstatus(gp, _Gwaiting, _Grunnable)
675	runqput(_g_.m.p.ptr(), gp, next)
676	if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 {
677		wakep()
678	}
679	releasem(mp)
680}
681
682// freezeStopWait is a large value that freezetheworld sets
683// sched.stopwait to in order to request that all Gs permanently stop.
684const freezeStopWait = 0x7fffffff
685
686// freezing is set to non-zero if the runtime is trying to freeze the
687// world.
688var freezing uint32
689
690// Similar to stopTheWorld but best-effort and can be called several times.
691// There is no reverse operation; it is used during crashing.
692// This function must not lock any mutexes.
693func freezetheworld() {
694	atomic.Store(&freezing, 1)
695	// stopwait and preemption requests can be lost
696	// due to races with concurrently executing threads,
697	// so try several times
698	for i := 0; i < 5; i++ {
699		// this should tell the scheduler to not start any new goroutines
700		sched.stopwait = freezeStopWait
701		atomic.Store(&sched.gcwaiting, 1)
702		// this should stop running goroutines
703		if !preemptall() {
704			break // no running goroutines
705		}
706		usleep(1000)
707	}
708	// to be sure
709	usleep(1000)
710	preemptall()
711	usleep(1000)
712}
713
714// All reads and writes of g's status go through readgstatus, casgstatus
715// castogscanstatus, casfrom_Gscanstatus.
716//go:nosplit
717func readgstatus(gp *g) uint32 {
718	return atomic.Load(&gp.atomicstatus)
719}
720
721// The Gscanstatuses are acting like locks and this releases them.
722// If it proves to be a performance hit we should be able to make these
723// simple atomic stores but for now we are going to throw if
724// we see an inconsistent state.
725func casfrom_Gscanstatus(gp *g, oldval, newval uint32) {
726	success := false
727
728	// Check that transition is valid.
729	switch oldval {
730	default:
731		print("runtime: casfrom_Gscanstatus bad oldval gp=", gp, ", oldval=", hex(oldval), ", newval=", hex(newval), "\n")
732		dumpgstatus(gp)
733		throw("casfrom_Gscanstatus:top gp->status is not in scan state")
734	case _Gscanrunnable,
735		_Gscanwaiting,
736		_Gscanrunning,
737		_Gscansyscall,
738		_Gscanpreempted:
739		if newval == oldval&^_Gscan {
740			success = atomic.Cas(&gp.atomicstatus, oldval, newval)
741		}
742	}
743	if !success {
744		print("runtime: casfrom_Gscanstatus failed gp=", gp, ", oldval=", hex(oldval), ", newval=", hex(newval), "\n")
745		dumpgstatus(gp)
746		throw("casfrom_Gscanstatus: gp->status is not in scan state")
747	}
748}
749
750// This will return false if the gp is not in the expected status and the cas fails.
751// This acts like a lock acquire while the casfromgstatus acts like a lock release.
752func castogscanstatus(gp *g, oldval, newval uint32) bool {
753	switch oldval {
754	case _Grunnable,
755		_Grunning,
756		_Gwaiting,
757		_Gsyscall:
758		if newval == oldval|_Gscan {
759			return atomic.Cas(&gp.atomicstatus, oldval, newval)
760		}
761	}
762	print("runtime: castogscanstatus oldval=", hex(oldval), " newval=", hex(newval), "\n")
763	throw("castogscanstatus")
764	panic("not reached")
765}
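
// Illustrative sketch of the lock-like pairing described above (a sketch, not
// a literal excerpt from the runtime's scanning code):
//
//	if castogscanstatus(gp, _Grunnable, _Gscanrunnable) {
//		// gp's status is now "locked": it cannot start running,
//		// so its state can be inspected safely.
//		casfrom_Gscanstatus(gp, _Gscanrunnable, _Grunnable)
//	}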
766
767// If asked to move to or from a Gscanstatus this will throw. Use the castogscanstatus
768// and casfrom_Gscanstatus instead.
769// casgstatus will loop if the g->atomicstatus is in a Gscan status until the routine that
770// put it in the Gscan state is finished.
771//go:nosplit
772func casgstatus(gp *g, oldval, newval uint32) {
773	if (oldval&_Gscan != 0) || (newval&_Gscan != 0) || oldval == newval {
774		systemstack(func() {
775			print("runtime: casgstatus: oldval=", hex(oldval), " newval=", hex(newval), "\n")
776			throw("casgstatus: bad incoming values")
777		})
778	}
779
780	// See https://golang.org/cl/21503 for justification of the yield delay.
781	const yieldDelay = 5 * 1000
782	var nextYield int64
783
784	// loop if gp->atomicstatus is in a scan state giving
785	// GC time to finish and change the state to oldval.
786	for i := 0; !atomic.Cas(&gp.atomicstatus, oldval, newval); i++ {
787		if oldval == _Gwaiting && gp.atomicstatus == _Grunnable {
788			throw("casgstatus: waiting for Gwaiting but is Grunnable")
789		}
790		if i == 0 {
791			nextYield = nanotime() + yieldDelay
792		}
793		if nanotime() < nextYield {
794			for x := 0; x < 10 && gp.atomicstatus != oldval; x++ {
795				procyield(1)
796			}
797		} else {
798			osyield()
799			nextYield = nanotime() + yieldDelay/2
800		}
801	}
802}
803
804// casgstatus(gp, oldstatus, Gcopystack), assuming oldstatus is Gwaiting or Grunnable.
805// Returns old status. Cannot call casgstatus directly, because we are racing with an
806// async wakeup that might come in from netpoll. If we see Gwaiting from the readgstatus,
807// it might have become Grunnable by the time we get to the cas. If we called casgstatus,
808// it would loop waiting for the status to go back to Gwaiting, which it never will.
809//go:nosplit
810func casgcopystack(gp *g) uint32 {
811	for {
812		oldstatus := readgstatus(gp) &^ _Gscan
813		if oldstatus != _Gwaiting && oldstatus != _Grunnable {
814			throw("copystack: bad status, not Gwaiting or Grunnable")
815		}
816		if atomic.Cas(&gp.atomicstatus, oldstatus, _Gcopystack) {
817			return oldstatus
818		}
819	}
820}
821
822// casGToPreemptScan transitions gp from _Grunning to _Gscan|_Gpreempted.
823//
824// TODO(austin): This is the only status operation that both changes
825// the status and locks the _Gscan bit. Rethink this.
826func casGToPreemptScan(gp *g, old, new uint32) {
827	if old != _Grunning || new != _Gscan|_Gpreempted {
828		throw("bad g transition")
829	}
830	for !atomic.Cas(&gp.atomicstatus, _Grunning, _Gscan|_Gpreempted) {
831	}
832}
833
834// casGFromPreempted attempts to transition gp from _Gpreempted to
835// _Gwaiting. If successful, the caller is responsible for
836// re-scheduling gp.
837func casGFromPreempted(gp *g, old, new uint32) bool {
838	if old != _Gpreempted || new != _Gwaiting {
839		throw("bad g transition")
840	}
841	return atomic.Cas(&gp.atomicstatus, _Gpreempted, _Gwaiting)
842}
843
844// stopTheWorld stops all P's from executing goroutines, interrupting
845// all goroutines at GC safe points and records reason as the reason
846// for the stop. On return, only the current goroutine's P is running.
847// stopTheWorld must not be called from a system stack and the caller
848// must not hold worldsema. The caller must call startTheWorld when
849// other P's should resume execution.
850//
851// stopTheWorld is safe for multiple goroutines to call at the
852// same time. Each will execute its own stop, and the stops will
853// be serialized.
854//
855// This is also used by routines that do stack dumps. If the system is
856// in panic or being exited, this may not reliably stop all
857// goroutines.
858func stopTheWorld(reason string) {
859	semacquire(&worldsema)
860	getg().m.preemptoff = reason
861	systemstack(stopTheWorldWithSema)
862}
863
864// startTheWorld undoes the effects of stopTheWorld.
865func startTheWorld() {
866	systemstack(func() { startTheWorldWithSema(false) })
867	// worldsema must be held over startTheWorldWithSema to ensure
868	// gomaxprocs cannot change while worldsema is held.
869	semrelease(&worldsema)
870	getg().m.preemptoff = ""
871}
872
873// Holding worldsema grants an M the right to try to stop the world
874// and prevents gomaxprocs from changing concurrently.
875var worldsema uint32 = 1
876
877// stopTheWorldWithSema is the core implementation of stopTheWorld.
878// The caller is responsible for acquiring worldsema and disabling
879// preemption first and then should call stopTheWorldWithSema on the system
880// stack:
881//
882//	semacquire(&worldsema, 0)
883//	m.preemptoff = "reason"
884//	systemstack(stopTheWorldWithSema)
885//
886// When finished, the caller must either call startTheWorld or undo
887// these three operations separately:
888//
889//	m.preemptoff = ""
890//	systemstack(startTheWorldWithSema)
891//	semrelease(&worldsema)
892//
893// It is allowed to acquire worldsema once and then execute multiple
894// startTheWorldWithSema/stopTheWorldWithSema pairs.
895// Other P's are able to execute between successive calls to
896// startTheWorldWithSema and stopTheWorldWithSema.
897// Holding worldsema causes any other goroutines invoking
898// stopTheWorld to block.
899func stopTheWorldWithSema() {
900	_g_ := getg()
901
902	// If we hold a lock, then we won't be able to stop another M
903	// that is blocked trying to acquire the lock.
904	if _g_.m.locks > 0 {
905		throw("stopTheWorld: holding locks")
906	}
907
908	lock(&sched.lock)
909	sched.stopwait = gomaxprocs
910	atomic.Store(&sched.gcwaiting, 1)
911	preemptall()
912	// stop current P
913	_g_.m.p.ptr().status = _Pgcstop // Pgcstop is only diagnostic.
914	sched.stopwait--
915	// try to retake all P's in Psyscall status
916	for _, p := range allp {
917		s := p.status
918		if s == _Psyscall && atomic.Cas(&p.status, s, _Pgcstop) {
919			if trace.enabled {
920				traceGoSysBlock(p)
921				traceProcStop(p)
922			}
923			p.syscalltick++
924			sched.stopwait--
925		}
926	}
927	// stop idle P's
928	for {
929		p := pidleget()
930		if p == nil {
931			break
932		}
933		p.status = _Pgcstop
934		sched.stopwait--
935	}
936	wait := sched.stopwait > 0
937	unlock(&sched.lock)
938
939	// wait for remaining P's to stop voluntarily
940	if wait {
941		for {
942			// wait for 100us, then try to re-preempt in case of any races
943			if notetsleep(&sched.stopnote, 100*1000) {
944				noteclear(&sched.stopnote)
945				break
946			}
947			preemptall()
948		}
949	}
950
951	// sanity checks
952	bad := ""
953	if sched.stopwait != 0 {
954		bad = "stopTheWorld: not stopped (stopwait != 0)"
955	} else {
956		for _, p := range allp {
957			if p.status != _Pgcstop {
958				bad = "stopTheWorld: not stopped (status != _Pgcstop)"
959			}
960		}
961	}
962	if atomic.Load(&freezing) != 0 {
963		// Some other thread is panicking. This can cause the
964		// sanity checks above to fail if the panic happens in
965		// the signal handler on a stopped thread. Either way,
966		// we should halt this thread.
967		lock(&deadlock)
968		lock(&deadlock)
969	}
970	if bad != "" {
971		throw(bad)
972	}
973}
974
975func startTheWorldWithSema(emitTraceEvent bool) int64 {
976	mp := acquirem() // disable preemption because it can be holding p in a local var
977	if netpollinited() {
978		list := netpoll(0) // non-blocking
979		injectglist(&list)
980	}
981	lock(&sched.lock)
982
983	procs := gomaxprocs
984	if newprocs != 0 {
985		procs = newprocs
986		newprocs = 0
987	}
988	p1 := procresize(procs)
989	sched.gcwaiting = 0
990	if sched.sysmonwait != 0 {
991		sched.sysmonwait = 0
992		notewakeup(&sched.sysmonnote)
993	}
994	unlock(&sched.lock)
995
996	for p1 != nil {
997		p := p1
998		p1 = p1.link.ptr()
999		if p.m != 0 {
1000			mp := p.m.ptr()
1001			p.m = 0
1002			if mp.nextp != 0 {
1003				throw("startTheWorld: inconsistent mp->nextp")
1004			}
1005			mp.nextp.set(p)
1006			notewakeup(&mp.park)
1007		} else {
1008			// Start M to run P.  Do not start another M below.
1009			newm(nil, p)
1010		}
1011	}
1012
1013	// Capture start-the-world time before doing clean-up tasks.
1014	startTime := nanotime()
1015	if emitTraceEvent {
1016		traceGCSTWDone()
1017	}
1018
1019	// Wake up an additional proc in case we have excessive runnable goroutines
1020	// in local queues or in the global queue. If we don't, the proc will park itself.
1021	// If we have lots of excessive work, resetspinning will unpark additional procs as necessary.
1022	if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 {
1023		wakep()
1024	}
1025
1026	releasem(mp)
1027
1028	return startTime
1029}
1030
1031// mstart is the entry-point for new Ms.
1032//
1033// This must not split the stack because we may not even have stack
1034// bounds set up yet.
1035//
1036// May run during STW (because it doesn't have a P yet), so write
1037// barriers are not allowed.
1038//
1039//go:nosplit
1040//go:nowritebarrierrec
1041func mstart() {
1042	_g_ := getg()
1043
1044	osStack := _g_.stack.lo == 0
1045	if osStack {
1046		// Initialize stack bounds from system stack.
1047		// Cgo may have left stack size in stack.hi.
1048		// minit may update the stack bounds.
1049		size := _g_.stack.hi
1050		if size == 0 {
1051			size = 8192 * sys.StackGuardMultiplier
1052		}
1053		_g_.stack.hi = uintptr(noescape(unsafe.Pointer(&size)))
1054		_g_.stack.lo = _g_.stack.hi - size + 1024
1055	}
1056	// Initialize stack guard so that we can start calling regular
1057	// Go code.
1058	_g_.stackguard0 = _g_.stack.lo + _StackGuard
1059	// This is the g0, so we can also call go:systemstack
1060	// functions, which check stackguard1.
1061	_g_.stackguard1 = _g_.stackguard0
1062	mstart1()
1063
1064	// Exit this thread.
1065	switch GOOS {
1066	case "windows", "solaris", "illumos", "plan9", "darwin", "aix":
1067		// Windows, Solaris, illumos, Darwin, AIX and Plan 9 always system-allocate
1068		// the stack, but put it in _g_.stack before mstart,
1069		// so the logic above hasn't set osStack yet.
1070		osStack = true
1071	}
1072	mexit(osStack)
1073}
1074
1075func mstart1() {
1076	_g_ := getg()
1077
1078	if _g_ != _g_.m.g0 {
1079		throw("bad runtime·mstart")
1080	}
1081
1082	// Record the caller for use as the top of stack in mcall and
1083	// for terminating the thread.
1084	// We're never coming back to mstart1 after we call schedule,
1085	// so other calls can reuse the current frame.
1086	save(getcallerpc(), getcallersp())
1087	asminit()
1088	minit()
1089
1090	// Install signal handlers; after minit so that minit can
1091	// prepare the thread to be able to handle the signals.
1092	if _g_.m == &m0 {
1093		mstartm0()
1094	}
1095
1096	if fn := _g_.m.mstartfn; fn != nil {
1097		fn()
1098	}
1099
1100	if _g_.m != &m0 {
1101		acquirep(_g_.m.nextp.ptr())
1102		_g_.m.nextp = 0
1103	}
1104	schedule()
1105}
1106
1107// mstartm0 implements part of mstart1 that only runs on the m0.
1108//
1109// Write barriers are allowed here because we know the GC can't be
1110// running yet, so they'll be no-ops.
1111//
1112//go:yeswritebarrierrec
1113func mstartm0() {
1114	// Create an extra M for callbacks on threads not created by Go.
1115	// An extra M is also needed on Windows for callbacks created by
1116	// syscall.NewCallback. See issue #6751 for details.
1117	if (iscgo || GOOS == "windows") && !cgoHasExtraM {
1118		cgoHasExtraM = true
1119		newextram()
1120	}
1121	initsig(false)
1122}
1123
1124// mexit tears down and exits the current thread.
1125//
1126// Don't call this directly to exit the thread, since it must run at
1127// the top of the thread stack. Instead, use gogo(&_g_.m.g0.sched) to
1128// unwind the stack to the point that exits the thread.
1129//
1130// It is entered with m.p != nil, so write barriers are allowed. It
1131// will release the P before exiting.
1132//
1133//go:yeswritebarrierrec
1134func mexit(osStack bool) {
1135	g := getg()
1136	m := g.m
1137
1138	if m == &m0 {
1139		// This is the main thread. Just wedge it.
1140		//
1141		// On Linux, exiting the main thread puts the process
1142		// into a non-waitable zombie state. On Plan 9,
1143		// exiting the main thread unblocks wait even though
1144		// other threads are still running. On Solaris we can
1145		// neither exitThread nor return from mstart. Other
1146		// bad things probably happen on other platforms.
1147		//
1148		// We could try to clean up this M more before wedging
1149		// it, but that complicates signal handling.
1150		handoffp(releasep())
1151		lock(&sched.lock)
1152		sched.nmfreed++
1153		checkdead()
1154		unlock(&sched.lock)
1155		notesleep(&m.park)
1156		throw("locked m0 woke up")
1157	}
1158
1159	sigblock()
1160	unminit()
1161
1162	// Free the gsignal stack.
1163	if m.gsignal != nil {
1164		stackfree(m.gsignal.stack)
1165		// On some platforms, when calling into VDSO (e.g. nanotime)
1166		// we store our g on the gsignal stack, if there is one.
1167		// Now the stack is freed, unlink it from the m, so we
1168		// won't write to it when calling VDSO code.
1169		m.gsignal = nil
1170	}
1171
1172	// Remove m from allm.
1173	lock(&sched.lock)
1174	for pprev := &allm; *pprev != nil; pprev = &(*pprev).alllink {
1175		if *pprev == m {
1176			*pprev = m.alllink
1177			goto found
1178		}
1179	}
1180	throw("m not found in allm")
1181found:
1182	if !osStack {
1183		// Delay reaping m until it's done with the stack.
1184		//
1185		// If this is using an OS stack, the OS will free it
1186		// so there's no need for reaping.
1187		atomic.Store(&m.freeWait, 1)
1188		// Put m on the free list, though it will not be reaped until
1189		// freeWait is 0. Note that the free list must not be linked
1190		// through alllink because some functions walk allm without
1191		// locking, so may be using alllink.
1192		m.freelink = sched.freem
1193		sched.freem = m
1194	}
1195	unlock(&sched.lock)
1196
1197	// Release the P.
1198	handoffp(releasep())
1199	// After this point we must not have write barriers.
1200
1201	// Invoke the deadlock detector. This must happen after
1202	// handoffp because it may have started a new M to take our
1203	// P's work.
1204	lock(&sched.lock)
1205	sched.nmfreed++
1206	checkdead()
1207	unlock(&sched.lock)
1208
1209	if osStack {
1210		// Return from mstart and let the system thread
1211		// library free the g0 stack and terminate the thread.
1212		return
1213	}
1214
1215	// mstart is the thread's entry point, so there's nothing to
1216	// return to. Exit the thread directly. exitThread will clear
1217	// m.freeWait when it's done with the stack and the m can be
1218	// reaped.
1219	exitThread(&m.freeWait)
1220}
1221
1222// forEachP calls fn(p) for every P p when p reaches a GC safe point.
1223// If a P is currently executing code, this will bring the P to a GC
1224// safe point and execute fn on that P. If the P is not executing code
1225// (it is idle or in a syscall), this will call fn(p) directly while
1226// preventing the P from exiting its state. This does not ensure that
1227// fn will run on every CPU executing Go code, but it acts as a global
1228// memory barrier. GC uses this as a "ragged barrier."
1229//
1230// The caller must hold worldsema.
1231//
1232//go:systemstack
1233func forEachP(fn func(*p)) {
1234	mp := acquirem()
1235	_p_ := getg().m.p.ptr()
1236
1237	lock(&sched.lock)
1238	if sched.safePointWait != 0 {
1239		throw("forEachP: sched.safePointWait != 0")
1240	}
1241	sched.safePointWait = gomaxprocs - 1
1242	sched.safePointFn = fn
1243
1244	// Ask all Ps to run the safe point function.
1245	for _, p := range allp {
1246		if p != _p_ {
1247			atomic.Store(&p.runSafePointFn, 1)
1248		}
1249	}
1250	preemptall()
1251
1252	// Any P entering _Pidle or _Psyscall from now on will observe
1253	// p.runSafePointFn == 1 and will call runSafePointFn when
1254	// changing its status to _Pidle/_Psyscall.
1255
1256	// Run safe point function for all idle Ps. sched.pidle will
1257	// not change because we hold sched.lock.
1258	for p := sched.pidle.ptr(); p != nil; p = p.link.ptr() {
1259		if atomic.Cas(&p.runSafePointFn, 1, 0) {
1260			fn(p)
1261			sched.safePointWait--
1262		}
1263	}
1264
1265	wait := sched.safePointWait > 0
1266	unlock(&sched.lock)
1267
1268	// Run fn for the current P.
1269	fn(_p_)
1270
1271	// Force Ps currently in _Psyscall into _Pidle and hand them
1272	// off to induce safe point function execution.
1273	for _, p := range allp {
1274		s := p.status
1275		if s == _Psyscall && p.runSafePointFn == 1 && atomic.Cas(&p.status, s, _Pidle) {
1276			if trace.enabled {
1277				traceGoSysBlock(p)
1278				traceProcStop(p)
1279			}
1280			p.syscalltick++
1281			handoffp(p)
1282		}
1283	}
1284
1285	// Wait for remaining Ps to run fn.
1286	if wait {
1287		for {
1288			// Wait for 100us, then try to re-preempt in
1289			// case of any races.
1290			//
1291			// Requires system stack.
1292			if notetsleep(&sched.safePointNote, 100*1000) {
1293				noteclear(&sched.safePointNote)
1294				break
1295			}
1296			preemptall()
1297		}
1298	}
1299	if sched.safePointWait != 0 {
1300		throw("forEachP: not done")
1301	}
1302	for _, p := range allp {
1303		if p.runSafePointFn != 0 {
1304			throw("forEachP: P did not run fn")
1305		}
1306	}
1307
1308	lock(&sched.lock)
1309	sched.safePointFn = nil
1310	unlock(&sched.lock)
1311	releasem(mp)
1312}
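
// An illustrative (hypothetical) use of forEachP as a ragged barrier:
//
//	forEachP(func(pp *p) {
//		// flush or reset some per-P state at a GC safe point
//	})
//
// The garbage collector uses this pattern to flush per-P buffers without a
// full stop-the-world.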
1313
1314// runSafePointFn runs the safe point function, if any, for this P.
1315// This should be called like
1316//
1317//     if getg().m.p.runSafePointFn != 0 {
1318//         runSafePointFn()
1319//     }
1320//
1321// runSafePointFn must be checked on any transition in to _Pidle or
1322// _Psyscall to avoid a race where forEachP sees that the P is running
1323// just before the P goes into _Pidle/_Psyscall and neither forEachP
1324// nor the P run the safe-point function.
1325func runSafePointFn() {
1326	p := getg().m.p.ptr()
1327	// Resolve the race between forEachP running the safe-point
1328	// function on this P's behalf and this P running the
1329	// safe-point function directly.
1330	if !atomic.Cas(&p.runSafePointFn, 1, 0) {
1331		return
1332	}
1333	sched.safePointFn(p)
1334	lock(&sched.lock)
1335	sched.safePointWait--
1336	if sched.safePointWait == 0 {
1337		notewakeup(&sched.safePointNote)
1338	}
1339	unlock(&sched.lock)
1340}
1341
1342// When running with cgo, we call _cgo_thread_start
1343// to start threads for us so that we can play nicely with
1344// foreign code.
1345var cgoThreadStart unsafe.Pointer
1346
1347type cgothreadstart struct {
1348	g   guintptr
1349	tls *uint64
1350	fn  unsafe.Pointer
1351}
1352
1353// Allocate a new m unassociated with any thread.
1354// Can use p for allocation context if needed.
1355// fn is recorded as the new m's m.mstartfn.
1356//
1357// This function is allowed to have write barriers even if the caller
1358// isn't because it borrows _p_.
1359//
1360//go:yeswritebarrierrec
1361func allocm(_p_ *p, fn func()) *m {
1362	_g_ := getg()
1363	acquirem() // disable GC because it can be called from sysmon
1364	if _g_.m.p == 0 {
1365		acquirep(_p_) // temporarily borrow p for mallocs in this function
1366	}
1367
1368	// Release the free M list. We need to do this somewhere and
1369	// this may free up a stack we can use.
1370	if sched.freem != nil {
1371		lock(&sched.lock)
1372		var newList *m
1373		for freem := sched.freem; freem != nil; {
1374			if freem.freeWait != 0 {
1375				next := freem.freelink
1376				freem.freelink = newList
1377				newList = freem
1378				freem = next
1379				continue
1380			}
1381			stackfree(freem.g0.stack)
1382			freem = freem.freelink
1383		}
1384		sched.freem = newList
1385		unlock(&sched.lock)
1386	}
1387
1388	mp := new(m)
1389	mp.mstartfn = fn
1390	mcommoninit(mp)
1391
1392	// In case of cgo or Solaris or illumos or Darwin, pthread_create will make us a stack.
1393	// Windows and Plan 9 will lay out the sched stack on the OS stack.
1394	if iscgo || GOOS == "solaris" || GOOS == "illumos" || GOOS == "windows" || GOOS == "plan9" || GOOS == "darwin" {
1395		mp.g0 = malg(-1)
1396	} else {
1397		mp.g0 = malg(8192 * sys.StackGuardMultiplier)
1398	}
1399	mp.g0.m = mp
1400
1401	if _p_ == _g_.m.p.ptr() {
1402		releasep()
1403	}
1404	releasem(_g_.m)
1405
1406	return mp
1407}
1408
1409// needm is called when a cgo callback happens on a
1410// thread without an m (a thread not created by Go).
1411// In this case, needm is expected to find an m to use
1412// and return with m, g initialized correctly.
1413// Since m and g are not set now (likely nil, but see below)
1414// needm is limited in what routines it can call. In particular
1415// it can only call nosplit functions (textflag 7) and cannot
1416// do any scheduling that requires an m.
1417//
1418// In order to avoid needing heavy lifting here, we adopt
1419// the following strategy: there is a stack of available m's
1420// that can be stolen. Using compare-and-swap
1421// to pop from the stack has ABA races, so we simulate
1422// a lock by doing an exchange (via Casuintptr) to steal the stack
1423// head and replace the top pointer with MLOCKED (1).
1424// This serves as a simple spin lock that we can use even
1425// without an m. The thread that locks the stack in this way
1426// unlocks the stack by storing a valid stack head pointer.
1427//
1428// In order to make sure that there is always an m structure
1429// available to be stolen, we maintain the invariant that there
1430// is always one more than needed. At the beginning of the
1431// program (if cgo is in use) the list is seeded with a single m.
1432// If needm finds that it has taken the last m off the list, its job
1433// is - once it has installed its own m so that it can do things like
1434// allocate memory - to create a spare m and put it on the list.
1435//
1436// Each of these extra m's also has a g0 and a curg that are
1437// pressed into service as the scheduling stack and current
1438// goroutine for the duration of the cgo callback.
1439//
1440// When the callback is done with the m, it calls dropm to
1441// put the m back on the list.
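//
// The lock-by-exchange protocol described above is implemented by lockextra
// and unlockextra below; as a rough sketch:
//
//	old := atomic.Loaduintptr(&extram)      // spin while old == locked (1)
//	atomic.Casuintptr(&extram, old, locked) // steal the list head
//	...                                     // take/replenish ms as needed
//	atomic.Storeuintptr(&extram, newHead)   // unlock by publishing a new head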
1442//go:nosplit
1443func needm(x byte) {
1444	if (iscgo || GOOS == "windows") && !cgoHasExtraM {
1445		// Can happen if C/C++ code calls Go from a global ctor.
1446		// Can also happen on Windows if a global ctor uses a
1447		// callback created by syscall.NewCallback. See issue #6751
1448		// for details.
1449		//
1450		// Can not throw, because scheduler is not initialized yet.
1451		write(2, unsafe.Pointer(&earlycgocallback[0]), int32(len(earlycgocallback)))
1452		exit(1)
1453	}
1454
1455	// Lock extra list, take head, unlock popped list.
1456	// nilokay=false is safe here because of the invariant above,
1457	// that the extra list always contains or will soon contain
1458	// at least one m.
1459	mp := lockextra(false)
1460
1461	// Set needextram when we've just emptied the list,
1462	// so that the eventual call into cgocallbackg will
1463	// allocate a new m for the extra list. We delay the
1464	// allocation until then so that it can be done
1465	// after exitsyscall makes sure it is okay to be
1466	// running at all (that is, there's no garbage collection
1467	// running right now).
1468	mp.needextram = mp.schedlink == 0
1469	extraMCount--
1470	unlockextra(mp.schedlink.ptr())
1471
1472	// Save and block signals before installing g.
1473	// Once g is installed, any incoming signals will try to execute,
1474	// but we won't have the sigaltstack settings and other data
1475	// set up appropriately until the end of minit, which will
1476	// unblock the signals. This is the same dance as when
1477	// starting a new m to run Go code via newosproc.
1478	msigsave(mp)
1479	sigblock()
1480
1481	// Install g (= m->g0) and set the stack bounds
1482	// to match the current stack. We don't actually know
1483	// how big the stack is, like we don't know how big any
1484	// scheduling stack is, but we assume there's at least 32 kB,
1485	// which is more than enough for us.
1486	setg(mp.g0)
1487	_g_ := getg()
1488	_g_.stack.hi = uintptr(noescape(unsafe.Pointer(&x))) + 1024
1489	_g_.stack.lo = uintptr(noescape(unsafe.Pointer(&x))) - 32*1024
1490	_g_.stackguard0 = _g_.stack.lo + _StackGuard
1491
1492	// Initialize this thread to use the m.
1493	asminit()
1494	minit()
1495
1496	// mp.curg is now a real goroutine.
1497	casgstatus(mp.curg, _Gdead, _Gsyscall)
1498	atomic.Xadd(&sched.ngsys, -1)
1499}
1500
1501var earlycgocallback = []byte("fatal error: cgo callback before cgo call\n")
1502
1503// newextram allocates m's and puts them on the extra list.
1504// It is called with a working local m, so that it can do things
1505// like call schedlock and allocate.
1506func newextram() {
1507	c := atomic.Xchg(&extraMWaiters, 0)
1508	if c > 0 {
1509		for i := uint32(0); i < c; i++ {
1510			oneNewExtraM()
1511		}
1512	} else {
1513		// Make sure there is at least one extra M.
1514		mp := lockextra(true)
1515		unlockextra(mp)
1516		if mp == nil {
1517			oneNewExtraM()
1518		}
1519	}
1520}
1521
1522// oneNewExtraM allocates an m and puts it on the extra list.
1523func oneNewExtraM() {
1524	// Create extra goroutine locked to extra m.
1525	// The goroutine is the context in which the cgo callback will run.
1526	// The sched.pc will never be returned to, but setting it to
1527	// goexit makes clear to the traceback routines where
1528	// the goroutine stack ends.
1529	mp := allocm(nil, nil)
1530	gp := malg(4096)
1531	gp.sched.pc = funcPC(goexit) + sys.PCQuantum
1532	gp.sched.sp = gp.stack.hi
1533	gp.sched.sp -= 4 * sys.RegSize // extra space in case of reads slightly beyond frame
1534	gp.sched.lr = 0
1535	gp.sched.g = guintptr(unsafe.Pointer(gp))
1536	gp.syscallpc = gp.sched.pc
1537	gp.syscallsp = gp.sched.sp
1538	gp.stktopsp = gp.sched.sp
1539	// malg returns status as _Gidle. Change to _Gdead before
1540	// adding to allg where GC can see it. We use _Gdead to hide
1541	// this from tracebacks and stack scans since it isn't a
1542	// "real" goroutine until needm grabs it.
1543	casgstatus(gp, _Gidle, _Gdead)
1544	gp.m = mp
1545	mp.curg = gp
1546	mp.lockedInt++
1547	mp.lockedg.set(gp)
1548	gp.lockedm.set(mp)
1549	gp.goid = int64(atomic.Xadd64(&sched.goidgen, 1))
1550	if raceenabled {
1551		gp.racectx = racegostart(funcPC(newextram) + sys.PCQuantum)
1552	}
1553	// put on allg for garbage collector
1554	allgadd(gp)
1555
1556	// gp is now on the allg list, but we don't want it to be
1557	// counted by gcount. It would be more "proper" to increment
1558	// sched.ngfree, but that requires locking. Incrementing ngsys
1559	// has the same effect.
1560	atomic.Xadd(&sched.ngsys, +1)
1561
1562	// Add m to the extra list.
1563	mnext := lockextra(true)
1564	mp.schedlink.set(mnext)
1565	extraMCount++
1566	unlockextra(mp)
1567}
1568
1569// dropm is called when a cgo callback has called needm but is now
1570// done with the callback and returning back into the non-Go thread.
1571// It puts the current m back onto the extra list.
1572//
1573// The main expense here is the call to signalstack to release the
1574// m's signal stack, and then the call to needm on the next callback
1575// from this thread. It is tempting to try to save the m for next time,
1576// which would eliminate both these costs, but there might not be
1577// a next time: the current thread (which Go does not control) might exit.
1578// If we saved the m for that thread, there would be an m leak each time
1579// such a thread exited. Instead, we acquire and release an m on each
1580// call. These should typically not be scheduling operations, just a few
1581// atomics, so the cost should be small.
1582//
1583// TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
1584// variable using pthread_key_create. Unlike the pthread keys we already use
1585// on OS X, this dummy key would never be read by Go code. It would exist
1586// only so that we could register a thread-exit-time destructor.
1587// That destructor would put the m back onto the extra list.
1588// This is purely a performance optimization. The current version,
1589// in which dropm happens on each cgo call, is still correct too.
1590// We may have to keep the current version on systems with cgo
1591// but without pthreads, like Windows.
1592func dropm() {
1593	// Clear m and g, and return m to the extra list.
1594	// After the call to setg we can only call nosplit functions
1595	// with no pointer manipulation.
1596	mp := getg().m
1597
1598	// Return mp.curg to dead state.
1599	casgstatus(mp.curg, _Gsyscall, _Gdead)
1600	mp.curg.preemptStop = false
1601	atomic.Xadd(&sched.ngsys, +1)
1602
1603	// Block signals before unminit.
1604	// Unminit unregisters the signal handling stack (but needs g on some systems).
1605	// Setg(nil) clears g, which is the signal handler's cue not to run Go handlers.
1606	// It's important not to try to handle a signal between those two steps.
1607	sigmask := mp.sigmask
1608	sigblock()
1609	unminit()
1610
1611	mnext := lockextra(true)
1612	extraMCount++
1613	mp.schedlink.set(mnext)
1614
1615	setg(nil)
1616
1617	// Commit the release of mp.
1618	unlockextra(mp)
1619
1620	msigrestore(sigmask)
1621}
1622
1623// A helper function for EnsureDropM.
1624func getm() uintptr {
1625	return uintptr(unsafe.Pointer(getg().m))
1626}
1627
1628var extram uintptr
1629var extraMCount uint32 // Protected by lockextra
1630var extraMWaiters uint32
1631
1632// lockextra locks the extra list and returns the list head.
1633// The caller must unlock the list by storing a new list head
1634// to extram. If nilokay is true, then lockextra will
1635// return a nil list head if that's what it finds. If nilokay is false,
1636// lockextra will keep waiting until the list head is no longer nil.
1637//go:nosplit
1638func lockextra(nilokay bool) *m {
1639	const locked = 1
1640
1641	incr := false
1642	for {
1643		old := atomic.Loaduintptr(&extram)
1644		if old == locked {
1645			yield := osyield
1646			yield()
1647			continue
1648		}
1649		if old == 0 && !nilokay {
1650			if !incr {
1651				// Add 1 to the number of threads
1652				// waiting for an M.
1653				// This is cleared by newextram.
1654				atomic.Xadd(&extraMWaiters, 1)
1655				incr = true
1656			}
1657			usleep(1)
1658			continue
1659		}
1660		if atomic.Casuintptr(&extram, old, locked) {
1661			return (*m)(unsafe.Pointer(old))
1662		}
1663		yield := osyield
1664		yield()
1665		continue
1666	}
1667}
1668
1669//go:nosplit
1670func unlockextra(mp *m) {
1671	atomic.Storeuintptr(&extram, uintptr(unsafe.Pointer(mp)))
1672}
1673
1674// execLock serializes exec and clone to avoid bugs or unspecified behaviour
1675// around exec'ing while creating/destroying threads.  See issue #19546.
1676var execLock rwmutex
1677
1678// newmHandoff contains a list of m structures that need new OS threads.
1679// This is used by newm in situations where newm itself can't safely
1680// start an OS thread.
1681var newmHandoff struct {
1682	lock mutex
1683
1684	// newm points to a list of M structures that need new OS
1685	// threads. The list is linked through m.schedlink.
1686	newm muintptr
1687
1688	// waiting indicates that wake needs to be notified when an m
1689	// is put on the list.
1690	waiting bool
1691	wake    note
1692
1693	// haveTemplateThread indicates that the templateThread has
1694	// been started. This is not protected by lock. Use cas to set
1695	// to 1.
1696	haveTemplateThread uint32
1697}
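
// Illustrative flow of the handoff described above (a sketch; see newm and
// templateThread below for the real code):
//
//	// On a locked or C-started thread, newm queues the request:
//	mp.schedlink = newmHandoff.newm
//	newmHandoff.newm.set(mp)
//	notewakeup(&newmHandoff.wake)
//
//	// templateThread, running on a known-good thread, pops the list and
//	// calls newm1 for each m, so the OS thread is created from a clean state.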
1698
1699// Create a new m. It will start off with a call to fn, or else the scheduler.
1700// fn needs to be static and not a heap allocated closure.
1701// May run with m.p==nil, so write barriers are not allowed.
1702//go:nowritebarrierrec
1703func newm(fn func(), _p_ *p) {
1704	mp := allocm(_p_, fn)
1705	mp.nextp.set(_p_)
1706	mp.sigmask = initSigmask
1707	if gp := getg(); gp != nil && gp.m != nil && (gp.m.lockedExt != 0 || gp.m.incgo) && GOOS != "plan9" {
1708		// We're on a locked M or a thread that may have been
1709		// started by C. The kernel state of this thread may
1710		// be strange (the user may have locked it for that
1711		// purpose). We don't want to clone that into another
1712		// thread. Instead, ask a known-good thread to create
1713		// the thread for us.
1714		//
1715		// This is disabled on Plan 9. See golang.org/issue/22227.
1716		//
1717		// TODO: This may be unnecessary on Windows, which
1718		// doesn't model thread creation off fork.
1719		lock(&newmHandoff.lock)
1720		if newmHandoff.haveTemplateThread == 0 {
1721			throw("on a locked thread with no template thread")
1722		}
1723		mp.schedlink = newmHandoff.newm
1724		newmHandoff.newm.set(mp)
1725		if newmHandoff.waiting {
1726			newmHandoff.waiting = false
1727			notewakeup(&newmHandoff.wake)
1728		}
1729		unlock(&newmHandoff.lock)
1730		return
1731	}
1732	newm1(mp)
1733}
1734
1735func newm1(mp *m) {
1736	if iscgo {
1737		var ts cgothreadstart
1738		if _cgo_thread_start == nil {
1739			throw("_cgo_thread_start missing")
1740		}
1741		ts.g.set(mp.g0)
1742		ts.tls = (*uint64)(unsafe.Pointer(&mp.tls[0]))
1743		ts.fn = unsafe.Pointer(funcPC(mstart))
1744		if msanenabled {
1745			msanwrite(unsafe.Pointer(&ts), unsafe.Sizeof(ts))
1746		}
1747		execLock.rlock() // Prevent process clone.
1748		asmcgocall(_cgo_thread_start, unsafe.Pointer(&ts))
1749		execLock.runlock()
1750		return
1751	}
1752	execLock.rlock() // Prevent process clone.
1753	newosproc(mp)
1754	execLock.runlock()
1755}
1756
1757// startTemplateThread starts the template thread if it is not already
1758// running.
1759//
1760// The calling thread must itself be in a known-good state.
1761func startTemplateThread() {
1762	if GOARCH == "wasm" { // no threads on wasm yet
1763		return
1764	}
1765	if !atomic.Cas(&newmHandoff.haveTemplateThread, 0, 1) {
1766		return
1767	}
1768	newm(templateThread, nil)
1769}
1770
1771// templateThread is a thread in a known-good state that exists solely
1772// to start new threads in known-good states when the calling thread
1773// may not be in a good state.
1774//
1775// Many programs never need this, so templateThread is started lazily
1776// when we first enter a state that might lead to running on a thread
1777// in an unknown state.
1778//
1779// templateThread runs on an M without a P, so it must not have write
1780// barriers.
1781//
1782//go:nowritebarrierrec
1783func templateThread() {
1784	lock(&sched.lock)
1785	sched.nmsys++
1786	checkdead()
1787	unlock(&sched.lock)
1788
1789	for {
1790		lock(&newmHandoff.lock)
1791		for newmHandoff.newm != 0 {
1792			newm := newmHandoff.newm.ptr()
1793			newmHandoff.newm = 0
1794			unlock(&newmHandoff.lock)
1795			for newm != nil {
1796				next := newm.schedlink.ptr()
1797				newm.schedlink = 0
1798				newm1(newm)
1799				newm = next
1800			}
1801			lock(&newmHandoff.lock)
1802		}
1803		newmHandoff.waiting = true
1804		noteclear(&newmHandoff.wake)
1805		unlock(&newmHandoff.lock)
1806		notesleep(&newmHandoff.wake)
1807	}
1808}
1809
1810// Stops execution of the current m until new work is available.
1811// Returns with acquired P.
1812func stopm() {
1813	_g_ := getg()
1814
1815	if _g_.m.locks != 0 {
1816		throw("stopm holding locks")
1817	}
1818	if _g_.m.p != 0 {
1819		throw("stopm holding p")
1820	}
1821	if _g_.m.spinning {
1822		throw("stopm spinning")
1823	}
1824
1825	lock(&sched.lock)
1826	mput(_g_.m)
1827	unlock(&sched.lock)
1828	notesleep(&_g_.m.park)
1829	noteclear(&_g_.m.park)
1830	acquirep(_g_.m.nextp.ptr())
1831	_g_.m.nextp = 0
1832}
1833
1834func mspinning() {
1835	// startm's caller incremented nmspinning. Set the new M's spinning.
1836	getg().m.spinning = true
1837}
1838
1839// Schedules some M to run the p (creates an M if necessary).
1840	// If p==nil, tries to get an idle P; if there are no idle P's, does nothing.
1841// May run with m.p==nil, so write barriers are not allowed.
1842// If spinning is set, the caller has incremented nmspinning and startm will
1843// either decrement nmspinning or set m.spinning in the newly started M.
1844//go:nowritebarrierrec
1845func startm(_p_ *p, spinning bool) {
1846	lock(&sched.lock)
1847	if _p_ == nil {
1848		_p_ = pidleget()
1849		if _p_ == nil {
1850			unlock(&sched.lock)
1851			if spinning {
1852				// The caller incremented nmspinning, but there are no idle Ps,
1853				// so it's okay to just undo the increment and give up.
1854				if int32(atomic.Xadd(&sched.nmspinning, -1)) < 0 {
1855					throw("startm: negative nmspinning")
1856				}
1857			}
1858			return
1859		}
1860	}
1861	mp := mget()
1862	unlock(&sched.lock)
1863	if mp == nil {
1864		var fn func()
1865		if spinning {
1866			// The caller incremented nmspinning, so set m.spinning in the new M.
1867			fn = mspinning
1868		}
1869		newm(fn, _p_)
1870		return
1871	}
1872	if mp.spinning {
1873		throw("startm: m is spinning")
1874	}
1875	if mp.nextp != 0 {
1876		throw("startm: m has p")
1877	}
1878	if spinning && !runqempty(_p_) {
1879		throw("startm: p has runnable gs")
1880	}
1881	// The caller incremented nmspinning, so set m.spinning in the new M.
1882	mp.spinning = spinning
1883	mp.nextp.set(_p_)
1884	notewakeup(&mp.park)
1885}
1886
1887// Hands off P from syscall or locked M.
1888// Always runs without a P, so write barriers are not allowed.
1889//go:nowritebarrierrec
1890func handoffp(_p_ *p) {
1891	// handoffp must start an M in any situation where
1892	// findrunnable would return a G to run on _p_.
1893
1894	// if it has local work, start it straight away
1895	if !runqempty(_p_) || sched.runqsize != 0 {
1896		startm(_p_, false)
1897		return
1898	}
1899	// if it has GC work, start it straight away
1900	if gcBlackenEnabled != 0 && gcMarkWorkAvailable(_p_) {
1901		startm(_p_, false)
1902		return
1903	}
1904	// no local work, check that there are no spinning/idle M's,
1905	// otherwise our help is not required
1906	if atomic.Load(&sched.nmspinning)+atomic.Load(&sched.npidle) == 0 && atomic.Cas(&sched.nmspinning, 0, 1) { // TODO: fast atomic
1907		startm(_p_, true)
1908		return
1909	}
1910	lock(&sched.lock)
1911	if sched.gcwaiting != 0 {
1912		_p_.status = _Pgcstop
1913		sched.stopwait--
1914		if sched.stopwait == 0 {
1915			notewakeup(&sched.stopnote)
1916		}
1917		unlock(&sched.lock)
1918		return
1919	}
1920	if _p_.runSafePointFn != 0 && atomic.Cas(&_p_.runSafePointFn, 1, 0) {
1921		sched.safePointFn(_p_)
1922		sched.safePointWait--
1923		if sched.safePointWait == 0 {
1924			notewakeup(&sched.safePointNote)
1925		}
1926	}
1927	if sched.runqsize != 0 {
1928		unlock(&sched.lock)
1929		startm(_p_, false)
1930		return
1931	}
1932	// If this is the last running P and nobody is polling network,
1933	// need to wakeup another M to poll network.
1934	if sched.npidle == uint32(gomaxprocs-1) && atomic.Load64(&sched.lastpoll) != 0 {
1935		unlock(&sched.lock)
1936		startm(_p_, false)
1937		return
1938	}
1939	if when := nobarrierWakeTime(_p_); when != 0 {
1940		wakeNetPoller(when)
1941	}
1942	pidleput(_p_)
1943	unlock(&sched.lock)
1944}
1945
1946// Tries to add one more P to execute G's.
1947// Called when a G is made runnable (newproc, ready).
1948func wakep() {
1949	// be conservative about spinning threads
1950	if !atomic.Cas(&sched.nmspinning, 0, 1) {
1951		return
1952	}
1953	startm(nil, true)
1954}
1955
1956// Stops execution of the current m that is locked to a g until the g is runnable again.
1957// Returns with acquired P.
1958func stoplockedm() {
1959	_g_ := getg()
1960
1961	if _g_.m.lockedg == 0 || _g_.m.lockedg.ptr().lockedm.ptr() != _g_.m {
1962		throw("stoplockedm: inconsistent locking")
1963	}
1964	if _g_.m.p != 0 {
1965		// Schedule another M to run this p.
1966		_p_ := releasep()
1967		handoffp(_p_)
1968	}
1969	incidlelocked(1)
1970	// Wait until another thread schedules lockedg again.
1971	notesleep(&_g_.m.park)
1972	noteclear(&_g_.m.park)
1973	status := readgstatus(_g_.m.lockedg.ptr())
1974	if status&^_Gscan != _Grunnable {
1975		print("runtime:stoplockedm: g is not Grunnable or Gscanrunnable\n")
1976		dumpgstatus(_g_)
1977		throw("stoplockedm: not runnable")
1978	}
1979	acquirep(_g_.m.nextp.ptr())
1980	_g_.m.nextp = 0
1981}
1982
1983// Schedules the locked m to run the locked gp.
1984// May run during STW, so write barriers are not allowed.
1985//go:nowritebarrierrec
1986func startlockedm(gp *g) {
1987	_g_ := getg()
1988
1989	mp := gp.lockedm.ptr()
1990	if mp == _g_.m {
1991		throw("startlockedm: locked to me")
1992	}
1993	if mp.nextp != 0 {
1994		throw("startlockedm: m has p")
1995	}
1996	// directly handoff current P to the locked m
1997	incidlelocked(-1)
1998	_p_ := releasep()
1999	mp.nextp.set(_p_)
2000	notewakeup(&mp.park)
2001	stopm()
2002}
2003
2004// Stops the current m for stopTheWorld.
2005// Returns when the world is restarted.
2006func gcstopm() {
2007	_g_ := getg()
2008
2009	if sched.gcwaiting == 0 {
2010		throw("gcstopm: not waiting for gc")
2011	}
2012	if _g_.m.spinning {
2013		_g_.m.spinning = false
2014		// OK to just drop nmspinning here,
2015		// startTheWorld will unpark threads as necessary.
2016		if int32(atomic.Xadd(&sched.nmspinning, -1)) < 0 {
2017			throw("gcstopm: negative nmspinning")
2018		}
2019	}
2020	_p_ := releasep()
2021	lock(&sched.lock)
2022	_p_.status = _Pgcstop
2023	sched.stopwait--
2024	if sched.stopwait == 0 {
2025		notewakeup(&sched.stopnote)
2026	}
2027	unlock(&sched.lock)
2028	stopm()
2029}
2030
2031// Schedules gp to run on the current M.
2032// If inheritTime is true, gp inherits the remaining time in the
2033// current time slice. Otherwise, it starts a new time slice.
2034// Never returns.
2035//
2036// Write barriers are allowed because this is called immediately after
2037// acquiring a P in several places.
2038//
2039//go:yeswritebarrierrec
2040func execute(gp *g, inheritTime bool) {
2041	_g_ := getg()
2042
2043	// Assign gp.m before entering _Grunning so running Gs have an
2044	// M.
2045	_g_.m.curg = gp
2046	gp.m = _g_.m
2047	casgstatus(gp, _Grunnable, _Grunning)
2048	gp.waitsince = 0
2049	gp.preempt = false
2050	gp.stackguard0 = gp.stack.lo + _StackGuard
2051	if !inheritTime {
2052		_g_.m.p.ptr().schedtick++
2053	}
2054
2055	// Check whether the profiler needs to be turned on or off.
2056	hz := sched.profilehz
2057	if _g_.m.profilehz != hz {
2058		setThreadCPUProfiler(hz)
2059	}
2060
2061	if trace.enabled {
2062		// GoSysExit has to happen when we have a P, but before GoStart.
2063		// So we emit it here.
2064		if gp.syscallsp != 0 && gp.sysblocktraced {
2065			traceGoSysExit(gp.sysexitticks)
2066		}
2067		traceGoStart()
2068	}
2069
2070	gogo(&gp.sched)
2071}
2072
2073// Finds a runnable goroutine to execute.
2074// Tries to steal from other P's, get g from local or global queue, poll network.
2075func findrunnable() (gp *g, inheritTime bool) {
2076	_g_ := getg()
2077
2078	// The conditions here and in handoffp must agree: if
2079	// findrunnable would return a G to run, handoffp must start
2080	// an M.
2081
2082top:
2083	_p_ := _g_.m.p.ptr()
2084	if sched.gcwaiting != 0 {
2085		gcstopm()
2086		goto top
2087	}
2088	if _p_.runSafePointFn != 0 {
2089		runSafePointFn()
2090	}
2091
2092	now, pollUntil, _ := checkTimers(_p_, 0)
2093
2094	if fingwait && fingwake {
2095		if gp := wakefing(); gp != nil {
2096			ready(gp, 0, true)
2097		}
2098	}
2099	if *cgo_yield != nil {
2100		asmcgocall(*cgo_yield, nil)
2101	}
2102
2103	// local runq
2104	if gp, inheritTime := runqget(_p_); gp != nil {
2105		return gp, inheritTime
2106	}
2107
2108	// global runq
2109	if sched.runqsize != 0 {
2110		lock(&sched.lock)
2111		gp := globrunqget(_p_, 0)
2112		unlock(&sched.lock)
2113		if gp != nil {
2114			return gp, false
2115		}
2116	}
2117
2118	// Poll network.
2119	// This netpoll is only an optimization before we resort to stealing.
2120	// We can safely skip it if there are no waiters or a thread is blocked
2121	// in netpoll already. If there is any kind of logical race with that
2122	// blocked thread (e.g. it has already returned from netpoll, but has
2123	// not set lastpoll yet), this thread will do blocking netpoll below
2124	// anyway.
2125	if netpollinited() && atomic.Load(&netpollWaiters) > 0 && atomic.Load64(&sched.lastpoll) != 0 {
2126		if list := netpoll(0); !list.empty() { // non-blocking
2127			gp := list.pop()
2128			injectglist(&list)
2129			casgstatus(gp, _Gwaiting, _Grunnable)
2130			if trace.enabled {
2131				traceGoUnpark(gp, 0)
2132			}
2133			return gp, false
2134		}
2135	}
2136
2137	// Steal work from other P's.
2138	procs := uint32(gomaxprocs)
2139	ranTimer := false
2140	// If number of spinning M's >= number of busy P's, block.
2141	// This is necessary to prevent excessive CPU consumption
2142	// when GOMAXPROCS>>1 but the program parallelism is low.
2143	if !_g_.m.spinning && 2*atomic.Load(&sched.nmspinning) >= procs-atomic.Load(&sched.npidle) {
2144		goto stop
2145	}
2146	if !_g_.m.spinning {
2147		_g_.m.spinning = true
2148		atomic.Xadd(&sched.nmspinning, 1)
2149	}
2150	for i := 0; i < 4; i++ {
2151		for enum := stealOrder.start(fastrand()); !enum.done(); enum.next() {
2152			if sched.gcwaiting != 0 {
2153				goto top
2154			}
2155			stealRunNextG := i > 2 // first look for ready queues with more than 1 g
2156			p2 := allp[enum.position()]
2157			if _p_ == p2 {
2158				continue
2159			}
2160			if gp := runqsteal(_p_, p2, stealRunNextG); gp != nil {
2161				return gp, false
2162			}
2163
2164			// Consider stealing timers from p2.
2165			// This call to checkTimers is the only place where
2166			// we hold a lock on a different P's timers.
2167			// Lock contention can be a problem here, so avoid
2168			// grabbing the lock if p2 is running and not marked
2169			// for preemption. If p2 is running and not being
2170			// preempted we assume it will handle its own timers.
2171			if i > 2 && shouldStealTimers(p2) {
2172				tnow, w, ran := checkTimers(p2, now)
2173				now = tnow
2174				if w != 0 && (pollUntil == 0 || w < pollUntil) {
2175					pollUntil = w
2176				}
2177				if ran {
2178					// Running the timers may have
2179					// made an arbitrary number of G's
2180					// ready and added them to this P's
2181					// local run queue. That invalidates
2182					// the assumption of runqsteal
2183					// that it always has room to add
2184					// stolen G's. So check now if there
2185					// is a local G to run.
2186					if gp, inheritTime := runqget(_p_); gp != nil {
2187						return gp, inheritTime
2188					}
2189					ranTimer = true
2190				}
2191			}
2192		}
2193	}
2194	if ranTimer {
2195		// Running a timer may have made some goroutine ready.
2196		goto top
2197	}
2198
2199stop:
2200
2201	// We have nothing to do. If we're in the GC mark phase, can
2202	// safely scan and blacken objects, and have work to do, run
2203	// idle-time marking rather than give up the P.
2204	if gcBlackenEnabled != 0 && _p_.gcBgMarkWorker != 0 && gcMarkWorkAvailable(_p_) {
2205		_p_.gcMarkWorkerMode = gcMarkWorkerIdleMode
2206		gp := _p_.gcBgMarkWorker.ptr()
2207		casgstatus(gp, _Gwaiting, _Grunnable)
2208		if trace.enabled {
2209			traceGoUnpark(gp, 0)
2210		}
2211		return gp, false
2212	}
2213
2214	delta := int64(-1)
2215	if pollUntil != 0 {
2216		// checkTimers ensures that pollUntil > now.
2217		delta = pollUntil - now
2218	}
2219
2220	// wasm only:
2221	// If a callback returned and no other goroutine is awake,
2222	// then pause execution until a callback was triggered.
2223	if beforeIdle(delta) {
2224		// At least one goroutine got woken.
2225		goto top
2226	}
2227
2228	// Before we drop our P, make a snapshot of the allp slice,
2229	// which can change underfoot once we no longer block
2230	// safe-points. We don't need to snapshot the contents because
2231	// everything up to cap(allp) is immutable.
2232	allpSnapshot := allp
2233
2234	// return P and block
2235	lock(&sched.lock)
2236	if sched.gcwaiting != 0 || _p_.runSafePointFn != 0 {
2237		unlock(&sched.lock)
2238		goto top
2239	}
2240	if sched.runqsize != 0 {
2241		gp := globrunqget(_p_, 0)
2242		unlock(&sched.lock)
2243		return gp, false
2244	}
2245	if releasep() != _p_ {
2246		throw("findrunnable: wrong p")
2247	}
2248	pidleput(_p_)
2249	unlock(&sched.lock)
2250
2251	// Delicate dance: thread transitions from spinning to non-spinning state,
2252	// potentially concurrently with submission of new goroutines. We must
2253	// drop nmspinning first and then check all per-P queues again (with
2254	// #StoreLoad memory barrier in between). If we do it the other way around,
2255	// another thread can submit a goroutine after we've checked all run queues
2256	// but before we drop nmspinning; as the result nobody will unpark a thread
2257	// to run the goroutine.
2258	// If we discover new work below, we need to restore m.spinning as a signal
2259	// for resetspinning to unpark a new worker thread (because there can be more
2260	// than one starving goroutine). However, if after discovering new work
2261	// we also observe no idle Ps, it is OK to just park the current thread:
2262	// the system is fully loaded so no spinning threads are required.
2263	// Also see "Worker thread parking/unparking" comment at the top of the file.
2264	wasSpinning := _g_.m.spinning
2265	if _g_.m.spinning {
2266		_g_.m.spinning = false
2267		if int32(atomic.Xadd(&sched.nmspinning, -1)) < 0 {
2268			throw("findrunnable: negative nmspinning")
2269		}
2270	}
2271
2272	// check all runqueues once again
2273	for _, _p_ := range allpSnapshot {
2274		if !runqempty(_p_) {
2275			lock(&sched.lock)
2276			_p_ = pidleget()
2277			unlock(&sched.lock)
2278			if _p_ != nil {
2279				acquirep(_p_)
2280				if wasSpinning {
2281					_g_.m.spinning = true
2282					atomic.Xadd(&sched.nmspinning, 1)
2283				}
2284				goto top
2285			}
2286			break
2287		}
2288	}
2289
2290	// Check for idle-priority GC work again.
2291	if gcBlackenEnabled != 0 && gcMarkWorkAvailable(nil) {
2292		lock(&sched.lock)
2293		_p_ = pidleget()
2294		if _p_ != nil && _p_.gcBgMarkWorker == 0 {
2295			pidleput(_p_)
2296			_p_ = nil
2297		}
2298		unlock(&sched.lock)
2299		if _p_ != nil {
2300			acquirep(_p_)
2301			if wasSpinning {
2302				_g_.m.spinning = true
2303				atomic.Xadd(&sched.nmspinning, 1)
2304			}
2305			// Go back to idle GC check.
2306			goto stop
2307		}
2308	}
2309
2310	// poll network
2311	if netpollinited() && (atomic.Load(&netpollWaiters) > 0 || pollUntil != 0) && atomic.Xchg64(&sched.lastpoll, 0) != 0 {
2312		atomic.Store64(&sched.pollUntil, uint64(pollUntil))
2313		if _g_.m.p != 0 {
2314			throw("findrunnable: netpoll with p")
2315		}
2316		if _g_.m.spinning {
2317			throw("findrunnable: netpoll with spinning")
2318		}
2319		if faketime != 0 {
2320			// When using fake time, just poll.
2321			delta = 0
2322		}
2323		list := netpoll(delta) // block until new work is available
2324		atomic.Store64(&sched.pollUntil, 0)
2325		atomic.Store64(&sched.lastpoll, uint64(nanotime()))
2326		if faketime != 0 && list.empty() {
2327			// Using fake time and nothing is ready; stop M.
2328			// When all M's stop, checkdead will call timejump.
2329			stopm()
2330			goto top
2331		}
2332		lock(&sched.lock)
2333		_p_ = pidleget()
2334		unlock(&sched.lock)
2335		if _p_ == nil {
2336			injectglist(&list)
2337		} else {
2338			acquirep(_p_)
2339			if !list.empty() {
2340				gp := list.pop()
2341				injectglist(&list)
2342				casgstatus(gp, _Gwaiting, _Grunnable)
2343				if trace.enabled {
2344					traceGoUnpark(gp, 0)
2345				}
2346				return gp, false
2347			}
2348			if wasSpinning {
2349				_g_.m.spinning = true
2350				atomic.Xadd(&sched.nmspinning, 1)
2351			}
2352			goto top
2353		}
2354	} else if pollUntil != 0 && netpollinited() {
2355		pollerPollUntil := int64(atomic.Load64(&sched.pollUntil))
2356		if pollerPollUntil == 0 || pollerPollUntil > pollUntil {
2357			netpollBreak()
2358		}
2359	}
2360	stopm()
2361	goto top
2362}
2363
2364// pollWork reports whether there is non-background work this P could
2365// be doing. This is a fairly lightweight check to be used for
2366// background work loops, like idle GC. It checks a subset of the
2367// conditions checked by the actual scheduler.
2368func pollWork() bool {
2369	if sched.runqsize != 0 {
2370		return true
2371	}
2372	p := getg().m.p.ptr()
2373	if !runqempty(p) {
2374		return true
2375	}
2376	if netpollinited() && atomic.Load(&netpollWaiters) > 0 && sched.lastpoll != 0 {
2377		if list := netpoll(0); !list.empty() {
2378			injectglist(&list)
2379			return true
2380		}
2381	}
2382	return false
2383}
2384
2385// wakeNetPoller wakes up the thread sleeping in the network poller,
2386// if there is one, and if it isn't going to wake up anyhow before
2387// the when argument.
2388func wakeNetPoller(when int64) {
2389	if atomic.Load64(&sched.lastpoll) == 0 {
2390		// In findrunnable we ensure that when polling the pollUntil
2391		// field is either zero or the time to which the current
2392		// poll is expected to run. This can have a spurious wakeup
2393		// but should never miss a wakeup.
2394		pollerPollUntil := int64(atomic.Load64(&sched.pollUntil))
2395		if pollerPollUntil == 0 || pollerPollUntil > when {
2396			netpollBreak()
2397		}
2398	}
2399}
2400
2401func resetspinning() {
2402	_g_ := getg()
2403	if !_g_.m.spinning {
2404		throw("resetspinning: not a spinning m")
2405	}
2406	_g_.m.spinning = false
2407	nmspinning := atomic.Xadd(&sched.nmspinning, -1)
2408	if int32(nmspinning) < 0 {
2409		throw("findrunnable: negative nmspinning")
2410	}
2411	// M wakeup policy is deliberately somewhat conservative, so check if we
2412	// need to wakeup another P here. See "Worker thread parking/unparking"
2413	// comment at the top of the file for details.
2414	if nmspinning == 0 && atomic.Load(&sched.npidle) > 0 {
2415		wakep()
2416	}
2417}
2418
2419// Injects the list of runnable G's into the scheduler and clears glist.
2420// Can run concurrently with GC.
2421func injectglist(glist *gList) {
2422	if glist.empty() {
2423		return
2424	}
2425	if trace.enabled {
2426		for gp := glist.head.ptr(); gp != nil; gp = gp.schedlink.ptr() {
2427			traceGoUnpark(gp, 0)
2428		}
2429	}
2430	lock(&sched.lock)
2431	var n int
2432	for n = 0; !glist.empty(); n++ {
2433		gp := glist.pop()
2434		casgstatus(gp, _Gwaiting, _Grunnable)
2435		globrunqput(gp)
2436	}
2437	unlock(&sched.lock)
2438	for ; n != 0 && sched.npidle != 0; n-- {
2439		startm(nil, false)
2440	}
2441	*glist = gList{}
2442}
2443
2444// One round of scheduler: find a runnable goroutine and execute it.
2445// Never returns.
2446func schedule() {
2447	_g_ := getg()
2448
2449	if _g_.m.locks != 0 {
2450		throw("schedule: holding locks")
2451	}
2452
2453	if _g_.m.lockedg != 0 {
2454		stoplockedm()
2455		execute(_g_.m.lockedg.ptr(), false) // Never returns.
2456	}
2457
2458	// We should not schedule away from a g that is executing a cgo call,
2459	// since the cgo call is using the m's g0 stack.
2460	if _g_.m.incgo {
2461		throw("schedule: in cgo")
2462	}
2463
2464top:
2465	pp := _g_.m.p.ptr()
2466	pp.preempt = false
2467
2468	if sched.gcwaiting != 0 {
2469		gcstopm()
2470		goto top
2471	}
2472	if pp.runSafePointFn != 0 {
2473		runSafePointFn()
2474	}
2475
2476	// Sanity check: if we are spinning, the run queue should be empty.
2477	// Check this before calling checkTimers, as that might call
2478	// goready to put a ready goroutine on the local run queue.
2479	if _g_.m.spinning && (pp.runnext != 0 || pp.runqhead != pp.runqtail) {
2480		throw("schedule: spinning with local work")
2481	}
2482
2483	checkTimers(pp, 0)
2484
2485	var gp *g
2486	var inheritTime bool
2487
2488	// Normal goroutines check whether a P needs to be woken in ready,
2489	// but GC workers and trace readers do not, so the check must
2490	// be done here instead.
2491	tryWakeP := false
2492	if trace.enabled || trace.shutdown {
2493		gp = traceReader()
2494		if gp != nil {
2495			casgstatus(gp, _Gwaiting, _Grunnable)
2496			traceGoUnpark(gp, 0)
2497			tryWakeP = true
2498		}
2499	}
2500	if gp == nil && gcBlackenEnabled != 0 {
2501		gp = gcController.findRunnableGCWorker(_g_.m.p.ptr())
2502		tryWakeP = tryWakeP || gp != nil
2503	}
2504	if gp == nil {
2505		// Check the global runnable queue once in a while to ensure fairness.
2506		// Otherwise two goroutines can completely occupy the local runqueue
2507		// by constantly respawning each other.
2508		if _g_.m.p.ptr().schedtick%61 == 0 && sched.runqsize > 0 {
2509			lock(&sched.lock)
2510			gp = globrunqget(_g_.m.p.ptr(), 1)
2511			unlock(&sched.lock)
2512		}
2513	}
2514	if gp == nil {
2515		gp, inheritTime = runqget(_g_.m.p.ptr())
2516		// We can see gp != nil here even if the M is spinning,
2517		// if checkTimers added a local goroutine via goready.
2518	}
2519	if gp == nil {
2520		gp, inheritTime = findrunnable() // blocks until work is available
2521	}
2522
2523	// This thread is going to run a goroutine and is not spinning anymore,
2524	// so if it was marked as spinning we need to reset it now and potentially
2525	// start a new spinning M.
2526	if _g_.m.spinning {
2527		resetspinning()
2528	}
2529
2530	if sched.disable.user && !schedEnabled(gp) {
2531		// Scheduling of this goroutine is disabled. Put it on
2532		// the list of pending runnable goroutines for when we
2533		// re-enable user scheduling and look again.
2534		lock(&sched.lock)
2535		if schedEnabled(gp) {
2536			// Something re-enabled scheduling while we
2537			// were acquiring the lock.
2538			unlock(&sched.lock)
2539		} else {
2540			sched.disable.runnable.pushBack(gp)
2541			sched.disable.n++
2542			unlock(&sched.lock)
2543			goto top
2544		}
2545	}
2546
2547	// If we are about to schedule a special goroutine (a GC worker or trace reader),
2548	// wake a P if there is one.
2549	if tryWakeP {
2550		if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 {
2551			wakep()
2552		}
2553	}
2554	if gp.lockedm != 0 {
2555		// Hands off own p to the locked m,
2556		// then blocks waiting for a new p.
2557		startlockedm(gp)
2558		goto top
2559	}
2560
2561	execute(gp, inheritTime)
2562}
2563
2564// dropg removes the association between m and the current goroutine m->curg (gp for short).
2565// Typically a caller sets gp's status away from Grunning and then
2566// immediately calls dropg to finish the job. The caller is also responsible
2567// for arranging that gp will be restarted using ready at an
2568// appropriate time. After calling dropg and arranging for gp to be
2569// readied later, the caller can do other work but eventually should
2570// call schedule to restart the scheduling of goroutines on this m.
2571func dropg() {
2572	_g_ := getg()
2573
2574	setMNoWB(&_g_.m.curg.m, nil)
2575	setGNoWB(&_g_.m.curg, nil)
2576}
2577
2578// checkTimers runs any timers for the P that are ready.
2579// If now is not 0 it is the current time.
2580// It returns the current time or 0 if it is not known,
2581// and the time when the next timer should run or 0 if there is no next timer,
2582// and reports whether it ran any timers.
2583// If the time when the next timer should run is not 0,
2584// it is always larger than the returned time.
2585// We pass now in and out to avoid extra calls of nanotime.
2586//go:yeswritebarrierrec
2587func checkTimers(pp *p, now int64) (rnow, pollUntil int64, ran bool) {
2588	// If there are no timers to adjust, and the first timer on
2589	// the heap is not yet ready to run, then there is nothing to do.
2590	if atomic.Load(&pp.adjustTimers) == 0 {
2591		next := int64(atomic.Load64(&pp.timer0When))
2592		if next == 0 {
2593			return now, 0, false
2594		}
2595		if now == 0 {
2596			now = nanotime()
2597		}
2598		if now < next {
2599			// Next timer is not ready to run.
2600			// But keep going if we would clear deleted timers.
2601			// This corresponds to the condition below where
2602			// we decide whether to call clearDeletedTimers.
2603			if pp != getg().m.p.ptr() || int(atomic.Load(&pp.deletedTimers)) <= int(atomic.Load(&pp.numTimers)/4) {
2604				return now, next, false
2605			}
2606		}
2607	}
2608
2609	lock(&pp.timersLock)
2610
2611	adjusttimers(pp)
2612
2613	rnow = now
2614	if len(pp.timers) > 0 {
2615		if rnow == 0 {
2616			rnow = nanotime()
2617		}
2618		for len(pp.timers) > 0 {
2619			// Note that runtimer may temporarily unlock
2620			// pp.timersLock.
2621			if tw := runtimer(pp, rnow); tw != 0 {
2622				if tw > 0 {
2623					pollUntil = tw
2624				}
2625				break
2626			}
2627			ran = true
2628		}
2629	}
2630
2631	// If this is the local P, and there are a lot of deleted timers,
2632	// clear them out. We only do this for the local P to reduce
2633	// lock contention on timersLock.
2634	if pp == getg().m.p.ptr() && int(atomic.Load(&pp.deletedTimers)) > len(pp.timers)/4 {
2635		clearDeletedTimers(pp)
2636	}
2637
2638	unlock(&pp.timersLock)
2639
2640	return rnow, pollUntil, ran
2641}
2642
2643// shouldStealTimers reports whether we should try stealing the timers from p2.
2644// We don't steal timers from a running P that is not marked for preemption,
2645// on the assumption that it will run its own timers. This reduces
2646// contention on the timers lock.
2647func shouldStealTimers(p2 *p) bool {
2648	if p2.status != _Prunning {
2649		return true
2650	}
2651	mp := p2.m.ptr()
2652	if mp == nil || mp.locks > 0 {
2653		return false
2654	}
2655	gp := mp.curg
2656	if gp == nil || gp.atomicstatus != _Grunning || !gp.preempt {
2657		return false
2658	}
2659	return true
2660}
2661
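// parkunlock_c is used as a waitunlockf by park: it unlocks the mutex
// passed in waitlock and returns true so that the goroutine stays parked.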
2662func parkunlock_c(gp *g, lock unsafe.Pointer) bool {
2663	unlock((*mutex)(lock))
2664	return true
2665}
2666
2667// park continuation on g0.
2668func park_m(gp *g) {
2669	_g_ := getg()
2670
2671	if trace.enabled {
2672		traceGoPark(_g_.m.waittraceev, _g_.m.waittraceskip)
2673	}
2674
2675	casgstatus(gp, _Grunning, _Gwaiting)
2676	dropg()
2677
2678	if fn := _g_.m.waitunlockf; fn != nil {
2679		ok := fn(gp, _g_.m.waitlock)
2680		_g_.m.waitunlockf = nil
2681		_g_.m.waitlock = nil
2682		if !ok {
2683			if trace.enabled {
2684				traceGoUnpark(gp, 2)
2685			}
2686			casgstatus(gp, _Gwaiting, _Grunnable)
2687			execute(gp, true) // Schedule it back, never returns.
2688		}
2689	}
2690	schedule()
2691}
2692
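// goschedImpl is the common yield path: it moves gp from _Grunning to
// _Grunnable, puts it on the global run queue, and calls schedule.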
2693func goschedImpl(gp *g) {
2694	status := readgstatus(gp)
2695	if status&^_Gscan != _Grunning {
2696		dumpgstatus(gp)
2697		throw("bad g status")
2698	}
2699	casgstatus(gp, _Grunning, _Grunnable)
2700	dropg()
2701	lock(&sched.lock)
2702	globrunqput(gp)
2703	unlock(&sched.lock)
2704
2705	schedule()
2706}
2707
2708// Gosched continuation on g0.
2709func gosched_m(gp *g) {
2710	if trace.enabled {
2711		traceGoSched()
2712	}
2713	goschedImpl(gp)
2714}
2715
2716// goschedguarded is a forbidden-states-avoided version of gosched_m
2717func goschedguarded_m(gp *g) {
2718
2719	if !canPreemptM(gp.m) {
2720		gogo(&gp.sched) // never return
2721	}
2722
2723	if trace.enabled {
2724		traceGoSched()
2725	}
2726	goschedImpl(gp)
2727}
2728
2729func gopreempt_m(gp *g) {
2730	if trace.enabled {
2731		traceGoPreempt()
2732	}
2733	goschedImpl(gp)
2734}
2735
2736// preemptPark parks gp and puts it in _Gpreempted.
2737//
2738//go:systemstack
2739func preemptPark(gp *g) {
2740	if trace.enabled {
2741		traceGoPark(traceEvGoBlock, 0)
2742	}
2743	status := readgstatus(gp)
2744	if status&^_Gscan != _Grunning {
2745		dumpgstatus(gp)
2746		throw("bad g status")
2747	}
2748	gp.waitreason = waitReasonPreempted
2749	// Transition from _Grunning to _Gscan|_Gpreempted. We can't
2750	// be in _Grunning when we dropg because then we'd be running
2751	// without an M, but the moment we're in _Gpreempted,
2752	// something could claim this G before we've fully cleaned it
2753	// up. Hence, we set the scan bit to lock down further
2754	// transitions until we can dropg.
2755	casGToPreemptScan(gp, _Grunning, _Gscan|_Gpreempted)
2756	dropg()
2757	casfrom_Gscanstatus(gp, _Gscan|_Gpreempted, _Gpreempted)
2758	schedule()
2759}
2760
2761// goyield is like Gosched, but it:
2762// - emits a GoPreempt trace event instead of a GoSched trace event
2763// - puts the current G on the runq of the current P instead of the globrunq
2764func goyield() {
2765	checkTimeouts()
2766	mcall(goyield_m)
2767}
2768
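// goyield_m is the goyield continuation on g0. Unlike gosched_m, it puts
// gp back on the run queue of the current P rather than on the global queue.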
2769func goyield_m(gp *g) {
2770	if trace.enabled {
2771		traceGoPreempt()
2772	}
2773	pp := gp.m.p.ptr()
2774	casgstatus(gp, _Grunning, _Grunnable)
2775	dropg()
2776	runqput(pp, gp, false)
2777	schedule()
2778}
2779
2780// Finishes execution of the current goroutine.
2781func goexit1() {
2782	if raceenabled {
2783		racegoend()
2784	}
2785	if trace.enabled {
2786		traceGoEnd()
2787	}
2788	mcall(goexit0)
2789}
2790
2791// goexit continuation on g0.
2792func goexit0(gp *g) {
2793	_g_ := getg()
2794
2795	casgstatus(gp, _Grunning, _Gdead)
2796	if isSystemGoroutine(gp, false) {
2797		atomic.Xadd(&sched.ngsys, -1)
2798	}
2799	gp.m = nil
2800	locked := gp.lockedm != 0
2801	gp.lockedm = 0
2802	_g_.m.lockedg = 0
2803	gp.preemptStop = false
2804	gp.paniconfault = false
2805	gp._defer = nil // should be nil already, but just in case.
2806	gp._panic = nil // non-nil for Goexit during panic. points at stack-allocated data.
2807	gp.writebuf = nil
2808	gp.waitreason = 0
2809	gp.param = nil
2810	gp.labels = nil
2811	gp.timer = nil
2812
2813	if gcBlackenEnabled != 0 && gp.gcAssistBytes > 0 {
2814		// Flush assist credit to the global pool. This gives
2815		// better information to pacing if the application is
2816		// rapidly creating and exiting goroutines.
2817		scanCredit := int64(gcController.assistWorkPerByte * float64(gp.gcAssistBytes))
2818		atomic.Xaddint64(&gcController.bgScanCredit, scanCredit)
2819		gp.gcAssistBytes = 0
2820	}
2821
2822	dropg()
2823
2824	if GOARCH == "wasm" { // no threads yet on wasm
2825		gfput(_g_.m.p.ptr(), gp)
2826		schedule() // never returns
2827	}
2828
2829	if _g_.m.lockedInt != 0 {
2830		print("invalid m->lockedInt = ", _g_.m.lockedInt, "\n")
2831		throw("internal lockOSThread error")
2832	}
2833	gfput(_g_.m.p.ptr(), gp)
2834	if locked {
2835		// The goroutine may have locked this thread because
2836		// it put it in an unusual kernel state. Kill it
2837		// rather than returning it to the thread pool.
2838
2839		// Return to mstart, which will release the P and exit
2840		// the thread.
2841		if GOOS != "plan9" { // See golang.org/issue/22227.
2842			gogo(&_g_.m.g0.sched)
2843		} else {
2844			// Clear lockedExt on plan9 since we may end up re-using
2845			// this thread.
2846			_g_.m.lockedExt = 0
2847		}
2848	}
2849	schedule()
2850}
2851
2852// save updates getg().sched to refer to pc and sp so that a following
2853// gogo will restore pc and sp.
2854//
2855// save must not have write barriers because invoking a write barrier
2856// can clobber getg().sched.
2857//
2858//go:nosplit
2859//go:nowritebarrierrec
2860func save(pc, sp uintptr) {
2861	_g_ := getg()
2862
2863	_g_.sched.pc = pc
2864	_g_.sched.sp = sp
2865	_g_.sched.lr = 0
2866	_g_.sched.ret = 0
2867	_g_.sched.g = guintptr(unsafe.Pointer(_g_))
2868	// We need to ensure ctxt is zero, but can't have a write
2869	// barrier here. However, it should always already be zero.
2870	// Assert that.
2871	if _g_.sched.ctxt != nil {
2872		badctxt()
2873	}
2874}
2875
2876// The goroutine g is about to enter a system call.
2877// Record that it's not using the cpu anymore.
2878// This is called only from the go syscall library and cgocall,
2879// not from the low-level system calls used by the runtime.
2880//
2881// Entersyscall cannot split the stack: the gosave must
2882// make g->sched refer to the caller's stack segment, because
2883// entersyscall is going to return immediately after.
2884//
2885// Nothing entersyscall calls can split the stack either.
2886// We cannot safely move the stack during an active call to syscall,
2887// because we do not know which of the uintptr arguments are
2888// really pointers (back into the stack).
2889// In practice, this means that we make the fast path run through
2890// entersyscall doing no-split things, and the slow path has to use systemstack
2891// to run bigger things on the system stack.
2892//
2893// reentersyscall is the entry point used by cgo callbacks, where explicitly
2894// saved SP and PC are restored. This is needed when exitsyscall will be called
2895// from a function further up in the call stack than the parent, as g->syscallsp
2896// must always point to a valid stack frame. entersyscall below is the normal
2897// entry point for syscalls, which obtains the SP and PC from the caller.
2898//
2899// Syscall tracing:
2900// At the start of a syscall we emit traceGoSysCall to capture the stack trace.
2901// If the syscall does not block, that is it, we do not emit any other events.
2902// If the syscall blocks (that is, P is retaken), retaker emits traceGoSysBlock;
2903// when syscall returns we emit traceGoSysExit and when the goroutine starts running
2904// (potentially instantly, if exitsyscallfast returns true) we emit traceGoStart.
2905// To ensure that traceGoSysExit is emitted strictly after traceGoSysBlock,
2906	// we remember the current value of syscalltick in m (_g_.m.syscalltick = _g_.m.p.ptr().syscalltick),
2907// whoever emits traceGoSysBlock increments p.syscalltick afterwards;
2908// and we wait for the increment before emitting traceGoSysExit.
2909// Note that the increment is done even if tracing is not enabled,
2910	// because tracing can be enabled in the middle of a syscall. We don't want the wait to hang.
2911//
2912//go:nosplit
2913func reentersyscall(pc, sp uintptr) {
2914	_g_ := getg()
2915
2916	// Disable preemption because during this function g is in Gsyscall status,
2917	// but can have inconsistent g->sched, do not let GC observe it.
2918	_g_.m.locks++
2919
2920	// Entersyscall must not call any function that might split/grow the stack.
2921	// (See details in comment above.)
2922	// Catch calls that might, by replacing the stack guard with something that
2923	// will trip any stack check and leaving a flag to tell newstack to die.
2924	_g_.stackguard0 = stackPreempt
2925	_g_.throwsplit = true
2926
2927	// Leave SP around for GC and traceback.
2928	save(pc, sp)
2929	_g_.syscallsp = sp
2930	_g_.syscallpc = pc
2931	casgstatus(_g_, _Grunning, _Gsyscall)
2932	if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp {
2933		systemstack(func() {
2934			print("entersyscall inconsistent ", hex(_g_.syscallsp), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n")
2935			throw("entersyscall")
2936		})
2937	}
2938
2939	if trace.enabled {
2940		systemstack(traceGoSysCall)
2941		// systemstack itself clobbers g.sched.{pc,sp} and we might
2942		// need them later when the G is genuinely blocked in a
2943		// syscall
2944		save(pc, sp)
2945	}
2946
2947	if atomic.Load(&sched.sysmonwait) != 0 {
2948		systemstack(entersyscall_sysmon)
2949		save(pc, sp)
2950	}
2951
2952	if _g_.m.p.ptr().runSafePointFn != 0 {
2953		// runSafePointFn may split the stack if run on this stack
2954		systemstack(runSafePointFn)
2955		save(pc, sp)
2956	}
2957
2958	_g_.m.syscalltick = _g_.m.p.ptr().syscalltick
2959	_g_.sysblocktraced = true
2960	_g_.m.mcache = nil
2961	pp := _g_.m.p.ptr()
2962	pp.m = 0
2963	_g_.m.oldp.set(pp)
2964	_g_.m.p = 0
2965	atomic.Store(&pp.status, _Psyscall)
2966	if sched.gcwaiting != 0 {
2967		systemstack(entersyscall_gcwait)
2968		save(pc, sp)
2969	}
2970
2971	_g_.m.locks--
2972}
2973
2974// Standard syscall entry used by the go syscall library and normal cgo calls.
2975//
2976// This is exported via linkname to assembly in the syscall package.
2977//
2978//go:nosplit
2979//go:linkname entersyscall
2980func entersyscall() {
2981	reentersyscall(getcallerpc(), getcallersp())
2982}
2983
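// entersyscall_sysmon wakes sysmon if it is sleeping, so that it can watch
// for this syscall blocking and retake the P if needed.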
2984func entersyscall_sysmon() {
2985	lock(&sched.lock)
2986	if atomic.Load(&sched.sysmonwait) != 0 {
2987		atomic.Store(&sched.sysmonwait, 0)
2988		notewakeup(&sched.sysmonnote)
2989	}
2990	unlock(&sched.lock)
2991}
2992
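// entersyscall_gcwait hands the P of a goroutine entering a syscall over to
// a pending stop-the-world: it marks the P _Pgcstop and wakes the stopper
// if this was the last P it was waiting for.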
2993func entersyscall_gcwait() {
2994	_g_ := getg()
2995	_p_ := _g_.m.oldp.ptr()
2996
2997	lock(&sched.lock)
2998	if sched.stopwait > 0 && atomic.Cas(&_p_.status, _Psyscall, _Pgcstop) {
2999		if trace.enabled {
3000			traceGoSysBlock(_p_)
3001			traceProcStop(_p_)
3002		}
3003		_p_.syscalltick++
3004		if sched.stopwait--; sched.stopwait == 0 {
3005			notewakeup(&sched.stopnote)
3006		}
3007	}
3008	unlock(&sched.lock)
3009}
3010
3011// The same as entersyscall(), but with a hint that the syscall is blocking.
3012//go:nosplit
3013func entersyscallblock() {
3014	_g_ := getg()
3015
3016	_g_.m.locks++ // see comment in entersyscall
3017	_g_.throwsplit = true
3018	_g_.stackguard0 = stackPreempt // see comment in entersyscall
3019	_g_.m.syscalltick = _g_.m.p.ptr().syscalltick
3020	_g_.sysblocktraced = true
3021	_g_.m.p.ptr().syscalltick++
3022
3023	// Leave SP around for GC and traceback.
3024	pc := getcallerpc()
3025	sp := getcallersp()
3026	save(pc, sp)
3027	_g_.syscallsp = _g_.sched.sp
3028	_g_.syscallpc = _g_.sched.pc
3029	if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp {
3030		sp1 := sp
3031		sp2 := _g_.sched.sp
3032		sp3 := _g_.syscallsp
3033		systemstack(func() {
3034			print("entersyscallblock inconsistent ", hex(sp1), " ", hex(sp2), " ", hex(sp3), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n")
3035			throw("entersyscallblock")
3036		})
3037	}
3038	casgstatus(_g_, _Grunning, _Gsyscall)
3039	if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp {
3040		systemstack(func() {
3041			print("entersyscallblock inconsistent ", hex(sp), " ", hex(_g_.sched.sp), " ", hex(_g_.syscallsp), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n")
3042			throw("entersyscallblock")
3043		})
3044	}
3045
3046	systemstack(entersyscallblock_handoff)
3047
3048	// Resave for traceback during blocked call.
3049	save(getcallerpc(), getcallersp())
3050
3051	_g_.m.locks--
3052}
3053
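// entersyscallblock_handoff releases the current P and hands it off,
// emitting trace events first if tracing is enabled.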
3054func entersyscallblock_handoff() {
3055	if trace.enabled {
3056		traceGoSysCall()
3057		traceGoSysBlock(getg().m.p.ptr())
3058	}
3059	handoffp(releasep())
3060}
3061
3062// The goroutine g exited its system call.
3063// Arrange for it to run on a cpu again.
3064// This is called only from the go syscall library, not
3065// from the low-level system calls used by the runtime.
3066//
3067// Write barriers are not allowed because our P may have been stolen.
3068//
3069// This is exported via linkname to assembly in the syscall package.
3070//
3071//go:nosplit
3072//go:nowritebarrierrec
3073//go:linkname exitsyscall
3074func exitsyscall() {
3075	_g_ := getg()
3076
3077	_g_.m.locks++ // see comment in entersyscall
3078	if getcallersp() > _g_.syscallsp {
3079		throw("exitsyscall: syscall frame is no longer valid")
3080	}
3081
3082	_g_.waitsince = 0
3083	oldp := _g_.m.oldp.ptr()
3084	_g_.m.oldp = 0
3085	if exitsyscallfast(oldp) {
3086		if _g_.m.mcache == nil {
3087			throw("lost mcache")
3088		}
3089		if trace.enabled {
3090			if oldp != _g_.m.p.ptr() || _g_.m.syscalltick != _g_.m.p.ptr().syscalltick {
3091				systemstack(traceGoStart)
3092			}
3093		}
3094		// There's a cpu for us, so we can run.
3095		_g_.m.p.ptr().syscalltick++
3096		// We need to cas the status and scan before resuming...
3097		casgstatus(_g_, _Gsyscall, _Grunning)
3098
3099		// Garbage collector isn't running (since we are),
3100		// so okay to clear syscallsp.
3101		_g_.syscallsp = 0
3102		_g_.m.locks--
3103		if _g_.preempt {
3104			// restore the preemption request in case we've cleared it in newstack
3105			_g_.stackguard0 = stackPreempt
3106		} else {
3107			// otherwise restore the real _StackGuard, we've spoiled it in entersyscall/entersyscallblock
3108			_g_.stackguard0 = _g_.stack.lo + _StackGuard
3109		}
3110		_g_.throwsplit = false
3111
3112		if sched.disable.user && !schedEnabled(_g_) {
3113			// Scheduling of this goroutine is disabled.
3114			Gosched()
3115		}
3116
3117		return
3118	}
3119
3120	_g_.sysexitticks = 0
3121	if trace.enabled {
3122		// Wait till traceGoSysBlock event is emitted.
3123		// This ensures consistency of the trace (the goroutine is started after it is blocked).
3124		for oldp != nil && oldp.syscalltick == _g_.m.syscalltick {
3125			osyield()
3126		}
3127		// We can't trace syscall exit right now because we don't have a P.
3128		// Tracing code can invoke write barriers that cannot run without a P.
3129		// So instead we remember the syscall exit time and emit the event
3130		// in execute when we have a P.
3131		_g_.sysexitticks = cputicks()
3132	}
3133
3134	_g_.m.locks--
3135
3136	// Call the scheduler.
3137	mcall(exitsyscall0)
3138
3139	if _g_.m.mcache == nil {
3140		throw("lost mcache")
3141	}
3142
3143	// Scheduler returned, so we're allowed to run now.
3144	// Delete the syscallsp information that we left for
3145	// the garbage collector during the system call.
3146	// Must wait until now because until gosched returns
3147	// we don't know for sure that the garbage collector
3148	// is not running.
3149	_g_.syscallsp = 0
3150	_g_.m.p.ptr().syscalltick++
3151	_g_.throwsplit = false
3152}
3153
3154//go:nosplit
3155func exitsyscallfast(oldp *p) bool {
3156	_g_ := getg()
3157
3158	// Freezetheworld sets stopwait but does not retake P's.
3159	if sched.stopwait == freezeStopWait {
3160		return false
3161	}
3162
3163	// Try to re-acquire the last P.
3164	if oldp != nil && oldp.status == _Psyscall && atomic.Cas(&oldp.status, _Psyscall, _Pidle) {
3165		// There's a cpu for us, so we can run.
3166		wirep(oldp)
3167		exitsyscallfast_reacquired()
3168		return true
3169	}
3170
3171	// Try to get any other idle P.
3172	if sched.pidle != 0 {
3173		var ok bool
3174		systemstack(func() {
3175			ok = exitsyscallfast_pidle()
3176			if ok && trace.enabled {
3177				if oldp != nil {
3178					// Wait till traceGoSysBlock event is emitted.
3179					// This ensures consistency of the trace (the goroutine is started after it is blocked).
3180					for oldp.syscalltick == _g_.m.syscalltick {
3181						osyield()
3182					}
3183				}
3184				traceGoSysExit(0)
3185			}
3186		})
3187		if ok {
3188			return true
3189		}
3190	}
3191	return false
3192}
3193
3194// exitsyscallfast_reacquired is the exitsyscall path on which this G
3195// has successfully reacquired the P it was running on before the
3196// syscall.
3197//
3198//go:nosplit
3199func exitsyscallfast_reacquired() {
3200	_g_ := getg()
3201	if _g_.m.syscalltick != _g_.m.p.ptr().syscalltick {
3202		if trace.enabled {
3203			// The p was retaken and then entered a syscall again (since _g_.m.syscalltick has changed).
3204			// traceGoSysBlock for this syscall was already emitted,
3205			// but here we effectively retake the p from the new syscall running on the same p.
3206			systemstack(func() {
3207				// Denote blocking of the new syscall.
3208				traceGoSysBlock(_g_.m.p.ptr())
3209				// Denote completion of the current syscall.
3210				traceGoSysExit(0)
3211			})
3212		}
3213		_g_.m.p.ptr().syscalltick++
3214	}
3215}
3216
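// exitsyscallfast_pidle tries to acquire an idle P for a goroutine leaving
// a syscall (waking sysmon if it finds one and sysmon was waiting).
// It reports whether a P was acquired.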
3217func exitsyscallfast_pidle() bool {
3218	lock(&sched.lock)
3219	_p_ := pidleget()
3220	if _p_ != nil && atomic.Load(&sched.sysmonwait) != 0 {
3221		atomic.Store(&sched.sysmonwait, 0)
3222		notewakeup(&sched.sysmonnote)
3223	}
3224	unlock(&sched.lock)
3225	if _p_ != nil {
3226		acquirep(_p_)
3227		return true
3228	}
3229	return false
3230}
3231
3232// exitsyscall slow path on g0.
3233// Failed to acquire P, enqueue gp as runnable.
3234//
3235//go:nowritebarrierrec
3236func exitsyscall0(gp *g) {
3237	_g_ := getg()
3238
3239	casgstatus(gp, _Gsyscall, _Grunnable)
3240	dropg()
3241	lock(&sched.lock)
3242	var _p_ *p
3243	if schedEnabled(_g_) {
3244		_p_ = pidleget()
3245	}
3246	if _p_ == nil {
3247		globrunqput(gp)
3248	} else if atomic.Load(&sched.sysmonwait) != 0 {
3249		atomic.Store(&sched.sysmonwait, 0)
3250		notewakeup(&sched.sysmonnote)
3251	}
3252	unlock(&sched.lock)
3253	if _p_ != nil {
3254		acquirep(_p_)
3255		execute(gp, false) // Never returns.
3256	}
3257	if _g_.m.lockedg != 0 {
3258		// Wait until another thread schedules gp and so m again.
3259		stoplockedm()
3260		execute(gp, false) // Never returns.
3261	}
3262	stopm()
3263	schedule() // Never returns.
3264}
3265
3266func beforefork() {
3267	gp := getg().m.curg
3268
3269	// Block signals during a fork, so that the child does not run
3270	// a signal handler before exec if a signal is sent to the process
3271	// group. See issue #18600.
3272	gp.m.locks++
3273	msigsave(gp.m)
3274	sigblock()
3275
3276	// This function is called before fork in syscall package.
3277	// Code between fork and exec must not allocate memory nor even try to grow the stack.
3278	// Here we spoil gp.stackguard0 to reliably detect any attempts to grow the stack.
3279	// runtime_AfterFork will undo this in the parent process, but not in the child.
3280	gp.stackguard0 = stackFork
3281}
3282
3283// Called from syscall package before fork.
3284//go:linkname syscall_runtime_BeforeFork syscall.runtime_BeforeFork
3285//go:nosplit
3286func syscall_runtime_BeforeFork() {
3287	systemstack(beforefork)
3288}
3289
3290func afterfork() {
3291	gp := getg().m.curg
3292
3293	// See the comments in beforefork.
3294	gp.stackguard0 = gp.stack.lo + _StackGuard
3295
3296	msigrestore(gp.m.sigmask)
3297
3298	gp.m.locks--
3299}
3300
3301// Called from syscall package after fork in parent.
3302//go:linkname syscall_runtime_AfterFork syscall.runtime_AfterFork
3303//go:nosplit
3304func syscall_runtime_AfterFork() {
3305	systemstack(afterfork)
3306}
3307
3308// inForkedChild is true while manipulating signals in the child process.
3309// This is used to avoid calling libc functions in case we are using vfork.
3310var inForkedChild bool
3311
3312// Called from syscall package after fork in child.
3313// It resets non-sigignored signals to the default handler, and
3314// restores the signal mask in preparation for the exec.
3315//
3316// Because this might be called during a vfork, and therefore may be
3317// temporarily sharing address space with the parent process, this must
3318// not change any global variables or calling into C code that may do so.
3319//
3320//go:linkname syscall_runtime_AfterForkInChild syscall.runtime_AfterForkInChild
3321//go:nosplit
3322//go:nowritebarrierrec
3323func syscall_runtime_AfterForkInChild() {
3324	// It's OK to change the global variable inForkedChild here
3325	// because we are going to change it back. There is no race here,
3326	// because if we are sharing address space with the parent process,
3327	// then the parent process can not be running concurrently.
3328	inForkedChild = true
3329
3330	clearSignalHandlers()
3331
3332	// When we are the child we are the only thread running,
3333	// so we know that nothing else has changed gp.m.sigmask.
3334	msigrestore(getg().m.sigmask)
3335
3336	inForkedChild = false
3337}
3338
3339// Called from syscall package before Exec.
3340//go:linkname syscall_runtime_BeforeExec syscall.runtime_BeforeExec
3341func syscall_runtime_BeforeExec() {
3342	// Prevent thread creation during exec.
3343	execLock.lock()
3344}
3345
3346// Called from syscall package after Exec.
3347//go:linkname syscall_runtime_AfterExec syscall.runtime_AfterExec
3348func syscall_runtime_AfterExec() {
3349	execLock.unlock()
3350}
3351
3352// Allocate a new g, with a stack big enough for stacksize bytes.
3353func malg(stacksize int32) *g {
3354	newg := new(g)
3355	if stacksize >= 0 {
3356		stacksize = round2(_StackSystem + stacksize)
3357		systemstack(func() {
3358			newg.stack = stackalloc(uint32(stacksize))
3359		})
3360		newg.stackguard0 = newg.stack.lo + _StackGuard
3361		newg.stackguard1 = ^uintptr(0)
3362		// Clear the bottom word of the stack. We record g
3363		// there on the gsignal stack during VDSO calls on ARM and ARM64.
3364		*(*uintptr)(unsafe.Pointer(newg.stack.lo)) = 0
3365	}
3366	return newg
3367}
3368
3369// Create a new g running fn with siz bytes of arguments.
3370// Put it on the queue of g's waiting to run.
3371// The compiler turns a go statement into a call to this.
3372// Cannot split the stack because it assumes that the arguments
3373// are available sequentially after &fn; they would not be
3374// copied if a stack split occurred.
3375//go:nosplit
3376func newproc(siz int32, fn *funcval) {
3377	argp := add(unsafe.Pointer(&fn), sys.PtrSize)
3378	gp := getg()
3379	pc := getcallerpc()
3380	systemstack(func() {
3381		newproc1(fn, argp, siz, gp, pc)
3382	})
3383}
3384
3385// Create a new g running fn with narg bytes of arguments starting
3386// at argp. callerpc is the address of the go statement that created
3387// this. The new g is put on the queue of g's waiting to run.
3388func newproc1(fn *funcval, argp unsafe.Pointer, narg int32, callergp *g, callerpc uintptr) {
3389	_g_ := getg()
3390
3391	if fn == nil {
3392		_g_.m.throwing = -1 // do not dump full stacks
3393		throw("go of nil func value")
3394	}
3395	acquirem() // disable preemption because it can be holding p in a local var
3396	siz := narg
3397	siz = (siz + 7) &^ 7
3398
3399	// We could allocate a larger initial stack if necessary.
3400	// Not worth it: this is almost always an error.
3401	// 4*sizeof(uintreg): extra space added below
3402	// sizeof(uintreg): caller's LR (arm) or return address (x86, in gostartcall).
3403	if siz >= _StackMin-4*sys.RegSize-sys.RegSize {
3404		throw("newproc: function arguments too large for new goroutine")
3405	}
3406
3407	_p_ := _g_.m.p.ptr()
3408	newg := gfget(_p_)
3409	if newg == nil {
3410		newg = malg(_StackMin)
3411		casgstatus(newg, _Gidle, _Gdead)
3412		allgadd(newg) // publishes with a g->status of Gdead so GC scanner doesn't look at uninitialized stack.
3413	}
3414	if newg.stack.hi == 0 {
3415		throw("newproc1: newg missing stack")
3416	}
3417
3418	if readgstatus(newg) != _Gdead {
3419		throw("newproc1: new g is not Gdead")
3420	}
3421
3422	totalSize := 4*sys.RegSize + uintptr(siz) + sys.MinFrameSize // extra space in case of reads slightly beyond frame
3423	totalSize += -totalSize & (sys.SpAlign - 1)                  // align to spAlign
3424	sp := newg.stack.hi - totalSize
3425	spArg := sp
3426	if usesLR {
3427		// caller's LR
3428		*(*uintptr)(unsafe.Pointer(sp)) = 0
3429		prepGoExitFrame(sp)
3430		spArg += sys.MinFrameSize
3431	}
3432	if narg > 0 {
3433		memmove(unsafe.Pointer(spArg), argp, uintptr(narg))
3434		// This is a stack-to-stack copy. If write barriers
3435		// are enabled and the source stack is grey (the
3436		// destination is always black), then perform a
3437		// barrier copy. We do this *after* the memmove
3438		// because the destination stack may have garbage on
3439		// it.
3440		if writeBarrier.needed && !_g_.m.curg.gcscandone {
3441			f := findfunc(fn.fn)
3442			stkmap := (*stackmap)(funcdata(f, _FUNCDATA_ArgsPointerMaps))
3443			if stkmap.nbit > 0 {
3444				// We're in the prologue, so it's always stack map index 0.
3445				bv := stackmapdata(stkmap, 0)
3446				bulkBarrierBitmap(spArg, spArg, uintptr(bv.n)*sys.PtrSize, 0, bv.bytedata)
3447			}
3448		}
3449	}
3450
3451	memclrNoHeapPointers(unsafe.Pointer(&newg.sched), unsafe.Sizeof(newg.sched))
3452	newg.sched.sp = sp
3453	newg.stktopsp = sp
3454	newg.sched.pc = funcPC(goexit) + sys.PCQuantum // +PCQuantum so that previous instruction is in same function
3455	newg.sched.g = guintptr(unsafe.Pointer(newg))
3456	gostartcallfn(&newg.sched, fn)
3457	newg.gopc = callerpc
3458	newg.ancestors = saveAncestors(callergp)
3459	newg.startpc = fn.fn
3460	if _g_.m.curg != nil {
3461		newg.labels = _g_.m.curg.labels
3462	}
3463	if isSystemGoroutine(newg, false) {
3464		atomic.Xadd(&sched.ngsys, +1)
3465	}
3466	casgstatus(newg, _Gdead, _Grunnable)
3467
3468	if _p_.goidcache == _p_.goidcacheend {
3469		// Sched.goidgen is the last allocated id,
3470		// this batch must be [sched.goidgen+1, sched.goidgen+GoidCacheBatch].
3471		// At startup sched.goidgen=0, so main goroutine receives goid=1.
3472		_p_.goidcache = atomic.Xadd64(&sched.goidgen, _GoidCacheBatch)
3473		_p_.goidcache -= _GoidCacheBatch - 1
3474		_p_.goidcacheend = _p_.goidcache + _GoidCacheBatch
3475	}
3476	newg.goid = int64(_p_.goidcache)
3477	_p_.goidcache++
3478	if raceenabled {
3479		newg.racectx = racegostart(callerpc)
3480	}
3481	if trace.enabled {
3482		traceGoCreate(newg, newg.startpc)
3483	}
3484	runqput(_p_, newg, true)
3485
3486	if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 && mainStarted {
3487		wakep()
3488	}
3489	releasem(_g_.m)
3490}
3491
3492// saveAncestors copies previous ancestors of the given caller g and
3493	// includes info for the current caller in a new set of tracebacks for
3494// a g being created.
3495func saveAncestors(callergp *g) *[]ancestorInfo {
3496	// Copy all prior info, except for the root goroutine (goid 0).
3497	if debug.tracebackancestors <= 0 || callergp.goid == 0 {
3498		return nil
3499	}
3500	var callerAncestors []ancestorInfo
3501	if callergp.ancestors != nil {
3502		callerAncestors = *callergp.ancestors
3503	}
3504	n := int32(len(callerAncestors)) + 1
3505	if n > debug.tracebackancestors {
3506		n = debug.tracebackancestors
3507	}
3508	ancestors := make([]ancestorInfo, n)
3509	copy(ancestors[1:], callerAncestors)
3510
3511	var pcs [_TracebackMaxFrames]uintptr
3512	npcs := gcallers(callergp, 0, pcs[:])
3513	ipcs := make([]uintptr, npcs)
3514	copy(ipcs, pcs[:])
3515	ancestors[0] = ancestorInfo{
3516		pcs:  ipcs,
3517		goid: callergp.goid,
3518		gopc: callergp.gopc,
3519	}
3520
3521	ancestorsp := new([]ancestorInfo)
3522	*ancestorsp = ancestors
3523	return ancestorsp
3524}
3525
3526// Put on gfree list.
3527// If local list is too long, transfer a batch to the global list.
3528func gfput(_p_ *p, gp *g) {
3529	if readgstatus(gp) != _Gdead {
3530		throw("gfput: bad status (not Gdead)")
3531	}
3532
3533	stksize := gp.stack.hi - gp.stack.lo
3534
3535	if stksize != _FixedStack {
3536		// non-standard stack size - free it.
3537		stackfree(gp.stack)
3538		gp.stack.lo = 0
3539		gp.stack.hi = 0
3540		gp.stackguard0 = 0
3541	}
3542
3543	_p_.gFree.push(gp)
3544	_p_.gFree.n++
3545	if _p_.gFree.n >= 64 {
3546		lock(&sched.gFree.lock)
3547		for _p_.gFree.n >= 32 {
3548			_p_.gFree.n--
3549			gp = _p_.gFree.pop()
3550			if gp.stack.lo == 0 {
3551				sched.gFree.noStack.push(gp)
3552			} else {
3553				sched.gFree.stack.push(gp)
3554			}
3555			sched.gFree.n++
3556		}
3557		unlock(&sched.gFree.lock)
3558	}
3559}
3560
3561// Get from gfree list.
3562// If local list is empty, grab a batch from global list.
3563func gfget(_p_ *p) *g {
3564retry:
3565	if _p_.gFree.empty() && (!sched.gFree.stack.empty() || !sched.gFree.noStack.empty()) {
3566		lock(&sched.gFree.lock)
3567		// Move a batch of free Gs to the P.
3568		for _p_.gFree.n < 32 {
3569			// Prefer Gs with stacks.
3570			gp := sched.gFree.stack.pop()
3571			if gp == nil {
3572				gp = sched.gFree.noStack.pop()
3573				if gp == nil {
3574					break
3575				}
3576			}
3577			sched.gFree.n--
3578			_p_.gFree.push(gp)
3579			_p_.gFree.n++
3580		}
3581		unlock(&sched.gFree.lock)
3582		goto retry
3583	}
3584	gp := _p_.gFree.pop()
3585	if gp == nil {
3586		return nil
3587	}
3588	_p_.gFree.n--
3589	if gp.stack.lo == 0 {
3590		// Stack was deallocated in gfput. Allocate a new one.
3591		systemstack(func() {
3592			gp.stack = stackalloc(_FixedStack)
3593		})
3594		gp.stackguard0 = gp.stack.lo + _StackGuard
3595	} else {
3596		if raceenabled {
3597			racemalloc(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo)
3598		}
3599		if msanenabled {
3600			msanmalloc(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo)
3601		}
3602	}
3603	return gp
3604}
3605
3606// Purge all cached G's from gfree list to the global list.
3607func gfpurge(_p_ *p) {
3608	lock(&sched.gFree.lock)
3609	for !_p_.gFree.empty() {
3610		gp := _p_.gFree.pop()
3611		_p_.gFree.n--
3612		if gp.stack.lo == 0 {
3613			sched.gFree.noStack.push(gp)
3614		} else {
3615			sched.gFree.stack.push(gp)
3616		}
3617		sched.gFree.n++
3618	}
3619	unlock(&sched.gFree.lock)
3620}
3621
3622// Breakpoint executes a breakpoint trap.
3623func Breakpoint() {
3624	breakpoint()
3625}
3626
3627// dolockOSThread is called by LockOSThread and lockOSThread below
3628// after they modify m.locked. Do not allow preemption during this call,
3629// or else the m might be different in this function than in the caller.
3630//go:nosplit
3631func dolockOSThread() {
3632	if GOARCH == "wasm" {
3633		return // no threads on wasm yet
3634	}
3635	_g_ := getg()
3636	_g_.m.lockedg.set(_g_)
3637	_g_.lockedm.set(_g_.m)
3638}
3639
3640//go:nosplit
3641
3642// LockOSThread wires the calling goroutine to its current operating system thread.
3643// The calling goroutine will always execute in that thread,
3644// and no other goroutine will execute in it,
3645// until the calling goroutine has made as many calls to
3646// UnlockOSThread as to LockOSThread.
3647// If the calling goroutine exits without unlocking the thread,
3648// the thread will be terminated.
3649//
3650// All init functions are run on the startup thread. Calling LockOSThread
3651// from an init function will cause the main function to be invoked on
3652// that thread.
3653//
3654// A goroutine should call LockOSThread before calling OS services or
3655// non-Go library functions that depend on per-thread state.
3656func LockOSThread() {
3657	if atomic.Load(&newmHandoff.haveTemplateThread) == 0 && GOOS != "plan9" {
3658		// If we need to start a new thread from the locked
3659		// thread, we need the template thread. Start it now
3660		// while we're in a known-good state.
3661		startTemplateThread()
3662	}
3663	_g_ := getg()
3664	_g_.m.lockedExt++
3665	if _g_.m.lockedExt == 0 {
3666		_g_.m.lockedExt--
3667		panic("LockOSThread nesting overflow")
3668	}
3669	dolockOSThread()
3670}
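
// A minimal usage sketch for the exported API above (illustrative only, not
// part of the runtime): code that depends on per-thread state, such as a C
// library that keeps a thread-local context, typically pairs the calls like
// this inside the goroutine that owns that state:
//
//	runtime.LockOSThread()
//	defer runtime.UnlockOSThread()
//	// ... call the thread-affine OS or C services here ...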
3671
3672//go:nosplit
3673func lockOSThread() {
3674	getg().m.lockedInt++
3675	dolockOSThread()
3676}
3677
// dounlockOSThread is called by UnlockOSThread and unlockOSThread below
// after they update m->locked. Do not allow preemption during this call,
// or else the m might be different in this function than in the caller.
3681//go:nosplit
3682func dounlockOSThread() {
3683	if GOARCH == "wasm" {
3684		return // no threads on wasm yet
3685	}
3686	_g_ := getg()
3687	if _g_.m.lockedInt != 0 || _g_.m.lockedExt != 0 {
3688		return
3689	}
3690	_g_.m.lockedg = 0
3691	_g_.lockedm = 0
3692}
3693
3694//go:nosplit
3695
3696// UnlockOSThread undoes an earlier call to LockOSThread.
3697// If this drops the number of active LockOSThread calls on the
3698// calling goroutine to zero, it unwires the calling goroutine from
3699// its fixed operating system thread.
3700// If there are no active LockOSThread calls, this is a no-op.
3701//
3702// Before calling UnlockOSThread, the caller must ensure that the OS
3703// thread is suitable for running other goroutines. If the caller made
3704// any permanent changes to the state of the thread that would affect
3705// other goroutines, it should not call this function and thus leave
3706// the goroutine locked to the OS thread until the goroutine (and
3707// hence the thread) exits.
3708func UnlockOSThread() {
3709	_g_ := getg()
3710	if _g_.m.lockedExt == 0 {
3711		return
3712	}
3713	_g_.m.lockedExt--
3714	dounlockOSThread()
3715}
3716
3717//go:nosplit
3718func unlockOSThread() {
3719	_g_ := getg()
3720	if _g_.m.lockedInt == 0 {
3721		systemstack(badunlockosthread)
3722	}
3723	_g_.m.lockedInt--
3724	dounlockOSThread()
3725}
3726
3727func badunlockosthread() {
3728	throw("runtime: internal error: misuse of lockOSThread/unlockOSThread")
3729}
3730
3731func gcount() int32 {
3732	n := int32(allglen) - sched.gFree.n - int32(atomic.Load(&sched.ngsys))
3733	for _, _p_ := range allp {
3734		n -= _p_.gFree.n
3735	}
3736
3737	// All these variables can be changed concurrently, so the result can be inconsistent.
3738	// But at least the current goroutine is running.
3739	if n < 1 {
3740		n = 1
3741	}
3742	return n
3743}
3744
3745func mcount() int32 {
3746	return int32(sched.mnext - sched.nmfreed)
3747}
3748
3749var prof struct {
3750	signalLock uint32
3751	hz         int32
3752}
3753
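// The functions below are placeholders: they are never called, and exist only
// so that their entry PCs can be used to label profiling samples that cannot
// be attributed to ordinary Go code (see, for example, their uses in sigprof
// and sigprofNonGoPC below).
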
3754func _System()                    { _System() }
3755func _ExternalCode()              { _ExternalCode() }
3756func _LostExternalCode()          { _LostExternalCode() }
3757func _GC()                        { _GC() }
3758func _LostSIGPROFDuringAtomic64() { _LostSIGPROFDuringAtomic64() }
3759func _VDSO()                      { _VDSO() }
3760
3761// Called if we receive a SIGPROF signal.
3762// Called by the signal handler, may run during STW.
3763//go:nowritebarrierrec
3764func sigprof(pc, sp, lr uintptr, gp *g, mp *m) {
3765	if prof.hz == 0 {
3766		return
3767	}
3768
	// On mips{,le} and arm, 64bit atomics are emulated with spinlocks in
	// runtime/internal/atomic. If SIGPROF arrives while the program is inside
	// the critical section, it creates a deadlock (when writing the sample).
	// As a workaround, count such SIGPROFs while in the critical section and
	// report them later, when a SIGPROF is received somewhere else
	// (with _LostSIGPROFDuringAtomic64 as the pc).
3775	if GOARCH == "mips" || GOARCH == "mipsle" || GOARCH == "arm" {
3776		if f := findfunc(pc); f.valid() {
3777			if hasPrefix(funcname(f), "runtime/internal/atomic") {
3778				cpuprof.lostAtomic++
3779				return
3780			}
3781		}
3782	}
3783
3784	// Profiling runs concurrently with GC, so it must not allocate.
3785	// Set a trap in case the code does allocate.
3786	// Note that on windows, one thread takes profiles of all the
3787	// other threads, so mp is usually not getg().m.
3788	// In fact mp may not even be stopped.
3789	// See golang.org/issue/17165.
3790	getg().m.mallocing++
3791
3792	// Define that a "user g" is a user-created goroutine, and a "system g"
3793	// is one that is m->g0 or m->gsignal.
3794	//
3795	// We might be interrupted for profiling halfway through a
3796	// goroutine switch. The switch involves updating three (or four) values:
3797	// g, PC, SP, and (on arm) LR. The PC must be the last to be updated,
3798	// because once it gets updated the new g is running.
3799	//
	// When switching from a user g to a system g, LR is not considered live,
	// so the update only affects g, SP, and PC. Since PC must be last, the
	// possible partial transitions in ordinary execution are (1) g alone is updated,
	// (2) both g and SP are updated, and (3) SP alone is updated.
3804	// If SP or g alone is updated, we can detect the partial transition by checking
3805	// whether the SP is within g's stack bounds. (We could also require that SP
3806	// be changed only after g, but the stack bounds check is needed by other
3807	// cases, so there is no need to impose an additional requirement.)
3808	//
3809	// There is one exceptional transition to a system g, not in ordinary execution.
3810	// When a signal arrives, the operating system starts the signal handler running
3811	// with an updated PC and SP. The g is updated last, at the beginning of the
3812	// handler. There are two reasons this is okay. First, until g is updated the
3813	// g and SP do not match, so the stack bounds check detects the partial transition.
3814	// Second, signal handlers currently run with signals disabled, so a profiling
3815	// signal cannot arrive during the handler.
3816	//
3817	// When switching from a system g to a user g, there are three possibilities.
3818	//
3819	// First, it may be that the g switch has no PC update, because the SP
3820	// either corresponds to a user g throughout (as in asmcgocall)
3821	// or because it has been arranged to look like a user g frame
3822	// (as in cgocallback_gofunc). In this case, since the entire
3823	// transition is a g+SP update, a partial transition updating just one of
3824	// those will be detected by the stack bounds check.
3825	//
3826	// Second, when returning from a signal handler, the PC and SP updates
3827	// are performed by the operating system in an atomic update, so the g
3828	// update must be done before them. The stack bounds check detects
3829	// the partial transition here, and (again) signal handlers run with signals
3830	// disabled, so a profiling signal cannot arrive then anyway.
3831	//
	// Third, the common case: it may be that the switch updates g, SP, and PC
	// separately. If the PC is within any of the functions that do this,
	// we don't ask for a traceback. Cf. setsSP for more about this.
3835	//
3836	// There is another apparently viable approach, recorded here in case
3837	// the "PC within setsSP function" check turns out not to be usable.
3838	// It would be possible to delay the update of either g or SP until immediately
3839	// before the PC update instruction. Then, because of the stack bounds check,
3840	// the only problematic interrupt point is just before that PC update instruction,
3841	// and the sigprof handler can detect that instruction and simulate stepping past
3842	// it in order to reach a consistent state. On ARM, the update of g must be made
3843	// in two places (in R10 and also in a TLS slot), so the delayed update would
3844	// need to be the SP update. The sigprof handler must read the instruction at
3845	// the current PC and if it was the known instruction (for example, JMP BX or
3846	// MOV R2, PC), use that other register in place of the PC value.
3847	// The biggest drawback to this solution is that it requires that we can tell
3848	// whether it's safe to read from the memory pointed at by PC.
3849	// In a correct program, we can test PC == nil and otherwise read,
3850	// but if a profiling signal happens at the instant that a program executes
3851	// a bad jump (before the program manages to handle the resulting fault)
3852	// the profiling handler could fault trying to read nonexistent memory.
3853	//
3854	// To recap, there are no constraints on the assembly being used for the
3855	// transition. We simply require that g and SP match and that the PC is not
3856	// in gogo.
3857	traceback := true
3858	if gp == nil || sp < gp.stack.lo || gp.stack.hi < sp || setsSP(pc) || (mp != nil && mp.vdsoSP != 0) {
3859		traceback = false
3860	}
3861	var stk [maxCPUProfStack]uintptr
3862	n := 0
3863	if mp.ncgo > 0 && mp.curg != nil && mp.curg.syscallpc != 0 && mp.curg.syscallsp != 0 {
3864		cgoOff := 0
3865		// Check cgoCallersUse to make sure that we are not
3866		// interrupting other code that is fiddling with
3867		// cgoCallers.  We are running in a signal handler
3868		// with all signals blocked, so we don't have to worry
3869		// about any other code interrupting us.
3870		if atomic.Load(&mp.cgoCallersUse) == 0 && mp.cgoCallers != nil && mp.cgoCallers[0] != 0 {
3871			for cgoOff < len(mp.cgoCallers) && mp.cgoCallers[cgoOff] != 0 {
3872				cgoOff++
3873			}
3874			copy(stk[:], mp.cgoCallers[:cgoOff])
3875			mp.cgoCallers[0] = 0
3876		}
3877
3878		// Collect Go stack that leads to the cgo call.
3879		n = gentraceback(mp.curg.syscallpc, mp.curg.syscallsp, 0, mp.curg, 0, &stk[cgoOff], len(stk)-cgoOff, nil, nil, 0)
3880		if n > 0 {
3881			n += cgoOff
3882		}
3883	} else if traceback {
3884		n = gentraceback(pc, sp, lr, gp, 0, &stk[0], len(stk), nil, nil, _TraceTrap|_TraceJumpStack)
3885	}
3886
3887	if n <= 0 {
3888		// Normal traceback is impossible or has failed.
3889		// See if it falls into several common cases.
3890		n = 0
3891		if (GOOS == "windows" || GOOS == "solaris" || GOOS == "illumos" || GOOS == "darwin" || GOOS == "aix") && mp.libcallg != 0 && mp.libcallpc != 0 && mp.libcallsp != 0 {
3892			// Libcall, i.e. runtime syscall on windows.
3893			// Collect Go stack that leads to the call.
3894			n = gentraceback(mp.libcallpc, mp.libcallsp, 0, mp.libcallg.ptr(), 0, &stk[0], len(stk), nil, nil, 0)
3895		}
3896		if n == 0 && mp != nil && mp.vdsoSP != 0 {
3897			n = gentraceback(mp.vdsoPC, mp.vdsoSP, 0, gp, 0, &stk[0], len(stk), nil, nil, _TraceTrap|_TraceJumpStack)
3898		}
3899		if n == 0 {
3900			// If all of the above has failed, account it against abstract "System" or "GC".
3901			n = 2
3902			if inVDSOPage(pc) {
3903				pc = funcPC(_VDSO) + sys.PCQuantum
3904			} else if pc > firstmoduledata.etext {
3905				// "ExternalCode" is better than "etext".
3906				pc = funcPC(_ExternalCode) + sys.PCQuantum
3907			}
3908			stk[0] = pc
3909			if mp.preemptoff != "" {
3910				stk[1] = funcPC(_GC) + sys.PCQuantum
3911			} else {
3912				stk[1] = funcPC(_System) + sys.PCQuantum
3913			}
3914		}
3915	}
3916
3917	if prof.hz != 0 {
3918		cpuprof.add(gp, stk[:n])
3919	}
3920	getg().m.mallocing--
3921}
3922
3923// If the signal handler receives a SIGPROF signal on a non-Go thread,
3924// it tries to collect a traceback into sigprofCallers.
3925// sigprofCallersUse is set to non-zero while sigprofCallers holds a traceback.
3926var sigprofCallers cgoCallers
3927var sigprofCallersUse uint32
3928
3929// sigprofNonGo is called if we receive a SIGPROF signal on a non-Go thread,
3930// and the signal handler collected a stack trace in sigprofCallers.
3931// When this is called, sigprofCallersUse will be non-zero.
3932// g is nil, and what we can do is very limited.
3933//go:nosplit
3934//go:nowritebarrierrec
3935func sigprofNonGo() {
3936	if prof.hz != 0 {
3937		n := 0
3938		for n < len(sigprofCallers) && sigprofCallers[n] != 0 {
3939			n++
3940		}
3941		cpuprof.addNonGo(sigprofCallers[:n])
3942	}
3943
3944	atomic.Store(&sigprofCallersUse, 0)
3945}
3946
3947// sigprofNonGoPC is called when a profiling signal arrived on a
3948// non-Go thread and we have a single PC value, not a stack trace.
3949// g is nil, and what we can do is very limited.
3950//go:nosplit
3951//go:nowritebarrierrec
3952func sigprofNonGoPC(pc uintptr) {
3953	if prof.hz != 0 {
3954		stk := []uintptr{
3955			pc,
3956			funcPC(_ExternalCode) + sys.PCQuantum,
3957		}
3958		cpuprof.addNonGo(stk)
3959	}
3960}
3961
// setsSP reports whether a function will set the SP
// to an absolute value. It is important that
// we don't traceback when these are at the bottom
// of the stack, since we can't be sure that we will
// find the caller.
3967//
3968// If the function is not on the bottom of the stack
3969// we assume that it will have set it up so that traceback will be consistent,
3970// either by being a traceback terminating function
3971// or putting one on the stack at the right offset.
3972func setsSP(pc uintptr) bool {
3973	f := findfunc(pc)
3974	if !f.valid() {
3975		// couldn't find the function for this PC,
3976		// so assume the worst and stop traceback
3977		return true
3978	}
3979	switch f.funcID {
3980	case funcID_gogo, funcID_systemstack, funcID_mcall, funcID_morestack:
3981		return true
3982	}
3983	return false
3984}
3985
3986// setcpuprofilerate sets the CPU profiling rate to hz times per second.
3987// If hz <= 0, setcpuprofilerate turns off CPU profiling.
3988func setcpuprofilerate(hz int32) {
3989	// Force sane arguments.
3990	if hz < 0 {
3991		hz = 0
3992	}
3993
3994	// Disable preemption, otherwise we can be rescheduled to another thread
3995	// that has profiling enabled.
3996	_g_ := getg()
3997	_g_.m.locks++
3998
	// Stop the profiler on this thread so that it is safe to lock prof.
	// If a profiling signal came in while we had prof locked,
	// it would deadlock.
4002	setThreadCPUProfiler(0)
4003
4004	for !atomic.Cas(&prof.signalLock, 0, 1) {
4005		osyield()
4006	}
4007	if prof.hz != hz {
4008		setProcessCPUProfiler(hz)
4009		prof.hz = hz
4010	}
4011	atomic.Store(&prof.signalLock, 0)
4012
4013	lock(&sched.lock)
4014	sched.profilehz = hz
4015	unlock(&sched.lock)
4016
4017	if hz != 0 {
4018		setThreadCPUProfiler(hz)
4019	}
4020
4021	_g_.m.locks--
4022}
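
// Illustrative note (not part of the runtime): user code normally reaches the
// function above indirectly, for example through runtime/pprof:
//
//	f, err := os.Create("cpu.prof")
//	if err != nil {
//		log.Fatal(err)
//	}
//	if err := pprof.StartCPUProfile(f); err != nil { // ends up in setcpuprofilerate
//		log.Fatal(err)
//	}
//	defer pprof.StopCPUProfile()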
4023
4024// init initializes pp, which may be a freshly allocated p or a
4025// previously destroyed p, and transitions it to status _Pgcstop.
4026func (pp *p) init(id int32) {
4027	pp.id = id
4028	pp.status = _Pgcstop
4029	pp.sudogcache = pp.sudogbuf[:0]
4030	for i := range pp.deferpool {
4031		pp.deferpool[i] = pp.deferpoolbuf[i][:0]
4032	}
4033	pp.wbBuf.reset()
4034	if pp.mcache == nil {
4035		if id == 0 {
4036			if getg().m.mcache == nil {
4037				throw("missing mcache?")
4038			}
4039			pp.mcache = getg().m.mcache // bootstrap
4040		} else {
4041			pp.mcache = allocmcache()
4042		}
4043	}
4044	if raceenabled && pp.raceprocctx == 0 {
4045		if id == 0 {
4046			pp.raceprocctx = raceprocctx0
4047			raceprocctx0 = 0 // bootstrap
4048		} else {
4049			pp.raceprocctx = raceproccreate()
4050		}
4051	}
4052}
4053
4054// destroy releases all of the resources associated with pp and
4055// transitions it to status _Pdead.
4056//
4057// sched.lock must be held and the world must be stopped.
4058func (pp *p) destroy() {
4059	// Move all runnable goroutines to the global queue
4060	for pp.runqhead != pp.runqtail {
4061		// Pop from tail of local queue
4062		pp.runqtail--
4063		gp := pp.runq[pp.runqtail%uint32(len(pp.runq))].ptr()
4064		// Push onto head of global queue
4065		globrunqputhead(gp)
4066	}
4067	if pp.runnext != 0 {
4068		globrunqputhead(pp.runnext.ptr())
4069		pp.runnext = 0
4070	}
4071	if len(pp.timers) > 0 {
4072		plocal := getg().m.p.ptr()
4073		// The world is stopped, but we acquire timersLock to
4074		// protect against sysmon calling timeSleepUntil.
4075		// This is the only case where we hold the timersLock of
4076		// more than one P, so there are no deadlock concerns.
4077		lock(&plocal.timersLock)
4078		lock(&pp.timersLock)
4079		moveTimers(plocal, pp.timers)
4080		pp.timers = nil
4081		pp.numTimers = 0
4082		pp.adjustTimers = 0
4083		pp.deletedTimers = 0
4084		atomic.Store64(&pp.timer0When, 0)
4085		unlock(&pp.timersLock)
4086		unlock(&plocal.timersLock)
4087	}
4088	// If there's a background worker, make it runnable and put
4089	// it on the global queue so it can clean itself up.
4090	if gp := pp.gcBgMarkWorker.ptr(); gp != nil {
4091		casgstatus(gp, _Gwaiting, _Grunnable)
4092		if trace.enabled {
4093			traceGoUnpark(gp, 0)
4094		}
4095		globrunqput(gp)
4096		// This assignment doesn't race because the
4097		// world is stopped.
4098		pp.gcBgMarkWorker.set(nil)
4099	}
4100	// Flush p's write barrier buffer.
4101	if gcphase != _GCoff {
4102		wbBufFlush1(pp)
4103		pp.gcw.dispose()
4104	}
4105	for i := range pp.sudogbuf {
4106		pp.sudogbuf[i] = nil
4107	}
4108	pp.sudogcache = pp.sudogbuf[:0]
4109	for i := range pp.deferpool {
4110		for j := range pp.deferpoolbuf[i] {
4111			pp.deferpoolbuf[i][j] = nil
4112		}
4113		pp.deferpool[i] = pp.deferpoolbuf[i][:0]
4114	}
4115	systemstack(func() {
4116		for i := 0; i < pp.mspancache.len; i++ {
4117			// Safe to call since the world is stopped.
4118			mheap_.spanalloc.free(unsafe.Pointer(pp.mspancache.buf[i]))
4119		}
4120		pp.mspancache.len = 0
4121		pp.pcache.flush(&mheap_.pages)
4122	})
4123	freemcache(pp.mcache)
4124	pp.mcache = nil
4125	gfpurge(pp)
4126	traceProcFree(pp)
4127	if raceenabled {
4128		if pp.timerRaceCtx != 0 {
4129			// The race detector code uses a callback to fetch
4130			// the proc context, so arrange for that callback
4131			// to see the right thing.
4132			// This hack only works because we are the only
4133			// thread running.
4134			mp := getg().m
4135			phold := mp.p.ptr()
4136			mp.p.set(pp)
4137
4138			racectxend(pp.timerRaceCtx)
4139			pp.timerRaceCtx = 0
4140
4141			mp.p.set(phold)
4142		}
4143		raceprocdestroy(pp.raceprocctx)
4144		pp.raceprocctx = 0
4145	}
4146	pp.gcAssistTime = 0
4147	pp.status = _Pdead
4148}
4149
// Change the number of processors. The world is stopped and sched is locked.
// gcworkbufs are not being modified by either the GC or
// the write barrier code.
// Returns the list of Ps with local work; they need to be scheduled by the caller.
4154func procresize(nprocs int32) *p {
4155	old := gomaxprocs
4156	if old < 0 || nprocs <= 0 {
4157		throw("procresize: invalid arg")
4158	}
4159	if trace.enabled {
4160		traceGomaxprocs(nprocs)
4161	}
4162
4163	// update statistics
4164	now := nanotime()
4165	if sched.procresizetime != 0 {
4166		sched.totaltime += int64(old) * (now - sched.procresizetime)
4167	}
4168	sched.procresizetime = now
4169
4170	// Grow allp if necessary.
4171	if nprocs > int32(len(allp)) {
4172		// Synchronize with retake, which could be running
4173		// concurrently since it doesn't run on a P.
4174		lock(&allpLock)
4175		if nprocs <= int32(cap(allp)) {
4176			allp = allp[:nprocs]
4177		} else {
4178			nallp := make([]*p, nprocs)
4179			// Copy everything up to allp's cap so we
4180			// never lose old allocated Ps.
4181			copy(nallp, allp[:cap(allp)])
4182			allp = nallp
4183		}
4184		unlock(&allpLock)
4185	}
4186
4187	// initialize new P's
4188	for i := old; i < nprocs; i++ {
4189		pp := allp[i]
4190		if pp == nil {
4191			pp = new(p)
4192		}
4193		pp.init(i)
4194		atomicstorep(unsafe.Pointer(&allp[i]), unsafe.Pointer(pp))
4195	}
4196
4197	_g_ := getg()
4198	if _g_.m.p != 0 && _g_.m.p.ptr().id < nprocs {
4199		// continue to use the current P
4200		_g_.m.p.ptr().status = _Prunning
4201		_g_.m.p.ptr().mcache.prepareForSweep()
4202	} else {
4203		// release the current P and acquire allp[0].
4204		//
4205		// We must do this before destroying our current P
4206		// because p.destroy itself has write barriers, so we
4207		// need to do that from a valid P.
4208		if _g_.m.p != 0 {
4209			if trace.enabled {
4210				// Pretend that we were descheduled
4211				// and then scheduled again to keep
4212				// the trace sane.
4213				traceGoSched()
4214				traceProcStop(_g_.m.p.ptr())
4215			}
4216			_g_.m.p.ptr().m = 0
4217		}
4218		_g_.m.p = 0
4219		_g_.m.mcache = nil
4220		p := allp[0]
4221		p.m = 0
4222		p.status = _Pidle
4223		acquirep(p)
4224		if trace.enabled {
4225			traceGoStart()
4226		}
4227	}
4228
4229	// release resources from unused P's
4230	for i := nprocs; i < old; i++ {
4231		p := allp[i]
4232		p.destroy()
4233		// can't free P itself because it can be referenced by an M in syscall
4234	}
4235
4236	// Trim allp.
4237	if int32(len(allp)) != nprocs {
4238		lock(&allpLock)
4239		allp = allp[:nprocs]
4240		unlock(&allpLock)
4241	}
4242
4243	var runnablePs *p
4244	for i := nprocs - 1; i >= 0; i-- {
4245		p := allp[i]
4246		if _g_.m.p.ptr() == p {
4247			continue
4248		}
4249		p.status = _Pidle
4250		if runqempty(p) {
4251			pidleput(p)
4252		} else {
4253			p.m.set(mget())
4254			p.link.set(runnablePs)
4255			runnablePs = p
4256		}
4257	}
4258	stealOrder.reset(uint32(nprocs))
4259	var int32p *int32 = &gomaxprocs // make compiler check that gomaxprocs is an int32
4260	atomic.Store((*uint32)(unsafe.Pointer(int32p)), uint32(nprocs))
4261	return runnablePs
4262}
4263
4264// Associate p and the current m.
4265//
4266// This function is allowed to have write barriers even if the caller
4267// isn't because it immediately acquires _p_.
4268//
4269//go:yeswritebarrierrec
4270func acquirep(_p_ *p) {
4271	// Do the part that isn't allowed to have write barriers.
4272	wirep(_p_)
4273
4274	// Have p; write barriers now allowed.
4275
4276	// Perform deferred mcache flush before this P can allocate
4277	// from a potentially stale mcache.
4278	_p_.mcache.prepareForSweep()
4279
4280	if trace.enabled {
4281		traceProcStart()
4282	}
4283}
4284
4285// wirep is the first step of acquirep, which actually associates the
4286// current M to _p_. This is broken out so we can disallow write
4287// barriers for this part, since we don't yet have a P.
4288//
4289//go:nowritebarrierrec
4290//go:nosplit
4291func wirep(_p_ *p) {
4292	_g_ := getg()
4293
4294	if _g_.m.p != 0 || _g_.m.mcache != nil {
4295		throw("wirep: already in go")
4296	}
4297	if _p_.m != 0 || _p_.status != _Pidle {
4298		id := int64(0)
4299		if _p_.m != 0 {
4300			id = _p_.m.ptr().id
4301		}
4302		print("wirep: p->m=", _p_.m, "(", id, ") p->status=", _p_.status, "\n")
4303		throw("wirep: invalid p state")
4304	}
4305	_g_.m.mcache = _p_.mcache
4306	_g_.m.p.set(_p_)
4307	_p_.m.set(_g_.m)
4308	_p_.status = _Prunning
4309}
4310
4311// Disassociate p and the current m.
4312func releasep() *p {
4313	_g_ := getg()
4314
4315	if _g_.m.p == 0 || _g_.m.mcache == nil {
4316		throw("releasep: invalid arg")
4317	}
4318	_p_ := _g_.m.p.ptr()
4319	if _p_.m.ptr() != _g_.m || _p_.mcache != _g_.m.mcache || _p_.status != _Prunning {
4320		print("releasep: m=", _g_.m, " m->p=", _g_.m.p.ptr(), " p->m=", hex(_p_.m), " m->mcache=", _g_.m.mcache, " p->mcache=", _p_.mcache, " p->status=", _p_.status, "\n")
4321		throw("releasep: invalid p state")
4322	}
4323	if trace.enabled {
4324		traceProcStop(_g_.m.p.ptr())
4325	}
4326	_g_.m.p = 0
4327	_g_.m.mcache = nil
4328	_p_.m = 0
4329	_p_.status = _Pidle
4330	return _p_
4331}
4332
4333func incidlelocked(v int32) {
4334	lock(&sched.lock)
4335	sched.nmidlelocked += v
4336	if v > 0 {
4337		checkdead()
4338	}
4339	unlock(&sched.lock)
4340}
4341
// Check for a deadlock situation.
// The check is based on the number of running M's; if it is 0, the system is deadlocked.
// sched.lock must be held.
4345func checkdead() {
4346	// For -buildmode=c-shared or -buildmode=c-archive it's OK if
4347	// there are no running goroutines. The calling program is
4348	// assumed to be running.
4349	if islibrary || isarchive {
4350		return
4351	}
4352
	// If we are dying because of a signal caught on an already idle thread,
	// freezetheworld will cause all running threads to block.
	// The runtime will then essentially enter a deadlock state,
	// except that there is a thread that will call exit soon.
4357	if panicking > 0 {
4358		return
4359	}
4360
4361	// If we are not running under cgo, but we have an extra M then account
4362	// for it. (It is possible to have an extra M on Windows without cgo to
4363	// accommodate callbacks created by syscall.NewCallback. See issue #6751
4364	// for details.)
4365	var run0 int32
4366	if !iscgo && cgoHasExtraM {
4367		mp := lockextra(true)
4368		haveExtraM := extraMCount > 0
4369		unlockextra(mp)
4370		if haveExtraM {
4371			run0 = 1
4372		}
4373	}
4374
4375	run := mcount() - sched.nmidle - sched.nmidlelocked - sched.nmsys
4376	if run > run0 {
4377		return
4378	}
4379	if run < 0 {
4380		print("runtime: checkdead: nmidle=", sched.nmidle, " nmidlelocked=", sched.nmidlelocked, " mcount=", mcount(), " nmsys=", sched.nmsys, "\n")
4381		throw("checkdead: inconsistent counts")
4382	}
4383
4384	grunning := 0
4385	lock(&allglock)
4386	for i := 0; i < len(allgs); i++ {
4387		gp := allgs[i]
4388		if isSystemGoroutine(gp, false) {
4389			continue
4390		}
4391		s := readgstatus(gp)
4392		switch s &^ _Gscan {
4393		case _Gwaiting,
4394			_Gpreempted:
4395			grunning++
4396		case _Grunnable,
4397			_Grunning,
4398			_Gsyscall:
4399			unlock(&allglock)
4400			print("runtime: checkdead: find g ", gp.goid, " in status ", s, "\n")
4401			throw("checkdead: runnable g")
4402		}
4403	}
4404	unlock(&allglock)
4405	if grunning == 0 { // possible if main goroutine calls runtime·Goexit()
4406		unlock(&sched.lock) // unlock so that GODEBUG=scheddetail=1 doesn't hang
4407		throw("no goroutines (main called runtime.Goexit) - deadlock!")
4408	}
4409
4410	// Maybe jump time forward for playground.
4411	if faketime != 0 {
4412		when, _p_ := timeSleepUntil()
4413		if _p_ != nil {
4414			faketime = when
4415			for pp := &sched.pidle; *pp != 0; pp = &(*pp).ptr().link {
4416				if (*pp).ptr() == _p_ {
4417					*pp = _p_.link
4418					break
4419				}
4420			}
4421			mp := mget()
4422			if mp == nil {
4423				// There should always be a free M since
4424				// nothing is running.
4425				throw("checkdead: no m for timer")
4426			}
4427			mp.nextp.set(_p_)
4428			notewakeup(&mp.park)
4429			return
4430		}
4431	}
4432
4433	// There are no goroutines running, so we can look at the P's.
4434	for _, _p_ := range allp {
4435		if len(_p_.timers) > 0 {
4436			return
4437		}
4438	}
4439
4440	getg().m.throwing = -1 // do not dump full stacks
4441	unlock(&sched.lock)    // unlock so that GODEBUG=scheddetail=1 doesn't hang
4442	throw("all goroutines are asleep - deadlock!")
4443}
4444
4445// forcegcperiod is the maximum time in nanoseconds between garbage
4446// collections. If we go this long without a garbage collection, one
4447// is forced to run.
4448//
4449// This is a variable for testing purposes. It normally doesn't change.
4450var forcegcperiod int64 = 2 * 60 * 1e9
4451
4452// Always runs without a P, so write barriers are not allowed.
4453//
4454//go:nowritebarrierrec
4455func sysmon() {
4456	lock(&sched.lock)
4457	sched.nmsys++
4458	checkdead()
4459	unlock(&sched.lock)
4460
4461	lasttrace := int64(0)
	idle := 0 // how many cycles in succession we had not woken anybody up
4463	delay := uint32(0)
4464	for {
4465		if idle == 0 { // start with 20us sleep...
4466			delay = 20
4467		} else if idle > 50 { // start doubling the sleep after 1ms...
4468			delay *= 2
4469		}
4470		if delay > 10*1000 { // up to 10ms
4471			delay = 10 * 1000
4472		}
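		// Illustrative progression (not a guarantee): 50 consecutive idle
		// cycles sleep 20us each; from then on the sleep doubles each idle
		// cycle (40us, 80us, ...) until it is capped at 10ms after about
		// nine doublings.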
4473		usleep(delay)
4474		now := nanotime()
4475		next, _ := timeSleepUntil()
4476		if debug.schedtrace <= 0 && (sched.gcwaiting != 0 || atomic.Load(&sched.npidle) == uint32(gomaxprocs)) {
4477			lock(&sched.lock)
4478			if atomic.Load(&sched.gcwaiting) != 0 || atomic.Load(&sched.npidle) == uint32(gomaxprocs) {
4479				if next > now {
4480					atomic.Store(&sched.sysmonwait, 1)
4481					unlock(&sched.lock)
4482					// Make wake-up period small enough
4483					// for the sampling to be correct.
4484					sleep := forcegcperiod / 2
4485					if next-now < sleep {
4486						sleep = next - now
4487					}
4488					shouldRelax := sleep >= osRelaxMinNS
4489					if shouldRelax {
4490						osRelax(true)
4491					}
4492					notetsleep(&sched.sysmonnote, sleep)
4493					if shouldRelax {
4494						osRelax(false)
4495					}
4496					now = nanotime()
4497					next, _ = timeSleepUntil()
4498					lock(&sched.lock)
4499					atomic.Store(&sched.sysmonwait, 0)
4500					noteclear(&sched.sysmonnote)
4501				}
4502				idle = 0
4503				delay = 20
4504			}
4505			unlock(&sched.lock)
4506		}
4507		// trigger libc interceptors if needed
4508		if *cgo_yield != nil {
4509			asmcgocall(*cgo_yield, nil)
4510		}
4511		// poll network if not polled for more than 10ms
4512		lastpoll := int64(atomic.Load64(&sched.lastpoll))
4513		if netpollinited() && lastpoll != 0 && lastpoll+10*1000*1000 < now {
4514			atomic.Cas64(&sched.lastpoll, uint64(lastpoll), uint64(now))
4515			list := netpoll(0) // non-blocking - returns list of goroutines
4516			if !list.empty() {
4517				// Need to decrement number of idle locked M's
4518				// (pretending that one more is running) before injectglist.
4519				// Otherwise it can lead to the following situation:
4520				// injectglist grabs all P's but before it starts M's to run the P's,
4521				// another M returns from syscall, finishes running its G,
4522				// observes that there is no work to do and no other running M's
4523				// and reports deadlock.
4524				incidlelocked(-1)
4525				injectglist(&list)
4526				incidlelocked(1)
4527			}
4528		}
4529		if next < now {
4530			// There are timers that should have already run,
4531			// perhaps because there is an unpreemptible P.
4532			// Try to start an M to run them.
4533			startm(nil, false)
4534		}
4535		// retake P's blocked in syscalls
4536		// and preempt long running G's
4537		if retake(now) != 0 {
4538			idle = 0
4539		} else {
4540			idle++
4541		}
4542		// check if we need to force a GC
4543		if t := (gcTrigger{kind: gcTriggerTime, now: now}); t.test() && atomic.Load(&forcegc.idle) != 0 {
4544			lock(&forcegc.lock)
4545			forcegc.idle = 0
4546			var list gList
4547			list.push(forcegc.g)
4548			injectglist(&list)
4549			unlock(&forcegc.lock)
4550		}
4551		if debug.schedtrace > 0 && lasttrace+int64(debug.schedtrace)*1000000 <= now {
4552			lasttrace = now
4553			schedtrace(debug.scheddetail > 0)
4554		}
4555	}
4556}
4557
4558type sysmontick struct {
4559	schedtick   uint32
4560	schedwhen   int64
4561	syscalltick uint32
4562	syscallwhen int64
4563}
4564
4565// forcePreemptNS is the time slice given to a G before it is
4566// preempted.
4567const forcePreemptNS = 10 * 1000 * 1000 // 10ms
4568
4569func retake(now int64) uint32 {
4570	n := 0
4571	// Prevent allp slice changes. This lock will be completely
4572	// uncontended unless we're already stopping the world.
4573	lock(&allpLock)
4574	// We can't use a range loop over allp because we may
4575	// temporarily drop the allpLock. Hence, we need to re-fetch
4576	// allp each time around the loop.
4577	for i := 0; i < len(allp); i++ {
4578		_p_ := allp[i]
4579		if _p_ == nil {
4580			// This can happen if procresize has grown
4581			// allp but not yet created new Ps.
4582			continue
4583		}
4584		pd := &_p_.sysmontick
4585		s := _p_.status
4586		sysretake := false
4587		if s == _Prunning || s == _Psyscall {
4588			// Preempt G if it's running for too long.
4589			t := int64(_p_.schedtick)
4590			if int64(pd.schedtick) != t {
4591				pd.schedtick = uint32(t)
4592				pd.schedwhen = now
4593			} else if pd.schedwhen+forcePreemptNS <= now {
4594				preemptone(_p_)
4595				// In case of syscall, preemptone() doesn't
4596				// work, because there is no M wired to P.
4597				sysretake = true
4598			}
4599		}
4600		if s == _Psyscall {
4601			// Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
4602			t := int64(_p_.syscalltick)
4603			if !sysretake && int64(pd.syscalltick) != t {
4604				pd.syscalltick = uint32(t)
4605				pd.syscallwhen = now
4606				continue
4607			}
4608			// On the one hand we don't want to retake Ps if there is no other work to do,
4609			// but on the other hand we want to retake them eventually
4610			// because they can prevent the sysmon thread from deep sleep.
4611			if runqempty(_p_) && atomic.Load(&sched.nmspinning)+atomic.Load(&sched.npidle) > 0 && pd.syscallwhen+10*1000*1000 > now {
4612				continue
4613			}
4614			// Drop allpLock so we can take sched.lock.
4615			unlock(&allpLock)
4616			// Need to decrement number of idle locked M's
4617			// (pretending that one more is running) before the CAS.
4618			// Otherwise the M from which we retake can exit the syscall,
4619			// increment nmidle and report deadlock.
4620			incidlelocked(-1)
4621			if atomic.Cas(&_p_.status, s, _Pidle) {
4622				if trace.enabled {
4623					traceGoSysBlock(_p_)
4624					traceProcStop(_p_)
4625				}
4626				n++
4627				_p_.syscalltick++
4628				handoffp(_p_)
4629			}
4630			incidlelocked(1)
4631			lock(&allpLock)
4632		}
4633	}
4634	unlock(&allpLock)
4635	return uint32(n)
4636}
4637
4638// Tell all goroutines that they have been preempted and they should stop.
4639// This function is purely best-effort. It can fail to inform a goroutine if a
4640// processor just started running it.
4641// No locks need to be held.
4642// Returns true if preemption request was issued to at least one goroutine.
4643func preemptall() bool {
4644	res := false
4645	for _, _p_ := range allp {
4646		if _p_.status != _Prunning {
4647			continue
4648		}
4649		if preemptone(_p_) {
4650			res = true
4651		}
4652	}
4653	return res
4654}
4655
// Tell the goroutine running on processor P to stop.
// This function is purely best-effort. It can incorrectly fail to inform the
// goroutine. It can inform the wrong goroutine. Even if it informs the
// correct goroutine, that goroutine might ignore the request if it is
// simultaneously executing newstack.
// No lock needs to be held.
// Returns true if a preemption request was issued.
// The actual preemption will happen at some point in the future
// and will be indicated by gp->status no longer being
// Grunning.
4666func preemptone(_p_ *p) bool {
4667	mp := _p_.m.ptr()
4668	if mp == nil || mp == getg().m {
4669		return false
4670	}
4671	gp := mp.curg
4672	if gp == nil || gp == mp.g0 {
4673		return false
4674	}
4675
4676	gp.preempt = true
4677
	// Every call in a goroutine checks for stack overflow by
	// comparing the current stack pointer to gp->stackguard0.
	// Setting gp->stackguard0 to StackPreempt folds
	// preemption into the normal stack overflow check.
4682	gp.stackguard0 = stackPreempt
4683
4684	// Request an async preemption of this P.
4685	if preemptMSupported && debug.asyncpreemptoff == 0 {
4686		_p_.preempt = true
4687		preemptM(mp)
4688	}
4689
4690	return true
4691}
4692
4693var starttime int64
4694
4695func schedtrace(detailed bool) {
4696	now := nanotime()
4697	if starttime == 0 {
4698		starttime = now
4699	}
4700
4701	lock(&sched.lock)
4702	print("SCHED ", (now-starttime)/1e6, "ms: gomaxprocs=", gomaxprocs, " idleprocs=", sched.npidle, " threads=", mcount(), " spinningthreads=", sched.nmspinning, " idlethreads=", sched.nmidle, " runqueue=", sched.runqsize)
4703	if detailed {
4704		print(" gcwaiting=", sched.gcwaiting, " nmidlelocked=", sched.nmidlelocked, " stopwait=", sched.stopwait, " sysmonwait=", sched.sysmonwait, "\n")
4705	}
4706	// We must be careful while reading data from P's, M's and G's.
4707	// Even if we hold schedlock, most data can be changed concurrently.
4708	// E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
4709	for i, _p_ := range allp {
4710		mp := _p_.m.ptr()
4711		h := atomic.Load(&_p_.runqhead)
4712		t := atomic.Load(&_p_.runqtail)
4713		if detailed {
4714			id := int64(-1)
4715			if mp != nil {
4716				id = mp.id
4717			}
4718			print("  P", i, ": status=", _p_.status, " schedtick=", _p_.schedtick, " syscalltick=", _p_.syscalltick, " m=", id, " runqsize=", t-h, " gfreecnt=", _p_.gFree.n, " timerslen=", len(_p_.timers), "\n")
4719		} else {
4720			// In non-detailed mode format lengths of per-P run queues as:
4721			// [len1 len2 len3 len4]
4722			print(" ")
4723			if i == 0 {
4724				print("[")
4725			}
4726			print(t - h)
4727			if i == len(allp)-1 {
4728				print("]\n")
4729			}
4730		}
4731	}
4732
4733	if !detailed {
4734		unlock(&sched.lock)
4735		return
4736	}
4737
4738	for mp := allm; mp != nil; mp = mp.alllink {
4739		_p_ := mp.p.ptr()
4740		gp := mp.curg
4741		lockedg := mp.lockedg.ptr()
4742		id1 := int32(-1)
4743		if _p_ != nil {
4744			id1 = _p_.id
4745		}
4746		id2 := int64(-1)
4747		if gp != nil {
4748			id2 = gp.goid
4749		}
4750		id3 := int64(-1)
4751		if lockedg != nil {
4752			id3 = lockedg.goid
4753		}
4754		print("  M", mp.id, ": p=", id1, " curg=", id2, " mallocing=", mp.mallocing, " throwing=", mp.throwing, " preemptoff=", mp.preemptoff, ""+" locks=", mp.locks, " dying=", mp.dying, " spinning=", mp.spinning, " blocked=", mp.blocked, " lockedg=", id3, "\n")
4755	}
4756
4757	lock(&allglock)
4758	for gi := 0; gi < len(allgs); gi++ {
4759		gp := allgs[gi]
4760		mp := gp.m
4761		lockedm := gp.lockedm.ptr()
4762		id1 := int64(-1)
4763		if mp != nil {
4764			id1 = mp.id
4765		}
4766		id2 := int64(-1)
4767		if lockedm != nil {
4768			id2 = lockedm.id
4769		}
4770		print("  G", gp.goid, ": status=", readgstatus(gp), "(", gp.waitreason.String(), ") m=", id1, " lockedm=", id2, "\n")
4771	}
4772	unlock(&allglock)
4773	unlock(&sched.lock)
4774}
4775
4776// schedEnableUser enables or disables the scheduling of user
4777// goroutines.
4778//
4779// This does not stop already running user goroutines, so the caller
4780// should first stop the world when disabling user goroutines.
4781func schedEnableUser(enable bool) {
4782	lock(&sched.lock)
4783	if sched.disable.user == !enable {
4784		unlock(&sched.lock)
4785		return
4786	}
4787	sched.disable.user = !enable
4788	if enable {
4789		n := sched.disable.n
4790		sched.disable.n = 0
4791		globrunqputbatch(&sched.disable.runnable, n)
4792		unlock(&sched.lock)
4793		for ; n != 0 && sched.npidle != 0; n-- {
4794			startm(nil, false)
4795		}
4796	} else {
4797		unlock(&sched.lock)
4798	}
4799}
4800
// schedEnabled reports whether gp should be scheduled. It returns
// false if scheduling of gp is disabled.
4803func schedEnabled(gp *g) bool {
4804	if sched.disable.user {
4805		return isSystemGoroutine(gp, true)
4806	}
4807	return true
4808}
4809
4810// Put mp on midle list.
4811// Sched must be locked.
4812// May run during STW, so write barriers are not allowed.
4813//go:nowritebarrierrec
4814func mput(mp *m) {
4815	mp.schedlink = sched.midle
4816	sched.midle.set(mp)
4817	sched.nmidle++
4818	checkdead()
4819}
4820
4821// Try to get an m from midle list.
4822// Sched must be locked.
4823// May run during STW, so write barriers are not allowed.
4824//go:nowritebarrierrec
4825func mget() *m {
4826	mp := sched.midle.ptr()
4827	if mp != nil {
4828		sched.midle = mp.schedlink
4829		sched.nmidle--
4830	}
4831	return mp
4832}
4833
4834// Put gp on the global runnable queue.
4835// Sched must be locked.
4836// May run during STW, so write barriers are not allowed.
4837//go:nowritebarrierrec
4838func globrunqput(gp *g) {
4839	sched.runq.pushBack(gp)
4840	sched.runqsize++
4841}
4842
4843// Put gp at the head of the global runnable queue.
4844// Sched must be locked.
4845// May run during STW, so write barriers are not allowed.
4846//go:nowritebarrierrec
4847func globrunqputhead(gp *g) {
4848	sched.runq.push(gp)
4849	sched.runqsize++
4850}
4851
4852// Put a batch of runnable goroutines on the global runnable queue.
4853// This clears *batch.
4854// Sched must be locked.
4855func globrunqputbatch(batch *gQueue, n int32) {
4856	sched.runq.pushBackAll(*batch)
4857	sched.runqsize += n
4858	*batch = gQueue{}
4859}
4860
// Try to get a batch of G's from the global runnable queue.
4862// Sched must be locked.
4863func globrunqget(_p_ *p, max int32) *g {
4864	if sched.runqsize == 0 {
4865		return nil
4866	}
4867
4868	n := sched.runqsize/gomaxprocs + 1
4869	if n > sched.runqsize {
4870		n = sched.runqsize
4871	}
4872	if max > 0 && n > max {
4873		n = max
4874	}
4875	if n > int32(len(_p_.runq))/2 {
4876		n = int32(len(_p_.runq)) / 2
4877	}
4878
4879	sched.runqsize -= n
4880
4881	gp := sched.runq.pop()
4882	n--
4883	for ; n > 0; n-- {
4884		gp1 := sched.runq.pop()
4885		runqput(_p_, gp1, false)
4886	}
4887	return gp
4888}
4889
// Put p on the _Pidle list.
4891// Sched must be locked.
4892// May run during STW, so write barriers are not allowed.
4893//go:nowritebarrierrec
4894func pidleput(_p_ *p) {
4895	if !runqempty(_p_) {
4896		throw("pidleput: P has non-empty run queue")
4897	}
4898	_p_.link = sched.pidle
4899	sched.pidle.set(_p_)
4900	atomic.Xadd(&sched.npidle, 1) // TODO: fast atomic
4901}
4902
// Try to get a p from the _Pidle list.
4904// Sched must be locked.
4905// May run during STW, so write barriers are not allowed.
4906//go:nowritebarrierrec
4907func pidleget() *p {
4908	_p_ := sched.pidle.ptr()
4909	if _p_ != nil {
4910		sched.pidle = _p_.link
4911		atomic.Xadd(&sched.npidle, -1) // TODO: fast atomic
4912	}
4913	return _p_
4914}
4915
4916// runqempty reports whether _p_ has no Gs on its local run queue.
4917// It never returns true spuriously.
4918func runqempty(_p_ *p) bool {
4919	// Defend against a race where 1) _p_ has G1 in runqnext but runqhead == runqtail,
4920	// 2) runqput on _p_ kicks G1 to the runq, 3) runqget on _p_ empties runqnext.
4921	// Simply observing that runqhead == runqtail and then observing that runqnext == nil
4922	// does not mean the queue is empty.
4923	for {
4924		head := atomic.Load(&_p_.runqhead)
4925		tail := atomic.Load(&_p_.runqtail)
4926		runnext := atomic.Loaduintptr((*uintptr)(unsafe.Pointer(&_p_.runnext)))
4927		if tail == atomic.Load(&_p_.runqtail) {
4928			return head == tail && runnext == 0
4929		}
4930	}
4931}
4932
4933// To shake out latent assumptions about scheduling order,
4934// we introduce some randomness into scheduling decisions
4935// when running with the race detector.
4936// The need for this was made obvious by changing the
4937// (deterministic) scheduling order in Go 1.5 and breaking
4938// many poorly-written tests.
4939// With the randomness here, as long as the tests pass
4940// consistently with -race, they shouldn't have latent scheduling
4941// assumptions.
4942const randomizeScheduler = raceenabled
4943
4944// runqput tries to put g on the local runnable queue.
4945// If next is false, runqput adds g to the tail of the runnable queue.
4946// If next is true, runqput puts g in the _p_.runnext slot.
// If the run queue is full, runqput puts g on the global queue.
4948// Executed only by the owner P.
4949func runqput(_p_ *p, gp *g, next bool) {
4950	if randomizeScheduler && next && fastrand()%2 == 0 {
4951		next = false
4952	}
4953
4954	if next {
4955	retryNext:
4956		oldnext := _p_.runnext
4957		if !_p_.runnext.cas(oldnext, guintptr(unsafe.Pointer(gp))) {
4958			goto retryNext
4959		}
4960		if oldnext == 0 {
4961			return
4962		}
4963		// Kick the old runnext out to the regular run queue.
4964		gp = oldnext.ptr()
4965	}
4966
4967retry:
4968	h := atomic.LoadAcq(&_p_.runqhead) // load-acquire, synchronize with consumers
4969	t := _p_.runqtail
4970	if t-h < uint32(len(_p_.runq)) {
4971		_p_.runq[t%uint32(len(_p_.runq))].set(gp)
4972		atomic.StoreRel(&_p_.runqtail, t+1) // store-release, makes the item available for consumption
4973		return
4974	}
4975	if runqputslow(_p_, gp, h, t) {
4976		return
4977	}
4978	// the queue is not full, now the put above must succeed
4979	goto retry
4980}
4981
4982// Put g and a batch of work from local runnable queue on global queue.
4983// Executed only by the owner P.
4984func runqputslow(_p_ *p, gp *g, h, t uint32) bool {
4985	var batch [len(_p_.runq)/2 + 1]*g
4986
4987	// First, grab a batch from local queue.
4988	n := t - h
4989	n = n / 2
4990	if n != uint32(len(_p_.runq)/2) {
4991		throw("runqputslow: queue is not full")
4992	}
4993	for i := uint32(0); i < n; i++ {
4994		batch[i] = _p_.runq[(h+i)%uint32(len(_p_.runq))].ptr()
4995	}
4996	if !atomic.CasRel(&_p_.runqhead, h, h+n) { // cas-release, commits consume
4997		return false
4998	}
4999	batch[n] = gp
5000
5001	if randomizeScheduler {
5002		for i := uint32(1); i <= n; i++ {
5003			j := fastrandn(i + 1)
5004			batch[i], batch[j] = batch[j], batch[i]
5005		}
5006	}
5007
5008	// Link the goroutines.
5009	for i := uint32(0); i < n; i++ {
5010		batch[i].schedlink.set(batch[i+1])
5011	}
5012	var q gQueue
5013	q.head.set(batch[0])
5014	q.tail.set(batch[n])
5015
5016	// Now put the batch on global queue.
5017	lock(&sched.lock)
5018	globrunqputbatch(&q, int32(n+1))
5019	unlock(&sched.lock)
5020	return true
5021}
5022
5023// Get g from local runnable queue.
5024// If inheritTime is true, gp should inherit the remaining time in the
5025// current time slice. Otherwise, it should start a new time slice.
5026// Executed only by the owner P.
5027func runqget(_p_ *p) (gp *g, inheritTime bool) {
5028	// If there's a runnext, it's the next G to run.
5029	for {
5030		next := _p_.runnext
5031		if next == 0 {
5032			break
5033		}
5034		if _p_.runnext.cas(next, 0) {
5035			return next.ptr(), true
5036		}
5037	}
5038
5039	for {
5040		h := atomic.LoadAcq(&_p_.runqhead) // load-acquire, synchronize with other consumers
5041		t := _p_.runqtail
5042		if t == h {
5043			return nil, false
5044		}
5045		gp := _p_.runq[h%uint32(len(_p_.runq))].ptr()
5046		if atomic.CasRel(&_p_.runqhead, h, h+1) { // cas-release, commits consume
5047			return gp, false
5048		}
5049	}
5050}
5051
5052// Grabs a batch of goroutines from _p_'s runnable queue into batch.
5053// Batch is a ring buffer starting at batchHead.
5054// Returns number of grabbed goroutines.
5055// Can be executed by any P.
5056func runqgrab(_p_ *p, batch *[256]guintptr, batchHead uint32, stealRunNextG bool) uint32 {
5057	for {
5058		h := atomic.LoadAcq(&_p_.runqhead) // load-acquire, synchronize with other consumers
5059		t := atomic.LoadAcq(&_p_.runqtail) // load-acquire, synchronize with the producer
5060		n := t - h
		n = n - n/2 // steal half of the Gs in the queue, rounding up
5062		if n == 0 {
5063			if stealRunNextG {
5064				// Try to steal from _p_.runnext.
5065				if next := _p_.runnext; next != 0 {
5066					if _p_.status == _Prunning {
5067						// Sleep to ensure that _p_ isn't about to run the g
5068						// we are about to steal.
5069						// The important use case here is when the g running
5070						// on _p_ ready()s another g and then almost
5071						// immediately blocks. Instead of stealing runnext
5072						// in this window, back off to give _p_ a chance to
5073						// schedule runnext. This will avoid thrashing gs
5074						// between different Ps.
5075						// A sync chan send/recv takes ~50ns as of time of
5076						// writing, so 3us gives ~50x overshoot.
5077						if GOOS != "windows" {
5078							usleep(3)
5079						} else {
5080							// On windows system timer granularity is
5081							// 1-15ms, which is way too much for this
5082							// optimization. So just yield.
5083							osyield()
5084						}
5085					}
5086					if !_p_.runnext.cas(next, 0) {
5087						continue
5088					}
5089					batch[batchHead%uint32(len(batch))] = next
5090					return 1
5091				}
5092			}
5093			return 0
5094		}
5095		if n > uint32(len(_p_.runq)/2) { // read inconsistent h and t
5096			continue
5097		}
5098		for i := uint32(0); i < n; i++ {
5099			g := _p_.runq[(h+i)%uint32(len(_p_.runq))]
5100			batch[(batchHead+i)%uint32(len(batch))] = g
5101		}
5102		if atomic.CasRel(&_p_.runqhead, h, h+n) { // cas-release, commits consume
5103			return n
5104		}
5105	}
5106}
5107
5108// Steal half of elements from local runnable queue of p2
5109// and put onto local runnable queue of p.
5110// Returns one of the stolen elements (or nil if failed).
5111func runqsteal(_p_, p2 *p, stealRunNextG bool) *g {
5112	t := _p_.runqtail
5113	n := runqgrab(p2, &_p_.runq, t, stealRunNextG)
5114	if n == 0 {
5115		return nil
5116	}
5117	n--
5118	gp := _p_.runq[(t+n)%uint32(len(_p_.runq))].ptr()
5119	if n == 0 {
5120		return gp
5121	}
5122	h := atomic.LoadAcq(&_p_.runqhead) // load-acquire, synchronize with consumers
5123	if t-h+n >= uint32(len(_p_.runq)) {
5124		throw("runqsteal: runq overflow")
5125	}
5126	atomic.StoreRel(&_p_.runqtail, t+n) // store-release, makes the item available for consumption
5127	return gp
5128}
5129
// A gQueue is a double-ended queue of Gs linked through g.schedlink. A G can
// only be on one gQueue or gList at a time.
5132type gQueue struct {
5133	head guintptr
5134	tail guintptr
5135}
5136
5137// empty reports whether q is empty.
5138func (q *gQueue) empty() bool {
5139	return q.head == 0
5140}
5141
5142// push adds gp to the head of q.
5143func (q *gQueue) push(gp *g) {
5144	gp.schedlink = q.head
5145	q.head.set(gp)
5146	if q.tail == 0 {
5147		q.tail.set(gp)
5148	}
5149}
5150
5151// pushBack adds gp to the tail of q.
5152func (q *gQueue) pushBack(gp *g) {
5153	gp.schedlink = 0
5154	if q.tail != 0 {
5155		q.tail.ptr().schedlink.set(gp)
5156	} else {
5157		q.head.set(gp)
5158	}
5159	q.tail.set(gp)
5160}
5161
// pushBackAll adds all Gs in q2 to the tail of q. After this q2 must
// not be used.
5164func (q *gQueue) pushBackAll(q2 gQueue) {
5165	if q2.tail == 0 {
5166		return
5167	}
5168	q2.tail.ptr().schedlink = 0
5169	if q.tail != 0 {
5170		q.tail.ptr().schedlink = q2.head
5171	} else {
5172		q.head = q2.head
5173	}
5174	q.tail = q2.tail
5175}
5176
5177// pop removes and returns the head of queue q. It returns nil if
5178// q is empty.
5179func (q *gQueue) pop() *g {
5180	gp := q.head.ptr()
5181	if gp != nil {
5182		q.head = gp.schedlink
5183		if q.head == 0 {
5184			q.tail = 0
5185		}
5186	}
5187	return gp
5188}
5189
5190// popList takes all Gs in q and returns them as a gList.
5191func (q *gQueue) popList() gList {
5192	stack := gList{q.head}
5193	*q = gQueue{}
5194	return stack
5195}
5196
5197// A gList is a list of Gs linked through g.schedlink. A G can only be
5198// on one gQueue or gList at a time.
5199type gList struct {
5200	head guintptr
5201}
5202
5203// empty reports whether l is empty.
5204func (l *gList) empty() bool {
5205	return l.head == 0
5206}
5207
5208// push adds gp to the head of l.
5209func (l *gList) push(gp *g) {
5210	gp.schedlink = l.head
5211	l.head.set(gp)
5212}
5213
5214// pushAll prepends all Gs in q to l.
5215func (l *gList) pushAll(q gQueue) {
5216	if !q.empty() {
5217		q.tail.ptr().schedlink = l.head
5218		l.head = q.head
5219	}
5220}
5221
5222// pop removes and returns the head of l. If l is empty, it returns nil.
5223func (l *gList) pop() *g {
5224	gp := l.head.ptr()
5225	if gp != nil {
5226		l.head = gp.schedlink
5227	}
5228	return gp
5229}
5230
5231//go:linkname setMaxThreads runtime/debug.setMaxThreads
5232func setMaxThreads(in int) (out int) {
5233	lock(&sched.lock)
5234	out = int(sched.maxmcount)
5235	if in > 0x7fffffff { // MaxInt32
5236		sched.maxmcount = 0x7fffffff
5237	} else {
5238		sched.maxmcount = int32(in)
5239	}
5240	checkmcount()
5241	unlock(&sched.lock)
5242	return
5243}
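
// As an illustrative note (not part of the runtime): user code reaches the
// function above through runtime/debug, for example
//
//	prev := debug.SetMaxThreads(20000) // 20000 is an arbitrary example value
//
// which returns the previous limit.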
5244
5245func haveexperiment(name string) bool {
5246	if name == "framepointer" {
5247		return framepointer_enabled // set by linker
5248	}
5249	x := sys.Goexperiment
5250	for x != "" {
5251		xname := ""
5252		i := index(x, ",")
5253		if i < 0 {
5254			xname, x = x, ""
5255		} else {
5256			xname, x = x[:i], x[i+1:]
5257		}
5258		if xname == name {
5259			return true
5260		}
5261		if len(xname) > 2 && xname[:2] == "no" && xname[2:] == name {
5262			return false
5263		}
5264	}
5265	return false
5266}
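
// A worked example for the parsing above (the experiment names are purely
// illustrative): with GOEXPERIMENT=fieldtrack,nopreemptibleloops baked into
// sys.Goexperiment, haveexperiment("fieldtrack") reports true and
// haveexperiment("preemptibleloops") reports false because of the "no" prefix.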
5267
5268//go:nosplit
5269func procPin() int {
5270	_g_ := getg()
5271	mp := _g_.m
5272
5273	mp.locks++
5274	return int(mp.p.ptr().id)
5275}
5276
5277//go:nosplit
5278func procUnpin() {
5279	_g_ := getg()
5280	_g_.m.locks--
5281}
5282
5283//go:linkname sync_runtime_procPin sync.runtime_procPin
5284//go:nosplit
5285func sync_runtime_procPin() int {
5286	return procPin()
5287}
5288
5289//go:linkname sync_runtime_procUnpin sync.runtime_procUnpin
5290//go:nosplit
5291func sync_runtime_procUnpin() {
5292	procUnpin()
5293}
5294
5295//go:linkname sync_atomic_runtime_procPin sync/atomic.runtime_procPin
5296//go:nosplit
5297func sync_atomic_runtime_procPin() int {
5298	return procPin()
5299}
5300
5301//go:linkname sync_atomic_runtime_procUnpin sync/atomic.runtime_procUnpin
5302//go:nosplit
5303func sync_atomic_runtime_procUnpin() {
5304	procUnpin()
5305}
5306
5307// Active spinning for sync.Mutex.
5308//go:linkname sync_runtime_canSpin sync.runtime_canSpin
5309//go:nosplit
5310func sync_runtime_canSpin(i int) bool {
	// sync.Mutex is cooperative, so we are conservative with spinning.
	// Spin only a few times and only if we are running on a multicore machine,
	// GOMAXPROCS>1, there is at least one other running P, and the local runq is empty.
	// As opposed to runtime mutexes we don't do passive spinning here,
	// because there can be work on the global runq or on other Ps.
5316	if i >= active_spin || ncpu <= 1 || gomaxprocs <= int32(sched.npidle+sched.nmspinning)+1 {
5317		return false
5318	}
5319	if p := getg().m.p.ptr(); !runqempty(p) {
5320		return false
5321	}
5322	return true
5323}
5324
5325//go:linkname sync_runtime_doSpin sync.runtime_doSpin
5326//go:nosplit
5327func sync_runtime_doSpin() {
5328	procyield(active_spin_cnt)
5329}
5330
5331var stealOrder randomOrder
5332
// randomOrder/randomEnum are helper types for randomized work stealing.
// They allow enumerating all Ps in different pseudo-random orders without repetitions.
// The algorithm is based on the fact that if we have X such that X and GOMAXPROCS
// are coprime, then the sequence (i + X) % GOMAXPROCS gives the required enumeration.
5337type randomOrder struct {
5338	count    uint32
5339	coprimes []uint32
5340}
5341
5342type randomEnum struct {
5343	i     uint32
5344	count uint32
5345	pos   uint32
5346	inc   uint32
5347}
5348
5349func (ord *randomOrder) reset(count uint32) {
5350	ord.count = count
5351	ord.coprimes = ord.coprimes[:0]
5352	for i := uint32(1); i <= count; i++ {
5353		if gcd(i, count) == 1 {
5354			ord.coprimes = append(ord.coprimes, i)
5355		}
5356	}
5357}
5358
5359func (ord *randomOrder) start(i uint32) randomEnum {
5360	return randomEnum{
5361		count: ord.count,
5362		pos:   i % ord.count,
5363		inc:   ord.coprimes[i%uint32(len(ord.coprimes))],
5364	}
5365}
5366
5367func (enum *randomEnum) done() bool {
5368	return enum.i == enum.count
5369}
5370
5371func (enum *randomEnum) next() {
5372	enum.i++
5373	enum.pos = (enum.pos + enum.inc) % enum.count
5374}
5375
5376func (enum *randomEnum) position() uint32 {
5377	return enum.pos
5378}
5379
5380func gcd(a, b uint32) uint32 {
5381	for b != 0 {
5382		a, b = b, a%b
5383	}
5384	return a
5385}
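
// A worked example of the enumeration above (illustrative only): with count=6
// the coprimes are {1, 5}, so an enumeration that starts at pos=2 with inc=5
// visits 2, 1, 0, 5, 4, 3, touching each P exactly once because gcd(5, 6) == 1.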
5386
5387// An initTask represents the set of initializations that need to be done for a package.
5388// Keep in sync with ../../test/initempty.go:initTask
5389type initTask struct {
5390	// TODO: pack the first 3 fields more tightly?
5391	state uintptr // 0 = uninitialized, 1 = in progress, 2 = done
5392	ndeps uintptr
5393	nfns  uintptr
5394	// followed by ndeps instances of an *initTask, one per package depended on
5395	// followed by nfns pcs, one per init function to run
5396}
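
// For illustration (this mirrors the (3+i)*sys.PtrSize offsets used by doInit
// below): a package with ndeps=2 and nfns=3 occupies, in pointer-sized words,
//
//	word 0: state
//	word 1: ndeps (2)
//	word 2: nfns  (3)
//	word 3: *initTask for dependency 0
//	word 4: *initTask for dependency 1
//	word 5: pc of init function 0
//	word 6: pc of init function 1
//	word 7: pc of init function 2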
5397
5398func doInit(t *initTask) {
5399	switch t.state {
5400	case 2: // fully initialized
5401		return
5402	case 1: // initialization in progress
5403		throw("recursive call during initialization - linker skew")
5404	default: // not initialized yet
5405		t.state = 1 // initialization in progress
5406		for i := uintptr(0); i < t.ndeps; i++ {
5407			p := add(unsafe.Pointer(t), (3+i)*sys.PtrSize)
5408			t2 := *(**initTask)(p)
5409			doInit(t2)
5410		}
5411		for i := uintptr(0); i < t.nfns; i++ {
5412			p := add(unsafe.Pointer(t), (3+t.ndeps+i)*sys.PtrSize)
5413			f := *(*func())(unsafe.Pointer(&p))
5414			f()
5415		}
5416		t.state = 2 // initialization done
5417	}
5418}
5419