1// Copyright 2014 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package runtime
6
7import (
8	"internal/cpu"
9	"runtime/internal/atomic"
10	"runtime/internal/sys"
11	"unsafe"
12)
13
14// Functions called by C code.
15//go:linkname main
16//go:linkname goparkunlock
17//go:linkname newextram
18//go:linkname acquirep
19//go:linkname releasep
20//go:linkname incidlelocked
21//go:linkname ginit
22//go:linkname schedinit
23//go:linkname ready
24//go:linkname stopm
25//go:linkname handoffp
26//go:linkname wakep
27//go:linkname stoplockedm
28//go:linkname schedule
29//go:linkname execute
30//go:linkname goexit1
31//go:linkname reentersyscall
32//go:linkname reentersyscallblock
33//go:linkname exitsyscall
34//go:linkname gfget
35//go:linkname kickoff
36//go:linkname mstart1
37//go:linkname mexit
38//go:linkname globrunqput
39//go:linkname pidleget
40
41// Exported for test (see runtime/testdata/testprogcgo/dropm_stub.go).
42//go:linkname getm
43
44// Function called by misc/cgo/test.
45//go:linkname lockedOSThread
46
47// C functions for thread and context management.
48func newosproc(*m)
49
50//go:noescape
51func malg(bool, bool, *unsafe.Pointer, *uintptr) *g
52
53//go:noescape
54func resetNewG(*g, *unsafe.Pointer, *uintptr)
55func gogo(*g)
56func setGContext()
57func makeGContext(*g, unsafe.Pointer, uintptr)
58func getTraceback(me, gp *g)
59func gtraceback(*g)
60func _cgo_notify_runtime_init_done()
61func alreadyInCallers() bool
62func stackfree(*g)
63
64// Functions created by the compiler.
65//extern __go_init_main
66func main_init()
67
68//extern main.main
69func main_main()
70
71var buildVersion = sys.TheVersion
72
73// set using cmd/go/internal/modload.ModInfoProg
74var modinfo string
75
76// Goroutine scheduler
77// The scheduler's job is to distribute ready-to-run goroutines over worker threads.
78//
79// The main concepts are:
80// G - goroutine.
81// M - worker thread, or machine.
82// P - processor, a resource that is required to execute Go code.
83//     M must have an associated P to execute Go code, however it can be
84//     blocked or in a syscall w/o an associated P.
85//
86// Design doc at https://golang.org/s/go11sched.
87
88// Worker thread parking/unparking.
89// We need to balance between keeping enough running worker threads to utilize
90// available hardware parallelism and parking excessive running worker threads
91// to conserve CPU resources and power. This is not simple for two reasons:
92// (1) scheduler state is intentionally distributed (in particular, per-P work
93// queues), so it is not possible to compute global predicates on fast paths;
// (2) for optimal thread management we would need to know the future (don't park
// a worker thread when a new goroutine will be readied in the near future).
96//
97// Three rejected approaches that would work badly:
98// 1. Centralize all scheduler state (would inhibit scalability).
// 2. Direct goroutine handoff. That is, when we ready a new goroutine and there
//    is a spare P, unpark a thread and hand it the P and the goroutine.
//    This would lead to thread state thrashing, as the thread that readied the
//    goroutine can be out of work the very next moment, and we would need to park it.
//    Also, it would destroy locality of computation, as we want to keep
//    dependent goroutines on the same thread; and it would introduce additional latency.
105// 3. Unpark an additional thread whenever we ready a goroutine and there is an
106//    idle P, but don't do handoff. This would lead to excessive thread parking/
107//    unparking as the additional threads will instantly park without discovering
108//    any work to do.
109//
110// The current approach:
// We unpark an additional thread when we ready a goroutine if (1) there is an
// idle P and (2) there are no "spinning" worker threads. A worker thread is considered
// spinning if it is out of local work and did not find work in the global run queue
// or netpoller; the spinning state is denoted in m.spinning and in sched.nmspinning.
115// Threads unparked this way are also considered spinning; we don't do goroutine
116// handoff so such threads are out of work initially. Spinning threads do some
117// spinning looking for work in per-P run queues before parking. If a spinning
118// thread finds work it takes itself out of the spinning state and proceeds to
119// execution. If it does not find work it takes itself out of the spinning state
120// and then parks.
// If there is at least one spinning thread (sched.nmspinning>0), we don't unpark
122// new threads when readying goroutines. To compensate for that, if the last spinning
123// thread finds work and stops spinning, it must unpark a new spinning thread.
124// This approach smooths out unjustified spikes of thread unparking,
125// but at the same time guarantees eventual maximal CPU parallelism utilization.
126//
127// The main implementation complication is that we need to be very careful during
128// spinning->non-spinning thread transition. This transition can race with submission
129// of a new goroutine, and either one part or another needs to unpark another worker
130// thread. If they both fail to do that, we can end up with semi-persistent CPU
131// underutilization. The general pattern for goroutine readying is: submit a goroutine
132// to local work queue, #StoreLoad-style memory barrier, check sched.nmspinning.
133// The general pattern for spinning->non-spinning transition is: decrement nmspinning,
134// #StoreLoad-style memory barrier, check all per-P work queues for new work.
// Note that all this complexity does not apply to the global run queue, as we are not
// sloppy about thread unparking when submitting to the global queue. Also see comments
137// for nmspinning manipulation.
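//
// An illustrative sketch of the two racing patterns above (not the actual
// implementation; see ready, wakep and the spinning handling in this file):
//
//	// Readying a goroutine:
//	runqput(pp, gp, true)              // submit to the local run queue
//	// StoreLoad-style barrier, then check for spinning/idle workers:
//	if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 {
//		wakep()                    // unpark or create a spinning worker
//	}
//
//	// Spinning -> non-spinning transition:
//	atomic.Xadd(&sched.nmspinning, -1) // decrement nmspinning
//	// StoreLoad-style barrier, then re-check all per-P run queues;
//	// if work is found, the last spinner must wake another spinning worker.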
138
139var (
140	m0           m
141	g0           g
142	raceprocctx0 uintptr
143)
144
145// main_init_done is a signal used by cgocallbackg that initialization
146// has been completed. It is made before _cgo_notify_runtime_init_done,
147// so all cgo calls can rely on it existing. When main_init is complete,
148// it is closed, meaning cgocallbackg can reliably receive from it.
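//
// An illustrative sketch (not the exact cgocallbackg code) of the consumer side:
//
//	<-main_init_done // returns immediately once main_init has completed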
149var main_init_done chan bool
150
151// mainStarted indicates that the main M has started.
152var mainStarted bool
153
154// runtimeInitTime is the nanotime() at which the runtime started.
155var runtimeInitTime int64
156
157// Value to use for signal mask for newly created M's.
158var initSigmask sigset
159
160// The main goroutine.
161func main(unsafe.Pointer) {
162	g := getg()
163
164	// Max stack size is 1 GB on 64-bit, 250 MB on 32-bit.
165	// Using decimal instead of binary GB and MB because
166	// they look nicer in the stack overflow failure message.
167	if sys.PtrSize == 8 {
168		maxstacksize = 1000000000
169	} else {
170		maxstacksize = 250000000
171	}
172
173	// Allow newproc to start new Ms.
174	mainStarted = true
175
176	if GOARCH != "wasm" { // no threads on wasm yet, so no sysmon
177		systemstack(func() {
178			newm(sysmon, nil)
179		})
180	}
181
182	// Lock the main goroutine onto this, the main OS thread,
183	// during initialization. Most programs won't care, but a few
184	// do require certain calls to be made by the main thread.
185	// Those can arrange for main.main to run in the main thread
186	// by calling runtime.LockOSThread during initialization
187	// to preserve the lock.
188	lockOSThread()
189
190	if g.m != &m0 {
191		throw("runtime.main not on m0")
192	}
193
194	if nanotime() == 0 {
195		throw("nanotime returning zero")
196	}
197
198	// Defer unlock so that runtime.Goexit during init does the unlock too.
199	needUnlock := true
200	defer func() {
201		if needUnlock {
202			unlockOSThread()
203		}
204	}()
205
206	// Record when the world started.
207	runtimeInitTime = nanotime()
208
209	main_init_done = make(chan bool)
210	if iscgo {
211		// Start the template thread in case we enter Go from
212		// a C-created thread and need to create a new thread.
213		startTemplateThread()
214		_cgo_notify_runtime_init_done()
215	}
216
217	fn := main_init // make an indirect call, as the linker doesn't know the address of the main package when laying down the runtime
218	fn()
219	createGcRootsIndex()
220	close(main_init_done)
221
222	// For gccgo we have to wait until after main is initialized
223	// to enable GC, because initializing main registers the GC roots.
224	gcenable()
225
226	needUnlock = false
227	unlockOSThread()
228
229	if isarchive || islibrary {
230		// A program compiled with -buildmode=c-archive or c-shared
231		// has a main, but it is not executed.
232		return
233	}
234	fn = main_main // make an indirect call, as the linker doesn't know the address of the main package when laying down the runtime
235	fn()
236	if raceenabled {
237		racefini()
238	}
239
240	// Make racy client program work: if panicking on
241	// another goroutine at the same time as main returns,
242	// let the other goroutine finish printing the panic trace.
243	// Once it does, it will exit. See issues 3934 and 20018.
244	if atomic.Load(&runningPanicDefers) != 0 {
245		// Running deferred functions should not take long.
246		for c := 0; c < 1000; c++ {
247			if atomic.Load(&runningPanicDefers) == 0 {
248				break
249			}
250			Gosched()
251		}
252	}
253	if atomic.Load(&panicking) != 0 {
254		gopark(nil, nil, waitReasonPanicWait, traceEvGoStop, 1)
255	}
256
257	exit(0)
258	for {
259		var x *int32
260		*x = 0
261	}
262}
263
264// os_beforeExit is called from os.Exit(0).
265//go:linkname os_beforeExit os.runtime_beforeExit
266func os_beforeExit() {
267	if raceenabled {
268		racefini()
269	}
270}
271
272// start forcegc helper goroutine
273func init() {
274	expectSystemGoroutine()
275	go forcegchelper()
276}
277
278func forcegchelper() {
279	setSystemGoroutine()
280
281	forcegc.g = getg()
282	for {
283		lock(&forcegc.lock)
284		if forcegc.idle != 0 {
285			throw("forcegc: phase error")
286		}
287		atomic.Store(&forcegc.idle, 1)
288		goparkunlock(&forcegc.lock, waitReasonForceGGIdle, traceEvGoBlock, 1)
289		// this goroutine is explicitly resumed by sysmon
290		if debug.gctrace > 0 {
291			println("GC forced")
292		}
293		// Time-triggered, fully concurrent.
294		gcStart(gcTrigger{kind: gcTriggerTime, now: nanotime()})
295	}
296}
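
// sysmon resumes the force-GC goroutine roughly like this once the forced-GC
// period expires (an illustrative sketch; see sysmon for the real code):
//
//	lock(&forcegc.lock)
//	forcegc.idle = 0
//	var list gList
//	list.push(forcegc.g)
//	injectglist(&list)
//	unlock(&forcegc.lock)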
297
298//go:nosplit
299
300// Gosched yields the processor, allowing other goroutines to run. It does not
301// suspend the current goroutine, so execution resumes automatically.
302func Gosched() {
303	checkTimeouts()
304	mcall(gosched_m)
305}
306
307// goschedguarded yields the processor like gosched, but also checks
308// for forbidden states and opts out of the yield in those cases.
309//go:nosplit
310func goschedguarded() {
311	mcall(goschedguarded_m)
312}
313
314// Puts the current goroutine into a waiting state and calls unlockf.
315// If unlockf returns false, the goroutine is resumed.
316// unlockf must not access this G's stack, as it may be moved between
317// the call to gopark and the call to unlockf.
318// Reason explains why the goroutine has been parked.
319// It is displayed in stack traces and heap dumps.
320// Reasons should be unique and descriptive.
321// Do not re-use reasons, add new ones.
322func gopark(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason waitReason, traceEv byte, traceskip int) {
323	if reason != waitReasonSleep {
324		checkTimeouts() // timeouts may expire while two goroutines keep the scheduler busy
325	}
326	mp := acquirem()
327	gp := mp.curg
328	status := readgstatus(gp)
329	if status != _Grunning && status != _Gscanrunning {
330		throw("gopark: bad g status")
331	}
332	mp.waitlock = lock
333	mp.waitunlockf = unlockf
334	gp.waitreason = reason
335	mp.waittraceev = traceEv
336	mp.waittraceskip = traceskip
337	releasem(mp)
338	// can't do anything that might move the G between Ms here.
339	mcall(park_m)
340}
341
342// Puts the current goroutine into a waiting state and unlocks the lock.
343// The goroutine can be made runnable again by calling goready(gp).
344func goparkunlock(lock *mutex, reason waitReason, traceEv byte, traceskip int) {
345	gopark(parkunlock_c, unsafe.Pointer(lock), reason, traceEv, traceskip)
346}
347
348func goready(gp *g, traceskip int) {
349	systemstack(func() {
350		ready(gp, traceskip, true)
351	})
352}
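
// A minimal usage sketch of the gopark/goready pairing. The waiter type below
// is hypothetical and not part of the runtime; a real implementation would
// also handle notify arriving before wait.
//
//	type waiter struct {
//		lock mutex
//		g    *g
//	}
//
//	// wait parks the current goroutine until notify is called.
//	func (w *waiter) wait() {
//		lock(&w.lock)
//		w.g = getg()
//		// the unlock of w.lock happens only after this g is parked.
//		goparkunlock(&w.lock, waitReasonZero, traceEvGoBlock, 1)
//	}
//
//	// notify makes the parked goroutine, if any, runnable again.
//	func (w *waiter) notify() {
//		lock(&w.lock)
//		gp := w.g
//		w.g = nil
//		unlock(&w.lock)
//		if gp != nil {
//			goready(gp, 1)
//		}
//	}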
353
354//go:nosplit
355func acquireSudog() *sudog {
356	// Delicate dance: the semaphore implementation calls
357	// acquireSudog, acquireSudog calls new(sudog),
358	// new calls malloc, malloc can call the garbage collector,
359	// and the garbage collector calls the semaphore implementation
360	// in stopTheWorld.
361	// Break the cycle by doing acquirem/releasem around new(sudog).
362	// The acquirem/releasem increments m.locks during new(sudog),
363	// which keeps the garbage collector from being invoked.
364	mp := acquirem()
365	pp := mp.p.ptr()
366	if len(pp.sudogcache) == 0 {
367		lock(&sched.sudoglock)
368		// First, try to grab a batch from central cache.
369		for len(pp.sudogcache) < cap(pp.sudogcache)/2 && sched.sudogcache != nil {
370			s := sched.sudogcache
371			sched.sudogcache = s.next
372			s.next = nil
373			pp.sudogcache = append(pp.sudogcache, s)
374		}
375		unlock(&sched.sudoglock)
376		// If the central cache is empty, allocate a new one.
377		if len(pp.sudogcache) == 0 {
378			pp.sudogcache = append(pp.sudogcache, new(sudog))
379		}
380	}
381	n := len(pp.sudogcache)
382	s := pp.sudogcache[n-1]
383	pp.sudogcache[n-1] = nil
384	pp.sudogcache = pp.sudogcache[:n-1]
385	if s.elem != nil {
386		throw("acquireSudog: found s.elem != nil in cache")
387	}
388	releasem(mp)
389	return s
390}
391
392//go:nosplit
393func releaseSudog(s *sudog) {
394	if s.elem != nil {
395		throw("runtime: sudog with non-nil elem")
396	}
397	if s.isSelect {
398		throw("runtime: sudog with non-false isSelect")
399	}
400	if s.next != nil {
401		throw("runtime: sudog with non-nil next")
402	}
403	if s.prev != nil {
404		throw("runtime: sudog with non-nil prev")
405	}
406	if s.waitlink != nil {
407		throw("runtime: sudog with non-nil waitlink")
408	}
409	if s.c != nil {
410		throw("runtime: sudog with non-nil c")
411	}
412	gp := getg()
413	if gp.param != nil {
414		throw("runtime: releaseSudog with non-nil gp.param")
415	}
416	mp := acquirem() // avoid rescheduling to another P
417	pp := mp.p.ptr()
418	if len(pp.sudogcache) == cap(pp.sudogcache) {
419		// Transfer half of local cache to the central cache.
420		var first, last *sudog
421		for len(pp.sudogcache) > cap(pp.sudogcache)/2 {
422			n := len(pp.sudogcache)
423			p := pp.sudogcache[n-1]
424			pp.sudogcache[n-1] = nil
425			pp.sudogcache = pp.sudogcache[:n-1]
426			if first == nil {
427				first = p
428			} else {
429				last.next = p
430			}
431			last = p
432		}
433		lock(&sched.sudoglock)
434		last.next = sched.sudogcache
435		sched.sudogcache = first
436		unlock(&sched.sudoglock)
437	}
438	pp.sudogcache = append(pp.sudogcache, s)
439	releasem(mp)
440}
441
442// funcPC returns the entry PC of the function f.
443// It assumes that f is a func value. Otherwise the behavior is undefined.
444// CAREFUL: In programs with plugins, funcPC can return different values
445// for the same function (because there are actually multiple copies of
446// the same function in the address space). To be safe, don't use the
447// results of this function in any == expression. It is only safe to
448// use the result as an address at which to start executing code.
449//
450// For gccgo note that this differs from the gc implementation; the gc
451// implementation adds sys.PtrSize to the address of the interface
452// value, but GCC's alias analysis decides that that can not be a
453// reference to the second field of the interface, and in some cases
454// it drops the initialization of the second field as a dead store.
455//go:nosplit
456func funcPC(f interface{}) uintptr {
457	i := (*iface)(unsafe.Pointer(&f))
458	r := *(*uintptr)(i.data)
459	if cpu.FunctionDescriptors {
460		// With PPC64 ELF ABI v1 function descriptors the
461		// function address is a pointer to a struct whose
462		// first field is the actual PC.
463		r = *(*uintptr)(unsafe.Pointer(r))
464	}
465	return r
466}
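
// For example, a caller that needs a raw code address might write
// (an illustrative sketch only):
//
//	pc := funcPC(forcegchelper)
//	// pc may be used as an address at which to start executing code,
//	// but must never be compared with ==.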
467
468func lockedOSThread() bool {
469	gp := getg()
470	return gp.lockedm != 0 && gp.m.lockedg != 0
471}
472
473var (
474	allgs    []*g
475	allglock mutex
476)
477
478func allgadd(gp *g) {
479	if readgstatus(gp) == _Gidle {
480		throw("allgadd: bad status Gidle")
481	}
482
483	lock(&allglock)
484	allgs = append(allgs, gp)
485	allglen = uintptr(len(allgs))
486	unlock(&allglock)
487}
488
489const (
490	// Number of goroutine ids to grab from sched.goidgen to local per-P cache at once.
	// 16 seems to provide enough amortization, but other than that it's a mostly arbitrary number.
492	_GoidCacheBatch = 16
493)
494
495// cpuinit extracts the environment variable GODEBUG from the environment on
496// Unix-like operating systems and calls internal/cpu.Initialize.
497func cpuinit() {
498	const prefix = "GODEBUG="
499	var env string
500
501	switch GOOS {
502	case "aix", "darwin", "dragonfly", "freebsd", "netbsd", "openbsd", "illumos", "solaris", "linux":
503		cpu.DebugOptions = true
504
505		// Similar to goenv_unix but extracts the environment value for
506		// GODEBUG directly.
507		// TODO(moehrmann): remove when general goenvs() can be called before cpuinit()
508		n := int32(0)
509		for argv_index(argv, argc+1+n) != nil {
510			n++
511		}
512
513		for i := int32(0); i < n; i++ {
514			p := argv_index(argv, argc+1+i)
515			s := *(*string)(unsafe.Pointer(&stringStruct{unsafe.Pointer(p), findnull(p)}))
516
517			if hasPrefix(s, prefix) {
518				env = gostring(p)[len(prefix):]
519				break
520			}
521		}
522	}
523
524	cpu.Initialize(env)
525}
526
527func ginit() {
528	_m_ := &m0
529	_g_ := &g0
530	_m_.g0 = _g_
531	_m_.curg = _g_
532	_g_.m = _m_
533	setg(_g_)
534}
535
536// The bootstrap sequence is:
537//
538//	call osinit
539//	call schedinit
540//	make & queue new G
541//	call runtime·mstart
542//
543// The new G calls runtime·main.
544func schedinit() {
545	_g_ := getg()
546	sched.maxmcount = 10000
547
548	usestackmaps = probestackmaps()
549
550	mallocinit()
551	fastrandinit() // must run before mcommoninit
552	mcommoninit(_g_.m)
553	cpuinit() // must run before alginit
554	alginit() // maps must not be used before this call
555
556	msigsave(_g_.m)
557	initSigmask = _g_.m.sigmask
558
559	goargs()
560	goenvs()
561	parsedebugvars()
562	gcinit()
563
564	sched.lastpoll = uint64(nanotime())
565	procs := ncpu
566
567	// In 32-bit mode, we can burn a lot of memory on thread stacks.
568	// Try to avoid this by limiting the number of threads we run
569	// by default.
570	if sys.PtrSize == 4 && procs > 32 {
571		procs = 32
572	}
573
574	if n, ok := atoi32(gogetenv("GOMAXPROCS")); ok && n > 0 {
575		procs = n
576	}
577	if procresize(procs) != nil {
578		throw("unknown runnable goroutine during bootstrap")
579	}
580
581	// For cgocheck > 1, we turn on the write barrier at all times
582	// and check all pointer writes. We can't do this until after
583	// procresize because the write barrier needs a P.
584	if debug.cgocheck > 1 {
585		writeBarrier.cgo = true
586		writeBarrier.enabled = true
587		for _, p := range allp {
588			p.wbBuf.reset()
589		}
590	}
591
592	if buildVersion == "" {
593		// Condition should never trigger. This code just serves
594		// to ensure runtime·buildVersion is kept in the resulting binary.
595		buildVersion = "unknown"
596	}
597	if len(modinfo) == 1 {
598		// Condition should never trigger. This code just serves
599		// to ensure runtime·modinfo is kept in the resulting binary.
600		modinfo = ""
601	}
602}
603
604func dumpgstatus(gp *g) {
605	_g_ := getg()
606	print("runtime: gp: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
607	print("runtime:  g:  g=", _g_, ", goid=", _g_.goid, ",  g->atomicstatus=", readgstatus(_g_), "\n")
608}
609
610func checkmcount() {
611	// sched lock is held
612	if mcount() > sched.maxmcount {
613		print("runtime: program exceeds ", sched.maxmcount, "-thread limit\n")
614		throw("thread exhaustion")
615	}
616}
617
618func mcommoninit(mp *m) {
619	_g_ := getg()
620
	// g0 stack won't make sense for user (and is not necessarily unwindable).
622	if _g_ != _g_.m.g0 {
623		callers(1, mp.createstack[:])
624	}
625
626	lock(&sched.lock)
627	if sched.mnext+1 < sched.mnext {
628		throw("runtime: thread ID overflow")
629	}
630	mp.id = sched.mnext
631	sched.mnext++
632	checkmcount()
633
634	mp.fastrand[0] = uint32(int64Hash(uint64(mp.id), fastrandseed))
635	mp.fastrand[1] = uint32(int64Hash(uint64(cputicks()), ^fastrandseed))
636	if mp.fastrand[0]|mp.fastrand[1] == 0 {
637		mp.fastrand[1] = 1
638	}
639
640	mpreinit(mp)
641
642	// Add to allm so garbage collector doesn't free g->m
643	// when it is just in a register or thread-local storage.
644	mp.alllink = allm
645
646	// NumCgoCall() iterates over allm w/o schedlock,
647	// so we need to publish it safely.
648	atomicstorep(unsafe.Pointer(&allm), unsafe.Pointer(mp))
649	unlock(&sched.lock)
650}
651
652var fastrandseed uintptr
653
654func fastrandinit() {
655	s := (*[unsafe.Sizeof(fastrandseed)]byte)(unsafe.Pointer(&fastrandseed))[:]
656	getRandomData(s)
657}
658
659// Mark gp ready to run.
660func ready(gp *g, traceskip int, next bool) {
661	if trace.enabled {
662		traceGoUnpark(gp, traceskip)
663	}
664
665	status := readgstatus(gp)
666
667	// Mark runnable.
668	_g_ := getg()
669	mp := acquirem() // disable preemption because it can be holding p in a local var
670	if status&^_Gscan != _Gwaiting {
671		dumpgstatus(gp)
672		throw("bad g->status in ready")
673	}
674
675	// status is Gwaiting or Gscanwaiting, make Grunnable and put on runq
676	casgstatus(gp, _Gwaiting, _Grunnable)
677	runqput(_g_.m.p.ptr(), gp, next)
678	if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 {
679		wakep()
680	}
681	releasem(mp)
682}
683
684// freezeStopWait is a large value that freezetheworld sets
685// sched.stopwait to in order to request that all Gs permanently stop.
686const freezeStopWait = 0x7fffffff
687
688// freezing is set to non-zero if the runtime is trying to freeze the
689// world.
690var freezing uint32
691
692// Similar to stopTheWorld but best-effort and can be called several times.
// There is no reverse operation; it is used during crashing.
694// This function must not lock any mutexes.
695func freezetheworld() {
696	atomic.Store(&freezing, 1)
697	// stopwait and preemption requests can be lost
698	// due to races with concurrently executing threads,
699	// so try several times
700	for i := 0; i < 5; i++ {
701		// this should tell the scheduler to not start any new goroutines
702		sched.stopwait = freezeStopWait
703		atomic.Store(&sched.gcwaiting, 1)
704		// this should stop running goroutines
705		if !preemptall() {
706			break // no running goroutines
707		}
708		usleep(1000)
709	}
710	// to be sure
711	usleep(1000)
712	preemptall()
713	usleep(1000)
714}
715
// All reads and writes of g's status go through readgstatus, casgstatus,
// castogscanstatus and casfrom_Gscanstatus.
718//go:nosplit
719func readgstatus(gp *g) uint32 {
720	return atomic.Load(&gp.atomicstatus)
721}
722
723// The Gscanstatuses are acting like locks and this releases them.
724// If it proves to be a performance hit we should be able to make these
725// simple atomic stores but for now we are going to throw if
726// we see an inconsistent state.
727func casfrom_Gscanstatus(gp *g, oldval, newval uint32) {
728	success := false
729
730	// Check that transition is valid.
731	switch oldval {
732	default:
733		print("runtime: casfrom_Gscanstatus bad oldval gp=", gp, ", oldval=", hex(oldval), ", newval=", hex(newval), "\n")
734		dumpgstatus(gp)
735		throw("casfrom_Gscanstatus:top gp->status is not in scan state")
736	case _Gscanrunnable,
737		_Gscanwaiting,
738		_Gscanrunning,
739		_Gscansyscall,
740		_Gscanpreempted:
741		if newval == oldval&^_Gscan {
742			success = atomic.Cas(&gp.atomicstatus, oldval, newval)
743		}
744	}
745	if !success {
746		print("runtime: casfrom_Gscanstatus failed gp=", gp, ", oldval=", hex(oldval), ", newval=", hex(newval), "\n")
747		dumpgstatus(gp)
748		throw("casfrom_Gscanstatus: gp->status is not in scan state")
749	}
750}
751
752// This will return false if the gp is not in the expected status and the cas fails.
// This acts like a lock acquire while casfrom_Gscanstatus acts like a lock release.
754func castogscanstatus(gp *g, oldval, newval uint32) bool {
755	switch oldval {
756	case _Grunnable,
757		_Grunning,
758		_Gwaiting,
759		_Gsyscall:
760		if newval == oldval|_Gscan {
761			return atomic.Cas(&gp.atomicstatus, oldval, newval)
762		}
763	}
764	print("runtime: castogscanstatus oldval=", hex(oldval), " newval=", hex(newval), "\n")
765	throw("castogscanstatus")
766	panic("not reached")
767}
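
// The scan statuses act as a lock on the G's stack. For example, a scanner
// typically brackets its work like this (an illustrative sketch):
//
//	if castogscanstatus(gp, _Gwaiting, _Gscanwaiting) {
//		// gp cannot be rescheduled while the _Gscan bit is held,
//		// so it is safe to examine its stack here.
//		casfrom_Gscanstatus(gp, _Gscanwaiting, _Gwaiting)
//	}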
768
// If asked to move to or from a Gscan status this will throw. Use castogscanstatus
// and casfrom_Gscanstatus instead.
771// casgstatus will loop if the g->atomicstatus is in a Gscan status until the routine that
772// put it in the Gscan state is finished.
773//go:nosplit
774func casgstatus(gp *g, oldval, newval uint32) {
775	if (oldval&_Gscan != 0) || (newval&_Gscan != 0) || oldval == newval {
776		systemstack(func() {
777			print("runtime: casgstatus: oldval=", hex(oldval), " newval=", hex(newval), "\n")
778			throw("casgstatus: bad incoming values")
779		})
780	}
781
782	// See https://golang.org/cl/21503 for justification of the yield delay.
783	const yieldDelay = 5 * 1000
784	var nextYield int64
785
786	// loop if gp->atomicstatus is in a scan state giving
787	// GC time to finish and change the state to oldval.
788	for i := 0; !atomic.Cas(&gp.atomicstatus, oldval, newval); i++ {
789		if oldval == _Gwaiting && gp.atomicstatus == _Grunnable {
790			throw("casgstatus: waiting for Gwaiting but is Grunnable")
791		}
792		if i == 0 {
793			nextYield = nanotime() + yieldDelay
794		}
795		if nanotime() < nextYield {
796			for x := 0; x < 10 && gp.atomicstatus != oldval; x++ {
797				procyield(1)
798			}
799		} else {
800			osyield()
801			nextYield = nanotime() + yieldDelay/2
802		}
803	}
804}
805
806// casGToPreemptScan transitions gp from _Grunning to _Gscan|_Gpreempted.
807//
808// TODO(austin): This is the only status operation that both changes
809// the status and locks the _Gscan bit. Rethink this.
810func casGToPreemptScan(gp *g, old, new uint32) {
811	if old != _Grunning || new != _Gscan|_Gpreempted {
812		throw("bad g transition")
813	}
814	for !atomic.Cas(&gp.atomicstatus, _Grunning, _Gscan|_Gpreempted) {
815	}
816}
817
818// casGFromPreempted attempts to transition gp from _Gpreempted to
819// _Gwaiting. If successful, the caller is responsible for
820// re-scheduling gp.
821func casGFromPreempted(gp *g, old, new uint32) bool {
822	if old != _Gpreempted || new != _Gwaiting {
823		throw("bad g transition")
824	}
825	return atomic.Cas(&gp.atomicstatus, _Gpreempted, _Gwaiting)
826}
827
828// stopTheWorld stops all P's from executing goroutines, interrupting
// all goroutines at GC safe points and recording reason as the reason
830// for the stop. On return, only the current goroutine's P is running.
831// stopTheWorld must not be called from a system stack and the caller
832// must not hold worldsema. The caller must call startTheWorld when
833// other P's should resume execution.
834//
835// stopTheWorld is safe for multiple goroutines to call at the
836// same time. Each will execute its own stop, and the stops will
837// be serialized.
838//
839// This is also used by routines that do stack dumps. If the system is
840// in panic or being exited, this may not reliably stop all
841// goroutines.
842func stopTheWorld(reason string) {
843	semacquire(&worldsema)
844	getg().m.preemptoff = reason
845	systemstack(stopTheWorldWithSema)
846}
847
848// startTheWorld undoes the effects of stopTheWorld.
849func startTheWorld() {
850	systemstack(func() { startTheWorldWithSema(false) })
851	// worldsema must be held over startTheWorldWithSema to ensure
852	// gomaxprocs cannot change while worldsema is held.
853	semrelease(&worldsema)
854	getg().m.preemptoff = ""
855}
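
// A typical caller brackets an operation that must run with the world stopped
// (an illustrative sketch; see e.g. ReadMemStats):
//
//	stopTheWorld("example reason")
//	// ... inspect or update state that must not change concurrently ...
//	startTheWorld()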
856
857// Holding worldsema grants an M the right to try to stop the world
858// and prevents gomaxprocs from changing concurrently.
859var worldsema uint32 = 1
860
861// stopTheWorldWithSema is the core implementation of stopTheWorld.
862// The caller is responsible for acquiring worldsema and disabling
// preemption first and then should call stopTheWorldWithSema on the system
864// stack:
865//
866//	semacquire(&worldsema, 0)
867//	m.preemptoff = "reason"
868//	systemstack(stopTheWorldWithSema)
869//
870// When finished, the caller must either call startTheWorld or undo
871// these three operations separately:
872//
873//	m.preemptoff = ""
874//	systemstack(startTheWorldWithSema)
875//	semrelease(&worldsema)
876//
877// It is allowed to acquire worldsema once and then execute multiple
878// startTheWorldWithSema/stopTheWorldWithSema pairs.
879// Other P's are able to execute between successive calls to
880// startTheWorldWithSema and stopTheWorldWithSema.
881// Holding worldsema causes any other goroutines invoking
882// stopTheWorld to block.
883func stopTheWorldWithSema() {
884	_g_ := getg()
885
886	// If we hold a lock, then we won't be able to stop another M
887	// that is blocked trying to acquire the lock.
888	if _g_.m.locks > 0 {
889		throw("stopTheWorld: holding locks")
890	}
891
892	lock(&sched.lock)
893	sched.stopwait = gomaxprocs
894	atomic.Store(&sched.gcwaiting, 1)
895	preemptall()
896	// stop current P
897	_g_.m.p.ptr().status = _Pgcstop // Pgcstop is only diagnostic.
898	sched.stopwait--
899	// try to retake all P's in Psyscall status
900	for _, p := range allp {
901		s := p.status
902		if s == _Psyscall && atomic.Cas(&p.status, s, _Pgcstop) {
903			if trace.enabled {
904				traceGoSysBlock(p)
905				traceProcStop(p)
906			}
907			p.syscalltick++
908			sched.stopwait--
909		}
910	}
911	// stop idle P's
912	for {
913		p := pidleget()
914		if p == nil {
915			break
916		}
917		p.status = _Pgcstop
918		sched.stopwait--
919	}
920	wait := sched.stopwait > 0
921	unlock(&sched.lock)
922
923	// wait for remaining P's to stop voluntarily
924	if wait {
925		for {
926			// wait for 100us, then try to re-preempt in case of any races
927			if notetsleep(&sched.stopnote, 100*1000) {
928				noteclear(&sched.stopnote)
929				break
930			}
931			preemptall()
932		}
933	}
934
935	// sanity checks
936	bad := ""
937	if sched.stopwait != 0 {
938		bad = "stopTheWorld: not stopped (stopwait != 0)"
939	} else {
940		for _, p := range allp {
941			if p.status != _Pgcstop {
942				bad = "stopTheWorld: not stopped (status != _Pgcstop)"
943			}
944		}
945	}
946	if atomic.Load(&freezing) != 0 {
947		// Some other thread is panicking. This can cause the
948		// sanity checks above to fail if the panic happens in
949		// the signal handler on a stopped thread. Either way,
950		// we should halt this thread.
951		lock(&deadlock)
952		lock(&deadlock)
953	}
954	if bad != "" {
955		throw(bad)
956	}
957}
958
959func startTheWorldWithSema(emitTraceEvent bool) int64 {
960	mp := acquirem() // disable preemption because it can be holding p in a local var
961	if netpollinited() {
962		list := netpoll(0) // non-blocking
963		injectglist(&list)
964	}
965	lock(&sched.lock)
966
967	procs := gomaxprocs
968	if newprocs != 0 {
969		procs = newprocs
970		newprocs = 0
971	}
972	p1 := procresize(procs)
973	sched.gcwaiting = 0
974	if sched.sysmonwait != 0 {
975		sched.sysmonwait = 0
976		notewakeup(&sched.sysmonnote)
977	}
978	unlock(&sched.lock)
979
980	for p1 != nil {
981		p := p1
982		p1 = p1.link.ptr()
983		if p.m != 0 {
984			mp := p.m.ptr()
985			p.m = 0
986			if mp.nextp != 0 {
987				throw("startTheWorld: inconsistent mp->nextp")
988			}
989			mp.nextp.set(p)
990			notewakeup(&mp.park)
991		} else {
992			// Start M to run P.  Do not start another M below.
993			newm(nil, p)
994		}
995	}
996
997	// Capture start-the-world time before doing clean-up tasks.
998	startTime := nanotime()
999	if emitTraceEvent {
1000		traceGCSTWDone()
1001	}
1002
	// Wake up an additional proc in case we have excessive runnable goroutines
	// in local queues or in the global queue. If we don't, the proc will park itself.
	// If we have lots of excess work, resetspinning will unpark additional procs as necessary.
1006	if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 {
1007		wakep()
1008	}
1009
1010	releasem(mp)
1011
1012	return startTime
1013}
1014
1015// First function run by a new goroutine.
1016// This is passed to makecontext.
1017func kickoff() {
1018	gp := getg()
1019
1020	if gp.traceback != 0 {
1021		gtraceback(gp)
1022	}
1023
1024	fv := gp.entry
1025	param := gp.param
1026
1027	// When running on the g0 stack we can wind up here without a p,
1028	// for example from mcall(exitsyscall0) in exitsyscall, in
1029	// which case we can not run a write barrier.
1030	// It is also possible for us to get here from the systemstack
1031	// call in wbBufFlush, at which point the write barrier buffer
1032	// is full and we can not run a write barrier.
1033	// Setting gp.entry = nil or gp.param = nil will try to run a
1034	// write barrier, so if we are on the g0 stack due to mcall
1035	// (systemstack calls mcall) then clear the field using uintptr.
1036	// This is OK when gp.param is gp.m.curg, as curg will be kept
1037	// alive elsewhere, and gp.entry always points into g, or
1038	// to a statically allocated value, or (in the case of mcall)
1039	// to the stack.
1040	if gp == gp.m.g0 && gp.param == unsafe.Pointer(gp.m.curg) {
1041		*(*uintptr)(unsafe.Pointer(&gp.entry)) = 0
1042		*(*uintptr)(unsafe.Pointer(&gp.param)) = 0
1043	} else if gp.m.p == 0 {
1044		throw("no p in kickoff")
1045	} else {
1046		gp.entry = nil
1047		gp.param = nil
1048	}
1049
1050	// Record the entry SP to help stack scan.
1051	gp.entrysp = getsp()
1052
1053	fv(param)
1054	goexit1()
1055}
1056
1057func mstart1() {
1058	_g_ := getg()
1059
1060	if _g_ != _g_.m.g0 {
1061		throw("bad runtime·mstart")
1062	}
1063
1064	asminit()
1065
1066	// Install signal handlers; after minit so that minit can
1067	// prepare the thread to be able to handle the signals.
1068	// For gccgo minit was called by C code.
1069	if _g_.m == &m0 {
1070		mstartm0()
1071	}
1072
1073	if fn := _g_.m.mstartfn; fn != nil {
1074		fn()
1075	}
1076
1077	if _g_.m != &m0 {
1078		acquirep(_g_.m.nextp.ptr())
1079		_g_.m.nextp = 0
1080	}
1081	schedule()
1082}
1083
1084// mstartm0 implements part of mstart1 that only runs on the m0.
1085//
1086// Write barriers are allowed here because we know the GC can't be
1087// running yet, so they'll be no-ops.
1088//
1089//go:yeswritebarrierrec
1090func mstartm0() {
1091	// Create an extra M for callbacks on threads not created by Go.
1092	// An extra M is also needed on Windows for callbacks created by
1093	// syscall.NewCallback. See issue #6751 for details.
1094	if (iscgo || GOOS == "windows") && !cgoHasExtraM {
1095		cgoHasExtraM = true
1096		newextram()
1097	}
1098	initsig(false)
1099}
1100
1101// mexit tears down and exits the current thread.
1102//
1103// Don't call this directly to exit the thread, since it must run at
1104// the top of the thread stack. Instead, use gogo(&_g_.m.g0.sched) to
1105// unwind the stack to the point that exits the thread.
1106//
1107// It is entered with m.p != nil, so write barriers are allowed. It
1108// will release the P before exiting.
1109//
1110//go:yeswritebarrierrec
1111func mexit(osStack bool) {
1112	g := getg()
1113	m := g.m
1114
1115	if m == &m0 {
1116		// This is the main thread. Just wedge it.
1117		//
1118		// On Linux, exiting the main thread puts the process
1119		// into a non-waitable zombie state. On Plan 9,
1120		// exiting the main thread unblocks wait even though
1121		// other threads are still running. On Solaris we can
1122		// neither exitThread nor return from mstart. Other
1123		// bad things probably happen on other platforms.
1124		//
1125		// We could try to clean up this M more before wedging
1126		// it, but that complicates signal handling.
1127		handoffp(releasep())
1128		lock(&sched.lock)
1129		sched.nmfreed++
1130		checkdead()
1131		unlock(&sched.lock)
1132		notesleep(&m.park)
1133		throw("locked m0 woke up")
1134	}
1135
1136	sigblock()
1137	unminit()
1138
1139	// Free the gsignal stack.
1140	if m.gsignal != nil {
1141		stackfree(m.gsignal)
1142		// On some platforms, when calling into VDSO (e.g. nanotime)
1143		// we store our g on the gsignal stack, if there is one.
		// Now that the stack is freed, unlink it from the m so we
		// won't write to it when calling VDSO code.
1146		m.gsignal = nil
1147	}
1148
1149	// Remove m from allm.
1150	lock(&sched.lock)
1151	for pprev := &allm; *pprev != nil; pprev = &(*pprev).alllink {
1152		if *pprev == m {
1153			*pprev = m.alllink
1154			goto found
1155		}
1156	}
1157	throw("m not found in allm")
1158found:
1159	if !osStack {
1160		// Delay reaping m until it's done with the stack.
1161		//
1162		// If this is using an OS stack, the OS will free it
1163		// so there's no need for reaping.
1164		atomic.Store(&m.freeWait, 1)
1165		// Put m on the free list, though it will not be reaped until
1166		// freeWait is 0. Note that the free list must not be linked
1167		// through alllink because some functions walk allm without
1168		// locking, so may be using alllink.
1169		m.freelink = sched.freem
1170		sched.freem = m
1171	}
1172	unlock(&sched.lock)
1173
1174	// Release the P.
1175	handoffp(releasep())
1176	// After this point we must not have write barriers.
1177
1178	// Invoke the deadlock detector. This must happen after
1179	// handoffp because it may have started a new M to take our
1180	// P's work.
1181	lock(&sched.lock)
1182	sched.nmfreed++
1183	checkdead()
1184	unlock(&sched.lock)
1185
1186	if osStack {
1187		// Return from mstart and let the system thread
1188		// library free the g0 stack and terminate the thread.
1189		return
1190	}
1191
1192	// mstart is the thread's entry point, so there's nothing to
1193	// return to. Exit the thread directly. exitThread will clear
1194	// m.freeWait when it's done with the stack and the m can be
1195	// reaped.
1196	exitThread(&m.freeWait)
1197}
1198
1199// forEachP calls fn(p) for every P p when p reaches a GC safe point.
1200// If a P is currently executing code, this will bring the P to a GC
1201// safe point and execute fn on that P. If the P is not executing code
1202// (it is idle or in a syscall), this will call fn(p) directly while
1203// preventing the P from exiting its state. This does not ensure that
1204// fn will run on every CPU executing Go code, but it acts as a global
1205// memory barrier. GC uses this as a "ragged barrier."
1206//
1207// The caller must hold worldsema.
1208//
1209//go:systemstack
1210func forEachP(fn func(*p)) {
1211	mp := acquirem()
1212	_p_ := getg().m.p.ptr()
1213
1214	lock(&sched.lock)
1215	if sched.safePointWait != 0 {
1216		throw("forEachP: sched.safePointWait != 0")
1217	}
1218	sched.safePointWait = gomaxprocs - 1
1219	sched.safePointFn = fn
1220
1221	// Ask all Ps to run the safe point function.
1222	for _, p := range allp {
1223		if p != _p_ {
1224			atomic.Store(&p.runSafePointFn, 1)
1225		}
1226	}
1227	preemptall()
1228
1229	// Any P entering _Pidle or _Psyscall from now on will observe
1230	// p.runSafePointFn == 1 and will call runSafePointFn when
1231	// changing its status to _Pidle/_Psyscall.
1232
1233	// Run safe point function for all idle Ps. sched.pidle will
1234	// not change because we hold sched.lock.
1235	for p := sched.pidle.ptr(); p != nil; p = p.link.ptr() {
1236		if atomic.Cas(&p.runSafePointFn, 1, 0) {
1237			fn(p)
1238			sched.safePointWait--
1239		}
1240	}
1241
1242	wait := sched.safePointWait > 0
1243	unlock(&sched.lock)
1244
1245	// Run fn for the current P.
1246	fn(_p_)
1247
1248	// Force Ps currently in _Psyscall into _Pidle and hand them
1249	// off to induce safe point function execution.
1250	for _, p := range allp {
1251		s := p.status
1252		if s == _Psyscall && p.runSafePointFn == 1 && atomic.Cas(&p.status, s, _Pidle) {
1253			if trace.enabled {
1254				traceGoSysBlock(p)
1255				traceProcStop(p)
1256			}
1257			p.syscalltick++
1258			handoffp(p)
1259		}
1260	}
1261
1262	// Wait for remaining Ps to run fn.
1263	if wait {
1264		for {
1265			// Wait for 100us, then try to re-preempt in
1266			// case of any races.
1267			//
1268			// Requires system stack.
1269			if notetsleep(&sched.safePointNote, 100*1000) {
1270				noteclear(&sched.safePointNote)
1271				break
1272			}
1273			preemptall()
1274		}
1275	}
1276	if sched.safePointWait != 0 {
1277		throw("forEachP: not done")
1278	}
1279	for _, p := range allp {
1280		if p.runSafePointFn != 0 {
1281			throw("forEachP: P did not run fn")
1282		}
1283	}
1284
1285	lock(&sched.lock)
1286	sched.safePointFn = nil
1287	unlock(&sched.lock)
1288	releasem(mp)
1289}
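
// For example, a caller holding worldsema can flush hypothetical per-P state
// with a ragged barrier over all Ps (an illustrative sketch; flushPLocal is
// not a real runtime function):
//
//	systemstack(func() {
//		forEachP(func(_p_ *p) {
//			flushPLocal(_p_) // runs with _p_ at a GC safe point
//		})
//	})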
1290
1291// runSafePointFn runs the safe point function, if any, for this P.
1292// This should be called like
1293//
1294//     if getg().m.p.runSafePointFn != 0 {
1295//         runSafePointFn()
1296//     }
1297//
1298// runSafePointFn must be checked on any transition in to _Pidle or
1299// _Psyscall to avoid a race where forEachP sees that the P is running
1300// just before the P goes into _Pidle/_Psyscall and neither forEachP
1301// nor the P run the safe-point function.
1302func runSafePointFn() {
1303	p := getg().m.p.ptr()
1304	// Resolve the race between forEachP running the safe-point
1305	// function on this P's behalf and this P running the
1306	// safe-point function directly.
1307	if !atomic.Cas(&p.runSafePointFn, 1, 0) {
1308		return
1309	}
1310	sched.safePointFn(p)
1311	lock(&sched.lock)
1312	sched.safePointWait--
1313	if sched.safePointWait == 0 {
1314		notewakeup(&sched.safePointNote)
1315	}
1316	unlock(&sched.lock)
1317}
1318
1319// Allocate a new m unassociated with any thread.
1320// Can use p for allocation context if needed.
1321// fn is recorded as the new m's m.mstartfn.
1322//
1323// This function is allowed to have write barriers even if the caller
1324// isn't because it borrows _p_.
1325//
1326//go:yeswritebarrierrec
1327func allocm(_p_ *p, fn func(), allocatestack bool) (mp *m, g0Stack unsafe.Pointer, g0StackSize uintptr) {
1328	_g_ := getg()
1329	acquirem() // disable GC because it can be called from sysmon
1330	if _g_.m.p == 0 {
1331		acquirep(_p_) // temporarily borrow p for mallocs in this function
1332	}
1333
1334	// Release the free M list. We need to do this somewhere and
1335	// this may free up a stack we can use.
1336	if sched.freem != nil {
1337		lock(&sched.lock)
1338		var newList *m
1339		for freem := sched.freem; freem != nil; {
1340			if freem.freeWait != 0 {
1341				next := freem.freelink
1342				freem.freelink = newList
1343				newList = freem
1344				freem = next
1345				continue
1346			}
1347			stackfree(freem.g0)
1348			freem = freem.freelink
1349		}
1350		sched.freem = newList
1351		unlock(&sched.lock)
1352	}
1353
1354	mp = new(m)
1355	mp.mstartfn = fn
1356	mcommoninit(mp)
1357
1358	mp.g0 = malg(allocatestack, false, &g0Stack, &g0StackSize)
1359	mp.g0.m = mp
1360
1361	if _p_ == _g_.m.p.ptr() {
1362		releasep()
1363	}
1364	releasem(_g_.m)
1365
1366	return mp, g0Stack, g0StackSize
1367}
1368
1369// needm is called when a cgo callback happens on a
1370// thread without an m (a thread not created by Go).
1371// In this case, needm is expected to find an m to use
1372// and return with m, g initialized correctly.
1373// Since m and g are not set now (likely nil, but see below)
1374// needm is limited in what routines it can call. In particular
1375// it can only call nosplit functions (textflag 7) and cannot
1376// do any scheduling that requires an m.
1377//
1378// In order to avoid needing heavy lifting here, we adopt
1379// the following strategy: there is a stack of available m's
1380// that can be stolen. Using compare-and-swap
1381// to pop from the stack has ABA races, so we simulate
1382// a lock by doing an exchange (via Casuintptr) to steal the stack
1383// head and replace the top pointer with MLOCKED (1).
1384// This serves as a simple spin lock that we can use even
1385// without an m. The thread that locks the stack in this way
1386// unlocks the stack by storing a valid stack head pointer.
1387//
1388// In order to make sure that there is always an m structure
1389// available to be stolen, we maintain the invariant that there
1390// is always one more than needed. At the beginning of the
1391// program (if cgo is in use) the list is seeded with a single m.
1392// If needm finds that it has taken the last m off the list, its job
1393// is - once it has installed its own m so that it can do things like
1394// allocate memory - to create a spare m and put it on the list.
1395//
1396// Each of these extra m's also has a g0 and a curg that are
1397// pressed into service as the scheduling stack and current
1398// goroutine for the duration of the cgo callback.
1399//
1400// When the callback is done with the m, it calls dropm to
1401// put the m back on the list.
1402//go:nosplit
1403func needm(x byte) {
1404	if (iscgo || GOOS == "windows") && !cgoHasExtraM {
1405		// Can happen if C/C++ code calls Go from a global ctor.
1406		// Can also happen on Windows if a global ctor uses a
1407		// callback created by syscall.NewCallback. See issue #6751
1408		// for details.
1409		//
1410		// Can not throw, because scheduler is not initialized yet.
1411		write(2, unsafe.Pointer(&earlycgocallback[0]), int32(len(earlycgocallback)))
1412		exit(1)
1413	}
1414
1415	// Lock extra list, take head, unlock popped list.
1416	// nilokay=false is safe here because of the invariant above,
1417	// that the extra list always contains or will soon contain
1418	// at least one m.
1419	mp := lockextra(false)
1420
1421	// Set needextram when we've just emptied the list,
1422	// so that the eventual call into cgocallbackg will
1423	// allocate a new m for the extra list. We delay the
1424	// allocation until then so that it can be done
1425	// after exitsyscall makes sure it is okay to be
1426	// running at all (that is, there's no garbage collection
1427	// running right now).
1428	mp.needextram = mp.schedlink == 0
1429	extraMCount--
1430	unlockextra(mp.schedlink.ptr())
1431
1432	// Save and block signals before installing g.
1433	// Once g is installed, any incoming signals will try to execute,
1434	// but we won't have the sigaltstack settings and other data
1435	// set up appropriately until the end of minit, which will
1436	// unblock the signals. This is the same dance as when
1437	// starting a new m to run Go code via newosproc.
1438	msigsave(mp)
1439	sigblock()
1440
1441	// Install g (= m->curg).
1442	setg(mp.curg)
1443
1444	// Initialize this thread to use the m.
1445	asminit()
1446	minit()
1447
1448	setGContext()
1449
1450	// mp.curg is now a real goroutine.
1451	casgstatus(mp.curg, _Gdead, _Gsyscall)
1452	atomic.Xadd(&sched.ngsys, -1)
1453}
1454
1455var earlycgocallback = []byte("fatal error: cgo callback before cgo call\n")
1456
1457// newextram allocates m's and puts them on the extra list.
1458// It is called with a working local m, so that it can do things
1459// like call schedlock and allocate.
1460func newextram() {
1461	c := atomic.Xchg(&extraMWaiters, 0)
1462	if c > 0 {
1463		for i := uint32(0); i < c; i++ {
1464			oneNewExtraM()
1465		}
1466	} else {
1467		// Make sure there is at least one extra M.
1468		mp := lockextra(true)
1469		unlockextra(mp)
1470		if mp == nil {
1471			oneNewExtraM()
1472		}
1473	}
1474}
1475
1476// oneNewExtraM allocates an m and puts it on the extra list.
1477func oneNewExtraM() {
1478	// Create extra goroutine locked to extra m.
1479	// The goroutine is the context in which the cgo callback will run.
1480	// The sched.pc will never be returned to, but setting it to
1481	// goexit makes clear to the traceback routines where
1482	// the goroutine stack ends.
1483	mp, g0SP, g0SPSize := allocm(nil, nil, true)
1484	gp := malg(true, false, nil, nil)
1485	// malg returns status as _Gidle. Change to _Gdead before
1486	// adding to allg where GC can see it. We use _Gdead to hide
1487	// this from tracebacks and stack scans since it isn't a
1488	// "real" goroutine until needm grabs it.
1489	casgstatus(gp, _Gidle, _Gdead)
1490	gp.m = mp
1491	mp.curg = gp
1492	mp.lockedInt++
1493	mp.lockedg.set(gp)
1494	gp.lockedm.set(mp)
1495	gp.goid = int64(atomic.Xadd64(&sched.goidgen, 1))
1496	// put on allg for garbage collector
1497	allgadd(gp)
1498
1499	// The context for gp will be set up in needm.
1500	// Here we need to set the context for g0.
1501	makeGContext(mp.g0, g0SP, g0SPSize)
1502
1503	// gp is now on the allg list, but we don't want it to be
1504	// counted by gcount. It would be more "proper" to increment
1505	// sched.ngfree, but that requires locking. Incrementing ngsys
1506	// has the same effect.
1507	atomic.Xadd(&sched.ngsys, +1)
1508
1509	// Add m to the extra list.
1510	mnext := lockextra(true)
1511	mp.schedlink.set(mnext)
1512	extraMCount++
1513	unlockextra(mp)
1514}
1515
1516// dropm is called when a cgo callback has called needm but is now
1517// done with the callback and returning back into the non-Go thread.
1518// It puts the current m back onto the extra list.
1519//
1520// The main expense here is the call to signalstack to release the
1521// m's signal stack, and then the call to needm on the next callback
1522// from this thread. It is tempting to try to save the m for next time,
1523// which would eliminate both these costs, but there might not be
1524// a next time: the current thread (which Go does not control) might exit.
1525// If we saved the m for that thread, there would be an m leak each time
1526// such a thread exited. Instead, we acquire and release an m on each
1527// call. These should typically not be scheduling operations, just a few
1528// atomics, so the cost should be small.
1529//
1530// TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
1531// variable using pthread_key_create. Unlike the pthread keys we already use
1532// on OS X, this dummy key would never be read by Go code. It would exist
// only so that we could register a thread-exit-time destructor.
1534// That destructor would put the m back onto the extra list.
1535// This is purely a performance optimization. The current version,
1536// in which dropm happens on each cgo call, is still correct too.
1537// We may have to keep the current version on systems with cgo
1538// but without pthreads, like Windows.
1539//
1540// CgocallBackDone calls this after releasing p, so no write barriers.
1541//go:nowritebarrierrec
1542func dropm() {
1543	// Clear m and g, and return m to the extra list.
1544	// After the call to setg we can only call nosplit functions
1545	// with no pointer manipulation.
1546	mp := getg().m
1547
1548	// Return mp.curg to dead state.
1549	casgstatus(mp.curg, _Gsyscall, _Gdead)
1550	mp.curg.preemptStop = false
1551	atomic.Xadd(&sched.ngsys, +1)
1552
1553	// Block signals before unminit.
1554	// Unminit unregisters the signal handling stack (but needs g on some systems).
1555	// Setg(nil) clears g, which is the signal handler's cue not to run Go handlers.
1556	// It's important not to try to handle a signal between those two steps.
1557	sigmask := mp.sigmask
1558	sigblock()
1559	unminit()
1560
1561	// gccgo sets the stack to Gdead here, because the splitstack
1562	// context is not initialized.
1563	atomic.Store(&mp.curg.atomicstatus, _Gdead)
1564	mp.curg.gcstack = 0
1565	mp.curg.gcnextsp = 0
1566
1567	mnext := lockextra(true)
1568	extraMCount++
1569	mp.schedlink.set(mnext)
1570
1571	setg(nil)
1572
1573	// Commit the release of mp.
1574	unlockextra(mp)
1575
1576	msigrestore(sigmask)
1577}
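
// From a non-Go thread's point of view, the needm/dropm pairing around a cgo
// callback looks roughly like this (an illustrative sketch; the real sequence
// lives in the cgo callback entry path):
//
//	needm(0)  // borrow an extra M and install its g
//	// ... cgocallbackg runs the Go callback ...
//	dropm()   // return the M to the extra list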
1578
1579// A helper function for EnsureDropM.
1580func getm() uintptr {
1581	return uintptr(unsafe.Pointer(getg().m))
1582}
1583
1584var extram uintptr
1585var extraMCount uint32 // Protected by lockextra
1586var extraMWaiters uint32
1587
1588// lockextra locks the extra list and returns the list head.
1589// The caller must unlock the list by storing a new list head
1590// to extram. If nilokay is true, then lockextra will
1591// return a nil list head if that's what it finds. If nilokay is false,
1592// lockextra will keep waiting until the list head is no longer nil.
1593//go:nosplit
1594//go:nowritebarrierrec
1595func lockextra(nilokay bool) *m {
1596	const locked = 1
1597
1598	incr := false
1599	for {
1600		old := atomic.Loaduintptr(&extram)
1601		if old == locked {
1602			yield := osyield
1603			yield()
1604			continue
1605		}
1606		if old == 0 && !nilokay {
1607			if !incr {
1608				// Add 1 to the number of threads
1609				// waiting for an M.
1610				// This is cleared by newextram.
1611				atomic.Xadd(&extraMWaiters, 1)
1612				incr = true
1613			}
1614			usleep(1)
1615			continue
1616		}
1617		if atomic.Casuintptr(&extram, old, locked) {
1618			return (*m)(unsafe.Pointer(old))
1619		}
1620		yield := osyield
1621		yield()
1622		continue
1623	}
1624}
1625
1626//go:nosplit
1627//go:nowritebarrierrec
1628func unlockextra(mp *m) {
1629	atomic.Storeuintptr(&extram, uintptr(unsafe.Pointer(mp)))
1630}
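
// Callers treat lockextra/unlockextra as a spin lock over the extra M list
// (an illustrative sketch):
//
//	mp := lockextra(true) // take the head; the list is now locked
//	// ... push or pop an M by adjusting mp.schedlink ...
//	unlockextra(mp)       // store the new head, unlocking the list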
1631
1632// execLock serializes exec and clone to avoid bugs or unspecified behaviour
1633// around exec'ing while creating/destroying threads.  See issue #19546.
1634var execLock rwmutex
1635
1636// newmHandoff contains a list of m structures that need new OS threads.
1637// This is used by newm in situations where newm itself can't safely
1638// start an OS thread.
1639var newmHandoff struct {
1640	lock mutex
1641
1642	// newm points to a list of M structures that need new OS
1643	// threads. The list is linked through m.schedlink.
1644	newm muintptr
1645
1646	// waiting indicates that wake needs to be notified when an m
1647	// is put on the list.
1648	waiting bool
1649	wake    note
1650
1651	// haveTemplateThread indicates that the templateThread has
1652	// been started. This is not protected by lock. Use cas to set
1653	// to 1.
1654	haveTemplateThread uint32
1655}
1656
1657// Create a new m. It will start off with a call to fn, or else the scheduler.
1658// fn needs to be static and not a heap allocated closure.
1659// May run with m.p==nil, so write barriers are not allowed.
1660//go:nowritebarrierrec
1661func newm(fn func(), _p_ *p) {
1662	mp, _, _ := allocm(_p_, fn, false)
1663	mp.nextp.set(_p_)
1664	mp.sigmask = initSigmask
1665	if gp := getg(); gp != nil && gp.m != nil && (gp.m.lockedExt != 0 || gp.m.incgo) && GOOS != "plan9" {
1666		// We're on a locked M or a thread that may have been
1667		// started by C. The kernel state of this thread may
1668		// be strange (the user may have locked it for that
1669		// purpose). We don't want to clone that into another
1670		// thread. Instead, ask a known-good thread to create
1671		// the thread for us.
1672		//
1673		// This is disabled on Plan 9. See golang.org/issue/22227.
1674		//
1675		// TODO: This may be unnecessary on Windows, which
1676		// doesn't model thread creation off fork.
1677		lock(&newmHandoff.lock)
1678		if newmHandoff.haveTemplateThread == 0 {
1679			throw("on a locked thread with no template thread")
1680		}
1681		mp.schedlink = newmHandoff.newm
1682		newmHandoff.newm.set(mp)
1683		if newmHandoff.waiting {
1684			newmHandoff.waiting = false
1685			notewakeup(&newmHandoff.wake)
1686		}
1687		unlock(&newmHandoff.lock)
1688		return
1689	}
1690	newm1(mp)
1691}
1692
1693func newm1(mp *m) {
1694	execLock.rlock() // Prevent process clone.
1695	newosproc(mp)
1696	execLock.runlock()
1697}
1698
1699// startTemplateThread starts the template thread if it is not already
1700// running.
1701//
1702// The calling thread must itself be in a known-good state.
1703func startTemplateThread() {
1704	if GOARCH == "wasm" { // no threads on wasm yet
1705		return
1706	}
1707
1708	// Disable preemption to guarantee that the template thread will be
1709	// created before a park once haveTemplateThread is set.
1710	mp := acquirem()
1711	if !atomic.Cas(&newmHandoff.haveTemplateThread, 0, 1) {
1712		releasem(mp)
1713		return
1714	}
1715	newm(templateThread, nil)
1716	releasem(mp)
1717}
1718
1719// templateThread is a thread in a known-good state that exists solely
1720// to start new threads in known-good states when the calling thread
1721// may not be in a good state.
1722//
1723// Many programs never need this, so templateThread is started lazily
1724// when we first enter a state that might lead to running on a thread
1725// in an unknown state.
1726//
1727// templateThread runs on an M without a P, so it must not have write
1728// barriers.
1729//
1730//go:nowritebarrierrec
1731func templateThread() {
1732	lock(&sched.lock)
1733	sched.nmsys++
1734	checkdead()
1735	unlock(&sched.lock)
1736
1737	for {
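		// Drain the handoff list: detach all pending Ms, start an OS
		// thread for each of them, and then sleep on the wake note
		// until newm queues more work.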
1738		lock(&newmHandoff.lock)
1739		for newmHandoff.newm != 0 {
1740			newm := newmHandoff.newm.ptr()
1741			newmHandoff.newm = 0
1742			unlock(&newmHandoff.lock)
1743			for newm != nil {
1744				next := newm.schedlink.ptr()
1745				newm.schedlink = 0
1746				newm1(newm)
1747				newm = next
1748			}
1749			lock(&newmHandoff.lock)
1750		}
1751		newmHandoff.waiting = true
1752		noteclear(&newmHandoff.wake)
1753		unlock(&newmHandoff.lock)
1754		notesleep(&newmHandoff.wake)
1755	}
1756}
1757
1758// Stops execution of the current m until new work is available.
1759// Returns with acquired P.
1760func stopm() {
1761	_g_ := getg()
1762
1763	if _g_.m.locks != 0 {
1764		throw("stopm holding locks")
1765	}
1766	if _g_.m.p != 0 {
1767		throw("stopm holding p")
1768	}
1769	if _g_.m.spinning {
1770		throw("stopm spinning")
1771	}
1772
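	// Put this M on the idle M list and sleep on its park note until
	// another thread hands it a P via m.nextp and wakes it.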
1773	lock(&sched.lock)
1774	mput(_g_.m)
1775	unlock(&sched.lock)
1776	notesleep(&_g_.m.park)
1777	noteclear(&_g_.m.park)
1778	acquirep(_g_.m.nextp.ptr())
1779	_g_.m.nextp = 0
1780}
1781
1782func mspinning() {
1783	// startm's caller incremented nmspinning. Set the new M's spinning.
1784	getg().m.spinning = true
1785}
1786
1787// Schedules some M to run the p (creates an M if necessary).
// If p==nil, tries to get an idle P; if there are no idle P's, it does nothing.
1789// May run with m.p==nil, so write barriers are not allowed.
1790// If spinning is set, the caller has incremented nmspinning and startm will
1791// either decrement nmspinning or set m.spinning in the newly started M.
1792//go:nowritebarrierrec
1793func startm(_p_ *p, spinning bool) {
1794	lock(&sched.lock)
1795	if _p_ == nil {
1796		_p_ = pidleget()
1797		if _p_ == nil {
1798			unlock(&sched.lock)
1799			if spinning {
1800				// The caller incremented nmspinning, but there are no idle Ps,
1801				// so it's okay to just undo the increment and give up.
1802				if int32(atomic.Xadd(&sched.nmspinning, -1)) < 0 {
1803					throw("startm: negative nmspinning")
1804				}
1805			}
1806			return
1807		}
1808	}
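	// Prefer an idle M from the scheduler's free list; only create a
	// new M (and OS thread) if none is available.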
1809	mp := mget()
1810	unlock(&sched.lock)
1811	if mp == nil {
1812		var fn func()
1813		if spinning {
1814			// The caller incremented nmspinning, so set m.spinning in the new M.
1815			fn = mspinning
1816		}
1817		newm(fn, _p_)
1818		return
1819	}
1820	if mp.spinning {
1821		throw("startm: m is spinning")
1822	}
1823	if mp.nextp != 0 {
1824		throw("startm: m has p")
1825	}
1826	if spinning && !runqempty(_p_) {
1827		throw("startm: p has runnable gs")
1828	}
1829	// The caller incremented nmspinning, so set m.spinning in the new M.
1830	mp.spinning = spinning
1831	mp.nextp.set(_p_)
1832	notewakeup(&mp.park)
1833}
1834
1835// Hands off P from syscall or locked M.
1836// Always runs without a P, so write barriers are not allowed.
1837//go:nowritebarrierrec
1838func handoffp(_p_ *p) {
1839	// handoffp must start an M in any situation where
1840	// findrunnable would return a G to run on _p_.
1841
1842	// if it has local work, start it straight away
1843	if !runqempty(_p_) || sched.runqsize != 0 {
1844		startm(_p_, false)
1845		return
1846	}
1847	// if it has GC work, start it straight away
1848	if gcBlackenEnabled != 0 && gcMarkWorkAvailable(_p_) {
1849		startm(_p_, false)
1850		return
1851	}
1852	// no local work, check that there are no spinning/idle M's,
1853	// otherwise our help is not required
1854	if atomic.Load(&sched.nmspinning)+atomic.Load(&sched.npidle) == 0 && atomic.Cas(&sched.nmspinning, 0, 1) { // TODO: fast atomic
1855		startm(_p_, true)
1856		return
1857	}
1858	lock(&sched.lock)
1859	if sched.gcwaiting != 0 {
1860		_p_.status = _Pgcstop
1861		sched.stopwait--
1862		if sched.stopwait == 0 {
1863			notewakeup(&sched.stopnote)
1864		}
1865		unlock(&sched.lock)
1866		return
1867	}
1868	if _p_.runSafePointFn != 0 && atomic.Cas(&_p_.runSafePointFn, 1, 0) {
1869		sched.safePointFn(_p_)
1870		sched.safePointWait--
1871		if sched.safePointWait == 0 {
1872			notewakeup(&sched.safePointNote)
1873		}
1874	}
1875	if sched.runqsize != 0 {
1876		unlock(&sched.lock)
1877		startm(_p_, false)
1878		return
1879	}
	// If this is the last running P and nobody is polling the network,
	// we need to wake up another M to poll the network.
1882	if sched.npidle == uint32(gomaxprocs-1) && atomic.Load64(&sched.lastpoll) != 0 {
1883		unlock(&sched.lock)
1884		startm(_p_, false)
1885		return
1886	}
1887	if when := nobarrierWakeTime(_p_); when != 0 {
1888		wakeNetPoller(when)
1889	}
1890	pidleput(_p_)
1891	unlock(&sched.lock)
1892}
1893
1894// Tries to add one more P to execute G's.
1895// Called when a G is made runnable (newproc, ready).
1896func wakep() {
1897	// be conservative about spinning threads
1898	if !atomic.Cas(&sched.nmspinning, 0, 1) {
1899		return
1900	}
1901	startm(nil, true)
1902}
1903
1904// Stops execution of the current m that is locked to a g until the g is runnable again.
1905// Returns with acquired P.
1906func stoplockedm() {
1907	_g_ := getg()
1908
1909	if _g_.m.lockedg == 0 || _g_.m.lockedg.ptr().lockedm.ptr() != _g_.m {
1910		throw("stoplockedm: inconsistent locking")
1911	}
1912	if _g_.m.p != 0 {
1913		// Schedule another M to run this p.
1914		_p_ := releasep()
1915		handoffp(_p_)
1916	}
1917	incidlelocked(1)
1918	// Wait until another thread schedules lockedg again.
1919	notesleep(&_g_.m.park)
1920	noteclear(&_g_.m.park)
1921	status := readgstatus(_g_.m.lockedg.ptr())
1922	if status&^_Gscan != _Grunnable {
1923		print("runtime:stoplockedm: g is not Grunnable or Gscanrunnable\n")
1924		dumpgstatus(_g_)
1925		throw("stoplockedm: not runnable")
1926	}
1927	acquirep(_g_.m.nextp.ptr())
1928	_g_.m.nextp = 0
1929}
1930
1931// Schedules the locked m to run the locked gp.
1932// May run during STW, so write barriers are not allowed.
1933//go:nowritebarrierrec
1934func startlockedm(gp *g) {
1935	_g_ := getg()
1936
1937	mp := gp.lockedm.ptr()
1938	if mp == _g_.m {
1939		throw("startlockedm: locked to me")
1940	}
1941	if mp.nextp != 0 {
1942		throw("startlockedm: m has p")
1943	}
1944	// directly handoff current P to the locked m
1945	incidlelocked(-1)
1946	_p_ := releasep()
1947	mp.nextp.set(_p_)
1948	notewakeup(&mp.park)
1949	stopm()
1950}
1951
1952// Stops the current m for stopTheWorld.
1953// Returns when the world is restarted.
1954func gcstopm() {
1955	_g_ := getg()
1956
1957	if sched.gcwaiting == 0 {
1958		throw("gcstopm: not waiting for gc")
1959	}
1960	if _g_.m.spinning {
1961		_g_.m.spinning = false
1962		// OK to just drop nmspinning here,
1963		// startTheWorld will unpark threads as necessary.
1964		if int32(atomic.Xadd(&sched.nmspinning, -1)) < 0 {
1965			throw("gcstopm: negative nmspinning")
1966		}
1967	}
1968	_p_ := releasep()
1969	lock(&sched.lock)
1970	_p_.status = _Pgcstop
1971	sched.stopwait--
1972	if sched.stopwait == 0 {
1973		notewakeup(&sched.stopnote)
1974	}
1975	unlock(&sched.lock)
1976	stopm()
1977}
1978
1979// Schedules gp to run on the current M.
1980// If inheritTime is true, gp inherits the remaining time in the
1981// current time slice. Otherwise, it starts a new time slice.
1982// Never returns.
1983//
1984// Write barriers are allowed because this is called immediately after
1985// acquiring a P in several places.
1986//
1987//go:yeswritebarrierrec
1988func execute(gp *g, inheritTime bool) {
1989	_g_ := getg()
1990
1991	// Assign gp.m before entering _Grunning so running Gs have an
1992	// M.
1993	_g_.m.curg = gp
1994	gp.m = _g_.m
1995	casgstatus(gp, _Grunnable, _Grunning)
1996	gp.waitsince = 0
1997	gp.preempt = false
1998	if !inheritTime {
1999		_g_.m.p.ptr().schedtick++
2000	}
2001
2002	// Check whether the profiler needs to be turned on or off.
2003	hz := sched.profilehz
2004	if _g_.m.profilehz != hz {
2005		setThreadCPUProfiler(hz)
2006	}
2007
2008	if trace.enabled {
2009		// GoSysExit has to happen when we have a P, but before GoStart.
2010		// So we emit it here.
2011		if gp.syscallsp != 0 && gp.sysblocktraced {
2012			traceGoSysExit(gp.sysexitticks)
2013		}
2014		traceGoStart()
2015	}
2016
2017	gogo(gp)
2018}
2019
2020// Finds a runnable goroutine to execute.
2021// Tries to steal from other P's, get g from local or global queue, poll network.
2022func findrunnable() (gp *g, inheritTime bool) {
2023	_g_ := getg()
2024
2025	// The conditions here and in handoffp must agree: if
2026	// findrunnable would return a G to run, handoffp must start
2027	// an M.
2028
2029top:
2030	_p_ := _g_.m.p.ptr()
2031	if sched.gcwaiting != 0 {
2032		gcstopm()
2033		goto top
2034	}
2035	if _p_.runSafePointFn != 0 {
2036		runSafePointFn()
2037	}
2038
2039	now, pollUntil, _ := checkTimers(_p_, 0)
2040
2041	if fingwait && fingwake {
2042		if gp := wakefing(); gp != nil {
2043			ready(gp, 0, true)
2044		}
2045	}
2046	if *cgo_yield != nil {
2047		asmcgocall(*cgo_yield, nil)
2048	}
2049
2050	// local runq
2051	if gp, inheritTime := runqget(_p_); gp != nil {
2052		return gp, inheritTime
2053	}
2054
2055	// global runq
2056	if sched.runqsize != 0 {
2057		lock(&sched.lock)
2058		gp := globrunqget(_p_, 0)
2059		unlock(&sched.lock)
2060		if gp != nil {
2061			return gp, false
2062		}
2063	}
2064
2065	// Poll network.
2066	// This netpoll is only an optimization before we resort to stealing.
2067	// We can safely skip it if there are no waiters or a thread is blocked
2068	// in netpoll already. If there is any kind of logical race with that
2069	// blocked thread (e.g. it has already returned from netpoll, but does
2070	// not set lastpoll yet), this thread will do blocking netpoll below
2071	// anyway.
2072	if netpollinited() && atomic.Load(&netpollWaiters) > 0 && atomic.Load64(&sched.lastpoll) != 0 {
2073		if list := netpoll(0); !list.empty() { // non-blocking
2074			gp := list.pop()
2075			injectglist(&list)
2076			casgstatus(gp, _Gwaiting, _Grunnable)
2077			if trace.enabled {
2078				traceGoUnpark(gp, 0)
2079			}
2080			return gp, false
2081		}
2082	}
2083
2084	// Steal work from other P's.
2085	procs := uint32(gomaxprocs)
2086	ranTimer := false
2087	// If number of spinning M's >= number of busy P's, block.
2088	// This is necessary to prevent excessive CPU consumption
2089	// when GOMAXPROCS>>1 but the program parallelism is low.
2090	if !_g_.m.spinning && 2*atomic.Load(&sched.nmspinning) >= procs-atomic.Load(&sched.npidle) {
2091		goto stop
2092	}
2093	if !_g_.m.spinning {
2094		_g_.m.spinning = true
2095		atomic.Xadd(&sched.nmspinning, 1)
2096	}
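	// Make up to four passes over all Ps in a random order. Only the
	// last pass also tries to steal the victim's runnext slot and timers.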
2097	for i := 0; i < 4; i++ {
2098		for enum := stealOrder.start(fastrand()); !enum.done(); enum.next() {
2099			if sched.gcwaiting != 0 {
2100				goto top
2101			}
2102			stealRunNextG := i > 2 // first look for ready queues with more than 1 g
2103			p2 := allp[enum.position()]
2104			if _p_ == p2 {
2105				continue
2106			}
2107			if gp := runqsteal(_p_, p2, stealRunNextG); gp != nil {
2108				return gp, false
2109			}
2110
2111			// Consider stealing timers from p2.
2112			// This call to checkTimers is the only place where
2113			// we hold a lock on a different P's timers.
2114			// Lock contention can be a problem here, so avoid
2115			// grabbing the lock if p2 is running and not marked
2116			// for preemption. If p2 is running and not being
2117			// preempted we assume it will handle its own timers.
2118			if i > 2 && shouldStealTimers(p2) {
2119				tnow, w, ran := checkTimers(p2, now)
2120				now = tnow
2121				if w != 0 && (pollUntil == 0 || w < pollUntil) {
2122					pollUntil = w
2123				}
2124				if ran {
2125					// Running the timers may have
2126					// made an arbitrary number of G's
2127					// ready and added them to this P's
2128					// local run queue. That invalidates
2129					// the assumption of runqsteal
					// that it always has room to add
2131					// stolen G's. So check now if there
2132					// is a local G to run.
2133					if gp, inheritTime := runqget(_p_); gp != nil {
2134						return gp, inheritTime
2135					}
2136					ranTimer = true
2137				}
2138			}
2139		}
2140	}
2141	if ranTimer {
2142		// Running a timer may have made some goroutine ready.
2143		goto top
2144	}
2145
2146stop:
2147
2148	// We have nothing to do. If we're in the GC mark phase, can
2149	// safely scan and blacken objects, and have work to do, run
2150	// idle-time marking rather than give up the P.
2151	if gcBlackenEnabled != 0 && _p_.gcBgMarkWorker != 0 && gcMarkWorkAvailable(_p_) {
2152		_p_.gcMarkWorkerMode = gcMarkWorkerIdleMode
2153		gp := _p_.gcBgMarkWorker.ptr()
2154		casgstatus(gp, _Gwaiting, _Grunnable)
2155		if trace.enabled {
2156			traceGoUnpark(gp, 0)
2157		}
2158		return gp, false
2159	}
2160
2161	delta := int64(-1)
2162	if pollUntil != 0 {
		// checkTimers ensures that pollUntil > now.
2164		delta = pollUntil - now
2165	}
2166
2167	// wasm only:
2168	// If a callback returned and no other goroutine is awake,
2169	// then pause execution until a callback was triggered.
2170	if beforeIdle(delta) {
2171		// At least one goroutine got woken.
2172		goto top
2173	}
2174
2175	// Before we drop our P, make a snapshot of the allp slice,
2176	// which can change underfoot once we no longer block
2177	// safe-points. We don't need to snapshot the contents because
2178	// everything up to cap(allp) is immutable.
2179	allpSnapshot := allp
2180
2181	// return P and block
2182	lock(&sched.lock)
2183	if sched.gcwaiting != 0 || _p_.runSafePointFn != 0 {
2184		unlock(&sched.lock)
2185		goto top
2186	}
2187	if sched.runqsize != 0 {
2188		gp := globrunqget(_p_, 0)
2189		unlock(&sched.lock)
2190		return gp, false
2191	}
2192	if releasep() != _p_ {
2193		throw("findrunnable: wrong p")
2194	}
2195	pidleput(_p_)
2196	unlock(&sched.lock)
2197
2198	// Delicate dance: thread transitions from spinning to non-spinning state,
2199	// potentially concurrently with submission of new goroutines. We must
2200	// drop nmspinning first and then check all per-P queues again (with
2201	// #StoreLoad memory barrier in between). If we do it the other way around,
2202	// another thread can submit a goroutine after we've checked all run queues
	// but before we drop nmspinning; as a result nobody will unpark a thread
2204	// to run the goroutine.
2205	// If we discover new work below, we need to restore m.spinning as a signal
2206	// for resetspinning to unpark a new worker thread (because there can be more
2207	// than one starving goroutine). However, if after discovering new work
2208	// we also observe no idle Ps, it is OK to just park the current thread:
2209	// the system is fully loaded so no spinning threads are required.
2210	// Also see "Worker thread parking/unparking" comment at the top of the file.
2211	wasSpinning := _g_.m.spinning
2212	if _g_.m.spinning {
2213		_g_.m.spinning = false
2214		if int32(atomic.Xadd(&sched.nmspinning, -1)) < 0 {
2215			throw("findrunnable: negative nmspinning")
2216		}
2217	}
2218
2219	// check all runqueues once again
2220	for _, _p_ := range allpSnapshot {
2221		if !runqempty(_p_) {
2222			lock(&sched.lock)
2223			_p_ = pidleget()
2224			unlock(&sched.lock)
2225			if _p_ != nil {
2226				acquirep(_p_)
2227				if wasSpinning {
2228					_g_.m.spinning = true
2229					atomic.Xadd(&sched.nmspinning, 1)
2230				}
2231				goto top
2232			}
2233			break
2234		}
2235	}
2236
2237	// Check for idle-priority GC work again.
2238	if gcBlackenEnabled != 0 && gcMarkWorkAvailable(nil) {
2239		lock(&sched.lock)
2240		_p_ = pidleget()
2241		if _p_ != nil && _p_.gcBgMarkWorker == 0 {
2242			pidleput(_p_)
2243			_p_ = nil
2244		}
2245		unlock(&sched.lock)
2246		if _p_ != nil {
2247			acquirep(_p_)
2248			if wasSpinning {
2249				_g_.m.spinning = true
2250				atomic.Xadd(&sched.nmspinning, 1)
2251			}
2252			// Go back to idle GC check.
2253			goto stop
2254		}
2255	}
2256
2257	// poll network
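	// Block in netpoll only if there are waiters or a timer to wait for.
	// Swapping 0 into sched.lastpoll claims the blocking poll; if it was
	// already 0, another thread is polling and we leave it to that thread.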
2258	if netpollinited() && (atomic.Load(&netpollWaiters) > 0 || pollUntil != 0) && atomic.Xchg64(&sched.lastpoll, 0) != 0 {
2259		atomic.Store64(&sched.pollUntil, uint64(pollUntil))
2260		if _g_.m.p != 0 {
2261			throw("findrunnable: netpoll with p")
2262		}
2263		if _g_.m.spinning {
2264			throw("findrunnable: netpoll with spinning")
2265		}
2266		if faketime != 0 {
2267			// When using fake time, just poll.
2268			delta = 0
2269		}
2270		list := netpoll(delta) // block until new work is available
2271		atomic.Store64(&sched.pollUntil, 0)
2272		atomic.Store64(&sched.lastpoll, uint64(nanotime()))
2273		if faketime != 0 && list.empty() {
2274			// Using fake time and nothing is ready; stop M.
2275			// When all M's stop, checkdead will call timejump.
2276			stopm()
2277			goto top
2278		}
2279		lock(&sched.lock)
2280		_p_ = pidleget()
2281		unlock(&sched.lock)
2282		if _p_ == nil {
2283			injectglist(&list)
2284		} else {
2285			acquirep(_p_)
2286			if !list.empty() {
2287				gp := list.pop()
2288				injectglist(&list)
2289				casgstatus(gp, _Gwaiting, _Grunnable)
2290				if trace.enabled {
2291					traceGoUnpark(gp, 0)
2292				}
2293				return gp, false
2294			}
2295			if wasSpinning {
2296				_g_.m.spinning = true
2297				atomic.Xadd(&sched.nmspinning, 1)
2298			}
2299			goto top
2300		}
2301	} else if pollUntil != 0 && netpollinited() {
2302		pollerPollUntil := int64(atomic.Load64(&sched.pollUntil))
2303		if pollerPollUntil == 0 || pollerPollUntil > pollUntil {
2304			netpollBreak()
2305		}
2306	}
2307	stopm()
2308	goto top
2309}
2310
2311// pollWork reports whether there is non-background work this P could
2312// be doing. This is a fairly lightweight check to be used for
2313// background work loops, like idle GC. It checks a subset of the
2314// conditions checked by the actual scheduler.
2315func pollWork() bool {
2316	if sched.runqsize != 0 {
2317		return true
2318	}
2319	p := getg().m.p.ptr()
2320	if !runqempty(p) {
2321		return true
2322	}
2323	if netpollinited() && atomic.Load(&netpollWaiters) > 0 && sched.lastpoll != 0 {
2324		if list := netpoll(0); !list.empty() {
2325			injectglist(&list)
2326			return true
2327		}
2328	}
2329	return false
2330}
2331
2332// wakeNetPoller wakes up the thread sleeping in the network poller,
2333// if there is one, and if it isn't going to wake up anyhow before
2334// the when argument.
2335func wakeNetPoller(when int64) {
2336	if atomic.Load64(&sched.lastpoll) == 0 {
2337		// In findrunnable we ensure that when polling the pollUntil
2338		// field is either zero or the time to which the current
2339		// poll is expected to run. This can have a spurious wakeup
2340		// but should never miss a wakeup.
2341		pollerPollUntil := int64(atomic.Load64(&sched.pollUntil))
2342		if pollerPollUntil == 0 || pollerPollUntil > when {
2343			netpollBreak()
2344		}
2345	}
2346}
2347
2348func resetspinning() {
2349	_g_ := getg()
2350	if !_g_.m.spinning {
2351		throw("resetspinning: not a spinning m")
2352	}
2353	_g_.m.spinning = false
2354	nmspinning := atomic.Xadd(&sched.nmspinning, -1)
2355	if int32(nmspinning) < 0 {
2356		throw("findrunnable: negative nmspinning")
2357	}
2358	// M wakeup policy is deliberately somewhat conservative, so check if we
	// need to wake up another P here. See "Worker thread parking/unparking"
2360	// comment at the top of the file for details.
2361	if nmspinning == 0 && atomic.Load(&sched.npidle) > 0 {
2362		wakep()
2363	}
2364}
2365
2366// Injects the list of runnable G's into the scheduler and clears glist.
2367// Can run concurrently with GC.
2368func injectglist(glist *gList) {
2369	if glist.empty() {
2370		return
2371	}
2372	if trace.enabled {
2373		for gp := glist.head.ptr(); gp != nil; gp = gp.schedlink.ptr() {
2374			traceGoUnpark(gp, 0)
2375		}
2376	}
2377	lock(&sched.lock)
2378	var n int
2379	for n = 0; !glist.empty(); n++ {
2380		gp := glist.pop()
2381		casgstatus(gp, _Gwaiting, _Grunnable)
2382		globrunqput(gp)
2383	}
2384	unlock(&sched.lock)
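	// Wake up to n idle Ps to run the goroutines we just made runnable.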
2385	for ; n != 0 && sched.npidle != 0; n-- {
2386		startm(nil, false)
2387	}
2388	*glist = gList{}
2389}
2390
2391// One round of scheduler: find a runnable goroutine and execute it.
2392// Never returns.
2393func schedule() {
2394	_g_ := getg()
2395
2396	if _g_.m.locks != 0 {
2397		throw("schedule: holding locks")
2398	}
2399
2400	if _g_.m.lockedg != 0 {
2401		stoplockedm()
2402		execute(_g_.m.lockedg.ptr(), false) // Never returns.
2403	}
2404
2405	// We should not schedule away from a g that is executing a cgo call,
2406	// since the cgo call is using the m's g0 stack.
2407	if _g_.m.incgo {
2408		throw("schedule: in cgo")
2409	}
2410
2411top:
2412	pp := _g_.m.p.ptr()
2413	pp.preempt = false
2414
2415	if sched.gcwaiting != 0 {
2416		gcstopm()
2417		goto top
2418	}
2419	if pp.runSafePointFn != 0 {
2420		runSafePointFn()
2421	}
2422
2423	// Sanity check: if we are spinning, the run queue should be empty.
2424	// Check this before calling checkTimers, as that might call
2425	// goready to put a ready goroutine on the local run queue.
2426	if _g_.m.spinning && (pp.runnext != 0 || pp.runqhead != pp.runqtail) {
2427		throw("schedule: spinning with local work")
2428	}
2429
2430	checkTimers(pp, 0)
2431
2432	var gp *g
2433	var inheritTime bool
2434
	// Normal goroutines check in ready whether a P needs to be woken,
	// but GC workers and trace readers do not, so the check must
	// be done here instead.
2438	tryWakeP := false
2439	if trace.enabled || trace.shutdown {
2440		gp = traceReader()
2441		if gp != nil {
2442			casgstatus(gp, _Gwaiting, _Grunnable)
2443			traceGoUnpark(gp, 0)
2444			tryWakeP = true
2445		}
2446	}
2447	if gp == nil && gcBlackenEnabled != 0 {
2448		gp = gcController.findRunnableGCWorker(_g_.m.p.ptr())
2449		tryWakeP = tryWakeP || gp != nil
2450	}
2451	if gp == nil {
2452		// Check the global runnable queue once in a while to ensure fairness.
2453		// Otherwise two goroutines can completely occupy the local runqueue
2454		// by constantly respawning each other.
2455		if _g_.m.p.ptr().schedtick%61 == 0 && sched.runqsize > 0 {
2456			lock(&sched.lock)
2457			gp = globrunqget(_g_.m.p.ptr(), 1)
2458			unlock(&sched.lock)
2459		}
2460	}
2461	if gp == nil {
2462		gp, inheritTime = runqget(_g_.m.p.ptr())
2463		// We can see gp != nil here even if the M is spinning,
2464		// if checkTimers added a local goroutine via goready.
2465
2466		// Because gccgo does not implement preemption as a stack check,
2467		// we need to check for preemption here for fairness.
2468		// Otherwise goroutines on the local queue may starve
2469		// goroutines on the global queue.
2470		// Since we preempt by storing the goroutine on the global
2471		// queue, this is the only place we need to check preempt.
2472		// This does not call checkPreempt because gp is not running.
2473		if gp != nil && gp.preempt {
2474			gp.preempt = false
2475			lock(&sched.lock)
2476			globrunqput(gp)
2477			unlock(&sched.lock)
2478			goto top
2479		}
2480	}
2481	if gp == nil {
2482		gp, inheritTime = findrunnable() // blocks until work is available
2483	}
2484
2485	// This thread is going to run a goroutine and is not spinning anymore,
2486	// so if it was marked as spinning we need to reset it now and potentially
2487	// start a new spinning M.
2488	if _g_.m.spinning {
2489		resetspinning()
2490	}
2491
2492	if sched.disable.user && !schedEnabled(gp) {
2493		// Scheduling of this goroutine is disabled. Put it on
2494		// the list of pending runnable goroutines for when we
2495		// re-enable user scheduling and look again.
2496		lock(&sched.lock)
2497		if schedEnabled(gp) {
2498			// Something re-enabled scheduling while we
2499			// were acquiring the lock.
2500			unlock(&sched.lock)
2501		} else {
2502			sched.disable.runnable.pushBack(gp)
2503			sched.disable.n++
2504			unlock(&sched.lock)
2505			goto top
2506		}
2507	}
2508
	// If we are about to schedule a non-normal goroutine (a GC worker
	// or trace reader), wake a P if there is one.
2511	if tryWakeP {
2512		if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 {
2513			wakep()
2514		}
2515	}
2516	if gp.lockedm != 0 {
2517		// Hands off own p to the locked m,
2518		// then blocks waiting for a new p.
2519		startlockedm(gp)
2520		goto top
2521	}
2522
2523	execute(gp, inheritTime)
2524}
2525
2526// dropg removes the association between m and the current goroutine m->curg (gp for short).
2527// Typically a caller sets gp's status away from Grunning and then
2528// immediately calls dropg to finish the job. The caller is also responsible
2529// for arranging that gp will be restarted using ready at an
2530// appropriate time. After calling dropg and arranging for gp to be
2531// readied later, the caller can do other work but eventually should
2532// call schedule to restart the scheduling of goroutines on this m.
2533func dropg() {
2534	_g_ := getg()
2535
2536	setMNoWB(&_g_.m.curg.m, nil)
2537	setGNoWB(&_g_.m.curg, nil)
2538}
2539
2540// checkTimers runs any timers for the P that are ready.
2541// If now is not 0 it is the current time.
2542// It returns the current time or 0 if it is not known,
2543// and the time when the next timer should run or 0 if there is no next timer,
2544// and reports whether it ran any timers.
2545// If the time when the next timer should run is not 0,
2546// it is always larger than the returned time.
2547// We pass now in and out to avoid extra calls of nanotime.
2548//go:yeswritebarrierrec
2549func checkTimers(pp *p, now int64) (rnow, pollUntil int64, ran bool) {
2550	// If there are no timers to adjust, and the first timer on
2551	// the heap is not yet ready to run, then there is nothing to do.
2552	if atomic.Load(&pp.adjustTimers) == 0 {
2553		next := int64(atomic.Load64(&pp.timer0When))
2554		if next == 0 {
2555			return now, 0, false
2556		}
2557		if now == 0 {
2558			now = nanotime()
2559		}
2560		if now < next {
2561			// Next timer is not ready to run.
2562			// But keep going if we would clear deleted timers.
2563			// This corresponds to the condition below where
2564			// we decide whether to call clearDeletedTimers.
2565			if pp != getg().m.p.ptr() || int(atomic.Load(&pp.deletedTimers)) <= int(atomic.Load(&pp.numTimers)/4) {
2566				return now, next, false
2567			}
2568		}
2569	}
2570
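	// Adjust any modified timers, run the timers that are ready, and
	// note when the earliest remaining timer should run.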
2571	lock(&pp.timersLock)
2572
2573	adjusttimers(pp)
2574
2575	rnow = now
2576	if len(pp.timers) > 0 {
2577		if rnow == 0 {
2578			rnow = nanotime()
2579		}
2580		for len(pp.timers) > 0 {
2581			// Note that runtimer may temporarily unlock
2582			// pp.timersLock.
2583			if tw := runtimer(pp, rnow); tw != 0 {
2584				if tw > 0 {
2585					pollUntil = tw
2586				}
2587				break
2588			}
2589			ran = true
2590		}
2591	}
2592
2593	// If this is the local P, and there are a lot of deleted timers,
2594	// clear them out. We only do this for the local P to reduce
2595	// lock contention on timersLock.
2596	if pp == getg().m.p.ptr() && int(atomic.Load(&pp.deletedTimers)) > len(pp.timers)/4 {
2597		clearDeletedTimers(pp)
2598	}
2599
2600	unlock(&pp.timersLock)
2601
2602	return rnow, pollUntil, ran
2603}
2604
2605// shouldStealTimers reports whether we should try stealing the timers from p2.
2606// We don't steal timers from a running P that is not marked for preemption,
2607// on the assumption that it will run its own timers. This reduces
2608// contention on the timers lock.
2609func shouldStealTimers(p2 *p) bool {
2610	if p2.status != _Prunning {
2611		return true
2612	}
2613	mp := p2.m.ptr()
2614	if mp == nil || mp.locks > 0 {
2615		return false
2616	}
2617	gp := mp.curg
2618	if gp == nil || gp.atomicstatus != _Grunning || !gp.preempt {
2619		return false
2620	}
2621	return true
2622}
2623
2624func parkunlock_c(gp *g, lock unsafe.Pointer) bool {
2625	unlock((*mutex)(lock))
2626	return true
2627}
2628
2629// park continuation on g0.
2630func park_m(gp *g) {
2631	_g_ := getg()
2632
2633	if trace.enabled {
2634		traceGoPark(_g_.m.waittraceev, _g_.m.waittraceskip)
2635	}
2636
2637	casgstatus(gp, _Grunning, _Gwaiting)
2638	dropg()
2639
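	// If the caller registered a wait-unlock function, run it now.
	// If it reports false, the G must not park after all, so make it
	// runnable again and resume it immediately.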
2640	if fn := _g_.m.waitunlockf; fn != nil {
2641		ok := fn(gp, _g_.m.waitlock)
2642		_g_.m.waitunlockf = nil
2643		_g_.m.waitlock = nil
2644		if !ok {
2645			if trace.enabled {
2646				traceGoUnpark(gp, 2)
2647			}
2648			casgstatus(gp, _Gwaiting, _Grunnable)
2649			execute(gp, true) // Schedule it back, never returns.
2650		}
2651	}
2652	schedule()
2653}
2654
2655func goschedImpl(gp *g) {
2656	status := readgstatus(gp)
2657	if status&^_Gscan != _Grunning {
2658		dumpgstatus(gp)
2659		throw("bad g status")
2660	}
2661	casgstatus(gp, _Grunning, _Grunnable)
2662	dropg()
2663	lock(&sched.lock)
2664	globrunqput(gp)
2665	unlock(&sched.lock)
2666
2667	schedule()
2668}
2669
2670// Gosched continuation on g0.
2671func gosched_m(gp *g) {
2672	if trace.enabled {
2673		traceGoSched()
2674	}
2675	goschedImpl(gp)
2676}
2677
2678// goschedguarded is a forbidden-states-avoided version of gosched_m
2679func goschedguarded_m(gp *g) {
2680
2681	if !canPreemptM(gp.m) {
2682		gogo(gp) // never return
2683	}
2684
2685	if trace.enabled {
2686		traceGoSched()
2687	}
2688	goschedImpl(gp)
2689}
2690
2691func gopreempt_m(gp *g) {
2692	if trace.enabled {
2693		traceGoPreempt()
2694	}
2695	goschedImpl(gp)
2696}
2697
2698// preemptPark parks gp and puts it in _Gpreempted.
2699//
2700//go:systemstack
2701func preemptPark(gp *g) {
2702	if trace.enabled {
2703		traceGoPark(traceEvGoBlock, 0)
2704	}
2705	status := readgstatus(gp)
2706	if status&^_Gscan != _Grunning {
2707		dumpgstatus(gp)
2708		throw("bad g status")
2709	}
2710	gp.waitreason = waitReasonPreempted
2711	// Transition from _Grunning to _Gscan|_Gpreempted. We can't
2712	// be in _Grunning when we dropg because then we'd be running
2713	// without an M, but the moment we're in _Gpreempted,
2714	// something could claim this G before we've fully cleaned it
2715	// up. Hence, we set the scan bit to lock down further
2716	// transitions until we can dropg.
2717	casGToPreemptScan(gp, _Grunning, _Gscan|_Gpreempted)
2718	dropg()
2719	casfrom_Gscanstatus(gp, _Gscan|_Gpreempted, _Gpreempted)
2720	schedule()
2721}
2722
2723// goyield is like Gosched, but it:
2724// - emits a GoPreempt trace event instead of a GoSched trace event
2725// - puts the current G on the runq of the current P instead of the globrunq
2726func goyield() {
2727	checkTimeouts()
2728	mcall(goyield_m)
2729}
2730
2731func goyield_m(gp *g) {
2732	if trace.enabled {
2733		traceGoPreempt()
2734	}
2735	pp := gp.m.p.ptr()
2736	casgstatus(gp, _Grunning, _Grunnable)
2737	dropg()
2738	runqput(pp, gp, false)
2739	schedule()
2740}
2741
2742// Finishes execution of the current goroutine.
2743func goexit1() {
2744	if trace.enabled {
2745		traceGoEnd()
2746	}
2747	mcall(goexit0)
2748}
2749
2750// goexit continuation on g0.
2751func goexit0(gp *g) {
2752	_g_ := getg()
2753
2754	casgstatus(gp, _Grunning, _Gdead)
2755	if isSystemGoroutine(gp, false) {
2756		atomic.Xadd(&sched.ngsys, -1)
2757		gp.isSystemGoroutine = false
2758	}
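	// Clear the G's fields so that it can be reused safely from the
	// gfree list.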
2759	gp.m = nil
2760	locked := gp.lockedm != 0
2761	gp.lockedm = 0
2762	_g_.m.lockedg = 0
2763	gp.entry = nil
2764	gp.preemptStop = false
2765	gp.paniconfault = false
	gp._defer = nil // should be nil already but just in case.
2767	gp._panic = nil // non-nil for Goexit during panic. points at stack-allocated data.
2768	gp.writebuf = nil
2769	gp.waitreason = 0
2770	gp.param = nil
2771	gp.labels = nil
2772	gp.timer = nil
2773
2774	if gcBlackenEnabled != 0 && gp.gcAssistBytes > 0 {
2775		// Flush assist credit to the global pool. This gives
2776		// better information to pacing if the application is
		// rapidly creating and exiting goroutines.
2778		scanCredit := int64(gcController.assistWorkPerByte * float64(gp.gcAssistBytes))
2779		atomic.Xaddint64(&gcController.bgScanCredit, scanCredit)
2780		gp.gcAssistBytes = 0
2781	}
2782
2783	dropg()
2784
2785	if GOARCH == "wasm" { // no threads yet on wasm
2786		gfput(_g_.m.p.ptr(), gp)
2787		schedule() // never returns
2788	}
2789
2790	if _g_.m.lockedInt != 0 {
2791		print("invalid m->lockedInt = ", _g_.m.lockedInt, "\n")
2792		throw("internal lockOSThread error")
2793	}
2794	gfput(_g_.m.p.ptr(), gp)
2795	if locked {
2796		// The goroutine may have locked this thread because
2797		// it put it in an unusual kernel state. Kill it
2798		// rather than returning it to the thread pool.
2799
2800		// Return to mstart, which will release the P and exit
2801		// the thread.
2802		if GOOS != "plan9" { // See golang.org/issue/22227.
2803			_g_.m.exiting = true
2804			gogo(_g_.m.g0)
2805		} else {
2806			// Clear lockedExt on plan9 since we may end up re-using
2807			// this thread.
2808			_g_.m.lockedExt = 0
2809		}
2810	}
2811	schedule()
2812}
2813
2814// The goroutine g is about to enter a system call.
2815// Record that it's not using the cpu anymore.
2816// This is called only from the go syscall library and cgocall,
2817// not from the low-level system calls used by the runtime.
2818//
2819// The entersyscall function is written in C, so that it can save the
// current register context so that the GC will see the registers.
2821// It calls reentersyscall.
2822//
2823// Syscall tracing:
2824// At the start of a syscall we emit traceGoSysCall to capture the stack trace.
2825// If the syscall does not block, that is it, we do not emit any other events.
2826// If the syscall blocks (that is, P is retaken), retaker emits traceGoSysBlock;
2827// when syscall returns we emit traceGoSysExit and when the goroutine starts running
2828// (potentially instantly, if exitsyscallfast returns true) we emit traceGoStart.
2829// To ensure that traceGoSysExit is emitted strictly after traceGoSysBlock,
2830// we remember current value of syscalltick in m (_g_.m.syscalltick = _g_.m.p.ptr().syscalltick),
2831// whoever emits traceGoSysBlock increments p.syscalltick afterwards;
2832// and we wait for the increment before emitting traceGoSysExit.
2833// Note that the increment is done even if tracing is not enabled,
2834// because tracing can be enabled in the middle of syscall. We don't want the wait to hang.
2835//
2836//go:nosplit
2837//go:noinline
2838func reentersyscall(pc, sp uintptr) {
2839	_g_ := getg()
2840
	// Disable preemption because during this function g is in Gsyscall status
	// but can have an inconsistent g->sched; do not let the GC observe it.
2843	_g_.m.locks++
2844
2845	_g_.syscallsp = sp
2846	_g_.syscallpc = pc
2847	casgstatus(_g_, _Grunning, _Gsyscall)
2848
2849	if trace.enabled {
2850		systemstack(traceGoSysCall)
2851	}
2852
2853	if atomic.Load(&sched.sysmonwait) != 0 {
2854		systemstack(entersyscall_sysmon)
2855	}
2856
2857	if _g_.m.p.ptr().runSafePointFn != 0 {
2858		// runSafePointFn may stack split if run on this stack
2859		systemstack(runSafePointFn)
2860	}
2861
2862	_g_.m.syscalltick = _g_.m.p.ptr().syscalltick
2863	_g_.sysblocktraced = true
2864	_g_.m.mcache = nil
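	// Detach the P from this M and publish _Psyscall so that sysmon
	// can retake the P if the syscall blocks for too long.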
2865	pp := _g_.m.p.ptr()
2866	pp.m = 0
2867	_g_.m.oldp.set(pp)
2868	_g_.m.p = 0
2869	atomic.Store(&pp.status, _Psyscall)
2870	if sched.gcwaiting != 0 {
2871		systemstack(entersyscall_gcwait)
2872	}
2873
2874	_g_.m.locks--
2875}
2876
2877func entersyscall_sysmon() {
2878	lock(&sched.lock)
2879	if atomic.Load(&sched.sysmonwait) != 0 {
2880		atomic.Store(&sched.sysmonwait, 0)
2881		notewakeup(&sched.sysmonnote)
2882	}
2883	unlock(&sched.lock)
2884}
2885
2886func entersyscall_gcwait() {
2887	_g_ := getg()
2888	_p_ := _g_.m.oldp.ptr()
2889
2890	lock(&sched.lock)
2891	if sched.stopwait > 0 && atomic.Cas(&_p_.status, _Psyscall, _Pgcstop) {
2892		if trace.enabled {
2893			traceGoSysBlock(_p_)
2894			traceProcStop(_p_)
2895		}
2896		_p_.syscalltick++
2897		if sched.stopwait--; sched.stopwait == 0 {
2898			notewakeup(&sched.stopnote)
2899		}
2900	}
2901	unlock(&sched.lock)
2902}
2903
2904func reentersyscallblock(pc, sp uintptr) {
2905	_g_ := getg()
2906
2907	_g_.m.locks++ // see comment in entersyscall
2908	_g_.throwsplit = true
2909	_g_.m.syscalltick = _g_.m.p.ptr().syscalltick
2910	_g_.sysblocktraced = true
2911	_g_.m.p.ptr().syscalltick++
2912
2913	// Leave SP around for GC and traceback.
2914	_g_.syscallsp = sp
2915	_g_.syscallpc = pc
2916	casgstatus(_g_, _Grunning, _Gsyscall)
2917	systemstack(entersyscallblock_handoff)
2918
2919	_g_.m.locks--
2920}
2921
2922func entersyscallblock_handoff() {
2923	if trace.enabled {
2924		traceGoSysCall()
2925		traceGoSysBlock(getg().m.p.ptr())
2926	}
2927	handoffp(releasep())
2928}
2929
2930// The goroutine g exited its system call.
2931// Arrange for it to run on a cpu again.
2932// This is called only from the go syscall library, not
2933// from the low-level system calls used by the runtime.
2934//
2935// Write barriers are not allowed because our P may have been stolen.
2936//
2937//go:nosplit
2938//go:nowritebarrierrec
2939func exitsyscall() {
2940	_g_ := getg()
2941
2942	_g_.m.locks++ // see comment in entersyscall
2943
2944	_g_.waitsince = 0
2945	oldp := _g_.m.oldp.ptr()
2946	_g_.m.oldp = 0
2947	if exitsyscallfast(oldp) {
2948		if _g_.m.mcache == nil {
2949			throw("lost mcache")
2950		}
2951		if trace.enabled {
2952			if oldp != _g_.m.p.ptr() || _g_.m.syscalltick != _g_.m.p.ptr().syscalltick {
2953				systemstack(traceGoStart)
2954			}
2955		}
2956		// There's a cpu for us, so we can run.
2957		_g_.m.p.ptr().syscalltick++
2958		// We need to cas the status and scan before resuming...
2959		casgstatus(_g_, _Gsyscall, _Grunning)
2960
2961		exitsyscallclear(_g_)
2962		_g_.m.locks--
2963		_g_.throwsplit = false
2964
2965		// Check preemption, since unlike gc we don't check on
2966		// every call.
2967		if getg().preempt {
2968			checkPreempt()
2969		}
2970		_g_.throwsplit = false
2971
2972		if sched.disable.user && !schedEnabled(_g_) {
2973			// Scheduling of this goroutine is disabled.
2974			Gosched()
2975		}
2976
2977		return
2978	}
2979
2980	_g_.sysexitticks = 0
2981	if trace.enabled {
2982		// Wait till traceGoSysBlock event is emitted.
2983		// This ensures consistency of the trace (the goroutine is started after it is blocked).
2984		for oldp != nil && oldp.syscalltick == _g_.m.syscalltick {
2985			osyield()
2986		}
2987		// We can't trace syscall exit right now because we don't have a P.
2988		// Tracing code can invoke write barriers that cannot run without a P.
2989		// So instead we remember the syscall exit time and emit the event
2990		// in execute when we have a P.
2991		_g_.sysexitticks = cputicks()
2992	}
2993
2994	_g_.m.locks--
2995
2996	// Call the scheduler.
2997	mcall(exitsyscall0)
2998
2999	if _g_.m.mcache == nil {
3000		throw("lost mcache")
3001	}
3002
3003	// Scheduler returned, so we're allowed to run now.
3004	// Delete the syscallsp information that we left for
3005	// the garbage collector during the system call.
3006	// Must wait until now because until gosched returns
3007	// we don't know for sure that the garbage collector
3008	// is not running.
3009	exitsyscallclear(_g_)
3010
3011	_g_.m.p.ptr().syscalltick++
3012	_g_.throwsplit = false
3013}
3014
3015//go:nosplit
3016func exitsyscallfast(oldp *p) bool {
3017	_g_ := getg()
3018
3019	// Freezetheworld sets stopwait but does not retake P's.
3020	if sched.stopwait == freezeStopWait {
3021		return false
3022	}
3023
3024	// Try to re-acquire the last P.
3025	if oldp != nil && oldp.status == _Psyscall && atomic.Cas(&oldp.status, _Psyscall, _Pidle) {
3026		// There's a cpu for us, so we can run.
3027		wirep(oldp)
3028		exitsyscallfast_reacquired()
3029		return true
3030	}
3031
3032	// Try to get any other idle P.
3033	if sched.pidle != 0 {
3034		var ok bool
3035		systemstack(func() {
3036			ok = exitsyscallfast_pidle()
3037			if ok && trace.enabled {
3038				if oldp != nil {
3039					// Wait till traceGoSysBlock event is emitted.
3040					// This ensures consistency of the trace (the goroutine is started after it is blocked).
3041					for oldp.syscalltick == _g_.m.syscalltick {
3042						osyield()
3043					}
3044				}
3045				traceGoSysExit(0)
3046			}
3047		})
3048		if ok {
3049			return true
3050		}
3051	}
3052	return false
3053}
3054
3055// exitsyscallfast_reacquired is the exitsyscall path on which this G
3056// has successfully reacquired the P it was running on before the
3057// syscall.
3058//
3059//go:nosplit
3060func exitsyscallfast_reacquired() {
3061	_g_ := getg()
3062	if _g_.m.syscalltick != _g_.m.p.ptr().syscalltick {
3063		if trace.enabled {
			// The p was retaken and then entered a syscall again (since _g_.m.syscalltick has changed).
3065			// traceGoSysBlock for this syscall was already emitted,
3066			// but here we effectively retake the p from the new syscall running on the same p.
3067			systemstack(func() {
3068				// Denote blocking of the new syscall.
3069				traceGoSysBlock(_g_.m.p.ptr())
3070				// Denote completion of the current syscall.
3071				traceGoSysExit(0)
3072			})
3073		}
3074		_g_.m.p.ptr().syscalltick++
3075	}
3076}
3077
3078func exitsyscallfast_pidle() bool {
3079	lock(&sched.lock)
3080	_p_ := pidleget()
3081	if _p_ != nil && atomic.Load(&sched.sysmonwait) != 0 {
3082		atomic.Store(&sched.sysmonwait, 0)
3083		notewakeup(&sched.sysmonnote)
3084	}
3085	unlock(&sched.lock)
3086	if _p_ != nil {
3087		acquirep(_p_)
3088		return true
3089	}
3090	return false
3091}
3092
3093// exitsyscall slow path on g0.
3094// Failed to acquire P, enqueue gp as runnable.
3095//
3096//go:nowritebarrierrec
3097func exitsyscall0(gp *g) {
3098	_g_ := getg()
3099
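	// Make gp runnable again and drop its association with this M,
	// then look for an idle P to run it on; if none is available,
	// put gp on the global run queue.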
3100	casgstatus(gp, _Gsyscall, _Gexitingsyscall)
3101	dropg()
3102	casgstatus(gp, _Gexitingsyscall, _Grunnable)
3103	lock(&sched.lock)
3104	var _p_ *p
3105	if schedEnabled(_g_) {
3106		_p_ = pidleget()
3107	}
3108	if _p_ == nil {
3109		globrunqput(gp)
3110	} else if atomic.Load(&sched.sysmonwait) != 0 {
3111		atomic.Store(&sched.sysmonwait, 0)
3112		notewakeup(&sched.sysmonnote)
3113	}
3114	unlock(&sched.lock)
3115	if _p_ != nil {
3116		acquirep(_p_)
3117		execute(gp, false) // Never returns.
3118	}
3119	if _g_.m.lockedg != 0 {
3120		// Wait until another thread schedules gp and so m again.
3121		stoplockedm()
3122		execute(gp, false) // Never returns.
3123	}
3124	stopm()
3125	schedule() // Never returns.
3126}
3127
3128// exitsyscallclear clears GC-related information that we only track
3129// during a syscall.
3130func exitsyscallclear(gp *g) {
3131	// Garbage collector isn't running (since we are), so okay to
3132	// clear syscallsp.
3133	gp.syscallsp = 0
3134
3135	gp.gcstack = 0
3136	gp.gcnextsp = 0
3137	memclrNoHeapPointers(unsafe.Pointer(&gp.gcregs), unsafe.Sizeof(gp.gcregs))
3138}
3139
3140// Code generated by cgo, and some library code, calls syscall.Entersyscall
3141// and syscall.Exitsyscall.
3142
3143//go:linkname syscall_entersyscall syscall.Entersyscall
3144//go:nosplit
3145func syscall_entersyscall() {
3146	entersyscall()
3147}
3148
3149//go:linkname syscall_exitsyscall syscall.Exitsyscall
3150//go:nosplit
3151func syscall_exitsyscall() {
3152	exitsyscall()
3153}
3154
3155func beforefork() {
3156	gp := getg().m.curg
3157
3158	// Block signals during a fork, so that the child does not run
3159	// a signal handler before exec if a signal is sent to the process
3160	// group. See issue #18600.
3161	gp.m.locks++
3162	msigsave(gp.m)
3163	sigblock()
3164}
3165
3166// Called from syscall package before fork.
3167//go:linkname syscall_runtime_BeforeFork syscall.runtime_BeforeFork
3168//go:nosplit
3169func syscall_runtime_BeforeFork() {
3170	systemstack(beforefork)
3171}
3172
3173func afterfork() {
3174	gp := getg().m.curg
3175
3176	msigrestore(gp.m.sigmask)
3177
3178	gp.m.locks--
3179}
3180
3181// Called from syscall package after fork in parent.
3182//go:linkname syscall_runtime_AfterFork syscall.runtime_AfterFork
3183//go:nosplit
3184func syscall_runtime_AfterFork() {
3185	systemstack(afterfork)
3186}
3187
3188// inForkedChild is true while manipulating signals in the child process.
3189// This is used to avoid calling libc functions in case we are using vfork.
3190var inForkedChild bool
3191
3192// Called from syscall package after fork in child.
3193// It resets non-sigignored signals to the default handler, and
3194// restores the signal mask in preparation for the exec.
3195//
3196// Because this might be called during a vfork, and therefore may be
3197// temporarily sharing address space with the parent process, this must
// not change any global variables or call into C code that may do so.
3199//
3200//go:linkname syscall_runtime_AfterForkInChild syscall.runtime_AfterForkInChild
3201//go:nosplit
3202//go:nowritebarrierrec
3203func syscall_runtime_AfterForkInChild() {
3204	// It's OK to change the global variable inForkedChild here
3205	// because we are going to change it back. There is no race here,
3206	// because if we are sharing address space with the parent process,
3207	// then the parent process can not be running concurrently.
3208	inForkedChild = true
3209
3210	clearSignalHandlers()
3211
3212	// When we are the child we are the only thread running,
3213	// so we know that nothing else has changed gp.m.sigmask.
3214	msigrestore(getg().m.sigmask)
3215
3216	inForkedChild = false
3217}
3218
3219// Called from syscall package before Exec.
3220//go:linkname syscall_runtime_BeforeExec syscall.runtime_BeforeExec
3221func syscall_runtime_BeforeExec() {
3222	// Prevent thread creation during exec.
3223	execLock.lock()
3224}
3225
3226// Called from syscall package after Exec.
3227//go:linkname syscall_runtime_AfterExec syscall.runtime_AfterExec
3228func syscall_runtime_AfterExec() {
3229	execLock.unlock()
3230}
3231
3232// panicgonil is used for gccgo as we need to use a compiler check for
3233// a nil func, in case we have to build a thunk.
3234//go:linkname panicgonil
3235func panicgonil() {
3236	getg().m.throwing = -1 // do not dump full stacks
3237	throw("go of nil func value")
3238}
3239
3240// Create a new g running fn passing arg as the single argument.
3241// Put it on the queue of g's waiting to run.
3242// The compiler turns a go statement into a call to this.
3243//go:linkname newproc __go_go
3244func newproc(fn uintptr, arg unsafe.Pointer) *g {
3245	_g_ := getg()
3246
3247	if fn == 0 {
3248		_g_.m.throwing = -1 // do not dump full stacks
3249		throw("go of nil func value")
3250	}
3251	acquirem() // disable preemption because it can be holding p in a local var
3252
3253	_p_ := _g_.m.p.ptr()
3254	newg := gfget(_p_)
3255	var (
3256		sp     unsafe.Pointer
3257		spsize uintptr
3258	)
3259	if newg == nil {
3260		newg = malg(true, false, &sp, &spsize)
3261		casgstatus(newg, _Gidle, _Gdead)
3262		allgadd(newg) // publishes with a g->status of Gdead so GC scanner doesn't look at uninitialized stack.
3263	} else {
3264		resetNewG(newg, &sp, &spsize)
3265	}
3266	newg.traceback = 0
3267
3268	if readgstatus(newg) != _Gdead {
3269		throw("newproc1: new g is not Gdead")
3270	}
3271
3272	// Store the C function pointer into entryfn, take the address
3273	// of entryfn, convert it to a Go function value, and store
3274	// that in entry.
3275	newg.entryfn = fn
3276	var entry func(unsafe.Pointer)
3277	*(*unsafe.Pointer)(unsafe.Pointer(&entry)) = unsafe.Pointer(&newg.entryfn)
3278	newg.entry = entry
3279
3280	newg.param = arg
3281	newg.gopc = getcallerpc()
3282	newg.ancestors = saveAncestors(_g_)
3283	newg.startpc = fn
3284	if _g_.m.curg != nil {
3285		newg.labels = _g_.m.curg.labels
3286	}
3287	if isSystemGoroutine(newg, false) {
3288		atomic.Xadd(&sched.ngsys, +1)
3289	}
3290	casgstatus(newg, _Gdead, _Grunnable)
3291
3292	if _p_.goidcache == _p_.goidcacheend {
3293		// Sched.goidgen is the last allocated id,
3294		// this batch must be [sched.goidgen+1, sched.goidgen+GoidCacheBatch].
3295		// At startup sched.goidgen=0, so main goroutine receives goid=1.
3296		_p_.goidcache = atomic.Xadd64(&sched.goidgen, _GoidCacheBatch)
3297		_p_.goidcache -= _GoidCacheBatch - 1
3298		_p_.goidcacheend = _p_.goidcache + _GoidCacheBatch
3299	}
3300	newg.goid = int64(_p_.goidcache)
3301	_p_.goidcache++
3302	if trace.enabled {
3303		traceGoCreate(newg, newg.startpc)
3304	}
3305
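	// Set up the new G's execution context, put it in this P's runnext
	// slot, and wake another P if there are idle Ps and no spinning Ms.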
3306	makeGContext(newg, sp, spsize)
3307
3308	runqput(_p_, newg, true)
3309
3310	if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 && mainStarted {
3311		wakep()
3312	}
3313	releasem(_g_.m)
3314	return newg
3315}
3316
3317// expectedSystemGoroutines counts the number of goroutines expected
3318// to mark themselves as system goroutines. After they mark themselves
3319// by calling setSystemGoroutine, this is decremented. NumGoroutines
3320// uses this to wait for all system goroutines to mark themselves
3321// before it counts them.
3322var expectedSystemGoroutines uint32
3323
3324// expectSystemGoroutine is called when starting a goroutine that will
3325// call setSystemGoroutine. It increments expectedSystemGoroutines.
3326func expectSystemGoroutine() {
3327	atomic.Xadd(&expectedSystemGoroutines, +1)
3328}
3329
3330// waitForSystemGoroutines waits for all currently expected system
3331// goroutines to register themselves.
3332func waitForSystemGoroutines() {
3333	for atomic.Load(&expectedSystemGoroutines) > 0 {
3334		Gosched()
3335		osyield()
3336	}
3337}
3338
3339// setSystemGoroutine marks this goroutine as a "system goroutine".
3340// In the gc toolchain this is done by comparing startpc to a list of
3341// saved special PCs. In gccgo that approach does not work as startpc
3342// is often a thunk that invokes the real function with arguments,
3343// so the thunk address never matches the saved special PCs. Instead,
3344// since there are only a limited number of "system goroutines",
3345// we force each one to mark itself as special.
3346func setSystemGoroutine() {
3347	getg().isSystemGoroutine = true
3348	atomic.Xadd(&sched.ngsys, +1)
3349	atomic.Xadd(&expectedSystemGoroutines, -1)
3350}
3351
3352// saveAncestors copies previous ancestors of the given caller g and
// includes info for the current caller in a new set of tracebacks for
3354// a g being created.
3355func saveAncestors(callergp *g) *[]ancestorInfo {
3356	// Copy all prior info, except for the root goroutine (goid 0).
3357	if debug.tracebackancestors <= 0 || callergp.goid == 0 {
3358		return nil
3359	}
3360	var callerAncestors []ancestorInfo
3361	if callergp.ancestors != nil {
3362		callerAncestors = *callergp.ancestors
3363	}
3364	n := int32(len(callerAncestors)) + 1
3365	if n > debug.tracebackancestors {
3366		n = debug.tracebackancestors
3367	}
3368	ancestors := make([]ancestorInfo, n)
3369	copy(ancestors[1:], callerAncestors)
3370
3371	var pcs [_TracebackMaxFrames]uintptr
3372	// FIXME: This should get a traceback of callergp.
3373	// npcs := gcallers(callergp, 0, pcs[:])
3374	npcs := 0
3375	ipcs := make([]uintptr, npcs)
3376	copy(ipcs, pcs[:])
3377	ancestors[0] = ancestorInfo{
3378		pcs:  ipcs,
3379		goid: callergp.goid,
3380		gopc: callergp.gopc,
3381	}
3382
3383	ancestorsp := new([]ancestorInfo)
3384	*ancestorsp = ancestors
3385	return ancestorsp
3386}
3387
3388// Put on gfree list.
3389// If local list is too long, transfer a batch to the global list.
3390func gfput(_p_ *p, gp *g) {
3391	if readgstatus(gp) != _Gdead {
3392		throw("gfput: bad status (not Gdead)")
3393	}
3394
3395	_p_.gFree.push(gp)
3396	_p_.gFree.n++
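	// If the local free list has grown to 64 Gs, move Gs to the global
	// list until only 32 remain, bounding the per-P cache.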
3397	if _p_.gFree.n >= 64 {
3398		lock(&sched.gFree.lock)
3399		for _p_.gFree.n >= 32 {
3400			_p_.gFree.n--
3401			gp = _p_.gFree.pop()
3402			sched.gFree.list.push(gp)
3403			sched.gFree.n++
3404		}
3405		unlock(&sched.gFree.lock)
3406	}
3407}
3408
3409// Get from gfree list.
3410// If local list is empty, grab a batch from global list.
3411func gfget(_p_ *p) *g {
3412retry:
3413	if _p_.gFree.empty() && !sched.gFree.list.empty() {
3414		lock(&sched.gFree.lock)
3415		// Move a batch of free Gs to the P.
3416		for _p_.gFree.n < 32 {
3417			gp := sched.gFree.list.pop()
3418			if gp == nil {
3419				break
3420			}
3421			sched.gFree.n--
3422			_p_.gFree.push(gp)
3423			_p_.gFree.n++
3424		}
3425		unlock(&sched.gFree.lock)
3426		goto retry
3427	}
3428	gp := _p_.gFree.pop()
3429	if gp == nil {
3430		return nil
3431	}
3432	_p_.gFree.n--
3433	return gp
3434}
3435
3436// Purge all cached G's from gfree list to the global list.
3437func gfpurge(_p_ *p) {
3438	lock(&sched.gFree.lock)
3439	for !_p_.gFree.empty() {
3440		gp := _p_.gFree.pop()
3441		_p_.gFree.n--
3442		sched.gFree.list.push(gp)
3443		sched.gFree.n++
3444	}
3445	unlock(&sched.gFree.lock)
3446}
3447
3448// Breakpoint executes a breakpoint trap.
3449func Breakpoint() {
3450	breakpoint()
3451}
3452
3453// dolockOSThread is called by LockOSThread and lockOSThread below
3454// after they modify m.locked. Do not allow preemption during this call,
3455// or else the m might be different in this function than in the caller.
3456//go:nosplit
3457func dolockOSThread() {
3458	if GOARCH == "wasm" {
3459		return // no threads on wasm yet
3460	}
3461	_g_ := getg()
3462	_g_.m.lockedg.set(_g_)
3463	_g_.lockedm.set(_g_.m)
3464}
3465
3466//go:nosplit
3467
3468// LockOSThread wires the calling goroutine to its current operating system thread.
3469// The calling goroutine will always execute in that thread,
3470// and no other goroutine will execute in it,
3471// until the calling goroutine has made as many calls to
3472// UnlockOSThread as to LockOSThread.
3473// If the calling goroutine exits without unlocking the thread,
3474// the thread will be terminated.
3475//
3476// All init functions are run on the startup thread. Calling LockOSThread
3477// from an init function will cause the main function to be invoked on
3478// that thread.
3479//
3480// A goroutine should call LockOSThread before calling OS services or
3481// non-Go library functions that depend on per-thread state.
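//
// A minimal illustrative sketch of the usual pattern (not prescribed by
// this package, shown only as an example):
//
//	runtime.LockOSThread()
//	defer runtime.UnlockOSThread()
//	// ... call an OS service or non-Go library that relies on
//	// per-thread state ...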
3482func LockOSThread() {
3483	if atomic.Load(&newmHandoff.haveTemplateThread) == 0 && GOOS != "plan9" {
3484		// If we need to start a new thread from the locked
3485		// thread, we need the template thread. Start it now
3486		// while we're in a known-good state.
3487		startTemplateThread()
3488	}
3489	_g_ := getg()
3490	_g_.m.lockedExt++
3491	if _g_.m.lockedExt == 0 {
3492		_g_.m.lockedExt--
3493		panic("LockOSThread nesting overflow")
3494	}
3495	dolockOSThread()
3496}
3497
3498//go:nosplit
3499func lockOSThread() {
3500	getg().m.lockedInt++
3501	dolockOSThread()
3502}
3503
3504// dounlockOSThread is called by UnlockOSThread and unlockOSThread below
3505// after they update m->locked. Do not allow preemption during this call,
// or else the m might be different in this function than in the caller.
3507//go:nosplit
3508func dounlockOSThread() {
3509	if GOARCH == "wasm" {
3510		return // no threads on wasm yet
3511	}
3512	_g_ := getg()
3513	if _g_.m.lockedInt != 0 || _g_.m.lockedExt != 0 {
3514		return
3515	}
3516	_g_.m.lockedg = 0
3517	_g_.lockedm = 0
3518}
3519
3520//go:nosplit
3521
3522// UnlockOSThread undoes an earlier call to LockOSThread.
3523// If this drops the number of active LockOSThread calls on the
3524// calling goroutine to zero, it unwires the calling goroutine from
3525// its fixed operating system thread.
3526// If there are no active LockOSThread calls, this is a no-op.
3527//
3528// Before calling UnlockOSThread, the caller must ensure that the OS
3529// thread is suitable for running other goroutines. If the caller made
3530// any permanent changes to the state of the thread that would affect
3531// other goroutines, it should not call this function and thus leave
3532// the goroutine locked to the OS thread until the goroutine (and
3533// hence the thread) exits.
3534func UnlockOSThread() {
3535	_g_ := getg()
3536	if _g_.m.lockedExt == 0 {
3537		return
3538	}
3539	_g_.m.lockedExt--
3540	dounlockOSThread()
3541}
3542
3543//go:nosplit
3544func unlockOSThread() {
3545	_g_ := getg()
3546	if _g_.m.lockedInt == 0 {
3547		systemstack(badunlockosthread)
3548	}
3549	_g_.m.lockedInt--
3550	dounlockOSThread()
3551}
3552
3553func badunlockosthread() {
3554	throw("runtime: internal error: misuse of lockOSThread/unlockOSThread")
3555}
3556
3557func gcount() int32 {
3558	n := int32(allglen) - sched.gFree.n - int32(atomic.Load(&sched.ngsys))
3559	for _, _p_ := range allp {
3560		n -= _p_.gFree.n
3561	}
3562
3563	// All these variables can be changed concurrently, so the result can be inconsistent.
3564	// But at least the current goroutine is running.
3565	if n < 1 {
3566		n = 1
3567	}
3568	return n
3569}
3570
3571func mcount() int32 {
3572	return int32(sched.mnext - sched.nmfreed)
3573}
3574
3575var prof struct {
3576	signalLock uint32
3577	hz         int32
3578}
3579
3580func _System()                    { _System() }
3581func _ExternalCode()              { _ExternalCode() }
3582func _LostExternalCode()          { _LostExternalCode() }
3583func _GC()                        { _GC() }
3584func _LostSIGPROFDuringAtomic64() { _LostSIGPROFDuringAtomic64() }
3585func _VDSO()                      { _VDSO() }
3586
3587var _SystemPC = funcPC(_System)
3588var _ExternalCodePC = funcPC(_ExternalCode)
3589var _LostExternalCodePC = funcPC(_LostExternalCode)
3590var _GCPC = funcPC(_GC)
3591var _LostSIGPROFDuringAtomic64PC = funcPC(_LostSIGPROFDuringAtomic64)
3592
3593// Called if we receive a SIGPROF signal.
3594// Called by the signal handler, may run during STW.
3595//go:nowritebarrierrec
3596func sigprof(pc uintptr, gp *g, mp *m) {
3597	if prof.hz == 0 {
3598		return
3599	}
3600
3601	// Profiling runs concurrently with GC, so it must not allocate.
3602	// Set a trap in case the code does allocate.
3603	// Note that on windows, one thread takes profiles of all the
3604	// other threads, so mp is usually not getg().m.
3605	// In fact mp may not even be stopped.
3606	// See golang.org/issue/17165.
3607	getg().m.mallocing++
3608
3609	traceback := true
3610
3611	// If SIGPROF arrived while already fetching runtime callers
3612	// we can have trouble on older systems because the unwind
3613	// library calls dl_iterate_phdr which was not reentrant in
3614	// the past. alreadyInCallers checks for that.
3615	if gp == nil || alreadyInCallers() {
3616		traceback = false
3617	}
3618
3619	var stk [maxCPUProfStack]uintptr
3620	n := 0
3621	if traceback {
3622		var stklocs [maxCPUProfStack]location
3623		n = callers(0, stklocs[:])
3624
3625		// Issue 26595: the stack trace we've just collected is going
3626		// to include frames that we don't want to report in the CPU
3627		// profile, including signal handler frames. Here is what we
3628		// might typically see at the point of "callers" above for a
3629		// signal delivered to the application routine "interesting"
3630		// called by "main".
3631		//
3632		//  0: runtime.sigprof
3633		//  1: runtime.sighandler
3634		//  2: runtime.sigtrampgo
3635		//  3: runtime.sigtramp
3636		//  4: <signal handler called>
3637		//  5: main.interesting_routine
3638		//  6: main.main
3639		//
3640		// To ensure a sane profile, walk through the frames in
3641		// "stklocs" until we find the "runtime.sigtramp" frame, then
3642		// report only those frames below the frame one down from
3643		// that. On systems that don't split stack, "sigtramp" can
3644		// do a sibling call to "sigtrampgo", so use "sigtrampgo"
3645		// if we don't find "sigtramp". If for some reason
3646		// neither "runtime.sigtramp" nor "runtime.sigtrampgo" is
3647		// present, don't make any changes.
3648		framesToDiscard := 0
3649		for i := 0; i < n; i++ {
3650			if stklocs[i].function == "runtime.sigtrampgo" && i+2 < n {
3651				framesToDiscard = i + 2
3652			}
3653			if stklocs[i].function == "runtime.sigtramp" && i+2 < n {
3654				framesToDiscard = i + 2
3655				break
3656			}
3657		}
3658		n -= framesToDiscard
3659		for i := 0; i < n; i++ {
3660			stk[i] = stklocs[i+framesToDiscard].pc
3661		}
3662	}
3663
3664	if n <= 0 {
3665		// Normal traceback is impossible or has failed.
3666		// Account it against abstract "System" or "GC".
3667		n = 2
3668		stk[0] = pc
3669		if mp.preemptoff != "" {
3670			stk[1] = _GCPC + sys.PCQuantum
3671		} else {
3672			stk[1] = _SystemPC + sys.PCQuantum
3673		}
3674	}
3675
3676	if prof.hz != 0 {
3677		cpuprof.add(gp, stk[:n])
3678	}
3679	getg().m.mallocing--
3680}
3681
3682// Use global arrays rather than using up lots of stack space in the
3683// signal handler. This is safe since while we are executing a SIGPROF
3684// signal other SIGPROF signals are blocked.
3685var nonprofGoStklocs [maxCPUProfStack]location
3686var nonprofGoStk [maxCPUProfStack]uintptr
3687
3688// sigprofNonGo is called if we receive a SIGPROF signal on a non-Go thread,
3689// and the signal handler collected a stack trace in sigprofCallers.
3690// When this is called, sigprofCallersUse will be non-zero.
3691// g is nil, and what we can do is very limited.
3692//go:nosplit
3693//go:nowritebarrierrec
3694func sigprofNonGo(pc uintptr) {
3695	if prof.hz != 0 {
3696		n := callers(0, nonprofGoStklocs[:])
3697
3698		for i := 0; i < n; i++ {
3699			nonprofGoStk[i] = nonprofGoStklocs[i].pc
3700		}
3701
3702		if n <= 0 {
3703			n = 2
3704			nonprofGoStk[0] = pc
3705			nonprofGoStk[1] = _ExternalCodePC + sys.PCQuantum
3706		}
3707
3708		cpuprof.addNonGo(nonprofGoStk[:n])
3709	}
3710}
3711
3712// sigprofNonGoPC is called when a profiling signal arrived on a
3713// non-Go thread and we have a single PC value, not a stack trace.
3714// g is nil, and what we can do is very limited.
3715//go:nosplit
3716//go:nowritebarrierrec
3717func sigprofNonGoPC(pc uintptr) {
3718	if prof.hz != 0 {
3719		stk := []uintptr{
3720			pc,
3721			_ExternalCodePC + sys.PCQuantum,
3722		}
3723		cpuprof.addNonGo(stk)
3724	}
3725}
3726
3727// setcpuprofilerate sets the CPU profiling rate to hz times per second.
3728// If hz <= 0, setcpuprofilerate turns off CPU profiling.
3729func setcpuprofilerate(hz int32) {
3730	// Force sane arguments.
3731	if hz < 0 {
3732		hz = 0
3733	}
3734
3735	// Disable preemption, otherwise we can be rescheduled to another thread
3736	// that has profiling enabled.
3737	_g_ := getg()
3738	_g_.m.locks++
3739
3740	// Stop profiler on this thread so that it is safe to lock prof.
3741	// If a profiling signal came in while we had prof locked,
3742	// it would deadlock.
3743	setThreadCPUProfiler(0)
3744
3745	for !atomic.Cas(&prof.signalLock, 0, 1) {
3746		osyield()
3747	}
3748	if prof.hz != hz {
3749		setProcessCPUProfiler(hz)
3750		prof.hz = hz
3751	}
3752	atomic.Store(&prof.signalLock, 0)
3753
3754	lock(&sched.lock)
3755	sched.profilehz = hz
3756	unlock(&sched.lock)
3757
3758	if hz != 0 {
3759		setThreadCPUProfiler(hz)
3760	}
3761
3762	_g_.m.locks--
3763}
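
// For context, a sketch of how this is typically reached from user code;
// the 100 Hz value is runtime/pprof's documented default profiling rate,
// not something chosen here.
//
//	pprof.StartCPUProfile(w)     // eventually calls setcpuprofilerate(100)
//	defer pprof.StopCPUProfile() // eventually calls setcpuprofilerate(0)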
3764
3765// init initializes pp, which may be a freshly allocated p or a
3766// previously destroyed p, and transitions it to status _Pgcstop.
3767func (pp *p) init(id int32) {
3768	pp.id = id
3769	pp.status = _Pgcstop
3770	pp.sudogcache = pp.sudogbuf[:0]
3771	pp.deferpool = pp.deferpoolbuf[:0]
3772	pp.wbBuf.reset()
3773	if pp.mcache == nil {
3774		if id == 0 {
3775			if getg().m.mcache == nil {
3776				throw("missing mcache?")
3777			}
3778			pp.mcache = getg().m.mcache // bootstrap
3779		} else {
3780			pp.mcache = allocmcache()
3781		}
3782	}
3783	if raceenabled && pp.raceprocctx == 0 {
3784		if id == 0 {
3785			pp.raceprocctx = raceprocctx0
3786			raceprocctx0 = 0 // bootstrap
3787		} else {
3788			pp.raceprocctx = raceproccreate()
3789		}
3790	}
3791}
3792
3793// destroy releases all of the resources associated with pp and
3794// transitions it to status _Pdead.
3795//
3796// sched.lock must be held and the world must be stopped.
3797func (pp *p) destroy() {
3798	// Move all runnable goroutines to the global queue
3799	for pp.runqhead != pp.runqtail {
3800		// Pop from tail of local queue
3801		pp.runqtail--
3802		gp := pp.runq[pp.runqtail%uint32(len(pp.runq))].ptr()
3803		// Push onto head of global queue
3804		globrunqputhead(gp)
3805	}
3806	if pp.runnext != 0 {
3807		globrunqputhead(pp.runnext.ptr())
3808		pp.runnext = 0
3809	}
3810	if len(pp.timers) > 0 {
3811		plocal := getg().m.p.ptr()
3812		// The world is stopped, but we acquire timersLock to
3813		// protect against sysmon calling timeSleepUntil.
3814		// This is the only case where we hold the timersLock of
3815		// more than one P, so there are no deadlock concerns.
3816		lock(&plocal.timersLock)
3817		lock(&pp.timersLock)
3818		moveTimers(plocal, pp.timers)
3819		pp.timers = nil
3820		pp.numTimers = 0
3821		pp.adjustTimers = 0
3822		pp.deletedTimers = 0
3823		atomic.Store64(&pp.timer0When, 0)
3824		unlock(&pp.timersLock)
3825		unlock(&plocal.timersLock)
3826	}
3827	// If there's a background worker, make it runnable and put
3828	// it on the global queue so it can clean itself up.
3829	if gp := pp.gcBgMarkWorker.ptr(); gp != nil {
3830		casgstatus(gp, _Gwaiting, _Grunnable)
3831		if trace.enabled {
3832			traceGoUnpark(gp, 0)
3833		}
3834		globrunqput(gp)
3835		// This assignment doesn't race because the
3836		// world is stopped.
3837		pp.gcBgMarkWorker.set(nil)
3838	}
3839	// Flush p's write barrier buffer.
3840	if gcphase != _GCoff {
3841		wbBufFlush1(pp)
3842		pp.gcw.dispose()
3843	}
3844	for i := range pp.sudogbuf {
3845		pp.sudogbuf[i] = nil
3846	}
3847	pp.sudogcache = pp.sudogbuf[:0]
3848	for i := range pp.deferpoolbuf {
3849		pp.deferpoolbuf[i] = nil
3850	}
3851	pp.deferpool = pp.deferpoolbuf[:0]
3852	systemstack(func() {
3853		for i := 0; i < pp.mspancache.len; i++ {
3854			// Safe to call since the world is stopped.
3855			mheap_.spanalloc.free(unsafe.Pointer(pp.mspancache.buf[i]))
3856		}
3857		pp.mspancache.len = 0
3858		pp.pcache.flush(&mheap_.pages)
3859	})
3860	freemcache(pp.mcache)
3861	pp.mcache = nil
3862	gfpurge(pp)
3863	traceProcFree(pp)
3864	pp.gcAssistTime = 0
3865	pp.status = _Pdead
3866}
3867
3868// Change number of processors. The world is stopped, sched is locked.
3869// gcworkbufs are not being modified by either the GC or
3870// the write barrier code.
3871// Returns the list of Ps with local work; they need to be scheduled by the caller.
3872func procresize(nprocs int32) *p {
3873	old := gomaxprocs
3874	if old < 0 || nprocs <= 0 {
3875		throw("procresize: invalid arg")
3876	}
3877	if trace.enabled {
3878		traceGomaxprocs(nprocs)
3879	}
3880
3881	// update statistics
3882	now := nanotime()
3883	if sched.procresizetime != 0 {
3884		sched.totaltime += int64(old) * (now - sched.procresizetime)
3885	}
3886	sched.procresizetime = now
3887
3888	// Grow allp if necessary.
3889	if nprocs > int32(len(allp)) {
3890		// Synchronize with retake, which could be running
3891		// concurrently since it doesn't run on a P.
3892		lock(&allpLock)
3893		if nprocs <= int32(cap(allp)) {
3894			allp = allp[:nprocs]
3895		} else {
3896			nallp := make([]*p, nprocs)
3897			// Copy everything up to allp's cap so we
3898			// never lose old allocated Ps.
3899			copy(nallp, allp[:cap(allp)])
3900			allp = nallp
3901		}
3902		unlock(&allpLock)
3903	}
3904
3905	// initialize new P's
3906	for i := old; i < nprocs; i++ {
3907		pp := allp[i]
3908		if pp == nil {
3909			pp = new(p)
3910		}
3911		pp.init(i)
3912		atomicstorep(unsafe.Pointer(&allp[i]), unsafe.Pointer(pp))
3913	}
3914
3915	_g_ := getg()
3916	if _g_.m.p != 0 && _g_.m.p.ptr().id < nprocs {
3917		// continue to use the current P
3918		_g_.m.p.ptr().status = _Prunning
3919		_g_.m.p.ptr().mcache.prepareForSweep()
3920	} else {
3921		// release the current P and acquire allp[0].
3922		//
3923		// We must do this before destroying our current P
3924		// because p.destroy itself has write barriers, so we
3925		// need to do that from a valid P.
3926		if _g_.m.p != 0 {
3927			if trace.enabled {
3928				// Pretend that we were descheduled
3929				// and then scheduled again to keep
3930				// the trace sane.
3931				traceGoSched()
3932				traceProcStop(_g_.m.p.ptr())
3933			}
3934			_g_.m.p.ptr().m = 0
3935		}
3936		_g_.m.p = 0
3937		_g_.m.mcache = nil
3938		p := allp[0]
3939		p.m = 0
3940		p.status = _Pidle
3941		acquirep(p)
3942		if trace.enabled {
3943			traceGoStart()
3944		}
3945	}
3946
3947	// release resources from unused P's
3948	for i := nprocs; i < old; i++ {
3949		p := allp[i]
3950		p.destroy()
3951		// can't free P itself because it can be referenced by an M in syscall
3952	}
3953
3954	// Trim allp.
3955	if int32(len(allp)) != nprocs {
3956		lock(&allpLock)
3957		allp = allp[:nprocs]
3958		unlock(&allpLock)
3959	}
3960
3961	var runnablePs *p
3962	for i := nprocs - 1; i >= 0; i-- {
3963		p := allp[i]
3964		if _g_.m.p.ptr() == p {
3965			continue
3966		}
3967		p.status = _Pidle
3968		if runqempty(p) {
3969			pidleput(p)
3970		} else {
3971			p.m.set(mget())
3972			p.link.set(runnablePs)
3973			runnablePs = p
3974		}
3975	}
3976	stealOrder.reset(uint32(nprocs))
3977	var int32p *int32 = &gomaxprocs // make compiler check that gomaxprocs is an int32
3978	atomic.Store((*uint32)(unsafe.Pointer(int32p)), uint32(nprocs))
3979	return runnablePs
3980}
3981
3982// Associate p and the current m.
3983//
3984// This function is allowed to have write barriers even if the caller
3985// isn't because it immediately acquires _p_.
3986//
3987//go:yeswritebarrierrec
3988func acquirep(_p_ *p) {
3989	// Do the part that isn't allowed to have write barriers.
3990	wirep(_p_)
3991
3992	// Have p; write barriers now allowed.
3993
3994	// Perform deferred mcache flush before this P can allocate
3995	// from a potentially stale mcache.
3996	_p_.mcache.prepareForSweep()
3997
3998	if trace.enabled {
3999		traceProcStart()
4000	}
4001}
4002
4003// wirep is the first step of acquirep, which actually associates the
4004// current M to _p_. This is broken out so we can disallow write
4005// barriers for this part, since we don't yet have a P.
4006//
4007//go:nowritebarrierrec
4008//go:nosplit
4009func wirep(_p_ *p) {
4010	_g_ := getg()
4011
4012	if _g_.m.p != 0 || _g_.m.mcache != nil {
4013		throw("wirep: already in go")
4014	}
4015	if _p_.m != 0 || _p_.status != _Pidle {
4016		id := int64(0)
4017		if _p_.m != 0 {
4018			id = _p_.m.ptr().id
4019		}
4020		print("wirep: p->m=", _p_.m, "(", id, ") p->status=", _p_.status, "\n")
4021		throw("wirep: invalid p state")
4022	}
4023	_g_.m.mcache = _p_.mcache
4024	_g_.m.p.set(_p_)
4025	_p_.m.set(_g_.m)
4026	_p_.status = _Prunning
4027}
4028
4029// Disassociate p and the current m.
4030func releasep() *p {
4031	_g_ := getg()
4032
4033	if _g_.m.p == 0 || _g_.m.mcache == nil {
4034		throw("releasep: invalid arg")
4035	}
4036	_p_ := _g_.m.p.ptr()
4037	if _p_.m.ptr() != _g_.m || _p_.mcache != _g_.m.mcache || _p_.status != _Prunning {
4038		print("releasep: m=", _g_.m, " m->p=", _g_.m.p.ptr(), " p->m=", hex(_p_.m), " m->mcache=", _g_.m.mcache, " p->mcache=", _p_.mcache, " p->status=", _p_.status, "\n")
4039		throw("releasep: invalid p state")
4040	}
4041	if trace.enabled {
4042		traceProcStop(_g_.m.p.ptr())
4043	}
4044	_g_.m.p = 0
4045	_g_.m.mcache = nil
4046	_p_.m = 0
4047	_p_.status = _Pidle
4048	return _p_
4049}
4050
4051func incidlelocked(v int32) {
4052	lock(&sched.lock)
4053	sched.nmidlelocked += v
4054	if v > 0 {
4055		checkdead()
4056	}
4057	unlock(&sched.lock)
4058}
4059
4060// Check for deadlock situation.
4061// The check is based on the number of running M's; if it is 0, the process is deadlocked.
4062// sched.lock must be held.
4063func checkdead() {
4064	// For -buildmode=c-shared or -buildmode=c-archive it's OK if
4065	// there are no running goroutines. The calling program is
4066	// assumed to be running.
4067	if islibrary || isarchive {
4068		return
4069	}
4070
4071	// If we are dying because of a signal caught on an already idle thread,
4072	// freezetheworld will cause all running threads to block.
4073	// And runtime will essentially enter into deadlock state,
4074	// except that there is a thread that will call exit soon.
4075	if panicking > 0 {
4076		return
4077	}
4078
4079	// If we are not running under cgo, but we have an extra M then account
4080	// for it. (It is possible to have an extra M on Windows without cgo to
4081	// accommodate callbacks created by syscall.NewCallback. See issue #6751
4082	// for details.)
4083	var run0 int32
4084	if !iscgo && cgoHasExtraM {
4085		mp := lockextra(true)
4086		haveExtraM := extraMCount > 0
4087		unlockextra(mp)
4088		if haveExtraM {
4089			run0 = 1
4090		}
4091	}
4092
4093	run := mcount() - sched.nmidle - sched.nmidlelocked - sched.nmsys
4094	if run > run0 {
4095		return
4096	}
4097	if run < 0 {
4098		print("runtime: checkdead: nmidle=", sched.nmidle, " nmidlelocked=", sched.nmidlelocked, " mcount=", mcount(), " nmsys=", sched.nmsys, "\n")
4099		throw("checkdead: inconsistent counts")
4100	}
4101
4102	grunning := 0
4103	lock(&allglock)
4104	for i := 0; i < len(allgs); i++ {
4105		gp := allgs[i]
4106		if isSystemGoroutine(gp, false) {
4107			continue
4108		}
4109		s := readgstatus(gp)
4110		switch s &^ _Gscan {
4111		case _Gwaiting,
4112			_Gpreempted:
4113			grunning++
4114		case _Grunnable,
4115			_Grunning,
4116			_Gsyscall:
4117			unlock(&allglock)
4118			print("runtime: checkdead: find g ", gp.goid, " in status ", s, "\n")
4119			throw("checkdead: runnable g")
4120		}
4121	}
4122	unlock(&allglock)
4123	if grunning == 0 { // possible if main goroutine calls runtime·Goexit()
4124		unlock(&sched.lock) // unlock so that GODEBUG=scheddetail=1 doesn't hang
4125		throw("no goroutines (main called runtime.Goexit) - deadlock!")
4126	}
4127
4128	// Maybe jump time forward for playground.
4129	if faketime != 0 {
4130		when, _p_ := timeSleepUntil()
4131		if _p_ != nil {
4132			faketime = when
4133			for pp := &sched.pidle; *pp != 0; pp = &(*pp).ptr().link {
4134				if (*pp).ptr() == _p_ {
4135					*pp = _p_.link
4136					break
4137				}
4138			}
4139			mp := mget()
4140			if mp == nil {
4141				// There should always be a free M since
4142				// nothing is running.
4143				throw("checkdead: no m for timer")
4144			}
4145			mp.nextp.set(_p_)
4146			notewakeup(&mp.park)
4147			return
4148		}
4149	}
4150
4151	// There are no goroutines running, so we can look at the P's.
4152	for _, _p_ := range allp {
4153		if len(_p_.timers) > 0 {
4154			return
4155		}
4156	}
4157
4158	getg().m.throwing = -1 // do not dump full stacks
4159	unlock(&sched.lock)    // unlock so that GODEBUG=scheddetail=1 doesn't hang
4160	throw("all goroutines are asleep - deadlock!")
4161}
4162
4163// forcegcperiod is the maximum time in nanoseconds between garbage
4164// collections. If we go this long without a garbage collection, one
4165// is forced to run.
4166//
4167// This is a variable for testing purposes. It normally doesn't change.
4168var forcegcperiod int64 = 2 * 60 * 1e9
4169
4170// Always runs without a P, so write barriers are not allowed.
4171//
4172//go:nowritebarrierrec
4173func sysmon() {
4174	lock(&sched.lock)
4175	sched.nmsys++
4176	checkdead()
4177	unlock(&sched.lock)
4178
4179	lasttrace := int64(0)
4180	idle := 0 // how many cycles in succession we have not woken somebody up
4181	delay := uint32(0)
4182	for {
4183		if idle == 0 { // start with 20us sleep...
4184			delay = 20
4185		} else if idle > 50 { // start doubling the sleep after 1ms...
4186			delay *= 2
4187		}
4188		if delay > 10*1000 { // up to 10ms
4189			delay = 10 * 1000
4190		}
4191		usleep(delay)
4192		now := nanotime()
4193		next, _ := timeSleepUntil()
4194		if debug.schedtrace <= 0 && (sched.gcwaiting != 0 || atomic.Load(&sched.npidle) == uint32(gomaxprocs)) {
4195			lock(&sched.lock)
4196			if atomic.Load(&sched.gcwaiting) != 0 || atomic.Load(&sched.npidle) == uint32(gomaxprocs) {
4197				if next > now {
4198					atomic.Store(&sched.sysmonwait, 1)
4199					unlock(&sched.lock)
4200					// Make wake-up period small enough
4201					// for the sampling to be correct.
4202					sleep := forcegcperiod / 2
4203					if next-now < sleep {
4204						sleep = next - now
4205					}
4206					shouldRelax := sleep >= osRelaxMinNS
4207					if shouldRelax {
4208						osRelax(true)
4209					}
4210					notetsleep(&sched.sysmonnote, sleep)
4211					if shouldRelax {
4212						osRelax(false)
4213					}
4214					now = nanotime()
4215					next, _ = timeSleepUntil()
4216					lock(&sched.lock)
4217					atomic.Store(&sched.sysmonwait, 0)
4218					noteclear(&sched.sysmonnote)
4219				}
4220				idle = 0
4221				delay = 20
4222			}
4223			unlock(&sched.lock)
4224		}
4225		// trigger libc interceptors if needed
4226		if *cgo_yield != nil {
4227			asmcgocall(*cgo_yield, nil)
4228		}
4229		// poll network if not polled for more than 10ms
4230		lastpoll := int64(atomic.Load64(&sched.lastpoll))
4231		if netpollinited() && lastpoll != 0 && lastpoll+10*1000*1000 < now {
4232			atomic.Cas64(&sched.lastpoll, uint64(lastpoll), uint64(now))
4233			list := netpoll(0) // non-blocking - returns list of goroutines
4234			if !list.empty() {
4235				// Need to decrement number of idle locked M's
4236				// (pretending that one more is running) before injectglist.
4237				// Otherwise it can lead to the following situation:
4238				// injectglist grabs all P's but before it starts M's to run the P's,
4239				// another M returns from syscall, finishes running its G,
4240				// observes that there is no work to do and no other running M's
4241				// and reports deadlock.
4242				incidlelocked(-1)
4243				injectglist(&list)
4244				incidlelocked(1)
4245			}
4246		}
4247		if next < now {
4248			// There are timers that should have already run,
4249			// perhaps because there is an unpreemptible P.
4250			// Try to start an M to run them.
4251			startm(nil, false)
4252		}
4253		// retake P's blocked in syscalls
4254		// and preempt long running G's
4255		if retake(now) != 0 {
4256			idle = 0
4257		} else {
4258			idle++
4259		}
4260		// check if we need to force a GC
4261		if t := (gcTrigger{kind: gcTriggerTime, now: now}); t.test() && atomic.Load(&forcegc.idle) != 0 {
4262			lock(&forcegc.lock)
4263			forcegc.idle = 0
4264			var list gList
4265			list.push(forcegc.g)
4266			injectglist(&list)
4267			unlock(&forcegc.lock)
4268		}
4269		if debug.schedtrace > 0 && lasttrace+int64(debug.schedtrace)*1000000 <= now {
4270			lasttrace = now
4271			schedtrace(debug.scheddetail > 0)
4272		}
4273	}
4274}
4275
4276type sysmontick struct {
4277	schedtick   uint32
4278	schedwhen   int64
4279	syscalltick uint32
4280	syscallwhen int64
4281}
4282
4283// forcePreemptNS is the time slice given to a G before it is
4284// preempted.
4285const forcePreemptNS = 10 * 1000 * 1000 // 10ms
4286
4287func retake(now int64) uint32 {
4288	n := 0
4289	// Prevent allp slice changes. This lock will be completely
4290	// uncontended unless we're already stopping the world.
4291	lock(&allpLock)
4292	// We can't use a range loop over allp because we may
4293	// temporarily drop the allpLock. Hence, we need to re-fetch
4294	// allp each time around the loop.
4295	for i := 0; i < len(allp); i++ {
4296		_p_ := allp[i]
4297		if _p_ == nil {
4298			// This can happen if procresize has grown
4299			// allp but not yet created new Ps.
4300			continue
4301		}
4302		pd := &_p_.sysmontick
4303		s := _p_.status
4304		sysretake := false
4305		if s == _Prunning || s == _Psyscall {
4306			// Preempt G if it's running for too long.
4307			t := int64(_p_.schedtick)
4308			if int64(pd.schedtick) != t {
4309				pd.schedtick = uint32(t)
4310				pd.schedwhen = now
4311			} else if pd.schedwhen+forcePreemptNS <= now {
4312				preemptone(_p_)
4313				// In case of syscall, preemptone() doesn't
4314				// work, because there is no M wired to P.
4315				sysretake = true
4316			}
4317		}
4318		if s == _Psyscall {
4319			// Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
4320			t := int64(_p_.syscalltick)
4321			if !sysretake && int64(pd.syscalltick) != t {
4322				pd.syscalltick = uint32(t)
4323				pd.syscallwhen = now
4324				continue
4325			}
4326			// On the one hand we don't want to retake Ps if there is no other work to do,
4327			// but on the other hand we want to retake them eventually
4328			// because they can prevent the sysmon thread from deep sleep.
4329			if runqempty(_p_) && atomic.Load(&sched.nmspinning)+atomic.Load(&sched.npidle) > 0 && pd.syscallwhen+10*1000*1000 > now {
4330				continue
4331			}
4332			// Drop allpLock so we can take sched.lock.
4333			unlock(&allpLock)
4334			// Need to decrement number of idle locked M's
4335			// (pretending that one more is running) before the CAS.
4336			// Otherwise the M from which we retake can exit the syscall,
4337			// increment nmidle and report deadlock.
4338			incidlelocked(-1)
4339			if atomic.Cas(&_p_.status, s, _Pidle) {
4340				if trace.enabled {
4341					traceGoSysBlock(_p_)
4342					traceProcStop(_p_)
4343				}
4344				n++
4345				_p_.syscalltick++
4346				handoffp(_p_)
4347			}
4348			incidlelocked(1)
4349			lock(&allpLock)
4350		}
4351	}
4352	unlock(&allpLock)
4353	return uint32(n)
4354}
4355
4356// Tell all goroutines that they have been preempted and they should stop.
4357// This function is purely best-effort. It can fail to inform a goroutine if a
4358// processor just started running it.
4359// No locks need to be held.
4360// Returns true if a preemption request was issued to at least one goroutine.
4361func preemptall() bool {
4362	res := false
4363	for _, _p_ := range allp {
4364		if _p_.status != _Prunning {
4365			continue
4366		}
4367		if preemptone(_p_) {
4368			res = true
4369		}
4370	}
4371	return res
4372}
4373
4374// Tell the goroutine running on processor P to stop.
4375// This function is purely best-effort. It can incorrectly fail to inform the
4376// goroutine. It can inform the wrong goroutine. Even if it informs the
4377// correct goroutine, that goroutine might ignore the request if it is
4378// simultaneously executing newstack.
4379// No lock needs to be held.
4380// Returns true if a preemption request was issued.
4381// The actual preemption will happen at some point in the future
4382// and will be indicated by the gp->status no longer being
4383// Grunning.
4384func preemptone(_p_ *p) bool {
4385	mp := _p_.m.ptr()
4386	if mp == nil || mp == getg().m {
4387		return false
4388	}
4389	gp := mp.curg
4390	if gp == nil || gp == mp.g0 {
4391		return false
4392	}
4393
4394	gp.preempt = true
4395
4396	// At this point the gc implementation sets gp.stackguard0 to
4397	// a value that causes the goroutine to suspend itself.
4398	// gccgo has no support for this, and it's hard to support.
4399	// The split stack code reads a value from its TCB.
4400	// We have no way to set a value in the TCB of a different thread.
4401	// And, of course, not all systems support split stack anyhow.
4402	// Checking the field in the g is expensive, since it requires
4403	// loading the g from TLS.  The best mechanism is likely to be
4404	// setting a global variable and figuring out a way to efficiently
4405	// check that global variable.
4406	//
4407	// For now we check gp.preempt in schedule, mallocgc, selectgo,
4408	// and a few other places, which is at least better than doing
4409	// nothing at all.
4410
4411	// Request an async preemption of this P.
4412	if preemptMSupported && debug.asyncpreemptoff == 0 {
4413		_p_.preempt = true
4414		preemptM(mp)
4415	}
4416
4417	return true
4418}
4419
4420var starttime int64
4421
4422func schedtrace(detailed bool) {
4423	now := nanotime()
4424	if starttime == 0 {
4425		starttime = now
4426	}
4427
4428	lock(&sched.lock)
4429	print("SCHED ", (now-starttime)/1e6, "ms: gomaxprocs=", gomaxprocs, " idleprocs=", sched.npidle, " threads=", mcount(), " spinningthreads=", sched.nmspinning, " idlethreads=", sched.nmidle, " runqueue=", sched.runqsize)
4430	if detailed {
4431		print(" gcwaiting=", sched.gcwaiting, " nmidlelocked=", sched.nmidlelocked, " stopwait=", sched.stopwait, " sysmonwait=", sched.sysmonwait, "\n")
4432	}
4433	// We must be careful while reading data from P's, M's and G's.
4434	// Even if we hold schedlock, most data can be changed concurrently.
4435	// E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
4436	for i, _p_ := range allp {
4437		mp := _p_.m.ptr()
4438		h := atomic.Load(&_p_.runqhead)
4439		t := atomic.Load(&_p_.runqtail)
4440		if detailed {
4441			id := int64(-1)
4442			if mp != nil {
4443				id = mp.id
4444			}
4445			print("  P", i, ": status=", _p_.status, " schedtick=", _p_.schedtick, " syscalltick=", _p_.syscalltick, " m=", id, " runqsize=", t-h, " gfreecnt=", _p_.gFree.n, " timerslen=", len(_p_.timers), "\n")
4446		} else {
4447			// In non-detailed mode format lengths of per-P run queues as:
4448			// [len1 len2 len3 len4]
4449			print(" ")
4450			if i == 0 {
4451				print("[")
4452			}
4453			print(t - h)
4454			if i == len(allp)-1 {
4455				print("]\n")
4456			}
4457		}
4458	}
4459
4460	if !detailed {
4461		unlock(&sched.lock)
4462		return
4463	}
4464
4465	for mp := allm; mp != nil; mp = mp.alllink {
4466		_p_ := mp.p.ptr()
4467		gp := mp.curg
4468		lockedg := mp.lockedg.ptr()
4469		id1 := int32(-1)
4470		if _p_ != nil {
4471			id1 = _p_.id
4472		}
4473		id2 := int64(-1)
4474		if gp != nil {
4475			id2 = gp.goid
4476		}
4477		id3 := int64(-1)
4478		if lockedg != nil {
4479			id3 = lockedg.goid
4480		}
4481		print("  M", mp.id, ": p=", id1, " curg=", id2, " mallocing=", mp.mallocing, " throwing=", mp.throwing, " preemptoff=", mp.preemptoff, ""+" locks=", mp.locks, " dying=", mp.dying, " spinning=", mp.spinning, " blocked=", mp.blocked, " lockedg=", id3, "\n")
4482	}
4483
4484	lock(&allglock)
4485	for gi := 0; gi < len(allgs); gi++ {
4486		gp := allgs[gi]
4487		mp := gp.m
4488		lockedm := gp.lockedm.ptr()
4489		id1 := int64(-1)
4490		if mp != nil {
4491			id1 = mp.id
4492		}
4493		id2 := int64(-1)
4494		if lockedm != nil {
4495			id2 = lockedm.id
4496		}
4497		print("  G", gp.goid, ": status=", readgstatus(gp), "(", gp.waitreason.String(), ") m=", id1, " lockedm=", id2, "\n")
4498	}
4499	unlock(&allglock)
4500	unlock(&sched.lock)
4501}
4502
4503// schedEnableUser enables or disables the scheduling of user
4504// goroutines.
4505//
4506// This does not stop already running user goroutines, so the caller
4507// should first stop the world when disabling user goroutines.
4508func schedEnableUser(enable bool) {
4509	lock(&sched.lock)
4510	if sched.disable.user == !enable {
4511		unlock(&sched.lock)
4512		return
4513	}
4514	sched.disable.user = !enable
4515	if enable {
4516		n := sched.disable.n
4517		sched.disable.n = 0
4518		globrunqputbatch(&sched.disable.runnable, n)
4519		unlock(&sched.lock)
4520		for ; n != 0 && sched.npidle != 0; n-- {
4521			startm(nil, false)
4522		}
4523	} else {
4524		unlock(&sched.lock)
4525	}
4526}
4527
4528// schedEnabled reports whether gp should be scheduled. It returns
4529// false if scheduling of gp is disabled.
4530func schedEnabled(gp *g) bool {
4531	if sched.disable.user {
4532		return isSystemGoroutine(gp, true)
4533	}
4534	return true
4535}
4536
4537// Put mp on midle list.
4538// Sched must be locked.
4539// May run during STW, so write barriers are not allowed.
4540//go:nowritebarrierrec
4541func mput(mp *m) {
4542	mp.schedlink = sched.midle
4543	sched.midle.set(mp)
4544	sched.nmidle++
4545	checkdead()
4546}
4547
4548// Try to get an m from midle list.
4549// Sched must be locked.
4550// May run during STW, so write barriers are not allowed.
4551//go:nowritebarrierrec
4552func mget() *m {
4553	mp := sched.midle.ptr()
4554	if mp != nil {
4555		sched.midle = mp.schedlink
4556		sched.nmidle--
4557	}
4558	return mp
4559}
4560
4561// Put gp on the global runnable queue.
4562// Sched must be locked.
4563// May run during STW, so write barriers are not allowed.
4564//go:nowritebarrierrec
4565func globrunqput(gp *g) {
4566	sched.runq.pushBack(gp)
4567	sched.runqsize++
4568}
4569
4570// Put gp at the head of the global runnable queue.
4571// Sched must be locked.
4572// May run during STW, so write barriers are not allowed.
4573//go:nowritebarrierrec
4574func globrunqputhead(gp *g) {
4575	sched.runq.push(gp)
4576	sched.runqsize++
4577}
4578
4579// Put a batch of runnable goroutines on the global runnable queue.
4580// This clears *batch.
4581// Sched must be locked.
4582func globrunqputbatch(batch *gQueue, n int32) {
4583	sched.runq.pushBackAll(*batch)
4584	sched.runqsize += n
4585	*batch = gQueue{}
4586}
4587
4588// Try to get a batch of G's from the global runnable queue.
4589// Sched must be locked.
4590func globrunqget(_p_ *p, max int32) *g {
4591	if sched.runqsize == 0 {
4592		return nil
4593	}
4594
4595	n := sched.runqsize/gomaxprocs + 1
4596	if n > sched.runqsize {
4597		n = sched.runqsize
4598	}
4599	if max > 0 && n > max {
4600		n = max
4601	}
4602	if n > int32(len(_p_.runq))/2 {
4603		n = int32(len(_p_.runq)) / 2
4604	}
4605
4606	sched.runqsize -= n
4607
4608	gp := sched.runq.pop()
4609	n--
4610	for ; n > 0; n-- {
4611		gp1 := sched.runq.pop()
4612		runqput(_p_, gp1, false)
4613	}
4614	return gp
4615}
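
// Worked example (illustrative): with sched.runqsize = 100, gomaxprocs = 4,
// and max = 0, n starts as 100/4+1 = 26, which is already below both the
// global queue size and half of the 256-entry local run queue, so this call
// pops 26 Gs: it returns the first and runqputs the remaining 25 on _p_.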
4616
4617// Put p on the _Pidle list.
4618// Sched must be locked.
4619// May run during STW, so write barriers are not allowed.
4620//go:nowritebarrierrec
4621func pidleput(_p_ *p) {
4622	if !runqempty(_p_) {
4623		throw("pidleput: P has non-empty run queue")
4624	}
4625	_p_.link = sched.pidle
4626	sched.pidle.set(_p_)
4627	atomic.Xadd(&sched.npidle, 1) // TODO: fast atomic
4628}
4629
4630// Try to get a p from the _Pidle list.
4631// Sched must be locked.
4632// May run during STW, so write barriers are not allowed.
4633//go:nowritebarrierrec
4634func pidleget() *p {
4635	_p_ := sched.pidle.ptr()
4636	if _p_ != nil {
4637		sched.pidle = _p_.link
4638		atomic.Xadd(&sched.npidle, -1) // TODO: fast atomic
4639	}
4640	return _p_
4641}
4642
4643// runqempty reports whether _p_ has no Gs on its local run queue.
4644// It never returns true spuriously.
4645func runqempty(_p_ *p) bool {
4646	// Defend against a race where 1) _p_ has G1 in runqnext but runqhead == runqtail,
4647	// 2) runqput on _p_ kicks G1 to the runq, 3) runqget on _p_ empties runqnext.
4648	// Simply observing that runqhead == runqtail and then observing that runqnext == nil
4649	// does not mean the queue is empty.
4650	for {
4651		head := atomic.Load(&_p_.runqhead)
4652		tail := atomic.Load(&_p_.runqtail)
4653		runnext := atomic.Loaduintptr((*uintptr)(unsafe.Pointer(&_p_.runnext)))
4654		if tail == atomic.Load(&_p_.runqtail) {
4655			return head == tail && runnext == 0
4656		}
4657	}
4658}
4659
4660// To shake out latent assumptions about scheduling order,
4661// we introduce some randomness into scheduling decisions
4662// when running with the race detector.
4663// The need for this was made obvious by changing the
4664// (deterministic) scheduling order in Go 1.5 and breaking
4665// many poorly-written tests.
4666// With the randomness here, as long as the tests pass
4667// consistently with -race, they shouldn't have latent scheduling
4668// assumptions.
4669const randomizeScheduler = raceenabled
4670
4671// runqput tries to put g on the local runnable queue.
4672// If next is false, runqput adds g to the tail of the runnable queue.
4673// If next is true, runqput puts g in the _p_.runnext slot.
4674// If the run queue is full, runqput puts g on the global queue.
4675// Executed only by the owner P.
4676func runqput(_p_ *p, gp *g, next bool) {
4677	if randomizeScheduler && next && fastrand()%2 == 0 {
4678		next = false
4679	}
4680
4681	if next {
4682	retryNext:
4683		oldnext := _p_.runnext
4684		if !_p_.runnext.cas(oldnext, guintptr(unsafe.Pointer(gp))) {
4685			goto retryNext
4686		}
4687		if oldnext == 0 {
4688			return
4689		}
4690		// Kick the old runnext out to the regular run queue.
4691		gp = oldnext.ptr()
4692	}
4693
4694retry:
4695	h := atomic.LoadAcq(&_p_.runqhead) // load-acquire, synchronize with consumers
4696	t := _p_.runqtail
4697	if t-h < uint32(len(_p_.runq)) {
4698		_p_.runq[t%uint32(len(_p_.runq))].set(gp)
4699		atomic.StoreRel(&_p_.runqtail, t+1) // store-release, makes the item available for consumption
4700		return
4701	}
4702	if runqputslow(_p_, gp, h, t) {
4703		return
4704	}
4705	// the queue is not full, now the put above must succeed
4706	goto retry
4707}
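
// Illustrative call (see ready earlier in this file): when the running G
// readies another goroutine on the current P, it is usually queued with
// next=true so it runs promptly and locality is preserved. This is a sketch,
// not an additional call site:
//
//	runqput(_g_.m.p.ptr(), gp, true)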
4708
4709// Put g and a batch of work from local runnable queue on global queue.
4710// Executed only by the owner P.
4711func runqputslow(_p_ *p, gp *g, h, t uint32) bool {
4712	var batch [len(_p_.runq)/2 + 1]*g
4713
4714	// First, grab a batch from local queue.
4715	n := t - h
4716	n = n / 2
4717	if n != uint32(len(_p_.runq)/2) {
4718		throw("runqputslow: queue is not full")
4719	}
4720	for i := uint32(0); i < n; i++ {
4721		batch[i] = _p_.runq[(h+i)%uint32(len(_p_.runq))].ptr()
4722	}
4723	if !atomic.CasRel(&_p_.runqhead, h, h+n) { // cas-release, commits consume
4724		return false
4725	}
4726	batch[n] = gp
4727
4728	if randomizeScheduler {
4729		for i := uint32(1); i <= n; i++ {
4730			j := fastrandn(i + 1)
4731			batch[i], batch[j] = batch[j], batch[i]
4732		}
4733	}
4734
4735	// Link the goroutines.
4736	for i := uint32(0); i < n; i++ {
4737		batch[i].schedlink.set(batch[i+1])
4738	}
4739	var q gQueue
4740	q.head.set(batch[0])
4741	q.tail.set(batch[n])
4742
4743	// Now put the batch on global queue.
4744	lock(&sched.lock)
4745	globrunqputbatch(&q, int32(n+1))
4746	unlock(&sched.lock)
4747	return true
4748}
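
// Worked example (illustrative): runqputslow is reached with the 256-entry
// local queue full, so t-h = 256 and n = 128. The 128 oldest Gs plus gp
// itself (n+1 = 129) are linked into a gQueue and moved to the global run
// queue, leaving the local queue half empty for the retry in runqput.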
4749
4750// Get g from local runnable queue.
4751// If inheritTime is true, gp should inherit the remaining time in the
4752// current time slice. Otherwise, it should start a new time slice.
4753// Executed only by the owner P.
4754func runqget(_p_ *p) (gp *g, inheritTime bool) {
4755	// If there's a runnext, it's the next G to run.
4756	for {
4757		next := _p_.runnext
4758		if next == 0 {
4759			break
4760		}
4761		if _p_.runnext.cas(next, 0) {
4762			return next.ptr(), true
4763		}
4764	}
4765
4766	for {
4767		h := atomic.LoadAcq(&_p_.runqhead) // load-acquire, synchronize with other consumers
4768		t := _p_.runqtail
4769		if t == h {
4770			return nil, false
4771		}
4772		gp := _p_.runq[h%uint32(len(_p_.runq))].ptr()
4773		if atomic.CasRel(&_p_.runqhead, h, h+1) { // cas-release, commits consume
4774			return gp, false
4775		}
4776	}
4777}
4778
4779// Grabs a batch of goroutines from _p_'s runnable queue into batch.
4780// Batch is a ring buffer starting at batchHead.
4781// Returns number of grabbed goroutines.
4782// Can be executed by any P.
4783func runqgrab(_p_ *p, batch *[256]guintptr, batchHead uint32, stealRunNextG bool) uint32 {
4784	for {
4785		h := atomic.LoadAcq(&_p_.runqhead) // load-acquire, synchronize with other consumers
4786		t := atomic.LoadAcq(&_p_.runqtail) // load-acquire, synchronize with the producer
4787		n := t - h
4788		n = n - n/2
4789		if n == 0 {
4790			if stealRunNextG {
4791				// Try to steal from _p_.runnext.
4792				if next := _p_.runnext; next != 0 {
4793					if _p_.status == _Prunning {
4794						// Sleep to ensure that _p_ isn't about to run the g
4795						// we are about to steal.
4796						// The important use case here is when the g running
4797						// on _p_ ready()s another g and then almost
4798						// immediately blocks. Instead of stealing runnext
4799						// in this window, back off to give _p_ a chance to
4800						// schedule runnext. This will avoid thrashing gs
4801						// between different Ps.
4802						// A sync chan send/recv takes ~50ns as of time of
4803						// writing, so 3us gives ~50x overshoot.
4804						if GOOS != "windows" {
4805							usleep(3)
4806						} else {
4807							// On windows system timer granularity is
4808							// 1-15ms, which is way too much for this
4809							// optimization. So just yield.
4810							osyield()
4811						}
4812					}
4813					if !_p_.runnext.cas(next, 0) {
4814						continue
4815					}
4816					batch[batchHead%uint32(len(batch))] = next
4817					return 1
4818				}
4819			}
4820			return 0
4821		}
4822		if n > uint32(len(_p_.runq)/2) { // read inconsistent h and t
4823			continue
4824		}
4825		for i := uint32(0); i < n; i++ {
4826			g := _p_.runq[(h+i)%uint32(len(_p_.runq))]
4827			batch[(batchHead+i)%uint32(len(batch))] = g
4828		}
4829		if atomic.CasRel(&_p_.runqhead, h, h+n) { // cas-release, commits consume
4830			return n
4831		}
4832	}
4833}
4834
4835// Steal half of the elements from the local runnable queue of p2
4836// and put them onto the local runnable queue of _p_.
4837// Returns one of the stolen elements (or nil if the steal failed).
4838func runqsteal(_p_, p2 *p, stealRunNextG bool) *g {
4839	t := _p_.runqtail
4840	n := runqgrab(p2, &_p_.runq, t, stealRunNextG)
4841	if n == 0 {
4842		return nil
4843	}
4844	n--
4845	gp := _p_.runq[(t+n)%uint32(len(_p_.runq))].ptr()
4846	if n == 0 {
4847		return gp
4848	}
4849	h := atomic.LoadAcq(&_p_.runqhead) // load-acquire, synchronize with consumers
4850	if t-h+n >= uint32(len(_p_.runq)) {
4851		throw("runqsteal: runq overflow")
4852	}
4853	atomic.StoreRel(&_p_.runqtail, t+n) // store-release, makes the item available for consumption
4854	return gp
4855}
4856
4857// A gQueue is a deque of Gs linked through g.schedlink. A G can only
4858// be on one gQueue or gList at a time.
4859type gQueue struct {
4860	head guintptr
4861	tail guintptr
4862}
4863
4864// empty reports whether q is empty.
4865func (q *gQueue) empty() bool {
4866	return q.head == 0
4867}
4868
4869// push adds gp to the head of q.
4870func (q *gQueue) push(gp *g) {
4871	gp.schedlink = q.head
4872	q.head.set(gp)
4873	if q.tail == 0 {
4874		q.tail.set(gp)
4875	}
4876}
4877
4878// pushBack adds gp to the tail of q.
4879func (q *gQueue) pushBack(gp *g) {
4880	gp.schedlink = 0
4881	if q.tail != 0 {
4882		q.tail.ptr().schedlink.set(gp)
4883	} else {
4884		q.head.set(gp)
4885	}
4886	q.tail.set(gp)
4887}
4888
4889// pushBackAll adds all Gs in q2 to the tail of q. After this q2 must
4890// not be used.
4891func (q *gQueue) pushBackAll(q2 gQueue) {
4892	if q2.tail == 0 {
4893		return
4894	}
4895	q2.tail.ptr().schedlink = 0
4896	if q.tail != 0 {
4897		q.tail.ptr().schedlink = q2.head
4898	} else {
4899		q.head = q2.head
4900	}
4901	q.tail = q2.tail
4902}
4903
4904// pop removes and returns the head of queue q. It returns nil if
4905// q is empty.
4906func (q *gQueue) pop() *g {
4907	gp := q.head.ptr()
4908	if gp != nil {
4909		q.head = gp.schedlink
4910		if q.head == 0 {
4911			q.tail = 0
4912		}
4913	}
4914	return gp
4915}
4916
4917// popList takes all Gs in q and returns them as a gList.
4918func (q *gQueue) popList() gList {
4919	stack := gList{q.head}
4920	*q = gQueue{}
4921	return stack
4922}
4923
4924// A gList is a list of Gs linked through g.schedlink. A G can only be
4925// on one gQueue or gList at a time.
4926type gList struct {
4927	head guintptr
4928}
4929
4930// empty reports whether l is empty.
4931func (l *gList) empty() bool {
4932	return l.head == 0
4933}
4934
4935// push adds gp to the head of l.
4936func (l *gList) push(gp *g) {
4937	gp.schedlink = l.head
4938	l.head.set(gp)
4939}
4940
4941// pushAll prepends all Gs in q to l.
4942func (l *gList) pushAll(q gQueue) {
4943	if !q.empty() {
4944		q.tail.ptr().schedlink = l.head
4945		l.head = q.head
4946	}
4947}
4948
4949// pop removes and returns the head of l. If l is empty, it returns nil.
4950func (l *gList) pop() *g {
4951	gp := l.head.ptr()
4952	if gp != nil {
4953		l.head = gp.schedlink
4954	}
4955	return gp
4956}
4957
4958//go:linkname setMaxThreads runtime..z2fdebug.setMaxThreads
4959func setMaxThreads(in int) (out int) {
4960	lock(&sched.lock)
4961	out = int(sched.maxmcount)
4962	if in > 0x7fffffff { // MaxInt32
4963		sched.maxmcount = 0x7fffffff
4964	} else {
4965		sched.maxmcount = int32(in)
4966	}
4967	checkmcount()
4968	unlock(&sched.lock)
4969	return
4970}
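
// Reached via runtime/debug; an illustrative call from user code:
//
//	prev := debug.SetMaxThreads(20000) // sets the new limit, returns the old one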
4971
4972func haveexperiment(name string) bool {
4973	// The gofrontend does not support experiments.
4974	return false
4975}
4976
4977//go:nosplit
4978func procPin() int {
4979	_g_ := getg()
4980	mp := _g_.m
4981
4982	mp.locks++
4983	return int(mp.p.ptr().id)
4984}
4985
4986//go:nosplit
4987func procUnpin() {
4988	_g_ := getg()
4989	_g_.m.locks--
4990}
4991
4992//go:linkname sync_runtime_procPin sync.runtime_procPin
4993//go:nosplit
4994func sync_runtime_procPin() int {
4995	return procPin()
4996}
4997
4998//go:linkname sync_runtime_procUnpin sync.runtime_procUnpin
4999//go:nosplit
5000func sync_runtime_procUnpin() {
5001	procUnpin()
5002}
5003
5004//go:linkname sync_atomic_runtime_procPin sync..z2fatomic.runtime_procPin
5005//go:nosplit
5006func sync_atomic_runtime_procPin() int {
5007	return procPin()
5008}
5009
5010//go:linkname sync_atomic_runtime_procUnpin sync..z2fatomic.runtime_procUnpin
5011//go:nosplit
5012func sync_atomic_runtime_procUnpin() {
5013	procUnpin()
5014}
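
// An illustrative caller on the sync side: sync.Pool pins the goroutine to
// its current P so the returned id can index a per-P slot without further
// synchronization. The names below are a sketch, not the actual pool
// internals:
//
//	pid := runtime_procPin()
//	local := perP[pid] // hypothetical per-P slot
//	// ... lock-free fast path using local ...
//	runtime_procUnpin()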
5015
5016// Active spinning for sync.Mutex.
5017//go:linkname sync_runtime_canSpin sync.runtime_canSpin
5018//go:nosplit
5019func sync_runtime_canSpin(i int) bool {
5020	// sync.Mutex is cooperative, so we are conservative with spinning.
5021	// Spin only a few times and only if running on a multicore machine and
5022	// GOMAXPROCS>1 and there is at least one other running P and local runq is empty.
5023	// As opposed to runtime mutex we don't do passive spinning here,
5024	// because there can be work on global runq or on other Ps.
5025	if i >= active_spin || ncpu <= 1 || gomaxprocs <= int32(sched.npidle+sched.nmspinning)+1 {
5026		return false
5027	}
5028	if p := getg().m.p.ptr(); !runqempty(p) {
5029		return false
5030	}
5031	return true
5032}
5033
5034//go:linkname sync_runtime_doSpin sync.runtime_doSpin
5035//go:nosplit
5036func sync_runtime_doSpin() {
5037	procyield(active_spin_cnt)
5038}
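
// Sketch of the intended calling pattern in sync.Mutex's slow path
// (variable names are illustrative):
//
//	iter := 0
//	for {
//		if runtime_canSpin(iter) {
//			runtime_doSpin()
//			iter++
//			continue
//		}
//		break // fall back to blocking on a semaphore
//	}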
5039
5040var stealOrder randomOrder
5041
5042// randomOrder/randomEnum are helper types for randomized work stealing.
5043// They allow enumerating all Ps in different pseudo-random orders without repetitions.
5044// The algorithm is based on the fact that if we have X such that X and GOMAXPROCS
5045// are coprime, then the sequence (i + X) % GOMAXPROCS gives the required enumeration.
5046type randomOrder struct {
5047	count    uint32
5048	coprimes []uint32
5049}
5050
5051type randomEnum struct {
5052	i     uint32
5053	count uint32
5054	pos   uint32
5055	inc   uint32
5056}
5057
5058func (ord *randomOrder) reset(count uint32) {
5059	ord.count = count
5060	ord.coprimes = ord.coprimes[:0]
5061	for i := uint32(1); i <= count; i++ {
5062		if gcd(i, count) == 1 {
5063			ord.coprimes = append(ord.coprimes, i)
5064		}
5065	}
5066}
5067
5068func (ord *randomOrder) start(i uint32) randomEnum {
5069	return randomEnum{
5070		count: ord.count,
5071		pos:   i % ord.count,
5072		inc:   ord.coprimes[i%uint32(len(ord.coprimes))],
5073	}
5074}
5075
5076func (enum *randomEnum) done() bool {
5077	return enum.i == enum.count
5078}
5079
5080func (enum *randomEnum) next() {
5081	enum.i++
5082	enum.pos = (enum.pos + enum.inc) % enum.count
5083}
5084
5085func (enum *randomEnum) position() uint32 {
5086	return enum.pos
5087}
5088
5089func gcd(a, b uint32) uint32 {
5090	for b != 0 {
5091		a, b = b, a%b
5092	}
5093	return a
5094}
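
// Worked example (illustrative): with count = 6 the coprimes are {1, 5}.
// Starting an enumeration at i = 3 gives pos = 3 and inc = 5, so position()
// visits 3, 2, 1, 0, 5, 4: every P exactly once in a pseudo-random order.
// A typical consumer (the work-stealing scan earlier in this file) iterates
// roughly like this:
//
//	for enum := stealOrder.start(fastrand()); !enum.done(); enum.next() {
//		p2 := allp[enum.position()]
//		// try to steal from p2 ...
//	}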
5095