1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Page heap.
6//
7// See malloc.go for overview.
8
9package runtime
10
11import (
12	"internal/cpu"
13	"runtime/internal/atomic"
14	"runtime/internal/sys"
15	"unsafe"
16)
17
18const (
19	// minPhysPageSize is a lower-bound on the physical page size. The
20	// true physical page size may be larger than this. In contrast,
21	// sys.PhysPageSize is an upper-bound on the physical page size.
22	minPhysPageSize = 4096
23
24	// maxPhysPageSize is the maximum page size the runtime supports.
25	maxPhysPageSize = 512 << 10
26
27	// maxPhysHugePageSize sets an upper-bound on the maximum huge page size
28	// that the runtime supports.
29	maxPhysHugePageSize = pallocChunkBytes
30)
31
32// Main malloc heap.
// The heap itself is the "pages" page allocation data structure,
// but all the other global data is here too.
35//
36// mheap must not be heap-allocated because it contains mSpanLists,
37// which must not be heap-allocated.
38//
39//go:notinheap
40type mheap struct {
41	// lock must only be acquired on the system stack, otherwise a g
42	// could self-deadlock if its stack grows with the lock held.
43	lock      mutex
44	pages     pageAlloc // page allocation data structure
45	sweepgen  uint32    // sweep generation, see comment in mspan; written during STW
46	sweepdone uint32    // all spans are swept
47	sweepers  uint32    // number of active sweepone calls
48
49	// allspans is a slice of all mspans ever created. Each mspan
50	// appears exactly once.
51	//
	// The memory for allspans is manually managed and can be
	// reallocated and moved as the heap grows.
54	//
55	// In general, allspans is protected by mheap_.lock, which
56	// prevents concurrent access as well as freeing the backing
57	// store. Accesses during STW might not hold the lock, but
58	// must ensure that allocation cannot happen around the
59	// access (since that may free the backing store).
60	allspans []*mspan // all spans out there
61
62	// sweepSpans contains two mspan stacks: one of swept in-use
63	// spans, and one of unswept in-use spans. These two trade
64	// roles on each GC cycle. Since the sweepgen increases by 2
65	// on each cycle, this means the swept spans are in
66	// sweepSpans[sweepgen/2%2] and the unswept spans are in
67	// sweepSpans[1-sweepgen/2%2]. Sweeping pops spans from the
68	// unswept stack and pushes spans that are still in-use on the
69	// swept stack. Likewise, allocating an in-use span pushes it
70	// on the swept stack.
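	//
	// For example, if sweepgen is 4, swept spans are in sweepSpans[0]
	// and unswept spans are in sweepSpans[1]; when the next cycle bumps
	// sweepgen to 6, the two roles swap.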
71	sweepSpans [2]gcSweepBuf
72
73	// _ uint32 // align uint64 fields on 32-bit for atomics
74
75	// Proportional sweep
76	//
77	// These parameters represent a linear function from heap_live
78	// to page sweep count. The proportional sweep system works to
79	// stay in the black by keeping the current page sweep count
80	// above this line at the current heap_live.
81	//
82	// The line has slope sweepPagesPerByte and passes through a
83	// basis point at (sweepHeapLiveBasis, pagesSweptBasis). At
84	// any given time, the system is at (memstats.heap_live,
85	// pagesSwept) in this space.
86	//
87	// It's important that the line pass through a point we
88	// control rather than simply starting at a (0,0) origin
89	// because that lets us adjust sweep pacing at any time while
90	// accounting for current progress. If we could only adjust
91	// the slope, it would create a discontinuity in debt if any
92	// progress has already been made.
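	//
	// Concretely, the pacing target at a given heap_live is roughly
	//
	//	pagesSweptBasis + sweepPagesPerByte*(heap_live - sweepHeapLiveBasis)
	//
	// pages swept, and the sweeper is "in the black" as long as
	// pagesSwept stays at or above that value.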
93	pagesInUse         uint64  // pages of spans in stats mSpanInUse; updated atomically
94	pagesSwept         uint64  // pages swept this cycle; updated atomically
95	pagesSweptBasis    uint64  // pagesSwept to use as the origin of the sweep ratio; updated atomically
96	sweepHeapLiveBasis uint64  // value of heap_live to use as the origin of sweep ratio; written with lock, read without
97	sweepPagesPerByte  float64 // proportional sweep ratio; written with lock, read without
98	// TODO(austin): pagesInUse should be a uintptr, but the 386
99	// compiler can't 8-byte align fields.
100
101	// scavengeGoal is the amount of total retained heap memory (measured by
102	// heapRetained) that the runtime will try to maintain by returning memory
103	// to the OS.
104	scavengeGoal uint64
105
106	// Page reclaimer state
107
108	// reclaimIndex is the page index in allArenas of next page to
109	// reclaim. Specifically, it refers to page (i %
110	// pagesPerArena) of arena allArenas[i / pagesPerArena].
111	//
112	// If this is >= 1<<63, the page reclaimer is done scanning
113	// the page marks.
114	//
115	// This is accessed atomically.
116	reclaimIndex uint64
117	// reclaimCredit is spare credit for extra pages swept. Since
118	// the page reclaimer works in large chunks, it may reclaim
119	// more than requested. Any spare pages released go to this
120	// credit pool.
121	//
122	// This is accessed atomically.
123	reclaimCredit uintptr
124
125	// Malloc stats.
126	largealloc  uint64                  // bytes allocated for large objects
127	nlargealloc uint64                  // number of large object allocations
128	largefree   uint64                  // bytes freed for large objects (>maxsmallsize)
129	nlargefree  uint64                  // number of frees for large objects (>maxsmallsize)
130	nsmallfree  [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize)
131
132	// arenas is the heap arena map. It points to the metadata for
133	// the heap for every arena frame of the entire usable virtual
134	// address space.
135	//
136	// Use arenaIndex to compute indexes into this array.
137	//
138	// For regions of the address space that are not backed by the
139	// Go heap, the arena map contains nil.
140	//
141	// Modifications are protected by mheap_.lock. Reads can be
142	// performed without locking; however, a given entry can
143	// transition from nil to non-nil at any time when the lock
	// isn't held. (Entries never transition back to nil.)
145	//
146	// In general, this is a two-level mapping consisting of an L1
147	// map and possibly many L2 maps. This saves space when there
148	// are a huge number of arena frames. However, on many
149	// platforms (even 64-bit), arenaL1Bits is 0, making this
150	// effectively a single-level map. In this case, arenas[0]
151	// will never be nil.
152	arenas [1 << arenaL1Bits]*[1 << arenaL2Bits]*heapArena
153
154	// heapArenaAlloc is pre-reserved space for allocating heapArena
155	// objects. This is only used on 32-bit, where we pre-reserve
156	// this space to avoid interleaving it with the heap itself.
157	heapArenaAlloc linearAlloc
158
159	// arenaHints is a list of addresses at which to attempt to
160	// add more heap arenas. This is initially populated with a
161	// set of general hint addresses, and grown with the bounds of
162	// actual heap arena ranges.
163	arenaHints *arenaHint
164
165	// arena is a pre-reserved space for allocating heap arenas
166	// (the actual arenas). This is only used on 32-bit.
167	arena linearAlloc
168
169	// allArenas is the arenaIndex of every mapped arena. This can
170	// be used to iterate through the address space.
171	//
172	// Access is protected by mheap_.lock. However, since this is
173	// append-only and old backing arrays are never freed, it is
174	// safe to acquire mheap_.lock, copy the slice header, and
175	// then release mheap_.lock.
176	allArenas []arenaIdx
177
178	// sweepArenas is a snapshot of allArenas taken at the
179	// beginning of the sweep cycle. This can be read safely by
180	// simply blocking GC (by disabling preemption).
181	sweepArenas []arenaIdx
182
183	// curArena is the arena that the heap is currently growing
184	// into. This should always be physPageSize-aligned.
185	curArena struct {
186		base, end uintptr
187	}
188
189	_ uint32 // ensure 64-bit alignment of central
190
191	// central free lists for small size classes.
192	// the padding makes sure that the mcentrals are
193	// spaced CacheLinePadSize bytes apart, so that each mcentral.lock
194	// gets its own cache line.
195	// central is indexed by spanClass.
196	central [numSpanClasses]struct {
197		mcentral mcentral
198		pad      [cpu.CacheLinePadSize - unsafe.Sizeof(mcentral{})%cpu.CacheLinePadSize]byte
199	}
200
201	spanalloc             fixalloc // allocator for span*
202	cachealloc            fixalloc // allocator for mcache*
203	specialfinalizeralloc fixalloc // allocator for specialfinalizer*
204	specialprofilealloc   fixalloc // allocator for specialprofile*
205	speciallock           mutex    // lock for special record allocators.
206	arenaHintAlloc        fixalloc // allocator for arenaHints
207
208	unused *specialfinalizer // never set, just here to force the specialfinalizer type into DWARF
209}
210
211var mheap_ mheap
212
213// A heapArena stores metadata for a heap arena. heapArenas are stored
214// outside of the Go heap and accessed via the mheap_.arenas index.
215//
216//go:notinheap
217type heapArena struct {
218	// bitmap stores the pointer/scalar bitmap for the words in
219	// this arena. See mbitmap.go for a description. Use the
220	// heapBits type to access this.
221	bitmap [heapArenaBitmapBytes]byte
222
223	// spans maps from virtual address page ID within this arena to *mspan.
224	// For allocated spans, their pages map to the span itself.
225	// For free spans, only the lowest and highest pages map to the span itself.
226	// Internal pages map to an arbitrary span.
227	// For pages that have never been allocated, spans entries are nil.
228	//
229	// Modifications are protected by mheap.lock. Reads can be
230	// performed without locking, but ONLY from indexes that are
231	// known to contain in-use or stack spans. This means there
232	// must not be a safe-point between establishing that an
233	// address is live and looking it up in the spans array.
234	spans [pagesPerArena]*mspan
235
236	// pageInUse is a bitmap that indicates which spans are in
237	// state mSpanInUse. This bitmap is indexed by page number,
238	// but only the bit corresponding to the first page in each
239	// span is used.
240	//
241	// Reads and writes are atomic.
242	pageInUse [pagesPerArena / 8]uint8
243
244	// pageMarks is a bitmap that indicates which spans have any
245	// marked objects on them. Like pageInUse, only the bit
246	// corresponding to the first page in each span is used.
247	//
248	// Writes are done atomically during marking. Reads are
249	// non-atomic and lock-free since they only occur during
250	// sweeping (and hence never race with writes).
251	//
252	// This is used to quickly find whole spans that can be freed.
253	//
254	// TODO(austin): It would be nice if this was uint64 for
255	// faster scanning, but we don't have 64-bit atomic bit
256	// operations.
257	pageMarks [pagesPerArena / 8]uint8
258
259	// zeroedBase marks the first byte of the first page in this
260	// arena which hasn't been used yet and is therefore already
261	// zero. zeroedBase is relative to the arena base.
262	// Increases monotonically until it hits heapArenaBytes.
263	//
264	// This field is sufficient to determine if an allocation
265	// needs to be zeroed because the page allocator follows an
266	// address-ordered first-fit policy.
267	//
268	// Read atomically and written with an atomic CAS.
269	zeroedBase uintptr
270}
271
272// arenaHint is a hint for where to grow the heap arenas. See
273// mheap_.arenaHints.
274//
275//go:notinheap
276type arenaHint struct {
277	addr uintptr
278	down bool
279	next *arenaHint
280}
281
282// An mspan is a run of pages.
283//
284// When a mspan is in the heap free treap, state == mSpanFree
285// and heapmap(s->start) == span, heapmap(s->start+s->npages-1) == span.
286// If the mspan is in the heap scav treap, then in addition to the
287// above scavenged == true. scavenged == false in all other cases.
288//
289// When a mspan is allocated, state == mSpanInUse or mSpanManual
290// and heapmap(i) == span for all s->start <= i < s->start+s->npages.
291
// Every mspan is in one doubly-linked list, either in one of the
// mcentral's span lists or in a manually-managed list such as the
// stack allocator's span lists.
294
295// An mspan representing actual memory has state mSpanInUse,
296// mSpanManual, or mSpanFree. Transitions between these states are
297// constrained as follows:
298//
299// * A span may transition from free to in-use or manual during any GC
300//   phase.
301//
302// * During sweeping (gcphase == _GCoff), a span may transition from
303//   in-use to free (as a result of sweeping) or manual to free (as a
304//   result of stacks being freed).
305//
306// * During GC (gcphase != _GCoff), a span *must not* transition from
307//   manual or in-use to free. Because concurrent GC may read a pointer
308//   and then look up its span, the span state must be monotonic.
309//
310// Setting mspan.state to mSpanInUse or mSpanManual must be done
311// atomically and only after all other span fields are valid.
312// Likewise, if inspecting a span is contingent on it being
313// mSpanInUse, the state should be loaded atomically and checked
314// before depending on other fields. This allows the garbage collector
315// to safely deal with potentially invalid pointers, since resolving
316// such pointers may race with a span being allocated.
317type mSpanState uint8
318
319const (
320	mSpanDead   mSpanState = iota
321	mSpanInUse             // allocated for garbage collected heap
322	mSpanManual            // allocated for manual management (e.g., stack allocator)
323)
324
325// mSpanStateNames are the names of the span states, indexed by
326// mSpanState.
327var mSpanStateNames = []string{
328	"mSpanDead",
329	"mSpanInUse",
330	"mSpanManual",
331	"mSpanFree",
332}
333
334// mSpanStateBox holds an mSpanState and provides atomic operations on
335// it. This is a separate type to disallow accidental comparison or
336// assignment with mSpanState.
337type mSpanStateBox struct {
338	s mSpanState
339}
340
341func (b *mSpanStateBox) set(s mSpanState) {
342	atomic.Store8((*uint8)(&b.s), uint8(s))
343}
344
345func (b *mSpanStateBox) get() mSpanState {
346	return mSpanState(atomic.Load8((*uint8)(&b.s)))
347}
348
349// mSpanList heads a linked list of spans.
350//
351//go:notinheap
352type mSpanList struct {
353	first *mspan // first span in list, or nil if none
354	last  *mspan // last span in list, or nil if none
355}
356
357//go:notinheap
358type mspan struct {
359	next *mspan     // next span in list, or nil if none
360	prev *mspan     // previous span in list, or nil if none
361	list *mSpanList // For debugging. TODO: Remove.
362
363	startAddr uintptr // address of first byte of span aka s.base()
364	npages    uintptr // number of pages in span
365
366	manualFreeList gclinkptr // list of free objects in mSpanManual spans
367
368	// freeindex is the slot index between 0 and nelems at which to begin scanning
369	// for the next free object in this span.
370	// Each allocation scans allocBits starting at freeindex until it encounters a 0
371	// indicating a free object. freeindex is then adjusted so that subsequent scans begin
372	// just past the newly discovered free object.
373	//
	// If freeindex == nelems, this span has no free objects.
375	//
376	// allocBits is a bitmap of objects in this span.
377	// If n >= freeindex and allocBits[n/8] & (1<<(n%8)) is 0
378	// then object n is free;
	// otherwise, object n is allocated. Bits starting at nelems are
380	// undefined and should never be referenced.
381	//
382	// Object n starts at address n*elemsize + (start << pageShift).
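	//
	// For example, with freeindex == 10, whether object 13 is free is
	// given by allocBits[13/8] & (1<<(13%8)), i.e. bit 5 of allocBits[1],
	// where a 0 bit means the slot is free.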
383	freeindex uintptr
384	// TODO: Look up nelems from sizeclass and remove this field if it
385	// helps performance.
	nelems uintptr // number of objects in the span.
387
388	// Cache of the allocBits at freeindex. allocCache is shifted
389	// such that the lowest bit corresponds to the bit freeindex.
390	// allocCache holds the complement of allocBits, thus allowing
391	// ctz (count trailing zero) to use it directly.
392	// allocCache may contain bits beyond s.nelems; the caller must ignore
393	// these.
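	//
	// For example, if the low bits of allocCache are ...11111000, ctz
	// yields 3, so the next free object is at slot freeindex+3.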
394	allocCache uint64
395
396	// allocBits and gcmarkBits hold pointers to a span's mark and
397	// allocation bits. The pointers are 8 byte aligned.
	// There are four arenas where this data is held.
399	// free: Dirty arenas that are no longer accessed
400	//       and can be reused.
401	// next: Holds information to be used in the next GC cycle.
402	// current: Information being used during this GC cycle.
403	// previous: Information being used during the last GC cycle.
404	// A new GC cycle starts with the call to finishsweep_m.
405	// finishsweep_m moves the previous arena to the free arena,
406	// the current arena to the previous arena, and
407	// the next arena to the current arena.
408	// The next arena is populated as the spans request
409	// memory to hold gcmarkBits for the next GC cycle as well
410	// as allocBits for newly allocated spans.
411	//
412	// The pointer arithmetic is done "by hand" instead of using
413	// arrays to avoid bounds checks along critical performance
414	// paths.
415	// The sweep will free the old allocBits and set allocBits to the
416	// gcmarkBits. The gcmarkBits are replaced with a fresh zeroed
417	// out memory.
418	allocBits  *gcBits
419	gcmarkBits *gcBits
420
421	// sweep generation:
422	// if sweepgen == h->sweepgen - 2, the span needs sweeping
423	// if sweepgen == h->sweepgen - 1, the span is currently being swept
424	// if sweepgen == h->sweepgen, the span is swept and ready to use
425	// if sweepgen == h->sweepgen + 1, the span was cached before sweep began and is still cached, and needs sweeping
426	// if sweepgen == h->sweepgen + 3, the span was swept and then cached and is still cached
427	// h->sweepgen is incremented by 2 after every GC
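	//
	// For example, when h->sweepgen is 6: a span with sweepgen 4 needs
	// sweeping, 5 is being swept, 6 is swept and ready to use, 7 was
	// cached before sweeping began and still needs sweeping, and 9 was
	// swept and then cached.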
428
429	sweepgen    uint32
430	divMul      uint16        // for divide by elemsize - divMagic.mul
431	baseMask    uint16        // if non-0, elemsize is a power of 2, & this will get object allocation base
432	allocCount  uint16        // number of allocated objects
433	spanclass   spanClass     // size class and noscan (uint8)
434	state       mSpanStateBox // mSpanInUse etc; accessed atomically (get/set methods)
435	needzero    uint8         // needs to be zeroed before allocation
436	divShift    uint8         // for divide by elemsize - divMagic.shift
437	divShift2   uint8         // for divide by elemsize - divMagic.shift2
438	elemsize    uintptr       // computed from sizeclass or from npages
439	limit       uintptr       // end of data in span
440	speciallock mutex         // guards specials list
441	specials    *special      // linked list of special records sorted by offset.
442}
443
444func (s *mspan) base() uintptr {
445	return s.startAddr
446}
447
448func (s *mspan) layout() (size, n, total uintptr) {
449	total = s.npages << _PageShift
450	size = s.elemsize
451	if size > 0 {
452		n = total / size
453	}
454	return
455}
456
457// recordspan adds a newly allocated span to h.allspans.
458//
459// This only happens the first time a span is allocated from
460// mheap.spanalloc (it is not called when a span is reused).
461//
462// Write barriers are disallowed here because it can be called from
463// gcWork when allocating new workbufs. However, because it's an
464// indirect call from the fixalloc initializer, the compiler can't see
465// this.
466//
467//go:nowritebarrierrec
468func recordspan(vh unsafe.Pointer, p unsafe.Pointer) {
469	h := (*mheap)(vh)
470	s := (*mspan)(p)
471	if len(h.allspans) >= cap(h.allspans) {
472		n := 64 * 1024 / sys.PtrSize
473		if n < cap(h.allspans)*3/2 {
474			n = cap(h.allspans) * 3 / 2
475		}
476		var new []*mspan
477		sp := (*slice)(unsafe.Pointer(&new))
478		sp.array = sysAlloc(uintptr(n)*sys.PtrSize, &memstats.other_sys)
479		if sp.array == nil {
480			throw("runtime: cannot allocate memory")
481		}
482		sp.len = len(h.allspans)
483		sp.cap = n
484		if len(h.allspans) > 0 {
485			copy(new, h.allspans)
486		}
487		oldAllspans := h.allspans
488		*(*notInHeapSlice)(unsafe.Pointer(&h.allspans)) = *(*notInHeapSlice)(unsafe.Pointer(&new))
489		if len(oldAllspans) != 0 {
490			sysFree(unsafe.Pointer(&oldAllspans[0]), uintptr(cap(oldAllspans))*unsafe.Sizeof(oldAllspans[0]), &memstats.other_sys)
491		}
492	}
493	h.allspans = h.allspans[:len(h.allspans)+1]
494	h.allspans[len(h.allspans)-1] = s
495}
496
497// A spanClass represents the size class and noscan-ness of a span.
498//
499// Each size class has a noscan spanClass and a scan spanClass. The
500// noscan spanClass contains only noscan objects, which do not contain
501// pointers and thus do not need to be scanned by the garbage
502// collector.
503type spanClass uint8
504
505const (
506	numSpanClasses = _NumSizeClasses << 1
507	tinySpanClass  = spanClass(tinySizeClass<<1 | 1)
508)
509
510func makeSpanClass(sizeclass uint8, noscan bool) spanClass {
511	return spanClass(sizeclass<<1) | spanClass(bool2int(noscan))
512}
513
514func (sc spanClass) sizeclass() int8 {
515	return int8(sc >> 1)
516}
517
518func (sc spanClass) noscan() bool {
519	return sc&1 != 0
520}
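
// For example, makeSpanClass(5, true) yields spanClass(11) (5<<1 | 1),
// for which sizeclass() returns 5 and noscan() returns true, while
// makeSpanClass(5, false) yields spanClass(10).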
521
522// arenaIndex returns the index into mheap_.arenas of the arena
// containing metadata for p. This index combines an index into the
// L1 map with an index into the L2 map and should be used as
525// mheap_.arenas[ai.l1()][ai.l2()].
526//
527// If p is outside the range of valid heap addresses, either l1() or
528// l2() will be out of bounds.
529//
530// It is nosplit because it's called by spanOf and several other
531// nosplit functions.
532//
533//go:nosplit
534func arenaIndex(p uintptr) arenaIdx {
535	return arenaIdx((p + arenaBaseOffset) / heapArenaBytes)
536}
537
538// arenaBase returns the low address of the region covered by heap
539// arena i.
540func arenaBase(i arenaIdx) uintptr {
541	return uintptr(i)*heapArenaBytes - arenaBaseOffset
542}
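
// As a sanity check on the mapping, for any address p in a mapped arena,
// arenaBase(arenaIndex(p)) <= p < arenaBase(arenaIndex(p))+heapArenaBytes,
// since arenaIndex divides the offset-adjusted address by heapArenaBytes
// and arenaBase inverts that computation.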
543
544type arenaIdx uint
545
546func (i arenaIdx) l1() uint {
547	if arenaL1Bits == 0 {
548		// Let the compiler optimize this away if there's no
549		// L1 map.
550		return 0
551	} else {
552		return uint(i) >> arenaL1Shift
553	}
554}
555
556func (i arenaIdx) l2() uint {
557	if arenaL1Bits == 0 {
558		return uint(i)
559	} else {
560		return uint(i) & (1<<arenaL2Bits - 1)
561	}
562}
563
564// inheap reports whether b is a pointer into a (potentially dead) heap object.
565// It returns false for pointers into mSpanManual spans.
566// Non-preemptible because it is used by write barriers.
567//go:nowritebarrier
568//go:nosplit
569func inheap(b uintptr) bool {
570	return spanOfHeap(b) != nil
571}
572
573// inHeapOrStack is a variant of inheap that returns true for pointers
574// into any allocated heap span.
575//
576//go:nowritebarrier
577//go:nosplit
578func inHeapOrStack(b uintptr) bool {
579	s := spanOf(b)
580	if s == nil || b < s.base() {
581		return false
582	}
583	switch s.state.get() {
584	case mSpanInUse, mSpanManual:
585		return b < s.limit
586	default:
587		return false
588	}
589}
590
591// spanOf returns the span of p. If p does not point into the heap
592// arena or no span has ever contained p, spanOf returns nil.
593//
594// If p does not point to allocated memory, this may return a non-nil
595// span that does *not* contain p. If this is a possibility, the
596// caller should either call spanOfHeap or check the span bounds
597// explicitly.
598//
599// Must be nosplit because it has callers that are nosplit.
600//
601//go:nosplit
602func spanOf(p uintptr) *mspan {
603	// This function looks big, but we use a lot of constant
604	// folding around arenaL1Bits to get it under the inlining
605	// budget. Also, many of the checks here are safety checks
606	// that Go needs to do anyway, so the generated code is quite
607	// short.
608	ri := arenaIndex(p)
609	if arenaL1Bits == 0 {
610		// If there's no L1, then ri.l1() can't be out of bounds but ri.l2() can.
611		if ri.l2() >= uint(len(mheap_.arenas[0])) {
612			return nil
613		}
614	} else {
615		// If there's an L1, then ri.l1() can be out of bounds but ri.l2() can't.
616		if ri.l1() >= uint(len(mheap_.arenas)) {
617			return nil
618		}
619	}
620	l2 := mheap_.arenas[ri.l1()]
621	if arenaL1Bits != 0 && l2 == nil { // Should never happen if there's no L1.
622		return nil
623	}
624	ha := l2[ri.l2()]
625	if ha == nil {
626		return nil
627	}
628	return ha.spans[(p/pageSize)%pagesPerArena]
629}
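
// For example, a caller that cannot guarantee p points to allocated
// memory might pair spanOf with an explicit bounds check, roughly:
//
//	if s := spanOf(p); s != nil && p >= s.base() && p < s.limit {
//		// p falls within the span s describes.
//	}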
630
631// spanOfUnchecked is equivalent to spanOf, but the caller must ensure
632// that p points into an allocated heap arena.
633//
634// Must be nosplit because it has callers that are nosplit.
635//
636//go:nosplit
637func spanOfUnchecked(p uintptr) *mspan {
638	ai := arenaIndex(p)
639	return mheap_.arenas[ai.l1()][ai.l2()].spans[(p/pageSize)%pagesPerArena]
640}
641
642// spanOfHeap is like spanOf, but returns nil if p does not point to a
643// heap object.
644//
645// Must be nosplit because it has callers that are nosplit.
646//
647//go:nosplit
648func spanOfHeap(p uintptr) *mspan {
649	s := spanOf(p)
650	// s is nil if it's never been allocated. Otherwise, we check
651	// its state first because we don't trust this pointer, so we
652	// have to synchronize with span initialization. Then, it's
653	// still possible we picked up a stale span pointer, so we
654	// have to check the span's bounds.
655	if s == nil || s.state.get() != mSpanInUse || p < s.base() || p >= s.limit {
656		return nil
657	}
658	return s
659}
660
661// pageIndexOf returns the arena, page index, and page mask for pointer p.
662// The caller must ensure p is in the heap.
663func pageIndexOf(p uintptr) (arena *heapArena, pageIdx uintptr, pageMask uint8) {
664	ai := arenaIndex(p)
665	arena = mheap_.arenas[ai.l1()][ai.l2()]
666	pageIdx = ((p / pageSize) / 8) % uintptr(len(arena.pageInUse))
667	pageMask = byte(1 << ((p / pageSize) % 8))
668	return
669}
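
// For example, the in-use bit for the span containing p might be
// tested roughly as follows (the runtime uses atomic.Or8 and
// atomic.And8 on the same bytes to set and clear it):
//
//	arena, pageIdx, pageMask := pageIndexOf(p)
//	inUse := atomic.Load8(&arena.pageInUse[pageIdx])&pageMask != 0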
670
671// Initialize the heap.
672func (h *mheap) init() {
673	h.spanalloc.init(unsafe.Sizeof(mspan{}), recordspan, unsafe.Pointer(h), &memstats.mspan_sys)
674	h.cachealloc.init(unsafe.Sizeof(mcache{}), nil, nil, &memstats.mcache_sys)
675	h.specialfinalizeralloc.init(unsafe.Sizeof(specialfinalizer{}), nil, nil, &memstats.other_sys)
676	h.specialprofilealloc.init(unsafe.Sizeof(specialprofile{}), nil, nil, &memstats.other_sys)
677	h.arenaHintAlloc.init(unsafe.Sizeof(arenaHint{}), nil, nil, &memstats.other_sys)
678
679	// Don't zero mspan allocations. Background sweeping can
680	// inspect a span concurrently with allocating it, so it's
681	// important that the span's sweepgen survive across freeing
682	// and re-allocating a span to prevent background sweeping
683	// from improperly cas'ing it from 0.
684	//
685	// This is safe because mspan contains no heap pointers.
686	h.spanalloc.zero = false
687
688	// h->mapcache needs no init
689
690	for i := range h.central {
691		h.central[i].mcentral.init(spanClass(i))
692	}
693
694	h.pages.init(&h.lock, &memstats.gc_sys)
695}
696
697// reclaim sweeps and reclaims at least npage pages into the heap.
698// It is called before allocating npage pages to keep growth in check.
699//
700// reclaim implements the page-reclaimer half of the sweeper.
701//
702// h must NOT be locked.
703func (h *mheap) reclaim(npage uintptr) {
704	// This scans pagesPerChunk at a time. Higher values reduce
	// contention on h.reclaimIndex, but increase the minimum
706	// latency of performing a reclaim.
707	//
708	// Must be a multiple of the pageInUse bitmap element size.
709	//
710	// The time required by this can vary a lot depending on how
711	// many spans are actually freed. Experimentally, it can scan
712	// for pages at ~300 GB/ms on a 2.6GHz Core i7, but can only
713	// free spans at ~32 MB/ms. Using 512 pages bounds this at
714	// roughly 100µs.
715	//
716	// TODO(austin): Half of the time spent freeing spans is in
717	// locking/unlocking the heap (even with low contention). We
718	// could make the slow path here several times faster by
719	// batching heap frees.
720	const pagesPerChunk = 512
721
722	// Bail early if there's no more reclaim work.
723	if atomic.Load64(&h.reclaimIndex) >= 1<<63 {
724		return
725	}
726
727	// Disable preemption so the GC can't start while we're
728	// sweeping, so we can read h.sweepArenas, and so
729	// traceGCSweepStart/Done pair on the P.
730	mp := acquirem()
731
732	if trace.enabled {
733		traceGCSweepStart()
734	}
735
736	arenas := h.sweepArenas
737	locked := false
738	for npage > 0 {
739		// Pull from accumulated credit first.
740		if credit := atomic.Loaduintptr(&h.reclaimCredit); credit > 0 {
741			take := credit
742			if take > npage {
743				// Take only what we need.
744				take = npage
745			}
746			if atomic.Casuintptr(&h.reclaimCredit, credit, credit-take) {
747				npage -= take
748			}
749			continue
750		}
751
752		// Claim a chunk of work.
753		idx := uintptr(atomic.Xadd64(&h.reclaimIndex, pagesPerChunk) - pagesPerChunk)
754		if idx/pagesPerArena >= uintptr(len(arenas)) {
755			// Page reclaiming is done.
756			atomic.Store64(&h.reclaimIndex, 1<<63)
757			break
758		}
759
760		if !locked {
761			// Lock the heap for reclaimChunk.
762			lock(&h.lock)
763			locked = true
764		}
765
766		// Scan this chunk.
767		nfound := h.reclaimChunk(arenas, idx, pagesPerChunk)
768		if nfound <= npage {
769			npage -= nfound
770		} else {
771			// Put spare pages toward global credit.
772			atomic.Xadduintptr(&h.reclaimCredit, nfound-npage)
773			npage = 0
774		}
775	}
776	if locked {
777		unlock(&h.lock)
778	}
779
780	if trace.enabled {
781		traceGCSweepDone()
782	}
783	releasem(mp)
784}
785
786// reclaimChunk sweeps unmarked spans that start at page indexes [pageIdx, pageIdx+n).
787// It returns the number of pages returned to the heap.
788//
789// h.lock must be held and the caller must be non-preemptible. Note: h.lock may be
790// temporarily unlocked and re-locked in order to do sweeping or if tracing is
791// enabled.
792func (h *mheap) reclaimChunk(arenas []arenaIdx, pageIdx, n uintptr) uintptr {
793	// The heap lock must be held because this accesses the
794	// heapArena.spans arrays using potentially non-live pointers.
795	// In particular, if a span were freed and merged concurrently
796	// with this probing heapArena.spans, it would be possible to
797	// observe arbitrary, stale span pointers.
798	n0 := n
799	var nFreed uintptr
800	sg := h.sweepgen
801	for n > 0 {
802		ai := arenas[pageIdx/pagesPerArena]
803		ha := h.arenas[ai.l1()][ai.l2()]
804
805		// Get a chunk of the bitmap to work on.
806		arenaPage := uint(pageIdx % pagesPerArena)
807		inUse := ha.pageInUse[arenaPage/8:]
808		marked := ha.pageMarks[arenaPage/8:]
809		if uintptr(len(inUse)) > n/8 {
810			inUse = inUse[:n/8]
811			marked = marked[:n/8]
812		}
813
814		// Scan this bitmap chunk for spans that are in-use
815		// but have no marked objects on them.
816		for i := range inUse {
817			inUseUnmarked := atomic.Load8(&inUse[i]) &^ marked[i]
818			if inUseUnmarked == 0 {
819				continue
820			}
821
822			for j := uint(0); j < 8; j++ {
823				if inUseUnmarked&(1<<j) != 0 {
824					s := ha.spans[arenaPage+uint(i)*8+j]
825					if atomic.Load(&s.sweepgen) == sg-2 && atomic.Cas(&s.sweepgen, sg-2, sg-1) {
826						npages := s.npages
827						unlock(&h.lock)
828						if s.sweep(false) {
829							nFreed += npages
830						}
831						lock(&h.lock)
832						// Reload inUse. It's possible nearby
833						// spans were freed when we dropped the
834						// lock and we don't want to get stale
835						// pointers from the spans array.
836						inUseUnmarked = atomic.Load8(&inUse[i]) &^ marked[i]
837					}
838				}
839			}
840		}
841
842		// Advance.
843		pageIdx += uintptr(len(inUse) * 8)
844		n -= uintptr(len(inUse) * 8)
845	}
846	if trace.enabled {
847		unlock(&h.lock)
848		// Account for pages scanned but not reclaimed.
849		traceGCSweepSpan((n0 - nFreed) * pageSize)
850		lock(&h.lock)
851	}
852	return nFreed
853}
854
855// alloc allocates a new span of npage pages from the GC'd heap.
856//
857// spanclass indicates the span's size class and scannability.
858//
859// If needzero is true, the memory for the returned span will be zeroed.
860func (h *mheap) alloc(npages uintptr, spanclass spanClass, needzero bool) *mspan {
861	// Don't do any operations that lock the heap on the G stack.
862	// It might trigger stack growth, and the stack growth code needs
863	// to be able to allocate heap.
864	var s *mspan
865	systemstack(func() {
866		// To prevent excessive heap growth, before allocating n pages
867		// we need to sweep and reclaim at least n pages.
868		if h.sweepdone == 0 {
869			h.reclaim(npages)
870		}
871		s = h.allocSpan(npages, false, spanclass, &memstats.heap_inuse)
872	})
873
874	if s != nil {
875		if needzero && s.needzero != 0 {
876			memclrNoHeapPointers(unsafe.Pointer(s.base()), s.npages<<_PageShift)
877		}
878		s.needzero = 0
879	}
880	return s
881}
882
883// allocManual allocates a manually-managed span of npage pages.
884// allocManual returns nil if allocation fails.
885//
886// allocManual adds the bytes used to *stat, which should be a
887// memstats in-use field. Unlike allocations in the GC'd heap, the
888// allocation does *not* count toward heap_inuse or heap_sys.
889//
890// The memory backing the returned span may not be zeroed if
891// span.needzero is set.
892//
893// allocManual must be called on the system stack because it may
894// acquire the heap lock via allocSpan. See mheap for details.
895//
896//go:systemstack
897func (h *mheap) allocManual(npages uintptr, stat *uint64) *mspan {
898	return h.allocSpan(npages, true, 0, stat)
899}
900
901// setSpans modifies the span map so [spanOf(base), spanOf(base+npage*pageSize))
902// is s.
903func (h *mheap) setSpans(base, npage uintptr, s *mspan) {
904	p := base / pageSize
905	ai := arenaIndex(base)
906	ha := h.arenas[ai.l1()][ai.l2()]
907	for n := uintptr(0); n < npage; n++ {
908		i := (p + n) % pagesPerArena
909		if i == 0 {
910			ai = arenaIndex(base + n*pageSize)
911			ha = h.arenas[ai.l1()][ai.l2()]
912		}
913		ha.spans[i] = s
914	}
915}
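
// For example, after setSpans(s.base(), 3, s), spanOf returns s for any
// address within the three pages starting at s.base(), even if those
// pages straddle a heap arena boundary.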
916
917// allocNeedsZero checks if the region of address space [base, base+npage*pageSize),
918// assumed to be allocated, needs to be zeroed, updating heap arena metadata for
919// future allocations.
920//
921// This must be called each time pages are allocated from the heap, even if the page
922// allocator can otherwise prove the memory it's allocating is already zero because
// it's fresh from the operating system. It updates heapArena metadata that is
924// critical for future page allocations.
925//
926// There are no locking constraints on this method.
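//
// For example, if a given arena's zeroedBase is 5*pageSize and an
// allocation covers pages 3 through 7 of that arena, the first two of
// those pages overlap memory that has already been handed out, so the
// caller must zero the allocation; zeroedBase is then advanced to
// 8*pageSize.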
927func (h *mheap) allocNeedsZero(base, npage uintptr) (needZero bool) {
928	for npage > 0 {
929		ai := arenaIndex(base)
930		ha := h.arenas[ai.l1()][ai.l2()]
931
932		zeroedBase := atomic.Loaduintptr(&ha.zeroedBase)
933		arenaBase := base % heapArenaBytes
934		if arenaBase < zeroedBase {
935			// We extended into the non-zeroed part of the
936			// arena, so this region needs to be zeroed before use.
937			//
938			// zeroedBase is monotonically increasing, so if we see this now then
939			// we can be sure we need to zero this memory region.
940			//
941			// We still need to update zeroedBase for this arena, and
942			// potentially more arenas.
943			needZero = true
944		}
945		// We may observe arenaBase > zeroedBase if we're racing with one or more
946		// allocations which are acquiring memory directly before us in the address
947		// space. But, because we know no one else is acquiring *this* memory, it's
948		// still safe to not zero.
949
		// Compute how far into the arena we extend, capped
951		// at heapArenaBytes.
952		arenaLimit := arenaBase + npage*pageSize
953		if arenaLimit > heapArenaBytes {
954			arenaLimit = heapArenaBytes
955		}
956		// Increase ha.zeroedBase so it's >= arenaLimit.
957		// We may be racing with other updates.
958		for arenaLimit > zeroedBase {
959			if atomic.Casuintptr(&ha.zeroedBase, zeroedBase, arenaLimit) {
960				break
961			}
962			zeroedBase = atomic.Loaduintptr(&ha.zeroedBase)
963			// Sanity check zeroedBase.
964			if zeroedBase <= arenaLimit && zeroedBase > arenaBase {
965				// The zeroedBase moved into the space we were trying to
966				// claim. That's very bad, and indicates someone allocated
967				// the same region we did.
968				throw("potentially overlapping in-use allocations detected")
969			}
970		}
971
972		// Move base forward and subtract from npage to move into
973		// the next arena, or finish.
974		base += arenaLimit - arenaBase
975		npage -= (arenaLimit - arenaBase) / pageSize
976	}
977	return
978}
979
980// tryAllocMSpan attempts to allocate an mspan object from
981// the P-local cache, but may fail.
982//
983// h need not be locked.
984//
// The caller must ensure that its P won't change underneath
// it during this function. Currently we enforce this by requiring
// that this function run on the system stack, because that's the
// only place it is used now. In the future, this requirement may
// be relaxed if its use is necessary elsewhere.
990//
991//go:systemstack
992func (h *mheap) tryAllocMSpan() *mspan {
993	pp := getg().m.p.ptr()
994	// If we don't have a p or the cache is empty, we can't do
995	// anything here.
996	if pp == nil || pp.mspancache.len == 0 {
997		return nil
998	}
999	// Pull off the last entry in the cache.
1000	s := pp.mspancache.buf[pp.mspancache.len-1]
1001	pp.mspancache.len--
1002	return s
1003}
1004
1005// allocMSpanLocked allocates an mspan object.
1006//
1007// h must be locked.
1008//
1009// allocMSpanLocked must be called on the system stack because
1010// its caller holds the heap lock. See mheap for details.
1011// Running on the system stack also ensures that we won't
1012// switch Ps during this function. See tryAllocMSpan for details.
1013//
1014//go:systemstack
1015func (h *mheap) allocMSpanLocked() *mspan {
1016	pp := getg().m.p.ptr()
1017	if pp == nil {
1018		// We don't have a p so just do the normal thing.
1019		return (*mspan)(h.spanalloc.alloc())
1020	}
1021	// Refill the cache if necessary.
1022	if pp.mspancache.len == 0 {
1023		const refillCount = len(pp.mspancache.buf) / 2
1024		for i := 0; i < refillCount; i++ {
1025			pp.mspancache.buf[i] = (*mspan)(h.spanalloc.alloc())
1026		}
1027		pp.mspancache.len = refillCount
1028	}
1029	// Pull off the last entry in the cache.
1030	s := pp.mspancache.buf[pp.mspancache.len-1]
1031	pp.mspancache.len--
1032	return s
1033}
1034
// freeMSpanLocked frees an mspan object.
1036//
1037// h must be locked.
1038//
1039// freeMSpanLocked must be called on the system stack because
1040// its caller holds the heap lock. See mheap for details.
1041// Running on the system stack also ensures that we won't
1042// switch Ps during this function. See tryAllocMSpan for details.
1043//
1044//go:systemstack
1045func (h *mheap) freeMSpanLocked(s *mspan) {
1046	pp := getg().m.p.ptr()
1047	// First try to free the mspan directly to the cache.
1048	if pp != nil && pp.mspancache.len < len(pp.mspancache.buf) {
1049		pp.mspancache.buf[pp.mspancache.len] = s
1050		pp.mspancache.len++
1051		return
1052	}
1053	// Failing that (or if we don't have a p), just free it to
1054	// the heap.
1055	h.spanalloc.free(unsafe.Pointer(s))
1056}
1057
1058// allocSpan allocates an mspan which owns npages worth of memory.
1059//
1060// If manual == false, allocSpan allocates a heap span of class spanclass
1061// and updates heap accounting. If manual == true, allocSpan allocates a
1062// manually-managed span (spanclass is ignored), and the caller is
1063// responsible for any accounting related to its use of the span. Either
1064// way, allocSpan will atomically add the bytes in the newly allocated
1065// span to *sysStat.
1066//
1067// The returned span is fully initialized.
1068//
1069// h must not be locked.
1070//
1071// allocSpan must be called on the system stack both because it acquires
1072// the heap lock and because it must block GC transitions.
1073//
1074//go:systemstack
1075func (h *mheap) allocSpan(npages uintptr, manual bool, spanclass spanClass, sysStat *uint64) (s *mspan) {
1076	// Function-global state.
1077	gp := getg()
1078	base, scav := uintptr(0), uintptr(0)
1079
1080	// If the allocation is small enough, try the page cache!
1081	pp := gp.m.p.ptr()
1082	if pp != nil && npages < pageCachePages/4 {
1083		c := &pp.pcache
1084
1085		// If the cache is empty, refill it.
1086		if c.empty() {
1087			lock(&h.lock)
1088			*c = h.pages.allocToCache()
1089			unlock(&h.lock)
1090		}
1091
1092		// Try to allocate from the cache.
1093		base, scav = c.alloc(npages)
1094		if base != 0 {
1095			s = h.tryAllocMSpan()
1096
1097			if s != nil && gcBlackenEnabled == 0 && (manual || spanclass.sizeclass() != 0) {
1098				goto HaveSpan
1099			}
			// We're either running during GC, failed to acquire an mspan,
1101			// or the allocation is for a large object. This means we
1102			// have to lock the heap and do a bunch of extra work,
1103			// so go down the HaveBaseLocked path.
1104			//
1105			// We must do this during GC to avoid skew with heap_scan
1106			// since we flush mcache stats whenever we lock.
1107			//
1108			// TODO(mknyszek): It would be nice to not have to
1109			// lock the heap if it's a large allocation, but
1110			// it's fine for now. The critical section here is
1111			// short and large object allocations are relatively
1112			// infrequent.
1113		}
1114	}
1115
1116	// For one reason or another, we couldn't get the
1117	// whole job done without the heap lock.
1118	lock(&h.lock)
1119
1120	if base == 0 {
1121		// Try to acquire a base address.
1122		base, scav = h.pages.alloc(npages)
1123		if base == 0 {
1124			if !h.grow(npages) {
1125				unlock(&h.lock)
1126				return nil
1127			}
1128			base, scav = h.pages.alloc(npages)
1129			if base == 0 {
1130				throw("grew heap, but no adequate free space found")
1131			}
1132		}
1133	}
1134	if s == nil {
1135		// We failed to get an mspan earlier, so grab
1136		// one now that we have the heap lock.
1137		s = h.allocMSpanLocked()
1138	}
1139	if !manual {
1140		// This is a heap span, so we should do some additional accounting
1141		// which may only be done with the heap locked.
1142
1143		// Transfer stats from mcache to global.
1144		memstats.heap_scan += uint64(gp.m.mcache.local_scan)
1145		gp.m.mcache.local_scan = 0
1146		memstats.tinyallocs += uint64(gp.m.mcache.local_tinyallocs)
1147		gp.m.mcache.local_tinyallocs = 0
1148
1149		// Do some additional accounting if it's a large allocation.
1150		if spanclass.sizeclass() == 0 {
1151			mheap_.largealloc += uint64(npages * pageSize)
1152			mheap_.nlargealloc++
1153			atomic.Xadd64(&memstats.heap_live, int64(npages*pageSize))
1154		}
1155
1156		// Either heap_live or heap_scan could have been updated.
1157		if gcBlackenEnabled != 0 {
1158			gcController.revise()
1159		}
1160	}
1161	unlock(&h.lock)
1162
1163HaveSpan:
1164	// At this point, both s != nil and base != 0, and the heap
1165	// lock is no longer held. Initialize the span.
1166	s.init(base, npages)
1167	if h.allocNeedsZero(base, npages) {
1168		s.needzero = 1
1169	}
1170	nbytes := npages * pageSize
1171	if manual {
1172		s.manualFreeList = 0
1173		s.nelems = 0
1174		s.limit = s.base() + s.npages*pageSize
1175		// Manually managed memory doesn't count toward heap_sys.
1176		mSysStatDec(&memstats.heap_sys, s.npages*pageSize)
1177		s.state.set(mSpanManual)
1178	} else {
1179		// We must set span properties before the span is published anywhere
1180		// since we're not holding the heap lock.
1181		s.spanclass = spanclass
1182		if sizeclass := spanclass.sizeclass(); sizeclass == 0 {
1183			s.elemsize = nbytes
1184			s.nelems = 1
1185
1186			s.divShift = 0
1187			s.divMul = 0
1188			s.divShift2 = 0
1189			s.baseMask = 0
1190		} else {
1191			s.elemsize = uintptr(class_to_size[sizeclass])
1192			s.nelems = nbytes / s.elemsize
1193
1194			m := &class_to_divmagic[sizeclass]
1195			s.divShift = m.shift
1196			s.divMul = m.mul
1197			s.divShift2 = m.shift2
1198			s.baseMask = m.baseMask
1199		}
1200
1201		// Initialize mark and allocation structures.
1202		s.freeindex = 0
1203		s.allocCache = ^uint64(0) // all 1s indicating all free.
1204		s.gcmarkBits = newMarkBits(s.nelems)
1205		s.allocBits = newAllocBits(s.nelems)
1206
1207		// It's safe to access h.sweepgen without the heap lock because it's
1208		// only ever updated with the world stopped and we run on the
1209		// systemstack which blocks a STW transition.
1210		atomic.Store(&s.sweepgen, h.sweepgen)
1211
1212		// Now that the span is filled in, set its state. This
1213		// is a publication barrier for the other fields in
1214		// the span. While valid pointers into this span
1215		// should never be visible until the span is returned,
1216		// if the garbage collector finds an invalid pointer,
1217		// access to the span may race with initialization of
1218		// the span. We resolve this race by atomically
1219		// setting the state after the span is fully
1220		// initialized, and atomically checking the state in
1221		// any situation where a pointer is suspect.
1222		s.state.set(mSpanInUse)
1223	}
1224
1225	// Commit and account for any scavenged memory that the span now owns.
1226	if scav != 0 {
1227		// sysUsed all the pages that are actually available
1228		// in the span since some of them might be scavenged.
1229		sysUsed(unsafe.Pointer(base), nbytes)
1230		mSysStatDec(&memstats.heap_released, scav)
1231	}
1232	// Update stats.
1233	mSysStatInc(sysStat, nbytes)
1234	mSysStatDec(&memstats.heap_idle, nbytes)
1235
1236	// Publish the span in various locations.
1237
1238	// This is safe to call without the lock held because the slots
	// related to this span will only ever be read or modified by
1240	// this thread until pointers into the span are published or
1241	// pageInUse is updated.
1242	h.setSpans(s.base(), npages, s)
1243
1244	if !manual {
1245		// Add to swept in-use list.
1246		//
1247		// This publishes the span to root marking.
1248		//
1249		// h.sweepgen is guaranteed to only change during STW,
1250		// and preemption is disabled in the page allocator.
1251		h.sweepSpans[h.sweepgen/2%2].push(s)
1252
1253		// Mark in-use span in arena page bitmap.
1254		//
1255		// This publishes the span to the page sweeper, so
1256		// it's imperative that the span be completely initialized
1257		// prior to this line.
1258		arena, pageIdx, pageMask := pageIndexOf(s.base())
1259		atomic.Or8(&arena.pageInUse[pageIdx], pageMask)
1260
1261		// Update related page sweeper stats.
1262		atomic.Xadd64(&h.pagesInUse, int64(npages))
1263
1264		if trace.enabled {
1265			// Trace that a heap alloc occurred.
1266			traceHeapAlloc()
1267		}
1268	}
1269	return s
1270}
1271
1272// Try to add at least npage pages of memory to the heap,
1273// returning whether it worked.
1274//
1275// h must be locked.
1276func (h *mheap) grow(npage uintptr) bool {
1277	// We must grow the heap in whole palloc chunks.
1278	ask := alignUp(npage, pallocChunkPages) * pageSize
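	// For example, assuming pallocChunkPages is 512 (a 4 MiB chunk of
	// 8 KiB pages), a request for 5 pages rounds ask up to a full chunk.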
1279
1280	totalGrowth := uintptr(0)
1281	nBase := alignUp(h.curArena.base+ask, physPageSize)
1282	if nBase > h.curArena.end {
1283		// Not enough room in the current arena. Allocate more
1284		// arena space. This may not be contiguous with the
1285		// current arena, so we have to request the full ask.
1286		av, asize := h.sysAlloc(ask)
1287		if av == nil {
1288			print("runtime: out of memory: cannot allocate ", ask, "-byte block (", memstats.heap_sys, " in use)\n")
1289			return false
1290		}
1291
1292		if uintptr(av) == h.curArena.end {
1293			// The new space is contiguous with the old
1294			// space, so just extend the current space.
1295			h.curArena.end = uintptr(av) + asize
1296		} else {
1297			// The new space is discontiguous. Track what
1298			// remains of the current space and switch to
1299			// the new space. This should be rare.
1300			if size := h.curArena.end - h.curArena.base; size != 0 {
1301				h.pages.grow(h.curArena.base, size)
1302				totalGrowth += size
1303			}
1304			// Switch to the new space.
1305			h.curArena.base = uintptr(av)
1306			h.curArena.end = uintptr(av) + asize
1307		}
1308
1309		// The memory just allocated counts as both released
1310		// and idle, even though it's not yet backed by spans.
1311		//
1312		// The allocation is always aligned to the heap arena
		// size which is always > physPageSize, so it's safe to
1314		// just add directly to heap_released.
1315		mSysStatInc(&memstats.heap_released, asize)
1316		mSysStatInc(&memstats.heap_idle, asize)
1317
1318		// Recalculate nBase
1319		nBase = alignUp(h.curArena.base+ask, physPageSize)
1320	}
1321
1322	// Grow into the current arena.
1323	v := h.curArena.base
1324	h.curArena.base = nBase
1325	h.pages.grow(v, nBase-v)
1326	totalGrowth += nBase - v
1327
1328	// We just caused a heap growth, so scavenge down what will soon be used.
	// Scavenging inline compensates for the failure to allocate out of
	// existing memory fragments by releasing the fragments that are
	// least likely to be re-used.
1332	if retained := heapRetained(); retained+uint64(totalGrowth) > h.scavengeGoal {
1333		todo := totalGrowth
1334		if overage := uintptr(retained + uint64(totalGrowth) - h.scavengeGoal); todo > overage {
1335			todo = overage
1336		}
1337		h.pages.scavenge(todo, true)
1338	}
1339	return true
1340}
1341
1342// Free the span back into the heap.
1343func (h *mheap) freeSpan(s *mspan) {
1344	systemstack(func() {
1345		mp := getg().m
1346		lock(&h.lock)
1347		memstats.heap_scan += uint64(mp.mcache.local_scan)
1348		mp.mcache.local_scan = 0
1349		memstats.tinyallocs += uint64(mp.mcache.local_tinyallocs)
1350		mp.mcache.local_tinyallocs = 0
1351		if msanenabled {
1352			// Tell msan that this entire span is no longer in use.
1353			base := unsafe.Pointer(s.base())
1354			bytes := s.npages << _PageShift
1355			msanfree(base, bytes)
1356		}
1357		if gcBlackenEnabled != 0 {
1358			// heap_scan changed.
1359			gcController.revise()
1360		}
1361		h.freeSpanLocked(s, true, true)
1362		unlock(&h.lock)
1363	})
1364}
1365
1366// freeManual frees a manually-managed span returned by allocManual.
1367// stat must be the same as the stat passed to the allocManual that
1368// allocated s.
1369//
1370// This must only be called when gcphase == _GCoff. See mSpanState for
1371// an explanation.
1372//
1373// freeManual must be called on the system stack because it acquires
1374// the heap lock. See mheap for details.
1375//
1376//go:systemstack
1377func (h *mheap) freeManual(s *mspan, stat *uint64) {
1378	s.needzero = 1
1379	lock(&h.lock)
1380	mSysStatDec(stat, s.npages*pageSize)
1381	mSysStatInc(&memstats.heap_sys, s.npages*pageSize)
1382	h.freeSpanLocked(s, false, true)
1383	unlock(&h.lock)
1384}
1385
1386func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool) {
1387	switch s.state.get() {
1388	case mSpanManual:
1389		if s.allocCount != 0 {
1390			throw("mheap.freeSpanLocked - invalid stack free")
1391		}
1392	case mSpanInUse:
1393		if s.allocCount != 0 || s.sweepgen != h.sweepgen {
1394			print("mheap.freeSpanLocked - span ", s, " ptr ", hex(s.base()), " allocCount ", s.allocCount, " sweepgen ", s.sweepgen, "/", h.sweepgen, "\n")
1395			throw("mheap.freeSpanLocked - invalid free")
1396		}
1397		atomic.Xadd64(&h.pagesInUse, -int64(s.npages))
1398
1399		// Clear in-use bit in arena page bitmap.
1400		arena, pageIdx, pageMask := pageIndexOf(s.base())
1401		atomic.And8(&arena.pageInUse[pageIdx], ^pageMask)
1402	default:
1403		throw("mheap.freeSpanLocked - invalid span state")
1404	}
1405
1406	if acctinuse {
1407		mSysStatDec(&memstats.heap_inuse, s.npages*pageSize)
1408	}
1409	if acctidle {
1410		mSysStatInc(&memstats.heap_idle, s.npages*pageSize)
1411	}
1412
1413	// Mark the space as free.
1414	h.pages.free(s.base(), s.npages)
1415
1416	// Free the span structure. We no longer have a use for it.
1417	s.state.set(mSpanDead)
1418	h.freeMSpanLocked(s)
1419}
1420
// scavengeAll acquires the heap lock (blocking any additional
// manipulation of the page allocator) and iterates over the whole
// heap, scavenging every free page.
1424func (h *mheap) scavengeAll() {
1425	// Disallow malloc or panic while holding the heap lock. We do
1426	// this here because this is a non-mallocgc entry-point to
1427	// the mheap API.
1428	gp := getg()
1429	gp.m.mallocing++
1430	lock(&h.lock)
1431	// Reset the scavenger address so we have access to the whole heap.
1432	h.pages.resetScavengeAddr()
1433	released := h.pages.scavenge(^uintptr(0), true)
1434	unlock(&h.lock)
1435	gp.m.mallocing--
1436
1437	if debug.scavtrace > 0 {
1438		printScavTrace(released, true)
1439	}
1440}
1441
1442//go:linkname runtime_debug_freeOSMemory runtime/debug.freeOSMemory
1443func runtime_debug_freeOSMemory() {
1444	GC()
1445	systemstack(func() { mheap_.scavengeAll() })
1446}
1447
1448// Initialize a new span with the given start and npages.
1449func (span *mspan) init(base uintptr, npages uintptr) {
1450	// span is *not* zeroed.
1451	span.next = nil
1452	span.prev = nil
1453	span.list = nil
1454	span.startAddr = base
1455	span.npages = npages
1456	span.allocCount = 0
1457	span.spanclass = 0
1458	span.elemsize = 0
1459	span.speciallock.key = 0
1460	span.specials = nil
1461	span.needzero = 0
1462	span.freeindex = 0
1463	span.allocBits = nil
1464	span.gcmarkBits = nil
1465	span.state.set(mSpanDead)
1466}
1467
1468func (span *mspan) inList() bool {
1469	return span.list != nil
1470}
1471
1472// Initialize an empty doubly-linked list.
1473func (list *mSpanList) init() {
1474	list.first = nil
1475	list.last = nil
1476}
1477
1478func (list *mSpanList) remove(span *mspan) {
1479	if span.list != list {
1480		print("runtime: failed mSpanList.remove span.npages=", span.npages,
1481			" span=", span, " prev=", span.prev, " span.list=", span.list, " list=", list, "\n")
1482		throw("mSpanList.remove")
1483	}
1484	if list.first == span {
1485		list.first = span.next
1486	} else {
1487		span.prev.next = span.next
1488	}
1489	if list.last == span {
1490		list.last = span.prev
1491	} else {
1492		span.next.prev = span.prev
1493	}
1494	span.next = nil
1495	span.prev = nil
1496	span.list = nil
1497}
1498
1499func (list *mSpanList) isEmpty() bool {
1500	return list.first == nil
1501}
1502
1503func (list *mSpanList) insert(span *mspan) {
1504	if span.next != nil || span.prev != nil || span.list != nil {
1505		println("runtime: failed mSpanList.insert", span, span.next, span.prev, span.list)
1506		throw("mSpanList.insert")
1507	}
1508	span.next = list.first
1509	if list.first != nil {
1510		// The list contains at least one span; link it in.
1511		// The last span in the list doesn't change.
1512		list.first.prev = span
1513	} else {
1514		// The list contains no spans, so this is also the last span.
1515		list.last = span
1516	}
1517	list.first = span
1518	span.list = list
1519}
1520
1521func (list *mSpanList) insertBack(span *mspan) {
1522	if span.next != nil || span.prev != nil || span.list != nil {
1523		println("runtime: failed mSpanList.insertBack", span, span.next, span.prev, span.list)
1524		throw("mSpanList.insertBack")
1525	}
1526	span.prev = list.last
1527	if list.last != nil {
1528		// The list contains at least one span.
1529		list.last.next = span
1530	} else {
1531		// The list contains no spans, so this is also the first span.
1532		list.first = span
1533	}
1534	list.last = span
1535	span.list = list
1536}
1537
1538// takeAll removes all spans from other and inserts them at the front
1539// of list.
1540func (list *mSpanList) takeAll(other *mSpanList) {
1541	if other.isEmpty() {
1542		return
1543	}
1544
1545	// Reparent everything in other to list.
1546	for s := other.first; s != nil; s = s.next {
1547		s.list = list
1548	}
1549
1550	// Concatenate the lists.
1551	if list.isEmpty() {
1552		*list = *other
1553	} else {
1554		// Neither list is empty. Put other before list.
1555		other.last.next = list.first
1556		list.first.prev = other.last
1557		list.first = other.first
1558	}
1559
1560	other.first, other.last = nil, nil
1561}

const (
	// Note: The finalizer special must be first because if we're freeing
	// an object, a finalizer special will cause the freeing operation
	// to abort, and we want to keep the other special records around
	// if that happens.
	_KindSpecialFinalizer = 1
	_KindSpecialProfile   = 2
)

//go:notinheap
type special struct {
	next   *special // linked list in span
	offset uint16   // span offset of object
	kind   byte     // kind of special
}

// addspecial adds the special record s to the list of special records for
// the object p. All fields of s should be filled in except for
// offset and next, which this routine will fill in.
// Returns true if the special was successfully added, false otherwise.
// (The add will fail only if a record with the same p and s.kind
// already exists.)
func addspecial(p unsafe.Pointer, s *special) bool {
	span := spanOfHeap(uintptr(p))
	if span == nil {
		throw("addspecial on invalid pointer")
	}

	// Ensure that the span is swept.
	// Sweeping accesses the specials list without locks, so we have
	// to synchronize with it. And it's just much safer.
	mp := acquirem()
	span.ensureSwept()

	offset := uintptr(p) - span.base()
	kind := s.kind

	lock(&span.speciallock)

	// Find splice point, check for existing record.
	t := &span.specials
	for {
		x := *t
		if x == nil {
			break
		}
		if offset == uintptr(x.offset) && kind == x.kind {
			unlock(&span.speciallock)
			releasem(mp)
			return false // already exists
		}
		if offset < uintptr(x.offset) || (offset == uintptr(x.offset) && kind < x.kind) {
			break
		}
		t = &x.next
	}

	// Splice in record, fill in offset.
	s.offset = uint16(offset)
	s.next = *t
	*t = s
	unlock(&span.speciallock)
	releasem(mp)

	return true
}
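
// specialsSorted is an illustrative sketch rather than a runtime API: the
// name is new in this edit and nothing calls it. It spells out the invariant
// addspecial maintains, namely that a span's specials list stays sorted by
// (offset, kind). A real caller would need to hold span.speciallock.
func specialsSorted(span *mspan) bool {
	for s := span.specials; s != nil && s.next != nil; s = s.next {
		n := s.next
		if s.offset > n.offset || (s.offset == n.offset && s.kind > n.kind) {
			return false
		}
	}
	return true
}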

// removespecial removes the special record of the given kind for the
// object p. Returns the record if it existed, nil otherwise.
// The caller is responsible for freeing the returned record, normally
// by handing it back to the appropriate fixalloc under mheap_.speciallock.
func removespecial(p unsafe.Pointer, kind uint8) *special {
	span := spanOfHeap(uintptr(p))
	if span == nil {
		throw("removespecial on invalid pointer")
	}

	// Ensure that the span is swept.
	// Sweeping accesses the specials list without locks, so we have
	// to synchronize with it. And it's just much safer.
	mp := acquirem()
	span.ensureSwept()

	offset := uintptr(p) - span.base()

	lock(&span.speciallock)
	t := &span.specials
	for {
		s := *t
		if s == nil {
			break
		}
		// This function is used for finalizers only, so we don't check for
		// "interior" specials (the offset of p must be exactly equal to s.offset).
		if offset == uintptr(s.offset) && kind == s.kind {
			*t = s.next
			unlock(&span.speciallock)
			releasem(mp)
			return s
		}
		t = &s.next
	}
	unlock(&span.speciallock)
	releasem(mp)
	return nil
}

// The described object has a finalizer set for it.
//
// specialfinalizer is allocated from non-GC'd memory, so any heap
// pointers must be specially handled.
//
//go:notinheap
type specialfinalizer struct {
	special special
	fn      *funcval // May be a heap pointer.
	nret    uintptr
	fint    *_type   // May be a heap pointer, but always live.
	ot      *ptrtype // May be a heap pointer, but always live.
}

// addfinalizer adds a finalizer to the object p. Returns true if it succeeded,
// false if a finalizer was already set for p.
func addfinalizer(p unsafe.Pointer, f *funcval, nret uintptr, fint *_type, ot *ptrtype) bool {
	lock(&mheap_.speciallock)
	s := (*specialfinalizer)(mheap_.specialfinalizeralloc.alloc())
	unlock(&mheap_.speciallock)
	s.special.kind = _KindSpecialFinalizer
	s.fn = f
	s.nret = nret
	s.fint = fint
	s.ot = ot
	if addspecial(p, &s.special) {
		// This is responsible for maintaining the same
		// GC-related invariants as markrootSpans in any
		// situation where it's possible that markrootSpans
		// has already run but mark termination hasn't yet.
		if gcphase != _GCoff {
			base, _, _ := findObject(uintptr(p), 0, 0)
			mp := acquirem()
			gcw := &mp.p.ptr().gcw
			// Mark everything reachable from the object
			// so it's retained for the finalizer.
			scanobject(base, gcw)
			// Mark the finalizer itself, since the
			// special isn't part of the GC'd heap.
			scanblock(uintptr(unsafe.Pointer(&s.fn)), sys.PtrSize, &oneptrmask[0], gcw, nil)
			releasem(mp)
		}
		return true
	}

	// There was an old finalizer; free the unused record and report failure.
	lock(&mheap_.speciallock)
	mheap_.specialfinalizeralloc.free(unsafe.Pointer(s))
	unlock(&mheap_.speciallock)
	return false
}

// removefinalizer removes the finalizer (if any) from the object p.
func removefinalizer(p unsafe.Pointer) {
	s := (*specialfinalizer)(unsafe.Pointer(removespecial(p, _KindSpecialFinalizer)))
	if s == nil {
		return // there wasn't a finalizer to remove
	}
	lock(&mheap_.speciallock)
	mheap_.specialfinalizeralloc.free(unsafe.Pointer(s))
	unlock(&mheap_.speciallock)
}
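
// addfinalizer and removefinalizer back the public runtime.SetFinalizer
// API: SetFinalizer(p, f) with a non-nil finalizer ends up calling
// addfinalizer, and SetFinalizer(p, nil) ends up calling removefinalizer.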

// The described object is being heap profiled.
//
//go:notinheap
type specialprofile struct {
	special special
	b       *bucket
}

// setprofilebucket sets the heap profile bucket associated with p to b.
func setprofilebucket(p unsafe.Pointer, b *bucket) {
	lock(&mheap_.speciallock)
	s := (*specialprofile)(mheap_.specialprofilealloc.alloc())
	unlock(&mheap_.speciallock)
	s.special.kind = _KindSpecialProfile
	s.b = b
	if !addspecial(p, &s.special) {
		throw("setprofilebucket: profile already set")
	}
}

// freespecial performs whatever cleanup is needed to deallocate s. It has
// already been unlinked from the mspan specials list.
func freespecial(s *special, p unsafe.Pointer, size uintptr) {
	switch s.kind {
	case _KindSpecialFinalizer:
		sf := (*specialfinalizer)(unsafe.Pointer(s))
		queuefinalizer(p, sf.fn, sf.nret, sf.fint, sf.ot)
		lock(&mheap_.speciallock)
		mheap_.specialfinalizeralloc.free(unsafe.Pointer(sf))
		unlock(&mheap_.speciallock)
	case _KindSpecialProfile:
		sp := (*specialprofile)(unsafe.Pointer(s))
		mProf_Free(sp.b, size)
		lock(&mheap_.speciallock)
		mheap_.specialprofilealloc.free(unsafe.Pointer(sp))
		unlock(&mheap_.speciallock)
	default:
		throw("bad special kind")
		panic("not reached")
	}
}

// gcBits is an alloc/mark bitmap. This is always used as *gcBits.
//
//go:notinheap
type gcBits uint8

// bytep returns a pointer to the n'th byte of b.
func (b *gcBits) bytep(n uintptr) *uint8 {
	return addb((*uint8)(b), n)
}

// bitp returns a pointer to the byte containing bit n and a mask for
// selecting that bit from *bytep.
func (b *gcBits) bitp(n uintptr) (bytep *uint8, mask uint8) {
	return b.bytep(n / 8), 1 << (n % 8)
}
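
// The two helpers below are an illustrative sketch (new names, unused by
// the runtime) of how bytep and bitp combine to set and test a single bit.
// They are deliberately non-atomic; the runtime's real mark-bit writers use
// atomic operations where concurrent marking requires them.
func gcBitsSet(b *gcBits, n uintptr) {
	bytep, mask := b.bitp(n)
	*bytep |= mask
}

func gcBitsIsSet(b *gcBits, n uintptr) bool {
	bytep, mask := b.bitp(n)
	return *bytep&mask != 0
}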

const gcBitsChunkBytes = uintptr(64 << 10)
const gcBitsHeaderBytes = unsafe.Sizeof(gcBitsHeader{})

type gcBitsHeader struct {
	free uintptr // free is the index into bits of the next free byte.
	next uintptr // *gcBits triggers recursive type bug. (issue 14620)
}

//go:notinheap
type gcBitsArena struct {
	// gcBitsHeader // side step recursive type bug (issue 14620) by including fields by hand.
	free uintptr // free is the index into bits of the next free byte; read/write atomically
	next *gcBitsArena
	bits [gcBitsChunkBytes - gcBitsHeaderBytes]gcBits
}

var gcBitsArenas struct {
	lock     mutex
	free     *gcBitsArena
	next     *gcBitsArena // Read atomically. Write atomically under lock.
	current  *gcBitsArena
	previous *gcBitsArena
}

// tryAlloc allocates from b or returns nil if b does not have enough room.
// This is safe to call concurrently.
func (b *gcBitsArena) tryAlloc(bytes uintptr) *gcBits {
	if b == nil || atomic.Loaduintptr(&b.free)+bytes > uintptr(len(b.bits)) {
		return nil
	}
	// Try to allocate from this block.
	end := atomic.Xadduintptr(&b.free, bytes)
	if end > uintptr(len(b.bits)) {
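		// The add overshot: b.free now exceeds len(b.bits) and is never
		// rolled back. That is harmless; the fast-path check above makes
		// later allocations from this arena fail immediately.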
		return nil
	}
	// There was enough room.
	start := end - bytes
	return &b.bits[start]
}

// newMarkBits returns a pointer to 8 byte aligned bytes
// to be used for a span's mark bits.
func newMarkBits(nelems uintptr) *gcBits {
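	// Each 8-byte block of mark bits covers 64 objects. For example,
	// nelems = 200 needs (200+63)/64 = 4 blocks, i.e. 32 bytes.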
	blocksNeeded := uintptr((nelems + 63) / 64)
	bytesNeeded := blocksNeeded * 8

	// Try directly allocating from the current head arena.
	head := (*gcBitsArena)(atomic.Loadp(unsafe.Pointer(&gcBitsArenas.next)))
	if p := head.tryAlloc(bytesNeeded); p != nil {
		return p
	}

	// There's not enough room in the head arena. We may need to
	// allocate a new arena.
	lock(&gcBitsArenas.lock)
	// Try the head arena again, since it may have changed. Now
	// that we hold the lock, the list head can't change, but its
	// free position still can.
	if p := gcBitsArenas.next.tryAlloc(bytesNeeded); p != nil {
		unlock(&gcBitsArenas.lock)
		return p
	}

	// Allocate a new arena. This may temporarily drop the lock.
	fresh := newArenaMayUnlock()
	// If newArenaMayUnlock dropped the lock, another thread may
	// have put a fresh arena on the "next" list. Try allocating
	// from next again.
	if p := gcBitsArenas.next.tryAlloc(bytesNeeded); p != nil {
		// Put fresh back on the free list.
		// TODO: Mark it "already zeroed"
		fresh.next = gcBitsArenas.free
		gcBitsArenas.free = fresh
		unlock(&gcBitsArenas.lock)
		return p
	}

	// Allocate from the fresh arena. We haven't linked it in yet, so
	// this cannot race and is guaranteed to succeed.
	p := fresh.tryAlloc(bytesNeeded)
	if p == nil {
		throw("markBits overflow")
	}

	// Add the fresh arena to the "next" list.
	fresh.next = gcBitsArenas.next
	atomic.StorepNoWB(unsafe.Pointer(&gcBitsArenas.next), unsafe.Pointer(fresh))

	unlock(&gcBitsArenas.lock)
	return p
}

// newAllocBits returns a pointer to 8 byte aligned bytes
// to be used for this span's alloc bits.
// newAllocBits is used to provide newly initialized spans with
// allocation bits. For spans that are not being initialized, the
// mark bits are repurposed as allocation bits when the span is
// swept.
func newAllocBits(nelems uintptr) *gcBits {
	return newMarkBits(nelems)
}

// nextMarkBitArenaEpoch establishes a new epoch for the arenas
// holding the mark bits. The arenas are named relative to the
// current GC cycle, which is demarcated by the call to finishsweep_m.
//
// All current spans have been swept.
// During that sweep, each span allocated room for its gcmarkBits in
// the gcBitsArenas.next block. gcBitsArenas.next becomes gcBitsArenas.current,
// where the GC will mark objects; after each span is swept, these bits
// will be used to allocate objects.
// gcBitsArenas.current becomes gcBitsArenas.previous, where the spans'
// gcAllocBits live until all the spans have been swept during this GC cycle.
// Each span's sweep extinguishes its references to gcBitsArenas.previous
// by pointing its gcAllocBits into gcBitsArenas.current.
// gcBitsArenas.previous is then released to the gcBitsArenas.free list.
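//
// In short, each call rotates the arenas:
//
//	next     -> current  (mark bits for the cycle that is starting)
//	current  -> previous (last cycle's mark bits, now serving as alloc bits)
//	previous -> free     (recycled once every span has been swept)
//	next     =  nil      (newMarkBits allocates a fresh arena on demand)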
func nextMarkBitArenaEpoch() {
	lock(&gcBitsArenas.lock)
	if gcBitsArenas.previous != nil {
		if gcBitsArenas.free == nil {
			gcBitsArenas.free = gcBitsArenas.previous
		} else {
			// Find the end of the previous arenas.
			last := gcBitsArenas.previous
			for ; last.next != nil; last = last.next {
			}
			last.next = gcBitsArenas.free
			gcBitsArenas.free = gcBitsArenas.previous
		}
	}
	gcBitsArenas.previous = gcBitsArenas.current
	gcBitsArenas.current = gcBitsArenas.next
	atomic.StorepNoWB(unsafe.Pointer(&gcBitsArenas.next), nil) // newMarkBits calls newArenaMayUnlock when needed
	unlock(&gcBitsArenas.lock)
}

// newArenaMayUnlock allocates and zeroes a gcBits arena.
// The caller must hold gcBitsArenas.lock. This may temporarily release it.
func newArenaMayUnlock() *gcBitsArena {
	var result *gcBitsArena
	if gcBitsArenas.free == nil {
		unlock(&gcBitsArenas.lock)
		result = (*gcBitsArena)(sysAlloc(gcBitsChunkBytes, &memstats.gc_sys))
		if result == nil {
			throw("runtime: cannot allocate memory")
		}
		lock(&gcBitsArenas.lock)
	} else {
		result = gcBitsArenas.free
		gcBitsArenas.free = gcBitsArenas.free.next
		memclrNoHeapPointers(unsafe.Pointer(result), gcBitsChunkBytes)
	}
	result.next = nil
	// If result.bits is not 8 byte aligned, adjust result.free so
	// that &result.bits[result.free] is 8 byte aligned.
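	// For example, if &result.bits[0] has low three address bits 6,
	// then result.free = 8 - 6 = 2 and &result.bits[2] is 8 byte aligned.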
	if uintptr(unsafe.Offsetof(gcBitsArena{}.bits))&7 == 0 {
		result.free = 0
	} else {
		result.free = 8 - (uintptr(unsafe.Pointer(&result.bits[0])) & 7)
	}
	return result
}
