1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Page heap.
6//
7// See malloc.go for overview.
8
9package runtime
10
11import (
12	"internal/cpu"
13	"runtime/internal/atomic"
14	"runtime/internal/sys"
15	"unsafe"
16)
17
18// minPhysPageSize is a lower-bound on the physical page size. The
19// true physical page size may be larger than this. In contrast,
20// sys.PhysPageSize is an upper-bound on the physical page size.
21const minPhysPageSize = 4096
22
23// Main malloc heap.
24// The heap itself is the "free" and "scav" treaps,
25// but all the other global data is here too.
26//
27// mheap must not be heap-allocated because it contains mSpanLists,
28// which must not be heap-allocated.
29//
30//go:notinheap
31type mheap struct {
32	lock      mutex
33	free      mTreap // free and non-scavenged spans
34	scav      mTreap // free and scavenged spans
35	sweepgen  uint32 // sweep generation, see comment in mspan
36	sweepdone uint32 // all spans are swept
37	sweepers  uint32 // number of active sweepone calls
38
39	// allspans is a slice of all mspans ever created. Each mspan
40	// appears exactly once.
41	//
42	// The memory for allspans is manually managed and can be
	// reallocated and moved as the heap grows.
44	//
45	// In general, allspans is protected by mheap_.lock, which
46	// prevents concurrent access as well as freeing the backing
47	// store. Accesses during STW might not hold the lock, but
48	// must ensure that allocation cannot happen around the
49	// access (since that may free the backing store).
50	allspans []*mspan // all spans out there
51
52	// sweepSpans contains two mspan stacks: one of swept in-use
53	// spans, and one of unswept in-use spans. These two trade
54	// roles on each GC cycle. Since the sweepgen increases by 2
55	// on each cycle, this means the swept spans are in
56	// sweepSpans[sweepgen/2%2] and the unswept spans are in
57	// sweepSpans[1-sweepgen/2%2]. Sweeping pops spans from the
58	// unswept stack and pushes spans that are still in-use on the
59	// swept stack. Likewise, allocating an in-use span pushes it
60	// on the swept stack.
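	// For example, when sweepgen is 4, the swept spans are in
	// sweepSpans[0] and the unswept spans are in sweepSpans[1];
	// when sweepgen reaches 6 the two stacks swap roles.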
61	sweepSpans [2]gcSweepBuf
62
63	_ uint32 // align uint64 fields on 32-bit for atomics
64
65	// Proportional sweep
66	//
67	// These parameters represent a linear function from heap_live
68	// to page sweep count. The proportional sweep system works to
69	// stay in the black by keeping the current page sweep count
70	// above this line at the current heap_live.
71	//
72	// The line has slope sweepPagesPerByte and passes through a
73	// basis point at (sweepHeapLiveBasis, pagesSweptBasis). At
74	// any given time, the system is at (memstats.heap_live,
75	// pagesSwept) in this space.
76	//
77	// It's important that the line pass through a point we
78	// control rather than simply starting at a (0,0) origin
79	// because that lets us adjust sweep pacing at any time while
80	// accounting for current progress. If we could only adjust
81	// the slope, it would create a discontinuity in debt if any
82	// progress has already been made.
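	// As an illustration (not a real tuning value): with
	// sweepPagesPerByte of 0.01, once heap_live has grown 100 KiB past
	// sweepHeapLiveBasis, pagesSwept should exceed pagesSweptBasis by
	// at least 1024 pages to stay in the black.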
83	pagesInUse         uint64  // pages of spans in stats mSpanInUse; R/W with mheap.lock
84	pagesSwept         uint64  // pages swept this cycle; updated atomically
85	pagesSweptBasis    uint64  // pagesSwept to use as the origin of the sweep ratio; updated atomically
86	sweepHeapLiveBasis uint64  // value of heap_live to use as the origin of sweep ratio; written with lock, read without
87	sweepPagesPerByte  float64 // proportional sweep ratio; written with lock, read without
88	// TODO(austin): pagesInUse should be a uintptr, but the 386
89	// compiler can't 8-byte align fields.
90
91	// Page reclaimer state
92
	// reclaimIndex is the page index in allArenas of the next page to
94	// reclaim. Specifically, it refers to page (i %
95	// pagesPerArena) of arena allArenas[i / pagesPerArena].
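	// For example, if pagesPerArena were 8192, index 8195 would
	// refer to page 3 of the arena at allArenas[1].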
96	//
97	// If this is >= 1<<63, the page reclaimer is done scanning
98	// the page marks.
99	//
100	// This is accessed atomically.
101	reclaimIndex uint64
102	// reclaimCredit is spare credit for extra pages swept. Since
103	// the page reclaimer works in large chunks, it may reclaim
104	// more than requested. Any spare pages released go to this
105	// credit pool.
106	//
107	// This is accessed atomically.
108	reclaimCredit uintptr
109
110	// scavengeCredit is spare credit for extra bytes scavenged.
111	// Since the scavenging mechanisms operate on spans, it may
	// scavenge more than requested. Any spare bytes released
113	// go to this credit pool.
114	//
115	// This is protected by the mheap lock.
116	scavengeCredit uintptr
117
118	// Malloc stats.
119	largealloc  uint64                  // bytes allocated for large objects
120	nlargealloc uint64                  // number of large object allocations
121	largefree   uint64                  // bytes freed for large objects (>maxsmallsize)
122	nlargefree  uint64                  // number of frees for large objects (>maxsmallsize)
123	nsmallfree  [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize)
124
125	// arenas is the heap arena map. It points to the metadata for
126	// the heap for every arena frame of the entire usable virtual
127	// address space.
128	//
129	// Use arenaIndex to compute indexes into this array.
130	//
131	// For regions of the address space that are not backed by the
132	// Go heap, the arena map contains nil.
133	//
134	// Modifications are protected by mheap_.lock. Reads can be
135	// performed without locking; however, a given entry can
136	// transition from nil to non-nil at any time when the lock
	// isn't held. (Entries never transition back to nil.)
138	//
139	// In general, this is a two-level mapping consisting of an L1
140	// map and possibly many L2 maps. This saves space when there
141	// are a huge number of arena frames. However, on many
142	// platforms (even 64-bit), arenaL1Bits is 0, making this
143	// effectively a single-level map. In this case, arenas[0]
144	// will never be nil.
145	arenas [1 << arenaL1Bits]*[1 << arenaL2Bits]*heapArena
146
147	// heapArenaAlloc is pre-reserved space for allocating heapArena
148	// objects. This is only used on 32-bit, where we pre-reserve
149	// this space to avoid interleaving it with the heap itself.
150	heapArenaAlloc linearAlloc
151
152	// arenaHints is a list of addresses at which to attempt to
153	// add more heap arenas. This is initially populated with a
154	// set of general hint addresses, and grown with the bounds of
155	// actual heap arena ranges.
156	arenaHints *arenaHint
157
158	// arena is a pre-reserved space for allocating heap arenas
159	// (the actual arenas). This is only used on 32-bit.
160	arena linearAlloc
161
162	// allArenas is the arenaIndex of every mapped arena. This can
163	// be used to iterate through the address space.
164	//
165	// Access is protected by mheap_.lock. However, since this is
166	// append-only and old backing arrays are never freed, it is
167	// safe to acquire mheap_.lock, copy the slice header, and
168	// then release mheap_.lock.
169	allArenas []arenaIdx
170
171	// sweepArenas is a snapshot of allArenas taken at the
172	// beginning of the sweep cycle. This can be read safely by
173	// simply blocking GC (by disabling preemption).
174	sweepArenas []arenaIdx
175
176	// _ uint32 // ensure 64-bit alignment of central
177
178	// central free lists for small size classes.
179	// the padding makes sure that the mcentrals are
180	// spaced CacheLinePadSize bytes apart, so that each mcentral.lock
181	// gets its own cache line.
182	// central is indexed by spanClass.
183	central [numSpanClasses]struct {
184		mcentral mcentral
185		pad      [cpu.CacheLinePadSize - unsafe.Sizeof(mcentral{})%cpu.CacheLinePadSize]byte
186	}
187
188	spanalloc             fixalloc // allocator for span*
189	cachealloc            fixalloc // allocator for mcache*
190	treapalloc            fixalloc // allocator for treapNodes*
191	specialfinalizeralloc fixalloc // allocator for specialfinalizer*
192	specialprofilealloc   fixalloc // allocator for specialprofile*
193	speciallock           mutex    // lock for special record allocators.
194	arenaHintAlloc        fixalloc // allocator for arenaHints
195
196	unused *specialfinalizer // never set, just here to force the specialfinalizer type into DWARF
197}
198
199var mheap_ mheap
200
201// A heapArena stores metadata for a heap arena. heapArenas are stored
202// outside of the Go heap and accessed via the mheap_.arenas index.
203//
204// This gets allocated directly from the OS, so ideally it should be a
205// multiple of the system page size. For example, avoid adding small
206// fields.
207//
208//go:notinheap
209type heapArena struct {
210	// bitmap stores the pointer/scalar bitmap for the words in
211	// this arena. See mbitmap.go for a description. Use the
212	// heapBits type to access this.
213	bitmap [heapArenaBitmapBytes]byte
214
215	// spans maps from virtual address page ID within this arena to *mspan.
216	// For allocated spans, their pages map to the span itself.
217	// For free spans, only the lowest and highest pages map to the span itself.
218	// Internal pages map to an arbitrary span.
219	// For pages that have never been allocated, spans entries are nil.
220	//
221	// Modifications are protected by mheap.lock. Reads can be
222	// performed without locking, but ONLY from indexes that are
223	// known to contain in-use or stack spans. This means there
224	// must not be a safe-point between establishing that an
225	// address is live and looking it up in the spans array.
226	spans [pagesPerArena]*mspan
227
228	// pageInUse is a bitmap that indicates which spans are in
229	// state mSpanInUse. This bitmap is indexed by page number,
230	// but only the bit corresponding to the first page in each
231	// span is used.
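	// For example, a span whose first page is page 10 of its arena
	// is represented by bit 2 of pageInUse[1].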
232	//
233	// Writes are protected by mheap_.lock.
234	pageInUse [pagesPerArena / 8]uint8
235
236	// pageMarks is a bitmap that indicates which spans have any
237	// marked objects on them. Like pageInUse, only the bit
238	// corresponding to the first page in each span is used.
239	//
240	// Writes are done atomically during marking. Reads are
241	// non-atomic and lock-free since they only occur during
242	// sweeping (and hence never race with writes).
243	//
244	// This is used to quickly find whole spans that can be freed.
245	//
246	// TODO(austin): It would be nice if this was uint64 for
247	// faster scanning, but we don't have 64-bit atomic bit
248	// operations.
249	pageMarks [pagesPerArena / 8]uint8
250}
251
252// arenaHint is a hint for where to grow the heap arenas. See
253// mheap_.arenaHints.
254//
255//go:notinheap
256type arenaHint struct {
257	addr uintptr
258	down bool
259	next *arenaHint
260}
261
262// An mspan is a run of pages.
263//
264// When a mspan is in the heap free treap, state == mSpanFree
265// and heapmap(s->start) == span, heapmap(s->start+s->npages-1) == span.
266// If the mspan is in the heap scav treap, then in addition to the
267// above scavenged == true. scavenged == false in all other cases.
268//
269// When a mspan is allocated, state == mSpanInUse or mSpanManual
270// and heapmap(i) == span for all s->start <= i < s->start+s->npages.
271
272// Every mspan is in one doubly-linked list, either in the mheap's
273// busy list or one of the mcentral's span lists.
274
275// An mspan representing actual memory has state mSpanInUse,
276// mSpanManual, or mSpanFree. Transitions between these states are
277// constrained as follows:
278//
279// * A span may transition from free to in-use or manual during any GC
280//   phase.
281//
282// * During sweeping (gcphase == _GCoff), a span may transition from
283//   in-use to free (as a result of sweeping) or manual to free (as a
284//   result of stacks being freed).
285//
286// * During GC (gcphase != _GCoff), a span *must not* transition from
287//   manual or in-use to free. Because concurrent GC may read a pointer
288//   and then look up its span, the span state must be monotonic.
289type mSpanState uint8
290
291const (
292	mSpanDead   mSpanState = iota
293	mSpanInUse             // allocated for garbage collected heap
294	mSpanManual            // allocated for manual management (e.g., stack allocator)
295	mSpanFree
296)
297
298// mSpanStateNames are the names of the span states, indexed by
299// mSpanState.
300var mSpanStateNames = []string{
301	"mSpanDead",
302	"mSpanInUse",
303	"mSpanManual",
304	"mSpanFree",
305}
306
307// mSpanList heads a linked list of spans.
308//
309//go:notinheap
310type mSpanList struct {
311	first *mspan // first span in list, or nil if none
312	last  *mspan // last span in list, or nil if none
313}
314
315//go:notinheap
316type mspan struct {
317	next *mspan     // next span in list, or nil if none
318	prev *mspan     // previous span in list, or nil if none
319	list *mSpanList // For debugging. TODO: Remove.
320
321	startAddr uintptr // address of first byte of span aka s.base()
322	npages    uintptr // number of pages in span
323
324	manualFreeList gclinkptr // list of free objects in mSpanManual spans
325
326	// freeindex is the slot index between 0 and nelems at which to begin scanning
327	// for the next free object in this span.
328	// Each allocation scans allocBits starting at freeindex until it encounters a 0
329	// indicating a free object. freeindex is then adjusted so that subsequent scans begin
330	// just past the newly discovered free object.
331	//
332	// If freeindex == nelem, this span has no free objects.
333	//
334	// allocBits is a bitmap of objects in this span.
335	// If n >= freeindex and allocBits[n/8] & (1<<(n%8)) is 0
336	// then object n is free;
337	// otherwise, object n is allocated. Bits starting at nelem are
338	// undefined and should never be referenced.
339	//
340	// Object n starts at address n*elemsize + (start << pageShift).
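	// For example, if freeindex is 3 and objects 3 and 4 are allocated
	// but object 5 is free, the next allocation returns object 5 and
	// freeindex becomes 6.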
341	freeindex uintptr
342	// TODO: Look up nelems from sizeclass and remove this field if it
343	// helps performance.
	nelems uintptr // number of objects in the span.
345
346	// Cache of the allocBits at freeindex. allocCache is shifted
347	// such that the lowest bit corresponds to the bit freeindex.
348	// allocCache holds the complement of allocBits, thus allowing
349	// ctz (count trailing zero) to use it directly.
350	// allocCache may contain bits beyond s.nelems; the caller must ignore
351	// these.
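	// For example, if the objects at freeindex, freeindex+1, and
	// freeindex+2 are allocated and the next object is free, the low
	// bits of allocCache are ...1000 and ctz yields 3.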
352	allocCache uint64
353
354	// allocBits and gcmarkBits hold pointers to a span's mark and
355	// allocation bits. The pointers are 8 byte aligned.
	// There are four arenas where this data is held.
357	// free: Dirty arenas that are no longer accessed
358	//       and can be reused.
359	// next: Holds information to be used in the next GC cycle.
360	// current: Information being used during this GC cycle.
361	// previous: Information being used during the last GC cycle.
362	// A new GC cycle starts with the call to finishsweep_m.
363	// finishsweep_m moves the previous arena to the free arena,
364	// the current arena to the previous arena, and
365	// the next arena to the current arena.
366	// The next arena is populated as the spans request
367	// memory to hold gcmarkBits for the next GC cycle as well
368	// as allocBits for newly allocated spans.
369	//
370	// The pointer arithmetic is done "by hand" instead of using
371	// arrays to avoid bounds checks along critical performance
372	// paths.
373	// The sweep will free the old allocBits and set allocBits to the
374	// gcmarkBits. The gcmarkBits are replaced with a fresh zeroed
375	// out memory.
376	allocBits  *gcBits
377	gcmarkBits *gcBits
378
379	// sweep generation:
380	// if sweepgen == h->sweepgen - 2, the span needs sweeping
381	// if sweepgen == h->sweepgen - 1, the span is currently being swept
382	// if sweepgen == h->sweepgen, the span is swept and ready to use
383	// if sweepgen == h->sweepgen + 1, the span was cached before sweep began and is still cached, and needs sweeping
384	// if sweepgen == h->sweepgen + 3, the span was swept and then cached and is still cached
385	// h->sweepgen is incremented by 2 after every GC
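	// For example, if h->sweepgen is 6: a span with sweepgen 4 needs
	// sweeping, 5 is currently being swept, 6 is swept and ready to
	// use, 7 was cached before sweep began and still needs sweeping,
	// and 9 was swept and then cached.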
386
387	sweepgen    uint32
388	divMul      uint16     // for divide by elemsize - divMagic.mul
389	baseMask    uint16     // if non-0, elemsize is a power of 2, & this will get object allocation base
390	allocCount  uint16     // number of allocated objects
391	spanclass   spanClass  // size class and noscan (uint8)
392	state       mSpanState // mspaninuse etc
393	needzero    uint8      // needs to be zeroed before allocation
394	divShift    uint8      // for divide by elemsize - divMagic.shift
395	divShift2   uint8      // for divide by elemsize - divMagic.shift2
396	scavenged   bool       // whether this span has had its pages released to the OS
397	elemsize    uintptr    // computed from sizeclass or from npages
398	unusedsince int64      // first time spotted by gc in mspanfree state
399	limit       uintptr    // end of data in span
400	speciallock mutex      // guards specials list
401	specials    *special   // linked list of special records sorted by offset.
402}
403
404func (s *mspan) base() uintptr {
405	return s.startAddr
406}
407
408func (s *mspan) layout() (size, n, total uintptr) {
409	total = s.npages << _PageShift
410	size = s.elemsize
411	if size > 0 {
412		n = total / size
413	}
414	return
415}
416
417// physPageBounds returns the start and end of the span
// rounded in to physical page boundaries.
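// If the span does not cover at least one full physical page, the
// returned end will not be greater than the returned start.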
419func (s *mspan) physPageBounds() (uintptr, uintptr) {
420	start := s.base()
421	end := start + s.npages<<_PageShift
422	if physPageSize > _PageSize {
423		// Round start and end in.
424		start = (start + physPageSize - 1) &^ (physPageSize - 1)
425		end &^= physPageSize - 1
426	}
427	return start, end
428}
429
430func (h *mheap) coalesce(s *mspan) {
431	// We scavenge s at the end after coalescing if s or anything
432	// it merged with is marked scavenged.
433	needsScavenge := false
434	prescavenged := s.released() // number of bytes already scavenged.
435
436	// merge is a helper which merges other into s, deletes references to other
437	// in heap metadata, and then discards it. other must be adjacent to s.
438	merge := func(other *mspan) {
439		// Adjust s via base and npages and also in heap metadata.
440		s.npages += other.npages
441		s.needzero |= other.needzero
442		if other.startAddr < s.startAddr {
443			s.startAddr = other.startAddr
444			h.setSpan(s.base(), s)
445		} else {
446			h.setSpan(s.base()+s.npages*pageSize-1, s)
447		}
448
		// If other or s are scavenged, then we need to scavenge the final coalesced span.
450		needsScavenge = needsScavenge || other.scavenged || s.scavenged
451		prescavenged += other.released()
452
453		// The size is potentially changing so the treap needs to delete adjacent nodes and
454		// insert back as a combined node.
455		if other.scavenged {
456			h.scav.removeSpan(other)
457		} else {
458			h.free.removeSpan(other)
459		}
460		other.state = mSpanDead
461		h.spanalloc.free(unsafe.Pointer(other))
462	}
463
464	// realign is a helper which shrinks other and grows s such that their
465	// boundary is on a physical page boundary.
466	realign := func(a, b, other *mspan) {
467		// Caller must ensure a.startAddr < b.startAddr and that either a or
468		// b is s. a and b must be adjacent. other is whichever of the two is
469		// not s.
470
471		// If pageSize <= physPageSize then spans are always aligned
472		// to physical page boundaries, so just exit.
473		if pageSize <= physPageSize {
474			return
475		}
476		// Since we're resizing other, we must remove it from the treap.
477		if other.scavenged {
478			h.scav.removeSpan(other)
479		} else {
480			h.free.removeSpan(other)
481		}
482		// Round boundary to the nearest physical page size, toward the
483		// scavenged span.
484		boundary := b.startAddr
485		if a.scavenged {
486			boundary &^= (physPageSize - 1)
487		} else {
488			boundary = (boundary + physPageSize - 1) &^ (physPageSize - 1)
489		}
490		a.npages = (boundary - a.startAddr) / pageSize
491		b.npages = (b.startAddr + b.npages*pageSize - boundary) / pageSize
492		b.startAddr = boundary
493
494		h.setSpan(boundary-1, a)
495		h.setSpan(boundary, b)
496
497		// Re-insert other now that it has a new size.
498		if other.scavenged {
499			h.scav.insert(other)
500		} else {
501			h.free.insert(other)
502		}
503	}
504
505	// Coalesce with earlier, later spans.
506	if before := spanOf(s.base() - 1); before != nil && before.state == mSpanFree {
507		if s.scavenged == before.scavenged {
508			merge(before)
509		} else {
510			realign(before, s, before)
511		}
512	}
513
514	// Now check to see if next (greater addresses) span is free and can be coalesced.
515	if after := spanOf(s.base() + s.npages*pageSize); after != nil && after.state == mSpanFree {
516		if s.scavenged == after.scavenged {
517			merge(after)
518		} else {
519			realign(s, after, after)
520		}
521	}
522
523	if needsScavenge {
524		// When coalescing spans, some physical pages which
525		// were not returned to the OS previously because
526		// they were only partially covered by the span suddenly
527		// become available for scavenging. We want to make sure
528		// those holes are filled in, and the span is properly
529		// scavenged. Rather than trying to detect those holes
530		// directly, we collect how many bytes were already
531		// scavenged above and subtract that from heap_released
532		// before re-scavenging the entire newly-coalesced span,
533		// which will implicitly bump up heap_released.
534		memstats.heap_released -= uint64(prescavenged)
535		s.scavenge()
536	}
537}
538
539func (s *mspan) scavenge() uintptr {
540	// start and end must be rounded in, otherwise madvise
541	// will round them *out* and release more memory
542	// than we want.
543	start, end := s.physPageBounds()
544	if end <= start {
545		// start and end don't span a whole physical page.
546		return 0
547	}
548	released := end - start
549	memstats.heap_released += uint64(released)
550	s.scavenged = true
551	sysUnused(unsafe.Pointer(start), released)
552	return released
553}
554
555// released returns the number of bytes in this span
// which were returned to the OS.
557func (s *mspan) released() uintptr {
558	if !s.scavenged {
559		return 0
560	}
561	start, end := s.physPageBounds()
562	return end - start
563}
564
565// recordspan adds a newly allocated span to h.allspans.
566//
567// This only happens the first time a span is allocated from
568// mheap.spanalloc (it is not called when a span is reused).
569//
570// Write barriers are disallowed here because it can be called from
571// gcWork when allocating new workbufs. However, because it's an
572// indirect call from the fixalloc initializer, the compiler can't see
573// this.
574//
575//go:nowritebarrierrec
576func recordspan(vh unsafe.Pointer, p unsafe.Pointer) {
577	h := (*mheap)(vh)
578	s := (*mspan)(p)
579	if len(h.allspans) >= cap(h.allspans) {
580		n := 64 * 1024 / sys.PtrSize
581		if n < cap(h.allspans)*3/2 {
582			n = cap(h.allspans) * 3 / 2
583		}
584		var new []*mspan
585		sp := (*notInHeapSlice)(unsafe.Pointer(&new))
586		sp.array = (*notInHeap)(sysAlloc(uintptr(n)*sys.PtrSize, &memstats.other_sys))
587		if sp.array == nil {
588			throw("runtime: cannot allocate memory")
589		}
590		sp.len = len(h.allspans)
591		sp.cap = n
592		if len(h.allspans) > 0 {
593			copy(new, h.allspans)
594		}
595		oldAllspans := h.allspans
596		*(*notInHeapSlice)(unsafe.Pointer(&h.allspans)) = *(*notInHeapSlice)(unsafe.Pointer(&new))
597		if len(oldAllspans) != 0 {
598			sysFree(unsafe.Pointer(&oldAllspans[0]), uintptr(cap(oldAllspans))*unsafe.Sizeof(oldAllspans[0]), &memstats.other_sys)
599		}
600	}
601	h.allspans = h.allspans[:len(h.allspans)+1]
602	h.allspans[len(h.allspans)-1] = s
603}
604
605// A spanClass represents the size class and noscan-ness of a span.
606//
607// Each size class has a noscan spanClass and a scan spanClass. The
608// noscan spanClass contains only noscan objects, which do not contain
609// pointers and thus do not need to be scanned by the garbage
610// collector.
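//
// For example, makeSpanClass(5, true) yields spanClass 11 (5<<1 | 1);
// sizeclass() recovers 5 and noscan() reports true. Scan classes are
// even and noscan classes are odd.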
611type spanClass uint8
612
613const (
614	numSpanClasses = _NumSizeClasses << 1
615	tinySpanClass  = spanClass(tinySizeClass<<1 | 1)
616)
617
618func makeSpanClass(sizeclass uint8, noscan bool) spanClass {
619	return spanClass(sizeclass<<1) | spanClass(bool2int(noscan))
620}
621
622func (sc spanClass) sizeclass() int8 {
623	return int8(sc >> 1)
624}
625
626func (sc spanClass) noscan() bool {
627	return sc&1 != 0
628}
629
630// arenaIndex returns the index into mheap_.arenas of the arena
// containing metadata for p. This index combines an index into the
632// L1 map and an index into the L2 map and should be used as
633// mheap_.arenas[ai.l1()][ai.l2()].
634//
635// If p is outside the range of valid heap addresses, either l1() or
636// l2() will be out of bounds.
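// On platforms where arenaL1Bits is 0 (many 64-bit platforms), l1()
// is always 0 and the entire index selects an entry in the single L2
// map.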
637//
638// It is nosplit because it's called by spanOf and several other
639// nosplit functions.
640//
641//go:nosplit
642func arenaIndex(p uintptr) arenaIdx {
643	return arenaIdx((p + arenaBaseOffset) / heapArenaBytes)
644}
645
646// arenaBase returns the low address of the region covered by heap
647// arena i.
648func arenaBase(i arenaIdx) uintptr {
649	return uintptr(i)*heapArenaBytes - arenaBaseOffset
650}
651
652type arenaIdx uint
653
654func (i arenaIdx) l1() uint {
655	if arenaL1Bits == 0 {
656		// Let the compiler optimize this away if there's no
657		// L1 map.
658		return 0
659	} else {
660		return uint(i) >> arenaL1Shift
661	}
662}
663
664func (i arenaIdx) l2() uint {
665	if arenaL1Bits == 0 {
666		return uint(i)
667	} else {
668		return uint(i) & (1<<arenaL2Bits - 1)
669	}
670}
671
672// inheap reports whether b is a pointer into a (potentially dead) heap object.
673// It returns false for pointers into mSpanManual spans.
674// Non-preemptible because it is used by write barriers.
675//go:nowritebarrier
676//go:nosplit
677func inheap(b uintptr) bool {
678	return spanOfHeap(b) != nil
679}
680
681// inHeapOrStack is a variant of inheap that returns true for pointers
682// into any allocated heap span.
683//
684//go:nowritebarrier
685//go:nosplit
686func inHeapOrStack(b uintptr) bool {
687	s := spanOf(b)
688	if s == nil || b < s.base() {
689		return false
690	}
691	switch s.state {
692	case mSpanInUse, mSpanManual:
693		return b < s.limit
694	default:
695		return false
696	}
697}
698
699// spanOf returns the span of p. If p does not point into the heap
700// arena or no span has ever contained p, spanOf returns nil.
701//
702// If p does not point to allocated memory, this may return a non-nil
703// span that does *not* contain p. If this is a possibility, the
704// caller should either call spanOfHeap or check the span bounds
705// explicitly.
706//
707// Must be nosplit because it has callers that are nosplit.
708//
709//go:nosplit
710func spanOf(p uintptr) *mspan {
711	// This function looks big, but we use a lot of constant
712	// folding around arenaL1Bits to get it under the inlining
713	// budget. Also, many of the checks here are safety checks
714	// that Go needs to do anyway, so the generated code is quite
715	// short.
716	ri := arenaIndex(p)
717	if arenaL1Bits == 0 {
718		// If there's no L1, then ri.l1() can't be out of bounds but ri.l2() can.
719		if ri.l2() >= uint(len(mheap_.arenas[0])) {
720			return nil
721		}
722	} else {
723		// If there's an L1, then ri.l1() can be out of bounds but ri.l2() can't.
724		if ri.l1() >= uint(len(mheap_.arenas)) {
725			return nil
726		}
727	}
728	l2 := mheap_.arenas[ri.l1()]
729	if arenaL1Bits != 0 && l2 == nil { // Should never happen if there's no L1.
730		return nil
731	}
732	ha := l2[ri.l2()]
733	if ha == nil {
734		return nil
735	}
736	return ha.spans[(p/pageSize)%pagesPerArena]
737}
738
739// spanOfUnchecked is equivalent to spanOf, but the caller must ensure
740// that p points into an allocated heap arena.
741//
742// Must be nosplit because it has callers that are nosplit.
743//
744//go:nosplit
745func spanOfUnchecked(p uintptr) *mspan {
746	ai := arenaIndex(p)
747	return mheap_.arenas[ai.l1()][ai.l2()].spans[(p/pageSize)%pagesPerArena]
748}
749
750// spanOfHeap is like spanOf, but returns nil if p does not point to a
751// heap object.
752//
753// Must be nosplit because it has callers that are nosplit.
754//
755//go:nosplit
756func spanOfHeap(p uintptr) *mspan {
757	s := spanOf(p)
758	// If p is not allocated, it may point to a stale span, so we
759	// have to check the span's bounds and state.
760	if s == nil || p < s.base() || p >= s.limit || s.state != mSpanInUse {
761		return nil
762	}
763	return s
764}
765
766// pageIndexOf returns the arena, page index, and page mask for pointer p.
767// The caller must ensure p is in the heap.
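// pageIdx selects the byte and pageMask the bit within that byte that
// correspond to the page containing p in the arena's pageInUse and
// pageMarks bitmaps.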
768func pageIndexOf(p uintptr) (arena *heapArena, pageIdx uintptr, pageMask uint8) {
769	ai := arenaIndex(p)
770	arena = mheap_.arenas[ai.l1()][ai.l2()]
771	pageIdx = ((p / pageSize) / 8) % uintptr(len(arena.pageInUse))
772	pageMask = byte(1 << ((p / pageSize) % 8))
773	return
774}
775
776// Initialize the heap.
777func (h *mheap) init() {
778	h.treapalloc.init(unsafe.Sizeof(treapNode{}), nil, nil, &memstats.other_sys)
779	h.spanalloc.init(unsafe.Sizeof(mspan{}), recordspan, unsafe.Pointer(h), &memstats.mspan_sys)
780	h.cachealloc.init(unsafe.Sizeof(mcache{}), nil, nil, &memstats.mcache_sys)
781	h.specialfinalizeralloc.init(unsafe.Sizeof(specialfinalizer{}), nil, nil, &memstats.other_sys)
782	h.specialprofilealloc.init(unsafe.Sizeof(specialprofile{}), nil, nil, &memstats.other_sys)
783	h.arenaHintAlloc.init(unsafe.Sizeof(arenaHint{}), nil, nil, &memstats.other_sys)
784
785	// Don't zero mspan allocations. Background sweeping can
786	// inspect a span concurrently with allocating it, so it's
787	// important that the span's sweepgen survive across freeing
788	// and re-allocating a span to prevent background sweeping
789	// from improperly cas'ing it from 0.
790	//
791	// This is safe because mspan contains no heap pointers.
792	h.spanalloc.zero = false
793
794	// h->mapcache needs no init
795
796	for i := range h.central {
797		h.central[i].mcentral.init(spanClass(i))
798	}
799}
800
801// reclaim sweeps and reclaims at least npage pages into the heap.
802// It is called before allocating npage pages to keep growth in check.
803//
804// reclaim implements the page-reclaimer half of the sweeper.
805//
806// h must NOT be locked.
807func (h *mheap) reclaim(npage uintptr) {
808	// This scans pagesPerChunk at a time. Higher values reduce
	// contention on h.reclaimIndex, but increase the minimum
810	// latency of performing a reclaim.
811	//
812	// Must be a multiple of the pageInUse bitmap element size.
813	//
814	// The time required by this can vary a lot depending on how
815	// many spans are actually freed. Experimentally, it can scan
816	// for pages at ~300 GB/ms on a 2.6GHz Core i7, but can only
817	// free spans at ~32 MB/ms. Using 512 pages bounds this at
818	// roughly 100µs.
819	//
820	// TODO(austin): Half of the time spent freeing spans is in
821	// locking/unlocking the heap (even with low contention). We
822	// could make the slow path here several times faster by
823	// batching heap frees.
824	const pagesPerChunk = 512
825
826	// Bail early if there's no more reclaim work.
827	if atomic.Load64(&h.reclaimIndex) >= 1<<63 {
828		return
829	}
830
831	// Disable preemption so the GC can't start while we're
832	// sweeping, so we can read h.sweepArenas, and so
833	// traceGCSweepStart/Done pair on the P.
834	mp := acquirem()
835
836	if trace.enabled {
837		traceGCSweepStart()
838	}
839
840	arenas := h.sweepArenas
841	locked := false
842	for npage > 0 {
843		// Pull from accumulated credit first.
844		if credit := atomic.Loaduintptr(&h.reclaimCredit); credit > 0 {
845			take := credit
846			if take > npage {
847				// Take only what we need.
848				take = npage
849			}
850			if atomic.Casuintptr(&h.reclaimCredit, credit, credit-take) {
851				npage -= take
852			}
853			continue
854		}
855
856		// Claim a chunk of work.
857		idx := uintptr(atomic.Xadd64(&h.reclaimIndex, pagesPerChunk) - pagesPerChunk)
858		if idx/pagesPerArena >= uintptr(len(arenas)) {
859			// Page reclaiming is done.
860			atomic.Store64(&h.reclaimIndex, 1<<63)
861			break
862		}
863
864		if !locked {
865			// Lock the heap for reclaimChunk.
866			lock(&h.lock)
867			locked = true
868		}
869
870		// Scan this chunk.
871		nfound := h.reclaimChunk(arenas, idx, pagesPerChunk)
872		if nfound <= npage {
873			npage -= nfound
874		} else {
875			// Put spare pages toward global credit.
876			atomic.Xadduintptr(&h.reclaimCredit, nfound-npage)
877			npage = 0
878		}
879	}
880	if locked {
881		unlock(&h.lock)
882	}
883
884	if trace.enabled {
885		traceGCSweepDone()
886	}
887	releasem(mp)
888}
889
890// reclaimChunk sweeps unmarked spans that start at page indexes [pageIdx, pageIdx+n).
891// It returns the number of pages returned to the heap.
892//
893// h.lock must be held and the caller must be non-preemptible.
894func (h *mheap) reclaimChunk(arenas []arenaIdx, pageIdx, n uintptr) uintptr {
895	// The heap lock must be held because this accesses the
896	// heapArena.spans arrays using potentially non-live pointers.
897	// In particular, if a span were freed and merged concurrently
898	// with this probing heapArena.spans, it would be possible to
899	// observe arbitrary, stale span pointers.
900	n0 := n
901	var nFreed uintptr
902	sg := h.sweepgen
903	for n > 0 {
904		ai := arenas[pageIdx/pagesPerArena]
905		ha := h.arenas[ai.l1()][ai.l2()]
906
907		// Get a chunk of the bitmap to work on.
908		arenaPage := uint(pageIdx % pagesPerArena)
909		inUse := ha.pageInUse[arenaPage/8:]
910		marked := ha.pageMarks[arenaPage/8:]
911		if uintptr(len(inUse)) > n/8 {
912			inUse = inUse[:n/8]
913			marked = marked[:n/8]
914		}
915
916		// Scan this bitmap chunk for spans that are in-use
917		// but have no marked objects on them.
918		for i := range inUse {
919			inUseUnmarked := inUse[i] &^ marked[i]
920			if inUseUnmarked == 0 {
921				continue
922			}
923
924			for j := uint(0); j < 8; j++ {
925				if inUseUnmarked&(1<<j) != 0 {
926					s := ha.spans[arenaPage+uint(i)*8+j]
927					if atomic.Load(&s.sweepgen) == sg-2 && atomic.Cas(&s.sweepgen, sg-2, sg-1) {
928						npages := s.npages
929						unlock(&h.lock)
930						if s.sweep(false) {
931							nFreed += npages
932						}
933						lock(&h.lock)
934						// Reload inUse. It's possible nearby
935						// spans were freed when we dropped the
936						// lock and we don't want to get stale
937						// pointers from the spans array.
938						inUseUnmarked = inUse[i] &^ marked[i]
939					}
940				}
941			}
942		}
943
944		// Advance.
945		pageIdx += uintptr(len(inUse) * 8)
946		n -= uintptr(len(inUse) * 8)
947	}
948	if trace.enabled {
949		// Account for pages scanned but not reclaimed.
950		traceGCSweepSpan((n0 - nFreed) * pageSize)
951	}
952	return nFreed
953}
954
955// alloc_m is the internal implementation of mheap.alloc.
956//
957// alloc_m must run on the system stack because it locks the heap, so
958// any stack growth during alloc_m would self-deadlock.
959//
960//go:systemstack
961func (h *mheap) alloc_m(npage uintptr, spanclass spanClass, large bool) *mspan {
962	_g_ := getg()
963
964	// To prevent excessive heap growth, before allocating n pages
965	// we need to sweep and reclaim at least n pages.
966	if h.sweepdone == 0 {
967		h.reclaim(npage)
968	}
969
970	lock(&h.lock)
971	// transfer stats from cache to global
972	memstats.heap_scan += uint64(_g_.m.mcache.local_scan)
973	_g_.m.mcache.local_scan = 0
974	memstats.tinyallocs += uint64(_g_.m.mcache.local_tinyallocs)
975	_g_.m.mcache.local_tinyallocs = 0
976
977	s := h.allocSpanLocked(npage, &memstats.heap_inuse)
978	if s != nil {
979		// Record span info, because gc needs to be
980		// able to map interior pointer to containing span.
981		atomic.Store(&s.sweepgen, h.sweepgen)
982		h.sweepSpans[h.sweepgen/2%2].push(s) // Add to swept in-use list.
983		s.state = mSpanInUse
984		s.allocCount = 0
985		s.spanclass = spanclass
986		if sizeclass := spanclass.sizeclass(); sizeclass == 0 {
987			s.elemsize = s.npages << _PageShift
988			s.divShift = 0
989			s.divMul = 0
990			s.divShift2 = 0
991			s.baseMask = 0
992		} else {
993			s.elemsize = uintptr(class_to_size[sizeclass])
994			m := &class_to_divmagic[sizeclass]
995			s.divShift = m.shift
996			s.divMul = m.mul
997			s.divShift2 = m.shift2
998			s.baseMask = m.baseMask
999		}
1000
1001		// Mark in-use span in arena page bitmap.
1002		arena, pageIdx, pageMask := pageIndexOf(s.base())
1003		arena.pageInUse[pageIdx] |= pageMask
1004
1005		// update stats, sweep lists
1006		h.pagesInUse += uint64(npage)
1007		if large {
1008			memstats.heap_objects++
1009			mheap_.largealloc += uint64(s.elemsize)
1010			mheap_.nlargealloc++
1011			atomic.Xadd64(&memstats.heap_live, int64(npage<<_PageShift))
1012		}
1013	}
1014	// heap_scan and heap_live were updated.
1015	if gcBlackenEnabled != 0 {
1016		gcController.revise()
1017	}
1018
1019	if trace.enabled {
1020		traceHeapAlloc()
1021	}
1022
1023	// h.spans is accessed concurrently without synchronization
1024	// from other threads. Hence, there must be a store/store
1025	// barrier here to ensure the writes to h.spans above happen
1026	// before the caller can publish a pointer p to an object
1027	// allocated from s. As soon as this happens, the garbage
1028	// collector running on another processor could read p and
1029	// look up s in h.spans. The unlock acts as the barrier to
1030	// order these writes. On the read side, the data dependency
1031	// between p and the index in h.spans orders the reads.
1032	unlock(&h.lock)
1033	return s
1034}
1035
1036// alloc allocates a new span of npage pages from the GC'd heap.
1037//
// Either large must be true or spanclass must indicate the span's
1039// size class and scannability.
1040//
1041// If needzero is true, the memory for the returned span will be zeroed.
1042func (h *mheap) alloc(npage uintptr, spanclass spanClass, large bool, needzero bool) *mspan {
1043	// Don't do any operations that lock the heap on the G stack.
1044	// It might trigger stack growth, and the stack growth code needs
1045	// to be able to allocate heap.
1046	var s *mspan
1047	systemstack(func() {
1048		s = h.alloc_m(npage, spanclass, large)
1049	})
1050
1051	if s != nil {
1052		if needzero && s.needzero != 0 {
1053			memclrNoHeapPointers(unsafe.Pointer(s.base()), s.npages<<_PageShift)
1054		}
1055		s.needzero = 0
1056	}
1057	return s
1058}
1059
1060// allocManual allocates a manually-managed span of npage pages.
1061// allocManual returns nil if allocation fails.
1062//
1063// allocManual adds the bytes used to *stat, which should be a
1064// memstats in-use field. Unlike allocations in the GC'd heap, the
1065// allocation does *not* count toward heap_inuse or heap_sys.
1066//
1067// The memory backing the returned span may not be zeroed if
1068// span.needzero is set.
1069//
1070// allocManual must be called on the system stack to prevent stack
1071// growth. Since this is used by the stack allocator, stack growth
1072// during allocManual would self-deadlock.
1073//
1074//go:systemstack
1075func (h *mheap) allocManual(npage uintptr, stat *uint64) *mspan {
1076	lock(&h.lock)
1077	s := h.allocSpanLocked(npage, stat)
1078	if s != nil {
1079		s.state = mSpanManual
1080		s.manualFreeList = 0
1081		s.allocCount = 0
1082		s.spanclass = 0
1083		s.nelems = 0
1084		s.elemsize = 0
1085		s.limit = s.base() + s.npages<<_PageShift
1086		// Manually managed memory doesn't count toward heap_sys.
1087		memstats.heap_sys -= uint64(s.npages << _PageShift)
1088	}
1089
1090	// This unlock acts as a release barrier. See mheap.alloc_m.
1091	unlock(&h.lock)
1092
1093	return s
1094}
1095
1096// setSpan modifies the span map so spanOf(base) is s.
1097func (h *mheap) setSpan(base uintptr, s *mspan) {
1098	ai := arenaIndex(base)
1099	h.arenas[ai.l1()][ai.l2()].spans[(base/pageSize)%pagesPerArena] = s
1100}
1101
1102// setSpans modifies the span map so [spanOf(base), spanOf(base+npage*pageSize))
1103// is s.
1104func (h *mheap) setSpans(base, npage uintptr, s *mspan) {
1105	p := base / pageSize
1106	ai := arenaIndex(base)
1107	ha := h.arenas[ai.l1()][ai.l2()]
1108	for n := uintptr(0); n < npage; n++ {
1109		i := (p + n) % pagesPerArena
1110		if i == 0 {
1111			ai = arenaIndex(base + n*pageSize)
1112			ha = h.arenas[ai.l1()][ai.l2()]
1113		}
1114		ha.spans[i] = s
1115	}
1116}
1117
1118// pickFreeSpan acquires a free span from internal free list
1119// structures if one is available. Otherwise returns nil.
1120// h must be locked.
1121func (h *mheap) pickFreeSpan(npage uintptr) *mspan {
1122	tf := h.free.find(npage)
1123	ts := h.scav.find(npage)
1124
1125	// Check for whichever treap gave us the smaller, non-nil result.
1126	// Note that we want the _smaller_ free span, i.e. the free span
1127	// closer in size to the amount we requested (npage).
1128	var s *mspan
1129	if tf != nil && (ts == nil || tf.spanKey.npages <= ts.spanKey.npages) {
1130		s = tf.spanKey
1131		h.free.removeNode(tf)
1132	} else if ts != nil && (tf == nil || tf.spanKey.npages > ts.spanKey.npages) {
1133		s = ts.spanKey
1134		h.scav.removeNode(ts)
1135	}
1136	return s
1137}
1138
// Allocates a span of the given size. h must be locked.
1140// The returned span has been removed from the
1141// free structures, but its state is still mSpanFree.
1142func (h *mheap) allocSpanLocked(npage uintptr, stat *uint64) *mspan {
1143	var s *mspan
1144
1145	s = h.pickFreeSpan(npage)
1146	if s != nil {
1147		goto HaveSpan
1148	}
1149	// On failure, grow the heap and try again.
1150	if !h.grow(npage) {
1151		return nil
1152	}
1153	s = h.pickFreeSpan(npage)
1154	if s != nil {
1155		goto HaveSpan
1156	}
1157	throw("grew heap, but no adequate free span found")
1158
1159HaveSpan:
1160	// Mark span in use.
1161	if s.state != mSpanFree {
1162		throw("candidate mspan for allocation is not free")
1163	}
1164	if s.npages < npage {
1165		throw("candidate mspan for allocation is too small")
1166	}
1167
1168	// First, subtract any memory that was released back to
1169	// the OS from s. We will re-scavenge the trimmed section
1170	// if necessary.
1171	memstats.heap_released -= uint64(s.released())
1172
1173	if s.npages > npage {
1174		// Trim extra and put it back in the heap.
1175		t := (*mspan)(h.spanalloc.alloc())
1176		t.init(s.base()+npage<<_PageShift, s.npages-npage)
1177		s.npages = npage
1178		h.setSpan(t.base()-1, s)
1179		h.setSpan(t.base(), t)
1180		h.setSpan(t.base()+t.npages*pageSize-1, t)
1181		t.needzero = s.needzero
1182		// If s was scavenged, then t may be scavenged.
1183		start, end := t.physPageBounds()
1184		if s.scavenged && start < end {
1185			memstats.heap_released += uint64(end - start)
1186			t.scavenged = true
1187		}
1188		s.state = mSpanManual // prevent coalescing with s
1189		t.state = mSpanManual
1190		h.freeSpanLocked(t, false, false, s.unusedsince)
1191		s.state = mSpanFree
1192	}
1193	// "Unscavenge" s only AFTER splitting so that
1194	// we only sysUsed whatever we actually need.
1195	if s.scavenged {
1196		// sysUsed all the pages that are actually available
1197		// in the span. Note that we don't need to decrement
1198		// heap_released since we already did so earlier.
1199		sysUsed(unsafe.Pointer(s.base()), s.npages<<_PageShift)
1200		s.scavenged = false
1201
1202		// Since we allocated out of a scavenged span, we just
1203		// grew the RSS. Mitigate this by scavenging enough free
1204		// space to make up for it.
1205		//
1206		// Also, scavengeLargest may cause coalescing, so prevent
1207		// coalescing with s by temporarily changing its state.
1208		s.state = mSpanManual
1209		h.scavengeLargest(s.npages * pageSize)
1210		s.state = mSpanFree
1211	}
1212	s.unusedsince = 0
1213
1214	h.setSpans(s.base(), npage, s)
1215
1216	*stat += uint64(npage << _PageShift)
1217	memstats.heap_idle -= uint64(npage << _PageShift)
1218
1219	//println("spanalloc", hex(s.start<<_PageShift))
1220	if s.inList() {
1221		throw("still in list")
1222	}
1223	return s
1224}
1225
1226// Try to add at least npage pages of memory to the heap,
1227// returning whether it worked.
1228//
1229// h must be locked.
1230func (h *mheap) grow(npage uintptr) bool {
1231	ask := npage << _PageShift
1232	v, size := h.sysAlloc(ask)
1233	if v == nil {
1234		print("runtime: out of memory: cannot allocate ", ask, "-byte block (", memstats.heap_sys, " in use)\n")
1235		return false
1236	}
1237
1238	// Scavenge some pages out of the free treap to make up for
1239	// the virtual memory space we just allocated. We prefer to
1240	// scavenge the largest spans first since the cost of scavenging
1241	// is proportional to the number of sysUnused() calls rather than
1242	// the number of pages released, so we make fewer of those calls
1243	// with larger spans.
1244	h.scavengeLargest(size)
1245
1246	// Create a fake "in use" span and free it, so that the
1247	// right coalescing happens.
1248	s := (*mspan)(h.spanalloc.alloc())
1249	s.init(uintptr(v), size/pageSize)
1250	h.setSpans(s.base(), s.npages, s)
1251	atomic.Store(&s.sweepgen, h.sweepgen)
1252	s.state = mSpanInUse
1253	h.pagesInUse += uint64(s.npages)
1254	h.freeSpanLocked(s, false, true, 0)
1255	return true
1256}
1257
1258// Free the span back into the heap.
1259//
1260// large must match the value of large passed to mheap.alloc. This is
1261// used for accounting.
1262func (h *mheap) freeSpan(s *mspan, large bool) {
1263	systemstack(func() {
1264		mp := getg().m
1265		lock(&h.lock)
1266		memstats.heap_scan += uint64(mp.mcache.local_scan)
1267		mp.mcache.local_scan = 0
1268		memstats.tinyallocs += uint64(mp.mcache.local_tinyallocs)
1269		mp.mcache.local_tinyallocs = 0
1270		if msanenabled {
1271			// Tell msan that this entire span is no longer in use.
1272			base := unsafe.Pointer(s.base())
1273			bytes := s.npages << _PageShift
1274			msanfree(base, bytes)
1275		}
1276		if large {
1277			// Match accounting done in mheap.alloc.
1278			memstats.heap_objects--
1279		}
1280		if gcBlackenEnabled != 0 {
1281			// heap_scan changed.
1282			gcController.revise()
1283		}
1284		h.freeSpanLocked(s, true, true, 0)
1285		unlock(&h.lock)
1286	})
1287}
1288
1289// freeManual frees a manually-managed span returned by allocManual.
1290// stat must be the same as the stat passed to the allocManual that
1291// allocated s.
1292//
1293// This must only be called when gcphase == _GCoff. See mSpanState for
1294// an explanation.
1295//
1296// freeManual must be called on the system stack to prevent stack
1297// growth, just like allocManual.
1298//
1299//go:systemstack
1300func (h *mheap) freeManual(s *mspan, stat *uint64) {
1301	s.needzero = 1
1302	lock(&h.lock)
1303	*stat -= uint64(s.npages << _PageShift)
1304	memstats.heap_sys += uint64(s.npages << _PageShift)
1305	h.freeSpanLocked(s, false, true, 0)
1306	unlock(&h.lock)
1307}
1308
1309// s must be on the busy list or unlinked.
1310func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince int64) {
1311	switch s.state {
1312	case mSpanManual:
1313		if s.allocCount != 0 {
1314			throw("mheap.freeSpanLocked - invalid stack free")
1315		}
1316	case mSpanInUse:
1317		if s.allocCount != 0 || s.sweepgen != h.sweepgen {
1318			print("mheap.freeSpanLocked - span ", s, " ptr ", hex(s.base()), " allocCount ", s.allocCount, " sweepgen ", s.sweepgen, "/", h.sweepgen, "\n")
1319			throw("mheap.freeSpanLocked - invalid free")
1320		}
1321		h.pagesInUse -= uint64(s.npages)
1322
1323		// Clear in-use bit in arena page bitmap.
1324		arena, pageIdx, pageMask := pageIndexOf(s.base())
1325		arena.pageInUse[pageIdx] &^= pageMask
1326	default:
1327		throw("mheap.freeSpanLocked - invalid span state")
1328	}
1329
1330	if acctinuse {
1331		memstats.heap_inuse -= uint64(s.npages << _PageShift)
1332	}
1333	if acctidle {
1334		memstats.heap_idle += uint64(s.npages << _PageShift)
1335	}
1336	s.state = mSpanFree
1337
1338	// Stamp newly unused spans. The scavenger will use that
1339	// info to potentially give back some pages to the OS.
1340	s.unusedsince = unusedsince
1341	if unusedsince == 0 {
1342		s.unusedsince = nanotime()
1343	}
1344
1345	// Coalesce span with neighbors.
1346	h.coalesce(s)
1347
1348	// Insert s into the appropriate treap.
1349	if s.scavenged {
1350		h.scav.insert(s)
1351	} else {
1352		h.free.insert(s)
1353	}
1354}
1355
1356// scavengeLargest scavenges nbytes worth of spans in unscav
1357// starting from the largest span and working down. It then takes those spans
1358// and places them in scav. h must be locked.
1359func (h *mheap) scavengeLargest(nbytes uintptr) {
1360	// Use up scavenge credit if there's any available.
1361	if nbytes > h.scavengeCredit {
1362		nbytes -= h.scavengeCredit
1363		h.scavengeCredit = 0
1364	} else {
1365		h.scavengeCredit -= nbytes
1366		return
1367	}
1368	// Iterate over the treap backwards (from largest to smallest) scavenging spans
1369	// until we've reached our quota of nbytes.
1370	released := uintptr(0)
1371	for t := h.free.end(); released < nbytes && t.valid(); {
1372		s := t.span()
1373		r := s.scavenge()
1374		if r == 0 {
1375			// Since we're going in order of largest-to-smallest span, this
1376			// means all other spans are no bigger than s. There's a high
1377			// chance that the other spans don't even cover a full page,
1378			// (though they could) but iterating further just for a handful
1379			// of pages probably isn't worth it, so just stop here.
1380			//
1381			// This check also preserves the invariant that spans that have
1382			// `scavenged` set are only ever in the `scav` treap, and
1383			// those which have it unset are only in the `free` treap.
1384			return
1385		}
1386		n := t.prev()
1387		h.free.erase(t)
1388		// Now that s is scavenged, we must eagerly coalesce it
1389		// with its neighbors to prevent having two spans with
1390		// the same scavenged state adjacent to each other.
1391		h.coalesce(s)
1392		t = n
1393		h.scav.insert(s)
1394		released += r
1395	}
1396	// If we over-scavenged, turn that extra amount into credit.
1397	if released > nbytes {
1398		h.scavengeCredit += released - nbytes
1399	}
1400}
1401
1402// scavengeAll visits each node in the unscav treap and scavenges the
1403// treapNode's span. It then removes the scavenged span from
1404// unscav and adds it into scav before continuing. h must be locked.
1405func (h *mheap) scavengeAll(now, limit uint64) uintptr {
1406	// Iterate over the treap scavenging spans if unused for at least limit time.
1407	released := uintptr(0)
1408	for t := h.free.start(); t.valid(); {
1409		s := t.span()
1410		n := t.next()
1411		if (now - uint64(s.unusedsince)) > limit {
1412			r := s.scavenge()
1413			if r != 0 {
1414				h.free.erase(t)
1415				// Now that s is scavenged, we must eagerly coalesce it
1416				// with its neighbors to prevent having two spans with
1417				// the same scavenged state adjacent to each other.
1418				h.coalesce(s)
1419				h.scav.insert(s)
1420				released += r
1421			}
1422		}
1423		t = n
1424	}
1425	return released
1426}
1427
1428func (h *mheap) scavenge(k int32, now, limit uint64) {
1429	// Disallow malloc or panic while holding the heap lock. We do
	// this here because this is a non-mallocgc entry-point to
1431	// the mheap API.
1432	gp := getg()
1433	gp.m.mallocing++
1434	lock(&h.lock)
1435	released := h.scavengeAll(now, limit)
1436	unlock(&h.lock)
1437	gp.m.mallocing--
1438
1439	if debug.gctrace > 0 {
1440		if released > 0 {
1441			print("scvg", k, ": ", released>>20, " MB released\n")
1442		}
1443		print("scvg", k, ": inuse: ", memstats.heap_inuse>>20, ", idle: ", memstats.heap_idle>>20, ", sys: ", memstats.heap_sys>>20, ", released: ", memstats.heap_released>>20, ", consumed: ", (memstats.heap_sys-memstats.heap_released)>>20, " (MB)\n")
1444	}
1445}
1446
1447//go:linkname runtime_debug_freeOSMemory runtime..z2fdebug.freeOSMemory
1448func runtime_debug_freeOSMemory() {
1449	GC()
1450	systemstack(func() { mheap_.scavenge(-1, ^uint64(0), 0) })
1451}
1452
1453// Initialize a new span with the given start and npages.
1454func (span *mspan) init(base uintptr, npages uintptr) {
1455	// span is *not* zeroed.
1456	span.next = nil
1457	span.prev = nil
1458	span.list = nil
1459	span.startAddr = base
1460	span.npages = npages
1461	span.allocCount = 0
1462	span.spanclass = 0
1463	span.elemsize = 0
1464	span.state = mSpanDead
1465	span.unusedsince = 0
1466	span.scavenged = false
1467	span.speciallock.key = 0
1468	span.specials = nil
1469	span.needzero = 0
1470	span.freeindex = 0
1471	span.allocBits = nil
1472	span.gcmarkBits = nil
1473}
1474
1475func (span *mspan) inList() bool {
1476	return span.list != nil
1477}
1478
1479// Initialize an empty doubly-linked list.
1480func (list *mSpanList) init() {
1481	list.first = nil
1482	list.last = nil
1483}
1484
1485func (list *mSpanList) remove(span *mspan) {
1486	if span.list != list {
1487		print("runtime: failed mSpanList.remove span.npages=", span.npages,
1488			" span=", span, " prev=", span.prev, " span.list=", span.list, " list=", list, "\n")
1489		throw("mSpanList.remove")
1490	}
1491	if list.first == span {
1492		list.first = span.next
1493	} else {
1494		span.prev.next = span.next
1495	}
1496	if list.last == span {
1497		list.last = span.prev
1498	} else {
1499		span.next.prev = span.prev
1500	}
1501	span.next = nil
1502	span.prev = nil
1503	span.list = nil
1504}
1505
1506func (list *mSpanList) isEmpty() bool {
1507	return list.first == nil
1508}
1509
1510func (list *mSpanList) insert(span *mspan) {
1511	if span.next != nil || span.prev != nil || span.list != nil {
1512		println("runtime: failed mSpanList.insert", span, span.next, span.prev, span.list)
1513		throw("mSpanList.insert")
1514	}
1515	span.next = list.first
1516	if list.first != nil {
1517		// The list contains at least one span; link it in.
1518		// The last span in the list doesn't change.
1519		list.first.prev = span
1520	} else {
1521		// The list contains no spans, so this is also the last span.
1522		list.last = span
1523	}
1524	list.first = span
1525	span.list = list
1526}
1527
1528func (list *mSpanList) insertBack(span *mspan) {
1529	if span.next != nil || span.prev != nil || span.list != nil {
1530		println("runtime: failed mSpanList.insertBack", span, span.next, span.prev, span.list)
1531		throw("mSpanList.insertBack")
1532	}
1533	span.prev = list.last
1534	if list.last != nil {
1535		// The list contains at least one span.
1536		list.last.next = span
1537	} else {
1538		// The list contains no spans, so this is also the first span.
1539		list.first = span
1540	}
1541	list.last = span
1542	span.list = list
1543}
1544
1545// takeAll removes all spans from other and inserts them at the front
1546// of list.
1547func (list *mSpanList) takeAll(other *mSpanList) {
1548	if other.isEmpty() {
1549		return
1550	}
1551
1552	// Reparent everything in other to list.
1553	for s := other.first; s != nil; s = s.next {
1554		s.list = list
1555	}
1556
1557	// Concatenate the lists.
1558	if list.isEmpty() {
1559		*list = *other
1560	} else {
1561		// Neither list is empty. Put other before list.
1562		other.last.next = list.first
1563		list.first.prev = other.last
1564		list.first = other.first
1565	}
1566
1567	other.first, other.last = nil, nil
1568}
1569
1570const (
1571	_KindSpecialFinalizer = 1
1572	_KindSpecialProfile   = 2
1573	// Note: The finalizer special must be first because if we're freeing
1574	// an object, a finalizer special will cause the freeing operation
1575	// to abort, and we want to keep the other special records around
1576	// if that happens.
1577)
1578
1579//go:notinheap
1580type special struct {
1581	next   *special // linked list in span
1582	offset uint16   // span offset of object
1583	kind   byte     // kind of special
1584}
1585
1586// Adds the special record s to the list of special records for
1587// the object p. All fields of s should be filled in except for
1588// offset & next, which this routine will fill in.
1589// Returns true if the special was successfully added, false otherwise.
1590// (The add will fail only if a record with the same p and s->kind
1591//  already exists.)
1592func addspecial(p unsafe.Pointer, s *special) bool {
1593	span := spanOfHeap(uintptr(p))
1594	if span == nil {
1595		throw("addspecial on invalid pointer")
1596	}
1597
1598	// Ensure that the span is swept.
1599	// Sweeping accesses the specials list w/o locks, so we have
1600	// to synchronize with it. And it's just much safer.
1601	mp := acquirem()
1602	span.ensureSwept()
1603
1604	offset := uintptr(p) - span.base()
1605	kind := s.kind
1606
1607	lock(&span.speciallock)
1608
1609	// Find splice point, check for existing record.
1610	t := &span.specials
1611	for {
1612		x := *t
1613		if x == nil {
1614			break
1615		}
1616		if offset == uintptr(x.offset) && kind == x.kind {
1617			unlock(&span.speciallock)
1618			releasem(mp)
1619			return false // already exists
1620		}
1621		if offset < uintptr(x.offset) || (offset == uintptr(x.offset) && kind < x.kind) {
1622			break
1623		}
1624		t = &x.next
1625	}
1626
1627	// Splice in record, fill in offset.
1628	s.offset = uint16(offset)
1629	s.next = *t
1630	*t = s
1631	unlock(&span.speciallock)
1632	releasem(mp)
1633
1634	return true
1635}
1636
// removespecial removes the special record of the given kind for the
// object p. Returns the record if it existed, nil otherwise.
// The caller must free the result with the appropriate fixalloc
// (e.g. mheap_.specialfinalizeralloc), as removefinalizer does below.
1640func removespecial(p unsafe.Pointer, kind uint8) *special {
1641	span := spanOfHeap(uintptr(p))
1642	if span == nil {
1643		throw("removespecial on invalid pointer")
1644	}
1645
	// Ensure that the span is swept.
	// Sweeping accesses the specials list without locks, so we have
	// to synchronize with it. It is also simply safer to operate on
	// an already-swept span.
1649	mp := acquirem()
1650	span.ensureSwept()
1651
1652	offset := uintptr(p) - span.base()
1653
1654	lock(&span.speciallock)
1655	t := &span.specials
1656	for {
1657		s := *t
1658		if s == nil {
1659			break
1660		}
		// This function is used for finalizers only, so we don't check for
		// "interior" specials (p's offset within the span must equal s.offset exactly).
1663		if offset == uintptr(s.offset) && kind == s.kind {
1664			*t = s.next
1665			unlock(&span.speciallock)
1666			releasem(mp)
1667			return s
1668		}
1669		t = &s.next
1670	}
1671	unlock(&span.speciallock)
1672	releasem(mp)
1673	return nil
1674}
1675
1676// The described object has a finalizer set for it.
1677//
1678// specialfinalizer is allocated from non-GC'd memory, so any heap
1679// pointers must be specially handled.
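// In particular, fn is treated as a GC root by markrootSpans, and
// addfinalizer below performs equivalent marking when a finalizer is
// added while a GC is already in its mark phase.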
1680//
1681//go:notinheap
1682type specialfinalizer struct {
1683	special special
1684	fn      *funcval  // May be a heap pointer.
1685	ft      *functype // May be a heap pointer, but always live.
1686	ot      *ptrtype  // May be a heap pointer, but always live.
1687}
1688
1689// Adds a finalizer to the object p. Returns true if it succeeded.
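//
// addfinalizer is what runtime.SetFinalizer eventually calls. As an
// illustrative (hypothetical) example of user code that ends up here:
//
//	type T struct{ buf []byte }
//	t := &T{buf: make([]byte, 1<<20)}
//	runtime.SetFinalizer(t, func(t *T) { println("t became unreachable") })
//
// This reaches addfinalizer with p pointing at t and f/ft/ot describing
// the finalizer function and its argument type.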
1690func addfinalizer(p unsafe.Pointer, f *funcval, ft *functype, ot *ptrtype) bool {
1691	lock(&mheap_.speciallock)
1692	s := (*specialfinalizer)(mheap_.specialfinalizeralloc.alloc())
1693	unlock(&mheap_.speciallock)
1694	s.special.kind = _KindSpecialFinalizer
1695	s.fn = f
1696	s.ft = ft
1697	s.ot = ot
1698	if addspecial(p, &s.special) {
1699		// This is responsible for maintaining the same
1700		// GC-related invariants as markrootSpans in any
1701		// situation where it's possible that markrootSpans
1702		// has already run but mark termination hasn't yet.
1703		if gcphase != _GCoff {
1704			base, _, _ := findObject(uintptr(p), 0, 0, false)
1705			mp := acquirem()
1706			gcw := &mp.p.ptr().gcw
1707			// Mark everything reachable from the object
1708			// so it's retained for the finalizer.
1709			scanobject(base, gcw)
1710			// Mark the finalizer itself, since the
1711			// special isn't part of the GC'd heap.
1712			scanblock(uintptr(unsafe.Pointer(&s.fn)), sys.PtrSize, &oneptrmask[0], gcw)
1713			releasem(mp)
1714		}
1715		return true
1716	}
1717
1718	// There was an old finalizer
1719	lock(&mheap_.speciallock)
1720	mheap_.specialfinalizeralloc.free(unsafe.Pointer(s))
1721	unlock(&mheap_.speciallock)
1722	return false
1723}
1724
1725// Removes the finalizer (if any) from the object p.
1726func removefinalizer(p unsafe.Pointer) {
1727	s := (*specialfinalizer)(unsafe.Pointer(removespecial(p, _KindSpecialFinalizer)))
1728	if s == nil {
1729		return // there wasn't a finalizer to remove
1730	}
1731	lock(&mheap_.speciallock)
1732	mheap_.specialfinalizeralloc.free(unsafe.Pointer(s))
1733	unlock(&mheap_.speciallock)
1734}
1735
1736// The described object is being heap profiled.
1737//
1738//go:notinheap
1739type specialprofile struct {
1740	special special
1741	b       *bucket
1742}
1743
// setprofilebucket sets the heap profile bucket associated with p to b.
1745func setprofilebucket(p unsafe.Pointer, b *bucket) {
1746	lock(&mheap_.speciallock)
1747	s := (*specialprofile)(mheap_.specialprofilealloc.alloc())
1748	unlock(&mheap_.speciallock)
1749	s.special.kind = _KindSpecialProfile
1750	s.b = b
1751	if !addspecial(p, &s.special) {
1752		throw("setprofilebucket: profile already set")
1753	}
1754}
1755
1756// Do whatever cleanup needs to be done to deallocate s. It has
1757// already been unlinked from the mspan specials list.
1758func freespecial(s *special, p unsafe.Pointer, size uintptr) {
1759	switch s.kind {
1760	case _KindSpecialFinalizer:
1761		sf := (*specialfinalizer)(unsafe.Pointer(s))
1762		queuefinalizer(p, sf.fn, sf.ft, sf.ot)
1763		lock(&mheap_.speciallock)
1764		mheap_.specialfinalizeralloc.free(unsafe.Pointer(sf))
1765		unlock(&mheap_.speciallock)
1766	case _KindSpecialProfile:
1767		sp := (*specialprofile)(unsafe.Pointer(s))
1768		mProf_Free(sp.b, size)
1769		lock(&mheap_.speciallock)
1770		mheap_.specialprofilealloc.free(unsafe.Pointer(sp))
1771		unlock(&mheap_.speciallock)
1772	default:
1773		throw("bad special kind")
1774		panic("not reached")
1775	}
1776}
1777
1778// gcBits is an alloc/mark bitmap. This is always used as *gcBits.
1779//
1780//go:notinheap
1781type gcBits uint8
1782
1783// bytep returns a pointer to the n'th byte of b.
1784func (b *gcBits) bytep(n uintptr) *uint8 {
1785	return addb((*uint8)(b), n)
1786}
1787
1788// bitp returns a pointer to the byte containing bit n and a mask for
1789// selecting that bit from *bytep.
1790func (b *gcBits) bitp(n uintptr) (bytep *uint8, mask uint8) {
1791	return b.bytep(n / 8), 1 << (n % 8)
1792}
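// A sketch of typical use: to test or set bit n of a bitmap b,
//
//	bytep, mask := b.bitp(n)
//	isSet := *bytep&mask != 0 // test bit n
//	*bytep |= mask            // set bit n (non-atomic)
//
// Callers that can race, such as the mark phase, apply the mask to *bytep
// with atomic operations instead.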
1793
1794const gcBitsChunkBytes = uintptr(64 << 10)
1795const gcBitsHeaderBytes = unsafe.Sizeof(gcBitsHeader{})
1796
1797type gcBitsHeader struct {
1798	free uintptr // free is the index into bits of the next free byte.
1799	next uintptr // *gcBits triggers recursive type bug. (issue 14620)
1800}
1801
1802//go:notinheap
1803type gcBitsArena struct {
1804	// gcBitsHeader // side step recursive type bug (issue 14620) by including fields by hand.
1805	free uintptr // free is the index into bits of the next free byte; read/write atomically
1806	next *gcBitsArena
1807	bits [gcBitsChunkBytes - gcBitsHeaderBytes]gcBits
1808}
1809
1810var gcBitsArenas struct {
1811	lock     mutex
1812	free     *gcBitsArena
1813	next     *gcBitsArena // Read atomically. Write atomically under lock.
1814	current  *gcBitsArena
1815	previous *gcBitsArena
1816}
1817
1818// tryAlloc allocates from b or returns nil if b does not have enough room.
1819// This is safe to call concurrently.
1820func (b *gcBitsArena) tryAlloc(bytes uintptr) *gcBits {
1821	if b == nil || atomic.Loaduintptr(&b.free)+bytes > uintptr(len(b.bits)) {
1822		return nil
1823	}
1824	// Try to allocate from this block.
1825	end := atomic.Xadduintptr(&b.free, bytes)
1826	if end > uintptr(len(b.bits)) {
1827		return nil
1828	}
1829	// There was enough room.
1830	start := end - bytes
1831	return &b.bits[start]
1832}
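// The initial load in tryAlloc is only a fast-path filter; the atomic add
// is what actually reserves [end-bytes, end). If the add overshoots the
// arena, that tail of the arena is abandoned and the caller (newMarkBits,
// below) falls back to its locked slow path. A simplified sketch of the
// same bump-pointer reservation pattern, using sync/atomic rather than the
// runtime-internal atomics (the tryReserve helper is purely illustrative):
//
//	func tryReserve(free *uintptr, capacity, bytes uintptr) (start uintptr, ok bool) {
//		end := atomic.AddUintptr(free, bytes) // reserve [end-bytes, end)
//		if end > capacity {
//			return 0, false // lost the race near the end; try another arena
//		}
//		return end - bytes, true
//	}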
1833
// newMarkBits returns a pointer to 8-byte-aligned memory
// to be used for a span's mark bits.
1836func newMarkBits(nelems uintptr) *gcBits {
1837	blocksNeeded := uintptr((nelems + 63) / 64)
1838	bytesNeeded := blocksNeeded * 8
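	// For example, nelems = 200 needs blocksNeeded = (200+63)/64 = 4
	// 64-bit blocks, i.e. bytesNeeded = 32 bytes of mark bits.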
1839
1840	// Try directly allocating from the current head arena.
1841	head := (*gcBitsArena)(atomic.Loadp(unsafe.Pointer(&gcBitsArenas.next)))
1842	if p := head.tryAlloc(bytesNeeded); p != nil {
1843		return p
1844	}
1845
1846	// There's not enough room in the head arena. We may need to
1847	// allocate a new arena.
1848	lock(&gcBitsArenas.lock)
1849	// Try the head arena again, since it may have changed. Now
1850	// that we hold the lock, the list head can't change, but its
1851	// free position still can.
1852	if p := gcBitsArenas.next.tryAlloc(bytesNeeded); p != nil {
1853		unlock(&gcBitsArenas.lock)
1854		return p
1855	}
1856
1857	// Allocate a new arena. This may temporarily drop the lock.
1858	fresh := newArenaMayUnlock()
1859	// If newArenaMayUnlock dropped the lock, another thread may
1860	// have put a fresh arena on the "next" list. Try allocating
1861	// from next again.
1862	if p := gcBitsArenas.next.tryAlloc(bytesNeeded); p != nil {
1863		// Put fresh back on the free list.
1864		// TODO: Mark it "already zeroed"
1865		fresh.next = gcBitsArenas.free
1866		gcBitsArenas.free = fresh
1867		unlock(&gcBitsArenas.lock)
1868		return p
1869	}
1870
1871	// Allocate from the fresh arena. We haven't linked it in yet, so
1872	// this cannot race and is guaranteed to succeed.
1873	p := fresh.tryAlloc(bytesNeeded)
1874	if p == nil {
1875		throw("markBits overflow")
1876	}
1877
1878	// Add the fresh arena to the "next" list.
1879	fresh.next = gcBitsArenas.next
1880	atomic.StorepNoWB(unsafe.Pointer(&gcBitsArenas.next), unsafe.Pointer(fresh))
1881
1882	unlock(&gcBitsArenas.lock)
1883	return p
1884}
1885
// newAllocBits returns a pointer to 8-byte-aligned memory
// to be used for a span's alloc bits.
// newAllocBits is used to provide newly initialized spans with
// allocation bits. For spans that are not being initialized, the
// mark bits are repurposed as allocation bits when
// the span is swept.
1892func newAllocBits(nelems uintptr) *gcBits {
1893	return newMarkBits(nelems)
1894}
1895
// nextMarkBitArenaEpoch establishes a new epoch for the arenas
// holding the mark bits. The arenas are named relative to the
// current GC cycle, which is demarcated by the call to finishweep_m.
//
// All current spans have been swept.
// During that sweep each span allocated room for its gcmarkBits in
// the gcBitsArenas.next block. gcBitsArenas.next becomes gcBitsArenas.current,
// where the GC will mark objects; after each span is swept, these bits
// will be used to allocate objects.
// gcBitsArenas.current becomes gcBitsArenas.previous, where each span's
// gcAllocBits live until all the spans have been swept during this GC cycle.
// Each span's sweep extinguishes its reference to gcBitsArenas.previous
// by pointing its gcAllocBits into gcBitsArenas.current.
// gcBitsArenas.previous is then released to the gcBitsArenas.free list.
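//
// Schematically, with gcBitsArenas.lock held, each call performs:
//
//	free = previous's arenas + free
//	previous = current
//	current = next
//	next = nil // newMarkBits allocates a new arena on demand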
1910func nextMarkBitArenaEpoch() {
1911	lock(&gcBitsArenas.lock)
1912	if gcBitsArenas.previous != nil {
1913		if gcBitsArenas.free == nil {
1914			gcBitsArenas.free = gcBitsArenas.previous
1915		} else {
			// Find end of previous arenas.
			last := gcBitsArenas.previous
			for last.next != nil {
				last = last.next
			}
1920			last.next = gcBitsArenas.free
1921			gcBitsArenas.free = gcBitsArenas.previous
1922		}
1923	}
1924	gcBitsArenas.previous = gcBitsArenas.current
1925	gcBitsArenas.current = gcBitsArenas.next
1926	atomic.StorepNoWB(unsafe.Pointer(&gcBitsArenas.next), nil) // newMarkBits calls newArena when needed
1927	unlock(&gcBitsArenas.lock)
1928}
1929
1930// newArenaMayUnlock allocates and zeroes a gcBits arena.
// The caller must hold gcBitsArenas.lock. This may temporarily release it.
1932func newArenaMayUnlock() *gcBitsArena {
1933	var result *gcBitsArena
1934	if gcBitsArenas.free == nil {
1935		unlock(&gcBitsArenas.lock)
1936		result = (*gcBitsArena)(sysAlloc(gcBitsChunkBytes, &memstats.gc_sys))
1937		if result == nil {
1938			throw("runtime: cannot allocate memory")
1939		}
1940		lock(&gcBitsArenas.lock)
1941	} else {
1942		result = gcBitsArenas.free
1943		gcBitsArenas.free = gcBitsArenas.free.next
1944		memclrNoHeapPointers(unsafe.Pointer(result), gcBitsChunkBytes)
1945	}
1946	result.next = nil
	// If result.bits is not 8-byte aligned, adjust result.free so
	// that &result.bits[result.free] is 8-byte aligned.
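	// For example, if the address of result.bits[0] has low bits 0b100 (4),
	// then result.free = 8 - 4 = 4 and &result.bits[4] is 8-byte aligned.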
1949	if uintptr(unsafe.Offsetof(gcBitsArena{}.bits))&7 == 0 {
1950		result.free = 0
1951	} else {
1952		result.free = 8 - (uintptr(unsafe.Pointer(&result.bits[0])) & 7)
1953	}
1954	return result
1955}
1956