1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Page heap.
6//
7// See malloc.go for overview.
8
9package runtime
10
11import (
12	"runtime/internal/atomic"
13	"runtime/internal/sys"
14	"unsafe"
15)
16
17// minPhysPageSize is a lower bound on the physical page size. The
18// true physical page size may be larger than this. In contrast,
19// sys.PhysPageSize is an upper bound on the physical page size.
20const minPhysPageSize = 4096
21
22// Main malloc heap.
23// The heap itself is the "free[]" lists and "freelarge" treap,
24// but all the other global data is here too.
25//
26// mheap must not be heap-allocated because it contains mSpanLists,
27// which must not be heap-allocated.
28//
29//go:notinheap
30type mheap struct {
31	lock      mutex
32	free      [_MaxMHeapList]mSpanList // free lists of given length up to _MaxMHeapList
33	freelarge mTreap                   // free treap of length >= _MaxMHeapList
34	busy      [_MaxMHeapList]mSpanList // busy lists of large spans of given length
35	busylarge mSpanList                // busy list of large spans of length >= _MaxMHeapList
36	sweepgen  uint32                   // sweep generation, see comment in mspan
37	sweepdone uint32                   // all spans are swept
38	sweepers  uint32                   // number of active sweepone calls
39
40	// allspans is a slice of all mspans ever created. Each mspan
41	// appears exactly once.
42	//
43	// The memory for allspans is manually managed and can be
44	// reallocated and moved as the heap grows.
45	//
46	// In general, allspans is protected by mheap_.lock, which
47	// prevents concurrent access as well as freeing the backing
48	// store. Accesses during STW might not hold the lock, but
49	// must ensure that allocation cannot happen around the
50	// access (since that may free the backing store).
51	allspans []*mspan // all spans out there
52
53	// spans is a lookup table to map virtual address page IDs to *mspan.
54	// For allocated spans, their pages map to the span itself.
55	// For free spans, only the lowest and highest pages map to the span itself.
56	// Internal pages map to an arbitrary span.
57	// For pages that have never been allocated, spans entries are nil.
58	//
59	// Modifications are protected by mheap.lock. Reads can be
60	// performed without locking, but ONLY from indexes that are
61	// known to contain in-use or stack spans. This means there
62	// must not be a safe-point between establishing that an
63	// address is live and looking it up in the spans array.
64	//
65	// This is backed by a reserved region of the address space so
66	// it can grow without moving. The memory up to len(spans) is
67	// mapped. cap(spans) indicates the total reserved memory.
68	spans []*mspan
69
70	// sweepSpans contains two mspan stacks: one of swept in-use
71	// spans, and one of unswept in-use spans. These two trade
72	// roles on each GC cycle. Since the sweepgen increases by 2
73	// on each cycle, this means the swept spans are in
74	// sweepSpans[sweepgen/2%2] and the unswept spans are in
75	// sweepSpans[1-sweepgen/2%2]. Sweeping pops spans from the
76	// unswept stack and pushes spans that are still in-use on the
77	// swept stack. Likewise, allocating an in-use span pushes it
78	// on the swept stack.
79	sweepSpans [2]gcSweepBuf
80
81	_ uint32 // align uint64 fields on 32-bit for atomics
82
83	// Proportional sweep
84	//
85	// These parameters represent a linear function from heap_live
86	// to page sweep count. The proportional sweep system works to
87	// stay in the black by keeping the current page sweep count
88	// above this line at the current heap_live.
89	//
90	// The line has slope sweepPagesPerByte and passes through a
91	// basis point at (sweepHeapLiveBasis, pagesSweptBasis). At
92	// any given time, the system is at (memstats.heap_live,
93	// pagesSwept) in this space.
94	//
95	// It's important that the line pass through a point we
96	// control rather than simply starting at a (0,0) origin
97	// because that lets us adjust sweep pacing at any time while
98	// accounting for current progress. If we could only adjust
99	// the slope, it would create a discontinuity in debt if any
100	// progress has already been made.
101	pagesInUse         uint64  // pages of spans in stats _MSpanInUse; R/W with mheap.lock
102	pagesSwept         uint64  // pages swept this cycle; updated atomically
103	pagesSweptBasis    uint64  // pagesSwept to use as the origin of the sweep ratio; updated atomically
104	sweepHeapLiveBasis uint64  // value of heap_live to use as the origin of sweep ratio; written with lock, read without
105	sweepPagesPerByte  float64 // proportional sweep ratio; written with lock, read without
106	// TODO(austin): pagesInUse should be a uintptr, but the 386
107	// compiler can't 8-byte align fields.
108
109	// Malloc stats.
110	largealloc  uint64                  // bytes allocated for large objects
111	nlargealloc uint64                  // number of large object allocations
112	largefree   uint64                  // bytes freed for large objects (>maxsmallsize)
113	nlargefree  uint64                  // number of frees for large objects (>maxsmallsize)
114	nsmallfree  [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize)
115
116	// range of addresses we might see in the heap
117	bitmap        uintptr // Points to one byte past the end of the bitmap
118	bitmap_mapped uintptr
119
120	// The arena_* fields indicate the addresses of the Go heap.
121	//
122	// The maximum range of the Go heap is
123	// [arena_start, arena_start+_MaxMem+1).
124	//
125	// The range of the current Go heap is
126	// [arena_start, arena_used). Parts of this range may not be
127	// mapped, but the metadata structures are always mapped for
128	// the full range.
129	arena_start uintptr
130	arena_used  uintptr // Set with setArenaUsed.
131
132	// The heap is grown using a linear allocator that allocates
133	// from the block [arena_alloc, arena_end). arena_alloc is
134	// often, but *not always*, equal to arena_used.
135	arena_alloc uintptr
136	arena_end   uintptr
137
138	// arena_reserved indicates that the memory [arena_alloc,
139	// arena_end) is reserved (e.g., mapped PROT_NONE). If this is
140	// false, we have to be careful not to clobber existing
141	// mappings here. If this is true, then we own the mapping
142	// here and *must* clobber it to use it.
143	arena_reserved bool
144
145	_ uint32 // ensure 64-bit alignment
146
147	// central free lists for small size classes.
148	// the padding makes sure that the MCentrals are
149	// spaced CacheLineSize bytes apart, so that each MCentral.lock
150	// gets its own cache line.
151	// central is indexed by spanClass.
152	central [numSpanClasses]struct {
153		mcentral mcentral
154		pad      [sys.CacheLineSize - unsafe.Sizeof(mcentral{})%sys.CacheLineSize]byte
155	}
156
157	spanalloc             fixalloc // allocator for span*
158	cachealloc            fixalloc // allocator for mcache*
159	treapalloc            fixalloc // allocator for treapNodes* used by large objects
160	specialfinalizeralloc fixalloc // allocator for specialfinalizer*
161	specialprofilealloc   fixalloc // allocator for specialprofile*
162	speciallock           mutex    // lock for special record allocators.
163
164	unused *specialfinalizer // never set, just here to force the specialfinalizer type into DWARF
165}
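
// The sweepSpans field above selects the swept and unswept stacks from the
// low bit of sweepgen/2. The helper below is an illustrative sketch only
// (its name is hypothetical and it is not part of the runtime); it simply
// makes that indexing explicit.
func sweepSpanIndicesSketch(sweepgen uint32) (swept, unswept uint32) {
	swept = sweepgen / 2 % 2 // swept in-use spans live here
	unswept = 1 - swept      // unswept in-use spans live here
	return
}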
166
167var mheap_ mheap
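
// The proportional sweep fields in mheap describe a line from heap_live to a
// target page sweep count. A minimal sketch of that pacing computation,
// assuming the basis fields have already been read consistently
// (sweepPageTargetSketch is a hypothetical helper, not part of the runtime):
func sweepPageTargetSketch(heapLive, sweepHeapLiveBasis uint64, sweepPagesPerByte float64, pagesSweptBasis uint64) uint64 {
	// Stay "in the black": by the time the live heap reaches heapLive, at
	// least this many pages should have been swept.
	if heapLive <= sweepHeapLiveBasis {
		return pagesSweptBasis
	}
	return pagesSweptBasis + uint64(sweepPagesPerByte*float64(heapLive-sweepHeapLiveBasis))
}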
168
169// An MSpan is a run of pages.
170//
171// When a MSpan is in the heap free list, state == MSpanFree
172// and heapmap(s->start) == span, heapmap(s->start+s->npages-1) == span.
173//
174// When a MSpan is allocated, state == MSpanInUse or MSpanManual
175// and heapmap(i) == span for all s->start <= i < s->start+s->npages.
176
177// Every MSpan is in one doubly-linked list,
178// either one of the MHeap's free lists or one of the
179// MCentral's span lists.
180
181// An MSpan representing actual memory has state _MSpanInUse,
182// _MSpanManual, or _MSpanFree. Transitions between these states are
183// constrained as follows:
184//
185// * A span may transition from free to in-use or manual during any GC
186//   phase.
187//
188// * During sweeping (gcphase == _GCoff), a span may transition from
189//   in-use to free (as a result of sweeping) or manual to free (as a
190//   result of stacks being freed).
191//
192// * During GC (gcphase != _GCoff), a span *must not* transition from
193//   manual or in-use to free. Because concurrent GC may read a pointer
194//   and then look up its span, the span state must be monotonic.
195type mSpanState uint8
196
197const (
198	_MSpanDead   mSpanState = iota
199	_MSpanInUse             // allocated for garbage collected heap
200	_MSpanManual            // allocated for manual management (e.g., stack allocator)
201	_MSpanFree
202)
203
204// mSpanStateNames are the names of the span states, indexed by
205// mSpanState.
206var mSpanStateNames = []string{
207	"_MSpanDead",
208	"_MSpanInUse",
209	"_MSpanManual",
210	"_MSpanFree",
211}
212
213// mSpanList heads a linked list of spans.
214//
215//go:notinheap
216type mSpanList struct {
217	first *mspan // first span in list, or nil if none
218	last  *mspan // last span in list, or nil if none
219}
220
221//go:notinheap
222type mspan struct {
223	next *mspan     // next span in list, or nil if none
224	prev *mspan     // previous span in list, or nil if none
225	list *mSpanList // For debugging. TODO: Remove.
226
227	startAddr uintptr // address of first byte of span aka s.base()
228	npages    uintptr // number of pages in span
229
230	manualFreeList gclinkptr // list of free objects in _MSpanManual spans
231
232	// freeindex is the slot index between 0 and nelems at which to begin scanning
233	// for the next free object in this span.
234	// Each allocation scans allocBits starting at freeindex until it encounters a 0
235	// indicating a free object. freeindex is then adjusted so that subsequent scans begin
236	// just past the newly discovered free object.
237	//
238	// If freeindex == nelems, this span has no free objects.
239	//
240	// allocBits is a bitmap of objects in this span.
241	// If n >= freeindex and allocBits[n/8] & (1<<(n%8)) is 0
242	// then object n is free;
243	// otherwise, object n is allocated. Bits starting at nelems are
244	// undefined and should never be referenced.
245	//
246	// Object n starts at address n*elemsize + (start << pageShift).
247	freeindex uintptr
248	// TODO: Look up nelems from sizeclass and remove this field if it
249	// helps performance.
250	nelems uintptr // number of objects in the span.
251
252	// Cache of the allocBits at freeindex. allocCache is shifted
253	// such that the lowest bit corresponds to the bit freeindex.
254	// allocCache holds the complement of allocBits, thus allowing
255	// ctz (count trailing zero) to use it directly.
256	// allocCache may contain bits beyond s.nelems; the caller must ignore
257	// these.
258	allocCache uint64
259
260	// allocBits and gcmarkBits hold pointers to a span's mark and
261	// allocation bits. The pointers are 8 byte aligned.
262	// There are four arenas where this data is held.
263	// free: Dirty arenas that are no longer accessed
264	//       and can be reused.
265	// next: Holds information to be used in the next GC cycle.
266	// current: Information being used during this GC cycle.
267	// previous: Information being used during the last GC cycle.
268	// A new GC cycle starts with the call to finishsweep_m.
269	// finishsweep_m moves the previous arena to the free arena,
270	// the current arena to the previous arena, and
271	// the next arena to the current arena.
272	// The next arena is populated as the spans request
273	// memory to hold gcmarkBits for the next GC cycle as well
274	// as allocBits for newly allocated spans.
275	//
276	// The pointer arithmetic is done "by hand" instead of using
277	// arrays to avoid bounds checks along critical performance
278	// paths.
279	// The sweep will free the old allocBits and set allocBits to the
280	// gcmarkBits. The gcmarkBits are replaced with a fresh zeroed
281	// out memory.
282	allocBits  *gcBits
283	gcmarkBits *gcBits
284
285	// sweep generation:
286	// if sweepgen == h->sweepgen - 2, the span needs sweeping
287	// if sweepgen == h->sweepgen - 1, the span is currently being swept
288	// if sweepgen == h->sweepgen, the span is swept and ready to use
289	// h->sweepgen is incremented by 2 after every GC
290
291	sweepgen    uint32
292	divMul      uint16     // for divide by elemsize - divMagic.mul
293	baseMask    uint16     // if non-0, elemsize is a power of 2, & this will get object allocation base
294	allocCount  uint16     // number of allocated objects
295	spanclass   spanClass  // size class and noscan (uint8)
296	incache     bool       // being used by an mcache
297	state       mSpanState // mspaninuse etc
298	needzero    uint8      // needs to be zeroed before allocation
299	divShift    uint8      // for divide by elemsize - divMagic.shift
300	divShift2   uint8      // for divide by elemsize - divMagic.shift2
301	elemsize    uintptr    // computed from sizeclass or from npages
302	unusedsince int64      // first time spotted by gc in mspanfree state
303	npreleased  uintptr    // number of pages released to the os
304	limit       uintptr    // end of data in span
305	speciallock mutex      // guards specials list
306	specials    *special   // linked list of special records sorted by offset.
307}
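
// The sweepgen comment above defines a span's sweep state relative to
// h.sweepgen. A small illustrative helper (hypothetical, not part of the
// runtime) that names those states:
func spanSweepStateSketch(spanSweepgen, heapSweepgen uint32) string {
	switch spanSweepgen {
	case heapSweepgen - 2:
		return "needs sweeping"
	case heapSweepgen - 1:
		return "being swept"
	case heapSweepgen:
		return "swept and ready to use"
	}
	return "unexpected sweepgen"
}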
308
309func (s *mspan) base() uintptr {
310	return s.startAddr
311}
312
313func (s *mspan) layout() (size, n, total uintptr) {
314	total = s.npages << _PageShift
315	size = s.elemsize
316	if size > 0 {
317		n = total / size
318	}
319	return
320}
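
// The allocCache comment on mspan notes that the cache stores the complement
// of allocBits so a count-trailing-zeros finds the next free slot. A minimal
// sketch of that lookup, assuming the cache has already been shifted so bit 0
// corresponds to freeindex (illustrative only; not the runtime's fast path):
func nextFreeFromCacheSketch(allocCache uint64, freeindex, nelems uintptr) (uintptr, bool) {
	tz := uintptr(sys.Ctz64(allocCache)) // length of the leading run of allocated slots
	idx := freeindex + tz
	if idx >= nelems {
		return 0, false // no free object visible from this cache
	}
	return idx, true
}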
321
322// recordspan adds a newly allocated span to h.allspans.
323//
324// This only happens the first time a span is allocated from
325// mheap.spanalloc (it is not called when a span is reused).
326//
327// Write barriers are disallowed here because it can be called from
328// gcWork when allocating new workbufs. However, because it's an
329// indirect call from the fixalloc initializer, the compiler can't see
330// this.
331//
332//go:nowritebarrierrec
333func recordspan(vh unsafe.Pointer, p unsafe.Pointer) {
334	h := (*mheap)(vh)
335	s := (*mspan)(p)
336	if len(h.allspans) >= cap(h.allspans) {
337		n := 64 * 1024 / sys.PtrSize
338		if n < cap(h.allspans)*3/2 {
339			n = cap(h.allspans) * 3 / 2
340		}
341		var new []*mspan
342		sp := (*notInHeapSlice)(unsafe.Pointer(&new))
343		sp.array = (*notInHeap)(sysAlloc(uintptr(n)*sys.PtrSize, &memstats.other_sys))
344		if sp.array == nil {
345			throw("runtime: cannot allocate memory")
346		}
347		sp.len = len(h.allspans)
348		sp.cap = n
349		if len(h.allspans) > 0 {
350			copy(new, h.allspans)
351		}
352		oldAllspans := h.allspans
353		*(*notInHeapSlice)(unsafe.Pointer(&h.allspans)) = *(*notInHeapSlice)(unsafe.Pointer(&new))
354		if len(oldAllspans) != 0 {
355			sysFree(unsafe.Pointer(&oldAllspans[0]), uintptr(cap(oldAllspans))*unsafe.Sizeof(oldAllspans[0]), &memstats.other_sys)
356		}
357	}
358	h.allspans = h.allspans[:len(h.allspans)+1]
359	h.allspans[len(h.allspans)-1] = s
360}
361
362// A spanClass represents the size class and noscan-ness of a span.
363//
364// Each size class has a noscan spanClass and a scan spanClass. The
365// noscan spanClass contains only noscan objects, which do not contain
366// pointers and thus do not need to be scanned by the garbage
367// collector.
368type spanClass uint8
369
370const (
371	numSpanClasses = _NumSizeClasses << 1
372	tinySpanClass  = spanClass(tinySizeClass<<1 | 1)
373)
374
375func makeSpanClass(sizeclass uint8, noscan bool) spanClass {
376	return spanClass(sizeclass<<1) | spanClass(bool2int(noscan))
377}
378
379func (sc spanClass) sizeclass() int8 {
380	return int8(sc >> 1)
381}
382
383func (sc spanClass) noscan() bool {
384	return sc&1 != 0
385}
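
// A spanClass packs the size class and a noscan bit into a single byte, as
// the helpers above show. A tiny round-trip sketch (illustrative only; the
// function name is hypothetical):
func spanClassRoundTripSketch() {
	sc := makeSpanClass(5, true) // size class 5, noscan
	if sc.sizeclass() != 5 || !sc.noscan() {
		throw("spanClass round trip failed")
	}
}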
386
387// inheap reports whether b is a pointer into a (potentially dead) heap object.
388// It returns false for pointers into _MSpanManual spans.
389// Non-preemptible because it is used by write barriers.
390//go:nowritebarrier
391//go:nosplit
392func inheap(b uintptr) bool {
393	if b == 0 || b < mheap_.arena_start || b >= mheap_.arena_used {
394		return false
395	}
396	// Not a beginning of a block, consult span table to find the block beginning.
397	s := mheap_.spans[(b-mheap_.arena_start)>>_PageShift]
398	if s == nil || b < s.base() || b >= s.limit || s.state != mSpanInUse {
399		return false
400	}
401	return true
402}
403
404// inHeapOrStack is a variant of inheap that returns true for pointers
405// into any allocated heap span, including manually-managed (e.g., stack) spans.
406//
407//go:nowritebarrier
408//go:nosplit
409func inHeapOrStack(b uintptr) bool {
410	if b == 0 || b < mheap_.arena_start || b >= mheap_.arena_used {
411		return false
412	}
413	// Not a beginning of a block, consult span table to find the block beginning.
414	s := mheap_.spans[(b-mheap_.arena_start)>>_PageShift]
415	if s == nil || b < s.base() {
416		return false
417	}
418	switch s.state {
419	case mSpanInUse, _MSpanManual:
420		return b < s.limit
421	default:
422		return false
423	}
424}
425
426// TODO: spanOf and spanOfUnchecked are open-coded in a lot of places.
427// Use the functions instead.
428
429// spanOf returns the span of p. If p does not point into the heap or
430// no span contains p, spanOf returns nil.
431func spanOf(p uintptr) *mspan {
432	if p == 0 || p < mheap_.arena_start || p >= mheap_.arena_used {
433		return nil
434	}
435	return spanOfUnchecked(p)
436}
437
438// spanOfUnchecked is equivalent to spanOf, but the caller must ensure
439// that p points into the heap (that is, mheap_.arena_start <= p <
440// mheap_.arena_used).
441func spanOfUnchecked(p uintptr) *mspan {
442	return mheap_.spans[(p-mheap_.arena_start)>>_PageShift]
443}
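
// Both lookups above index h.spans by the page number of p within the arena.
// A sketch of just that index computation (pageIndexSketch is hypothetical;
// it assumes arenaStart <= p):
func pageIndexSketch(p, arenaStart uintptr) uintptr {
	// One h.spans entry per page, so the index is p's page offset from the
	// start of the arena.
	return (p - arenaStart) >> _PageShift
}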
444
445func mlookup(v uintptr, base *uintptr, size *uintptr, sp **mspan) int32 {
446	_g_ := getg()
447
448	_g_.m.mcache.local_nlookup++
449	if sys.PtrSize == 4 && _g_.m.mcache.local_nlookup >= 1<<30 {
450		// purge cache stats to prevent overflow
451		lock(&mheap_.lock)
452		purgecachedstats(_g_.m.mcache)
453		unlock(&mheap_.lock)
454	}
455
456	s := mheap_.lookupMaybe(unsafe.Pointer(v))
457	if sp != nil {
458		*sp = s
459	}
460	if s == nil {
461		if base != nil {
462			*base = 0
463		}
464		if size != nil {
465			*size = 0
466		}
467		return 0
468	}
469
470	p := s.base()
471	if s.spanclass.sizeclass() == 0 {
472		// Large object.
473		if base != nil {
474			*base = p
475		}
476		if size != nil {
477			*size = s.npages << _PageShift
478		}
479		return 1
480	}
481
482	n := s.elemsize
483	if base != nil {
484		i := (v - p) / n
485		*base = p + i*n
486	}
487	if size != nil {
488		*size = n
489	}
490
491	return 1
492}
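
// For small-object spans, mlookup rounds an interior pointer down to its
// object base by integer division on the element size, as above. The same
// arithmetic as a standalone sketch (hypothetical helper; assumes
// spanBase <= v and elemsize > 0):
func objectBaseSketch(v, spanBase, elemsize uintptr) uintptr {
	return spanBase + (v-spanBase)/elemsize*elemsize
}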
493
494// Initialize the heap.
495func (h *mheap) init(spansStart, spansBytes uintptr) {
496	h.treapalloc.init(unsafe.Sizeof(treapNode{}), nil, nil, &memstats.other_sys)
497	h.spanalloc.init(unsafe.Sizeof(mspan{}), recordspan, unsafe.Pointer(h), &memstats.mspan_sys)
498	h.cachealloc.init(unsafe.Sizeof(mcache{}), nil, nil, &memstats.mcache_sys)
499	h.specialfinalizeralloc.init(unsafe.Sizeof(specialfinalizer{}), nil, nil, &memstats.other_sys)
500	h.specialprofilealloc.init(unsafe.Sizeof(specialprofile{}), nil, nil, &memstats.other_sys)
501
502	// Don't zero mspan allocations. Background sweeping can
503	// inspect a span concurrently with allocating it, so it's
504	// important that the span's sweepgen survive across freeing
505	// and re-allocating a span to prevent background sweeping
506	// from improperly cas'ing it from 0.
507	//
508	// This is safe because mspan contains no heap pointers.
509	h.spanalloc.zero = false
510
511	// h->mapcache needs no init
512	for i := range h.free {
513		h.free[i].init()
514		h.busy[i].init()
515	}
516
517	h.busylarge.init()
518	for i := range h.central {
519		h.central[i].mcentral.init(spanClass(i))
520	}
521
522	sp := (*slice)(unsafe.Pointer(&h.spans))
523	sp.array = unsafe.Pointer(spansStart)
524	sp.len = 0
525	sp.cap = int(spansBytes / sys.PtrSize)
526
527	// Map metadata structures. But don't map race detector memory
528	// since we're not actually growing the arena here (and TSAN
529	// gets mad if you map 0 bytes).
530	h.setArenaUsed(h.arena_used, false)
531}
532
533// setArenaUsed extends the usable arena to address arena_used and
534// maps auxiliary VM regions for any newly usable arena space.
535//
536// racemap indicates that this memory should be managed by the race
537// detector. racemap should be true unless this is covering a VM hole.
538func (h *mheap) setArenaUsed(arena_used uintptr, racemap bool) {
539	// Map auxiliary structures *before* h.arena_used is updated.
540	// Waiting to update arena_used until after the memory has been mapped
541	// avoids faults when other threads try to access these regions immediately
542	// after observing the change to arena_used.
543
544	// Map the bitmap.
545	h.mapBits(arena_used)
546
547	// Map spans array.
548	h.mapSpans(arena_used)
549
550	// Tell the race detector about the new heap memory.
551	if racemap && raceenabled {
552		racemapshadow(unsafe.Pointer(h.arena_used), arena_used-h.arena_used)
553	}
554
555	h.arena_used = arena_used
556}
557
558// mapSpans makes sure that the spans are mapped
559// up to the new value of arena_used.
560//
561// Don't call this directly. Call mheap.setArenaUsed.
562func (h *mheap) mapSpans(arena_used uintptr) {
563	// Map spans array, PageSize at a time.
564	n := arena_used
565	n -= h.arena_start
566	n = n / _PageSize * sys.PtrSize
567	n = round(n, physPageSize)
568	need := n / unsafe.Sizeof(h.spans[0])
569	have := uintptr(len(h.spans))
570	if have >= need {
571		return
572	}
573	h.spans = h.spans[:need]
574	sysMap(unsafe.Pointer(&h.spans[have]), (need-have)*unsafe.Sizeof(h.spans[0]), h.arena_reserved, &memstats.other_sys)
575}
576
577// Sweeps spans in list until it reclaims at least npages into the heap.
578// Returns the actual number of pages reclaimed.
579func (h *mheap) reclaimList(list *mSpanList, npages uintptr) uintptr {
580	n := uintptr(0)
581	sg := mheap_.sweepgen
582retry:
583	for s := list.first; s != nil; s = s.next {
584		if s.sweepgen == sg-2 && atomic.Cas(&s.sweepgen, sg-2, sg-1) {
585			list.remove(s)
586			// swept spans are at the end of the list
587			list.insertBack(s) // Puts it back on a busy list. s is not in the treap at this point.
588			unlock(&h.lock)
589			snpages := s.npages
590			if s.sweep(false) {
591				n += snpages
592			}
593			lock(&h.lock)
594			if n >= npages {
595				return n
596			}
597			// the span could have been moved elsewhere
598			goto retry
599		}
600		if s.sweepgen == sg-1 {
601			// the span is being swept by the background sweeper, skip
602			continue
603		}
604		// already swept empty span,
605		// all subsequent ones must also be either swept or in the process of sweeping
606		break
607	}
608	return n
609}
610
611// Sweeps and reclaims at least npage pages into the heap.
612// Called before allocating npage pages.
613func (h *mheap) reclaim(npage uintptr) {
614	// First try to sweep busy spans with large objects of size >= npage,
615	// this has good chances of reclaiming the necessary space.
616	for i := int(npage); i < len(h.busy); i++ {
617		if h.reclaimList(&h.busy[i], npage) != 0 {
618			return // Bingo!
619		}
620	}
621
622	// Then -- even larger objects.
623	if h.reclaimList(&h.busylarge, npage) != 0 {
624		return // Bingo!
625	}
626
627	// Now try smaller objects.
628	// One such object is not enough, so we need to reclaim several of them.
629	reclaimed := uintptr(0)
630	for i := 0; i < int(npage) && i < len(h.busy); i++ {
631		reclaimed += h.reclaimList(&h.busy[i], npage-reclaimed)
632		if reclaimed >= npage {
633			return
634		}
635	}
636
637	// Now sweep everything that is not yet swept.
638	unlock(&h.lock)
639	for {
640		n := sweepone()
641		if n == ^uintptr(0) { // all spans are swept
642			break
643		}
644		reclaimed += n
645		if reclaimed >= npage {
646			break
647		}
648	}
649	lock(&h.lock)
650}
651
652// Allocate a new span of npage pages from the heap for GC'd memory
653// and record its size class in the heap span map (h.spans).
654func (h *mheap) alloc_m(npage uintptr, spanclass spanClass, large bool) *mspan {
655	_g_ := getg()
656	lock(&h.lock)
657
658	// To prevent excessive heap growth, before allocating n pages
659	// we need to sweep and reclaim at least n pages.
660	if h.sweepdone == 0 {
661		// TODO(austin): This tends to sweep a large number of
662		// spans in order to find a few completely free spans
663		// (for example, in the garbage benchmark, this sweeps
664		// ~30x the number of pages it's trying to allocate).
665		// If GC kept a bit for whether there were any marks
666		// in a span, we could release these free spans
667		// at the end of GC and eliminate this entirely.
668		if trace.enabled {
669			traceGCSweepStart()
670		}
671		h.reclaim(npage)
672		if trace.enabled {
673			traceGCSweepDone()
674		}
675	}
676
677	// transfer stats from cache to global
678	memstats.heap_scan += uint64(_g_.m.mcache.local_scan)
679	_g_.m.mcache.local_scan = 0
680	memstats.tinyallocs += uint64(_g_.m.mcache.local_tinyallocs)
681	_g_.m.mcache.local_tinyallocs = 0
682
683	s := h.allocSpanLocked(npage, &memstats.heap_inuse)
684	if s != nil {
685		// Record span info, because gc needs to be
686		// able to map interior pointer to containing span.
687		atomic.Store(&s.sweepgen, h.sweepgen)
688		h.sweepSpans[h.sweepgen/2%2].push(s) // Add to swept in-use list.
689		s.state = _MSpanInUse
690		s.allocCount = 0
691		s.spanclass = spanclass
692		if sizeclass := spanclass.sizeclass(); sizeclass == 0 {
693			s.elemsize = s.npages << _PageShift
694			s.divShift = 0
695			s.divMul = 0
696			s.divShift2 = 0
697			s.baseMask = 0
698		} else {
699			s.elemsize = uintptr(class_to_size[sizeclass])
700			m := &class_to_divmagic[sizeclass]
701			s.divShift = m.shift
702			s.divMul = m.mul
703			s.divShift2 = m.shift2
704			s.baseMask = m.baseMask
705		}
706
707		// update stats, sweep lists
708		h.pagesInUse += uint64(npage)
709		if large {
710			memstats.heap_objects++
711			mheap_.largealloc += uint64(s.elemsize)
712			mheap_.nlargealloc++
713			atomic.Xadd64(&memstats.heap_live, int64(npage<<_PageShift))
714			// Swept spans are at the end of lists.
715			if s.npages < uintptr(len(h.busy)) {
716				h.busy[s.npages].insertBack(s)
717			} else {
718				h.busylarge.insertBack(s)
719			}
720		}
721	}
722	// heap_scan and heap_live were updated.
723	if gcBlackenEnabled != 0 {
724		gcController.revise()
725	}
726
727	if trace.enabled {
728		traceHeapAlloc()
729	}
730
731	// h.spans is accessed concurrently without synchronization
732	// from other threads. Hence, there must be a store/store
733	// barrier here to ensure the writes to h.spans above happen
734	// before the caller can publish a pointer p to an object
735	// allocated from s. As soon as this happens, the garbage
736	// collector running on another processor could read p and
737	// look up s in h.spans. The unlock acts as the barrier to
738	// order these writes. On the read side, the data dependency
739	// between p and the index in h.spans orders the reads.
740	unlock(&h.lock)
741	return s
742}
743
744func (h *mheap) alloc(npage uintptr, spanclass spanClass, large bool, needzero bool) *mspan {
745	// Don't do any operations that lock the heap on the G stack.
746	// It might trigger stack growth, and the stack growth code needs
747	// to be able to allocate heap.
748	var s *mspan
749	systemstack(func() {
750		s = h.alloc_m(npage, spanclass, large)
751	})
752
753	if s != nil {
754		if needzero && s.needzero != 0 {
755			memclrNoHeapPointers(unsafe.Pointer(s.base()), s.npages<<_PageShift)
756		}
757		s.needzero = 0
758	}
759	return s
760}
761
762// allocManual allocates a manually-managed span of npage pages.
763// allocManual returns nil if allocation fails.
764//
765// allocManual adds the bytes used to *stat, which should be a
766// memstats in-use field. Unlike allocations in the GC'd heap, the
767// allocation does *not* count toward heap_inuse or heap_sys.
768//
769// The memory backing the returned span may not be zeroed if
770// span.needzero is set.
771//
772// allocManual must be called on the system stack to prevent stack
773// growth. Since this is used by the stack allocator, stack growth
774// during allocManual would self-deadlock.
775//
776//go:systemstack
777func (h *mheap) allocManual(npage uintptr, stat *uint64) *mspan {
778	lock(&h.lock)
779	s := h.allocSpanLocked(npage, stat)
780	if s != nil {
781		s.state = _MSpanManual
782		s.manualFreeList = 0
783		s.allocCount = 0
784		s.spanclass = 0
785		s.nelems = 0
786		s.elemsize = 0
787		s.limit = s.base() + s.npages<<_PageShift
788		// Manually managed memory doesn't count toward heap_sys.
789		memstats.heap_sys -= uint64(s.npages << _PageShift)
790	}
791
792	// This unlock acts as a release barrier. See mheap.alloc_m.
793	unlock(&h.lock)
794
795	return s
796}
797
798// Allocates a span of the given size.  h must be locked.
799// The returned span has been removed from the
800// free list, but its state is still MSpanFree.
801func (h *mheap) allocSpanLocked(npage uintptr, stat *uint64) *mspan {
802	var list *mSpanList
803	var s *mspan
804
805	// Try in fixed-size lists up to max.
806	for i := int(npage); i < len(h.free); i++ {
807		list = &h.free[i]
808		if !list.isEmpty() {
809			s = list.first
810			list.remove(s)
811			goto HaveSpan
812		}
813	}
814	// Best fit in list of large spans.
815	s = h.allocLarge(npage) // allocLarge removed s from h.freelarge for us
816	if s == nil {
817		if !h.grow(npage) {
818			return nil
819		}
820		s = h.allocLarge(npage)
821		if s == nil {
822			return nil
823		}
824	}
825
826HaveSpan:
827	// Mark span in use.
828	if s.state != _MSpanFree {
829		throw("MHeap_AllocLocked - MSpan not free")
830	}
831	if s.npages < npage {
832		throw("MHeap_AllocLocked - bad npages")
833	}
834	if s.npreleased > 0 {
835		sysUsed(unsafe.Pointer(s.base()), s.npages<<_PageShift)
836		memstats.heap_released -= uint64(s.npreleased << _PageShift)
837		s.npreleased = 0
838	}
839
840	if s.npages > npage {
841		// Trim extra and put it back in the heap.
842		t := (*mspan)(h.spanalloc.alloc())
843		t.init(s.base()+npage<<_PageShift, s.npages-npage)
844		s.npages = npage
845		p := (t.base() - h.arena_start) >> _PageShift
846		if p > 0 {
847			h.spans[p-1] = s
848		}
849		h.spans[p] = t
850		h.spans[p+t.npages-1] = t
851		t.needzero = s.needzero
852		s.state = _MSpanManual // prevent coalescing with s
853		t.state = _MSpanManual
854		h.freeSpanLocked(t, false, false, s.unusedsince)
855		s.state = _MSpanFree
856	}
857	s.unusedsince = 0
858
859	p := (s.base() - h.arena_start) >> _PageShift
860	for n := uintptr(0); n < npage; n++ {
861		h.spans[p+n] = s
862	}
863
864	*stat += uint64(npage << _PageShift)
865	memstats.heap_idle -= uint64(npage << _PageShift)
866
867	//println("spanalloc", hex(s.start<<_PageShift))
868	if s.inList() {
869		throw("still in list")
870	}
871	return s
872}
873
874// Large spans have a minimum size of 1MByte. The maximum number of large spans needed to
875// support 1TByte is therefore 1 million. Experimentation using random sizes indicates that
876// the depth of the treap is less than 2x that of a perfectly balanced tree. A perfectly
877// balanced tree over 1 million spans has a depth of 20, so twice that (40) is acceptable.
878func (h *mheap) isLargeSpan(npages uintptr) bool {
879	return npages >= uintptr(len(h.free))
880}
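
// A quick check of the arithmetic in the comment above, written out as an
// illustrative (hypothetical) helper rather than runtime logic:
func treapDepthBoundSketch() int {
	const maxLargeSpans = (1 << 40) / (1 << 20) // 1TByte of 1MByte spans = 2^20 spans
	depth := 0
	for n := maxLargeSpans; n > 1; n >>= 1 {
		depth++ // a perfectly balanced tree over 2^20 spans has depth 20
	}
	return 2 * depth // the comment accepts up to 2x that: 40
}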
881
882// allocLarge allocates a span of at least npage pages from the treap of large spans.
883// Returns nil if no such span currently exists.
884func (h *mheap) allocLarge(npage uintptr) *mspan {
885	// Search treap for smallest span with >= npage pages.
886	return h.freelarge.remove(npage)
887}
888
889// Try to add at least npage pages of memory to the heap,
890// returning whether it worked.
891//
892// h must be locked.
893func (h *mheap) grow(npage uintptr) bool {
894	// Ask for a big chunk, to reduce the number of mappings
895	// the operating system needs to track; also amortizes
896	// the overhead of an operating system mapping.
897	// Allocate a multiple of 64kB.
898	npage = round(npage, (64<<10)/_PageSize)
899	ask := npage << _PageShift
900	if ask < _HeapAllocChunk {
901		ask = _HeapAllocChunk
902	}
903
904	v := h.sysAlloc(ask)
905	if v == nil {
906		if ask > npage<<_PageShift {
907			ask = npage << _PageShift
908			v = h.sysAlloc(ask)
909		}
910		if v == nil {
911			print("runtime: out of memory: cannot allocate ", ask, "-byte block (", memstats.heap_sys, " in use)\n")
912			return false
913		}
914	}
915
916	// Create a fake "in use" span and free it, so that the
917	// right coalescing happens.
918	s := (*mspan)(h.spanalloc.alloc())
919	s.init(uintptr(v), ask>>_PageShift)
920	p := (s.base() - h.arena_start) >> _PageShift
921	for i := p; i < p+s.npages; i++ {
922		h.spans[i] = s
923	}
924	atomic.Store(&s.sweepgen, h.sweepgen)
925	s.state = _MSpanInUse
926	h.pagesInUse += uint64(s.npages)
927	h.freeSpanLocked(s, false, true, 0)
928	return true
929}
930
931// Look up the span at the given address.
932// Address is guaranteed to be in map
933// and is guaranteed to be start or end of span.
934func (h *mheap) lookup(v unsafe.Pointer) *mspan {
935	p := uintptr(v)
936	p -= h.arena_start
937	return h.spans[p>>_PageShift]
938}
939
940// Look up the span at the given address.
941// Address is *not* guaranteed to be in map
942// and may be anywhere in the span.
943// Map entries for the middle of a span are only
944// valid for allocated spans. Free spans may have
945// other garbage in their middles, so we have to
946// check for that.
947func (h *mheap) lookupMaybe(v unsafe.Pointer) *mspan {
948	if uintptr(v) < h.arena_start || uintptr(v) >= h.arena_used {
949		return nil
950	}
951	s := h.spans[(uintptr(v)-h.arena_start)>>_PageShift]
952	if s == nil || uintptr(v) < s.base() || uintptr(v) >= uintptr(unsafe.Pointer(s.limit)) || s.state != _MSpanInUse {
953		return nil
954	}
955	return s
956}
957
958// Free the span back into the heap.
959func (h *mheap) freeSpan(s *mspan, acct int32) {
960	systemstack(func() {
961		mp := getg().m
962		lock(&h.lock)
963		memstats.heap_scan += uint64(mp.mcache.local_scan)
964		mp.mcache.local_scan = 0
965		memstats.tinyallocs += uint64(mp.mcache.local_tinyallocs)
966		mp.mcache.local_tinyallocs = 0
967		if msanenabled {
968			// Tell msan that this entire span is no longer in use.
969			base := unsafe.Pointer(s.base())
970			bytes := s.npages << _PageShift
971			msanfree(base, bytes)
972		}
973		if acct != 0 {
974			memstats.heap_objects--
975		}
976		if gcBlackenEnabled != 0 {
977			// heap_scan changed.
978			gcController.revise()
979		}
980		h.freeSpanLocked(s, true, true, 0)
981		unlock(&h.lock)
982	})
983}
984
985// freeManual frees a manually-managed span returned by allocManual.
986// stat must be the same as the stat passed to the allocManual that
987// allocated s.
988//
989// This must only be called when gcphase == _GCoff. See mSpanState for
990// an explanation.
991//
992// freeManual must be called on the system stack to prevent stack
993// growth, just like allocManual.
994//
995//go:systemstack
996func (h *mheap) freeManual(s *mspan, stat *uint64) {
997	s.needzero = 1
998	lock(&h.lock)
999	*stat -= uint64(s.npages << _PageShift)
1000	memstats.heap_sys += uint64(s.npages << _PageShift)
1001	h.freeSpanLocked(s, false, true, 0)
1002	unlock(&h.lock)
1003}
1004
1005// s must be on a busy list (h.busy or h.busylarge) or unlinked.
1006func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince int64) {
1007	switch s.state {
1008	case _MSpanManual:
1009		if s.allocCount != 0 {
1010			throw("MHeap_FreeSpanLocked - invalid stack free")
1011		}
1012	case _MSpanInUse:
1013		if s.allocCount != 0 || s.sweepgen != h.sweepgen {
1014			print("MHeap_FreeSpanLocked - span ", s, " ptr ", hex(s.base()), " allocCount ", s.allocCount, " sweepgen ", s.sweepgen, "/", h.sweepgen, "\n")
1015			throw("MHeap_FreeSpanLocked - invalid free")
1016		}
1017		h.pagesInUse -= uint64(s.npages)
1018	default:
1019		throw("MHeap_FreeSpanLocked - invalid span state")
1020	}
1021
1022	if acctinuse {
1023		memstats.heap_inuse -= uint64(s.npages << _PageShift)
1024	}
1025	if acctidle {
1026		memstats.heap_idle += uint64(s.npages << _PageShift)
1027	}
1028	s.state = _MSpanFree
1029	if s.inList() {
1030		h.busyList(s.npages).remove(s)
1031	}
1032
1033	// Stamp newly unused spans. The scavenger will use that
1034	// info to potentially give back some pages to the OS.
1035	s.unusedsince = unusedsince
1036	if unusedsince == 0 {
1037		s.unusedsince = nanotime()
1038	}
1039	s.npreleased = 0
1040
1041	// Coalesce with earlier, later spans.
1042	p := (s.base() - h.arena_start) >> _PageShift
1043	if p > 0 {
1044		before := h.spans[p-1]
1045		if before != nil && before.state == _MSpanFree {
1046			// Now adjust s.
1047			s.startAddr = before.startAddr
1048			s.npages += before.npages
1049			s.npreleased = before.npreleased // absorb released pages
1050			s.needzero |= before.needzero
1051			p -= before.npages
1052			h.spans[p] = s
1053			// The size is potentially changing so the treap needs to delete adjacent nodes and
1054			// insert back as a combined node.
1055			if h.isLargeSpan(before.npages) {
1056				// We have a t, it is large so it has to be in the treap so we can remove it.
1057				h.freelarge.removeSpan(before)
1058			} else {
1059				h.freeList(before.npages).remove(before)
1060			}
1061			before.state = _MSpanDead
1062			h.spanalloc.free(unsafe.Pointer(before))
1063		}
1064	}
1065
1066	// Now check to see if next (greater addresses) span is free and can be coalesced.
1067	if (p + s.npages) < uintptr(len(h.spans)) {
1068		after := h.spans[p+s.npages]
1069		if after != nil && after.state == _MSpanFree {
1070			s.npages += after.npages
1071			s.npreleased += after.npreleased
1072			s.needzero |= after.needzero
1073			h.spans[p+s.npages-1] = s
1074			if h.isLargeSpan(after.npages) {
1075				h.freelarge.removeSpan(after)
1076			} else {
1077				h.freeList(after.npages).remove(after)
1078			}
1079			after.state = _MSpanDead
1080			h.spanalloc.free(unsafe.Pointer(after))
1081		}
1082	}
1083
1084	// Insert s into appropriate list or treap.
1085	if h.isLargeSpan(s.npages) {
1086		h.freelarge.insert(s)
1087	} else {
1088		h.freeList(s.npages).insert(s)
1089	}
1090}
1091
1092func (h *mheap) freeList(npages uintptr) *mSpanList {
1093	return &h.free[npages]
1094}
1095
1096func (h *mheap) busyList(npages uintptr) *mSpanList {
1097	if npages < uintptr(len(h.busy)) {
1098		return &h.busy[npages]
1099	}
1100	return &h.busylarge
1101}
1102
1103func scavengeTreapNode(t *treapNode, now, limit uint64) uintptr {
1104	s := t.spanKey
1105	var sumreleased uintptr
1106	if (now-uint64(s.unusedsince)) > limit && s.npreleased != s.npages {
1107		start := s.base()
1108		end := start + s.npages<<_PageShift
1109		if physPageSize > _PageSize {
1110			// We can only release pages in
1111			// physPageSize blocks, so round start
1112			// and end in. (Otherwise, madvise
1113			// will round them *out* and release
1114			// more memory than we want.)
1115			start = (start + physPageSize - 1) &^ (physPageSize - 1)
1116			end &^= physPageSize - 1
1117			if end <= start {
1118				// start and end don't span a
1119				// whole physical page.
1120				return sumreleased
1121			}
1122		}
1123		len := end - start
1124		released := len - (s.npreleased << _PageShift)
1125		if physPageSize > _PageSize && released == 0 {
1126			return sumreleased
1127		}
1128		memstats.heap_released += uint64(released)
1129		sumreleased += released
1130		s.npreleased = len >> _PageShift
1131		sysUnused(unsafe.Pointer(start), len)
1132	}
1133	return sumreleased
1134}
1135
1136func scavengelist(list *mSpanList, now, limit uint64) uintptr {
1137	if list.isEmpty() {
1138		return 0
1139	}
1140
1141	var sumreleased uintptr
1142	for s := list.first; s != nil; s = s.next {
1143		if (now-uint64(s.unusedsince)) <= limit || s.npreleased == s.npages {
1144			continue
1145		}
1146		start := s.base()
1147		end := start + s.npages<<_PageShift
1148		if physPageSize > _PageSize {
1149			// We can only release pages in
1150			// physPageSize blocks, so round start
1151			// and end in. (Otherwise, madvise
1152			// will round them *out* and release
1153			// more memory than we want.)
1154			start = (start + physPageSize - 1) &^ (physPageSize - 1)
1155			end &^= physPageSize - 1
1156			if end <= start {
1157				// start and end don't span a
1158				// whole physical page.
1159				continue
1160			}
1161		}
1162		len := end - start
1163
1164		released := len - (s.npreleased << _PageShift)
1165		if physPageSize > _PageSize && released == 0 {
1166			continue
1167		}
1168		memstats.heap_released += uint64(released)
1169		sumreleased += released
1170		s.npreleased = len >> _PageShift
1171		sysUnused(unsafe.Pointer(start), len)
1172	}
1173	return sumreleased
1174}
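
// The physical-page rounding in scavengeTreapNode and scavengelist rounds
// start up and end down so that only whole physical pages are released. The
// same arithmetic as a standalone sketch (hypothetical helper; physPage must
// be a power of two):
func roundToPhysPagesSketch(start, end, physPage uintptr) (uintptr, uintptr, bool) {
	start = (start + physPage - 1) &^ (physPage - 1) // round start up
	end &^= physPage - 1                             // round end down
	return start, end, end > start                   // false: no whole physical page covered
}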
1175
1176func (h *mheap) scavenge(k int32, now, limit uint64) {
1177	// Disallow malloc or panic while holding the heap lock. We do
1178	// this here because this is a non-mallocgc entry point to
1179	// the mheap API.
1180	gp := getg()
1181	gp.m.mallocing++
1182	lock(&h.lock)
1183	var sumreleased uintptr
1184	for i := 0; i < len(h.free); i++ {
1185		sumreleased += scavengelist(&h.free[i], now, limit)
1186	}
1187	sumreleased += scavengetreap(h.freelarge.treap, now, limit)
1188	unlock(&h.lock)
1189	gp.m.mallocing--
1190
1191	if debug.gctrace > 0 {
1192		if sumreleased > 0 {
1193			print("scvg", k, ": ", sumreleased>>20, " MB released\n")
1194		}
1195		print("scvg", k, ": inuse: ", memstats.heap_inuse>>20, ", idle: ", memstats.heap_idle>>20, ", sys: ", memstats.heap_sys>>20, ", released: ", memstats.heap_released>>20, ", consumed: ", (memstats.heap_sys-memstats.heap_released)>>20, " (MB)\n")
1196	}
1197}
1198
1199//go:linkname runtime_debug_freeOSMemory runtime_debug.freeOSMemory
1200func runtime_debug_freeOSMemory() {
1201	GC()
1202	systemstack(func() { mheap_.scavenge(-1, ^uint64(0), 0) })
1203}
1204
1205// Initialize a new span with the given start and npages.
1206func (span *mspan) init(base uintptr, npages uintptr) {
1207	// span is *not* zeroed.
1208	span.next = nil
1209	span.prev = nil
1210	span.list = nil
1211	span.startAddr = base
1212	span.npages = npages
1213	span.allocCount = 0
1214	span.spanclass = 0
1215	span.incache = false
1216	span.elemsize = 0
1217	span.state = _MSpanDead
1218	span.unusedsince = 0
1219	span.npreleased = 0
1220	span.speciallock.key = 0
1221	span.specials = nil
1222	span.needzero = 0
1223	span.freeindex = 0
1224	span.allocBits = nil
1225	span.gcmarkBits = nil
1226}
1227
1228func (span *mspan) inList() bool {
1229	return span.list != nil
1230}
1231
1232// Initialize an empty doubly-linked list.
1233func (list *mSpanList) init() {
1234	list.first = nil
1235	list.last = nil
1236}
1237
1238func (list *mSpanList) remove(span *mspan) {
1239	if span.list != list {
1240		print("runtime: failed MSpanList_Remove span.npages=", span.npages,
1241			" span=", span, " prev=", span.prev, " span.list=", span.list, " list=", list, "\n")
1242		throw("MSpanList_Remove")
1243	}
1244	if list.first == span {
1245		list.first = span.next
1246	} else {
1247		span.prev.next = span.next
1248	}
1249	if list.last == span {
1250		list.last = span.prev
1251	} else {
1252		span.next.prev = span.prev
1253	}
1254	span.next = nil
1255	span.prev = nil
1256	span.list = nil
1257}
1258
1259func (list *mSpanList) isEmpty() bool {
1260	return list.first == nil
1261}
1262
1263func (list *mSpanList) insert(span *mspan) {
1264	if span.next != nil || span.prev != nil || span.list != nil {
1265		println("runtime: failed MSpanList_Insert", span, span.next, span.prev, span.list)
1266		throw("MSpanList_Insert")
1267	}
1268	span.next = list.first
1269	if list.first != nil {
1270		// The list contains at least one span; link it in.
1271		// The last span in the list doesn't change.
1272		list.first.prev = span
1273	} else {
1274		// The list contains no spans, so this is also the last span.
1275		list.last = span
1276	}
1277	list.first = span
1278	span.list = list
1279}
1280
1281func (list *mSpanList) insertBack(span *mspan) {
1282	if span.next != nil || span.prev != nil || span.list != nil {
1283		println("runtime: failed MSpanList_InsertBack", span, span.next, span.prev, span.list)
1284		throw("MSpanList_InsertBack")
1285	}
1286	span.prev = list.last
1287	if list.last != nil {
1288		// The list contains at least one span.
1289		list.last.next = span
1290	} else {
1291		// The list contains no spans, so this is also the first span.
1292		list.first = span
1293	}
1294	list.last = span
1295	span.list = list
1296}
1297
1298// takeAll removes all spans from other and inserts them at the front
1299// of list.
1300func (list *mSpanList) takeAll(other *mSpanList) {
1301	if other.isEmpty() {
1302		return
1303	}
1304
1305	// Reparent everything in other to list.
1306	for s := other.first; s != nil; s = s.next {
1307		s.list = list
1308	}
1309
1310	// Concatenate the lists.
1311	if list.isEmpty() {
1312		*list = *other
1313	} else {
1314		// Neither list is empty. Put other before list.
1315		other.last.next = list.first
1316		list.first.prev = other.last
1317		list.first = other.first
1318	}
1319
1320	other.first, other.last = nil, nil
1321}
1322
1323const (
1324	_KindSpecialFinalizer = 1
1325	_KindSpecialProfile   = 2
1326	// Note: The finalizer special must be first because if we're freeing
1327	// an object, a finalizer special will cause the freeing operation
1328	// to abort, and we want to keep the other special records around
1329	// if that happens.
1330)
1331
1332//go:notinheap
1333type special struct {
1334	next   *special // linked list in span
1335	offset uint16   // span offset of object
1336	kind   byte     // kind of special
1337}
1338
1339// Adds the special record s to the list of special records for
1340// the object p. All fields of s should be filled in except for
1341// offset & next, which this routine will fill in.
1342// Returns true if the special was successfully added, false otherwise.
1343// (The add will fail only if a record with the same p and s->kind
1344//  already exists.)
1345func addspecial(p unsafe.Pointer, s *special) bool {
1346	span := mheap_.lookupMaybe(p)
1347	if span == nil {
1348		throw("addspecial on invalid pointer")
1349	}
1350
1351	// Ensure that the span is swept.
1352	// Sweeping accesses the specials list w/o locks, so we have
1353	// to synchronize with it. And it's just much safer.
1354	mp := acquirem()
1355	span.ensureSwept()
1356
1357	offset := uintptr(p) - span.base()
1358	kind := s.kind
1359
1360	lock(&span.speciallock)
1361
1362	// Find splice point, check for existing record.
1363	t := &span.specials
1364	for {
1365		x := *t
1366		if x == nil {
1367			break
1368		}
1369		if offset == uintptr(x.offset) && kind == x.kind {
1370			unlock(&span.speciallock)
1371			releasem(mp)
1372			return false // already exists
1373		}
1374		if offset < uintptr(x.offset) || (offset == uintptr(x.offset) && kind < x.kind) {
1375			break
1376		}
1377		t = &x.next
1378	}
1379
1380	// Splice in record, fill in offset.
1381	s.offset = uint16(offset)
1382	s.next = *t
1383	*t = s
1384	unlock(&span.speciallock)
1385	releasem(mp)
1386
1387	return true
1388}
1389
1390// Removes the Special record of the given kind for the object p.
1391// Returns the record if the record existed, nil otherwise.
1392// The caller must FixAlloc_Free the result.
1393func removespecial(p unsafe.Pointer, kind uint8) *special {
1394	span := mheap_.lookupMaybe(p)
1395	if span == nil {
1396		throw("removespecial on invalid pointer")
1397	}
1398
1399	// Ensure that the span is swept.
1400	// Sweeping accesses the specials list w/o locks, so we have
1401	// to synchronize with it. And it's just much safer.
1402	mp := acquirem()
1403	span.ensureSwept()
1404
1405	offset := uintptr(p) - span.base()
1406
1407	lock(&span.speciallock)
1408	t := &span.specials
1409	for {
1410		s := *t
1411		if s == nil {
1412			break
1413		}
1414		// This function is used for finalizers only, so we don't check for
1415		// "interior" specials (p must be exactly equal to s->offset).
1416		if offset == uintptr(s.offset) && kind == s.kind {
1417			*t = s.next
1418			unlock(&span.speciallock)
1419			releasem(mp)
1420			return s
1421		}
1422		t = &s.next
1423	}
1424	unlock(&span.speciallock)
1425	releasem(mp)
1426	return nil
1427}
1428
1429// The described object has a finalizer set for it.
1430//
1431// specialfinalizer is allocated from non-GC'd memory, so any heap
1432// pointers must be specially handled.
1433//
1434//go:notinheap
1435type specialfinalizer struct {
1436	special special
1437	fn      *funcval  // May be a heap pointer.
1438	ft      *functype // May be a heap pointer, but always live.
1439	ot      *ptrtype  // May be a heap pointer, but always live.
1440}
1441
1442// Adds a finalizer to the object p. Returns true if it succeeded.
1443func addfinalizer(p unsafe.Pointer, f *funcval, ft *functype, ot *ptrtype) bool {
1444	lock(&mheap_.speciallock)
1445	s := (*specialfinalizer)(mheap_.specialfinalizeralloc.alloc())
1446	unlock(&mheap_.speciallock)
1447	s.special.kind = _KindSpecialFinalizer
1448	s.fn = f
1449	s.ft = ft
1450	s.ot = ot
1451	if addspecial(p, &s.special) {
1452		// This is responsible for maintaining the same
1453		// GC-related invariants as markrootSpans in any
1454		// situation where it's possible that markrootSpans
1455		// has already run but mark termination hasn't yet.
1456		if gcphase != _GCoff {
1457			_, base, _ := findObject(p)
1458			mp := acquirem()
1459			gcw := &mp.p.ptr().gcw
1460			// Mark everything reachable from the object
1461			// so it's retained for the finalizer.
1462			scanobject(uintptr(base), gcw)
1463			// Mark the finalizer itself, since the
1464			// special isn't part of the GC'd heap.
1465			scanblock(uintptr(unsafe.Pointer(&s.fn)), sys.PtrSize, &oneptrmask[0], gcw)
1466			if gcBlackenPromptly {
1467				gcw.dispose()
1468			}
1469			releasem(mp)
1470		}
1471		return true
1472	}
1473
1474	// There was an old finalizer
1475	lock(&mheap_.speciallock)
1476	mheap_.specialfinalizeralloc.free(unsafe.Pointer(s))
1477	unlock(&mheap_.speciallock)
1478	return false
1479}
1480
1481// Removes the finalizer (if any) from the object p.
1482func removefinalizer(p unsafe.Pointer) {
1483	s := (*specialfinalizer)(unsafe.Pointer(removespecial(p, _KindSpecialFinalizer)))
1484	if s == nil {
1485		return // there wasn't a finalizer to remove
1486	}
1487	lock(&mheap_.speciallock)
1488	mheap_.specialfinalizeralloc.free(unsafe.Pointer(s))
1489	unlock(&mheap_.speciallock)
1490}
1491
1492// The described object is being heap profiled.
1493//
1494//go:notinheap
1495type specialprofile struct {
1496	special special
1497	b       *bucket
1498}
1499
1500// Set the heap profile bucket associated with addr to b.
1501func setprofilebucket(p unsafe.Pointer, b *bucket) {
1502	lock(&mheap_.speciallock)
1503	s := (*specialprofile)(mheap_.specialprofilealloc.alloc())
1504	unlock(&mheap_.speciallock)
1505	s.special.kind = _KindSpecialProfile
1506	s.b = b
1507	if !addspecial(p, &s.special) {
1508		throw("setprofilebucket: profile already set")
1509	}
1510}
1511
1512// Do whatever cleanup needs to be done to deallocate s. It has
1513// already been unlinked from the MSpan specials list.
1514func freespecial(s *special, p unsafe.Pointer, size uintptr) {
1515	switch s.kind {
1516	case _KindSpecialFinalizer:
1517		sf := (*specialfinalizer)(unsafe.Pointer(s))
1518		queuefinalizer(p, sf.fn, sf.ft, sf.ot)
1519		lock(&mheap_.speciallock)
1520		mheap_.specialfinalizeralloc.free(unsafe.Pointer(sf))
1521		unlock(&mheap_.speciallock)
1522	case _KindSpecialProfile:
1523		sp := (*specialprofile)(unsafe.Pointer(s))
1524		mProf_Free(sp.b, size)
1525		lock(&mheap_.speciallock)
1526		mheap_.specialprofilealloc.free(unsafe.Pointer(sp))
1527		unlock(&mheap_.speciallock)
1528	default:
1529		throw("bad special kind")
1530		panic("not reached")
1531	}
1532}
1533
1534// gcBits is an alloc/mark bitmap. This is always used as *gcBits.
1535//
1536//go:notinheap
1537type gcBits uint8
1538
1539// bytep returns a pointer to the n'th byte of b.
1540func (b *gcBits) bytep(n uintptr) *uint8 {
1541	return addb((*uint8)(b), n)
1542}
1543
1544// bitp returns a pointer to the byte containing bit n and a mask for
1545// selecting that bit from *bytep.
1546func (b *gcBits) bitp(n uintptr) (bytep *uint8, mask uint8) {
1547	return b.bytep(n / 8), 1 << (n % 8)
1548}
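
// Combined with the allocBits comment on mspan, testing whether object n's
// bit is set is a byte index plus a mask test. An illustrative helper built
// on bitp above (not part of the runtime API):
func gcBitsIsSetSketch(b *gcBits, n uintptr) bool {
	bytep, mask := b.bitp(n)
	return *bytep&mask != 0
}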
1549
1550const gcBitsChunkBytes = uintptr(64 << 10)
1551const gcBitsHeaderBytes = unsafe.Sizeof(gcBitsHeader{})
1552
1553type gcBitsHeader struct {
1554	free uintptr // free is the index into bits of the next free byte.
1555	next uintptr // *gcBits triggers recursive type bug. (issue 14620)
1556}
1557
1558//go:notinheap
1559type gcBitsArena struct {
1560	// gcBitsHeader // side step recursive type bug (issue 14620) by including fields by hand.
1561	free uintptr // free is the index into bits of the next free byte; read/write atomically
1562	next *gcBitsArena
1563	bits [gcBitsChunkBytes - gcBitsHeaderBytes]gcBits
1564}
1565
1566var gcBitsArenas struct {
1567	lock     mutex
1568	free     *gcBitsArena
1569	next     *gcBitsArena // Read atomically. Write atomically under lock.
1570	current  *gcBitsArena
1571	previous *gcBitsArena
1572}
1573
1574// tryAlloc allocates from b or returns nil if b does not have enough room.
1575// This is safe to call concurrently.
1576func (b *gcBitsArena) tryAlloc(bytes uintptr) *gcBits {
1577	if b == nil || atomic.Loaduintptr(&b.free)+bytes > uintptr(len(b.bits)) {
1578		return nil
1579	}
1580	// Try to allocate from this block.
1581	end := atomic.Xadduintptr(&b.free, bytes)
1582	if end > uintptr(len(b.bits)) {
1583		return nil
1584	}
1585	// There was enough room.
1586	start := end - bytes
1587	return &b.bits[start]
1588}
1589
1590// newMarkBits returns a pointer to 8 byte aligned bytes
1591// to be used for a span's mark bits.
1592func newMarkBits(nelems uintptr) *gcBits {
1593	blocksNeeded := uintptr((nelems + 63) / 64)
1594	bytesNeeded := blocksNeeded * 8
1595
1596	// Try directly allocating from the current head arena.
1597	head := (*gcBitsArena)(atomic.Loadp(unsafe.Pointer(&gcBitsArenas.next)))
1598	if p := head.tryAlloc(bytesNeeded); p != nil {
1599		return p
1600	}
1601
1602	// There's not enough room in the head arena. We may need to
1603	// allocate a new arena.
1604	lock(&gcBitsArenas.lock)
1605	// Try the head arena again, since it may have changed. Now
1606	// that we hold the lock, the list head can't change, but its
1607	// free position still can.
1608	if p := gcBitsArenas.next.tryAlloc(bytesNeeded); p != nil {
1609		unlock(&gcBitsArenas.lock)
1610		return p
1611	}
1612
1613	// Allocate a new arena. This may temporarily drop the lock.
1614	fresh := newArenaMayUnlock()
1615	// If newArenaMayUnlock dropped the lock, another thread may
1616	// have put a fresh arena on the "next" list. Try allocating
1617	// from next again.
1618	if p := gcBitsArenas.next.tryAlloc(bytesNeeded); p != nil {
1619		// Put fresh back on the free list.
1620		// TODO: Mark it "already zeroed"
1621		fresh.next = gcBitsArenas.free
1622		gcBitsArenas.free = fresh
1623		unlock(&gcBitsArenas.lock)
1624		return p
1625	}
1626
1627	// Allocate from the fresh arena. We haven't linked it in yet, so
1628	// this cannot race and is guaranteed to succeed.
1629	p := fresh.tryAlloc(bytesNeeded)
1630	if p == nil {
1631		throw("markBits overflow")
1632	}
1633
1634	// Add the fresh arena to the "next" list.
1635	fresh.next = gcBitsArenas.next
1636	atomic.StorepNoWB(unsafe.Pointer(&gcBitsArenas.next), unsafe.Pointer(fresh))
1637
1638	unlock(&gcBitsArenas.lock)
1639	return p
1640}
1641
1642// newAllocBits returns a pointer to 8 byte aligned bytes
1643// to be used for this span's alloc bits.
1644// newAllocBits is used to provide newly initialized spans
1645// with allocation bits. For spans that are not being
1646// initialized, the mark bits are repurposed as allocation
1647// bits when the span is swept.
1648func newAllocBits(nelems uintptr) *gcBits {
1649	return newMarkBits(nelems)
1650}
1651
1652// nextMarkBitArenaEpoch establishes a new epoch for the arenas
1653// holding the mark bits. The arenas are named relative to the
1654// current GC cycle, which is demarcated by the call to finishsweep_m.
1655//
1656// All current spans have been swept.
1657// During that sweep each span allocated room for its gcmarkBits in
1658// the gcBitsArenas.next block. gcBitsArenas.next becomes gcBitsArenas.current,
1659// where the GC will mark objects; after each span is swept, these bits
1660// will be used to allocate objects.
1661// gcBitsArenas.current becomes gcBitsArenas.previous, where the spans'
1662// allocBits live until all the spans have been swept during this GC cycle.
1663// Each span's sweep extinguishes its references to gcBitsArenas.previous
1664// by pointing its allocBits into gcBitsArenas.current.
1665// gcBitsArenas.previous is then released to the gcBitsArenas.free list.
1666func nextMarkBitArenaEpoch() {
1667	lock(&gcBitsArenas.lock)
1668	if gcBitsArenas.previous != nil {
1669		if gcBitsArenas.free == nil {
1670			gcBitsArenas.free = gcBitsArenas.previous
1671		} else {
1672			// Find end of previous arenas.
1673			last := gcBitsArenas.previous
1674			for last = gcBitsArenas.previous; last.next != nil; last = last.next {
1675			}
1676			last.next = gcBitsArenas.free
1677			gcBitsArenas.free = gcBitsArenas.previous
1678		}
1679	}
1680	gcBitsArenas.previous = gcBitsArenas.current
1681	gcBitsArenas.current = gcBitsArenas.next
1682	atomic.StorepNoWB(unsafe.Pointer(&gcBitsArenas.next), nil) // newMarkBits calls newArena when needed
1683	unlock(&gcBitsArenas.lock)
1684}
1685
1686// newArenaMayUnlock allocates and zeroes a gcBits arena.
1687// The caller must hold gcBitsArenas.lock. This may temporarily release it.
1688func newArenaMayUnlock() *gcBitsArena {
1689	var result *gcBitsArena
1690	if gcBitsArenas.free == nil {
1691		unlock(&gcBitsArenas.lock)
1692		result = (*gcBitsArena)(sysAlloc(gcBitsChunkBytes, &memstats.gc_sys))
1693		if result == nil {
1694			throw("runtime: cannot allocate memory")
1695		}
1696		lock(&gcBitsArenas.lock)
1697	} else {
1698		result = gcBitsArenas.free
1699		gcBitsArenas.free = gcBitsArenas.free.next
1700		memclrNoHeapPointers(unsafe.Pointer(result), gcBitsChunkBytes)
1701	}
1702	result.next = nil
1703	// If result.bits is not 8 byte aligned adjust index so
1704	// that &result.bits[result.free] is 8 byte aligned.
1705	if uintptr(unsafe.Offsetof(gcBitsArena{}.bits))&7 == 0 {
1706		result.free = 0
1707	} else {
1708		result.free = 8 - (uintptr(unsafe.Pointer(&result.bits[0])) & 7)
1709	}
1710	return result
1711}
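
// The free-index adjustment above simply rounds the first usable byte of
// result.bits up to the next 8-byte boundary. The same computation as a
// standalone sketch (hypothetical helper; bitsAddr is the address of
// result.bits[0]):
func alignedFreeIndexSketch(bitsAddr uintptr) uintptr {
	if bitsAddr&7 == 0 {
		return 0
	}
	return 8 - (bitsAddr & 7)
}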
1712