// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Memory statistics

package runtime

import (
	"runtime/internal/atomic"
	"unsafe"
)

// Statistics.
//
// For detailed descriptions see the documentation for MemStats.
// Fields that differ from MemStats are further documented here.
//
// Many of these fields are updated on the fly, while others are only
// updated when updatememstats is called.
type mstats struct {
	// General statistics.
	alloc       uint64 // bytes allocated and not yet freed
	total_alloc uint64 // bytes allocated (even if freed)
	sys         uint64 // bytes obtained from system (should be sum of xxx_sys below, no locking, approximate)
	nlookup     uint64 // number of pointer lookups (unused)
	nmalloc     uint64 // number of mallocs
	nfree       uint64 // number of frees

	// Statistics about malloc heap.
	// Updated atomically, or with the world stopped.
	//
	// Like MemStats, heap_sys and heap_inuse do not count memory
	// in manually-managed spans.
	heap_sys      sysMemStat // virtual address space obtained from system for GC'd heap
	heap_inuse    uint64     // bytes in mSpanInUse spans
	heap_released uint64     // bytes released to the os

	// heap_objects is not used by the runtime directly and instead
	// computed on the fly by updatememstats.
	heap_objects uint64 // total number of allocated objects

	// Statistics about stacks.
	stacks_inuse uint64     // bytes in manually-managed stack spans; computed by updatememstats
	stacks_sys   sysMemStat // only counts newosproc0 stack in mstats; differs from MemStats.StackSys

	// Statistics about allocation of low-level fixed-size structures.
	// Protected by FixAlloc locks.
	mspan_inuse  uint64 // mspan structures
	mspan_sys    sysMemStat
	mcache_inuse uint64 // mcache structures
	mcache_sys   sysMemStat
	buckhash_sys sysMemStat // profiling bucket hash table

	// Statistics about GC overhead.
	gcWorkBufInUse           uint64     // computed by updatememstats
	gcProgPtrScalarBitsInUse uint64     // computed by updatememstats
	gcMiscSys                sysMemStat // updated atomically or during STW

	// Miscellaneous statistics.
	other_sys sysMemStat // updated atomically or during STW

	// Statistics about the garbage collector.

	// next_gc is the goal heap_live for when next GC ends.
	// Set to ^uint64(0) if disabled.
	//
	// Read and written atomically, unless the world is stopped.
	next_gc uint64

	// Protected by mheap or stopping the world during GC.
	last_gc_unix    uint64 // last gc (in unix time)
	pause_total_ns  uint64
	pause_ns        [256]uint64 // circular buffer of recent gc pause lengths
	pause_end       [256]uint64 // circular buffer of recent gc end times (nanoseconds since 1970)
	numgc           uint32
	numforcedgc     uint32  // number of user-forced GCs
	gc_cpu_fraction float64 // fraction of CPU time used by GC
	enablegc        bool
	debuggc         bool

	// Statistics about allocation size classes.

	by_size [_NumSizeClasses]struct {
		size    uint32
		nmalloc uint64
		nfree   uint64
	}

	// Add a uint32 for an even number of size classes to align the
	// fields below to 64 bits for atomic operations on 32-bit platforms.
	_ [1 - _NumSizeClasses%2]uint32

	last_gc_nanotime uint64 // last gc (monotonic time)
	tinyallocs       uint64 // number of tiny allocations that didn't cause actual allocation; not exported to Go directly
	last_next_gc     uint64 // next_gc for the previous GC
	last_heap_inuse  uint64 // heap_inuse at mark termination of the previous GC

	// triggerRatio is the heap growth ratio that triggers marking.
	//
	// E.g., if this is 0.6, then GC should start when the live
	// heap has reached 1.6 times the heap size marked by the
	// previous cycle. This should be ≤ GOGC/100 so the trigger
	// heap size is less than the goal heap size. This is set
	// during mark termination for the next cycle's trigger.
	triggerRatio float64

	// gc_trigger is the heap size that triggers marking.
	//
	// When heap_live ≥ gc_trigger, the mark phase will start.
	// This is also the heap size by which proportional sweeping
	// must be complete.
	//
	// This is computed from triggerRatio during mark termination
	// for the next cycle's trigger.
	gc_trigger uint64
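
	// As an illustrative example of the two fields above (numbers made
	// up): if the previous cycle marked 100 MB of live heap and
	// triggerRatio is 0.6, then per the rule above,
	//
	//	gc_trigger = heap_marked * (1 + triggerRatio) = 160 MB
	//
	// which with GOGC=100 sits safely below the 200 MB goal heap size,
	// leaving headroom for allocation during concurrent marking.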

	// heap_live is the number of bytes considered live by the GC.
	// That is: retained by the most recent GC plus allocated
	// since then. heap_live <= alloc, since alloc includes unmarked
	// objects that have not yet been swept (and hence goes up as we
	// allocate and down as we sweep) while heap_live excludes these
	// objects (and hence only goes up between GCs).
	//
	// This is updated atomically without locking. To reduce
	// contention, this is updated only when obtaining a span from
	// an mcentral and at this point it counts all of the
	// unallocated slots in that span (which will be allocated
	// before that mcache obtains another span from that
	// mcentral). Hence, it slightly overestimates the "true" live
	// heap size. It's better to overestimate than to
	// underestimate because 1) this triggers the GC earlier than
	// necessary rather than potentially too late and 2) this
	// leads to a conservative GC rate rather than a GC rate that
	// is potentially too low.
	//
	// Reads should likewise be atomic (or during STW).
	//
	// Whenever this is updated, call traceHeapAlloc() and
	// gcController.revise().
	heap_live uint64
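
	// To make the overestimate above concrete: when an mcache obtains
	// a fresh span with, say, 64 free object slots, heap_live is
	// credited for all 64 slots at that moment, even though the slots
	// are only filled by subsequent allocations.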

	// heap_scan is the number of bytes of "scannable" heap. This
	// is the live heap (as counted by heap_live), but omitting
	// no-scan objects and no-scan tails of objects.
	//
	// Whenever this is updated, call gcController.revise().
	//
	// Read and written atomically or with the world stopped.
	heap_scan uint64

	// heap_marked is the number of bytes marked by the previous
	// GC. After mark termination, heap_live == heap_marked, but
	// unlike heap_live, heap_marked does not change until the
	// next mark termination.
	heap_marked uint64

	// heapStats is a set of statistics that are updated atomically
	// and kept consistent with one another; see consistentHeapStats.
	heapStats consistentHeapStats

	// _ uint32 // ensure gcPauseDist is aligned

	// gcPauseDist represents the distribution of all GC-related
	// application pauses in the runtime.
	//
	// Each individual pause is counted separately, unlike pause_ns.
	gcPauseDist timeHistogram
}

var memstats mstats

// A MemStats records statistics about the memory allocator.
type MemStats struct {
	// General statistics.

	// Alloc is bytes of allocated heap objects.
	//
	// This is the same as HeapAlloc (see below).
	Alloc uint64

	// TotalAlloc is cumulative bytes allocated for heap objects.
	//
	// TotalAlloc increases as heap objects are allocated, but
	// unlike Alloc and HeapAlloc, it does not decrease when
	// objects are freed.
	TotalAlloc uint64

	// Sys is the total bytes of memory obtained from the OS.
	//
	// Sys is the sum of the XSys fields below. Sys measures the
	// virtual address space reserved by the Go runtime for the
	// heap, stacks, and other internal data structures. It's
	// likely that not all of the virtual address space is backed
	// by physical memory at any given moment, though in general
	// it all was at some point.
	Sys uint64

	// Lookups is the number of pointer lookups performed by the
	// runtime.
	//
	// This is primarily useful for debugging runtime internals.
	Lookups uint64

	// Mallocs is the cumulative count of heap objects allocated.
	// The number of live objects is Mallocs - Frees.
	Mallocs uint64

	// Frees is the cumulative count of heap objects freed.
	Frees uint64

	// Heap memory statistics.
	//
	// Interpreting the heap statistics requires some knowledge of
	// how Go organizes memory. Go divides the virtual address
	// space of the heap into "spans", which are contiguous
	// regions of memory 8K or larger. A span may be in one of
	// three states:
	//
	// An "idle" span contains no objects or other data. The
	// physical memory backing an idle span can be released back
	// to the OS (but the virtual address space never is), or it
	// can be converted into an "in use" or "stack" span.
	//
	// An "in use" span contains at least one heap object and may
	// have free space available to allocate more heap objects.
	//
	// A "stack" span is used for goroutine stacks. Stack spans
	// are not considered part of the heap. A span can change
	// between heap and stack memory; it is never used for both
	// simultaneously.

	// HeapAlloc is bytes of allocated heap objects.
	//
	// "Allocated" heap objects include all reachable objects, as
	// well as unreachable objects that the garbage collector has
	// not yet freed. Specifically, HeapAlloc increases as heap
	// objects are allocated and decreases as the heap is swept
	// and unreachable objects are freed. Sweeping occurs
	// incrementally between GC cycles, so these two processes
	// occur simultaneously, and as a result HeapAlloc tends to
	// change smoothly (in contrast with the sawtooth that is
	// typical of stop-the-world garbage collectors).
	HeapAlloc uint64

	// HeapSys is bytes of heap memory obtained from the OS.
	//
	// HeapSys measures the amount of virtual address space
	// reserved for the heap. This includes virtual address space
	// that has been reserved but not yet used, which consumes no
	// physical memory, but tends to be small, as well as virtual
	// address space for which the physical memory has been
	// returned to the OS after it became unused (see HeapReleased
	// for a measure of the latter).
	//
	// HeapSys estimates the largest size the heap has had.
	HeapSys uint64

	// HeapIdle is bytes in idle (unused) spans.
	//
	// Idle spans have no objects in them. These spans could be
	// (and may already have been) returned to the OS, or they can
	// be reused for heap allocations, or they can be reused as
	// stack memory.
	//
	// HeapIdle minus HeapReleased estimates the amount of memory
	// that could be returned to the OS, but is being retained by
	// the runtime so it can grow the heap without requesting more
	// memory from the OS. If this difference is significantly
	// larger than the heap size, it indicates there was a recent
	// transient spike in live heap size.
	HeapIdle uint64

	// HeapInuse is bytes in in-use spans.
	//
	// In-use spans have at least one object in them. These spans
	// can only be used for other objects of roughly the same
	// size.
	//
	// HeapInuse minus HeapAlloc estimates the amount of memory
	// that has been dedicated to particular size classes, but is
	// not currently being used. This is an upper bound on
	// fragmentation, but in general this memory can be reused
	// efficiently.
	HeapInuse uint64

	// HeapReleased is bytes of physical memory returned to the OS.
	//
	// This counts heap memory from idle spans that was returned
	// to the OS and has not yet been reacquired for the heap.
	HeapReleased uint64

	// HeapObjects is the number of allocated heap objects.
	//
	// Like HeapAlloc, this increases as objects are allocated and
	// decreases as the heap is swept and unreachable objects are
	// freed.
	HeapObjects uint64

	// Stack memory statistics.
	//
	// Stacks are not considered part of the heap, but the runtime
	// can reuse a span of heap memory for stack memory, and
	// vice-versa.

	// StackInuse is bytes in stack spans.
	//
	// In-use stack spans have at least one stack in them. These
	// spans can only be used for other stacks of the same size.
	//
	// There is no StackIdle because unused stack spans are
	// returned to the heap (and hence counted toward HeapIdle).
	StackInuse uint64

	// StackSys is bytes of stack memory obtained from the OS.
	//
	// StackSys is StackInuse, plus any memory obtained directly
	// from the OS for OS thread stacks (which should be minimal).
	StackSys uint64

	// Off-heap memory statistics.
	//
	// The following statistics measure runtime-internal
	// structures that are not allocated from heap memory (usually
	// because they are part of implementing the heap). Unlike
	// heap or stack memory, any memory allocated to these
	// structures is dedicated to these structures.
	//
	// These are primarily useful for debugging runtime memory
	// overheads.

	// MSpanInuse is bytes of allocated mspan structures.
	MSpanInuse uint64

	// MSpanSys is bytes of memory obtained from the OS for mspan
	// structures.
	MSpanSys uint64

	// MCacheInuse is bytes of allocated mcache structures.
	MCacheInuse uint64

	// MCacheSys is bytes of memory obtained from the OS for
	// mcache structures.
	MCacheSys uint64

	// BuckHashSys is bytes of memory in profiling bucket hash tables.
	BuckHashSys uint64

	// GCSys is bytes of memory in garbage collection metadata.
	GCSys uint64

	// OtherSys is bytes of memory in miscellaneous off-heap
	// runtime allocations.
	OtherSys uint64

	// Garbage collector statistics.

	// NextGC is the target heap size of the next GC cycle.
	//
	// The garbage collector's goal is to keep HeapAlloc ≤ NextGC.
	// At the end of each GC cycle, the target for the next cycle
	// is computed based on the amount of reachable data and the
	// value of GOGC.
	NextGC uint64

	// LastGC is the time the last garbage collection finished, as
	// nanoseconds since 1970 (the UNIX epoch).
	LastGC uint64

	// PauseTotalNs is the cumulative nanoseconds in GC
	// stop-the-world pauses since the program started.
	//
	// During a stop-the-world pause, all goroutines are paused
	// and only the garbage collector can run.
	PauseTotalNs uint64

	// PauseNs is a circular buffer of recent GC stop-the-world
	// pause times in nanoseconds.
	//
	// The most recent pause is at PauseNs[(NumGC+255)%256]. In
	// general, PauseNs[N%256] records the time paused in the most
	// recent N%256th GC cycle. There may be multiple pauses per
	// GC cycle; this is the sum of all pauses during a cycle.
	PauseNs [256]uint64
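
	// For example, given a MemStats value m, the pause time of the
	// most recently completed cycle is
	//
	//	m.PauseNs[(m.NumGC+255)%256]
	//
	// applying the indexing rule above with N = NumGC.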

	// PauseEnd is a circular buffer of recent GC pause end times,
	// as nanoseconds since 1970 (the UNIX epoch).
	//
	// This buffer is filled the same way as PauseNs. There may be
	// multiple pauses per GC cycle; this records the end of the
	// last pause in a cycle.
	PauseEnd [256]uint64

	// NumGC is the number of completed GC cycles.
	NumGC uint32

	// NumForcedGC is the number of GC cycles that were forced by
	// the application calling the GC function.
	NumForcedGC uint32

	// GCCPUFraction is the fraction of this program's available
	// CPU time used by the GC since the program started.
	//
	// GCCPUFraction is expressed as a number between 0 and 1,
	// where 0 means GC has consumed none of this program's CPU. A
	// program's available CPU time is defined as the integral of
	// GOMAXPROCS since the program started. That is, if
	// GOMAXPROCS is 2 and a program has been running for 10
	// seconds, its "available CPU" is 20 seconds. GCCPUFraction
	// does not include CPU time used for write barrier activity.
	//
	// This is the same as the fraction of CPU reported by
	// GODEBUG=gctrace=1.
	GCCPUFraction float64

	// EnableGC indicates that GC is enabled. It is always true,
	// even if GOGC=off.
	EnableGC bool

	// DebugGC is currently unused.
	DebugGC bool

	// BySize reports per-size class allocation statistics.
	//
	// BySize[N] gives statistics for allocations of size S where
	// BySize[N-1].Size < S ≤ BySize[N].Size.
	//
	// This does not report allocations larger than BySize[60].Size.
	// (A lookup sketch follows this struct declaration.)
	BySize [61]struct {
		// Size is the maximum byte size of an object in this
		// size class.
		Size uint32

		// Mallocs is the cumulative count of heap objects
		// allocated in this size class. The cumulative bytes
		// of allocation is Size*Mallocs. The number of live
		// objects in this size class is Mallocs - Frees.
		Mallocs uint64

		// Frees is the cumulative count of heap objects freed
		// in this size class.
		Frees uint64
	}
}
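
// As a hedged sketch of consuming BySize (sizeClassFor is a
// hypothetical helper, not part of any API): the invariant
// BySize[N-1].Size < S <= BySize[N].Size means the class for an
// object of size s is the first entry whose Size is at least s.
//
//	func sizeClassFor(m *MemStats, s uint32) (int, bool) {
//		for i := range m.BySize {
//			if m.BySize[i].Size != 0 && s <= m.BySize[i].Size {
//				return i, true
//			}
//		}
//		return 0, false // larger than BySize[60].Size; not tracked per class
//	}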

func init() {
	if offset := unsafe.Offsetof(memstats.heap_live); offset%8 != 0 {
		println(offset)
		throw("memstats.heap_live not aligned to 8 bytes")
	}
	if offset := unsafe.Offsetof(memstats.heapStats); offset%8 != 0 {
		println(offset)
		throw("memstats.heapStats not aligned to 8 bytes")
	}
	if offset := unsafe.Offsetof(memstats.gcPauseDist); offset%8 != 0 {
		println(offset)
		throw("memstats.gcPauseDist not aligned to 8 bytes")
	}
	// Ensure the size of heapStatsDelta causes adjacent fields/slots (e.g.
	// [3]heapStatsDelta) to be 8-byte aligned.
	if size := unsafe.Sizeof(heapStatsDelta{}); size%8 != 0 {
		println(size)
		throw("heapStatsDelta not a multiple of 8 bytes in size")
	}
}

// ReadMemStats populates m with memory allocator statistics.
//
// The returned memory allocator statistics are up to date as of the
// call to ReadMemStats. This is in contrast with a heap profile,
// which is a snapshot as of the most recently completed garbage
// collection cycle.
func ReadMemStats(m *MemStats) {
	stopTheWorld("read mem stats")

	systemstack(func() {
		readmemstats_m(m)
	})

	startTheWorld()
}
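
// A minimal usage sketch from application code. Because ReadMemStats
// stops the world, it is comparatively expensive and is best called
// sparingly (e.g. periodically, not per request):
//
//	var m runtime.MemStats
//	runtime.ReadMemStats(&m)
//	println("live heap:", m.HeapAlloc, "next GC goal:", m.NextGC)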

func readmemstats_m(stats *MemStats) {
	updatememstats()

	stats.Alloc = memstats.alloc
	stats.TotalAlloc = memstats.total_alloc
	stats.Sys = memstats.sys
	stats.Mallocs = memstats.nmalloc
	stats.Frees = memstats.nfree
	stats.HeapAlloc = memstats.alloc
	stats.HeapSys = memstats.heap_sys.load()
	// By definition, HeapIdle is memory that was mapped
	// for the heap but is not currently used to hold heap
	// objects. It also specifically is memory that can be
	// used for other purposes, like stacks, but this memory
	// is subtracted out of HeapSys before it makes that
	// transition. Put another way:
	//
	// heap_sys = bytes allocated from the OS for the heap - bytes ultimately used for non-heap purposes
	// heap_idle = bytes allocated from the OS for the heap - bytes ultimately used for any purpose
	//
	// or
	//
	// heap_sys = sys - stacks_inuse - gcWorkBufInUse - gcProgPtrScalarBitsInUse
	// heap_idle = sys - stacks_inuse - gcWorkBufInUse - gcProgPtrScalarBitsInUse - heap_inuse
	//
	// => heap_idle = heap_sys - heap_inuse
	stats.HeapIdle = memstats.heap_sys.load() - memstats.heap_inuse
	stats.HeapInuse = memstats.heap_inuse
	stats.HeapReleased = memstats.heap_released
	stats.HeapObjects = memstats.heap_objects
	stats.StackInuse = memstats.stacks_inuse
	// memstats.stacks_sys is only memory mapped directly for OS stacks.
	// Add in heap-allocated stack memory for user consumption.
	stats.StackSys = memstats.stacks_inuse + memstats.stacks_sys.load()
	stats.MSpanInuse = memstats.mspan_inuse
	stats.MSpanSys = memstats.mspan_sys.load()
	stats.MCacheInuse = memstats.mcache_inuse
	stats.MCacheSys = memstats.mcache_sys.load()
	stats.BuckHashSys = memstats.buckhash_sys.load()
	// MemStats defines GCSys as an aggregate of all memory related
	// to the memory management system, but we track this memory
	// at a more granular level in the runtime.
	stats.GCSys = memstats.gcMiscSys.load() + memstats.gcWorkBufInUse + memstats.gcProgPtrScalarBitsInUse
	stats.OtherSys = memstats.other_sys.load()
	stats.NextGC = memstats.next_gc
	stats.LastGC = memstats.last_gc_unix
	stats.PauseTotalNs = memstats.pause_total_ns
	stats.PauseNs = memstats.pause_ns
	stats.PauseEnd = memstats.pause_end
	stats.NumGC = memstats.numgc
	stats.NumForcedGC = memstats.numforcedgc
	stats.GCCPUFraction = memstats.gc_cpu_fraction
	stats.EnableGC = true

	// Handle BySize. Copy N values, where N is
	// the minimum of the lengths of the two arrays.
	// Unfortunately copy() won't work here because
	// the arrays have different structs.
	//
	// TODO(mknyszek): Consider renaming the fields
	// of by_size's elements to align so we can use
	// the copy built-in.
	bySizeLen := len(stats.BySize)
	if l := len(memstats.by_size); l < bySizeLen {
		bySizeLen = l
	}
	for i := 0; i < bySizeLen; i++ {
		stats.BySize[i].Size = memstats.by_size[i].size
		stats.BySize[i].Mallocs = memstats.by_size[i].nmalloc
		stats.BySize[i].Frees = memstats.by_size[i].nfree
	}
}

//go:linkname readGCStats runtime_1debug.readGCStats
func readGCStats(pauses *[]uint64) {
	systemstack(func() {
		readGCStats_m(pauses)
	})
}

// readGCStats_m must be called on the system stack because it acquires the heap
// lock. See mheap for details.
//go:systemstack
func readGCStats_m(pauses *[]uint64) {
	p := *pauses
	// Calling code in runtime/debug should make the slice large enough.
	if cap(p) < len(memstats.pause_ns)+3 {
		throw("short slice passed to readGCStats")
	}

	// Pass back: pauses, pause ends, last gc (absolute time), number of gc, total pause ns.
	lock(&mheap_.lock)

	n := memstats.numgc
	if n > uint32(len(memstats.pause_ns)) {
		n = uint32(len(memstats.pause_ns))
	}

	// The pause buffer is circular. The most recent pause is at
	// pause_ns[(numgc-1)%len(pause_ns)], and then backward
	// from there to go back farther in time. We deliver the times
	// most recent first (in p[0]).
	p = p[:cap(p)]
	for i := uint32(0); i < n; i++ {
		j := (memstats.numgc - 1 - i) % uint32(len(memstats.pause_ns))
		p[i] = memstats.pause_ns[j]
		p[n+i] = memstats.pause_end[j]
	}

	p[n+n] = memstats.last_gc_unix
	p[n+n+1] = uint64(memstats.numgc)
	p[n+n+2] = memstats.pause_total_ns
	unlock(&mheap_.lock)
	*pauses = p[:n+n+3]
}
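
// The returned slice layout is positional. A hedged sketch of how a
// caller such as runtime/debug might decode it, with n = (len(p)-3)/2:
//
//	pauses := p[:n]    // pause durations, most recent first
//	ends := p[n : n+n] // matching pause end times (unix nanoseconds)
//	lastGC := p[n+n]
//	numGC := p[n+n+1]
//	totalPauseNs := p[n+n+2]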

// Updates the memstats structure.
//
// The world must be stopped.
//
//go:nowritebarrier
func updatememstats() {
	assertWorldStopped()

	// Flush mcaches to mcentral before doing anything else.
	//
	// Flushing to the mcentral may in general cause stats to
	// change as mcentral data structures are manipulated.
	systemstack(flushallmcaches)

	memstats.mcache_inuse = uint64(mheap_.cachealloc.inuse)
	memstats.mspan_inuse = uint64(mheap_.spanalloc.inuse)
	memstats.sys = memstats.heap_sys.load() + memstats.stacks_sys.load() + memstats.mspan_sys.load() +
		memstats.mcache_sys.load() + memstats.buckhash_sys.load() + memstats.gcMiscSys.load() +
		memstats.other_sys.load()

	// Calculate memory allocator stats.
	// During program execution we only count number of frees and amount of freed memory.
	// Current number of alive objects in the heap and amount of alive heap memory
	// are calculated by scanning all spans.
	// Total number of mallocs is calculated as number of frees plus number of alive objects.
	// Similarly, total amount of allocated memory is calculated as amount of freed memory
	// plus amount of alive heap memory.
	memstats.alloc = 0
	memstats.total_alloc = 0
	memstats.nmalloc = 0
	memstats.nfree = 0
	for i := 0; i < len(memstats.by_size); i++ {
		memstats.by_size[i].nmalloc = 0
		memstats.by_size[i].nfree = 0
	}
	// Collect consistent stats, which are the source-of-truth in some cases.
	var consStats heapStatsDelta
	memstats.heapStats.unsafeRead(&consStats)

	// Collect large allocation stats.
	totalAlloc := uint64(consStats.largeAlloc)
	memstats.nmalloc += uint64(consStats.largeAllocCount)
	totalFree := uint64(consStats.largeFree)
	memstats.nfree += uint64(consStats.largeFreeCount)

	// Collect per-sizeclass stats.
	for i := 0; i < _NumSizeClasses; i++ {
		// Malloc stats.
		a := uint64(consStats.smallAllocCount[i])
		totalAlloc += a * uint64(class_to_size[i])
		memstats.nmalloc += a
		memstats.by_size[i].nmalloc = a

		// Free stats.
		f := uint64(consStats.smallFreeCount[i])
		totalFree += f * uint64(class_to_size[i])
		memstats.nfree += f
		memstats.by_size[i].nfree = f
	}

	// Account for tiny allocations.
	memstats.nfree += memstats.tinyallocs
	memstats.nmalloc += memstats.tinyallocs

	// Calculate derived stats.
	memstats.total_alloc = totalAlloc
	memstats.alloc = totalAlloc - totalFree
	memstats.heap_objects = memstats.nmalloc - memstats.nfree

	memstats.stacks_inuse = uint64(consStats.inStacks)
	memstats.gcWorkBufInUse = uint64(consStats.inWorkBufs)
	memstats.gcProgPtrScalarBitsInUse = uint64(consStats.inPtrScalarBits)

	// We also count stacks_inuse, gcWorkBufInUse, and gcProgPtrScalarBitsInUse as sys memory.
	memstats.sys += memstats.stacks_inuse + memstats.gcWorkBufInUse + memstats.gcProgPtrScalarBitsInUse

	// The world is stopped, so the consistent stats (after aggregation)
	// should be identical to some combination of memstats. In particular:
	//
	// * heap_inuse == inHeap
	// * heap_released == released
	// * heap_sys - heap_released == committed - inStacks - inWorkBufs - inPtrScalarBits
	//
	// Check if that's actually true.
	//
	// TODO(mknyszek): Maybe don't throw here. It would be bad if a
	// bug in otherwise benign accounting caused the whole application
	// to crash.
	if memstats.heap_inuse != uint64(consStats.inHeap) {
		print("runtime: heap_inuse=", memstats.heap_inuse, "\n")
		print("runtime: consistent value=", consStats.inHeap, "\n")
		throw("heap_inuse and consistent stats are not equal")
	}
	if memstats.heap_released != uint64(consStats.released) {
		print("runtime: heap_released=", memstats.heap_released, "\n")
		print("runtime: consistent value=", consStats.released, "\n")
		throw("heap_released and consistent stats are not equal")
	}
	globalRetained := memstats.heap_sys.load() - memstats.heap_released
	consRetained := uint64(consStats.committed - consStats.inStacks - consStats.inWorkBufs - consStats.inPtrScalarBits)
	if globalRetained != consRetained {
		print("runtime: global value=", globalRetained, "\n")
		print("runtime: consistent value=", consRetained, "\n")
		throw("measures of the retained heap are not equal")
	}
}
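
// A worked example of the identities computed above (numbers made up):
// if the consistent stats report 1000 objects totaling 10 MB allocated
// and 400 objects totaling 3 MB freed, then after updatememstats
//
//	total_alloc  = 10 MB
//	alloc        = 10 MB - 3 MB = 7 MB
//	heap_objects = nmalloc - nfree = 600
//
// Tiny allocations are added to both nmalloc and nfree, so they cancel
// out of heap_objects.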

// flushmcache flushes the mcache of allp[i].
//
// The world must be stopped.
//
//go:nowritebarrier
func flushmcache(i int) {
	assertWorldStopped()

	p := allp[i]
	c := p.mcache
	if c == nil {
		return
	}
	c.releaseAll()
}

// flushallmcaches flushes the mcaches of all Ps.
//
// The world must be stopped.
//
//go:nowritebarrier
func flushallmcaches() {
	assertWorldStopped()

	for i := 0; i < int(gomaxprocs); i++ {
		flushmcache(i)
	}
}

// sysMemStat represents a global system statistic that is managed atomically.
//
// This type must structurally be a uint64 so that mstats aligns with MemStats.
type sysMemStat uint64

// load atomically reads the value of the stat.
//
// Must be nosplit as it is called in runtime initialization, e.g. newosproc0.
//go:nosplit
func (s *sysMemStat) load() uint64 {
	return atomic.Load64((*uint64)(s))
}

// add atomically adds n to the stat.
//
// Must be nosplit as it is called in runtime initialization, e.g. newosproc0.
//go:nosplit
func (s *sysMemStat) add(n int64) {
	if s == nil {
		return
	}
	val := atomic.Xadd64((*uint64)(s), n)
	if (n > 0 && int64(val) < n) || (n < 0 && int64(val)+n < n) {
		print("runtime: val=", val, " n=", n, "\n")
		throw("sysMemStat overflow")
	}
}
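
// A usage sketch (illustrative only; real call sites thread the stat
// through the low-level allocation functions):
//
//	memstats.mspan_sys.add(int64(n))  // n bytes obtained from the OS
//	memstats.mspan_sys.add(-int64(n)) // n bytes returned to the OS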

// heapStatsDelta contains deltas of various runtime memory statistics
// that need to be updated together in order for them to be kept
// consistent with one another.
type heapStatsDelta struct {
	// Memory stats.
	committed       int64 // byte delta of memory committed
	released        int64 // byte delta of released memory
	inHeap          int64 // byte delta of memory placed in the heap
	inStacks        int64 // byte delta of memory reserved for stacks
	inWorkBufs      int64 // byte delta of memory reserved for work bufs
	inPtrScalarBits int64 // byte delta of memory reserved for unrolled GC prog bits

	// Allocator stats.
	largeAlloc      uintptr                  // bytes allocated for large objects
	largeAllocCount uintptr                  // number of large object allocations
	smallAllocCount [_NumSizeClasses]uintptr // number of allocs for small objects
	largeFree       uintptr                  // bytes freed for large objects (>maxSmallSize)
	largeFreeCount  uintptr                  // number of frees for large objects (>maxSmallSize)
	smallFreeCount  [_NumSizeClasses]uintptr // number of frees for small objects (<=maxSmallSize)

	// Add a uint32 to ensure this struct is a multiple of 8 bytes in size.
	// Only necessary on 32-bit platforms.
	// _ [(sys.PtrSize / 4) % 2]uint32
}

// merge adds in the deltas from b into a.
func (a *heapStatsDelta) merge(b *heapStatsDelta) {
	a.committed += b.committed
	a.released += b.released
	a.inHeap += b.inHeap
	a.inStacks += b.inStacks
	a.inWorkBufs += b.inWorkBufs
	a.inPtrScalarBits += b.inPtrScalarBits

	a.largeAlloc += b.largeAlloc
	a.largeAllocCount += b.largeAllocCount
	for i := range b.smallAllocCount {
		a.smallAllocCount[i] += b.smallAllocCount[i]
	}
	a.largeFree += b.largeFree
	a.largeFreeCount += b.largeFreeCount
	for i := range b.smallFreeCount {
		a.smallFreeCount[i] += b.smallFreeCount[i]
	}
}

// consistentHeapStats represents a set of various memory statistics
// whose updates must be viewed completely to get a consistent
// state of the world.
//
// To write updates to memory stats use the acquire and release
// methods. To obtain a consistent global snapshot of these statistics,
// use read.
type consistentHeapStats struct {
	// stats is a ring buffer of heapStatsDelta values.
	// Writers always atomically update the delta at index gen.
	//
	// Readers operate by rotating gen (0 -> 1 -> 2 -> 0 -> ...)
	// and synchronizing with writers by observing each P's
	// statsSeq field. If the reader observes a P not writing,
	// it can be sure that it will pick up the new gen value the
	// next time it writes.
	//
	// The reader then takes responsibility by clearing space
	// in the ring buffer for the next reader to rotate gen to
	// that space (i.e. it merges in values from index (gen-2) mod 3
	// to index (gen-1) mod 3, then clears the former).
	//
	// Note that this means only one reader can be reading at a time.
	// There is no way for readers to synchronize.
	//
	// This process is why we need a ring buffer of size 3 instead
	// of 2: one is for the writers, one contains the most recent
	// data, and the last one is clear so writers can begin writing
	// to it the moment gen is updated.
	stats [3]heapStatsDelta

	// gen represents the current index into which writers
	// are writing, and can take on the value of 0, 1, or 2.
	// This value is updated atomically.
	gen uint32

	// noPLock is intended to provide mutual exclusion for updating
	// stats when no P is available. It does not block other writers
	// with a P, only other writers without a P and the reader. Because
	// stats are usually updated when a P is available, contention on
	// this lock should be minimal.
	noPLock mutex
}

// acquire returns a heapStatsDelta to be updated. In effect,
// it acquires the shard for writing. release must be called
// as soon as the relevant deltas are updated.
//
// The returned heapStatsDelta must be updated atomically.
//
// The caller's P must not change between acquire and
// release. This also means that the caller should not
// acquire a P or release its P in between.
func (m *consistentHeapStats) acquire() *heapStatsDelta {
	if pp := getg().m.p.ptr(); pp != nil {
		seq := atomic.Xadd(&pp.statsSeq, 1)
		if seq%2 == 0 {
			// Should have been incremented to odd.
			print("runtime: seq=", seq, "\n")
			throw("bad sequence number")
		}
	} else {
		lock(&m.noPLock)
	}
	gen := atomic.Load(&m.gen) % 3
	return &m.stats[gen]
}

// release indicates that the writer is done modifying
// the delta. The value returned by the corresponding
// acquire must no longer be accessed or modified after
// release is called.
//
// The caller's P must not change between acquire and
// release. This also means that the caller should not
// acquire a P or release its P in between.
func (m *consistentHeapStats) release() {
	if pp := getg().m.p.ptr(); pp != nil {
		seq := atomic.Xadd(&pp.statsSeq, 1)
		if seq%2 != 0 {
			// Should have been incremented to even.
			print("runtime: seq=", seq, "\n")
			throw("bad sequence number")
		}
	} else {
		unlock(&m.noPLock)
	}
}
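
// A writer-side sketch of the protocol (illustrative; the exact fields
// updated depend on the call site, and Xaddint64 stands in for an
// atomic int64 add helper):
//
//	stats := memstats.heapStats.acquire()
//	atomic.Xaddint64(&stats.inHeap, int64(nbytes))
//	memstats.heapStats.release()
//
// The update between acquire and release must be atomic, per
// acquire's contract above.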

// unsafeRead aggregates the delta for this shard into out.
//
// Unsafe because it does so without any synchronization. The
// world must be stopped.
func (m *consistentHeapStats) unsafeRead(out *heapStatsDelta) {
	assertWorldStopped()

	for i := range m.stats {
		out.merge(&m.stats[i])
	}
}

// unsafeClear clears the shard.
//
// Unsafe because the world must be stopped and values should
// be donated elsewhere before clearing.
func (m *consistentHeapStats) unsafeClear() {
	assertWorldStopped()

	for i := range m.stats {
		m.stats[i] = heapStatsDelta{}
	}
}

// read takes a globally consistent snapshot of m
// and puts the aggregated value in out. Even though out is a
// heapStatsDelta, the resulting values should be complete and
// valid statistic values.
//
// Not safe to call concurrently. The world must be stopped
// or metricsSema must be held.
func (m *consistentHeapStats) read(out *heapStatsDelta) {
	// Getting preempted after this point is not safe because
	// we read allp. We need to make sure a STW can't happen
	// so it doesn't change out from under us.
	mp := acquirem()

	// Get the current generation. We can be confident that this
	// will not change since read is serialized and is the only
	// operation that modifies gen.
	currGen := atomic.Load(&m.gen)
	prevGen := currGen - 1
	if currGen == 0 {
		prevGen = 2
	}

	// Prevent writers without a P from writing while we update gen.
	lock(&m.noPLock)

	// Rotate gen, effectively taking a snapshot of the state of
	// these statistics at the point of the exchange by moving
	// writers to the next set of deltas.
	//
	// This exchange is safe to do because we won't race
	// with anyone else trying to update this value.
	atomic.Xchg(&m.gen, (currGen+1)%3)

	// Allow P-less writers to continue. They'll be writing to the
	// next generation now.
	unlock(&m.noPLock)

	for _, p := range allp {
		// Spin until there are no more writers.
		for atomic.Load(&p.statsSeq)%2 != 0 {
		}
	}

	// At this point we've observed that each sequence
	// number is even, so any future writers will observe
	// the new gen value. That means it's safe to read from
	// the other deltas in the stats buffer.

	// Perform our responsibilities and free up
	// stats[prevGen] for the next time we want to take
	// a snapshot.
	m.stats[currGen].merge(&m.stats[prevGen])
	m.stats[prevGen] = heapStatsDelta{}

	// Finally, copy out the complete delta.
	*out = m.stats[currGen]

	releasem(mp)
}
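
// To walk one rotation through concretely: suppose gen is 1 when read
// begins. Then currGen = 1 and prevGen = 0, and gen is rotated to 2 so
// new writers land in stats[2]. Once every P's statsSeq has been
// observed even, the reader folds stats[0] into stats[1], zeroes
// stats[0] for a future rotation, and returns the aggregate from
// stats[1].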