1// Copyright 2020 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package runtime
6
7// Metrics implementation exported to runtime/metrics.
8
9import (
10	"runtime/internal/atomic"
11	"unsafe"
12)
13
var (
	// metricsSema is the semaphore that readMetrics acquires before it
	// calls initMetrics or reads the metrics map. It is initialized to 1.
	//
	// metricsInit records whether initMetrics has already populated the
	// variables in this block.
	//
	// metrics is a map of runtime/metrics keys to
	// data used by the runtime to sample each metric's
	// value.
	metricsSema uint32 = 1
	metricsInit bool
	metrics     map[string]metricData

	// sizeClassBuckets and timeHistBuckets hold the bucket boundaries
	// handed to the size-class and time histograms, respectively.
	// Both are built by initMetrics.
	sizeClassBuckets []float64
	timeHistBuckets  []float64
)
25
// metricData describes how the runtime produces the value of a
// single metric. The metrics map holds one metricData per metric name.
type metricData struct {
	// deps is the set of runtime statistics that this metric
	// depends on. Before compute is called, the statAggregate
	// which will be passed must ensure() these dependencies.
	// A zero deps means compute does not read the statAggregate.
	deps statDepSet

	// compute is a function that populates a metricValue
	// given a populated statAggregate structure.
	compute func(in *statAggregate, out *metricValue)
}
36
37// initMetrics initializes the metrics map if it hasn't been yet.
38//
39// metricsSema must be held.
40func initMetrics() {
41	if metricsInit {
42		return
43	}
44
45	sizeClassBuckets = make([]float64, _NumSizeClasses, _NumSizeClasses+1)
46	// Skip size class 0 which is a stand-in for large objects, but large
47	// objects are tracked separately (and they actually get placed in
48	// the last bucket, not the first).
49	sizeClassBuckets[0] = 1 // The smallest allocation is 1 byte in size.
50	for i := 1; i < _NumSizeClasses; i++ {
51		// Size classes have an inclusive upper-bound
52		// and exclusive lower bound (e.g. 48-byte size class is
53		// (32, 48]) whereas we want and inclusive lower-bound
54		// and exclusive upper-bound (e.g. 48-byte size class is
55		// [33, 49). We can achieve this by shifting all bucket
56		// boundaries up by 1.
57		//
58		// Also, a float64 can precisely represent integers with
59		// value up to 2^53 and size classes are relatively small
60		// (nowhere near 2^48 even) so this will give us exact
61		// boundaries.
62		sizeClassBuckets[i] = float64(class_to_size[i] + 1)
63	}
64	sizeClassBuckets = append(sizeClassBuckets, float64Inf())
65
66	timeHistBuckets = timeHistogramMetricsBuckets()
67	metrics = map[string]metricData{
68		"/gc/cycles/automatic:gc-cycles": {
69			deps: makeStatDepSet(sysStatsDep),
70			compute: func(in *statAggregate, out *metricValue) {
71				out.kind = metricKindUint64
72				out.scalar = in.sysStats.gcCyclesDone - in.sysStats.gcCyclesForced
73			},
74		},
75		"/gc/cycles/forced:gc-cycles": {
76			deps: makeStatDepSet(sysStatsDep),
77			compute: func(in *statAggregate, out *metricValue) {
78				out.kind = metricKindUint64
79				out.scalar = in.sysStats.gcCyclesForced
80			},
81		},
82		"/gc/cycles/total:gc-cycles": {
83			deps: makeStatDepSet(sysStatsDep),
84			compute: func(in *statAggregate, out *metricValue) {
85				out.kind = metricKindUint64
86				out.scalar = in.sysStats.gcCyclesDone
87			},
88		},
89		"/gc/heap/allocs-by-size:bytes": {
90			deps: makeStatDepSet(heapStatsDep),
91			compute: func(in *statAggregate, out *metricValue) {
92				hist := out.float64HistOrInit(sizeClassBuckets)
93				hist.counts[len(hist.counts)-1] = uint64(in.heapStats.largeAllocCount)
94				// Cut off the first index which is ostensibly for size class 0,
95				// but large objects are tracked separately so it's actually unused.
96				for i, count := range in.heapStats.smallAllocCount[1:] {
97					hist.counts[i] = uint64(count)
98				}
99			},
100		},
101		"/gc/heap/allocs:bytes": {
102			deps: makeStatDepSet(heapStatsDep),
103			compute: func(in *statAggregate, out *metricValue) {
104				out.kind = metricKindUint64
105				out.scalar = in.heapStats.totalAllocated
106			},
107		},
108		"/gc/heap/allocs:objects": {
109			deps: makeStatDepSet(heapStatsDep),
110			compute: func(in *statAggregate, out *metricValue) {
111				out.kind = metricKindUint64
112				out.scalar = in.heapStats.totalAllocs
113			},
114		},
115		"/gc/heap/frees-by-size:bytes": {
116			deps: makeStatDepSet(heapStatsDep),
117			compute: func(in *statAggregate, out *metricValue) {
118				hist := out.float64HistOrInit(sizeClassBuckets)
119				hist.counts[len(hist.counts)-1] = uint64(in.heapStats.largeFreeCount)
120				// Cut off the first index which is ostensibly for size class 0,
121				// but large objects are tracked separately so it's actually unused.
122				for i, count := range in.heapStats.smallFreeCount[1:] {
123					hist.counts[i] = uint64(count)
124				}
125			},
126		},
127		"/gc/heap/frees:bytes": {
128			deps: makeStatDepSet(heapStatsDep),
129			compute: func(in *statAggregate, out *metricValue) {
130				out.kind = metricKindUint64
131				out.scalar = in.heapStats.totalFreed
132			},
133		},
134		"/gc/heap/frees:objects": {
135			deps: makeStatDepSet(heapStatsDep),
136			compute: func(in *statAggregate, out *metricValue) {
137				out.kind = metricKindUint64
138				out.scalar = in.heapStats.totalFrees
139			},
140		},
141		"/gc/heap/goal:bytes": {
142			deps: makeStatDepSet(sysStatsDep),
143			compute: func(in *statAggregate, out *metricValue) {
144				out.kind = metricKindUint64
145				out.scalar = in.sysStats.heapGoal
146			},
147		},
148		"/gc/heap/objects:objects": {
149			deps: makeStatDepSet(heapStatsDep),
150			compute: func(in *statAggregate, out *metricValue) {
151				out.kind = metricKindUint64
152				out.scalar = in.heapStats.numObjects
153			},
154		},
155		"/gc/heap/tiny/allocs:objects": {
156			deps: makeStatDepSet(heapStatsDep),
157			compute: func(in *statAggregate, out *metricValue) {
158				out.kind = metricKindUint64
159				out.scalar = uint64(in.heapStats.tinyAllocCount)
160			},
161		},
162		"/gc/pauses:seconds": {
163			compute: func(_ *statAggregate, out *metricValue) {
164				hist := out.float64HistOrInit(timeHistBuckets)
165				// The bottom-most bucket, containing negative values, is tracked
166				// as a separately as underflow, so fill that in manually and then
167				// iterate over the rest.
168				hist.counts[0] = atomic.Load64(&memstats.gcPauseDist.underflow)
169				for i := range memstats.gcPauseDist.counts {
170					hist.counts[i+1] = atomic.Load64(&memstats.gcPauseDist.counts[i])
171				}
172			},
173		},
174		"/memory/classes/heap/free:bytes": {
175			deps: makeStatDepSet(heapStatsDep),
176			compute: func(in *statAggregate, out *metricValue) {
177				out.kind = metricKindUint64
178				out.scalar = uint64(in.heapStats.committed - in.heapStats.inHeap -
179					in.heapStats.inStacks - in.heapStats.inWorkBufs -
180					in.heapStats.inPtrScalarBits)
181			},
182		},
183		"/memory/classes/heap/objects:bytes": {
184			deps: makeStatDepSet(heapStatsDep),
185			compute: func(in *statAggregate, out *metricValue) {
186				out.kind = metricKindUint64
187				out.scalar = in.heapStats.inObjects
188			},
189		},
190		"/memory/classes/heap/released:bytes": {
191			deps: makeStatDepSet(heapStatsDep),
192			compute: func(in *statAggregate, out *metricValue) {
193				out.kind = metricKindUint64
194				out.scalar = uint64(in.heapStats.released)
195			},
196		},
197		"/memory/classes/heap/stacks:bytes": {
198			deps: makeStatDepSet(heapStatsDep),
199			compute: func(in *statAggregate, out *metricValue) {
200				out.kind = metricKindUint64
201				out.scalar = uint64(in.heapStats.inStacks)
202			},
203		},
204		"/memory/classes/heap/unused:bytes": {
205			deps: makeStatDepSet(heapStatsDep),
206			compute: func(in *statAggregate, out *metricValue) {
207				out.kind = metricKindUint64
208				out.scalar = uint64(in.heapStats.inHeap) - in.heapStats.inObjects
209			},
210		},
211		"/memory/classes/metadata/mcache/free:bytes": {
212			deps: makeStatDepSet(sysStatsDep),
213			compute: func(in *statAggregate, out *metricValue) {
214				out.kind = metricKindUint64
215				out.scalar = in.sysStats.mCacheSys - in.sysStats.mCacheInUse
216			},
217		},
218		"/memory/classes/metadata/mcache/inuse:bytes": {
219			deps: makeStatDepSet(sysStatsDep),
220			compute: func(in *statAggregate, out *metricValue) {
221				out.kind = metricKindUint64
222				out.scalar = in.sysStats.mCacheInUse
223			},
224		},
225		"/memory/classes/metadata/mspan/free:bytes": {
226			deps: makeStatDepSet(sysStatsDep),
227			compute: func(in *statAggregate, out *metricValue) {
228				out.kind = metricKindUint64
229				out.scalar = in.sysStats.mSpanSys - in.sysStats.mSpanInUse
230			},
231		},
232		"/memory/classes/metadata/mspan/inuse:bytes": {
233			deps: makeStatDepSet(sysStatsDep),
234			compute: func(in *statAggregate, out *metricValue) {
235				out.kind = metricKindUint64
236				out.scalar = in.sysStats.mSpanInUse
237			},
238		},
239		"/memory/classes/metadata/other:bytes": {
240			deps: makeStatDepSet(heapStatsDep, sysStatsDep),
241			compute: func(in *statAggregate, out *metricValue) {
242				out.kind = metricKindUint64
243				out.scalar = uint64(in.heapStats.inWorkBufs+in.heapStats.inPtrScalarBits) + in.sysStats.gcMiscSys
244			},
245		},
246		"/memory/classes/os-stacks:bytes": {
247			deps: makeStatDepSet(sysStatsDep),
248			compute: func(in *statAggregate, out *metricValue) {
249				out.kind = metricKindUint64
250				out.scalar = in.sysStats.stacksSys
251			},
252		},
253		"/memory/classes/other:bytes": {
254			deps: makeStatDepSet(sysStatsDep),
255			compute: func(in *statAggregate, out *metricValue) {
256				out.kind = metricKindUint64
257				out.scalar = in.sysStats.otherSys
258			},
259		},
260		"/memory/classes/profiling/buckets:bytes": {
261			deps: makeStatDepSet(sysStatsDep),
262			compute: func(in *statAggregate, out *metricValue) {
263				out.kind = metricKindUint64
264				out.scalar = in.sysStats.buckHashSys
265			},
266		},
267		"/memory/classes/total:bytes": {
268			deps: makeStatDepSet(heapStatsDep, sysStatsDep),
269			compute: func(in *statAggregate, out *metricValue) {
270				out.kind = metricKindUint64
271				out.scalar = uint64(in.heapStats.committed+in.heapStats.released) +
272					in.sysStats.stacksSys + in.sysStats.mSpanSys +
273					in.sysStats.mCacheSys + in.sysStats.buckHashSys +
274					in.sysStats.gcMiscSys + in.sysStats.otherSys
275			},
276		},
277		"/sched/goroutines:goroutines": {
278			compute: func(_ *statAggregate, out *metricValue) {
279				out.kind = metricKindUint64
280				out.scalar = uint64(gcount())
281			},
282		},
283		"/sched/latencies:seconds": {
284			compute: func(_ *statAggregate, out *metricValue) {
285				hist := out.float64HistOrInit(timeHistBuckets)
286				hist.counts[0] = atomic.Load64(&sched.timeToRun.underflow)
287				for i := range sched.timeToRun.counts {
288					hist.counts[i+1] = atomic.Load64(&sched.timeToRun.counts[i])
289				}
290			},
291		},
292	}
293	metricsInit = true
294}
295
// statDep identifies one group of statistics that a metric
// might depend on.
type statDep uint

const (
	heapStatsDep statDep = iota // corresponds to heapStatsAggregate
	sysStatsDep                 // corresponds to sysStatsAggregate
	numStatsDeps
)

// statDepSet represents a set of statDeps.
//
// It is stored as a bitmap: statDep d occupies bit d%64 of word d/64.
type statDepSet [1]uint64

// makeStatDepSet creates a new statDepSet containing exactly deps.
func makeStatDepSet(deps ...statDep) statDepSet {
	var set statDepSet
	for _, dep := range deps {
		word, bit := dep/64, dep%64
		set[word] |= 1 << bit
	}
	return set
}

// difference returns, as a new set, the members of s that are
// not also members of b.
func (s statDepSet) difference(b statDepSet) statDepSet {
	// s is already a private copy thanks to the value receiver.
	for i := range s {
		s[i] &^= b[i]
	}
	return s
}

// union returns, as a new set, every member of s or b.
func (s statDepSet) union(b statDepSet) statDepSet {
	// s is already a private copy thanks to the value receiver.
	for i := range s {
		s[i] |= b[i]
	}
	return s
}

// empty reports whether the set contains no dependencies.
func (s *statDepSet) empty() bool {
	for i := range s {
		if s[i] != 0 {
			return false
		}
	}
	return true
}

// has reports whether the set contains the given statDep.
func (s *statDepSet) has(d statDep) bool {
	mask := uint64(1) << (d % 64)
	return s[d/64]&mask != 0
}
352
// heapStatsAggregate represents memory stats obtained from the
// runtime. This set of stats is grouped together because they
// depend on each other in some way to make sense of the runtime's
// current heap memory use. They're also sharded across Ps, so it
// makes sense to grab them all at once.
type heapStatsAggregate struct {
	heapStatsDelta

	// Derived from values in heapStatsDelta.
	// All of the fields below are filled in by compute.

	// inObjects is the bytes of memory occupied by objects,
	// computed as totalAllocated - totalFreed.
	inObjects uint64

	// numObjects is the number of live objects in the heap,
	// computed as totalAllocs - totalFrees.
	numObjects uint64

	// totalAllocated is the total bytes of heap objects allocated
	// over the lifetime of the program.
	totalAllocated uint64

	// totalFreed is the total bytes of heap objects freed
	// over the lifetime of the program.
	totalFreed uint64

	// totalAllocs is the number of heap objects allocated over
	// the lifetime of the program.
	totalAllocs uint64

	// totalFrees is the number of heap objects freed over
	// the lifetime of the program.
	totalFrees uint64
}
385
386// compute populates the heapStatsAggregate with values from the runtime.
387func (a *heapStatsAggregate) compute() {
388	memstats.heapStats.read(&a.heapStatsDelta)
389
390	// Calculate derived stats.
391	a.totalAllocs = uint64(a.largeAllocCount)
392	a.totalFrees = uint64(a.largeFreeCount)
393	a.totalAllocated = uint64(a.largeAlloc)
394	a.totalFreed = uint64(a.largeFree)
395	for i := range a.smallAllocCount {
396		na := uint64(a.smallAllocCount[i])
397		nf := uint64(a.smallFreeCount[i])
398		a.totalAllocs += na
399		a.totalFrees += nf
400		a.totalAllocated += na * uint64(class_to_size[i])
401		a.totalFreed += nf * uint64(class_to_size[i])
402	}
403	a.inObjects = a.totalAllocated - a.totalFreed
404	a.numObjects = a.totalAllocs - a.totalFrees
405}
406
// sysStatsAggregate represents system memory stats obtained
// from the runtime. This set of stats is grouped together because
// they're all relatively cheap to acquire and generally independent
// of one another and other runtime memory stats. The fact that they
// may be acquired at different times, especially with respect to
// heapStatsAggregate, means there could be some skew, but because
// these stats are independent, there's no real consistency issue here.
//
// All fields are filled in by compute.
type sysStatsAggregate struct {
	stacksSys      uint64
	mSpanSys       uint64
	mSpanInUse     uint64
	mCacheSys      uint64
	mCacheInUse    uint64
	buckHashSys    uint64
	gcMiscSys      uint64
	otherSys       uint64
	heapGoal       uint64
	gcCyclesDone   uint64
	gcCyclesForced uint64
}
427
// compute populates the sysStatsAggregate with values from the runtime.
//
// Most fields are read directly from memstats and gcController. The
// mspan and mcache figures are read together on the system stack
// while holding mheap_.lock.
func (a *sysStatsAggregate) compute() {
	a.stacksSys = memstats.stacks_sys.load()
	a.buckHashSys = memstats.buckhash_sys.load()
	a.gcMiscSys = memstats.gcMiscSys.load()
	a.otherSys = memstats.other_sys.load()
	a.heapGoal = atomic.Load64(&gcController.heapGoal)
	a.gcCyclesDone = uint64(memstats.numgc)
	a.gcCyclesForced = uint64(memstats.numforcedgc)

	// NOTE(review): presumably spanalloc.inuse and cachealloc.inuse are
	// guarded by mheap_.lock, which is why these reads take it — confirm
	// against the mheap definition.
	systemstack(func() {
		lock(&mheap_.lock)
		a.mSpanSys = memstats.mspan_sys.load()
		a.mSpanInUse = uint64(mheap_.spanalloc.inuse)
		a.mCacheSys = memstats.mcache_sys.load()
		a.mCacheInUse = uint64(mheap_.cachealloc.inuse)
		unlock(&mheap_.lock)
	})
}
447
// statAggregate is the main driver of the metrics implementation.
//
// It contains multiple aggregates of runtime statistics, as well
// as a set of these aggregates that it has populated. The aggregates
// are populated lazily by its ensure method.
type statAggregate struct {
	// ensured is the set of aggregates below that ensure has
	// already computed.
	ensured   statDepSet
	heapStats heapStatsAggregate // populated for heapStatsDep
	sysStats  sysStatsAggregate  // populated for sysStatsDep
}
458
459// ensure populates statistics aggregates determined by deps if they
460// haven't yet been populated.
461func (a *statAggregate) ensure(deps *statDepSet) {
462	missing := deps.difference(a.ensured)
463	if missing.empty() {
464		return
465	}
466	for i := statDep(0); i < numStatsDeps; i++ {
467		if !missing.has(i) {
468			continue
469		}
470		switch i {
471		case heapStatsDep:
472			a.heapStats.compute()
473		case sysStatsDep:
474			a.sysStats.compute()
475		}
476	}
477	a.ensured = a.ensured.union(missing)
478}
479
// metricKind is a runtime copy of runtime/metrics.ValueKind and
// must be kept structurally identical to that type.
type metricKind int

const (
	// Each of these mirrors a Kind* value in the runtime/metrics
	// package, and the two must be kept identical.
	metricKindBad metricKind = iota
	metricKindUint64
	metricKindFloat64
	metricKindFloat64Histogram
)

// metricSample is a runtime copy of runtime/metrics.Sample and
// must be kept structurally identical to that type.
type metricSample struct {
	name  string
	value metricValue
}

// metricValue is a runtime copy of runtime/metrics.Value and
// must be kept structurally identical to that type.
type metricValue struct {
	kind    metricKind
	scalar  uint64         // contains scalar values for scalar Kinds.
	pointer unsafe.Pointer // contains non-scalar values.
}

// float64HistOrInit returns the float64 histogram stored in v. If v
// does not already hold one (its kind is different or its pointer is
// nil), a new histogram is allocated and v is updated to refer to it.
//
// In either case the histogram's buckets are set to the given buckets,
// and counts is reallocated (zeroed) only when its length does not
// already equal len(buckets)-1.
func (v *metricValue) float64HistOrInit(buckets []float64) *metricFloat64Histogram {
	if v.kind != metricKindFloat64Histogram || v.pointer == nil {
		// Nothing reusable: install a fresh histogram.
		fresh := new(metricFloat64Histogram)
		v.kind = metricKindFloat64Histogram
		v.pointer = unsafe.Pointer(fresh)
	}
	hist := (*metricFloat64Histogram)(v.pointer)
	hist.buckets = buckets
	// N bucket boundaries delimit N-1 buckets.
	if want := len(buckets) - 1; len(hist.counts) != want {
		hist.counts = make([]uint64, want)
	}
	return hist
}

// metricFloat64Histogram is a runtime copy of runtime/metrics.Float64Histogram
// and must be kept structurally identical to that type.
type metricFloat64Histogram struct {
	counts  []uint64
	buckets []float64
}
533
// agg is used by readMetrics, and is protected by metricsSema.
// readMetrics resets it to its zero value at the start of every call.
//
// Managed as a global variable because its pointer will be
// an argument to a dynamically-defined function, and we'd
// like to avoid it escaping to the heap.
var agg statAggregate
540
541// readMetrics is the implementation of runtime/metrics.Read.
542//
543//go:linkname readMetrics runtime/metrics.runtime_readMetrics
544func readMetrics(samplesp unsafe.Pointer, len int, cap int) {
545	// Construct a slice from the args.
546	sl := slice{samplesp, len, cap}
547	samples := *(*[]metricSample)(unsafe.Pointer(&sl))
548
549	// Acquire the metricsSema but with handoff. This operation
550	// is expensive enough that queueing up goroutines and handing
551	// off between them will be noticeably better-behaved.
552	semacquire1(&metricsSema, true, 0, 0)
553
554	// Ensure the map is initialized.
555	initMetrics()
556
557	// Clear agg defensively.
558	agg = statAggregate{}
559
560	// Sample.
561	for i := range samples {
562		sample := &samples[i]
563		data, ok := metrics[sample.name]
564		if !ok {
565			sample.value.kind = metricKindBad
566			continue
567		}
568		// Ensure we have all the stats we need.
569		// agg is populated lazily.
570		agg.ensure(&data.deps)
571
572		// Compute the value based on the stats we have.
573		data.compute(&agg, &sample.value)
574	}
575
576	semrelease(&metricsSema)
577}
578