// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Malloc profiling.
// Patterned after tcmalloc's algorithms; shorter code.

package runtime

import (
	"internal/abi"
	"runtime/internal/atomic"
	"unsafe"
)

// NOTE(rsc): Everything here could use cas if contention became an issue.
var proflock mutex

// All memory allocations are local and do not escape outside of the profiler.
// The profiler is forbidden from referring to garbage-collected memory.

const (
	// profile types
	memProfile bucketType = 1 + iota
	blockProfile
	mutexProfile

	// size of bucket hash table
	buckHashSize = 179999

	// max depth of stack to record in bucket
	maxStack = 32
)

type bucketType int

// A bucket holds per-call-stack profiling information.
// The representation is a bit sleazy, inherited from C.
// This struct defines the bucket header. It is followed in
// memory by the stack words and then the actual record
// data, either a memRecord or a blockRecord.
//
// Per-call-stack profiling information.
// Lookup by hashing call stack into a linked-list hash table.
//
// No heap pointers.
//
//go:notinheap
type bucket struct {
	next    *bucket
	allnext *bucket
	typ     bucketType // memProfile or blockProfile (includes mutexProfile)
	hash    uintptr
	size    uintptr
	nstk    uintptr
}

// A memRecord is the bucket data for a bucket of type memProfile,
// part of the memory profile.
type memRecord struct {
	// The following complex 3-stage scheme of stats accumulation
	// is required to obtain a consistent picture of mallocs and frees
	// for some point in time.
	// The problem is that mallocs come in real time, while frees
	// come only after a GC during concurrent sweeping. So if we
	// counted them naively, we would get a skew toward mallocs.
	//
	// Hence, we delay information to get consistent snapshots as
	// of mark termination. Allocations count toward the next mark
	// termination's snapshot, while sweep frees count toward the
	// previous mark termination's snapshot:
	//
	//              MT          MT          MT          MT
	//             .·|         .·|         .·|         .·|
	//          .·˙  |      .·˙  |      .·˙  |      .·˙  |
	//       .·˙     |   .·˙     |   .·˙     |   .·˙     |
	//    .·˙        |.·˙        |.·˙        |.·˙        |
	//
	//       alloc → ▲ ← free
	//               ┠┅┅┅┅┅┅┅┅┅┅┅P
	//       C+2     →    C+1    →  C
	//
	//                   alloc → ▲ ← free
	//                           ┠┅┅┅┅┅┅┅┅┅┅┅P
	//                   C+2     →    C+1    →  C
	//
	// Since we can't publish a consistent snapshot until all of
	// the sweep frees are accounted for, we wait until the next
	// mark termination ("MT" above) to publish the previous mark
	// termination's snapshot ("P" above). To do this, allocation
	// and free events are accounted to *future* heap profile
	// cycles ("C+n" above) and we only publish a cycle once all
	// of the events from that cycle are known to be done. Specifically:
	//
	// Mallocs are accounted to cycle C+2.
	// Explicit frees are accounted to cycle C+2.
	// GC frees (done during sweeping) are accounted to cycle C+1.
	//
	// After mark termination, we increment the global heap
	// profile cycle counter and accumulate the stats from cycle C
	// into the active profile.

	// active is the currently published profile. A profiling
	// cycle can be accumulated into active once it's complete.
	active memRecordCycle

	// future records the profile events we're counting for cycles
	// that have not yet been published. This is a ring buffer
	// indexed by the global heap profile cycle C and stores
	// cycles C, C+1, and C+2. Unlike active, these counts are
	// only for a single cycle; they are not cumulative across
	// cycles.
	//
	// We store cycle C here because there's a window between when
	// C becomes the active cycle and when we've flushed it to
	// active.
	future [3]memRecordCycle
}
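
// For illustration of the ring indexing used below (a sketch, not additional
// runtime behavior): if the global cycle counter mProf.cycle is C, a sampled
// malloc is counted in future[(C+2)%3], a sweep free in future[(C+1)%3], and
// mProf_FlushLocked folds future[C%3] into active:
//
//	c := mProf.cycle                 // == C
//	mp.future[(c+2)%3].allocs++      // mProf_Malloc
//	mp.future[(c+1)%3].frees++       // mProf_Free (during sweep)
//	mp.active.add(&mp.future[c%3])   // mProf_FlushLocked
//	mp.future[c%3] = memRecordCycle{}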

// A memRecordCycle holds the allocation and free counts for a single
// heap profile cycle of a memRecord.
type memRecordCycle struct {
	allocs, frees           uintptr
	alloc_bytes, free_bytes uintptr
}

// add accumulates b into a. It does not zero b.
func (a *memRecordCycle) add(b *memRecordCycle) {
	a.allocs += b.allocs
	a.frees += b.frees
	a.alloc_bytes += b.alloc_bytes
	a.free_bytes += b.free_bytes
}

// A blockRecord is the bucket data for a bucket of type blockProfile,
// which is used in blocking and mutex profiles.
type blockRecord struct {
	count  float64
	cycles int64
}

var (
	mbuckets  *bucket // memory profile buckets
	bbuckets  *bucket // blocking profile buckets
	xbuckets  *bucket // mutex profile buckets
	buckhash  *[buckHashSize]*bucket
	bucketmem uintptr

	mProf struct {
		// All fields in mProf are protected by proflock.

		// cycle is the global heap profile cycle. This wraps
		// at mProfCycleWrap.
		cycle uint32
		// flushed indicates that future[cycle] in all buckets
		// has been flushed to the active profile.
		flushed bool
	}
)

const mProfCycleWrap = uint32(len(memRecord{}.future)) * (2 << 24)

// newBucket allocates a bucket with the given type and number of stack entries.
func newBucket(typ bucketType, nstk int) *bucket {
	size := unsafe.Sizeof(bucket{}) + uintptr(nstk)*unsafe.Sizeof(uintptr(0))
	switch typ {
	default:
		throw("invalid profile bucket type")
	case memProfile:
		size += unsafe.Sizeof(memRecord{})
	case blockProfile, mutexProfile:
		size += unsafe.Sizeof(blockRecord{})
	}

	b := (*bucket)(persistentalloc(size, 0, &memstats.buckhash_sys))
	bucketmem += size
	b.typ = typ
	b.nstk = uintptr(nstk)
	return b
}

// stk returns the slice in b holding the stack.
func (b *bucket) stk() []uintptr {
	stk := (*[maxStack]uintptr)(add(unsafe.Pointer(b), unsafe.Sizeof(*b)))
	return stk[:b.nstk:b.nstk]
}

// mp returns the memRecord associated with the memProfile bucket b.
func (b *bucket) mp() *memRecord {
	if b.typ != memProfile {
		throw("bad use of bucket.mp")
	}
	data := add(unsafe.Pointer(b), unsafe.Sizeof(*b)+b.nstk*unsafe.Sizeof(uintptr(0)))
	return (*memRecord)(data)
}

// bp returns the blockRecord associated with the blockProfile bucket b.
func (b *bucket) bp() *blockRecord {
	if b.typ != blockProfile && b.typ != mutexProfile {
		throw("bad use of bucket.bp")
	}
	data := add(unsafe.Pointer(b), unsafe.Sizeof(*b)+b.nstk*unsafe.Sizeof(uintptr(0)))
	return (*blockRecord)(data)
}

// Return the bucket for stk[0:nstk], allocating a new bucket if needed.
func stkbucket(typ bucketType, size uintptr, stk []uintptr, alloc bool) *bucket {
	if buckhash == nil {
		buckhash = (*[buckHashSize]*bucket)(sysAlloc(unsafe.Sizeof(*buckhash), &memstats.buckhash_sys))
		if buckhash == nil {
			throw("runtime: cannot allocate memory")
		}
	}

	// Hash stack.
	var h uintptr
	for _, pc := range stk {
		h += pc
		h += h << 10
		h ^= h >> 6
	}
	// hash in size
	h += size
	h += h << 10
	h ^= h >> 6
	// finalize
	h += h << 3
	h ^= h >> 11

	i := int(h % buckHashSize)
	for b := buckhash[i]; b != nil; b = b.next {
		if b.typ == typ && b.hash == h && b.size == size && eqslice(b.stk(), stk) {
			return b
		}
	}

	if !alloc {
		return nil
	}

	// Create new bucket.
	b := newBucket(typ, len(stk))
	copy(b.stk(), stk)
	b.hash = h
	b.size = size
	b.next = buckhash[i]
	buckhash[i] = b
	if typ == memProfile {
		b.allnext = mbuckets
		mbuckets = b
	} else if typ == mutexProfile {
		b.allnext = xbuckets
		xbuckets = b
	} else {
		b.allnext = bbuckets
		bbuckets = b
	}
	return b
}

func eqslice(x, y []uintptr) bool {
	if len(x) != len(y) {
		return false
	}
	for i, xi := range x {
		if xi != y[i] {
			return false
		}
	}
	return true
}

// mProf_NextCycle publishes the next heap profile cycle and creates a
// fresh heap profile cycle. This operation is fast and can be done
// during STW. The caller must call mProf_Flush before calling
// mProf_NextCycle again.
//
// This is called by mark termination during STW so allocations and
// frees after the world is started again count towards a new heap
// profiling cycle.
func mProf_NextCycle() {
	lock(&proflock)
	// We explicitly wrap mProf.cycle rather than depending on
	// uint wraparound because the memRecord.future ring does not
	// itself wrap at a power of two.
	mProf.cycle = (mProf.cycle + 1) % mProfCycleWrap
	mProf.flushed = false
	unlock(&proflock)
}

// mProf_Flush flushes the events from the current heap profiling
// cycle into the active profile. After this it is safe to start a new
// heap profiling cycle with mProf_NextCycle.
//
// This is called by GC after mark termination starts the world. In
// contrast with mProf_NextCycle, this is somewhat expensive, but safe
// to do concurrently.
func mProf_Flush() {
	lock(&proflock)
	if !mProf.flushed {
		mProf_FlushLocked()
		mProf.flushed = true
	}
	unlock(&proflock)
}

func mProf_FlushLocked() {
	c := mProf.cycle
	for b := mbuckets; b != nil; b = b.allnext {
		mp := b.mp()

		// Flush cycle C into the published profile and clear
		// it for reuse.
		mpc := &mp.future[c%uint32(len(mp.future))]
		mp.active.add(mpc)
		*mpc = memRecordCycle{}
	}
}

// mProf_PostSweep records that all sweep frees for this GC cycle have
// completed. This has the effect of publishing the heap profile
// snapshot as of the last mark termination without advancing the heap
// profile cycle.
func mProf_PostSweep() {
	lock(&proflock)
	// Flush cycle C+1 to the active profile so everything as of
	// the last mark termination becomes visible. *Don't* advance
	// the cycle, since we're still accumulating allocs in cycle
	// C+2, which have to become C+1 in the next mark termination
	// and so on.
	c := mProf.cycle
	for b := mbuckets; b != nil; b = b.allnext {
		mp := b.mp()
		mpc := &mp.future[(c+1)%uint32(len(mp.future))]
		mp.active.add(mpc)
		*mpc = memRecordCycle{}
	}
	unlock(&proflock)
}
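
// For reference, the GC drives the three functions above roughly in this
// order each cycle (an illustrative sketch of the sequencing described in
// their doc comments, not a verbatim copy of the GC code):
//
//	// world stopped for mark termination
//	mProf_NextCycle()  // open a fresh cycle; must be followed by a flush
//	// world restarted
//	mProf_Flush()      // fold the now-complete cycle into the active profile
//	// ... concurrent sweeping; mProf_Free records frees ...
//	mProf_PostSweep()  // publish the snapshot as of the last mark termination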

// Called by malloc to record a profiled block.
func mProf_Malloc(p unsafe.Pointer, size uintptr) {
	var stk [maxStack]uintptr
	nstk := callers(4, stk[:])
	lock(&proflock)
	b := stkbucket(memProfile, size, stk[:nstk], true)
	c := mProf.cycle
	mp := b.mp()
	mpc := &mp.future[(c+2)%uint32(len(mp.future))]
	mpc.allocs++
	mpc.alloc_bytes += size
	unlock(&proflock)

	// Setprofilebucket locks a bunch of other mutexes, so we call it outside of proflock.
	// This reduces potential contention and chances of deadlocks.
	// Since the object must be alive during the call to mProf_Malloc,
	// it's fine to do this non-atomically.
	systemstack(func() {
		setprofilebucket(p, b)
	})
}

// Called when freeing a profiled block.
func mProf_Free(b *bucket, size uintptr) {
	lock(&proflock)
	c := mProf.cycle
	mp := b.mp()
	mpc := &mp.future[(c+1)%uint32(len(mp.future))]
	mpc.frees++
	mpc.free_bytes += size
	unlock(&proflock)
}

var blockprofilerate uint64 // in CPU ticks

// SetBlockProfileRate controls the fraction of goroutine blocking events
// that are reported in the blocking profile. The profiler aims to sample
// an average of one blocking event per rate nanoseconds spent blocked.
//
// To include every blocking event in the profile, pass rate = 1.
// To turn off profiling entirely, pass rate <= 0.
func SetBlockProfileRate(rate int) {
	var r int64
	if rate <= 0 {
		r = 0 // disable profiling
	} else if rate == 1 {
		r = 1 // profile everything
	} else {
		// convert ns to cycles, use float64 to prevent overflow during multiplication
		r = int64(float64(rate) * float64(tickspersecond()) / (1000 * 1000 * 1000))
		if r == 0 {
			r = 1
		}
	}

	atomic.Store64(&blockprofilerate, uint64(r))
}
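
// A minimal usage sketch from user code (assuming the standard runtime/pprof
// package and a caller-chosen output file; not part of the runtime itself):
//
//	import (
//		"os"
//		"runtime"
//		"runtime/pprof"
//	)
//
//	func main() {
//		runtime.SetBlockProfileRate(1) // sample every blocking event
//		defer func() {
//			f, _ := os.Create("block.out") // error handling elided
//			defer f.Close()
//			pprof.Lookup("block").WriteTo(f, 0)
//		}()
//		// ... workload ...
//	}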

func blockevent(cycles int64, skip int) {
	if cycles <= 0 {
		cycles = 1
	}

	rate := int64(atomic.Load64(&blockprofilerate))
	if blocksampled(cycles, rate) {
		saveblockevent(cycles, rate, skip+1, blockProfile)
	}
}

// blocksampled returns true for all events where cycles >= rate. Shorter
// events have a cycles/rate random chance of returning true.
func blocksampled(cycles, rate int64) bool {
	if rate <= 0 || (rate > cycles && int64(fastrand())%rate > cycles) {
		return false
	}
	return true
}

func saveblockevent(cycles, rate int64, skip int, which bucketType) {
	gp := getg()
	var nstk int
	var stk [maxStack]uintptr
	if gp.m.curg == nil || gp.m.curg == gp {
		nstk = callers(skip, stk[:])
	} else {
		nstk = gcallers(gp.m.curg, skip, stk[:])
	}
	lock(&proflock)
	b := stkbucket(which, 0, stk[:nstk], true)

	if which == blockProfile && cycles < rate {
		// Remove sampling bias, see discussion on http://golang.org/cl/299991.
		b.bp().count += float64(rate) / float64(cycles)
		b.bp().cycles += rate
	} else {
		b.bp().count++
		b.bp().cycles += cycles
	}
	unlock(&proflock)
}
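
// A worked example of the bias correction above (illustrative numbers): with
// rate = 100 and cycles = 40, blocksampled keeps the event with probability
// roughly 40/100. When it is kept, the block-profile branch records
// count += 100/40 = 2.5 and cycles += 100, so the expected contribution is
// about 1 event and 40 cycles, matching what an unsampled profile would see.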

var mutexprofilerate uint64 // fraction sampled

// SetMutexProfileFraction controls the fraction of mutex contention events
// that are reported in the mutex profile. On average 1/rate events are
// reported. The previous rate is returned.
//
// To turn off profiling entirely, pass rate 0.
// To just read the current rate, pass rate < 0.
// (For n>1 the details of sampling may change.)
func SetMutexProfileFraction(rate int) int {
	if rate < 0 {
		return int(mutexprofilerate)
	}
	old := mutexprofilerate
	atomic.Store64(&mutexprofilerate, uint64(rate))
	return int(old)
}
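
// A minimal usage sketch from user code (again assuming runtime/pprof and an
// arbitrary file name; not part of the runtime itself):
//
//	runtime.SetMutexProfileFraction(5) // report ~1/5 of contention events
//	defer runtime.SetMutexProfileFraction(0)
//	// ... workload ...
//	f, _ := os.Create("mutex.out") // error handling elided
//	defer f.Close()
//	pprof.Lookup("mutex").WriteTo(f, 0)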

//go:linkname mutexevent sync.event
func mutexevent(cycles int64, skip int) {
	if cycles < 0 {
		cycles = 0
	}
	rate := int64(atomic.Load64(&mutexprofilerate))
	// TODO(pjw): measure impact of always calling fastrand vs using something
	// like malloc.go:nextSample()
	if rate > 0 && int64(fastrand())%rate == 0 {
		saveblockevent(cycles, rate, skip+1, mutexProfile)
	}
}

// Go interface to profile data.

// A StackRecord describes a single execution stack.
type StackRecord struct {
	Stack0 [32]uintptr // stack trace for this record; ends at first 0 entry
}

// Stack returns the stack trace associated with the record,
// a prefix of r.Stack0.
func (r *StackRecord) Stack() []uintptr {
	for i, v := range r.Stack0 {
		if v == 0 {
			return r.Stack0[0:i]
		}
	}
	return r.Stack0[0:]
}

// MemProfileRate controls the fraction of memory allocations
// that are recorded and reported in the memory profile.
// The profiler aims to sample an average of
// one allocation per MemProfileRate bytes allocated.
//
// To include every allocated block in the profile, set MemProfileRate to 1.
// To turn off profiling entirely, set MemProfileRate to 0.
//
// The tools that process the memory profiles assume that the
// profile rate is constant across the lifetime of the program
// and equal to the current value. Programs that change the
// memory profiling rate should do so just once, as early as
// possible in the execution of the program (for example,
// at the beginning of main).
var MemProfileRate int = defaultMemProfileRate(512 * 1024)
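
// A minimal usage sketch (assuming runtime/pprof for output and an arbitrary
// file name; set the rate once, as early as possible, as described above):
//
//	func main() {
//		runtime.MemProfileRate = 1 // record every allocation (expensive)
//		// ... workload ...
//		f, _ := os.Create("mem.out") // error handling elided
//		defer f.Close()
//		runtime.GC() // bring the profile up to date
//		pprof.WriteHeapProfile(f)
//	}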

// defaultMemProfileRate returns 0 if disableMemoryProfiling is set.
// It exists primarily for the godoc rendering of MemProfileRate
// above.
func defaultMemProfileRate(v int) int {
	if disableMemoryProfiling {
		return 0
	}
	return v
}

// disableMemoryProfiling is set by the linker if runtime.MemProfile
// is not used and the link type guarantees nobody else could use it
// elsewhere.
var disableMemoryProfiling bool

// A MemProfileRecord describes the live objects allocated
// by a particular call sequence (stack trace).
type MemProfileRecord struct {
	AllocBytes, FreeBytes     int64       // number of bytes allocated, freed
	AllocObjects, FreeObjects int64       // number of objects allocated, freed
	Stack0                    [32]uintptr // stack trace for this record; ends at first 0 entry
}

// InUseBytes returns the number of bytes in use (AllocBytes - FreeBytes).
func (r *MemProfileRecord) InUseBytes() int64 { return r.AllocBytes - r.FreeBytes }

// InUseObjects returns the number of objects in use (AllocObjects - FreeObjects).
func (r *MemProfileRecord) InUseObjects() int64 {
	return r.AllocObjects - r.FreeObjects
}

// Stack returns the stack trace associated with the record,
// a prefix of r.Stack0.
func (r *MemProfileRecord) Stack() []uintptr {
	for i, v := range r.Stack0 {
		if v == 0 {
			return r.Stack0[0:i]
		}
	}
	return r.Stack0[0:]
}

// MemProfile returns a profile of memory allocated and freed per allocation
// site.
//
// MemProfile returns n, the number of records in the current memory profile.
// If len(p) >= n, MemProfile copies the profile into p and returns n, true.
// If len(p) < n, MemProfile does not change p and returns n, false.
//
// If inuseZero is true, the profile includes allocation records
// where r.AllocBytes > 0 but r.AllocBytes == r.FreeBytes.
// These are sites where memory was allocated, but it has all
// been released back to the runtime.
//
// The returned profile may be up to two garbage collection cycles old.
// This is to avoid skewing the profile toward allocations; because
// allocations happen in real time but frees are delayed until the garbage
// collector performs sweeping, the profile only accounts for allocations
// that have had a chance to be freed by the garbage collector.
//
// Most clients should use the runtime/pprof package or
// the testing package's -test.memprofile flag instead
// of calling MemProfile directly.
func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
	lock(&proflock)
	// If we're between mProf_NextCycle and mProf_Flush, take care
	// of flushing to the active profile so we only have to look
	// at the active profile below.
	mProf_FlushLocked()
	clear := true
	for b := mbuckets; b != nil; b = b.allnext {
		mp := b.mp()
		if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
			n++
		}
		if mp.active.allocs != 0 || mp.active.frees != 0 {
			clear = false
		}
	}
	if clear {
		// Absolutely no data, suggesting that a garbage collection
		// has not yet happened. In order to allow profiling when
		// garbage collection is disabled from the beginning of execution,
		// accumulate all of the cycles, and recount buckets.
		n = 0
		for b := mbuckets; b != nil; b = b.allnext {
			mp := b.mp()
			for c := range mp.future {
				mp.active.add(&mp.future[c])
				mp.future[c] = memRecordCycle{}
			}
			if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
				n++
			}
		}
	}
	if n <= len(p) {
		ok = true
		idx := 0
		for b := mbuckets; b != nil; b = b.allnext {
			mp := b.mp()
			if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
				record(&p[idx], b)
				idx++
			}
		}
	}
	unlock(&proflock)
	return
}
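
// A sketch of the grow-and-retry pattern implied by the n/ok contract above
// (this mirrors what runtime/pprof does, but is only an illustration):
//
//	n, ok := runtime.MemProfile(nil, true)
//	var p []runtime.MemProfileRecord
//	for !ok {
//		// Allocate a little headroom in case more records appear
//		// between the two calls.
//		p = make([]runtime.MemProfileRecord, n+50)
//		n, ok = runtime.MemProfile(p, true)
//	}
//	p = p[:n]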

// Write b's data to r.
func record(r *MemProfileRecord, b *bucket) {
	mp := b.mp()
	r.AllocBytes = int64(mp.active.alloc_bytes)
	r.FreeBytes = int64(mp.active.free_bytes)
	r.AllocObjects = int64(mp.active.allocs)
	r.FreeObjects = int64(mp.active.frees)
	if raceenabled {
		racewriterangepc(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0), getcallerpc(), abi.FuncPCABIInternal(MemProfile))
	}
	if msanenabled {
		msanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0))
	}
	if asanenabled {
		asanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0))
	}
	copy(r.Stack0[:], b.stk())
	for i := int(b.nstk); i < len(r.Stack0); i++ {
		r.Stack0[i] = 0
	}
}

func iterate_memprof(fn func(*bucket, uintptr, *uintptr, uintptr, uintptr, uintptr)) {
	lock(&proflock)
	for b := mbuckets; b != nil; b = b.allnext {
		mp := b.mp()
		fn(b, b.nstk, &b.stk()[0], b.size, mp.active.allocs, mp.active.frees)
	}
	unlock(&proflock)
}

// BlockProfileRecord describes blocking events originating
// at a particular call sequence (stack trace).
type BlockProfileRecord struct {
	Count  int64
	Cycles int64
	StackRecord
}

// BlockProfile returns n, the number of records in the current blocking profile.
// If len(p) >= n, BlockProfile copies the profile into p and returns n, true.
// If len(p) < n, BlockProfile does not change p and returns n, false.
//
// Most clients should use the runtime/pprof package or
// the testing package's -test.blockprofile flag instead
// of calling BlockProfile directly.
func BlockProfile(p []BlockProfileRecord) (n int, ok bool) {
	lock(&proflock)
	for b := bbuckets; b != nil; b = b.allnext {
		n++
	}
	if n <= len(p) {
		ok = true
		for b := bbuckets; b != nil; b = b.allnext {
			bp := b.bp()
			r := &p[0]
			r.Count = int64(bp.count)
			// Prevent callers from having to worry about division by zero errors.
			// See discussion on http://golang.org/cl/299991.
			if r.Count == 0 {
				r.Count = 1
			}
			r.Cycles = bp.cycles
			if raceenabled {
				racewriterangepc(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0), getcallerpc(), abi.FuncPCABIInternal(BlockProfile))
			}
			if msanenabled {
				msanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0))
			}
			if asanenabled {
				asanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0))
			}
			i := copy(r.Stack0[:], b.stk())
			for ; i < len(r.Stack0); i++ {
				r.Stack0[i] = 0
			}
			p = p[1:]
		}
	}
	unlock(&proflock)
	return
}

// MutexProfile returns n, the number of records in the current mutex profile.
// If len(p) >= n, MutexProfile copies the profile into p and returns n, true.
// Otherwise, MutexProfile does not change p, and returns n, false.
//
// Most clients should use the runtime/pprof package
// instead of calling MutexProfile directly.
func MutexProfile(p []BlockProfileRecord) (n int, ok bool) {
	lock(&proflock)
	for b := xbuckets; b != nil; b = b.allnext {
		n++
	}
	if n <= len(p) {
		ok = true
		for b := xbuckets; b != nil; b = b.allnext {
			bp := b.bp()
			r := &p[0]
			r.Count = int64(bp.count)
			r.Cycles = bp.cycles
			i := copy(r.Stack0[:], b.stk())
			for ; i < len(r.Stack0); i++ {
				r.Stack0[i] = 0
			}
			p = p[1:]
		}
	}
	unlock(&proflock)
	return
}

// ThreadCreateProfile returns n, the number of records in the thread creation profile.
// If len(p) >= n, ThreadCreateProfile copies the profile into p and returns n, true.
// If len(p) < n, ThreadCreateProfile does not change p and returns n, false.
//
// Most clients should use the runtime/pprof package instead
// of calling ThreadCreateProfile directly.
func ThreadCreateProfile(p []StackRecord) (n int, ok bool) {
	first := (*m)(atomic.Loadp(unsafe.Pointer(&allm)))
	for mp := first; mp != nil; mp = mp.alllink {
		n++
	}
	if n <= len(p) {
		ok = true
		i := 0
		for mp := first; mp != nil; mp = mp.alllink {
			p[i].Stack0 = mp.createstack
			i++
		}
	}
	return
}

//go:linkname runtime_goroutineProfileWithLabels runtime/pprof.runtime_goroutineProfileWithLabels
func runtime_goroutineProfileWithLabels(p []StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
	return goroutineProfileWithLabels(p, labels)
}

// labels may be nil. If labels is non-nil, it must have the same length as p.
func goroutineProfileWithLabels(p []StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
	if labels != nil && len(labels) != len(p) {
		labels = nil
	}
	gp := getg()

	isOK := func(gp1 *g) bool {
		// Checking isSystemGoroutine here makes GoroutineProfile
		// consistent with both NumGoroutine and Stack.
		return gp1 != gp && readgstatus(gp1) != _Gdead && !isSystemGoroutine(gp1, false)
	}

	stopTheWorld("profile")

	// World is stopped, no locking required.
	n = 1
	forEachGRace(func(gp1 *g) {
		if isOK(gp1) {
			n++
		}
	})

	if n <= len(p) {
		ok = true
		r, lbl := p, labels

		// Save current goroutine.
		sp := getcallersp()
		pc := getcallerpc()
		systemstack(func() {
			saveg(pc, sp, gp, &r[0])
		})
		r = r[1:]

		// If we have a place to put our goroutine labelmap, insert it there.
		if labels != nil {
			lbl[0] = gp.labels
			lbl = lbl[1:]
		}

		// Save other goroutines.
		forEachGRace(func(gp1 *g) {
			if !isOK(gp1) {
				return
			}

			if len(r) == 0 {
				// Should be impossible, but better to return a
				// truncated profile than to crash the entire process.
				return
			}
			// saveg calls gentraceback, which may call cgo traceback functions.
			// The world is stopped, so it cannot use cgocall (which will be
			// blocked at exitsyscall). Do it on the system stack so it won't
			// call into the scheduler (see traceback.go:cgoContextPCs).
			systemstack(func() { saveg(^uintptr(0), ^uintptr(0), gp1, &r[0]) })
			if labels != nil {
				lbl[0] = gp1.labels
				lbl = lbl[1:]
			}
			r = r[1:]
		})
	}

	startTheWorld()
	return n, ok
}

// GoroutineProfile returns n, the number of records in the active goroutine stack profile.
// If len(p) >= n, GoroutineProfile copies the profile into p and returns n, true.
// If len(p) < n, GoroutineProfile does not change p and returns n, false.
//
// Most clients should use the runtime/pprof package instead
// of calling GoroutineProfile directly.
func GoroutineProfile(p []StackRecord) (n int, ok bool) {

	return goroutineProfileWithLabels(p, nil)
}

func saveg(pc, sp uintptr, gp *g, r *StackRecord) {
	n := gentraceback(pc, sp, 0, gp, 0, &r.Stack0[0], len(r.Stack0), nil, nil, 0)
	if n < len(r.Stack0) {
		r.Stack0[n] = 0
	}
}

// Stack formats a stack trace of the calling goroutine into buf
// and returns the number of bytes written to buf.
// If all is true, Stack formats stack traces of all other goroutines
// into buf after the trace for the current goroutine.
func Stack(buf []byte, all bool) int {
	if all {
		stopTheWorld("stack trace")
	}

	n := 0
	if len(buf) > 0 {
		gp := getg()
		sp := getcallersp()
		pc := getcallerpc()
		systemstack(func() {
			g0 := getg()
			// Force traceback=1 to override GOTRACEBACK setting,
			// so that Stack's results are consistent.
			// GOTRACEBACK is only about crash dumps.
			g0.m.traceback = 1
			g0.writebuf = buf[0:0:len(buf)]
			goroutineheader(gp)
			traceback(pc, sp, 0, gp)
			if all {
				tracebackothers(gp)
			}
			g0.m.traceback = 0
			n = len(g0.writebuf)
			g0.writebuf = nil
		})
	}

	if all {
		startTheWorld()
	}
	return n
}
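
// A minimal usage sketch (the buffer size is an arbitrary caller choice; a
// buffer that is too small simply yields a truncated trace):
//
//	buf := make([]byte, 1<<20)
//	n := runtime.Stack(buf, true) // true: include all other goroutines too
//	os.Stdout.Write(buf[:n])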

// Tracing of alloc/free/gc.

var tracelock mutex

func tracealloc(p unsafe.Pointer, size uintptr, typ *_type) {
	lock(&tracelock)
	gp := getg()
	gp.m.traceback = 2
	if typ == nil {
		print("tracealloc(", p, ", ", hex(size), ")\n")
	} else {
		print("tracealloc(", p, ", ", hex(size), ", ", typ.string(), ")\n")
	}
	if gp.m.curg == nil || gp == gp.m.curg {
		goroutineheader(gp)
		pc := getcallerpc()
		sp := getcallersp()
		systemstack(func() {
			traceback(pc, sp, 0, gp)
		})
	} else {
		goroutineheader(gp.m.curg)
		traceback(^uintptr(0), ^uintptr(0), 0, gp.m.curg)
	}
	print("\n")
	gp.m.traceback = 0
	unlock(&tracelock)
}

func tracefree(p unsafe.Pointer, size uintptr) {
	lock(&tracelock)
	gp := getg()
	gp.m.traceback = 2
	print("tracefree(", p, ", ", hex(size), ")\n")
	goroutineheader(gp)
	pc := getcallerpc()
	sp := getcallersp()
	systemstack(func() {
		traceback(pc, sp, 0, gp)
	})
	print("\n")
	gp.m.traceback = 0
	unlock(&tracelock)
}

func tracegc() {
	lock(&tracelock)
	gp := getg()
	gp.m.traceback = 2
	print("tracegc()\n")
	// running on m->g0 stack; show all non-g0 goroutines
	tracebackothers(gp)
	print("end tracegc\n")
	print("\n")
	gp.m.traceback = 0
	unlock(&tracelock)
}