1// Copyright 2014 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package profile provides a representation of
6// github.com/google/pprof/proto/profile.proto and
7// methods to encode/decode/merge profiles in this format.
8package profile
9
import (
	"bytes"
	"compress/gzip"
	"fmt"
	"io"
	"regexp"
	"sort"
	"strings"
	"time"
)
19
// Profile is an in-memory representation of profile.proto.
//
// CheckValid enforces the invariants that tie the tables together: every
// Sample carries exactly one value per SampleType entry, and every Mapping,
// Location and Function has a unique, non-zero ID.
type Profile struct {
	SampleType        []*ValueType
	DefaultSampleType string
	Sample            []*Sample
	Mapping           []*Mapping
	Location          []*Location
	Function          []*Function
	Comments          []string

	DropFrames string
	KeepFrames string

	// String renders TimeNanos as time.Unix(0, TimeNanos), i.e. nanoseconds
	// since the Unix epoch, and DurationNanos as a time.Duration. Both are
	// omitted from the String output when zero.
	TimeNanos     int64
	DurationNanos int64
	PeriodType    *ValueType
	Period        int64

	// The fields below are not referenced by the code in this part of the
	// file. NOTE(review): presumably they hold the raw encoded form (the
	// string table and indices into it) used by the encode/decode helpers;
	// confirm against the encoder.
	commentX           []int64
	dropFramesX        int64
	keepFramesX        int64
	stringTable        []string
	defaultSampleTypeX int64
}
44
// ValueType corresponds to Profile.ValueType
//
// Two value types are treated as compatible for merging when both their Type
// and their Unit are equal (see compatibleValueTypes).
type ValueType struct {
	Type string // cpu, wall, inuse_space, etc
	Unit string // seconds, nanoseconds, bytes, etc

	// Not referenced in this part of the file. NOTE(review): presumably
	// string-table indices for Type and Unit; confirm against the encoder.
	typeX int64
	unitX int64
}
53
// Sample corresponds to Profile.Sample
//
// CheckValid requires len(Value) to equal the number of sample types declared
// by the owning Profile. String prints the values, then the IDs of the
// entries in Location, then the Label and NumLabel maps as key:values pairs.
type Sample struct {
	Location []*Location
	Value    []int64
	Label    map[string][]string
	NumLabel map[string][]int64
	// NumUnit is not referenced in this part of the file. NOTE(review):
	// presumably the units of the NumLabel values; confirm.
	NumUnit  map[string][]string

	// Not referenced in this part of the file. NOTE(review): presumably the
	// encoded location IDs and labels; confirm against the encoder.
	locationIDX []uint64
	labelX      []Label
}
65
// Label corresponds to Profile.Label
//
// All fields are unexported and none is referenced in this part of the file.
// NOTE(review): presumably keyX and strX are indices into the profile's
// string table; confirm against the decoder.
type Label struct {
	keyX int64
	// Exactly one of the two following values must be set
	strX int64
	numX int64 // Integer value for this label
}
73
// Mapping corresponds to Profile.Mapping
//
// ID must be non-zero and unique within a Profile (see CheckValid). The Has*
// flags record which kinds of symbol information are available: Aggregate
// clears the flags for attributes it is asked to drop, and
// Profile.HasFunctions and Profile.HasFileLines consult them. String prints
// Start, Limit and Offset in hexadecimal.
type Mapping struct {
	ID              uint64
	Start           uint64
	Limit           uint64
	Offset          uint64
	File            string
	BuildID         string
	HasFunctions    bool
	HasFilenames    bool
	HasLineNumbers  bool
	HasInlineFrames bool

	// Not referenced in this part of the file. NOTE(review): presumably
	// string-table indices for File and BuildID; confirm against the encoder.
	fileX    int64
	buildIDX int64
}
90
// Location corresponds to Profile.Location
//
// ID must be non-zero and unique within a Profile. CheckValid also requires a
// non-nil Mapping, and every non-nil Line[i].Function, to be the very object
// registered under its ID in the owning Profile's tables (the check compares
// pointers, not contents). When inline frames are dropped, Aggregate keeps
// only the last entry of Line.
type Location struct {
	ID       uint64
	Mapping  *Mapping
	Address  uint64
	Line     []Line
	IsFolded bool

	// Not referenced in this part of the file. NOTE(review): presumably the
	// encoded ID of Mapping; confirm against the encoder.
	mappingIDX uint64
}
101
// Line corresponds to Profile.Line
//
// Function may be nil, in which case String prints "??" for the entry.
// Aggregate resets Line to 0 when line numbers are dropped.
type Line struct {
	Function *Function
	Line     int64

	// Not referenced in this part of the file. NOTE(review): presumably the
	// encoded ID of Function; confirm against the encoder.
	functionIDX uint64
}
109
// Function corresponds to Profile.Function
//
// ID must be non-zero and unique within a Profile (see CheckValid). Demangle
// looks functions up by SystemName and overwrites Name with the demangled
// form; Aggregate blanks Name, SystemName and Filename when the corresponding
// attribute is dropped.
type Function struct {
	ID         uint64
	Name       string
	SystemName string
	Filename   string
	StartLine  int64

	// Not referenced in this part of the file. NOTE(review): presumably
	// string-table indices for the string fields above; confirm against the
	// encoder.
	nameX       int64
	systemNameX int64
	filenameX   int64
}
122
123// Parse parses a profile and checks for its validity. The input
124// may be a gzip-compressed encoded protobuf or one of many legacy
125// profile formats which may be unsupported in the future.
126func Parse(r io.Reader) (*Profile, error) {
127	orig, err := io.ReadAll(r)
128	if err != nil {
129		return nil, err
130	}
131
132	var p *Profile
133	if len(orig) >= 2 && orig[0] == 0x1f && orig[1] == 0x8b {
134		gz, err := gzip.NewReader(bytes.NewBuffer(orig))
135		if err != nil {
136			return nil, fmt.Errorf("decompressing profile: %v", err)
137		}
138		data, err := io.ReadAll(gz)
139		if err != nil {
140			return nil, fmt.Errorf("decompressing profile: %v", err)
141		}
142		orig = data
143	}
144	if p, err = parseUncompressed(orig); err != nil {
145		if p, err = parseLegacy(orig); err != nil {
146			return nil, fmt.Errorf("parsing profile: %v", err)
147		}
148	}
149
150	if err := p.CheckValid(); err != nil {
151		return nil, fmt.Errorf("malformed profile: %v", err)
152	}
153	return p, nil
154}
155
// Sentinel errors used by the profile parsers.
var (
	// errUnrecognized is what parseLegacy returns when no legacy parser
	// accepts the input; a parser returning it makes parseLegacy move on
	// to the next parser.
	errUnrecognized = fmt.Errorf("unrecognized profile format")

	// errMalformed is not referenced in this part of the file.
	// NOTE(review): presumably returned by the legacy parsers for input
	// they recognize but cannot decode; confirm.
	errMalformed = fmt.Errorf("malformed profile format")
)
158
159func parseLegacy(data []byte) (*Profile, error) {
160	parsers := []func([]byte) (*Profile, error){
161		parseCPU,
162		parseHeap,
163		parseGoCount, // goroutine, threadcreate
164		parseThread,
165		parseContention,
166	}
167
168	for _, parser := range parsers {
169		p, err := parser(data)
170		if err == nil {
171			p.setMain()
172			p.addLegacyFrameInfo()
173			return p, nil
174		}
175		if err != errUnrecognized {
176			return nil, err
177		}
178	}
179	return nil, errUnrecognized
180}
181
182func parseUncompressed(data []byte) (*Profile, error) {
183	p := &Profile{}
184	if err := unmarshal(data, p); err != nil {
185		return nil, err
186	}
187
188	if err := p.postDecode(); err != nil {
189		return nil, err
190	}
191
192	return p, nil
193}
194
// libRx matches file names that look like shared libraries: names ending in
// ".so", or containing ".so" followed by "." or "_" and at least one digit
// (for example "libc.so.6"). setMain uses it to skip library mappings.
var libRx = regexp.MustCompile(`([.]so$|[.]so[._][0-9]+)`)
196
197// setMain scans Mapping entries and guesses which entry is main
198// because legacy profiles don't obey the convention of putting main
199// first.
200func (p *Profile) setMain() {
201	for i := 0; i < len(p.Mapping); i++ {
202		file := strings.TrimSpace(strings.ReplaceAll(p.Mapping[i].File, "(deleted)", ""))
203		if len(file) == 0 {
204			continue
205		}
206		if len(libRx.FindStringSubmatch(file)) > 0 {
207			continue
208		}
209		if strings.HasPrefix(file, "[") {
210			continue
211		}
212		// Swap what we guess is main to position 0.
213		p.Mapping[i], p.Mapping[0] = p.Mapping[0], p.Mapping[i]
214		break
215	}
216}
217
218// Write writes the profile as a gzip-compressed marshaled protobuf.
219func (p *Profile) Write(w io.Writer) error {
220	p.preEncode()
221	b := marshal(p)
222	zw := gzip.NewWriter(w)
223	defer zw.Close()
224	_, err := zw.Write(b)
225	return err
226}
227
228// CheckValid tests whether the profile is valid. Checks include, but are
229// not limited to:
230//   - len(Profile.Sample[n].value) == len(Profile.value_unit)
231//   - Sample.id has a corresponding Profile.Location
232func (p *Profile) CheckValid() error {
233	// Check that sample values are consistent
234	sampleLen := len(p.SampleType)
235	if sampleLen == 0 && len(p.Sample) != 0 {
236		return fmt.Errorf("missing sample type information")
237	}
238	for _, s := range p.Sample {
239		if len(s.Value) != sampleLen {
240			return fmt.Errorf("mismatch: sample has: %d values vs. %d types", len(s.Value), len(p.SampleType))
241		}
242	}
243
244	// Check that all mappings/locations/functions are in the tables
245	// Check that there are no duplicate ids
246	mappings := make(map[uint64]*Mapping, len(p.Mapping))
247	for _, m := range p.Mapping {
248		if m.ID == 0 {
249			return fmt.Errorf("found mapping with reserved ID=0")
250		}
251		if mappings[m.ID] != nil {
252			return fmt.Errorf("multiple mappings with same id: %d", m.ID)
253		}
254		mappings[m.ID] = m
255	}
256	functions := make(map[uint64]*Function, len(p.Function))
257	for _, f := range p.Function {
258		if f.ID == 0 {
259			return fmt.Errorf("found function with reserved ID=0")
260		}
261		if functions[f.ID] != nil {
262			return fmt.Errorf("multiple functions with same id: %d", f.ID)
263		}
264		functions[f.ID] = f
265	}
266	locations := make(map[uint64]*Location, len(p.Location))
267	for _, l := range p.Location {
268		if l.ID == 0 {
269			return fmt.Errorf("found location with reserved id=0")
270		}
271		if locations[l.ID] != nil {
272			return fmt.Errorf("multiple locations with same id: %d", l.ID)
273		}
274		locations[l.ID] = l
275		if m := l.Mapping; m != nil {
276			if m.ID == 0 || mappings[m.ID] != m {
277				return fmt.Errorf("inconsistent mapping %p: %d", m, m.ID)
278			}
279		}
280		for _, ln := range l.Line {
281			if f := ln.Function; f != nil {
282				if f.ID == 0 || functions[f.ID] != f {
283					return fmt.Errorf("inconsistent function %p: %d", f, f.ID)
284				}
285			}
286		}
287	}
288	return nil
289}
290
291// Aggregate merges the locations in the profile into equivalence
292// classes preserving the request attributes. It also updates the
293// samples to point to the merged locations.
294func (p *Profile) Aggregate(inlineFrame, function, filename, linenumber, address bool) error {
295	for _, m := range p.Mapping {
296		m.HasInlineFrames = m.HasInlineFrames && inlineFrame
297		m.HasFunctions = m.HasFunctions && function
298		m.HasFilenames = m.HasFilenames && filename
299		m.HasLineNumbers = m.HasLineNumbers && linenumber
300	}
301
302	// Aggregate functions
303	if !function || !filename {
304		for _, f := range p.Function {
305			if !function {
306				f.Name = ""
307				f.SystemName = ""
308			}
309			if !filename {
310				f.Filename = ""
311			}
312		}
313	}
314
315	// Aggregate locations
316	if !inlineFrame || !address || !linenumber {
317		for _, l := range p.Location {
318			if !inlineFrame && len(l.Line) > 1 {
319				l.Line = l.Line[len(l.Line)-1:]
320			}
321			if !linenumber {
322				for i := range l.Line {
323					l.Line[i].Line = 0
324				}
325			}
326			if !address {
327				l.Address = 0
328			}
329		}
330	}
331
332	return p.CheckValid()
333}
334
335// Print dumps a text representation of a profile. Intended mainly
336// for debugging purposes.
337func (p *Profile) String() string {
338
339	ss := make([]string, 0, len(p.Sample)+len(p.Mapping)+len(p.Location))
340	if pt := p.PeriodType; pt != nil {
341		ss = append(ss, fmt.Sprintf("PeriodType: %s %s", pt.Type, pt.Unit))
342	}
343	ss = append(ss, fmt.Sprintf("Period: %d", p.Period))
344	if p.TimeNanos != 0 {
345		ss = append(ss, fmt.Sprintf("Time: %v", time.Unix(0, p.TimeNanos)))
346	}
347	if p.DurationNanos != 0 {
348		ss = append(ss, fmt.Sprintf("Duration: %v", time.Duration(p.DurationNanos)))
349	}
350
351	ss = append(ss, "Samples:")
352	var sh1 string
353	for _, s := range p.SampleType {
354		sh1 = sh1 + fmt.Sprintf("%s/%s ", s.Type, s.Unit)
355	}
356	ss = append(ss, strings.TrimSpace(sh1))
357	for _, s := range p.Sample {
358		var sv string
359		for _, v := range s.Value {
360			sv = fmt.Sprintf("%s %10d", sv, v)
361		}
362		sv = sv + ": "
363		for _, l := range s.Location {
364			sv = sv + fmt.Sprintf("%d ", l.ID)
365		}
366		ss = append(ss, sv)
367		const labelHeader = "                "
368		if len(s.Label) > 0 {
369			ls := labelHeader
370			for k, v := range s.Label {
371				ls = ls + fmt.Sprintf("%s:%v ", k, v)
372			}
373			ss = append(ss, ls)
374		}
375		if len(s.NumLabel) > 0 {
376			ls := labelHeader
377			for k, v := range s.NumLabel {
378				ls = ls + fmt.Sprintf("%s:%v ", k, v)
379			}
380			ss = append(ss, ls)
381		}
382	}
383
384	ss = append(ss, "Locations")
385	for _, l := range p.Location {
386		locStr := fmt.Sprintf("%6d: %#x ", l.ID, l.Address)
387		if m := l.Mapping; m != nil {
388			locStr = locStr + fmt.Sprintf("M=%d ", m.ID)
389		}
390		if len(l.Line) == 0 {
391			ss = append(ss, locStr)
392		}
393		for li := range l.Line {
394			lnStr := "??"
395			if fn := l.Line[li].Function; fn != nil {
396				lnStr = fmt.Sprintf("%s %s:%d s=%d",
397					fn.Name,
398					fn.Filename,
399					l.Line[li].Line,
400					fn.StartLine)
401				if fn.Name != fn.SystemName {
402					lnStr = lnStr + "(" + fn.SystemName + ")"
403				}
404			}
405			ss = append(ss, locStr+lnStr)
406			// Do not print location details past the first line
407			locStr = "             "
408		}
409	}
410
411	ss = append(ss, "Mappings")
412	for _, m := range p.Mapping {
413		bits := ""
414		if m.HasFunctions {
415			bits += "[FN]"
416		}
417		if m.HasFilenames {
418			bits += "[FL]"
419		}
420		if m.HasLineNumbers {
421			bits += "[LN]"
422		}
423		if m.HasInlineFrames {
424			bits += "[IN]"
425		}
426		ss = append(ss, fmt.Sprintf("%d: %#x/%#x/%#x %s %s %s",
427			m.ID,
428			m.Start, m.Limit, m.Offset,
429			m.File,
430			m.BuildID,
431			bits))
432	}
433
434	return strings.Join(ss, "\n") + "\n"
435}
436
437// Merge adds profile p adjusted by ratio r into profile p. Profiles
438// must be compatible (same Type and SampleType).
439// TODO(rsilvera): consider normalizing the profiles based on the
440// total samples collected.
441func (p *Profile) Merge(pb *Profile, r float64) error {
442	if err := p.Compatible(pb); err != nil {
443		return err
444	}
445
446	pb = pb.Copy()
447
448	// Keep the largest of the two periods.
449	if pb.Period > p.Period {
450		p.Period = pb.Period
451	}
452
453	p.DurationNanos += pb.DurationNanos
454
455	p.Mapping = append(p.Mapping, pb.Mapping...)
456	for i, m := range p.Mapping {
457		m.ID = uint64(i + 1)
458	}
459	p.Location = append(p.Location, pb.Location...)
460	for i, l := range p.Location {
461		l.ID = uint64(i + 1)
462	}
463	p.Function = append(p.Function, pb.Function...)
464	for i, f := range p.Function {
465		f.ID = uint64(i + 1)
466	}
467
468	if r != 1.0 {
469		for _, s := range pb.Sample {
470			for i, v := range s.Value {
471				s.Value[i] = int64((float64(v) * r))
472			}
473		}
474	}
475	p.Sample = append(p.Sample, pb.Sample...)
476	return p.CheckValid()
477}
478
479// Compatible determines if two profiles can be compared/merged.
480// returns nil if the profiles are compatible; otherwise an error with
481// details on the incompatibility.
482func (p *Profile) Compatible(pb *Profile) error {
483	if !compatibleValueTypes(p.PeriodType, pb.PeriodType) {
484		return fmt.Errorf("incompatible period types %v and %v", p.PeriodType, pb.PeriodType)
485	}
486
487	if len(p.SampleType) != len(pb.SampleType) {
488		return fmt.Errorf("incompatible sample types %v and %v", p.SampleType, pb.SampleType)
489	}
490
491	for i := range p.SampleType {
492		if !compatibleValueTypes(p.SampleType[i], pb.SampleType[i]) {
493			return fmt.Errorf("incompatible sample types %v and %v", p.SampleType, pb.SampleType)
494		}
495	}
496
497	return nil
498}
499
500// HasFunctions determines if all locations in this profile have
501// symbolized function information.
502func (p *Profile) HasFunctions() bool {
503	for _, l := range p.Location {
504		if l.Mapping == nil || !l.Mapping.HasFunctions {
505			return false
506		}
507	}
508	return true
509}
510
511// HasFileLines determines if all locations in this profile have
512// symbolized file and line number information.
513func (p *Profile) HasFileLines() bool {
514	for _, l := range p.Location {
515		if l.Mapping == nil || (!l.Mapping.HasFilenames || !l.Mapping.HasLineNumbers) {
516			return false
517		}
518	}
519	return true
520}
521
522func compatibleValueTypes(v1, v2 *ValueType) bool {
523	if v1 == nil || v2 == nil {
524		return true // No grounds to disqualify.
525	}
526	return v1.Type == v2.Type && v1.Unit == v2.Unit
527}
528
529// Copy makes a fully independent copy of a profile.
530func (p *Profile) Copy() *Profile {
531	p.preEncode()
532	b := marshal(p)
533
534	pp := &Profile{}
535	if err := unmarshal(b, pp); err != nil {
536		panic(err)
537	}
538	if err := pp.postDecode(); err != nil {
539		panic(err)
540	}
541
542	return pp
543}
544
// Demangler maps symbol names to a human-readable form. This may
// include C++ demangling and additional simplification. Names that
// are not demangled may be missing from the resulting map.
//
// Profile.Demangle calls it once with the system names of all functions in
// the profile and looks each function up in the returned map by its system
// name; a non-nil error makes Demangle return without changing any name.
type Demangler func(name []string) (map[string]string, error)
549
550// Demangle attempts to demangle and optionally simplify any function
551// names referenced in the profile. It works on a best-effort basis:
552// it will silently preserve the original names in case of any errors.
553func (p *Profile) Demangle(d Demangler) error {
554	// Collect names to demangle.
555	var names []string
556	for _, fn := range p.Function {
557		names = append(names, fn.SystemName)
558	}
559
560	// Update profile with demangled names.
561	demangled, err := d(names)
562	if err != nil {
563		return err
564	}
565	for _, fn := range p.Function {
566		if dd, ok := demangled[fn.SystemName]; ok {
567			fn.Name = dd
568		}
569	}
570	return nil
571}
572
573// Empty reports whether the profile contains no samples.
574func (p *Profile) Empty() bool {
575	return len(p.Sample) == 0
576}
577
578// Scale multiplies all sample values in a profile by a constant.
579func (p *Profile) Scale(ratio float64) {
580	if ratio == 1 {
581		return
582	}
583	ratios := make([]float64, len(p.SampleType))
584	for i := range p.SampleType {
585		ratios[i] = ratio
586	}
587	p.ScaleN(ratios)
588}
589
590// ScaleN multiplies each sample values in a sample by a different amount.
591func (p *Profile) ScaleN(ratios []float64) error {
592	if len(p.SampleType) != len(ratios) {
593		return fmt.Errorf("mismatched scale ratios, got %d, want %d", len(ratios), len(p.SampleType))
594	}
595	allOnes := true
596	for _, r := range ratios {
597		if r != 1 {
598			allOnes = false
599			break
600		}
601	}
602	if allOnes {
603		return nil
604	}
605	for _, s := range p.Sample {
606		for i, v := range s.Value {
607			if ratios[i] != 1 {
608				s.Value[i] = int64(float64(v) * ratios[i])
609			}
610		}
611	}
612	return nil
613}
614