1// Copyright 2014 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// This file implements parsers to convert legacy profiles into the
6// profile.proto format.
7
8package profile
9
10import (
11	"bufio"
12	"bytes"
13	"fmt"
14	"io"
15	"math"
16	"regexp"
17	"strconv"
18	"strings"
19)
20
// Regular expressions used to recognize lines of the legacy text
// profile formats handled in this file, plus the LegacyHeapAllocated
// switch consulted by the heap parsers.
var (
	// countStartRE matches the header line of a count profile,
	// "<name> profile: total <n>", capturing the profile name.
	// countRE matches a count sample line: a decimal count, "@", and
	// one or more space-separated 0x-prefixed lowercase hex addresses.
	countStartRE = regexp.MustCompile(`\A(\w+) profile: total \d+\n\z`)
	countRE      = regexp.MustCompile(`\A(\d+) @(( 0x[0-9a-f]+)+)\n\z`)

	// heapHeaderRE matches a heap profile header of the form
	// "heap profile: N: N [ N: N ] @ heap<suffix>/<digits>", capturing
	// the four numbers, the format name and the optional trailing digits.
	// heapSampleRE matches a heap sample line "N: N [ N: N ] @ <addrs>";
	// the first two numbers may be negative.
	heapHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] *@ *(heap[_a-z0-9]*)/?(\d*)`)
	heapSampleRE = regexp.MustCompile(`(-?\d+): *(-?\d+) *\[ *(\d+): *(\d+) *] @([ x0-9a-f]*)`)

	// contentionSampleRE matches a contention sample line: two decimal
	// numbers followed by "@" and a list of addresses.
	contentionSampleRE = regexp.MustCompile(`(\d+) *(\d+) @([ x0-9a-f]*)`)

	// hexNumberRE matches a 0x-prefixed lowercase hexadecimal number of
	// any length.
	hexNumberRE = regexp.MustCompile(`0x[0-9a-f]+`)

	// growthHeaderRE matches a heap-style header ending in "@ growthz".
	growthHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ growthz`)

	// fragmentationHeaderRE matches a heap-style header ending in
	// "@ fragmentationz".
	fragmentationHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ fragmentationz`)

	// threadzStartRE matches the "--- threadz <n> ---" banner.
	// threadStartRE matches a per-thread banner of the form
	// "--- Thread <hex> (name: <name>/<n>) stack: ---", capturing the
	// three variable parts.
	threadzStartRE = regexp.MustCompile(`--- threadz \d+ ---`)
	threadStartRE  = regexp.MustCompile(`--- Thread ([[:xdigit:]]+) \(name: (.*)/(\d+)\) stack: ---`)

	// procMapsRE matches a mapping line laid out as
	// "<start>-<limit> <perms> <offset> <hex>:<hex> <digits> [<file>]".
	procMapsRE = regexp.MustCompile(`([[:xdigit:]]+)-([[:xdigit:]]+)\s+([-rwxp]+)\s+([[:xdigit:]]+)\s+([[:xdigit:]]+):([[:xdigit:]]+)\s+([[:digit:]]+)\s*(\S+)?`)

	// briefMapsRE matches the shorter mapping form
	// "<start>-<limit>: <file>", optionally followed by text ending in
	// "@" and a hex offset.
	briefMapsRE = regexp.MustCompile(`\s*([[:xdigit:]]+)-([[:xdigit:]]+):\s*(\S+)(\s.*@)?([[:xdigit:]]+)?`)

	// LegacyHeapAllocated instructs the heapz parsers to use the
	// allocated memory stats instead of the default in-use memory. Note
	// that tcmalloc doesn't provide all allocated memory, only in-use
	// stats.
	LegacyHeapAllocated bool
)
49
// isSpaceOrComment reports whether line is blank (empty or only
// whitespace) or a comment, i.e. its first non-whitespace character
// is '#'.
func isSpaceOrComment(line string) bool {
	body := strings.TrimSpace(line)
	if body == "" {
		return true
	}
	return strings.HasPrefix(body, "#")
}
54
55// parseGoCount parses a Go count profile (e.g., threadcreate or
56// goroutine) and returns a new Profile.
57func parseGoCount(b []byte) (*Profile, error) {
58	r := bytes.NewBuffer(b)
59
60	var line string
61	var err error
62	for {
63		// Skip past comments and empty lines seeking a real header.
64		line, err = r.ReadString('\n')
65		if err != nil {
66			return nil, err
67		}
68		if !isSpaceOrComment(line) {
69			break
70		}
71	}
72
73	m := countStartRE.FindStringSubmatch(line)
74	if m == nil {
75		return nil, errUnrecognized
76	}
77	profileType := m[1]
78	p := &Profile{
79		PeriodType: &ValueType{Type: profileType, Unit: "count"},
80		Period:     1,
81		SampleType: []*ValueType{{Type: profileType, Unit: "count"}},
82	}
83	locations := make(map[uint64]*Location)
84	for {
85		line, err = r.ReadString('\n')
86		if err != nil {
87			if err == io.EOF {
88				break
89			}
90			return nil, err
91		}
92		if isSpaceOrComment(line) {
93			continue
94		}
95		if strings.HasPrefix(line, "---") {
96			break
97		}
98		m := countRE.FindStringSubmatch(line)
99		if m == nil {
100			return nil, errMalformed
101		}
102		n, err := strconv.ParseInt(m[1], 0, 64)
103		if err != nil {
104			return nil, errMalformed
105		}
106		fields := strings.Fields(m[2])
107		locs := make([]*Location, 0, len(fields))
108		for _, stk := range fields {
109			addr, err := strconv.ParseUint(stk, 0, 64)
110			if err != nil {
111				return nil, errMalformed
112			}
113			// Adjust all frames by -1 to land on the call instruction.
114			addr--
115			loc := locations[addr]
116			if loc == nil {
117				loc = &Location{
118					Address: addr,
119				}
120				locations[addr] = loc
121				p.Location = append(p.Location, loc)
122			}
123			locs = append(locs, loc)
124		}
125		p.Sample = append(p.Sample, &Sample{
126			Location: locs,
127			Value:    []int64{n},
128		})
129	}
130
131	if err = parseAdditionalSections(strings.TrimSpace(line), r, p); err != nil {
132		return nil, err
133	}
134	return p, nil
135}
136
137// remapLocationIDs ensures there is a location for each address
138// referenced by a sample, and remaps the samples to point to the new
139// location ids.
140func (p *Profile) remapLocationIDs() {
141	seen := make(map[*Location]bool, len(p.Location))
142	var locs []*Location
143
144	for _, s := range p.Sample {
145		for _, l := range s.Location {
146			if seen[l] {
147				continue
148			}
149			l.ID = uint64(len(locs) + 1)
150			locs = append(locs, l)
151			seen[l] = true
152		}
153	}
154	p.Location = locs
155}
156
157func (p *Profile) remapFunctionIDs() {
158	seen := make(map[*Function]bool, len(p.Function))
159	var fns []*Function
160
161	for _, l := range p.Location {
162		for _, ln := range l.Line {
163			fn := ln.Function
164			if fn == nil || seen[fn] {
165				continue
166			}
167			fn.ID = uint64(len(fns) + 1)
168			fns = append(fns, fn)
169			seen[fn] = true
170		}
171	}
172	p.Function = fns
173}
174
175// remapMappingIDs matches location addresses with existing mappings
176// and updates them appropriately. This is O(N*M), if this ever shows
177// up as a bottleneck, evaluate sorting the mappings and doing a
178// binary search, which would make it O(N*log(M)).
179func (p *Profile) remapMappingIDs() {
180	if len(p.Mapping) == 0 {
181		return
182	}
183
184	// Some profile handlers will incorrectly set regions for the main
185	// executable if its section is remapped. Fix them through heuristics.
186
187	// Remove the initial mapping if named '/anon_hugepage' and has a
188	// consecutive adjacent mapping.
189	if m := p.Mapping[0]; strings.HasPrefix(m.File, "/anon_hugepage") {
190		if len(p.Mapping) > 1 && m.Limit == p.Mapping[1].Start {
191			p.Mapping = p.Mapping[1:]
192		}
193	}
194
195	// Subtract the offset from the start of the main mapping if it
196	// ends up at a recognizable start address.
197	const expectedStart = 0x400000
198	if m := p.Mapping[0]; m.Start-m.Offset == expectedStart {
199		m.Start = expectedStart
200		m.Offset = 0
201	}
202
203	for _, l := range p.Location {
204		if a := l.Address; a != 0 {
205			for _, m := range p.Mapping {
206				if m.Start <= a && a < m.Limit {
207					l.Mapping = m
208					break
209				}
210			}
211		}
212	}
213
214	// Reset all mapping IDs.
215	for i, m := range p.Mapping {
216		m.ID = uint64(i + 1)
217	}
218}
219
// cpuInts lists the word decoders that parseCPU tries, in this order,
// when probing the header of a binary profile: 32-bit little-endian,
// 32-bit big-endian, 64-bit little-endian and 64-bit big-endian. Each
// decoder returns the decoded word and the remaining bytes, or
// (0, nil) when the input is too short.
var cpuInts = []func([]byte) (uint64, []byte){
	get32l,
	get32b,
	get64l,
	get64b,
}
226
// get32l decodes the first four bytes of b as a little-endian 32-bit
// word and returns it together with the rest of b. If b holds fewer
// than four bytes it returns (0, nil).
func get32l(b []byte) (uint64, []byte) {
	const width = 4
	if len(b) < width {
		return 0, nil
	}
	var word uint64
	// Fold from the most significant (last) byte down to the first.
	for i := width - 1; i >= 0; i-- {
		word = word<<8 | uint64(b[i])
	}
	return word, b[width:]
}
233
// get32b decodes the first four bytes of b as a big-endian 32-bit word
// and returns it together with the rest of b. If b holds fewer than
// four bytes it returns (0, nil).
func get32b(b []byte) (uint64, []byte) {
	const width = 4
	if len(b) < width {
		return 0, nil
	}
	var word uint64
	// The first byte is the most significant one.
	for _, octet := range b[:width] {
		word = word<<8 | uint64(octet)
	}
	return word, b[width:]
}
240
// get64l decodes the first eight bytes of b as a little-endian 64-bit
// word and returns it together with the rest of b. If b holds fewer
// than eight bytes it returns (0, nil).
func get64l(b []byte) (uint64, []byte) {
	const width = 8
	if len(b) < width {
		return 0, nil
	}
	var word uint64
	// Fold from the most significant (last) byte down to the first.
	for i := width - 1; i >= 0; i-- {
		word = word<<8 | uint64(b[i])
	}
	return word, b[width:]
}
247
// get64b decodes the first eight bytes of b as a big-endian 64-bit
// word and returns it together with the rest of b. If b holds fewer
// than eight bytes it returns (0, nil).
func get64b(b []byte) (uint64, []byte) {
	const width = 8
	if len(b) < width {
		return 0, nil
	}
	var word uint64
	// The first byte is the most significant one.
	for _, octet := range b[:width] {
		word = word<<8 | uint64(octet)
	}
	return word, b[width:]
}
254
255// ParseTracebacks parses a set of tracebacks and returns a newly
256// populated profile. It will accept any text file and generate a
257// Profile out of it with any hex addresses it can identify, including
258// a process map if it can recognize one. Each sample will include a
259// tag "source" with the addresses recognized in string format.
260func ParseTracebacks(b []byte) (*Profile, error) {
261	r := bytes.NewBuffer(b)
262
263	p := &Profile{
264		PeriodType: &ValueType{Type: "trace", Unit: "count"},
265		Period:     1,
266		SampleType: []*ValueType{
267			{Type: "trace", Unit: "count"},
268		},
269	}
270
271	var sources []string
272	var sloc []*Location
273
274	locs := make(map[uint64]*Location)
275	for {
276		l, err := r.ReadString('\n')
277		if err != nil {
278			if err != io.EOF {
279				return nil, err
280			}
281			if l == "" {
282				break
283			}
284		}
285		if sectionTrigger(l) == memoryMapSection {
286			break
287		}
288		if s, addrs := extractHexAddresses(l); len(s) > 0 {
289			for _, addr := range addrs {
290				// Addresses from stack traces point to the next instruction after
291				// each call. Adjust by -1 to land somewhere on the actual call.
292				addr--
293				loc := locs[addr]
294				if locs[addr] == nil {
295					loc = &Location{
296						Address: addr,
297					}
298					p.Location = append(p.Location, loc)
299					locs[addr] = loc
300				}
301				sloc = append(sloc, loc)
302			}
303
304			sources = append(sources, s...)
305		} else {
306			if len(sources) > 0 || len(sloc) > 0 {
307				addTracebackSample(sloc, sources, p)
308				sloc, sources = nil, nil
309			}
310		}
311	}
312
313	// Add final sample to save any leftover data.
314	if len(sources) > 0 || len(sloc) > 0 {
315		addTracebackSample(sloc, sources, p)
316	}
317
318	if err := p.ParseMemoryMap(r); err != nil {
319		return nil, err
320	}
321	return p, nil
322}
323
324func addTracebackSample(l []*Location, s []string, p *Profile) {
325	p.Sample = append(p.Sample,
326		&Sample{
327			Value:    []int64{1},
328			Location: l,
329			Label:    map[string][]string{"source": s},
330		})
331}
332
333// parseCPU parses a profilez legacy profile and returns a newly
334// populated Profile.
335//
336// The general format for profilez samples is a sequence of words in
337// binary format. The first words are a header with the following data:
338//   1st word -- 0
339//   2nd word -- 3
340//   3rd word -- 0 if a c++ application, 1 if a java application.
341//   4th word -- Sampling period (in microseconds).
342//   5th word -- Padding.
343func parseCPU(b []byte) (*Profile, error) {
344	var parse func([]byte) (uint64, []byte)
345	var n1, n2, n3, n4, n5 uint64
346	for _, parse = range cpuInts {
347		var tmp []byte
348		n1, tmp = parse(b)
349		n2, tmp = parse(tmp)
350		n3, tmp = parse(tmp)
351		n4, tmp = parse(tmp)
352		n5, tmp = parse(tmp)
353
354		if tmp != nil && n1 == 0 && n2 == 3 && n3 == 0 && n4 > 0 && n5 == 0 {
355			b = tmp
356			return cpuProfile(b, int64(n4), parse)
357		}
358	}
359	return nil, errUnrecognized
360}
361
362// cpuProfile returns a new Profile from C++ profilez data.
363// b is the profile bytes after the header, period is the profiling
364// period, and parse is a function to parse 8-byte chunks from the
365// profile in its native endianness.
366func cpuProfile(b []byte, period int64, parse func(b []byte) (uint64, []byte)) (*Profile, error) {
367	p := &Profile{
368		Period:     period * 1000,
369		PeriodType: &ValueType{Type: "cpu", Unit: "nanoseconds"},
370		SampleType: []*ValueType{
371			{Type: "samples", Unit: "count"},
372			{Type: "cpu", Unit: "nanoseconds"},
373		},
374	}
375	var err error
376	if b, _, err = parseCPUSamples(b, parse, true, p); err != nil {
377		return nil, err
378	}
379
380	// If all samples have the same second-to-the-bottom frame, it
381	// strongly suggests that it is an uninteresting artifact of
382	// measurement -- a stack frame pushed by the signal handler. The
383	// bottom frame is always correct as it is picked up from the signal
384	// structure, not the stack. Check if this is the case and if so,
385	// remove.
386	if len(p.Sample) > 1 && len(p.Sample[0].Location) > 1 {
387		allSame := true
388		id1 := p.Sample[0].Location[1].Address
389		for _, s := range p.Sample {
390			if len(s.Location) < 2 || id1 != s.Location[1].Address {
391				allSame = false
392				break
393			}
394		}
395		if allSame {
396			for _, s := range p.Sample {
397				s.Location = append(s.Location[:1], s.Location[2:]...)
398			}
399		}
400	}
401
402	if err := p.ParseMemoryMap(bytes.NewBuffer(b)); err != nil {
403		return nil, err
404	}
405	return p, nil
406}
407
408// parseCPUSamples parses a collection of profilez samples from a
409// profile.
410//
411// profilez samples are a repeated sequence of stack frames of the
412// form:
413//    1st word -- The number of times this stack was encountered.
414//    2nd word -- The size of the stack (StackSize).
415//    3rd word -- The first address on the stack.
416//    ...
417//    StackSize + 2 -- The last address on the stack
418// The last stack trace is of the form:
419//   1st word -- 0
420//   2nd word -- 1
421//   3rd word -- 0
422//
423// Addresses from stack traces may point to the next instruction after
424// each call. Optionally adjust by -1 to land somewhere on the actual
425// call (except for the leaf, which is not a call).
426func parseCPUSamples(b []byte, parse func(b []byte) (uint64, []byte), adjust bool, p *Profile) ([]byte, map[uint64]*Location, error) {
427	locs := make(map[uint64]*Location)
428	for len(b) > 0 {
429		var count, nstk uint64
430		count, b = parse(b)
431		nstk, b = parse(b)
432		if b == nil || nstk > uint64(len(b)/4) {
433			return nil, nil, errUnrecognized
434		}
435		var sloc []*Location
436		addrs := make([]uint64, nstk)
437		for i := 0; i < int(nstk); i++ {
438			addrs[i], b = parse(b)
439		}
440
441		if count == 0 && nstk == 1 && addrs[0] == 0 {
442			// End of data marker
443			break
444		}
445		for i, addr := range addrs {
446			if adjust && i > 0 {
447				addr--
448			}
449			loc := locs[addr]
450			if loc == nil {
451				loc = &Location{
452					Address: addr,
453				}
454				locs[addr] = loc
455				p.Location = append(p.Location, loc)
456			}
457			sloc = append(sloc, loc)
458		}
459		p.Sample = append(p.Sample,
460			&Sample{
461				Value:    []int64{int64(count), int64(count) * p.Period},
462				Location: sloc,
463			})
464	}
465	// Reached the end without finding the EOD marker.
466	return b, locs, nil
467}
468
469// parseHeap parses a heapz legacy or a growthz profile and
470// returns a newly populated Profile.
471func parseHeap(b []byte) (p *Profile, err error) {
472	r := bytes.NewBuffer(b)
473	l, err := r.ReadString('\n')
474	if err != nil {
475		return nil, errUnrecognized
476	}
477
478	sampling := ""
479
480	if header := heapHeaderRE.FindStringSubmatch(l); header != nil {
481		p = &Profile{
482			SampleType: []*ValueType{
483				{Type: "objects", Unit: "count"},
484				{Type: "space", Unit: "bytes"},
485			},
486			PeriodType: &ValueType{Type: "objects", Unit: "bytes"},
487		}
488
489		var period int64
490		if len(header[6]) > 0 {
491			if period, err = strconv.ParseInt(header[6], 10, 64); err != nil {
492				return nil, errUnrecognized
493			}
494		}
495
496		switch header[5] {
497		case "heapz_v2", "heap_v2":
498			sampling, p.Period = "v2", period
499		case "heapprofile":
500			sampling, p.Period = "", 1
501		case "heap":
502			sampling, p.Period = "v2", period/2
503		default:
504			return nil, errUnrecognized
505		}
506	} else if header = growthHeaderRE.FindStringSubmatch(l); header != nil {
507		p = &Profile{
508			SampleType: []*ValueType{
509				{Type: "objects", Unit: "count"},
510				{Type: "space", Unit: "bytes"},
511			},
512			PeriodType: &ValueType{Type: "heapgrowth", Unit: "count"},
513			Period:     1,
514		}
515	} else if header = fragmentationHeaderRE.FindStringSubmatch(l); header != nil {
516		p = &Profile{
517			SampleType: []*ValueType{
518				{Type: "objects", Unit: "count"},
519				{Type: "space", Unit: "bytes"},
520			},
521			PeriodType: &ValueType{Type: "allocations", Unit: "count"},
522			Period:     1,
523		}
524	} else {
525		return nil, errUnrecognized
526	}
527
528	if LegacyHeapAllocated {
529		for _, st := range p.SampleType {
530			st.Type = "alloc_" + st.Type
531		}
532	} else {
533		for _, st := range p.SampleType {
534			st.Type = "inuse_" + st.Type
535		}
536	}
537
538	locs := make(map[uint64]*Location)
539	for {
540		l, err = r.ReadString('\n')
541		if err != nil {
542			if err != io.EOF {
543				return nil, err
544			}
545
546			if l == "" {
547				break
548			}
549		}
550
551		if isSpaceOrComment(l) {
552			continue
553		}
554		l = strings.TrimSpace(l)
555
556		if sectionTrigger(l) != unrecognizedSection {
557			break
558		}
559
560		value, blocksize, addrs, err := parseHeapSample(l, p.Period, sampling)
561		if err != nil {
562			return nil, err
563		}
564		var sloc []*Location
565		for _, addr := range addrs {
566			// Addresses from stack traces point to the next instruction after
567			// each call. Adjust by -1 to land somewhere on the actual call.
568			addr--
569			loc := locs[addr]
570			if locs[addr] == nil {
571				loc = &Location{
572					Address: addr,
573				}
574				p.Location = append(p.Location, loc)
575				locs[addr] = loc
576			}
577			sloc = append(sloc, loc)
578		}
579
580		p.Sample = append(p.Sample, &Sample{
581			Value:    value,
582			Location: sloc,
583			NumLabel: map[string][]int64{"bytes": {blocksize}},
584		})
585	}
586
587	if err = parseAdditionalSections(l, r, p); err != nil {
588		return nil, err
589	}
590	return p, nil
591}
592
593// parseHeapSample parses a single row from a heap profile into a new Sample.
594func parseHeapSample(line string, rate int64, sampling string) (value []int64, blocksize int64, addrs []uint64, err error) {
595	sampleData := heapSampleRE.FindStringSubmatch(line)
596	if len(sampleData) != 6 {
597		return value, blocksize, addrs, fmt.Errorf("unexpected number of sample values: got %d, want 6", len(sampleData))
598	}
599
600	// Use first two values by default; tcmalloc sampling generates the
601	// same value for both, only the older heap-profile collect separate
602	// stats for in-use and allocated objects.
603	valueIndex := 1
604	if LegacyHeapAllocated {
605		valueIndex = 3
606	}
607
608	var v1, v2 int64
609	if v1, err = strconv.ParseInt(sampleData[valueIndex], 10, 64); err != nil {
610		return value, blocksize, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
611	}
612	if v2, err = strconv.ParseInt(sampleData[valueIndex+1], 10, 64); err != nil {
613		return value, blocksize, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
614	}
615
616	if v1 == 0 {
617		if v2 != 0 {
618			return value, blocksize, addrs, fmt.Errorf("allocation count was 0 but allocation bytes was %d", v2)
619		}
620	} else {
621		blocksize = v2 / v1
622		if sampling == "v2" {
623			v1, v2 = scaleHeapSample(v1, v2, rate)
624		}
625	}
626
627	value = []int64{v1, v2}
628	addrs = parseHexAddresses(sampleData[5])
629
630	return value, blocksize, addrs, nil
631}
632
633// extractHexAddresses extracts hex numbers from a string and returns
634// them, together with their numeric value, in a slice.
635func extractHexAddresses(s string) ([]string, []uint64) {
636	hexStrings := hexNumberRE.FindAllString(s, -1)
637	var ids []uint64
638	for _, s := range hexStrings {
639		if id, err := strconv.ParseUint(s, 0, 64); err == nil {
640			ids = append(ids, id)
641		} else {
642			// Do not expect any parsing failures due to the regexp matching.
643			panic("failed to parse hex value:" + s)
644		}
645	}
646	return hexStrings, ids
647}
648
649// parseHexAddresses parses hex numbers from a string and returns them
650// in a slice.
651func parseHexAddresses(s string) []uint64 {
652	_, ids := extractHexAddresses(s)
653	return ids
654}
655
// scaleHeapSample adjusts the data from a heapz Sample to account for
// its probability of appearing in the collected data.
//
// heapz profiles are a sampling of the memory allocation requests in a
// program. The unsampled value is estimated by dividing each collected
// sample by its probability of appearing in the profile. heapz v2
// profiles rely on a poisson process to determine which samples to
// collect, based on the desired average collection rate R. The
// probability of a sample of size S to appear in that profile is
// 1-exp(-S/R).
//
// A zero count or size yields (0, 0). A rate of 1 means every sample
// was collected and a rate below 1 is treated as unknown; in both
// cases count and size are returned unchanged.
func scaleHeapSample(count, size, rate int64) (int64, int64) {
	switch {
	case count == 0 || size == 0:
		return 0, 0
	case rate <= 1:
		return count, size
	}

	avgSize := float64(size) / float64(count)
	scale := 1 / (1 - math.Exp(-avgSize/float64(rate)))

	scaledCount := int64(float64(count) * scale)
	scaledSize := int64(float64(size) * scale)
	return scaledCount, scaledSize
}
681
682// parseContention parses a mutex or contention profile. There are 2 cases:
683// "--- contentionz " for legacy C++ profiles (and backwards compatibility)
684// "--- mutex:" or "--- contention:" for profiles generated by the Go runtime.
685// This code converts the text output from runtime into a *Profile. (In the future
686// the runtime might write a serialized Profile directly making this unnecessary.)
687func parseContention(b []byte) (*Profile, error) {
688	r := bytes.NewBuffer(b)
689	var l string
690	var err error
691	for {
692		// Skip past comments and empty lines seeking a real header.
693		l, err = r.ReadString('\n')
694		if err != nil {
695			return nil, err
696		}
697		if !isSpaceOrComment(l) {
698			break
699		}
700	}
701
702	if strings.HasPrefix(l, "--- contentionz ") {
703		return parseCppContention(r)
704	} else if strings.HasPrefix(l, "--- mutex:") {
705		return parseCppContention(r)
706	} else if strings.HasPrefix(l, "--- contention:") {
707		return parseCppContention(r)
708	}
709	return nil, errUnrecognized
710}
711
712// parseCppContention parses the output from synchronization_profiling.cc
713// for backward compatibility, and the compatible (non-debug) block profile
714// output from the Go runtime.
715func parseCppContention(r *bytes.Buffer) (*Profile, error) {
716	p := &Profile{
717		PeriodType: &ValueType{Type: "contentions", Unit: "count"},
718		Period:     1,
719		SampleType: []*ValueType{
720			{Type: "contentions", Unit: "count"},
721			{Type: "delay", Unit: "nanoseconds"},
722		},
723	}
724
725	var cpuHz int64
726	var l string
727	var err error
728	// Parse text of the form "attribute = value" before the samples.
729	const delimiter = "="
730	for {
731		l, err = r.ReadString('\n')
732		if err != nil {
733			if err != io.EOF {
734				return nil, err
735			}
736
737			if l == "" {
738				break
739			}
740		}
741		if isSpaceOrComment(l) {
742			continue
743		}
744
745		if l = strings.TrimSpace(l); l == "" {
746			continue
747		}
748
749		if strings.HasPrefix(l, "---") {
750			break
751		}
752
753		attr := strings.SplitN(l, delimiter, 2)
754		if len(attr) != 2 {
755			break
756		}
757		key, val := strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1])
758		var err error
759		switch key {
760		case "cycles/second":
761			if cpuHz, err = strconv.ParseInt(val, 0, 64); err != nil {
762				return nil, errUnrecognized
763			}
764		case "sampling period":
765			if p.Period, err = strconv.ParseInt(val, 0, 64); err != nil {
766				return nil, errUnrecognized
767			}
768		case "ms since reset":
769			ms, err := strconv.ParseInt(val, 0, 64)
770			if err != nil {
771				return nil, errUnrecognized
772			}
773			p.DurationNanos = ms * 1000 * 1000
774		case "format":
775			// CPP contentionz profiles don't have format.
776			return nil, errUnrecognized
777		case "resolution":
778			// CPP contentionz profiles don't have resolution.
779			return nil, errUnrecognized
780		case "discarded samples":
781		default:
782			return nil, errUnrecognized
783		}
784	}
785
786	locs := make(map[uint64]*Location)
787	for {
788		if !isSpaceOrComment(l) {
789			if l = strings.TrimSpace(l); strings.HasPrefix(l, "---") {
790				break
791			}
792			value, addrs, err := parseContentionSample(l, p.Period, cpuHz)
793			if err != nil {
794				return nil, err
795			}
796			var sloc []*Location
797			for _, addr := range addrs {
798				// Addresses from stack traces point to the next instruction after
799				// each call. Adjust by -1 to land somewhere on the actual call.
800				addr--
801				loc := locs[addr]
802				if locs[addr] == nil {
803					loc = &Location{
804						Address: addr,
805					}
806					p.Location = append(p.Location, loc)
807					locs[addr] = loc
808				}
809				sloc = append(sloc, loc)
810			}
811			p.Sample = append(p.Sample, &Sample{
812				Value:    value,
813				Location: sloc,
814			})
815		}
816
817		if l, err = r.ReadString('\n'); err != nil {
818			if err != io.EOF {
819				return nil, err
820			}
821			if l == "" {
822				break
823			}
824		}
825	}
826
827	if err = parseAdditionalSections(l, r, p); err != nil {
828		return nil, err
829	}
830
831	return p, nil
832}
833
834// parseContentionSample parses a single row from a contention profile
835// into a new Sample.
836func parseContentionSample(line string, period, cpuHz int64) (value []int64, addrs []uint64, err error) {
837	sampleData := contentionSampleRE.FindStringSubmatch(line)
838	if sampleData == nil {
839		return value, addrs, errUnrecognized
840	}
841
842	v1, err := strconv.ParseInt(sampleData[1], 10, 64)
843	if err != nil {
844		return value, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
845	}
846	v2, err := strconv.ParseInt(sampleData[2], 10, 64)
847	if err != nil {
848		return value, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
849	}
850
851	// Unsample values if period and cpuHz are available.
852	// - Delays are scaled to cycles and then to nanoseconds.
853	// - Contentions are scaled to cycles.
854	if period > 0 {
855		if cpuHz > 0 {
856			cpuGHz := float64(cpuHz) / 1e9
857			v1 = int64(float64(v1) * float64(period) / cpuGHz)
858		}
859		v2 = v2 * period
860	}
861
862	value = []int64{v2, v1}
863	addrs = parseHexAddresses(sampleData[3])
864
865	return value, addrs, nil
866}
867
868// parseThread parses a Threadz profile and returns a new Profile.
869func parseThread(b []byte) (*Profile, error) {
870	r := bytes.NewBuffer(b)
871
872	var line string
873	var err error
874	for {
875		// Skip past comments and empty lines seeking a real header.
876		line, err = r.ReadString('\n')
877		if err != nil {
878			return nil, err
879		}
880		if !isSpaceOrComment(line) {
881			break
882		}
883	}
884
885	if m := threadzStartRE.FindStringSubmatch(line); m != nil {
886		// Advance over initial comments until first stack trace.
887		for {
888			line, err = r.ReadString('\n')
889			if err != nil {
890				if err != io.EOF {
891					return nil, err
892				}
893
894				if line == "" {
895					break
896				}
897			}
898			if sectionTrigger(line) != unrecognizedSection || line[0] == '-' {
899				break
900			}
901		}
902	} else if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
903		return nil, errUnrecognized
904	}
905
906	p := &Profile{
907		SampleType: []*ValueType{{Type: "thread", Unit: "count"}},
908		PeriodType: &ValueType{Type: "thread", Unit: "count"},
909		Period:     1,
910	}
911
912	locs := make(map[uint64]*Location)
913	// Recognize each thread and populate profile samples.
914	for sectionTrigger(line) == unrecognizedSection {
915		if strings.HasPrefix(line, "---- no stack trace for") {
916			line = ""
917			break
918		}
919		if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
920			return nil, errUnrecognized
921		}
922
923		var addrs []uint64
924		line, addrs, err = parseThreadSample(r)
925		if err != nil {
926			return nil, errUnrecognized
927		}
928		if len(addrs) == 0 {
929			// We got a --same as previous threads--. Bump counters.
930			if len(p.Sample) > 0 {
931				s := p.Sample[len(p.Sample)-1]
932				s.Value[0]++
933			}
934			continue
935		}
936
937		var sloc []*Location
938		for _, addr := range addrs {
939			// Addresses from stack traces point to the next instruction after
940			// each call. Adjust by -1 to land somewhere on the actual call.
941			addr--
942			loc := locs[addr]
943			if locs[addr] == nil {
944				loc = &Location{
945					Address: addr,
946				}
947				p.Location = append(p.Location, loc)
948				locs[addr] = loc
949			}
950			sloc = append(sloc, loc)
951		}
952
953		p.Sample = append(p.Sample, &Sample{
954			Value:    []int64{1},
955			Location: sloc,
956		})
957	}
958
959	if err = parseAdditionalSections(line, r, p); err != nil {
960		return nil, err
961	}
962
963	return p, nil
964}
965
966// parseThreadSample parses a symbolized or unsymbolized stack trace.
967// Returns the first line after the traceback, the sample (or nil if
968// it hits a 'same-as-previous' marker) and an error.
969func parseThreadSample(b *bytes.Buffer) (nextl string, addrs []uint64, err error) {
970	var l string
971	sameAsPrevious := false
972	for {
973		if l, err = b.ReadString('\n'); err != nil {
974			if err != io.EOF {
975				return "", nil, err
976			}
977			if l == "" {
978				break
979			}
980		}
981		if l = strings.TrimSpace(l); l == "" {
982			continue
983		}
984
985		if strings.HasPrefix(l, "---") {
986			break
987		}
988		if strings.Contains(l, "same as previous thread") {
989			sameAsPrevious = true
990			continue
991		}
992
993		addrs = append(addrs, parseHexAddresses(l)...)
994	}
995
996	if sameAsPrevious {
997		return l, nil, nil
998	}
999	return l, addrs, nil
1000}
1001
1002// parseAdditionalSections parses any additional sections in the
1003// profile, ignoring any unrecognized sections.
1004func parseAdditionalSections(l string, b *bytes.Buffer, p *Profile) (err error) {
1005	for {
1006		if sectionTrigger(l) == memoryMapSection {
1007			break
1008		}
1009		// Ignore any unrecognized sections.
1010		if l, err := b.ReadString('\n'); err != nil {
1011			if err != io.EOF {
1012				return err
1013			}
1014			if l == "" {
1015				break
1016			}
1017		}
1018	}
1019	return p.ParseMemoryMap(b)
1020}
1021
1022// ParseMemoryMap parses a memory map in the format of
1023// /proc/self/maps, and overrides the mappings in the current profile.
1024// It renumbers the samples and locations in the profile correspondingly.
1025func (p *Profile) ParseMemoryMap(rd io.Reader) error {
1026	b := bufio.NewReader(rd)
1027
1028	var attrs []string
1029	var r *strings.Replacer
1030	const delimiter = "="
1031	for {
1032		l, err := b.ReadString('\n')
1033		if err != nil {
1034			if err != io.EOF {
1035				return err
1036			}
1037			if l == "" {
1038				break
1039			}
1040		}
1041		if l = strings.TrimSpace(l); l == "" {
1042			continue
1043		}
1044
1045		if r != nil {
1046			l = r.Replace(l)
1047		}
1048		m, err := parseMappingEntry(l)
1049		if err != nil {
1050			if err == errUnrecognized {
1051				// Recognize assignments of the form: attr=value, and replace
1052				// $attr with value on subsequent mappings.
1053				if attr := strings.SplitN(l, delimiter, 2); len(attr) == 2 {
1054					attrs = append(attrs, "$"+strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1]))
1055					r = strings.NewReplacer(attrs...)
1056				}
1057				// Ignore any unrecognized entries
1058				continue
1059			}
1060			return err
1061		}
1062		if m == nil || (m.File == "" && len(p.Mapping) != 0) {
1063			// In some cases the first entry may include the address range
1064			// but not the name of the file. It should be followed by
1065			// another entry with the name.
1066			continue
1067		}
1068		if len(p.Mapping) == 1 && p.Mapping[0].File == "" {
1069			// Update the name if this is the entry following that empty one.
1070			p.Mapping[0].File = m.File
1071			continue
1072		}
1073		p.Mapping = append(p.Mapping, m)
1074	}
1075	p.remapLocationIDs()
1076	p.remapFunctionIDs()
1077	p.remapMappingIDs()
1078	return nil
1079}
1080
1081func parseMappingEntry(l string) (*Mapping, error) {
1082	mapping := &Mapping{}
1083	var err error
1084	if me := procMapsRE.FindStringSubmatch(l); len(me) == 9 {
1085		if !strings.Contains(me[3], "x") {
1086			// Skip non-executable entries.
1087			return nil, nil
1088		}
1089		if mapping.Start, err = strconv.ParseUint(me[1], 16, 64); err != nil {
1090			return nil, errUnrecognized
1091		}
1092		if mapping.Limit, err = strconv.ParseUint(me[2], 16, 64); err != nil {
1093			return nil, errUnrecognized
1094		}
1095		if me[4] != "" {
1096			if mapping.Offset, err = strconv.ParseUint(me[4], 16, 64); err != nil {
1097				return nil, errUnrecognized
1098			}
1099		}
1100		mapping.File = me[8]
1101		return mapping, nil
1102	}
1103
1104	if me := briefMapsRE.FindStringSubmatch(l); len(me) == 6 {
1105		if mapping.Start, err = strconv.ParseUint(me[1], 16, 64); err != nil {
1106			return nil, errUnrecognized
1107		}
1108		if mapping.Limit, err = strconv.ParseUint(me[2], 16, 64); err != nil {
1109			return nil, errUnrecognized
1110		}
1111		mapping.File = me[3]
1112		if me[5] != "" {
1113			if mapping.Offset, err = strconv.ParseUint(me[5], 16, 64); err != nil {
1114				return nil, errUnrecognized
1115			}
1116		}
1117		return mapping, nil
1118	}
1119
1120	return nil, errUnrecognized
1121}
1122
// sectionType identifies the kind of section that a line of a legacy
// profile introduces.
type sectionType int

const (
	// unrecognizedSection is the zero value: the line starts no known
	// section.
	unrecognizedSection sectionType = iota
	// memoryMapSection marks the start of a memory map listing.
	memoryMapSection
)

// memoryMapTriggers lists the marker strings that announce a memory
// map section.
var memoryMapTriggers = []string{
	"--- Memory map: ---",
	"MAPPED_LIBRARIES:",
}

// sectionTrigger reports which section, if any, line introduces. It
// returns memoryMapSection when line contains any entry of
// memoryMapTriggers as a substring, and unrecognizedSection otherwise.
func sectionTrigger(line string) sectionType {
	section := unrecognizedSection
	for _, marker := range memoryMapTriggers {
		if strings.Contains(line, marker) {
			section = memoryMapSection
			break
		}
	}
	return section
}
1143
1144func (p *Profile) addLegacyFrameInfo() {
1145	switch {
1146	case isProfileType(p, heapzSampleTypes) ||
1147		isProfileType(p, heapzInUseSampleTypes) ||
1148		isProfileType(p, heapzAllocSampleTypes):
1149		p.DropFrames, p.KeepFrames = allocRxStr, allocSkipRxStr
1150	case isProfileType(p, contentionzSampleTypes):
1151		p.DropFrames, p.KeepFrames = lockRxStr, ""
1152	default:
1153		p.DropFrames, p.KeepFrames = cpuProfilerRxStr, ""
1154	}
1155}
1156
// Sample type names, in order, that isProfileType compares against a
// profile's SampleType entries to classify legacy profiles.
var (
	heapzSampleTypes       = []string{"allocations", "size"} // early Go pprof profiles
	heapzInUseSampleTypes  = []string{"inuse_objects", "inuse_space"}
	heapzAllocSampleTypes  = []string{"alloc_objects", "alloc_space"}
	contentionzSampleTypes = []string{"contentions", "delay"}
)
1161
1162func isProfileType(p *Profile, t []string) bool {
1163	st := p.SampleType
1164	if len(st) != len(t) {
1165		return false
1166	}
1167
1168	for i := range st {
1169		if st[i].Type != t[i] {
1170			return false
1171		}
1172	}
1173	return true
1174}
1175
// allocRxStr is a "|"-separated alternation of function-name patterns
// for memory allocation routines. addLegacyFrameInfo installs it as
// the DropFrames expression of heap profiles.
var allocRxStr = func() string {
	groups := [][]string{
		// POSIX entry points.
		{
			`calloc`,
			`cfree`,
			`malloc`,
			`free`,
			`memalign`,
			`do_memalign`,
			`(__)?posix_memalign`,
			`pvalloc`,
			`valloc`,
			`realloc`,
		},
		// TC malloc.
		{
			`tcmalloc::.*`,
			`tc_calloc`,
			`tc_cfree`,
			`tc_malloc`,
			`tc_free`,
			`tc_memalign`,
			`tc_posix_memalign`,
			`tc_pvalloc`,
			`tc_valloc`,
			`tc_realloc`,
			`tc_new`,
			`tc_delete`,
			`tc_newarray`,
			`tc_deletearray`,
			`tc_new_nothrow`,
			`tc_newarray_nothrow`,
		},
		// Memory-allocation routines on OS X.
		{
			`malloc_zone_malloc`,
			`malloc_zone_calloc`,
			`malloc_zone_valloc`,
			`malloc_zone_realloc`,
			`malloc_zone_memalign`,
			`malloc_zone_free`,
		},
		// Go runtime
		{
			`runtime\..*`,
		},
		// Other misc. memory allocation routines
		{
			`BaseArena::.*`,
			`(::)?do_malloc_no_errno`,
			`(::)?do_malloc_pages`,
			`(::)?do_malloc`,
			`DoSampledAllocation`,
			`MallocedMemBlock::MallocedMemBlock`,
			`_M_allocate`,
			`__builtin_(vec_)?delete`,
			`__builtin_(vec_)?new`,
			`__gnu_cxx::new_allocator::allocate`,
			`__libc_malloc`,
			`__malloc_alloc_template::allocate`,
			`allocate`,
			`cpp_alloc`,
			`operator new(\[\])?`,
			`simple_alloc::allocate`,
		},
	}

	// Flatten the groups in declaration order so the alternatives
	// appear in the joined expression exactly as listed above.
	var patterns []string
	for _, group := range groups {
		patterns = append(patterns, group...)
	}
	return strings.Join(patterns, `|`)
}()
1236
// allocSkipRxStr is a "|"-separated alternation of Go runtime frames
// that must be preserved when they appear in the middle/bottom of the
// stack. addLegacyFrameInfo installs it as the KeepFrames expression
// of heap profiles.
var allocSkipRxStr = strings.Join(
	[]string{`runtime\.panic`, `runtime\.reflectcall`, `runtime\.call[0-9]*`},
	`|`,
)
1244
// cpuProfilerRxStr is a "|"-separated alternation of function-name
// patterns. addLegacyFrameInfo installs it as the DropFrames
// expression of profiles that are neither heap nor contention
// profiles.
var cpuProfilerRxStr = strings.Join(
	[]string{
		`ProfileData::Add`, `ProfileData::prof_handler`, `CpuProfiler::prof_handler`,
		`__pthread_sighandler`, `__restore`,
	},
	`|`,
)
1252
// lockRxStr is a "|"-separated alternation of function-name patterns.
// addLegacyFrameInfo installs it as the DropFrames expression of
// contention profiles. The groups below are joined in order, so the
// resulting expression lists the alternatives exactly as written.
var lockRxStr = strings.Join([]string{
	// Lock profile data recording and submission, optionally
	// qualified with base::.
	strings.Join([]string{
		`RecordLockProfileData`,
		`(base::)?RecordLockProfileData.*`,
		`(base::)?SubmitMutexProfileData.*`,
		`(base::)?SubmitSpinLockProfileData.*`,
	}, `|`),

	// Mutex and MutexLock members, optionally qualified.
	strings.Join([]string{
		`(Mutex::)?AwaitCommon.*`,
		`(Mutex::)?Unlock.*`,
		`(Mutex::)?UnlockSlow.*`,
		`(Mutex::)?ReaderUnlock.*`,
		`(MutexLock::)?~MutexLock.*`,
	}, `|`),

	// SpinLock and SpinLockHolder members, optionally qualified.
	strings.Join([]string{
		`(SpinLock::)?Unlock.*`,
		`(SpinLock::)?SlowUnlock.*`,
		`(SpinLockHolder::)?~SpinLockHolder.*`,
	}, `|`),
}, `|`)
1267