1// Copyright 2014 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package binutils
16
17import (
18	"bytes"
19	"io"
20	"regexp"
21	"strconv"
22
23	"github.com/google/pprof/internal/plugin"
24	"github.com/ianlancetaylor/demangle"
25)
26
// Regular expressions used to pick apart the line-oriented text output
// parsed by this file.
var (
	// nmOutputRE matches a line made of a hexadecimal address, a single
	// character, and the rest of the line. The groups are: 1 = address,
	// 2 = the single character, 3 = remainder (used as the symbol name).
	nmOutputRE = regexp.MustCompile(`^\s*([[:xdigit:]]+)\s+(.)\s+(.*)`)

	// objdumpAsmOutputRE matches a line of the form "<hex address>: <text>".
	// The groups are: 1 = address, 2 = text after the colon and whitespace.
	objdumpAsmOutputRE = regexp.MustCompile(`^\s*([[:xdigit:]]+):\s+(.*)`)

	// objdumpOutputFileLine matches a "<file>:<decimal line>" prefix.
	// The groups are: 1 = file, 2 = line number.
	objdumpOutputFileLine = regexp.MustCompile(`^(.*):([0-9]+)`)

	// objdumpOutputFunction matches a "<function>():" prefix that starts
	// with a non-space character. Group 1 is the function name.
	objdumpOutputFunction = regexp.MustCompile(`^(\S.*)\(\):`)
)
33
34func findSymbols(syms []byte, file string, r *regexp.Regexp, address uint64) ([]*plugin.Sym, error) {
35	// Collect all symbols from the nm output, grouping names mapped to
36	// the same address into a single symbol.
37
38	// The symbols to return.
39	var symbols []*plugin.Sym
40
41	// The current group of symbol names, and the address they are all at.
42	names, start := []string{}, uint64(0)
43
44	buf := bytes.NewBuffer(syms)
45
46	for {
47		symAddr, name, err := nextSymbol(buf)
48		if err == io.EOF {
49			// Done. If there was an unfinished group, append it.
50			if len(names) != 0 {
51				if match := matchSymbol(names, start, symAddr-1, r, address); match != nil {
52					symbols = append(symbols, &plugin.Sym{Name: match, File: file, Start: start, End: symAddr - 1})
53				}
54			}
55
56			// And return the symbols.
57			return symbols, nil
58		}
59
60		if err != nil {
61			// There was some kind of serious error reading nm's output.
62			return nil, err
63		}
64
65		// If this symbol is at the same address as the current group, add it to the group.
66		if symAddr == start {
67			names = append(names, name)
68			continue
69		}
70
71		// Otherwise append the current group to the list of symbols.
72		if match := matchSymbol(names, start, symAddr-1, r, address); match != nil {
73			symbols = append(symbols, &plugin.Sym{Name: match, File: file, Start: start, End: symAddr - 1})
74		}
75
76		// And start a new group.
77		names, start = []string{name}, symAddr
78	}
79}
80
81// matchSymbol checks if a symbol is to be selected by checking its
82// name to the regexp and optionally its address. It returns the name(s)
83// to be used for the matched symbol, or nil if no match
84func matchSymbol(names []string, start, end uint64, r *regexp.Regexp, address uint64) []string {
85	if address != 0 && address >= start && address <= end {
86		return names
87	}
88	for _, name := range names {
89		if r == nil || r.MatchString(name) {
90			return []string{name}
91		}
92
93		// Match all possible demangled versions of the name.
94		for _, o := range [][]demangle.Option{
95			{demangle.NoClones},
96			{demangle.NoParams},
97			{demangle.NoParams, demangle.NoTemplateParams},
98		} {
99			if demangled, err := demangle.ToString(name, o...); err == nil && r.MatchString(demangled) {
100				return []string{demangled}
101			}
102		}
103	}
104	return nil
105}
106
107// disassemble parses the output of the objdump command and returns
108// the assembly instructions in a slice.
109func disassemble(asm []byte) ([]plugin.Inst, error) {
110	buf := bytes.NewBuffer(asm)
111	function, file, line := "", "", 0
112	var assembly []plugin.Inst
113	for {
114		input, err := buf.ReadString('\n')
115		if err != nil {
116			if err != io.EOF {
117				return nil, err
118			}
119			if input == "" {
120				break
121			}
122		}
123
124		if fields := objdumpAsmOutputRE.FindStringSubmatch(input); len(fields) == 3 {
125			if address, err := strconv.ParseUint(fields[1], 16, 64); err == nil {
126				assembly = append(assembly,
127					plugin.Inst{
128						Addr:     address,
129						Text:     fields[2],
130						Function: function,
131						File:     file,
132						Line:     line,
133					})
134				continue
135			}
136		}
137		if fields := objdumpOutputFileLine.FindStringSubmatch(input); len(fields) == 3 {
138			if l, err := strconv.ParseUint(fields[2], 10, 32); err == nil {
139				file, line = fields[1], int(l)
140			}
141			continue
142		}
143		if fields := objdumpOutputFunction.FindStringSubmatch(input); len(fields) == 2 {
144			function = fields[1]
145			continue
146		}
147		// Reset on unrecognized lines.
148		function, file, line = "", "", 0
149	}
150
151	return assembly, nil
152}
153
154// nextSymbol parses the nm output to find the next symbol listed.
155// Skips over any output it cannot recognize.
156func nextSymbol(buf *bytes.Buffer) (uint64, string, error) {
157	for {
158		line, err := buf.ReadString('\n')
159		if err != nil {
160			if err != io.EOF || line == "" {
161				return 0, "", err
162			}
163		}
164
165		if fields := nmOutputRE.FindStringSubmatch(line); len(fields) == 4 {
166			if address, err := strconv.ParseUint(fields[1], 16, 64); err == nil {
167				return address, fields[3], nil
168			}
169		}
170	}
171}
172