1// Copyright 2014 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package binutils
16
17import (
18	"bufio"
19	"fmt"
20	"io"
21	"os/exec"
22	"strconv"
23	"strings"
24	"sync"
25
26	"github.com/google/pprof/internal/plugin"
27)
28
// defaultLLVMSymbolizer is the command name used when the caller of
// newLLVMSymbolizer passes an empty command.
const defaultLLVMSymbolizer = "llvm-symbolizer"
32
// llvmSymbolizer is a connection to an llvm-symbolizer command for
// obtaining address and line number information from a binary.
//
// The embedded mutex is held by addrInfo for the duration of one query and
// its reply. filename is the binary named in every query, rw is the
// line-oriented channel used to send queries and read replies, and base is
// subtracted from each address before it is sent.
type llvmSymbolizer struct {
	sync.Mutex
	filename string
	rw       lineReaderWriter
	base     uint64
}
41
// llvmSymbolizerJob bundles an llvm-symbolizer command with the pipes used
// to talk to it: in receives the queries written by write, and out is the
// buffered reader that readLine consumes replies from.
type llvmSymbolizerJob struct {
	cmd *exec.Cmd
	in  io.WriteCloser
	out *bufio.Reader
	// llvm-symbolizer requires the symbol type, CODE or DATA, for symbolization.
	symType string
}
49
50func (a *llvmSymbolizerJob) write(s string) error {
51	_, err := fmt.Fprintln(a.in, a.symType, s)
52	return err
53}
54
55func (a *llvmSymbolizerJob) readLine() (string, error) {
56	s, err := a.out.ReadString('\n')
57	if err != nil {
58		return "", err
59	}
60	return strings.TrimSpace(s), nil
61}
62
63// close releases any resources used by the llvmSymbolizer object.
64func (a *llvmSymbolizerJob) close() {
65	a.in.Close()
66	a.cmd.Wait()
67}
68
69// newLlvmSymbolizer starts the given llvmSymbolizer command reporting
70// information about the given executable file. If file is a shared
71// library, base should be the address at which it was mapped in the
72// program under consideration.
73func newLLVMSymbolizer(cmd, file string, base uint64, isData bool) (*llvmSymbolizer, error) {
74	if cmd == "" {
75		cmd = defaultLLVMSymbolizer
76	}
77
78	j := &llvmSymbolizerJob{
79		cmd:     exec.Command(cmd, "--inlining", "-demangle=false"),
80		symType: "CODE",
81	}
82	if isData {
83		j.symType = "DATA"
84	}
85
86	var err error
87	if j.in, err = j.cmd.StdinPipe(); err != nil {
88		return nil, err
89	}
90
91	outPipe, err := j.cmd.StdoutPipe()
92	if err != nil {
93		return nil, err
94	}
95
96	j.out = bufio.NewReader(outPipe)
97	if err := j.cmd.Start(); err != nil {
98		return nil, err
99	}
100
101	a := &llvmSymbolizer{
102		filename: file,
103		rw:       j,
104		base:     base,
105	}
106
107	return a, nil
108}
109
110// readFrame parses the llvm-symbolizer output for a single address. It
111// returns a populated plugin.Frame and whether it has reached the end of the
112// data.
113func (d *llvmSymbolizer) readFrame() (plugin.Frame, bool) {
114	funcname, err := d.rw.readLine()
115	if err != nil {
116		return plugin.Frame{}, true
117	}
118
119	switch funcname {
120	case "":
121		return plugin.Frame{}, true
122	case "??":
123		funcname = ""
124	}
125
126	fileline, err := d.rw.readLine()
127	if err != nil {
128		return plugin.Frame{Func: funcname}, true
129	}
130
131	linenumber := 0
132	// The llvm-symbolizer outputs the <file_name>:<line_number>:<column_number>.
133	// When it cannot identify the source code location, it outputs "??:0:0".
134	// Older versions output just the filename and line number, so we check for
135	// both conditions here.
136	if fileline == "??:0" || fileline == "??:0:0" {
137		fileline = ""
138	} else {
139		switch split := strings.Split(fileline, ":"); len(split) {
140		case 1:
141			// filename
142			fileline = split[0]
143		case 2, 3:
144			// filename:line , or
145			// filename:line:disc , or
146			fileline = split[0]
147			if line, err := strconv.Atoi(split[1]); err == nil {
148				linenumber = line
149			}
150		default:
151			// Unrecognized, ignore
152		}
153	}
154
155	return plugin.Frame{Func: funcname, File: fileline, Line: linenumber}, false
156}
157
158// addrInfo returns the stack frame information for a specific program
159// address. It returns nil if the address could not be identified.
160func (d *llvmSymbolizer) addrInfo(addr uint64) ([]plugin.Frame, error) {
161	d.Lock()
162	defer d.Unlock()
163
164	if err := d.rw.write(fmt.Sprintf("%s 0x%x", d.filename, addr-d.base)); err != nil {
165		return nil, err
166	}
167
168	var stack []plugin.Frame
169	for {
170		frame, end := d.readFrame()
171		if end {
172			break
173		}
174
175		if frame != (plugin.Frame{}) {
176			stack = append(stack, frame)
177		}
178	}
179
180	return stack, nil
181}
182