1// Copyright 2014 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package report
16
17// This file contains routines related to the generation of annotated
18// source listings.
19
20import (
21	"bufio"
22	"fmt"
23	"html/template"
24	"io"
25	"os"
26	"path/filepath"
27	"strconv"
28	"strings"
29
30	"github.com/google/pprof/internal/graph"
31	"github.com/google/pprof/internal/plugin"
32)
33
34// printSource prints an annotated source listing, include all
35// functions with samples that match the regexp rpt.options.symbol.
36// The sources are sorted by function name and then by filename to
37// eliminate potential nondeterminism.
38func printSource(w io.Writer, rpt *Report) error {
39	o := rpt.options
40	g := rpt.newGraph(nil)
41
42	// Identify all the functions that match the regexp provided.
43	// Group nodes for each matching function.
44	var functions graph.Nodes
45	functionNodes := make(map[string]graph.Nodes)
46	for _, n := range g.Nodes {
47		if !o.Symbol.MatchString(n.Info.Name) {
48			continue
49		}
50		if functionNodes[n.Info.Name] == nil {
51			functions = append(functions, n)
52		}
53		functionNodes[n.Info.Name] = append(functionNodes[n.Info.Name], n)
54	}
55	functions.Sort(graph.NameOrder)
56
57	sourcePath := o.SourcePath
58	if sourcePath == "" {
59		wd, err := os.Getwd()
60		if err != nil {
61			return fmt.Errorf("Could not stat current dir: %v", err)
62		}
63		sourcePath = wd
64	}
65
66	fmt.Fprintf(w, "Total: %s\n", rpt.formatValue(rpt.total))
67	for _, fn := range functions {
68		name := fn.Info.Name
69
70		// Identify all the source files associated to this function.
71		// Group nodes for each source file.
72		var sourceFiles graph.Nodes
73		fileNodes := make(map[string]graph.Nodes)
74		for _, n := range functionNodes[name] {
75			if n.Info.File == "" {
76				continue
77			}
78			if fileNodes[n.Info.File] == nil {
79				sourceFiles = append(sourceFiles, n)
80			}
81			fileNodes[n.Info.File] = append(fileNodes[n.Info.File], n)
82		}
83
84		if len(sourceFiles) == 0 {
85			fmt.Fprintf(w, "No source information for %s\n", name)
86			continue
87		}
88
89		sourceFiles.Sort(graph.FileOrder)
90
91		// Print each file associated with this function.
92		for _, fl := range sourceFiles {
93			filename := fl.Info.File
94			fns := fileNodes[filename]
95			flatSum, cumSum := fns.Sum()
96
97			fnodes, _, err := getSourceFromFile(filename, sourcePath, fns, 0, 0)
98			fmt.Fprintf(w, "ROUTINE ======================== %s in %s\n", name, filename)
99			fmt.Fprintf(w, "%10s %10s (flat, cum) %s of Total\n",
100				rpt.formatValue(flatSum), rpt.formatValue(cumSum),
101				percentage(cumSum, rpt.total))
102
103			if err != nil {
104				fmt.Fprintf(w, " Error: %v\n", err)
105				continue
106			}
107
108			for _, fn := range fnodes {
109				fmt.Fprintf(w, "%10s %10s %6d:%s\n", valueOrDot(fn.Flat, rpt), valueOrDot(fn.Cum, rpt), fn.Info.Lineno, fn.Info.Name)
110			}
111		}
112	}
113	return nil
114}
115
116// printWebSource prints an annotated source listing, include all
117// functions with samples that match the regexp rpt.options.symbol.
118func printWebSource(w io.Writer, rpt *Report, obj plugin.ObjTool) error {
119	o := rpt.options
120	g := rpt.newGraph(nil)
121
122	// If the regexp source can be parsed as an address, also match
123	// functions that land on that address.
124	var address *uint64
125	if hex, err := strconv.ParseUint(o.Symbol.String(), 0, 64); err == nil {
126		address = &hex
127	}
128
129	sourcePath := o.SourcePath
130	if sourcePath == "" {
131		wd, err := os.Getwd()
132		if err != nil {
133			return fmt.Errorf("Could not stat current dir: %v", err)
134		}
135		sourcePath = wd
136	}
137
138	type fileFunction struct {
139		fileName, functionName string
140	}
141
142	// Extract interesting symbols from binary files in the profile and
143	// classify samples per symbol.
144	symbols := symbolsFromBinaries(rpt.prof, g, o.Symbol, address, obj)
145	symNodes := nodesPerSymbol(g.Nodes, symbols)
146
147	// Identify sources associated to a symbol by examining
148	// symbol samples. Classify samples per source file.
149	fileNodes := make(map[fileFunction]graph.Nodes)
150	if len(symNodes) == 0 {
151		for _, n := range g.Nodes {
152			if n.Info.File == "" || !o.Symbol.MatchString(n.Info.Name) {
153				continue
154			}
155			ff := fileFunction{n.Info.File, n.Info.Name}
156			fileNodes[ff] = append(fileNodes[ff], n)
157		}
158	} else {
159		for _, nodes := range symNodes {
160			for _, n := range nodes {
161				if n.Info.File != "" {
162					ff := fileFunction{n.Info.File, n.Info.Name}
163					fileNodes[ff] = append(fileNodes[ff], n)
164				}
165			}
166		}
167	}
168
169	if len(fileNodes) == 0 {
170		return fmt.Errorf("No source information for %s\n", o.Symbol.String())
171	}
172
173	sourceFiles := make(graph.Nodes, 0, len(fileNodes))
174	for _, nodes := range fileNodes {
175		sNode := *nodes[0]
176		sNode.Flat, sNode.Cum = nodes.Sum()
177		sourceFiles = append(sourceFiles, &sNode)
178	}
179	sourceFiles.Sort(graph.FileOrder)
180
181	// Print each file associated with this function.
182	printHeader(w, rpt)
183	for _, n := range sourceFiles {
184		ff := fileFunction{n.Info.File, n.Info.Name}
185		fns := fileNodes[ff]
186
187		asm := assemblyPerSourceLine(symbols, fns, ff.fileName, obj)
188		start, end := sourceCoordinates(asm)
189
190		fnodes, path, err := getSourceFromFile(ff.fileName, sourcePath, fns, start, end)
191		if err != nil {
192			fnodes, path = getMissingFunctionSource(ff.fileName, asm, start, end)
193		}
194
195		printFunctionHeader(w, ff.functionName, path, n.Flat, n.Cum, rpt)
196		for _, fn := range fnodes {
197			printFunctionSourceLine(w, fn, asm[fn.Info.Lineno], rpt)
198		}
199		printFunctionClosing(w)
200	}
201	printPageClosing(w)
202	return nil
203}
204
205// sourceCoordinates returns the lowest and highest line numbers from
206// a set of assembly statements.
207func sourceCoordinates(asm map[int][]assemblyInstruction) (start, end int) {
208	for l := range asm {
209		if start == 0 || l < start {
210			start = l
211		}
212		if end == 0 || l > end {
213			end = l
214		}
215	}
216	return start, end
217}
218
219// assemblyPerSourceLine disassembles the binary containing a symbol
220// and classifies the assembly instructions according to its
221// corresponding source line, annotating them with a set of samples.
222func assemblyPerSourceLine(objSyms []*objSymbol, rs graph.Nodes, src string, obj plugin.ObjTool) map[int][]assemblyInstruction {
223	assembly := make(map[int][]assemblyInstruction)
224	// Identify symbol to use for this collection of samples.
225	o := findMatchingSymbol(objSyms, rs)
226	if o == nil {
227		return assembly
228	}
229
230	// Extract assembly for matched symbol
231	insts, err := obj.Disasm(o.sym.File, o.sym.Start, o.sym.End)
232	if err != nil {
233		return assembly
234	}
235
236	srcBase := filepath.Base(src)
237	anodes := annotateAssembly(insts, rs, o.base)
238	var lineno = 0
239	for _, an := range anodes {
240		if filepath.Base(an.file) == srcBase {
241			lineno = an.line
242		}
243		if lineno != 0 {
244			assembly[lineno] = append(assembly[lineno], an)
245		}
246	}
247
248	return assembly
249}
250
251// findMatchingSymbol looks for the symbol that corresponds to a set
252// of samples, by comparing their addresses.
253func findMatchingSymbol(objSyms []*objSymbol, ns graph.Nodes) *objSymbol {
254	for _, n := range ns {
255		for _, o := range objSyms {
256			if filepath.Base(o.sym.File) == filepath.Base(n.Info.Objfile) &&
257				o.sym.Start <= n.Info.Address-o.base &&
258				n.Info.Address-o.base <= o.sym.End {
259				return o
260			}
261		}
262	}
263	return nil
264}
265
266// printHeader prints the page header for a weblist report.
267func printHeader(w io.Writer, rpt *Report) {
268	fmt.Fprintln(w, weblistPageHeader)
269
270	var labels []string
271	for _, l := range ProfileLabels(rpt) {
272		labels = append(labels, template.HTMLEscapeString(l))
273	}
274
275	fmt.Fprintf(w, `<div class="legend">%s<br>Total: %s</div>`,
276		strings.Join(labels, "<br>\n"),
277		rpt.formatValue(rpt.total),
278	)
279}
280
281// printFunctionHeader prints a function header for a weblist report.
282func printFunctionHeader(w io.Writer, name, path string, flatSum, cumSum int64, rpt *Report) {
283	fmt.Fprintf(w, `<h1>%s</h1>%s
284<pre onClick="pprof_toggle_asm(event)">
285  Total:  %10s %10s (flat, cum) %s
286`,
287		template.HTMLEscapeString(name), template.HTMLEscapeString(path),
288		rpt.formatValue(flatSum), rpt.formatValue(cumSum),
289		percentage(cumSum, rpt.total))
290}
291
292// printFunctionSourceLine prints a source line and the corresponding assembly.
293func printFunctionSourceLine(w io.Writer, fn *graph.Node, assembly []assemblyInstruction, rpt *Report) {
294	if len(assembly) == 0 {
295		fmt.Fprintf(w,
296			"<span class=line> %6d</span> <span class=nop>  %10s %10s %s </span>\n",
297			fn.Info.Lineno,
298			valueOrDot(fn.Flat, rpt), valueOrDot(fn.Cum, rpt),
299			template.HTMLEscapeString(fn.Info.Name))
300		return
301	}
302
303	fmt.Fprintf(w,
304		"<span class=line> %6d</span> <span class=deadsrc>  %10s %10s %s </span>",
305		fn.Info.Lineno,
306		valueOrDot(fn.Flat, rpt), valueOrDot(fn.Cum, rpt),
307		template.HTMLEscapeString(fn.Info.Name))
308	fmt.Fprint(w, "<span class=asm>")
309	for _, an := range assembly {
310		var fileline string
311		class := "disasmloc"
312		if an.file != "" {
313			fileline = fmt.Sprintf("%s:%d", template.HTMLEscapeString(an.file), an.line)
314			if an.line != fn.Info.Lineno {
315				class = "unimportant"
316			}
317		}
318		flat, cum := an.flat, an.cum
319		if an.flatDiv != 0 {
320			flat = flat / an.flatDiv
321		}
322		if an.cumDiv != 0 {
323			cum = cum / an.cumDiv
324		}
325		fmt.Fprintf(w, " %8s %10s %10s %8x: %-48s <span class=%s>%s</span>\n", "",
326			valueOrDot(flat, rpt), valueOrDot(cum, rpt),
327			an.address,
328			template.HTMLEscapeString(an.instruction),
329			class,
330			template.HTMLEscapeString(fileline))
331	}
332	fmt.Fprintln(w, "</span>")
333}
334
// printFunctionClosing writes the tag that ends a function section of
// a weblist report, closing the <pre> block, followed by a newline.
func printFunctionClosing(w io.Writer) {
	const closingTag = "</pre>"
	fmt.Fprintf(w, "%s\n", closingTag)
}
339
340// printPageClosing prints the end of the page in a weblist report.
341func printPageClosing(w io.Writer) {
342	fmt.Fprintln(w, weblistPageClosing)
343}
344
345// getSourceFromFile collects the sources of a function from a source
346// file and annotates it with the samples in fns. Returns the sources
347// as nodes, using the info.name field to hold the source code.
348func getSourceFromFile(file, sourcePath string, fns graph.Nodes, start, end int) (graph.Nodes, string, error) {
349	file = trimPath(file)
350	f, err := openSourceFile(file, sourcePath)
351	if err != nil {
352		return nil, file, err
353	}
354
355	lineNodes := make(map[int]graph.Nodes)
356	// Collect source coordinates from profile.
357	const margin = 5 // Lines before first/after last sample.
358	if start == 0 {
359		if fns[0].Info.StartLine != 0 {
360			start = fns[0].Info.StartLine
361		} else {
362			start = fns[0].Info.Lineno - margin
363		}
364	} else {
365		start -= margin
366	}
367	if end == 0 {
368		end = fns[0].Info.Lineno
369	}
370	end += margin
371	for _, n := range fns {
372		lineno := n.Info.Lineno
373		nodeStart := n.Info.StartLine
374		if nodeStart == 0 {
375			nodeStart = lineno - margin
376		}
377		nodeEnd := lineno + margin
378		if nodeStart < start {
379			start = nodeStart
380		} else if nodeEnd > end {
381			end = nodeEnd
382		}
383		lineNodes[lineno] = append(lineNodes[lineno], n)
384	}
385
386	var src graph.Nodes
387	buf := bufio.NewReader(f)
388	lineno := 1
389	for {
390		line, err := buf.ReadString('\n')
391		if err != nil {
392			if err != io.EOF {
393				return nil, file, err
394			}
395			if line == "" {
396				break
397			}
398		}
399		if lineno >= start {
400			flat, cum := lineNodes[lineno].Sum()
401
402			src = append(src, &graph.Node{
403				Info: graph.NodeInfo{
404					Name:   strings.TrimRight(line, "\n"),
405					Lineno: lineno,
406				},
407				Flat: flat,
408				Cum:  cum,
409			})
410		}
411		lineno++
412		if lineno > end {
413			break
414		}
415	}
416	return src, file, nil
417}
418
419// getMissingFunctionSource creates a dummy function body to point to
420// the source file and annotates it with the samples in asm.
421func getMissingFunctionSource(filename string, asm map[int][]assemblyInstruction, start, end int) (graph.Nodes, string) {
422	var fnodes graph.Nodes
423	for i := start; i <= end; i++ {
424		insts := asm[i]
425		if len(insts) == 0 {
426			continue
427		}
428		var group assemblyInstruction
429		for _, insn := range insts {
430			group.flat += insn.flat
431			group.cum += insn.cum
432			group.flatDiv += insn.flatDiv
433			group.cumDiv += insn.cumDiv
434		}
435		flat := group.flatValue()
436		cum := group.cumValue()
437		fnodes = append(fnodes, &graph.Node{
438			Info: graph.NodeInfo{
439				Name:   "???",
440				Lineno: i,
441			},
442			Flat: flat,
443			Cum:  cum,
444		})
445	}
446	return fnodes, filename
447}
448
// openSourceFile opens a source file from a name encoded in a
// profile. File names in a profile are often relative paths, so a
// relative path is looked up under each directory listed in
// searchPath and then under each of that directory's parents, up to
// the filesystem root. The first file that opens successfully wins.
//
// searchPath is a list of directories joined by the operating
// system's path list separator (':' on Unix, ';' on Windows). An
// empty searchPath searches the current directory only. An absolute
// path is opened directly, without consulting searchPath.
//
// The caller is responsible for closing the returned file. An error
// is returned if the file cannot be opened at any candidate location.
func openSourceFile(path, searchPath string) (*os.File, error) {
	if filepath.IsAbs(path) {
		return os.Open(path)
	}

	// SplitList, unlike splitting on ":", copes with Windows drive
	// letters. It yields no entries for an empty list, so fall back to
	// a single empty entry, which resolves path against the current
	// directory.
	dirs := filepath.SplitList(searchPath)
	if len(dirs) == 0 {
		dirs = []string{""}
	}

	// Scan each component of the search path.
	for _, dir := range dirs {
		// Search up for every parent of each possible path.
		for {
			if f, err := os.Open(filepath.Join(dir, path)); err == nil {
				return f, nil
			}
			parent := filepath.Dir(dir)
			if parent == dir {
				// Reached the root (or "."): nothing further up.
				break
			}
			dir = parent
		}
	}

	return nil, fmt.Errorf("Could not find file %s on path %s", path, searchPath)
}
477
// trimPath strips a leading "/proc/self/cwd/" (optionally followed by
// "./") from path, comparing with forward slashes, and returns the
// remainder using the OS-specific separator. A path without that
// prefix is returned unchanged.
func trimPath(path string) string {
	const cwdPrefix = "/proc/self/cwd/"

	slashed := filepath.ToSlash(path)
	switch {
	case strings.HasPrefix(slashed, cwdPrefix+"./"):
		// The longer form is tested first so "./" is removed as well.
		return filepath.FromSlash(slashed[len(cwdPrefix)+len("./"):])
	case strings.HasPrefix(slashed, cwdPrefix):
		return filepath.FromSlash(slashed[len(cwdPrefix):])
	}
	return path
}
495