1// Copyright 2014 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package objfile
6
7import (
8	"bufio"
9	"bytes"
10	"cmd/internal/src"
11	"container/list"
12	"debug/gosym"
13	"encoding/binary"
14	"fmt"
15	"io"
16	"io/ioutil"
17	"os"
18	"path/filepath"
19	"regexp"
20	"sort"
21	"strings"
22	"text/tabwriter"
23
24	"golang.org/x/arch/arm/armasm"
25	"golang.org/x/arch/arm64/arm64asm"
26	"golang.org/x/arch/ppc64/ppc64asm"
27	"golang.org/x/arch/x86/x86asm"
28)
29
30// Disasm is a disassembler for a given File.
31type Disasm struct {
32	syms      []Sym            //symbols in file, sorted by address
33	pcln      Liner            // pcln table
34	text      []byte           // bytes of text segment (actual instructions)
35	textStart uint64           // start PC of text
36	textEnd   uint64           // end PC of text
37	goarch    string           // GOARCH string
38	disasm    disasmFunc       // disassembler function for goarch
39	byteOrder binary.ByteOrder // byte order for goarch
40}
41
42// Disasm returns a disassembler for the file f.
43func (e *Entry) Disasm() (*Disasm, error) {
44	syms, err := e.Symbols()
45	if err != nil {
46		return nil, err
47	}
48
49	pcln, err := e.PCLineTable()
50	if err != nil {
51		return nil, err
52	}
53
54	textStart, textBytes, err := e.Text()
55	if err != nil {
56		return nil, err
57	}
58
59	goarch := e.GOARCH()
60	disasm := disasms[goarch]
61	byteOrder := byteOrders[goarch]
62	if disasm == nil || byteOrder == nil {
63		return nil, fmt.Errorf("unsupported architecture")
64	}
65
66	// Filter out section symbols, overwriting syms in place.
67	keep := syms[:0]
68	for _, sym := range syms {
69		switch sym.Name {
70		case "runtime.text", "text", "_text", "runtime.etext", "etext", "_etext":
71			// drop
72		default:
73			keep = append(keep, sym)
74		}
75	}
76	syms = keep
77	d := &Disasm{
78		syms:      syms,
79		pcln:      pcln,
80		text:      textBytes,
81		textStart: textStart,
82		textEnd:   textStart + uint64(len(textBytes)),
83		goarch:    goarch,
84		disasm:    disasm,
85		byteOrder: byteOrder,
86	}
87
88	return d, nil
89}
90
91// lookup finds the symbol name containing addr.
92func (d *Disasm) lookup(addr uint64) (name string, base uint64) {
93	i := sort.Search(len(d.syms), func(i int) bool { return addr < d.syms[i].Addr })
94	if i > 0 {
95		s := d.syms[i-1]
96		if s.Addr != 0 && s.Addr <= addr && addr < s.Addr+uint64(s.Size) {
97			return s.Name, s.Addr
98		}
99	}
100	return "", 0
101}
102
// base returns the final element in the path.
// Both "/" and `\` are treated as separators, so the result is the same
// for Windows and Unix paths regardless of the host operating system.
func base(path string) string {
	// LastIndexAny yields -1 when neither separator occurs,
	// in which case the whole path is returned.
	cut := strings.LastIndexAny(path, `/\`) + 1
	return path[cut:]
}
111
// CachedFile contains the content of a file split into lines.
type CachedFile struct {
	// FileName is the name under which the file is stored in the cache.
	FileName string

	// Lines holds the file content, one element per line.
	Lines [][]byte
}
117
// FileCache is a simple LRU cache of file contents.
type FileCache struct {
	// files holds *CachedFile values, most recently used first.
	files *list.List

	// maxLen is the capacity passed to NewFileCache.
	maxLen int
}

// NewFileCache returns a FileCache which can contain up to maxLen cached file contents.
// The returned cache starts out empty.
func NewFileCache(maxLen int) *FileCache {
	fc := new(FileCache)
	fc.files = list.New()
	fc.maxLen = maxLen
	return fc
}
131
132// Line returns the source code line for the given file and line number.
133// If the file is not already cached, reads it, inserts it into the cache,
134// and removes the least recently used file if necessary.
135// If the file is in cache, it is moved to the front of the list.
136func (fc *FileCache) Line(filename string, line int) ([]byte, error) {
137	if filepath.Ext(filename) != ".go" {
138		return nil, nil
139	}
140
141	// Clean filenames returned by src.Pos.SymFilename()
142	// or src.PosBase.SymFilename() removing
143	// the leading src.FileSymPrefix.
144	filename = strings.TrimPrefix(filename, src.FileSymPrefix)
145
146	// Expand literal "$GOROOT" rewritten by obj.AbsFile()
147	filename = filepath.Clean(os.ExpandEnv(filename))
148
149	var cf *CachedFile
150	var e *list.Element
151
152	for e = fc.files.Front(); e != nil; e = e.Next() {
153		cf = e.Value.(*CachedFile)
154		if cf.FileName == filename {
155			break
156		}
157	}
158
159	if e == nil {
160		content, err := ioutil.ReadFile(filename)
161		if err != nil {
162			return nil, err
163		}
164
165		cf = &CachedFile{
166			FileName: filename,
167			Lines:    bytes.Split(content, []byte{'\n'}),
168		}
169		fc.files.PushFront(cf)
170
171		if fc.files.Len() >= fc.maxLen {
172			fc.files.Remove(fc.files.Back())
173		}
174	} else {
175		fc.files.MoveToFront(e)
176	}
177
178	// because //line directives can be out-of-range. (#36683)
179	if line-1 >= len(cf.Lines) || line-1 < 0 {
180		return nil, nil
181	}
182
183	return cf.Lines[line-1], nil
184}
185
186// Print prints a disassembly of the file to w.
187// If filter is non-nil, the disassembly only includes functions with names matching filter.
188// If printCode is true, the disassembly includs corresponding source lines.
189// The disassembly only includes functions that overlap the range [start, end).
190func (d *Disasm) Print(w io.Writer, filter *regexp.Regexp, start, end uint64, printCode bool, gnuAsm bool) {
191	if start < d.textStart {
192		start = d.textStart
193	}
194	if end > d.textEnd {
195		end = d.textEnd
196	}
197	printed := false
198	bw := bufio.NewWriter(w)
199
200	var fc *FileCache
201	if printCode {
202		fc = NewFileCache(8)
203	}
204
205	tw := tabwriter.NewWriter(bw, 18, 8, 1, '\t', tabwriter.StripEscape)
206	for _, sym := range d.syms {
207		symStart := sym.Addr
208		symEnd := sym.Addr + uint64(sym.Size)
209		relocs := sym.Relocs
210		if sym.Code != 'T' && sym.Code != 't' ||
211			symStart < d.textStart ||
212			symEnd <= start || end <= symStart ||
213			filter != nil && !filter.MatchString(sym.Name) {
214			continue
215		}
216		if printed {
217			fmt.Fprintf(bw, "\n")
218		}
219		printed = true
220
221		file, _, _ := d.pcln.PCToLine(sym.Addr)
222		fmt.Fprintf(bw, "TEXT %s(SB) %s\n", sym.Name, file)
223
224		if symEnd > end {
225			symEnd = end
226		}
227		code := d.text[:end-d.textStart]
228
229		var lastFile string
230		var lastLine int
231
232		d.Decode(symStart, symEnd, relocs, gnuAsm, func(pc, size uint64, file string, line int, text string) {
233			i := pc - d.textStart
234
235			if printCode {
236				if file != lastFile || line != lastLine {
237					if srcLine, err := fc.Line(file, line); err == nil {
238						fmt.Fprintf(tw, "%s%s%s\n", []byte{tabwriter.Escape}, srcLine, []byte{tabwriter.Escape})
239					}
240
241					lastFile, lastLine = file, line
242				}
243
244				fmt.Fprintf(tw, "  %#x\t", pc)
245			} else {
246				fmt.Fprintf(tw, "  %s:%d\t%#x\t", base(file), line, pc)
247			}
248
249			if size%4 != 0 || d.goarch == "386" || d.goarch == "amd64" {
250				// Print instruction as bytes.
251				fmt.Fprintf(tw, "%x", code[i:i+size])
252			} else {
253				// Print instruction as 32-bit words.
254				for j := uint64(0); j < size; j += 4 {
255					if j > 0 {
256						fmt.Fprintf(tw, " ")
257					}
258					fmt.Fprintf(tw, "%08x", d.byteOrder.Uint32(code[i+j:]))
259				}
260			}
261			fmt.Fprintf(tw, "\t%s\t\n", text)
262		})
263		tw.Flush()
264	}
265	bw.Flush()
266}
267
268// Decode disassembles the text segment range [start, end), calling f for each instruction.
269func (d *Disasm) Decode(start, end uint64, relocs []Reloc, gnuAsm bool, f func(pc, size uint64, file string, line int, text string)) {
270	if start < d.textStart {
271		start = d.textStart
272	}
273	if end > d.textEnd {
274		end = d.textEnd
275	}
276	code := d.text[:end-d.textStart]
277	lookup := d.lookup
278	for pc := start; pc < end; {
279		i := pc - d.textStart
280		text, size := d.disasm(code[i:], pc, lookup, d.byteOrder, gnuAsm)
281		file, line, _ := d.pcln.PCToLine(pc)
282		sep := "\t"
283		for len(relocs) > 0 && relocs[0].Addr < i+uint64(size) {
284			text += sep + relocs[0].Stringer.String(pc-start)
285			sep = " "
286			relocs = relocs[1:]
287		}
288		f(pc, uint64(size), file, line, text)
289		pc += uint64(size)
290	}
291}
292
type (
	// lookupFunc resolves addr to the name and base address of a symbol.
	lookupFunc = func(addr uint64) (sym string, base uint64)

	// disasmFunc decodes the instruction at the start of code, which is
	// located at pc, and returns its text and its size in bytes.
	disasmFunc func(code []byte, pc uint64, lookup lookupFunc, ord binary.ByteOrder, gnuAsm bool) (text string, size int)
)
295
296func disasm_386(code []byte, pc uint64, lookup lookupFunc, _ binary.ByteOrder, gnuAsm bool) (string, int) {
297	return disasm_x86(code, pc, lookup, 32, gnuAsm)
298}
299
300func disasm_amd64(code []byte, pc uint64, lookup lookupFunc, _ binary.ByteOrder, gnuAsm bool) (string, int) {
301	return disasm_x86(code, pc, lookup, 64, gnuAsm)
302}
303
304func disasm_x86(code []byte, pc uint64, lookup lookupFunc, arch int, gnuAsm bool) (string, int) {
305	inst, err := x86asm.Decode(code, arch)
306	var text string
307	size := inst.Len
308	if err != nil || size == 0 || inst.Op == 0 {
309		size = 1
310		text = "?"
311	} else {
312		if gnuAsm {
313			text = fmt.Sprintf("%-36s // %s", x86asm.GoSyntax(inst, pc, lookup), x86asm.GNUSyntax(inst, pc, nil))
314		} else {
315			text = x86asm.GoSyntax(inst, pc, lookup)
316		}
317	}
318	return text, size
319}
320
// textReader exposes a slice of code through ReadAt,
// with offsets expressed as absolute addresses.
type textReader struct {
	code []byte // bytes available for reading
	pc   uint64 // address corresponding to code[0]
}

// ReadAt copies into data the bytes of r.code starting at address off.
// It returns 0, io.EOF when off is negative or lies outside r.code, and
// io.ErrUnexpectedEOF together with the number of bytes copied when fewer
// than len(data) bytes are available.
func (r textReader) ReadAt(data []byte, off int64) (n int, err error) {
	if off < 0 {
		return 0, io.EOF
	}
	addr := uint64(off)
	if addr < r.pc || addr-r.pc >= uint64(len(r.code)) {
		return 0, io.EOF
	}

	n = copy(data, r.code[addr-r.pc:])
	if n < len(data) {
		return n, io.ErrUnexpectedEOF
	}
	return n, nil
}
340
341func disasm_arm(code []byte, pc uint64, lookup lookupFunc, _ binary.ByteOrder, gnuAsm bool) (string, int) {
342	inst, err := armasm.Decode(code, armasm.ModeARM)
343	var text string
344	size := inst.Len
345	if err != nil || size == 0 || inst.Op == 0 {
346		size = 4
347		text = "?"
348	} else if gnuAsm {
349		text = fmt.Sprintf("%-36s // %s", armasm.GoSyntax(inst, pc, lookup, textReader{code, pc}), armasm.GNUSyntax(inst))
350	} else {
351		text = armasm.GoSyntax(inst, pc, lookup, textReader{code, pc})
352	}
353	return text, size
354}
355
356func disasm_arm64(code []byte, pc uint64, lookup lookupFunc, byteOrder binary.ByteOrder, gnuAsm bool) (string, int) {
357	inst, err := arm64asm.Decode(code)
358	var text string
359	if err != nil || inst.Op == 0 {
360		text = "?"
361	} else if gnuAsm {
362		text = fmt.Sprintf("%-36s // %s", arm64asm.GoSyntax(inst, pc, lookup, textReader{code, pc}), arm64asm.GNUSyntax(inst))
363	} else {
364		text = arm64asm.GoSyntax(inst, pc, lookup, textReader{code, pc})
365	}
366	return text, 4
367}
368
369func disasm_ppc64(code []byte, pc uint64, lookup lookupFunc, byteOrder binary.ByteOrder, gnuAsm bool) (string, int) {
370	inst, err := ppc64asm.Decode(code, byteOrder)
371	var text string
372	size := inst.Len
373	if err != nil || size == 0 {
374		size = 4
375		text = "?"
376	} else {
377		if gnuAsm {
378			text = fmt.Sprintf("%-36s // %s", ppc64asm.GoSyntax(inst, pc, lookup), ppc64asm.GNUSyntax(inst, pc))
379		} else {
380			text = ppc64asm.GoSyntax(inst, pc, lookup)
381		}
382	}
383	return text, size
384}
385
386var disasms = map[string]disasmFunc{
387	"386":     disasm_386,
388	"amd64":   disasm_amd64,
389	"arm":     disasm_arm,
390	"arm64":   disasm_arm64,
391	"ppc64":   disasm_ppc64,
392	"ppc64le": disasm_ppc64,
393}
394
// byteOrders maps a GOARCH string to the byte order for it.
var byteOrders = map[string]binary.ByteOrder{
	// Little-endian.
	"386":     binary.LittleEndian,
	"amd64":   binary.LittleEndian,
	"arm":     binary.LittleEndian,
	"arm64":   binary.LittleEndian,
	"ppc64le": binary.LittleEndian,

	// Big-endian.
	"ppc64": binary.BigEndian,
	"s390x": binary.BigEndian,
}
404
// Liner maps a program counter to a source position.
type Liner interface {
	// PCToLine returns the file, line, and function data corresponding to pc.
	// If unknown, it returns "", 0, nil.
	PCToLine(pc uint64) (file string, line int, fn *gosym.Func)
}
410