1// Copyright 2018 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//      http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// DWARF debug information entry parser.
16// An entry is a sequence of data items of a given format.
17// The first word in the entry is an index into what DWARF
18// calls the ``abbreviation table.''  An abbreviation is really
19// just a type descriptor: it's an array of attribute tag/value format pairs.
20
21package dwarf
22
23import (
24	"errors"
25	"strconv"
26)
27
28// a single entry's description: a sequence of attributes
29type abbrev struct {
30	tag      Tag
31	children bool
32	field    []afield
33}
34
35type afield struct {
36	attr Attr
37	fmt  format
38}
39
40// a map from entry format ids to their descriptions
41type abbrevTable map[uint32]abbrev
42
43// ParseAbbrev returns the abbreviation table that starts at byte off
44// in the .debug_abbrev section.
45func (d *Data) parseAbbrev(off uint32) (abbrevTable, error) {
46	if m, ok := d.abbrevCache[off]; ok {
47		return m, nil
48	}
49
50	data := d.abbrev
51	if off > uint32(len(data)) {
52		data = nil
53	} else {
54		data = data[off:]
55	}
56	b := makeBuf(d, unknownFormat{}, "abbrev", 0, data)
57
58	// Error handling is simplified by the buf getters
59	// returning an endless stream of 0s after an error.
60	m := make(abbrevTable)
61	for {
62		// Table ends with id == 0.
63		id := uint32(b.uint())
64		if id == 0 {
65			break
66		}
67
68		// Walk over attributes, counting.
69		n := 0
70		b1 := b // Read from copy of b.
71		b1.uint()
72		b1.uint8()
73		for {
74			tag := b1.uint()
75			fmt := b1.uint()
76			if tag == 0 && fmt == 0 {
77				break
78			}
79			n++
80		}
81		if b1.err != nil {
82			return nil, b1.err
83		}
84
85		// Walk over attributes again, this time writing them down.
86		var a abbrev
87		a.tag = Tag(b.uint())
88		a.children = b.uint8() != 0
89		a.field = make([]afield, n)
90		for i := range a.field {
91			a.field[i].attr = Attr(b.uint())
92			a.field[i].fmt = format(b.uint())
93		}
94		b.uint()
95		b.uint()
96
97		m[id] = a
98	}
99	if b.err != nil {
100		return nil, b.err
101	}
102	d.abbrevCache[off] = m
103	return m, nil
104}
105
106// An entry is a sequence of attribute/value pairs.
107type Entry struct {
108	Offset   Offset // offset of Entry in DWARF info
109	Tag      Tag    // tag (kind of Entry)
110	Children bool   // whether Entry is followed by children
111	Field    []Field
112}
113
114// A Field is a single attribute/value pair in an Entry.
115type Field struct {
116	Attr Attr
117	Val  interface{}
118}
119
120// Val returns the value associated with attribute Attr in Entry,
121// or nil if there is no such attribute.
122//
123// A common idiom is to merge the check for nil return with
124// the check that the value has the expected dynamic type, as in:
125//	v, ok := e.Val(AttrSibling).(int64);
126//
127func (e *Entry) Val(a Attr) interface{} {
128	for _, f := range e.Field {
129		if f.Attr == a {
130			return f.Val
131		}
132	}
133	return nil
134}
135
136// An Offset represents the location of an Entry within the DWARF info.
137// (See Reader.Seek.)
138type Offset uint32
139
140// Entry reads a single entry from buf, decoding
141// according to the given abbreviation table.
142func (b *buf) entry(atab abbrevTable, ubase Offset) *Entry {
143	off := b.off
144	id := uint32(b.uint())
145	if id == 0 {
146		return &Entry{}
147	}
148	a, ok := atab[id]
149	if !ok {
150		b.error("unknown abbreviation table index")
151		return nil
152	}
153	e := &Entry{
154		Offset:   off,
155		Tag:      a.tag,
156		Children: a.children,
157		Field:    make([]Field, len(a.field)),
158	}
159	for i := range e.Field {
160		e.Field[i].Attr = a.field[i].attr
161		fmt := a.field[i].fmt
162		if fmt == formIndirect {
163			fmt = format(b.uint())
164		}
165		var val interface{}
166		switch fmt {
167		default:
168			b.error("unknown entry attr format 0x" + strconv.FormatInt(int64(fmt), 16))
169
170		// address
171		case formAddr:
172			val = b.addr()
173
174		// block
175		case formDwarfBlock1:
176			val = b.bytes(int(b.uint8()))
177		case formDwarfBlock2:
178			val = b.bytes(int(b.uint16()))
179		case formDwarfBlock4:
180			val = b.bytes(int(b.uint32()))
181		case formDwarfBlock:
182			val = b.bytes(int(b.uint()))
183
184		// constant
185		case formData1:
186			val = int64(b.uint8())
187		case formData2:
188			val = int64(b.uint16())
189		case formData4:
190			val = int64(b.uint32())
191		case formData8:
192			val = int64(b.uint64())
193		case formSdata:
194			val = int64(b.int())
195		case formUdata:
196			val = int64(b.uint())
197
198		// flag
199		case formFlag:
200			val = b.uint8() == 1
201		// New in DWARF 4.
202		case formFlagPresent:
203			// The attribute is implicitly indicated as present, and no value is
204			// encoded in the debugging information entry itself.
205			val = true
206
207		// reference to other entry
208		case formRefAddr:
209			vers := b.format.version()
210			if vers == 0 {
211				b.error("unknown version for DW_FORM_ref_addr")
212			} else if vers == 2 {
213				val = Offset(b.addr())
214			} else {
215				is64, known := b.format.dwarf64()
216				if !known {
217					b.error("unknown size for DW_FORM_ref_addr")
218				} else if is64 {
219					val = Offset(b.uint64())
220				} else {
221					val = Offset(b.uint32())
222				}
223			}
224		case formRef1:
225			val = Offset(b.uint8()) + ubase
226		case formRef2:
227			val = Offset(b.uint16()) + ubase
228		case formRef4:
229			val = Offset(b.uint32()) + ubase
230		case formRef8:
231			val = Offset(b.uint64()) + ubase
232		case formRefUdata:
233			val = Offset(b.uint()) + ubase
234
235		// string
236		case formString:
237			val = b.string()
238		case formStrp:
239			off := b.uint32() // offset into .debug_str
240			if b.err != nil {
241				return nil
242			}
243			b1 := makeBuf(b.dwarf, unknownFormat{}, "str", 0, b.dwarf.str)
244			b1.skip(int(off))
245			val = b1.string()
246			if b1.err != nil {
247				b.err = b1.err
248				return nil
249			}
250
251		// lineptr, loclistptr, macptr, rangelistptr
252		// New in DWARF 4, but clang can generate them with -gdwarf-2.
253		// Section reference, replacing use of formData4 and formData8.
254		case formSecOffset, formGnuRefAlt, formGnuStrpAlt:
255			is64, known := b.format.dwarf64()
256			if !known {
257				b.error("unknown size for form 0x" + strconv.FormatInt(int64(fmt), 16))
258			} else if is64 {
259				val = int64(b.uint64())
260			} else {
261				val = int64(b.uint32())
262			}
263
264		// exprloc
265		// New in DWARF 4.
266		case formExprloc:
267			val = b.bytes(int(b.uint()))
268
269		// reference
270		// New in DWARF 4.
271		case formRefSig8:
272			// 64-bit type signature.
273			val = b.uint64()
274		}
275		e.Field[i].Val = val
276	}
277	if b.err != nil {
278		return nil
279	}
280	return e
281}
282
283// A Reader allows reading Entry structures from a DWARF ``info'' section.
284// The Entry structures are arranged in a tree.  The Reader's Next function
285// return successive entries from a pre-order traversal of the tree.
286// If an entry has children, its Children field will be true, and the children
287// follow, terminated by an Entry with Tag 0.
288type Reader struct {
289	b            buf
290	d            *Data
291	err          error
292	unit         int
293	lastChildren bool   // .Children of last entry returned by Next
294	lastSibling  Offset // .Val(AttrSibling) of last entry returned by Next
295}
296
297// Reader returns a new Reader for Data.
298// The reader is positioned at byte offset 0 in the DWARF ``info'' section.
299func (d *Data) Reader() *Reader {
300	r := &Reader{d: d}
301	r.Seek(0)
302	return r
303}
304
305// AddressSize returns the size in bytes of addresses in the current compilation
306// unit.
307func (r *Reader) AddressSize() int {
308	return r.d.unit[r.unit].asize
309}
310
311// Seek positions the Reader at offset off in the encoded entry stream.
312// Offset 0 can be used to denote the first entry.
313func (r *Reader) Seek(off Offset) {
314	d := r.d
315	r.err = nil
316	r.lastChildren = false
317	if off == 0 {
318		if len(d.unit) == 0 {
319			return
320		}
321		u := &d.unit[0]
322		r.unit = 0
323		r.b = makeBuf(r.d, u, "info", u.off, u.data)
324		return
325	}
326
327	// TODO(rsc): binary search (maybe a new package)
328	var i int
329	var u *unit
330	for i = range d.unit {
331		u = &d.unit[i]
332		if u.off <= off && off < u.off+Offset(len(u.data)) {
333			r.unit = i
334			r.b = makeBuf(r.d, u, "info", off, u.data[off-u.off:])
335			return
336		}
337	}
338	r.err = errors.New("offset out of range")
339}
340
341// maybeNextUnit advances to the next unit if this one is finished.
342func (r *Reader) maybeNextUnit() {
343	for len(r.b.data) == 0 && r.unit+1 < len(r.d.unit) {
344		r.unit++
345		u := &r.d.unit[r.unit]
346		r.b = makeBuf(r.d, u, "info", u.off, u.data)
347	}
348}
349
350// Next reads the next entry from the encoded entry stream.
351// It returns nil, nil when it reaches the end of the section.
352// It returns an error if the current offset is invalid or the data at the
353// offset cannot be decoded as a valid Entry.
354func (r *Reader) Next() (*Entry, error) {
355	if r.err != nil {
356		return nil, r.err
357	}
358	r.maybeNextUnit()
359	if len(r.b.data) == 0 {
360		return nil, nil
361	}
362	u := &r.d.unit[r.unit]
363	e := r.b.entry(u.atable, u.base)
364	if r.b.err != nil {
365		r.err = r.b.err
366		return nil, r.err
367	}
368	if e != nil {
369		r.lastChildren = e.Children
370		if r.lastChildren {
371			r.lastSibling, _ = e.Val(AttrSibling).(Offset)
372		}
373	} else {
374		r.lastChildren = false
375	}
376	return e, nil
377}
378
379// SkipChildren skips over the child entries associated with
380// the last Entry returned by Next.  If that Entry did not have
381// children or Next has not been called, SkipChildren is a no-op.
382func (r *Reader) SkipChildren() {
383	if r.err != nil || !r.lastChildren {
384		return
385	}
386
387	// If the last entry had a sibling attribute,
388	// that attribute gives the offset of the next
389	// sibling, so we can avoid decoding the
390	// child subtrees.
391	if r.lastSibling >= r.b.off {
392		r.Seek(r.lastSibling)
393		return
394	}
395
396	for {
397		e, err := r.Next()
398		if err != nil || e == nil || e.Tag == 0 {
399			break
400		}
401		if e.Children {
402			r.SkipChildren()
403		}
404	}
405}
406
407// clone returns a copy of the reader.  This is used by the typeReader
408// interface.
409func (r *Reader) clone() typeReader {
410	return r.d.Reader()
411}
412
413// offset returns the current buffer offset.  This is used by the
414// typeReader interface.
415func (r *Reader) offset() Offset {
416	return r.b.off
417}
418