1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// DWARF debug information entry parser.
6// An entry is a sequence of data items of a given format.
7// The first word in the entry is an index into what DWARF
8// calls the ``abbreviation table.''  An abbreviation is really
9// just a type descriptor: it's an array of attribute tag/value format pairs.
10
11package dwarf
12
13import (
14	"errors"
15	"strconv"
16)
17
18// a single entry's description: a sequence of attributes
19type abbrev struct {
20	tag      Tag
21	children bool
22	field    []afield
23}
24
25type afield struct {
26	attr  Attr
27	fmt   format
28	class Class
29}
30
31// a map from entry format ids to their descriptions
32type abbrevTable map[uint32]abbrev
33
34// ParseAbbrev returns the abbreviation table that starts at byte off
35// in the .debug_abbrev section.
36func (d *Data) parseAbbrev(off uint32, vers int) (abbrevTable, error) {
37	if m, ok := d.abbrevCache[off]; ok {
38		return m, nil
39	}
40
41	data := d.abbrev
42	if off > uint32(len(data)) {
43		data = nil
44	} else {
45		data = data[off:]
46	}
47	b := makeBuf(d, unknownFormat{}, "abbrev", 0, data)
48
49	// Error handling is simplified by the buf getters
50	// returning an endless stream of 0s after an error.
51	m := make(abbrevTable)
52	for {
53		// Table ends with id == 0.
54		id := uint32(b.uint())
55		if id == 0 {
56			break
57		}
58
59		// Walk over attributes, counting.
60		n := 0
61		b1 := b // Read from copy of b.
62		b1.uint()
63		b1.uint8()
64		for {
65			tag := b1.uint()
66			fmt := b1.uint()
67			if tag == 0 && fmt == 0 {
68				break
69			}
70			n++
71		}
72		if b1.err != nil {
73			return nil, b1.err
74		}
75
76		// Walk over attributes again, this time writing them down.
77		var a abbrev
78		a.tag = Tag(b.uint())
79		a.children = b.uint8() != 0
80		a.field = make([]afield, n)
81		for i := range a.field {
82			a.field[i].attr = Attr(b.uint())
83			a.field[i].fmt = format(b.uint())
84			a.field[i].class = formToClass(a.field[i].fmt, a.field[i].attr, vers, &b)
85		}
86		b.uint()
87		b.uint()
88
89		m[id] = a
90	}
91	if b.err != nil {
92		return nil, b.err
93	}
94	d.abbrevCache[off] = m
95	return m, nil
96}
97
98// attrIsExprloc indicates attributes that allow exprloc values that
99// are encoded as block values in DWARF 2 and 3. See DWARF 4, Figure
100// 20.
101var attrIsExprloc = map[Attr]bool{
102	AttrLocation:      true,
103	AttrByteSize:      true,
104	AttrBitOffset:     true,
105	AttrBitSize:       true,
106	AttrStringLength:  true,
107	AttrLowerBound:    true,
108	AttrReturnAddr:    true,
109	AttrStrideSize:    true,
110	AttrUpperBound:    true,
111	AttrCount:         true,
112	AttrDataMemberLoc: true,
113	AttrFrameBase:     true,
114	AttrSegment:       true,
115	AttrStaticLink:    true,
116	AttrUseLocation:   true,
117	AttrVtableElemLoc: true,
118	AttrAllocated:     true,
119	AttrAssociated:    true,
120	AttrDataLocation:  true,
121	AttrStride:        true,
122}
123
124// attrPtrClass indicates the *ptr class of attributes that have
125// encoding formSecOffset in DWARF 4 or formData* in DWARF 2 and 3.
126var attrPtrClass = map[Attr]Class{
127	AttrLocation:      ClassLocListPtr,
128	AttrStmtList:      ClassLinePtr,
129	AttrStringLength:  ClassLocListPtr,
130	AttrReturnAddr:    ClassLocListPtr,
131	AttrStartScope:    ClassRangeListPtr,
132	AttrDataMemberLoc: ClassLocListPtr,
133	AttrFrameBase:     ClassLocListPtr,
134	AttrMacroInfo:     ClassMacPtr,
135	AttrSegment:       ClassLocListPtr,
136	AttrStaticLink:    ClassLocListPtr,
137	AttrUseLocation:   ClassLocListPtr,
138	AttrVtableElemLoc: ClassLocListPtr,
139	AttrRanges:        ClassRangeListPtr,
140}
141
142// formToClass returns the DWARF 4 Class for the given form. If the
143// DWARF version is less then 4, it will disambiguate some forms
144// depending on the attribute.
145func formToClass(form format, attr Attr, vers int, b *buf) Class {
146	switch form {
147	default:
148		b.error("cannot determine class of unknown attribute form")
149		return 0
150
151	case formAddr:
152		return ClassAddress
153
154	case formDwarfBlock1, formDwarfBlock2, formDwarfBlock4, formDwarfBlock:
155		// In DWARF 2 and 3, ClassExprLoc was encoded as a
156		// block. DWARF 4 distinguishes ClassBlock and
157		// ClassExprLoc, but there are no attributes that can
158		// be both, so we also promote ClassBlock values in
159		// DWARF 4 that should be ClassExprLoc in case
160		// producers get this wrong.
161		if attrIsExprloc[attr] {
162			return ClassExprLoc
163		}
164		return ClassBlock
165
166	case formData1, formData2, formData4, formData8, formSdata, formUdata:
167		// In DWARF 2 and 3, ClassPtr was encoded as a
168		// constant. Unlike ClassExprLoc/ClassBlock, some
169		// DWARF 4 attributes need to distinguish Class*Ptr
170		// from ClassConstant, so we only do this promotion
171		// for versions 2 and 3.
172		if class, ok := attrPtrClass[attr]; vers < 4 && ok {
173			return class
174		}
175		return ClassConstant
176
177	case formFlag, formFlagPresent:
178		return ClassFlag
179
180	case formRefAddr, formRef1, formRef2, formRef4, formRef8, formRefUdata:
181		return ClassReference
182
183	case formRefSig8:
184		return ClassReferenceSig
185
186	case formString, formStrp:
187		return ClassString
188
189	case formSecOffset:
190		// DWARF 4 defines four *ptr classes, but doesn't
191		// distinguish them in the encoding. Disambiguate
192		// these classes using the attribute.
193		if class, ok := attrPtrClass[attr]; ok {
194			return class
195		}
196		return ClassUnknown
197
198	case formExprloc:
199		return ClassExprLoc
200
201	case formGnuRefAlt:
202		return ClassReferenceAlt
203
204	case formGnuStrpAlt:
205		return ClassStringAlt
206	}
207}
208
209// An entry is a sequence of attribute/value pairs.
210type Entry struct {
211	Offset   Offset // offset of Entry in DWARF info
212	Tag      Tag    // tag (kind of Entry)
213	Children bool   // whether Entry is followed by children
214	Field    []Field
215}
216
217// A Field is a single attribute/value pair in an Entry.
218//
219// A value can be one of several "attribute classes" defined by DWARF.
220// The Go types corresponding to each class are:
221//
222//    DWARF class       Go type        Class
223//    -----------       -------        -----
224//    address           uint64         ClassAddress
225//    block             []byte         ClassBlock
226//    constant          int64          ClassConstant
227//    flag              bool           ClassFlag
228//    reference
229//      to info         dwarf.Offset   ClassReference
230//      to type unit    uint64         ClassReferenceSig
231//    string            string         ClassString
232//    exprloc           []byte         ClassExprLoc
233//    lineptr           int64          ClassLinePtr
234//    loclistptr        int64          ClassLocListPtr
235//    macptr            int64          ClassMacPtr
236//    rangelistptr      int64          ClassRangeListPtr
237//
238// For unrecognized or vendor-defined attributes, Class may be
239// ClassUnknown.
240type Field struct {
241	Attr  Attr
242	Val   interface{}
243	Class Class
244}
245
// A Class is the DWARF 4 class of an attribute value.
//
// A given attribute's value may generally belong to one of several
// classes defined by DWARF, and each class leads to a slightly
// different interpretation of the attribute.
//
// DWARF version 4 draws finer distinctions between attribute value
// classes than earlier versions do. The reader maps the coarser
// classes of earlier versions onto the appropriate DWARF 4 class.
// For example, DWARF 2 uses "constant" both for constants and for
// every kind of section offset; the reader canonicalizes attributes
// in DWARF 2 files that refer to section offsets to one of the
// Class*Ptr classes, even though these classes were only defined in
// DWARF 3.
type Class int
261
262const (
263	// ClassUnknown represents values of unknown DWARF class.
264	ClassUnknown Class = iota
265
266	// ClassAddress represents values of type uint64 that are
267	// addresses on the target machine.
268	ClassAddress
269
270	// ClassBlock represents values of type []byte whose
271	// interpretation depends on the attribute.
272	ClassBlock
273
274	// ClassConstant represents values of type int64 that are
275	// constants. The interpretation of this constant depends on
276	// the attribute.
277	ClassConstant
278
279	// ClassExprLoc represents values of type []byte that contain
280	// an encoded DWARF expression or location description.
281	ClassExprLoc
282
283	// ClassFlag represents values of type bool.
284	ClassFlag
285
286	// ClassLinePtr represents values that are an int64 offset
287	// into the "line" section.
288	ClassLinePtr
289
290	// ClassLocListPtr represents values that are an int64 offset
291	// into the "loclist" section.
292	ClassLocListPtr
293
294	// ClassMacPtr represents values that are an int64 offset into
295	// the "mac" section.
296	ClassMacPtr
297
298	// ClassMacPtr represents values that are an int64 offset into
299	// the "rangelist" section.
300	ClassRangeListPtr
301
302	// ClassReference represents values that are an Offset offset
303	// of an Entry in the info section (for use with Reader.Seek).
304	// The DWARF specification combines ClassReference and
305	// ClassReferenceSig into class "reference".
306	ClassReference
307
308	// ClassReferenceSig represents values that are a uint64 type
309	// signature referencing a type Entry.
310	ClassReferenceSig
311
312	// ClassString represents values that are strings. If the
313	// compilation unit specifies the AttrUseUTF8 flag (strongly
314	// recommended), the string value will be encoded in UTF-8.
315	// Otherwise, the encoding is unspecified.
316	ClassString
317
318	// ClassReferenceAlt represents values of type int64 that are
319	// an offset into the DWARF "info" section of an alternate
320	// object file.
321	ClassReferenceAlt
322
323	// ClassStringAlt represents values of type int64 that are an
324	// offset into the DWARF string section of an alternate object
325	// file.
326	ClassStringAlt
327)
328
329//go:generate stringer -type=Class
330
331func (i Class) GoString() string {
332	return "dwarf." + i.String()
333}
334
335// Val returns the value associated with attribute Attr in Entry,
336// or nil if there is no such attribute.
337//
338// A common idiom is to merge the check for nil return with
339// the check that the value has the expected dynamic type, as in:
340//	v, ok := e.Val(AttrSibling).(int64)
341//
342func (e *Entry) Val(a Attr) interface{} {
343	if f := e.AttrField(a); f != nil {
344		return f.Val
345	}
346	return nil
347}
348
349// AttrField returns the Field associated with attribute Attr in
350// Entry, or nil if there is no such attribute.
351func (e *Entry) AttrField(a Attr) *Field {
352	for i, f := range e.Field {
353		if f.Attr == a {
354			return &e.Field[i]
355		}
356	}
357	return nil
358}
359
// An Offset is the position of an Entry within the DWARF info
// section. It can be passed to Reader.Seek to read that Entry.
type Offset uint32
363
364// Entry reads a single entry from buf, decoding
365// according to the given abbreviation table.
366func (b *buf) entry(atab abbrevTable, ubase Offset) *Entry {
367	off := b.off
368	id := uint32(b.uint())
369	if id == 0 {
370		return &Entry{}
371	}
372	a, ok := atab[id]
373	if !ok {
374		b.error("unknown abbreviation table index")
375		return nil
376	}
377	e := &Entry{
378		Offset:   off,
379		Tag:      a.tag,
380		Children: a.children,
381		Field:    make([]Field, len(a.field)),
382	}
383	for i := range e.Field {
384		e.Field[i].Attr = a.field[i].attr
385		e.Field[i].Class = a.field[i].class
386		fmt := a.field[i].fmt
387		if fmt == formIndirect {
388			fmt = format(b.uint())
389		}
390		var val interface{}
391		switch fmt {
392		default:
393			b.error("unknown entry attr format 0x" + strconv.FormatInt(int64(fmt), 16))
394
395		// address
396		case formAddr:
397			val = b.addr()
398
399		// block
400		case formDwarfBlock1:
401			val = b.bytes(int(b.uint8()))
402		case formDwarfBlock2:
403			val = b.bytes(int(b.uint16()))
404		case formDwarfBlock4:
405			val = b.bytes(int(b.uint32()))
406		case formDwarfBlock:
407			val = b.bytes(int(b.uint()))
408
409		// constant
410		case formData1:
411			val = int64(b.uint8())
412		case formData2:
413			val = int64(b.uint16())
414		case formData4:
415			val = int64(b.uint32())
416		case formData8:
417			val = int64(b.uint64())
418		case formSdata:
419			val = int64(b.int())
420		case formUdata:
421			val = int64(b.uint())
422
423		// flag
424		case formFlag:
425			val = b.uint8() == 1
426		// New in DWARF 4.
427		case formFlagPresent:
428			// The attribute is implicitly indicated as present, and no value is
429			// encoded in the debugging information entry itself.
430			val = true
431
432		// reference to other entry
433		case formRefAddr:
434			vers := b.format.version()
435			if vers == 0 {
436				b.error("unknown version for DW_FORM_ref_addr")
437			} else if vers == 2 {
438				val = Offset(b.addr())
439			} else {
440				is64, known := b.format.dwarf64()
441				if !known {
442					b.error("unknown size for DW_FORM_ref_addr")
443				} else if is64 {
444					val = Offset(b.uint64())
445				} else {
446					val = Offset(b.uint32())
447				}
448			}
449		case formRef1:
450			val = Offset(b.uint8()) + ubase
451		case formRef2:
452			val = Offset(b.uint16()) + ubase
453		case formRef4:
454			val = Offset(b.uint32()) + ubase
455		case formRef8:
456			val = Offset(b.uint64()) + ubase
457		case formRefUdata:
458			val = Offset(b.uint()) + ubase
459
460		// string
461		case formString:
462			val = b.string()
463		case formStrp:
464			off := b.uint32() // offset into .debug_str
465			if b.err != nil {
466				return nil
467			}
468			b1 := makeBuf(b.dwarf, unknownFormat{}, "str", 0, b.dwarf.str)
469			b1.skip(int(off))
470			val = b1.string()
471			if b1.err != nil {
472				b.err = b1.err
473				return nil
474			}
475
476		// lineptr, loclistptr, macptr, rangelistptr
477		// New in DWARF 4, but clang can generate them with -gdwarf-2.
478		// Section reference, replacing use of formData4 and formData8.
479		case formSecOffset, formGnuRefAlt, formGnuStrpAlt:
480			is64, known := b.format.dwarf64()
481			if !known {
482				b.error("unknown size for form 0x" + strconv.FormatInt(int64(fmt), 16))
483			} else if is64 {
484				val = int64(b.uint64())
485			} else {
486				val = int64(b.uint32())
487			}
488
489		// exprloc
490		// New in DWARF 4.
491		case formExprloc:
492			val = b.bytes(int(b.uint()))
493
494		// reference
495		// New in DWARF 4.
496		case formRefSig8:
497			// 64-bit type signature.
498			val = b.uint64()
499		}
500		e.Field[i].Val = val
501	}
502	if b.err != nil {
503		return nil
504	}
505	return e
506}
507
508// A Reader allows reading Entry structures from a DWARF ``info'' section.
509// The Entry structures are arranged in a tree. The Reader's Next function
510// return successive entries from a pre-order traversal of the tree.
511// If an entry has children, its Children field will be true, and the children
512// follow, terminated by an Entry with Tag 0.
513type Reader struct {
514	b            buf
515	d            *Data
516	err          error
517	unit         int
518	lastChildren bool   // .Children of last entry returned by Next
519	lastSibling  Offset // .Val(AttrSibling) of last entry returned by Next
520}
521
522// Reader returns a new Reader for Data.
523// The reader is positioned at byte offset 0 in the DWARF ``info'' section.
524func (d *Data) Reader() *Reader {
525	r := &Reader{d: d}
526	r.Seek(0)
527	return r
528}
529
530// AddressSize returns the size in bytes of addresses in the current compilation
531// unit.
532func (r *Reader) AddressSize() int {
533	return r.d.unit[r.unit].asize
534}
535
536// Seek positions the Reader at offset off in the encoded entry stream.
537// Offset 0 can be used to denote the first entry.
538func (r *Reader) Seek(off Offset) {
539	d := r.d
540	r.err = nil
541	r.lastChildren = false
542	if off == 0 {
543		if len(d.unit) == 0 {
544			return
545		}
546		u := &d.unit[0]
547		r.unit = 0
548		r.b = makeBuf(r.d, u, "info", u.off, u.data)
549		return
550	}
551
552	i := d.offsetToUnit(off)
553	if i == -1 {
554		r.err = errors.New("offset out of range")
555		return
556	}
557	u := &d.unit[i]
558	r.unit = i
559	r.b = makeBuf(r.d, u, "info", off, u.data[off-u.off:])
560}
561
562// maybeNextUnit advances to the next unit if this one is finished.
563func (r *Reader) maybeNextUnit() {
564	for len(r.b.data) == 0 && r.unit+1 < len(r.d.unit) {
565		r.unit++
566		u := &r.d.unit[r.unit]
567		r.b = makeBuf(r.d, u, "info", u.off, u.data)
568	}
569}
570
571// Next reads the next entry from the encoded entry stream.
572// It returns nil, nil when it reaches the end of the section.
573// It returns an error if the current offset is invalid or the data at the
574// offset cannot be decoded as a valid Entry.
575func (r *Reader) Next() (*Entry, error) {
576	if r.err != nil {
577		return nil, r.err
578	}
579	r.maybeNextUnit()
580	if len(r.b.data) == 0 {
581		return nil, nil
582	}
583	u := &r.d.unit[r.unit]
584	e := r.b.entry(u.atable, u.base)
585	if r.b.err != nil {
586		r.err = r.b.err
587		return nil, r.err
588	}
589	if e != nil {
590		r.lastChildren = e.Children
591		if r.lastChildren {
592			r.lastSibling, _ = e.Val(AttrSibling).(Offset)
593		}
594	} else {
595		r.lastChildren = false
596	}
597	return e, nil
598}
599
600// SkipChildren skips over the child entries associated with
601// the last Entry returned by Next. If that Entry did not have
602// children or Next has not been called, SkipChildren is a no-op.
603func (r *Reader) SkipChildren() {
604	if r.err != nil || !r.lastChildren {
605		return
606	}
607
608	// If the last entry had a sibling attribute,
609	// that attribute gives the offset of the next
610	// sibling, so we can avoid decoding the
611	// child subtrees.
612	if r.lastSibling >= r.b.off {
613		r.Seek(r.lastSibling)
614		return
615	}
616
617	for {
618		e, err := r.Next()
619		if err != nil || e == nil || e.Tag == 0 {
620			break
621		}
622		if e.Children {
623			r.SkipChildren()
624		}
625	}
626}
627
628// clone returns a copy of the reader. This is used by the typeReader
629// interface.
630func (r *Reader) clone() typeReader {
631	return r.d.Reader()
632}
633
634// offset returns the current buffer offset. This is used by the
635// typeReader interface.
636func (r *Reader) offset() Offset {
637	return r.b.off
638}
639
640// SeekPC returns the Entry for the compilation unit that includes pc,
641// and positions the reader to read the children of that unit.  If pc
642// is not covered by any unit, SeekPC returns ErrUnknownPC and the
643// position of the reader is undefined.
644//
645// Because compilation units can describe multiple regions of the
646// executable, in the worst case SeekPC must search through all the
647// ranges in all the compilation units. Each call to SeekPC starts the
648// search at the compilation unit of the last call, so in general
649// looking up a series of PCs will be faster if they are sorted. If
650// the caller wishes to do repeated fast PC lookups, it should build
651// an appropriate index using the Ranges method.
652func (r *Reader) SeekPC(pc uint64) (*Entry, error) {
653	unit := r.unit
654	for i := 0; i < len(r.d.unit); i++ {
655		if unit >= len(r.d.unit) {
656			unit = 0
657		}
658		r.err = nil
659		r.lastChildren = false
660		r.unit = unit
661		u := &r.d.unit[unit]
662		r.b = makeBuf(r.d, u, "info", u.off, u.data)
663		e, err := r.Next()
664		if err != nil {
665			return nil, err
666		}
667		ranges, err := r.d.Ranges(e)
668		if err != nil {
669			return nil, err
670		}
671		for _, pcs := range ranges {
672			if pcs[0] <= pc && pc < pcs[1] {
673				return e, nil
674			}
675		}
676		unit++
677	}
678	return nil, ErrUnknownPC
679}
680
681// Ranges returns the PC ranges covered by e, a slice of [low,high) pairs.
682// Only some entry types, such as TagCompileUnit or TagSubprogram, have PC
683// ranges; for others, this will return nil with no error.
684func (d *Data) Ranges(e *Entry) ([][2]uint64, error) {
685	var ret [][2]uint64
686
687	low, lowOK := e.Val(AttrLowpc).(uint64)
688
689	var high uint64
690	var highOK bool
691	highField := e.AttrField(AttrHighpc)
692	if highField != nil {
693		switch highField.Class {
694		case ClassAddress:
695			high, highOK = highField.Val.(uint64)
696		case ClassConstant:
697			off, ok := highField.Val.(int64)
698			if ok {
699				high = low + uint64(off)
700				highOK = true
701			}
702		}
703	}
704
705	if lowOK && highOK {
706		ret = append(ret, [2]uint64{low, high})
707	}
708
709	ranges, rangesOK := e.Val(AttrRanges).(int64)
710	if rangesOK && d.ranges != nil {
711		// The initial base address is the lowpc attribute
712		// of the enclosing compilation unit.
713		// Although DWARF specifies the lowpc attribute,
714		// comments in gdb/dwarf2read.c say that some versions
715		// of GCC use the entrypc attribute, so we check that too.
716		var cu *Entry
717		if e.Tag == TagCompileUnit {
718			cu = e
719		} else {
720			i := d.offsetToUnit(e.Offset)
721			if i == -1 {
722				return nil, errors.New("no unit for entry")
723			}
724			u := &d.unit[i]
725			b := makeBuf(d, u, "info", u.off, u.data)
726			cu = b.entry(u.atable, u.base)
727			if b.err != nil {
728				return nil, b.err
729			}
730		}
731
732		var base uint64
733		if cuEntry, cuEntryOK := cu.Val(AttrEntrypc).(uint64); cuEntryOK {
734			base = cuEntry
735		} else if cuLow, cuLowOK := cu.Val(AttrLowpc).(uint64); cuLowOK {
736			base = cuLow
737		}
738
739		u := &d.unit[d.offsetToUnit(e.Offset)]
740		buf := makeBuf(d, u, "ranges", Offset(ranges), d.ranges[ranges:])
741		for len(buf.data) > 0 {
742			low = buf.addr()
743			high = buf.addr()
744
745			if low == 0 && high == 0 {
746				break
747			}
748
749			if low == ^uint64(0)>>uint((8-u.addrsize())*8) {
750				base = high
751			} else {
752				ret = append(ret, [2]uint64{base + low, base + high})
753			}
754		}
755	}
756
757	return ret, nil
758}
759