1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// DWARF debug information entry parser.
6// An entry is a sequence of data items of a given format.
7// The first word in the entry is an index into what DWARF
8// calls the ``abbreviation table.''  An abbreviation is really
9// just a type descriptor: it's an array of attribute tag/value format pairs.
10
11package dwarf
12
13import (
14	"errors"
15	"strconv"
16)
17
18// a single entry's description: a sequence of attributes
19type abbrev struct {
20	tag      Tag
21	children bool
22	field    []afield
23}
24
25type afield struct {
26	attr  Attr
27	fmt   format
28	class Class
29}
30
31// a map from entry format ids to their descriptions
32type abbrevTable map[uint32]abbrev
33
34// ParseAbbrev returns the abbreviation table that starts at byte off
35// in the .debug_abbrev section.
36func (d *Data) parseAbbrev(off uint64, vers int) (abbrevTable, error) {
37	if m, ok := d.abbrevCache[off]; ok {
38		return m, nil
39	}
40
41	data := d.abbrev
42	if off > uint64(len(data)) {
43		data = nil
44	} else {
45		data = data[off:]
46	}
47	b := makeBuf(d, unknownFormat{}, "abbrev", 0, data)
48
49	// Error handling is simplified by the buf getters
50	// returning an endless stream of 0s after an error.
51	m := make(abbrevTable)
52	for {
53		// Table ends with id == 0.
54		id := uint32(b.uint())
55		if id == 0 {
56			break
57		}
58
59		// Walk over attributes, counting.
60		n := 0
61		b1 := b // Read from copy of b.
62		b1.uint()
63		b1.uint8()
64		for {
65			tag := b1.uint()
66			fmt := b1.uint()
67			if tag == 0 && fmt == 0 {
68				break
69			}
70			n++
71		}
72		if b1.err != nil {
73			return nil, b1.err
74		}
75
76		// Walk over attributes again, this time writing them down.
77		var a abbrev
78		a.tag = Tag(b.uint())
79		a.children = b.uint8() != 0
80		a.field = make([]afield, n)
81		for i := range a.field {
82			a.field[i].attr = Attr(b.uint())
83			a.field[i].fmt = format(b.uint())
84			a.field[i].class = formToClass(a.field[i].fmt, a.field[i].attr, vers, &b)
85		}
86		b.uint()
87		b.uint()
88
89		m[id] = a
90	}
91	if b.err != nil {
92		return nil, b.err
93	}
94	d.abbrevCache[off] = m
95	return m, nil
96}
97
98// attrIsExprloc indicates attributes that allow exprloc values that
99// are encoded as block values in DWARF 2 and 3. See DWARF 4, Figure
100// 20.
101var attrIsExprloc = map[Attr]bool{
102	AttrLocation:      true,
103	AttrByteSize:      true,
104	AttrBitOffset:     true,
105	AttrBitSize:       true,
106	AttrStringLength:  true,
107	AttrLowerBound:    true,
108	AttrReturnAddr:    true,
109	AttrStrideSize:    true,
110	AttrUpperBound:    true,
111	AttrCount:         true,
112	AttrDataMemberLoc: true,
113	AttrFrameBase:     true,
114	AttrSegment:       true,
115	AttrStaticLink:    true,
116	AttrUseLocation:   true,
117	AttrVtableElemLoc: true,
118	AttrAllocated:     true,
119	AttrAssociated:    true,
120	AttrDataLocation:  true,
121	AttrStride:        true,
122}
123
124// attrPtrClass indicates the *ptr class of attributes that have
125// encoding formSecOffset in DWARF 4 or formData* in DWARF 2 and 3.
126var attrPtrClass = map[Attr]Class{
127	AttrLocation:      ClassLocListPtr,
128	AttrStmtList:      ClassLinePtr,
129	AttrStringLength:  ClassLocListPtr,
130	AttrReturnAddr:    ClassLocListPtr,
131	AttrStartScope:    ClassRangeListPtr,
132	AttrDataMemberLoc: ClassLocListPtr,
133	AttrFrameBase:     ClassLocListPtr,
134	AttrMacroInfo:     ClassMacPtr,
135	AttrSegment:       ClassLocListPtr,
136	AttrStaticLink:    ClassLocListPtr,
137	AttrUseLocation:   ClassLocListPtr,
138	AttrVtableElemLoc: ClassLocListPtr,
139	AttrRanges:        ClassRangeListPtr,
140}
141
142// formToClass returns the DWARF 4 Class for the given form. If the
143// DWARF version is less then 4, it will disambiguate some forms
144// depending on the attribute.
145func formToClass(form format, attr Attr, vers int, b *buf) Class {
146	switch form {
147	default:
148		b.error("cannot determine class of unknown attribute form")
149		return 0
150
151	case formAddr:
152		return ClassAddress
153
154	case formDwarfBlock1, formDwarfBlock2, formDwarfBlock4, formDwarfBlock:
155		// In DWARF 2 and 3, ClassExprLoc was encoded as a
156		// block. DWARF 4 distinguishes ClassBlock and
157		// ClassExprLoc, but there are no attributes that can
158		// be both, so we also promote ClassBlock values in
159		// DWARF 4 that should be ClassExprLoc in case
160		// producers get this wrong.
161		if attrIsExprloc[attr] {
162			return ClassExprLoc
163		}
164		return ClassBlock
165
166	case formData1, formData2, formData4, formData8, formSdata, formUdata:
167		// In DWARF 2 and 3, ClassPtr was encoded as a
168		// constant. Unlike ClassExprLoc/ClassBlock, some
169		// DWARF 4 attributes need to distinguish Class*Ptr
170		// from ClassConstant, so we only do this promotion
171		// for versions 2 and 3.
172		if class, ok := attrPtrClass[attr]; vers < 4 && ok {
173			return class
174		}
175		return ClassConstant
176
177	case formFlag, formFlagPresent:
178		return ClassFlag
179
180	case formRefAddr, formRef1, formRef2, formRef4, formRef8, formRefUdata:
181		return ClassReference
182
183	case formRefSig8:
184		return ClassReferenceSig
185
186	case formString, formStrp:
187		return ClassString
188
189	case formSecOffset:
190		// DWARF 4 defines four *ptr classes, but doesn't
191		// distinguish them in the encoding. Disambiguate
192		// these classes using the attribute.
193		if class, ok := attrPtrClass[attr]; ok {
194			return class
195		}
196		return ClassUnknown
197
198	case formExprloc:
199		return ClassExprLoc
200
201	case formGnuRefAlt:
202		return ClassReferenceAlt
203
204	case formGnuStrpAlt:
205		return ClassStringAlt
206	}
207}
208
209// An entry is a sequence of attribute/value pairs.
210type Entry struct {
211	Offset   Offset // offset of Entry in DWARF info
212	Tag      Tag    // tag (kind of Entry)
213	Children bool   // whether Entry is followed by children
214	Field    []Field
215}
216
217// A Field is a single attribute/value pair in an Entry.
218//
219// A value can be one of several "attribute classes" defined by DWARF.
220// The Go types corresponding to each class are:
221//
222//    DWARF class       Go type        Class
223//    -----------       -------        -----
224//    address           uint64         ClassAddress
225//    block             []byte         ClassBlock
226//    constant          int64          ClassConstant
227//    flag              bool           ClassFlag
228//    reference
229//      to info         dwarf.Offset   ClassReference
230//      to type unit    uint64         ClassReferenceSig
231//    string            string         ClassString
232//    exprloc           []byte         ClassExprLoc
233//    lineptr           int64          ClassLinePtr
234//    loclistptr        int64          ClassLocListPtr
235//    macptr            int64          ClassMacPtr
236//    rangelistptr      int64          ClassRangeListPtr
237//
238// For unrecognized or vendor-defined attributes, Class may be
239// ClassUnknown.
240type Field struct {
241	Attr  Attr
242	Val   interface{}
243	Class Class
244}
245
246// A Class is the DWARF 4 class of an attribute value.
247//
248// In general, a given attribute's value may take on one of several
249// possible classes defined by DWARF, each of which leads to a
250// slightly different interpretation of the attribute.
251//
252// DWARF version 4 distinguishes attribute value classes more finely
253// than previous versions of DWARF. The reader will disambiguate
254// coarser classes from earlier versions of DWARF into the appropriate
255// DWARF 4 class. For example, DWARF 2 uses "constant" for constants
256// as well as all types of section offsets, but the reader will
257// canonicalize attributes in DWARF 2 files that refer to section
258// offsets to one of the Class*Ptr classes, even though these classes
259// were only defined in DWARF 3.
260type Class int
261
262const (
263	// ClassUnknown represents values of unknown DWARF class.
264	ClassUnknown Class = iota
265
266	// ClassAddress represents values of type uint64 that are
267	// addresses on the target machine.
268	ClassAddress
269
270	// ClassBlock represents values of type []byte whose
271	// interpretation depends on the attribute.
272	ClassBlock
273
274	// ClassConstant represents values of type int64 that are
275	// constants. The interpretation of this constant depends on
276	// the attribute.
277	ClassConstant
278
279	// ClassExprLoc represents values of type []byte that contain
280	// an encoded DWARF expression or location description.
281	ClassExprLoc
282
283	// ClassFlag represents values of type bool.
284	ClassFlag
285
286	// ClassLinePtr represents values that are an int64 offset
287	// into the "line" section.
288	ClassLinePtr
289
290	// ClassLocListPtr represents values that are an int64 offset
291	// into the "loclist" section.
292	ClassLocListPtr
293
294	// ClassMacPtr represents values that are an int64 offset into
295	// the "mac" section.
296	ClassMacPtr
297
298	// ClassMacPtr represents values that are an int64 offset into
299	// the "rangelist" section.
300	ClassRangeListPtr
301
302	// ClassReference represents values that are an Offset offset
303	// of an Entry in the info section (for use with Reader.Seek).
304	// The DWARF specification combines ClassReference and
305	// ClassReferenceSig into class "reference".
306	ClassReference
307
308	// ClassReferenceSig represents values that are a uint64 type
309	// signature referencing a type Entry.
310	ClassReferenceSig
311
312	// ClassString represents values that are strings. If the
313	// compilation unit specifies the AttrUseUTF8 flag (strongly
314	// recommended), the string value will be encoded in UTF-8.
315	// Otherwise, the encoding is unspecified.
316	ClassString
317
318	// ClassReferenceAlt represents values of type int64 that are
319	// an offset into the DWARF "info" section of an alternate
320	// object file.
321	ClassReferenceAlt
322
323	// ClassStringAlt represents values of type int64 that are an
324	// offset into the DWARF string section of an alternate object
325	// file.
326	ClassStringAlt
327)
328
329//go:generate stringer -type=Class
330
331func (i Class) GoString() string {
332	return "dwarf." + i.String()
333}
334
335// Val returns the value associated with attribute Attr in Entry,
336// or nil if there is no such attribute.
337//
338// A common idiom is to merge the check for nil return with
339// the check that the value has the expected dynamic type, as in:
340//	v, ok := e.Val(AttrSibling).(int64)
341//
342func (e *Entry) Val(a Attr) interface{} {
343	if f := e.AttrField(a); f != nil {
344		return f.Val
345	}
346	return nil
347}
348
349// AttrField returns the Field associated with attribute Attr in
350// Entry, or nil if there is no such attribute.
351func (e *Entry) AttrField(a Attr) *Field {
352	for i, f := range e.Field {
353		if f.Attr == a {
354			return &e.Field[i]
355		}
356	}
357	return nil
358}
359
360// An Offset represents the location of an Entry within the DWARF info.
361// (See Reader.Seek.)
362type Offset uint32
363
364// Entry reads a single entry from buf, decoding
365// according to the given abbreviation table.
366func (b *buf) entry(atab abbrevTable, ubase Offset) *Entry {
367	off := b.off
368	id := uint32(b.uint())
369	if id == 0 {
370		return &Entry{}
371	}
372	a, ok := atab[id]
373	if !ok {
374		b.error("unknown abbreviation table index")
375		return nil
376	}
377	e := &Entry{
378		Offset:   off,
379		Tag:      a.tag,
380		Children: a.children,
381		Field:    make([]Field, len(a.field)),
382	}
383	for i := range e.Field {
384		e.Field[i].Attr = a.field[i].attr
385		e.Field[i].Class = a.field[i].class
386		fmt := a.field[i].fmt
387		if fmt == formIndirect {
388			fmt = format(b.uint())
389		}
390		var val interface{}
391		switch fmt {
392		default:
393			b.error("unknown entry attr format 0x" + strconv.FormatInt(int64(fmt), 16))
394
395		// address
396		case formAddr:
397			val = b.addr()
398
399		// block
400		case formDwarfBlock1:
401			val = b.bytes(int(b.uint8()))
402		case formDwarfBlock2:
403			val = b.bytes(int(b.uint16()))
404		case formDwarfBlock4:
405			val = b.bytes(int(b.uint32()))
406		case formDwarfBlock:
407			val = b.bytes(int(b.uint()))
408
409		// constant
410		case formData1:
411			val = int64(b.uint8())
412		case formData2:
413			val = int64(b.uint16())
414		case formData4:
415			val = int64(b.uint32())
416		case formData8:
417			val = int64(b.uint64())
418		case formSdata:
419			val = int64(b.int())
420		case formUdata:
421			val = int64(b.uint())
422
423		// flag
424		case formFlag:
425			val = b.uint8() == 1
426		// New in DWARF 4.
427		case formFlagPresent:
428			// The attribute is implicitly indicated as present, and no value is
429			// encoded in the debugging information entry itself.
430			val = true
431
432		// reference to other entry
433		case formRefAddr:
434			vers := b.format.version()
435			if vers == 0 {
436				b.error("unknown version for DW_FORM_ref_addr")
437			} else if vers == 2 {
438				val = Offset(b.addr())
439			} else {
440				is64, known := b.format.dwarf64()
441				if !known {
442					b.error("unknown size for DW_FORM_ref_addr")
443				} else if is64 {
444					val = Offset(b.uint64())
445				} else {
446					val = Offset(b.uint32())
447				}
448			}
449		case formRef1:
450			val = Offset(b.uint8()) + ubase
451		case formRef2:
452			val = Offset(b.uint16()) + ubase
453		case formRef4:
454			val = Offset(b.uint32()) + ubase
455		case formRef8:
456			val = Offset(b.uint64()) + ubase
457		case formRefUdata:
458			val = Offset(b.uint()) + ubase
459
460		// string
461		case formString:
462			val = b.string()
463		case formStrp:
464			var off uint64 // offset into .debug_str
465			is64, known := b.format.dwarf64()
466			if !known {
467				b.error("unknown size for DW_FORM_strp")
468			} else if is64 {
469				off = b.uint64()
470			} else {
471				off = uint64(b.uint32())
472			}
473			if uint64(int(off)) != off {
474				b.error("DW_FORM_strp offset out of range")
475			}
476			if b.err != nil {
477				return nil
478			}
479			b1 := makeBuf(b.dwarf, unknownFormat{}, "str", 0, b.dwarf.str)
480			b1.skip(int(off))
481			val = b1.string()
482			if b1.err != nil {
483				b.err = b1.err
484				return nil
485			}
486
487		// lineptr, loclistptr, macptr, rangelistptr
488		// New in DWARF 4, but clang can generate them with -gdwarf-2.
489		// Section reference, replacing use of formData4 and formData8.
490		case formSecOffset, formGnuRefAlt, formGnuStrpAlt:
491			is64, known := b.format.dwarf64()
492			if !known {
493				b.error("unknown size for form 0x" + strconv.FormatInt(int64(fmt), 16))
494			} else if is64 {
495				val = int64(b.uint64())
496			} else {
497				val = int64(b.uint32())
498			}
499
500		// exprloc
501		// New in DWARF 4.
502		case formExprloc:
503			val = b.bytes(int(b.uint()))
504
505		// reference
506		// New in DWARF 4.
507		case formRefSig8:
508			// 64-bit type signature.
509			val = b.uint64()
510		}
511		e.Field[i].Val = val
512	}
513	if b.err != nil {
514		return nil
515	}
516	return e
517}
518
519// A Reader allows reading Entry structures from a DWARF ``info'' section.
520// The Entry structures are arranged in a tree. The Reader's Next function
521// return successive entries from a pre-order traversal of the tree.
522// If an entry has children, its Children field will be true, and the children
523// follow, terminated by an Entry with Tag 0.
524type Reader struct {
525	b            buf
526	d            *Data
527	err          error
528	unit         int
529	lastChildren bool   // .Children of last entry returned by Next
530	lastSibling  Offset // .Val(AttrSibling) of last entry returned by Next
531}
532
533// Reader returns a new Reader for Data.
534// The reader is positioned at byte offset 0 in the DWARF ``info'' section.
535func (d *Data) Reader() *Reader {
536	r := &Reader{d: d}
537	r.Seek(0)
538	return r
539}
540
541// AddressSize returns the size in bytes of addresses in the current compilation
542// unit.
543func (r *Reader) AddressSize() int {
544	return r.d.unit[r.unit].asize
545}
546
547// Seek positions the Reader at offset off in the encoded entry stream.
548// Offset 0 can be used to denote the first entry.
549func (r *Reader) Seek(off Offset) {
550	d := r.d
551	r.err = nil
552	r.lastChildren = false
553	if off == 0 {
554		if len(d.unit) == 0 {
555			return
556		}
557		u := &d.unit[0]
558		r.unit = 0
559		r.b = makeBuf(r.d, u, "info", u.off, u.data)
560		return
561	}
562
563	i := d.offsetToUnit(off)
564	if i == -1 {
565		r.err = errors.New("offset out of range")
566		return
567	}
568	u := &d.unit[i]
569	r.unit = i
570	r.b = makeBuf(r.d, u, "info", off, u.data[off-u.off:])
571}
572
573// maybeNextUnit advances to the next unit if this one is finished.
574func (r *Reader) maybeNextUnit() {
575	for len(r.b.data) == 0 && r.unit+1 < len(r.d.unit) {
576		r.unit++
577		u := &r.d.unit[r.unit]
578		r.b = makeBuf(r.d, u, "info", u.off, u.data)
579	}
580}
581
582// Next reads the next entry from the encoded entry stream.
583// It returns nil, nil when it reaches the end of the section.
584// It returns an error if the current offset is invalid or the data at the
585// offset cannot be decoded as a valid Entry.
586func (r *Reader) Next() (*Entry, error) {
587	if r.err != nil {
588		return nil, r.err
589	}
590	r.maybeNextUnit()
591	if len(r.b.data) == 0 {
592		return nil, nil
593	}
594	u := &r.d.unit[r.unit]
595	e := r.b.entry(u.atable, u.base)
596	if r.b.err != nil {
597		r.err = r.b.err
598		return nil, r.err
599	}
600	if e != nil {
601		r.lastChildren = e.Children
602		if r.lastChildren {
603			r.lastSibling, _ = e.Val(AttrSibling).(Offset)
604		}
605	} else {
606		r.lastChildren = false
607	}
608	return e, nil
609}
610
611// SkipChildren skips over the child entries associated with
612// the last Entry returned by Next. If that Entry did not have
613// children or Next has not been called, SkipChildren is a no-op.
614func (r *Reader) SkipChildren() {
615	if r.err != nil || !r.lastChildren {
616		return
617	}
618
619	// If the last entry had a sibling attribute,
620	// that attribute gives the offset of the next
621	// sibling, so we can avoid decoding the
622	// child subtrees.
623	if r.lastSibling >= r.b.off {
624		r.Seek(r.lastSibling)
625		return
626	}
627
628	for {
629		e, err := r.Next()
630		if err != nil || e == nil || e.Tag == 0 {
631			break
632		}
633		if e.Children {
634			r.SkipChildren()
635		}
636	}
637}
638
639// clone returns a copy of the reader. This is used by the typeReader
640// interface.
641func (r *Reader) clone() typeReader {
642	return r.d.Reader()
643}
644
645// offset returns the current buffer offset. This is used by the
646// typeReader interface.
647func (r *Reader) offset() Offset {
648	return r.b.off
649}
650
651// SeekPC returns the Entry for the compilation unit that includes pc,
652// and positions the reader to read the children of that unit.  If pc
653// is not covered by any unit, SeekPC returns ErrUnknownPC and the
654// position of the reader is undefined.
655//
656// Because compilation units can describe multiple regions of the
657// executable, in the worst case SeekPC must search through all the
658// ranges in all the compilation units. Each call to SeekPC starts the
659// search at the compilation unit of the last call, so in general
660// looking up a series of PCs will be faster if they are sorted. If
661// the caller wishes to do repeated fast PC lookups, it should build
662// an appropriate index using the Ranges method.
663func (r *Reader) SeekPC(pc uint64) (*Entry, error) {
664	unit := r.unit
665	for i := 0; i < len(r.d.unit); i++ {
666		if unit >= len(r.d.unit) {
667			unit = 0
668		}
669		r.err = nil
670		r.lastChildren = false
671		r.unit = unit
672		u := &r.d.unit[unit]
673		r.b = makeBuf(r.d, u, "info", u.off, u.data)
674		e, err := r.Next()
675		if err != nil {
676			return nil, err
677		}
678		ranges, err := r.d.Ranges(e)
679		if err != nil {
680			return nil, err
681		}
682		for _, pcs := range ranges {
683			if pcs[0] <= pc && pc < pcs[1] {
684				return e, nil
685			}
686		}
687		unit++
688	}
689	return nil, ErrUnknownPC
690}
691
692// Ranges returns the PC ranges covered by e, a slice of [low,high) pairs.
693// Only some entry types, such as TagCompileUnit or TagSubprogram, have PC
694// ranges; for others, this will return nil with no error.
695func (d *Data) Ranges(e *Entry) ([][2]uint64, error) {
696	var ret [][2]uint64
697
698	low, lowOK := e.Val(AttrLowpc).(uint64)
699
700	var high uint64
701	var highOK bool
702	highField := e.AttrField(AttrHighpc)
703	if highField != nil {
704		switch highField.Class {
705		case ClassAddress:
706			high, highOK = highField.Val.(uint64)
707		case ClassConstant:
708			off, ok := highField.Val.(int64)
709			if ok {
710				high = low + uint64(off)
711				highOK = true
712			}
713		}
714	}
715
716	if lowOK && highOK {
717		ret = append(ret, [2]uint64{low, high})
718	}
719
720	ranges, rangesOK := e.Val(AttrRanges).(int64)
721	if rangesOK && d.ranges != nil {
722		// The initial base address is the lowpc attribute
723		// of the enclosing compilation unit.
724		// Although DWARF specifies the lowpc attribute,
725		// comments in gdb/dwarf2read.c say that some versions
726		// of GCC use the entrypc attribute, so we check that too.
727		var cu *Entry
728		if e.Tag == TagCompileUnit {
729			cu = e
730		} else {
731			i := d.offsetToUnit(e.Offset)
732			if i == -1 {
733				return nil, errors.New("no unit for entry")
734			}
735			u := &d.unit[i]
736			b := makeBuf(d, u, "info", u.off, u.data)
737			cu = b.entry(u.atable, u.base)
738			if b.err != nil {
739				return nil, b.err
740			}
741		}
742
743		var base uint64
744		if cuEntry, cuEntryOK := cu.Val(AttrEntrypc).(uint64); cuEntryOK {
745			base = cuEntry
746		} else if cuLow, cuLowOK := cu.Val(AttrLowpc).(uint64); cuLowOK {
747			base = cuLow
748		}
749
750		u := &d.unit[d.offsetToUnit(e.Offset)]
751		buf := makeBuf(d, u, "ranges", Offset(ranges), d.ranges[ranges:])
752		for len(buf.data) > 0 {
753			low = buf.addr()
754			high = buf.addr()
755
756			if low == 0 && high == 0 {
757				break
758			}
759
760			if low == ^uint64(0)>>uint((8-u.addrsize())*8) {
761				base = high
762			} else {
763				ret = append(ret, [2]uint64{base + low, base + high})
764			}
765		}
766	}
767
768	return ret, nil
769}
770