1// Copyright 2009 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5// DWARF debug information entry parser. 6// An entry is a sequence of data items of a given format. 7// The first word in the entry is an index into what DWARF 8// calls the ``abbreviation table.'' An abbreviation is really 9// just a type descriptor: it's an array of attribute tag/value format pairs. 10 11package dwarf 12 13import ( 14 "errors" 15 "strconv" 16) 17 18// a single entry's description: a sequence of attributes 19type abbrev struct { 20 tag Tag 21 children bool 22 field []afield 23} 24 25type afield struct { 26 attr Attr 27 fmt format 28 class Class 29} 30 31// a map from entry format ids to their descriptions 32type abbrevTable map[uint32]abbrev 33 34// ParseAbbrev returns the abbreviation table that starts at byte off 35// in the .debug_abbrev section. 36func (d *Data) parseAbbrev(off uint32, vers int) (abbrevTable, error) { 37 if m, ok := d.abbrevCache[off]; ok { 38 return m, nil 39 } 40 41 data := d.abbrev 42 if off > uint32(len(data)) { 43 data = nil 44 } else { 45 data = data[off:] 46 } 47 b := makeBuf(d, unknownFormat{}, "abbrev", 0, data) 48 49 // Error handling is simplified by the buf getters 50 // returning an endless stream of 0s after an error. 51 m := make(abbrevTable) 52 for { 53 // Table ends with id == 0. 54 id := uint32(b.uint()) 55 if id == 0 { 56 break 57 } 58 59 // Walk over attributes, counting. 60 n := 0 61 b1 := b // Read from copy of b. 62 b1.uint() 63 b1.uint8() 64 for { 65 tag := b1.uint() 66 fmt := b1.uint() 67 if tag == 0 && fmt == 0 { 68 break 69 } 70 n++ 71 } 72 if b1.err != nil { 73 return nil, b1.err 74 } 75 76 // Walk over attributes again, this time writing them down. 77 var a abbrev 78 a.tag = Tag(b.uint()) 79 a.children = b.uint8() != 0 80 a.field = make([]afield, n) 81 for i := range a.field { 82 a.field[i].attr = Attr(b.uint()) 83 a.field[i].fmt = format(b.uint()) 84 a.field[i].class = formToClass(a.field[i].fmt, a.field[i].attr, vers, &b) 85 } 86 b.uint() 87 b.uint() 88 89 m[id] = a 90 } 91 if b.err != nil { 92 return nil, b.err 93 } 94 d.abbrevCache[off] = m 95 return m, nil 96} 97 98// attrIsExprloc indicates attributes that allow exprloc values that 99// are encoded as block values in DWARF 2 and 3. See DWARF 4, Figure 100// 20. 101var attrIsExprloc = map[Attr]bool{ 102 AttrLocation: true, 103 AttrByteSize: true, 104 AttrBitOffset: true, 105 AttrBitSize: true, 106 AttrStringLength: true, 107 AttrLowerBound: true, 108 AttrReturnAddr: true, 109 AttrStrideSize: true, 110 AttrUpperBound: true, 111 AttrCount: true, 112 AttrDataMemberLoc: true, 113 AttrFrameBase: true, 114 AttrSegment: true, 115 AttrStaticLink: true, 116 AttrUseLocation: true, 117 AttrVtableElemLoc: true, 118 AttrAllocated: true, 119 AttrAssociated: true, 120 AttrDataLocation: true, 121 AttrStride: true, 122} 123 124// attrPtrClass indicates the *ptr class of attributes that have 125// encoding formSecOffset in DWARF 4 or formData* in DWARF 2 and 3. 126var attrPtrClass = map[Attr]Class{ 127 AttrLocation: ClassLocListPtr, 128 AttrStmtList: ClassLinePtr, 129 AttrStringLength: ClassLocListPtr, 130 AttrReturnAddr: ClassLocListPtr, 131 AttrStartScope: ClassRangeListPtr, 132 AttrDataMemberLoc: ClassLocListPtr, 133 AttrFrameBase: ClassLocListPtr, 134 AttrMacroInfo: ClassMacPtr, 135 AttrSegment: ClassLocListPtr, 136 AttrStaticLink: ClassLocListPtr, 137 AttrUseLocation: ClassLocListPtr, 138 AttrVtableElemLoc: ClassLocListPtr, 139 AttrRanges: ClassRangeListPtr, 140} 141 142// formToClass returns the DWARF 4 Class for the given form. If the 143// DWARF version is less then 4, it will disambiguate some forms 144// depending on the attribute. 145func formToClass(form format, attr Attr, vers int, b *buf) Class { 146 switch form { 147 default: 148 b.error("cannot determine class of unknown attribute form") 149 return 0 150 151 case formAddr: 152 return ClassAddress 153 154 case formDwarfBlock1, formDwarfBlock2, formDwarfBlock4, formDwarfBlock: 155 // In DWARF 2 and 3, ClassExprLoc was encoded as a 156 // block. DWARF 4 distinguishes ClassBlock and 157 // ClassExprLoc, but there are no attributes that can 158 // be both, so we also promote ClassBlock values in 159 // DWARF 4 that should be ClassExprLoc in case 160 // producers get this wrong. 161 if attrIsExprloc[attr] { 162 return ClassExprLoc 163 } 164 return ClassBlock 165 166 case formData1, formData2, formData4, formData8, formSdata, formUdata: 167 // In DWARF 2 and 3, ClassPtr was encoded as a 168 // constant. Unlike ClassExprLoc/ClassBlock, some 169 // DWARF 4 attributes need to distinguish Class*Ptr 170 // from ClassConstant, so we only do this promotion 171 // for versions 2 and 3. 172 if class, ok := attrPtrClass[attr]; vers < 4 && ok { 173 return class 174 } 175 return ClassConstant 176 177 case formFlag, formFlagPresent: 178 return ClassFlag 179 180 case formRefAddr, formRef1, formRef2, formRef4, formRef8, formRefUdata: 181 return ClassReference 182 183 case formRefSig8: 184 return ClassReferenceSig 185 186 case formString, formStrp: 187 return ClassString 188 189 case formSecOffset: 190 // DWARF 4 defines four *ptr classes, but doesn't 191 // distinguish them in the encoding. Disambiguate 192 // these classes using the attribute. 193 if class, ok := attrPtrClass[attr]; ok { 194 return class 195 } 196 return ClassUnknown 197 198 case formExprloc: 199 return ClassExprLoc 200 201 case formGnuRefAlt: 202 return ClassReferenceAlt 203 204 case formGnuStrpAlt: 205 return ClassStringAlt 206 } 207} 208 209// An entry is a sequence of attribute/value pairs. 210type Entry struct { 211 Offset Offset // offset of Entry in DWARF info 212 Tag Tag // tag (kind of Entry) 213 Children bool // whether Entry is followed by children 214 Field []Field 215} 216 217// A Field is a single attribute/value pair in an Entry. 218// 219// A value can be one of several "attribute classes" defined by DWARF. 220// The Go types corresponding to each class are: 221// 222// DWARF class Go type Class 223// ----------- ------- ----- 224// address uint64 ClassAddress 225// block []byte ClassBlock 226// constant int64 ClassConstant 227// flag bool ClassFlag 228// reference 229// to info dwarf.Offset ClassReference 230// to type unit uint64 ClassReferenceSig 231// string string ClassString 232// exprloc []byte ClassExprLoc 233// lineptr int64 ClassLinePtr 234// loclistptr int64 ClassLocListPtr 235// macptr int64 ClassMacPtr 236// rangelistptr int64 ClassRangeListPtr 237// 238// For unrecognized or vendor-defined attributes, Class may be 239// ClassUnknown. 240type Field struct { 241 Attr Attr 242 Val interface{} 243 Class Class 244} 245 246// A Class is the DWARF 4 class of an attribute value. 247// 248// In general, a given attribute's value may take on one of several 249// possible classes defined by DWARF, each of which leads to a 250// slightly different interpretation of the attribute. 251// 252// DWARF version 4 distinguishes attribute value classes more finely 253// than previous versions of DWARF. The reader will disambiguate 254// coarser classes from earlier versions of DWARF into the appropriate 255// DWARF 4 class. For example, DWARF 2 uses "constant" for constants 256// as well as all types of section offsets, but the reader will 257// canonicalize attributes in DWARF 2 files that refer to section 258// offsets to one of the Class*Ptr classes, even though these classes 259// were only defined in DWARF 3. 260type Class int 261 262const ( 263 // ClassUnknown represents values of unknown DWARF class. 264 ClassUnknown Class = iota 265 266 // ClassAddress represents values of type uint64 that are 267 // addresses on the target machine. 268 ClassAddress 269 270 // ClassBlock represents values of type []byte whose 271 // interpretation depends on the attribute. 272 ClassBlock 273 274 // ClassConstant represents values of type int64 that are 275 // constants. The interpretation of this constant depends on 276 // the attribute. 277 ClassConstant 278 279 // ClassExprLoc represents values of type []byte that contain 280 // an encoded DWARF expression or location description. 281 ClassExprLoc 282 283 // ClassFlag represents values of type bool. 284 ClassFlag 285 286 // ClassLinePtr represents values that are an int64 offset 287 // into the "line" section. 288 ClassLinePtr 289 290 // ClassLocListPtr represents values that are an int64 offset 291 // into the "loclist" section. 292 ClassLocListPtr 293 294 // ClassMacPtr represents values that are an int64 offset into 295 // the "mac" section. 296 ClassMacPtr 297 298 // ClassMacPtr represents values that are an int64 offset into 299 // the "rangelist" section. 300 ClassRangeListPtr 301 302 // ClassReference represents values that are an Offset offset 303 // of an Entry in the info section (for use with Reader.Seek). 304 // The DWARF specification combines ClassReference and 305 // ClassReferenceSig into class "reference". 306 ClassReference 307 308 // ClassReferenceSig represents values that are a uint64 type 309 // signature referencing a type Entry. 310 ClassReferenceSig 311 312 // ClassString represents values that are strings. If the 313 // compilation unit specifies the AttrUseUTF8 flag (strongly 314 // recommended), the string value will be encoded in UTF-8. 315 // Otherwise, the encoding is unspecified. 316 ClassString 317 318 // ClassReferenceAlt represents values of type int64 that are 319 // an offset into the DWARF "info" section of an alternate 320 // object file. 321 ClassReferenceAlt 322 323 // ClassStringAlt represents values of type int64 that are an 324 // offset into the DWARF string section of an alternate object 325 // file. 326 ClassStringAlt 327) 328 329//go:generate stringer -type=Class 330 331func (i Class) GoString() string { 332 return "dwarf." + i.String() 333} 334 335// Val returns the value associated with attribute Attr in Entry, 336// or nil if there is no such attribute. 337// 338// A common idiom is to merge the check for nil return with 339// the check that the value has the expected dynamic type, as in: 340// v, ok := e.Val(AttrSibling).(int64) 341// 342func (e *Entry) Val(a Attr) interface{} { 343 if f := e.AttrField(a); f != nil { 344 return f.Val 345 } 346 return nil 347} 348 349// AttrField returns the Field associated with attribute Attr in 350// Entry, or nil if there is no such attribute. 351func (e *Entry) AttrField(a Attr) *Field { 352 for i, f := range e.Field { 353 if f.Attr == a { 354 return &e.Field[i] 355 } 356 } 357 return nil 358} 359 360// An Offset represents the location of an Entry within the DWARF info. 361// (See Reader.Seek.) 362type Offset uint32 363 364// Entry reads a single entry from buf, decoding 365// according to the given abbreviation table. 366func (b *buf) entry(atab abbrevTable, ubase Offset) *Entry { 367 off := b.off 368 id := uint32(b.uint()) 369 if id == 0 { 370 return &Entry{} 371 } 372 a, ok := atab[id] 373 if !ok { 374 b.error("unknown abbreviation table index") 375 return nil 376 } 377 e := &Entry{ 378 Offset: off, 379 Tag: a.tag, 380 Children: a.children, 381 Field: make([]Field, len(a.field)), 382 } 383 for i := range e.Field { 384 e.Field[i].Attr = a.field[i].attr 385 e.Field[i].Class = a.field[i].class 386 fmt := a.field[i].fmt 387 if fmt == formIndirect { 388 fmt = format(b.uint()) 389 } 390 var val interface{} 391 switch fmt { 392 default: 393 b.error("unknown entry attr format 0x" + strconv.FormatInt(int64(fmt), 16)) 394 395 // address 396 case formAddr: 397 val = b.addr() 398 399 // block 400 case formDwarfBlock1: 401 val = b.bytes(int(b.uint8())) 402 case formDwarfBlock2: 403 val = b.bytes(int(b.uint16())) 404 case formDwarfBlock4: 405 val = b.bytes(int(b.uint32())) 406 case formDwarfBlock: 407 val = b.bytes(int(b.uint())) 408 409 // constant 410 case formData1: 411 val = int64(b.uint8()) 412 case formData2: 413 val = int64(b.uint16()) 414 case formData4: 415 val = int64(b.uint32()) 416 case formData8: 417 val = int64(b.uint64()) 418 case formSdata: 419 val = int64(b.int()) 420 case formUdata: 421 val = int64(b.uint()) 422 423 // flag 424 case formFlag: 425 val = b.uint8() == 1 426 // New in DWARF 4. 427 case formFlagPresent: 428 // The attribute is implicitly indicated as present, and no value is 429 // encoded in the debugging information entry itself. 430 val = true 431 432 // reference to other entry 433 case formRefAddr: 434 vers := b.format.version() 435 if vers == 0 { 436 b.error("unknown version for DW_FORM_ref_addr") 437 } else if vers == 2 { 438 val = Offset(b.addr()) 439 } else { 440 is64, known := b.format.dwarf64() 441 if !known { 442 b.error("unknown size for DW_FORM_ref_addr") 443 } else if is64 { 444 val = Offset(b.uint64()) 445 } else { 446 val = Offset(b.uint32()) 447 } 448 } 449 case formRef1: 450 val = Offset(b.uint8()) + ubase 451 case formRef2: 452 val = Offset(b.uint16()) + ubase 453 case formRef4: 454 val = Offset(b.uint32()) + ubase 455 case formRef8: 456 val = Offset(b.uint64()) + ubase 457 case formRefUdata: 458 val = Offset(b.uint()) + ubase 459 460 // string 461 case formString: 462 val = b.string() 463 case formStrp: 464 off := b.uint32() // offset into .debug_str 465 if b.err != nil { 466 return nil 467 } 468 b1 := makeBuf(b.dwarf, unknownFormat{}, "str", 0, b.dwarf.str) 469 b1.skip(int(off)) 470 val = b1.string() 471 if b1.err != nil { 472 b.err = b1.err 473 return nil 474 } 475 476 // lineptr, loclistptr, macptr, rangelistptr 477 // New in DWARF 4, but clang can generate them with -gdwarf-2. 478 // Section reference, replacing use of formData4 and formData8. 479 case formSecOffset, formGnuRefAlt, formGnuStrpAlt: 480 is64, known := b.format.dwarf64() 481 if !known { 482 b.error("unknown size for form 0x" + strconv.FormatInt(int64(fmt), 16)) 483 } else if is64 { 484 val = int64(b.uint64()) 485 } else { 486 val = int64(b.uint32()) 487 } 488 489 // exprloc 490 // New in DWARF 4. 491 case formExprloc: 492 val = b.bytes(int(b.uint())) 493 494 // reference 495 // New in DWARF 4. 496 case formRefSig8: 497 // 64-bit type signature. 498 val = b.uint64() 499 } 500 e.Field[i].Val = val 501 } 502 if b.err != nil { 503 return nil 504 } 505 return e 506} 507 508// A Reader allows reading Entry structures from a DWARF ``info'' section. 509// The Entry structures are arranged in a tree. The Reader's Next function 510// return successive entries from a pre-order traversal of the tree. 511// If an entry has children, its Children field will be true, and the children 512// follow, terminated by an Entry with Tag 0. 513type Reader struct { 514 b buf 515 d *Data 516 err error 517 unit int 518 lastChildren bool // .Children of last entry returned by Next 519 lastSibling Offset // .Val(AttrSibling) of last entry returned by Next 520} 521 522// Reader returns a new Reader for Data. 523// The reader is positioned at byte offset 0 in the DWARF ``info'' section. 524func (d *Data) Reader() *Reader { 525 r := &Reader{d: d} 526 r.Seek(0) 527 return r 528} 529 530// AddressSize returns the size in bytes of addresses in the current compilation 531// unit. 532func (r *Reader) AddressSize() int { 533 return r.d.unit[r.unit].asize 534} 535 536// Seek positions the Reader at offset off in the encoded entry stream. 537// Offset 0 can be used to denote the first entry. 538func (r *Reader) Seek(off Offset) { 539 d := r.d 540 r.err = nil 541 r.lastChildren = false 542 if off == 0 { 543 if len(d.unit) == 0 { 544 return 545 } 546 u := &d.unit[0] 547 r.unit = 0 548 r.b = makeBuf(r.d, u, "info", u.off, u.data) 549 return 550 } 551 552 i := d.offsetToUnit(off) 553 if i == -1 { 554 r.err = errors.New("offset out of range") 555 return 556 } 557 u := &d.unit[i] 558 r.unit = i 559 r.b = makeBuf(r.d, u, "info", off, u.data[off-u.off:]) 560} 561 562// maybeNextUnit advances to the next unit if this one is finished. 563func (r *Reader) maybeNextUnit() { 564 for len(r.b.data) == 0 && r.unit+1 < len(r.d.unit) { 565 r.unit++ 566 u := &r.d.unit[r.unit] 567 r.b = makeBuf(r.d, u, "info", u.off, u.data) 568 } 569} 570 571// Next reads the next entry from the encoded entry stream. 572// It returns nil, nil when it reaches the end of the section. 573// It returns an error if the current offset is invalid or the data at the 574// offset cannot be decoded as a valid Entry. 575func (r *Reader) Next() (*Entry, error) { 576 if r.err != nil { 577 return nil, r.err 578 } 579 r.maybeNextUnit() 580 if len(r.b.data) == 0 { 581 return nil, nil 582 } 583 u := &r.d.unit[r.unit] 584 e := r.b.entry(u.atable, u.base) 585 if r.b.err != nil { 586 r.err = r.b.err 587 return nil, r.err 588 } 589 if e != nil { 590 r.lastChildren = e.Children 591 if r.lastChildren { 592 r.lastSibling, _ = e.Val(AttrSibling).(Offset) 593 } 594 } else { 595 r.lastChildren = false 596 } 597 return e, nil 598} 599 600// SkipChildren skips over the child entries associated with 601// the last Entry returned by Next. If that Entry did not have 602// children or Next has not been called, SkipChildren is a no-op. 603func (r *Reader) SkipChildren() { 604 if r.err != nil || !r.lastChildren { 605 return 606 } 607 608 // If the last entry had a sibling attribute, 609 // that attribute gives the offset of the next 610 // sibling, so we can avoid decoding the 611 // child subtrees. 612 if r.lastSibling >= r.b.off { 613 r.Seek(r.lastSibling) 614 return 615 } 616 617 for { 618 e, err := r.Next() 619 if err != nil || e == nil || e.Tag == 0 { 620 break 621 } 622 if e.Children { 623 r.SkipChildren() 624 } 625 } 626} 627 628// clone returns a copy of the reader. This is used by the typeReader 629// interface. 630func (r *Reader) clone() typeReader { 631 return r.d.Reader() 632} 633 634// offset returns the current buffer offset. This is used by the 635// typeReader interface. 636func (r *Reader) offset() Offset { 637 return r.b.off 638} 639 640// SeekPC returns the Entry for the compilation unit that includes pc, 641// and positions the reader to read the children of that unit. If pc 642// is not covered by any unit, SeekPC returns ErrUnknownPC and the 643// position of the reader is undefined. 644// 645// Because compilation units can describe multiple regions of the 646// executable, in the worst case SeekPC must search through all the 647// ranges in all the compilation units. Each call to SeekPC starts the 648// search at the compilation unit of the last call, so in general 649// looking up a series of PCs will be faster if they are sorted. If 650// the caller wishes to do repeated fast PC lookups, it should build 651// an appropriate index using the Ranges method. 652func (r *Reader) SeekPC(pc uint64) (*Entry, error) { 653 unit := r.unit 654 for i := 0; i < len(r.d.unit); i++ { 655 if unit >= len(r.d.unit) { 656 unit = 0 657 } 658 r.err = nil 659 r.lastChildren = false 660 r.unit = unit 661 u := &r.d.unit[unit] 662 r.b = makeBuf(r.d, u, "info", u.off, u.data) 663 e, err := r.Next() 664 if err != nil { 665 return nil, err 666 } 667 ranges, err := r.d.Ranges(e) 668 if err != nil { 669 return nil, err 670 } 671 for _, pcs := range ranges { 672 if pcs[0] <= pc && pc < pcs[1] { 673 return e, nil 674 } 675 } 676 unit++ 677 } 678 return nil, ErrUnknownPC 679} 680 681// Ranges returns the PC ranges covered by e, a slice of [low,high) pairs. 682// Only some entry types, such as TagCompileUnit or TagSubprogram, have PC 683// ranges; for others, this will return nil with no error. 684func (d *Data) Ranges(e *Entry) ([][2]uint64, error) { 685 var ret [][2]uint64 686 687 low, lowOK := e.Val(AttrLowpc).(uint64) 688 689 var high uint64 690 var highOK bool 691 highField := e.AttrField(AttrHighpc) 692 if highField != nil { 693 switch highField.Class { 694 case ClassAddress: 695 high, highOK = highField.Val.(uint64) 696 case ClassConstant: 697 off, ok := highField.Val.(int64) 698 if ok { 699 high = low + uint64(off) 700 highOK = true 701 } 702 } 703 } 704 705 if lowOK && highOK { 706 ret = append(ret, [2]uint64{low, high}) 707 } 708 709 ranges, rangesOK := e.Val(AttrRanges).(int64) 710 if rangesOK && d.ranges != nil { 711 // The initial base address is the lowpc attribute 712 // of the enclosing compilation unit. 713 // Although DWARF specifies the lowpc attribute, 714 // comments in gdb/dwarf2read.c say that some versions 715 // of GCC use the entrypc attribute, so we check that too. 716 var cu *Entry 717 if e.Tag == TagCompileUnit { 718 cu = e 719 } else { 720 i := d.offsetToUnit(e.Offset) 721 if i == -1 { 722 return nil, errors.New("no unit for entry") 723 } 724 u := &d.unit[i] 725 b := makeBuf(d, u, "info", u.off, u.data) 726 cu = b.entry(u.atable, u.base) 727 if b.err != nil { 728 return nil, b.err 729 } 730 } 731 732 var base uint64 733 if cuEntry, cuEntryOK := cu.Val(AttrEntrypc).(uint64); cuEntryOK { 734 base = cuEntry 735 } else if cuLow, cuLowOK := cu.Val(AttrLowpc).(uint64); cuLowOK { 736 base = cuLow 737 } 738 739 u := &d.unit[d.offsetToUnit(e.Offset)] 740 buf := makeBuf(d, u, "ranges", Offset(ranges), d.ranges[ranges:]) 741 for len(buf.data) > 0 { 742 low = buf.addr() 743 high = buf.addr() 744 745 if low == 0 && high == 0 { 746 break 747 } 748 749 if low == ^uint64(0)>>uint((8-u.addrsize())*8) { 750 base = high 751 } else { 752 ret = append(ret, [2]uint64{base + low, base + high}) 753 } 754 } 755 } 756 757 return ret, nil 758} 759