1 2 /*--------------------------------------------------------------------*/ 3 /*--- Format-neutral storage of and querying of info acquired from ---*/ 4 /*--- ELF/XCOFF stabs/dwarf1/dwarf2 debug info. ---*/ 5 /*--- priv_storage.h ---*/ 6 /*--------------------------------------------------------------------*/ 7 8 /* 9 This file is part of Valgrind, a dynamic binary instrumentation 10 framework. 11 12 Copyright (C) 2000-2017 Julian Seward 13 jseward@acm.org 14 15 This program is free software; you can redistribute it and/or 16 modify it under the terms of the GNU General Public License as 17 published by the Free Software Foundation; either version 2 of the 18 License, or (at your option) any later version. 19 20 This program is distributed in the hope that it will be useful, but 21 WITHOUT ANY WARRANTY; without even the implied warranty of 22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 23 General Public License for more details. 24 25 You should have received a copy of the GNU General Public License 26 along with this program; if not, write to the Free Software 27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 28 02111-1307, USA. 29 30 The GNU General Public License is contained in the file COPYING. 31 */ 32 /* 33 Stabs reader greatly improved by Nick Nethercote, Apr 02. 34 This module was also extensively hacked on by Jeremy Fitzhardinge 35 and Tom Hughes. 36 */ 37 /* See comment at top of debuginfo.c for explanation of 38 the _svma / _avma / _image / _bias naming scheme. 39 */ 40 /* Note this is not freestanding; needs pub_core_xarray.h and 41 priv_tytypes.h to be included before it. */ 42 43 #ifndef __PRIV_STORAGE_H 44 #define __PRIV_STORAGE_H 45 46 #include "pub_core_basics.h" // Addr 47 #include "pub_core_xarray.h" // XArray 48 #include "pub_core_deduppoolalloc.h" // DedupPoolAlloc 49 #include "priv_d3basics.h" // GExpr et al. 50 #include "priv_image.h" // DiCursor 51 52 /* --------------------- SYMBOLS --------------------- */ 53 54 /* A structure to hold an ELF/MachO symbol (very crudely). Usually 55 the symbol only has one name, which is stored in ::pri_name, and 56 ::sec_names is NULL. If there are other names, these are stored in 57 ::sec_names, which is a NULL terminated vector holding the names. 58 The vector is allocated in VG_AR_DINFO, the names themselves live 59 in DebugInfo::strpool. 60 61 From the point of view of ELF, the primary vs secondary distinction 62 is artificial: they are all just names associated with the address, 63 none of which has higher precedence than any other. However, from 64 the point of view of mapping an address to a name to display to the 65 user, we need to choose one "preferred" name, and so that might as 66 well be installed as the pri_name, whilst all others can live in 67 sec_names[]. This has the convenient side effect that, in the 68 common case where there is only one name for the address, 69 sec_names[] does not need to be allocated. 70 */ 71 typedef 72 struct { 73 SymAVMAs avmas; /* Symbol Actual VMAs: lowest address of entity, 74 + platform specific fields, to access with 75 the macros defined in pub_core_debuginfo.h */ 76 const HChar* pri_name; /* primary name, never NULL */ 77 const HChar** sec_names; /* NULL, or a NULL term'd array of other names */ 78 // XXX: DiSym could be shrunk (on 32-bit platforms to exactly 16 79 // bytes, on 64-bit platforms the first 3 pointers already add 80 // up to 24 bytes, so size plus bits will extend to 32 bytes 81 // anyway) by using 29 bits for the size and 1 bit each for 82 // isText, isIFunc and isGlobal. If you do this, make sure that 83 // all assignments to the latter two use 0 or 1 (or True or 84 // False), and that a positive number larger than 1 is never 85 // used to represent True. 86 UInt size; /* size in bytes */ 87 Bool isText; 88 Bool isIFunc; /* symbol is an indirect function? */ 89 Bool isGlobal; /* Is this symbol globally visible? */ 90 } 91 DiSym; 92 93 /* --------------------- SRCLOCS --------------------- */ 94 95 /* Line count at which overflow happens, due to line numbers being 96 stored as shorts in `struct nlist' in a.out.h. */ 97 #define LINENO_OVERFLOW (1 << (sizeof(short) * 8)) 98 99 #define LINENO_BITS 20 100 #define LOC_SIZE_BITS (32 - LINENO_BITS) 101 #define MAX_LINENO ((1 << LINENO_BITS) - 1) 102 103 /* Unlikely to have any lines with instruction ranges > 4096 bytes */ 104 #define MAX_LOC_SIZE ((1 << LOC_SIZE_BITS) - 1) 105 106 /* Number used to detect line number overflows; if one line is 107 60000-odd smaller than the previous, it was probably an overflow. 108 */ 109 #define OVERFLOW_DIFFERENCE (LINENO_OVERFLOW - 5000) 110 111 /* Filename and Dirname pair. FnDn are stored in di->fndnpool 112 and are allocated using VG_(allocFixedEltDedupPA). 113 The filename/dirname strings are themselves stored in di->strpool. */ 114 typedef 115 struct { 116 const HChar* filename; /* source filename */ 117 const HChar* dirname; /* source directory name */ 118 } FnDn; 119 120 /* A structure to hold addr-to-source info for a single line. There 121 can be a lot of these, hence the dense packing. */ 122 typedef 123 struct { 124 /* Word 1 */ 125 Addr addr; /* lowest address for this line */ 126 /* Word 2 */ 127 UShort size:LOC_SIZE_BITS; /* # bytes; we catch overflows of this */ 128 UInt lineno:LINENO_BITS; /* source line number, or zero */ 129 } 130 DiLoc; 131 132 #define LEVEL_BITS (32 - LINENO_BITS) 133 #define MAX_LEVEL ((1 << LEVEL_BITS) - 1) 134 135 /* A structure to hold addr-to-inlined fn info. There 136 can be a lot of these, hence the dense packing. 137 Only caller source filename and lineno are stored. 138 Handling dirname should be done using fndn_ix technique 139 similar to ML_(addLineInfo). */ 140 typedef 141 struct { 142 /* Word 1 */ 143 Addr addr_lo; /* lowest address for inlined fn */ 144 /* Word 2 */ 145 Addr addr_hi; /* highest address following the inlined fn */ 146 /* Word 3 */ 147 const HChar* inlinedfn; /* inlined function name */ 148 /* Word 4 and 5 */ 149 UInt fndn_ix; /* index in di->fndnpool of caller source 150 dirname/filename */ 151 UInt lineno:LINENO_BITS; /* caller line number */ 152 UShort level:LEVEL_BITS; /* level of inlining */ 153 } 154 DiInlLoc; 155 156 /* --------------------- CF INFO --------------------- */ 157 158 /* DiCfSI: a structure to summarise DWARF2/3 CFA info for the code 159 address range [base .. base+len-1]. 160 161 On x86 and amd64 ("IA"), if you know ({e,r}sp, {e,r}bp, {e,r}ip) at 162 some point and {e,r}ip is in the range [base .. base+len-1], it 163 tells you how to calculate ({e,r}sp, {e,r}bp) for the caller of the 164 current frame and also ra, the return address of the current frame. 165 166 First off, calculate CFA, the Canonical Frame Address, thusly: 167 168 cfa = case cfa_how of 169 CFIC_IA_SPREL -> {e,r}sp + cfa_off 170 CFIC_IA_BPREL -> {e,r}bp + cfa_off 171 CFIC_EXPR -> expr whose index is in cfa_off 172 173 Once that is done, the previous frame's {e,r}sp/{e,r}bp values and 174 this frame's {e,r}ra value can be calculated like this: 175 176 old_{e,r}sp/{e,r}bp/ra 177 = case {e,r}sp/{e,r}bp/ra_how of 178 CFIR_UNKNOWN -> we don't know, sorry 179 CFIR_SAME -> same as it was before (sp/fp only) 180 CFIR_CFAREL -> cfa + sp/bp/ra_off 181 CFIR_MEMCFAREL -> *( cfa + sp/bp/ra_off ) 182 CFIR_EXPR -> expr whose index is in sp/bp/ra_off 183 184 On ARM it's pretty much the same, except we have more registers to 185 keep track of: 186 187 cfa = case cfa_how of 188 CFIC_ARM_R13REL -> r13 + cfa_off 189 CFIC_ARM_R12REL -> r12 + cfa_off 190 CFIC_ARM_R11REL -> r11 + cfa_off 191 CFIC_ARM_R7REL -> r7 + cfa_off 192 CFIR_EXPR -> expr whose index is in cfa_off 193 194 old_r14/r13/r12/r11/r7/ra 195 = case r14/r13/r12/r11/r7/ra_how of 196 CFIR_UNKNOWN -> we don't know, sorry 197 CFIR_SAME -> same as it was before (r14/r13/r12/r11/r7 only) 198 CFIR_CFAREL -> cfa + r14/r13/r12/r11/r7/ra_off 199 CFIR_MEMCFAREL -> *( cfa + r14/r13/r12/r11/r7/ra_off ) 200 CFIR_EXPR -> expr whose index is in r14/r13/r12/r11/r7/ra_off 201 202 On ARM64: 203 204 cfa = case cfa_how of 205 CFIC_ARM64_SPREL -> sp + cfa_off 206 CFIC_ARM64_X29REL -> x29 + cfa_off 207 CFIC_EXPR -> expr whose index is in cfa_off 208 209 old_sp/x30/x29/ra 210 = case sp/x30/x29/ra_how of 211 CFIR_UNKNOWN -> we don't know, sorry 212 CFIR_SAME -> same as it was before 213 CFIR_CFAREL -> cfa + sp/x30/x29/ra_how 214 CFIR_MEMCFAREL -> *( cfa + sp/x30/x29/ra_how ) 215 CFIR_EXPR -> expr whose index is in sp/x30/x29/ra_off 216 217 On s390x we have a similar logic as x86 or amd64. We need the stack pointer 218 (r15), the frame pointer r11 (like BP) and together with the instruction 219 address in the PSW we can calculate the previous values: 220 cfa = case cfa_how of 221 CFIC_IA_SPREL -> r15 + cfa_off 222 CFIC_IA_BPREL -> r11 + cfa_off 223 CFIC_EXPR -> expr whose index is in cfa_off 224 225 old_sp/fp/ra 226 = case sp/fp/ra_how of 227 CFIR_UNKNOWN -> we don't know, sorry 228 CFIR_SAME -> same as it was before (sp/fp only) 229 CFIR_CFAREL -> cfa + sp/fp/ra_off 230 CFIR_MEMCFAREL -> *( cfa + sp/fp/ra_off ) 231 CFIR_EXPR -> expr whose index is in sp/fp/ra_off 232 */ 233 234 #define CFIC_IA_SPREL ((UChar)1) 235 #define CFIC_IA_BPREL ((UChar)2) 236 #define CFIC_ARM_R13REL ((UChar)3) 237 #define CFIC_ARM_R12REL ((UChar)4) 238 #define CFIC_ARM_R11REL ((UChar)5) 239 #define CFIC_ARM_R7REL ((UChar)6) 240 #define CFIC_ARM64_SPREL ((UChar)7) 241 #define CFIC_ARM64_X29REL ((UChar)8) 242 #define CFIC_EXPR ((UChar)9) /* all targets */ 243 244 #define CFIR_UNKNOWN ((UChar)64) 245 #define CFIR_SAME ((UChar)65) 246 #define CFIR_CFAREL ((UChar)66) 247 #define CFIR_MEMCFAREL ((UChar)67) 248 #define CFIR_EXPR ((UChar)68) 249 250 /* Definition of the DiCfSI_m DiCfSI machine dependent part. 251 These are highly duplicated, and are stored in a pool. */ 252 #if defined(VGA_x86) || defined(VGA_amd64) 253 typedef 254 struct { 255 UChar cfa_how; /* a CFIC_IA value */ 256 UChar ra_how; /* a CFIR_ value */ 257 UChar sp_how; /* a CFIR_ value */ 258 UChar bp_how; /* a CFIR_ value */ 259 Int cfa_off; 260 Int ra_off; 261 Int sp_off; 262 Int bp_off; 263 } 264 DiCfSI_m; 265 #elif defined(VGA_arm) 266 typedef 267 struct { 268 UChar cfa_how; /* a CFIC_ value */ 269 UChar ra_how; /* a CFIR_ value */ 270 UChar r14_how; /* a CFIR_ value */ 271 UChar r13_how; /* a CFIR_ value */ 272 UChar r12_how; /* a CFIR_ value */ 273 UChar r11_how; /* a CFIR_ value */ 274 UChar r7_how; /* a CFIR_ value */ 275 Int cfa_off; 276 Int ra_off; 277 Int r14_off; 278 Int r13_off; 279 Int r12_off; 280 Int r11_off; 281 Int r7_off; 282 // If you add additional fields, don't forget to update the 283 // initialisation of this in readexidx.c accordingly. 284 } 285 DiCfSI_m; 286 #elif defined(VGA_arm64) 287 typedef 288 struct { 289 UChar cfa_how; /* a CFIC_ value */ 290 UChar ra_how; /* a CFIR_ value */ 291 UChar sp_how; /* a CFIR_ value */ /*dw31=SP*/ 292 UChar x30_how; /* a CFIR_ value */ /*dw30=LR*/ 293 UChar x29_how; /* a CFIR_ value */ /*dw29=FP*/ 294 Int cfa_off; 295 Int ra_off; 296 Int sp_off; 297 Int x30_off; 298 Int x29_off; 299 } 300 DiCfSI_m; 301 #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) 302 /* Just have a struct with the common fields in, so that code that 303 processes the common fields doesn't have to be ifdef'd against 304 VGP_/VGA_ symbols. These are not used in any way on ppc32/64-linux 305 at the moment. */ 306 typedef 307 struct { 308 UChar cfa_how; /* a CFIC_ value */ 309 UChar ra_how; /* a CFIR_ value */ 310 Int cfa_off; 311 Int ra_off; 312 } 313 DiCfSI_m; 314 #elif defined(VGA_s390x) 315 typedef 316 struct { 317 UChar cfa_how; /* a CFIC_ value */ 318 UChar sp_how; /* a CFIR_ value */ 319 UChar ra_how; /* a CFIR_ value */ 320 UChar fp_how; /* a CFIR_ value */ 321 Int cfa_off; 322 Int sp_off; 323 Int ra_off; 324 Int fp_off; 325 } 326 DiCfSI_m; 327 #elif defined(VGA_mips32) || defined(VGA_mips64) 328 typedef 329 struct { 330 UChar cfa_how; /* a CFIC_ value */ 331 UChar ra_how; /* a CFIR_ value */ 332 UChar sp_how; /* a CFIR_ value */ 333 UChar fp_how; /* a CFIR_ value */ 334 Int cfa_off; 335 Int ra_off; 336 Int sp_off; 337 Int fp_off; 338 } 339 DiCfSI_m; 340 #else 341 # error "Unknown arch" 342 #endif 343 344 typedef 345 struct { 346 Addr base; 347 UInt len; 348 UInt cfsi_m_ix; 349 } 350 DiCfSI; 351 352 typedef 353 enum { 354 Cunop_Abs=0x231, 355 Cunop_Neg, 356 Cunop_Not 357 } 358 CfiUnop; 359 360 typedef 361 enum { 362 Cbinop_Add=0x321, 363 Cbinop_Sub, 364 Cbinop_And, 365 Cbinop_Mul, 366 Cbinop_Shl, 367 Cbinop_Shr, 368 Cbinop_Eq, 369 Cbinop_Ge, 370 Cbinop_Gt, 371 Cbinop_Le, 372 Cbinop_Lt, 373 Cbinop_Ne 374 } 375 CfiBinop; 376 377 typedef 378 enum { 379 Creg_INVALID=0x213, 380 Creg_IA_SP, 381 Creg_IA_BP, 382 Creg_IA_IP, 383 Creg_ARM_R13, 384 Creg_ARM_R12, 385 Creg_ARM_R15, 386 Creg_ARM_R14, 387 Creg_ARM_R7, 388 Creg_ARM64_X30, 389 Creg_S390_IA, 390 Creg_S390_SP, 391 Creg_S390_FP, 392 Creg_S390_LR, 393 Creg_MIPS_RA 394 } 395 CfiReg; 396 397 typedef 398 enum { 399 Cex_Undef=0x123, 400 Cex_Deref, 401 Cex_Const, 402 Cex_Unop, 403 Cex_Binop, 404 Cex_CfiReg, 405 Cex_DwReg 406 } 407 CfiExprTag; 408 409 typedef 410 struct { 411 CfiExprTag tag; 412 union { 413 struct { 414 } Undef; 415 struct { 416 Int ixAddr; 417 } Deref; 418 struct { 419 UWord con; 420 } Const; 421 struct { 422 CfiUnop op; 423 Int ix; 424 } Unop; 425 struct { 426 CfiBinop op; 427 Int ixL; 428 Int ixR; 429 } Binop; 430 struct { 431 CfiReg reg; 432 } CfiReg; 433 struct { 434 Int reg; 435 } DwReg; 436 } 437 Cex; 438 } 439 CfiExpr; 440 441 extern Int ML_(CfiExpr_Undef) ( XArray* dst ); 442 extern Int ML_(CfiExpr_Deref) ( XArray* dst, Int ixAddr ); 443 extern Int ML_(CfiExpr_Const) ( XArray* dst, UWord con ); 444 extern Int ML_(CfiExpr_Unop) ( XArray* dst, CfiUnop op, Int ix ); 445 extern Int ML_(CfiExpr_Binop) ( XArray* dst, CfiBinop op, Int ixL, Int ixR ); 446 extern Int ML_(CfiExpr_CfiReg)( XArray* dst, CfiReg reg ); 447 extern Int ML_(CfiExpr_DwReg) ( XArray* dst, Int reg ); 448 449 extern void ML_(ppCfiExpr)( const XArray* src, Int ix ); 450 451 /* ---------------- FPO INFO (Windows PE) -------------- */ 452 453 /* for apps using Wine: MSVC++ PDB FramePointerOmitted: somewhat like 454 a primitive CFI */ 455 typedef 456 struct _FPO_DATA { /* 16 bytes */ 457 UInt ulOffStart; /* offset of 1st byte of function code */ 458 UInt cbProcSize; /* # bytes in function */ 459 UInt cdwLocals; /* # bytes/4 in locals */ 460 UShort cdwParams; /* # bytes/4 in params */ 461 UChar cbProlog; /* # bytes in prolog */ 462 UChar cbRegs :3; /* # regs saved */ 463 UChar fHasSEH:1; /* Structured Exception Handling */ 464 UChar fUseBP :1; /* EBP has been used */ 465 UChar reserved:1; 466 UChar cbFrame:2; /* frame type */ 467 } 468 FPO_DATA; 469 470 #define PDB_FRAME_FPO 0 471 #define PDB_FRAME_TRAP 1 472 #define PDB_FRAME_TSS 2 473 474 /* --------------------- VARIABLES --------------------- */ 475 476 typedef 477 struct { 478 Addr aMin; 479 Addr aMax; 480 XArray* /* of DiVariable */ vars; 481 } 482 DiAddrRange; 483 484 typedef 485 struct { 486 const HChar* name; /* in DebugInfo.strpool */ 487 UWord typeR; /* a cuOff */ 488 const GExpr* gexpr; /* on DebugInfo.gexprs list */ 489 const GExpr* fbGX; /* SHARED. */ 490 UInt fndn_ix; /* where declared; may be zero. index 491 in DebugInfo.fndnpool */ 492 Int lineNo; /* where declared; may be zero. */ 493 } 494 DiVariable; 495 496 Word 497 ML_(cmp_for_DiAddrRange_range) ( const void* keyV, const void* elemV ); 498 499 /* --------------------- DEBUGINFO --------------------- */ 500 501 /* This is the top-level data type. It's a structure which contains 502 information pertaining to one mapped ELF object. This type is 503 exported only abstractly - in pub_tool_debuginfo.h. */ 504 505 /* First though, here's an auxiliary data structure. It is only ever 506 used as part of a struct _DebugInfo. We use it to record 507 observations about mappings and permission changes to the 508 associated file, so as to decide when to read debug info. It's 509 essentially an ultra-trivial finite state machine which, when it 510 reaches an accept state, signals that we should now read debug info 511 from the object into the associated struct _DebugInfo. The accept 512 state is arrived at when have_rx_map and have_rw_map both become 513 true. The initial state is one in which we have no observations, 514 so have_rx_map and have_rw_map are both false. 515 516 This all started as a rather ad-hoc solution, but was further 517 expanded to handle weird object layouts, e.g. more than one rw 518 or rx mapping for one binary. 519 520 The normal sequence of events is one of 521 522 start --> r-x mapping --> rw- mapping --> accept 523 start --> rw- mapping --> r-x mapping --> accept 524 525 that is, take the first r-x and rw- mapping we see, and we're done. 526 527 On MacOSX >= 10.7, 32-bit, there appears to be a new variant: 528 529 start --> r-- mapping --> rw- mapping 530 --> upgrade r-- mapping to r-x mapping --> accept 531 532 where the upgrade is done by a call to mach_vm_protect (OSX 10.7) 533 or kernelrpc_mach_vm_protect_trap (OSX 10.9 and possibly 10.8). 534 Hence we need to also track this possibility. 535 536 From perusal of dyld sources, it appears that this scheme could 537 also be used 64 bit libraries, although that doesn't seem to happen 538 in practice. dyld uses this scheme when the text section requires 539 relocation, which only appears to be the case for 32 bit objects. 540 */ 541 542 typedef struct 543 { 544 Addr avma; /* these fields record the file offset, length */ 545 SizeT size; /* and map address of each mapping */ 546 OffT foff; 547 Bool rx, rw, ro; /* memory access flags for this mapping */ 548 } DebugInfoMapping; 549 550 struct _DebugInfoFSM 551 { 552 HChar* filename; /* in mallocville (VG_AR_DINFO) */ 553 HChar* dbgname; /* in mallocville (VG_AR_DINFO) */ 554 XArray* maps; /* XArray of DebugInfoMapping structs */ 555 Bool have_rx_map; /* did we see a r?x mapping yet for the file? */ 556 Bool have_rw_map; /* did we see a rw? mapping yet for the file? */ 557 Bool have_ro_map; /* did we see a r-- mapping yet for the file? */ 558 }; 559 560 561 /* To do with the string table in struct _DebugInfo (::strpool) */ 562 #define SEGINFO_STRPOOLSIZE (64*1024) 563 564 565 /* We may encounter more than one .eh_frame section in an object -- 566 unusual but apparently allowed by ELF. See 567 http://sourceware.org/bugzilla/show_bug.cgi?id=12675 568 */ 569 #define N_EHFRAME_SECTS 2 570 571 572 /* So, the main structure for holding debug info for one object. */ 573 574 struct _DebugInfo { 575 576 /* Admin stuff */ 577 578 struct _DebugInfo* next; /* list of DebugInfos */ 579 Bool mark; /* marked for deletion? */ 580 581 /* An abstract handle, which can be used by entities outside of 582 m_debuginfo to (in an abstract datatype sense) refer to this 583 struct _DebugInfo. A .handle of zero is invalid; valid handles 584 are 1 and above. The same handle is never issued twice (in any 585 given run of Valgrind), so a handle becomes invalid when the 586 associated struct _DebugInfo is discarded, and remains invalid 587 forever thereafter. The .handle field is set as soon as this 588 structure is allocated. */ 589 ULong handle; 590 591 /* The range of epochs for which this DebugInfo is valid. These also 592 divide the DebugInfo's lifetime into three parts: 593 594 (1) Allocated: but with only .fsm holding useful info -- in 595 particular, not yet holding any debug info. 596 .first_epoch == DebugInfoEpoch_INVALID 597 .last_epoch == DebugInfoEpoch_INVALID 598 599 (2) Active: containing debug info, and current. 600 .first_epoch != DebugInfoEpoch_INVALID 601 .last_epoch == DebugInfoEpoch_INVALID 602 603 (3) Archived: containing debug info, but no longer current. 604 .first_epoch != DebugInfoEpoch_INVALID 605 .last_epoch != DebugInfoEpoch_INVALID 606 607 State (2) corresponds to an object which is currently mapped. When 608 the object is unmapped, what happens depends on the setting of 609 --keep-debuginfo: 610 611 * when =no, the DebugInfo is removed from debugInfo_list and 612 deleted. 613 614 * when =yes, the DebugInfo is retained in debugInfo_list, but its 615 .last_epoch field is filled in, and current_epoch is advanced. This 616 effectively moves the DebugInfo into state (3). 617 */ 618 DiEpoch first_epoch; 619 DiEpoch last_epoch; 620 621 /* Used for debugging only - indicate what stuff to dump whilst 622 reading stuff into the seginfo. Are computed as early in the 623 lifetime of the DebugInfo as possible -- at the point when it is 624 created. Use these when deciding what to spew out; do not use 625 the global VG_(clo_blah) flags. */ 626 627 Bool trace_symtab; /* symbols, our style */ 628 Bool trace_cfi; /* dwarf frame unwind, our style */ 629 Bool ddump_syms; /* mimic /usr/bin/readelf --syms */ 630 Bool ddump_line; /* mimic /usr/bin/readelf --debug-dump=line */ 631 Bool ddump_frames; /* mimic /usr/bin/readelf --debug-dump=frames */ 632 633 /* The "decide when it is time to read debuginfo" state machine. 634 This structure must get filled in before we can start reading 635 anything from the ELF/MachO file. This structure is filled in 636 by VG_(di_notify_mmap) and its immediate helpers. */ 637 struct _DebugInfoFSM fsm; 638 639 /* Once the ::fsm has reached an accept state -- typically, when 640 both a rw? and r?x mapping for .filename have been observed -- 641 we can go on to read the symbol tables and debug info. 642 .have_dinfo changes from False to True when the debug info has 643 been completely read in and postprocessed (canonicalised) and is 644 now suitable for querying. */ 645 /* If have_dinfo is False, then all fields below this point are 646 invalid and should not be consulted. */ 647 Bool have_dinfo; /* initially False */ 648 649 /* All the rest of the fields in this structure are filled in once 650 we have committed to reading the symbols and debug info (that 651 is, at the point where .have_dinfo is set to True). */ 652 653 /* The file's soname. */ 654 HChar* soname; 655 656 /* Description of some important mapped segments. The presence or 657 absence of the mapping is denoted by the _present field, since 658 in some obscure circumstances (to do with data/sdata/bss) it is 659 possible for the mapping to be present but have zero size. 660 Certainly text_ is mandatory on all platforms; not sure about 661 the rest though. 662 663 -------------------------------------------------------- 664 665 Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS: we require that 666 667 either (size of all rx maps == 0 && cfsi == NULL) (the degenerate case) 668 669 or the normal case, which is the AND of the following: 670 (0) size of at least one rx mapping > 0 671 (1) no two non-archived DebugInfos with some rx mapping of size > 0 672 have overlapping rx mappings 673 (2) Each address in [cfsi_minavma,cfsi_maxavma] is in an rx mapping 674 or else no cfsi can cover this address. 675 The typical case is a single rx mapping covering the full range. 676 In some cases, the union of several rx mappings covers the range, 677 with possibly some holes between the rx mappings, and no cfsi fall 678 within such an hole. 679 (3) all DiCfSI in the cfsi array all have ranges that fall within 680 [avma,+size) of that rx mapping. 681 (4) all DiCfSI in the cfsi array are non-overlapping 682 683 The cumulative effect of these restrictions is to ensure that 684 all the DiCfSI records in the entire system are non overlapping. 685 Hence any address falls into either exactly one DiCfSI record, 686 or none. Hence it is safe to cache the results of searches for 687 DiCfSI records. This is the whole point of these restrictions. 688 The caching of DiCfSI searches is done in VG_(use_CF_info). The 689 cache is flushed after any change to debugInfo_list. DiCfSI 690 searches are cached because they are central to stack unwinding 691 on amd64-linux. 692 693 Where are these invariants imposed and checked? 694 695 They are checked after a successful read of debuginfo into 696 a DebugInfo*, in check_CFSI_related_invariants. 697 698 (1) is not really imposed anywhere. We simply assume that the 699 kernel will not map the text segments from two different objects 700 into the same space. Sounds reasonable. 701 702 (2) follows from (4) and (3). It is ensured by canonicaliseCFI. 703 (3) is ensured by ML_(addDiCfSI). 704 (4) is ensured by canonicaliseCFI. 705 706 -------------------------------------------------------- 707 708 Comment_on_DEBUG_SVMA_and_DEBUG_BIAS_fields: 709 710 The _debug_{svma,bias} fields were added as part of a fix to 711 #185816. The problem encompassed in that bug report was that it 712 wasn't correct to use apply the bias values deduced for a 713 primary object to its associated debuginfo object, because the 714 debuginfo object (or the primary) could have been prelinked to a 715 different SVMA. Hence debuginfo and primary objects need to 716 have their own biases. 717 718 ------ JRS: (referring to r9329): ------ 719 Let me see if I understand the workings correctly. Initially 720 the _debug_ values are set to the same values as the "normal" 721 ones, as there's a bunch of bits of code like this (in 722 readelf.c) 723 724 di->text_svma = svma; 725 ... 726 di->text_bias = rx_bias; 727 di->text_debug_svma = svma; 728 di->text_debug_bias = rx_bias; 729 730 If a debuginfo object subsequently shows up then the 731 _debug_svma/bias are set for the debuginfo object. Result is 732 that if there's no debuginfo object then the values are the same 733 as the primary-object values, and if there is a debuginfo object 734 then they will (or at least may) be different. 735 736 Then when we need to actually bias something, we'll have to 737 decide whether to use the primary bias or the debuginfo bias. 738 And the strategy is to use the primary bias for ELF symbols but 739 the debuginfo bias for anything pulled out of Dwarf. 740 741 ------ THH: ------ 742 Correct - the debug_svma and bias values apply to any address 743 read from the debug data regardless of where that debug data is 744 stored and the other values are used for addresses from other 745 places (primarily the symbol table). 746 747 ------ JRS: ------ 748 Ok; so this was my only area of concern. Are there any 749 corner-case scenarios where this wouldn't be right? It sounds 750 like we're assuming the ELF symbols come from the primary object 751 and, if there is a debug object, then all the Dwarf comes from 752 there. But what if (eg) both symbols and Dwarf come from the 753 debug object? Is that even possible or allowable? 754 755 ------ THH: ------ 756 You may have a point... 757 758 The current logic is to try and take any one set of data from 759 either the base object or the debug object. There are four sets 760 of data we consider: 761 762 - Symbol Table 763 - Stabs 764 - DWARF1 765 - DWARF2 766 767 If we see the primary section for a given set in the base object 768 then we ignore all sections relating to that set in the debug 769 object. 770 771 Now in principle if we saw a secondary section (like debug_line 772 say) in the base object, but not the main section (debug_info in 773 this case) then we would take debug_info from the debug object 774 but would use the debug_line from the base object unless we saw 775 a replacement copy in the debug object. That's probably unlikely 776 however. 777 778 A bigger issue might be, as you say, the symbol table as we will 779 pick that up from the debug object if it isn't in the base. The 780 dynamic symbol table will always have to be in the base object 781 though so we will have to be careful when processing symbols to 782 know which table we are reading in that case. 783 784 What we probably need to do is tell read_elf_symtab which object 785 the symbols it is being asked to read came from. 786 787 (A followup patch to deal with this was committed in r9469). 788 */ 789 /* .text */ 790 Bool text_present; 791 Addr text_avma; 792 Addr text_svma; 793 SizeT text_size; 794 PtrdiffT text_bias; 795 Addr text_debug_svma; 796 PtrdiffT text_debug_bias; 797 /* .data */ 798 Bool data_present; 799 Addr data_svma; 800 Addr data_avma; 801 SizeT data_size; 802 PtrdiffT data_bias; 803 Addr data_debug_svma; 804 PtrdiffT data_debug_bias; 805 /* .sdata */ 806 Bool sdata_present; 807 Addr sdata_svma; 808 Addr sdata_avma; 809 SizeT sdata_size; 810 PtrdiffT sdata_bias; 811 Addr sdata_debug_svma; 812 PtrdiffT sdata_debug_bias; 813 /* .rodata */ 814 Bool rodata_present; 815 Addr rodata_svma; 816 Addr rodata_avma; 817 SizeT rodata_size; 818 PtrdiffT rodata_bias; 819 Addr rodata_debug_svma; 820 PtrdiffT rodata_debug_bias; 821 /* .bss */ 822 Bool bss_present; 823 Addr bss_svma; 824 Addr bss_avma; 825 SizeT bss_size; 826 PtrdiffT bss_bias; 827 Addr bss_debug_svma; 828 PtrdiffT bss_debug_bias; 829 /* .sbss */ 830 Bool sbss_present; 831 Addr sbss_svma; 832 Addr sbss_avma; 833 SizeT sbss_size; 834 PtrdiffT sbss_bias; 835 Addr sbss_debug_svma; 836 PtrdiffT sbss_debug_bias; 837 /* .ARM.exidx -- sometimes present on arm32, containing unwind info. */ 838 Bool exidx_present; 839 Addr exidx_avma; 840 Addr exidx_svma; 841 SizeT exidx_size; 842 PtrdiffT exidx_bias; 843 /* .ARM.extab -- sometimes present on arm32, containing unwind info. */ 844 Bool extab_present; 845 Addr extab_avma; 846 Addr extab_svma; 847 SizeT extab_size; 848 PtrdiffT extab_bias; 849 /* .plt */ 850 Bool plt_present; 851 Addr plt_avma; 852 SizeT plt_size; 853 /* .got */ 854 Bool got_present; 855 Addr got_avma; 856 SizeT got_size; 857 /* .got.plt */ 858 Bool gotplt_present; 859 Addr gotplt_avma; 860 SizeT gotplt_size; 861 /* .opd -- needed on ppc64be-linux for finding symbols */ 862 Bool opd_present; 863 Addr opd_avma; 864 SizeT opd_size; 865 /* .ehframe -- needed on amd64-linux for stack unwinding. We might 866 see more than one, hence the arrays. */ 867 UInt n_ehframe; /* 0 .. N_EHFRAME_SECTS */ 868 Addr ehframe_avma[N_EHFRAME_SECTS]; 869 SizeT ehframe_size[N_EHFRAME_SECTS]; 870 871 /* Sorted tables of stuff we snarfed from the file. This is the 872 eventual product of reading the debug info. All this stuff 873 lives in VG_AR_DINFO. */ 874 875 /* An expandable array of symbols. */ 876 DiSym* symtab; 877 UWord symtab_used; 878 UWord symtab_size; 879 /* Two expandable arrays, storing locations and their filename/dirname. */ 880 DiLoc* loctab; 881 UInt sizeof_fndn_ix; /* Similar use as sizeof_cfsi_m_ix below. */ 882 void* loctab_fndn_ix; /* loctab[i] filename/dirname is identified by 883 loctab_fnindex_ix[i] (an index in di->fndnpool) 884 0 means filename/dirname unknown. 885 The void* is an UChar* or UShort* or UInt* 886 depending on sizeof_fndn_ix. */ 887 UWord loctab_used; 888 UWord loctab_size; 889 /* An expandable array of inlined fn info. 890 maxinl_codesz is the biggest inlined piece of code 891 in inltab (i.e. the max of 'addr_hi - addr_lo'. */ 892 DiInlLoc* inltab; 893 UWord inltab_used; 894 UWord inltab_size; 895 SizeT maxinl_codesz; 896 897 /* A set of expandable arrays to store CFI summary info records. 898 The machine specific information (i.e. the DiCfSI_m struct) 899 are stored in cfsi_m_pool, as these are highly duplicated. 900 The DiCfSI_m are allocated in cfsi_m_pool and identified using 901 a (we hope) small integer : often one byte is enough, sometimes 902 2 bytes are needed. 903 904 cfsi_base contains the bases of the code address ranges. 905 cfsi_size is the size of the cfsi_base array. 906 The elements cfsi_base[0] till cfsi_base[cfsi_used-1] are used. 907 Following elements are not used (yet). 908 909 For each base in cfsi_base, an index into cfsi_m_pool is stored 910 in cfsi_m_ix array. The size of cfsi_m_ix is equal to 911 cfsi_size*sizeof_cfsi_m_ix. The used portion of cfsi_m_ix is 912 cfsi_m_ix[0] till cfsi_m_ix[(cfsi_used-1)*sizeof_cfsi_m_ix]. 913 914 cfsi_base[i] gives the base address of a code range covered by 915 some CF Info. The corresponding CF Info is identified by an index 916 in cfsi_m_pool. The DiCfSI_m index in cfsi_m_pool corresponding to 917 cfsi_base[i] is given 918 by ((UChar*) cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 1 919 by ((UShort*)cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 2 920 by ((UInt*) cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 4. 921 922 The end of the code range starting at cfsi_base[i] is given by 923 cfsi_base[i+1]-1 (or cfsi_maxavma for cfsi_base[cfsi_used-1]). 924 Some code ranges between cfsi_minavma and cfsi_maxavma might not 925 be covered by cfi information. Such not covered ranges are stored by 926 a base in cfsi_base and a corresponding 0 index in cfsi_m_ix. 927 928 A variable size representation has been chosen for the elements of 929 cfsi_m_ix as in many case, one byte is good enough. For big 930 objects, 2 bytes are needed. No object has yet been found where 931 4 bytes are needed (but the code is ready to handle this case). 932 Not covered ranges ('cfi holes') are stored explicitly in 933 cfsi_base/cfsi_m_ix as this is more memory efficient than storing 934 a length for each covered range : on x86 or amd64, we typically have 935 a hole every 8 covered ranges. On arm64, we have very few holes 936 (1 every 50 or 100 ranges). 937 938 The cfsi information is read and prepared in the cfsi_rd array. 939 Once all the information has been read, the cfsi_base and cfsi_m_ix 940 arrays will be filled in from cfsi_rd. cfsi_rd will then be freed. 941 This is all done by ML_(finish_CFSI_arrays). 942 943 Also includes summary address bounds, showing the min and max address 944 covered by any of the records, as an aid to fast searching. And, if the 945 records require any expression nodes, they are stored in 946 cfsi_exprs. */ 947 Addr* cfsi_base; 948 UInt sizeof_cfsi_m_ix; /* size in byte of indexes stored in cfsi_m_ix. */ 949 void* cfsi_m_ix; /* Each index occupies sizeof_cfsi_m_ix bytes. 950 The void* is an UChar* or UShort* or UInt* 951 depending on sizeof_cfsi_m_ix. */ 952 953 DiCfSI* cfsi_rd; /* Only used during reading, NULL once info is read. */ 954 955 UWord cfsi_used; 956 UWord cfsi_size; 957 958 DedupPoolAlloc *cfsi_m_pool; 959 Addr cfsi_minavma; 960 Addr cfsi_maxavma; 961 XArray* cfsi_exprs; /* XArray of CfiExpr */ 962 963 /* Optimized code under Wine x86: MSVC++ PDB FramePointerOmitted 964 data. Non-expandable array, hence .size == .used. */ 965 FPO_DATA* fpo; 966 UWord fpo_size; 967 Addr fpo_minavma; 968 Addr fpo_maxavma; 969 Addr fpo_base_avma; 970 971 /* Pool of strings -- the string table. Pointers 972 into this are stable (the memory is not reallocated). */ 973 DedupPoolAlloc *strpool; 974 975 /* Pool of FnDn -- filename and dirname. 976 Elements in the pool are allocated using VG_(allocFixedEltDedupPA). */ 977 DedupPoolAlloc *fndnpool; 978 979 /* Variable scope information, as harvested from Dwarf3 files. 980 981 In short it's an 982 983 array of (array of PC address ranges and variables) 984 985 The outer array indexes over scopes, with Entry 0 containing 986 information on variables which exist for any value of the program 987 counter (PC) -- that is, the outermost scope. Entries 1, 2, 3, 988 etc contain information on increasinly deeply nested variables. 989 990 Each inner array is an array of (an address range, and a set 991 of variables that are in scope over that address range). 992 993 The address ranges may not overlap. 994 995 Since Entry 0 in the outer array holds information on variables 996 that exist for any value of the PC (that is, global vars), it 997 follows that Entry 0's inner array can only have one address 998 range pair, one that covers the entire address space. 999 */ 1000 XArray* /* of OSet of DiAddrRange */varinfo; 1001 1002 /* These are arrays of the relevant typed objects, held here 1003 partially for the purposes of visiting each object exactly once 1004 when we need to delete them. */ 1005 1006 /* An array of TyEnts. These are needed to make sense of any types 1007 in the .varinfo. Also, when deleting this DebugInfo, we must 1008 first traverse this array and throw away malloc'd stuff hanging 1009 off it -- by calling ML_(TyEnt__make_EMPTY) on each entry. */ 1010 XArray* /* of TyEnt */ admin_tyents; 1011 1012 /* An array of guarded DWARF3 expressions. */ 1013 XArray* admin_gexprs; 1014 1015 /* Cached last rx mapping matched and returned by ML_(find_rx_mapping). 1016 This helps performance a lot during ML_(addLineInfo) etc., which can 1017 easily be invoked hundreds of thousands of times. */ 1018 DebugInfoMapping* last_rx_map; 1019 }; 1020 1021 /* --------------------- functions --------------------- */ 1022 1023 /* ------ Adding ------ */ 1024 1025 /* Add a symbol to si's symbol table. The contents of 'sym' are 1026 copied. It is assumed (and checked) that 'sym' only contains one 1027 name, so there is no auxiliary ::sec_names vector to duplicate. 1028 IOW, the copy is a shallow copy, and there are assertions in place 1029 to ensure that's OK. */ 1030 extern void ML_(addSym) ( struct _DebugInfo* di, DiSym* sym ); 1031 1032 /* Add a filename/dirname pair to a DebugInfo and returns the index 1033 in the fndnpool fixed pool. */ 1034 extern UInt ML_(addFnDn) (struct _DebugInfo* di, 1035 const HChar* filename, 1036 const HChar* dirname); /* NULL is allowable */ 1037 1038 /* Returns the filename of the fndn pair identified by fndn_ix. 1039 Returns "???" if fndn_ix is 0. */ 1040 extern const HChar* ML_(fndn_ix2filename) (const DebugInfo* di, 1041 UInt fndn_ix); 1042 1043 /* Returns the dirname of the fndn pair identified by fndn_ix. 1044 Returns "" if fndn_ix is 0 or fndn->dirname is NULL. */ 1045 extern const HChar* ML_(fndn_ix2dirname) (const DebugInfo* di, 1046 UInt fndn_ix); 1047 1048 /* Returns the fndn_ix for the LineInfo locno in di->loctab. 1049 0 if filename/dirname are unknown. */ 1050 extern UInt ML_(fndn_ix) (const DebugInfo* di, Word locno); 1051 1052 /* Add a line-number record to a DebugInfo. 1053 fndn_ix is an index in di->fndnpool, allocated using ML_(addFnDn). 1054 Give a 0 index for a unknown filename/dirname pair. */ 1055 extern 1056 void ML_(addLineInfo) ( struct _DebugInfo* di, 1057 UInt fndn_ix, 1058 Addr this, Addr next, Int lineno, Int entry); 1059 1060 /* Add a call inlined record to a DebugInfo. 1061 A call to the below means that inlinedfn code has been 1062 inlined, resulting in code from [addr_lo, addr_hi[. 1063 Note that addr_hi is excluded, i.e. is not part of the inlined code. 1064 fndn_ix and lineno identifies the location of the call that caused 1065 this inlining. 1066 fndn_ix is an index in di->fndnpool, allocated using ML_(addFnDn). 1067 Give a 0 index for an unknown filename/dirname pair. 1068 In case of nested inlining, a small level indicates the call 1069 is closer to main that a call with a higher level. */ 1070 extern 1071 void ML_(addInlInfo) ( struct _DebugInfo* di, 1072 Addr addr_lo, Addr addr_hi, 1073 const HChar* inlinedfn, 1074 UInt fndn_ix, 1075 Int lineno, UShort level); 1076 1077 /* Add a CFI summary record. The supplied DiCfSI_m is copied. */ 1078 extern void ML_(addDiCfSI) ( struct _DebugInfo* di, 1079 Addr base, UInt len, DiCfSI_m* cfsi_m ); 1080 1081 /* Given a position in the di->cfsi_base/cfsi_m_ix arrays, return 1082 the corresponding cfsi_m*. Return NULL if the position corresponds 1083 to a cfsi hole. */ 1084 DiCfSI_m* ML_(get_cfsi_m) (const DebugInfo* di, UInt pos); 1085 1086 /* Add a string to the string table of a DebugInfo. If len==-1, 1087 ML_(addStr) will itself measure the length of the string. */ 1088 extern const HChar* ML_(addStr) ( DebugInfo* di, const HChar* str, Int len ); 1089 1090 /* Add a string to the string table of a DebugInfo, by copying the 1091 string from the given DiCursor. Measures the length of the string 1092 itself. */ 1093 extern const HChar* ML_(addStrFromCursor)( DebugInfo* di, DiCursor c ); 1094 1095 extern void ML_(addVar)( struct _DebugInfo* di, 1096 Int level, 1097 Addr aMin, 1098 Addr aMax, 1099 const HChar* name, 1100 UWord typeR, /* a cuOff */ 1101 const GExpr* gexpr, 1102 const GExpr* fbGX, /* SHARED. */ 1103 UInt fndn_ix, /* where decl'd - may be zero */ 1104 Int lineNo, /* where decl'd - may be zero */ 1105 Bool show ); 1106 /* Note: fndn_ix identifies a filename/dirname pair similarly to 1107 ML_(addInlInfo) and ML_(addLineInfo). */ 1108 1109 /* Canonicalise the tables held by 'di', in preparation for use. Call 1110 this after finishing adding entries to these tables. */ 1111 extern void ML_(canonicaliseTables) ( struct _DebugInfo* di ); 1112 1113 /* Canonicalise the call-frame-info table held by 'di', in preparation 1114 for use. This is called by ML_(canonicaliseTables) but can also be 1115 called on it's own to sort just this table. */ 1116 extern void ML_(canonicaliseCFI) ( struct _DebugInfo* di ); 1117 1118 /* ML_(finish_CFSI_arrays) fills in the cfsi_base and cfsi_m_ix arrays 1119 from cfsi_rd array. cfsi_rd is then freed. */ 1120 extern void ML_(finish_CFSI_arrays) ( struct _DebugInfo* di ); 1121 1122 /* ------ Searching ------ */ 1123 1124 /* Find a symbol-table index containing the specified pointer, or -1 1125 if not found. Binary search. */ 1126 extern Word ML_(search_one_symtab) ( const DebugInfo* di, Addr ptr, 1127 Bool findText ); 1128 1129 /* Find a location-table index containing the specified pointer, or -1 1130 if not found. Binary search. */ 1131 extern Word ML_(search_one_loctab) ( const DebugInfo* di, Addr ptr ); 1132 1133 /* Find a CFI-table index containing the specified pointer, or -1 if 1134 not found. Binary search. */ 1135 extern Word ML_(search_one_cfitab) ( const DebugInfo* di, Addr ptr ); 1136 1137 /* Find a FPO-table index containing the specified pointer, or -1 1138 if not found. Binary search. */ 1139 extern Word ML_(search_one_fpotab) ( const DebugInfo* di, Addr ptr ); 1140 1141 /* Helper function for the most often needed searching for an rx 1142 mapping containing the specified address range. The range must 1143 fall entirely within the mapping to be considered to be within it. 1144 Asserts if lo > hi; caller must ensure this doesn't happen. */ 1145 extern DebugInfoMapping* ML_(find_rx_mapping) ( DebugInfo* di, 1146 Addr lo, Addr hi ); 1147 1148 /* ------ Misc ------ */ 1149 1150 /* Show a non-fatal debug info reading error. Use VG_(core_panic) for 1151 fatal errors. 'serious' errors are always shown, not 'serious' ones 1152 are shown only at verbosity level 2 and above. */ 1153 extern 1154 void ML_(symerr) ( const DebugInfo* di, Bool serious, const HChar* msg ); 1155 1156 /* Print a symbol. */ 1157 extern void ML_(ppSym) ( Int idx, const DiSym* sym ); 1158 1159 /* Print a call-frame-info summary. */ 1160 extern void ML_(ppDiCfSI) ( const XArray* /* of CfiExpr */ exprs, 1161 Addr base, UInt len, 1162 const DiCfSI_m* si_m ); 1163 1164 1165 #define TRACE_SYMTAB_ENABLED (di->trace_symtab) 1166 #define TRACE_SYMTAB(format, args...) \ 1167 if (TRACE_SYMTAB_ENABLED) { VG_(printf)(format, ## args); } 1168 1169 1170 #endif /* ndef __PRIV_STORAGE_H */ 1171 1172 /*--------------------------------------------------------------------*/ 1173 /*--- end ---*/ 1174 /*--------------------------------------------------------------------*/ 1175