#-------------------------------------------------------------------------------
# elftools: dwarf/dwarfinfo.py
#
# DWARFInfo - Main class for accessing DWARF debug information
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
from collections import namedtuple

from ..common.exceptions import DWARFError
from ..common.utils import (struct_parse, dwarf_assert,
                            parse_cstring_from_stream)
from .structs import DWARFStructs
from .compileunit import CompileUnit
from .abbrevtable import AbbrevTable
from .lineprogram import LineProgram
from .callframe import CallFrameInfo
from .locationlists import LocationLists
from .ranges import RangeLists


# Describes a debug section
#
# stream: a stream object containing the data of this section
# name: section name in the container file
# global_offset: the global offset of the section in its container file
# size: the size of the section's data, in bytes
#
# 'name' and 'global_offset' are for descriptive purposes only and
# aren't strictly required for the DWARF parsing to work.
#
DebugSectionDescriptor = namedtuple('DebugSectionDescriptor',
    'stream name global_offset size')


# Some configuration parameters for the DWARF reader. This exists to allow
# DWARFInfo to be independent from any specific file format/container.
#
# little_endian:
#   boolean flag specifying whether the data in the file is little endian
#
# machine_arch:
#   Machine architecture as a string. For example 'x86' or 'x64'
#
# default_address_size:
#   The default address size for the container file (sizeof pointer, in bytes)
#
DwarfConfig = namedtuple('DwarfConfig',
    'little_endian machine_arch default_address_size')


class DWARFInfo(object):
    """ Acts also as a "context" to other major objects, bridging between
        various parts of the debug information.
    """
    def __init__(self,
            config,
            debug_info_sec,
            debug_abbrev_sec,
            debug_frame_sec,
            eh_frame_sec,
            debug_str_sec,
            debug_loc_sec,
            debug_ranges_sec,
            debug_line_sec):
        """ config:
                A DwarfConfig object

            debug_*_sec:
                DebugSectionDescriptor for a section. Pass None for sections
                that don't exist. These arguments are best given with
                keyword syntax.
        """
        self.config = config
        self.debug_info_sec = debug_info_sec
        self.debug_abbrev_sec = debug_abbrev_sec
        self.debug_frame_sec = debug_frame_sec
        self.eh_frame_sec = eh_frame_sec
        self.debug_str_sec = debug_str_sec
        self.debug_loc_sec = debug_loc_sec
        self.debug_ranges_sec = debug_ranges_sec
        self.debug_line_sec = debug_line_sec

        # This is the DWARFStructs the context uses, so it doesn't depend on
        # DWARF format and address_size (these are determined per CU) - set them
        # to default values.
        self.structs = DWARFStructs(
            little_endian=self.config.little_endian,
            dwarf_format=32,
            address_size=self.config.default_address_size)

        # Cache for abbrev tables: a dict keyed by offset
        self._abbrevtable_cache = {}

    def iter_CUs(self):
        """ Yield all the compile units (CompileUnit objects) in the debug info
        """
        return self._parse_CUs_iter()

    def get_abbrev_table(self, offset):
        """ Get an AbbrevTable from the given offset in the debug_abbrev
            section.

            The only verification done on the offset is that it's within the
            bounds of the section (if not, an exception is raised).
            It is the caller's responsibility to make sure the offset actually
            points to a valid abbreviation table.

            AbbrevTable objects are cached internally (two calls for the same
            offset will return the same object).

            Requires the debug_abbrev section to have been provided (not None).
        """
        dwarf_assert(
            offset < self.debug_abbrev_sec.size,
            "Offset '0x%x' to abbrev table out of section bounds" % offset)
        if offset not in self._abbrevtable_cache:
            # First request for this offset: parse the table and remember it
            self._abbrevtable_cache[offset] = AbbrevTable(
                structs=self.structs,
                stream=self.debug_abbrev_sec.stream,
                offset=offset)
        return self._abbrevtable_cache[offset]

    def get_string_from_table(self, offset):
        """ Obtain a string from the string table section, given an offset
            relative to the section.

            Requires the debug_str section to have been provided (not None).
        """
        return parse_cstring_from_stream(self.debug_str_sec.stream, offset)

    def line_program_for_CU(self, CU):
        """ Given a CU object, fetch the line program it points to from the
            .debug_line section.
            If the CU doesn't point to a line program, return None.
        """
        # The line program is pointed to by the DW_AT_stmt_list attribute of
        # the top DIE of a CU.
        top_DIE = CU.get_top_DIE()
        if 'DW_AT_stmt_list' in top_DIE.attributes:
            return self._parse_line_program_at_offset(
                top_DIE.attributes['DW_AT_stmt_list'].value, CU.structs)
        else:
            return None

    def has_CFI(self):
        """ Does this dwarf info have a dwarf_frame CFI section?
        """
        return self.debug_frame_sec is not None

    def CFI_entries(self):
        """ Get a list of dwarf_frame CFI entries from the .debug_frame section.

            The section descriptor is dereferenced unconditionally, so check
            has_CFI() before calling this.
        """
        cfi = CallFrameInfo(
            stream=self.debug_frame_sec.stream,
            size=self.debug_frame_sec.size,
            base_structs=self.structs)
        return cfi.get_entries()

    def has_EH_CFI(self):
        """ Does this dwarf info have a eh_frame CFI section?
        """
        return self.eh_frame_sec is not None

    def EH_CFI_entries(self):
        """ Get a list of eh_frame CFI entries from the .eh_frame section.

            The section descriptor is dereferenced unconditionally, so check
            has_EH_CFI() before calling this.
        """
        cfi = CallFrameInfo(
            stream=self.eh_frame_sec.stream,
            size=self.eh_frame_sec.size,
            base_structs=self.structs)
        return cfi.get_entries()

    def location_lists(self):
        """ Get a LocationLists object representing the .debug_loc section of
            the DWARF data, or None if this section doesn't exist.
        """
        # Explicit None test, consistent with has_CFI/has_EH_CFI: a missing
        # section is signalled by None, not by general falsiness.
        if self.debug_loc_sec is not None:
            return LocationLists(self.debug_loc_sec.stream, self.structs)
        else:
            return None

    def range_lists(self):
        """ Get a RangeLists object representing the .debug_ranges section of
            the DWARF data, or None if this section doesn't exist.
        """
        # Explicit None test, consistent with has_CFI/has_EH_CFI: a missing
        # section is signalled by None, not by general falsiness.
        if self.debug_ranges_sec is not None:
            return RangeLists(self.debug_ranges_sec.stream, self.structs)
        else:
            return None

    #------ PRIVATE ------#

    def _parse_CUs_iter(self):
        """ Parse CU entries from debug_info. Yield CUs in order of appearance.
        """
        offset = 0
        while offset < self.debug_info_sec.size:
            cu = self._parse_CU_at_offset(offset)
            # Compute the offset of the next CU in the section. The unit_length
            # field of the CU header contains its size not including the length
            # field itself.
            offset = (  offset +
                        cu['unit_length'] +
                        cu.structs.initial_length_field_size())
            yield cu

    def _parse_CU_at_offset(self, offset):
        """ Parse and return a CU at the given offset in the debug_info stream.

            An exception is raised (through dwarf_assert) if the DWARF version
            found in the CU header is not supported by this parser.
        """
        # Section 7.4 (32-bit and 64-bit DWARF Formats) of the DWARF spec v3
        # states that the first 32-bit word of the CU header determines
        # whether the CU is represented with 32-bit or 64-bit DWARF format.
        #
        # So we peek at the first word in the CU header to determine its
        # dwarf format. Based on it, we then create a new DWARFStructs
        # instance suitable for this CU and use it to parse the rest.
        #
        initial_length = struct_parse(
            self.structs.Dwarf_uint32(''), self.debug_info_sec.stream, offset)
        dwarf_format = 64 if initial_length == 0xFFFFFFFF else 32

        # At this point we still haven't read the whole header, so we don't
        # know the address_size. Therefore, we're going to create structs
        # with a default address_size=4. If, after parsing the header, we
        # find out address_size is actually 8, we just create a new structs
        # object for this CU.
        #
        cu_structs = DWARFStructs(
            little_endian=self.config.little_endian,
            dwarf_format=dwarf_format,
            address_size=4)

        cu_header = struct_parse(
            cu_structs.Dwarf_CU_header, self.debug_info_sec.stream, offset)
        if cu_header['address_size'] == 8:
            cu_structs = DWARFStructs(
                little_endian=self.config.little_endian,
                dwarf_format=dwarf_format,
                address_size=8)

        # The stream is now positioned right after the CU header; this
        # position is recorded as the offset of the CU's DIE data.
        cu_die_offset = self.debug_info_sec.stream.tell()
        dwarf_assert(
            self._is_supported_version(cu_header['version']),
            "Expected supported DWARF version. Got '%s'" % cu_header['version'])
        return CompileUnit(
            header=cu_header,
            dwarfinfo=self,
            structs=cu_structs,
            cu_offset=offset,
            cu_die_offset=cu_die_offset)

    def _is_supported_version(self, version):
        """ DWARF version supported by this parser
        """
        return 2 <= version <= 4

    def _parse_line_program_at_offset(self, debug_line_offset, structs):
        """ Given an offset to the .debug_line section, parse the line program
            starting at this offset in the section and return it.
            structs is the DWARFStructs object used to do this parsing.
        """
        lineprog_header = struct_parse(
            structs.Dwarf_lineprog_header,
            self.debug_line_sec.stream,
            debug_line_offset)

        # Calculate the offset to the next line program (see DWARF 6.2.4)
        end_offset = (  debug_line_offset + lineprog_header['unit_length'] +
                        structs.initial_length_field_size())

        # The stream is positioned right after the header here, which is
        # where the line program's instructions begin.
        return LineProgram(
            header=lineprog_header,
            stream=self.debug_line_sec.stream,
            structs=structs,
            program_start_offset=self.debug_line_sec.stream.tell(),
            program_end_offset=end_offset)
