#-------------------------------------------------------------------------------
# elftools: elf/elffile.py
#
# ELFFile - main class for accessing ELF files
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
from ..common.py3compat import BytesIO
from ..common.exceptions import ELFError
from ..common.utils import struct_parse, elf_assert
from ..construct import ConstructError
from .structs import ELFStructs
from .sections import (
        Section, StringTableSection, SymbolTableSection,
        SUNWSyminfoTableSection, NullSection)
from .dynamic import DynamicSection, DynamicSegment
from .relocation import RelocationSection, RelocationHandler
from .gnuversions import (
        GNUVerNeedSection, GNUVerDefSection,
        GNUVerSymSection)
from .segments import Segment, InterpSegment
from ..dwarf.dwarfinfo import DWARFInfo, DebugSectionDescriptor, DwarfConfig


class ELFFile(object):
    """ Creation: the constructor accepts a stream (file-like object) with the
        contents of an ELF file.

        Accessible attributes:

            stream:
                The stream holding the data of the file - must be a binary
                stream (bytes, not string).

            elfclass:
                32 or 64 - specifies the word size of the target machine

            little_endian:
                boolean - specifies the target machine's endianness

            header:
                the complete ELF file header

            e_ident_raw:
                the raw e_ident field of the header

        Header entries can also be read with dict-like access on the object
        itself, e.g. elffile['e_shnum'].
    """
    # Escape value of e_shstrndx defined by the ELF gABI: the real index of
    # the section name string table is then stored in the sh_link field of
    # the section header at index 0.
    _SHN_XINDEX = 0xffff

    def __init__(self, stream):
        """ Identify the file read from the stream, parse its ELF header and
            locate the string table holding the section names.

            stream:
                Seekable binary stream positioned anywhere; it is sought
                explicitly before every read.

            Raises ELFError if the magic number, EI_CLASS or EI_DATA bytes
            are not recognized.
        """
        self.stream = stream
        # Sets self.elfclass and self.little_endian, which are needed to
        # pick the right struct definitions for everything else.
        self._identify_file()
        self.structs = ELFStructs(
            little_endian=self.little_endian,
            elfclass=self.elfclass)
        self.header = self._parse_elf_header()

        self.stream.seek(0)
        self.e_ident_raw = self.stream.read(16)

        self._file_stringtable_section = self._get_file_stringtable()
        # Built lazily by get_section_by_name
        self._section_name_map = None

    def num_sections(self):
        """ Number of sections in the file

            Normally this is the e_shnum header field. The ELF gABI reserves
            e_shnum == 0 with a section header table present (non-zero
            e_shoff) for files whose section count does not fit in e_shnum;
            the real count is then held in the sh_size field of the section
            header at index 0 (which is 0 when the file really has no
            sections).
        """
        shnum = self['e_shnum']
        if shnum == 0 and self['e_shoff'] != 0:
            return self._get_section_header(0)['sh_size']
        return shnum

    def get_section(self, n):
        """ Get the section at index #n from the file (Section object or a
            subclass)
        """
        section_header = self._get_section_header(n)
        return self._make_section(section_header)

    def get_section_by_name(self, name):
        """ Get a section from the file, by name. Return None if no such
            section exists.

            The name -> index mapping is computed once, on the first call,
            by walking over all the sections; if several sections share a
            name, the one with the highest index is the one returned.
        """
        # The first time this method is called, construct a name to number
        # mapping
        #
        if self._section_name_map is None:
            self._section_name_map = {}
            for i, sec in enumerate(self.iter_sections()):
                self._section_name_map[sec.name] = i
        secnum = self._section_name_map.get(name, None)
        return None if secnum is None else self.get_section(secnum)

    def iter_sections(self):
        """ Yield all the sections in the file
        """
        for i in range(self.num_sections()):
            yield self.get_section(i)

    def num_segments(self):
        """ Number of segments in the file
        """
        return self['e_phnum']

    def get_segment(self, n):
        """ Get the segment at index #n from the file (Segment object)
        """
        segment_header = self._get_segment_header(n)
        return self._make_segment(segment_header)

    def iter_segments(self):
        """ Yield all the segments in the file
        """
        for i in range(self.num_segments()):
            yield self.get_segment(i)

    def address_offsets(self, start, size=1):
        """ Yield a file offset for each loadable ELF segment containing a
            memory region.

            A memory region is defined by the range [start...start+size). The
            offset of the region is yielded.

            Only PT_LOAD segments are considered. Other segment types
            (PT_DYNAMIC, PT_INTERP, ...) describe data that lies inside a
            loadable segment, so matching them too would yield the same
            offset more than once.
        """
        end = start + size
        for seg in self.iter_segments():
            if seg['p_type'] != 'PT_LOAD':
                continue
            # The region has to be fully backed by file contents, hence
            # p_filesz is used for the upper bound.
            if (start >= seg['p_vaddr'] and
                end <= seg['p_vaddr'] + seg['p_filesz']):
                yield start - seg['p_vaddr'] + seg['p_offset']

    def has_dwarf_info(self):
        """ Check whether this file appears to have debugging information.
            We assume that if it has the debug_info section, it has all the
            other required sections as well.
        """
        return bool(self.get_section_by_name(b'.debug_info'))

    def get_dwarf_info(self, relocate_dwarf_sections=True):
        """ Return a DWARFInfo object representing the debugging information in
            this file.

            If relocate_dwarf_sections is True, relocations for DWARF sections
            are looked up and applied.
        """
        # Expect that has_dwarf_info was called, so at least .debug_info is
        # present.
        # Sections that aren't found will be passed as None to DWARFInfo.
        #
        debug_sections = {}
        for secname in (b'.debug_info', b'.debug_abbrev', b'.debug_str',
                        b'.debug_line', b'.debug_frame',
                        b'.debug_loc', b'.debug_ranges'):
            section = self.get_section_by_name(secname)
            if section is None:
                debug_sections[secname] = None
            else:
                debug_sections[secname] = self._read_dwarf_section(
                    section,
                    relocate_dwarf_sections)

        return DWARFInfo(
                config=DwarfConfig(
                    little_endian=self.little_endian,
                    # elfclass is in bits; the address size is in bytes
                    default_address_size=self.elfclass // 8,
                    machine_arch=self.get_machine_arch()),
                debug_info_sec=debug_sections[b'.debug_info'],
                debug_abbrev_sec=debug_sections[b'.debug_abbrev'],
                debug_frame_sec=debug_sections[b'.debug_frame'],
                # TODO(eliben): reading of eh_frame is not hooked up yet
                eh_frame_sec=None,
                debug_str_sec=debug_sections[b'.debug_str'],
                debug_loc_sec=debug_sections[b'.debug_loc'],
                debug_ranges_sec=debug_sections[b'.debug_ranges'],
                debug_line_sec=debug_sections[b'.debug_line'])

    def get_machine_arch(self):
        """ Return the machine architecture, as detected from the ELF header.
            Not all architectures are supported at the moment.

            The result is one of 'x64', 'x86', 'ARM', 'AArch64' or
            '<unknown>'.
        """
        if self['e_machine'] == 'EM_X86_64':
            return 'x64'
        elif self['e_machine'] in ('EM_386', 'EM_486'):
            return 'x86'
        elif self['e_machine'] == 'EM_ARM':
            return 'ARM'
        elif self['e_machine'] == 'EM_AARCH64':
            return 'AArch64'
        else:
            return '<unknown>'

    #-------------------------------- PRIVATE --------------------------------#

    def __getitem__(self, name):
        """ Implement dict-like access to header entries
        """
        return self.header[name]

    def _identify_file(self):
        """ Verify the ELF file and identify its class and endianness.

            Sets self.elfclass (32 or 64) and self.little_endian (boolean).
            Raises ELFError on an unrecognized EI_CLASS or EI_DATA byte; a
            wrong magic number is reported through elf_assert.
        """
        # Note: this code reads the stream directly, without using ELFStructs,
        # since we don't yet know its exact format. ELF was designed to be
        # read like this - its e_ident field is word-size and endian agnostic.
        #
        self.stream.seek(0)
        magic = self.stream.read(4)
        elf_assert(magic == b'\x7fELF', 'Magic number does not match')

        ei_class = self.stream.read(1)
        if ei_class == b'\x01':
            self.elfclass = 32
        elif ei_class == b'\x02':
            self.elfclass = 64
        else:
            raise ELFError('Invalid EI_CLASS %s' % repr(ei_class))

        ei_data = self.stream.read(1)
        if ei_data == b'\x01':
            self.little_endian = True
        elif ei_data == b'\x02':
            self.little_endian = False
        else:
            raise ELFError('Invalid EI_DATA %s' % repr(ei_data))

    def _section_offset(self, n):
        """ Compute the offset of section #n in the file
        """
        return self['e_shoff'] + n * self['e_shentsize']

    def _segment_offset(self, n):
        """ Compute the offset of segment #n in the file
        """
        return self['e_phoff'] + n * self['e_phentsize']

    def _make_segment(self, segment_header):
        """ Create a Segment object of the appropriate type

            PT_INTERP and PT_DYNAMIC headers get their specialized classes;
            every other type is wrapped in a plain Segment.
        """
        segtype = segment_header['p_type']
        if segtype == 'PT_INTERP':
            return InterpSegment(segment_header, self.stream)
        elif segtype == 'PT_DYNAMIC':
            return DynamicSegment(segment_header, self.stream, self)
        else:
            return Segment(segment_header, self.stream)

    def _get_section_header(self, n):
        """ Find the header of section #n, parse it and return the struct
        """
        return struct_parse(
            self.structs.Elf_Shdr,
            self.stream,
            stream_pos=self._section_offset(n))

    def _get_section_name(self, section_header):
        """ Given a section header, find this section's name in the file's
            string table
        """
        name_offset = section_header['sh_name']
        return self._file_stringtable_section.get_string(name_offset)

    def _make_section(self, section_header):
        """ Create a section object of the appropriate type

            The type is selected by the sh_type field of the header; types
            without a dedicated class are wrapped in a plain Section.
        """
        name = self._get_section_name(section_header)
        sectype = section_header['sh_type']

        if sectype == 'SHT_STRTAB':
            return StringTableSection(section_header, name, self.stream)
        elif sectype == 'SHT_NULL':
            return NullSection(section_header, name, self.stream)
        elif sectype in ('SHT_SYMTAB', 'SHT_DYNSYM', 'SHT_SUNW_LDYNSYM'):
            return self._make_symbol_table_section(section_header, name)
        elif sectype == 'SHT_SUNW_syminfo':
            return self._make_sunwsyminfo_table_section(section_header, name)
        elif sectype == 'SHT_GNU_verneed':
            return self._make_gnu_verneed_section(section_header, name)
        elif sectype == 'SHT_GNU_verdef':
            return self._make_gnu_verdef_section(section_header, name)
        elif sectype == 'SHT_GNU_versym':
            return self._make_gnu_versym_section(section_header, name)
        elif sectype in ('SHT_REL', 'SHT_RELA'):
            return RelocationSection(
                section_header, name, self.stream, self)
        elif sectype == 'SHT_DYNAMIC':
            return DynamicSection(section_header, name, self.stream, self)
        else:
            return Section(section_header, name, self.stream)

    def _make_symbol_table_section(self, section_header, name):
        """ Create a SymbolTableSection

            The section referenced by sh_link is passed along as the string
            table of the symbol table.
        """
        linked_strtab_index = section_header['sh_link']
        strtab_section = self.get_section(linked_strtab_index)
        return SymbolTableSection(
            section_header, name, self.stream,
            elffile=self,
            stringtable=strtab_section)

    def _make_sunwsyminfo_table_section(self, section_header, name):
        """ Create a SUNWSyminfoTableSection

            The section referenced by sh_link is passed along as the symbol
            table the syminfo entries belong to.
        """
        linked_symtab_index = section_header['sh_link']
        symtab_section = self.get_section(linked_symtab_index)
        return SUNWSyminfoTableSection(
            section_header, name, self.stream,
            elffile=self,
            symboltable=symtab_section)

    def _make_gnu_verneed_section(self, section_header, name):
        """ Create a GNUVerNeedSection

            The section referenced by sh_link is passed along as the string
            table of the version section.
        """
        linked_strtab_index = section_header['sh_link']
        strtab_section = self.get_section(linked_strtab_index)
        return GNUVerNeedSection(
            section_header, name, self.stream,
            elffile=self,
            stringtable=strtab_section)

    def _make_gnu_verdef_section(self, section_header, name):
        """ Create a GNUVerDefSection

            The section referenced by sh_link is passed along as the string
            table of the version section.
        """
        linked_strtab_index = section_header['sh_link']
        strtab_section = self.get_section(linked_strtab_index)
        return GNUVerDefSection(
            section_header, name, self.stream,
            elffile=self,
            stringtable=strtab_section)

    def _make_gnu_versym_section(self, section_header, name):
        """ Create a GNUVerSymSection

            The section referenced by sh_link is passed along as the symbol
            table the version entries belong to.
        """
        linked_symtab_index = section_header['sh_link']
        symtab_section = self.get_section(linked_symtab_index)
        return GNUVerSymSection(
            section_header, name, self.stream,
            elffile=self,
            symboltable=symtab_section)

    def _get_segment_header(self, n):
        """ Find the header of segment #n, parse it and return the struct
        """
        return struct_parse(
            self.structs.Elf_Phdr,
            self.stream,
            stream_pos=self._segment_offset(n))

    def _get_file_stringtable(self):
        """ Find the file's string table section

            This is the string table holding the section names, located
            through the e_shstrndx header field. When that field holds the
            SHN_XINDEX escape value, the real index is taken from the sh_link
            field of the section header at index 0, as specified by the ELF
            gABI for files with a very large number of sections.
        """
        stringtable_section_num = self['e_shstrndx']
        if stringtable_section_num == self._SHN_XINDEX:
            stringtable_section_num = self._get_section_header(0)['sh_link']
        # The section is built by hand rather than through get_section,
        # because naming a section requires this very string table.
        return StringTableSection(
                header=self._get_section_header(stringtable_section_num),
                name='',
                stream=self.stream)

    def _parse_elf_header(self):
        """ Parse the ELF file header found at the start of the stream and
            return the resulting struct.
        """
        return struct_parse(self.structs.Elf_Ehdr, self.stream, stream_pos=0)

    def _read_dwarf_section(self, section, relocate_dwarf_sections):
        """ Read the contents of a DWARF section from the stream and return a
            DebugSectionDescriptor. Apply relocations if asked to.

            section:
                The Section object to read.

            relocate_dwarf_sections:
                If true, a relocation section targeting this section is
                looked up and, when found, applied to the copy of the data
                (the file stream itself is not written to here).
        """
        self.stream.seek(section['sh_offset'])
        # The section data is read into a new stream, for processing
        section_stream = BytesIO()
        section_stream.write(self.stream.read(section['sh_size']))

        if relocate_dwarf_sections:
            reloc_handler = RelocationHandler(self)
            reloc_section = reloc_handler.find_relocations_for_section(section)
            if reloc_section is not None:
                reloc_handler.apply_section_relocations(
                        section_stream, reloc_section)

        return DebugSectionDescriptor(
                stream=section_stream,
                name=section.name,
                global_offset=section['sh_offset'],
                size=section['sh_size'])