1#-------------------------------------------------------------------------------
2# elftools: elf/elffile.py
3#
4# ELFFile - main class for accessing ELF files
5#
6# Eli Bendersky (eliben@gmail.com)
7# This code is in the public domain
8#-------------------------------------------------------------------------------
9from ..common.py3compat import BytesIO
10from ..common.exceptions import ELFError
11from ..common.utils import struct_parse, elf_assert
12from ..construct import ConstructError
13from .structs import ELFStructs
14from .sections import (
15        Section, StringTableSection, SymbolTableSection,
16        SUNWSyminfoTableSection, NullSection)
17from .dynamic import DynamicSection, DynamicSegment
18from .relocation import RelocationSection, RelocationHandler
19from .gnuversions import (
20        GNUVerNeedSection, GNUVerDefSection,
21        GNUVerSymSection)
22from .segments import Segment, InterpSegment
23from ..dwarf.dwarfinfo import DWARFInfo, DebugSectionDescriptor, DwarfConfig
24
25
26class ELFFile(object):
27    """ Creation: the constructor accepts a stream (file-like object) with the
28        contents of an ELF file.
29
30        Accessible attributes:
31
32            stream:
33                The stream holding the data of the file - must be a binary
34                stream (bytes, not string).
35
36            elfclass:
37                32 or 64 - specifies the word size of the target machine
38
39            little_endian:
40                boolean - specifies the target machine's endianness
41
42            header:
43                the complete ELF file header
44
45            e_ident_raw:
46                the raw e_ident field of the header
47    """
48    def __init__(self, stream):
49        self.stream = stream
50        self._identify_file()
51        self.structs = ELFStructs(
52            little_endian=self.little_endian,
53            elfclass=self.elfclass)
54        self.header = self._parse_elf_header()
55
56        self.stream.seek(0)
57        self.e_ident_raw = self.stream.read(16)
58
59        self._file_stringtable_section = self._get_file_stringtable()
60        self._section_name_map = None
61
62    def num_sections(self):
63        """ Number of sections in the file
64        """
65        return self['e_shnum']
66
67    def get_section(self, n):
68        """ Get the section at index #n from the file (Section object or a
69            subclass)
70        """
71        section_header = self._get_section_header(n)
72        return self._make_section(section_header)
73
74    def get_section_by_name(self, name):
75        """ Get a section from the file, by name. Return None if no such
76            section exists.
77        """
78        # The first time this method is called, construct a name to number
79        # mapping
80        #
81        if self._section_name_map is None:
82            self._section_name_map = {}
83            for i, sec in enumerate(self.iter_sections()):
84                self._section_name_map[sec.name] = i
85        secnum = self._section_name_map.get(name, None)
86        return None if secnum is None else self.get_section(secnum)
87
88    def iter_sections(self):
89        """ Yield all the sections in the file
90        """
91        for i in range(self.num_sections()):
92            yield self.get_section(i)
93
94    def num_segments(self):
95        """ Number of segments in the file
96        """
97        return self['e_phnum']
98
99    def get_segment(self, n):
100        """ Get the segment at index #n from the file (Segment object)
101        """
102        segment_header = self._get_segment_header(n)
103        return self._make_segment(segment_header)
104
105    def iter_segments(self):
106        """ Yield all the segments in the file
107        """
108        for i in range(self.num_segments()):
109            yield self.get_segment(i)
110
111    def address_offsets(self, start, size=1):
112        """ Yield a file offset for each ELF segment containing a memory region.
113
114            A memory region is defined by the range [start...start+size). The
115            offset of the region is yielded.
116        """
117        end = start + size
118        for seg in self.iter_segments():
119            if (start >= seg['p_vaddr'] and
120                end <= seg['p_vaddr'] + seg['p_filesz']):
121                yield start - seg['p_vaddr'] + seg['p_offset']
122
123    def has_dwarf_info(self):
124        """ Check whether this file appears to have debugging information.
125            We assume that if it has the debug_info section, it has all theother
126            required sections as well.
127        """
128        return bool(self.get_section_by_name(b'.debug_info'))
129
130    def get_dwarf_info(self, relocate_dwarf_sections=True):
131        """ Return a DWARFInfo object representing the debugging information in
132            this file.
133
134            If relocate_dwarf_sections is True, relocations for DWARF sections
135            are looked up and applied.
136        """
137        # Expect that has_dwarf_info was called, so at least .debug_info is
138        # present.
139        # Sections that aren't found will be passed as None to DWARFInfo.
140        #
141        debug_sections = {}
142        for secname in (b'.debug_info', b'.debug_abbrev', b'.debug_str',
143                        b'.debug_line', b'.debug_frame',
144                        b'.debug_loc', b'.debug_ranges'):
145            section = self.get_section_by_name(secname)
146            if section is None:
147                debug_sections[secname] = None
148            else:
149                debug_sections[secname] = self._read_dwarf_section(
150                    section,
151                    relocate_dwarf_sections)
152
153        return DWARFInfo(
154                config=DwarfConfig(
155                    little_endian=self.little_endian,
156                    default_address_size=self.elfclass // 8,
157                    machine_arch=self.get_machine_arch()),
158                debug_info_sec=debug_sections[b'.debug_info'],
159                debug_abbrev_sec=debug_sections[b'.debug_abbrev'],
160                debug_frame_sec=debug_sections[b'.debug_frame'],
161                # TODO(eliben): reading of eh_frame is not hooked up yet
162                eh_frame_sec=None,
163                debug_str_sec=debug_sections[b'.debug_str'],
164                debug_loc_sec=debug_sections[b'.debug_loc'],
165                debug_ranges_sec=debug_sections[b'.debug_ranges'],
166                debug_line_sec=debug_sections[b'.debug_line'])
167
168    def get_machine_arch(self):
169        """ Return the machine architecture, as detected from the ELF header.
170            Not all architectures are supported at the moment.
171        """
172        if self['e_machine'] == 'EM_X86_64':
173            return 'x64'
174        elif self['e_machine'] in ('EM_386', 'EM_486'):
175            return 'x86'
176        elif self['e_machine'] == 'EM_ARM':
177            return 'ARM'
178        elif self['e_machine'] == 'EM_AARCH64':
179            return 'AArch64'
180        else:
181            return '<unknown>'
182
183    #-------------------------------- PRIVATE --------------------------------#
184
185    def __getitem__(self, name):
186        """ Implement dict-like access to header entries
187        """
188        return self.header[name]
189
190    def _identify_file(self):
191        """ Verify the ELF file and identify its class and endianness.
192        """
193        # Note: this code reads the stream directly, without using ELFStructs,
194        # since we don't yet know its exact format. ELF was designed to be
195        # read like this - its e_ident field is word-size and endian agnostic.
196        #
197        self.stream.seek(0)
198        magic = self.stream.read(4)
199        elf_assert(magic == b'\x7fELF', 'Magic number does not match')
200
201        ei_class = self.stream.read(1)
202        if ei_class == b'\x01':
203            self.elfclass = 32
204        elif ei_class == b'\x02':
205            self.elfclass = 64
206        else:
207            raise ELFError('Invalid EI_CLASS %s' % repr(ei_class))
208
209        ei_data = self.stream.read(1)
210        if ei_data == b'\x01':
211            self.little_endian = True
212        elif ei_data == b'\x02':
213            self.little_endian = False
214        else:
215            raise ELFError('Invalid EI_DATA %s' % repr(ei_data))
216
217    def _section_offset(self, n):
218        """ Compute the offset of section #n in the file
219        """
220        return self['e_shoff'] + n * self['e_shentsize']
221
222    def _segment_offset(self, n):
223        """ Compute the offset of segment #n in the file
224        """
225        return self['e_phoff'] + n * self['e_phentsize']
226
227    def _make_segment(self, segment_header):
228        """ Create a Segment object of the appropriate type
229        """
230        segtype = segment_header['p_type']
231        if segtype == 'PT_INTERP':
232            return InterpSegment(segment_header, self.stream)
233        elif segtype == 'PT_DYNAMIC':
234            return DynamicSegment(segment_header, self.stream, self)
235        else:
236            return Segment(segment_header, self.stream)
237
238    def _get_section_header(self, n):
239        """ Find the header of section #n, parse it and return the struct
240        """
241        return struct_parse(
242            self.structs.Elf_Shdr,
243            self.stream,
244            stream_pos=self._section_offset(n))
245
246    def _get_section_name(self, section_header):
247        """ Given a section header, find this section's name in the file's
248            string table
249        """
250        name_offset = section_header['sh_name']
251        return self._file_stringtable_section.get_string(name_offset)
252
253    def _make_section(self, section_header):
254        """ Create a section object of the appropriate type
255        """
256        name = self._get_section_name(section_header)
257        sectype = section_header['sh_type']
258
259        if sectype == 'SHT_STRTAB':
260            return StringTableSection(section_header, name, self.stream)
261        elif sectype == 'SHT_NULL':
262            return NullSection(section_header, name, self.stream)
263        elif sectype in ('SHT_SYMTAB', 'SHT_DYNSYM', 'SHT_SUNW_LDYNSYM'):
264            return self._make_symbol_table_section(section_header, name)
265        elif sectype == 'SHT_SUNW_syminfo':
266            return self._make_sunwsyminfo_table_section(section_header, name)
267        elif sectype == 'SHT_GNU_verneed':
268            return self._make_gnu_verneed_section(section_header, name)
269        elif sectype == 'SHT_GNU_verdef':
270            return self._make_gnu_verdef_section(section_header, name)
271        elif sectype == 'SHT_GNU_versym':
272            return self._make_gnu_versym_section(section_header, name)
273        elif sectype in ('SHT_REL', 'SHT_RELA'):
274            return RelocationSection(
275                section_header, name, self.stream, self)
276        elif sectype == 'SHT_DYNAMIC':
277            return DynamicSection(section_header, name, self.stream, self)
278        else:
279            return Section(section_header, name, self.stream)
280
281    def _make_symbol_table_section(self, section_header, name):
282        """ Create a SymbolTableSection
283        """
284        linked_strtab_index = section_header['sh_link']
285        strtab_section = self.get_section(linked_strtab_index)
286        return SymbolTableSection(
287            section_header, name, self.stream,
288            elffile=self,
289            stringtable=strtab_section)
290
291    def _make_sunwsyminfo_table_section(self, section_header, name):
292        """ Create a SUNWSyminfoTableSection
293        """
294        linked_strtab_index = section_header['sh_link']
295        strtab_section = self.get_section(linked_strtab_index)
296        return SUNWSyminfoTableSection(
297            section_header, name, self.stream,
298            elffile=self,
299            symboltable=strtab_section)
300
301    def _make_gnu_verneed_section(self, section_header, name):
302        """ Create a GNUVerNeedSection
303        """
304        linked_strtab_index = section_header['sh_link']
305        strtab_section = self.get_section(linked_strtab_index)
306        return GNUVerNeedSection(
307            section_header, name, self.stream,
308            elffile=self,
309            stringtable=strtab_section)
310
311    def _make_gnu_verdef_section(self, section_header, name):
312        """ Create a GNUVerDefSection
313        """
314        linked_strtab_index = section_header['sh_link']
315        strtab_section = self.get_section(linked_strtab_index)
316        return GNUVerDefSection(
317            section_header, name, self.stream,
318            elffile=self,
319            stringtable=strtab_section)
320
321    def _make_gnu_versym_section(self, section_header, name):
322        """ Create a GNUVerSymSection
323        """
324        linked_strtab_index = section_header['sh_link']
325        strtab_section = self.get_section(linked_strtab_index)
326        return GNUVerSymSection(
327            section_header, name, self.stream,
328            elffile=self,
329            symboltable=strtab_section)
330
331    def _get_segment_header(self, n):
332        """ Find the header of segment #n, parse it and return the struct
333        """
334        return struct_parse(
335            self.structs.Elf_Phdr,
336            self.stream,
337            stream_pos=self._segment_offset(n))
338
339    def _get_file_stringtable(self):
340        """ Find the file's string table section
341        """
342        stringtable_section_num = self['e_shstrndx']
343        return StringTableSection(
344                header=self._get_section_header(stringtable_section_num),
345                name='',
346                stream=self.stream)
347
348    def _parse_elf_header(self):
349        """ Parses the ELF file header and assigns the result to attributes
350            of this object.
351        """
352        return struct_parse(self.structs.Elf_Ehdr, self.stream, stream_pos=0)
353
354    def _read_dwarf_section(self, section, relocate_dwarf_sections):
355        """ Read the contents of a DWARF section from the stream and return a
356            DebugSectionDescriptor. Apply relocations if asked to.
357        """
358        self.stream.seek(section['sh_offset'])
359        # The section data is read into a new stream, for processing
360        section_stream = BytesIO()
361        section_stream.write(self.stream.read(section['sh_size']))
362
363        if relocate_dwarf_sections:
364            reloc_handler = RelocationHandler(self)
365            reloc_section = reloc_handler.find_relocations_for_section(section)
366            if reloc_section is not None:
367                reloc_handler.apply_section_relocations(
368                        section_stream, reloc_section)
369
370        return DebugSectionDescriptor(
371                stream=section_stream,
372                name=section.name,
373                global_offset=section['sh_offset'],
374                size=section['sh_size'])
375
376
377