1#-------------------------------------------------------------------------------
2# elftools: dwarf/dwarfinfo.py
3#
4# DWARFInfo - Main class for accessing DWARF debug information
5#
6# Eli Bendersky (eliben@gmail.com)
7# This code is in the public domain
8#-------------------------------------------------------------------------------
9from collections import namedtuple
10
11from ..common.exceptions import DWARFError
12from ..common.utils import (struct_parse, dwarf_assert,
13                            parse_cstring_from_stream)
14from .structs import DWARFStructs
15from .compileunit import CompileUnit
16from .abbrevtable import AbbrevTable
17from .lineprogram import LineProgram
18from .callframe import CallFrameInfo
19from .locationlists import LocationLists
20from .ranges import RangeLists
21
22
# Descriptor of a single debug section, as handed to DWARFInfo.
#
# Fields:
#   stream         - stream object holding the data of the section
#   name           - name of the section in the container file
#   global_offset  - offset of the section within its container file
#   size           - size of the section's data, in bytes
#
# 'name' and 'global_offset' serve descriptive purposes only; the DWARF
# parsing itself does not strictly require them.
#
DebugSectionDescriptor = namedtuple(
    'DebugSectionDescriptor',
    ['stream', 'name', 'global_offset', 'size'])
35
36
# Configuration parameters for the DWARF reader. Keeping them in a separate
# object lets DWARFInfo stay independent of any specific file
# format/container.
#
# Fields:
#   little_endian         - boolean flag: is the data in the file little
#                           endian?
#   machine_arch          - machine architecture as a string, for example
#                           'x86' or 'x64'
#   default_address_size  - default address size for the container file
#                           (sizeof pointer, in bytes)
#
DwarfConfig = namedtuple(
    'DwarfConfig',
    ['little_endian', 'machine_arch', 'default_address_size'])
51
52
class DWARFInfo(object):
    """ Main class for accessing DWARF debug information.

        Acts also as a "context" to other major objects, bridging between
        various parts of the debug information.
    """
    def __init__(self,
            config,
            debug_info_sec,
            debug_abbrev_sec,
            debug_frame_sec,
            eh_frame_sec,
            debug_str_sec,
            debug_loc_sec,
            debug_ranges_sec,
            debug_line_sec):
        """ config:
                A DwarfConfig object

            debug_*_sec:
                DebugSectionDescriptor for a section. Pass None for sections
                that don't exist. These arguments are best given with
                keyword syntax.
        """
        self.config = config
        self.debug_info_sec = debug_info_sec
        self.debug_abbrev_sec = debug_abbrev_sec
        self.debug_frame_sec = debug_frame_sec
        self.eh_frame_sec = eh_frame_sec
        self.debug_str_sec = debug_str_sec
        self.debug_loc_sec = debug_loc_sec
        self.debug_ranges_sec = debug_ranges_sec
        self.debug_line_sec = debug_line_sec

        # This is the DWARFStructs the context uses, so it doesn't depend on
        # DWARF format and address_size (these are determined per CU) - set them
        # to default values.
        self.structs = DWARFStructs(
            little_endian=self.config.little_endian,
            dwarf_format=32,
            address_size=self.config.default_address_size)

        # Cache for abbrev tables: a dict keyed by offset
        self._abbrevtable_cache = {}

    def iter_CUs(self):
        """ Yield all the compile units (CompileUnit objects) in the debug info.

            Yields nothing if no debug_info section was supplied (None).
        """
        return self._parse_CUs_iter()

    def get_abbrev_table(self, offset):
        """ Get an AbbrevTable from the given offset in the debug_abbrev
            section.

            The only verification done on the offset is that it's within the
            bounds of the section, i.e. 0 <= offset < section size (if not,
            an exception is raised through dwarf_assert).
            It is the caller's responsibility to make sure the offset actually
            points to a valid abbreviation table.

            AbbrevTable objects are cached internally (two calls for the same
            offset will return the same object).
        """
        # A negative offset is just as much out of bounds as one past the end
        # of the section, so both ends are checked here.
        dwarf_assert(
            0 <= offset < self.debug_abbrev_sec.size,
            "Offset '0x%x' to abbrev table out of section bounds" % offset)
        if offset not in self._abbrevtable_cache:
            self._abbrevtable_cache[offset] = AbbrevTable(
                structs=self.structs,
                stream=self.debug_abbrev_sec.stream,
                offset=offset)
        return self._abbrevtable_cache[offset]

    def get_string_from_table(self, offset):
        """ Obtain a string from the string table section, given an offset
            relative to the section.

            The debug_str section is accessed without checking that it
            exists.
        """
        return parse_cstring_from_stream(self.debug_str_sec.stream, offset)

    def line_program_for_CU(self, CU):
        """ Given a CU object, fetch the line program it points to from the
            .debug_line section.
            If the CU doesn't point to a line program, return None.
        """
        # The line program is pointed to by the DW_AT_stmt_list attribute of
        # the top DIE of a CU.
        top_DIE = CU.get_top_DIE()
        if 'DW_AT_stmt_list' in top_DIE.attributes:
            return self._parse_line_program_at_offset(
                    top_DIE.attributes['DW_AT_stmt_list'].value, CU.structs)
        else:
            return None

    def has_CFI(self):
        """ Does this dwarf info have a dwarf_frame CFI section?
        """
        return self.debug_frame_sec is not None

    def CFI_entries(self):
        """ Get a list of dwarf_frame CFI entries from the .debug_frame section.

            The section is accessed without checking that it exists; use
            has_CFI() to find out beforehand.
        """
        cfi = CallFrameInfo(
            stream=self.debug_frame_sec.stream,
            size=self.debug_frame_sec.size,
            base_structs=self.structs)
        return cfi.get_entries()

    def has_EH_CFI(self):
        """ Does this dwarf info have a eh_frame CFI section?
        """
        return self.eh_frame_sec is not None

    def EH_CFI_entries(self):
        """ Get a list of eh_frame CFI entries from the .eh_frame section.

            The section is accessed without checking that it exists; use
            has_EH_CFI() to find out beforehand.
        """
        cfi = CallFrameInfo(
            stream=self.eh_frame_sec.stream,
            size=self.eh_frame_sec.size,
            base_structs=self.structs)
        return cfi.get_entries()

    def location_lists(self):
        """ Get a LocationLists object representing the .debug_loc section of
            the DWARF data, or None if this section doesn't exist.
        """
        if self.debug_loc_sec:
            return LocationLists(self.debug_loc_sec.stream, self.structs)
        else:
            return None

    def range_lists(self):
        """ Get a RangeLists object representing the .debug_ranges section of
            the DWARF data, or None if this section doesn't exist.
        """
        if self.debug_ranges_sec:
            return RangeLists(self.debug_ranges_sec.stream, self.structs)
        else:
            return None

    #------ PRIVATE ------#

    def _parse_CUs_iter(self):
        """ Parse CU entries from debug_info. Yield CUs in order of appearance.

            Yields nothing when there is no debug_info section.
        """
        # Sections that don't exist are passed as None to the constructor;
        # without a debug_info section there are simply no CUs to yield.
        if self.debug_info_sec is None:
            return

        offset = 0
        while offset < self.debug_info_sec.size:
            cu = self._parse_CU_at_offset(offset)
            # Compute the offset of the next CU in the section. The unit_length
            # field of the CU header contains its size not including the length
            # field itself.
            offset = (  offset +
                        cu['unit_length'] +
                        cu.structs.initial_length_field_size())
            yield cu

    def _parse_CU_at_offset(self, offset):
        """ Parse and return a CU at the given offset in the debug_info stream.

            An exception is raised through dwarf_assert if the DWARF version
            found in the CU header is not supported.
        """
        # Section 7.4 (32-bit and 64-bit DWARF Formats) of the DWARF spec v3
        # states that the first 32-bit word of the CU header determines
        # whether the CU is represented with 32-bit or 64-bit DWARF format.
        #
        # So we peek at the first word in the CU header to determine its
        # dwarf format. Based on it, we then create a new DWARFStructs
        # instance suitable for this CU and use it to parse the rest.
        #
        initial_length = struct_parse(
            self.structs.Dwarf_uint32(''), self.debug_info_sec.stream, offset)
        dwarf_format = 64 if initial_length == 0xFFFFFFFF else 32

        # At this point we still haven't read the whole header, so we don't
        # know the address_size. Therefore, we're going to create structs
        # with a default address_size=4. If, after parsing the header, we
        # find out address_size is actually 8, we just create a new structs
        # object for this CU.
        #
        cu_structs = DWARFStructs(
            little_endian=self.config.little_endian,
            dwarf_format=dwarf_format,
            address_size=4)

        cu_header = struct_parse(
            cu_structs.Dwarf_CU_header, self.debug_info_sec.stream, offset)
        if cu_header['address_size'] == 8:
            cu_structs = DWARFStructs(
                little_endian=self.config.little_endian,
                dwarf_format=dwarf_format,
                address_size=8)

        # The stream is now positioned right after the CU header; this
        # position is recorded as the offset of the CU's DIE data.
        cu_die_offset = self.debug_info_sec.stream.tell()
        dwarf_assert(
            self._is_supported_version(cu_header['version']),
            "Expected supported DWARF version. Got '%s'" % cu_header['version'])
        return CompileUnit(
                header=cu_header,
                dwarfinfo=self,
                structs=cu_structs,
                cu_offset=offset,
                cu_die_offset=cu_die_offset)

    def _is_supported_version(self, version):
        """ DWARF version supported by this parser (versions 2 through 4,
            inclusive).
        """
        return 2 <= version <= 4

    def _parse_line_program_at_offset(self, debug_line_offset, structs):
        """ Given an offset to the .debug_line section, parse the line program
            starting at this offset in the section and return it.
            structs is the DWARFStructs object used to do this parsing.
        """
        lineprog_header = struct_parse(
            structs.Dwarf_lineprog_header,
            self.debug_line_sec.stream,
            debug_line_offset)

        # Calculate the offset to the next line program (see DWARF 6.2.4)
        end_offset = (  debug_line_offset + lineprog_header['unit_length'] +
                        structs.initial_length_field_size())

        # After parsing the header the stream points at the first byte past
        # it, which is passed on as the start of the program itself.
        return LineProgram(
            header=lineprog_header,
            stream=self.debug_line_sec.stream,
            structs=structs,
            program_start_offset=self.debug_line_sec.stream.tell(),
            program_end_offset=end_offset)
275
276