1#-------------------------------------------------------------------------------
2# elftools: dwarf/namelut.py
3#
4# DWARF pubtypes/pubnames section decoding (.debug_pubtypes, .debug_pubnames)
5#
6# Vijay Ramasami (rvijayc@gmail.com)
7# This code is in the public domain
8#-------------------------------------------------------------------------------
9import os
10import collections
11from collections import OrderedDict
12from ..common.utils import struct_parse
13from ..common.py3compat import Mapping
14from bisect import bisect_right
15import math
16from ..construct import CString, Struct, If
17
# Value type stored in a NameLUT: a lightweight record with two fields,
# cu_ofs and die_ofs (in that order).
NameLUTEntry = collections.namedtuple(
    'NameLUTEntry', ['cu_ofs', 'die_ofs'])
19
class NameLUT(Mapping):
    """
    A "Name LUT" holds any of the tables specified by .debug_pubtypes or
    .debug_pubnames sections. This is basically a dictionary where the key is
    the symbol name (either a public variable, function or a type), and the
    value is the tuple (cu_offset, die_offset) corresponding to the variable.
    The die_offset is an absolute offset (meaning, it can be used to search the
    CU by iterating until a match is obtained).

    An ordered dictionary is used to preserve the CU order, i.e. items are
    stored on a per-CU basis (as they were originally in the .debug_* section).

    Usage:

    The NameLUT walks and talks like a dictionary and hence it can be used as
    such. Some examples below:

    # get the pubnames (a NameLUT from DWARF info).
    pubnames = dwarf_info.get_pubnames()

    # lookup a variable.
    entry1 = pubnames["var_name1"]
    entry2 = pubnames.get("var_name2", default=<default_var>)
    print(entry2.cu_ofs)
    ...

    # check whether a name is present.
    if "var_name3" in pubnames:
      ...

    # iterate over items.
    for (name, entry) in pubnames.items():
      # do stuff with name, entry.cu_ofs, entry.die_ofs

    # iterate over items on a per-CU basis.
    import itertools
    for cu_ofs, item_list in itertools.groupby(pubnames.items(),
        key = lambda x: x[1].cu_ofs):
      # items are now grouped by cu_ofs.
      # item_list is an iterator yielding the (name, NameLUTEntry) pairs
      # belonging to cu_ofs.
      # We can parse the CU at cu_offset and use the parsed CU results
      # to parse the pubname DIEs in the CU listed by item_list.
      for item in item_list:
        # work with item which is part of the CU with cu_ofs.

    """

    def __init__(self, stream, size, structs):
        """
        stream:
            Stream the table is parsed from. Parsing starts at position 0
            of this stream.

        size:
            Parsing stops once the running offset reaches this value
            (presumably the size of the section in bytes).

        structs:
            Object providing the Dwarf_offset and Dwarf_nameLUT_header
            parsers and initial_length_field_size().

        Nothing is read from the stream here; parsing happens on first
        access.
        """
        self._stream = stream
        self._size = size
        self._structs = structs
        # entries are lazily loaded on demand.
        self._entries = None
        # CU headers (for readelf).
        self._cu_headers = None

    def _ensure_entries(self):
        """
        Parse the section on first use and return the entries dictionary.
        Does nothing but return the cached dictionary on later calls (or
        after set_entries has supplied one).
        """
        if self._entries is None:
            self._entries, self._cu_headers = self._get_entries()
        return self._entries

    def get_entries(self):
        """
        Returns the parsed NameLUT entries. The returned object is a dictionary
        with the symbol name as the key and NameLUTEntry(cu_ofs, die_ofs) as
        the value.

        This is useful when dealing with very large ELF files with millions of
        entries. The returned entries can be pickled to a file and restored by
        calling set_entries on subsequent loads.
        """
        return self._ensure_entries()

    def set_entries(self, entries, cu_headers):
        """
        Set the NameLUT entries from an external source. The input is a
        dictionary with the symbol name as the key and NameLUTEntry(cu_ofs,
        die_ofs) as the value, plus the matching list of CU headers (as
        returned by get_cu_headers).

        This option is useful when dealing with very large ELF files with
        millions of entries. The entries can be parsed once and pickled to a
        file and can be restored via this function on subsequent loads.
        """
        self._entries = entries
        self._cu_headers = cu_headers

    def __len__(self):
        """
        Returns the number of entries in the NameLUT.
        """
        return len(self._ensure_entries())

    def __getitem__(self, name):
        """
        Returns a namedtuple - NameLUTEntry(cu_ofs, die_ofs) - that corresponds
        to the given symbol name.

        Note: an unknown name yields None rather than raising KeyError. This
        is kept so that existing callers relying on it continue to work; use
        the "in" operator to test for presence.
        """
        return self._ensure_entries().get(name)

    def __contains__(self, name):
        """
        Returns True if the given symbol name is present in the NameLUT.

        The default Mapping.__contains__ relies on __getitem__ raising
        KeyError; since __getitem__ here returns None for unknown names, the
        membership test has to consult the dictionary directly.
        """
        return name in self._ensure_entries()

    def __iter__(self):
        """
        Returns an iterator over the symbol names in the NameLUT.
        """
        return iter(self._ensure_entries())

    def items(self):
        """
        Returns the NameLUT dictionary items, i.e. (name, NameLUTEntry) pairs.
        """
        return self._ensure_entries().items()

    def get(self, name, default=None):
        """
        Returns NameLUTEntry(cu_ofs, die_ofs) for the provided symbol name, or
        the given default (None unless specified) if the symbol does not exist
        in the corresponding section.
        """
        return self._ensure_entries().get(name, default)

    def get_cu_headers(self):
        """
        Returns all CU headers. Mainly required for readelf.
        """
        # Keyed on the headers (not the entries) being absent, so this also
        # triggers a parse when only the headers are missing.
        if self._cu_headers is None:
            self._entries, self._cu_headers = self._get_entries()

        return self._cu_headers

    def _get_entries(self):
        """
        Parse the (name, cu_ofs, die_ofs) information from this section.

        Returns a tuple (entries, cu_headers): entries is an OrderedDict
        mapping each decoded symbol name to its NameLUTEntry, in the order
        the names were read; cu_headers is the list of per-CU headers in the
        order they were read. If the same name is read more than once, the
        value read last is the one kept.
        """

        self._stream.seek(0)
        entries = OrderedDict()
        cu_headers = []
        offset = 0
        # According to 6.1.1. of DWARFv4, each set of names is terminated by
        # an offset field containing zero (and no following string). Because
        # of sequential parsing, every next entry may be that terminator.
        # So, field "name" is conditional.
        entry_struct = Struct("Dwarf_offset_name_pair",
                self._structs.Dwarf_offset('die_ofs'),
                If(lambda ctx: ctx['die_ofs'], CString('name')))

        # each run of this loop will fetch one CU worth of entries.
        while offset < self._size:

            # read the header for this CU.
            namelut_hdr = struct_parse(self._structs.Dwarf_nameLUT_header,
                    self._stream, offset)
            cu_headers.append(namelut_hdr)
            # compute the offset of the next header: skip this unit's
            # length plus the size of the initial length field itself.
            offset = (offset + namelut_hdr.unit_length +
                     self._structs.initial_length_field_size())

            # before inner loop, latch data that will be used in the inner
            # loop to avoid attribute access and other computation.
            hdr_cu_ofs = namelut_hdr.debug_info_offset

            # keep reading entries (continuing from the current stream
            # position) until one with a zero die_ofs is found.
            while True:
                entry = struct_parse(entry_struct, self._stream)

                # if it is zero, this is the terminating record.
                if entry.die_ofs == 0:
                    break
                # add this entry to the look-up dictionary; the stored
                # die_ofs is the CU offset plus the offset read from the
                # entry.
                entries[entry.name.decode('utf-8')] = NameLUTEntry(
                        cu_ofs = hdr_cu_ofs,
                        die_ofs = hdr_cu_ofs + entry.die_ofs)

        # return the entries parsed so far.
        return (entries, cu_headers)
199