#-------------------------------------------------------------------------------
# elftools: dwarf/namelut.py
#
# DWARF pubtypes/pubnames section decoding (.debug_pubtypes, .debug_pubnames)
#
# Vijay Ramasami (rvijayc@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
import os
import collections
from collections import OrderedDict
from ..common.utils import struct_parse
from ..common.py3compat import Mapping
from bisect import bisect_right
import math
from ..construct import CString, Struct, If

NameLUTEntry = collections.namedtuple('NameLUTEntry', 'cu_ofs die_ofs')


class NameLUT(Mapping):
    """
    A "Name LUT" holds any of the tables specified by .debug_pubtypes or
    .debug_pubnames sections. This is basically a dictionary where the key is
    the symbol name (either a public variable, function or a type), and the
    value is the tuple (cu_offset, die_offset) corresponding to the variable.
    The die_offset is an absolute offset (meaning, it can be used to search the
    CU by iterating until a match is obtained).

    An ordered dictionary is used to preserve the CU order (i.e, items are
    stored on a per-CU basis, as they were originally in the .debug_* section).

    Usage:

    The NameLUT walks and talks like a dictionary and hence it can be used as
    such. Some examples below:

        # get the pubnames (a NameLUT from DWARF info).
        pubnames = dwarf_info.get_pubnames()

        # lookup a variable.
        entry1 = pubnames["var_name1"]
        entry2 = pubnames.get("var_name2", default=<default_var>)
        print(entry2.cu_ofs)
        ...

        # test whether a symbol is present.
        if "var_name3" in pubnames:
            ...

        # iterate over items.
        for (name, entry) in pubnames.items():
            # do stuff with name, entry.cu_ofs, entry.die_ofs

        # iterate over items on a per-CU basis.
        import itertools
        for cu_ofs, item_list in itertools.groupby(pubnames.items(),
                key = lambda x: x[1].cu_ofs):
            # items are now grouped by cu_ofs.
            # item_list is an iterator yielding (name, NameLUTEntry) pairs
            # belonging to cu_ofs.
            # We can parse the CU at cu_offset and use the parsed CU results
            # to parse the pubname DIEs in the CU listed by item_list.
            for item in item_list:
                # work with item which is part of the CU with cu_ofs.

    Note that, unlike a plain dict, indexing with an unknown name returns
    None instead of raising KeyError.
    """

    def __init__(self, stream, size, structs):
        """
        stream:
            The stream the section is read from; parsing starts at offset 0.

        size:
            Size of the section; parsing stops once this offset is reached.

        structs:
            Object providing the Dwarf_offset and Dwarf_nameLUT_header
            constructs and initial_length_field_size().
        """
        self._stream = stream
        self._size = size
        self._structs = structs
        # entries are lazily loaded on demand.
        self._entries = None
        # CU headers (for readelf).
        self._cu_headers = None

    def _load(self):
        """
        Parse the section and cache both the entries and the CU headers.
        """
        self._entries, self._cu_headers = self._get_entries()

    def _loaded_entries(self):
        """
        Return the entries dictionary, parsing the section first if no
        entries have been parsed or set yet.
        """
        if self._entries is None:
            self._load()
        return self._entries

    def get_entries(self):
        """
        Returns the parsed NameLUT entries. The returned object is a dictionary
        with the symbol name as the key and NameLUTEntry(cu_ofs, die_ofs) as
        the value.

        This is useful when dealing with very large ELF files with millions of
        entries. The returned entries can be pickled to a file and restored by
        calling set_entries on subsequent loads.
        """
        return self._loaded_entries()

    def set_entries(self, entries, cu_headers):
        """
        Set the NameLUT entries from an external source. The input is a
        dictionary with the symbol name as the key and NameLUTEntry(cu_ofs,
        die_ofs) as the value.

        This option is useful when dealing with very large ELF files with
        millions of entries. The entries can be parsed once and pickled to a
        file and can be restored via this function on subsequent loads.
        """
        self._entries = entries
        self._cu_headers = cu_headers

    def __len__(self):
        """
        Returns the number of entries in the NameLUT.
        """
        return len(self._loaded_entries())

    def __getitem__(self, name):
        """
        Returns a namedtuple - NameLUTEntry(cu_ofs, die_ofs) - that corresponds
        to the given symbol name, or None if the name is not present.
        """
        # Deliberately lenient (no KeyError) so that existing callers relying
        # on a None result keep working; membership is handled separately by
        # __contains__.
        return self._loaded_entries().get(name)

    def __contains__(self, name):
        """
        Returns True if the given symbol name is present in the NameLUT.
        """
        # The Mapping mixin implements membership by catching KeyError from
        # __getitem__. Since __getitem__ never raises it, the inherited
        # version would report every name as present.
        return name in self._loaded_entries()

    def __iter__(self):
        """
        Returns an iterator to the NameLUT dictionary.
        """
        return iter(self._loaded_entries())

    def items(self):
        """
        Returns the NameLUT dictionary items.
        """
        return self._loaded_entries().items()

    def get(self, name, default=None):
        """
        Returns NameLUTEntry(cu_ofs, die_ofs) for the provided symbol name or
        the given default (None unless specified) if the symbol does not exist
        in the corresponding section.
        """
        return self._loaded_entries().get(name, default)

    def get_cu_headers(self):
        """
        Returns all CU headers. Mainly required for readelf.
        """
        # Keyed on the headers (not the entries) so that headers are parsed
        # even when entries were supplied without them.
        if self._cu_headers is None:
            self._load()

        return self._cu_headers

    def _get_entries(self):
        """
        Parse the (name, cu_ofs, die_ofs) information from this section and
        store as a dictionary.

        Returns a tuple (entries, cu_headers) where entries is an OrderedDict
        mapping the symbol name to NameLUTEntry(cu_ofs, die_ofs) and
        cu_headers is the list of parsed per-CU headers, in section order.
        If a name occurs more than once, the last occurrence wins.
        """

        self._stream.seek(0)
        entries = OrderedDict()
        cu_headers = []
        offset = 0
        # According to 6.1.1. of DWARFv4, each set of names is terminated by
        # an offset field containing zero (and no following string). Because
        # of sequential parsing, every next entry may be that terminator.
        # So, field "name" is conditional.
        entry_struct = Struct("Dwarf_offset_name_pair",
                self._structs.Dwarf_offset('die_ofs'),
                If(lambda ctx: ctx['die_ofs'], CString('name')))

        # each run of this loop will fetch one CU worth of entries.
        while offset < self._size:

            # read the header for this CU.
            namelut_hdr = struct_parse(self._structs.Dwarf_nameLUT_header,
                    self._stream, offset)
            cu_headers.append(namelut_hdr)
            # compute the next offset.
            offset = (offset + namelut_hdr.unit_length +
                     self._structs.initial_length_field_size())

            # before inner loop, latch data that will be used in the inner
            # loop to avoid attribute access and other computation.
            hdr_cu_ofs = namelut_hdr.debug_info_offset

            # while die_ofs of the entry is non-zero (which indicates the end) ...
            while True:
                # no explicit offset: entries are read back to back from the
                # current stream position, right after the header.
                entry = struct_parse(entry_struct, self._stream)

                # if it is zero, this is the terminating record.
                if entry.die_ofs == 0:
                    break
                # add this entry to the look-up dictionary. The parsed
                # die_ofs is added to the CU offset from the header to form
                # the stored die_ofs.
                entries[entry.name.decode('utf-8')] = NameLUTEntry(
                        cu_ofs = hdr_cu_ofs,
                        die_ofs = hdr_cu_ofs + entry.die_ofs)

        # return the entries parsed so far.
        return (entries, cu_headers)