# -*- Python -*- vim: set syntax=python tabstop=4 expandtab cc=80:
#===----------------------------------------------------------------------===##
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===----------------------------------------------------------------------===##
"""
extract - A set of functions that extract symbol lists from shared libraries.
"""
import os.path
import sys
import re

try:
    # shutil.which is the supported replacement for
    # distutils.spawn.find_executable; distutils was removed in Python 3.12.
    from shutil import which as _find_executable
except ImportError:
    # Interpreters without shutil.which (Python 2) still ship distutils.
    import distutils.spawn
    _find_executable = distutils.spawn.find_executable

import libcxx.util
from libcxx.sym_check import util

# Symbol names that are dropped by both extractors.
extract_ignore_names = ['_init', '_fini']


class NMExtractor(object):
    """
    NMExtractor - Extract symbol lists from libraries using nm.
    """

    @staticmethod
    def find_tool():
        """
        Search for the nm executable and return the path, or None when it
        cannot be found.
        """
        return _find_executable('nm')

    def __init__(self, static_lib):
        """
        Initialize the nm executable and flags that will be used to extract
        symbols from shared libraries.

        Prints an error and exits the process with status 1 when nm cannot
        be located.
        """
        self.nm_exe = self.find_tool()
        if self.nm_exe is None:
            # ERROR no NM found
            print("ERROR: Could not find nm")
            sys.exit(1)
        self.static_lib = static_lib
        self.flags = ['-P', '-g']

    def extract(self, lib):
        """
        Extract symbols from a library and return the parsed symbols.

        Runs nm on `lib`, parses every non-blank output line, drops unwanted
        symbols, de-duplicates and sorts them, and returns the result of
        util.read_syms_from_list. Raises RuntimeError when nm exits with a
        non-zero status.
        """
        cmd = [self.nm_exe] + self.flags + [lib]
        out, _, exit_code = libcxx.util.executeCommandVerbose(cmd)
        if exit_code != 0:
            raise RuntimeError('Failed to run %s on %s' % (self.nm_exe, lib))
        fmt_syms = (self._extract_sym(line)
                    for line in out.splitlines() if line.strip())
        # Cast symbol to string so that duplicates can be removed with a set.
        final_syms = (repr(s) for s in fmt_syms if self._want_sym(s))
        # Make unique and sort strings.
        tmp_list = sorted(set(final_syms))
        # Cast string back to symbol.
        return util.read_syms_from_list(tmp_list)

    def _extract_sym(self, sym_str):
        """
        Parse one line of nm output into a symbol dict.

        Returns None for lines with fewer than two whitespace-separated
        columns. The first column is taken as the name and the second as the
        single-letter type; a type of 'u'/'U' marks the symbol as undefined.
        """
        bits = sym_str.split()
        # Everything we want has at least two columns.
        if len(bits) < 2:
            return None
        new_sym = {
            'name': bits[0],
            'type': bits[1],
            'is_defined': (bits[1].lower() != 'u')
        }
        new_sym['name'] = new_sym['name'].replace('@@', '@')
        new_sym = self._transform_sym_type(new_sym)
        # NM types which we want to save the size for. The size is the fourth
        # column and is parsed as hexadecimal.
        if new_sym['type'] == 'OBJECT' and len(bits) > 3:
            new_sym['size'] = int(bits[3], 16)
        return new_sym

    @staticmethod
    def _want_sym(sym):
        """
        Check that sym is a valid symbol that we want to keep.
        """
        if sym is None or len(sym) < 2:
            return False
        if sym['name'] in extract_ignore_names:
            return False
        # Lower-case type letters are never rewritten by _transform_sym_type,
        # so they can be matched here directly.
        bad_types = ['t', 'b', 'r', 'd', 'w']
        return (sym['type'] not in bad_types
                and sym['name'] not in ['__bss_start', '_end', '_edata'])

    @staticmethod
    def _transform_sym_type(sym):
        """
        Map the nm single letter output for type to either FUNC or OBJECT.
        If the type is not recognized it is left unchanged.
        """
        func_types = ['T', 'W']
        obj_types = ['B', 'D', 'R', 'V', 'S']
        if sym['type'] in func_types:
            sym['type'] = 'FUNC'
        elif sym['type'] in obj_types:
            sym['type'] = 'OBJECT'
        return sym


class ReadElfExtractor(object):
    """
    ReadElfExtractor - Extract symbol lists from libraries using readelf.
    """

    @staticmethod
    def find_tool():
        """
        Search for the readelf executable and return the path, or None when
        it cannot be found.
        """
        return _find_executable('readelf')

    def __init__(self, static_lib):
        """
        Initialize the readelf executable and flags that will be used to
        extract symbols from shared libraries.

        Prints an error and exits the process with status 1 when readelf
        cannot be located. Static libraries are not supported.
        """
        self.tool = self.find_tool()
        if self.tool is None:
            # ERROR no readelf found
            print("ERROR: Could not find readelf")
            sys.exit(1)
        # TODO: Support readelf for reading symbols from archives
        assert not static_lib, "ReadElf does not yet support static libs"
        self.static_lib = static_lib
        self.flags = ['--wide', '--symbols']

    def extract(self, lib):
        """
        Extract symbols from a library and return the results as a list of
        parsed symbol dicts.

        Raises RuntimeError when readelf exits with a non-zero status.
        """
        cmd = [self.tool] + self.flags + [lib]
        out, _, exit_code = libcxx.util.executeCommandVerbose(cmd)
        if exit_code != 0:
            raise RuntimeError('Failed to run %s on %s' % (self.tool, lib))
        dyn_syms = self.get_dynsym_table(out)
        return self.process_syms(dyn_syms)

    def process_syms(self, sym_list):
        """
        Convert the rows of a readelf symbol table into symbol dicts.

        Blank rows, rows without a name column, NOTYPE symbols and names in
        extract_ignore_names are skipped. FUNC symbols carry no 'size' key.
        """
        new_syms = []
        for row in sym_list:
            parts = row.split()
            if not parts:
                continue
            assert len(parts) in (7, 8, 9)
            # Seven columns means the row has no name column.
            if len(parts) == 7:
                continue
            new_sym = {
                'name': parts[7],
                'size': int(parts[2]),
                'type': parts[3],
                'is_defined': (parts[6] != 'UND')
            }
            assert new_sym['type'] in ['OBJECT', 'FUNC', 'NOTYPE', 'TLS']
            if new_sym['name'] in extract_ignore_names:
                continue
            if new_sym['type'] == 'NOTYPE':
                continue
            if new_sym['type'] == 'FUNC':
                del new_sym['size']
            new_syms.append(new_sym)
        return new_syms

    def get_dynsym_table(self, out):
        """
        Return the lines of the '.dynsym' table found in readelf output.

        The table starts two lines after its "Symbol table '.dynsym'" header
        and runs up to and including the first blank line after it, or to
        the end of the output when there is no blank line.
        """
        lines = out.splitlines()
        start = -1
        end = -1
        for i, line in enumerate(lines):
            if line.startswith("Symbol table '.dynsym'"):
                start = i + 2
            if start != -1 and not line.strip():
                end = i + 1
                # The table is fully delimited; later tables are irrelevant.
                break
        assert start != -1
        if end == -1:
            end = len(lines)
        return lines[start:end]


def extract_symbols(lib_file, static_lib=None):
    """
    Extract and return a list of symbols extracted from a static or dynamic
    library. The symbols are extracted using NM or readelf. They are then
    filtered and formatted. Finally the symbols are made unique.

    When static_lib is None the library is treated as static if and only if
    its file extension is '.a'. readelf is used for non-static libraries when
    it is available; nm is used otherwise.
    """
    if static_lib is None:
        _, ext = os.path.splitext(lib_file)
        static_lib = (ext == '.a')
    if ReadElfExtractor.find_tool() and not static_lib:
        extractor = ReadElfExtractor(static_lib=static_lib)
    else:
        extractor = NMExtractor(static_lib=static_lib)
    return extractor.extract(lib_file)