1# -*- Python -*- vim: set syntax=python tabstop=4 expandtab cc=80:
2"""
extract - A set of functions that extract symbol lists from shared libraries.
4"""
5import distutils.spawn
6import sys
7
8from sym_check import util
9
10
class NMExtractor(object):
    """
    NMExtractor - Extract symbol lists from libraries using nm.
    """

    @staticmethod
    def find_tool():
        """
        Look up the nm executable and return whatever path the lookup
        yields (__init__ treats a None result as "not found").
        """
        return distutils.spawn.find_executable('nm')

    def __init__(self):
        """
        Locate nm and record the command line flags used when running it.
        Prints an error and exits the process with status 1 when nm is
        not available.
        """
        nm_path = self.find_tool()
        self.nm_exe = nm_path
        if nm_path is None:
            # Nothing can be extracted without the tool, so bail out.
            print("ERROR: Could not find nm")
            sys.exit(1)
        self.flags = ['-P', '-g']

    def extract(self, lib):
        """
        Run nm on lib, parse every non-blank output line into a symbol,
        drop the unwanted ones, and return the unique, sorted survivors
        after passing them through util.read_syms_from_list.

        Raises RuntimeError when nm exits with a non-zero status.
        """
        cmd = [self.nm_exe] + self.flags
        cmd.append(lib)
        out, _, exit_code = util.execute_command_verbose(cmd)
        if exit_code != 0:
            raise RuntimeError('Failed to run %s on %s' % (self.nm_exe, lib))
        # Symbols are dicts (unhashable), so de-duplicate on their repr.
        unique_reprs = set()
        for line in out.splitlines():
            if not line.strip():
                continue
            sym = self._extract_sym(line)
            if self._want_sym(sym):
                unique_reprs.add(repr(sym))
        # Sort the string forms, then let util turn them back into symbols.
        return util.read_syms_from_list(sorted(unique_reprs))

    def _extract_sym(self, sym_str):
        """
        Parse one whitespace-separated nm output line into a symbol dict
        with 'name' and 'type' keys (plus 'size' for OBJECT symbols that
        have a fourth column). Returns None for lines with fewer than two
        columns.
        """
        columns = sym_str.split()
        # A usable line carries at least a name and a type.
        if len(columns) < 2:
            return None
        parsed = self._transform_sym_type({
            'name': columns[0].replace('@@', '@'),
            'type': columns[1],
        })
        # Only OBJECT symbols keep their size; the column is parsed as hex.
        if parsed['type'] == 'OBJECT' and len(columns) > 3:
            parsed['size'] = int(columns[3], 16)
        return parsed

    @staticmethod
    def _want_sym(sym):
        """
        Return True when sym is a parsed symbol worth keeping: it is not
        None, has at least two entries, does not have one of the rejected
        lowercase type letters, and is not one of the rejected names.
        """
        if sym is None or len(sym) < 2:
            return False
        if sym['type'] in ('t', 'b', 'r', 'd', 'w'):
            return False
        return sym['name'] not in ('__bss_start', '_end', '_edata')

    @staticmethod
    def _transform_sym_type(sym):
        """
        Rewrite sym['type'] in place from the nm single letter code to
        'FUNC' or 'OBJECT' and return sym. Unrecognized codes are left
        untouched.
        """
        letter = sym['type']
        if letter in ('T', 'W'):
            sym['type'] = 'FUNC'
        elif letter in ('B', 'D', 'R', 'V', 'S'):
            sym['type'] = 'OBJECT'
        return sym
93
class ReadElfExtractor(object):
    """
    ReadElfExtractor - Extract symbol lists from libraries using readelf.
    """

    @staticmethod
    def find_tool():
        """
        Search for the readelf executable and return the path.
        """
        return distutils.spawn.find_executable('readelf')

    def __init__(self):
        """
        Initialize the readelf executable and flags that will be used to
        extract symbols from shared libraries.

        Prints an error and exits the process with status 1 when readelf
        cannot be found.
        """
        self.tool = self.find_tool()
        if self.tool is None:
            # ERROR no readelf found
            print("ERROR: Could not find readelf")
            sys.exit(1)
        self.flags = ['--wide', '--symbols']

    def extract(self, lib):
        """
        Extract symbols from a library and return the results as a list of
        parsed symbol dicts (see process_syms).

        Raises RuntimeError when readelf exits with a non-zero status.
        """
        cmd = [self.tool] + self.flags + [lib]
        out, _, exit_code = util.execute_command_verbose(cmd)
        if exit_code != 0:
            # This class stores the executable in self.tool; it has no
            # nm_exe attribute.
            raise RuntimeError('Failed to run %s on %s' % (self.tool, lib))
        dyn_syms = self.get_dynsym_table(out)
        return self.process_syms(dyn_syms)

    @staticmethod
    def _parse_size(text):
        """
        Convert a readelf size column to an int, accepting both plain
        decimal and 0x-prefixed hexadecimal.
        """
        # Some readelf implementations (e.g. GNU binutils) switch to
        # 0x-prefixed hex for sizes too wide for the decimal column.
        if text[:2].lower() == '0x':
            return int(text, 16)
        return int(text)

    def process_syms(self, sym_list):
        """
        Turn the rows of a readelf symbol table into symbol dicts.

        Each row is split on whitespace; column 2 is read as the size,
        column 3 as the type and column 7 as the name. Blank rows, rows
        with only 7 columns (presumably symbols with an empty name) and
        NOTYPE symbols are skipped. FUNC symbols are returned without a
        'size' key.

        Raises AssertionError on a row with an unexpected column count or
        symbol type.
        """
        new_syms = []
        for row in sym_list:
            parts = row.split()
            if not parts:
                continue
            assert len(parts) in (7, 8, 9)
            if len(parts) == 7:
                # No name column to read, nothing to record.
                continue
            new_sym = {
                'name': parts[7],
                'size': self._parse_size(parts[2]),
                'type': parts[3],
            }
            assert new_sym['type'] in ['OBJECT', 'FUNC', 'NOTYPE']
            if new_sym['type'] == 'NOTYPE':
                continue
            if new_sym['type'] == 'FUNC':
                del new_sym['size']
            new_syms.append(new_sym)
        return new_syms

    def get_dynsym_table(self, out):
        """
        Return the lines of readelf output that belong to the '.dynsym'
        symbol table.

        The slice starts two lines after the "Symbol table '.dynsym'"
        header (skipping the header and the line that follows it) and
        runs through the first blank line found at or after the header,
        or to the end of the output when there is none.

        Raises AssertionError when the output has no '.dynsym' header.
        """
        lines = out.splitlines()
        start = -1
        end = -1
        for i, line in enumerate(lines):
            if line.startswith("Symbol table '.dynsym'"):
                start = i + 2
            # The first blank line after the header terminates the table.
            if start != -1 and end == -1 and not line.strip():
                end = i + 1
        assert start != -1
        if end == -1:
            end = len(lines)
        return lines[start:end]
165
166
def extract_symbols(lib_file):
    """
    Extract and return the symbols of a dynamic library.
    readelf is used when ReadElfExtractor.find_tool() locates it; otherwise
    the extraction falls back to nm. The result is whatever the chosen
    extractor's extract() method returns for lib_file.
    """
    have_readelf = ReadElfExtractor.find_tool()
    extractor_cls = ReadElfExtractor if have_readelf else NMExtractor
    return extractor_cls().extract(lib_file)
178