1#!/usr/bin/env python
2#
3# Licensed to the Apache Software Foundation (ASF) under one
4# or more contributor license agreements. See the NOTICE file
5# distributed with this work for additional information
6# regarding copyright ownership. The ASF licenses this file
7# to you under the Apache License, Version 2.0 (the
8# "License"); you may not use this file except in compliance
9# with the License. You may obtain a copy of the License at
10#
11#   http://www.apache.org/licenses/LICENSE-2.0
12#
13# Unless required by applicable law or agreed to in writing,
14# software distributed under the License is distributed on an
15# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16# KIND, either express or implied. See the License for the
17# specific language governing permissions and limitations
18# under the License.
19#
20"""
21This script can be used to make the output from
22apache::thrift::profile_print_info() more human-readable.
23
24It translates each executable file name and address into the corresponding
25source file name, line number, and function name.  By default, it also
26demangles C++ symbol names.
27"""
28
29import optparse
30import os
31import re
32import subprocess
33import sys
34
35
class AddressInfo(object):
    """
    Record describing a single address inside one object file.

    The object file name and the address are supplied at construction time.
    The source file, source line, and function name all start out as None
    until something fills them in.
    """
    def __init__(self, obj_file, address):
        # Identity of the address: which object file, and where in it.
        self.objectFile, self.address = obj_file, address
        # Lookup results; not known yet.
        self.sourceFile = self.sourceLine = self.function = None
46
47
# Registry of every AddressInfo created so far, as a two-level mapping:
# object file name -> {address -> AddressInfo}.
g_addrs_by_filename = {}


def get_address(filename, address):
    """
    Return the AddressInfo for the given object file and address.

    AddressInfo objects are cached in the module-level registry, so asking
    for the same filename/address pair again hands back the very same
    object rather than a new one.
    """
    per_file = g_addrs_by_filename.setdefault(filename, {})
    if address not in per_file:
        per_file[address] = AddressInfo(filename, address)
    return per_file[address]
72
73
def translate_file_addresses(filename, addresses, options):
    """
    Use addr2line to look up information for the specified addresses.

    All of the addresses must belong to the same object file.  On success,
    every AddressInfo in ``addresses`` has its ``sourceFile`` and
    ``sourceLine`` attributes filled in, and also ``function`` when
    ``options.printFunctions`` is set.  If ``filename`` is not an existing
    file, or there are no addresses, nothing is done.

    Raises subprocess.CalledProcessError if addr2line exits with a non-zero
    status, and Exception if its output ends early or a location line does
    not contain a ':' separator.
    """
    # Do nothing if we can't find the file
    if not os.path.isfile(filename):
        return

    # The addresses are walked twice (once to build addr2line's input, once
    # to consume its output), so materialize whatever iterable we were given.
    addresses = list(addresses)
    if not addresses:
        return

    args = ['addr2line']
    if options.printFunctions:
        args.append('-f')
    args.extend(['-e', filename])

    request = []
    for address in addresses:
        assert address.objectFile == filename
        request.append(address.address + '\n')

    # universal_newlines=True gives text-mode pipes, so plain strings can be
    # exchanged with the child regardless of the Python version.
    proc = subprocess.Popen(args, stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            universal_newlines=True)
    # Hand over all addresses in one go.  communicate() writes the input,
    # closes stdin, collects the output and reaps the child, so there is no
    # interleaved write/readline that could deadlock on pipe buffering, and
    # no child process is left behind if parsing fails below.
    (output, _) = proc.communicate(''.join(request))
    if proc.returncode != 0:
        raise subprocess.CalledProcessError(proc.returncode, args)

    lines = iter(output.splitlines())

    def next_line():
        # A missing or blank line means addr2line produced less output
        # than one record per address.
        line = next(lines, '').strip()
        if not line:
            raise Exception('unexpected EOF from addr2line')
        return line

    for address in addresses:
        # With -f, addr2line prints the function name on its own line
        # before the file:line location.
        if options.printFunctions:
            address.function = next_line()

        file_and_line = next_line()
        # Split on the last ':' so that colons inside the path are kept.
        idx = file_and_line.rfind(':')
        if idx < 0:
            msg = 'expected file and line number from addr2line; got %r' % \
                (file_and_line,)
            msg += '\nfile=%r, address=%r' % (filename, address.address)
            raise Exception(msg)

        address.sourceFile = file_and_line[:idx]
        address.sourceLine = file_and_line[idx + 1:]
119
120
def lookup_addresses(options):
    """
    Resolve source information for every address recorded so far.

    Walks the global AddressInfo registry one object file at a time and
    hands all of that file's addresses to translate_file_addresses().
    """
    for (obj_file, by_address) in g_addrs_by_filename.items():
        translate_file_addresses(obj_file, by_address.values(), options)
129
130
class Entry(object):
    """
    An entry in the thrift profile output.

    Contains a header line, and a backtrace: ``bt`` is the list of address
    objects for the entry's stack frames, in the order they were added.
    """
    def __init__(self, header):
        self.header = header
        self.bt = []

    def addFrame(self, filename, address):
        """
        Append one backtrace frame for the given object file and address.
        """
        # If libc was able to determine the symbols names, the filename
        # argument will be of the form <filename>(<function>+<offset>)
        # So, strip off anything after the last '('
        idx = filename.rfind('(')
        if idx >= 0:
            filename = filename[:idx]

        self.bt.append(get_address(filename, address))

    def write(self, f, options):
        """
        Write the header and the numbered backtrace to the file object f.

        Each frame is printed as "<source file>:<line>".  A frame whose
        source location was never filled in is printed as
        "<object file> [<address>]" instead, so the raw information is not
        lost.  When options.printFunctions is set, the function name (or
        "??" if unknown) follows on its own line.
        """
        f.write(self.header)
        f.write('\n')
        for n, address in enumerate(self.bt):
            if address.sourceFile is None:
                # Never translated: fall back to what we were given rather
                # than printing "None:None".
                location = '%s [%s]' % (address.objectFile, address.address)
            else:
                location = '%s:%s' % (address.sourceFile, address.sourceLine)
            f.write('  #%-2d %s\n' % (n, location))
            if options.printFunctions:
                f.write('      %s\n' % (address.function or '??',))
164
165
def process_file(in_file, out_file, options):
    """
    Turn raw thrift profile output into a more readable report.

    The profile text is read from in_file line by line and gathered into
    Entry objects.  Once all input has been consumed, the recorded
    addresses are resolved and each entry is written to out_file, followed
    by a blank line.

    Raises Exception for a line that matches none of the known formats, and
    for a backtrace frame that shows up before any entry header.
    """
    # Resolving each address as soon as it is read would mean one addr2line
    # invocation per frame, and addr2line can be quite slow on large
    # executables.  So the whole input is parsed first, remembering which
    # addresses are needed from which file, and addr2line is then run just
    # once per object file.
    virt_call_regex = re.compile(r'^\s*T_VIRTUAL_CALL: (\d+) calls on (.*):$')
    gen_prot_regex = re.compile(
        r'^\s*T_GENERIC_PROTOCOL: (\d+) calls to (.*) with a (.*):$')
    bt_regex = re.compile(r'^\s*#(\d+)\s*(.*) \[(0x[0-9A-Za-z]+)\]$')

    def externalize(type_name):
        # typeid() reports internal type names, which c++filt does not
        # demangle by default.  (Some versions accept "-t" for that; others
        # only demangle type names given as arguments, not on stdin.)
        # When the output goes through c++filt, a "_Z" prefix makes the
        # name look like an external symbol so it gets demangled.
        if options.cxxfilt:
            return '_Z' + type_name
        return type_name

    # Entries in input order; the last one is the entry being filled in.
    entries = []
    while True:
        line = in_file.readline()
        if not line:
            break

        if line == '\n' or line.startswith('Thrift virtual call info:'):
            continue

        # Either kind of header line starts a new entry.
        header = None
        found = virt_call_regex.match(line)
        if found:
            header = 'T_VIRTUAL_CALL: %d calls on "%s"' % (
                int(found.group(1)), externalize(found.group(2)))
        else:
            found = gen_prot_regex.match(line)
            if found:
                header = (
                    'T_GENERIC_PROTOCOL: %d calls to "%s" with a "%s"' % (
                        int(found.group(1)),
                        externalize(found.group(2)),
                        externalize(found.group(3))))
        if header is not None:
            entries.append(Entry(header))
            continue

        # Anything else has to be a backtrace frame of the current entry.
        found = bt_regex.match(line)
        if not found:
            raise Exception('unexpected line in input: %r' % (line,))
        if not entries:
            raise Exception('found backtrace frame before entry header')
        entries[-1].addFrame(found.group(2), found.group(3))

    # Resolve every recorded address in one batch
    lookup_addresses(options)

    # Emit the entries now that their frames carry source information
    for entry in entries:
        entry.write(out_file, options)
        out_file.write('\n')
259
260
def start_cppfilt():
    """
    Send this program's standard output through c++filt.

    A pipe is created and the process forks.  The child gets the write end
    of the pipe installed as its stdout and returns to the caller to carry
    on with normal processing.  The parent installs the read end as its
    stdin and replaces itself with c++filt, so in the parent this function
    does not return.
    """
    (rd_fd, wr_fd) = os.pipe()

    if os.fork() != 0:
        # Parent: read from the pipe and become c++filt.
        os.dup2(rd_fd, sys.stdin.fileno())
        os.close(rd_fd)
        os.close(wr_fd)

        os.execvp('c++filt', ['c++filt'])
    else:
        # Child: write into the pipe, then go back to the caller.
        os.dup2(wr_fd, sys.stdout.fileno())
        os.close(rd_fd)
        os.close(wr_fd)
281
282
def main(argv):
    """
    Command-line entry point.

    argv is the full argument vector, program name included.  The profile
    output is read from the single positional <file> argument, or from
    stdin when no file is given, and the translated result is written to
    stdout (through c++filt unless --no-demangle is passed).

    Returns 0 on success and 1 if more than one positional argument was
    supplied.
    """
    parser = optparse.OptionParser(usage='%prog [options] [<file>]')
    parser.add_option('--no-functions', help='Don\'t print function names',
                      dest='printFunctions', action='store_false',
                      default=True)
    parser.add_option('--no-demangle',
                      help='Don\'t demangle C++ symbol names',
                      dest='cxxfilt', action='store_false',
                      default=True)

    (options, args) = parser.parse_args(argv[1:])
    if len(args) > 1:
        parser.print_usage(sys.stderr)
        sys.stderr.write('trailing arguments: %s\n' % (' '.join(args[1:]),))
        return 1

    if args:
        # Use the parsed positional argument, not argv[1]: argv[1] is an
        # option string whenever options precede the file name.
        in_file = open(args[0], 'r')
    else:
        in_file = sys.stdin

    try:
        if options.cxxfilt:
            start_cppfilt()

        process_file(in_file, sys.stdout, options)
    finally:
        if in_file is not sys.stdin:
            in_file.close()
    return 0
309
if __name__ == '__main__':
    # Run as a script: the value returned by main() is the exit status.
    sys.exit(main(sys.argv))
313