1#!/usr/bin/env python
2#
3# Copyright (C) 2017 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18"""pprof_proto_generator.py: read perf.data, generate pprof.profile, which can be
19    used by pprof.
20
21  Example:
22    python app_profiler.py
23    python pprof_proto_generator.py
24    pprof -text pprof.profile
25"""
26
27from __future__ import print_function
28import argparse
29import os
30import os.path
31
32from simpleperf_report_lib import ReportLib
33from utils import Addr2Nearestline, extant_dir, find_real_dso_path, find_tool_path, flatten_arg_list
34from utils import log_info, log_exit, ReadElf
35try:
36    import profile_pb2
37except ImportError:
38    log_exit('google.protobuf module is missing. Please install it first.')
39
def load_pprof_profile(filename):
    """Read the file at `filename` and parse it into a profile_pb2.Profile.

    The file is opened in binary mode and its whole content is handed to
    Profile.ParseFromString(). The parsed message is returned.
    """
    parsed = profile_pb2.Profile()
    with open(filename, "rb") as profile_file:
        serialized = profile_file.read()
    parsed.ParseFromString(serialized)
    return parsed
45
46
def store_pprof_profile(filename, profile):
    """Serialize `profile` with SerializeToString() and write it to `filename`.

    The file is opened in binary write mode, so existing content is replaced.
    """
    with open(filename, 'wb') as output:
        serialized = profile.SerializeToString()
        output.write(serialized)
50
51
class PprofProfilePrinter(object):
    """Print the content of a parsed pprof profile to stdout.

    Ids stored in the profile (location_id, mapping_id, function_id) are
    resolved by indexing the corresponding repeated field with `id - 1`, i.e.
    the printer relies on records being stored in id order with ids starting
    from 1. An id of 0 is not special-cased.
    """

    def __init__(self, profile):
        """Remember `profile` and its string table for later lookups."""
        self.profile = profile
        self.string_table = profile.string_table

    def show(self):
        """Print every section of the profile, one record per group of lines."""
        p = self.profile
        sub_space = '  '
        print('Profile {')
        print('%d sample_types' % len(p.sample_type))
        for i, sample_type in enumerate(p.sample_type):
            print('sample_type[%d] = ' % i, end='')
            self.show_value_type(sample_type)
        print('%d samples' % len(p.sample))
        for i, sample in enumerate(p.sample):
            print('sample[%d]:' % i)
            self.show_sample(sample, sub_space)
        print('%d mappings' % len(p.mapping))
        for i, mapping in enumerate(p.mapping):
            print('mapping[%d]:' % i)
            self.show_mapping(mapping, sub_space)
        print('%d locations' % len(p.location))
        for i, location in enumerate(p.location):
            print('location[%d]:' % i)
            self.show_location(location, sub_space)
        # Count header, consistent with the other sections.
        print('%d functions' % len(p.function))
        for i, function in enumerate(p.function):
            print('function[%d]:' % i)
            self.show_function(function, sub_space)
        print('%d strings' % len(p.string_table))
        for i, string in enumerate(p.string_table):
            print('string[%d]: %s' % (i, string))
        print('drop_frames: %s' % self.string(p.drop_frames))
        print('keep_frames: %s' % self.string(p.keep_frames))
        print('time_nanos: %u' % p.time_nanos)
        print('duration_nanos: %u' % p.duration_nanos)
        print('period_type: ', end='')
        self.show_value_type(p.period_type)
        print('period: %u' % p.period)
        for i, comment in enumerate(p.comment):
            print('comment[%d] = %s' % (i, self.string(comment)))
        print('default_sample_type: %d' % p.default_sample_type)
        print('} // Profile')
        print()

    def show_value_type(self, value_type, space=''):
        """Print a ValueType: the raw string ids followed by the resolved strings."""
        print('%sValueType(typeID=%d, unitID=%d, type=%s, unit=%s)' %
              (space, value_type.type, value_type.unit,
               self.string(value_type.type), self.string(value_type.unit)))

    def show_sample(self, sample, space=''):
        """Print the locations, values and labels of one sample."""
        sub_space = space + '  '
        for i, location_id in enumerate(sample.location_id):
            print('%slocation_id[%d]: id %d' % (space, i, location_id))
            self.show_location_id(location_id, sub_space)
        for i, value in enumerate(sample.value):
            print('%svalue[%d] = %d' % (space, i, value))
        for i, label in enumerate(sample.label):
            # A pprof Label has a string-table key and either a string value
            # (`str`, a string-table index) or a numeric value (`num`).
            if label.str:
                label_value = self.string(label.str)
            else:
                label_value = '%d' % label.num
            print('%slabel[%d] = %s:%s' % (space, i, self.string(label.key), label_value))

    def show_location_id(self, location_id, space=''):
        """Print the location stored at index `location_id - 1`."""
        location = self.profile.location[location_id - 1]
        self.show_location(location, space)

    def show_location(self, location, space=''):
        """Print a location together with its mapping and its line records."""
        sub_space = space + '  '
        print('%sid: %d' % (space, location.id))
        print('%smapping_id: %d' % (space, location.mapping_id))
        self.show_mapping_id(location.mapping_id, sub_space)
        print('%saddress: %x' % (space, location.address))
        for i, line in enumerate(location.line):
            print('%sline[%d]:' % (space, i))
            self.show_line(line, sub_space)

    def show_mapping_id(self, mapping_id, space=''):
        """Print the mapping stored at index `mapping_id - 1`."""
        mapping = self.profile.mapping[mapping_id - 1]
        self.show_mapping(mapping, space)

    def show_mapping(self, mapping, space=''):
        """Print all fields of a mapping; string fields show text and raw id."""
        print('%sid: %d' % (space, mapping.id))
        print('%smemory_start: %x' % (space, mapping.memory_start))
        print('%smemory_limit: %x' % (space, mapping.memory_limit))
        print('%sfile_offset: %x' % (space, mapping.file_offset))
        print('%sfilename: %s(%d)' % (space, self.string(mapping.filename),
                                      mapping.filename))
        print('%sbuild_id: %s(%d)' % (space, self.string(mapping.build_id),
                                      mapping.build_id))
        print('%shas_functions: %s' % (space, mapping.has_functions))
        print('%shas_filenames: %s' % (space, mapping.has_filenames))
        print('%shas_line_numbers: %s' % (space, mapping.has_line_numbers))
        print('%shas_inline_frames: %s' % (space, mapping.has_inline_frames))

    def show_line(self, line, space=''):
        """Print a line record: its function (expanded) and line number."""
        sub_space = space + '  '
        print('%sfunction_id: %d' % (space, line.function_id))
        self.show_function_id(line.function_id, sub_space)
        print('%sline: %d' % (space, line.line))

    def show_function_id(self, function_id, space=''):
        """Print the function stored at index `function_id - 1`."""
        function = self.profile.function[function_id - 1]
        self.show_function(function, space)

    def show_function(self, function, space=''):
        """Print all fields of a function, resolving string ids to text."""
        print('%sid: %d' % (space, function.id))
        print('%sname: %s' % (space, self.string(function.name)))
        print('%ssystem_name: %s' % (space, self.string(function.system_name)))
        print('%sfilename: %s' % (space, self.string(function.filename)))
        print('%sstart_line: %d' % (space, function.start_line))

    def string(self, string_id):
        """Return the string table entry at index `string_id`."""
        return self.string_table[string_id]
163
164
class Sample(object):
    """One aggregated sample: a list of location ids plus per-type values.

    `values` maps a sample type id to the accumulated value for that type.
    """

    def __init__(self):
        self.values = {}
        self.location_ids = []

    def add_location_id(self, location_id):
        """Append `location_id` to the end of the location list."""
        self.location_ids.append(location_id)

    def add_value(self, sample_type_id, value):
        """Add `value` to the running total of `sample_type_id` (starting at 0)."""
        current = self.values.get(sample_type_id, 0)
        self.values[sample_type_id] = current + value

    def add_values(self, values):
        """Accumulate every (sample_type_id, value) pair of the dict `values`."""
        for type_id in values:
            self.add_value(type_id, values[type_id])

    @property
    def key(self):
        """Hashable identity of the sample: its location ids as a tuple."""
        return tuple(self.location_ids)
184
185
class Location(object):
    """An address inside a mapping, plus the Line records attached to it."""

    def __init__(self, mapping_id, address, vaddr_in_dso):
        self.lines = []
        self.vaddr_in_dso = vaddr_in_dso
        self.address = address
        self.mapping_id = mapping_id
        # -1 marks a location that has not been given an id yet.
        self.id = -1

    @property
    def key(self):
        """Identity of the location: the (mapping_id, address) pair."""
        identity = (self.mapping_id, self.address)
        return identity
198
199
class Line(object):
    """A (function_id, line number) pair; both fields start at 0."""

    def __init__(self):
        self.function_id = self.line = 0
205
206
class Mapping(object):
    """A memory mapping: address range, file offset, filename and build id.

    Filename and build id are held as string ids, not as text.
    """

    def __init__(self, start, end, pgoff, filename_id, build_id_id):
        self.memory_start, self.memory_limit = start, end
        self.file_offset = pgoff
        self.filename_id, self.build_id_id = filename_id, build_id_id
        # -1 marks a mapping that has not been given an id yet.
        self.id = -1

    @property
    def key(self):
        """Identity of the mapping: a tuple of every field except `id`."""
        address_range = (self.memory_start, self.memory_limit)
        return address_range + (self.file_offset, self.filename_id, self.build_id_id)
225
226
class Function(object):
    """A function identified by the string ids of its name and of its dso."""

    def __init__(self, name_id, dso_name_id, vaddr_in_dso):
        self.name_id, self.dso_name_id = name_id, dso_name_id
        self.vaddr_in_dso = vaddr_in_dso
        # Source information starts out empty (string id 0, line 0).
        self.source_filename_id = 0
        self.start_line = 0
        # -1 marks a function that has not been given an id yet.
        self.id = -1

    @property
    def key(self):
        """Identity of the function: the (name_id, dso_name_id) pair."""
        identity = (self.name_id, self.dso_name_id)
        return identity
240
241
242# pylint: disable=no-member
class PprofProfileGenerator(object):
    """Build a pprof Profile message from the samples read through ReportLib.

    Samples with the same list of location ids are merged. Strings, mappings,
    locations and functions are de-duplicated through the *_map dicts, while
    the matching *_list keeps them in id order (ids start from 1).
    """

    def __init__(self, config):
        """Set up the report library, the filters and the empty profile.

        Args:
            config: dict of settings. 'max_chain_length' and 'ndk_path' must be
                present. 'perf_data_path', 'show_art_frames', 'comm_filters',
                'pid_filters', 'tid_filters' and 'dso_filters' are optional.
                'binary_cache_dir' is overwritten here: it becomes
                'binary_cache' if that directory exists, otherwise None.
        """
        self.config = config
        self.lib = ReportLib()

        config['binary_cache_dir'] = 'binary_cache'
        if not os.path.isdir(config['binary_cache_dir']):
            config['binary_cache_dir'] = None
        else:
            self.lib.SetSymfs(config['binary_cache_dir'])
        if config.get('perf_data_path'):
            self.lib.SetRecordFile(config['perf_data_path'])
        kallsyms = 'binary_cache/kallsyms'
        if os.path.isfile(kallsyms):
            self.lib.SetKallsymsFile(kallsyms)
        if config.get('show_art_frames'):
            self.lib.ShowArtFrames()
        # A filter set to None means "accept everything".
        self.comm_filter = set(config['comm_filters']) if config.get('comm_filters') else None
        if config.get('pid_filters'):
            self.pid_filter = {int(x) for x in config['pid_filters']}
        else:
            self.pid_filter = None
        if config.get('tid_filters'):
            self.tid_filter = {int(x) for x in config['tid_filters']}
        else:
            self.tid_filter = None
        self.dso_filter = set(config['dso_filters']) if config.get('dso_filters') else None
        self.max_chain_length = config['max_chain_length']
        self.profile = profile_pb2.Profile()
        # Index 0 of the string table is the empty string; get_string_id()
        # returns 0 for empty values.
        self.profile.string_table.append('')
        self.string_table = {}
        self.sample_types = {}
        self.sample_map = {}
        self.sample_list = []
        self.location_map = {}
        self.location_list = []
        self.mapping_map = {}
        self.mapping_list = []
        self.function_map = {}
        self.function_list = []

        # Map from dso_name in perf.data to (binary path, build_id).
        self.binary_map = {}
        self.read_elf = ReadElf(self.config['ndk_path'])

    def gen(self):
        """Read all samples, add source line info and return the filled profile."""
        # 1. Process all samples in perf.data, aggregate samples.
        while True:
            report_sample = self.lib.GetNextSample()
            if report_sample is None:
                self.lib.Close()
                break
            event = self.lib.GetEventOfCurrentSample()
            symbol = self.lib.GetSymbolOfCurrentSample()
            callchain = self.lib.GetCallChainOfCurrentSample()

            if not self._filter_report_sample(report_sample):
                continue

            # Each event owns two consecutive sample types: the number of
            # samples at sample_type_id and the summed period right after it.
            sample_type_id = self.get_sample_type_id(event.name)
            sample = Sample()
            sample.add_value(sample_type_id, 1)
            sample.add_value(sample_type_id + 1, report_sample.period)
            for ip, frame_symbol in self._select_frames(report_sample, symbol, callchain):
                sample.add_location_id(self.get_location_id(ip, frame_symbol))
            # Samples whose frames were all filtered out are dropped.
            if sample.location_ids:
                self.add_sample(sample)

        # 2. Generate line info for locations and functions.
        self.gen_source_lines()

        # 3. Produce samples/locations/functions in profile
        for sample in self.sample_list:
            self.gen_profile_sample(sample)
        for mapping in self.mapping_list:
            self.gen_profile_mapping(mapping)
        for location in self.location_list:
            self.gen_profile_location(location)
        for function in self.function_list:
            self.gen_profile_function(function)

        return self.profile

    def _select_frames(self, report_sample, symbol, callchain):
        """Return the (ip, symbol) pairs of one sample that pass the dso filter.

        The sampled frame (report_sample.ip, symbol) comes first, followed by
        call chain entries. Only the last `max_chain_length` entries of the
        call chain are considered. Every frame is checked against the dso
        filter with its own symbol.
        """
        frames = []
        if self._filter_symbol(symbol):
            frames.append((report_sample.ip, symbol))
        first_entry = max(0, callchain.nr - self.max_chain_length)
        for i in range(first_entry, callchain.nr):
            entry = callchain.entries[i]
            # Filter on the entry's own symbol, not on the sampled frame's.
            if self._filter_symbol(entry.symbol):
                frames.append((entry.ip, entry.symbol))
        return frames

    def _filter_report_sample(self, sample):
        """Return true if the sample can be used."""
        if self.comm_filter:
            if sample.thread_comm not in self.comm_filter:
                return False
        if self.pid_filter:
            if sample.pid not in self.pid_filter:
                return False
        if self.tid_filter:
            if sample.tid not in self.tid_filter:
                return False
        return True

    def _filter_symbol(self, symbol):
        """Return True if there is no dso filter or symbol.dso_name is in it."""
        if not self.dso_filter or symbol.dso_name in self.dso_filter:
            return True
        return False

    def get_string_id(self, str_value):
        """Return the string table index of `str_value`, adding it if needed.

        Empty (falsy) values map to index 0.
        """
        if not str_value:
            return 0
        str_id = self.string_table.get(str_value)
        if str_id is not None:
            return str_id
        # +1 because profile.string_table already holds '' at index 0, which
        # is not recorded in self.string_table.
        str_id = len(self.string_table) + 1
        self.string_table[str_value] = str_id
        self.profile.string_table.append(str_value)
        return str_id

    def get_string(self, str_id):
        """Return the string stored at index `str_id` of the profile's table."""
        return self.profile.string_table[str_id]

    def get_sample_type_id(self, name):
        """Return the first sample type index of event `name`.

        On first use of an event, two sample types are appended to the profile:
        'event_<name>_samples' at the returned index and 'event_<name>_count'
        at the next one, both with unit 'count'.
        """
        sample_type_id = self.sample_types.get(name)
        if sample_type_id is not None:
            return sample_type_id
        sample_type_id = len(self.profile.sample_type)
        sample_type = self.profile.sample_type.add()
        sample_type.type = self.get_string_id('event_' + name + '_samples')
        sample_type.unit = self.get_string_id('count')
        sample_type = self.profile.sample_type.add()
        sample_type.type = self.get_string_id('event_' + name + '_count')
        sample_type.unit = self.get_string_id('count')
        self.sample_types[name] = sample_type_id
        return sample_type_id

    def get_location_id(self, ip, symbol):
        """Return the id of the location for (`ip`, `symbol`), creating it if new.

        The mapping and the function of the symbol are registered as a side
        effect, even when the location itself already exists.
        """
        binary_path, build_id = self.get_binary(symbol.dso_name)
        mapping_id = self.get_mapping_id(symbol.mapping[0], binary_path, build_id)
        location = Location(mapping_id, ip, symbol.vaddr_in_file)
        function_id = self.get_function_id(symbol.symbol_name, binary_path, symbol.symbol_addr)
        if function_id:
            # Add Line only when it has a valid function id, see http://b/36988814.
            # Default line info only contains the function name
            line = Line()
            line.function_id = function_id
            location.lines.append(line)

        exist_location = self.location_map.get(location.key)
        if exist_location:
            return exist_location.id
        # location_id starts from 1
        location.id = len(self.location_list) + 1
        self.location_list.append(location)
        self.location_map[location.key] = location
        return location.id

    def get_mapping_id(self, report_mapping, filename, build_id):
        """Return the id of the mapping described by the arguments, adding it if new.

        Args:
            report_mapping: object with `start`, `end` and `pgoff` attributes.
            filename: path stored as the mapping's filename.
            build_id: build id string, may be empty.
        """
        filename_id = self.get_string_id(filename)
        build_id_id = self.get_string_id(build_id)
        mapping = Mapping(report_mapping.start, report_mapping.end,
                          report_mapping.pgoff, filename_id, build_id_id)
        exist_mapping = self.mapping_map.get(mapping.key)
        if exist_mapping:
            return exist_mapping.id
        # mapping_id starts from 1
        mapping.id = len(self.mapping_list) + 1
        self.mapping_list.append(mapping)
        self.mapping_map[mapping.key] = mapping
        return mapping.id

    def get_binary(self, dso_name):
        """ Return (binary_path, build_id) for a given dso_name. """
        value = self.binary_map.get(dso_name)
        if value:
            return value

        binary_path = dso_name
        build_id = ''

        # The build ids in perf.data are padded to 20 bytes, but pprof needs without padding.
        # So read build id from the binary in binary_cache, and check it with build id in
        # perf.data.
        build_id_in_perf_data = self.lib.GetBuildIdForPath(dso_name)
        if build_id_in_perf_data:
            # Try elf_path in binary cache.
            elf_path = find_real_dso_path(dso_name, self.config['binary_cache_dir'])
            if elf_path:
                elf_build_id = self.read_elf.get_build_id(elf_path, False)
                if build_id_in_perf_data == self.read_elf.pad_build_id(elf_build_id):
                    build_id = elf_build_id
                    binary_path = elf_path

            if not build_id and build_id_in_perf_data.startswith('0x'):
                # Fallback to the way used by TrimZeroesFromBuildIDString() in quipper.
                build_id = build_id_in_perf_data[2:]  # remove '0x'
                padding = '0' * 8
                while build_id.endswith(padding):
                    build_id = build_id[:-len(padding)]
        self.binary_map[dso_name] = (binary_path, build_id)
        return (binary_path, build_id)

    def get_mapping(self, mapping_id):
        """Return the Mapping with id `mapping_id`, or None for ids <= 0."""
        return self.mapping_list[mapping_id - 1] if mapping_id > 0 else None

    def get_function_id(self, name, dso_name, vaddr_in_file):
        """Return the id of function `name` in `dso_name`, adding it if new.

        Returns 0 for the name 'unknown'. Functions are identified by name and
        dso only, so `vaddr_in_file` is kept from the first registration.
        """
        if name == 'unknown':
            return 0
        function = Function(self.get_string_id(name), self.get_string_id(dso_name), vaddr_in_file)
        exist_function = self.function_map.get(function.key)
        if exist_function:
            return exist_function.id
        # function_id starts from 1
        function.id = len(self.function_list) + 1
        self.function_list.append(function)
        self.function_map[function.key] = function
        return function.id

    def get_function(self, function_id):
        """Return the Function with id `function_id`, or None for ids <= 0."""
        return self.function_list[function_id - 1] if function_id > 0 else None

    def add_sample(self, sample):
        """Merge `sample` into an existing one with the same key, or store it."""
        exist_sample = self.sample_map.get(sample.key)
        if exist_sample:
            exist_sample.add_values(sample.values)
        else:
            self.sample_list.append(sample)
            self.sample_map[sample.key] = sample

    def gen_source_lines(self):
        """Attach source file and line info to locations and functions.

        Does nothing but log a message when binary_cache is missing or when
        llvm-symbolizer can't be found.
        """
        # 1. Create Addr2line instance
        if not self.config.get('binary_cache_dir'):
            log_info("Can't generate line information because binary_cache is missing.")
            return
        if not find_tool_path('llvm-symbolizer', self.config['ndk_path']):
            log_info("Can't generate line information because can't find llvm-symbolizer.")
            return
        # We have changed dso names to paths in binary_cache in self.get_binary(). So no need to
        # pass binary_cache_dir to addr2line.
        addr2line = Addr2Nearestline(self.config['ndk_path'], None, True)

        # 2. Put all needed addresses to it.
        for location in self.location_list:
            mapping = self.get_mapping(location.mapping_id)
            dso_name = self.get_string(mapping.filename_id)
            if location.lines:
                function = self.get_function(location.lines[0].function_id)
                addr2line.add_addr(dso_name, function.vaddr_in_dso, location.vaddr_in_dso)
        for function in self.function_list:
            dso_name = self.get_string(function.dso_name_id)
            addr2line.add_addr(dso_name, function.vaddr_in_dso, function.vaddr_in_dso)

        # 3. Generate source lines.
        addr2line.convert_addrs_to_lines()

        # 4. Annotate locations and functions.
        for location in self.location_list:
            if not location.lines:
                continue
            mapping = self.get_mapping(location.mapping_id)
            dso_name = self.get_string(mapping.filename_id)
            dso = addr2line.get_dso(dso_name)
            if not dso:
                continue
            sources = addr2line.get_addr_source(dso, location.vaddr_in_dso)
            if not sources:
                continue
            for (source_id, source) in enumerate(sources):
                source_file, source_line, function_name = source
                function_id = self.get_function_id(function_name, dso_name, 0)
                if function_id == 0:
                    continue
                if source_id == 0:
                    # Clear default line info
                    location.lines = []
                location.lines.append(self.add_line(source_file, source_line, function_id))

        for function in self.function_list:
            dso_name = self.get_string(function.dso_name_id)
            # Functions registered with address 0 are skipped here.
            if function.vaddr_in_dso:
                dso = addr2line.get_dso(dso_name)
                if not dso:
                    continue
                sources = addr2line.get_addr_source(dso, function.vaddr_in_dso)
                if sources:
                    source_file, source_line, _ = sources[0]
                    function.source_filename_id = self.get_string_id(source_file)
                    function.start_line = source_line

    def add_line(self, source_file, source_line, function_id):
        """Return a new Line for `function_id` at `source_line`.

        Also records `source_file` as the source file of that function.
        """
        line = Line()
        function = self.get_function(function_id)
        function.source_filename_id = self.get_string_id(source_file)
        line.function_id = function_id
        line.line = source_line
        return line

    def gen_profile_sample(self, sample):
        """Append `sample` to the profile with one value per sample type."""
        profile_sample = self.profile.sample.add()
        profile_sample.location_id.extend(sample.location_ids)
        # Every event contributes two sample types; types without a value
        # for this sample stay at 0.
        sample_type_count = len(self.sample_types) * 2
        values = [0] * sample_type_count
        for sample_type_id in sample.values:
            values[sample_type_id] = sample.values[sample_type_id]
        profile_sample.value.extend(values)

    def gen_profile_mapping(self, mapping):
        """Append `mapping` to the profile.

        Line numbers and inline frames are flagged as available only when a
        binary cache directory is configured.
        """
        profile_mapping = self.profile.mapping.add()
        profile_mapping.id = mapping.id
        profile_mapping.memory_start = mapping.memory_start
        profile_mapping.memory_limit = mapping.memory_limit
        profile_mapping.file_offset = mapping.file_offset
        profile_mapping.filename = mapping.filename_id
        profile_mapping.build_id = mapping.build_id_id
        profile_mapping.has_filenames = True
        profile_mapping.has_functions = True
        if self.config.get('binary_cache_dir'):
            profile_mapping.has_line_numbers = True
            profile_mapping.has_inline_frames = True
        else:
            profile_mapping.has_line_numbers = False
            profile_mapping.has_inline_frames = False

    def gen_profile_location(self, location):
        """Append `location` and its line records to the profile."""
        profile_location = self.profile.location.add()
        profile_location.id = location.id
        profile_location.mapping_id = location.mapping_id
        profile_location.address = location.address
        for source_line in location.lines:
            line = profile_location.line.add()
            line.function_id = source_line.function_id
            line.line = source_line.line

    def gen_profile_function(self, function):
        """Append `function` to the profile; system_name is set to the name."""
        profile_function = self.profile.function.add()
        profile_function.id = function.id
        profile_function.name = function.name_id
        profile_function.system_name = function.name_id
        profile_function.filename = function.source_filename_id
        profile_function.start_line = function.start_line
583
584
def main():
    """Command line entry point.

    With --show, print an existing pprof profile and return. Otherwise convert
    the perf data file into a pprof profile and write it to the output file.
    """
    parser = argparse.ArgumentParser(description='Generate pprof profile data in pprof.profile.')
    add = parser.add_argument
    add('--show', nargs='?', action='append', help='print existing pprof.profile.')
    add('-i', '--perf_data_path', default='perf.data',
        help='\n        The path of profiling data.')
    add('-o', '--output_file', default='pprof.profile',
        help='\n        The path of generated pprof profile data.')
    # The four sample filters share the same argument shape.
    filter_flags = (
        ('--comm', '\n        Use samples only in threads with selected names.'),
        ('--pid', '\n        Use samples only in processes with selected process ids.'),
        ('--tid', '\n        Use samples only in threads with selected thread ids.'),
        ('--dso', '\n        Use samples only in selected binaries.'),
    )
    for flag, help_text in filter_flags:
        add(flag, nargs='+', action='append', help=help_text)
    # Large value as infinity standin.
    add('--max_chain_length', type=int, default=1000000000,
        help='\n        Maximum depth of samples to be converted.')
    add('--ndk_path', type=extant_dir, help='Set the path of a ndk release.')
    add('--show_art_frames', action='store_true',
        help='Show frames of internal methods in the ART Java interpreter.')

    args = parser.parse_args()
    if args.show:
        # A bare --show yields [None]; fall back to the default profile name.
        show_file = args.show[0] or 'pprof.profile'
        PprofProfilePrinter(load_pprof_profile(show_file)).show()
        return

    config = {
        'perf_data_path': args.perf_data_path,
        'output_file': args.output_file,
        'comm_filters': flatten_arg_list(args.comm),
        'pid_filters': flatten_arg_list(args.pid),
        'tid_filters': flatten_arg_list(args.tid),
        'dso_filters': flatten_arg_list(args.dso),
        'ndk_path': args.ndk_path,
        'show_art_frames': args.show_art_frames,
        'max_chain_length': args.max_chain_length,
    }
    profile = PprofProfileGenerator(config).gen()
    store_pprof_profile(config['output_file'], profile)
628
# Run the converter only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
631