1#!/usr/bin/env python
2#
3# Copyright (C) 2017 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18import argparse
19import collections
20import datetime
21import json
22import os
23import sys
24
25from simpleperf_report_lib import ReportLib
26from utils import log_info, log_exit
27from utils import Addr2Nearestline, get_script_dir, Objdump, open_report_in_browser
28from utils import SourceFileSearcher
29
# Upper bound on the number of frames kept for one sample: longer callstacks are
# cut down to their first MAX_CALLSTACK_LENGTH entries in
# RecordData.load_record_file().
MAX_CALLSTACK_LENGTH = 750
31
class HtmlWriter(object):
    """ Minimal writer that emits nested html tags and raw text into a file.

        Opened tags are remembered on a stack so that close_tag() can emit the
        matching end tag. open_tag(), add() and add_file() return the writer to
        allow call chaining.
    """

    def __init__(self, output_path):
        # The file stays open until close() is called.
        self.fh = open(output_path, 'w')
        self.tag_stack = []

    def close(self):
        """ Close the underlying output file. """
        self.fh.close()

    def open_tag(self, tag, **attrs):
        """ Write '<tag key="value" ...>' and push tag onto the tag stack. """
        rendered_attrs = ''.join(' %s="%s"' % (name, attrs[name]) for name in attrs)
        self.fh.write('<%s%s>' % (tag, rendered_attrs))
        self.tag_stack.append(tag)
        return self

    def close_tag(self, tag=None):
        """ Write the end tag for the innermost open tag.

            When tag is given, it is asserted to be the innermost open tag.
        """
        if tag:
            assert tag == self.tag_stack[-1]
        innermost = self.tag_stack.pop()
        self.fh.write('</%s>\n' % innermost)

    def add(self, text):
        """ Write text to the output as is. """
        self.fh.write(text)
        return self

    def add_file(self, file_path):
        """ Write the content of file_path, resolved relative to get_script_dir(). """
        full_path = os.path.join(get_script_dir(), file_path)
        with open(full_path, 'r') as source:
            self.add(source.read())
        return self
63
def modify_text_for_html(text):
    """ Return text with '>' and '<' replaced by '&gt;' and '&lt;'. """
    for raw, escaped in (('>', '&gt;'), ('<', '&lt;')):
        text = text.replace(raw, escaped)
    return text
66
class EventScope(object):
    """ Counters for one event name, split into per-process scopes. """

    def __init__(self, name):
        self.name = name
        self.processes = {}  # map from pid to ProcessScope
        self.sample_count = 0
        self.event_count = 0

    def get_process(self, pid):
        """ Return the ProcessScope stored for pid, creating it when missing. """
        process = self.processes.get(pid)
        if process:
            return process
        process = ProcessScope(pid)
        self.processes[pid] = process
        return process

    def get_sample_info(self, gen_addr_hit_map):
        """ Build the eventInfo dict; processes are ordered by descending event_count. """
        ranked = sorted(self.processes.values(), key=lambda a: a.event_count, reverse=True)
        return {
            'eventName': self.name,
            'eventCount': self.event_count,
            'processes': [item.get_sample_info(gen_addr_hit_map) for item in ranked],
        }

    @property
    def threads(self):
        """ Iterate over every thread scope of every process. """
        for process in self.processes.values():
            for thread in process.threads.values():
                yield thread

    @property
    def libraries(self):
        """ Iterate over every library scope of every thread of every process. """
        for thread in self.threads:
            for lib in thread.libs.values():
                yield lib
102
103
class ProcessScope(object):
    """ Counters for one process, split into per-thread scopes. """

    def __init__(self, pid):
        self.pid = pid
        self.name = ''
        self.event_count = 0
        self.threads = {}  # map from tid to ThreadScope

    def get_thread(self, tid, thread_name):
        """ Return the ThreadScope for tid (created when missing) and refresh its name.

            The name of the thread whose tid equals the pid is also used as the
            process name.
        """
        thread = self.threads.get(tid)
        if not thread:
            thread = ThreadScope(tid)
            self.threads[tid] = thread
        thread.name = thread_name
        if tid == self.pid:
            self.name = thread_name
        return thread

    def get_sample_info(self, gen_addr_hit_map):
        """ Build the processInfo dict; threads are ordered by descending event_count. """
        ranked = sorted(self.threads.values(), key=lambda a: a.event_count, reverse=True)
        return {
            'pid': self.pid,
            'eventCount': self.event_count,
            'threads': [item.get_sample_info(gen_addr_hit_map) for item in ranked],
        }

    def merge_by_thread_name(self, process):
        """ Fold process into this one, merging threads that share a name.

            The first thread seen for a name (own threads come first) survives and
            absorbs the later ones; survivors are stored again under their tid.
        """
        self.event_count += process.event_count
        candidates = list(self.threads.values()) + list(process.threads.values())
        by_name = {}  # map from thread name to ThreadScope
        for thread in candidates:
            survivor = by_name.get(thread.name)
            if survivor is not None:
                survivor.merge(thread)
            else:
                by_name[thread.name] = thread
        self.threads = {}
        for survivor in by_name.values():
            self.threads[survivor.tid] = survivor
143
144
class ThreadScope(object):
    """ Counters, per-library scopes and call graphs collected for one thread. """

    def __init__(self, tid):
        self.tid = tid
        self.name = ''
        self.event_count = 0
        self.sample_count = 0
        self.libs = {}  # map from lib_id to LibScope
        # Both graphs start from a placeholder root node with func_id -1.
        self.call_graph = CallNode(-1)
        self.reverse_call_graph = CallNode(-1)

    def add_callstack(self, event_count, callstack, build_addr_hit_map):
        """ callstack is a list of tuple (lib_id, func_id, addr).
            For each i > 0, callstack[i] calls callstack[i-1]."""
        seen_func_ids = set()
        for depth, (lib_id, func_id, addr) in enumerate(callstack):
            # A recursive function shows up several times in one callstack, but it
            # is only accounted for at its first occurrence.
            if func_id in seen_func_ids:
                continue
            seen_func_ids.add(func_id)

            lib = self.libs.get(lib_id)
            if not lib:
                lib = LibScope(lib_id)
                self.libs[lib_id] = lib
            function = lib.get_function(func_id)
            function.subtree_event_count += event_count
            is_sampled_frame = depth == 0
            if is_sampled_frame:
                # Self counts only go to the frame the sample was taken in.
                lib.event_count += event_count
                function.event_count += event_count
                function.sample_count += 1
            if build_addr_hit_map:
                self_count = event_count if is_sampled_frame else 0
                function.build_addr_hit_map(addr, self_count, event_count)

        # The call graph is walked from the outermost caller down to the sampled
        # frame, the reverse call graph the other way round. The event count is
        # added to the last node of each walk.
        walks = ((self.call_graph, reversed(callstack)),
                 (self.reverse_call_graph, callstack))
        for root, frames in walks:
            node = root
            for frame in frames:
                node = node.get_child(frame[1])
            node.event_count += event_count

    def update_subtree_event_count(self):
        """ Recompute subtree event counts of both call graphs. """
        for graph in (self.call_graph, self.reverse_call_graph):
            graph.update_subtree_event_count()

    def limit_percents(self, min_func_limit, min_callchain_percent, hit_func_ids):
        """ Drop functions and call graph edges below the given limits.

            Functions whose subtree_event_count is below min_func_limit are removed
            from their library; ids of the remaining functions are added to
            hit_func_ids. Call graph edges are cut below min_callchain_percent
            percent of the call graph's subtree event count.
        """
        for lib in self.libs.values():
            dropped_ids = []
            for function in lib.functions.values():
                if function.subtree_event_count >= min_func_limit:
                    hit_func_ids.add(function.func_id)
                else:
                    dropped_ids.append(function.func_id)
            for func_id in dropped_ids:
                del lib.functions[func_id]
        min_limit = min_callchain_percent * 0.01 * self.call_graph.subtree_event_count
        for graph in (self.call_graph, self.reverse_call_graph):
            graph.cut_edge(min_limit, hit_func_ids)

    def get_sample_info(self, gen_addr_hit_map):
        """ Build the threadInfo dict. """
        return {
            'tid': self.tid,
            'eventCount': self.event_count,
            'sampleCount': self.sample_count,
            'libs': [lib.gen_sample_info(gen_addr_hit_map) for lib in self.libs.values()],
            'g': self.call_graph.gen_sample_info(),
            'rg': self.reverse_call_graph.gen_sample_info(),
        }

    def merge(self, thread):
        """ Add counters, libraries and call graphs of thread to this scope. """
        self.event_count += thread.event_count
        self.sample_count += thread.sample_count
        for lib_id, lib in thread.libs.items():
            own_lib = self.libs.get(lib_id)
            if own_lib is not None:
                own_lib.merge(lib)
            else:
                self.libs[lib_id] = lib
        self.call_graph.merge(thread.call_graph)
        self.reverse_call_graph.merge(thread.reverse_call_graph)
228
229
class LibScope(object):
    """ Counters for one library inside a thread, split into per-function scopes. """

    def __init__(self, lib_id):
        self.lib_id = lib_id
        self.event_count = 0
        self.functions = {}  # map from func_id to FunctionScope.

    def get_function(self, func_id):
        """ Return the FunctionScope for func_id, creating it when missing. """
        function = self.functions.get(func_id)
        if function:
            return function
        function = FunctionScope(func_id)
        self.functions[func_id] = function
        return function

    def gen_sample_info(self, gen_addr_hit_map):
        """ Build the libInfo dict. """
        return {
            'libId': self.lib_id,
            'eventCount': self.event_count,
            'functions': [function.gen_sample_info(gen_addr_hit_map)
                          for function in self.functions.values()],
        }

    def merge(self, lib):
        """ Add the event count and the function scopes of lib to this scope. """
        self.event_count += lib.event_count
        for func_id, function in lib.functions.items():
            own_function = self.functions.get(func_id)
            if own_function is not None:
                own_function.merge(function)
            else:
                self.functions[func_id] = function
259
260
class FunctionScope(object):
    """ Counters for one function inside a library scope.

        Besides the plain counters it can keep per-address and per-source-line
        hit maps; both stay None until the first entry is recorded.
    """

    def __init__(self, func_id):
        self.func_id = func_id
        self.sample_count = 0
        self.event_count = 0
        self.subtree_event_count = 0
        self.addr_hit_map = None  # map from addr to [event_count, subtree_event_count].
        # map from (source_file_id, line) to [event_count, subtree_event_count].
        self.line_hit_map = None

    @staticmethod
    def _add_hit(hit_map, key, event_count, subtree_event_count):
        """ Add both counts to hit_map[key], creating the entry when missing. """
        counts = hit_map.get(key)
        if counts is None:
            hit_map[key] = [event_count, subtree_event_count]
            return
        counts[0] += event_count
        counts[1] += subtree_event_count

    def build_addr_hit_map(self, addr, event_count, subtree_event_count):
        """ Accumulate the counts for addr, creating addr_hit_map on first use. """
        if self.addr_hit_map is None:
            self.addr_hit_map = {}
        self._add_hit(self.addr_hit_map, addr, event_count, subtree_event_count)

    def build_line_hit_map(self, source_file_id, line, event_count, subtree_event_count):
        """ Accumulate the counts for (source_file_id, line), creating line_hit_map
            on first use. """
        if self.line_hit_map is None:
            self.line_hit_map = {}
        self._add_hit(self.line_hit_map, (source_file_id, line), event_count,
                      subtree_event_count)

    def gen_sample_info(self, gen_addr_hit_map):
        """ Build the funcInfo dict.

            's' is only present when line hits exist; 'a' (sorted by address) only
            when gen_addr_hit_map is true and address hits exist.
        """
        result = {
            'f': self.func_id,
            'c': [self.sample_count, self.event_count, self.subtree_event_count],
        }
        if self.line_hit_map:
            result['s'] = [{'f': key[0], 'l': key[1], 'e': counts[0], 's': counts[1]}
                           for key, counts in self.line_hit_map.items()]
        if gen_addr_hit_map and self.addr_hit_map:
            result['a'] = [{'a': addr,
                            'e': self.addr_hit_map[addr][0],
                            's': self.addr_hit_map[addr][1]}
                           for addr in sorted(self.addr_hit_map)]
        return result

    def merge(self, function):
        """ Add the counters and hit maps of function to this scope. """
        self.sample_count += function.sample_count
        self.event_count += function.event_count
        self.subtree_event_count += function.subtree_event_count
        self.addr_hit_map = self.__merge_hit_map(self.addr_hit_map, function.addr_hit_map)
        self.line_hit_map = self.__merge_hit_map(self.line_hit_map, function.line_hit_map)

    @staticmethod
    def __merge_hit_map(map1, map2):
        """ Merge map2 into map1 and return the result.

            When either map is empty or None the other one is returned untouched;
            entries only present in map2 are stored in map1 without being copied.
        """
        if not (map1 and map2):
            return map1 if map1 else map2
        for key, counts2 in map2.items():
            counts1 = map1.get(key)
            if counts1 is not None:
                counts1[0] += counts2[0]
                counts1[1] += counts2[1]
            else:
                map1[key] = counts2
        return map1
333
334
class CallNode(object):
    """ One node of a call graph: a function plus the nodes reached from it. """

    def __init__(self, func_id):
        self.event_count = 0
        self.subtree_event_count = 0
        self.func_id = func_id
        self.children = collections.OrderedDict()  # map from func_id to CallNode

    def get_child(self, func_id):
        """ Return the child node for func_id, creating it when missing. """
        child = self.children.get(func_id)
        if child:
            return child
        child = CallNode(func_id)
        self.children[func_id] = child
        return child

    def update_subtree_event_count(self):
        """ Recompute subtree_event_count for this node and all descendants.

            Returns the new subtree_event_count of this node.
        """
        self.subtree_event_count = self.event_count
        for child in self.children.values():
            child_total = child.update_subtree_event_count()
            self.subtree_event_count += child_total
        return self.subtree_event_count

    def cut_edge(self, min_limit, hit_func_ids):
        """ Remove children whose subtree_event_count is below min_limit.

            The func_id of this node and of every surviving descendant is added to
            hit_func_ids.
        """
        hit_func_ids.add(self.func_id)
        # Iterate over a snapshot, as children are deleted while looping.
        for func_id, child in list(self.children.items()):
            if child.subtree_event_count < min_limit:
                del self.children[func_id]
            else:
                child.cut_edge(min_limit, hit_func_ids)

    def gen_sample_info(self):
        """ Build the CallNode dict for this node, recursing into the children. """
        return {
            'e': self.event_count,
            's': self.subtree_event_count,
            'f': self.func_id,
            'c': [child.gen_sample_info() for child in self.children.values()],
        }

    def merge(self, node):
        """ Add the counts of node to this node and merge its children recursively.

            Children only present in node are adopted without being copied.
        """
        self.event_count += node.event_count
        self.subtree_event_count += node.subtree_event_count
        for func_id, other_child in node.children.items():
            own_child = self.children.get(func_id)
            if own_child is not None:
                own_child.merge(other_child)
            else:
                self.children[func_id] = other_child
384
385
class LibSet(object):
    """ Collection of shared libraries used in perf.data. """
    def __init__(self):
        self.lib_name_to_id = {}
        self.lib_id_to_name = []

    def get_lib_id(self, lib_name):
        """ Return the id of lib_name; an unseen name gets the next free id. """
        if lib_name not in self.lib_name_to_id:
            # Ids are positions in lib_id_to_name, handed out in order of first use.
            self.lib_name_to_id[lib_name] = len(self.lib_id_to_name)
            self.lib_id_to_name.append(lib_name)
        return self.lib_name_to_id[lib_name]

    def get_lib_name(self, lib_id):
        """ Return the library name registered under lib_id. """
        return self.lib_id_to_name[lib_id]
402
403
class Function(object):
    """ Represent a function in a shared library. """
    def __init__(self, lib_id, func_name, func_id, start_addr, addr_len):
        # Identity of the function.
        self.lib_id = lib_id
        self.func_name = func_name
        self.func_id = func_id
        # Address range of the function: start address and length.
        self.start_addr = start_addr
        self.addr_len = addr_len
        # Both stay None until source / disassembly information is attached.
        self.source_info = None
        self.disassembly = None
414
415
class FunctionSet(object):
    """ Collection of functions used in perf.data. """
    def __init__(self):
        self.name_to_func = {}
        self.id_to_func = {}

    def get_func_id(self, lib_id, symbol):
        """ Return the id of the function named symbol.symbol_name in library lib_id.

            An unseen (lib_id, name) pair is registered as a new Function built
            from the symbol's name, address and length.
        """
        key = (lib_id, symbol.symbol_name)
        function = self.name_to_func.get(key)
        if function is not None:
            return function.func_id
        new_id = len(self.id_to_func)
        function = Function(lib_id, symbol.symbol_name, new_id, symbol.symbol_addr,
                            symbol.symbol_len)
        self.name_to_func[key] = function
        self.id_to_func[new_id] = function
        return function.func_id

    def trim_functions(self, left_func_ids):
        """ Remove all functions except those in left_func_ids. """
        for function in self.name_to_func.values():
            if function.func_id in left_func_ids:
                continue
            del self.id_to_func[function.func_id]
        # name_to_func will not be used.
        self.name_to_func = None
440
441
class SourceFile(object):
    """ A source file containing source code hit by samples. """
    def __init__(self, file_id, abstract_path):
        self.file_id = file_id
        self.abstract_path = abstract_path  # path reported by addr2line
        self.real_path = None  # file path in the file system
        self.requested_lines = set()
        self.line_to_code = {}  # map from line to code in that line.

    def request_lines(self, start_line, end_line):
        """ Mark lines start_line..end_line (both inclusive) as wanted. """
        self.requested_lines.update(range(start_line, end_line + 1))

    def add_source_code(self, real_path):
        """ Read real_path and keep the code of every requested line it contains.

            Lines are numbered from 1; requested lines outside the file are ignored.
        """
        self.real_path = real_path
        with open(real_path, 'r') as source:
            lines = source.readlines()
        line_count = len(lines)
        for line_no in self.requested_lines:
            if 0 < line_no <= line_count:
                self.line_to_code[line_no] = lines[line_no - 1]
        # requested_lines is no longer used.
        self.requested_lines = None
464
465
class SourceFileSet(object):
    """ Collection of source files. """
    def __init__(self):
        self.path_to_source_files = {}  # map from file path to SourceFile.

    def get_source_file(self, file_path):
        """ Return the SourceFile for file_path, registering it on first use.

            A new file gets the number of already known files as its file id.
        """
        known = self.path_to_source_files.get(file_path)
        if known is not None:
            return known
        created = SourceFile(len(self.path_to_source_files), file_path)
        self.path_to_source_files[file_path] = created
        return created

    def load_source_code(self, source_dirs):
        """ Load code for every known file that SourceFileSearcher can locate. """
        file_searcher = SourceFileSearcher(source_dirs)
        for source_file in self.path_to_source_files.values():
            real_path = file_searcher.get_real_path(source_file.abstract_path)
            if not real_path:
                continue
            source_file.add_source_code(real_path)
484
485
486
487class RecordData(object):
488
    """RecordData reads perf.data and generates the data used by report.js in JSON format.
        All generated items are listed below:
491            1. recordTime: string
492            2. machineType: string
493            3. androidVersion: string
494            4. recordCmdline: string
495            5. totalSamples: int
496            6. processNames: map from pid to processName.
497            7. threadNames: map from tid to threadName.
498            8. libList: an array of libNames, indexed by libId.
499            9. functionMap: map from functionId to funcData.
500                funcData = {
501                    l: libId
502                    f: functionName
503                    s: [sourceFileId, startLine, endLine] [optional]
504                    d: [(disassembly, addr)] [optional]
505                }
506
507            10.  sampleInfo = [eventInfo]
508                eventInfo = {
509                    eventName
510                    eventCount
511                    processes: [processInfo]
512                }
513                processInfo = {
514                    pid
515                    eventCount
516                    threads: [threadInfo]
517                }
518                threadInfo = {
519                    tid
520                    eventCount
521                    sampleCount
522                    libs: [libInfo],
523                    g: callGraph,
524                    rg: reverseCallgraph
525                }
526                libInfo = {
527                    libId,
528                    eventCount,
529                    functions: [funcInfo]
530                }
531                funcInfo = {
532                    f: functionId
533                    c: [sampleCount, eventCount, subTreeEventCount]
534                    s: [sourceCodeInfo] [optional]
535                    a: [addrInfo] (sorted by addrInfo.addr) [optional]
536                }
537                callGraph and reverseCallGraph are both of type CallNode.
538                callGraph shows how a function calls other functions.
539                reverseCallGraph shows how a function is called by other functions.
540                CallNode {
541                    e: selfEventCount
542                    s: subTreeEventCount
543                    f: functionId
544                    c: [CallNode] # children
545                }
546
547                sourceCodeInfo {
548                    f: sourceFileId
549                    l: line
550                    e: eventCount
551                    s: subtreeEventCount
552                }
553
554                addrInfo {
555                    a: addr
556                    e: eventCount
557                    s: subtreeEventCount
558                }
559
560            11. sourceFiles: an array of sourceFile, indexed by sourceFileId.
561                sourceFile {
562                    path
563                    code:  # a map from line to code for that line.
564                }
565    """
566
567    def __init__(self, binary_cache_path, ndk_path, build_addr_hit_map):
568        self.binary_cache_path = binary_cache_path
569        self.ndk_path = ndk_path
570        self.build_addr_hit_map = build_addr_hit_map
571        self.meta_info = None
572        self.cmdline = None
573        self.arch = None
574        self.events = {}
575        self.libs = LibSet()
576        self.functions = FunctionSet()
577        self.total_samples = 0
578        self.source_files = SourceFileSet()
579        self.gen_addr_hit_map_in_record_info = False
580
581    def load_record_file(self, record_file, show_art_frames):
582        lib = ReportLib()
583        lib.SetRecordFile(record_file)
584        # If not showing ip for unknown symbols, the percent of the unknown symbol may be
585        # accumulated to very big, and ranks first in the sample table.
586        lib.ShowIpForUnknownSymbol()
587        if show_art_frames:
588            lib.ShowArtFrames()
589        if self.binary_cache_path:
590            lib.SetSymfs(self.binary_cache_path)
591        self.meta_info = lib.MetaInfo()
592        self.cmdline = lib.GetRecordCmd()
593        self.arch = lib.GetArch()
594        while True:
595            raw_sample = lib.GetNextSample()
596            if not raw_sample:
597                lib.Close()
598                break
599            raw_event = lib.GetEventOfCurrentSample()
600            symbol = lib.GetSymbolOfCurrentSample()
601            callchain = lib.GetCallChainOfCurrentSample()
602            event = self._get_event(raw_event.name)
603            self.total_samples += 1
604            event.sample_count += 1
605            event.event_count += raw_sample.period
606            process = event.get_process(raw_sample.pid)
607            process.event_count += raw_sample.period
608            thread = process.get_thread(raw_sample.tid, raw_sample.thread_comm)
609            thread.event_count += raw_sample.period
610            thread.sample_count += 1
611
612            lib_id = self.libs.get_lib_id(symbol.dso_name)
613            func_id = self.functions.get_func_id(lib_id, symbol)
614            callstack = [(lib_id, func_id, symbol.vaddr_in_file)]
615            for i in range(callchain.nr):
616                symbol = callchain.entries[i].symbol
617                lib_id = self.libs.get_lib_id(symbol.dso_name)
618                func_id = self.functions.get_func_id(lib_id, symbol)
619                callstack.append((lib_id, func_id, symbol.vaddr_in_file))
620            if len(callstack) > MAX_CALLSTACK_LENGTH:
621                callstack = callstack[:MAX_CALLSTACK_LENGTH]
622            thread.add_callstack(raw_sample.period, callstack, self.build_addr_hit_map)
623
624        for event in self.events.values():
625            for thread in event.threads:
626                thread.update_subtree_event_count()
627
628    def aggregate_by_thread_name(self):
629        for event in self.events.values():
630            new_processes = {}  # from process name to ProcessScope
631            for process in event.processes.values():
632                cur_process = new_processes.get(process.name)
633                if cur_process is None:
634                    new_processes[process.name] = process
635                else:
636                    cur_process.merge_by_thread_name(process)
637            event.processes = {}
638            for process in new_processes.values():
639                event.processes[process.pid] = process
640
641    def limit_percents(self, min_func_percent, min_callchain_percent):
642        hit_func_ids = set()
643        for event in self.events.values():
644            min_limit = event.event_count * min_func_percent * 0.01
645            to_del_processes = []
646            for process in event.processes.values():
647                to_del_threads = []
648                for thread in process.threads.values():
649                    if thread.call_graph.subtree_event_count < min_limit:
650                        to_del_threads.append(thread.tid)
651                    else:
652                        thread.limit_percents(min_limit, min_callchain_percent, hit_func_ids)
653                for thread in to_del_threads:
654                    del process.threads[thread]
655                if not process.threads:
656                    to_del_processes.append(process.pid)
657            for process in to_del_processes:
658                del event.processes[process]
659        self.functions.trim_functions(hit_func_ids)
660
661    def _get_event(self, event_name):
662        if event_name not in self.events:
663            self.events[event_name] = EventScope(event_name)
664        return self.events[event_name]
665
666    def add_source_code(self, source_dirs, filter_lib):
667        """ Collect source code information:
668            1. Find line ranges for each function in FunctionSet.
669            2. Find line for each addr in FunctionScope.addr_hit_map.
670            3. Collect needed source code in SourceFileSet.
671        """
672        addr2line = Addr2Nearestline(self.ndk_path, self.binary_cache_path, False)
673        # Request line range for each function.
674        for function in self.functions.id_to_func.values():
675            if function.func_name == 'unknown':
676                continue
677            lib_name = self.libs.get_lib_name(function.lib_id)
678            if filter_lib(lib_name):
679                addr2line.add_addr(lib_name, function.start_addr, function.start_addr)
680                addr2line.add_addr(lib_name, function.start_addr,
681                                   function.start_addr + function.addr_len - 1)
682        # Request line for each addr in FunctionScope.addr_hit_map.
683        for event in self.events.values():
684            for lib in event.libraries:
685                lib_name = self.libs.get_lib_name(lib.lib_id)
686                if filter_lib(lib_name):
687                    for function in lib.functions.values():
688                        func_addr = self.functions.id_to_func[function.func_id].start_addr
689                        for addr in function.addr_hit_map:
690                            addr2line.add_addr(lib_name, func_addr, addr)
691        addr2line.convert_addrs_to_lines()
692
693        # Set line range for each function.
694        for function in self.functions.id_to_func.values():
695            if function.func_name == 'unknown':
696                continue
697            dso = addr2line.get_dso(self.libs.get_lib_name(function.lib_id))
698            if not dso:
699                continue
700            start_source = addr2line.get_addr_source(dso, function.start_addr)
701            end_source = addr2line.get_addr_source(dso, function.start_addr + function.addr_len - 1)
702            if not start_source or not end_source:
703                continue
704            start_file_path, start_line = start_source[-1]
705            end_file_path, end_line = end_source[-1]
706            if start_file_path != end_file_path or start_line > end_line:
707                continue
708            source_file = self.source_files.get_source_file(start_file_path)
709            source_file.request_lines(start_line, end_line)
710            function.source_info = (source_file.file_id, start_line, end_line)
711
712        # Build FunctionScope.line_hit_map.
713        for event in self.events.values():
714            for lib in event.libraries:
715                dso = addr2line.get_dso(self.libs.get_lib_name(lib.lib_id))
716                if not dso:
717                    continue
718                for function in lib.functions.values():
719                    for addr in function.addr_hit_map:
720                        source = addr2line.get_addr_source(dso, addr)
721                        if not source:
722                            continue
723                        for file_path, line in source:
724                            source_file = self.source_files.get_source_file(file_path)
725                            # Show [line - 5, line + 5] of the line hit by a sample.
726                            source_file.request_lines(line - 5, line + 5)
727                            count_info = function.addr_hit_map[addr]
728                            function.build_line_hit_map(source_file.file_id, line, count_info[0],
729                                                        count_info[1])
730
731        # Collect needed source code in SourceFileSet.
732        self.source_files.load_source_code(source_dirs)
733
734    def add_disassembly(self, filter_lib):
735        """ Collect disassembly information:
736            1. Use objdump to collect disassembly for each function in FunctionSet.
737            2. Set flag to dump addr_hit_map when generating record info.
738        """
739        objdump = Objdump(self.ndk_path, self.binary_cache_path)
740        cur_lib_name = None
741        dso_info = None
742        for function in sorted(self.functions.id_to_func.values(), key=lambda a: a.lib_id):
743            if function.func_name == 'unknown':
744                continue
745            lib_name = self.libs.get_lib_name(function.lib_id)
746            if lib_name != cur_lib_name:
747                cur_lib_name = lib_name
748                if filter_lib(lib_name):
749                    dso_info = objdump.get_dso_info(lib_name)
750                else:
751                    dso_info = None
752                if dso_info:
753                    log_info('Disassemble %s' % dso_info[0])
754            if dso_info:
755                code = objdump.disassemble_code(dso_info, function.start_addr, function.addr_len)
756                function.disassembly = code
757
758        self.gen_addr_hit_map_in_record_info = True
759
760    def gen_record_info(self):
761        record_info = {}
762        timestamp = self.meta_info.get('timestamp')
763        if timestamp:
764            t = datetime.datetime.fromtimestamp(int(timestamp))
765        else:
766            t = datetime.datetime.now()
767        record_info['recordTime'] = t.strftime('%Y-%m-%d (%A) %H:%M:%S')
768
769        product_props = self.meta_info.get('product_props')
770        machine_type = self.arch
771        if product_props:
772            manufacturer, model, name = product_props.split(':')
773            machine_type = '%s (%s) by %s, arch %s' % (model, name, manufacturer, self.arch)
774        record_info['machineType'] = machine_type
775        record_info['androidVersion'] = self.meta_info.get('android_version', '')
776        record_info['recordCmdline'] = self.cmdline
777        record_info['totalSamples'] = self.total_samples
778        record_info['processNames'] = self._gen_process_names()
779        record_info['threadNames'] = self._gen_thread_names()
780        record_info['libList'] = self._gen_lib_list()
781        record_info['functionMap'] = self._gen_function_map()
782        record_info['sampleInfo'] = self._gen_sample_info()
783        record_info['sourceFiles'] = self._gen_source_files()
784        return record_info
785
786    def _gen_process_names(self):
787        process_names = {}
788        for event in self.events.values():
789            for process in event.processes.values():
790                process_names[process.pid] = process.name
791        return process_names
792
793    def _gen_thread_names(self):
794        thread_names = {}
795        for event in self.events.values():
796            for process in event.processes.values():
797                for thread in process.threads.values():
798                    thread_names[thread.tid] = thread.name
799        return thread_names
800
801    def _gen_lib_list(self):
802        return [modify_text_for_html(x) for x in self.libs.lib_id_to_name]
803
804    def _gen_function_map(self):
805        func_map = {}
806        for func_id in sorted(self.functions.id_to_func):
807            function = self.functions.id_to_func[func_id]
808            func_data = {}
809            func_data['l'] = function.lib_id
810            func_data['f'] = modify_text_for_html(function.func_name)
811            if function.source_info:
812                func_data['s'] = function.source_info
813            if function.disassembly:
814                disassembly_list = []
815                for code, addr in function.disassembly:
816                    disassembly_list.append([modify_text_for_html(code), addr])
817                func_data['d'] = disassembly_list
818            func_map[func_id] = func_data
819        return func_map
820
821    def _gen_sample_info(self):
822        return [event.get_sample_info(self.gen_addr_hit_map_in_record_info)
823                for event in self.events.values()]
824
825    def _gen_source_files(self):
826        source_files = sorted(self.source_files.path_to_source_files.values(),
827                              key=lambda x: x.file_id)
828        file_list = []
829        for source_file in source_files:
830            file_data = {}
831            if not source_file.real_path:
832                file_data['path'] = ''
833                file_data['code'] = {}
834            else:
835                file_data['path'] = source_file.real_path
836                code_map = {}
837                for line in source_file.line_to_code:
838                    code_map[line] = modify_text_for_html(source_file.line_to_code[line])
839                file_data['code'] = code_map
840            file_list.append(file_data)
841        return file_list
842
# URLs of the externally hosted stylesheets and scripts, keyed by a short name.
# Keys ending in '-css' are stylesheets; the other entries are javascript files.
URLS = {
    'jquery': 'https://ajax.googleapis.com/ajax/libs/jquery/3.3.1/jquery.min.js',
    'bootstrap4-css': 'https://stackpath.bootstrapcdn.com/bootstrap/4.1.2/css/bootstrap.min.css',
    'bootstrap4-popper':
        'https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.12.9/umd/popper.min.js',
    'bootstrap4': 'https://stackpath.bootstrapcdn.com/bootstrap/4.1.2/js/bootstrap.min.js',
    'dataTable': 'https://cdn.datatables.net/1.10.19/js/jquery.dataTables.min.js',
    'dataTable-bootstrap4': 'https://cdn.datatables.net/1.10.19/js/dataTables.bootstrap4.min.js',
    'dataTable-css': 'https://cdn.datatables.net/1.10.19/css/dataTables.bootstrap4.min.css',
    'gstatic-charts': 'https://www.gstatic.com/charts/loader.js',
}
854
class ReportGenerator(object):
    """ Writes the html report file piece by piece through an HtmlWriter.

        The constructor writes the <head> section and opens <body>. The write_*
        methods add content to the body, and finish() closes the document and
        the output file.
    """

    def __init__(self, html_path):
        """ Open html_path for writing and write everything up to the opening <body> tag. """
        self.hw = HtmlWriter(html_path)
        self.hw.open_tag('html')
        self.hw.open_tag('head')
        for css in ['bootstrap4-css', 'dataTable-css']:
            self.hw.open_tag('link', rel='stylesheet', type='text/css', href=URLS[css]).close_tag()
        for js in ['jquery', 'bootstrap4-popper', 'bootstrap4', 'dataTable', 'dataTable-bootstrap4',
                   'gstatic-charts']:
            self.hw.open_tag('script', src=URLS[js]).close_tag()

        self.hw.open_tag('script').add(
            "google.charts.load('current', {'packages': ['corechart', 'table']});").close_tag()
        self.hw.open_tag('style', type='text/css').add("""
            .colForLine { width: 50px; }
            .colForCount { width: 100px; }
            .tableCell { font-size: 17px; }
            .boldTableCell { font-weight: bold; font-size: 17px; }
            """).close_tag()
        self.hw.close_tag('head')
        self.hw.open_tag('body')
        self.record_info = {}

    def write_content_div(self):
        """ Write the empty div with id 'report_content'. """
        self.hw.open_tag('div', id='report_content').close_tag()

    def write_record_data(self, record_data):
        """ Embed record_data as JSON in a script element with id 'record_data'.

            record_data must be serializable by json.dumps.
        """
        self.hw.open_tag('script', id='record_data', type='application/json')
        # A string value containing '</script>' (or '<!--') would otherwise end or
        # corrupt the script element, so '<' is never written literally. '<' can only
        # appear inside JSON strings, where '\u003c' is an equivalent escape, so the
        # parsed data is unchanged.
        self.hw.add(json.dumps(record_data).replace('<', '\\u003c'))
        self.hw.close_tag()

    def write_script(self):
        """ Inline the content of report_html.js in a script element. """
        self.hw.open_tag('script').add_file('report_html.js').close_tag()

    def finish(self):
        """ Close the body and html tags, then close the output file. """
        self.hw.close_tag('body')
        self.hw.close_tag('html')
        self.hw.close()
894
895
def main():
    """ Parse the command line, build the record data and write the html report. """
    sys.setrecursionlimit(MAX_CALLSTACK_LENGTH * 2 + 50)
    parser = argparse.ArgumentParser(description='report profiling data')
    parser.add_argument('-i', '--record_file', nargs='+', default=['perf.data'], help="""
                        Set profiling data file to report. Default is perf.data.""")
    parser.add_argument('-o', '--report_path', default='report.html', help="""
                        Set output html file. Default is report.html.""")
    parser.add_argument('--min_func_percent', default=0.01, type=float, help="""
                        Set min percentage of functions shown in the report.
                        For example, when set to 0.01, only functions taking >= 0.01%% of total
                        event count are collected in the report. Default is 0.01.""")
    parser.add_argument('--min_callchain_percent', default=0.01, type=float, help="""
                        Set min percentage of callchains shown in the report.
                        It is used to limit nodes shown in the function flamegraph. For example,
                        when set to 0.01, only callchains taking >= 0.01%% of the event count of
                        the starting function are collected in the report. Default is 0.01.""")
    parser.add_argument('--add_source_code', action='store_true', help='Add source code.')
    parser.add_argument('--source_dirs', nargs='+', help='Source code directories.')
    parser.add_argument('--add_disassembly', action='store_true', help='Add disassembled code.')
    parser.add_argument('--binary_filter', nargs='+', help="""Annotate source code and disassembly
                        only for selected binaries.""")
    parser.add_argument('--ndk_path', nargs=1, help='Find tools in the ndk path.')
    parser.add_argument('--no_browser', action='store_true', help="Don't open report in browser.")
    parser.add_argument('--show_art_frames', action='store_true',
                        help='Show frames of internal methods in the ART Java interpreter.')
    parser.add_argument('--aggregate-by-thread-name', action='store_true', help="""aggregate
                        samples by thread name instead of thread id. This is useful for
                        showing multiple perf.data generated for the same app.""")
    args = parser.parse_args()

    # 1. Process args.
    # Source code and disassembly both need the binaries collected in binary_cache/.
    build_addr_hit_map = args.add_source_code or args.add_disassembly
    binary_cache_path = 'binary_cache' if os.path.isdir('binary_cache') else None
    if binary_cache_path is None and build_addr_hit_map:
        log_exit("""binary_cache/ doesn't exist. Can't add source code or disassembled code
                        without collected binaries. Please run binary_cache_builder.py to
                        collect binaries for current profiling data, or run app_profiler.py
                        without -nb option.""")

    if args.add_source_code and not args.source_dirs:
        log_exit('--source_dirs is needed to add source code.')
    ndk_path = args.ndk_path[0] if args.ndk_path else None

    # 2. Produce record data.
    record_data = RecordData(binary_cache_path, ndk_path, build_addr_hit_map)
    for record_file in args.record_file:
        record_data.load_record_file(record_file, args.show_art_frames)
    if args.aggregate_by_thread_name:
        record_data.aggregate_by_thread_name()
    record_data.limit_percents(args.min_func_percent, args.min_callchain_percent)

    def filter_lib(lib_name):
        # Without --binary_filter every library is selected; otherwise a library is
        # selected when any filter string is a substring of its name.
        if not args.binary_filter:
            return True
        return any(binary in lib_name for binary in args.binary_filter)

    if args.add_source_code:
        record_data.add_source_code(args.source_dirs, filter_lib)
    if args.add_disassembly:
        record_data.add_disassembly(filter_lib)

    # 3. Generate report html.
    report = ReportGenerator(args.report_path)
    report.write_script()
    report.write_content_div()
    report.write_record_data(record_data.gen_record_info())
    report.finish()

    if not args.no_browser:
        open_report_in_browser(args.report_path)
    log_info("Report generated at '%s'." % args.report_path)
971
972
973if __name__ == '__main__':
974    main()
975