1#!/usr/bin/env python
2#
3# Copyright 2012 the V8 project authors. All rights reserved.
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met:
7#
8#     * Redistributions of source code must retain the above copyright
9#       notice, this list of conditions and the following disclaimer.
10#     * Redistributions in binary form must reproduce the above
11#       copyright notice, this list of conditions and the following
12#       disclaimer in the documentation and/or other materials provided
13#       with the distribution.
14#     * Neither the name of Google Inc. nor the names of its
15#       contributors may be used to endorse or promote products derived
16#       from this software without specific prior written permission.
17#
18# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30
31# for py2/py3 compatibility
32from __future__ import print_function
33
34import bisect
35import collections
36import ctypes
37import disasm
38import mmap
39import optparse
40import os
41import re
42import subprocess
43import sys
44import time
45
46
47USAGE="""usage: %prog [OPTION]...
48
49Analyses V8 and perf logs to produce profiles.
50
51Perf logs can be collected using a command like:
52  $ perf record -R -e cycles -c 10000 -f -i ./d8 bench.js --ll-prof
53  # -R: collect all data
54  # -e cycles: use cpu-cycles event (run "perf list" for details)
55  # -c 10000: write a sample after each 10000 events
56  # -f: force output file overwrite
57  # -i: limit profiling to our process and the kernel
58  # --ll-prof shell flag enables the right V8 logs
59This will produce a binary trace file (perf.data) that %prog can analyse.
60
61IMPORTANT:
62  The kernel has an internal maximum for events per second, it is 100K by
63  default. That's not enough for "-c 10000". Set it to some higher value:
64  $ echo 10000000 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate
65  You can also make the warning about kernel address maps go away:
66  $ echo 0 | sudo tee /proc/sys/kernel/kptr_restrict
67
68We have a convenience script that handles all of the above for you:
69  $ tools/run-llprof.sh ./d8 bench.js
70
71Examples:
72  # Print flat profile with annotated disassembly for the 10 top
73  # symbols. Use default log names.
74  $ %prog --disasm-top=10
75
76  # Print flat profile with annotated disassembly for all used symbols.
77  # Use default log names and include kernel symbols into analysis.
78  $ %prog --disasm-all --kernel
79
80  # Print flat profile. Use custom log names.
81  $ %prog --log=foo.log --trace=foo.data
82"""
83
84
85JS_ORIGIN = "js"
86
87
88class Code(object):
89  """Code object."""
90
91  _id = 0
92  UNKNOWN = 0
93  V8INTERNAL = 1
94  FULL_CODEGEN = 2
95  OPTIMIZED = 3
96
97  def __init__(self, name, start_address, end_address, origin, origin_offset):
98    self.id = Code._id
99    Code._id += 1
100    self.name = name
101    self.other_names = None
102    self.start_address = start_address
103    self.end_address = end_address
104    self.origin = origin
105    self.origin_offset = origin_offset
106    self.self_ticks = 0
107    self.self_ticks_map = None
108    self.callee_ticks = None
109    if name.startswith("LazyCompile:*"):
110      self.codetype = Code.OPTIMIZED
111    elif name.startswith("LazyCompile:"):
112      self.codetype = Code.FULL_CODEGEN
113    elif name.startswith("v8::internal::"):
114      self.codetype = Code.V8INTERNAL
115    else:
116      self.codetype = Code.UNKNOWN
117
118  def AddName(self, name):
119    assert self.name != name
120    if self.other_names is None:
121      self.other_names = [name]
122      return
123    if not name in self.other_names:
124      self.other_names.append(name)
125
126  def FullName(self):
127    if self.other_names is None:
128      return self.name
129    self.other_names.sort()
130    return "%s (aka %s)" % (self.name, ", ".join(self.other_names))
131
132  def IsUsed(self):
133    return self.self_ticks > 0 or self.callee_ticks is not None
134
135  def Tick(self, pc):
136    self.self_ticks += 1
137    if self.self_ticks_map is None:
138      self.self_ticks_map = collections.defaultdict(lambda: 0)
139    offset = pc - self.start_address
140    self.self_ticks_map[offset] += 1
141
142  def CalleeTick(self, callee):
143    if self.callee_ticks is None:
144      self.callee_ticks = collections.defaultdict(lambda: 0)
145    self.callee_ticks[callee] += 1
146
147  def PrintAnnotated(self, arch, options):
148    if self.self_ticks_map is None:
149      ticks_map = []
150    else:
151      ticks_map = self.self_ticks_map.items()
152    # Convert the ticks map to offsets and counts arrays so that later
153    # we can do binary search in the offsets array.
154    ticks_map.sort(key=lambda t: t[0])
155    ticks_offsets = [t[0] for t in ticks_map]
156    ticks_counts = [t[1] for t in ticks_map]
157    # Get a list of disassembled lines and their addresses.
158    lines = self._GetDisasmLines(arch, options)
159    if len(lines) == 0:
160      return
161    # Print annotated lines.
162    address = lines[0][0]
163    total_count = 0
164    for i in range(len(lines)):
165      start_offset = lines[i][0] - address
166      if i == len(lines) - 1:
167        end_offset = self.end_address - self.start_address
168      else:
169        end_offset = lines[i + 1][0] - address
170      # Ticks (reported pc values) are not always precise, i.e. not
171      # necessarily point at instruction starts. So we have to search
172      # for ticks that touch the current instruction line.
173      j = bisect.bisect_left(ticks_offsets, end_offset)
174      count = 0
175      for offset, cnt in reversed(zip(ticks_offsets[:j], ticks_counts[:j])):
176        if offset < start_offset:
177          break
178        count += cnt
179      total_count += count
180      percent = 100.0 * count / self.self_ticks
181      offset = lines[i][0]
182      if percent >= 0.01:
183        # 5 spaces for tick count
184        # 1 space following
185        # 1 for '|'
186        # 1 space following
187        # 6 for the percentage number, incl. the '.'
188        # 1 for the '%' sign
189        # => 15
190        print("%5d | %6.2f%% %x(%d): %s" % (count, percent, offset, offset, lines[i][1]))
191      else:
192        print("%s %x(%d): %s" % (" " * 15, offset, offset, lines[i][1]))
193    print()
194    assert total_count == self.self_ticks, \
195        "Lost ticks (%d != %d) in %s" % (total_count, self.self_ticks, self)
196
197  def __str__(self):
198    return "%s [0x%x, 0x%x) size: %d origin: %s" % (
199      self.name,
200      self.start_address,
201      self.end_address,
202      self.end_address - self.start_address,
203      self.origin)
204
205  def _GetDisasmLines(self, arch, options):
206    if self.origin == JS_ORIGIN:
207      inplace = False
208      filename = options.log + ".ll"
209    else:
210      inplace = True
211      filename = self.origin
212    return disasm.GetDisasmLines(filename,
213                                 self.origin_offset,
214                                 self.end_address - self.start_address,
215                                 arch,
216                                 inplace)
217
218
219class CodePage(object):
220  """Group of adjacent code objects."""
221
222  SHIFT = 20  # 1M pages
223  SIZE = (1 << SHIFT)
224  MASK = ~(SIZE - 1)
225
226  @staticmethod
227  def PageAddress(address):
228    return address & CodePage.MASK
229
230  @staticmethod
231  def PageId(address):
232    return address >> CodePage.SHIFT
233
234  @staticmethod
235  def PageAddressFromId(id):
236    return id << CodePage.SHIFT
237
238  def __init__(self, address):
239    self.address = address
240    self.code_objects = []
241
242  def Add(self, code):
243    self.code_objects.append(code)
244
245  def Remove(self, code):
246    self.code_objects.remove(code)
247
248  def Find(self, pc):
249    code_objects = self.code_objects
250    for i, code in enumerate(code_objects):
251      if code.start_address <= pc < code.end_address:
252        code_objects[0], code_objects[i] = code, code_objects[0]
253        return code
254    return None
255
256  def __iter__(self):
257    return self.code_objects.__iter__()
258
259
260class CodeMap(object):
261  """Code object map."""
262
263  def __init__(self):
264    self.pages = {}
265    self.min_address = 1 << 64
266    self.max_address = -1
267
268  def Add(self, code, max_pages=-1):
269    page_id = CodePage.PageId(code.start_address)
270    limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
271    pages = 0
272    while page_id < limit_id:
273      if max_pages >= 0 and pages > max_pages:
274        print("Warning: page limit (%d) reached for %s [%s]" % (
275            max_pages, code.name, code.origin), file=sys.stderr)
276        break
277      if page_id in self.pages:
278        page = self.pages[page_id]
279      else:
280        page = CodePage(CodePage.PageAddressFromId(page_id))
281        self.pages[page_id] = page
282      page.Add(code)
283      page_id += 1
284      pages += 1
285    self.min_address = min(self.min_address, code.start_address)
286    self.max_address = max(self.max_address, code.end_address)
287
288  def Remove(self, code):
289    page_id = CodePage.PageId(code.start_address)
290    limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
291    removed = False
292    while page_id < limit_id:
293      if page_id not in self.pages:
294        page_id += 1
295        continue
296      page = self.pages[page_id]
297      page.Remove(code)
298      removed = True
299      page_id += 1
300    return removed
301
302  def AllCode(self):
303    for page in self.pages.itervalues():
304      for code in page:
305        if CodePage.PageAddress(code.start_address) == page.address:
306          yield code
307
308  def UsedCode(self):
309    for code in self.AllCode():
310      if code.IsUsed():
311        yield code
312
313  def Print(self):
314    for code in self.AllCode():
315      print(code)
316
317  def Find(self, pc):
318    if pc < self.min_address or pc >= self.max_address:
319      return None
320    page_id = CodePage.PageId(pc)
321    if page_id not in self.pages:
322      return None
323    return self.pages[page_id].Find(pc)
324
325
326class CodeInfo(object):
327  """Generic info about generated code objects."""
328
329  def __init__(self, arch, header_size):
330    self.arch = arch
331    self.header_size = header_size
332
333
334class LogReader(object):
335  """V8 low-level (binary) log reader."""
336
337  _ARCH_TO_POINTER_TYPE_MAP = {
338    "ia32": ctypes.c_uint32,
339    "arm": ctypes.c_uint32,
340    "mips": ctypes.c_uint32,
341    "x64": ctypes.c_uint64,
342    "arm64": ctypes.c_uint64
343  }
344
345  _CODE_CREATE_TAG = "C"
346  _CODE_MOVE_TAG = "M"
347  _CODE_MOVING_GC_TAG = "G"
348
349  def __init__(self, log_name, code_map):
350    self.log_file = open(log_name, "r")
351    self.log = mmap.mmap(self.log_file.fileno(), 0, mmap.MAP_PRIVATE)
352    self.log_pos = 0
353    self.code_map = code_map
354
355    self.arch = self.log[:self.log.find("\0")]
356    self.log_pos += len(self.arch) + 1
357    assert self.arch in LogReader._ARCH_TO_POINTER_TYPE_MAP, \
358        "Unsupported architecture %s" % self.arch
359    pointer_type = LogReader._ARCH_TO_POINTER_TYPE_MAP[self.arch]
360
361    self.code_create_struct = LogReader._DefineStruct([
362        ("name_size", ctypes.c_int32),
363        ("code_address", pointer_type),
364        ("code_size", ctypes.c_int32)])
365
366    self.code_move_struct = LogReader._DefineStruct([
367        ("from_address", pointer_type),
368        ("to_address", pointer_type)])
369
370    self.code_delete_struct = LogReader._DefineStruct([
371        ("address", pointer_type)])
372
373  def ReadUpToGC(self):
374    while self.log_pos < self.log.size():
375      tag = self.log[self.log_pos]
376      self.log_pos += 1
377
378      if tag == LogReader._CODE_MOVING_GC_TAG:
379        return
380
381      if tag == LogReader._CODE_CREATE_TAG:
382        event = self.code_create_struct.from_buffer(self.log, self.log_pos)
383        self.log_pos += ctypes.sizeof(event)
384        start_address = event.code_address
385        end_address = start_address + event.code_size
386        name = self.log[self.log_pos:self.log_pos + event.name_size]
387        origin = JS_ORIGIN
388        self.log_pos += event.name_size
389        origin_offset = self.log_pos
390        self.log_pos += event.code_size
391        code = Code(name, start_address, end_address, origin, origin_offset)
392        conficting_code = self.code_map.Find(start_address)
393        if conficting_code:
394          if not (conficting_code.start_address == code.start_address and
395            conficting_code.end_address == code.end_address):
396            self.code_map.Remove(conficting_code)
397          else:
398            LogReader._HandleCodeConflict(conficting_code, code)
399            # TODO(vitalyr): this warning is too noisy because of our
400            # attempts to reconstruct code log from the snapshot.
401            # print >>sys.stderr, \
402            #     "Warning: Skipping duplicate code log entry %s" % code
403            continue
404        self.code_map.Add(code)
405        continue
406
407      if tag == LogReader._CODE_MOVE_TAG:
408        event = self.code_move_struct.from_buffer(self.log, self.log_pos)
409        self.log_pos += ctypes.sizeof(event)
410        old_start_address = event.from_address
411        new_start_address = event.to_address
412        if old_start_address == new_start_address:
413          # Skip useless code move entries.
414          continue
415        code = self.code_map.Find(old_start_address)
416        if not code:
417          print("Warning: Not found %x" % old_start_address, file=sys.stderr)
418          continue
419        assert code.start_address == old_start_address, \
420            "Inexact move address %x for %s" % (old_start_address, code)
421        self.code_map.Remove(code)
422        size = code.end_address - code.start_address
423        code.start_address = new_start_address
424        code.end_address = new_start_address + size
425        self.code_map.Add(code)
426        continue
427
428      assert False, "Unknown tag %s" % tag
429
430  def Dispose(self):
431    self.log.close()
432    self.log_file.close()
433
434  @staticmethod
435  def _DefineStruct(fields):
436    class Struct(ctypes.Structure):
437      _fields_ = fields
438    return Struct
439
440  @staticmethod
441  def _HandleCodeConflict(old_code, new_code):
442    assert (old_code.start_address == new_code.start_address and
443            old_code.end_address == new_code.end_address), \
444        "Conficting code log entries %s and %s" % (old_code, new_code)
445    if old_code.name == new_code.name:
446      return
447    # Code object may be shared by a few functions. Collect the full
448    # set of names.
449    old_code.AddName(new_code.name)
450
451
452class Descriptor(object):
453  """Descriptor of a structure in the binary trace log."""
454
455  CTYPE_MAP = {
456    "u16": ctypes.c_uint16,
457    "u32": ctypes.c_uint32,
458    "u64": ctypes.c_uint64
459  }
460
461  def __init__(self, fields):
462    class TraceItem(ctypes.Structure):
463      _fields_ = Descriptor.CtypesFields(fields)
464
465      def __str__(self):
466        return ", ".join("%s: %s" % (field, self.__getattribute__(field))
467                         for field, _ in TraceItem._fields_)
468
469    self.ctype = TraceItem
470
471  def Read(self, trace, offset):
472    return self.ctype.from_buffer(trace, offset)
473
474  @staticmethod
475  def CtypesFields(fields):
476    return [(field, Descriptor.CTYPE_MAP[format]) for (field, format) in fields]
477
478
479# Please see http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=tree;f=tools/perf
480# for the gory details.
481
482
483# Reference: struct perf_file_header in kernel/tools/perf/util/header.h
484TRACE_HEADER_DESC = Descriptor([
485  ("magic", "u64"),
486  ("size", "u64"),
487  ("attr_size", "u64"),
488  ("attrs_offset", "u64"),
489  ("attrs_size", "u64"),
490  ("data_offset", "u64"),
491  ("data_size", "u64"),
492  ("event_types_offset", "u64"),
493  ("event_types_size", "u64")
494])
495
496
497# Reference: /usr/include/linux/perf_event.h
498PERF_EVENT_ATTR_DESC = Descriptor([
499  ("type", "u32"),
500  ("size", "u32"),
501  ("config", "u64"),
502  ("sample_period_or_freq", "u64"),
503  ("sample_type", "u64"),
504  ("read_format", "u64"),
505  ("flags", "u64"),
506  ("wakeup_events_or_watermark", "u32"),
507  ("bp_type", "u32"),
508  ("bp_addr", "u64"),
509  ("bp_len", "u64")
510])
511
512
513# Reference: /usr/include/linux/perf_event.h
514PERF_EVENT_HEADER_DESC = Descriptor([
515  ("type", "u32"),
516  ("misc", "u16"),
517  ("size", "u16")
518])
519
520
521# Reference: kernel/tools/perf/util/event.h
522PERF_MMAP_EVENT_BODY_DESC = Descriptor([
523  ("pid", "u32"),
524  ("tid", "u32"),
525  ("addr", "u64"),
526  ("len", "u64"),
527  ("pgoff", "u64")
528])
529
530# Reference: kernel/tools/perf/util/event.h
531PERF_MMAP2_EVENT_BODY_DESC = Descriptor([
532  ("pid", "u32"),
533  ("tid", "u32"),
534  ("addr", "u64"),
535  ("len", "u64"),
536  ("pgoff", "u64"),
537  ("maj", "u32"),
538  ("min", "u32"),
539  ("ino", "u64"),
540  ("ino_generation", "u64"),
541  ("prot", "u32"),
542  ("flags","u32")
543])
544
545# perf_event_attr.sample_type bits control the set of
546# perf_sample_event fields.
547PERF_SAMPLE_IP = 1 << 0
548PERF_SAMPLE_TID = 1 << 1
549PERF_SAMPLE_TIME = 1 << 2
550PERF_SAMPLE_ADDR = 1 << 3
551PERF_SAMPLE_READ = 1 << 4
552PERF_SAMPLE_CALLCHAIN = 1 << 5
553PERF_SAMPLE_ID = 1 << 6
554PERF_SAMPLE_CPU = 1 << 7
555PERF_SAMPLE_PERIOD = 1 << 8
556PERF_SAMPLE_STREAM_ID = 1 << 9
557PERF_SAMPLE_RAW = 1 << 10
558
559
560# Reference: /usr/include/perf_event.h, the comment for PERF_RECORD_SAMPLE.
561PERF_SAMPLE_EVENT_BODY_FIELDS = [
562  ("ip", "u64", PERF_SAMPLE_IP),
563  ("pid", "u32", PERF_SAMPLE_TID),
564  ("tid", "u32", PERF_SAMPLE_TID),
565  ("time", "u64", PERF_SAMPLE_TIME),
566  ("addr", "u64", PERF_SAMPLE_ADDR),
567  ("id", "u64", PERF_SAMPLE_ID),
568  ("stream_id", "u64", PERF_SAMPLE_STREAM_ID),
569  ("cpu", "u32", PERF_SAMPLE_CPU),
570  ("res", "u32", PERF_SAMPLE_CPU),
571  ("period", "u64", PERF_SAMPLE_PERIOD),
572  # Don't want to handle read format that comes after the period and
573  # before the callchain and has variable size.
574  ("nr", "u64", PERF_SAMPLE_CALLCHAIN)
575  # Raw data follows the callchain and is ignored.
576]
577
578
579PERF_SAMPLE_EVENT_IP_FORMAT = "u64"
580
581
582PERF_RECORD_MMAP = 1
583PERF_RECORD_MMAP2 = 10
584PERF_RECORD_SAMPLE = 9
585
586
587class TraceReader(object):
588  """Perf (linux-2.6/tools/perf) trace file reader."""
589
590  _TRACE_HEADER_MAGIC = 4993446653023372624
591
592  def __init__(self, trace_name):
593    self.trace_file = open(trace_name, "r")
594    self.trace = mmap.mmap(self.trace_file.fileno(), 0, mmap.MAP_PRIVATE)
595    self.trace_header = TRACE_HEADER_DESC.Read(self.trace, 0)
596    if self.trace_header.magic != TraceReader._TRACE_HEADER_MAGIC:
597      print("Warning: unsupported trace header magic", file=sys.stderr)
598    self.offset = self.trace_header.data_offset
599    self.limit = self.trace_header.data_offset + self.trace_header.data_size
600    assert self.limit <= self.trace.size(), \
601        "Trace data limit exceeds trace file size"
602    self.header_size = ctypes.sizeof(PERF_EVENT_HEADER_DESC.ctype)
603    assert self.trace_header.attrs_size != 0, \
604        "No perf event attributes found in the trace"
605    perf_event_attr = PERF_EVENT_ATTR_DESC.Read(self.trace,
606                                                self.trace_header.attrs_offset)
607    self.sample_event_body_desc = self._SampleEventBodyDesc(
608        perf_event_attr.sample_type)
609    self.callchain_supported = \
610        (perf_event_attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0
611    if self.callchain_supported:
612      self.ip_struct = Descriptor.CTYPE_MAP[PERF_SAMPLE_EVENT_IP_FORMAT]
613      self.ip_size = ctypes.sizeof(self.ip_struct)
614
615  def ReadEventHeader(self):
616    if self.offset >= self.limit:
617      return None, 0
618    offset = self.offset
619    header = PERF_EVENT_HEADER_DESC.Read(self.trace, self.offset)
620    self.offset += header.size
621    return header, offset
622
623  def ReadMmap(self, header, offset):
624    mmap_info = PERF_MMAP_EVENT_BODY_DESC.Read(self.trace,
625                                               offset + self.header_size)
626    # Read null-terminated filename.
627    filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info):
628                          offset + header.size]
629    mmap_info.filename = HOST_ROOT + filename[:filename.find(chr(0))]
630    return mmap_info
631
632  def ReadMmap2(self, header, offset):
633    mmap_info = PERF_MMAP2_EVENT_BODY_DESC.Read(self.trace,
634                                                offset + self.header_size)
635    # Read null-terminated filename.
636    filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info):
637                          offset + header.size]
638    mmap_info.filename = HOST_ROOT + filename[:filename.find(chr(0))]
639    return mmap_info
640
641  def ReadSample(self, header, offset):
642    sample = self.sample_event_body_desc.Read(self.trace,
643                                              offset + self.header_size)
644    if not self.callchain_supported:
645      return sample
646    sample.ips = []
647    offset += self.header_size + ctypes.sizeof(sample)
648    for _ in range(sample.nr):
649      sample.ips.append(
650        self.ip_struct.from_buffer(self.trace, offset).value)
651      offset += self.ip_size
652    return sample
653
654  def Dispose(self):
655    self.trace.close()
656    self.trace_file.close()
657
658  def _SampleEventBodyDesc(self, sample_type):
659    assert (sample_type & PERF_SAMPLE_READ) == 0, \
660           "Can't hande read format in samples"
661    fields = [(field, format)
662              for (field, format, bit) in PERF_SAMPLE_EVENT_BODY_FIELDS
663              if (bit & sample_type) != 0]
664    return Descriptor(fields)
665
666
667OBJDUMP_SECTION_HEADER_RE = re.compile(
668  r"^\s*\d+\s(\.\S+)\s+[a-f0-9]")
669OBJDUMP_SYMBOL_LINE_RE = re.compile(
670  r"^([a-f0-9]+)\s(.{7})\s(\S+)\s+([a-f0-9]+)\s+(?:\.hidden\s+)?(.*)$")
671OBJDUMP_DYNAMIC_SYMBOLS_START_RE = re.compile(
672  r"^DYNAMIC SYMBOL TABLE")
673OBJDUMP_SKIP_RE = re.compile(
674  r"^.*ld\.so\.cache$")
675KERNEL_ALLSYMS_FILE = "/proc/kallsyms"
676PERF_KERNEL_ALLSYMS_RE = re.compile(
677  r".*kallsyms.*")
678KERNEL_ALLSYMS_LINE_RE = re.compile(
679  r"^([a-f0-9]+)\s(?:t|T)\s(\S+)$")
680
681
682class LibraryRepo(object):
683  def __init__(self):
684    self.infos = []
685    self.names = set()
686    self.ticks = {}
687
688
689  def HasDynamicSymbols(self, filename):
690    if filename.endswith(".ko"): return False
691    process = subprocess.Popen(
692      "%s -h %s" % (OBJDUMP_BIN, filename),
693      shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
694    pipe = process.stdout
695    try:
696      for line in pipe:
697        match = OBJDUMP_SECTION_HEADER_RE.match(line)
698        if match and match.group(1) == 'dynsym': return True
699    finally:
700      pipe.close()
701    assert process.wait() == 0, "Failed to objdump -h %s" % filename
702    return False
703
704
705  def Load(self, mmap_info, code_map, options):
706    # Skip kernel mmaps when requested using the fact that their tid
707    # is 0.
708    if mmap_info.tid == 0 and not options.kernel:
709      return True
710    if OBJDUMP_SKIP_RE.match(mmap_info.filename):
711      return True
712    if PERF_KERNEL_ALLSYMS_RE.match(mmap_info.filename):
713      return self._LoadKernelSymbols(code_map)
714    self.infos.append(mmap_info)
715    mmap_info.ticks = 0
716    mmap_info.unique_name = self._UniqueMmapName(mmap_info)
717    if not os.path.exists(mmap_info.filename):
718      return True
719    # Request section headers (-h), symbols (-t), and dynamic symbols
720    # (-T) from objdump.
721    # Unfortunately, section headers span two lines, so we have to
722    # keep the just seen section name (from the first line in each
723    # section header) in the after_section variable.
724    if self.HasDynamicSymbols(mmap_info.filename):
725      dynamic_symbols = "-T"
726    else:
727      dynamic_symbols = ""
728    process = subprocess.Popen(
729      "%s -h -t %s -C %s" % (OBJDUMP_BIN, dynamic_symbols, mmap_info.filename),
730      shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
731    pipe = process.stdout
732    after_section = None
733    code_sections = set()
734    reloc_sections = set()
735    dynamic = False
736    try:
737      for line in pipe:
738        if after_section:
739          if line.find("CODE") != -1:
740            code_sections.add(after_section)
741          if line.find("RELOC") != -1:
742            reloc_sections.add(after_section)
743          after_section = None
744          continue
745
746        match = OBJDUMP_SECTION_HEADER_RE.match(line)
747        if match:
748          after_section = match.group(1)
749          continue
750
751        if OBJDUMP_DYNAMIC_SYMBOLS_START_RE.match(line):
752          dynamic = True
753          continue
754
755        match = OBJDUMP_SYMBOL_LINE_RE.match(line)
756        if match:
757          start_address = int(match.group(1), 16)
758          origin_offset = start_address
759          flags = match.group(2)
760          section = match.group(3)
761          if section in code_sections:
762            if dynamic or section in reloc_sections:
763              start_address += mmap_info.addr
764            size = int(match.group(4), 16)
765            name = match.group(5)
766            origin = mmap_info.filename
767            code_map.Add(Code(name, start_address, start_address + size,
768                              origin, origin_offset))
769    finally:
770      pipe.close()
771    assert process.wait() == 0, "Failed to objdump %s" % mmap_info.filename
772
773  def Tick(self, pc):
774    for i, mmap_info in enumerate(self.infos):
775      if mmap_info.addr <= pc < (mmap_info.addr + mmap_info.len):
776        mmap_info.ticks += 1
777        self.infos[0], self.infos[i] = mmap_info, self.infos[0]
778        return True
779    return False
780
781  def _UniqueMmapName(self, mmap_info):
782    name = mmap_info.filename
783    index = 1
784    while name in self.names:
785      name = "%s-%d" % (mmap_info.filename, index)
786      index += 1
787    self.names.add(name)
788    return name
789
790  def _LoadKernelSymbols(self, code_map):
791    if not os.path.exists(KERNEL_ALLSYMS_FILE):
792      print("Warning: %s not found" % KERNEL_ALLSYMS_FILE, file=sys.stderr)
793      return False
794    kallsyms = open(KERNEL_ALLSYMS_FILE, "r")
795    code = None
796    for line in kallsyms:
797      match = KERNEL_ALLSYMS_LINE_RE.match(line)
798      if match:
799        start_address = int(match.group(1), 16)
800        end_address = start_address
801        name = match.group(2)
802        if code:
803          code.end_address = start_address
804          code_map.Add(code, 16)
805        code = Code(name, start_address, end_address, "kernel", 0)
806    return True
807
808
809def PrintReport(code_map, library_repo, arch, ticks, options):
810  print("Ticks per symbol:")
811  used_code = [code for code in code_map.UsedCode()]
812  used_code.sort(key=lambda x: x.self_ticks, reverse=True)
813  for i, code in enumerate(used_code):
814    code_ticks = code.self_ticks
815    print("%10d %5.1f%% %s [%s]" % (code_ticks, 100. * code_ticks / ticks,
816                                    code.FullName(), code.origin))
817    if options.disasm_all or i < options.disasm_top:
818      code.PrintAnnotated(arch, options)
819  print()
820  print("Ticks per library:")
821  mmap_infos = [m for m in library_repo.infos if m.ticks > 0]
822  mmap_infos.sort(key=lambda m: m.ticks, reverse=True)
823  for mmap_info in mmap_infos:
824    mmap_ticks = mmap_info.ticks
825    print("%10d %5.1f%% %s" % (mmap_ticks, 100. * mmap_ticks / ticks,
826                               mmap_info.unique_name))
827
828
829def PrintDot(code_map, options):
830  print("digraph G {")
831  for code in code_map.UsedCode():
832    if code.self_ticks < 10:
833      continue
834    print("n%d [shape=box,label=\"%s\"];" % (code.id, code.name))
835    if code.callee_ticks:
836      for callee, ticks in code.callee_ticks.iteritems():
837        print("n%d -> n%d [label=\"%d\"];" % (code.id, callee.id, ticks))
838  print("}")
839
840
841if __name__ == "__main__":
842  parser = optparse.OptionParser(USAGE)
843  parser.add_option("--log",
844                    default="v8.log",
845                    help="V8 log file name [default: %default]")
846  parser.add_option("--trace",
847                    default="perf.data",
848                    help="perf trace file name [default: %default]")
849  parser.add_option("--kernel",
850                    default=False,
851                    action="store_true",
852                    help="process kernel entries [default: %default]")
853  parser.add_option("--disasm-top",
854                    default=0,
855                    type="int",
856                    help=("number of top symbols to disassemble and annotate "
857                          "[default: %default]"))
858  parser.add_option("--disasm-all",
859                    default=False,
860                    action="store_true",
861                    help=("disassemble and annotate all used symbols "
862                          "[default: %default]"))
863  parser.add_option("--dot",
864                    default=False,
865                    action="store_true",
866                    help="produce dot output (WIP) [default: %default]")
867  parser.add_option("--quiet", "-q",
868                    default=False,
869                    action="store_true",
870                    help="no auxiliary messages [default: %default]")
871  parser.add_option("--gc-fake-mmap",
872                    default="/tmp/__v8_gc__",
873                    help="gc fake mmap file [default: %default]")
874  parser.add_option("--objdump",
875                    default="/usr/bin/objdump",
876                    help="objdump tool to use [default: %default]")
877  parser.add_option("--host-root",
878                    default="",
879                    help="Path to the host root [default: %default]")
880  options, args = parser.parse_args()
881
882  if not options.quiet:
883    print("V8 log: %s, %s.ll" % (options.log, options.log))
884    print("Perf trace file: %s" % options.trace)
885
886  V8_GC_FAKE_MMAP = options.gc_fake_mmap
887  HOST_ROOT = options.host_root
888  if os.path.exists(options.objdump):
889    disasm.OBJDUMP_BIN = options.objdump
890    OBJDUMP_BIN = options.objdump
891  else:
892    print("Cannot find %s, falling back to default objdump" % options.objdump)
893
894  # Stats.
895  events = 0
896  ticks = 0
897  missed_ticks = 0
898  really_missed_ticks = 0
899  optimized_ticks = 0
900  generated_ticks = 0
901  v8_internal_ticks = 0
902  mmap_time = 0
903  sample_time = 0
904
905  # Initialize the log reader.
906  code_map = CodeMap()
907  log_reader = LogReader(log_name=options.log + ".ll",
908                         code_map=code_map)
909  if not options.quiet:
910    print("Generated code architecture: %s" % log_reader.arch)
911    print()
912    sys.stdout.flush()
913
914  # Process the code and trace logs.
915  library_repo = LibraryRepo()
916  log_reader.ReadUpToGC()
917  trace_reader = TraceReader(options.trace)
918  while True:
919    header, offset = trace_reader.ReadEventHeader()
920    if not header:
921      break
922    events += 1
923    if header.type == PERF_RECORD_MMAP:
924      start = time.time()
925      mmap_info = trace_reader.ReadMmap(header, offset)
926      if mmap_info.filename == HOST_ROOT + V8_GC_FAKE_MMAP:
927        log_reader.ReadUpToGC()
928      else:
929        library_repo.Load(mmap_info, code_map, options)
930      mmap_time += time.time() - start
931    elif header.type == PERF_RECORD_MMAP2:
932      start = time.time()
933      mmap_info = trace_reader.ReadMmap2(header, offset)
934      if mmap_info.filename == HOST_ROOT + V8_GC_FAKE_MMAP:
935        log_reader.ReadUpToGC()
936      else:
937        library_repo.Load(mmap_info, code_map, options)
938      mmap_time += time.time() - start
939    elif header.type == PERF_RECORD_SAMPLE:
940      ticks += 1
941      start = time.time()
942      sample = trace_reader.ReadSample(header, offset)
943      code = code_map.Find(sample.ip)
944      if code:
945        code.Tick(sample.ip)
946        if code.codetype == Code.OPTIMIZED:
947          optimized_ticks += 1
948        elif code.codetype == Code.FULL_CODEGEN:
949          generated_ticks += 1
950        elif code.codetype == Code.V8INTERNAL:
951          v8_internal_ticks += 1
952      else:
953        missed_ticks += 1
954      if not library_repo.Tick(sample.ip) and not code:
955        really_missed_ticks += 1
956      if trace_reader.callchain_supported:
957        for ip in sample.ips:
958          caller_code = code_map.Find(ip)
959          if caller_code:
960            if code:
961              caller_code.CalleeTick(code)
962            code = caller_code
963      sample_time += time.time() - start
964
965  if options.dot:
966    PrintDot(code_map, options)
967  else:
968    PrintReport(code_map, library_repo, log_reader.arch, ticks, options)
969
970    if not options.quiet:
971      def PrintTicks(number, total, description):
972        print("%10d %5.1f%% ticks in %s" %
973              (number, 100.0*number/total, description))
974      print()
975      print("Stats:")
976      print("%10d total trace events" % events)
977      print("%10d total ticks" % ticks)
978      print("%10d ticks not in symbols" % missed_ticks)
979      unaccounted = "unaccounted ticks"
980      if really_missed_ticks > 0:
981        unaccounted += " (probably in the kernel, try --kernel)"
982      PrintTicks(really_missed_ticks, ticks, unaccounted)
983      PrintTicks(optimized_ticks, ticks, "ticks in optimized code")
984      PrintTicks(generated_ticks, ticks, "ticks in other lazily compiled code")
985      PrintTicks(v8_internal_ticks, ticks, "ticks in v8::internal::*")
986      print("%10d total symbols" % len([c for c in code_map.AllCode()]))
987      print("%10d used symbols" % len([c for c in code_map.UsedCode()]))
988      print("%9.2fs library processing time" % mmap_time)
989      print("%9.2fs tick processing time" % sample_time)
990
991  log_reader.Dispose()
992  trace_reader.Dispose()
993