1#!/usr/bin/env python 2# 3# Copyright 2012 the V8 project authors. All rights reserved. 4# Redistribution and use in source and binary forms, with or without 5# modification, are permitted provided that the following conditions are 6# met: 7# 8# * Redistributions of source code must retain the above copyright 9# notice, this list of conditions and the following disclaimer. 10# * Redistributions in binary form must reproduce the above 11# copyright notice, this list of conditions and the following 12# disclaimer in the documentation and/or other materials provided 13# with the distribution. 14# * Neither the name of Google Inc. nor the names of its 15# contributors may be used to endorse or promote products derived 16# from this software without specific prior written permission. 17# 18# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 30 31# for py2/py3 compatibility 32from __future__ import print_function 33 34import bisect 35import collections 36import ctypes 37import disasm 38import mmap 39import optparse 40import os 41import re 42import subprocess 43import sys 44import time 45 46 47USAGE="""usage: %prog [OPTION]... 48 49Analyses V8 and perf logs to produce profiles. 50 51Perf logs can be collected using a command like: 52 $ perf record -R -e cycles -c 10000 -f -i ./d8 bench.js --ll-prof 53 # -R: collect all data 54 # -e cycles: use cpu-cycles event (run "perf list" for details) 55 # -c 10000: write a sample after each 10000 events 56 # -f: force output file overwrite 57 # -i: limit profiling to our process and the kernel 58 # --ll-prof shell flag enables the right V8 logs 59This will produce a binary trace file (perf.data) that %prog can analyse. 60 61IMPORTANT: 62 The kernel has an internal maximum for events per second, it is 100K by 63 default. That's not enough for "-c 10000". Set it to some higher value: 64 $ echo 10000000 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate 65 You can also make the warning about kernel address maps go away: 66 $ echo 0 | sudo tee /proc/sys/kernel/kptr_restrict 67 68We have a convenience script that handles all of the above for you: 69 $ tools/run-llprof.sh ./d8 bench.js 70 71Examples: 72 # Print flat profile with annotated disassembly for the 10 top 73 # symbols. Use default log names. 74 $ %prog --disasm-top=10 75 76 # Print flat profile with annotated disassembly for all used symbols. 77 # Use default log names and include kernel symbols into analysis. 78 $ %prog --disasm-all --kernel 79 80 # Print flat profile. Use custom log names. 81 $ %prog --log=foo.log --trace=foo.data 82""" 83 84 85JS_ORIGIN = "js" 86 87 88class Code(object): 89 """Code object.""" 90 91 _id = 0 92 UNKNOWN = 0 93 V8INTERNAL = 1 94 FULL_CODEGEN = 2 95 OPTIMIZED = 3 96 97 def __init__(self, name, start_address, end_address, origin, origin_offset): 98 self.id = Code._id 99 Code._id += 1 100 self.name = name 101 self.other_names = None 102 self.start_address = start_address 103 self.end_address = end_address 104 self.origin = origin 105 self.origin_offset = origin_offset 106 self.self_ticks = 0 107 self.self_ticks_map = None 108 self.callee_ticks = None 109 if name.startswith("LazyCompile:*"): 110 self.codetype = Code.OPTIMIZED 111 elif name.startswith("LazyCompile:"): 112 self.codetype = Code.FULL_CODEGEN 113 elif name.startswith("v8::internal::"): 114 self.codetype = Code.V8INTERNAL 115 else: 116 self.codetype = Code.UNKNOWN 117 118 def AddName(self, name): 119 assert self.name != name 120 if self.other_names is None: 121 self.other_names = [name] 122 return 123 if not name in self.other_names: 124 self.other_names.append(name) 125 126 def FullName(self): 127 if self.other_names is None: 128 return self.name 129 self.other_names.sort() 130 return "%s (aka %s)" % (self.name, ", ".join(self.other_names)) 131 132 def IsUsed(self): 133 return self.self_ticks > 0 or self.callee_ticks is not None 134 135 def Tick(self, pc): 136 self.self_ticks += 1 137 if self.self_ticks_map is None: 138 self.self_ticks_map = collections.defaultdict(lambda: 0) 139 offset = pc - self.start_address 140 self.self_ticks_map[offset] += 1 141 142 def CalleeTick(self, callee): 143 if self.callee_ticks is None: 144 self.callee_ticks = collections.defaultdict(lambda: 0) 145 self.callee_ticks[callee] += 1 146 147 def PrintAnnotated(self, arch, options): 148 if self.self_ticks_map is None: 149 ticks_map = [] 150 else: 151 ticks_map = self.self_ticks_map.items() 152 # Convert the ticks map to offsets and counts arrays so that later 153 # we can do binary search in the offsets array. 154 ticks_map.sort(key=lambda t: t[0]) 155 ticks_offsets = [t[0] for t in ticks_map] 156 ticks_counts = [t[1] for t in ticks_map] 157 # Get a list of disassembled lines and their addresses. 158 lines = self._GetDisasmLines(arch, options) 159 if len(lines) == 0: 160 return 161 # Print annotated lines. 162 address = lines[0][0] 163 total_count = 0 164 for i in range(len(lines)): 165 start_offset = lines[i][0] - address 166 if i == len(lines) - 1: 167 end_offset = self.end_address - self.start_address 168 else: 169 end_offset = lines[i + 1][0] - address 170 # Ticks (reported pc values) are not always precise, i.e. not 171 # necessarily point at instruction starts. So we have to search 172 # for ticks that touch the current instruction line. 173 j = bisect.bisect_left(ticks_offsets, end_offset) 174 count = 0 175 for offset, cnt in reversed(zip(ticks_offsets[:j], ticks_counts[:j])): 176 if offset < start_offset: 177 break 178 count += cnt 179 total_count += count 180 percent = 100.0 * count / self.self_ticks 181 offset = lines[i][0] 182 if percent >= 0.01: 183 # 5 spaces for tick count 184 # 1 space following 185 # 1 for '|' 186 # 1 space following 187 # 6 for the percentage number, incl. the '.' 188 # 1 for the '%' sign 189 # => 15 190 print("%5d | %6.2f%% %x(%d): %s" % (count, percent, offset, offset, lines[i][1])) 191 else: 192 print("%s %x(%d): %s" % (" " * 15, offset, offset, lines[i][1])) 193 print() 194 assert total_count == self.self_ticks, \ 195 "Lost ticks (%d != %d) in %s" % (total_count, self.self_ticks, self) 196 197 def __str__(self): 198 return "%s [0x%x, 0x%x) size: %d origin: %s" % ( 199 self.name, 200 self.start_address, 201 self.end_address, 202 self.end_address - self.start_address, 203 self.origin) 204 205 def _GetDisasmLines(self, arch, options): 206 if self.origin == JS_ORIGIN: 207 inplace = False 208 filename = options.log + ".ll" 209 else: 210 inplace = True 211 filename = self.origin 212 return disasm.GetDisasmLines(filename, 213 self.origin_offset, 214 self.end_address - self.start_address, 215 arch, 216 inplace) 217 218 219class CodePage(object): 220 """Group of adjacent code objects.""" 221 222 SHIFT = 20 # 1M pages 223 SIZE = (1 << SHIFT) 224 MASK = ~(SIZE - 1) 225 226 @staticmethod 227 def PageAddress(address): 228 return address & CodePage.MASK 229 230 @staticmethod 231 def PageId(address): 232 return address >> CodePage.SHIFT 233 234 @staticmethod 235 def PageAddressFromId(id): 236 return id << CodePage.SHIFT 237 238 def __init__(self, address): 239 self.address = address 240 self.code_objects = [] 241 242 def Add(self, code): 243 self.code_objects.append(code) 244 245 def Remove(self, code): 246 self.code_objects.remove(code) 247 248 def Find(self, pc): 249 code_objects = self.code_objects 250 for i, code in enumerate(code_objects): 251 if code.start_address <= pc < code.end_address: 252 code_objects[0], code_objects[i] = code, code_objects[0] 253 return code 254 return None 255 256 def __iter__(self): 257 return self.code_objects.__iter__() 258 259 260class CodeMap(object): 261 """Code object map.""" 262 263 def __init__(self): 264 self.pages = {} 265 self.min_address = 1 << 64 266 self.max_address = -1 267 268 def Add(self, code, max_pages=-1): 269 page_id = CodePage.PageId(code.start_address) 270 limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1) 271 pages = 0 272 while page_id < limit_id: 273 if max_pages >= 0 and pages > max_pages: 274 print("Warning: page limit (%d) reached for %s [%s]" % ( 275 max_pages, code.name, code.origin), file=sys.stderr) 276 break 277 if page_id in self.pages: 278 page = self.pages[page_id] 279 else: 280 page = CodePage(CodePage.PageAddressFromId(page_id)) 281 self.pages[page_id] = page 282 page.Add(code) 283 page_id += 1 284 pages += 1 285 self.min_address = min(self.min_address, code.start_address) 286 self.max_address = max(self.max_address, code.end_address) 287 288 def Remove(self, code): 289 page_id = CodePage.PageId(code.start_address) 290 limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1) 291 removed = False 292 while page_id < limit_id: 293 if page_id not in self.pages: 294 page_id += 1 295 continue 296 page = self.pages[page_id] 297 page.Remove(code) 298 removed = True 299 page_id += 1 300 return removed 301 302 def AllCode(self): 303 for page in self.pages.itervalues(): 304 for code in page: 305 if CodePage.PageAddress(code.start_address) == page.address: 306 yield code 307 308 def UsedCode(self): 309 for code in self.AllCode(): 310 if code.IsUsed(): 311 yield code 312 313 def Print(self): 314 for code in self.AllCode(): 315 print(code) 316 317 def Find(self, pc): 318 if pc < self.min_address or pc >= self.max_address: 319 return None 320 page_id = CodePage.PageId(pc) 321 if page_id not in self.pages: 322 return None 323 return self.pages[page_id].Find(pc) 324 325 326class CodeInfo(object): 327 """Generic info about generated code objects.""" 328 329 def __init__(self, arch, header_size): 330 self.arch = arch 331 self.header_size = header_size 332 333 334class LogReader(object): 335 """V8 low-level (binary) log reader.""" 336 337 _ARCH_TO_POINTER_TYPE_MAP = { 338 "ia32": ctypes.c_uint32, 339 "arm": ctypes.c_uint32, 340 "mips": ctypes.c_uint32, 341 "x64": ctypes.c_uint64, 342 "arm64": ctypes.c_uint64 343 } 344 345 _CODE_CREATE_TAG = "C" 346 _CODE_MOVE_TAG = "M" 347 _CODE_MOVING_GC_TAG = "G" 348 349 def __init__(self, log_name, code_map): 350 self.log_file = open(log_name, "r") 351 self.log = mmap.mmap(self.log_file.fileno(), 0, mmap.MAP_PRIVATE) 352 self.log_pos = 0 353 self.code_map = code_map 354 355 self.arch = self.log[:self.log.find("\0")] 356 self.log_pos += len(self.arch) + 1 357 assert self.arch in LogReader._ARCH_TO_POINTER_TYPE_MAP, \ 358 "Unsupported architecture %s" % self.arch 359 pointer_type = LogReader._ARCH_TO_POINTER_TYPE_MAP[self.arch] 360 361 self.code_create_struct = LogReader._DefineStruct([ 362 ("name_size", ctypes.c_int32), 363 ("code_address", pointer_type), 364 ("code_size", ctypes.c_int32)]) 365 366 self.code_move_struct = LogReader._DefineStruct([ 367 ("from_address", pointer_type), 368 ("to_address", pointer_type)]) 369 370 self.code_delete_struct = LogReader._DefineStruct([ 371 ("address", pointer_type)]) 372 373 def ReadUpToGC(self): 374 while self.log_pos < self.log.size(): 375 tag = self.log[self.log_pos] 376 self.log_pos += 1 377 378 if tag == LogReader._CODE_MOVING_GC_TAG: 379 return 380 381 if tag == LogReader._CODE_CREATE_TAG: 382 event = self.code_create_struct.from_buffer(self.log, self.log_pos) 383 self.log_pos += ctypes.sizeof(event) 384 start_address = event.code_address 385 end_address = start_address + event.code_size 386 name = self.log[self.log_pos:self.log_pos + event.name_size] 387 origin = JS_ORIGIN 388 self.log_pos += event.name_size 389 origin_offset = self.log_pos 390 self.log_pos += event.code_size 391 code = Code(name, start_address, end_address, origin, origin_offset) 392 conficting_code = self.code_map.Find(start_address) 393 if conficting_code: 394 if not (conficting_code.start_address == code.start_address and 395 conficting_code.end_address == code.end_address): 396 self.code_map.Remove(conficting_code) 397 else: 398 LogReader._HandleCodeConflict(conficting_code, code) 399 # TODO(vitalyr): this warning is too noisy because of our 400 # attempts to reconstruct code log from the snapshot. 401 # print >>sys.stderr, \ 402 # "Warning: Skipping duplicate code log entry %s" % code 403 continue 404 self.code_map.Add(code) 405 continue 406 407 if tag == LogReader._CODE_MOVE_TAG: 408 event = self.code_move_struct.from_buffer(self.log, self.log_pos) 409 self.log_pos += ctypes.sizeof(event) 410 old_start_address = event.from_address 411 new_start_address = event.to_address 412 if old_start_address == new_start_address: 413 # Skip useless code move entries. 414 continue 415 code = self.code_map.Find(old_start_address) 416 if not code: 417 print("Warning: Not found %x" % old_start_address, file=sys.stderr) 418 continue 419 assert code.start_address == old_start_address, \ 420 "Inexact move address %x for %s" % (old_start_address, code) 421 self.code_map.Remove(code) 422 size = code.end_address - code.start_address 423 code.start_address = new_start_address 424 code.end_address = new_start_address + size 425 self.code_map.Add(code) 426 continue 427 428 assert False, "Unknown tag %s" % tag 429 430 def Dispose(self): 431 self.log.close() 432 self.log_file.close() 433 434 @staticmethod 435 def _DefineStruct(fields): 436 class Struct(ctypes.Structure): 437 _fields_ = fields 438 return Struct 439 440 @staticmethod 441 def _HandleCodeConflict(old_code, new_code): 442 assert (old_code.start_address == new_code.start_address and 443 old_code.end_address == new_code.end_address), \ 444 "Conficting code log entries %s and %s" % (old_code, new_code) 445 if old_code.name == new_code.name: 446 return 447 # Code object may be shared by a few functions. Collect the full 448 # set of names. 449 old_code.AddName(new_code.name) 450 451 452class Descriptor(object): 453 """Descriptor of a structure in the binary trace log.""" 454 455 CTYPE_MAP = { 456 "u16": ctypes.c_uint16, 457 "u32": ctypes.c_uint32, 458 "u64": ctypes.c_uint64 459 } 460 461 def __init__(self, fields): 462 class TraceItem(ctypes.Structure): 463 _fields_ = Descriptor.CtypesFields(fields) 464 465 def __str__(self): 466 return ", ".join("%s: %s" % (field, self.__getattribute__(field)) 467 for field, _ in TraceItem._fields_) 468 469 self.ctype = TraceItem 470 471 def Read(self, trace, offset): 472 return self.ctype.from_buffer(trace, offset) 473 474 @staticmethod 475 def CtypesFields(fields): 476 return [(field, Descriptor.CTYPE_MAP[format]) for (field, format) in fields] 477 478 479# Please see http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=tree;f=tools/perf 480# for the gory details. 481 482 483# Reference: struct perf_file_header in kernel/tools/perf/util/header.h 484TRACE_HEADER_DESC = Descriptor([ 485 ("magic", "u64"), 486 ("size", "u64"), 487 ("attr_size", "u64"), 488 ("attrs_offset", "u64"), 489 ("attrs_size", "u64"), 490 ("data_offset", "u64"), 491 ("data_size", "u64"), 492 ("event_types_offset", "u64"), 493 ("event_types_size", "u64") 494]) 495 496 497# Reference: /usr/include/linux/perf_event.h 498PERF_EVENT_ATTR_DESC = Descriptor([ 499 ("type", "u32"), 500 ("size", "u32"), 501 ("config", "u64"), 502 ("sample_period_or_freq", "u64"), 503 ("sample_type", "u64"), 504 ("read_format", "u64"), 505 ("flags", "u64"), 506 ("wakeup_events_or_watermark", "u32"), 507 ("bp_type", "u32"), 508 ("bp_addr", "u64"), 509 ("bp_len", "u64") 510]) 511 512 513# Reference: /usr/include/linux/perf_event.h 514PERF_EVENT_HEADER_DESC = Descriptor([ 515 ("type", "u32"), 516 ("misc", "u16"), 517 ("size", "u16") 518]) 519 520 521# Reference: kernel/tools/perf/util/event.h 522PERF_MMAP_EVENT_BODY_DESC = Descriptor([ 523 ("pid", "u32"), 524 ("tid", "u32"), 525 ("addr", "u64"), 526 ("len", "u64"), 527 ("pgoff", "u64") 528]) 529 530# Reference: kernel/tools/perf/util/event.h 531PERF_MMAP2_EVENT_BODY_DESC = Descriptor([ 532 ("pid", "u32"), 533 ("tid", "u32"), 534 ("addr", "u64"), 535 ("len", "u64"), 536 ("pgoff", "u64"), 537 ("maj", "u32"), 538 ("min", "u32"), 539 ("ino", "u64"), 540 ("ino_generation", "u64"), 541 ("prot", "u32"), 542 ("flags","u32") 543]) 544 545# perf_event_attr.sample_type bits control the set of 546# perf_sample_event fields. 547PERF_SAMPLE_IP = 1 << 0 548PERF_SAMPLE_TID = 1 << 1 549PERF_SAMPLE_TIME = 1 << 2 550PERF_SAMPLE_ADDR = 1 << 3 551PERF_SAMPLE_READ = 1 << 4 552PERF_SAMPLE_CALLCHAIN = 1 << 5 553PERF_SAMPLE_ID = 1 << 6 554PERF_SAMPLE_CPU = 1 << 7 555PERF_SAMPLE_PERIOD = 1 << 8 556PERF_SAMPLE_STREAM_ID = 1 << 9 557PERF_SAMPLE_RAW = 1 << 10 558 559 560# Reference: /usr/include/perf_event.h, the comment for PERF_RECORD_SAMPLE. 561PERF_SAMPLE_EVENT_BODY_FIELDS = [ 562 ("ip", "u64", PERF_SAMPLE_IP), 563 ("pid", "u32", PERF_SAMPLE_TID), 564 ("tid", "u32", PERF_SAMPLE_TID), 565 ("time", "u64", PERF_SAMPLE_TIME), 566 ("addr", "u64", PERF_SAMPLE_ADDR), 567 ("id", "u64", PERF_SAMPLE_ID), 568 ("stream_id", "u64", PERF_SAMPLE_STREAM_ID), 569 ("cpu", "u32", PERF_SAMPLE_CPU), 570 ("res", "u32", PERF_SAMPLE_CPU), 571 ("period", "u64", PERF_SAMPLE_PERIOD), 572 # Don't want to handle read format that comes after the period and 573 # before the callchain and has variable size. 574 ("nr", "u64", PERF_SAMPLE_CALLCHAIN) 575 # Raw data follows the callchain and is ignored. 576] 577 578 579PERF_SAMPLE_EVENT_IP_FORMAT = "u64" 580 581 582PERF_RECORD_MMAP = 1 583PERF_RECORD_MMAP2 = 10 584PERF_RECORD_SAMPLE = 9 585 586 587class TraceReader(object): 588 """Perf (linux-2.6/tools/perf) trace file reader.""" 589 590 _TRACE_HEADER_MAGIC = 4993446653023372624 591 592 def __init__(self, trace_name): 593 self.trace_file = open(trace_name, "r") 594 self.trace = mmap.mmap(self.trace_file.fileno(), 0, mmap.MAP_PRIVATE) 595 self.trace_header = TRACE_HEADER_DESC.Read(self.trace, 0) 596 if self.trace_header.magic != TraceReader._TRACE_HEADER_MAGIC: 597 print("Warning: unsupported trace header magic", file=sys.stderr) 598 self.offset = self.trace_header.data_offset 599 self.limit = self.trace_header.data_offset + self.trace_header.data_size 600 assert self.limit <= self.trace.size(), \ 601 "Trace data limit exceeds trace file size" 602 self.header_size = ctypes.sizeof(PERF_EVENT_HEADER_DESC.ctype) 603 assert self.trace_header.attrs_size != 0, \ 604 "No perf event attributes found in the trace" 605 perf_event_attr = PERF_EVENT_ATTR_DESC.Read(self.trace, 606 self.trace_header.attrs_offset) 607 self.sample_event_body_desc = self._SampleEventBodyDesc( 608 perf_event_attr.sample_type) 609 self.callchain_supported = \ 610 (perf_event_attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0 611 if self.callchain_supported: 612 self.ip_struct = Descriptor.CTYPE_MAP[PERF_SAMPLE_EVENT_IP_FORMAT] 613 self.ip_size = ctypes.sizeof(self.ip_struct) 614 615 def ReadEventHeader(self): 616 if self.offset >= self.limit: 617 return None, 0 618 offset = self.offset 619 header = PERF_EVENT_HEADER_DESC.Read(self.trace, self.offset) 620 self.offset += header.size 621 return header, offset 622 623 def ReadMmap(self, header, offset): 624 mmap_info = PERF_MMAP_EVENT_BODY_DESC.Read(self.trace, 625 offset + self.header_size) 626 # Read null-terminated filename. 627 filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info): 628 offset + header.size] 629 mmap_info.filename = HOST_ROOT + filename[:filename.find(chr(0))] 630 return mmap_info 631 632 def ReadMmap2(self, header, offset): 633 mmap_info = PERF_MMAP2_EVENT_BODY_DESC.Read(self.trace, 634 offset + self.header_size) 635 # Read null-terminated filename. 636 filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info): 637 offset + header.size] 638 mmap_info.filename = HOST_ROOT + filename[:filename.find(chr(0))] 639 return mmap_info 640 641 def ReadSample(self, header, offset): 642 sample = self.sample_event_body_desc.Read(self.trace, 643 offset + self.header_size) 644 if not self.callchain_supported: 645 return sample 646 sample.ips = [] 647 offset += self.header_size + ctypes.sizeof(sample) 648 for _ in range(sample.nr): 649 sample.ips.append( 650 self.ip_struct.from_buffer(self.trace, offset).value) 651 offset += self.ip_size 652 return sample 653 654 def Dispose(self): 655 self.trace.close() 656 self.trace_file.close() 657 658 def _SampleEventBodyDesc(self, sample_type): 659 assert (sample_type & PERF_SAMPLE_READ) == 0, \ 660 "Can't hande read format in samples" 661 fields = [(field, format) 662 for (field, format, bit) in PERF_SAMPLE_EVENT_BODY_FIELDS 663 if (bit & sample_type) != 0] 664 return Descriptor(fields) 665 666 667OBJDUMP_SECTION_HEADER_RE = re.compile( 668 r"^\s*\d+\s(\.\S+)\s+[a-f0-9]") 669OBJDUMP_SYMBOL_LINE_RE = re.compile( 670 r"^([a-f0-9]+)\s(.{7})\s(\S+)\s+([a-f0-9]+)\s+(?:\.hidden\s+)?(.*)$") 671OBJDUMP_DYNAMIC_SYMBOLS_START_RE = re.compile( 672 r"^DYNAMIC SYMBOL TABLE") 673OBJDUMP_SKIP_RE = re.compile( 674 r"^.*ld\.so\.cache$") 675KERNEL_ALLSYMS_FILE = "/proc/kallsyms" 676PERF_KERNEL_ALLSYMS_RE = re.compile( 677 r".*kallsyms.*") 678KERNEL_ALLSYMS_LINE_RE = re.compile( 679 r"^([a-f0-9]+)\s(?:t|T)\s(\S+)$") 680 681 682class LibraryRepo(object): 683 def __init__(self): 684 self.infos = [] 685 self.names = set() 686 self.ticks = {} 687 688 689 def HasDynamicSymbols(self, filename): 690 if filename.endswith(".ko"): return False 691 process = subprocess.Popen( 692 "%s -h %s" % (OBJDUMP_BIN, filename), 693 shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 694 pipe = process.stdout 695 try: 696 for line in pipe: 697 match = OBJDUMP_SECTION_HEADER_RE.match(line) 698 if match and match.group(1) == 'dynsym': return True 699 finally: 700 pipe.close() 701 assert process.wait() == 0, "Failed to objdump -h %s" % filename 702 return False 703 704 705 def Load(self, mmap_info, code_map, options): 706 # Skip kernel mmaps when requested using the fact that their tid 707 # is 0. 708 if mmap_info.tid == 0 and not options.kernel: 709 return True 710 if OBJDUMP_SKIP_RE.match(mmap_info.filename): 711 return True 712 if PERF_KERNEL_ALLSYMS_RE.match(mmap_info.filename): 713 return self._LoadKernelSymbols(code_map) 714 self.infos.append(mmap_info) 715 mmap_info.ticks = 0 716 mmap_info.unique_name = self._UniqueMmapName(mmap_info) 717 if not os.path.exists(mmap_info.filename): 718 return True 719 # Request section headers (-h), symbols (-t), and dynamic symbols 720 # (-T) from objdump. 721 # Unfortunately, section headers span two lines, so we have to 722 # keep the just seen section name (from the first line in each 723 # section header) in the after_section variable. 724 if self.HasDynamicSymbols(mmap_info.filename): 725 dynamic_symbols = "-T" 726 else: 727 dynamic_symbols = "" 728 process = subprocess.Popen( 729 "%s -h -t %s -C %s" % (OBJDUMP_BIN, dynamic_symbols, mmap_info.filename), 730 shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 731 pipe = process.stdout 732 after_section = None 733 code_sections = set() 734 reloc_sections = set() 735 dynamic = False 736 try: 737 for line in pipe: 738 if after_section: 739 if line.find("CODE") != -1: 740 code_sections.add(after_section) 741 if line.find("RELOC") != -1: 742 reloc_sections.add(after_section) 743 after_section = None 744 continue 745 746 match = OBJDUMP_SECTION_HEADER_RE.match(line) 747 if match: 748 after_section = match.group(1) 749 continue 750 751 if OBJDUMP_DYNAMIC_SYMBOLS_START_RE.match(line): 752 dynamic = True 753 continue 754 755 match = OBJDUMP_SYMBOL_LINE_RE.match(line) 756 if match: 757 start_address = int(match.group(1), 16) 758 origin_offset = start_address 759 flags = match.group(2) 760 section = match.group(3) 761 if section in code_sections: 762 if dynamic or section in reloc_sections: 763 start_address += mmap_info.addr 764 size = int(match.group(4), 16) 765 name = match.group(5) 766 origin = mmap_info.filename 767 code_map.Add(Code(name, start_address, start_address + size, 768 origin, origin_offset)) 769 finally: 770 pipe.close() 771 assert process.wait() == 0, "Failed to objdump %s" % mmap_info.filename 772 773 def Tick(self, pc): 774 for i, mmap_info in enumerate(self.infos): 775 if mmap_info.addr <= pc < (mmap_info.addr + mmap_info.len): 776 mmap_info.ticks += 1 777 self.infos[0], self.infos[i] = mmap_info, self.infos[0] 778 return True 779 return False 780 781 def _UniqueMmapName(self, mmap_info): 782 name = mmap_info.filename 783 index = 1 784 while name in self.names: 785 name = "%s-%d" % (mmap_info.filename, index) 786 index += 1 787 self.names.add(name) 788 return name 789 790 def _LoadKernelSymbols(self, code_map): 791 if not os.path.exists(KERNEL_ALLSYMS_FILE): 792 print("Warning: %s not found" % KERNEL_ALLSYMS_FILE, file=sys.stderr) 793 return False 794 kallsyms = open(KERNEL_ALLSYMS_FILE, "r") 795 code = None 796 for line in kallsyms: 797 match = KERNEL_ALLSYMS_LINE_RE.match(line) 798 if match: 799 start_address = int(match.group(1), 16) 800 end_address = start_address 801 name = match.group(2) 802 if code: 803 code.end_address = start_address 804 code_map.Add(code, 16) 805 code = Code(name, start_address, end_address, "kernel", 0) 806 return True 807 808 809def PrintReport(code_map, library_repo, arch, ticks, options): 810 print("Ticks per symbol:") 811 used_code = [code for code in code_map.UsedCode()] 812 used_code.sort(key=lambda x: x.self_ticks, reverse=True) 813 for i, code in enumerate(used_code): 814 code_ticks = code.self_ticks 815 print("%10d %5.1f%% %s [%s]" % (code_ticks, 100. * code_ticks / ticks, 816 code.FullName(), code.origin)) 817 if options.disasm_all or i < options.disasm_top: 818 code.PrintAnnotated(arch, options) 819 print() 820 print("Ticks per library:") 821 mmap_infos = [m for m in library_repo.infos if m.ticks > 0] 822 mmap_infos.sort(key=lambda m: m.ticks, reverse=True) 823 for mmap_info in mmap_infos: 824 mmap_ticks = mmap_info.ticks 825 print("%10d %5.1f%% %s" % (mmap_ticks, 100. * mmap_ticks / ticks, 826 mmap_info.unique_name)) 827 828 829def PrintDot(code_map, options): 830 print("digraph G {") 831 for code in code_map.UsedCode(): 832 if code.self_ticks < 10: 833 continue 834 print("n%d [shape=box,label=\"%s\"];" % (code.id, code.name)) 835 if code.callee_ticks: 836 for callee, ticks in code.callee_ticks.iteritems(): 837 print("n%d -> n%d [label=\"%d\"];" % (code.id, callee.id, ticks)) 838 print("}") 839 840 841if __name__ == "__main__": 842 parser = optparse.OptionParser(USAGE) 843 parser.add_option("--log", 844 default="v8.log", 845 help="V8 log file name [default: %default]") 846 parser.add_option("--trace", 847 default="perf.data", 848 help="perf trace file name [default: %default]") 849 parser.add_option("--kernel", 850 default=False, 851 action="store_true", 852 help="process kernel entries [default: %default]") 853 parser.add_option("--disasm-top", 854 default=0, 855 type="int", 856 help=("number of top symbols to disassemble and annotate " 857 "[default: %default]")) 858 parser.add_option("--disasm-all", 859 default=False, 860 action="store_true", 861 help=("disassemble and annotate all used symbols " 862 "[default: %default]")) 863 parser.add_option("--dot", 864 default=False, 865 action="store_true", 866 help="produce dot output (WIP) [default: %default]") 867 parser.add_option("--quiet", "-q", 868 default=False, 869 action="store_true", 870 help="no auxiliary messages [default: %default]") 871 parser.add_option("--gc-fake-mmap", 872 default="/tmp/__v8_gc__", 873 help="gc fake mmap file [default: %default]") 874 parser.add_option("--objdump", 875 default="/usr/bin/objdump", 876 help="objdump tool to use [default: %default]") 877 parser.add_option("--host-root", 878 default="", 879 help="Path to the host root [default: %default]") 880 options, args = parser.parse_args() 881 882 if not options.quiet: 883 print("V8 log: %s, %s.ll" % (options.log, options.log)) 884 print("Perf trace file: %s" % options.trace) 885 886 V8_GC_FAKE_MMAP = options.gc_fake_mmap 887 HOST_ROOT = options.host_root 888 if os.path.exists(options.objdump): 889 disasm.OBJDUMP_BIN = options.objdump 890 OBJDUMP_BIN = options.objdump 891 else: 892 print("Cannot find %s, falling back to default objdump" % options.objdump) 893 894 # Stats. 895 events = 0 896 ticks = 0 897 missed_ticks = 0 898 really_missed_ticks = 0 899 optimized_ticks = 0 900 generated_ticks = 0 901 v8_internal_ticks = 0 902 mmap_time = 0 903 sample_time = 0 904 905 # Initialize the log reader. 906 code_map = CodeMap() 907 log_reader = LogReader(log_name=options.log + ".ll", 908 code_map=code_map) 909 if not options.quiet: 910 print("Generated code architecture: %s" % log_reader.arch) 911 print() 912 sys.stdout.flush() 913 914 # Process the code and trace logs. 915 library_repo = LibraryRepo() 916 log_reader.ReadUpToGC() 917 trace_reader = TraceReader(options.trace) 918 while True: 919 header, offset = trace_reader.ReadEventHeader() 920 if not header: 921 break 922 events += 1 923 if header.type == PERF_RECORD_MMAP: 924 start = time.time() 925 mmap_info = trace_reader.ReadMmap(header, offset) 926 if mmap_info.filename == HOST_ROOT + V8_GC_FAKE_MMAP: 927 log_reader.ReadUpToGC() 928 else: 929 library_repo.Load(mmap_info, code_map, options) 930 mmap_time += time.time() - start 931 elif header.type == PERF_RECORD_MMAP2: 932 start = time.time() 933 mmap_info = trace_reader.ReadMmap2(header, offset) 934 if mmap_info.filename == HOST_ROOT + V8_GC_FAKE_MMAP: 935 log_reader.ReadUpToGC() 936 else: 937 library_repo.Load(mmap_info, code_map, options) 938 mmap_time += time.time() - start 939 elif header.type == PERF_RECORD_SAMPLE: 940 ticks += 1 941 start = time.time() 942 sample = trace_reader.ReadSample(header, offset) 943 code = code_map.Find(sample.ip) 944 if code: 945 code.Tick(sample.ip) 946 if code.codetype == Code.OPTIMIZED: 947 optimized_ticks += 1 948 elif code.codetype == Code.FULL_CODEGEN: 949 generated_ticks += 1 950 elif code.codetype == Code.V8INTERNAL: 951 v8_internal_ticks += 1 952 else: 953 missed_ticks += 1 954 if not library_repo.Tick(sample.ip) and not code: 955 really_missed_ticks += 1 956 if trace_reader.callchain_supported: 957 for ip in sample.ips: 958 caller_code = code_map.Find(ip) 959 if caller_code: 960 if code: 961 caller_code.CalleeTick(code) 962 code = caller_code 963 sample_time += time.time() - start 964 965 if options.dot: 966 PrintDot(code_map, options) 967 else: 968 PrintReport(code_map, library_repo, log_reader.arch, ticks, options) 969 970 if not options.quiet: 971 def PrintTicks(number, total, description): 972 print("%10d %5.1f%% ticks in %s" % 973 (number, 100.0*number/total, description)) 974 print() 975 print("Stats:") 976 print("%10d total trace events" % events) 977 print("%10d total ticks" % ticks) 978 print("%10d ticks not in symbols" % missed_ticks) 979 unaccounted = "unaccounted ticks" 980 if really_missed_ticks > 0: 981 unaccounted += " (probably in the kernel, try --kernel)" 982 PrintTicks(really_missed_ticks, ticks, unaccounted) 983 PrintTicks(optimized_ticks, ticks, "ticks in optimized code") 984 PrintTicks(generated_ticks, ticks, "ticks in other lazily compiled code") 985 PrintTicks(v8_internal_ticks, ticks, "ticks in v8::internal::*") 986 print("%10d total symbols" % len([c for c in code_map.AllCode()])) 987 print("%10d used symbols" % len([c for c in code_map.UsedCode()])) 988 print("%9.2fs library processing time" % mmap_time) 989 print("%9.2fs tick processing time" % sample_time) 990 991 log_reader.Dispose() 992 trace_reader.Dispose() 993