1#! /usr/bin/env python3 2# 3# This Source Code Form is subject to the terms of the Mozilla Public 4# License, v. 2.0. If a copy of the MPL was not distributed with this 5# file, You can obtain one at http://mozilla.org/MPL/2.0/. 6 7"""This script analyzes a JSON file emitted by DMD.""" 8 9from __future__ import absolute_import, print_function, division 10 11import argparse 12import collections 13import gzip 14import io 15import json 16import os 17import platform 18import re 19import shutil 20import sys 21import tempfile 22from bisect import bisect_right 23from functools import cmp_to_key 24 25# The DMD output version this script handles. 26outputVersion = 5 27 28# If --ignore-alloc-fns is specified, stack frames containing functions that 29# match these strings will be removed from the *start* of stack traces. (Once 30# we hit a non-matching frame, any subsequent frames won't be removed even if 31# they do match.) 32allocatorFns = [ 33 # Matches malloc, replace_malloc, moz_xmalloc, vpx_malloc, js_malloc, 34 # pod_malloc, malloc_zone_*, g_malloc. 35 "malloc", 36 # Matches calloc, replace_calloc, moz_xcalloc, vpx_calloc, js_calloc, 37 # pod_calloc, malloc_zone_calloc, pod_callocCanGC. 38 "calloc", 39 # Matches realloc, replace_realloc, moz_xrealloc, vpx_realloc, js_realloc, 40 # pod_realloc, pod_reallocCanGC. 41 "realloc", 42 # Matches memalign, posix_memalign, replace_memalign, replace_posix_memalign, 43 # moz_xmemalign, vpx_memalign, malloc_zone_memalign. 44 "memalign", 45 "operator new(", 46 "operator new[](", 47 "g_slice_alloc", 48 # This one necessary to fully filter some sequences of allocation functions 49 # that happen in practice. Note that ??? entries that follow non-allocation 50 # functions won't be stripped, as explained above. 51 "???", 52] 53 54 55def cmp(a, b): 56 return (a > b) - (a < b) 57 58 59class Record(object): 60 """A record is an aggregation of heap blocks that have identical stack 61 traces. It can also be used to represent the difference between two 62 records.""" 63 64 def __init__(self): 65 self.numBlocks = 0 66 self.reqSize = 0 67 self.slopSize = 0 68 self.usableSize = 0 69 self.allocatedAtDesc = None 70 self.reportedAtDescs = [] 71 self.usableSizes = collections.defaultdict(int) 72 73 def isZero(self, args): 74 return ( 75 self.numBlocks == 0 76 and self.reqSize == 0 77 and self.slopSize == 0 78 and self.usableSize == 0 79 and len(self.usableSizes) == 0 80 ) 81 82 def negate(self): 83 self.numBlocks = -self.numBlocks 84 self.reqSize = -self.reqSize 85 self.slopSize = -self.slopSize 86 self.usableSize = -self.usableSize 87 88 negatedUsableSizes = collections.defaultdict(int) 89 for usableSize, count in self.usableSizes.items(): 90 negatedUsableSizes[-usableSize] = count 91 self.usableSizes = negatedUsableSizes 92 93 def subtract(self, r): 94 # We should only be calling this on records with matching stack traces. 95 # Check this. 96 assert self.allocatedAtDesc == r.allocatedAtDesc 97 assert self.reportedAtDescs == r.reportedAtDescs 98 99 self.numBlocks -= r.numBlocks 100 self.reqSize -= r.reqSize 101 self.slopSize -= r.slopSize 102 self.usableSize -= r.usableSize 103 104 usableSizes1 = self.usableSizes 105 usableSizes2 = r.usableSizes 106 usableSizes3 = collections.defaultdict(int) 107 for usableSize in usableSizes1: 108 counts1 = usableSizes1[usableSize] 109 if usableSize in usableSizes2: 110 counts2 = usableSizes2[usableSize] 111 del usableSizes2[usableSize] 112 counts3 = counts1 - counts2 113 if counts3 != 0: 114 if counts3 < 0: 115 usableSize = -usableSize 116 counts3 = -counts3 117 usableSizes3[usableSize] = counts3 118 else: 119 usableSizes3[usableSize] = counts1 120 121 for usableSize in usableSizes2: 122 usableSizes3[-usableSize] = usableSizes2[usableSize] 123 124 self.usableSizes = usableSizes3 125 126 @staticmethod 127 def cmpByUsableSize(r1, r2): 128 # Sort by usable size, then by req size. 129 return cmp(abs(r1.usableSize), abs(r2.usableSize)) or Record.cmpByReqSize( 130 r1, r2 131 ) 132 133 @staticmethod 134 def cmpByReqSize(r1, r2): 135 # Sort by req size. 136 return cmp(abs(r1.reqSize), abs(r2.reqSize)) 137 138 @staticmethod 139 def cmpBySlopSize(r1, r2): 140 # Sort by slop size. 141 return cmp(abs(r1.slopSize), abs(r2.slopSize)) 142 143 @staticmethod 144 def cmpByNumBlocks(r1, r2): 145 # Sort by block counts, then by usable size. 146 return cmp(abs(r1.numBlocks), abs(r2.numBlocks)) or Record.cmpByUsableSize( 147 r1, r2 148 ) 149 150 151sortByChoices = { 152 "usable": Record.cmpByUsableSize, # the default 153 "req": Record.cmpByReqSize, 154 "slop": Record.cmpBySlopSize, 155 "num-blocks": Record.cmpByNumBlocks, 156} 157 158 159def parseCommandLine(): 160 # 24 is the maximum number of frames that DMD will produce. 161 def range_1_24(string): 162 value = int(string) 163 if value < 1 or value > 24: 164 msg = "{:s} is not in the range 1..24".format(string) 165 raise argparse.ArgumentTypeError(msg) 166 return value 167 168 description = """ 169Analyze heap data produced by DMD. 170If one file is specified, analyze it; if two files are specified, analyze the 171difference. 172Input files can be gzipped. 173Write to stdout unless -o/--output is specified. 174Stack traces are fixed to show function names, filenames and line numbers 175unless --no-fix-stacks is specified; stack fixing modifies the original file 176and may take some time. If specified, the BREAKPAD_SYMBOLS_PATH environment 177variable is used to find breakpad symbols for stack fixing. 178""" 179 p = argparse.ArgumentParser(description=description) 180 181 p.add_argument( 182 "-o", 183 "--output", 184 type=argparse.FileType("w"), 185 help="output file; stdout if unspecified", 186 ) 187 188 p.add_argument( 189 "-f", 190 "--max-frames", 191 type=range_1_24, 192 default=8, 193 help="maximum number of frames to consider in each trace", 194 ) 195 196 p.add_argument( 197 "-s", 198 "--sort-by", 199 choices=sortByChoices.keys(), 200 default="usable", 201 help="sort the records by a particular metric", 202 ) 203 204 p.add_argument( 205 "-a", 206 "--ignore-alloc-fns", 207 action="store_true", 208 help="ignore allocation functions at the start of traces", 209 ) 210 211 p.add_argument("--no-fix-stacks", action="store_true", help="do not fix stacks") 212 213 p.add_argument( 214 "--clamp-contents", 215 action="store_true", 216 help="for a scan mode log, clamp addresses to the start of live blocks, " 217 "or zero if not in one", 218 ) 219 220 p.add_argument( 221 "--print-clamp-stats", 222 action="store_true", 223 help="print information about the results of pointer clamping; mostly " 224 "useful for debugging clamping", 225 ) 226 227 p.add_argument( 228 "--filter-stacks-for-testing", 229 action="store_true", 230 help="filter stack traces; only useful for testing purposes", 231 ) 232 233 p.add_argument( 234 "--allocation-filter", 235 help="Only print entries that have a stack that matches the filter", 236 ) 237 238 p.add_argument("input_file", help="a file produced by DMD") 239 240 p.add_argument( 241 "input_file2", 242 nargs="?", 243 help="a file produced by DMD; if present, it is diff'd with input_file", 244 ) 245 246 return p.parse_args(sys.argv[1:]) 247 248 249# Fix stacks if necessary: first write the output to a tempfile, then replace 250# the original file with it. 251def fixStackTraces(inputFilename, isZipped, opener): 252 # This append() call is needed to make the import statements work when this 253 # script is installed as a symlink. 254 sys.path.append(os.path.dirname(__file__)) 255 256 bpsyms = os.environ.get("BREAKPAD_SYMBOLS_PATH", None) 257 sysname = platform.system() 258 if bpsyms and os.path.exists(bpsyms): 259 import fix_stacks as fixModule 260 261 def fix(line): 262 return fixModule.fixSymbols(line, jsonMode=True, breakpadSymsDir=bpsyms) 263 264 elif sysname in ("Linux", "Darwin", "Windows"): 265 import fix_stacks as fixModule 266 267 def fix(line): 268 return fixModule.fixSymbols(line, jsonMode=True) 269 270 else: 271 return 272 273 # Fix stacks, writing output to a temporary file, and then overwrite the 274 # original file. 275 tmpFile = tempfile.NamedTemporaryFile(delete=False) 276 277 # If the input is gzipped, then the output (written initially to |tmpFile|) 278 # should be gzipped as well. 279 # 280 # And we want to set its pre-gzipped filename to '' rather than the name of 281 # the temporary file, so that programs like the Unix 'file' utility don't 282 # say that it was called 'tmp6ozTxE' (or something like that) before it was 283 # zipped. So that explains the |filename=''| parameter. 284 # 285 # But setting the filename like that clobbers |tmpFile.name|, so we must 286 # get that now in order to move |tmpFile| at the end. 287 tmpFilename = tmpFile.name 288 if isZipped: 289 tmpFile = gzip.GzipFile(filename="", fileobj=tmpFile, mode="wb") 290 291 with opener(inputFilename, "rb") as inputFile: 292 for line in inputFile: 293 tmpFile.write(fix(line)) 294 295 tmpFile.close() 296 297 shutil.move(tmpFilename, inputFilename) 298 299 300def getDigestFromFile(args, inputFile): 301 # Handle gzipped input if necessary. 302 isZipped = inputFile.endswith(".gz") 303 opener = gzip.open if isZipped else open 304 305 # Fix stack traces unless otherwise instructed. 306 if not args.no_fix_stacks: 307 fixStackTraces(inputFile, isZipped, opener) 308 309 if args.clamp_contents: 310 clampBlockList(args, inputFile, isZipped, opener) 311 312 with opener(inputFile, "rb") as f: 313 j = json.load(f) 314 315 if j["version"] != outputVersion: 316 raise Exception("'version' property isn't '{:d}'".format(outputVersion)) 317 318 # Extract the main parts of the JSON object. 319 invocation = j["invocation"] 320 dmdEnvVar = invocation["dmdEnvVar"] 321 mode = invocation["mode"] 322 blockList = j["blockList"] 323 traceTable = j["traceTable"] 324 frameTable = j["frameTable"] 325 326 # Insert the necessary entries for unrecorded stack traces. Note that 'ut' 327 # and 'uf' will not overlap with any keys produced by DMD's 328 # ToIdStringConverter::Base32() function. 329 unrecordedTraceID = "ut" 330 unrecordedFrameID = "uf" 331 traceTable[unrecordedTraceID] = [unrecordedFrameID] 332 frameTable[ 333 unrecordedFrameID 334 ] = "#00: (no stack trace recorded due to --stacks=partial)" 335 336 # For the purposes of this script, 'scan' behaves like 'live'. 337 if mode == "scan": 338 mode = "live" 339 340 if mode not in ["live", "dark-matter", "cumulative"]: 341 raise Exception("bad 'mode' property: '{:s}'".format(mode)) 342 343 # Remove allocation functions at the start of traces. 344 if args.ignore_alloc_fns: 345 # Build a regexp that matches every function in allocatorFns. 346 escapedAllocatorFns = map(re.escape, allocatorFns) 347 fn_re = re.compile("|".join(escapedAllocatorFns)) 348 349 # Remove allocator fns from each stack trace. 350 for traceKey, frameKeys in traceTable.items(): 351 numSkippedFrames = 0 352 for frameKey in frameKeys: 353 frameDesc = frameTable[frameKey] 354 if re.search(fn_re, frameDesc): 355 numSkippedFrames += 1 356 else: 357 break 358 if numSkippedFrames > 0: 359 traceTable[traceKey] = frameKeys[numSkippedFrames:] 360 361 # Trim the number of frames. 362 for traceKey, frameKeys in traceTable.items(): 363 if len(frameKeys) > args.max_frames: 364 del frameKeys[args.max_frames :] 365 366 def buildTraceDescription(traceTable, frameTable, traceKey): 367 frameKeys = traceTable[traceKey] 368 fmt = " #{:02d}{:}" 369 370 if args.filter_stacks_for_testing: 371 # This option is used by `test_dmd.js`, which runs the code in 372 # `SmokeDMD.cpp`. When running that test, there is too much 373 # variation in the stack traces across different machines and 374 # platforms to do exact output matching. However, every stack trace 375 # should have at least three frames that contain `DMD` (in one of 376 # `DMD.cpp`, `SmokeDMD.cpp`, `SmokeDMD`, or `SmokeDMD.exe`). Some 377 # example frames from automation (where `..` indicates excised path 378 # segments): 379 # 380 # Linux debug, with stack fixing using breakpad syms: 381 # `#01: replace_realloc(void*, unsigned long) [../dmd/DMD.cpp:1110]` 382 # 383 # Linux opt, with native stack fixing: 384 # `#02: TestFull(char const*, int, char const*, int) (../dmd/test/SmokeDMD.cpp:165)` 385 # 386 # Mac opt, with native stack fixing: 387 # `#03: RunTests() (../build/tests/bin/SmokeDMD + 0x21f9)` 388 # 389 # Windows opt, with native stack fixing failing due to a missing PDB: 390 # `#04: ??? (..\\build\\tests\\bin\\SmokeDMD.exe + 0x1c58)` 391 # 392 # If we see three such frames, we replace the entire stack trace 393 # with a single, predictable frame. This imprecise matching will at 394 # least detect if stack fixing fails completely. 395 dmd_frame_matches = 0 396 for frameKey in frameKeys: 397 frameDesc = frameTable[frameKey] 398 if "DMD" in frameDesc: 399 dmd_frame_matches += 1 400 if dmd_frame_matches >= 3: 401 return [fmt.format(1, ": ... DMD.cpp ...")] 402 403 # The frame number is always '#00' (see DMD.h for why), so we have to 404 # replace that with the correct frame number. 405 desc = [] 406 for n, frameKey in enumerate(traceTable[traceKey], start=1): 407 desc.append(fmt.format(n, frameTable[frameKey][3:])) 408 return desc 409 410 # Aggregate blocks into records. All sufficiently similar blocks go into a 411 # single record. 412 413 if mode in ["live", "cumulative"]: 414 liveOrCumulativeRecords = collections.defaultdict(Record) 415 elif mode == "dark-matter": 416 unreportedRecords = collections.defaultdict(Record) 417 onceReportedRecords = collections.defaultdict(Record) 418 twiceReportedRecords = collections.defaultdict(Record) 419 420 heapUsableSize = 0 421 heapBlocks = 0 422 423 recordKeyPartCache = {} 424 425 for block in blockList: 426 # For each block we compute a |recordKey|, and all blocks with the same 427 # |recordKey| are aggregated into a single record. The |recordKey| is 428 # derived from the block's 'alloc' and 'reps' (if present) stack 429 # traces. 430 # 431 # We use frame descriptions (e.g. "#00: foo (X.cpp:99)") when comparing 432 # traces for equality. We can't use trace keys or frame keys because 433 # they're not comparable across different DMD runs (which is relevant 434 # when doing diffs). 435 # 436 # Using frame descriptions also fits in with the stack trimming done 437 # for --max-frames, which requires that stack traces with common 438 # beginnings but different endings to be considered equivalent. E.g. if 439 # we have distinct traces T1:[A:D1,B:D2,C:D3] and T2:[X:D1,Y:D2,Z:D4] 440 # and we trim the final frame of each they should be considered 441 # equivalent because the untrimmed frame descriptions (D1 and D2) 442 # match. 443 # 444 # Having said all that, during a single invocation of dmd.py on a 445 # single DMD file, for a single frameKey value the record key will 446 # always be the same, and we might encounter it 1000s of times. So we 447 # cache prior results for speed. 448 def makeRecordKeyPart(traceKey): 449 if traceKey in recordKeyPartCache: 450 return recordKeyPartCache[traceKey] 451 452 recordKeyPart = str( 453 list(map(lambda frameKey: frameTable[frameKey], traceTable[traceKey])) 454 ) 455 recordKeyPartCache[traceKey] = recordKeyPart 456 return recordKeyPart 457 458 allocatedAtTraceKey = block.get("alloc", unrecordedTraceID) 459 if mode in ["live", "cumulative"]: 460 recordKey = makeRecordKeyPart(allocatedAtTraceKey) 461 records = liveOrCumulativeRecords 462 elif mode == "dark-matter": 463 recordKey = makeRecordKeyPart(allocatedAtTraceKey) 464 if "reps" in block: 465 reportedAtTraceKeys = block["reps"] 466 for reportedAtTraceKey in reportedAtTraceKeys: 467 recordKey += makeRecordKeyPart(reportedAtTraceKey) 468 if len(reportedAtTraceKeys) == 1: 469 records = onceReportedRecords 470 else: 471 records = twiceReportedRecords 472 else: 473 records = unreportedRecords 474 475 record = records[recordKey] 476 477 if "req" not in block: 478 raise Exception("'req' property missing in block'") 479 480 reqSize = block["req"] 481 slopSize = block.get("slop", 0) 482 483 if "num" in block: 484 num = block["num"] 485 else: 486 num = 1 487 488 usableSize = reqSize + slopSize 489 heapUsableSize += num * usableSize 490 heapBlocks += num 491 492 record.numBlocks += num 493 record.reqSize += num * reqSize 494 record.slopSize += num * slopSize 495 record.usableSize += num * usableSize 496 if record.allocatedAtDesc is None: 497 record.allocatedAtDesc = buildTraceDescription( 498 traceTable, frameTable, allocatedAtTraceKey 499 ) 500 501 if mode in ["live", "cumulative"]: 502 pass 503 elif mode == "dark-matter": 504 if "reps" in block and record.reportedAtDescs == []: 505 506 def f(k): 507 return buildTraceDescription(traceTable, frameTable, k) 508 509 record.reportedAtDescs = list(map(f, reportedAtTraceKeys)) 510 record.usableSizes[usableSize] += num 511 512 # All the processed data for a single DMD file is called a "digest". 513 digest = {} 514 digest["dmdEnvVar"] = dmdEnvVar 515 digest["mode"] = mode 516 digest["heapUsableSize"] = heapUsableSize 517 digest["heapBlocks"] = heapBlocks 518 if mode in ["live", "cumulative"]: 519 digest["liveOrCumulativeRecords"] = liveOrCumulativeRecords 520 elif mode == "dark-matter": 521 digest["unreportedRecords"] = unreportedRecords 522 digest["onceReportedRecords"] = onceReportedRecords 523 digest["twiceReportedRecords"] = twiceReportedRecords 524 return digest 525 526 527def diffRecords(args, records1, records2): 528 records3 = {} 529 530 # Process records1. 531 for k in records1: 532 r1 = records1[k] 533 if k in records2: 534 # This record is present in both records1 and records2. 535 r2 = records2[k] 536 del records2[k] 537 r2.subtract(r1) 538 if not r2.isZero(args): 539 records3[k] = r2 540 else: 541 # This record is present only in records1. 542 r1.negate() 543 records3[k] = r1 544 545 for k in records2: 546 # This record is present only in records2. 547 records3[k] = records2[k] 548 549 return records3 550 551 552def diffDigests(args, d1, d2): 553 if d1["mode"] != d2["mode"]: 554 raise Exception("the input files have different 'mode' properties") 555 556 d3 = {} 557 d3["dmdEnvVar"] = (d1["dmdEnvVar"], d2["dmdEnvVar"]) 558 d3["mode"] = d1["mode"] 559 d3["heapUsableSize"] = d2["heapUsableSize"] - d1["heapUsableSize"] 560 d3["heapBlocks"] = d2["heapBlocks"] - d1["heapBlocks"] 561 if d1["mode"] in ["live", "cumulative"]: 562 d3["liveOrCumulativeRecords"] = diffRecords( 563 args, d1["liveOrCumulativeRecords"], d2["liveOrCumulativeRecords"] 564 ) 565 elif d1["mode"] == "dark-matter": 566 d3["unreportedRecords"] = diffRecords( 567 args, d1["unreportedRecords"], d2["unreportedRecords"] 568 ) 569 d3["onceReportedRecords"] = diffRecords( 570 args, d1["onceReportedRecords"], d2["onceReportedRecords"] 571 ) 572 d3["twiceReportedRecords"] = diffRecords( 573 args, d1["twiceReportedRecords"], d2["twiceReportedRecords"] 574 ) 575 return d3 576 577 578def printDigest(args, digest): 579 dmdEnvVar = digest["dmdEnvVar"] 580 mode = digest["mode"] 581 heapUsableSize = digest["heapUsableSize"] 582 heapBlocks = digest["heapBlocks"] 583 if mode in ["live", "cumulative"]: 584 liveOrCumulativeRecords = digest["liveOrCumulativeRecords"] 585 elif mode == "dark-matter": 586 unreportedRecords = digest["unreportedRecords"] 587 onceReportedRecords = digest["onceReportedRecords"] 588 twiceReportedRecords = digest["twiceReportedRecords"] 589 590 separator = "#" + "-" * 65 + "\n" 591 592 def number(n): 593 """Format a number with comma as a separator.""" 594 return "{:,d}".format(n) 595 596 def perc(m, n): 597 return 0 if n == 0 else (100 * m / n) 598 599 def plural(n): 600 return "" if n == 1 else "s" 601 602 # Prints to stdout, or to file if -o/--output was specified. 603 def out(*arguments, **kwargs): 604 print(*arguments, file=args.output, **kwargs) 605 606 def printStack(traceDesc): 607 for frameDesc in traceDesc: 608 out(frameDesc) 609 610 def printRecords(recordKind, records, heapUsableSize): 611 RecordKind = recordKind.capitalize() 612 out(separator) 613 numRecords = len(records) 614 cmpRecords = sortByChoices[args.sort_by] 615 sortedRecords = sorted( 616 records.values(), key=cmp_to_key(cmpRecords), reverse=True 617 ) 618 kindBlocks = 0 619 kindUsableSize = 0 620 maxRecord = 1000 621 622 if args.allocation_filter: 623 sortedRecords = list( 624 filter( 625 lambda x: any( 626 map(lambda y: args.allocation_filter in y, x.allocatedAtDesc) 627 ), 628 sortedRecords, 629 ) 630 ) 631 632 # First iteration: get totals, etc. 633 for record in sortedRecords: 634 kindBlocks += record.numBlocks 635 kindUsableSize += record.usableSize 636 637 # Second iteration: print. 638 if numRecords == 0: 639 out("# no {:} heap blocks\n".format(recordKind)) 640 641 kindCumulativeUsableSize = 0 642 for i, record in enumerate(sortedRecords, start=1): 643 # Stop printing at the |maxRecord|th record. 644 if i == maxRecord: 645 out( 646 "# {:}: stopping after {:,d} heap block records\n".format( 647 RecordKind, i 648 ) 649 ) 650 break 651 652 kindCumulativeUsableSize += record.usableSize 653 654 out(RecordKind + " {") 655 out( 656 " {:} block{:} in heap block record {:,d} of {:,d}".format( 657 number(record.numBlocks), plural(record.numBlocks), i, numRecords 658 ) 659 ) 660 out( 661 " {:} bytes ({:} requested / {:} slop)".format( 662 number(record.usableSize), 663 number(record.reqSize), 664 number(record.slopSize), 665 ) 666 ) 667 668 usableSizes = sorted( 669 record.usableSizes.items(), key=lambda x: abs(x[0]), reverse=True 670 ) 671 hasSingleBlock = len(usableSizes) == 1 and usableSizes[0][1] == 1 672 673 if not hasSingleBlock: 674 out(" Individual block sizes: ", end="") 675 if len(usableSizes) == 0: 676 out("(no change)", end="") 677 else: 678 isFirst = True 679 for usableSize, count in usableSizes: 680 if not isFirst: 681 out("; ", end="") 682 out("{:}".format(number(usableSize)), end="") 683 if count > 1: 684 out(" x {:,d}".format(count), end="") 685 isFirst = False 686 out() 687 688 out( 689 " {:4.2f}% of the heap ({:4.2f}% cumulative)".format( 690 perc(record.usableSize, heapUsableSize), 691 perc(kindCumulativeUsableSize, heapUsableSize), 692 ) 693 ) 694 if mode in ["live", "cumulative"]: 695 pass 696 elif mode == "dark-matter": 697 out( 698 " {:4.2f}% of {:} ({:4.2f}% cumulative)".format( 699 perc(record.usableSize, kindUsableSize), 700 recordKind, 701 perc(kindCumulativeUsableSize, kindUsableSize), 702 ) 703 ) 704 out(" Allocated at {") 705 printStack(record.allocatedAtDesc) 706 out(" }") 707 if mode in ["live", "cumulative"]: 708 pass 709 elif mode == "dark-matter": 710 for n, reportedAtDesc in enumerate(record.reportedAtDescs): 711 again = "again " if n > 0 else "" 712 out(" Reported {:}at {{".format(again)) 713 printStack(reportedAtDesc) 714 out(" }") 715 out("}\n") 716 717 return (kindUsableSize, kindBlocks) 718 719 def printInvocation(n, dmdEnvVar, mode): 720 out("Invocation{:} {{".format(n)) 721 if dmdEnvVar is None: 722 out(" $DMD is undefined") 723 else: 724 out(" $DMD = '" + dmdEnvVar + "'") 725 out(" Mode = '" + mode + "'") 726 out("}\n") 727 728 # Print command line. Strip dirs so the output is deterministic, which is 729 # needed for testing. 730 out(separator, end="") 731 out("# " + " ".join(map(os.path.basename, sys.argv)) + "\n") 732 733 # Print invocation(s). 734 if type(dmdEnvVar) is not tuple: 735 printInvocation("", dmdEnvVar, mode) 736 else: 737 printInvocation(" 1", dmdEnvVar[0], mode) 738 printInvocation(" 2", dmdEnvVar[1], mode) 739 740 # Print records. 741 if mode in ["live", "cumulative"]: 742 liveOrCumulativeUsableSize, liveOrCumulativeBlocks = printRecords( 743 mode, liveOrCumulativeRecords, heapUsableSize 744 ) 745 elif mode == "dark-matter": 746 twiceReportedUsableSize, twiceReportedBlocks = printRecords( 747 "twice-reported", twiceReportedRecords, heapUsableSize 748 ) 749 750 unreportedUsableSize, unreportedBlocks = printRecords( 751 "unreported", unreportedRecords, heapUsableSize 752 ) 753 754 onceReportedUsableSize, onceReportedBlocks = printRecords( 755 "once-reported", onceReportedRecords, heapUsableSize 756 ) 757 758 # Print summary. 759 out(separator) 760 out("Summary {") 761 if mode in ["live", "cumulative"]: 762 out( 763 " Total: {:} bytes in {:} blocks".format( 764 number(liveOrCumulativeUsableSize), number(liveOrCumulativeBlocks) 765 ) 766 ) 767 elif mode == "dark-matter": 768 fmt = " {:15} {:>12} bytes ({:6.2f}%) in {:>7} blocks ({:6.2f}%)" 769 out(fmt.format("Total:", number(heapUsableSize), 100, number(heapBlocks), 100)) 770 out( 771 fmt.format( 772 "Unreported:", 773 number(unreportedUsableSize), 774 perc(unreportedUsableSize, heapUsableSize), 775 number(unreportedBlocks), 776 perc(unreportedBlocks, heapBlocks), 777 ) 778 ) 779 out( 780 fmt.format( 781 "Once-reported:", 782 number(onceReportedUsableSize), 783 perc(onceReportedUsableSize, heapUsableSize), 784 number(onceReportedBlocks), 785 perc(onceReportedBlocks, heapBlocks), 786 ) 787 ) 788 out( 789 fmt.format( 790 "Twice-reported:", 791 number(twiceReportedUsableSize), 792 perc(twiceReportedUsableSize, heapUsableSize), 793 number(twiceReportedBlocks), 794 perc(twiceReportedBlocks, heapBlocks), 795 ) 796 ) 797 out("}\n") 798 799 800############################# 801# Pretty printer for DMD JSON 802############################# 803 804 805def prettyPrintDmdJson(out, j): 806 out.write("{\n") 807 808 out.write(' "version": {0},\n'.format(j["version"])) 809 out.write(' "invocation": ') 810 json.dump(j["invocation"], out, sort_keys=True) 811 out.write(",\n") 812 813 out.write(' "blockList": [') 814 first = True 815 for b in j["blockList"]: 816 out.write("" if first else ",") 817 out.write("\n ") 818 json.dump(b, out, sort_keys=True) 819 first = False 820 out.write("\n ],\n") 821 822 out.write(' "traceTable": {') 823 first = True 824 for k, l in j["traceTable"].items(): 825 out.write("" if first else ",") 826 out.write('\n "{0}": {1}'.format(k, json.dumps(l))) 827 first = False 828 out.write("\n },\n") 829 830 out.write(' "frameTable": {') 831 first = True 832 for k, v in j["frameTable"].items(): 833 out.write("" if first else ",") 834 out.write('\n "{0}": {1}'.format(k, json.dumps(v))) 835 first = False 836 out.write("\n }\n") 837 838 out.write("}\n") 839 840 841################################################################## 842# Code for clamping addresses using conservative pointer analysis. 843################################################################## 844 845# Start is the address of the first byte of the block, while end is 846# the address of the first byte after the final byte in the block. 847class AddrRange: 848 def __init__(self, block, length): 849 self.block = block 850 self.start = int(block, 16) 851 self.length = length 852 self.end = self.start + self.length 853 854 assert self.start > 0 855 assert length >= 0 856 857 858class ClampStats: 859 def __init__(self): 860 # Number of pointers already pointing to the start of a block. 861 self.startBlockPtr = 0 862 863 # Number of pointers pointing to the middle of a block. These 864 # are clamped to the start of the block they point into. 865 self.midBlockPtr = 0 866 867 # Number of null pointers. 868 self.nullPtr = 0 869 870 # Number of non-null pointers that didn't point into the middle 871 # of any blocks. These are clamped to null. 872 self.nonNullNonBlockPtr = 0 873 874 def clampedBlockAddr(self, sameAddress): 875 if sameAddress: 876 self.startBlockPtr += 1 877 else: 878 self.midBlockPtr += 1 879 880 def nullAddr(self): 881 self.nullPtr += 1 882 883 def clampedNonBlockAddr(self): 884 self.nonNullNonBlockPtr += 1 885 886 def log(self): 887 sys.stderr.write("Results:\n") 888 sys.stderr.write( 889 " Number of pointers already pointing to start of blocks: " 890 + str(self.startBlockPtr) 891 + "\n" 892 ) 893 sys.stderr.write( 894 " Number of pointers clamped to start of blocks: " 895 + str(self.midBlockPtr) 896 + "\n" 897 ) 898 sys.stderr.write( 899 " Number of non-null pointers not pointing into blocks " 900 "clamped to null: " + str(self.nonNullNonBlockPtr) + "\n" 901 ) 902 sys.stderr.write(" Number of null pointers: " + str(self.nullPtr) + "\n") 903 904 905# Search the block ranges array for a block that address points into. 906# The search is carried out in an array of starting addresses for each blocks 907# because it is faster. 908def clampAddress(blockRanges, blockStarts, clampStats, address): 909 i = bisect_right(blockStarts, address) 910 911 # Any addresses completely out of the range should have been eliminated already. 912 assert i > 0 913 r = blockRanges[i - 1] 914 assert r.start <= address 915 916 if address >= r.end: 917 assert address < blockRanges[i].start 918 clampStats.clampedNonBlockAddr() 919 return "0" 920 921 clampStats.clampedBlockAddr(r.start == address) 922 return r.block 923 924 925def clampBlockList(args, inputFileName, isZipped, opener): 926 # XXX This isn't very efficient because we end up reading and writing 927 # the file multiple times. 928 with opener(inputFileName, "rb") as f: 929 j = json.load(f) 930 931 if j["version"] != outputVersion: 932 raise Exception("'version' property isn't '{:d}'".format(outputVersion)) 933 934 # Check that the invocation is reasonable for contents clamping. 935 invocation = j["invocation"] 936 if invocation["mode"] != "scan": 937 raise Exception("Log was taken in mode " + invocation["mode"] + " not scan") 938 939 sys.stderr.write("Creating block range list.\n") 940 blockList = j["blockList"] 941 blockRanges = [] 942 for block in blockList: 943 blockRanges.append(AddrRange(block["addr"], block["req"])) 944 blockRanges.sort(key=lambda r: r.start) 945 946 # Make sure there are no overlapping blocks. 947 prevRange = blockRanges[0] 948 for currRange in blockRanges[1:]: 949 assert prevRange.end <= currRange.start 950 prevRange = currRange 951 952 sys.stderr.write("Clamping block contents.\n") 953 clampStats = ClampStats() 954 firstAddr = blockRanges[0].start 955 lastAddr = blockRanges[-1].end 956 957 blockStarts = [] 958 for r in blockRanges: 959 blockStarts.append(r.start) 960 961 for block in blockList: 962 # Small blocks don't have any contents. 963 if "contents" not in block: 964 continue 965 966 cont = block["contents"] 967 for i in range(len(cont)): 968 address = int(cont[i], 16) 969 970 if address == 0: 971 clampStats.nullAddr() 972 continue 973 974 # If the address is before the first block or after the last 975 # block then it can't be within a block. 976 if address < firstAddr or address >= lastAddr: 977 clampStats.clampedNonBlockAddr() 978 cont[i] = "0" 979 continue 980 981 cont[i] = clampAddress(blockRanges, blockStarts, clampStats, address) 982 983 # Remove any trailing nulls. 984 while len(cont) and cont[-1] == "0": 985 cont.pop() 986 987 if args.print_clamp_stats: 988 clampStats.log() 989 990 sys.stderr.write("Saving file.\n") 991 tmpFile = tempfile.NamedTemporaryFile(delete=False) 992 tmpFilename = tmpFile.name 993 if isZipped: 994 tmpFile = gzip.GzipFile(filename="", fileobj=tmpFile, mode="wb") 995 prettyPrintDmdJson(io.TextIOWrapper(tmpFile, encoding="utf-8"), j) 996 tmpFile.close() 997 shutil.move(tmpFilename, inputFileName) 998 999 1000def main(): 1001 args = parseCommandLine() 1002 digest = getDigestFromFile(args, args.input_file) 1003 if args.input_file2: 1004 digest2 = getDigestFromFile(args, args.input_file2) 1005 digest = diffDigests(args, digest, digest2) 1006 printDigest(args, digest) 1007 1008 1009if __name__ == "__main__": 1010 main() 1011