1#!/usr/bin/env python
2# Copyright 2016 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6import argparse
7import bisect
8import collections
9import gzip
10import itertools
11import json
12import os
13import re
14import subprocess
15import sys
16import tempfile
17
18sys.path.append(os.path.abspath(os.path.join(
19    os.path.dirname(os.path.realpath(__file__)),
20    '..', '..', '..', 'tracing', 'tracing', 'extras')))
21sys.path.append(os.path.abspath(os.path.join(
22    os.path.dirname(os.path.realpath(__file__)),
23    '..', '..', '..', 'tracing', 'third_party', 'symbols')))
24# pylint: disable=import-error
25import symbols.elf_symbolizer as elf_symbolizer
26
27from symbolizer import symbolize_trace_atos_regex
28from symbolizer import symbolize_trace_macho_reader
29
30
31class MemoryMap(object):
32  """Represents 'process_mmaps' trace file entry."""
33
34  class Region(object):
35    def __init__(self, start_address, size, file_path):
36      self._start_address = start_address
37      self._size = size
38      self._file_path = file_path
39
40    @property
41    def start_address(self):
42      return self._start_address
43
44    @property
45    def end_address(self):
46      return self._start_address + self._size
47
48    @property
49    def size(self):
50      return self._size
51
52    @property
53    def file_path(self):
54      return self._file_path
55
56    def __cmp__(self, other):
57      if isinstance(other, type(self)):
58        other_start_address = other._start_address
59      elif isinstance(other, (long, int)):
60        other_start_address = other
61      else:
62        raise Exception('Cannot compare with %s' % type(other))
63      if self._start_address < other_start_address:
64        return -1
65      elif self._start_address > other_start_address:
66        return 1
67      else:
68        return 0
69
70    def __repr__(self):
71      return 'Region(0x{:X} - 0x{:X}, {})'.format(
72          self.start_address, self.end_address, self.file_path)
73
74  def __init__(self, process_mmaps_json):
75    regions = []
76    for region_json in process_mmaps_json['vm_regions']:
77      regions.append(self.Region(
78          long(region_json['sa'], 16),
79          long(region_json['sz'], 16),
80          region_json['mf']))
81    regions.sort()
82
83    # Copy regions without duplicates and check for overlaps.
84    self._regions = []
85    previous_region = None
86    for region in regions:
87      if previous_region is not None:
88        if region == previous_region:
89          continue
90        assert region.start_address >= previous_region.end_address, \
91            'Regions {} and {} overlap.'.format(previous_region, region)
92      previous_region = region
93      self._regions.append(region)
94
95  @property
96  def regions(self):
97    return self._regions
98
99  def FindRegion(self, address):
100    """Finds region containing |address|. Returns None if none found."""
101
102    region_index = bisect.bisect_right(self._regions, address) - 1
103    if region_index >= 0:
104      region = self._regions[region_index]
105      if address >= region.start_address and address < region.end_address:
106        return region
107    return None
108
109
110class UnsupportedHeapDumpVersionError(Exception):
111  def __init__(self, version):
112    message = 'Unsupported heap dump version: {}'.format(version)
113    super(UnsupportedHeapDumpVersionError, self).__init__(message)
114
115
116class StringMap(object):
117  def __init__(self):
118    self._modified = False
119    self._string_jsons = []
120    self._string_by_id = {}
121    self._id_by_string = {}
122    self._max_string_id = 0
123
124  @property
125  def modified(self):
126    return self._modified
127
128  @property
129  def string_by_id(self):
130    return self._string_by_id
131
132  def ParseMore(self, heap_dump_version, strings_json):
133    if heap_dump_version != Trace.HEAP_DUMP_VERSION_1:
134      raise UnsupportedHeapDumpVersionError(heap_dump_version)
135
136    self._string_jsons.append(strings_json)
137    for string_json in strings_json:
138      self._Insert(string_json['id'], string_json['string'])
139
140  def Clear(self):
141    if self._string_by_id:
142      self._modified = True
143      self._string_by_id = {}
144      self._id_by_string = {}
145      self._Insert(0, '[null]')
146      self._max_string_id = 0
147
148  def AddString(self, string):
149    string_id = self._id_by_string.get(string)
150    if string_id is None:
151      string_id = self._max_string_id + 1
152      self._Insert(string_id, string)
153      self._modified = True
154    return string_id
155
156  def ApplyModifications(self):
157    if not self.modified:
158      return
159
160    assert self._string_jsons, 'no JSON nodes'
161
162    # Serialize into first JSON node, and clear all others.
163
164    for string_json in self._string_jsons:
165      string_json[:] = []
166    string_json = self._string_jsons[0]
167    for string_id, string in self._string_by_id.iteritems():
168      string_json.append({'id': string_id, 'string': string})
169
170    self._modified = False
171
172  def _Insert(self, string_id, string):
173    self._id_by_string[string] = string_id
174    self._string_by_id[string_id] = string
175    self._max_string_id = max(self._max_string_id, string_id)
176
177
178class TypeNameMap(object):
179  UNKNOWN_TYPE_ID = 0
180
181  def __init__(self):
182    self._modified = False
183    self._type_name_jsons = []
184    self._name_by_id = {}
185    self._id_by_name = {}
186    self._max_type_id = 0
187
188  @property
189  def modified(self):
190    return self._modified
191
192  @property
193  def name_by_id(self):
194    return self._name_by_id
195
196  def ParseMore(self, heap_dump_version, type_name_json, string_map):
197    if heap_dump_version != Trace.HEAP_DUMP_VERSION_1:
198      raise UnsupportedHeapDumpVersionError(heap_dump_version)
199
200    self._type_name_jsons.append(type_name_json)
201    for type_json in type_name_json:
202      self._Insert(type_json['id'],
203                   string_map.string_by_id[type_json['name_sid']])
204
205  def AddType(self, type_name):
206    type_id = self._id_by_name.get(type_name)
207    if type_id is None:
208      type_id = self._max_type_id + 1
209      self._Insert(type_id, type_name)
210      self._modified = True
211    return type_id
212
213  def ApplyModifications(self, string_map, force=False):
214    if not self.modified and not force:
215      return
216
217    assert self._type_name_jsons, 'no JSON nodes'
218
219    # Serialize into first JSON node, and clear all others.
220
221    for types_json in self._type_name_jsons:
222      types_json[:] = []
223    types_json = self._type_name_jsons[0]
224    for type_id, type_name in self._name_by_id.iteritems():
225      types_json.append({
226          'id': type_id,
227          'name_sid': string_map.AddString(type_name)})
228
229    self._modified = False
230
231  def _Insert(self, type_id, type_name):
232    self._id_by_name[type_name] = type_id
233    self._name_by_id[type_id] = type_name
234    self._max_type_id = max(self._max_type_id, type_id)
235
236
237class StackFrameMap(object):
238  class Frame(object):
239    def __init__(self, frame_id, name, parent_frame_id):
240      self._modified = False
241      self._id = frame_id
242      self._name = name
243      self._pc = self._ParsePC(name)
244      self._parent_id = parent_frame_id
245      self._parent = None
246      self._ext = None
247
248    @property
249    def modified(self):
250      return self._modified
251
252    @property
253    def id(self):
254      return self._id
255
256    @property
257    def pc(self):
258      return self._pc
259
260    @property
261    def name(self):
262      return self._name
263
264    @name.setter
265    def name(self, value):
266      self._modified = True
267      self._name = value
268
269    @property
270    def parent_id(self):
271      return self._parent_id
272
273    @property
274    def parent(self):
275      return self._parent
276
277    @property
278    def ext(self):
279      if self._ext is None:
280        self._ext = self._ExtraProperties()
281      return self._ext
282
283    _PC_TAG = 'pc:'
284
285    class _ExtraProperties(object):
286      def __getattr__(self, name):
287        return None
288
289    def _ParsePC(self, name):
290      if not name.startswith(self._PC_TAG):
291        return None
292      return long(name[len(self._PC_TAG):], 16)
293
294    def _ClearModified(self):
295      self._modified = False
296
297    def _ResolveParent(self, parent):
298      assert parent.id == self._parent_id, 'wrong parent'
299      self._parent = parent
300
301    def _ChangeParent(self, parent):
302      self._parent = parent
303      self._parent_id = parent.id
304      self._modified = True
305
306  def __init__(self):
307    self._modified = False
308    self._heap_dump_version = None
309    self._stack_frames_jsons = []
310    self._frame_by_id = {}
311    self._max_frame_id = 0
312
313  @property
314  def modified(self):
315    return (self._modified or
316            any(f.modified for f in self._frame_by_id.itervalues()))
317
318  @property
319  def frame_by_id(self):
320    return self._frame_by_id
321
322  def ParseMore(self, heap_dump_version, stack_frames_json, string_map):
323    frame_by_id = {}
324    if heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
325      if self._stack_frames_jsons:
326        raise Exception('Legacy stack frames are expected only once.')
327      for frame_id, frame_json in stack_frames_json.iteritems():
328        frame = self.Frame(frame_id,
329                           frame_json['name'],
330                           frame_json.get('parent'))
331        frame_by_id[frame.id] = frame
332    else:
333      if heap_dump_version != Trace.HEAP_DUMP_VERSION_1:
334        raise UnsupportedHeapDumpVersionError(heap_dump_version)
335      for frame_json in stack_frames_json:
336        frame = self.Frame(frame_json['id'],
337                           string_map.string_by_id[frame_json['name_sid']],
338                           frame_json.get('parent'))
339        frame_by_id[frame.id] = frame
340
341    self._heap_dump_version = heap_dump_version
342    self._stack_frames_jsons.append(stack_frames_json)
343
344    for frame in frame_by_id.itervalues():
345      if frame.parent_id:
346        parent = frame_by_id.get(frame.parent_id)
347        if not parent:
348          # Parent was added by previous ParseMore() call
349          parent = self._frame_by_id[frame.parent_id]
350        frame._ResolveParent(parent)
351      self._frame_by_id[frame.id] = frame
352      self._max_frame_id = max(frame.id, self._max_frame_id)
353
354  def AddFrame(self, name, parent_frame):
355    self._max_frame_id += 1
356    parent_id = None if parent_frame is None else parent_frame.id
357    frame = self.Frame(self._max_frame_id, name, parent_id)
358    if parent_frame is not None:
359      frame._ResolveParent(parent_frame)
360    self._frame_by_id[frame.id] = frame
361    self._modified = True
362    return frame
363
364  def MergeFrames(self, get_frame_key):
365    """ On each level, merges frames with similar keys.
366
367    This method builds a frame tree and then for each node merges children
368    with similar keys, as returned by |get_frame_key|. Keys are arbitrary
369    objects.
370
371    If |get_frame_key| returns None, then the corresponding tree branch
372    removed (i.e. it removes the frame and all child frames, recursively).
373
374    The method returns 'merged_frames_by_frame', which is a dictionary that
375    maps frame to all frames that were merged into it. All removed frames are
376    added under None key.
377    """
378    class _Node(object):
379      def __init__(self, frame):
380        self.marked = False
381        self.frame = frame
382        self.children = []
383
384      def Mark(self, marked_nodes=None):
385        self.marked = True
386        if marked_nodes is not None:
387          marked_nodes.append(self)
388        for child in self.children:
389          child.Mark(marked_nodes)
390
391    # TODO(dskiba): use BuildFrameTree() instead
392    node_by_id = {}
393    def _NodeForFrame(frame):
394      node = node_by_id.get(frame.id)
395      if node is None:
396        node = _Node(frame)
397        node_by_id[frame.id] = node
398      return node
399
400    root_node = _Node(None)
401    for frame in self._frame_by_id.itervalues():
402      if frame.parent is None:
403        root_node.children.append(_NodeForFrame(frame))
404      else:
405        parent_node = _NodeForFrame(frame.parent)
406        parent_node.children.append(_NodeForFrame(frame))
407
408    merged_frames_by_frame = collections.defaultdict(list)
409    def _MergeChildren(node):
410      children_by_key = collections.defaultdict(list)
411      for child in node.children:
412        key = get_frame_key(child.frame)
413        if key is None:
414          marked_nodes = []
415          child.Mark(marked_nodes)
416          removed_frames = merged_frames_by_frame[None]
417          removed_frames.extend(n.frame for n in marked_nodes)
418        else:
419          children_by_key[key].append(child)
420      node.children = []
421      for children in children_by_key.itervalues():
422        child = children[0]
423        node.children.append(child)
424        if len(children) > 1:
425          merged_frames = merged_frames_by_frame[child.frame]
426          for dupchild in children[1:]:
427            for grandchild in dupchild.children:
428              grandchild.frame._ChangeParent(child.frame)
429              child.children.append(grandchild)
430            dupchild.children = []
431            dupchild.Mark()
432            merged_frames.append(dupchild.frame)
433        _MergeChildren(child)
434
435    _MergeChildren(root_node)
436
437    if merged_frames_by_frame:
438      self._frame_by_id = {i:n.frame for i, n in node_by_id.iteritems()
439                           if not n.marked}
440      self._modified = True
441
442    return merged_frames_by_frame
443
444  def ApplyModifications(self, string_map, force=False):
445    if not self.modified and not force:
446      return
447
448    assert self._stack_frames_jsons, 'no JSON nodes'
449    if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
450      assert string_map is None, \
451          'string_map should not be used with the legacy format'
452
453    # Serialize frames into first JSON node, and clear all others.
454
455    for frames_json in self._stack_frames_jsons:
456      if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
457        frames_json.clear()
458      else:
459        frames_json[:] = []
460
461    frames_json = self._stack_frames_jsons[0]
462    for frame in self._frame_by_id.itervalues():
463      if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
464        frame_json = {'name': frame.name}
465        frames_json[frame.id] = frame_json
466      else:
467        frame_json = {
468            'id': frame.id,
469            'name_sid': string_map.AddString(frame.name)
470        }
471        frames_json.append(frame_json)
472      if frame.parent_id is not None:
473        frame_json['parent'] = frame.parent_id
474      frame._ClearModified()
475
476    self._modified = False
477
478  def BuildFrameTree(self, node_type):
479    """ Creates a frame tree using provided node type.
480
481    |node_type| is expected to have:
482    1. __init__(self, frame)
483    2. 'children' array
484
485    The function returns tuple (root_node, node_by_frame_id).
486    """
487
488    node_by_id = {}
489    def _NodeForFrame(frame):
490      node = node_by_id.get(frame.id)
491      if node is None:
492        node = node_type(frame)
493        node_by_id[frame.id] = node
494      return node
495
496    root_node = node_type(None)
497    for frame in self._frame_by_id.itervalues():
498      if frame.parent is None:
499        root_node.children.append(_NodeForFrame(frame))
500      else:
501        parent_node = _NodeForFrame(frame.parent)
502        parent_node.children.append(_NodeForFrame(frame))
503
504    return (root_node, node_by_id)
505
506
507class HeapProfile(object):
508  EntryKey = collections.namedtuple(
509      'EntryKey',
510      ['stack_frame_id', 'type_name_id'])
511
512  class Entry(object):
513    def __init__(self, key, mapped_value_by_name, numeric_value_by_name):
514      self._key = key
515      self._mapped_value_by_name = mapped_value_by_name
516      self._numeric_value_by_name = numeric_value_by_name
517
518    @property
519    def key(self):
520      return self._key
521
522    @property
523    def stack_frame_id(self):
524      return self._key.stack_frame_id
525
526    @property
527    def type_name_id(self):
528      return self._key.type_name_id
529
530    @property
531    def size(self):
532      return self._numeric_value_by_name.get(
533          HeapProfile._SIZES_JSON_VALUE_NAME)
534
535    def _AddValuesFrom(self, entry):
536      self._mapped_value_by_name.clear()
537      for name, value in entry._numeric_value_by_name.iteritems():
538        value += self._numeric_value_by_name.get(name, 0)
539        self._numeric_value_by_name[name] = value
540
541  def __init__(self, allocator_name, entries_json, mapped_entry_names):
542    self._modified = False
543    self._allocator_name = allocator_name
544    self._entries_json = entries_json
545    self._entries = []
546    for values in zip(*entries_json.itervalues()):
547      stack_frame_id = None
548      type_name_id = None
549      mapped_value_by_name = {}
550      numeric_value_by_name = {}
551      for index, name in enumerate(entries_json.iterkeys()):
552        value = values[index]
553        if name == self._STACK_FRAME_ID_JSON_VALUE_NAME:
554          stack_frame_id = value
555        elif name == self._TYPE_NAME_ID_JSON_VALUE_NAME:
556          type_name_id = value
557        elif name in mapped_entry_names:
558          mapped_value_by_name[name] = value
559        else:
560          numeric_value_by_name[name] = value
561      entry = self.Entry(self.EntryKey(stack_frame_id, type_name_id),
562                         mapped_value_by_name, numeric_value_by_name)
563      self._entries.append(entry)
564
565  @property
566  def modified(self):
567    return self._modified
568
569  @property
570  def allocator_name(self):
571    return self._allocator_name
572
573  @property
574  def entries(self):
575    return self._entries
576
577  def AddEntry(self, entry_key):
578    entry = self.Entry(entry_key, {}, {})
579    self._entries.append(entry)
580    self._modified = True
581    return entry
582
583  def MergeEntries(self, get_entry_key):
584    entry_by_key = {}
585    for entry in self._entries:
586      new_key = get_entry_key(entry)
587      new_entry = entry_by_key.get(new_key)
588      if new_entry is None:
589        entry_by_key[new_key] = entry
590      else:
591        new_entry._AddValuesFrom(entry)
592
593    if len(self._entries) != len(entry_by_key):
594      # This means that we hit _AddValuesFrom() above at least once,
595      # i.e. merged at least one entry.
596      self._entries = []
597      for key, entry in entry_by_key.iteritems():
598        entry._key = key
599        self._entries.append(entry)
600      self._modified = True
601
602  def ApplyModifications(self):
603    if not self.modified:
604      return
605
606    mapped_value_names = set()
607    numeric_value_names = set()
608    for entry in self._entries:
609      mapped_value_names.update(entry._mapped_value_by_name.iterkeys())
610      numeric_value_names.update(entry._numeric_value_by_name.iterkeys())
611
612    def _AddJSONValue(name, value):
613      values = self._entries_json.get(name)
614      if values is None:
615        values = []
616        self._entries_json[name] = values
617      values.append(value)
618
619    self._entries_json.clear()
620    for entry in self._entries:
621      _AddJSONValue(self._STACK_FRAME_ID_JSON_VALUE_NAME, entry.stack_frame_id)
622      _AddJSONValue(self._TYPE_NAME_ID_JSON_VALUE_NAME, entry.type_name_id)
623      for name in mapped_value_names:
624        value = entry._mapped_value_by_name[name]
625        _AddJSONValue(name, value)
626      for name in numeric_value_names:
627        value = entry._numeric_value_by_name[name]
628        _AddJSONValue(name, value)
629
630    self._modified = False
631
632  _STACK_FRAME_ID_JSON_VALUE_NAME = 'nodes'
633  _TYPE_NAME_ID_JSON_VALUE_NAME = 'types'
634  _SIZES_JSON_VALUE_NAME = 'sizes'
635
636
637class MemoryDump(object):
638  def __init__(self, allocators_json, mapped_entry_names):
639    self._profiles = []
640    for allocator_name, entries_json in allocators_json.iteritems():
641      profile = HeapProfile(allocator_name, entries_json, mapped_entry_names)
642      self._profiles.append(profile)
643
644  @property
645  def modified(self):
646    return any(p.modified for p in self.profiles)
647
648  @property
649  def profiles(self):
650    return self._profiles
651
652  def ApplyModifications(self):
653    for profile in self._profiles:
654      profile.ApplyModifications()
655
656
657class Trace(object):
658
659  HEAP_DUMP_VERSION_LEGACY = 'Legacy'
660  HEAP_DUMP_VERSION_1 = 1
661
662  class Process(object):
663    def __init__(self, pid):
664      self._pid = pid
665      self._name = None
666      self._memory_map = None
667      self._memory_dumps = []
668      self._stack_frame_map = StackFrameMap()
669      self._type_name_map = TypeNameMap()
670      self._string_map = StringMap()
671      self._heap_dump_version = None
672
673    @property
674    def modified(self):
675      return (self._stack_frame_map.modified or
676              self._type_name_map.modified or
677              any(d.modified for d in self._memory_dumps))
678
679    @property
680    def pid(self):
681      return self._pid
682
683    @property
684    def name(self):
685      return self._name
686
687    @property
688    def unique_name(self):
689      name = self._name if self._name else 'UnnamedProcess'
690      return '{}({})'.format(name, self._pid)
691
692    @property
693    def memory_map(self):
694      return self._memory_map
695
696    @property
697    def memory_dumps(self):
698      return self._memory_dumps
699
700    @property
701    def stack_frame_map(self):
702      return self._stack_frame_map
703
704    @property
705    def type_name_map(self):
706      return self._type_name_map
707
708    def ApplyModifications(self):
709      if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
710        self._stack_frame_map.ApplyModifications(None)
711      else:
712        if self._stack_frame_map.modified or self._type_name_map.modified:
713          self._string_map.Clear()
714          self._stack_frame_map.ApplyModifications(self._string_map, force=True)
715          self._type_name_map.ApplyModifications(self._string_map, force=True)
716          self._string_map.ApplyModifications()
717        for dump in self._memory_dumps:
718          dump.ApplyModifications()
719
720  def __init__(self, trace_json):
721    self._trace_json = trace_json
722    self._processes = []
723    self._heap_dump_version = None
724
725    # Misc per-process information needed only during parsing.
726    class ProcessExt(object):
727      def __init__(self, pid):
728        self.process = Trace.Process(pid)
729        self.mapped_entry_names = set()
730        self.process_mmaps_json = None
731        self.seen_strings_json = False
732
733    process_ext_by_pid = {}
734
735    # Android traces produced via 'chrome://inspect/?tracing#devices' are
736    # just list of events.
737    events = trace_json if isinstance(trace_json, list) \
738             else trace_json['traceEvents']
739    for event in events:
740      name = event.get('name')
741      if not name:
742        continue
743
744      pid = event['pid']
745      process_ext = process_ext_by_pid.get(pid)
746      if process_ext is None:
747        process_ext = ProcessExt(pid)
748        process_ext_by_pid[pid] = process_ext
749      process = process_ext.process
750
751      phase = event['ph']
752      if phase == self._EVENT_PHASE_METADATA:
753        if name == 'process_name':
754          process._name = event['args']['name']
755        elif name == 'stackFrames':
756          process._stack_frame_map.ParseMore(
757              self._UseHeapDumpVersion(self.HEAP_DUMP_VERSION_LEGACY),
758              event['args']['stackFrames'],
759              process._string_map)
760      elif phase == self._EVENT_PHASE_MEMORY_DUMP:
761        dumps = event['args']['dumps']
762        process_mmaps = dumps.get('process_mmaps')
763        if process_mmaps:
764          # We want the most recent memory map, so parsing happens later
765          # once we finished reading all events.
766          process_ext.process_mmaps_json = process_mmaps
767        heaps = dumps.get('heaps_v2')
768        if heaps:
769          version = self._UseHeapDumpVersion(heaps['version'])
770          maps = heaps.get('maps')
771          if maps:
772            process_ext.mapped_entry_names.update(maps.iterkeys())
773            types = maps.get('types')
774            stack_frames = maps.get('nodes')
775            strings = maps.get('strings')
776            if (strings is None and (types or stack_frames)
777                and not process_ext.seen_strings_json):
778              # ApplyModifications() for TypeNameMap and StackFrameMap puts
779              # everything into the first node and depends on StringMap. So
780              # we need to make sure that 'strings' node is there if any of
781              # other two nodes present.
782              strings = []
783              maps['strings'] = strings
784            if strings is not None:
785              process_ext.seen_strings_json = True
786              process._string_map.ParseMore(version, strings)
787            if types:
788              process._type_name_map.ParseMore(
789                  version, types, process._string_map)
790            if stack_frames:
791              process._stack_frame_map.ParseMore(
792                  version, stack_frames, process._string_map)
793          allocators = heaps.get('allocators')
794          if allocators:
795            dump = MemoryDump(allocators, process_ext.mapped_entry_names)
796            process._memory_dumps.append(dump)
797
798    self._processes = []
799    for pe in process_ext_by_pid.itervalues():
800      pe.process._heap_dump_version = self._heap_dump_version
801      if pe.process_mmaps_json:
802        # Now parse the most recent memory map.
803        pe.process._memory_map = MemoryMap(pe.process_mmaps_json)
804      self._processes.append(pe.process)
805
806  @property
807  def modified(self):
808    return any(p.modified for p in self._processes)
809
810  @property
811  def processes(self):
812    return self._processes
813
814  @property
815  def heap_dump_version(self):
816    return self._heap_dump_version
817
818  def ApplyModifications(self):
819    for process in self._processes:
820      process.ApplyModifications()
821    assert not self.modified, 'still modified'
822
823  def Serialize(self):
824    return self._trace_json
825
826  # Relevant trace event phases from Chromium's
827  # src/base/trace_event/common/trace_event_common.h.
828  _EVENT_PHASE_METADATA = 'M'
829  _EVENT_PHASE_MEMORY_DUMP = 'v'
830
831  def _UseHeapDumpVersion(self, version):
832    if self._heap_dump_version is None:
833      self._heap_dump_version = version
834      return version
835    elif self._heap_dump_version != version:
836      raise Exception(
837          ("Inconsistent trace file: first saw '{}' heap dump version, "
838           "then '{}'.").format(self._heap_dump_version, version))
839    else:
840      return version
841
842
843class SymbolizableFile(object):
844  """Holds file path, addresses to symbolize and stack frames to update.
845
846  This class is a link between ELFSymbolizer and a trace file: it specifies
847  what to symbolize (addresses) and what to update with the symbolization
848  result (frames).
849  """
850  def __init__(self, file_path):
851    self.path = file_path
852    self.symbolizable_path = file_path # path to use for symbolization
853    self.frames_by_address = collections.defaultdict(list)
854
855
856def ResolveSymbolizableFiles(processes):
857  """Resolves and groups PCs into list of SymbolizableFiles.
858
859  As part of the grouping process, this function resolves PC from each stack
860  frame to the corresponding mmap region. Stack frames that failed to resolve
861  are symbolized with '<unresolved>'.
862  """
863  symfile_by_path = {}
864  for process in processes:
865    if not process.memory_map:
866      continue
867    for frame in process.stack_frame_map.frame_by_id.itervalues():
868      if frame.pc is None:
869        continue
870      region = process.memory_map.FindRegion(frame.pc)
871      if region is None:
872        frame.name = '<unresolved>'
873        continue
874
875      symfile = symfile_by_path.get(region.file_path)
876      if symfile is None:
877        symfile = SymbolizableFile(region.file_path)
878        symfile_by_path[symfile.path] = symfile
879
880      relative_pc = frame.pc - region.start_address
881      symfile.frames_by_address[relative_pc].append(frame)
882  return symfile_by_path.values()
883
884
885def FindInSystemPath(binary_name):
886  paths = os.environ['PATH'].split(os.pathsep)
887  for path in paths:
888    binary_path = os.path.join(path, binary_name)
889    if os.path.isfile(binary_path):
890      return binary_path
891  return None
892
893
894class Symbolizer(object):
895  # Encapsulates platform-specific symbolization logic.
896  def __init__(self):
897    self.is_mac = sys.platform == 'darwin'
898    self.is_win = sys.platform == 'win32'
899    if self.is_mac:
900      self.binary = 'atos'
901      self._matcher = symbolize_trace_atos_regex.AtosRegexMatcher()
902    elif self.is_win:
903      self.binary = 'addr2line-pdb.exe'
904    else:
905      self.binary = 'addr2line'
906    self.symbolizer_path = FindInSystemPath(self.binary)
907
908  def _SymbolizeLinuxAndAndroid(self, symfile, unsymbolized_name):
909    def _SymbolizerCallback(sym_info, frames):
910      # Unwind inline chain to the top.
911      while sym_info.inlined_by:
912        sym_info = sym_info.inlined_by
913
914      symbolized_name = sym_info.name if sym_info.name else unsymbolized_name
915      for frame in frames:
916        frame.name = symbolized_name
917        frame.ext.source_path = sym_info.source_path
918
919    symbolizer = elf_symbolizer.ELFSymbolizer(symfile.symbolizable_path,
920                                              self.symbolizer_path,
921                                              _SymbolizerCallback,
922                                              inlines=True)
923
924    for address, frames in symfile.frames_by_address.iteritems():
925      # SymbolizeAsync() asserts that the type of address is int. We operate
926      # on longs (since they are raw pointers possibly from 64-bit processes).
927      # It's OK to cast here because we're passing relative PC, which should
928      # always fit into int.
929      symbolizer.SymbolizeAsync(int(address), frames)
930
931    symbolizer.Join()
932
933
934  def _SymbolizeMac(self, symfile):
935    load_address = (symbolize_trace_macho_reader.
936        ReadMachOTextLoadAddress(symfile.symbolizable_path))
937    assert load_address is not None
938
939    address_os_file, address_file_path = tempfile.mkstemp()
940    try:
941      with os.fdopen(address_os_file, 'w') as address_file:
942        for address in symfile.frames_by_address.iterkeys():
943          address_file.write('{:x} '.format(address + load_address))
944
945      cmd = [self.symbolizer_path, '-arch', 'x86_64', '-l',
946             '0x%x' % load_address, '-o', symfile.symbolizable_path,
947             '-f', address_file_path]
948      output_array = subprocess.check_output(cmd).split('\n')
949
950      for i, frames in enumerate(symfile.frames_by_address.itervalues()):
951        symbolized_name = self._matcher.Match(output_array[i])
952        for frame in frames:
953          frame.name = symbolized_name
954    finally:
955      os.remove(address_file_path)
956
957  def _SymbolizeWin(self, symfile):
958    """Invoke symbolizer binary on windows and write all input in one go.
959
960    Unlike linux, on windows, symbolization talks through a shared system
961    service that handles communication with the NT symbol servers. This
962    creates an explicit serialization (and therefor lock contention) of
963    any process using the symbol API for files do not have a local PDB.
964
965    Thus, even though the windows symbolizer binary can be make command line
966    compatible with the POSIX addr2line interface, paralellizing the
967    symbolization does not yield the same performance effects. Running
968    just one symbolizer seems good enough for now. Can optimize later
969    if this becomes a bottleneck.
970    """
971    cmd = [self.symbolizer_path, '--functions', '--demangle', '--exe',
972                symfile.symbolizable_path]
973
974    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE,
975            stderr=sys.stderr)
976    addrs = ["%x" % relative_pc for relative_pc in
977             symfile.frames_by_address.keys()]
978    (stdout_data, stderr_data) = proc.communicate('\n'.join(addrs))
979    stdout_data = stdout_data.split('\n')
980
981    # This is known to be in the same order as stderr_data.
982    for i, addr in enumerate(addrs):
983        for frame in  symfile.frames_by_address[int(addr, 16)]:
984            # Output of addr2line with --functions is always 2 outputs per
985            # symbol, function name followed by source line number. Only grab
986            # the function name as line info is not always available.
987            frame.name = stdout_data[i * 2]
988
989  def Symbolize(self, symfile, unsymbolized_name):
990    if self.is_mac:
991      self._SymbolizeMac(symfile)
992    elif self.is_win:
993      self._SymbolizeWin(symfile)
994    else:
995      self._SymbolizeLinuxAndAndroid(symfile, unsymbolized_name)
996
997  def IsSymbolizableFile(self, file_path):
998    if self.is_win:
999      extension = os.path.splitext(file_path)[1].lower()
1000      return extension in ['.dll', '.exe']
1001    else:
1002      result = subprocess.check_output(['file', '-0', file_path])
1003      type_string = result[result.find('\0') + 1:]
1004      return bool(re.match(r'.*(ELF|Mach-O) (32|64)-bit\b.*',
1005                  type_string, re.DOTALL))
1006
1007
1008def SymbolizeFiles(symfiles, symbolizer):
1009  """Symbolizes each file in the given list of SymbolizableFiles
1010     and updates stack frames with symbolization results."""
1011  print 'Symbolizing...'
1012
1013  def _SubPrintf(message, *args):
1014    print ('  ' + message).format(*args)
1015
1016  for symfile in symfiles:
1017    unsymbolized_name = '<{}>'.format(
1018        symfile.path if symfile.path else 'unnamed')
1019
1020    problem = None
1021    if not os.path.isabs(symfile.symbolizable_path):
1022      problem = 'not a file'
1023    elif not os.path.isfile(symfile.symbolizable_path):
1024      problem = "file doesn't exist"
1025    elif not symbolizer.IsSymbolizableFile(symfile.symbolizable_path):
1026      problem = 'file is not symbolizable'
1027    if problem:
1028      _SubPrintf("Won't symbolize {} PCs for '{}': {}.",
1029                 len(symfile.frames_by_address),
1030                 symfile.symbolizable_path,
1031                 problem)
1032      for frames in symfile.frames_by_address.itervalues():
1033        for frame in frames:
1034          frame.name = unsymbolized_name
1035      continue
1036
1037    _SubPrintf('Symbolizing {} PCs from {}...',
1038               len(symfile.frames_by_address),
1039               symfile.path)
1040
1041    symbolizer.Symbolize(symfile, unsymbolized_name)
1042
1043
1044def CollapseSmallBranches(trace, size_threshold):
1045  print 'Collapsing stack branches smaller than {}...'.format(size_threshold)
1046
1047  class _Node(object):
1048    def __init__(self, frame):
1049      self.frame = frame
1050      self.collapsed_child = None
1051      self.children = []
1052      self.entries = []
1053      self.total_entry_size = 0
1054      self.total_entry_count = 0
1055      self.needed = False
1056
1057    def ClearEntries(self):
1058      self.entries = []
1059      for child in self.children:
1060        child.ClearEntries()
1061
1062    def UpdateTotals(self):
1063      total_entry_size = sum(e.size for e in self.entries)
1064      total_entry_count = len(self.entries)
1065      for child in self.children:
1066        child.UpdateTotals()
1067        total_entry_size += child.total_entry_size
1068        total_entry_count += child.total_entry_count
1069      self.total_entry_size = total_entry_size
1070      self.total_entry_count = total_entry_count
1071
1072    def CollectEntries(self, entries):
1073      entries += self.entries
1074      for child in self.children:
1075        child.CollectEntries(entries)
1076
1077  for process in trace.processes:
1078    root_node, node_by_id = process.stack_frame_map.BuildFrameTree(_Node)
1079
1080    for dump in process.memory_dumps:
1081      for profile in dump.profiles:
1082
1083        root_node.ClearEntries()
1084        for entry in profile.entries:
1085          node_by_id[entry.stack_frame_id].entries.append(entry)
1086        root_node.UpdateTotals()
1087
1088        collapsed_entry_by_entry = {}
1089        def _CollapseEntries(node):
1090          if node.total_entry_count > 0:
1091            node.needed = True
1092          if node.frame is not None and node.total_entry_size < size_threshold:
1093            if node.children:
1094              if node.collapsed_child is None:
1095                collapsed_frame = process.stack_frame_map.AddFrame(
1096                    '[collapsed]',
1097                    node.frame)
1098                node.collapsed_child = _Node(collapsed_frame)
1099                node.children.append(node.collapsed_child)
1100                node_by_id[collapsed_frame.id] = node.collapsed_child
1101
1102              child_entries = []
1103              for child in node.children:
1104                child.CollectEntries(child_entries)
1105              if child_entries:
1106                type_name_id = None
1107                if len(node.entries) == 1:
1108                  type_name_id = node.entries[0].type_name_id
1109                else:
1110                  type_name_ids = set(e.type_name_id for e in child_entries)
1111                  if len(type_name_ids) == 1:
1112                    type_name_id = next(iter(type_name_ids))
1113
1114                if type_name_id is None:
1115                  type_name_id = process.type_name_map.AddType('[collapsed]')
1116
1117                collapsed_entry = profile.AddEntry(
1118                    HeapProfile.EntryKey(node.collapsed_child.frame.id,
1119                                         type_name_id))
1120                node.collapsed_child.needed = True
1121                for entry in child_entries:
1122                  collapsed_entry_by_entry[entry] = collapsed_entry
1123          else:
1124            for child in node.children:
1125              _CollapseEntries(child)
1126
1127        _CollapseEntries(root_node)
1128
1129        def _MergeToCollapsedEntry(entry):
1130          collapsed_entry = collapsed_entry_by_entry.get(entry)
1131          if collapsed_entry is not None:
1132            return collapsed_entry.key
1133          else:
1134            return entry.key
1135        profile.MergeEntries(_MergeToCollapsedEntry)
1136
1137    def _RemoveRedunantFrame(frame):
1138      node = node_by_id[frame.id]
1139      return None if not node.needed else frame
1140    frame_count_before = len(process.stack_frame_map.frame_by_id)
1141    process.stack_frame_map.MergeFrames(_RemoveRedunantFrame)
1142    print '  {}: collapsed {} stack frames (out of {})'.format(
1143        process.unique_name,
1144        frame_count_before - len(process.stack_frame_map.frame_by_id),
1145        frame_count_before)
1146
1147
1148def DeduplicateStackFrames(trace):
1149  if trace.heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY:
1150    # We're not parsing heap entries for legacy format, so deduplicating
1151    # stack frames would result in a corrupted trace.
1152    return
1153
1154  print 'Deduplicating stack frames...'
1155
1156  for process in trace.processes:
1157    merged_frames_by_frame = process.stack_frame_map.MergeFrames(
1158        get_frame_key=lambda f: f.name)
1159
1160    if not merged_frames_by_frame:
1161      continue
1162
1163    frame_by_merged_frame_id = {}
1164    for frame, merged_frames in merged_frames_by_frame.iteritems():
1165      for merged_frame in merged_frames:
1166        frame_by_merged_frame_id[merged_frame.id] = frame
1167
1168    for dump in process.memory_dumps:
1169      for profile in dump.profiles:
1170        def _GetMergeKey(entry):
1171          frame = frame_by_merged_frame_id.get(entry.stack_frame_id)
1172          if frame is None:
1173            return entry.key
1174          else:
1175            return HeapProfile.EntryKey(frame.id, entry.type_name_id)
1176
1177        entry_count = len(profile.entries)
1178        profile.MergeEntries(_GetMergeKey)
1179
1180
1181# Matches Android library paths, supports both K (/data/app-lib/<>/lib.so)
1182# as well as L+ (/data/app/<>/lib/<>/lib.so). Library name is available
1183# via 'name' group.
1184ANDROID_PATH_MATCHER = re.compile(
1185    r'^/data/(?:'
1186      r'app/[^/]+/lib/[^/]+/|'
1187      r'app-lib/[^/]+/|'
1188      r'data/[^/]+/incremental-install-files/lib/'
1189    r')(?P<name>.*\.so)')
1190
1191# Subpath of output path where unstripped libraries are stored.
1192ANDROID_UNSTRIPPED_SUBPATH = 'lib.unstripped'
1193
1194
1195def HaveFilesFromAndroid(symfiles):
1196  return any(ANDROID_PATH_MATCHER.match(f.path) for f in symfiles)
1197
1198
1199def RemapAndroidFiles(symfiles, output_path):
1200  for symfile in symfiles:
1201    match = ANDROID_PATH_MATCHER.match(symfile.path)
1202    if match:
1203      name = match.group('name')
1204      symfile.symbolizable_path = os.path.join(
1205          output_path, ANDROID_UNSTRIPPED_SUBPATH, name)
1206    else:
1207      # Clobber file path to trigger "not a file" problem in SymbolizeFiles().
1208      # Without this, files won't be symbolized with "file not found" problem,
1209      # which is not accurate.
1210      symfile.symbolizable_path = 'android://{}'.format(symfile.path)
1211
1212
1213def Symbolize(options, trace, symbolizer):
1214  if options.collapse_threshold:
1215    CollapseSmallBranches(trace, options.collapse_threshold)
1216
1217  symfiles = ResolveSymbolizableFiles(trace.processes)
1218
1219  # Android trace files don't have any indication they are from Android.
1220  # So we're checking for Android-specific paths.
1221  if HaveFilesFromAndroid(symfiles):
1222    if not options.output_directory:
1223      sys.exit('The trace file appears to be from Android. Please '
1224               'specify output directory to properly symbolize it.')
1225    RemapAndroidFiles(symfiles, os.path.abspath(options.output_directory))
1226
1227  SymbolizeFiles(symfiles, symbolizer)
1228
1229  DeduplicateStackFrames(trace)
1230
1231
1232TRIVIAL_PATHS = [
1233    'base/allocator',
1234    'base/memory/aligned_memory.cc',
1235    'base/memory/ptr_util.h',
1236    'base/memory/scoped_vector.h',
1237    'base/process/memory.cc',
1238    'base/process/memory_linux.cc',
1239    'base/stl_util.h',
1240    'base/strings',
1241    'base/trace_event/malloc_dump_provider.cc',
1242    'skia/ext/SkMemory_new_handler.cpp',
1243    'third_party/android_ndk/sources/cxx-stl',
1244    'third_party/skia/src/core/SkArenaAlloc.cpp',
1245    'third_party/skia/src/core/SkMallocPixelRef.cpp',
1246    'third_party/WebKit/Source/platform/heap/BlinkGCMemoryDumpProvider.cpp',
1247    'third_party/WebKit/Source/platform/heap/Heap.h',
1248    'third_party/WebKit/Source/wtf/allocator/PartitionAllocator.cpp',
1249    'third_party/WebKit/Source/wtf/allocator/Partitions.h',
1250]
1251
1252def Categorize(options, trace):
1253  print 'Categorizing...'
1254
1255  # TODO(dskiba): move one level up
1256  if not options.output_directory:
1257    sys.exit('Output directory should be specified for categorization.')
1258
1259  trivial_paths = options.trivial_paths + options.extra_trivial_paths
1260  trivial_paths_regex = re.compile(
1261      '|'.join(re.escape(p) for p in trivial_paths))
1262
1263  category_slice = options.category_slice
1264
1265  def _NormalizePath(path):
1266    return os.path.normcase(os.path.normpath(path))
1267
1268  output_path = _NormalizePath(os.path.abspath(options.output_directory))
1269  src_path = _NormalizePath(os.path.join(output_path, '..', '..'))
1270
1271  failed_paths = set()
1272
1273  def _Categorize(file_path):
1274    if not file_path:
1275      return None
1276
1277    file_path = _NormalizePath(os.path.abspath(file_path))
1278    if file_path.startswith(output_path):
1279      file_subpath = os.path.relpath(file_path, output_path)
1280    elif file_path.startswith(src_path):
1281      file_subpath = os.path.relpath(file_path, src_path)
1282    else:
1283      if file_path not in failed_paths:
1284        failed_paths.add(file_path)
1285        print '  Not in source: {}'.format(file_path)
1286      return None
1287
1288    if trivial_paths_regex.search(file_subpath):
1289      if file_subpath not in failed_paths:
1290        failed_paths.add(file_subpath)
1291        print '  Skipped: {}'.format(file_subpath)
1292      return None
1293
1294    category = file_subpath.split(os.sep)
1295    if category_slice != 0:
1296      category = category[:category_slice]
1297
1298    return '/'.join(category)
1299
1300  def _GetCategory(frame):
1301    while frame:
1302      if not frame.ext.categorized:
1303        frame.ext.category = _Categorize(frame.ext.source_path)
1304        frame.ext.categorized = True
1305      if frame.ext.category:
1306        return frame.ext.category
1307      frame = frame.parent
1308    return None
1309
1310  for process in trace.processes:
1311    for dump in process.memory_dumps:
1312      for profile in dump.profiles:
1313        def _CategoryKey(entry):
1314          category = _GetCategory(
1315              process.stack_frame_map.frame_by_id[entry.stack_frame_id])
1316          category_id = 0 if category is None \
1317                        else process.type_name_map.AddType(category)
1318          return HeapProfile.EntryKey(entry.stack_frame_id, category_id)
1319
1320        profile.MergeEntries(_CategoryKey)
1321
1322
1323def FormatSize(size_bytes, show_sign=False):
1324  # Adapted from SO answer: http://goo.gl/Xb0mYx
1325  sign = ''
1326  if size_bytes < 0:
1327    size_bytes = -size_bytes
1328    sign = '-'
1329  elif show_sign:
1330    sign = '+'
1331
1332  if size_bytes == 1:
1333    # because I really hate unnecessary plurals
1334    return "%s1 byte" % sign
1335
1336  suffixes_table = [
1337      ('bytes', 0),
1338      ('KiB', 0),
1339      ('MiB', 1),
1340      ('GiB', 2),
1341      ('TiB', 2),
1342      ('PiB', 2)]
1343
1344  num = float(size_bytes)
1345  for suffix, precision in suffixes_table:
1346    if num < 1024.0:
1347        break
1348    num /= 1024.0
1349
1350  if precision == 0:
1351    formatted_size = "%d" % num
1352  else:
1353    formatted_size = str(round(num, ndigits=precision))
1354
1355  if formatted_size == '0':
1356    sign = ''
1357
1358  return "%s%s %s" % (sign, formatted_size, suffix)
1359
1360
1361WEBTREEMAP_HTML_TEMPLATE = (
1362    '<!DOCTYPE html>'
1363    '<title>$TITLE$</title>'
1364    '<link rel=stylesheet href='
1365        '"https://cdn.rawgit.com/evmar/webtreemap/gh-pages/webtreemap.css"/>'
1366    '<style>'
1367    'body {'
1368      'font-family: sans-serif;'
1369      'font-size: 0.8em;'
1370      'margin: 0em;'
1371    '}'
1372    '#map {'
1373      'width: 100%;'
1374      'height: 100%;'
1375      'position: absolute;'
1376      'cursor: pointer;'
1377      '-webkit-user-select: none;'
1378    '}'
1379    '</style>'
1380    '<center><p>Click on a box to zoom in. '
1381        'Click on the outermost box to zoom out.</p></center>'
1382    '<div id="map"></div>'
1383    '<script src='
1384        '"https://cdn.rawgit.com/evmar/webtreemap/gh-pages/webtreemap.js">'
1385    '</script>'
1386    '<script>'
1387    'var map = document.getElementById("map");'
1388    'appendTreemap(map, $TREEMAP$);'
1389    '</script>'
1390)
1391
1392def GenerateWebTreeMap(trace, base_path):
1393  print 'Generating WebTreeMaps...'
1394
1395  class _TreeNode(object):
1396    def __init__(self):
1397      self.total_size = 0
1398      self.size = 0
1399      self.child_by_name = collections.defaultdict(_TreeNode)
1400
1401    def UpdateTotalSize(self):
1402      total_size = self.size
1403      for child in self.child_by_name.itervalues():
1404        child.UpdateTotalSize()
1405        total_size += child.total_size
1406      self.total_size = total_size
1407
1408    def GetChildForPath(node, path):
1409      for name in path:
1410        node = node.child_by_name[name]
1411      return node
1412
1413    def GenerateTreeMap(self, name):
1414      child_trees = []
1415      for child_name, child in self.child_by_name.iteritems():
1416        child_trees.append(child.GenerateTreeMap(child_name))
1417      return {
1418          'data': {'$area': self.total_size},
1419          'name': '{} ({})'.format(name, FormatSize(self.total_size)),
1420          'children': child_trees
1421      }
1422
1423  for process in trace.processes:
1424    for dump_index, dump in enumerate(process.memory_dumps):
1425      for profile in dump.profiles:
1426        root = _TreeNode()
1427        for entry in profile.entries:
1428          category = process.type_name_map.name_by_id[entry.type_name_id]
1429          if category:
1430            node = root.GetChildForPath(category.split('/'))
1431            # Add function as leaf node
1432            frame = process.stack_frame_map.frame_by_id[entry.stack_frame_id]
1433            while frame is not None:
1434              if frame.ext.category:
1435                node = node.GetChildForPath([frame.name])
1436                break
1437              frame = frame.parent
1438            node.size += entry.size
1439
1440        root.UpdateTotalSize()
1441
1442        output_path = '{}-#{}-{}-{}.html'.format(
1443            base_path, dump_index, process.unique_name, profile.allocator_name)
1444
1445        print '  Writing {}'.format(output_path)
1446        with open(output_path, 'w') as output_file:
1447          title = '{} / {}'.format(process.unique_name, profile.allocator_name)
1448          treemap = root.GenerateTreeMap('/')
1449          html = WEBTREEMAP_HTML_TEMPLATE.\
1450            replace('$TITLE$', title).\
1451            replace('$TREEMAP$', json.dumps(treemap))
1452          output_file.write(html)
1453
1454
1455def OpenTraceFile(file_path, mode):
1456  if file_path.endswith('.gz'):
1457    return gzip.open(file_path, mode + 'b')
1458  else:
1459    return open(file_path, mode + 't')
1460
1461
1462# Suffix used for backup files.
1463BACKUP_FILE_TAG = '.BACKUP'
1464
1465def main():
1466  class MultilineHelpFormatter(argparse.HelpFormatter):
1467    def _split_lines(self, text, width):
1468      extra_lines = []
1469      if '\n' in text:
1470        lines = text.splitlines()
1471        text = lines[0]
1472        extra_lines = lines[1:]
1473      return super(MultilineHelpFormatter, self)._split_lines(text, width) + \
1474             extra_lines
1475
1476  parser = argparse.ArgumentParser(formatter_class=MultilineHelpFormatter)
1477  parser.add_argument(
1478      'file',
1479      help='Trace file to symbolize (.json or .json.gz)')
1480
1481  parser.add_argument(
1482      '--no-backup', dest='backup', default='true', action='store_false',
1483      help="Don't create {} files".format(BACKUP_FILE_TAG))
1484
1485  parser.add_argument(
1486      '--output-directory',
1487      help='The path to the build output directory, such as out/Debug.')
1488
1489  # Arguments below are not applicable to trace files with heap dumps
1490  # in legacy format.
1491
1492  parser.add_argument(
1493      '--collapse-threshold', type=int, default=0,
1494      help=('Collapse stack branches smaller than then value (in bytes).'
1495            'Default is 0.'))
1496
1497  parser.add_argument(
1498      '--categorize', action='store_true',
1499      help='Categorize allocations based on backtrace source paths.')
1500
1501  parser.add_argument(
1502      '--trivial-path-list', dest='trivial_paths', nargs='+',
1503      default=TRIVIAL_PATHS,
1504      help=('List of source paths to skip during categorization. By default '
1505            'the following paths are skipped:\n' +
1506            '\n'.join('  ' + s for s in TRIVIAL_PATHS)))
1507
1508  parser.add_argument(
1509      '--trivial-path', dest='extra_trivial_paths', default=[], action='append',
1510      help=('Extra source path to skip during categorization. Can be '
1511            'specified multiple times.'))
1512
1513  parser.add_argument(
1514      '--category-slice', type=int, default=0,
1515      help=('Number of path components to use for categorization.'
1516            ' Default is 0, which uses all components.'))
1517
1518  parser.add_argument(
1519      '--treemap',
1520      action='store_true',
1521      help='Generate Treemap HTMLs.')
1522
1523  symbolizer = Symbolizer()
1524  if symbolizer.symbolizer_path is None:
1525    sys.exit("Can't symbolize - no %s in PATH." % symbolizer.binary)
1526
1527  options = parser.parse_args()
1528
1529  trace_file_path = options.file
1530
1531  print 'Reading trace file...'
1532  with OpenTraceFile(trace_file_path, 'r') as trace_file:
1533    trace = Trace(json.load(trace_file))
1534
1535  if (trace.heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY and (
1536      options.categorize or options.treemap or
1537      options.collapse_threshold != 0)):
1538    parser.error(
1539        "The trace's heap data is in legacy format - categorization, "
1540        'Treemap generation, and collapsing of stack branches are not '
1541        'supported.')
1542
1543  Symbolize(options, trace, symbolizer)
1544  if options.categorize:
1545    Categorize(options, trace)
1546  if options.treemap:
1547    GenerateWebTreeMap(trace, trace_file_path)
1548
1549  if trace.modified:
1550    trace.ApplyModifications()
1551
1552    if options.backup:
1553      backup_file_path = trace_file_path + BACKUP_FILE_TAG
1554      if os.path.exists(backup_file_path):
1555        for i in itertools.count(1):
1556          unique_file_path = '{}{}'.format(backup_file_path, i)
1557          if not os.path.exists(unique_file_path):
1558            backup_file_path = unique_file_path
1559            break
1560      print 'Backing up trace file to {}'.format(backup_file_path)
1561      os.rename(trace_file_path, backup_file_path)
1562
1563    print 'Updating the trace file...'
1564    with OpenTraceFile(trace_file_path, 'w') as trace_file:
1565      json.dump(trace.Serialize(), trace_file)
1566  else:
1567    print 'No modifications were made - not updating the trace file.'
1568
1569
1570if __name__ == '__main__':
1571  main()
1572