1# Copyright 2019 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import datetime
6import logging
7import os
8import platform
9import re
10import subprocess
11
12from telemetry.internal.backends.chrome import minidump_symbolizer
13from telemetry.internal.results import artifact_logger
14from telemetry.internal.util import local_first_binary_manager
15
16
# Name of the directory that holds unstripped shared libraries.
_UNSTRIPPED_LIB_DIR = 'lib.unstripped'

# Candidate locations, relative to the build directory, of binaries that still
# carry symbols and can therefore be dumped in order to symbolize a minidump.
_POSSIBLE_SYMBOL_BINARY_DIRECTORIES = [
    _UNSTRIPPED_LIB_DIR,
    os.path.join('android_clang_arm', _UNSTRIPPED_LIB_DIR),
    os.path.join('android_clang_arm64', _UNSTRIPPED_LIB_DIR),
]

# Keys are Crashpad/Breakpad processor architecture values, written as the hex
# representation of the values in MDCPUArchitecture from Breakpad's
# minidump_format.h. Each value is a regular expression that matches what the
# "file" utility prints for a .so compiled for that architecture.
_BREAKPAD_ARCH_TO_FILE_REGEX = {
    '0x5': r'.*32-bit.*ARM.*',  # 32-bit ARM.
    '0xc': r'.*64-bit.*ARM.*',  # 64-bit ARM.
}

# Matches the minidump_dump line that reports the processor architecture, e.g.
# " processor_architecture = 0xc ". The value is captured in the "arch" group.
_PROCESSOR_ARCH_REGEX = r'\s*processor_architecture\s*\=\s*(?P<arch>\w*)\s*'
39
40
class AndroidMinidumpSymbolizer(minidump_symbolizer.MinidumpSymbolizer):
  """Minidump symbolizer for dumps that were produced on Android.

  Library names and the processor architecture are read from the output of
  the minidump_dump tool, and are then used to pick the directory under the
  build directory whose unstripped libraries match the dump.
  """

  # Matches the code_file line of an MDRawModule entry in minidump_dump output
  # when the module is given as a bare "lib<name>.so" file name.
  _MODULE_LIBRARY_LINE_RE = re.compile(
      r'[(]code_file[)]\s+= "(?P<library_name>lib[^. ]+\.so)"')

  def __init__(self, dump_finder, build_dir, symbols_dir=None):
    """Class for handling all minidump symbolizing code on Android.

    Args:
      dump_finder: The minidump_finder.MinidumpFinder instance that is being
          used to find minidumps for the test.
      build_dir: The directory containing Chromium build artifacts to generate
          symbols from.
      symbols_dir: An optional path to a directory to store symbols for re-use.
          Re-using symbols will result in faster symbolization times, but the
          provided directory *must* be unique per browser binary, e.g. by
          including the hash of the binary in the directory name.
    """
    # Map from minidump path (string) to minidump_dump output (string).
    self._minidump_dump_output = {}
    # Map from minidump path (string) to the directory that should be used when
    # looking for symbol binaries (string), or None if the lookup failed.
    self._minidump_symbol_binaries_directories = {}
    # We use the OS/arch of the host, not the device.
    super(AndroidMinidumpSymbolizer, self).__init__(
        platform.system().lower(), platform.machine(), dump_finder, build_dir,
        symbols_dir=symbols_dir)

  def SymbolizeMinidump(self, minidump):
    """Symbolizes |minidump| if the host and configuration allow it.

    Args:
      minidump: The path to the minidump being symbolized.

    Returns:
      The result of the base class's SymbolizeMinidump(), or None if the host
      is not Linux or no build directory is available.
    """
    if platform.system() != 'Linux':
      logging.warning(
          'Cannot get Android stack traces unless running on a Linux host.')
      return None
    if not self._build_dir:
      logging.warning(
          'Cannot get Android stack traces without build directory.')
      return None
    return super(AndroidMinidumpSymbolizer, self).SymbolizeMinidump(minidump)

  def GetSymbolBinaries(self, minidump):
    """Returns a list of paths to binaries where symbols may be located.

    Args:
      minidump: The path to the minidump being symbolized.

    Returns:
      A list of paths, one per library named in the minidump, all within the
      symbol binary directory chosen for |minidump|. The list is empty if no
      such directory could be determined.
    """
    libraries = self._ExtractLibraryNamesFromDump(minidump)
    symbol_binary_dir = self._GetSymbolBinaryDirectory(minidump, libraries)
    if not symbol_binary_dir:
      return []

    return [os.path.join(symbol_binary_dir, lib) for lib in libraries]

  def GetBreakpadPlatformOverride(self):
    """Returns the platform name to use for Breakpad, always 'android'."""
    return 'android'

  @staticmethod
  def _EnsureText(data):
    """Returns subprocess output as a native string.

    On Python 3 subprocess pipes yield bytes, which can neither be
    concatenated with str nor matched against str regular expressions. On
    Python 2 the output is already str and is returned unchanged.

    Args:
      data: Output read from a subprocess, as bytes or str.

    Returns:
      |data| as str. Undecodable bytes are replaced rather than raising.
    """
    if isinstance(data, str):
      return data
    return data.decode('utf-8', 'replace')

  def _ExtractLibraryNamesFromDump(self, minidump):
    """Extracts library names that may contain symbols from the minidump.

    This is a duplicate of the logic in Chromium's
    //build/android/stacktrace/crashpad_stackwalker.py.

    Args:
      minidump: The path to the minidump being analyzed.

    Returns:
      A list of strings containing library names of interest for symbols. If
      the minidump_dump output is unavailable or names no library, the list
      contains only the default library name.
    """
    default_library_name = 'libmonochrome.so'

    minidump_dump_output = self._GetMinidumpDumpOutput(minidump)
    if not minidump_dump_output:
      logging.warning(
          'Could not get minidump_dump output, defaulting to library %s',
          default_library_name)
      return [default_library_name]

    library_names = []
    # Only lines between an "MDRawModule" header and the next blank line are
    # considered.
    in_module = False
    for line in minidump_dump_output.splitlines():
      line = line.lstrip()
      if line == 'MDRawModule':
        in_module = True
        continue
      if line == '':
        in_module = False
        continue
      if in_module:
        m = self._MODULE_LIBRARY_LINE_RE.match(line)
        if m:
          library_names.append(m.group('library_name'))
    if not library_names:
      logging.warning(
          'Could not find any library name in the dump, '
          'default to: %s', default_library_name)
      return [default_library_name]
    return library_names

  @staticmethod
  def _ExtractProcessorArchitecture(minidump_dump_output):
    """Finds the processor architecture reported in minidump_dump output.

    Args:
      minidump_dump_output: A string containing the output of minidump_dump.

    Returns:
      The lowercased architecture value from the first matching line (which
      may be an empty string), or None if no line matches.
    """
    matcher = re.compile(_PROCESSOR_ARCH_REGEX)
    for line in minidump_dump_output.splitlines():
      match = matcher.match(line)
      if match:
        return match.group('arch').lower()
    return None

  def _DirectoryHasBinaryForArch(self, directory, libraries, file_matcher):
    """Checks whether |directory| holds a wanted library of the right arch.

    Args:
      directory: The path of the directory to inspect.
      libraries: A collection of library file names of interest.
      file_matcher: A compiled regular expression that matches the output of
          "file" for a binary of the wanted architecture.

    Returns:
      True if "file" reports a matching architecture for at least one of
      |libraries| found in |directory|, otherwise False.
    """
    for filename in os.listdir(directory):
      if filename not in libraries:
        continue
      binary_path = os.path.join(directory, filename)
      try:
        file_output = subprocess.check_output(
            ['file', binary_path], stderr=subprocess.STDOUT)
      except (OSError, subprocess.CalledProcessError) as e:
        # A binary that cannot be inspected should not abort symbolization,
        # so just move on to the next candidate.
        logging.warning('Unable to run "file" on %s: %s', binary_path, e)
        continue
      # check_output() returns bytes on Python 3, the regex is a str pattern.
      if file_matcher.match(self._EnsureText(file_output)):
        return True
    return False

  def _GetSymbolBinaryDirectory(self, minidump, libraries):
    """Gets the directory that should contain symbol binaries for |minidump|.

    The result, including a failed lookup, is cached per minidump. The one
    exception is when the minidump_dump output is unavailable: nothing is
    cached then, so a later call can try again.

    Args:
      minidump: The path to the minidump being analyzed.
      libraries: A list of library names that are within the minidump.

    Returns:
      A string containing the path to the directory that should contain the
      symbol binaries that can be dumped to symbolize |minidump|. Returns None
      if the directory is unable to be determined for some reason.
    """
    if minidump in self._minidump_symbol_binaries_directories:
      return self._minidump_symbol_binaries_directories[minidump]

    minidump_dump_output = self._GetMinidumpDumpOutput(minidump)
    if not minidump_dump_output:
      logging.error(
          'Unable to get minidump_dump output for minidump %s, cannot '
          'determine the symbol binary directory.', minidump)
      return None

    # Get the processor architecture reported by the minidump.
    arch = self._ExtractProcessorArchitecture(minidump_dump_output)
    if not arch:
      logging.error('Unable to find processor architecture for minidump %s',
                    minidump)
      self._minidump_symbol_binaries_directories[minidump] = None
      return None
    if arch not in _BREAKPAD_ARCH_TO_FILE_REGEX:
      logging.error(
          'Unsupported processor architecture %s for minidump %s. This is '
          'likely fixable by adding the correct mapping for the architecture '
          'in android_minidump_symbolizer._BREAKPAD_ARCH_TO_FILE_REGEX.',
          arch, minidump)
      self._minidump_symbol_binaries_directories[minidump] = None
      return None

    # Look for a directory that contains binaries with the correct
    # architecture. Candidates are tried in list order and the first match
    # wins, so no further "file" processes are spawned once one is found.
    file_matcher = re.compile(_BREAKPAD_ARCH_TO_FILE_REGEX[arch])
    wanted_libraries = set(libraries)
    symbol_dir = None
    for symbol_subdir in _POSSIBLE_SYMBOL_BINARY_DIRECTORIES:
      possible_symbol_dir = os.path.join(self._build_dir, symbol_subdir)
      if not os.path.isdir(possible_symbol_dir):
        continue
      if self._DirectoryHasBinaryForArch(
          possible_symbol_dir, wanted_libraries, file_matcher):
        symbol_dir = possible_symbol_dir
        break

    if not symbol_dir:
      logging.error(
          'Unable to find suitable symbol binary directory for architecture '
          '%s. This is likely fixable by adding the correct directory to '
          'android_minidump_symbolizer._POSSIBLE_SYMBOL_BINARY_DIRECTORIES.',
          arch)
    self._minidump_symbol_binaries_directories[minidump] = symbol_dir
    return symbol_dir

  def _GetMinidumpDumpOutput(self, minidump):
    """Runs minidump_dump on the given minidump.

    Caches the result for re-use.

    Args:
      minidump: The path to the minidump being analyzed.

    Returns:
      A string containing the stdout of minidump_dump followed by a newline
      and its stderr, or None if minidump_dump could not be run.
    """
    if minidump in self._minidump_dump_output:
      logging.debug('Returning cached minidump_dump output for %s', minidump)
      return self._minidump_dump_output[minidump]

    dumper_path = local_first_binary_manager.GetInstance().FetchPath(
        'minidump_dump')
    if not os.access(dumper_path, os.X_OK):
      logging.warning('Cannot run minidump_dump because %s is not found.',
                      dumper_path)
      return None

    # Using subprocess.check_output with stdout/stderr mixed can result in
    # errors due to log messages showing up in the minidump_dump output. So,
    # use Popen and combine into a single string afterwards.
    p = subprocess.Popen(
        [dumper_path, minidump], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    raw_stdout, raw_stderr = p.communicate()
    # communicate() returns bytes on Python 3, so convert before joining with
    # a str separator.
    output = '\n'.join(
        [self._EnsureText(raw_stdout), self._EnsureText(raw_stderr)])

    if p.returncode != 0:
      # Dumper errors often do not affect stack walkability, just a warning.
      # It's possible for the same stack to be symbolized multiple times, so
      # add a timestamp suffix to prevent artifact collisions.
      now = datetime.datetime.now()
      suffix = now.strftime('%Y-%m-%d-%H-%M-%S')
      artifact_name = 'dumper_errors/%s-%s' % (
          os.path.basename(minidump), suffix)
      logging.warning(
          'Reading minidump failed, but likely not actually an issue. Saving '
          'output to artifact %s', artifact_name)
      artifact_logger.CreateArtifact(artifact_name, output)
    # The joined output always contains at least the separator, so it is never
    # empty and is always worth caching.
    self._minidump_dump_output[minidump] = output
    return output
242