# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from __future__ import absolute_import, division

import hashlib
import os
import platform
import subprocess
import six
import zipfile
from distutils import spawn
from mozlog import get_proxy_logger

from .symFileManager import SymFileManager
from .symbolicationRequest import SymbolicationRequest

LOG = get_proxy_logger("profiler")

if six.PY2:
    # Import for Python 2
    from cStringIO import StringIO as sio
    from urllib2 import urlopen
else:
    # Import for Python 3
    from io import BytesIO as sio
    from urllib.request import urlopen

    # Symbolication is broken when using type 'str' in python 2.7, so we use 'basestring'.
    # But for python 3.0 compatibility, 'basestring' isn't defined, but the 'str' type works.
    # So we force 'basestring' to 'str'.
    basestring = str


class SymbolError(Exception):
    """Raised when a symbol dumper cannot find the external tool it needs."""


class OSXSymbolDumper:
    """Dumps symbols for a library using the ``dump_syms_mac`` binary.

    The binary is expected to live next to this module; construction fails
    with SymbolError when it is missing.
    """

    def __init__(self):
        self.dump_syms_bin = os.path.join(os.path.dirname(__file__), "dump_syms_mac")
        if not os.path.exists(self.dump_syms_bin):
            raise SymbolError("No dump_syms_mac binary in this directory")

    def store_symbols(
        self, lib_path, expected_breakpad_id, output_filename_without_extension
    ):
        """
        Returns the filename at which the .sym file was created, or None if no
        symbols were dumped.

        Each architecture reported by ``lipo -info`` is dumped in turn; the
        first one whose module line carries ``expected_breakpad_id`` is written
        to ``output_filename_without_extension + ".sym"``.
        """
        output_filename = output_filename_without_extension + ".sym"

        def get_archs(filename):
            """
            Find the list of architectures present in a Mach-O file.

            Returns a (possibly empty) list of byte strings.
            """
            proc = subprocess.Popen(
                ["lipo", "-info", filename], stdout=subprocess.PIPE
            )
            output = proc.communicate()[0]
            if proc.returncode != 0:
                # lipo could not read the file; there is nothing to dump.
                return []
            # The architecture names follow the last colon of the output.
            # Splitting from the right keeps this working when the file path
            # itself contains a colon.
            return output.rsplit(b":", 1)[-1].split()

        def process_file(arch):
            """Dump symbols for one architecture; return the .sym path or None."""
            proc = subprocess.Popen(
                [self.dump_syms_bin, "-a", arch, lib_path],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
            stdout, _ = proc.communicate()
            if proc.returncode != 0:
                return None

            lines = stdout.splitlines()
            if not lines:
                return None

            # The first line is expected to have five space-separated fields,
            # the fourth of which is the breakpad id.
            bits = lines[0].split(b" ", 4)
            if len(bits) != 5:
                return None
            actual_breakpad_id = bits[3]

            # bytes.decode works on both Python 2 and Python 3, unlike the
            # two-argument form of str().
            if actual_breakpad_id.decode("utf-8") != expected_breakpad_id:
                return None

            with open(output_filename, "wb") as f:
                f.write(stdout)
            return output_filename

        for arch in get_archs(lib_path):
            result = process_file(arch)
            if result is not None:
                return result
        return None


class LinuxSymbolDumper:
    """Dumps symbols for a library using ``nm``.

    Construction fails with SymbolError when ``nm`` cannot be found.
    """

    def __init__(self):
        self.nm = spawn.find_executable("nm")
        if not self.nm:
            raise SymbolError("Could not find nm, necessary for symbol dumping")

    def store_symbols(self, lib_path, breakpad_id, output_filename_without_extension):
        """
        Returns the filename at which the .nmsym file was created, or None if
        no symbols were dumped.

        ``breakpad_id`` is accepted for interface parity with the other dumper
        but is not used here.
        """
        output_filename = output_filename_without_extension + ".nmsym"

        proc = subprocess.Popen(
            [self.nm, "--demangle", lib_path],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        stdout, _ = proc.communicate()

        if proc.returncode != 0:
            return None

        with open(output_filename, "wb") as f:
            f.write(stdout)

            # Append nm -D output to the file. On Linux, most system libraries
            # have no "normal" symbols, but they have "dynamic" symbols, which
            # nm -D shows.
            proc = subprocess.Popen(
                [self.nm, "--demangle", "-D", lib_path],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
            stdout, _ = proc.communicate()
            if proc.returncode == 0:
                f.write(stdout)
        return output_filename


class ProfileSymbolicator:
    """This class orchestrates symbolication for a Gecko profile.

    It can be used by multiple pieces of testing infrastructure that generate Gecko
    performance profiles.

    Args:
        options (obj): See SymFileManager for details on these options.
    """

    def __init__(self, options):
        self.options = options
        self.sym_file_manager = SymFileManager(self.options)
        self.symbol_dumper = self.get_symbol_dumper()

    def get_symbol_dumper(self):
        """Return a symbol dumper for the current platform.

        Returns None on platforms other than Darwin and Linux, or when the
        dumper's constructor raises SymbolError.
        """
        system = platform.system()
        try:
            if system == "Darwin":
                return OSXSymbolDumper()
            if system == "Linux":
                return LinuxSymbolDumper()
        except SymbolError:
            pass
        return None

    def integrate_symbol_zip_from_url(self, symbol_zip_url):
        """Download a symbol zip and extract it into the FIREFOX symbol path.

        Does nothing when a marker file for this URL already exists. An
        IOError during the download or extraction is logged and swallowed.
        """
        if self.have_integrated(symbol_zip_url):
            return
        LOG.info(
            "Retrieving symbol zip from {symbol_zip_url}...".format(
                symbol_zip_url=symbol_zip_url
            )
        )
        try:
            response = urlopen(symbol_zip_url, None, 30)
            try:
                payload = response.read()
            finally:
                # Release the connection even when read() fails.
                response.close()
            with zipfile.ZipFile(sio(payload)) as zf:
                self.integrate_symbol_zip(zf)
            self._create_file_if_not_exists(self._marker_file(symbol_zip_url))
        except IOError:
            LOG.info("Symbol zip request failed.")

    def integrate_symbol_zip_from_file(self, filename):
        """Extract a local symbol zip into the FIREFOX symbol path.

        Does nothing when a marker file for this filename already exists.
        """
        if self.have_integrated(filename):
            return
        with open(filename, "rb") as f:
            with zipfile.ZipFile(f) as zf:
                self.integrate_symbol_zip(zf)
        self._create_file_if_not_exists(self._marker_file(filename))

    def _create_file_if_not_exists(self, filename):
        """Best-effort creation of ``filename`` and its parent directory.

        Failures are deliberately ignored.
        """
        try:
            os.makedirs(os.path.dirname(filename))
        except OSError:
            # Typically the directory already exists.
            pass
        try:
            open(filename, "a").close()
        except IOError:
            pass

    def integrate_symbol_zip(self, symbol_zip_file):
        """Extract every member of an open ZipFile into the FIREFOX symbol path."""
        symbol_zip_file.extractall(self.options["symbolPaths"]["FIREFOX"])

    def _marker_file(self, symbol_zip_url):
        """Return the marker path for a zip: ``.markers/<sha1 of its name>``."""
        marker_dir = os.path.join(self.options["symbolPaths"]["FIREFOX"], ".markers")
        return os.path.join(
            marker_dir, hashlib.sha1(symbol_zip_url.encode("utf-8")).hexdigest()
        )

    def have_integrated(self, symbol_zip_url):
        """Return True when the marker file for this zip exists."""
        return os.path.isfile(self._marker_file(symbol_zip_url))

    def get_unknown_modules_in_profile(self, profile_json):
        """Return the entries of ``profile_json["libs"]`` without known symbols.

        Returns an empty list when the profile has no "libs" key or the
        symbolication request is not valid.
        """
        if "libs" not in profile_json:
            return []
        shared_libraries = profile_json["libs"]
        memoryMap = [[lib["debugName"], lib["breakpadId"]] for lib in shared_libraries]

        rawRequest = {
            "stacks": [[]],
            "memoryMap": memoryMap,
            "version": 4,
            "symbolSources": ["FIREFOX", "WINDOWS"],
        }
        request = SymbolicationRequest(self.sym_file_manager, rawRequest)
        if not request.isValidRequest:
            return []
        request.Symbolicate(0)  # This sets request.knownModules

        return [
            lib
            for i, lib in enumerate(shared_libraries)
            if not request.knownModules[i]
        ]

    def dump_and_integrate_missing_symbols(self, profile_json, symbol_zip_path):
        """Dump symbols for every unknown library in the profile.

        Does nothing when no symbol dumper is available or every module is
        already known.
        """
        if not self.symbol_dumper:
            return

        unknown_modules = self.get_unknown_modules_in_profile(profile_json)
        if not unknown_modules:
            return

        # We integrate the dumped symbols by dumping them directly into our
        # symbol directory.
        output_dir = self.options["symbolPaths"]["FIREFOX"]

        # Additionally, we add all dumped symbol files to the missingsymbols
        # zip file.
        with zipfile.ZipFile(symbol_zip_path, "a", zipfile.ZIP_DEFLATED) as zf:
            for lib in unknown_modules:
                self.dump_and_integrate_symbols_for_lib(lib, output_dir, zf)

    def dump_and_integrate_symbols_for_lib(self, lib, output_dir, zip):
        """Make symbols for ``lib`` available in ``output_dir`` and in ``zip``.

        If the zip already holds a .sym or .nmsym for the library it is
        extracted into ``output_dir``; otherwise the symbols are dumped from
        ``lib["path"]`` and the resulting file is added to the zip.

        The ``zip`` parameter (an open zipfile.ZipFile) shadows the builtin;
        the name is kept so existing callers keep working.
        """
        name = lib["debugName"]
        expected_name_without_extension = os.path.join(name, lib["breakpadId"], name)
        # Read the archive listing once instead of on every membership test.
        archived_names = set(zip.namelist())
        for extension in [".sym", ".nmsym"]:
            expected_name = expected_name_without_extension + extension
            if expected_name in archived_names:
                # No need to dump the symbols again if we already have it in
                # the missingsymbols zip file from a previous run.
                zip.extract(expected_name, output_dir)
                return

        lib_path = lib["path"]
        if not os.path.exists(lib_path):
            return

        output_filename_without_extension = os.path.join(
            output_dir, expected_name_without_extension
        )
        store_path = os.path.dirname(output_filename_without_extension)
        if not os.path.exists(store_path):
            os.makedirs(store_path)

        # Dump the symbols.
        sym_file = self.symbol_dumper.store_symbols(
            lib_path, lib["breakpadId"], output_filename_without_extension
        )
        if sym_file:
            # Strip the "<output_dir>/" prefix to get the in-archive name.
            rootlen = len(os.path.join(output_dir, "_")) - 1
            output_filename = sym_file[rootlen:]
            if output_filename not in archived_names:
                zip.write(sym_file, output_filename)

    def symbolicate_profile(self, profile_json):
        """Replace address strings in the profile with symbols, in place.

        Recurses into each entry of ``profile_json["processes"]`` when that
        key is present. Profiles without a "libs" key are left untouched.
        """
        if "libs" not in profile_json:
            return

        shared_libraries = profile_json["libs"]
        addresses = self._find_addresses(profile_json)
        symbols_to_resolve = self._assign_symbols_to_libraries(
            addresses, shared_libraries
        )
        symbolication_table = self._resolve_symbols(symbols_to_resolve)
        self._substitute_symbols(profile_json, symbolication_table)

        # Not every profile has sub-processes; treat a missing key as none.
        for process in profile_json.get("processes", []):
            self.symbolicate_profile(process)

    def _find_addresses(self, profile_json):
        """Collect every stringTable entry starting with "0x", across threads.

        Threads that are plain strings are skipped.
        """
        addresses = set()
        for thread in profile_json["threads"]:
            if isinstance(thread, basestring):
                continue
            addresses.update(s for s in thread["stringTable"] if s[0:2] == "0x")
        return addresses

    def _substitute_symbols(self, profile_json, symbolication_table):
        """Rewrite each thread's stringTable using ``symbolication_table``.

        Entries without a mapping are kept unchanged.
        """
        for thread in profile_json["threads"]:
            if isinstance(thread, basestring):
                continue
            string_table = thread["stringTable"]
            for i, s in enumerate(string_table):
                string_table[i] = symbolication_table.get(s, s)

    def _get_containing_library(self, address, libs):
        """Binary-search ``libs`` for the one whose [start, end) holds ``address``.

        Returns None when no library contains the address.
        NOTE(review): assumes ``libs`` is sorted by address and
        non-overlapping; confirm against the profile producer.
        """
        left = 0
        right = len(libs) - 1
        while left <= right:
            mid = (left + right) // 2
            if address >= libs[mid]["end"]:
                left = mid + 1
            elif address < libs[mid]["start"]:
                right = mid - 1
            else:
                return libs[mid]
        return None

    def _assign_symbols_to_libraries(self, addresses, shared_libraries):
        """Group address strings by the library that contains them.

        Returns a list of ``{"library": lib, "symbols": set_of_addresses}``
        dicts; addresses outside every library are dropped.
        """
        libs_with_symbols = {}
        for address in addresses:
            lib = self._get_containing_library(int(address, 0), shared_libraries)
            if not lib:
                continue
            entry = libs_with_symbols.setdefault(
                lib["start"], {"library": lib, "symbols": set()}
            )
            entry["symbols"].add(address)
        # Materialise the result so Python 2 and Python 3 return the same type.
        return list(libs_with_symbols.values())

    def _resolve_symbols(self, symbols_to_resolve):
        """Symbolicate the grouped addresses; return {address string: symbol}.

        Returns an empty dict when the symbolication request is not valid.
        """
        memoryMap = []
        processedStack = []
        all_symbols = []
        for moduleIndex, library_with_symbols in enumerate(symbols_to_resolve):
            lib = library_with_symbols["library"]
            symbols = library_with_symbols["symbols"]
            memoryMap.append([lib["debugName"], lib["breakpadId"]])
            # Both sequences are built from the same iteration of the set so
            # they stay index-aligned for the zip() below.
            for symbol in symbols:
                all_symbols.append(symbol)
                processedStack.append([moduleIndex, int(symbol, 0) - lib["start"]])

        rawRequest = {
            "stacks": [processedStack],
            "memoryMap": memoryMap,
            "version": 4,
            "symbolSources": ["FIREFOX", "WINDOWS"],
        }
        request = SymbolicationRequest(self.sym_file_manager, rawRequest)
        if not request.isValidRequest:
            return {}
        symbolicated_stack = request.Symbolicate(0)
        return dict(zip(all_symbols, symbolicated_stack))