1# This Source Code Form is subject to the terms of the Mozilla Public
2# License, v. 2.0. If a copy of the MPL was not distributed with this
3# file, You can obtain one at http://mozilla.org/MPL/2.0/.
4from __future__ import absolute_import, division
5
6import hashlib
7import os
8import platform
9import subprocess
10import six
11import zipfile
12from distutils import spawn
13from mozlog import get_proxy_logger
14
15from .symFileManager import SymFileManager
16from .symbolicationRequest import SymbolicationRequest
17
18LOG = get_proxy_logger("profiler")
19
20if six.PY2:
21    # Import for Python 2
22    from cStringIO import StringIO as sio
23    from urllib2 import urlopen
24else:
25    # Import for Python 3
26    from io import BytesIO as sio
27    from urllib.request import urlopen
28
    # Symbolication is broken when using the 'str' type in Python 2.7, so the
    # code uses 'basestring' there. Python 3 does not define 'basestring', but
    # its 'str' type works, so we alias 'basestring' to 'str' here.
32    basestring = str
33
34
class SymbolError(Exception):
    """Raised by the symbol dumpers when a tool they depend on is missing."""
37
38
class OSXSymbolDumper:
    """Dumps symbols for Mach-O libraries by running the dump_syms_mac binary."""

    def __init__(self):
        """
        Locate dump_syms_mac next to this module.

        Raises SymbolError if the binary does not exist there.
        """
        self.dump_syms_bin = os.path.join(os.path.dirname(__file__), "dump_syms_mac")
        if not os.path.exists(self.dump_syms_bin):
            raise SymbolError("No dump_syms_mac binary in this directory")

    def store_symbols(
        self, lib_path, expected_breakpad_id, output_filename_without_extension
    ):
        """
        Returns the filename at which the .sym file was created, or None if no
        symbols were dumped.

        Each architecture that lipo reports for lib_path is tried in turn; the
        first one whose dumped breakpad id equals expected_breakpad_id is
        written to output_filename_without_extension + ".sym".
        """
        output_filename = output_filename_without_extension + ".sym"

        for arch in self._get_archs(lib_path):
            result = self._dump_arch(
                arch, lib_path, expected_breakpad_id, output_filename
            )
            if result is not None:
                return result
        return None

    @staticmethod
    def _get_archs(filename):
        """
        Find the list of architectures present in a Mach-O file.

        Returns a (possibly empty) list of byte strings. An empty list is
        returned when lipo exits with a non-zero status or prints nothing.
        """
        proc = subprocess.Popen(["lipo", "-info", filename], stdout=subprocess.PIPE)
        stdout = proc.communicate()[0]
        if proc.returncode != 0:
            return []
        # The architecture names follow the LAST colon of the output. Splitting
        # from the right keeps this correct when the file path itself contains
        # a colon, and yields an empty list for empty output.
        return stdout.rsplit(b":", 1)[-1].split()

    def _dump_arch(self, arch, lib_path, expected_breakpad_id, output_filename):
        """
        Dump the symbols of one architecture of lib_path.

        Returns output_filename if the dump succeeded and its breakpad id
        matches expected_breakpad_id, otherwise None (nothing is written).
        """
        proc = subprocess.Popen(
            [self.dump_syms_bin, "-a", arch, lib_path],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        stdout, _ = proc.communicate()
        if proc.returncode != 0:
            return None

        lines = stdout.splitlines()
        if not lines:
            # A successful exit with no output carries no module line to check.
            return None

        # The first line must consist of five space-separated fields; the
        # fourth one is the breakpad id of the dumped architecture.
        bits = lines[0].split(b" ", 4)
        if len(bits) != 5:
            return None
        actual_breakpad_id = bits[3]

        # bytes.decode() works on both Python 2 and 3, whereas the two-argument
        # form of str() only exists on Python 3.
        if actual_breakpad_id.decode("utf-8") != expected_breakpad_id:
            return None

        with open(output_filename, "wb") as f:
            f.write(stdout)
        return output_filename
94
95
class LinuxSymbolDumper:
    """Dumps symbols for a library by saving the output of ``nm``."""

    def __init__(self):
        """Locate the nm executable; raises SymbolError when it is not found."""
        self.nm = spawn.find_executable("nm")
        if self.nm:
            return
        raise SymbolError("Could not find nm, necessary for symbol dumping")

    def store_symbols(self, lib_path, breakpad_id, output_filename_without_extension):
        """
        Returns the filename at which the .sym file was created, or None if no
        symbols were dumped.

        The file is named output_filename_without_extension + ".nmsym".
        breakpad_id is accepted but not used by this dumper.
        """
        target = output_filename_without_extension + ".nmsym"

        def run_nm(*extra_flags):
            # Run "nm --demangle [extra flags] lib_path" and hand back the exit
            # status together with the captured standard output.
            command = [self.nm, "--demangle"]
            command.extend(extra_flags)
            command.append(lib_path)
            child = subprocess.Popen(
                command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
            )
            captured, _ = child.communicate()
            return child.returncode, captured

        status, regular_symbols = run_nm()
        if status != 0:
            return None

        with open(target, "wb") as sym_file:
            sym_file.write(regular_symbols)

            # Append nm -D output to the file. On Linux, most system libraries
            # have no "normal" symbols, but they have "dynamic" symbols, which
            # nm -D shows.
            status, dynamic_symbols = run_nm("-D")
            if status == 0:
                sym_file.write(dynamic_symbols)
        return target
134
135
class ProfileSymbolicator:
    """This class orchestrates symbolication for a Gecko profile.

    It can be used by multiple pieces of testing infrastructure that generate Gecko
    performance profiles.

    Args:
        options (obj): See SymFileManager for details on these options.
    """

    def __init__(self, options):
        self.options = options
        self.sym_file_manager = SymFileManager(self.options)
        self.symbol_dumper = self.get_symbol_dumper()

    def get_symbol_dumper(self):
        """Return a symbol dumper for the current platform, or None.

        None is returned when the platform is neither Darwin nor Linux, or when
        constructing the platform's dumper raises SymbolError.
        """
        try:
            system = platform.system()
            if system == "Darwin":
                return OSXSymbolDumper()
            if system == "Linux":
                return LinuxSymbolDumper()
        except SymbolError:
            return None
        return None

    def integrate_symbol_zip_from_url(self, symbol_zip_url):
        """Download a symbol zip and extract it into the FIREFOX symbol path.

        Does nothing if a marker file for this URL already exists. An IOError
        during the download or extraction is logged and swallowed; in that case
        no marker file is created.
        """
        if self.have_integrated(symbol_zip_url):
            return
        LOG.info(
            "Retrieving symbol zip from {symbol_zip_url}...".format(
                symbol_zip_url=symbol_zip_url
            )
        )
        try:
            response = urlopen(symbol_zip_url, None, 30)
            try:
                payload = response.read()
            finally:
                # Always release the connection, even when reading fails.
                response.close()
            with zipfile.ZipFile(sio(payload)) as zf:
                self.integrate_symbol_zip(zf)
            self._create_file_if_not_exists(self._marker_file(symbol_zip_url))
        except IOError:
            LOG.info("Symbol zip request failed.")

    def integrate_symbol_zip_from_file(self, filename):
        """Extract a local symbol zip into the FIREFOX symbol path.

        Does nothing if a marker file for this filename already exists;
        otherwise the marker is created after extraction.
        """
        if self.have_integrated(filename):
            return
        with open(filename, "rb") as f:
            with zipfile.ZipFile(f) as zf:
                self.integrate_symbol_zip(zf)
        self._create_file_if_not_exists(self._marker_file(filename))

    def _create_file_if_not_exists(self, filename):
        """Best-effort creation of filename and its parent directory.

        Failures are deliberately ignored: the marker file is only an
        optimization that lets later runs skip an already integrated zip.
        """
        try:
            os.makedirs(os.path.dirname(filename))
        except OSError:
            # Typically the directory already exists.
            pass
        try:
            open(filename, "a").close()
        except IOError:
            pass

    def integrate_symbol_zip(self, symbol_zip_file):
        """Extract every member of an open ZipFile into the FIREFOX symbol path."""
        symbol_zip_file.extractall(self.options["symbolPaths"]["FIREFOX"])

    def _marker_file(self, symbol_zip_url):
        """Return the marker path for a zip: <FIREFOX path>/.markers/<sha1 of name>."""
        marker_dir = os.path.join(self.options["symbolPaths"]["FIREFOX"], ".markers")
        return os.path.join(
            marker_dir, hashlib.sha1(symbol_zip_url.encode("utf-8")).hexdigest()
        )

    def have_integrated(self, symbol_zip_url):
        """Return True if the marker file for this zip URL/filename exists."""
        return os.path.isfile(self._marker_file(symbol_zip_url))

    def get_unknown_modules_in_profile(self, profile_json):
        """Return the entries of profile_json["libs"] that the request does not know.

        A symbolication request with an empty stack is issued for all libs;
        the libs whose entry in request.knownModules is falsy are returned.
        Returns an empty list if the profile has no "libs" or the request is
        not valid.
        """
        if "libs" not in profile_json:
            return []
        shared_libraries = profile_json["libs"]
        memoryMap = [[lib["debugName"], lib["breakpadId"]] for lib in shared_libraries]

        rawRequest = {
            "stacks": [[]],
            "memoryMap": memoryMap,
            "version": 4,
            "symbolSources": ["FIREFOX", "WINDOWS"],
        }
        request = SymbolicationRequest(self.sym_file_manager, rawRequest)
        if not request.isValidRequest:
            return []
        request.Symbolicate(0)  # This sets request.knownModules

        return [
            lib
            for i, lib in enumerate(shared_libraries)
            if not request.knownModules[i]
        ]

    def dump_and_integrate_missing_symbols(self, profile_json, symbol_zip_path):
        """Dump symbols for the profile's unknown modules.

        Does nothing without a symbol dumper or when no module is unknown.
        """
        if not self.symbol_dumper:
            return

        unknown_modules = self.get_unknown_modules_in_profile(profile_json)
        if not unknown_modules:
            return

        # We integrate the dumped symbols by dumping them directly into our
        # symbol directory.
        output_dir = self.options["symbolPaths"]["FIREFOX"]

        # Additionally, we add all dumped symbol files to the missingsymbols
        # zip file.
        with zipfile.ZipFile(symbol_zip_path, "a", zipfile.ZIP_DEFLATED) as zf:
            for lib in unknown_modules:
                self.dump_and_integrate_symbols_for_lib(lib, output_dir, zf)

    def dump_and_integrate_symbols_for_lib(self, lib, output_dir, zip):
        """Make the symbols of one library available under output_dir.

        If the zip already holds a .sym or .nmsym file for the library it is
        extracted into output_dir. Otherwise, if lib["path"] exists, the
        symbols are dumped with self.symbol_dumper and the resulting file is
        added to the zip under its path relative to output_dir.
        """
        name = lib["debugName"]
        expected_name_without_extension = os.path.join(name, lib["breakpadId"], name)
        # Nothing is written to the zip before the final check below, so one
        # snapshot of its member names is enough.
        archived_names = set(zip.namelist())
        for extension in [".sym", ".nmsym"]:
            expected_name = expected_name_without_extension + extension
            if expected_name in archived_names:
                # No need to dump the symbols again if we already have it in
                # the missingsymbols zip file from a previous run.
                zip.extract(expected_name, output_dir)
                return

        lib_path = lib["path"]
        if not os.path.exists(lib_path):
            return

        output_filename_without_extension = os.path.join(
            output_dir, expected_name_without_extension
        )
        store_path = os.path.dirname(output_filename_without_extension)
        if not os.path.exists(store_path):
            os.makedirs(store_path)

        # Dump the symbols.
        sym_file = self.symbol_dumper.store_symbols(
            lib_path, lib["breakpadId"], output_filename_without_extension
        )
        if sym_file:
            # Strip "<output_dir><separator>" to get the archive member name.
            rootlen = len(os.path.join(output_dir, "_")) - 1
            output_filename = sym_file[rootlen:]
            if output_filename not in archived_names:
                zip.write(sym_file, output_filename)

    def symbolicate_profile(self, profile_json):
        """Symbolicate a profile in place, then recurse into its "processes".

        Entries of each thread's "stringTable" that start with "0x" and fall
        inside a library are replaced by the result of the symbolication
        request. A profile without "libs" is left untouched; a missing
        "processes" key is treated as no sub-processes.
        """
        if "libs" not in profile_json:
            return

        shared_libraries = profile_json["libs"]
        addresses = self._find_addresses(profile_json)
        symbols_to_resolve = self._assign_symbols_to_libraries(
            addresses, shared_libraries
        )
        symbolication_table = self._resolve_symbols(symbols_to_resolve)
        self._substitute_symbols(profile_json, symbolication_table)

        for process in profile_json.get("processes", []):
            self.symbolicate_profile(process)

    def _find_addresses(self, profile_json):
        """Collect every string-table entry that starts with "0x" into a set.

        Threads that are strings rather than mappings are skipped; a missing
        "threads" key is treated as no threads.
        """
        addresses = set()
        for thread in profile_json.get("threads", []):
            if isinstance(thread, basestring):
                continue
            for s in thread["stringTable"]:
                if s[0:2] == "0x":
                    addresses.add(s)
        return addresses

    def _substitute_symbols(self, profile_json, symbolication_table):
        """Replace string-table entries found in symbolication_table, in place.

        Entries without a mapping are kept as they are. Threads that are
        strings are skipped; a missing "threads" key is treated as no threads.
        """
        for thread in profile_json.get("threads", []):
            if isinstance(thread, basestring):
                continue
            string_table = thread["stringTable"]
            for i, s in enumerate(string_table):
                string_table[i] = symbolication_table.get(s, s)

    def _get_containing_library(self, address, libs):
        """Binary-search libs for the one with start <= address < end.

        The search is only correct if libs is sorted by address and the
        ranges do not overlap. Returns None when no library matches.
        """
        left = 0
        right = len(libs) - 1
        while left <= right:
            mid = (left + right) // 2
            if address >= libs[mid]["end"]:
                left = mid + 1
            elif address < libs[mid]["start"]:
                right = mid - 1
            else:
                return libs[mid]
        return None

    def _assign_symbols_to_libraries(self, addresses, shared_libraries):
        """Group address strings by the library that contains them.

        Returns the values of a dict keyed by library start address; each
        value is {"library": lib, "symbols": set of address strings}.
        Addresses outside every library are dropped.
        """
        libs_with_symbols = {}
        for address in addresses:
            lib = self._get_containing_library(int(address, 0), shared_libraries)
            if not lib:
                continue
            if lib["start"] not in libs_with_symbols:
                libs_with_symbols[lib["start"]] = {"library": lib, "symbols": set()}
            libs_with_symbols[lib["start"]]["symbols"].add(address)
        # pylint: disable=W1656
        return libs_with_symbols.values()

    def _resolve_symbols(self, symbols_to_resolve):
        """Resolve grouped addresses through one symbolication request.

        Returns a dict mapping each address string to the corresponding entry
        of the symbolicated stack, or an empty dict if the request is invalid.
        """
        memoryMap = []
        processedStack = []
        all_symbols = []
        for moduleIndex, library_with_symbols in enumerate(symbols_to_resolve):
            lib = library_with_symbols["library"]
            symbols = library_with_symbols["symbols"]
            memoryMap.append([lib["debugName"], lib["breakpadId"]])
            # The set is iterated twice without being modified in between, so
            # all_symbols and processedStack stay in the same order.
            all_symbols.extend(symbols)
            for symbol in symbols:
                # Stack entries are [module index, offset into the module].
                processedStack.append([moduleIndex, int(symbol, 0) - lib["start"]])

        rawRequest = {
            "stacks": [processedStack],
            "memoryMap": memoryMap,
            "version": 4,
            "symbolSources": ["FIREFOX", "WINDOWS"],
        }
        request = SymbolicationRequest(self.sym_file_manager, rawRequest)
        if not request.isValidRequest:
            return {}
        symbolicated_stack = request.Symbolicate(0)
        return dict(zip(all_symbols, symbolicated_stack))
361