1#!/bin/env python
2# This Source Code Form is subject to the terms of the Mozilla Public
3# License, v. 2.0. If a copy of the MPL was not distributed with this
4# file, You can obtain one at http://mozilla.org/MPL/2.0/.
5#
6# Usage: symbolstore.py <params> <dump_syms path> <symbol store path>
7#                                <debug info files or dirs>
8#   Runs dump_syms on each debug info file specified on the command line,
9#   then places the resulting symbol file in the proper directory
10#   structure in the symbol store path.  Accepts multiple files
11#   on the command line, so can be called as part of a pipe using
#   find <dir> | xargs symbolstore.py <dump_syms> <storepath>
13#   But really, you might just want to pass it <dir>.
14#
15#   Parameters accepted:
16#     -c           : Copy debug info files to the same directory structure
17#                    as sym files. On Windows, this will also copy
18#                    binaries into the symbol store.
19#     -a "<archs>" : Run dump_syms -a <arch> for each space separated
20#                    cpu architecture in <archs> (only on OS X)
21#     -s <srcdir>  : Use <srcdir> as the top source directory to
22#                    generate relative filenames.
23
24from __future__ import print_function
25
26import buildconfig
27import errno
28import sys
29import platform
30import os
31import re
32import shutil
33import textwrap
34import subprocess
35import time
36import ctypes
37
38from optparse import OptionParser
39
40from mozbuild.util import memoize
41from mozbuild.generated_sources import (
42    get_filename_with_digest,
43    get_generated_sources,
44    get_s3_region_and_bucket,
45)
46from mozpack.copier import FileRegistry
47from mozpack.manifests import (
48    InstallManifest,
49    UnreadableInstallManifest,
50)
51
52# Utility classes
53
54
class VCSFileInfo:
    """Base class describing a single file under version control.

    Four attributes are computed lazily, on first access, and stored on the
    instance once a truthy value has been obtained:

        self.root
        self.clean_root
        self.revision
        self.filename

    They are produced by the GetRoot, GetCleanRoot, GetRevision and
    GetFilename methods respectively. This base class deliberately leaves
    those methods unimplemented; derived classes must supply them."""

    def __init__(self, file):
        if not file:
            raise ValueError
        self.file = file

    def __getattr__(self, name):
        """Compute one of the lazy attributes on demand.

        Python only calls __getattr__ when normal lookup fails, so storing a
        value on self means the matching Get* method is not called again for
        this instance. Falsy results are returned but not stored, on the off
        chance that a later call succeeds."""

        getter_names = {
            "root": "GetRoot",
            "clean_root": "GetCleanRoot",
            "revision": "GetRevision",
            "filename": "GetFilename",
        }
        if name not in getter_names:
            raise AttributeError

        value = getattr(self, getter_names[name])()
        if value:
            setattr(self, name, value)
        return value

    def GetRoot(self):
        """Return the unmodified root for the file, or None on failure.
        Must be implemented by derived classes."""
        raise NotImplementedError

    def GetCleanRoot(self):
        """Return the repository root for the file, or None on failure.
        Must be implemented by derived classes."""
        raise NotImplementedError

    def GetRevision(self):
        """Return the revision number for the file, or None on failure.
        Must be implemented by derived classes."""
        raise NotImplementedError

    def GetFilename(self):
        """Return the repository-specific filename for the file, or None on
        failure. Must be implemented by derived classes."""
        raise NotImplementedError
124
125
# This regex strips the protocol and an optional username/password from a
# url; group 1 captures what remains. For instance, matching any of the
# following urls leaves 'foo.com/bar' in group 1:
#
#   http://foo.com/bar
#   svn+ssh://user@foo.com/bar
#   svn+ssh://user:pass@foo.com/bar
#
rootRegex = re.compile(r"^\S+?:/+(?:[^\s/]*@)?(\S+)$")
135
136
def read_output(*args):
    """Run the command given by |args| and return its standard output as
    text, with trailing whitespace removed. The command's stderr is not
    captured and its exit status is not checked."""
    process = subprocess.Popen(
        args=args, universal_newlines=True, stdout=subprocess.PIPE
    )
    output = process.communicate()[0]
    return output.rstrip()
142
143
class HGRepoInfo:
    """Revision and upstream location of the Mercurial checkout at |path|.

    The revision comes from the MOZ_SOURCE_CHANGESET environment variable, or
    from `hg parent` when that is unset. The repository URL comes from the
    MOZ_SOURCE_REPO environment variable, or from hg's `paths.default`
    setting when that is unset. If no usable URL can be determined, an error
    is printed to stderr and the process exits with status 1.

    Attributes set on success:
        path      -- the checkout directory passed to the constructor
        rev       -- the changeset identifier
        root      -- the repository URL as found
        cleanroot -- |root| with the protocol, any user info and one
                     trailing slash removed (see rootRegex)
    """

    def __init__(self, path):
        self.path = path

        rev = os.environ.get("MOZ_SOURCE_CHANGESET")
        if not rev:
            rev = read_output("hg", "-R", path, "parent", "--template={node}")

        # Look for the default hg path. If MOZ_SOURCE_REPO is set, we
        # don't bother asking hg.
        hg_root = os.environ.get("MOZ_SOURCE_REPO")
        if hg_root:
            root = hg_root
        else:
            root = read_output("hg", "-R", path, "showconfig", "paths.default")
            if not root:
                print("Failed to get HG Repo for %s" % path, file=sys.stderr)
        cleanroot = None
        if root:
            match = rootRegex.match(root)
            if match:
                cleanroot = match.group(1)
                if cleanroot.endswith("/"):
                    cleanroot = cleanroot[:-1]
        if cleanroot is None:
            # The stream must be passed as file=...; passed positionally,
            # print() writes the message and the stream's repr to stdout.
            print(
                textwrap.dedent(
                    """\
            Could not determine repo info for %s.  This is either not a clone of the web-based
            repository, or you have not specified MOZ_SOURCE_REPO, or the clone is corrupt."""
                )
                % path,
                file=sys.stderr,
            )
            sys.exit(1)
        self.rev = rev
        self.root = root
        self.cleanroot = cleanroot

    def GetFileInfo(self, file):
        """Return an HGFileInfo describing |file| within this repository."""
        return HGFileInfo(file, self)
185
186
class HGFileInfo(VCSFileInfo):
    """VCSFileInfo for a file inside the Mercurial checkout described by
    |repo| (an object exposing path, root, cleanroot and rev)."""

    def __init__(self, file, repo):
        VCSFileInfo.__init__(self, file)
        self.repo = repo
        # Replace the path stored by the base class with one relative to
        # the checkout directory.
        self.file = os.path.relpath(file, repo.path)

    def GetRoot(self):
        """Return the repository URL recorded on the repo object."""
        return self.repo.root

    def GetCleanRoot(self):
        """Return the cleaned repository location recorded on the repo object."""
        return self.repo.cleanroot

    def GetRevision(self):
        """Return the changeset recorded on the repo object."""
        return self.repo.rev

    def GetFilename(self):
        """Return 'hg:<clean root>:<relative file>:<revision>', or just the
        relative file name when the revision or clean root is unavailable."""
        if not (self.revision and self.clean_root):
            return self.file
        return "hg:%s:%s:%s" % (self.clean_root, self.file, self.revision)
206
207
class GitRepoInfo:
    """
    Describes a local git repository. Nothing is discovered from the clone
    itself: the revision and the repository URL must be passed in by the
    caller. If the URL cannot be reduced to a clean root, an error is
    printed to stderr and the process exits with status 1.
    """

    def __init__(self, path, rev, root):
        self.path = path

        # Only attempt a match when a root was actually supplied.
        matched = rootRegex.match(root) if root else None
        if matched is None:
            print(
                textwrap.dedent(
                    """\
            Could not determine repo info for %s (%s).  This is either not a clone of a web-based
            repository, or you have not specified MOZ_SOURCE_REPO, or the clone is corrupt."""
                )
                % (path, root),
                file=sys.stderr,
            )
            sys.exit(1)

        stripped = matched.group(1)
        # Drop a single trailing slash, if present.
        if stripped.endswith("/"):
            stripped = stripped[:-1]

        self.rev = rev
        self.cleanroot = stripped

    def GetFileInfo(self, file):
        """Return a GitFileInfo describing |file| within this repository."""
        return GitFileInfo(file, self)
240
241
class GitFileInfo(VCSFileInfo):
    """VCSFileInfo for a file inside the git repository described by |repo|
    (an object exposing path, cleanroot and rev)."""

    def __init__(self, file, repo):
        VCSFileInfo.__init__(self, file)
        self.repo = repo
        # Replace the path stored by the base class with one relative to
        # the repository directory.
        self.file = os.path.relpath(file, repo.path)

    def GetRoot(self):
        """Return the repository directory recorded on the repo object."""
        return self.repo.path

    def GetCleanRoot(self):
        """Return the cleaned repository location recorded on the repo object."""
        return self.repo.cleanroot

    def GetRevision(self):
        """Return the revision recorded on the repo object."""
        return self.repo.rev

    def GetFilename(self):
        """Return 'git:<clean root>:<relative file>:<revision>', or just the
        relative file name when the revision or clean root is unavailable."""
        if not (self.revision and self.clean_root):
            return self.file
        return "git:%s:%s:%s" % (self.clean_root, self.file, self.revision)
261
262
263# Utility functions
264
265
# A cache of files for which VCS info has already been determined. Used to
# prevent extra filesystem activity or process launching.
# Keys are the file paths passed to GetVCSFilename; values are the file-info
# objects returned for them.
vcsFileInfoCache = {}

# Pick the realpath implementation once, at import time, based on the
# platform this script is running on.
if platform.system() == "Windows":

    def realpath(path):
        """
        Normalize a path using `GetFinalPathNameByHandleW` to get the
        path with all components in the case they exist in on-disk, so
        that making links to a case-sensitive server (hg.mozilla.org) works.

        This function also resolves any symlinks in the path.

        Returns the normalized path, or |path| unchanged when the file
        cannot be opened or the final path cannot be retrieved.
        """
        # Return the original path if something fails, which can happen for paths that
        # don't exist on this system (like paths from the CRT).
        result = path

        # 1 is SEM_FAILCRITICALERRORS in the Win32 API; presumably set so a
        # failing open does not raise a system error dialog -- TODO confirm.
        # NOTE(review): the previous error mode is not saved or restored.
        ctypes.windll.kernel32.SetErrorMode(ctypes.c_uint(1))
        handle = ctypes.windll.kernel32.CreateFileW(
            path,
            # GENERIC_READ
            0x80000000,
            # FILE_SHARE_READ
            1,
            None,
            # OPEN_EXISTING
            3,
            # FILE_FLAG_BACKUP_SEMANTICS
            # This is necessary to open
            # directory handles.
            0x02000000,
            None,
        )
        # -1 is INVALID_HANDLE_VALUE, i.e. the open failed.
        if handle != -1:
            # First call with no buffer: the return value is used as the
            # buffer size for the second call.
            size = ctypes.windll.kernel32.GetFinalPathNameByHandleW(handle, None, 0, 0)
            buf = ctypes.create_unicode_buffer(size)
            if (
                ctypes.windll.kernel32.GetFinalPathNameByHandleW(handle, buf, size, 0)
                > 0
            ):
                # The return value of GetFinalPathNameByHandleW uses the
                # '\\?\' prefix.
                # NOTE(review): only those four characters are dropped;
                # presumably UNC results keep a leading 'UNC\' -- confirm.
                result = buf.value[4:]
            ctypes.windll.kernel32.CloseHandle(handle)
        return result


else:
    # Just use the os.path version otherwise.
    realpath = os.path.realpath
317
318
def IsInDir(file, dir):
    """Return True if |file| is |dir| itself or is located underneath it.

    Both paths are made absolute and compared case-insensitively. The
    lower() is to handle win32+vc8, where the source filenames come out all
    lowercase, but the srcdir can be mixed case.

    The comparison is made on a path-component boundary, so that
    '/foo/barbaz/x.c' is not reported as being inside '/foo/bar'.
    """
    file_path = os.path.abspath(file).lower()
    dir_path = os.path.abspath(dir).lower()
    if file_path == dir_path:
        return True
    # Strip any separator abspath left on the directory (it keeps one for a
    # filesystem root) before appending exactly one.
    return file_path.startswith(dir_path.rstrip(os.sep) + os.sep)
324
325
def GetVCSFilenameFromSrcdir(file, srcdir):
    """Return a file-info object for |file| from the repository at |srcdir|,
    or None when |srcdir| has no cached repo info and is not a Mercurial
    checkout (no '.hg' directory)."""
    repo_cache = Dumper.srcdirRepoInfo
    if srcdir not in repo_cache:
        # Not in the cache yet, so find it and cache it.
        if not os.path.isdir(os.path.join(srcdir, ".hg")):
            # Unknown VCS or file is not in a repo.
            return None
        repo_cache[srcdir] = HGRepoInfo(srcdir)
    return repo_cache[srcdir].GetFileInfo(file)
335
336
def GetVCSFilename(file, srcdirs):
    """Look up version control information for |file|, a full path, among
    the source directories in |srcdirs|, and return a tuple containing
    1) a specially formatted filename that contains the VCS type,
    VCS location, relative filename, and revision number, formatted like:
    vcs:vcs location:filename:revision
    For example:
    cvs:cvs.mozilla.org/cvsroot:mozilla/browser/app/nsBrowserApp.cpp:1.36
    2) the unmodified root information if it exists

    When no VCS information is found, the first element is |file| itself
    with backslashes turned into forward slashes and the second is "".
    A |file| lacking either a directory or a file name part is returned
    untouched, paired with None."""
    (directory, basename) = os.path.split(file)
    if directory == "" or basename == "":
        return (file, None)

    info = None
    if file in vcsFileInfoCache:
        # Already cached this info, use it.
        info = vcsFileInfoCache[file]
    else:
        for srcdir in srcdirs:
            if IsInDir(file, srcdir):
                info = GetVCSFilenameFromSrcdir(file, srcdir)
                if info:
                    vcsFileInfoCache[file] = info
                    break

    root = ""
    if info:
        file = info.filename
        root = info.root

    # we want forward slashes on win32 paths
    normalized = file.replace("\\", "/")
    return (normalized, root)
371
372
def validate_install_manifests(install_manifest_args):
    """Parse and check a list of 'manifest,target_dir' strings.

    Returns a list of (InstallManifest, absolute destination directory)
    tuples, in input order.

    Raises ValueError when an argument is not exactly two comma-separated
    parts, and IOError when the manifest file is missing, the destination
    directory is missing, or the manifest cannot be parsed."""
    validated = []
    for spec in install_manifest_args:
        parts = spec.split(",")
        if len(parts) != 2:
            raise ValueError(
                "Invalid format for --install-manifest: " "specify manifest,target_dir"
            )
        manifest_file = os.path.abspath(parts[0])
        destination = os.path.abspath(parts[1])
        if not os.path.isfile(manifest_file):
            raise IOError(errno.ENOENT, "Manifest file not found", manifest_file)
        if not os.path.isdir(destination):
            raise IOError(errno.ENOENT, "Install directory not found", destination)
        try:
            manifest = InstallManifest(manifest_file)
        except UnreadableInstallManifest:
            raise IOError(errno.EINVAL, "Error parsing manifest file", manifest_file)
        validated.append((manifest, destination))
    return validated
392
393
def make_file_mapping(install_manifests):
    """Build a dict mapping installed file paths back to their sources.

    |install_manifests| is an iterable of (manifest, destination) pairs.
    Each manifest populates a fresh FileRegistry; every registry entry whose
    source object has a `path` attribute contributes one mapping from the
    realpath of its location under |destination| to the realpath of that
    source path. Entries without a `path` attribute are skipped."""
    mapping = {}
    for manifest, destination in install_manifests:
        dest_root = os.path.abspath(destination)
        registry = FileRegistry()
        manifest.populate_registry(registry)
        for dst, src in registry:
            if not hasattr(src, "path"):
                continue
            # Any paths that get compared to source file names need to go
            # through realpath.
            installed = realpath(os.path.join(dest_root, dst))
            mapping[installed] = realpath(src.path)
    return mapping
406
407
@memoize
def get_generated_file_s3_path(filename, rel_path, bucket):
    """Return a path for |filename| formatted similarly to the output of
    GetVCSFilename, but naming a file available in the s3 bucket |bucket|.
    The file's bytes and |rel_path| are passed to get_filename_with_digest
    to obtain the path component."""
    with open(filename, "rb") as source:
        contents = source.read()
    digest_path = get_filename_with_digest(rel_path, contents)
    return "s3:{bucket}:{path}:".format(bucket=bucket, path=digest_path)
415
416
def GetPlatformSpecificDumper(**kwargs):
    """Instantiate the Dumper subclass matching the build's OS_ARCH
    ('WINNT', 'Linux' or 'Darwin'), forwarding |kwargs| to its constructor.
    Any other OS_ARCH value raises KeyError."""
    dumper_classes = {
        "WINNT": Dumper_Win32,
        "Linux": Dumper_Linux,
        "Darwin": Dumper_Mac,
    }
    dumper_class = dumper_classes[buildconfig.substs["OS_ARCH"]]
    return dumper_class(**kwargs)
423
424
def SourceIndex(fileStream, outputPath, vcs_root):
    """Write a srcsrv data block to the .stream file at |outputPath|.

    Creates a .pdb.stream file in the mozilla\\objdir to be used for source
    indexing; the data block it contains indexes the pdb file.

    |fileStream| is the already-formatted text of the "source files" section
    and is written verbatim. |vcs_root| is written as the HGSERVER variable.

    Always returns True; errors opening or writing the file propagate as
    exceptions.
    """
    # The with-block guarantees the handle is closed even if a write fails.
    with open(outputPath, "w") as pdbStreamFile:
        pdbStreamFile.write(
            "SRCSRV: ini ------------------------------------------------\r"
            + "\nVERSION=2\r\nINDEXVERSION=2\r"
            + "\nVERCTRL=http\r"
            + "\nSRCSRV: variables ------------------------------------------\r"
            + "\nHGSERVER="
        )
        pdbStreamFile.write(vcs_root)
        pdbStreamFile.write(
            "\r\nSRCSRVVERCTRL=http\r"
            + "\nHTTP_EXTRACT_TARGET=%hgserver%/raw-file/%var3%/%var2%\r"
            + "\nSRCSRVTRG=%http_extract_target%\r"
            + "\nSRCSRV: source files ---------------------------------------\r\n"
        )
        pdbStreamFile.write(fileStream)
        # can't do string interpolation because the source server also uses
        # this so there are % in the above
        pdbStreamFile.write(
            "SRCSRV: end ------------------------------------------------\r\n\n"
        )
    return True
454
455
class Dumper:
    """This class can dump symbols from a file with debug info, and
    store the output in a directory structure that is valid for use as
    a Breakpad symbol server.  Requires a path to a dump_syms binary--
    |dump_syms| and a directory to store symbols in--|symbol_path|.
    Optionally takes a list of processor architectures to process from
    each debug file--|archs|, the full paths to the top source
    directories--|srcdirs|, for generating relative source file names,
    and an option to copy debug info files alongside the dumped
    symbol files--|copy_debug|, mostly useful for creating a
    Microsoft Symbol Server from the resulting output.

    You don't want to use this directly if you intend to process files.
    Instead, call GetPlatformSpecificDumper to get an instance of a
    subclass."""

    # Maps a source directory to the repo-info object describing it. This is
    # a class attribute, so it is shared by every Dumper instance.
    srcdirRepoInfo = {}

    def __init__(
        self,
        dump_syms,
        symbol_path,
        archs=None,
        srcdirs=None,
        copy_debug=False,
        vcsinfo=False,
        srcsrv=False,
        generated_files=None,
        s3_bucket=None,
        file_mapping=None,
    ):
        """Store the configuration and register the Rust source mapping.

        |archs| is a whitespace-separated string of architectures, or None.
        |srcdirs| is an iterable of source directories (None means none).
        |generated_files| and |file_mapping| are dicts keyed by file path;
        None is treated as an empty dict.
        """
        # popen likes absolute paths, at least on windows
        self.dump_syms = os.path.abspath(dump_syms)
        self.symbol_path = symbol_path
        if archs is None:
            # makes the loop logic simpler
            self.archs = [""]
        else:
            self.archs = ["-a %s" % a for a in archs.split()]
        # Any paths that get compared to source file names need to go through realpath.
        self.srcdirs = [realpath(s) for s in (srcdirs or [])]
        self.copy_debug = copy_debug
        self.vcsinfo = vcsinfo
        self.srcsrv = srcsrv
        self.generated_files = generated_files or {}
        self.s3_bucket = s3_bucket
        self.file_mapping = file_mapping or {}
        # Add a static mapping for Rust sources. Since Rust 1.30 official Rust builds map
        # source paths to start with "/rustc/<sha>/".
        rust_sha = buildconfig.substs["RUSTC_COMMIT"]
        rust_srcdir = "/rustc/" + rust_sha
        self.srcdirs.append(rust_srcdir)
        Dumper.srcdirRepoInfo[rust_srcdir] = GitRepoInfo(
            rust_srcdir, rust_sha, "https://github.com/rust-lang/rust/"
        )

    # subclasses override this
    def ShouldProcess(self, file):
        """Return True if |file| should be processed. The base class accepts
        every file."""
        return True

    def RunFileCommand(self, file):
        """Utility function, returns the output of file(1)"""
        # we use -L to read the targets of symlinks,
        # and -b to print just the content, not the filename
        return read_output("file", "-Lb", file)

    # This is a no-op except on Win32
    def SourceServerIndexing(self, debug_file, guid, sourceFileStream, vcs_root):
        """Hook for adding source server data to the debug file. The base
        class does nothing and returns an empty string."""
        return ""

    # subclasses override this if they want to support this
    def CopyExeAndDebugInfo(self, file, debug_file, guid, code_file, code_id):
        """This function will copy a library or executable and the file holding the
        debug information to |symbol_path|"""
        pass

    def Process(self, file_to_process, count_ctors=False):
        """Process the given file, if ShouldProcess accepts its absolute path."""
        if self.ShouldProcess(os.path.abspath(file_to_process)):
            self.ProcessFile(file_to_process, count_ctors=count_ctors)

    def ProcessFile(self, file, dsymbundle=None, count_ctors=False):
        """Dump symbols from |file| into symbol files stored in the proper
        directory structure in |symbol_path|, running ProcessFileWork once
        for each configured architecture. |dsymbundle| and |count_ctors| are
        forwarded to ProcessFileWork unchanged."""
        print("Beginning work for file: %s" % file, file=sys.stderr)

        # Prefer the repository root given by the MOZ_SOURCE_REPO environment
        # variable; when it is unset, ProcessFileWork falls back to the root
        # of the first source file for which VCS information is found.
        vcs_root = os.environ.get("MOZ_SOURCE_REPO")
        for arch_num, arch in enumerate(self.archs):
            self.ProcessFileWork(
                file, arch_num, arch, vcs_root, dsymbundle, count_ctors=count_ctors
            )

    def dump_syms_cmdline(self, file, arch, dsymbundle=None):
        """
        Get the commandline used to invoke dump_syms.
        """
        # The Mac dumper overrides this.
        return [self.dump_syms, file]

    def ProcessFileWork(
        self, file, arch_num, arch, vcs_root, dsymbundle=None, count_ctors=False
    ):
        """Run dump_syms on |file| and write the resulting .sym file.

        The first output line must be a MODULE line; it determines where the
        symbol file goes under |symbol_path|. FILE lines are rewritten
        through the file mapping and, when |vcsinfo| is set, into VCS or s3
        form; every other line is copied through unchanged. The relative
        path of the symbol file is printed to stdout.

        |arch_num| is the index of |arch| in self.archs; debug info is only
        copied for index 0. If |dsymbundle| is given, that directory is
        removed once processing succeeds. If |count_ctors| is true, static
        constructors are counted and reported as PERFHERDER_DATA on stderr.

        Raises RuntimeError if dump_syms exits with a non-zero status or does
        not produce a MODULE line first; other errors are logged to stderr
        and re-raised.
        """
        ctors = 0
        t_start = time.time()
        print("Processing file: %s" % file, file=sys.stderr)

        sourceFileStream = ""
        code_id, code_file = None, None
        try:
            cmd = self.dump_syms_cmdline(file, arch, dsymbundle=dsymbundle)
            print(" ".join(cmd), file=sys.stderr)
            # We're interested in `stderr` in the case that something goes
            # wrong with dump_syms, but we don't want to use
            # `stderr=subprocess.PIPE` here, as that can land us in a
            # deadlock when we try to read only from `stdout`, below.  The
            # Python documentation recommends using `communicate()` in such
            # cases, but `stderr` can be rather large, and we don't want to
            # waste time accumulating all of it in the non-error case.  So we
            # completely ignore `stderr` here and capture it separately,
            # below.
            # The child gets its own copy of the devnull handle when it is
            # spawned, so ours can be closed as soon as Popen returns.
            with open(os.devnull, "wb") as devnull:
                proc = subprocess.Popen(
                    cmd,
                    universal_newlines=True,
                    stdout=subprocess.PIPE,
                    stderr=devnull,
                )
            try:
                module_line = next(proc.stdout)
            except StopIteration:
                module_line = ""
            if module_line.startswith("MODULE"):
                # MODULE os cpu guid debug_file
                (guid, debug_file) = (module_line.split())[3:5]
                # strip off .pdb extensions, and append .sym
                sym_file = re.sub(r"\.pdb$", "", debug_file) + ".sym"
                # we do want forward slashes here
                rel_path = os.path.join(debug_file, guid, sym_file).replace("\\", "/")
                full_path = os.path.normpath(os.path.join(self.symbol_path, rel_path))
                try:
                    os.makedirs(os.path.dirname(full_path))
                except OSError as e:
                    # An existing directory is fine; anything else is not.
                    if e.errno != errno.EEXIST:
                        raise
                # The with-block closes the symbol file even if a line fails
                # to process.
                with open(full_path, "w") as f:
                    f.write(module_line)
                    # now process the rest of the output
                    for line in proc.stdout:
                        if line.startswith("FILE"):
                            # FILE index filename
                            (_, index, filename) = line.rstrip().split(None, 2)
                            # We want original file paths for the source server.
                            sourcepath = filename
                            filename = realpath(filename)
                            if filename in self.file_mapping:
                                filename = self.file_mapping[filename]
                            if self.vcsinfo:
                                gen_path = self.generated_files.get(filename)
                                if gen_path and self.s3_bucket:
                                    filename = get_generated_file_s3_path(
                                        filename, gen_path, self.s3_bucket
                                    )
                                    rootname = ""
                                else:
                                    (filename, rootname) = GetVCSFilename(
                                        filename, self.srcdirs
                                    )
                                # Remember the first non-empty root we see,
                                # unless a root was supplied by the caller.
                                if vcs_root is None:
                                    if rootname:
                                        vcs_root = rootname
                            # gather up files with hg for indexing; require
                            # the "hg:" prefix so that ordinary paths which
                            # merely start with "hg" are not split.
                            if filename.startswith("hg:"):
                                (ver, checkout, source_file, revision) = filename.split(
                                    ":", 3
                                )
                                sourceFileStream += sourcepath + "*" + source_file
                                sourceFileStream += "*" + revision + "\r\n"
                            f.write("FILE %s %s\n" % (index, filename))
                        elif line.startswith("INFO CODE_ID "):
                            # INFO CODE_ID code_id code_file
                            # This gives some info we can use to
                            # store binaries in the symbol store.
                            bits = line.rstrip().split(None, 3)
                            if len(bits) == 4:
                                code_id, code_file = bits[2:]
                            f.write(line)
                        else:
                            if count_ctors and line.startswith("FUNC "):
                                # Static initializers, as created by clang and gcc
                                # have symbols that contain "_GLOBAL__sub_"
                                if "_GLOBAL__sub_" in line:
                                    ctors += 1
                                # MSVC creates `dynamic initializer for '...'`
                                # symbols.
                                elif "`dynamic initializer for '" in line:
                                    ctors += 1

                            # pass through all other lines unchanged
                            f.write(line)
                retcode = proc.wait()
                if retcode != 0:
                    raise RuntimeError("dump_syms failed with error code %d" % retcode)
                # we output relative paths so callers can get a list of what
                # was generated
                print(rel_path)
                if self.srcsrv and vcs_root:
                    # add source server indexing to the pdb file
                    self.SourceServerIndexing(
                        debug_file, guid, sourceFileStream, vcs_root
                    )
                # only copy debug the first time if we have multiple architectures
                if self.copy_debug and arch_num == 0:
                    self.CopyExeAndDebugInfo(file, debug_file, guid, code_file, code_id)
            else:
                # For some reason, we didn't see the MODULE line as the first
                # line of output.  It's very possible that the interesting error
                # message(s) are on stderr, so let's re-execute the process and
                # capture the entirety of stderr.
                with open(os.devnull, "wb") as devnull:
                    proc = subprocess.Popen(
                        cmd, stdout=devnull, stderr=subprocess.PIPE
                    )
                    (_, dumperr) = proc.communicate()
                retcode = proc.returncode
                message = [
                    "dump_syms failed to produce the expected output",
                    "return code: %d" % retcode,
                    "first line of output: %s" % module_line,
                    "stderr: %s" % dumperr,
                ]
                raise RuntimeError("\n----------\n".join(message))
        except Exception as e:
            print("Unexpected error: %s" % str(e), file=sys.stderr)
            raise

        if dsymbundle:
            shutil.rmtree(dsymbundle)

        if count_ctors:
            import json

            perfherder_data = {
                "framework": {"name": "build_metrics"},
                "suites": [
                    {
                        "name": "compiler_metrics",
                        "subtests": [
                            {
                                "name": "num_static_constructors",
                                "value": ctors,
                                "alertChangeType": "absolute",
                                "alertThreshold": 3,
                            }
                        ],
                    }
                ],
            }
            perfherder_extra_options = os.environ.get("PERFHERDER_EXTRA_OPTIONS", "")
            for opt in perfherder_extra_options.split():
                for suite in perfherder_data["suites"]:
                    if opt not in suite.get("extraOptions", []):
                        suite.setdefault("extraOptions", []).append(opt)

            # Nothing is reported when the extra options mention asan.
            if "asan" not in perfherder_extra_options.lower():
                print(
                    "PERFHERDER_DATA: %s" % json.dumps(perfherder_data), file=sys.stderr
                )

        elapsed = time.time() - t_start
        print("Finished processing %s in %.2fs" % (file, elapsed), file=sys.stderr)
731
732
733# Platform-specific subclasses.  For the most part, these just have
734# logic to determine what files to extract symbols from.
735
736
def locate_pdb(path):
    """Find the pdb file belonging to the binary at |path|.

    Two locations are tried, in order:
    * a pdb file with the same base name next to the binary
    * a pdb file with the same base name in the cwd

    Returns the path of the first one that exists, or None if neither does.
    """
    stem = os.path.splitext(path)[0]
    sibling_pdb = stem + ".pdb"
    # We build some binaries directly into dist/bin, but put the pdb files in
    # the relative objdir, which is the cwd when running this script, so the
    # cwd is checked when nothing sits next to the binary.
    cwd_pdb = os.path.join(os.getcwd(), os.path.basename(sibling_pdb))
    for candidate in (sibling_pdb, cwd_pdb):
        if os.path.isfile(candidate):
            return candidate
    return None
756
757
class Dumper_Win32(Dumper):
    """Dumper for Windows binaries: handles .exe and .dll files that have a
    locatable pdb file, and can copy them into the symbol store and add
    source server data to the pdb."""

    fixedFilenameCaseCache = {}

    def ShouldProcess(self, file):
        """Accept only exe or dll files for which locate_pdb finds a pdb
        file with the same base name."""
        if not file.endswith((".exe", ".dll")):
            return False
        return locate_pdb(file) is not None

    def CopyExeAndDebugInfo(self, file, debug_file, guid, code_file, code_id):
        """Copy the pdb file, and the executable or dll when it can be
        found, to |symbol_path|, printing the relative path of each copy."""
        pdb_file = locate_pdb(file)

        pdb_rel = os.path.join(debug_file, guid, debug_file).replace("\\", "/")
        pdb_dest = os.path.normpath(os.path.join(self.symbol_path, pdb_rel))
        shutil.copyfile(pdb_file, pdb_dest)
        print(pdb_rel)

        # Copy the binary file as well, when dump_syms told us its name and
        # id and it exists next to |file|.
        if not (code_file and code_id):
            return
        binary_src = os.path.join(os.path.dirname(file), code_file)
        if not os.path.exists(binary_src):
            return

        binary_rel = os.path.join(code_file, code_id, code_file).replace("\\", "/")
        binary_dest = os.path.normpath(os.path.join(self.symbol_path, binary_rel))
        try:
            os.makedirs(os.path.dirname(binary_dest))
        except OSError as e:
            # An already existing directory is fine.
            if e.errno != errno.EEXIST:
                raise
        shutil.copyfile(binary_src, binary_dest)
        print(binary_rel)

    def SourceServerIndexing(self, debug_file, guid, sourceFileStream, vcs_root):
        """Write a .pdb.stream file for |debug_file| and, when |copy_debug|
        is set, run pdbstr on it and then delete it. Returns the result of
        SourceIndex."""
        # Creates a .pdb.stream file in the mozilla\objdir to be used for source indexing
        stream_name = debug_file + ".stream"
        stream_path = os.path.abspath(stream_name)
        # Call SourceIndex to create the .stream file
        result = SourceIndex(sourceFileStream, stream_path, vcs_root)
        if not self.copy_debug:
            return result

        pdbstr = buildconfig.substs["PDBSTR"]
        wine = buildconfig.substs.get("WINE")
        # Run pdbstr through wine when the build configuration names one.
        cmd = [wine, pdbstr] if wine else [pdbstr]
        cmd = cmd + [
            "-w",
            "-p:" + os.path.basename(debug_file),
            "-i:" + os.path.basename(stream_name),
            "-s:srcsrv",
        ]
        subprocess.call(cmd, cwd=os.path.dirname(stream_path))
        # clean up all the .stream files when done
        os.remove(stream_path)
        return result
820
821
class Dumper_Linux(Dumper):
    """Dumper for ELF binaries; uses objcopy to split out debug info."""

    # objcopy program to run: taken from $OBJCOPY when that variable is set,
    # otherwise plain "objcopy".
    objcopy = os.environ.get("OBJCOPY", "objcopy")

    def ShouldProcess(self, file):
        """Return True for files that end in .so or are executable, and that
        RunFileCommand() (file(1)) additionally reports as ELF."""
        if not (file.endswith(".so") or os.access(file, os.X_OK)):
            return False
        return self.RunFileCommand(file).startswith("ELF")

    def CopyExeAndDebugInfo(self, file, debug_file, guid, code_file, code_id):
        """Extract the debug info of |file| into the symbol store.

        The debug info is written to |file|.dbg and |file| gets a
        .gnu_debuglink section pointing at it, so a debugger can load the
        debug info later. When both objcopy steps succeed the .dbg file is
        moved to <symbol_path>/<debug_file>/<guid>/<debug_file>.dbg and its
        store-relative path is printed; otherwise any leftover .dbg file is
        removed.
        """
        dbg_path = file + ".dbg"
        extract_cmd = [self.objcopy, "--only-keep-debug", file, dbg_path]
        # The object may already carry an irrelevant .gnu_debuglink section,
        # in which case objcopy refuses to add another one, so any existing
        # section is removed in the same invocation.
        relink_cmd = [
            self.objcopy,
            "--remove-section",
            ".gnu_debuglink",
            "--add-gnu-debuglink=%s" % dbg_path,
            file,
        ]
        # The second command only runs when the first one exited with 0.
        succeeded = (
            subprocess.call(extract_cmd) == 0 and subprocess.call(relink_cmd) == 0
        )
        if not succeeded:
            if os.path.isfile(dbg_path):
                os.unlink(dbg_path)
            return

        rel_path = os.path.join(debug_file, guid, debug_file + ".dbg")
        full_path = os.path.normpath(os.path.join(self.symbol_path, rel_path))
        shutil.move(dbg_path, full_path)
        print(rel_path)
862
863
class Dumper_Solaris(Dumper):
    """Dumper for Solaris; only customizes how file(1) output is obtained
    and which files are processed."""

    def RunFileCommand(self, file):
        """Utility function, returns the output of file(1).

        Only the text after the first tab of the output is returned. An
        empty string is returned when the command cannot be run or its
        output does not contain a tab.
        """
        try:
            # Pass the path as a separate argument instead of building a
            # shell command line, so paths containing spaces or shell
            # metacharacters are handed to file(1) unmodified.
            proc = subprocess.Popen(
                ["file", file], stdout=subprocess.PIPE, universal_newlines=True
            )
            # communicate() reads all output, waits for the process and
            # closes the pipe.
            output, _ = proc.communicate()
            return output.split("\t")[1]
        except Exception:
            # Deliberately best-effort: any failure simply means "unknown
            # file type" to the caller.
            return ""

    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .so extension, and additionally
        file(1) reports as being ELF files.  It expects to find the file
        command in PATH."""
        if file.endswith(".so") or os.access(file, os.X_OK):
            return self.RunFileCommand(file).startswith("ELF")
        return False
881
882
class Dumper_Mac(Dumper):
    """Dumper for Mach-O binaries; symbols are dumped from a .dSYM bundle
    that is generated with dsymutil."""

    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .dylib extension, and additionally
        file(1) reports as being Mach-O files.  It expects to find the file
        command in PATH."""
        if file.endswith(".dylib") or os.access(file, os.X_OK):
            return self.RunFileCommand(file).startswith("Mach-O")
        return False

    def ProcessFile(self, file, count_ctors=False):
        """Generate a .dSYM bundle for |file| and, if that produced a usable
        bundle, hand both to Dumper.ProcessFile. Files for which
        GenerateDSYM() returns a false value are silently skipped."""
        print("Starting Mac pre-processing on file: %s" % file, file=sys.stderr)
        dsymbundle = self.GenerateDSYM(file)
        if dsymbundle:
            # kick off new jobs per-arch with our new list of files
            Dumper.ProcessFile(
                self, file, dsymbundle=dsymbundle, count_ctors=count_ctors
            )

    def dump_syms_cmdline(self, file, arch, dsymbundle=None):
        """
        Get the commandline used to invoke dump_syms.

        With a |dsymbundle| the command line names both the bundle and the
        original binary; without one the base class command line is used.
        """
        # dump_syms wants the path to the original binary and the .dSYM
        # in order to dump all the symbols.
        if dsymbundle:
            # This is the .dSYM bundle.
            return (
                [self.dump_syms]
                + arch.split()
                + ["--type", "macho", "-j", "2", dsymbundle, file]
            )
        return Dumper.dump_syms_cmdline(self, file, arch)

    def GenerateDSYM(self, file):
        """dump_syms on Mac needs to be run on a dSYM bundle produced
        by dsymutil(1), so run dsymutil here and pass the bundle name
        down to the superclass method instead.

        Returns the path of the bundle (|file| + ".dSYM"), or False when no
        usable bundle was produced (no bundle, a "no debug symbols" warning
        from dsymutil, no or unexpected DWARF contents, or no __debug_info
        section). Raises RuntimeError when dsymutil exits with a non-zero
        status.
        """
        t_start = time.time()
        print("Running Mac pre-processing on file: %s" % (file,), file=sys.stderr)

        dsymbundle = file + ".dSYM"
        # Start from a clean slate so a stale bundle is never picked up.
        if os.path.exists(dsymbundle):
            shutil.rmtree(dsymbundle)
        dsymutil = buildconfig.substs["DSYMUTIL"]
        # dsymutil takes --arch=foo instead of -a foo like everything else
        cmd = (
            [dsymutil] + [a.replace("-a ", "--arch=") for a in self.archs if a] + [file]
        )
        print(" ".join(cmd), file=sys.stderr)

        dsymutil_proc = subprocess.Popen(
            cmd, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
        dsymout, dsymerr = dsymutil_proc.communicate()
        if dsymutil_proc.returncode != 0:
            raise RuntimeError("Error running dsymutil: %s" % dsymerr)

        # Regular dsymutil won't produce a .dSYM for files without symbols.
        if not os.path.exists(dsymbundle):
            print("No symbols found in file: %s" % (file,), file=sys.stderr)
            return False

        # llvm-dsymutil will produce a .dSYM for files without symbols or
        # debug information, but only sometimes will it warn you about this.
        # We don't want to run dump_syms on such bundles, because asserts
        # will fire in debug mode and who knows what will happen in release.
        #
        # So we check for the error message and bail if it appears.  If it
        # doesn't, we carefully check the bundled DWARF to see if dump_syms
        # will be OK with it.
        if "warning: no debug symbols in" in dsymerr:
            print(dsymerr, file=sys.stderr)
            return False

        contents_dir = os.path.join(dsymbundle, "Contents", "Resources", "DWARF")
        if not os.path.exists(contents_dir):
            print(
                "No DWARF information in .dSYM bundle %s" % (dsymbundle,),
                file=sys.stderr,
            )
            return False

        # Exactly one DWARF file is expected inside the bundle.
        files = os.listdir(contents_dir)
        if len(files) != 1:
            print("Unexpected files in .dSYM bundle %s" % (files,), file=sys.stderr)
            return False

        otool_out = subprocess.check_output(
            [buildconfig.substs["OTOOL"], "-l", os.path.join(contents_dir, files[0])],
            universal_newlines=True,
        )
        if "sectname __debug_info" not in otool_out:
            print("No symbols in .dSYM bundle %s" % (dsymbundle,), file=sys.stderr)
            return False

        elapsed = time.time() - t_start
        print("Finished processing %s in %.2fs" % (file, elapsed), file=sys.stderr)
        return dsymbundle

    def CopyExeAndDebugInfo(self, file, debug_file, guid, code_file, code_id):
        """ProcessFile has already produced a dSYM bundle, so we should just
        copy that to the destination directory. However, we'll package it
        into a .tar because it's a bundle, so it's a directory. |file| here is
        the original filename.

        The store-relative path of the archive is printed only when tar
        exits with 0 and the archive exists afterwards.
        """
        dsymbundle = file + ".dSYM"
        rel_path = os.path.join(debug_file, guid, os.path.basename(dsymbundle) + ".tar")
        full_path = os.path.abspath(os.path.join(self.symbol_path, rel_path))
        # Open the null device in a with-block so the handle is closed again
        # once tar has finished, instead of leaking one handle per call.
        with open(os.devnull, "w") as devnull:
            success = subprocess.call(
                ["tar", "cf", full_path, os.path.basename(dsymbundle)],
                cwd=os.path.dirname(dsymbundle),
                stdout=devnull,
                stderr=subprocess.STDOUT,
            )
        if success == 0 and os.path.exists(full_path):
            print(rel_path)
999
1000
1001# Entry point if called as a standalone program
1002
1003
def main():
    """Command-line entry point: parse the options, build the
    platform-specific dumper and run it.

    At least three positional arguments are required: the dump_syms binary,
    the symbol store path and a debug info file or directory. Only the
    first debug info argument (args[2]) is handed to dumper.Process().

    Exits with status 1 when --source-index is given but PDBSTR is missing
    from buildconfig.substs. Exits through OptionParser.error() (usage
    message on stderr, status 2) when there are too few arguments or when
    validating the install manifests raises IOError or ValueError.
    """
    parser = OptionParser(
        usage="usage: %prog [options] <dump_syms binary> <symbol store path> <debug info files>"
    )
    parser.add_option(
        "-c",
        "--copy",
        action="store_true",
        dest="copy_debug",
        default=False,
        help="Copy debug info files into the same directory structure as symbol files",
    )
    parser.add_option(
        "-a",
        "--archs",
        action="store",
        dest="archs",
        # The second fragment starts with a space so the concatenated help
        # text does not read "separatedcpu".
        help="Run dump_syms -a <arch> for each space separated"
        + " cpu architecture in ARCHS (only on OS X)",
    )
    parser.add_option(
        "-s",
        "--srcdir",
        action="append",
        dest="srcdir",
        default=[],
        help="Use SRCDIR to determine relative paths to source files",
    )
    parser.add_option(
        "-v",
        "--vcs-info",
        action="store_true",
        dest="vcsinfo",
        help="Try to retrieve VCS info for each FILE listed in the output",
    )
    parser.add_option(
        "-i",
        "--source-index",
        action="store_true",
        dest="srcsrv",
        default=False,
        help="Add source index information to debug files, making them suitable"
        + " for use in a source server.",
    )
    parser.add_option(
        "--install-manifest",
        action="append",
        dest="install_manifests",
        default=[],
        help="""Use this install manifest to map filenames back
to canonical locations in the source repository. Specify
<install manifest filename>,<install destination> as a comma-separated pair.""",
    )
    parser.add_option(
        "--count-ctors",
        action="store_true",
        dest="count_ctors",
        default=False,
        help="Count static initializers",
    )
    (options, args) = parser.parse_args()

    # Source indexing needs pdbstr; bail out early if configure did not
    # find it.
    if options.srcsrv:
        if "PDBSTR" not in buildconfig.substs:
            print("pdbstr was not found by configure.\n", file=sys.stderr)
            sys.exit(1)

    if len(args) < 3:
        # parser.error() prints the usage message and exits with status 2;
        # it never returns.
        parser.error("not enough arguments")

    try:
        manifests = validate_install_manifests(options.install_manifests)
    except (IOError, ValueError) as e:
        # As above, parser.error() does not return.
        parser.error(str(e))
    file_mapping = make_file_mapping(manifests)
    # Any paths that get compared to source file names need to go through realpath.
    generated_files = {
        realpath(os.path.join(buildconfig.topobjdir, f)): f
        for (f, _) in get_generated_sources()
    }
    _, bucket = get_s3_region_and_bucket()
    dumper = GetPlatformSpecificDumper(
        dump_syms=args[0],
        symbol_path=args[1],
        copy_debug=options.copy_debug,
        archs=options.archs,
        srcdirs=options.srcdir,
        vcsinfo=options.vcsinfo,
        srcsrv=options.srcsrv,
        generated_files=generated_files,
        s3_bucket=bucket,
        file_mapping=file_mapping,
    )

    dumper.Process(args[2], options.count_ctors)
1102
1103
# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
1107