1#!/bin/env python
2# This Source Code Form is subject to the terms of the Mozilla Public
3# License, v. 2.0. If a copy of the MPL was not distributed with this
4# file, You can obtain one at http://mozilla.org/MPL/2.0/.
5#
6# Usage: symbolstore.py <params> <dump_syms path> <symbol store path>
7#                                <debug info files or dirs>
8#   Runs dump_syms on each debug info file specified on the command line,
9#   then places the resulting symbol file in the proper directory
10#   structure in the symbol store path.  Accepts multiple files
11#   on the command line, so can be called as part of a pipe using
#   find <dir> | xargs symbolstore.py <dump_syms> <storepath>
13#   But really, you might just want to pass it <dir>.
14#
15#   Parameters accepted:
16#     -c           : Copy debug info files to the same directory structure
17#                    as sym files
18#     -a "<archs>" : Run dump_syms -a <arch> for each space separated
19#                    cpu architecture in <archs> (only on OS X)
20#     -s <srcdir>  : Use <srcdir> as the top source directory to
21#                    generate relative filenames.
22
23import sys
24import platform
25import os
26import re
27import shutil
28import textwrap
29import fnmatch
30import subprocess
31import urlparse
32import multiprocessing
33import collections
34from optparse import OptionParser
35from xml.dom.minidom import parse
36
37# Utility classes
38
class VCSFileInfo:
    """ A base class for version-controlled file information. Ensures that the
        following attributes are generated only once (successfully):

            self.root
            self.clean_root
            self.revision
            self.filename

        Each attribute is generated by a single call to the matching GetRoot,
        GetCleanRoot, GetRevision, or GetFilename method. Those methods are
        explicitly not implemented here and must be implemented in derived
        classes. """

    # Maps each lazily generated attribute to the name of the method that
    # computes it.
    _LAZY_GETTERS = {
        "root": "GetRoot",
        "clean_root": "GetCleanRoot",
        "revision": "GetRevision",
        "filename": "GetFilename",
    }

    def __init__(self, file):
        """ Remembers |file|; raises ValueError if |file| is empty. """
        if not file:
            raise ValueError("a non-empty file name is required")
        self.file = file

    def __getattr__(self, name):
        """ __getattr__ is only called for attributes that are not set on self,
            so setting self.[attr] will prevent future calls to the GetRoot,
            GetCleanRoot, GetRevision, and GetFilename methods. We don't set
            the values on failure on the off chance that a future call might
            succeed. Any other unknown attribute raises AttributeError. """
        getter_name = VCSFileInfo._LAZY_GETTERS.get(name)
        if getter_name is None:
            raise AttributeError(name)

        value = getattr(self, getter_name)()
        if value:
            # cache successful results only
            setattr(self, name, value)
        return value

    def GetRoot(self):
        """ This method should return the unmodified root for the file or 'None'
            on failure. """
        raise NotImplementedError

    def GetCleanRoot(self):
        """ This method should return the repository root for the file or 'None'
            on failure. """
        raise NotImplementedError

    def GetRevision(self):
        """ This method should return the revision number for the file or 'None'
            on failure. """
        raise NotImplementedError

    def GetFilename(self):
        """ This method should return the repository-specific filename for the
            file or 'None' on failure. """
        raise NotImplementedError
108
109
# This regex strips the protocol and an optional username/password from a
# url, capturing everything that follows (host and path) in group 1.
# For instance, all the following urls will be transformed into
# 'foo.com/bar':
#
#   http://foo.com/bar
#   svn+ssh://user@foo.com/bar
#   svn+ssh://user:pass@foo.com/bar
#
rootRegex = re.compile(r'^\S+?:/+(?:[^\s/]*@)?(\S+)$')
119
def read_output(*args):
    """Runs the command line given by |args| and returns whatever it wrote
    to stdout, with trailing whitespace stripped."""
    process = subprocess.Popen(args=args, stdout=subprocess.PIPE)
    captured = process.communicate()[0]
    return captured.rstrip()
123
class HGRepoInfo:
    """
    Info about a local Mercurial clone at |path|: the parent revision
    (self.rev) and the remote location with protocol and credentials
    stripped (self.cleanroot). Exits the process if the remote location
    cannot be determined.
    """
    def __init__(self, path):
        self.path = path
        rev = read_output('hg', '-R', path,
                          'parent', '--template={node|short}')
        # SRCSRV_ROOT, when set, wins over the clone's default hg path,
        # so hg is only asked when the variable is missing or empty.
        root = os.environ.get("SRCSRV_ROOT")
        if not root:
            root = read_output('hg', '-R', path,
                               'showconfig', 'paths.default')
            if not root:
                print >> sys.stderr, "Failed to get HG Repo for %s" % path

        match = rootRegex.match(root) if root else None
        if match is None:
            print >> sys.stderr, textwrap.dedent("""\
                Could not determine repo info for %s.  This is either not a clone of the web-based
                repository, or you have not specified SRCSRV_ROOT, or the clone is corrupt.""") % path
            sys.exit(1)

        cleanroot = match.group(1)
        # drop a single trailing slash
        if cleanroot.endswith('/'):
            cleanroot = cleanroot[:-1]
        self.rev = rev
        self.cleanroot = cleanroot

    def GetFileInfo(self, file):
        """Returns an HGFileInfo describing |file| within this clone."""
        return HGFileInfo(file, self)
156
class HGFileInfo(VCSFileInfo):
    """VCS information for one file inside the Mercurial clone |repo|."""
    def __init__(self, file, repo):
        VCSFileInfo.__init__(self, file)
        self.repo = repo
        # keep the path relative to the top of the clone
        self.file = os.path.relpath(file, repo.path)

    def GetRoot(self):
        """The local path of the clone."""
        return self.repo.path

    def GetCleanRoot(self):
        """The remote location with protocol/credentials stripped."""
        return self.repo.cleanroot

    def GetRevision(self):
        """The revision recorded for the clone."""
        return self.repo.rev

    def GetFilename(self):
        """Returns 'hg:<clean root>:<relative file>:<revision>', or just the
        relative file name when revision or clean root is unavailable."""
        if not (self.revision and self.clean_root):
            return self.file
        return "hg:%s:%s:%s" % (self.clean_root, self.file, self.revision)
176
class GitRepoInfo:
    """
    Info about a local git repository at |path|. Discovering the info
    from a git clone is not currently supported, so the revision |rev|
    and the remote URL |root| must be provided out-of-band. Exits the
    process if |root| cannot be reduced to a clean root.
    """
    def __init__(self, path, rev, root):
        self.path = path

        match = rootRegex.match(root) if root else None
        if match is None:
            print >> sys.stderr, textwrap.dedent("""\
                Could not determine repo info for %s (%s).  This is either not a clone of a web-based
                repository, or you have not specified SRCSRV_ROOT, or the clone is corrupt.""") % (path, root)
            sys.exit(1)

        cleanroot = match.group(1)
        # drop a single trailing slash
        if cleanroot.endswith('/'):
            cleanroot = cleanroot[:-1]
        self.rev = rev
        self.cleanroot = cleanroot

    def GetFileInfo(self, file):
        """Returns a GitFileInfo describing |file| within this repository."""
        return GitFileInfo(file, self)
202
class GitFileInfo(VCSFileInfo):
    """VCS information for one file inside the git repository |repo|."""
    def __init__(self, file, repo):
        VCSFileInfo.__init__(self, file)
        self.repo = repo
        # keep the path relative to the top of the repository
        self.file = os.path.relpath(file, repo.path)

    def GetRoot(self):
        """The local path of the repository."""
        return self.repo.path

    def GetCleanRoot(self):
        """The remote location with protocol/credentials stripped."""
        return self.repo.cleanroot

    def GetRevision(self):
        """The revision recorded for the repository."""
        return self.repo.rev

    def GetFilename(self):
        """Returns 'git:<clean root>:<relative file>:<revision>', or just the
        relative file name when revision or clean root is unavailable."""
        if not (self.revision and self.clean_root):
            return self.file
        return "git:%s:%s:%s" % (self.clean_root, self.file, self.revision)
222
223# Utility functions
224
# A cache of repo info objects, keyed by srcdir. Entries are added by
# GetVCSFilenameFromSrcdir (hg clones) and Dumper.parse_repo_manifest
# (git projects).
srcdirRepoInfo = {}

# A cache of files for which VCS info has already been determined, keyed by
# the file name passed to GetVCSFilename. Used to prevent extra filesystem
# activity or process launching.
vcsFileInfoCache = {}
231
def IsInDir(file, dir):
    """Returns True if |file| is |dir| itself or lies somewhere below it.

    Both paths are made absolute and compared case-insensitively: the
    lower() is to handle win32+vc8, where the source filenames come out
    all lowercase, but the srcdir can be mixed case."""
    file_path = os.path.abspath(file).lower()
    dir_path = os.path.abspath(dir).lower()
    if file_path == dir_path:
        return True
    # Require a path separator right after the directory name, so that
    # e.g. /src/foobar/x.c is not mistaken for a file inside /src/foo.
    return file_path.startswith(dir_path.rstrip(os.sep) + os.sep)
237
def GetVCSFilenameFromSrcdir(file, srcdir):
    """Returns the VCS file info object for |file| from the repository at
    |srcdir|, or None when |srcdir| has no cached repo info and is not a
    Mercurial clone (no .hg directory)."""
    try:
        repo_info = srcdirRepoInfo[srcdir]
    except KeyError:
        # Not in cache, so find it and cache it
        if not os.path.isdir(os.path.join(srcdir, '.hg')):
            # Unknown VCS or file is not in a repo.
            return None
        repo_info = srcdirRepoInfo[srcdir] = HGRepoInfo(srcdir)
    return repo_info.GetFileInfo(file)
247
def GetVCSFilename(file, srcdirs):
    """Given a full path to a file, and a list of top source directories,
    look for version control information about this file, and return
    a tuple containing
    1) a specially formatted filename that contains the VCS type,
    VCS location, relative filename, and revision number, formatted like:
    vcs:vcs location:filename:revision
    For example:
    cvs:cvs.mozilla.org/cvsroot:mozilla/browser/app/nsBrowserApp.cpp:1.36
    2) the unmodified root information if it exists

    A |file| without both a directory part and a file name part is returned
    unchanged, paired with None."""
    (path, filename) = os.path.split(file)
    if '' in (path, filename):
        return (file, None)

    try:
        # Already cached this info, use it.
        info = vcsFileInfoCache[file]
    except KeyError:
        info = None
        for srcdir in srcdirs:
            if IsInDir(file, srcdir):
                info = GetVCSFilenameFromSrcdir(file, srcdir)
                if info:
                    vcsFileInfoCache[file] = info
                    break

    # we want forward slashes on win32 paths
    if not info:
        return (file.replace("\\", "/"), '')
    return (info.filename.replace("\\", "/"), info.root)
282
def GetPlatformSpecificDumper(**kwargs):
    """Creates and returns an instance of the Dumper subclass that is
    appropriate for the current platform, passing |kwargs| on to its
    constructor.  Raises KeyError when platform.system() reports a
    platform that has no entry in the table below."""
    # Python 2.5 has a bug where platform.system() returns 'Microsoft'.
    # Remove this when we no longer support Python 2.5.
    # NOTE(review): confirm that 'Sunos5' is really what platform.system()
    # reports on Solaris; if not, that entry can never match.
    dumpers_by_system = {
        'Windows': Dumper_Win32,
        'Microsoft': Dumper_Win32,
        'Linux': Dumper_Linux,
        'Sunos5': Dumper_Solaris,
        'Darwin': Dumper_Mac,
    }
    dumper_class = dumpers_by_system[platform.system()]
    return dumper_class(**kwargs)
293
def SourceIndex(fileStream, outputPath, vcs_root):
    """Writes a srcsrv data block to the .stream file |outputPath|.

    |fileStream| is the already formatted text for the "source files"
    section and |vcs_root| becomes the HGSERVER variable.  Always returns
    True; an error while opening or writing the file propagates to the
    caller."""
    # Creates a .pdb.stream file in the mozilla\objdir to be used for source indexing
    # Create the srcsrv data block that indexes the pdb file
    result = True
    # the with block closes the file even if one of the writes fails
    with open(outputPath, "w") as pdbStreamFile:
        pdbStreamFile.write('''SRCSRV: ini ------------------------------------------------\r\nVERSION=2\r\nINDEXVERSION=2\r\nVERCTRL=http\r\nSRCSRV: variables ------------------------------------------\r\nHGSERVER=''')
        pdbStreamFile.write(vcs_root)
        pdbStreamFile.write('''\r\nSRCSRVVERCTRL=http\r\nHTTP_EXTRACT_TARGET=%hgserver%/raw-file/%var3%/%var2%\r\nSRCSRVTRG=%http_extract_target%\r\nSRCSRV: source files ---------------------------------------\r\n''')
        pdbStreamFile.write(fileStream) # can't do string interpolation because the source server also uses this and so there are % in the above
        pdbStreamFile.write("SRCSRV: end ------------------------------------------------\r\n\n")
    return result
307
def WorkerInitializer(cls, lock):
    """Pool initializer that stores |lock| as the class variable |cls|.lock.

    Windows worker processes won't have run GlobalInit, and due to a lack of
    fork(), won't inherit the class variables from the parent. The lock is
    the only one they need. Redundant but harmless on other platforms."""
    setattr(cls, 'lock', lock)
313
def StartProcessFilesWork(dumper, files, arch_num, arch, vcs_root, after, after_arg):
    """Module-level trampoline that calls |dumper|.ProcessFilesWork with the
    remaining arguments and returns its result; needed because
    multiprocessing can't handle methods as Process targets."""
    work_args = (files, arch_num, arch, vcs_root, after, after_arg)
    return dumper.ProcessFilesWork(*work_args)
318
class Dumper:
    """This class can dump symbols from a file with debug info, and
    store the output in a directory structure that is valid for use as
    a Breakpad symbol server.  Requires a path to a dump_syms binary--
    |dump_syms| and a directory to store symbols in--|symbol_path|.
    Optionally takes a space separated string of processor architectures
    to process from each debug file--|archs|, a list of top source
    directories--|srcdirs|, for generating relative source file names,
    an option to copy debug info files alongside the dumped
    symbol files--|copy_debug|, mostly useful for creating a
    Microsoft Symbol Server from the resulting output, a flag to rewrite
    source file names with version control info--|vcsinfo|, a flag to
    request source server indexing--|srcsrv|, a list of fnmatch patterns
    for file names that must not be processed--|exclude|, and the path
    of an XML repo manifest describing git projects--|repo_manifest|.

    You don't want to use this directly if you intend to call
    ProcessDir.  Instead, call GetPlatformSpecificDumper to
    get an instance of a subclass.

    Processing is performed asynchronously via worker processes; in
    order to wait for processing to finish and cleanup correctly, you
    must call Finish after all Process/ProcessDir calls have been made.
    You must also call Dumper.GlobalInit before creating or using any
    instances."""
    def __init__(self, dump_syms, symbol_path,
                 archs=None,
                 srcdirs=None,
                 copy_debug=False,
                 vcsinfo=False,
                 srcsrv=False,
                 exclude=None,
                 repo_manifest=None):
        # popen likes absolute paths, at least on windows
        self.dump_syms = os.path.abspath(dump_syms)
        self.symbol_path = symbol_path
        if archs is None:
            # makes the loop logic simpler
            self.archs = ['']
        else:
            self.archs = ['-a %s' % a for a in archs.split()]
        # None stands for "no entries"; we always keep our own list so the
        # caller's sequence is never shared or modified
        self.srcdirs = [os.path.normpath(a) for a in (srcdirs or [])]
        self.copy_debug = copy_debug
        self.vcsinfo = vcsinfo
        self.srcsrv = srcsrv
        self.exclude = list(exclude) if exclude is not None else []
        if repo_manifest:
            self.parse_repo_manifest(repo_manifest)

        # book-keeping to keep track of our jobs and the cleanup work per file tuple
        self.files_record = {}
        self.jobs_record = collections.defaultdict(int)

    @classmethod
    def GlobalInit(cls, module=multiprocessing):
        """Initialize the class globals for the multiprocessing setup; must
        be called before any Dumper instances are created and used. Test cases
        may pass in a different module to supply Manager and Pool objects,
        usually multiprocessing.dummy.

        The instance methods read these globals through the Dumper class
        itself, so call this as Dumper.GlobalInit."""
        try:
            num_cpus = module.cpu_count()
        except NotImplementedError:
            # cpu_count raises rather than returning None when the number
            # of CPUs cannot be determined
            num_cpus = None
        if num_cpus is None:
            # assume a dual core machine if we can't find out for some reason
            # probably better on single core anyway due to I/O constraints
            num_cpus = 2

        # have to create any locks etc before the pool
        cls.manager = module.Manager()
        cls.jobs_condition = cls.manager.Condition()
        cls.lock = cls.manager.RLock()
        cls.pool = module.Pool(num_cpus, WorkerInitializer, (cls, cls.lock))

    def JobStarted(self, file_key):
        """Increments the number of submitted jobs for the specified key file,
        defined as the original file we processed; note that a single key file
        can generate up to 1 + len(self.archs) jobs in the Mac case."""
        with Dumper.jobs_condition:
            self.jobs_record[file_key] += 1
            Dumper.jobs_condition.notify_all()

    def JobFinished(self, file_key):
        """Decrements the number of submitted jobs for the specified key file,
        defined as the original file we processed; once the count is back to 0,
        remove the entry from our record."""
        with Dumper.jobs_condition:
            self.jobs_record[file_key] -= 1

            if self.jobs_record[file_key] == 0:
                del self.jobs_record[file_key]

            Dumper.jobs_condition.notify_all()

    def output(self, dest, output_str):
        """Writes |output_str| to |dest|, holding |lock|;
        terminates with a newline."""
        with Dumper.lock:
            dest.write(output_str + "\n")
            dest.flush()

    def output_pid(self, dest, output_str):
        """Debugging output; prepends the pid to the string."""
        self.output(dest, "%d: %s" % (os.getpid(), output_str))

    def parse_repo_manifest(self, repo_manifest):
        """
        Parse an XML manifest of repository info as produced
        by the `repo manifest -r` command.

        Every usable <project> is appended to self.srcdirs and gets a
        GitRepoInfo entry in the srcdirRepoInfo cache. Projects lacking a
        name, path, revision or a declared remote are reported and skipped.
        Does nothing if the document's root element is not <manifest>.
        """
        doc = parse(repo_manifest)
        # documentElement is the root element even when comments or
        # processing instructions precede it in the file
        root_element = doc.documentElement
        if root_element is None or root_element.tagName != "manifest":
            return
        # First, get remotes.
        remotes = dict((r.getAttribute("name"), r.getAttribute("fetch"))
                       for r in doc.getElementsByTagName("remote"))
        # And default remote.
        default_remote = None
        defaults = doc.getElementsByTagName("default")
        if defaults:
            default_remote = defaults[0].getAttribute("remote")
        # Now get projects. Assume they're relative to repo_manifest.
        base_dir = os.path.abspath(os.path.dirname(repo_manifest))
        for proj in doc.getElementsByTagName("project"):
            # name is the repository URL relative to the remote path.
            name = proj.getAttribute("name")
            # path is the path on-disk, relative to the manifest file.
            path = proj.getAttribute("path")
            # revision is the changeset ID.
            rev = proj.getAttribute("revision")
            # remote is the base URL to use.
            remote = proj.getAttribute("remote")
            # remote defaults to the <default remote>.
            if not remote:
                remote = default_remote
            # path defaults to name.
            if not path:
                path = name
            # A project that names a remote with no <remote> element is
            # unusable as well, so it is skipped like an incomplete one.
            if not (name and path and rev and remote) or remote not in remotes:
                print("Skipping project %s" % proj.toxml())
                continue
            remote = remotes[remote]
            # Turn git URLs into http URLs so that urljoin works.
            if remote.startswith("git:"):
                remote = "http" + remote[3:]
            # Add this project to srcdirs.
            srcdir = os.path.join(base_dir, path)
            self.srcdirs.append(srcdir)
            # And cache its VCS file info. Currently all repos mentioned
            # in a repo manifest are assumed to be git.
            root = urlparse.urljoin(remote, name)
            srcdirRepoInfo[srcdir] = GitRepoInfo(srcdir, rev, root)

    # subclasses override this
    def ShouldProcess(self, file):
        """Returns False if the base name of |file| matches any of the
        fnmatch patterns in self.exclude, True otherwise."""
        return not any(fnmatch.fnmatch(os.path.basename(file), exclude) for exclude in self.exclude)

    # and can override this
    def ShouldSkipDir(self, dir):
        """Returns True if |dir| must not be descended into; never the case
        in this base class."""
        return False

    def RunFileCommand(self, file):
        """Utility function, returns the output of file(1), or an empty
        string if the command could not be run."""
        try:
            # we use -L to read the targets of symlinks,
            # and -b to print just the content, not the filename.
            # Passing an argument list (no shell) keeps file names with
            # spaces or shell metacharacters intact.
            proc = subprocess.Popen(["file", "-Lb", file],
                                    stdout=subprocess.PIPE,
                                    universal_newlines=True)
            return proc.communicate()[0]
        except OSError:
            return ""

    # This is a no-op except on Win32
    def FixFilenameCase(self, file):
        """Returns |file| unchanged."""
        return file

    # This is a no-op except on Win32
    def SourceServerIndexing(self, debug_file, guid, sourceFileStream, vcs_root):
        """Does nothing and returns an empty string."""
        return ""

    # subclasses override this if they want to support this
    def CopyDebug(self, file, debug_file, guid):
        """Does nothing in this base class."""
        pass

    def Finish(self, stop_pool=True):
        """Wait for the expected number of jobs to be submitted, and then
        wait for the pool to finish processing them. By default, will close
        and clear the pool, but for testcases that need multiple runs, pass
        stop_pool = False."""
        with Dumper.jobs_condition:
            while len(self.jobs_record) != 0:
                Dumper.jobs_condition.wait()
        if stop_pool:
            Dumper.pool.close()
            Dumper.pool.join()

    def Process(self, file_or_dir):
        """Process a file or all the (valid) files in a directory; processing is performed
        asynchronously, and Finish must be called to wait for it complete and cleanup."""
        if os.path.isdir(file_or_dir) and not self.ShouldSkipDir(file_or_dir):
            self.ProcessDir(file_or_dir)
        elif os.path.isfile(file_or_dir):
            self.ProcessFiles((file_or_dir,))

    def ProcessDir(self, dir):
        """Process all the valid files in this directory.  Valid files
        are determined by calling ShouldProcess; processing is performed
        asynchronously, and Finish must be called to wait for it complete and cleanup."""
        for root, dirs, files in os.walk(dir):
            # removing entries from dirs in place stops os.walk from
            # descending into them
            for d in dirs[:]:
                if self.ShouldSkipDir(d):
                    dirs.remove(d)
            for f in files:
                fullpath = os.path.join(root, f)
                if self.ShouldProcess(fullpath):
                    self.ProcessFiles((fullpath,))

    def SubmitJob(self, file_key, func, args, callback):
        """Submits a job to the pool of workers; increments the number of submitted jobs."""
        self.JobStarted(file_key)
        Dumper.pool.apply_async(func, args=args, callback=callback)

    def ProcessFilesFinished(self, res):
        """Callback from multiprocessing when ProcessFilesWork finishes;
        run the cleanup work, if any"""
        self.JobFinished(res['files'][-1])
        # only run the cleanup function once per tuple of files
        self.files_record[res['files']] += 1
        if self.files_record[res['files']] == len(self.archs):
            del self.files_record[res['files']]
            if res['after']:
                res['after'](res['status'], res['after_arg'])

    def ProcessFiles(self, files, after=None, after_arg=None):
        """Dump symbols from these files into a symbol file, stored
        in the proper directory structure in  |symbol_path|; processing is performed
        asynchronously, and Finish must be called to wait for it complete and cleanup.
        All files after the first are fallbacks in case the first file does not process
        successfully; if it does, no other files will be touched.

        |files| is used as a dictionary key and so must be a tuple. If
        |after| is given, it is called as after(status, after_arg) once
        the jobs for every architecture have finished."""
        self.output_pid(sys.stderr, "Submitting jobs for files: %s" % str(files))

        # tries to get the vcs root from the .mozconfig first - if it's not set
        # the tinderbox vcs path will be assigned further down
        vcs_root = os.environ.get("SRCSRV_ROOT")
        # Record that we submitted jobs for this tuple of files. This must
        # happen exactly once, before the first job goes out: resetting the
        # counter between submissions could wipe out a completion that
        # ProcessFilesFinished has already counted, and the cleanup callback
        # would then never run.
        if self.archs:
            self.files_record[files] = 0
        for arch_num, arch in enumerate(self.archs):
            self.SubmitJob(files[-1], StartProcessFilesWork, args=(self, files, arch_num, arch, vcs_root, after, after_arg), callback=self.ProcessFilesFinished)

    def ProcessFilesWork(self, files, arch_num, arch, vcs_root, after, after_arg):
        """Runs dump_syms on each of |files| in turn until one of them yields
        symbol data, and writes the resulting .sym file below |symbol_path|.

        |arch| holds the extra dump_syms arguments for this job and
        |arch_num| its index in self.archs. Returns a dictionary with the
        keys 'status' (True if at least one line other than MODULE/FILE was
        written), 'after', 'after_arg' and 'files'. Unexpected errors are
        reported on stderr and re-raised."""
        self.output_pid(sys.stderr, "Worker processing files: %s" % (files,))

        # our result is a status, a cleanup function, an argument to that function, and the tuple of files we were called on
        result = { 'status' : False, 'after' : after, 'after_arg' : after_arg, 'files' : files }

        sourceFileStream = ''
        for file in files:
            # files is a tuple of files, containing fallbacks in case the first file doesn't process successfully
            try:
                proc = subprocess.Popen([self.dump_syms] + arch.split() + [file],
                                        stdout=subprocess.PIPE)
                module_line = next(proc.stdout)
                if module_line.startswith("MODULE"):
                    # MODULE os cpu guid debug_file
                    (guid, debug_file) = (module_line.split())[3:5]
                    # strip off .pdb extensions, and append .sym
                    sym_file = re.sub(r"\.pdb$", "", debug_file) + ".sym"
                    # we do want forward slashes here
                    rel_path = os.path.join(debug_file,
                                            guid,
                                            sym_file).replace("\\", "/")
                    full_path = os.path.normpath(os.path.join(self.symbol_path,
                                                              rel_path))
                    sym_dir = os.path.dirname(full_path)
                    try:
                        os.makedirs(sym_dir)
                    except OSError:
                        # fine if it already exists (possibly created by
                        # another worker); anything else is a real failure
                        if not os.path.isdir(sym_dir):
                            raise
                    # the with block closes the symbol file even if
                    # processing fails part-way through
                    with open(full_path, "w") as f:
                        f.write(module_line)
                        # now process the rest of the output
                        for line in proc.stdout:
                            if line.startswith("FILE"):
                                # FILE index filename
                                (x, index, filename) = line.rstrip().split(None, 2)
                                if sys.platform == "sunos5":
                                    for srcdir in self.srcdirs:
                                        start = filename.find(srcdir)
                                        if start != -1:
                                            filename = filename[start:]
                                            break
                                filename = self.FixFilenameCase(filename)
                                sourcepath = filename
                                if self.vcsinfo:
                                    (filename, rootname) = GetVCSFilename(filename, self.srcdirs)
                                    # sets vcs_root in case the loop through files were to end on an empty rootname
                                    if vcs_root is None and rootname:
                                        vcs_root = rootname
                                # gather up files with hg for indexing; the
                                # colon keeps ordinary paths that merely
                                # start with "hg" out of this branch
                                if filename.startswith("hg:"):
                                    (ver, checkout, source_file, revision) = filename.split(":", 3)
                                    sourceFileStream += sourcepath + "*" + source_file + '*' + revision + "\r\n"
                                f.write("FILE %s %s\n" % (index, filename))
                            else:
                                # pass through all other lines unchanged
                                f.write(line)
                                # we want to return true only if at least one line is not a MODULE or FILE line
                                result['status'] = True
                    proc.wait()
                    # we output relative paths so callers can get a list of what
                    # was generated
                    self.output(sys.stdout, rel_path)
                    if self.srcsrv and vcs_root:
                        # add source server indexing to the pdb file
                        self.SourceServerIndexing(file, guid, sourceFileStream, vcs_root)
                    # only copy debug the first time if we have multiple architectures
                    if self.copy_debug and arch_num == 0:
                        self.CopyDebug(file, debug_file, guid)
            except StopIteration:
                # dump_syms produced no output at all for this file
                pass
            except Exception as e:
                self.output(sys.stderr, "Unexpected error: %s" % (str(e),))
                raise
            if result['status']:
                # we only need 1 file to work
                break
        return result
635
636# Platform-specific subclasses.  For the most part, these just have
637# logic to determine what files to extract symbols from.
638
class Dumper_Win32(Dumper):
    """Dumper for Windows: processes .pdb files, restores the case of
    source file names, copies and compresses pdb files, and performs
    source server indexing."""
    # Cache of already corrected file names, shared by all instances.
    fixedFilenameCaseCache = {}

    def ShouldProcess(self, file):
        """This function will allow processing of pdb files that have dll
        or exe files with the same base name next to them."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".pdb"):
            (path,ext) = os.path.splitext(file)
            if os.path.isfile(path + ".exe") or os.path.isfile(path + ".dll"):
                return True
        return False

    def FixFilenameCase(self, file):
        """Recent versions of Visual C++ put filenames into
        PDB files as all lowercase.  If the file exists
        on the local filesystem, fix it."""

        # Use a cached version if we have one.
        if file in self.fixedFilenameCaseCache:
            return self.fixedFilenameCaseCache[file]

        result = file

        (path, filename) = os.path.split(file)
        if os.path.isdir(path):
            lc_filename = filename.lower()
            for f in os.listdir(path):
                if f.lower() == lc_filename:
                    result = os.path.join(path, f)
                    break

        # Cache the corrected version to avoid future filesystem hits.
        self.fixedFilenameCaseCache[file] = result
        return result

    def CopyDebug(self, file, debug_file, guid):
        """Copies the pdb |file| into the symbol store and tries to compress
        it with makecab; prints the relative path of the file that ends up
        in the store (.pd_ when compressed, the plain copy otherwise)."""
        rel_path = os.path.join(debug_file,
                                guid,
                                debug_file).replace("\\", "/")
        full_path = os.path.normpath(os.path.join(self.symbol_path,
                                                  rel_path))
        shutil.copyfile(file, full_path)
        # try compressing it
        compressed_file = os.path.splitext(full_path)[0] + ".pd_"
        try:
            # ignore makecab's output; the with block makes sure the
            # null device handle is closed again
            with open(os.devnull, "w") as devnull:
                success = subprocess.call(["makecab.exe", "/D", "CompressionType=LZX", "/D",
                                           "CompressionMemory=21",
                                           full_path, compressed_file],
                                          stdout=devnull, stderr=subprocess.STDOUT)
        except OSError:
            # makecab could not be started; keep the uncompressed copy
            success = 1
        if success == 0 and os.path.exists(compressed_file):
            os.unlink(full_path)
            self.output(sys.stdout, os.path.splitext(rel_path)[0] + ".pd_")
        else:
            self.output(sys.stdout, rel_path)

    def SourceServerIndexing(self, debug_file, guid, sourceFileStream, vcs_root):
        """Writes the srcsrv stream for |debug_file| and, when copy_debug is
        set, has the pdbstr tool named by the PDBSTR_PATH environment
        variable write it into the pdb, then removes the .stream file.
        Returns the result of SourceIndex, or False if the pdb could not be
        indexed because PDBSTR_PATH is not set."""
        # Creates a .pdb.stream file in the mozilla\objdir to be used for source indexing
        debug_file = os.path.abspath(debug_file)
        streamFilename = debug_file + ".stream"
        stream_output_path = os.path.abspath(streamFilename)
        # Call SourceIndex to create the .stream file
        result = SourceIndex(sourceFileStream, stream_output_path, vcs_root)
        if self.copy_debug:
            pdbstr_path = os.environ.get("PDBSTR_PATH")
            if pdbstr_path:
                pdbstr = os.path.normpath(pdbstr_path)
                subprocess.call([pdbstr, "-w", "-p:" + os.path.basename(debug_file),
                                 "-i:" + os.path.basename(streamFilename), "-s:srcsrv"],
                                cwd=os.path.dirname(stream_output_path))
            else:
                # without the tool there is nothing to run; say so instead
                # of failing inside os.path.normpath(None)
                self.output(sys.stderr,
                            "PDBSTR_PATH is not set; skipping source server indexing of %s" % debug_file)
                result = False
            # clean up all the .stream files when done
            os.remove(stream_output_path)
        return result
712
class Dumper_Linux(Dumper):
    """Dumper specialization for ELF binaries, using objcopy and gzip."""

    # objcopy binary to run; the OBJCOPY environment variable overrides it
    objcopy = os.environ.get('OBJCOPY', 'objcopy')

    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .so extension, and additionally
        file(1) reports as being ELF files.  It expects to find the file
        command in PATH."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".so") or os.access(file, os.X_OK):
            return self.RunFileCommand(file).startswith("ELF")
        return False

    def CopyDebug(self, file, debug_file, guid):
        """Split the debug info out of |file| and store it, gzipped, in the
        symbol store as <debug_file>/<guid>/<debug_file>.dbg.gz.

        The debug info is extracted into <file>.dbg and |file| gets a
        .gnu_debuglink section pointing at it.  If either objcopy step
        fails, any partial <file>.dbg is removed and nothing is reported.
        On success the relative path of the .gz file is passed to
        self.output.
        """
        # We want to strip out the debug info, and add a
        # .gnu_debuglink section to the object, so the debugger can
        # actually load our debug info later.
        file_dbg = file + ".dbg"
        if subprocess.call([self.objcopy, '--only-keep-debug', file, file_dbg]) == 0 and \
           subprocess.call([self.objcopy, '--add-gnu-debuglink=%s' % file_dbg, file]) == 0:
            rel_path = os.path.join(debug_file,
                                    guid,
                                    debug_file + ".dbg")
            full_path = os.path.normpath(os.path.join(self.symbol_path,
                                                      rel_path))
            shutil.move(file_dbg, full_path)
            # gzip the shipped debug files.  An argument list (no shell) keeps
            # paths with spaces or shell metacharacters intact, and -f lets a
            # re-run replace the .gz left behind by a previous run instead of
            # gzip refusing (or prompting) and leaving a stale archive.
            subprocess.call(["gzip", "-f", full_path])
            self.output(sys.stdout, rel_path + ".gz")
        else:
            if os.path.isfile(file_dbg):
                os.unlink(file_dbg)
745
class Dumper_Solaris(Dumper):
    """Dumper specialization for ELF binaries on Solaris."""

    def RunFileCommand(self, file):
        """Utility function, returns the output of file(1)

        Only the part of the output after the first tab is returned.
        An empty string is returned when file(1) cannot be run or its
        output contains no tab.
        """
        try:
            # Pass an argument list rather than a shell string so that
            # file names containing spaces or shell metacharacters are
            # handed to file(1) unchanged.
            proc = subprocess.Popen(["file", file],
                                    stdout=subprocess.PIPE,
                                    universal_newlines=True)
            output = proc.communicate()[0]
            return output.split('\t')[1]
        except (OSError, ValueError, IndexError):
            # best effort: launch failure or unexpected output format
            return ""

    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .so extension, and additionally
        file(1) reports as being ELF files.  It expects to find the file
        command in PATH."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".so") or os.access(file, os.X_OK):
            return self.RunFileCommand(file).startswith("ELF")
        return False
765
def StartProcessFilesWorkMac(dumper, file):
    """Module-level trampoline for Dumper_Mac-style pre-processing.

    multiprocessing cannot use a bound method as a job target, so jobs are
    submitted with this plain function, which simply forwards |file| to
    dumper.ProcessFilesWorkMac and hands back whatever that returns."""
    work_result = dumper.ProcessFilesWorkMac(file)
    return work_result
770
def AfterMac(status, dsymbundle):
    """Post-processing hook for Mac jobs: delete the |dsymbundle| directory
    tree.  |status| is accepted but not looked at."""
    # Dumper.ProcessFiles has already run CopyDebug by the time we get here,
    # so the bundle on disk is no longer needed.
    shutil.rmtree(dsymbundle, ignore_errors=False)
775
class Dumper_Mac(Dumper):
    """Dumper specialization for Mach-O binaries.

    Each file is first run through dsymutil(1) in a worker job to produce a
    .dSYM bundle; the bundle (with the original file as a fallback) is then
    handed to the generic Dumper.ProcessFiles."""

    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .dylib extension, and additionally
        file(1) reports as being Mach-O files.  It expects to find the file
        command in PATH."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".dylib") or os.access(file, os.X_OK):
            return self.RunFileCommand(file).startswith("Mach-O")
        return False

    def ShouldSkipDir(self, dir):
        """We create .dSYM bundles on the fly, but if someone runs
        buildsymbols twice, we should skip any bundles we created
        previously, otherwise we'll recurse into them and try to
        dump the inner bits again."""
        return dir.endswith(".dSYM")

    def ProcessFiles(self, files, after=None, after_arg=None):
        """Submit the dsymutil pre-processing job for files[0].

        |after| and |after_arg| are accepted for signature compatibility
        with Dumper.ProcessFiles but are not used here."""
        # also note, files must be len 1 here, since we're the only ones
        # that ever add more than one file to the list
        self.output_pid(sys.stderr, "Submitting job for Mac pre-processing on file: %s" % (files[0]))
        self.SubmitJob(files[0], StartProcessFilesWorkMac, args=(self, files[0]), callback=self.ProcessFilesMacFinished)

    def ProcessFilesMacFinished(self, result):
        """Callback for the pre-processing job; |result| is the dict returned
        by ProcessFilesWorkMac."""
        if result['status']:
            # kick off new jobs per-arch with our new list of files
            Dumper.ProcessFiles(self, result['files'], after=AfterMac, after_arg=result['files'][0])
        # only decrement jobs *after* that, since otherwise we'll remove the record for this file
        # Use 'file_key' (the file the job was submitted under) rather than
        # result['files'][-1]: 'files' is None when dsymutil produced no
        # bundle, and indexing it would raise TypeError in this callback.
        self.JobFinished(result['file_key'])

    def ProcessFilesWorkMac(self, file):
        """dump_syms on Mac needs to be run on a dSYM bundle produced
        by dsymutil(1), so run dsymutil here and pass the bundle name
        down to the superclass method instead.

        Returns a dict with 'status' (True if a bundle was produced),
        'files' (the tuple (bundle, file) on success, None otherwise) and
        'file_key' (always |file|)."""
        self.output_pid(sys.stderr, "Worker running Mac pre-processing on file: %s" % (file,))

        # our return is a status and a tuple of files to dump symbols for
        # the extra files are fallbacks; as soon as one is dumped successfully, we stop
        result = { 'status' : False, 'files' : None, 'file_key' : file }
        dsymbundle = file + ".dSYM"
        if os.path.exists(dsymbundle):
            shutil.rmtree(dsymbundle)
        # dsymutil takes --arch=foo instead of -a foo like everything else
        dsymutil_cmd = (["dsymutil"]
                        + [a.replace('-a ', '--arch=') for a in self.archs if a]
                        + [file])
        # close the null-device handle once dsymutil is done instead of
        # leaking one per processed file
        with open(os.devnull, "w") as devnull:
            subprocess.call(dsymutil_cmd, stdout=devnull)
        if not os.path.exists(dsymbundle):
            # dsymutil won't produce a .dSYM for files without symbols
            return result

        result['status'] = True
        result['files'] = (dsymbundle, file)
        return result

    def CopyDebug(self, file, debug_file, guid):
        """ProcessFiles has already produced a dSYM bundle, so we should just
        copy that to the destination directory. However, we'll package it
        into a .tar.bz2 because the debug symbols are pretty huge, and
        also because it's a bundle, so it's a directory. |file| here is the
        dSYM bundle, and |debug_file| is the original filename."""
        rel_path = os.path.join(debug_file,
                                guid,
                                os.path.basename(file) + ".tar.bz2")
        full_path = os.path.abspath(os.path.join(self.symbol_path,
                                                  rel_path))
        # tar is run from the bundle's parent directory so the archive holds
        # just the bundle name; an empty dirname (bundle in the current
        # directory) must become None because cwd="" is not a valid directory
        bundle_dir = os.path.dirname(file) or None
        with open(os.devnull, "w") as devnull:
            success = subprocess.call(["tar", "cjf", full_path, os.path.basename(file)],
                                      cwd=bundle_dir,
                                      stdout=devnull, stderr=subprocess.STDOUT)
        if success == 0 and os.path.exists(full_path):
            self.output(sys.stdout, rel_path)
851
852# Entry point if called as a standalone program
def main():
    """Command-line entry point.

    Parses the options, validates PDBSTR_PATH when source indexing (-i) is
    requested, builds the dumper via GetPlatformSpecificDumper, runs
    Process on every remaining argument and finally calls Finish.

    Exits with status 1 if -i is given and PDBSTR_PATH is unset or does not
    name an existing path, and with status 2 (via parser.error) when fewer
    than three positional arguments are supplied.
    """
    parser = OptionParser(usage="usage: %prog [options] <dump_syms binary> <symbol store path> <debug info files>")
    parser.add_option("-c", "--copy",
                      action="store_true", dest="copy_debug", default=False,
                      help="Copy debug info files into the same directory structure as symbol files")
    parser.add_option("-a", "--archs",
                      action="store", dest="archs",
                      help="Run dump_syms -a <arch> for each space separated cpu architecture in ARCHS (only on OS X)")
    parser.add_option("-s", "--srcdir",
                      action="append", dest="srcdir", default=[],
                      help="Use SRCDIR to determine relative paths to source files")
    parser.add_option("-v", "--vcs-info",
                      action="store_true", dest="vcsinfo",
                      help="Try to retrieve VCS info for each FILE listed in the output")
    parser.add_option("-i", "--source-index",
                      action="store_true", dest="srcsrv", default=False,
                      help="Add source index information to debug files, making them suitable for use in a source server.")
    parser.add_option("-x", "--exclude",
                      action="append", dest="exclude", default=[], metavar="PATTERN",
                      help="Skip processing files matching PATTERN.")
    parser.add_option("--repo-manifest",
                      action="store", dest="repo_manifest",
                      help="""Get source information from this XML manifest
produced by the `repo manifest -r` command.
""")
    (options, args) = parser.parse_args()

    # check to see if the pdbstr.exe exists
    if options.srcsrv:
        pdbstr = os.environ.get("PDBSTR_PATH")
        # PDBSTR_PATH may be unset; os.path.exists(None) would raise a
        # TypeError instead of producing the message below
        if not pdbstr or not os.path.exists(pdbstr):
            sys.stderr.write("Invalid path to pdbstr.exe - please set/check PDBSTR_PATH.\n\n")
            sys.exit(1)

    if len(args) < 3:
        # parser.error() prints the usage message and exits by itself
        parser.error("not enough arguments")

    dumper = GetPlatformSpecificDumper(dump_syms=args[0],
                                       symbol_path=args[1],
                                       copy_debug=options.copy_debug,
                                       archs=options.archs,
                                       srcdirs=options.srcdir,
                                       vcsinfo=options.vcsinfo,
                                       srcsrv=options.srcsrv,
                                       exclude=options.exclude,
                                       repo_manifest=options.repo_manifest)
    for arg in args[2:]:
        dumper.Process(arg)
    dumper.Finish()
903
# Script entry point: only runs when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    # set up the multiprocessing infrastructure before we start;
    # note that this needs to be in the __main__ guard, or else Windows will choke
    Dumper.GlobalInit()

    # parse the command line and process the requested files
    main()
911