#!/bin/env python
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Usage: symbolstore.py <params> <dump_syms path> <symbol store path>
#                       <debug info files or dirs>
#   Runs dump_syms on each debug info file specified on the command line,
#   then places the resulting symbol file in the proper directory
#   structure in the symbol store path.  Accepts multiple files
#   on the command line, so can be called as part of a pipe using
#   find <dir> | xargs symbolstore.py <dump_syms> <storepath>
#   But really, you might just want to pass it <dir>.
#
#   Parameters accepted:
#     -c           : Copy debug info files to the same directory structure
#                    as sym files
#     -a "<archs>" : Run dump_syms -a <arch> for each space separated
#                    cpu architecture in <archs> (only on OS X)
#     -s <srcdir>  : Use <srcdir> as the top source directory to
#                    generate relative filenames.

import sys
import platform
import os
import re
import shutil
import textwrap
import fnmatch
import subprocess
import urlparse
import multiprocessing
import collections
from optparse import OptionParser
from xml.dom.minidom import parse

# Utility classes

class VCSFileInfo:
    """ A base class for version-controlled file information. Ensures that the
    following attributes are generated only once (successfully):

        self.root
        self.clean_root
        self.revision
        self.filename

    The attributes are generated by a single call to the GetRoot,
    GetRevision, and GetFilename methods. Those methods are explicitly not
    implemented here and must be implemented in derived classes. """

    def __init__(self, file):
        if not file:
            raise ValueError
        self.file = file

    def __getattr__(self, name):
        """ __getattr__ is only called for attributes that are not set on self,
        so setting self.[attr] will prevent future calls to the GetRoot,
        GetRevision, and GetFilename methods. We don't set the values on
        failure on the off chance that a future call might succeed. """

        if name == "root":
            root = self.GetRoot()
            if root:
                self.root = root
            return root

        elif name == "clean_root":
            clean_root = self.GetCleanRoot()
            if clean_root:
                self.clean_root = clean_root
            return clean_root

        elif name == "revision":
            revision = self.GetRevision()
            if revision:
                self.revision = revision
            return revision

        elif name == "filename":
            filename = self.GetFilename()
            if filename:
                self.filename = filename
            return filename

        raise AttributeError

    def GetRoot(self):
        """ This method should return the unmodified root for the file or 'None'
        on failure. """
        raise NotImplementedError

    def GetCleanRoot(self):
        """ This method should return the repository root for the file or 'None'
        on failure. """
        raise NotImplementedError

    def GetRevision(self):
        """ This method should return the revision number for the file or 'None'
        on failure. """
        raise NotImplementedError

    def GetFilename(self):
        """ This method should return the repository-specific filename for the
        file or 'None' on failure. """
        raise NotImplementedError
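
# Illustrative only: a minimal sketch of the subclass contract described in
# the VCSFileInfo docstring above. The class name and return values are
# hypothetical; real subclasses (HGFileInfo, GitFileInfo below) derive this
# information from an actual repository.
class _ExampleFileInfo(VCSFileInfo):
    def GetRoot(self):
        return "http://example.com/repo"  # unmodified root

    def GetCleanRoot(self):
        return "example.com/repo"         # root with protocol/user stripped

    def GetRevision(self):
        return "0000000000"               # hypothetical revision id

    def GetFilename(self):
        return "vcs:%s:%s:%s" % (self.clean_root, self.file, self.revision)

# Accessing .filename on an _ExampleFileInfo triggers __getattr__, which calls
# GetFilename once and caches the result as an instance attribute.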

# This regex separates protocol and optional username/password from a url.
# For instance, all the following urls will be transformed into
# 'foo.com/bar':
#
#   http://foo.com/bar
#   svn+ssh://user@foo.com/bar
#   svn+ssh://user:pass@foo.com/bar
#
rootRegex = re.compile(r'^\S+?:/+(?:[^\s/]*@)?(\S+)$')

def read_output(*args):
    (stdout, _) = subprocess.Popen(args=args, stdout=subprocess.PIPE).communicate()
    return stdout.rstrip()

class HGRepoInfo:
    def __init__(self, path):
        self.path = path
        rev = read_output('hg', '-R', path,
                          'parent', '--template={node|short}')
        # Look for the default hg path. If SRCSRV_ROOT is set, we
        # don't bother asking hg.
        hg_root = os.environ.get("SRCSRV_ROOT")
        if hg_root:
            root = hg_root
        else:
            root = read_output('hg', '-R', path,
                               'showconfig', 'paths.default')
            if not root:
                print >> sys.stderr, "Failed to get HG Repo for %s" % path
        cleanroot = None
        if root:
            match = rootRegex.match(root)
            if match:
                cleanroot = match.group(1)
                if cleanroot.endswith('/'):
                    cleanroot = cleanroot[:-1]
        if cleanroot is None:
            print >> sys.stderr, textwrap.dedent("""\
                Could not determine repo info for %s. This is either not a clone of the web-based
                repository, or you have not specified SRCSRV_ROOT, or the clone is corrupt.""") % path
            sys.exit(1)
        self.rev = rev
        self.cleanroot = cleanroot

    def GetFileInfo(self, file):
        return HGFileInfo(file, self)

class HGFileInfo(VCSFileInfo):
    def __init__(self, file, repo):
        VCSFileInfo.__init__(self, file)
        self.repo = repo
        self.file = os.path.relpath(file, repo.path)

    def GetRoot(self):
        return self.repo.path

    def GetCleanRoot(self):
        return self.repo.cleanroot

    def GetRevision(self):
        return self.repo.rev

    def GetFilename(self):
        if self.revision and self.clean_root:
            return "hg:%s:%s:%s" % (self.clean_root, self.file, self.revision)
        return self.file

class GitRepoInfo:
    """
    Info about a local git repository. Does not currently support
    discovering info about a git clone; the info must be provided
    out-of-band.
    """
    def __init__(self, path, rev, root):
        self.path = path
        cleanroot = None
        if root:
            match = rootRegex.match(root)
            if match:
                cleanroot = match.group(1)
                if cleanroot.endswith('/'):
                    cleanroot = cleanroot[:-1]
        if cleanroot is None:
            print >> sys.stderr, textwrap.dedent("""\
                Could not determine repo info for %s (%s). This is either not a clone of a web-based
                repository, or you have not specified SRCSRV_ROOT, or the clone is corrupt.""") % (path, root)
            sys.exit(1)
        self.rev = rev
        self.cleanroot = cleanroot

    def GetFileInfo(self, file):
        return GitFileInfo(file, self)

class GitFileInfo(VCSFileInfo):
    def __init__(self, file, repo):
        VCSFileInfo.__init__(self, file)
        self.repo = repo
        self.file = os.path.relpath(file, repo.path)

    def GetRoot(self):
        return self.repo.path

    def GetCleanRoot(self):
        return self.repo.cleanroot

    def GetRevision(self):
        return self.repo.rev

    def GetFilename(self):
        if self.revision and self.clean_root:
            return "git:%s:%s:%s" % (self.clean_root, self.file, self.revision)
        return self.file
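
# For a file that is under version control, GetFilename produces strings of
# the form shown below (values hypothetical; the format strings are the ones
# used in HGFileInfo/GitFileInfo above):
#
#   hg:hg.example.org/repo:browser/app/nsBrowserApp.cpp:abcdef012345
#   git:example.com/repo.git:src/foo.c:abcdef0123456789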

# Utility functions

# A cache of repo info for each srcdir.
srcdirRepoInfo = {}

# A cache of files for which VCS info has already been determined. Used to
# prevent extra filesystem activity or process launching.
vcsFileInfoCache = {}

def IsInDir(file, dir):
    # the lower() is to handle win32+vc8, where
    # the source filenames come out all lowercase,
    # but the srcdir can be mixed case
    return os.path.abspath(file).lower().startswith(os.path.abspath(dir).lower())

def GetVCSFilenameFromSrcdir(file, srcdir):
    if srcdir not in srcdirRepoInfo:
        # Not in cache, so find it and cache it
        if os.path.isdir(os.path.join(srcdir, '.hg')):
            srcdirRepoInfo[srcdir] = HGRepoInfo(srcdir)
        else:
            # Unknown VCS or file is not in a repo.
            return None
    return srcdirRepoInfo[srcdir].GetFileInfo(file)

def GetVCSFilename(file, srcdirs):
    """Given a full path to a file and a list of top source directories,
    look for version control information about this file, and return
    a tuple containing
    1) a specially formatted filename that contains the VCS type,
    VCS location, relative filename, and revision number, formatted like:
    vcs:vcs location:filename:revision
    For example:
    cvs:cvs.mozilla.org/cvsroot:mozilla/browser/app/nsBrowserApp.cpp:1.36
    2) the unmodified root information if it exists"""
    (path, filename) = os.path.split(file)
    if path == '' or filename == '':
        return (file, None)

    fileInfo = None
    root = ''
    if file in vcsFileInfoCache:
        # Already cached this info, use it.
        fileInfo = vcsFileInfoCache[file]
    else:
        for srcdir in srcdirs:
            if not IsInDir(file, srcdir):
                continue
            fileInfo = GetVCSFilenameFromSrcdir(file, srcdir)
            if fileInfo:
                vcsFileInfoCache[file] = fileInfo
                break

    if fileInfo:
        file = fileInfo.filename
        root = fileInfo.root

    # we want forward slashes on win32 paths
    return (file.replace("\\", "/"), root)

def GetPlatformSpecificDumper(**kwargs):
    """This function simply returns an instance of a subclass of Dumper
    that is appropriate for the current platform."""
    # Python 2.5 has a bug where platform.system() returns 'Microsoft'.
    # Remove this when we no longer support Python 2.5.
    return {'Windows': Dumper_Win32,
            'Microsoft': Dumper_Win32,
            'Linux': Dumper_Linux,
            # platform.system() reports 'SunOS' on Solaris
            'SunOS': Dumper_Solaris,
            'Darwin': Dumper_Mac}[platform.system()](**kwargs)

def SourceIndex(fileStream, outputPath, vcs_root):
    """Takes a list of files, writes info to a data block in a .stream file"""
    # Creates a .pdb.stream file in the mozilla\objdir to be used for source indexing
    # Create the srcsrv data block that indexes the pdb file
    result = True
    pdbStreamFile = open(outputPath, "w")
    pdbStreamFile.write('''SRCSRV: ini ------------------------------------------------\r\nVERSION=2\r\nINDEXVERSION=2\r\nVERCTRL=http\r\nSRCSRV: variables ------------------------------------------\r\nHGSERVER=''')
    pdbStreamFile.write(vcs_root)
    pdbStreamFile.write('''\r\nSRCSRVVERCTRL=http\r\nHTTP_EXTRACT_TARGET=%hgserver%/raw-file/%var3%/%var2%\r\nSRCSRVTRG=%http_extract_target%\r\nSRCSRV: source files ---------------------------------------\r\n''')
    pdbStreamFile.write(fileStream) # can't do string interpolation because the source server also uses this and so there are % in the above
    pdbStreamFile.write("SRCSRV: end ------------------------------------------------\r\n\n")
    pdbStreamFile.close()
    return result
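
# For reference, the stream SourceIndex writes has the following shape. The
# HGSERVER value and the source file lines are filled in at run time; file
# lines are built in Dumper.ProcessFilesWork as "sourcepath*file*revision":
#
#   SRCSRV: ini ------------------------------------------------
#   VERSION=2
#   INDEXVERSION=2
#   VERCTRL=http
#   SRCSRV: variables ------------------------------------------
#   HGSERVER=<vcs_root>
#   SRCSRVVERCTRL=http
#   HTTP_EXTRACT_TARGET=%hgserver%/raw-file/%var3%/%var2%
#   SRCSRVTRG=%http_extract_target%
#   SRCSRV: source files ---------------------------------------
#   <sourcepath>*<file>*<revision>
#   SRCSRV: end ------------------------------------------------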

def WorkerInitializer(cls, lock):
    """Windows worker processes won't have run GlobalInit, and due to a lack of fork(),
    won't inherit the class variables from the parent. The only one they need is the lock,
    so we run an initializer to set it. Redundant but harmless on other platforms."""
    cls.lock = lock

def StartProcessFilesWork(dumper, files, arch_num, arch, vcs_root, after, after_arg):
    """multiprocessing can't handle methods as Process targets, so we define
    a simple wrapper function around the work method."""
    return dumper.ProcessFilesWork(files, arch_num, arch, vcs_root, after, after_arg)

class Dumper:
    """This class can dump symbols from a file with debug info, and
    store the output in a directory structure that is valid for use as
    a Breakpad symbol server.  Requires a path to a dump_syms binary--
    |dump_syms| and a directory to store symbols in--|symbol_path|.
    Optionally takes a list of processor architectures to process from
    each debug file--|archs|, the full path to the top source
    directory--|srcdir|, for generating relative source file names,
    and an option to copy debug info files alongside the dumped
    symbol files--|copy_debug|, mostly useful for creating a
    Microsoft Symbol Server from the resulting output.

    You don't want to use this directly if you intend to call
    ProcessDir.  Instead, call GetPlatformSpecificDumper to
    get an instance of a subclass.

    Processing is performed asynchronously via worker processes; in
    order to wait for processing to finish and clean up correctly, you
    must call Finish after all Process/ProcessDir calls have been made.
    You must also call Dumper.GlobalInit before creating or using any
    instances."""
    def __init__(self, dump_syms, symbol_path,
                 archs=None,
                 srcdirs=[],
                 copy_debug=False,
                 vcsinfo=False,
                 srcsrv=False,
                 exclude=[],
                 repo_manifest=None):
        # popen likes absolute paths, at least on windows
        self.dump_syms = os.path.abspath(dump_syms)
        self.symbol_path = symbol_path
        if archs is None:
            # makes the loop logic simpler
            self.archs = ['']
        else:
            self.archs = ['-a %s' % a for a in archs.split()]
        self.srcdirs = [os.path.normpath(a) for a in srcdirs]
        self.copy_debug = copy_debug
        self.vcsinfo = vcsinfo
        self.srcsrv = srcsrv
        self.exclude = exclude[:]
        if repo_manifest:
            self.parse_repo_manifest(repo_manifest)

        # book-keeping to keep track of our jobs and the cleanup work per file tuple
        self.files_record = {}
        self.jobs_record = collections.defaultdict(int)

    @classmethod
    def GlobalInit(cls, module=multiprocessing):
        """Initialize the class globals for the multiprocessing setup; must
        be called before any Dumper instances are created and used. Test cases
        may pass in a different module to supply Manager and Pool objects,
        usually multiprocessing.dummy."""
        try:
            num_cpus = module.cpu_count()
        except NotImplementedError:
            # assume a dual core machine if we can't find out for some reason;
            # probably better on single core anyway due to I/O constraints
            num_cpus = 2

        # have to create any locks etc before the pool
        cls.manager = module.Manager()
        cls.jobs_condition = Dumper.manager.Condition()
        cls.lock = Dumper.manager.RLock()
        cls.pool = module.Pool(num_cpus, WorkerInitializer, (cls, cls.lock))
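
    # Illustrative only: tests can swap in a thread-backed pool by passing a
    # small module-like object (name hypothetical) that wraps
    # multiprocessing.dummy and also supplies cpu_count:
    #
    #   import multiprocessing.dummy
    #   class _DummyModule(object):
    #       cpu_count = staticmethod(lambda: 2)
    #       Manager = staticmethod(multiprocessing.dummy.Manager)
    #       Pool = staticmethod(multiprocessing.dummy.Pool)
    #
    #   Dumper.GlobalInit(module=_DummyModule())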

    def JobStarted(self, file_key):
        """Increments the number of submitted jobs for the specified key file,
        defined as the original file we processed; note that a single key file
        can generate up to 1 + len(self.archs) jobs in the Mac case."""
        with Dumper.jobs_condition:
            self.jobs_record[file_key] += 1
            Dumper.jobs_condition.notify_all()

    def JobFinished(self, file_key):
        """Decrements the number of submitted jobs for the specified key file,
        defined as the original file we processed; once the count is back to 0,
        remove the entry from our record."""
        with Dumper.jobs_condition:
            self.jobs_record[file_key] -= 1

            if self.jobs_record[file_key] == 0:
                del self.jobs_record[file_key]

            Dumper.jobs_condition.notify_all()

    def output(self, dest, output_str):
        """Writes |output_str| to |dest|, holding |lock|;
        terminates with a newline."""
        with Dumper.lock:
            dest.write(output_str + "\n")
            dest.flush()

    def output_pid(self, dest, output_str):
        """Debugging output; prepends the pid to the string."""
        self.output(dest, "%d: %s" % (os.getpid(), output_str))

    def parse_repo_manifest(self, repo_manifest):
        """
        Parse an XML manifest of repository info as produced
        by the `repo manifest -r` command.
        """
        doc = parse(repo_manifest)
        if doc.firstChild.tagName != "manifest":
            return
        # First, get remotes.
        remotes = dict([(r.getAttribute("name"), r.getAttribute("fetch")) for r in doc.getElementsByTagName("remote")])
        # And default remote.
        default_remote = None
        if doc.getElementsByTagName("default"):
            default_remote = doc.getElementsByTagName("default")[0].getAttribute("remote")
        # Now get projects. Assume they're relative to repo_manifest.
        base_dir = os.path.abspath(os.path.dirname(repo_manifest))
        for proj in doc.getElementsByTagName("project"):
            # name is the repository URL relative to the remote path.
            name = proj.getAttribute("name")
            # path is the path on-disk, relative to the manifest file.
            path = proj.getAttribute("path")
            # revision is the changeset ID.
            rev = proj.getAttribute("revision")
            # remote is the base URL to use.
            remote = proj.getAttribute("remote")
            # remote defaults to the <default remote>.
            if not remote:
                remote = default_remote
            # path defaults to name.
            if not path:
                path = name
            if not (name and path and rev and remote):
                print "Skipping project %s" % proj.toxml()
                continue
            remote = remotes[remote]
            # Turn git URLs into http URLs so that urljoin works.
            if remote.startswith("git:"):
                remote = "http" + remote[3:]
            # Add this project to srcdirs.
            srcdir = os.path.join(base_dir, path)
            self.srcdirs.append(srcdir)
            # And cache its VCS file info. Currently all repos mentioned
            # in a repo manifest are assumed to be git.
            root = urlparse.urljoin(remote, name)
            srcdirRepoInfo[srcdir] = GitRepoInfo(srcdir, rev, root)
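
    # For reference, a minimal sketch of the manifest shape parse_repo_manifest
    # expects; element and attribute names match the parsing above, the values
    # are hypothetical:
    #
    #   <manifest>
    #     <remote name="origin" fetch="git://git.example.com/" />
    #     <default remote="origin" />
    #     <project name="repo.git" path="src/repo" revision="abcdef0" />
    #   </manifest>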

    # subclasses override this
    def ShouldProcess(self, file):
        return not any(fnmatch.fnmatch(os.path.basename(file), exclude) for exclude in self.exclude)

    # and can override this
    def ShouldSkipDir(self, dir):
        return False

    def RunFileCommand(self, file):
        """Utility function, returns the output of file(1)"""
        try:
            # we use -L to read the targets of symlinks,
            # and -b to print just the content, not the filename
            return os.popen("file -Lb " + file).read()
        except:
            return ""

    # This is a no-op except on Win32
    def FixFilenameCase(self, file):
        return file

    # This is a no-op except on Win32
    def SourceServerIndexing(self, debug_file, guid, sourceFileStream, vcs_root):
        return ""

    # subclasses override this if they want to support this
    def CopyDebug(self, file, debug_file, guid):
        pass

    def Finish(self, stop_pool=True):
        """Wait for the expected number of jobs to be submitted, and then
        wait for the pool to finish processing them. By default, will close
        and clear the pool, but for testcases that need multiple runs, pass
        stop_pool = False."""
        with Dumper.jobs_condition:
            while len(self.jobs_record) != 0:
                Dumper.jobs_condition.wait()
        if stop_pool:
            Dumper.pool.close()
            Dumper.pool.join()

    def Process(self, file_or_dir):
        """Process a file or all the (valid) files in a directory; processing is performed
        asynchronously, and Finish must be called to wait for it to complete and clean up."""
        if os.path.isdir(file_or_dir) and not self.ShouldSkipDir(file_or_dir):
            self.ProcessDir(file_or_dir)
        elif os.path.isfile(file_or_dir):
            self.ProcessFiles((file_or_dir,))

    def ProcessDir(self, dir):
        """Process all the valid files in this directory.  Valid files
        are determined by calling ShouldProcess; processing is performed
        asynchronously, and Finish must be called to wait for it to complete and clean up."""
        for root, dirs, files in os.walk(dir):
            for d in dirs[:]:
                if self.ShouldSkipDir(d):
                    dirs.remove(d)
            for f in files:
                fullpath = os.path.join(root, f)
                if self.ShouldProcess(fullpath):
                    self.ProcessFiles((fullpath,))

    def SubmitJob(self, file_key, func, args, callback):
        """Submits a job to the pool of workers; increments the number of submitted jobs."""
        self.JobStarted(file_key)
        Dumper.pool.apply_async(func, args=args, callback=callback)

    def ProcessFilesFinished(self, res):
        """Callback from multiprocessing when ProcessFilesWork finishes;
        run the cleanup work, if any"""
        self.JobFinished(res['files'][-1])
        # only run the cleanup function once per tuple of files
        self.files_record[res['files']] += 1
        if self.files_record[res['files']] == len(self.archs):
            del self.files_record[res['files']]
            if res['after']:
                res['after'](res['status'], res['after_arg'])
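
    # Illustrative only: the minimal asynchronous driver sequence (it mirrors
    # main() at the bottom of this file; the paths are hypothetical):
    #
    #   Dumper.GlobalInit()
    #   d = GetPlatformSpecificDumper(dump_syms='dump_syms',
    #                                 symbol_path='/tmp/symbols')
    #   d.Process('/path/to/objdir')
    #   d.Finish()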

    def ProcessFiles(self, files, after=None, after_arg=None):
        """Dump symbols from these files into a symbol file, stored
        in the proper directory structure in |symbol_path|; processing is performed
        asynchronously, and Finish must be called to wait for it to complete and clean up.
        All files after the first are fallbacks in case the first file does not process
        successfully; if it does, no other files will be touched."""
        self.output_pid(sys.stderr, "Submitting jobs for files: %s" % str(files))

        # tries to get the vcs root from the .mozconfig first - if it's not set
        # the tinderbox vcs path will be assigned further down
        vcs_root = os.environ.get("SRCSRV_ROOT")
        for arch_num, arch in enumerate(self.archs):
            self.files_record[files] = 0 # record that we submitted jobs for this tuple of files
            self.SubmitJob(files[-1], StartProcessFilesWork, args=(self, files, arch_num, arch, vcs_root, after, after_arg), callback=self.ProcessFilesFinished)

    def ProcessFilesWork(self, files, arch_num, arch, vcs_root, after, after_arg):
        self.output_pid(sys.stderr, "Worker processing files: %s" % (files,))

        # our result is a status, a cleanup function, an argument to that function, and the tuple of files we were called on
        result = { 'status' : False, 'after' : after, 'after_arg' : after_arg, 'files' : files }

        sourceFileStream = ''
        for file in files:
            # files is a tuple of files, containing fallbacks in case the first file doesn't process successfully
            try:
                proc = subprocess.Popen([self.dump_syms] + arch.split() + [file],
                                        stdout=subprocess.PIPE)
                module_line = proc.stdout.next()
                if module_line.startswith("MODULE"):
                    # MODULE os cpu guid debug_file
                    (guid, debug_file) = (module_line.split())[3:5]
                    # strip off .pdb extensions, and append .sym
                    sym_file = re.sub("\.pdb$", "", debug_file) + ".sym"
                    # we do want forward slashes here
                    rel_path = os.path.join(debug_file,
                                            guid,
                                            sym_file).replace("\\", "/")
                    full_path = os.path.normpath(os.path.join(self.symbol_path,
                                                              rel_path))
                    try:
                        os.makedirs(os.path.dirname(full_path))
                    except OSError: # already exists
                        pass
                    f = open(full_path, "w")
                    f.write(module_line)
                    # now process the rest of the output
                    for line in proc.stdout:
                        if line.startswith("FILE"):
                            # FILE index filename
                            (x, index, filename) = line.rstrip().split(None, 2)
                            if sys.platform == "sunos5":
                                for srcdir in self.srcdirs:
                                    start = filename.find(srcdir)
                                    if start != -1:
                                        filename = filename[start:]
                                        break
                            filename = self.FixFilenameCase(filename)
                            sourcepath = filename
                            if self.vcsinfo:
                                (filename, rootname) = GetVCSFilename(filename, self.srcdirs)
                                # set vcs_root in case the loop over files ends on an empty rootname
                                if vcs_root is None:
                                    if rootname:
                                        vcs_root = rootname
                            # gather up files with hg for indexing
                            if filename.startswith("hg"):
                                (ver, checkout, source_file, revision) = filename.split(":", 3)
                                sourceFileStream += sourcepath + "*" + source_file + '*' + revision + "\r\n"
                            f.write("FILE %s %s\n" % (index, filename))
                        else:
                            # pass through all other lines unchanged
                            f.write(line)
                            # we want to return true only if at least one line is not a MODULE or FILE line
                            result['status'] = True
                    f.close()
                    proc.wait()
                    # we output relative paths so callers can get a list of what
                    # was generated
                    self.output(sys.stdout, rel_path)
                    if self.srcsrv and vcs_root:
                        # add source server indexing to the pdb file
                        self.SourceServerIndexing(file, guid, sourceFileStream, vcs_root)
                    # only copy debug the first time if we have multiple architectures
                    if self.copy_debug and arch_num == 0:
                        self.CopyDebug(file, debug_file, guid)
            except StopIteration:
                pass
            except Exception, e:
                self.output(sys.stderr, "Unexpected error: %s" % (str(e),))
                raise
            if result['status']:
                # we only need 1 file to work
                break
        return result
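
# For reference, the dump_syms output parsed above begins with records like
# the following (values hypothetical; the field layouts come from the
# MODULE/FILE comments in ProcessFilesWork):
#
#   MODULE windows x86 ABCDEF0123456789ABCDEF0123456789A firefox.pdb
#   FILE 0 c:/build/src/browser/app/nsbrowserapp.cpp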

# Platform-specific subclasses.  For the most part, these just have
# logic to determine what files to extract symbols from.

class Dumper_Win32(Dumper):
    fixedFilenameCaseCache = {}

    def ShouldProcess(self, file):
        """This function will allow processing of pdb files that have dll
        or exe files with the same base name next to them."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".pdb"):
            (path, ext) = os.path.splitext(file)
            if os.path.isfile(path + ".exe") or os.path.isfile(path + ".dll"):
                return True
        return False

    def FixFilenameCase(self, file):
        """Recent versions of Visual C++ put filenames into
        PDB files as all lowercase.  If the file exists
        on the local filesystem, fix it."""

        # Use a cached version if we have one.
        if file in self.fixedFilenameCaseCache:
            return self.fixedFilenameCaseCache[file]

        result = file

        (path, filename) = os.path.split(file)
        if os.path.isdir(path):
            lc_filename = filename.lower()
            for f in os.listdir(path):
                if f.lower() == lc_filename:
                    result = os.path.join(path, f)
                    break

        # Cache the corrected version to avoid future filesystem hits.
        self.fixedFilenameCaseCache[file] = result
        return result

    def CopyDebug(self, file, debug_file, guid):
        rel_path = os.path.join(debug_file,
                                guid,
                                debug_file).replace("\\", "/")
        full_path = os.path.normpath(os.path.join(self.symbol_path,
                                                  rel_path))
        shutil.copyfile(file, full_path)
        # try compressing it
        compressed_file = os.path.splitext(full_path)[0] + ".pd_"
        # ignore makecab's output
        success = subprocess.call(["makecab.exe", "/D", "CompressionType=LZX", "/D",
                                   "CompressionMemory=21",
                                   full_path, compressed_file],
                                  stdout=open("NUL:", "w"), stderr=subprocess.STDOUT)
        if success == 0 and os.path.exists(compressed_file):
            os.unlink(full_path)
            self.output(sys.stdout, os.path.splitext(rel_path)[0] + ".pd_")
        else:
            self.output(sys.stdout, rel_path)

    def SourceServerIndexing(self, debug_file, guid, sourceFileStream, vcs_root):
        # Creates a .pdb.stream file in the mozilla\objdir to be used for source indexing
        debug_file = os.path.abspath(debug_file)
        streamFilename = debug_file + ".stream"
        stream_output_path = os.path.abspath(streamFilename)
        # Call SourceIndex to create the .stream file
        result = SourceIndex(sourceFileStream, stream_output_path, vcs_root)
        if self.copy_debug:
            pdbstr_path = os.environ.get("PDBSTR_PATH")
            pdbstr = os.path.normpath(pdbstr_path)
            subprocess.call([pdbstr, "-w", "-p:" + os.path.basename(debug_file),
                             "-i:" + os.path.basename(streamFilename), "-s:srcsrv"],
                            cwd=os.path.dirname(stream_output_path))
            # clean up all the .stream files when done
            os.remove(stream_output_path)
        return result

class Dumper_Linux(Dumper):
    objcopy = os.environ['OBJCOPY'] if 'OBJCOPY' in os.environ else 'objcopy'

    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .so extension, and additionally
        file(1) reports as being ELF files.  It expects to find the file
        command in PATH."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".so") or os.access(file, os.X_OK):
            return self.RunFileCommand(file).startswith("ELF")
        return False

    def CopyDebug(self, file, debug_file, guid):
        # We want to strip out the debug info, and add a
        # .gnu_debuglink section to the object, so the debugger can
        # actually load our debug info later.
        file_dbg = file + ".dbg"
        if subprocess.call([self.objcopy, '--only-keep-debug', file, file_dbg]) == 0 and \
           subprocess.call([self.objcopy, '--add-gnu-debuglink=%s' % file_dbg, file]) == 0:
            rel_path = os.path.join(debug_file,
                                    guid,
                                    debug_file + ".dbg")
            full_path = os.path.normpath(os.path.join(self.symbol_path,
                                                      rel_path))
            shutil.move(file_dbg, full_path)
            # gzip the shipped debug files
            os.system("gzip %s" % full_path)
            self.output(sys.stdout, rel_path + ".gz")
        else:
            if os.path.isfile(file_dbg):
                os.unlink(file_dbg)
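
# For reference, with -c the Linux dumper leaves a layout like this under
# symbol_path (names hypothetical; the paths follow the rel_path values built
# in ProcessFilesWork and CopyDebug above):
#
#   libxul.so/<GUID>/libxul.so.sym      # symbol file from ProcessFilesWork
#   libxul.so/<GUID>/libxul.so.dbg.gz   # stripped debug info from CopyDebug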

class Dumper_Solaris(Dumper):
    def RunFileCommand(self, file):
        """Utility function, returns the output of file(1)"""
        try:
            output = os.popen("file " + file).read()
            return output.split('\t')[1]
        except:
            return ""

    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .so extension, and additionally
        file(1) reports as being ELF files.  It expects to find the file
        command in PATH."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".so") or os.access(file, os.X_OK):
            return self.RunFileCommand(file).startswith("ELF")
        return False

def StartProcessFilesWorkMac(dumper, file):
    """multiprocessing can't handle methods as Process targets, so we define
    a simple wrapper function around the work method."""
    return dumper.ProcessFilesWorkMac(file)

def AfterMac(status, dsymbundle):
    """Cleanup function to run on Macs after we process the file(s)."""
    # CopyDebug will already have been run from Dumper.ProcessFiles
    shutil.rmtree(dsymbundle)

class Dumper_Mac(Dumper):
    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .dylib extension, and additionally
        file(1) reports as being Mach-O files.  It expects to find the file
        command in PATH."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".dylib") or os.access(file, os.X_OK):
            return self.RunFileCommand(file).startswith("Mach-O")
        return False

    def ShouldSkipDir(self, dir):
        """We create .dSYM bundles on the fly, but if someone runs
        buildsymbols twice, we should skip any bundles we created
        previously, otherwise we'll recurse into them and try to
        dump the inner bits again."""
        if dir.endswith(".dSYM"):
            return True
        return False

    def ProcessFiles(self, files, after=None, after_arg=None):
        # also note, files must be len 1 here, since we're the only ones
        # that ever add more than one file to the list
        self.output_pid(sys.stderr, "Submitting job for Mac pre-processing on file: %s" % (files[0]))
        self.SubmitJob(files[0], StartProcessFilesWorkMac, args=(self, files[0]), callback=self.ProcessFilesMacFinished)

    def ProcessFilesMacFinished(self, result):
        if result['status']:
            # kick off new jobs per-arch with our new list of files
            Dumper.ProcessFiles(self, result['files'], after=AfterMac, after_arg=result['files'][0])
        # only decrement jobs *after* that, since otherwise we'll remove the record for this file;
        # use file_key here, since result['files'] is None when pre-processing failed
        self.JobFinished(result['file_key'])

    def ProcessFilesWorkMac(self, file):
        """dump_syms on Mac needs to be run on a dSYM bundle produced
        by dsymutil(1), so run dsymutil here and pass the bundle name
        down to the superclass method instead."""
        self.output_pid(sys.stderr, "Worker running Mac pre-processing on file: %s" % (file,))

        # our return is a status and a tuple of files to dump symbols for;
        # the extra files are fallbacks; as soon as one is dumped successfully, we stop
        result = { 'status' : False, 'files' : None, 'file_key' : file }
        dsymbundle = file + ".dSYM"
        if os.path.exists(dsymbundle):
            shutil.rmtree(dsymbundle)
        # dsymutil takes --arch=foo instead of -a foo like everything else
        subprocess.call(["dsymutil"] + [a.replace('-a ', '--arch=') for a in self.archs if a]
                        + [file],
                        stdout=open("/dev/null", "w"))
        if not os.path.exists(dsymbundle):
            # dsymutil won't produce a .dSYM for files without symbols
            result['status'] = False
            return result

        result['status'] = True
        result['files'] = (dsymbundle, file)
        return result

    def CopyDebug(self, file, debug_file, guid):
        """ProcessFiles has already produced a dSYM bundle, so we should just
        copy that to the destination directory. However, we'll package it
        into a .tar.bz2 because the debug symbols are pretty huge, and
        also because it's a bundle, so it's a directory. |file| here is the
        dSYM bundle, and |debug_file| is the original filename."""
        rel_path = os.path.join(debug_file,
                                guid,
                                os.path.basename(file) + ".tar.bz2")
        full_path = os.path.abspath(os.path.join(self.symbol_path,
                                                 rel_path))
        success = subprocess.call(["tar", "cjf", full_path, os.path.basename(file)],
                                  cwd=os.path.dirname(file),
                                  stdout=open("/dev/null", "w"), stderr=subprocess.STDOUT)
        if success == 0 and os.path.exists(full_path):
            self.output(sys.stdout, rel_path)
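
# Example invocation (paths hypothetical; options are documented in main()
# below):
#
#   python symbolstore.py -c -a "i386 x86_64" -s /build/src \
#       /build/dump_syms /symbols /build/obj/dist/bin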

# Entry point if called as a standalone program
def main():
    parser = OptionParser(usage="usage: %prog [options] <dump_syms binary> <symbol store path> <debug info files>")
    parser.add_option("-c", "--copy",
                      action="store_true", dest="copy_debug", default=False,
                      help="Copy debug info files into the same directory structure as symbol files")
    parser.add_option("-a", "--archs",
                      action="store", dest="archs",
                      help="Run dump_syms -a <arch> for each space separated cpu architecture in ARCHS (only on OS X)")
    parser.add_option("-s", "--srcdir",
                      action="append", dest="srcdir", default=[],
                      help="Use SRCDIR to determine relative paths to source files")
    parser.add_option("-v", "--vcs-info",
                      action="store_true", dest="vcsinfo",
                      help="Try to retrieve VCS info for each FILE listed in the output")
    parser.add_option("-i", "--source-index",
                      action="store_true", dest="srcsrv", default=False,
                      help="Add source index information to debug files, making them suitable for use in a source server.")
    parser.add_option("-x", "--exclude",
                      action="append", dest="exclude", default=[], metavar="PATTERN",
                      help="Skip processing files matching PATTERN.")
    parser.add_option("--repo-manifest",
                      action="store", dest="repo_manifest",
                      help="""Get source information from this XML manifest
produced by the `repo manifest -r` command.
""")
    (options, args) = parser.parse_args()

    # check that pdbstr.exe exists
    if options.srcsrv:
        pdbstr = os.environ.get("PDBSTR_PATH")
        if not pdbstr or not os.path.exists(pdbstr):
            print >> sys.stderr, "Invalid path to pdbstr.exe - please set/check PDBSTR_PATH.\n"
            sys.exit(1)

    if len(args) < 3:
        parser.error("not enough arguments")

    dumper = GetPlatformSpecificDumper(dump_syms=args[0],
                                       symbol_path=args[1],
                                       copy_debug=options.copy_debug,
                                       archs=options.archs,
                                       srcdirs=options.srcdir,
                                       vcsinfo=options.vcsinfo,
                                       srcsrv=options.srcsrv,
                                       exclude=options.exclude,
                                       repo_manifest=options.repo_manifest)
    for arg in args[2:]:
        dumper.Process(arg)
    dumper.Finish()

# run main if run directly
if __name__ == "__main__":
    # set up the multiprocessing infrastructure before we start;
    # note that this needs to be in the __main__ guard, or else Windows will choke
    Dumper.GlobalInit()

    main()