# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

'''
Fetch build artifacts from a Firefox tree.

This provides an (at-the-moment special purpose) interface to download Android
artifacts from Mozilla's Task Cluster.

This module performs the following steps:

* find a candidate hg parent revision. At one time we used the local pushlog,
  which required the mozext hg extension. This isn't feasible with git, and it
  is only mildly less efficient to not use the pushlog, so we don't use it even
  when querying hg.

* map the candidate parent to candidate Task Cluster tasks and artifact
  locations. Pushlog entries might not correspond to tasks (yet), and those
  tasks might not produce the desired class of artifacts.

* fetch fresh Task Cluster artifacts and purge old artifacts, using a simple
  Least Recently Used cache.

* post-process fresh artifacts, to speed future installation. In particular,
  extract relevant files from Mac OS X DMG files into a friendly archive format
  so we don't have to mount DMG files frequently.

The bulk of the complexity is in managing and persisting several caches. If
we found a Python LRU cache that pickled cleanly, we could remove a lot of
this code! Sadly, I found no such candidate implementations, so we pickle
pylru caches manually.

None of the instances (or the underlying caches) are safe for concurrent use.
A future need, perhaps.

This module requires certain modules be importable from the ambient Python
environment. |mach artifact| ensures these modules are available, but other
consumers will need to arrange this themselves.
'''


from __future__ import absolute_import, print_function, unicode_literals

import binascii
import collections
import functools
import glob
import hashlib
import logging
import operator
import os
import pickle
import re
import shutil
import stat
import subprocess
import tarfile
import tempfile
import urlparse
import zipfile

import pylru
import requests
from taskgraph.util.taskcluster import (
    find_task_id,
    get_artifact_url,
    list_artifacts,
)

from mozbuild.util import (
    ensureParentDir,
    FileAvoidWrite,
    mkdir,
)
import mozinstall
from mozpack.files import (
    JarFinder,
    TarFinder,
)
from mozpack.mozjar import (
    JarReader,
    JarWriter,
)
from mozpack.packager.unpack import UnpackFinder
import mozpack.path as mozpath
from dlmanager import (
    DownloadManager,
    PersistLimit,
)
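
# A rough sketch of how |mach artifact install| drives this module (purely
# illustrative; the real substs/defines dictionaries come from the build
# configuration, and the paths here are invented):
#
#     artifacts = Artifacts('mozilla-central', substs, defines,
#                           cache_dir='/path/to/cache',
#                           hg='hg', topsrcdir='/path/to/srcdir')
#     artifacts.install_from(None, '/path/to/objdir/dist')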

NUM_PUSHHEADS_TO_QUERY_PER_PARENT = 50  # Number of candidate pushheads to cache per parent changeset.

# Number of parent changesets to consider as possible pushheads.
# There isn't really such a thing as a reasonable default here, because we don't
# know how many pushheads we'll need to look at to find a build with our artifacts,
# and we don't know how many changesets will be in each push. For now we assume
# we'll find a build in the last 50 pushes, assuming each push contains 10 changesets.
NUM_REVISIONS_TO_QUERY = 500

MAX_CACHED_TASKS = 400  # Number of pushheads to cache Task Cluster task data for.

# Minimum number of downloaded artifacts to keep. Each artifact can be very large,
# so don't make this too large!
MIN_CACHED_ARTIFACTS = 6

# Maximum size of the downloaded artifacts to keep in cache, in bytes (1GiB).
MAX_CACHED_ARTIFACTS_SIZE = 1024 * 1024 * 1024

# Downloaded artifacts are cached, and a subset of their contents extracted for
# easy installation. This is most noticeable on Mac OS X: since mounting and
# copying from DMG files is very slow, we extract the desired binaries to a
# separate archive for fast re-installation.
PROCESSED_SUFFIX = '.processed.jar'

CANDIDATE_TREES = (
    'mozilla-central',
    'integration/autoland',
    'integration/mozilla-inbound',
    'releases/mozilla-beta',
    'releases/mozilla-release',
    'releases/mozilla-esr60',
)


class ArtifactJob(object):
    # These are a subset of TEST_HARNESS_BINS in testing/mochitest/Makefile.in.
    # Each item is a pair of (pattern, (src_prefix, dest_prefix)), where src_prefix
    # is the prefix of the pattern relevant to its location in the archive, and
    # dest_prefix is the prefix to be added that will yield the final path relative
    # to dist/.
    test_artifact_patterns = {
        ('bin/BadCertServer', ('bin', 'bin')),
        ('bin/GenerateOCSPResponse', ('bin', 'bin')),
        ('bin/OCSPStaplingServer', ('bin', 'bin')),
        ('bin/SymantecSanctionsServer', ('bin', 'bin')),
        ('bin/certutil', ('bin', 'bin')),
        ('bin/fileid', ('bin', 'bin')),
        ('bin/geckodriver', ('bin', 'bin')),
        ('bin/pk12util', ('bin', 'bin')),
        ('bin/screentopng', ('bin', 'bin')),
        ('bin/ssltunnel', ('bin', 'bin')),
        ('bin/xpcshell', ('bin', 'bin')),
        ('bin/plugins/gmp-*/*/*', ('bin/plugins', 'bin')),
        ('bin/plugins/*', ('bin/plugins', 'plugins')),
        ('bin/components/*.xpt', ('bin/components', 'bin/components')),
    }
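
    # For example, given the ('bin/plugins/*', ('bin/plugins', 'plugins'))
    # pattern above, an archive entry like 'bin/plugins/somelib' (name invented
    # for illustration) has the 'bin/plugins' src_prefix stripped and the
    # 'plugins' dest_prefix prepended, installing it to 'plugins/somelib'
    # under dist/.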

    # We can tell our input is a test archive by this suffix, which happens to
    # be the same across platforms.
    _test_archive_suffix = '.common.tests.zip'

    def __init__(self, package_re, tests_re, log=None, download_symbols=False, substs=None):
        self._package_re = re.compile(package_re)
        self._tests_re = None
        if tests_re:
            self._tests_re = re.compile(tests_re)
        self._log = log
        self._substs = substs
        self._symbols_archive_suffix = None
        if download_symbols:
            self._symbols_archive_suffix = 'crashreporter-symbols.zip'

    def log(self, *args, **kwargs):
        if self._log:
            self._log(*args, **kwargs)

    def find_candidate_artifacts(self, artifacts):
        # TODO: Handle multiple artifacts, taking the latest one.
        tests_artifact = None
        for artifact in artifacts:
            name = artifact['name']
            if self._package_re and self._package_re.match(name):
                yield name
            elif self._tests_re and self._tests_re.match(name):
                tests_artifact = name
                yield name
            elif self._symbols_archive_suffix and name.endswith(self._symbols_archive_suffix):
                yield name
            else:
                self.log(logging.DEBUG, 'artifact',
                         {'name': name},
                         'Not yielding artifact named {name} as a candidate artifact')
        if self._tests_re and not tests_artifact:
            raise ValueError('Expected tests archive matching "{re}", but '
                             'found none!'.format(re=self._tests_re))

    def process_artifact(self, filename, processed_filename):
        if filename.endswith(ArtifactJob._test_archive_suffix) and self._tests_re:
            return self.process_tests_artifact(filename, processed_filename)
        if self._symbols_archive_suffix and filename.endswith(self._symbols_archive_suffix):
            return self.process_symbols_archive(filename, processed_filename)
        return self.process_package_artifact(filename, processed_filename)

    def process_package_artifact(self, filename, processed_filename):
        raise NotImplementedError("Subclasses must specialize process_package_artifact!")

    def process_tests_artifact(self, filename, processed_filename):
        from mozbuild.action.test_archive import OBJDIR_TEST_FILES
        added_entry = False

        with JarWriter(file=processed_filename, optimize=False, compress_level=5) as writer:
            reader = JarReader(filename)
            for filename, entry in reader.entries.iteritems():
                for pattern, (src_prefix, dest_prefix) in self.test_artifact_patterns:
                    if not mozpath.match(filename, pattern):
                        continue
                    destpath = mozpath.relpath(filename, src_prefix)
                    destpath = mozpath.join(dest_prefix, destpath)
                    self.log(logging.INFO, 'artifact',
                             {'destpath': destpath},
                             'Adding {destpath} to processed archive')
                    # The high 16 bits of a zip entry's external_attr hold the
                    # Unix mode bits.
                    mode = entry['external_attr'] >> 16
                    writer.add(destpath.encode('utf-8'), reader[filename], mode=mode)
                    added_entry = True
                    break
                for files_entry in OBJDIR_TEST_FILES.values():
                    origin_pattern = files_entry['pattern']
                    leaf_filename = filename
                    if 'dest' in files_entry:
                        dest = files_entry['dest']
                        origin_pattern = mozpath.join(dest, origin_pattern)
                        leaf_filename = filename[len(dest) + 1:]
                    if mozpath.match(filename, origin_pattern):
                        destpath = mozpath.join('..', files_entry['base'], leaf_filename)
                        mode = entry['external_attr'] >> 16
                        writer.add(destpath.encode('utf-8'), reader[filename], mode=mode)

        if not added_entry:
            raise ValueError('Archive format changed! No pattern from "{patterns}" '
                             'matched an archive path.'.format(
                                 patterns=self.test_artifact_patterns))

    def process_symbols_archive(self, filename, processed_filename):
        with JarWriter(file=processed_filename, optimize=False, compress_level=5) as writer:
            reader = JarReader(filename)
            for filename in reader.entries:
                destpath = mozpath.join('crashreporter-symbols', filename)
                self.log(logging.INFO, 'artifact',
                         {'destpath': destpath},
                         'Adding {destpath} to processed archive')
                writer.add(destpath.encode('utf-8'), reader[filename])


class AndroidArtifactJob(ArtifactJob):

    product = 'mobile'

    package_artifact_patterns = {
        'application.ini',
        'platform.ini',
        '**/*.so',
        '**/interfaces.xpt',
    }

    def process_package_artifact(self, filename, processed_filename):
        # Extract all .so files into the root, which will get copied into dist/bin.
        with JarWriter(file=processed_filename, optimize=False, compress_level=5) as writer:
            for p, f in UnpackFinder(JarFinder(filename, JarReader(filename))):
                if not any(mozpath.match(p, pat) for pat in self.package_artifact_patterns):
                    continue

                dirname, basename = os.path.split(p)
                self.log(logging.INFO, 'artifact',
                         {'basename': basename},
                         'Adding {basename} to processed archive')

                basedir = 'bin'
                if not basename.endswith('.so'):
                    # Strip a leading 'assets/' path component. (Note that
                    # str.lstrip would strip *characters*, not this prefix.)
                    if dirname.startswith('assets/'):
                        dirname = dirname[len('assets/'):]
                    basedir = mozpath.join('bin', dirname)
                basename = mozpath.join(basedir, basename)
                writer.add(basename.encode('utf-8'), f.open())


class LinuxArtifactJob(ArtifactJob):

    product = 'firefox'

    package_artifact_patterns = {
        'firefox/application.ini',
        'firefox/crashreporter',
        'firefox/dependentlibs.list',
        'firefox/firefox',
        'firefox/firefox-bin',
        'firefox/minidump-analyzer',
        'firefox/pingsender',
        'firefox/platform.ini',
        'firefox/plugin-container',
        'firefox/updater',
        'firefox/**/*.so',
        'firefox/**/interfaces.xpt',
    }
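
    # mozpath.match treats these as shell-like globs: '*' matches within a
    # single path segment, while '**' spans zero or more segments, so
    # 'firefox/**/*.so' matches '.so' files at any depth under firefox/,
    # including directly inside it.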

    def process_package_artifact(self, filename, processed_filename):
        added_entry = False

        with JarWriter(file=processed_filename, optimize=False, compress_level=5) as writer:
            with tarfile.open(filename) as reader:
                for p, f in UnpackFinder(TarFinder(filename, reader)):
                    if not any(mozpath.match(p, pat) for pat in self.package_artifact_patterns):
                        continue

                    # We strip off the relative "firefox/" bit from the path,
                    # but otherwise preserve it.
                    destpath = mozpath.join('bin',
                                            mozpath.relpath(p, "firefox"))
                    self.log(logging.INFO, 'artifact',
                             {'destpath': destpath},
                             'Adding {destpath} to processed archive')
                    writer.add(destpath.encode('utf-8'), f.open(), mode=f.mode)
                    added_entry = True

        if not added_entry:
            raise ValueError('Archive format changed! No pattern from "{patterns}" '
                             'matched an archive path.'.format(
                                 patterns=LinuxArtifactJob.package_artifact_patterns))


class MacArtifactJob(ArtifactJob):

    product = 'firefox'

    def process_package_artifact(self, filename, processed_filename):
        tempdir = tempfile.mkdtemp()
        oldcwd = os.getcwd()
        try:
            self.log(logging.INFO, 'artifact',
                     {'tempdir': tempdir},
                     'Unpacking DMG into {tempdir}')
            if self._substs['HOST_OS_ARCH'] == 'Linux':
                # This is a cross build; use the hfsplus and dmg tools to
                # extract the DMG.
                os.chdir(tempdir)
                with open(os.devnull, 'wb') as devnull:
                    subprocess.check_call([
                        self._substs['DMG_TOOL'],
                        'extract',
                        filename,
                        'extracted_img',
                    ], stdout=devnull)
                    subprocess.check_call([
                        self._substs['HFS_TOOL'],
                        'extracted_img',
                        'extractall'
                    ], stdout=devnull)
            else:
                mozinstall.install(filename, tempdir)

            bundle_dirs = glob.glob(mozpath.join(tempdir, '*.app'))
            if len(bundle_dirs) != 1:
                raise ValueError('Expected one source bundle, found: {}'.format(bundle_dirs))
            [source] = bundle_dirs

            # These get copied into dist/bin without the path, so "root/a/b/c" -> "dist/bin/c".
            paths_no_keep_path = ('Contents/MacOS', [
                'crashreporter.app/Contents/MacOS/crashreporter',
                'firefox',
                'firefox-bin',
                'libfreebl3.dylib',
                'liblgpllibs.dylib',
                # 'liblogalloc.dylib',
                'libmozglue.dylib',
                'libnss3.dylib',
                'libnssckbi.dylib',
                'libnssdbm3.dylib',
                'libplugin_child_interpose.dylib',
                # 'libreplace_jemalloc.dylib',
                # 'libreplace_malloc.dylib',
                'libmozavutil.dylib',
                'libmozavcodec.dylib',
                'libsoftokn3.dylib',
                'pingsender',
                'plugin-container.app/Contents/MacOS/plugin-container',
                'updater.app/Contents/MacOS/org.mozilla.updater',
                # 'xpcshell',
                'XUL',
            ])

            # These get copied into dist/bin with the path, so "root/a/b/c" -> "dist/bin/a/b/c".
            paths_keep_path = [
                ('Contents/MacOS', [
                    'crashreporter.app/Contents/MacOS/minidump-analyzer',
                ]),
                ('Contents/Resources', [
                    'browser/components/libbrowsercomps.dylib',
                    'dependentlibs.list',
                    # 'firefox',
                    'gmp-clearkey/0.1/libclearkey.dylib',
                    # 'gmp-fake/1.0/libfake.dylib',
                    # 'gmp-fakeopenh264/1.0/libfakeopenh264.dylib',
                    '**/interfaces.xpt',
                ]),
            ]

            with JarWriter(file=processed_filename, optimize=False, compress_level=5) as writer:
                root, paths = paths_no_keep_path
                finder = UnpackFinder(mozpath.join(source, root))
                for path in paths:
                    for p, f in finder.find(path):
                        self.log(logging.INFO, 'artifact',
                                 {'path': p},
                                 'Adding {path} to processed archive')
                        destpath = mozpath.join('bin', os.path.basename(p))
                        writer.add(destpath.encode('utf-8'), f, mode=f.mode)

                for root, paths in paths_keep_path:
                    finder = UnpackFinder(mozpath.join(source, root))
                    for path in paths:
                        for p, f in finder.find(path):
                            self.log(logging.INFO, 'artifact',
                                     {'path': p},
                                     'Adding {path} to processed archive')
                            destpath = mozpath.join('bin', p)
                            writer.add(destpath.encode('utf-8'), f.open(), mode=f.mode)

        finally:
            os.chdir(oldcwd)
            try:
                shutil.rmtree(tempdir)
            except (OSError, IOError):
                self.log(logging.WARN, 'artifact',
                         {'tempdir': tempdir},
                         'Unable to delete {tempdir}')


class WinArtifactJob(ArtifactJob):
    package_artifact_patterns = {
        'firefox/dependentlibs.list',
        'firefox/platform.ini',
        'firefox/application.ini',
        'firefox/**/*.dll',
        'firefox/*.exe',
        'firefox/**/interfaces.xpt',
    }

    product = 'firefox'

    # These are a subset of TEST_HARNESS_BINS in testing/mochitest/Makefile.in.
    test_artifact_patterns = {
        ('bin/BadCertServer.exe', ('bin', 'bin')),
        ('bin/GenerateOCSPResponse.exe', ('bin', 'bin')),
        ('bin/OCSPStaplingServer.exe', ('bin', 'bin')),
        ('bin/SymantecSanctionsServer.exe', ('bin', 'bin')),
        ('bin/certutil.exe', ('bin', 'bin')),
        ('bin/fileid.exe', ('bin', 'bin')),
        ('bin/geckodriver.exe', ('bin', 'bin')),
        ('bin/pk12util.exe', ('bin', 'bin')),
        ('bin/screenshot.exe', ('bin', 'bin')),
        ('bin/ssltunnel.exe', ('bin', 'bin')),
        ('bin/xpcshell.exe', ('bin', 'bin')),
        ('bin/plugins/gmp-*/*/*', ('bin/plugins', 'bin')),
        ('bin/plugins/*', ('bin/plugins', 'plugins')),
        ('bin/components/*', ('bin/components', 'bin/components')),
    }

    def process_package_artifact(self, filename, processed_filename):
        added_entry = False
        with JarWriter(file=processed_filename, optimize=False, compress_level=5) as writer:
            for p, f in UnpackFinder(JarFinder(filename, JarReader(filename))):
                if not any(mozpath.match(p, pat) for pat in self.package_artifact_patterns):
                    continue

                # Strip off the relative "firefox/" bit from the path:
                basename = mozpath.relpath(p, "firefox")
                basename = mozpath.join('bin', basename)
                self.log(logging.INFO, 'artifact',
                         {'basename': basename},
                         'Adding {basename} to processed archive')
                writer.add(basename.encode('utf-8'), f.open(), mode=f.mode)
                added_entry = True

        if not added_entry:
            raise ValueError('Archive format changed! No pattern from "{patterns}" '
                             'matched an archive path.'.format(
                                 patterns=self.package_artifact_patterns))
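

# For example, the win64-opt package regex below matches both the explicitly
# versioned 'public/build/firefox-<version>.win64.zip' artifact name and the
# plain 'public/build/target.zip'.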

# Keep the keys of this map in sync with the |mach artifact| --job
# options. The keys of this map correspond to entries at
# https://tools.taskcluster.net/index/artifacts/#gecko.v2.mozilla-central.latest/gecko.v2.mozilla-central.latest
# The values correspond to a pair of (<package regex>, <test archive regex>).
JOB_DETAILS = {
    'android-api-16-opt': (AndroidArtifactJob, (r'(public/build/fennec-(.*)\.android-arm.apk|public/build/target\.apk)',
                                                r'public/build/fennec-(.*)\.common\.tests\.zip|public/build/target\.common\.tests\.zip')),
    'android-api-16-debug': (AndroidArtifactJob, (r'public/build/target\.apk',
                                                  r'public/build/target\.common\.tests\.zip')),
    'android-x86-opt': (AndroidArtifactJob, (r'public/build/target\.apk',
                                             r'public/build/target\.common\.tests\.zip')),
    'linux-opt': (LinuxArtifactJob, (r'public/build/target\.tar\.bz2',
                                     r'public/build/target\.common\.tests\.zip')),
    'linux-debug': (LinuxArtifactJob, (r'public/build/target\.tar\.bz2',
                                       r'public/build/target\.common\.tests\.zip')),
    'linux64-opt': (LinuxArtifactJob, (r'public/build/target\.tar\.bz2',
                                       r'public/build/target\.common\.tests\.zip')),
    'linux64-debug': (LinuxArtifactJob, (r'public/build/target\.tar\.bz2',
                                         r'public/build/target\.common\.tests\.zip')),
    'macosx64-opt': (MacArtifactJob, (r'public/build/firefox-(.*)\.mac\.dmg|public/build/target\.dmg',
                                      r'public/build/firefox-(.*)\.common\.tests\.zip|public/build/target\.common\.tests\.zip')),
    'macosx64-debug': (MacArtifactJob, (r'public/build/firefox-(.*)\.mac\.dmg|public/build/target\.dmg',
                                        r'public/build/firefox-(.*)\.common\.tests\.zip|public/build/target\.common\.tests\.zip')),
    'win32-opt': (WinArtifactJob, (r'public/build/firefox-(.*)\.win32\.zip|public/build/target\.zip',
                                   r'public/build/firefox-(.*)\.common\.tests\.zip|public/build/target\.common\.tests\.zip')),
    'win32-debug': (WinArtifactJob, (r'public/build/firefox-(.*)\.win32\.zip|public/build/target\.zip',
                                     r'public/build/firefox-(.*)\.common\.tests\.zip|public/build/target\.common\.tests\.zip')),
    'win64-opt': (WinArtifactJob, (r'public/build/firefox-(.*)\.win64\.zip|public/build/target\.zip',
                                   r'public/build/firefox-(.*)\.common\.tests\.zip|public/build/target\.common\.tests\.zip')),
    'win64-debug': (WinArtifactJob, (r'public/build/firefox-(.*)\.win64\.zip|public/build/target\.zip',
                                     r'public/build/firefox-(.*)\.common\.tests\.zip|public/build/target\.common\.tests\.zip')),
}


def get_job_details(job, log=None, download_symbols=False, substs=None):
    cls, (package_re, tests_re) = JOB_DETAILS[job]
    return cls(package_re, tests_re, log=log, download_symbols=download_symbols,
               substs=substs)


def cachedmethod(cachefunc):
    '''Decorator to wrap a class or instance method with a memoizing callable that
    saves results in a (possibly shared) cache.
    '''
    def decorator(method):
        def wrapper(self, *args, **kwargs):
            mapping = cachefunc(self)
            if mapping is None:
                return method(self, *args, **kwargs)
            key = (method.__name__, args, tuple(sorted(kwargs.items())))
            try:
                return mapping[key]
            except KeyError:
                pass
            result = method(self, *args, **kwargs)
            mapping[key] = result
            return result
        return functools.update_wrapper(wrapper, method)
    return decorator
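

# A minimal sketch of how cachedmethod is used (illustrative only; the real
# consumers are the CacheManager subclasses below, which point it at their
# pylru caches):
#
#     class Squares(object):
#         def __init__(self):
#             self._cache = pylru.lrucache(10)
#
#         @cachedmethod(operator.attrgetter('_cache'))
#         def square(self, n):
#             return n * n  # computed once per n, then served from the cache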


class CacheManager(object):
    '''Maintain an LRU cache. Provide simple persistence, including support for
    loading and saving the state using a "with" block. Allow clearing the cache
    and printing the cache for debugging.

    Provide simple logging.
    '''

    def __init__(self, cache_dir, cache_name, cache_size, cache_callback=None,
                 log=None, skip_cache=False):
        self._skip_cache = skip_cache
        self._cache = pylru.lrucache(cache_size, callback=cache_callback)
        self._cache_filename = mozpath.join(cache_dir, cache_name + '-cache.pickle')
        self._log = log
        mkdir(cache_dir, not_indexed=True)

    def log(self, *args, **kwargs):
        if self._log:
            self._log(*args, **kwargs)

    def load_cache(self):
        if self._skip_cache:
            self.log(logging.DEBUG, 'artifact',
                     {},
                     'Skipping cache: ignoring load_cache!')
            return

        try:
            items = pickle.load(open(self._cache_filename, 'rb'))
            for key, value in items:
                self._cache[key] = value
        except Exception as e:
            # Corrupt cache, perhaps? Sadly, pickle raises many different
            # exceptions, so it's not worth trying to be fine grained here.
            # We ignore any exception, so the cache is effectively dropped.
            self.log(logging.INFO, 'artifact',
                     {'filename': self._cache_filename, 'exception': repr(e)},
                     'Ignoring exception unpickling cache file {filename}: {exception}')

    def dump_cache(self):
        if self._skip_cache:
            self.log(logging.DEBUG, 'artifact',
                     {},
                     'Skipping cache: ignoring dump_cache!')
            return

        ensureParentDir(self._cache_filename)
        pickle.dump(list(reversed(list(self._cache.items()))),
                    open(self._cache_filename, 'wb'), -1)

    def clear_cache(self):
        if self._skip_cache:
            self.log(logging.DEBUG, 'artifact',
                     {},
                     'Skipping cache: ignoring clear_cache!')
            return

        with self:
            self._cache.clear()

    def __enter__(self):
        self.load_cache()
        return self

    def __exit__(self, type, value, traceback):
        self.dump_cache()


class PushheadCache(CacheManager):
    '''Helps map tree/revision pairs to parent pushheads according to the pushlog.'''

    def __init__(self, cache_dir, log=None, skip_cache=False):
        CacheManager.__init__(self, cache_dir, 'pushhead_cache', MAX_CACHED_TASKS,
                              log=log, skip_cache=skip_cache)

    @cachedmethod(operator.attrgetter('_cache'))
    def parent_pushhead_id(self, tree, revision):
        cset_url_tmpl = ('https://hg.mozilla.org/{tree}/json-pushes?'
                         'changeset={changeset}&version=2&tipsonly=1')
        req = requests.get(cset_url_tmpl.format(tree=tree, changeset=revision),
                           headers={'Accept': 'application/json'})
        if req.status_code not in range(200, 300):
            raise ValueError('Unexpected status code {} from the pushlog'.format(
                req.status_code))
        result = req.json()
        [found_pushid] = result['pushes'].keys()
        return int(found_pushid)
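
    # Both pushlog queries in this class hit hg.mozilla.org's json-pushes API.
    # With version=2 and tipsonly=1, responses look roughly like this (values
    # invented for illustration):
    #
    #     {"lastpushid": 34567,
    #      "pushes": {"34512": {"changesets": ["<40-hex tip hash>"],
    #                           "date": 1500000000,
    #                           "user": "someone@example.com"}}}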

    @cachedmethod(operator.attrgetter('_cache'))
    def pushid_range(self, tree, start, end):
        pushid_url_tmpl = ('https://hg.mozilla.org/{tree}/json-pushes?'
                           'startID={start}&endID={end}&version=2&tipsonly=1')

        req = requests.get(pushid_url_tmpl.format(tree=tree, start=start,
                                                  end=end),
                           headers={'Accept': 'application/json'})
        result = req.json()
        return [
            p['changesets'][-1] for p in result['pushes'].values()
        ]


class TaskCache(CacheManager):
    '''Map candidate pushheads to Task Cluster task IDs and artifact URLs.'''

    def __init__(self, cache_dir, log=None, skip_cache=False):
        CacheManager.__init__(self, cache_dir, 'artifact_url', MAX_CACHED_TASKS,
                              log=log, skip_cache=skip_cache)

    @cachedmethod(operator.attrgetter('_cache'))
    def artifact_urls(self, tree, job, rev, download_symbols):
        try:
            artifact_job = get_job_details(job, log=self._log,
                                           download_symbols=download_symbols)
        except KeyError:
            self.log(logging.INFO, 'artifact',
                     {'job': job},
                     'Unknown job {job}')
            raise KeyError("Unknown job")

        # Grab the second part of the repo name, which is generally how things
        # are indexed. Eg: 'integration/mozilla-inbound' is indexed as
        # 'mozilla-inbound'.
        tree = tree.split('/')[1] if '/' in tree else tree

        namespace = 'gecko.v2.{tree}.revision.{rev}.{product}.{job}'.format(
            rev=rev,
            tree=tree,
            product=artifact_job.product,
            job=job,
        )
        self.log(logging.DEBUG, 'artifact',
                 {'namespace': namespace},
                 'Searching Taskcluster index with namespace: {namespace}')
        try:
            taskId = find_task_id(namespace)
        except KeyError:
            # Not all revisions correspond to pushes that produce the job we
            # care about; and even those that do may not have completed yet.
            raise ValueError('Task for {namespace} does not exist (yet)!'.format(
                namespace=namespace))

        artifacts = list_artifacts(taskId)

        urls = []
        for artifact_name in artifact_job.find_candidate_artifacts(artifacts):
            # We can easily extract the task ID from the URL. We can't easily
            # extract the build ID; we use the .ini files embedded in the
            # downloaded artifact for this.
            url = get_artifact_url(taskId, artifact_name)
            urls.append(url)
        if not urls:
            raise ValueError('Task for {namespace} existed, but no artifacts '
                             'found!'.format(namespace=namespace))
        return urls
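

# A fully expanded index namespace, as constructed in TaskCache.artifact_urls
# above, looks like this (revision hash invented for illustration):
#
#     gecko.v2.mozilla-central.revision.0123456789abcdef0123456789abcdef01234567.firefox.linux64-opt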


class ArtifactPersistLimit(PersistLimit):
    '''Handle persistence for the artifacts cache.

    When instantiating a DownloadManager, it starts by filling the
    PersistLimit instance it's given with register_dir_content.
    In practice, this registers all the files already in the cache directory.
    After a download finishes, the newly downloaded file is registered, and the
    oldest files registered to the PersistLimit instance are removed depending
    on the size and file limits it's configured for.

    This is all good, but there are a few tweaks we want here:

    - We have pickle files in the cache directory that we don't want purged.
    - Files that were just downloaded in the same session shouldn't be purged.
      (If for some reason we end up downloading more than the default max size,
      we don't want the files to be purged.)

    To achieve this, this subclass of PersistLimit inhibits the register_file
    method for pickle files and tracks what files were downloaded in the same
    session to avoid removing them.

    The register_file method may be used to register cache matches too, so that
    later sessions know they were freshly used.
    '''

    def __init__(self, log=None):
        super(ArtifactPersistLimit, self).__init__(
            size_limit=MAX_CACHED_ARTIFACTS_SIZE,
            file_limit=MIN_CACHED_ARTIFACTS)
        self._log = log
        self._registering_dir = False
        self._downloaded_now = set()

    def log(self, *args, **kwargs):
        if self._log:
            self._log(*args, **kwargs)

    def register_file(self, path):
        if path.endswith('.pickle') or \
                os.path.basename(path) == '.metadata_never_index':
            return
        if not self._registering_dir:
            # Touch the file so that subsequent calls to a mach artifact
            # command know it was recently used. While remove_old_files
            # is based on access time, in various cases, the access time is not
            # updated when just reading the file, so we force an update.
            try:
                os.utime(path, None)
            except OSError:
                pass
            self._downloaded_now.add(path)
        super(ArtifactPersistLimit, self).register_file(path)

    def register_dir_content(self, directory, pattern="*"):
        self._registering_dir = True
        super(ArtifactPersistLimit, self).register_dir_content(
            directory, pattern)
        self._registering_dir = False

    def remove_old_files(self):
        from dlmanager import fs
        files = sorted(self.files, key=lambda f: f.stat.st_atime)
        kept = []
        while len(files) > self.file_limit and \
                self._files_size >= self.size_limit:
            f = files.pop(0)
            if f.path in self._downloaded_now:
                kept.append(f)
                continue
            try:
                fs.remove(f.path)
            except WindowsError:
                # For some reason, on automation, we can't remove those files.
                # So for now, ignore the error.
                kept.append(f)
                continue
            self.log(logging.INFO, 'artifact',
                     {'filename': f.path},
                     'Purged artifact {filename}')
            self._files_size -= f.stat.st_size
        self.files = files + kept

    def remove_all(self):
        from dlmanager import fs
        for f in self.files:
            fs.remove(f.path)
        self._files_size = 0
        self.files = []
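

# Note that remove_old_files above only evicts while *both* limits are
# exceeded: more than file_limit files are registered *and* their total size
# is at least size_limit. The file limit therefore acts as a floor on what is
# kept (hence the name MIN_CACHED_ARTIFACTS), not as a cap.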


class ArtifactCache(object):
    '''Fetch Task Cluster artifact URLs and purge least recently used artifacts from disk.'''

    def __init__(self, cache_dir, log=None, skip_cache=False):
        mkdir(cache_dir, not_indexed=True)
        self._cache_dir = cache_dir
        self._log = log
        self._skip_cache = skip_cache
        self._persist_limit = ArtifactPersistLimit(log)
        self._download_manager = DownloadManager(
            self._cache_dir, persist_limit=self._persist_limit)
        self._last_dl_update = -1

    def log(self, *args, **kwargs):
        if self._log:
            self._log(*args, **kwargs)

    def fetch(self, url, force=False):
        fname = os.path.basename(url)
        try:
            # Use the file name from the url if it looks like a hash digest.
            if len(fname) not in (32, 40, 56, 64, 96, 128):
                raise TypeError()
            binascii.unhexlify(fname)
        except TypeError:
            # We download to a temporary name like HASH[:16]-basename to
            # differentiate among URLs with the same basenames. We used to then
            # extract the build ID from the downloaded artifact and use it to make a
            # human readable unique name, but extracting build IDs is time consuming
            # (especially on Mac OS X, where we must mount a large DMG file).
            hash = hashlib.sha256(url).hexdigest()[:16]
            # Strip query string and fragments.
            basename = os.path.basename(urlparse.urlparse(url).path)
            fname = hash + '-' + basename

        path = os.path.abspath(mozpath.join(self._cache_dir, fname))
        if self._skip_cache and os.path.exists(path):
            self.log(logging.DEBUG, 'artifact',
                     {'path': path},
                     'Skipping cache: removing cached downloaded artifact {path}')
            os.remove(path)

        self.log(logging.INFO, 'artifact',
                 {'path': path},
                 'Downloading to temporary location {path}')
        try:
            dl = self._download_manager.download(url, fname)

            def download_progress(dl, bytes_so_far, total_size):
                if not total_size:
                    return
                percent = (float(bytes_so_far) / total_size) * 100
                now = int(percent / 5)
                if now == self._last_dl_update:
                    return
                self._last_dl_update = now
                self.log(logging.INFO, 'artifact',
                         {'bytes_so_far': bytes_so_far,
                          'total_size': total_size,
                          'percent': percent},
                         'Downloading... {percent:02.1f} %')

            if dl:
                dl.set_progress(download_progress)
                dl.wait()
            else:
                # Avoid the file being removed if it was in the cache already.
                path = os.path.join(self._cache_dir, fname)
                self._persist_limit.register_file(path)

            self.log(logging.INFO, 'artifact',
                     {'path': os.path.abspath(mozpath.join(self._cache_dir, fname))},
                     'Downloaded artifact to {path}')
            return os.path.abspath(mozpath.join(self._cache_dir, fname))
        finally:
            # Cancel any background downloads in progress.
            self._download_manager.cancel()

    def clear_cache(self):
        if self._skip_cache:
            self.log(logging.DEBUG, 'artifact',
                     {},
                     'Skipping cache: ignoring clear_cache!')
            return

        self._persist_limit.remove_all()


class Artifacts(object):
    '''Maintain state to efficiently fetch build artifacts from a Firefox tree.'''

    def __init__(self, tree, substs, defines, job=None, log=None,
                 cache_dir='.', hg=None, git=None, skip_cache=False,
                 topsrcdir=None):
        if (hg and git) or (not hg and not git):
            raise ValueError("Must provide path to exactly one of hg and git")

        self._substs = substs
        self._download_symbols = self._substs.get('MOZ_ARTIFACT_BUILD_SYMBOLS', False)
        self._defines = defines
        self._tree = tree
        self._job = job or self._guess_artifact_job()
        self._log = log
        self._hg = hg
        self._git = git
        self._cache_dir = cache_dir
        self._skip_cache = skip_cache
        self._topsrcdir = topsrcdir

        try:
            self._artifact_job = get_job_details(self._job, log=self._log,
                                                 download_symbols=self._download_symbols,
                                                 substs=self._substs)
        except KeyError:
            self.log(logging.INFO, 'artifact',
                     {'job': self._job},
                     'Unknown job {job}')
            raise KeyError("Unknown job")

        self._task_cache = TaskCache(self._cache_dir, log=self._log,
                                     skip_cache=self._skip_cache)
        self._artifact_cache = ArtifactCache(self._cache_dir, log=self._log,
                                             skip_cache=self._skip_cache)
        self._pushhead_cache = PushheadCache(self._cache_dir, log=self._log,
                                             skip_cache=self._skip_cache)

    def log(self, *args, **kwargs):
        if self._log:
            self._log(*args, **kwargs)

    def _guess_artifact_job(self):
        # Add the "-debug" suffix to the guessed artifact job name
        # if MOZ_DEBUG is enabled.
        if self._substs.get('MOZ_DEBUG'):
            target_suffix = '-debug'
        else:
            target_suffix = '-opt'

        if self._substs.get('MOZ_BUILD_APP', '') == 'mobile/android':
            if self._substs['ANDROID_CPU_ARCH'] == 'x86':
                return 'android-x86-opt'
            return 'android-api-16' + target_suffix

        target_64bit = False
        if self._substs['target_cpu'] == 'x86_64':
            target_64bit = True

        if self._defines.get('XP_LINUX', False):
            return ('linux64' if target_64bit else 'linux') + target_suffix
        if self._defines.get('XP_WIN', False):
            return ('win64' if target_64bit else 'win32') + target_suffix
        if self._defines.get('XP_MACOSX', False):
            # We only produce unified builds in automation, so the target_cpu
            # check is not relevant.
            return 'macosx64' + target_suffix
        raise Exception('Cannot determine default job for |mach artifact|!')

    def _pushheads_from_rev(self, rev, count):
        """Queries hg.mozilla.org's json-pushes API for pushheads that are
        nearby ancestors of `rev`. Multiple trees are queried, as the `rev` may
        already have been pushed to multiple repositories. For each repository
        containing `rev`, the pushhead introducing `rev` and the previous
        `count` pushheads from that point are included in the output.
        """

        with self._pushhead_cache as pushhead_cache:
            found_pushids = {}
            for tree in CANDIDATE_TREES:
                self.log(logging.INFO, 'artifact',
                         {'tree': tree,
                          'rev': rev},
                         'Attempting to find a pushhead containing {rev} on {tree}.')
                try:
                    pushid = pushhead_cache.parent_pushhead_id(tree, rev)
                    found_pushids[tree] = pushid
                except ValueError:
                    continue

            candidate_pushheads = collections.defaultdict(list)

            for tree, pushid in found_pushids.iteritems():
                end = pushid
                start = pushid - count

                self.log(logging.INFO, 'artifact',
                         {'tree': tree,
                          'pushid': pushid,
                          'num': count},
                         'Retrieving the last {num} pushheads starting with id {pushid} on {tree}')
                for pushhead in pushhead_cache.pushid_range(tree, start, end):
                    candidate_pushheads[pushhead].append(tree)

        return candidate_pushheads

    def _get_hg_revisions_from_git(self):
        rev_list = subprocess.check_output([
            self._git, 'rev-list', '--topo-order',
            '--max-count={num}'.format(num=NUM_REVISIONS_TO_QUERY),
            'HEAD',
        ], cwd=self._topsrcdir)

        hg_hash_list = subprocess.check_output([
            self._git, 'cinnabar', 'git2hg'
        ] + rev_list.splitlines(), cwd=self._topsrcdir)

        zeroes = "0" * 40

        hashes = []
        for hg_hash in hg_hash_list.splitlines():
            hg_hash = hg_hash.strip()
            if not hg_hash or hg_hash == zeroes:
                # git-cinnabar prints an all-zero hash for commits it can't
                # map to a Mercurial changeset.
                continue
            hashes.append(hg_hash)
        return hashes
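
    # The hg query below uses the revset 'last(public() and ::., N)': the N
    # most recently added public changesets that are also ancestors of the
    # working directory parent.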

    def _get_recent_public_revisions(self):
        """Returns recent ancestors of the working parent that are likely to
        be known to Mozilla automation.

        If we're using git, retrieves hg revisions from git-cinnabar.
        """
        if self._git:
            return self._get_hg_revisions_from_git()

        # Mercurial updated the ordering of "last" in 4.3. We use revision
        # numbers to order here to accommodate multiple versions of hg.
        last_revs = subprocess.check_output([
            self._hg, 'log',
            '--template', '{rev}:{node}\n',
            '-r', 'last(public() and ::., {num})'.format(
                num=NUM_REVISIONS_TO_QUERY)
        ], cwd=self._topsrcdir).splitlines()
        return [i.split(':')[-1] for i in sorted(last_revs, reverse=True)]

    def _find_pushheads(self):
        """Returns an iterator of recent pushhead revisions, starting with the
        working parent.
        """

        last_revs = self._get_recent_public_revisions()
        candidate_pushheads = self._pushheads_from_rev(last_revs[0].rstrip(),
                                                       NUM_PUSHHEADS_TO_QUERY_PER_PARENT)
        count = 0
        for rev in last_revs:
            rev = rev.rstrip()
            if not rev:
                continue
            if rev not in candidate_pushheads:
                continue
            count += 1
            yield candidate_pushheads[rev], rev

        if not count:
            raise Exception('Could not find any candidate pushheads in the last {num} revisions.\n'
                            'Search started with {rev}, which must be known to Mozilla automation.\n\n'
                            'See https://developer.mozilla.org/en-US/docs/Artifact_builds'.format(
                                rev=last_revs[0], num=NUM_PUSHHEADS_TO_QUERY_PER_PARENT))

    def find_pushhead_artifacts(self, task_cache, job, tree, pushhead):
        try:
            urls = task_cache.artifact_urls(tree, job, pushhead, self._download_symbols)
        except ValueError:
            return None
        if urls:
            self.log(logging.INFO, 'artifact',
                     {'pushhead': pushhead,
                      'tree': tree},
                     'Installing from remote pushhead {pushhead} on {tree}')
            return urls
        return None
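
    # install_from_file below copies archive members through FileAvoidWrite,
    # which only touches the destination file when its content actually
    # changed; FileAvoidWrite.close() reports (existed, updated) flags, which
    # are used to decide whether permissions need to be (re)applied.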

    def install_from_file(self, filename, distdir):
        self.log(logging.INFO, 'artifact',
                 {'filename': filename},
                 'Installing from {filename}')

        # Do we need to post-process?
        processed_filename = filename + PROCESSED_SUFFIX

        if self._skip_cache and os.path.exists(processed_filename):
            self.log(logging.DEBUG, 'artifact',
                     {'path': processed_filename},
                     'Skipping cache: removing cached processed artifact {path}')
            os.remove(processed_filename)

        if not os.path.exists(processed_filename):
            self.log(logging.INFO, 'artifact',
                     {'filename': filename},
                     'Processing contents of {filename}')
            self.log(logging.INFO, 'artifact',
                     {'processed_filename': processed_filename},
                     'Writing processed {processed_filename}')
            self._artifact_job.process_artifact(filename, processed_filename)

        self._artifact_cache._persist_limit.register_file(processed_filename)

        self.log(logging.INFO, 'artifact',
                 {'processed_filename': processed_filename},
                 'Installing from processed {processed_filename}')

        # Copy all .so files, avoiding modification where possible.
        ensureParentDir(mozpath.join(distdir, '.dummy'))

        with zipfile.ZipFile(processed_filename) as zf:
            for info in zf.infolist():
                if info.filename.endswith('.ini'):
                    continue
                n = mozpath.join(distdir, info.filename)
                fh = FileAvoidWrite(n, mode='rb')
                shutil.copyfileobj(zf.open(info), fh)
                file_existed, file_updated = fh.close()
                self.log(logging.INFO, 'artifact',
                         {'updating': 'Updating' if file_updated else 'Not updating',
                          'filename': n},
                         '{updating} {filename}')
                if not file_existed or file_updated:
                    # Libraries and binaries may need to be marked executable,
                    # depending on platform.
                    perms = info.external_attr >> 16  # See http://stackoverflow.com/a/434689.
                    perms |= stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH  # u+w, a+r.
                    os.chmod(n, perms)
        return 0

    def install_from_url(self, url, distdir):
        self.log(logging.INFO, 'artifact',
                 {'url': url},
                 'Installing from {url}')
        filename = self._artifact_cache.fetch(url)
        return self.install_from_file(filename, distdir)

    def _install_from_hg_pushheads(self, hg_pushheads, distdir):
        """Iterate pairs (trees, hg_hash) associating hg revision hashes with
        the sets of trees they are known to be in, trying to download and
        install from each.
        """

        urls = None
        count = 0
        # with blocks handle persistence.
        with self._task_cache as task_cache:
            for trees, hg_hash in hg_pushheads:
                for tree in trees:
                    count += 1
                    self.log(logging.DEBUG, 'artifact',
                             {'hg_hash': hg_hash,
                              'tree': tree},
                             'Trying to find artifacts for hg revision {hg_hash} on tree {tree}.')
                    urls = self.find_pushhead_artifacts(task_cache, self._job, tree, hg_hash)
                    if urls:
                        for url in urls:
                            if self.install_from_url(url, distdir):
                                return 1
                        return 0

        self.log(logging.ERROR, 'artifact',
                 {'count': count},
                 'Tried {count} pushheads, no built artifacts found.')
        return 1

    def install_from_recent(self, distdir):
        hg_pushheads = self._find_pushheads()
        return self._install_from_hg_pushheads(hg_pushheads, distdir)

    def install_from_revset(self, revset, distdir):
        if self._hg:
            revision = subprocess.check_output([self._hg, 'log', '--template', '{node}\n',
                                                '-r', revset], cwd=self._topsrcdir).strip()
            if len(revision.split('\n')) != 1:
                raise ValueError('hg revision specification must resolve to exactly one commit')
        else:
            revision = subprocess.check_output([self._git, 'rev-parse', revset],
                                               cwd=self._topsrcdir).strip()
            revision = subprocess.check_output([self._git, 'cinnabar', 'git2hg', revision],
                                               cwd=self._topsrcdir).strip()
            if len(revision.split('\n')) != 1:
                raise ValueError('hg revision specification must resolve to exactly one commit')
            if revision == "0" * 40:
                raise ValueError('git revision specification must resolve to a commit known to hg')

        self.log(logging.INFO, 'artifact',
                 {'revset': revset,
                  'revision': revision},
                 'Will only accept artifacts from a pushhead at {revision} '
                 '(matched revset "{revset}").')
        # Include try in our search to allow pulling from a specific push.
        pushheads = [(list(CANDIDATE_TREES) + ['try'], revision)]
        return self._install_from_hg_pushheads(pushheads, distdir)

    def install_from(self, source, distdir):
        """Install artifacts from a ``source`` into the given ``distdir``."""
        if source and os.path.isfile(source):
            return self.install_from_file(source, distdir)
        elif source and urlparse.urlparse(source).scheme:
            return self.install_from_url(source, distdir)
        else:
            if source is None and 'MOZ_ARTIFACT_REVISION' in os.environ:
                source = os.environ['MOZ_ARTIFACT_REVISION']

            if source:
                return self.install_from_revset(source, distdir)

            return self.install_from_recent(distdir)

    def clear_cache(self):
        self.log(logging.INFO, 'artifact',
                 {},
                 'Deleting cached artifacts and caches.')
        self._task_cache.clear_cache()
        self._artifact_cache.clear_cache()
        self._pushhead_cache.clear_cache()