# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

'''
Fetch build artifacts from a Firefox tree.

This provides an (at-the-moment special purpose) interface to download
artifacts from Mozilla's Task Cluster.

This module performs the following steps:

* find a candidate hg parent revision.  At one time we used the local pushlog,
  which required the mozext hg extension.  This isn't feasible with git, and it
  is only mildly less efficient to not use the pushlog, so we don't use it even
  when querying hg.

* map the candidate parent to candidate Task Cluster tasks and artifact
  locations.  Pushlog entries might not correspond to tasks (yet), and those
  tasks might not produce the desired class of artifacts.

* fetch fresh Task Cluster artifacts and purge old artifacts, using a simple
  Least Recently Used cache.

* post-process fresh artifacts, to speed future installation.  In particular,
  extract relevant files from Mac OS X DMG files into a friendly archive format
  so we don't have to mount DMG files frequently.

The bulk of the complexity is in managing and persisting several caches.  If
we found a Python LRU cache that pickled cleanly, we could remove a lot of
this code!  Sadly, I found no such candidate implementations, so we pickle
pylru caches manually.

None of the instances (or the underlying caches) are safe for concurrent use.
A future need, perhaps.

This module requires certain modules be importable from the ambient Python
environment.  |mach artifact| ensures these modules are available, but other
consumers will need to arrange this themselves.
'''
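
# A minimal usage sketch (hypothetical consumer code; |mach artifact| wires this
# up for real, and the tree, substs, defines, and path values below are
# placeholders -- this module is assumed to be importable as mozbuild.artifacts):
#
#     from mozbuild.artifacts import Artifacts
#
#     artifacts = Artifacts('mozilla-central', substs, defines,
#                           hg='/usr/bin/hg', cache_dir='/path/to/cache',
#                           topsrcdir='/path/to/srcdir')
#     artifacts.install_from(None, '/path/to/objdir/dist')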


from __future__ import absolute_import, print_function, unicode_literals

import binascii
import collections
import functools
import glob
import hashlib
import logging
import operator
import os
import pickle
import re
import requests
import shutil
import stat
import subprocess
import tarfile
import tempfile
import urlparse
import zipfile

import pylru
from taskgraph.util.taskcluster import (
    find_task_id,
    get_artifact_url,
    list_artifacts,
)

from mozbuild.util import (
    ensureParentDir,
    FileAvoidWrite,
    mkdir,
)
import mozinstall
from mozpack.files import (
    JarFinder,
    TarFinder,
)
from mozpack.mozjar import (
    JarReader,
    JarWriter,
)
from mozpack.packager.unpack import UnpackFinder
import mozpack.path as mozpath
from dlmanager import (
    DownloadManager,
    PersistLimit,
)

NUM_PUSHHEADS_TO_QUERY_PER_PARENT = 50  # Number of candidate pushheads to cache per parent changeset.

# Number of parent changesets to consider as possible pushheads.
# There isn't really such a thing as a reasonable default here, because we don't
# know how many pushheads we'll need to look at to find a build with our artifacts,
# and we don't know how many changesets will be in each push. For now we assume
# we'll find a build in the last 50 pushes, assuming each push contains 10 changesets.
NUM_REVISIONS_TO_QUERY = 500

MAX_CACHED_TASKS = 400  # Number of pushheads to cache Task Cluster task data for.

# Minimum number of downloaded artifacts to keep. Each artifact can be very large,
# so don't make this too large!
MIN_CACHED_ARTIFACTS = 6

# Maximum size of the downloaded artifacts to keep in cache, in bytes (1GiB).
MAX_CACHED_ARTIFACTS_SIZE = 1024 * 1024 * 1024

# Downloaded artifacts are cached, and a subset of their contents extracted for
# easy installation.  This is most noticeable on Mac OS X: since mounting and
# copying from DMG files is very slow, we extract the desired binaries to a
# separate archive for fast re-installation.
PROCESSED_SUFFIX = '.processed.jar'
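# For example, a cached download named '0123456789abcdef-target.dmg' (the hash
# prefix is illustrative) gets a processed companion named
# '0123456789abcdef-target.dmg.processed.jar'.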

CANDIDATE_TREES = (
    'mozilla-central',
    'integration/autoland',
    'integration/mozilla-inbound',
    'releases/mozilla-beta',
    'releases/mozilla-release',
    'releases/mozilla-esr60'
)


class ArtifactJob(object):
    # These are a subset of TEST_HARNESS_BINS in testing/mochitest/Makefile.in.
    # Each item is a pair of (pattern, (src_prefix, dest_prefix)), where src_prefix
    # is the prefix of the pattern relevant to its location in the archive, and
    # dest_prefix is the prefix to be added that will yield the final path relative
    # to dist/.
    test_artifact_patterns = {
        ('bin/BadCertServer', ('bin', 'bin')),
        ('bin/GenerateOCSPResponse', ('bin', 'bin')),
        ('bin/OCSPStaplingServer', ('bin', 'bin')),
        ('bin/SymantecSanctionsServer', ('bin', 'bin')),
        ('bin/certutil', ('bin', 'bin')),
        ('bin/fileid', ('bin', 'bin')),
        ('bin/geckodriver', ('bin', 'bin')),
        ('bin/pk12util', ('bin', 'bin')),
        ('bin/screentopng', ('bin', 'bin')),
        ('bin/ssltunnel', ('bin', 'bin')),
        ('bin/xpcshell', ('bin', 'bin')),
        ('bin/plugins/gmp-*/*/*', ('bin/plugins', 'bin')),
        ('bin/plugins/*', ('bin/plugins', 'plugins')),
        ('bin/components/*.xpt', ('bin/components', 'bin/components')),
    }
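
    # For example (an illustrative entry, not an exhaustive mapping): with the
    # pattern ('bin/plugins/gmp-*/*/*', ('bin/plugins', 'bin')), an archive path
    # like 'bin/plugins/gmp-fake/1.0/libfake.so' is taken relative to the
    # src_prefix 'bin/plugins' and re-rooted under the dest_prefix 'bin',
    # yielding 'bin/gmp-fake/1.0/libfake.so' under dist/.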

    # We can tell our input is a test archive by this suffix, which happens to
    # be the same across platforms.
    _test_archive_suffix = '.common.tests.zip'

    def __init__(self, package_re, tests_re, log=None, download_symbols=False, substs=None):
        self._package_re = re.compile(package_re)
        self._tests_re = None
        if tests_re:
            self._tests_re = re.compile(tests_re)
        self._log = log
        self._substs = substs
        self._symbols_archive_suffix = None
        if download_symbols:
            self._symbols_archive_suffix = 'crashreporter-symbols.zip'

    def log(self, *args, **kwargs):
        if self._log:
            self._log(*args, **kwargs)

    def find_candidate_artifacts(self, artifacts):
        # TODO: Handle multiple artifacts, taking the latest one.
        tests_artifact = None
        for artifact in artifacts:
            name = artifact['name']
            if self._package_re and self._package_re.match(name):
                yield name
            elif self._tests_re and self._tests_re.match(name):
                tests_artifact = name
                yield name
            elif self._symbols_archive_suffix and name.endswith(self._symbols_archive_suffix):
                yield name
            else:
                self.log(logging.DEBUG, 'artifact',
                         {'name': name},
                         'Not yielding artifact named {name} as a candidate artifact')
        if self._tests_re and not tests_artifact:
            raise ValueError('Expected tests archive matching "{re}", but '
                             'found none!'.format(re=self._tests_re))

    def process_artifact(self, filename, processed_filename):
        if filename.endswith(ArtifactJob._test_archive_suffix) and self._tests_re:
            return self.process_tests_artifact(filename, processed_filename)
        if self._symbols_archive_suffix and filename.endswith(self._symbols_archive_suffix):
            return self.process_symbols_archive(filename, processed_filename)
        return self.process_package_artifact(filename, processed_filename)

    def process_package_artifact(self, filename, processed_filename):
        raise NotImplementedError("Subclasses must specialize process_package_artifact!")

    def process_tests_artifact(self, filename, processed_filename):
        from mozbuild.action.test_archive import OBJDIR_TEST_FILES
        added_entry = False

        with JarWriter(file=processed_filename, optimize=False, compress_level=5) as writer:
            reader = JarReader(filename)
            for filename, entry in reader.entries.iteritems():
                for pattern, (src_prefix, dest_prefix) in self.test_artifact_patterns:
                    if not mozpath.match(filename, pattern):
                        continue
                    destpath = mozpath.relpath(filename, src_prefix)
                    destpath = mozpath.join(dest_prefix, destpath)
                    self.log(logging.INFO, 'artifact',
                             {'destpath': destpath},
                             'Adding {destpath} to processed archive')
                    mode = entry['external_attr'] >> 16
                    writer.add(destpath.encode('utf-8'), reader[filename], mode=mode)
                    added_entry = True
                    break
                for files_entry in OBJDIR_TEST_FILES.values():
                    origin_pattern = files_entry['pattern']
                    leaf_filename = filename
                    if 'dest' in files_entry:
                        dest = files_entry['dest']
                        origin_pattern = mozpath.join(dest, origin_pattern)
                        leaf_filename = filename[len(dest) + 1:]
                    if mozpath.match(filename, origin_pattern):
                        destpath = mozpath.join('..', files_entry['base'], leaf_filename)
                        mode = entry['external_attr'] >> 16
                        writer.add(destpath.encode('utf-8'), reader[filename], mode=mode)

        if not added_entry:
            raise ValueError('Archive format changed! No pattern from "{patterns}" '
                             'matched an archive path.'.format(
                                 patterns=self.test_artifact_patterns))

    def process_symbols_archive(self, filename, processed_filename):
        with JarWriter(file=processed_filename, optimize=False, compress_level=5) as writer:
            reader = JarReader(filename)
            for filename in reader.entries:
                destpath = mozpath.join('crashreporter-symbols', filename)
                self.log(logging.INFO, 'artifact',
                         {'destpath': destpath},
                         'Adding {destpath} to processed archive')
                writer.add(destpath.encode('utf-8'), reader[filename])


class AndroidArtifactJob(ArtifactJob):

    product = 'mobile'

    package_artifact_patterns = {
        'application.ini',
        'platform.ini',
        '**/*.so',
        '**/interfaces.xpt',
    }

    def process_package_artifact(self, filename, processed_filename):
        # Extract all .so files into the root, which will get copied into dist/bin.
        with JarWriter(file=processed_filename, optimize=False, compress_level=5) as writer:
            for p, f in UnpackFinder(JarFinder(filename, JarReader(filename))):
                if not any(mozpath.match(p, pat) for pat in self.package_artifact_patterns):
                    continue

                dirname, basename = os.path.split(p)
                self.log(logging.INFO, 'artifact',
                    {'basename': basename},
                    'Adding {basename} to processed archive')

                basedir = 'bin'
                if not basename.endswith('.so'):
                    basedir = mozpath.join('bin', dirname.lstrip('assets/'))
                basename = mozpath.join(basedir, basename)
                writer.add(basename.encode('utf-8'), f.open())


class LinuxArtifactJob(ArtifactJob):

    product = 'firefox'

    package_artifact_patterns = {
        'firefox/application.ini',
        'firefox/crashreporter',
        'firefox/dependentlibs.list',
        'firefox/firefox',
        'firefox/firefox-bin',
        'firefox/minidump-analyzer',
        'firefox/pingsender',
        'firefox/platform.ini',
        'firefox/plugin-container',
        'firefox/updater',
        'firefox/**/*.so',
        'firefox/**/interfaces.xpt',
    }

    def process_package_artifact(self, filename, processed_filename):
        added_entry = False

        with JarWriter(file=processed_filename, optimize=False, compress_level=5) as writer:
            with tarfile.open(filename) as reader:
                for p, f in UnpackFinder(TarFinder(filename, reader)):
                    if not any(mozpath.match(p, pat) for pat in self.package_artifact_patterns):
                        continue

                    # We strip off the relative "firefox/" bit from the path,
                    # but otherwise preserve it.
                    destpath = mozpath.join('bin',
                                            mozpath.relpath(p, "firefox"))
                    self.log(logging.INFO, 'artifact',
                             {'destpath': destpath},
                             'Adding {destpath} to processed archive')
                    writer.add(destpath.encode('utf-8'), f.open(), mode=f.mode)
                    added_entry = True

        if not added_entry:
            raise ValueError('Archive format changed! No pattern from "{patterns}" '
                             'matched an archive path.'.format(
                                 patterns=LinuxArtifactJob.package_artifact_patterns))


class MacArtifactJob(ArtifactJob):

    product = 'firefox'

    def process_package_artifact(self, filename, processed_filename):
        tempdir = tempfile.mkdtemp()
        oldcwd = os.getcwd()
        try:
            self.log(logging.INFO, 'artifact',
                {'tempdir': tempdir},
                'Unpacking DMG into {tempdir}')
            if self._substs['HOST_OS_ARCH'] == 'Linux':
                # This is a cross build, use hfsplus and dmg tools to extract the dmg.
                os.chdir(tempdir)
                with open(os.devnull, 'wb') as devnull:
                    subprocess.check_call([
                        self._substs['DMG_TOOL'],
                        'extract',
                        filename,
                        'extracted_img',
                    ], stdout=devnull)
                    subprocess.check_call([
                        self._substs['HFS_TOOL'],
                        'extracted_img',
                        'extractall'
                    ], stdout=devnull)
            else:
                mozinstall.install(filename, tempdir)

            bundle_dirs = glob.glob(mozpath.join(tempdir, '*.app'))
            if len(bundle_dirs) != 1:
                raise ValueError('Expected one source bundle, found: {}'.format(bundle_dirs))
            [source] = bundle_dirs

            # These get copied into dist/bin without the path, so "root/a/b/c" -> "dist/bin/c".
            paths_no_keep_path = ('Contents/MacOS', [
                'crashreporter.app/Contents/MacOS/crashreporter',
                'firefox',
                'firefox-bin',
                'libfreebl3.dylib',
                'liblgpllibs.dylib',
                # 'liblogalloc.dylib',
                'libmozglue.dylib',
                'libnss3.dylib',
                'libnssckbi.dylib',
                'libnssdbm3.dylib',
                'libplugin_child_interpose.dylib',
                # 'libreplace_jemalloc.dylib',
                # 'libreplace_malloc.dylib',
                'libmozavutil.dylib',
                'libmozavcodec.dylib',
                'libsoftokn3.dylib',
                'pingsender',
                'plugin-container.app/Contents/MacOS/plugin-container',
                'updater.app/Contents/MacOS/org.mozilla.updater',
                # 'xpcshell',
                'XUL',
            ])

            # These get copied into dist/bin with the path, so "root/a/b/c" -> "dist/bin/a/b/c".
            paths_keep_path = [
                ('Contents/MacOS', [
                    'crashreporter.app/Contents/MacOS/minidump-analyzer',
                ]),
                ('Contents/Resources', [
                    'browser/components/libbrowsercomps.dylib',
                    'dependentlibs.list',
                    # 'firefox',
                    'gmp-clearkey/0.1/libclearkey.dylib',
                    # 'gmp-fake/1.0/libfake.dylib',
                    # 'gmp-fakeopenh264/1.0/libfakeopenh264.dylib',
                    '**/interfaces.xpt',
                ]),
            ]

            with JarWriter(file=processed_filename, optimize=False, compress_level=5) as writer:
                root, paths = paths_no_keep_path
                finder = UnpackFinder(mozpath.join(source, root))
                for path in paths:
                    for p, f in finder.find(path):
                        self.log(logging.INFO, 'artifact',
                            {'path': p},
                            'Adding {path} to processed archive')
                        destpath = mozpath.join('bin', os.path.basename(p))
                        writer.add(destpath.encode('utf-8'), f, mode=f.mode)

                for root, paths in paths_keep_path:
                    finder = UnpackFinder(mozpath.join(source, root))
                    for path in paths:
                        for p, f in finder.find(path):
                            self.log(logging.INFO, 'artifact',
                                     {'path': p},
                                     'Adding {path} to processed archive')
                            destpath = mozpath.join('bin', p)
                            writer.add(destpath.encode('utf-8'), f.open(), mode=f.mode)

        finally:
            os.chdir(oldcwd)
            try:
                shutil.rmtree(tempdir)
            except (OSError, IOError):
                self.log(logging.WARN, 'artifact',
                    {'tempdir': tempdir},
                    'Unable to delete {tempdir}')
                pass


class WinArtifactJob(ArtifactJob):
    package_artifact_patterns = {
        'firefox/dependentlibs.list',
        'firefox/platform.ini',
        'firefox/application.ini',
        'firefox/**/*.dll',
        'firefox/*.exe',
        'firefox/**/interfaces.xpt',
    }

    product = 'firefox'

    # These are a subset of TEST_HARNESS_BINS in testing/mochitest/Makefile.in.
    test_artifact_patterns = {
        ('bin/BadCertServer.exe', ('bin', 'bin')),
        ('bin/GenerateOCSPResponse.exe', ('bin', 'bin')),
        ('bin/OCSPStaplingServer.exe', ('bin', 'bin')),
        ('bin/SymantecSanctionsServer.exe', ('bin', 'bin')),
        ('bin/certutil.exe', ('bin', 'bin')),
        ('bin/fileid.exe', ('bin', 'bin')),
        ('bin/geckodriver.exe', ('bin', 'bin')),
        ('bin/pk12util.exe', ('bin', 'bin')),
        ('bin/screenshot.exe', ('bin', 'bin')),
        ('bin/ssltunnel.exe', ('bin', 'bin')),
        ('bin/xpcshell.exe', ('bin', 'bin')),
        ('bin/plugins/gmp-*/*/*', ('bin/plugins', 'bin')),
        ('bin/plugins/*', ('bin/plugins', 'plugins')),
        ('bin/components/*', ('bin/components', 'bin/components')),
    }

    def process_package_artifact(self, filename, processed_filename):
        added_entry = False
        with JarWriter(file=processed_filename, optimize=False, compress_level=5) as writer:
            for p, f in UnpackFinder(JarFinder(filename, JarReader(filename))):
                if not any(mozpath.match(p, pat) for pat in self.package_artifact_patterns):
                    continue

                # strip off the relative "firefox/" bit from the path:
                basename = mozpath.relpath(p, "firefox")
                basename = mozpath.join('bin', basename)
                self.log(logging.INFO, 'artifact',
                    {'basename': basename},
                    'Adding {basename} to processed archive')
                writer.add(basename.encode('utf-8'), f.open(), mode=f.mode)
                added_entry = True

        if not added_entry:
            raise ValueError('Archive format changed! No pattern from "{patterns}" '
                             'matched an archive path.'.format(
                                 patterns=self.package_artifact_patterns))


# Keep the keys of this map in sync with the |mach artifact| --job
# options.  The keys of this map correspond to entries at
# https://tools.taskcluster.net/index/artifacts/#gecko.v2.mozilla-central.latest/gecko.v2.mozilla-central.latest
# The values correspond to a pair of (<package regex>, <test archive regex>).
JOB_DETAILS = {
    'android-api-16-opt': (AndroidArtifactJob, (r'(public/build/fennec-(.*)\.android-arm.apk|public/build/target\.apk)',
                                                r'public/build/fennec-(.*)\.common\.tests\.zip|public/build/target\.common\.tests\.zip')),
    'android-api-16-debug': (AndroidArtifactJob, (r'public/build/target\.apk',
                                                  r'public/build/target\.common\.tests\.zip')),
    'android-x86-opt': (AndroidArtifactJob, (r'public/build/target\.apk',
                                             r'public/build/target\.common\.tests\.zip')),
    'linux-opt': (LinuxArtifactJob, (r'public/build/target\.tar\.bz2',
                                     r'public/build/target\.common\.tests\.zip')),
    'linux-debug': (LinuxArtifactJob, (r'public/build/target\.tar\.bz2',
                                       r'public/build/target\.common\.tests\.zip')),
    'linux64-opt': (LinuxArtifactJob, (r'public/build/target\.tar\.bz2',
                                       r'public/build/target\.common\.tests\.zip')),
    'linux64-debug': (LinuxArtifactJob, (r'public/build/target\.tar\.bz2',
                                         r'public/build/target\.common\.tests\.zip')),
    'macosx64-opt': (MacArtifactJob, (r'public/build/firefox-(.*)\.mac\.dmg|public/build/target\.dmg',
                                      r'public/build/firefox-(.*)\.common\.tests\.zip|public/build/target\.common\.tests\.zip')),
    'macosx64-debug': (MacArtifactJob, (r'public/build/firefox-(.*)\.mac\.dmg|public/build/target\.dmg',
                                        r'public/build/firefox-(.*)\.common\.tests\.zip|public/build/target\.common\.tests\.zip')),
    'win32-opt': (WinArtifactJob, (r'public/build/firefox-(.*)\.win32\.zip|public/build/target\.zip',
                                   r'public/build/firefox-(.*)\.common\.tests\.zip|public/build/target\.common\.tests\.zip')),
    'win32-debug': (WinArtifactJob, (r'public/build/firefox-(.*)\.win32\.zip|public/build/target\.zip',
                                     r'public/build/firefox-(.*)\.common\.tests\.zip|public/build/target\.common\.tests\.zip')),
    'win64-opt': (WinArtifactJob, (r'public/build/firefox-(.*)\.win64\.zip|public/build/target\.zip',
                                   r'public/build/firefox-(.*)\.common\.tests\.zip|public/build/target\.common\.tests\.zip')),
    'win64-debug': (WinArtifactJob, (r'public/build/firefox-(.*)\.win64\.zip|public/build/target\.zip',
                                     r'public/build/firefox-(.*)\.common\.tests\.zip|public/build/target\.common\.tests\.zip')),
}


def get_job_details(job, log=None, download_symbols=False, substs=None):
    cls, (package_re, tests_re) = JOB_DETAILS[job]
    return cls(package_re, tests_re, log=log, download_symbols=download_symbols,
               substs=substs)


def cachedmethod(cachefunc):
    '''Decorator to wrap a class or instance method with a memoizing callable that
    saves results in a (possibly shared) cache.
    '''
    def decorator(method):
        def wrapper(self, *args, **kwargs):
            mapping = cachefunc(self)
            if mapping is None:
                return method(self, *args, **kwargs)
            key = (method.__name__, args, tuple(sorted(kwargs.items())))
            try:
                value = mapping[key]
                return value
            except KeyError:
                pass
            result = method(self, *args, **kwargs)
            mapping[key] = result
            return result
        return functools.update_wrapper(wrapper, method)
    return decorator
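
# An illustrative use of the decorator above (ExampleCache and its method are
# hypothetical; the real callers are the CacheManager subclasses below, which
# memoize into their pylru cache):
#
#     class ExampleCache(CacheManager):
#         @cachedmethod(operator.attrgetter('_cache'))
#         def expensive_lookup(self, tree, rev):
#             return query_remote_service(tree, rev)  # only runs on cache misses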


class CacheManager(object):
    '''Maintain an LRU cache.  Provide simple persistence, including support for
    loading and saving the state using a "with" block.  Allow clearing the cache
    and printing the cache for debugging.

    Provide simple logging.
    '''
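
    # A sketch of the intended persistence pattern (the Artifacts class below
    # uses its TaskCache instance exactly this way):
    #
    #     with self._task_cache as task_cache:  # __enter__ loads the pickled cache
    #         urls = task_cache.artifact_urls(tree, job, rev, download_symbols)
    #     # __exit__ dumps the (possibly updated) cache back to disk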

    def __init__(self, cache_dir, cache_name, cache_size, cache_callback=None, log=None, skip_cache=False):
        self._skip_cache = skip_cache
        self._cache = pylru.lrucache(cache_size, callback=cache_callback)
        self._cache_filename = mozpath.join(cache_dir, cache_name + '-cache.pickle')
        self._log = log
        mkdir(cache_dir, not_indexed=True)

    def log(self, *args, **kwargs):
        if self._log:
            self._log(*args, **kwargs)

    def load_cache(self):
        if self._skip_cache:
            self.log(logging.DEBUG, 'artifact',
                {},
                'Skipping cache: ignoring load_cache!')
            return

        try:
            items = pickle.load(open(self._cache_filename, 'rb'))
            for key, value in items:
                self._cache[key] = value
        except Exception as e:
            # Corrupt cache, perhaps?  Sadly, pickle raises many different
            # exceptions, so it's not worth trying to be fine grained here.
            # We ignore any exception, so the cache is effectively dropped.
            self.log(logging.INFO, 'artifact',
                {'filename': self._cache_filename, 'exception': repr(e)},
                'Ignoring exception unpickling cache file {filename}: {exception}')
            pass

    def dump_cache(self):
        if self._skip_cache:
            self.log(logging.DEBUG, 'artifact',
                {},
                'Skipping cache: ignoring dump_cache!')
            return

        ensureParentDir(self._cache_filename)
        pickle.dump(list(reversed(list(self._cache.items()))), open(self._cache_filename, 'wb'), -1)

    def clear_cache(self):
        if self._skip_cache:
            self.log(logging.DEBUG, 'artifact',
                {},
                'Skipping cache: ignoring clear_cache!')
            return

        with self:
            self._cache.clear()

    def __enter__(self):
        self.load_cache()
        return self

    def __exit__(self, type, value, traceback):
        self.dump_cache()


class PushheadCache(CacheManager):
    '''Helps map tree/revision pairs to parent pushheads according to the pushlog.'''
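
    # The json-pushes responses consumed by the methods below have roughly this
    # shape when queried with version=2 and tipsonly=1 (push ids and hashes are
    # illustrative):
    #
    #     {"lastpushid": 130, "pushes": {"128": {"changesets": ["<tip hash>"], ...}}}
    #
    # parent_pushhead_id expects a single push id key in the response;
    # pushid_range takes the tip changeset of each push in the requested range.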

    def __init__(self, cache_dir, log=None, skip_cache=False):
        CacheManager.__init__(self, cache_dir, 'pushhead_cache', MAX_CACHED_TASKS, log=log, skip_cache=skip_cache)

    @cachedmethod(operator.attrgetter('_cache'))
    def parent_pushhead_id(self, tree, revision):
        cset_url_tmpl = ('https://hg.mozilla.org/{tree}/json-pushes?'
                         'changeset={changeset}&version=2&tipsonly=1')
        req = requests.get(cset_url_tmpl.format(tree=tree, changeset=revision),
                           headers={'Accept': 'application/json'})
        if req.status_code not in range(200, 300):
            raise ValueError
        result = req.json()
        [found_pushid] = result['pushes'].keys()
        return int(found_pushid)

    @cachedmethod(operator.attrgetter('_cache'))
    def pushid_range(self, tree, start, end):
        pushid_url_tmpl = ('https://hg.mozilla.org/{tree}/json-pushes?'
                           'startID={start}&endID={end}&version=2&tipsonly=1')

        req = requests.get(pushid_url_tmpl.format(tree=tree, start=start,
                                                  end=end),
                           headers={'Accept': 'application/json'})
        result = req.json()
        return [
            p['changesets'][-1] for p in result['pushes'].values()
        ]


class TaskCache(CacheManager):
    '''Map candidate pushheads to Task Cluster task IDs and artifact URLs.'''

    def __init__(self, cache_dir, log=None, skip_cache=False):
        CacheManager.__init__(self, cache_dir, 'artifact_url', MAX_CACHED_TASKS, log=log, skip_cache=skip_cache)

    @cachedmethod(operator.attrgetter('_cache'))
    def artifact_urls(self, tree, job, rev, download_symbols):
        try:
            artifact_job = get_job_details(job, log=self._log, download_symbols=download_symbols)
        except KeyError:
            self.log(logging.INFO, 'artifact',
                {'job': job},
                'Unknown job {job}')
            raise KeyError("Unknown job")

        # Grab the second part of the repo name, which is generally how things
        # are indexed. Eg: 'integration/mozilla-inbound' is indexed as
        # 'mozilla-inbound'
        tree = tree.split('/')[1] if '/' in tree else tree

        namespace = 'gecko.v2.{tree}.revision.{rev}.{product}.{job}'.format(
            rev=rev,
            tree=tree,
            product=artifact_job.product,
            job=job,
        )
        self.log(logging.DEBUG, 'artifact',
                 {'namespace': namespace},
                 'Searching Taskcluster index with namespace: {namespace}')
        try:
            taskId = find_task_id(namespace)
        except KeyError:
            # Not all revisions correspond to pushes that produce the job we
            # care about; and even those that do may not have completed yet.
            raise ValueError('Task for {namespace} does not exist (yet)!'.format(namespace=namespace))

        artifacts = list_artifacts(taskId)

        urls = []
        for artifact_name in artifact_job.find_candidate_artifacts(artifacts):
            # We can easily extract the task ID from the URL.  We can't easily
            # extract the build ID; we use the .ini files embedded in the
            # downloaded artifact for this.
            url = get_artifact_url(taskId, artifact_name)
            urls.append(url)
        if not urls:
            raise ValueError('Task for {namespace} existed, but no artifacts found!'.format(namespace=namespace))
        return urls


class ArtifactPersistLimit(PersistLimit):
    '''Handle persistence for artifacts cache

    When instantiating a DownloadManager, it starts by filling the
    PersistLimit instance it's given with register_dir_content.
    In practice, this registers all the files already in the cache directory.
    After a download finishes, the newly downloaded file is registered, and the
    oldest files registered to the PersistLimit instance are removed depending
    on the size and file limits it's configured for.
    This is all good, but there are a few tweaks we want here:
    - We have pickle files in the cache directory that we don't want purged.
    - Files that were just downloaded in the same session shouldn't be purged.
      (if for some reason we end up downloading more than the default max size,
       we don't want the files to be purged)
    To achieve this, this subclass of PersistLimit inhibits the register_file
    method for pickle files and tracks what files were downloaded in the same
    session to avoid removing them.

    The register_file method may be used to register cache matches too, so that
    later sessions know they were freshly used.
    '''

    def __init__(self, log=None):
        super(ArtifactPersistLimit, self).__init__(
            size_limit=MAX_CACHED_ARTIFACTS_SIZE,
            file_limit=MIN_CACHED_ARTIFACTS)
        self._log = log
        self._registering_dir = False
        self._downloaded_now = set()

    def log(self, *args, **kwargs):
        if self._log:
            self._log(*args, **kwargs)

    def register_file(self, path):
        if path.endswith('.pickle') or \
                os.path.basename(path) == '.metadata_never_index':
            return
        if not self._registering_dir:
            # Touch the file so that subsequent calls to a mach artifact
            # command know it was recently used. While remove_old_files
            # is based on access time, in various cases, the access time is not
            # updated when just reading the file, so we force an update.
            try:
                os.utime(path, None)
            except OSError:
                pass
            self._downloaded_now.add(path)
        super(ArtifactPersistLimit, self).register_file(path)

    def register_dir_content(self, directory, pattern="*"):
        self._registering_dir = True
        super(ArtifactPersistLimit, self).register_dir_content(
            directory, pattern)
        self._registering_dir = False

    def remove_old_files(self):
        from dlmanager import fs
        files = sorted(self.files, key=lambda f: f.stat.st_atime)
        kept = []
        while len(files) > self.file_limit and \
                self._files_size >= self.size_limit:
            f = files.pop(0)
            if f.path in self._downloaded_now:
                kept.append(f)
                continue
            try:
                fs.remove(f.path)
            except WindowsError:
                # For some reason, on automation, we can't remove those files.
                # So for now, ignore the error.
                kept.append(f)
                continue
            self.log(logging.INFO, 'artifact',
                {'filename': f.path},
                'Purged artifact {filename}')
            self._files_size -= f.stat.st_size
        self.files = files + kept

    def remove_all(self):
        from dlmanager import fs
        for f in self.files:
            fs.remove(f.path)
        self._files_size = 0
        self.files = []


class ArtifactCache(object):
    '''Fetch Task Cluster artifact URLs and purge least recently used artifacts from disk.'''

    def __init__(self, cache_dir, log=None, skip_cache=False):
        mkdir(cache_dir, not_indexed=True)
        self._cache_dir = cache_dir
        self._log = log
        self._skip_cache = skip_cache
        self._persist_limit = ArtifactPersistLimit(log)
        self._download_manager = DownloadManager(
            self._cache_dir, persist_limit=self._persist_limit)
        self._last_dl_update = -1

    def log(self, *args, **kwargs):
        if self._log:
            self._log(*args, **kwargs)

    def fetch(self, url, force=False):
        fname = os.path.basename(url)
        try:
            # Use the file name from the url if it looks like a hash digest.
            if len(fname) not in (32, 40, 56, 64, 96, 128):
                raise TypeError()
            binascii.unhexlify(fname)
        except TypeError:
            # We download to a temporary name like HASH[:16]-basename to
            # differentiate among URLs with the same basenames.  We used to then
            # extract the build ID from the downloaded artifact and use it to make a
            # human readable unique name, but extracting build IDs is time consuming
            # (especially on Mac OS X, where we must mount a large DMG file).
            hash = hashlib.sha256(url).hexdigest()[:16]
            # Strip query string and fragments.
            basename = os.path.basename(urlparse.urlparse(url).path)
            fname = hash + '-' + basename

        path = os.path.abspath(mozpath.join(self._cache_dir, fname))
        if self._skip_cache and os.path.exists(path):
            self.log(logging.DEBUG, 'artifact',
                {'path': path},
                'Skipping cache: removing cached downloaded artifact {path}')
            os.remove(path)

        self.log(logging.INFO, 'artifact',
            {'path': path},
            'Downloading to temporary location {path}')
        try:
            dl = self._download_manager.download(url, fname)

            def download_progress(dl, bytes_so_far, total_size):
                if not total_size:
                    return
                percent = (float(bytes_so_far) / total_size) * 100
                now = int(percent / 5)
                if now == self._last_dl_update:
                    return
                self._last_dl_update = now
                self.log(logging.INFO, 'artifact',
                         {'bytes_so_far': bytes_so_far, 'total_size': total_size, 'percent': percent},
                         'Downloading... {percent:02.1f} %')

            if dl:
                dl.set_progress(download_progress)
                dl.wait()
            else:
                # Avoid the file being removed if it was in the cache already.
                path = os.path.join(self._cache_dir, fname)
                self._persist_limit.register_file(path)

            self.log(logging.INFO, 'artifact',
                {'path': os.path.abspath(mozpath.join(self._cache_dir, fname))},
                'Downloaded artifact to {path}')
            return os.path.abspath(mozpath.join(self._cache_dir, fname))
        finally:
            # Cancel any background downloads in progress.
            self._download_manager.cancel()

    def clear_cache(self):
        if self._skip_cache:
            self.log(logging.DEBUG, 'artifact',
                {},
                'Skipping cache: ignoring clear_cache!')
            return

        self._persist_limit.remove_all()


class Artifacts(object):
    '''Maintain state to efficiently fetch build artifacts from a Firefox tree.'''

    def __init__(self, tree, substs, defines, job=None, log=None,
                 cache_dir='.', hg=None, git=None, skip_cache=False,
                 topsrcdir=None):
        if (hg and git) or (not hg and not git):
            raise ValueError("Must provide path to exactly one of hg and git")

        self._substs = substs
        self._download_symbols = self._substs.get('MOZ_ARTIFACT_BUILD_SYMBOLS', False)
        self._defines = defines
        self._tree = tree
        self._job = job or self._guess_artifact_job()
        self._log = log
        self._hg = hg
        self._git = git
        self._cache_dir = cache_dir
        self._skip_cache = skip_cache
        self._topsrcdir = topsrcdir

        try:
            self._artifact_job = get_job_details(self._job, log=self._log,
                                                 download_symbols=self._download_symbols,
                                                 substs=self._substs)
        except KeyError:
            self.log(logging.INFO, 'artifact',
                {'job': self._job},
                'Unknown job {job}')
            raise KeyError("Unknown job")

        self._task_cache = TaskCache(self._cache_dir, log=self._log, skip_cache=self._skip_cache)
        self._artifact_cache = ArtifactCache(self._cache_dir, log=self._log, skip_cache=self._skip_cache)
        self._pushhead_cache = PushheadCache(self._cache_dir, log=self._log, skip_cache=self._skip_cache)

    def log(self, *args, **kwargs):
        if self._log:
            self._log(*args, **kwargs)

    def _guess_artifact_job(self):
        # Add the "-debug" suffix to the guessed artifact job name
        # if MOZ_DEBUG is enabled.
        if self._substs.get('MOZ_DEBUG'):
            target_suffix = '-debug'
        else:
            target_suffix = '-opt'

        if self._substs.get('MOZ_BUILD_APP', '') == 'mobile/android':
            if self._substs['ANDROID_CPU_ARCH'] == 'x86':
                return 'android-x86-opt'
            return 'android-api-16' + target_suffix

        target_64bit = False
        if self._substs['target_cpu'] == 'x86_64':
            target_64bit = True

        if self._defines.get('XP_LINUX', False):
            return ('linux64' if target_64bit else 'linux') + target_suffix
        if self._defines.get('XP_WIN', False):
            return ('win64' if target_64bit else 'win32') + target_suffix
        if self._defines.get('XP_MACOSX', False):
            # We only produce unified builds in automation, so the target_cpu
            # check is not relevant.
            return 'macosx64' + target_suffix
        raise Exception('Cannot determine default job for |mach artifact|!')

    def _pushheads_from_rev(self, rev, count):
        """Queries hg.mozilla.org's json-pushlog for pushheads that are nearby
        ancestors of `rev`. Multiple trees are queried, as the `rev` may
        already have been pushed to multiple repositories. For each repository
        containing `rev`, the pushhead introducing `rev` and the previous
        `count` pushheads from that point are included in the output.
        """

        with self._pushhead_cache as pushhead_cache:
            found_pushids = {}
            for tree in CANDIDATE_TREES:
                self.log(logging.INFO, 'artifact',
                         {'tree': tree,
                          'rev': rev},
                         'Attempting to find a pushhead containing {rev} on {tree}.')
                try:
                    pushid = pushhead_cache.parent_pushhead_id(tree, rev)
                    found_pushids[tree] = pushid
                except ValueError:
                    continue

            candidate_pushheads = collections.defaultdict(list)

            for tree, pushid in found_pushids.iteritems():
                end = pushid
                start = pushid - NUM_PUSHHEADS_TO_QUERY_PER_PARENT

                self.log(logging.INFO, 'artifact',
                         {'tree': tree,
                          'pushid': pushid,
                          'num': NUM_PUSHHEADS_TO_QUERY_PER_PARENT},
                         'Retrieving the last {num} pushheads starting with id {pushid} on {tree}')
                for pushhead in pushhead_cache.pushid_range(tree, start, end):
                    candidate_pushheads[pushhead].append(tree)

        return candidate_pushheads

    def _get_hg_revisions_from_git(self):
        rev_list = subprocess.check_output([
            self._git, 'rev-list', '--topo-order',
            '--max-count={num}'.format(num=NUM_REVISIONS_TO_QUERY),
            'HEAD',
        ], cwd=self._topsrcdir)

        hg_hash_list = subprocess.check_output([
            self._git, 'cinnabar', 'git2hg'
        ] + rev_list.splitlines(), cwd=self._topsrcdir)

        zeroes = "0" * 40

        hashes = []
        for hg_hash in hg_hash_list.splitlines():
            hg_hash = hg_hash.strip()
            if not hg_hash or hg_hash == zeroes:
                continue
            hashes.append(hg_hash)
        return hashes

    def _get_recent_public_revisions(self):
        """Returns recent ancestors of the working parent that are likely to
        be known to Mozilla automation.

        If we're using git, retrieves hg revisions from git-cinnabar.
        """
        if self._git:
            return self._get_hg_revisions_from_git()

        # Mercurial updated the ordering of "last" in 4.3. We use revision
        # numbers to order here to accommodate multiple versions of hg.
        last_revs = subprocess.check_output([
            self._hg, 'log',
            '--template', '{rev}:{node}\n',
            '-r', 'last(public() and ::., {num})'.format(
                num=NUM_REVISIONS_TO_QUERY)
        ], cwd=self._topsrcdir).splitlines()
        return [i.split(':')[-1] for i in sorted(last_revs, reverse=True)]

    def _find_pushheads(self):
        """Returns an iterator of recent pushhead revisions, starting with the
        working parent.
        """

        last_revs = self._get_recent_public_revisions()
        candidate_pushheads = self._pushheads_from_rev(last_revs[0].rstrip(),
                                                       NUM_PUSHHEADS_TO_QUERY_PER_PARENT)
        count = 0
        for rev in last_revs:
            rev = rev.rstrip()
            if not rev:
                continue
            if rev not in candidate_pushheads:
                continue
            count += 1
            yield candidate_pushheads[rev], rev

        if not count:
            raise Exception('Could not find any candidate pushheads in the last {num} revisions.\n'
                            'Search started with {rev}, which must be known to Mozilla automation.\n\n'
                            'see https://developer.mozilla.org/en-US/docs/Artifact_builds'.format(
                                rev=last_revs[0], num=NUM_PUSHHEADS_TO_QUERY_PER_PARENT))

    def find_pushhead_artifacts(self, task_cache, job, tree, pushhead):
        try:
            urls = task_cache.artifact_urls(tree, job, pushhead, self._download_symbols)
        except ValueError:
            return None
        if urls:
            self.log(logging.INFO, 'artifact',
                     {'pushhead': pushhead,
                      'tree': tree},
                     'Installing from remote pushhead {pushhead} on {tree}')
            return urls
        return None

    def install_from_file(self, filename, distdir):
        self.log(logging.INFO, 'artifact',
            {'filename': filename},
            'Installing from {filename}')

        # Do we need to post-process?
        processed_filename = filename + PROCESSED_SUFFIX

        if self._skip_cache and os.path.exists(processed_filename):
            self.log(logging.DEBUG, 'artifact',
                {'path': processed_filename},
                'Skipping cache: removing cached processed artifact {path}')
            os.remove(processed_filename)

        if not os.path.exists(processed_filename):
            self.log(logging.INFO, 'artifact',
                {'filename': filename},
                'Processing contents of {filename}')
            self.log(logging.INFO, 'artifact',
                {'processed_filename': processed_filename},
                'Writing processed {processed_filename}')
            self._artifact_job.process_artifact(filename, processed_filename)

        self._artifact_cache._persist_limit.register_file(processed_filename)

        self.log(logging.INFO, 'artifact',
            {'processed_filename': processed_filename},
            'Installing from processed {processed_filename}')

        # Copy all .so files, avoiding modification where possible.
        ensureParentDir(mozpath.join(distdir, '.dummy'))

        with zipfile.ZipFile(processed_filename) as zf:
            for info in zf.infolist():
                if info.filename.endswith('.ini'):
                    continue
                n = mozpath.join(distdir, info.filename)
                fh = FileAvoidWrite(n, mode='rb')
                shutil.copyfileobj(zf.open(info), fh)
                file_existed, file_updated = fh.close()
                self.log(logging.INFO, 'artifact',
                    {'updating': 'Updating' if file_updated else 'Not updating', 'filename': n},
                    '{updating} {filename}')
                if not file_existed or file_updated:
                    # Libraries and binaries may need to be marked executable,
                    # depending on platform.
                    perms = info.external_attr >> 16  # See http://stackoverflow.com/a/434689.
                    perms |= stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH  # u+w, a+r.
                    os.chmod(n, perms)
        return 0

    def install_from_url(self, url, distdir):
        self.log(logging.INFO, 'artifact',
            {'url': url},
            'Installing from {url}')
        filename = self._artifact_cache.fetch(url)
        return self.install_from_file(filename, distdir)

    def _install_from_hg_pushheads(self, hg_pushheads, distdir):
        """Iterate pairs (hg_hash, {tree-set}) associating hg revision hashes
        and tree-sets they are known to be in, trying to download and
        install from each.
        """

        urls = None
        count = 0
        # `with` blocks handle cache persistence.
        with self._task_cache as task_cache:
            for trees, hg_hash in hg_pushheads:
                for tree in trees:
                    count += 1
                    self.log(logging.DEBUG, 'artifact',
                             {'hg_hash': hg_hash,
                              'tree': tree},
                             'Trying to find artifacts for hg revision {hg_hash} on tree {tree}.')
                    urls = self.find_pushhead_artifacts(task_cache, self._job, tree, hg_hash)
                    if urls:
                        for url in urls:
                            if self.install_from_url(url, distdir):
                                return 1
                        return 0

        self.log(logging.ERROR, 'artifact',
                 {'count': count},
                 'Tried {count} pushheads, no built artifacts found.')
        return 1

    def install_from_recent(self, distdir):
        hg_pushheads = self._find_pushheads()
        return self._install_from_hg_pushheads(hg_pushheads, distdir)

    def install_from_revset(self, revset, distdir):
        if self._hg:
            revision = subprocess.check_output([self._hg, 'log', '--template', '{node}\n',
                                                '-r', revset], cwd=self._topsrcdir).strip()
            if len(revision.split('\n')) != 1:
                raise ValueError('hg revision specification must resolve to exactly one commit')
        else:
            revision = subprocess.check_output([self._git, 'rev-parse', revset], cwd=self._topsrcdir).strip()
            revision = subprocess.check_output([self._git, 'cinnabar', 'git2hg', revision], cwd=self._topsrcdir).strip()
            if len(revision.split('\n')) != 1:
                raise ValueError('hg revision specification must resolve to exactly one commit')
            if revision == "0" * 40:
                raise ValueError('git revision specification must resolve to a commit known to hg')

        self.log(logging.INFO, 'artifact',
                 {'revset': revset,
                  'revision': revision},
                 'Will only accept artifacts from a pushhead at {revision} '
                 '(matched revset "{revset}").')
        # Include try in our search to allow pulling from a specific push.
        pushheads = [(list(CANDIDATE_TREES) + ['try'], revision)]
        return self._install_from_hg_pushheads(pushheads, distdir)

    def install_from(self, source, distdir):
        """Install artifacts from a ``source`` into the given ``distdir``.
        """
        if source and os.path.isfile(source):
            return self.install_from_file(source, distdir)
        elif source and urlparse.urlparse(source).scheme:
            return self.install_from_url(source, distdir)
        else:
            if source is None and 'MOZ_ARTIFACT_REVISION' in os.environ:
                source = os.environ['MOZ_ARTIFACT_REVISION']

            if source:
                return self.install_from_revset(source, distdir)

            return self.install_from_recent(distdir)

    def clear_cache(self):
        self.log(logging.INFO, 'artifact',
            {},
            'Deleting cached artifacts and caches.')
        self._task_cache.clear_cache()
        self._artifact_cache.clear_cache()
        self._pushhead_cache.clear_cache()