1# ***** BEGIN LICENSE BLOCK *****
2# This Source Code Form is subject to the terms of the Mozilla Public
3# License, v. 2.0. If a copy of the MPL was not distributed with this file,
4# You can obtain one at http://mozilla.org/MPL/2.0/.
5# ***** END LICENSE BLOCK *****
6"""Generic script objects.
7
8script.py, along with config.py and log.py, represents the core of
9mozharness.
10"""
11
12from __future__ import absolute_import, print_function
13
14import codecs
15import datetime
16import errno
17import fnmatch
18import functools
19import gzip
20import hashlib
21import inspect
22import itertools
23import os
24import platform
25import pprint
26import re
27import shutil
28import socket
29import ssl
30import subprocess
31import sys
32import tarfile
33import time
34import traceback
35import zipfile
36import zlib
37from contextlib import contextmanager
38from io import BytesIO
39
40import six
41from six import binary_type
42
43from mozprocess import ProcessHandler
44
45import mozinfo
46from mozharness.base.config import BaseConfig
47from mozharness.base.log import (
48    DEBUG,
49    ERROR,
50    FATAL,
51    INFO,
52    WARNING,
53    ConsoleLogger,
54    LogMixin,
55    MultiFileLogger,
56    OutputParser,
57    SimpleFileLogger,
58)
59
60try:
61    import httplib
62except ImportError:
63    import http.client as httplib
64try:
65    import simplejson as json
66except ImportError:
67    import json
68try:
69    from urllib2 import quote, urlopen, Request
70except ImportError:
71    from urllib.request import quote, urlopen, Request
72try:
73    import urlparse
74except ImportError:
75    import urllib.parse as urlparse
76if os.name == "nt":
77    import locale
78
79    try:
80        import win32file
81        import win32api
82
83        PYWIN32 = True
84    except ImportError:
85        PYWIN32 = False
86
87try:
88    from urllib2 import HTTPError, URLError
89except ImportError:
90    from urllib.error import HTTPError, URLError
91
92
93class ContentLengthMismatch(Exception):
94    pass
95
96
97def platform_name():
98    pm = PlatformMixin()
99
100    if pm._is_linux() and pm._is_64_bit():
101        return "linux64"
102    elif pm._is_linux() and not pm._is_64_bit():
103        return "linux"
104    elif pm._is_darwin():
105        return "macosx"
106    elif pm._is_windows() and pm._is_64_bit():
107        return "win64"
108    elif pm._is_windows() and not pm._is_64_bit():
109        return "win32"
110    else:
111        return None
112
113
114class PlatformMixin(object):
115    def _is_windows(self):
116        """check if the current operating system is Windows.
117
118        Returns:
119            bool: True if the current platform is Windows, False otherwise
120        """
121        system = platform.system()
122        if system in ("Windows", "Microsoft"):
123            return True
124        if system.startswith("CYGWIN"):
125            return True
126        if os.name == "nt":
127            return True
128
129    def _is_darwin(self):
130        """check if the current operating system is Darwin.
131
132        Returns:
133            bool: True if the current platform is Darwin, False otherwise
134        """
135        if platform.system() in ("Darwin"):
136            return True
137        if sys.platform.startswith("darwin"):
138            return True
139
140    def _is_linux(self):
141        """check if the current operating system is a Linux distribution.
142
143        Returns:
144            bool: True if the current platform is a Linux distro, False otherwise
145        """
146        if platform.system() in ("Linux"):
147            return True
148        if sys.platform.startswith("linux"):
149            return True
150
151    def _is_debian(self):
152        """check if the current operating system is explicitly Debian.
153        This intentionally doesn't count Debian derivatives like Ubuntu.
154
155        Returns:
156            bool: True if the current platform is debian, False otherwise
157        """
158        if not self._is_linux():
159            return False
160        self.info(mozinfo.linux_distro)
161        re_debian_distro = re.compile("debian")
162        return re_debian_distro.match(mozinfo.linux_distro) is not None
163
164    def _is_redhat_based(self):
165        """check if the current operating system is a Redhat derived Linux distribution.
166
167        Returns:
168            bool: True if the current platform is a Redhat Linux distro, False otherwise
169        """
170        if not self._is_linux():
171            return False
172        re_redhat_distro = re.compile("Redhat|Fedora|CentOS|Oracle")
173        return re_redhat_distro.match(mozinfo.linux_distro) is not None
174
175    def _is_64_bit(self):
176        if self._is_darwin():
177            # osx is a special snowflake and to ensure the arch, it is better to use the following
178            return (
179                sys.maxsize > 2 ** 32
180            )  # context: https://docs.python.org/2/library/platform.html
181        else:
182            # Using machine() gives you the architecture of the host rather
183            # than the build type of the Python binary
184            return "64" in platform.machine()
185
186
187# ScriptMixin {{{1
188class ScriptMixin(PlatformMixin):
189    """This mixin contains simple filesystem commands and the like.
190
191    It also contains some very special but very complex methods that,
192    together with logging and config, provide the base for all scripts
193    in this harness.
194
195    WARNING !!!
196    This class depends entirely on `LogMixin` methods in such a way that it will
197    only works if a class inherits from both `ScriptMixin` and `LogMixin`
198    simultaneously.
199
200    Depends on self.config of some sort.
201
202    Attributes:
203        env (dict): a mapping object representing the string environment.
204        script_obj (ScriptMixin): reference to a ScriptMixin instance.
205    """
206
207    env = None
208    script_obj = None
209    ssl_context = None
210
211    def query_filesize(self, file_path):
212        self.info("Determining filesize for %s" % file_path)
213        length = os.path.getsize(file_path)
214        self.info(" %s" % str(length))
215        return length
216
217    # TODO this should be parallelized with the to-be-written BaseHelper!
218    def query_sha512sum(self, file_path):
219        self.info("Determining sha512sum for %s" % file_path)
220        m = hashlib.sha512()
221        contents = self.read_from_file(file_path, verbose=False, open_mode="rb")
222        m.update(contents)
223        sha512 = m.hexdigest()
224        self.info(" %s" % sha512)
225        return sha512
226
227    def platform_name(self):
228        """Return the platform name on which the script is running on.
229        Returns:
230            None: for failure to determine the platform.
231            str: The name of the platform (e.g. linux64)
232        """
233        return platform_name()
234
235    # Simple filesystem commands {{{2
236    def mkdir_p(self, path, error_level=ERROR):
237        """Create a directory if it doesn't exists.
238        This method also logs the creation, error or current existence of the
239        directory to be created.
240
241        Args:
242            path (str): path of the directory to be created.
243            error_level (str): log level name to be used in case of error.
244
245        Returns:
246            None: for sucess.
247            int: -1 on error
248        """
249
250        if not os.path.exists(path):
251            self.info("mkdir: %s" % path)
252            try:
253                os.makedirs(path)
254            except OSError:
255                self.log("Can't create directory %s!" % path, level=error_level)
256                return -1
257        else:
258            self.debug("mkdir_p: %s Already exists." % path)
259
260    def rmtree(self, path, log_level=INFO, error_level=ERROR, exit_code=-1):
261        """Delete an entire directory tree and log its result.
262        This method also logs the platform rmtree function, its retries, errors,
263        and current existence of the directory.
264
265        Args:
266            path (str): path to the directory tree root to remove.
267            log_level (str, optional): log level name to for this operation. Defaults
268                                       to `INFO`.
269            error_level (str, optional): log level name to use in case of error.
270                                         Defaults to `ERROR`.
271            exit_code (int, optional): useless parameter, not use here.
272                                       Defaults to -1
273
274        Returns:
275            None: for success
276        """
277
278        self.log("rmtree: %s" % path, level=log_level)
279        error_message = "Unable to remove %s!" % path
280        if self._is_windows():
281            # Call _rmtree_windows() directly, since even checking
282            # os.path.exists(path) will hang if path is longer than MAX_PATH.
283            self.info("Using _rmtree_windows ...")
284            return self.retry(
285                self._rmtree_windows,
286                error_level=error_level,
287                error_message=error_message,
288                args=(path,),
289                log_level=log_level,
290            )
291        if os.path.exists(path):
292            if os.path.isdir(path):
293                return self.retry(
294                    shutil.rmtree,
295                    error_level=error_level,
296                    error_message=error_message,
297                    retry_exceptions=(OSError,),
298                    args=(path,),
299                    log_level=log_level,
300                )
301            else:
302                return self.retry(
303                    os.remove,
304                    error_level=error_level,
305                    error_message=error_message,
306                    retry_exceptions=(OSError,),
307                    args=(path,),
308                    log_level=log_level,
309                )
310        else:
311            self.debug("%s doesn't exist." % path)
312
313    def query_msys_path(self, path):
314        """replaces the Windows harddrive letter path style with a linux
315        path style, e.g. C:// --> /C/
316        Note: method, not used in any script.
317
318        Args:
319            path (str?): path to convert to the linux path style.
320        Returns:
321            str: in case `path` is a string. The result is the path with the new notation.
322            type(path): `path` itself is returned in case `path` is not str type.
323        """
324        if not isinstance(path, six.string_types):
325            return path
326        path = path.replace("\\", "/")
327
328        def repl(m):
329            return "/%s/" % m.group(1)
330
331        path = re.sub(r"""^([a-zA-Z]):/""", repl, path)
332        return path
333
334    def _rmtree_windows(self, path):
335        """Windows-specific rmtree that handles path lengths longer than MAX_PATH.
336            Ported from clobberer.py.
337
338        Args:
339            path (str): directory path to remove.
340
341        Returns:
342            None: if the path doesn't exists.
343            int: the return number of calling `self.run_command`
344            int: in case the path specified is not a directory but a file.
345                 0 on success, non-zero on error. Note: The returned value
346                 is the result of calling `win32file.DeleteFile`
347        """
348
349        assert self._is_windows()
350        path = os.path.realpath(path)
351        full_path = "\\\\?\\" + path
352        if not os.path.exists(full_path):
353            return
354        if not PYWIN32:
355            if not os.path.isdir(path):
356                return self.run_command('del /F /Q "%s"' % path)
357            else:
358                return self.run_command('rmdir /S /Q "%s"' % path)
359        # Make sure directory is writable
360        win32file.SetFileAttributesW("\\\\?\\" + path, win32file.FILE_ATTRIBUTE_NORMAL)
361        # Since we call rmtree() with a file, sometimes
362        if not os.path.isdir("\\\\?\\" + path):
363            return win32file.DeleteFile("\\\\?\\" + path)
364
365        for ffrec in win32api.FindFiles("\\\\?\\" + path + "\\*.*"):
366            file_attr = ffrec[0]
367            name = ffrec[8]
368            if name == "." or name == "..":
369                continue
370            full_name = os.path.join(path, name)
371
372            if file_attr & win32file.FILE_ATTRIBUTE_DIRECTORY:
373                self._rmtree_windows(full_name)
374            else:
375                try:
376                    win32file.SetFileAttributesW(
377                        "\\\\?\\" + full_name, win32file.FILE_ATTRIBUTE_NORMAL
378                    )
379                    win32file.DeleteFile("\\\\?\\" + full_name)
380                except Exception:
381                    # DeleteFile fails on long paths, del /f /q works just fine
382                    self.run_command('del /F /Q "%s"' % full_name)
383
384        win32file.RemoveDirectory("\\\\?\\" + path)
385
386    def get_filename_from_url(self, url):
387        """parse a filename base on an url.
388
389        Args:
390            url (str): url to parse for the filename
391
392        Returns:
393            str: filename parsed from the url, or `netloc` network location part
394                 of the url.
395        """
396
397        parsed = urlparse.urlsplit(url.rstrip("/"))
398        if parsed.path != "":
399            return parsed.path.rsplit("/", 1)[-1]
400        else:
401            return parsed.netloc
402
403    def _urlopen(self, url, **kwargs):
404        """open the url `url` using `urllib2`.`
405        This method can be overwritten to extend its complexity
406
407        Args:
408            url (str | urllib.request.Request): url to open
409            kwargs: Arbitrary keyword arguments passed to the `urllib.request.urlopen` function.
410
411        Returns:
412            file-like: file-like object with additional methods as defined in
413                       `urllib.request.urlopen`_.
414            None: None may be returned if no handler handles the request.
415
416        Raises:
417            urllib2.URLError: on errors
418
419        .. urillib.request.urlopen:
420        https://docs.python.org/2/library/urllib2.html#urllib2.urlopen
421        """
422        # http://bugs.python.org/issue13359 - urllib2 does not automatically quote the URL
423        url_quoted = quote(url, safe="%/:=&?~#+!$,;'@()*[]|")
424        # windows certificates need to be refreshed (https://bugs.python.org/issue36011)
425        if self.platform_name() in ("win64",) and platform.architecture()[0] in (
426            "x64",
427        ):
428            if self.ssl_context is None:
429                self.ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS)
430                self.ssl_context.load_default_certs()
431            return urlopen(url_quoted, context=self.ssl_context, **kwargs)
432        else:
433            return urlopen(url_quoted, **kwargs)
434
435    def fetch_url_into_memory(self, url):
436        """Downloads a file from a url into memory instead of disk.
437
438        Args:
439            url (str): URL path where the file to be downloaded is located.
440
441        Raises:
442            IOError: When the url points to a file on disk and cannot be found
443            ContentLengthMismatch: When the length of the retrieved content does not match the
444                                   Content-Length response header.
445            ValueError: When the scheme of a url is not what is expected.
446
447        Returns:
448            BytesIO: contents of url
449        """
450        self.info("Fetch {} into memory".format(url))
451        parsed_url = urlparse.urlparse(url)
452
453        if parsed_url.scheme in ("", "file"):
454            path = parsed_url.path
455            if not os.path.isfile(path):
456                raise IOError("Could not find file to extract: {}".format(url))
457
458            content_length = os.stat(path).st_size
459
460            # In case we're referrencing a file without file://
461            if parsed_url.scheme == "":
462                url = "file://%s" % os.path.abspath(url)
463                parsed_url = urlparse.urlparse(url)
464
465        request = Request(url)
466        # When calling fetch_url_into_memory() you should retry when we raise
467        # one of these exceptions:
468        # * Bug 1300663 - HTTPError: HTTP Error 404: Not Found
469        # * Bug 1300413 - HTTPError: HTTP Error 500: Internal Server Error
470        # * Bug 1300943 - HTTPError: HTTP Error 503: Service Unavailable
471        # * Bug 1300953 - URLError: <urlopen error [Errno -2] Name or service not known>
472        # * Bug 1301594 - URLError: <urlopen error [Errno 10054] An existing connection was ...
473        # * Bug 1301597 - URLError: <urlopen error [Errno 8] _ssl.c:504: EOF occurred in ...
474        # * Bug 1301855 - URLError: <urlopen error [Errno 60] Operation timed out>
475        # * Bug 1302237 - URLError: <urlopen error [Errno 104] Connection reset by peer>
476        # * Bug 1301807 - BadStatusLine: ''
477        #
478        # Bug 1309912 - Adding timeout in hopes to solve blocking on response.read() (bug 1300413)
479        response = urlopen(request, timeout=30)
480
481        if parsed_url.scheme in ("http", "https"):
482            content_length = int(response.headers.get("Content-Length"))
483
484        response_body = response.read()
485        response_body_size = len(response_body)
486
487        self.info("Content-Length response header: {}".format(content_length))
488        self.info("Bytes received: {}".format(response_body_size))
489
490        if response_body_size != content_length:
491            raise ContentLengthMismatch(
492                "The retrieved Content-Length header declares a body length "
493                "of {} bytes, while we actually retrieved {} bytes".format(
494                    content_length, response_body_size
495                )
496            )
497
498        if response.info().get("Content-Encoding") == "gzip":
499            self.info('Content-Encoding is "gzip", so decompressing response body')
500            # See http://www.zlib.net/manual.html#Advanced
501            # section "ZEXTERN int ZEXPORT inflateInit2 OF....":
502            #   Add 32 to windowBits to enable zlib and gzip decoding with automatic
503            #   header detection, or add 16 to decode only the gzip format (the zlib
504            #   format will return a Z_DATA_ERROR).
505            # Adding 16 since we only wish to support gzip encoding.
506            file_contents = zlib.decompress(response_body, zlib.MAX_WBITS | 16)
507        else:
508            file_contents = response_body
509
510        # Use BytesIO instead of StringIO
511        # http://stackoverflow.com/questions/34162017/unzip-buffer-with-python/34162395#34162395
512        return BytesIO(file_contents)
513
514    def _download_file(self, url, file_name):
515        """Helper function for download_file()
516        Additionaly this function logs all exceptions as warnings before
517        re-raising them
518
519        Args:
520            url (str): string containing the URL with the file location
521            file_name (str): name of the file where the downloaded file
522                             is written.
523
524        Returns:
525            str: filename of the written file on disk
526
527        Raises:
528            urllib2.URLError: on incomplete download.
529            urllib2.HTTPError: on Http error code
530            socket.timeout: on connection timeout
531            socket.error: on socket error
532        """
533        # If our URLs look like files, prefix them with file:// so they can
534        # be loaded like URLs.
535        if not (url.startswith("http") or url.startswith("file://")):
536            if not os.path.isfile(url):
537                self.fatal("The file %s does not exist" % url)
538            url = "file://%s" % os.path.abspath(url)
539
540        try:
541            f_length = None
542            f = self._urlopen(url, timeout=30)
543
544            if f.info().get("content-length") is not None:
545                f_length = int(f.info()["content-length"])
546                got_length = 0
547            if f.info().get("Content-Encoding") == "gzip":
548                # Note, we'll download the full compressed content into its own
549                # file, since that allows the gzip library to seek through it.
550                # Once downloaded, we'll decompress it into the real target
551                # file, and delete the compressed version.
552                local_file = open(file_name + ".gz", "wb")
553            else:
554                local_file = open(file_name, "wb")
555            while True:
556                block = f.read(1024 ** 2)
557                if not block:
558                    if f_length is not None and got_length != f_length:
559                        raise URLError(
560                            "Download incomplete; content-length was %d, "
561                            "but only received %d" % (f_length, got_length)
562                        )
563                    break
564                local_file.write(block)
565                if f_length is not None:
566                    got_length += len(block)
567            local_file.close()
568            if f.info().get("Content-Encoding") == "gzip":
569                # Decompress file into target location, then remove compressed version
570                with open(file_name, "wb") as f_out:
571                    # On some execution paths, this could be called with python 2.6
572                    # whereby gzip.open(...) cannot be used with a 'with' statement.
573                    # So let's do this the python 2.6 way...
574                    try:
575                        f_in = gzip.open(file_name + ".gz", "rb")
576                        shutil.copyfileobj(f_in, f_out)
577                    finally:
578                        f_in.close()
579                os.remove(file_name + ".gz")
580            return file_name
581        except HTTPError as e:
582            self.warning(
583                "Server returned status %s %s for %s" % (str(e.code), str(e), url)
584            )
585            raise
586        except URLError as e:
587            self.warning("URL Error: %s" % url)
588
589            # Failures due to missing local files won't benefit from retry.
590            # Raise the original OSError.
591            if isinstance(e.args[0], OSError) and e.args[0].errno == errno.ENOENT:
592                raise e.args[0]
593
594            raise
595        except socket.timeout as e:
596            self.warning("Timed out accessing %s: %s" % (url, str(e)))
597            raise
598        except socket.error as e:
599            self.warning("Socket error when accessing %s: %s" % (url, str(e)))
600            raise
601
602    def _retry_download(self, url, error_level, file_name=None, retry_config=None):
603        """Helper method to retry download methods.
604
605        This method calls `self.retry` on `self._download_file` using the passed
606        parameters if a file_name is specified. If no file is specified, we will
607        instead call `self._urlopen`, which grabs the contents of a url but does
608        not create a file on disk.
609
610        Args:
611            url (str): URL path where the file is located.
612            file_name (str): file_name where the file will be written to.
613            error_level (str): log level to use in case an error occurs.
614            retry_config (dict, optional): key-value pairs to be passed to
615                                           `self.retry`. Defaults to `None`
616
617        Returns:
618            str: `self._download_file` return value is returned
619            unknown: `self.retry` `failure_status` is returned on failure, which
620                     defaults to -1
621        """
622        retry_args = dict(
623            failure_status=None,
624            retry_exceptions=(
625                HTTPError,
626                URLError,
627                httplib.BadStatusLine,
628                socket.timeout,
629                socket.error,
630            ),
631            error_message="Can't download from %s to %s!" % (url, file_name),
632            error_level=error_level,
633        )
634
635        if retry_config:
636            retry_args.update(retry_config)
637
638        download_func = self._urlopen
639        kwargs = {"url": url}
640        if file_name:
641            download_func = self._download_file
642            kwargs = {"url": url, "file_name": file_name}
643
644        return self.retry(download_func, kwargs=kwargs, **retry_args)
645
646    def _filter_entries(self, namelist, extract_dirs):
647        """Filter entries of the archive based on the specified list of to extract dirs."""
648        filter_partial = functools.partial(fnmatch.filter, namelist)
649        entries = itertools.chain(*map(filter_partial, extract_dirs or ["*"]))
650
651        for entry in entries:
652            yield entry
653
654    def unzip(self, compressed_file, extract_to, extract_dirs="*", verbose=False):
655        """This method allows to extract a zip file without writing to disk first.
656
657        Args:
658            compressed_file (object): File-like object with the contents of a compressed zip file.
659            extract_to (str): where to extract the compressed file.
660            extract_dirs (list, optional): directories inside the archive file to extract.
661                                           Defaults to '*'.
662            verbose (bool, optional): whether or not extracted content should be displayed.
663                                      Defaults to False.
664
665        Raises:
666            zipfile.BadZipfile: on contents of zipfile being invalid
667        """
668        with zipfile.ZipFile(compressed_file) as bundle:
669            entries = self._filter_entries(bundle.namelist(), extract_dirs)
670
671            for entry in entries:
672                if verbose:
673                    self.info(" {}".format(entry))
674
675                # Exception to be retried:
676                # Bug 1301645 - BadZipfile: Bad CRC-32 for file ...
677                #    http://stackoverflow.com/questions/5624669/strange-badzipfile-bad-crc-32-problem/5626098#5626098
678                # Bug 1301802 - error: Error -3 while decompressing: invalid stored block lengths
679                bundle.extract(entry, path=extract_to)
680
681                # ZipFile doesn't preserve permissions during extraction:
682                # http://bugs.python.org/issue15795
683                fname = os.path.realpath(os.path.join(extract_to, entry))
684                try:
685                    # getinfo() can raise KeyError
686                    mode = bundle.getinfo(entry).external_attr >> 16 & 0x1FF
687                    # Only set permissions if attributes are available. Otherwise all
688                    # permissions will be removed eg. on Windows.
689                    if mode:
690                        os.chmod(fname, mode)
691
692                except KeyError:
693                    self.warning("{} was not found in the zip file".format(entry))
694
695    def deflate(self, compressed_file, mode, extract_to=".", *args, **kwargs):
696        """This method allows to extract a compressed file from a tar, tar.bz2 and tar.gz files.
697
698        Args:
699            compressed_file (object): File-like object with the contents of a compressed file.
700            mode (str): string of the form 'filemode[:compression]' (e.g. 'r:gz' or 'r:bz2')
701            extract_to (str, optional): where to extract the compressed file.
702        """
703        t = tarfile.open(fileobj=compressed_file, mode=mode)
704        t.extractall(path=extract_to)
705
706    def download_unpack(self, url, extract_to=".", extract_dirs="*", verbose=False):
707        """Generic method to download and extract a compressed file without writing it to disk first.
708
709        Args:
710            url (str): URL where the file to be downloaded is located.
711            extract_to (str, optional): directory where the downloaded file will
712                                        be extracted to.
713            extract_dirs (list, optional): directories inside the archive to extract.
714                                           Defaults to `*`. It currently only applies to zip files.
715            verbose (bool, optional): whether or not extracted content should be displayed.
716                                      Defaults to False.
717
718        """
719
720        def _determine_extraction_method_and_kwargs(url):
721            EXTENSION_TO_MIMETYPE = {
722                "bz2": "application/x-bzip2",
723                "gz": "application/x-gzip",
724                "tar": "application/x-tar",
725                "zip": "application/zip",
726            }
727            MIMETYPES = {
728                "application/x-bzip2": {
729                    "function": self.deflate,
730                    "kwargs": {"mode": "r:bz2"},
731                },
732                "application/x-gzip": {
733                    "function": self.deflate,
734                    "kwargs": {"mode": "r:gz"},
735                },
736                "application/x-tar": {
737                    "function": self.deflate,
738                    "kwargs": {"mode": "r"},
739                },
740                "application/zip": {
741                    "function": self.unzip,
742                },
743                "application/x-zip-compressed": {
744                    "function": self.unzip,
745                },
746            }
747
748            filename = url.split("/")[-1]
749            # XXX: bz2/gz instead of tar.{bz2/gz}
750            extension = filename[filename.rfind(".") + 1 :]
751            mimetype = EXTENSION_TO_MIMETYPE[extension]
752            self.debug("Mimetype: {}".format(mimetype))
753
754            function = MIMETYPES[mimetype]["function"]
755            kwargs = {
756                "compressed_file": compressed_file,
757                "extract_to": extract_to,
758                "extract_dirs": extract_dirs,
759                "verbose": verbose,
760            }
761            kwargs.update(MIMETYPES[mimetype].get("kwargs", {}))
762
763            return function, kwargs
764
765        # Many scripts overwrite this method and set extract_dirs to None
766        extract_dirs = "*" if extract_dirs is None else extract_dirs
767        self.info(
768            "Downloading and extracting to {} these dirs {} from {}".format(
769                extract_to,
770                ", ".join(extract_dirs),
771                url,
772            )
773        )
774
775        # 1) Let's fetch the file
776        retry_args = dict(
777            retry_exceptions=(
778                HTTPError,
779                URLError,
780                httplib.BadStatusLine,
781                socket.timeout,
782                socket.error,
783                ContentLengthMismatch,
784            ),
785            sleeptime=30,
786            attempts=5,
787            error_message="Can't download from {}".format(url),
788            error_level=FATAL,
789        )
790        compressed_file = self.retry(
791            self.fetch_url_into_memory, kwargs={"url": url}, **retry_args
792        )
793
794        # 2) We're guaranteed to have download the file with error_level=FATAL
795        #    Let's unpack the file
796        function, kwargs = _determine_extraction_method_and_kwargs(url)
797        try:
798            function(**kwargs)
799        except zipfile.BadZipfile:
800            # Dump the exception and exit
801            self.exception(level=FATAL)
802
803    def load_json_url(self, url, error_level=None, *args, **kwargs):
804        """Returns a json object from a url (it retries)."""
805        contents = self._retry_download(
806            url=url, error_level=error_level, *args, **kwargs
807        )
808        return json.loads(contents.read())
809
810    # http://www.techniqal.com/blog/2008/07/31/python-file-read-write-with-urllib2/
811    # TODO thinking about creating a transfer object.
812    def download_file(
813        self,
814        url,
815        file_name=None,
816        parent_dir=None,
817        create_parent_dir=True,
818        error_level=ERROR,
819        exit_code=3,
820        retry_config=None,
821    ):
822        """Python wget.
823        Download the filename at `url` into `file_name` and put it on `parent_dir`.
824        On error log with the specified `error_level`, on fatal exit with `exit_code`.
825        Execute all the above based on `retry_config` parameter.
826
827        Args:
828            url (str): URL path where the file to be downloaded is located.
829            file_name (str, optional): file_name where the file will be written to.
830                                       Defaults to urls' filename.
831            parent_dir (str, optional): directory where the downloaded file will
832                                        be written to. Defaults to current working
833                                        directory
834            create_parent_dir (bool, optional): create the parent directory if it
835                                                doesn't exist. Defaults to `True`
836            error_level (str, optional): log level to use in case an error occurs.
837                                         Defaults to `ERROR`
838            retry_config (dict, optional): key-value pairs to be passed to
839                                          `self.retry`. Defaults to `None`
840
841        Returns:
842            str: filename where the downloaded file was written to.
843            unknown: on failure, `failure_status` is returned.
844        """
845        if not file_name:
846            try:
847                file_name = self.get_filename_from_url(url)
848            except AttributeError:
849                self.log(
850                    "Unable to get filename from %s; bad url?" % url,
851                    level=error_level,
852                    exit_code=exit_code,
853                )
854                return
855        if parent_dir:
856            file_name = os.path.join(parent_dir, file_name)
857            if create_parent_dir:
858                self.mkdir_p(parent_dir, error_level=error_level)
859        self.info("Downloading %s to %s" % (url, file_name))
860        status = self._retry_download(
861            url=url,
862            error_level=error_level,
863            file_name=file_name,
864            retry_config=retry_config,
865        )
866        if status == file_name:
867            self.info("Downloaded %d bytes." % os.path.getsize(file_name))
868        return status
869
870    def move(self, src, dest, log_level=INFO, error_level=ERROR, exit_code=-1):
871        """recursively move a file or directory (src) to another location (dest).
872
873        Args:
874            src (str): file or directory path to move.
875            dest (str): file or directory path where to move the content to.
876            log_level (str): log level to use for normal operation. Defaults to
877                                `INFO`
878            error_level (str): log level to use on error. Defaults to `ERROR`
879
880        Returns:
881            int: 0 on success. -1 on error.
882        """
883        self.log("Moving %s to %s" % (src, dest), level=log_level)
884        try:
885            shutil.move(src, dest)
886        # http://docs.python.org/tutorial/errors.html
887        except IOError as e:
888            self.log("IO error: %s" % str(e), level=error_level, exit_code=exit_code)
889            return -1
890        except shutil.Error as e:
891            # ERROR level ends up reporting the failure to treeherder &
892            # pollutes the failure summary list.
893            self.log("shutil error: %s" % str(e), level=WARNING, exit_code=exit_code)
894            return -1
895        return 0
896
897    def chmod(self, path, mode):
898        """change `path` mode to `mode`.
899
900        Args:
901            path (str): path whose mode will be modified.
902            mode (hex): one of the values defined at `stat`_
903
904        .. _stat:
905        https://docs.python.org/2/library/os.html#os.chmod
906        """
907
908        self.info("Chmoding %s to %s" % (path, str(oct(mode))))
909        os.chmod(path, mode)
910
911    def copyfile(
912        self,
913        src,
914        dest,
915        log_level=INFO,
916        error_level=ERROR,
917        copystat=False,
918        compress=False,
919    ):
920        """copy or compress `src` into `dest`.
921
922        Args:
923            src (str): filepath to copy.
924            dest (str): filepath where to move the content to.
925            log_level (str, optional): log level to use for normal operation. Defaults to
926                                      `INFO`
927            error_level (str, optional): log level to use on error. Defaults to `ERROR`
928            copystat (bool, optional): whether or not to copy the files metadata.
929                                       Defaults to `False`.
930            compress (bool, optional): whether or not to compress the destination file.
931                                       Defaults to `False`.
932
933        Returns:
934            int: -1 on error
935            None: on success
936        """
937
938        if compress:
939            self.log("Compressing %s to %s" % (src, dest), level=log_level)
940            try:
941                infile = open(src, "rb")
942                outfile = gzip.open(dest, "wb")
943                outfile.writelines(infile)
944                outfile.close()
945                infile.close()
946            except IOError as e:
947                self.log(
948                    "Can't compress %s to %s: %s!" % (src, dest, str(e)),
949                    level=error_level,
950                )
951                return -1
952        else:
953            self.log("Copying %s to %s" % (src, dest), level=log_level)
954            try:
955                shutil.copyfile(src, dest)
956            except (IOError, shutil.Error) as e:
957                self.log(
958                    "Can't copy %s to %s: %s!" % (src, dest, str(e)), level=error_level
959                )
960                return -1
961
962        if copystat:
963            try:
964                shutil.copystat(src, dest)
965            except (IOError, shutil.Error) as e:
966                self.log(
967                    "Can't copy attributes of %s to %s: %s!" % (src, dest, str(e)),
968                    level=error_level,
969                )
970                return -1
971
972    def copytree(
973        self, src, dest, overwrite="no_overwrite", log_level=INFO, error_level=ERROR
974    ):
975        """An implementation of `shutil.copytree` that allows for `dest` to exist
976        and implements different overwrite levels:
977        - 'no_overwrite' will keep all(any) existing files in destination tree
978        - 'overwrite_if_exists' will only overwrite destination paths that have
979                                the same path names relative to the root of the
980                                src and destination tree
981        - 'clobber' will replace the whole destination tree(clobber) if it exists
982
983        Args:
984            src (str): directory path to move.
985            dest (str): directory path where to move the content to.
986            overwrite (str): string specifying the overwrite level.
987            log_level (str, optional): log level to use for normal operation. Defaults to
988                                      `INFO`
989            error_level (str, optional): log level to use on error. Defaults to `ERROR`
990
991        Returns:
992            int: -1 on error
993            None: on success
994        """
995
996        self.info("copying tree: %s to %s" % (src, dest))
997        try:
998            if overwrite == "clobber" or not os.path.exists(dest):
999                self.rmtree(dest)
1000                shutil.copytree(src, dest)
1001            elif overwrite == "no_overwrite" or overwrite == "overwrite_if_exists":
1002                files = os.listdir(src)
1003                for f in files:
1004                    abs_src_f = os.path.join(src, f)
1005                    abs_dest_f = os.path.join(dest, f)
1006                    if not os.path.exists(abs_dest_f):
1007                        if os.path.isdir(abs_src_f):
1008                            self.mkdir_p(abs_dest_f)
1009                            self.copytree(abs_src_f, abs_dest_f, overwrite="clobber")
1010                        else:
1011                            shutil.copy2(abs_src_f, abs_dest_f)
1012                    elif overwrite == "no_overwrite":  # destination path exists
1013                        if os.path.isdir(abs_src_f) and os.path.isdir(abs_dest_f):
1014                            self.copytree(
1015                                abs_src_f, abs_dest_f, overwrite="no_overwrite"
1016                            )
1017                        else:
1018                            self.debug(
1019                                "ignoring path: %s as destination: \
1020                                    %s exists"
1021                                % (abs_src_f, abs_dest_f)
1022                            )
1023                    else:  # overwrite == 'overwrite_if_exists' and destination exists
1024                        self.debug("overwriting: %s with: %s" % (abs_dest_f, abs_src_f))
1025                        self.rmtree(abs_dest_f)
1026
1027                        if os.path.isdir(abs_src_f):
1028                            self.mkdir_p(abs_dest_f)
1029                            self.copytree(
1030                                abs_src_f, abs_dest_f, overwrite="overwrite_if_exists"
1031                            )
1032                        else:
1033                            shutil.copy2(abs_src_f, abs_dest_f)
1034            else:
1035                self.fatal(
1036                    "%s is not a valid argument for param overwrite" % (overwrite)
1037                )
1038        except (IOError, shutil.Error):
1039            self.exception(
1040                "There was an error while copying %s to %s!" % (src, dest),
1041                level=error_level,
1042            )
1043            return -1
1044
1045    def write_to_file(
1046        self,
1047        file_path,
1048        contents,
1049        verbose=True,
1050        open_mode="w",
1051        create_parent_dir=False,
1052        error_level=ERROR,
1053    ):
1054        """Write `contents` to `file_path`, according to `open_mode`.
1055
1056        Args:
1057            file_path (str): filepath where the content will be written to.
1058            contents (str): content to write to the filepath.
1059            verbose (bool, optional): whether or not to log `contents` value.
1060                                      Defaults to `True`
1061            open_mode (str, optional): open mode to use for openning the file.
1062                                       Defaults to `w`
1063            create_parent_dir (bool, optional): whether or not to create the
1064                                                parent directory of `file_path`
1065            error_level (str, optional): log level to use on error. Defaults to `ERROR`
1066
1067        Returns:
1068            str: `file_path` on success
1069            None: on error.
1070        """
1071        self.info("Writing to file %s" % file_path)
1072        if verbose:
1073            self.info("Contents:")
1074            for line in contents.splitlines():
1075                self.info(" %s" % line)
1076        if create_parent_dir:
1077            parent_dir = os.path.dirname(file_path)
1078            self.mkdir_p(parent_dir, error_level=error_level)
1079        try:
1080            fh = open(file_path, open_mode)
1081            try:
1082                fh.write(contents)
1083            except UnicodeEncodeError:
1084                fh.write(contents.encode("utf-8", "replace"))
1085            fh.close()
1086            return file_path
1087        except IOError:
1088            self.log("%s can't be opened for writing!" % file_path, level=error_level)
1089
1090    @contextmanager
1091    def opened(self, file_path, verbose=True, open_mode="r", error_level=ERROR):
1092        """Create a context manager to use on a with statement.
1093
1094        Args:
1095            file_path (str): filepath of the file to open.
1096            verbose (bool, optional): useless parameter, not used here.
1097                Defaults to True.
1098            open_mode (str, optional): open mode to use for openning the file.
1099                Defaults to `r`
1100            error_level (str, optional): log level name to use on error.
1101                Defaults to `ERROR`
1102
1103        Yields:
1104            tuple: (file object, error) pair. In case of error `None` is yielded
1105                as file object, together with the corresponding error.
1106                If there is no error, `None` is returned as the error.
1107        """
1108        # See opened_w_error in http://www.python.org/dev/peps/pep-0343/
1109        self.info("Reading from file %s" % file_path)
1110        try:
1111            fh = open(file_path, open_mode)
1112        except IOError as err:
1113            self.log(
1114                "unable to open %s: %s" % (file_path, err.strerror), level=error_level
1115            )
1116            yield None, err
1117        else:
1118            try:
1119                yield fh, None
1120            finally:
1121                fh.close()
1122
1123    def read_from_file(self, file_path, verbose=True, open_mode="r", error_level=ERROR):
1124        """Use `self.opened` context manager to open a file and read its
1125        content.
1126
1127        Args:
1128            file_path (str): filepath of the file to read.
1129            verbose (bool, optional): whether or not to log the file content.
1130                Defaults to True.
1131            open_mode (str, optional): open mode to use for openning the file.
1132                Defaults to `r`
1133            error_level (str, optional): log level name to use on error.
1134                Defaults to `ERROR`
1135
1136        Returns:
1137            None: on error.
1138            str: file content on success.
1139        """
1140        with self.opened(file_path, verbose, open_mode, error_level) as (fh, err):
1141            if err:
1142                return None
1143            contents = fh.read()
1144            if verbose:
1145                self.info("Contents:")
1146                for line in contents.splitlines():
1147                    self.info(" %s" % line)
1148            return contents
1149
1150    def chdir(self, dir_name):
1151        self.log("Changing directory to %s." % dir_name)
1152        os.chdir(dir_name)
1153
1154    def is_exe(self, fpath):
1155        """
1156        Determine if fpath is a file and if it is executable.
1157        """
1158        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
1159
1160    def which(self, program):
1161        """OS independent implementation of Unix's which command
1162
1163        Args:
1164            program (str): name or path to the program whose executable is
1165                being searched.
1166
1167        Returns:
1168            None: if the executable was not found.
1169            str: filepath of the executable file.
1170        """
1171        if self._is_windows() and not program.endswith(".exe"):
1172            program += ".exe"
1173        fpath, fname = os.path.split(program)
1174        if fpath:
1175            if self.is_exe(program):
1176                return program
1177        else:
1178            # If the exe file is defined in the configs let's use that
1179            exe = self.query_exe(program)
1180            if self.is_exe(exe):
1181                return exe
1182
1183            # If not defined, let's look for it in the $PATH
1184            env = self.query_env()
1185            for path in env["PATH"].split(os.pathsep):
1186                exe_file = os.path.join(path, program)
1187                if self.is_exe(exe_file):
1188                    return exe_file
1189        return None
1190
1191    # More complex commands {{{2
1192    def retry(
1193        self,
1194        action,
1195        attempts=None,
1196        sleeptime=60,
1197        max_sleeptime=5 * 60,
1198        retry_exceptions=(Exception,),
1199        good_statuses=None,
1200        cleanup=None,
1201        error_level=ERROR,
1202        error_message="%(action)s failed after %(attempts)d tries!",
1203        failure_status=-1,
1204        log_level=INFO,
1205        args=(),
1206        kwargs={},
1207    ):
1208        """generic retry command. Ported from `util.retry`_
1209
1210        Args:
1211            action (func): callable object to retry.
1212            attempts (int, optinal): maximum number of times to call actions.
1213                Defaults to `self.config.get('global_retries', 5)`
1214            sleeptime (int, optional): number of seconds to wait between
1215                attempts. Defaults to 60 and doubles each retry attempt, to
1216                a maximum of `max_sleeptime'
1217            max_sleeptime (int, optional): maximum value of sleeptime. Defaults
1218                to 5 minutes
1219            retry_exceptions (tuple, optional): Exceptions that should be caught.
1220                If exceptions other than those listed in `retry_exceptions' are
1221                raised from `action', they will be raised immediately. Defaults
1222                to (Exception)
1223            good_statuses (object, optional): return values which, if specified,
1224                will result in retrying if the return value isn't listed.
1225                Defaults to `None`.
1226            cleanup (func, optional): If `cleanup' is provided and callable
1227                it will be called immediately after an Exception is caught.
1228                No arguments will be passed to it. If your cleanup function
1229                requires arguments it is recommended that you wrap it in an
1230                argumentless function.
1231                Defaults to `None`.
1232            error_level (str, optional): log level name in case of error.
1233                Defaults to `ERROR`.
1234            error_message (str, optional): string format to use in case
1235                none of the attempts success. Defaults to
1236                '%(action)s failed after %(attempts)d tries!'
1237            failure_status (int, optional): flag to return in case the retries
1238                were not successfull. Defaults to -1.
1239            log_level (str, optional): log level name to use for normal activity.
1240                Defaults to `INFO`.
1241            args (tuple, optional): positional arguments to pass onto `action`.
1242            kwargs (dict, optional): key-value arguments to pass onto `action`.
1243
1244        Returns:
1245            object: return value of `action`.
1246            int: failure status in case of failure retries.
1247        """
1248        if not callable(action):
1249            self.fatal("retry() called with an uncallable method %s!" % action)
1250        if cleanup and not callable(cleanup):
1251            self.fatal("retry() called with an uncallable cleanup method %s!" % cleanup)
1252        if not attempts:
1253            attempts = self.config.get("global_retries", 5)
1254        if max_sleeptime < sleeptime:
1255            self.debug(
1256                "max_sleeptime %d less than sleeptime %d" % (max_sleeptime, sleeptime)
1257            )
1258        n = 0
1259        while n <= attempts:
1260            retry = False
1261            n += 1
1262            try:
1263                self.log(
1264                    "retry: Calling %s with args: %s, kwargs: %s, attempt #%d"
1265                    % (action.__name__, str(args), str(kwargs), n),
1266                    level=log_level,
1267                )
1268                status = action(*args, **kwargs)
1269                if good_statuses and status not in good_statuses:
1270                    retry = True
1271            except retry_exceptions as e:
1272                retry = True
1273                error_message = "%s\nCaught exception: %s" % (error_message, str(e))
1274                self.log(
1275                    "retry: attempt #%d caught %s exception: %s"
1276                    % (n, type(e).__name__, str(e)),
1277                    level=INFO,
1278                )
1279
1280            if not retry:
1281                return status
1282            else:
1283                if cleanup:
1284                    cleanup()
1285                if n == attempts:
1286                    self.log(
1287                        error_message % {"action": action, "attempts": n},
1288                        level=error_level,
1289                    )
1290                    return failure_status
1291                if sleeptime > 0:
1292                    self.log(
1293                        "retry: Failed, sleeping %d seconds before retrying"
1294                        % sleeptime,
1295                        level=log_level,
1296                    )
1297                    time.sleep(sleeptime)
1298                    sleeptime = sleeptime * 2
1299                    if sleeptime > max_sleeptime:
1300                        sleeptime = max_sleeptime
1301
1302    def query_env(
1303        self,
1304        partial_env=None,
1305        replace_dict=None,
1306        purge_env=(),
1307        set_self_env=None,
1308        log_level=DEBUG,
1309        avoid_host_env=False,
1310    ):
1311        """Environment query/generation method.
1312        The default, self.query_env(), will look for self.config['env']
1313        and replace any special strings in there ( %(PATH)s ).
1314        It will then store it as self.env for speeding things up later.
1315
1316        If you specify partial_env, partial_env will be used instead of
1317        self.config['env'], and we don't save self.env as it's a one-off.
1318
1319
1320        Args:
1321            partial_env (dict, optional): key-value pairs of the name and value
1322                of different environment variables. Defaults to an empty dictionary.
1323            replace_dict (dict, optional): key-value pairs to replace the old
1324                environment variables.
1325            purge_env (list): environment names to delete from the final
1326                environment dictionary.
1327            set_self_env (boolean, optional): whether or not the environment
1328                variables dictionary should be copied to `self`.
1329                Defaults to True.
1330            log_level (str, optional): log level name to use on normal operation.
1331                Defaults to `DEBUG`.
1332            avoid_host_env (boolean, optional): if set to True, we will not use
1333                any environment variables set on the host except PATH.
1334                Defaults to False.
1335
1336        Returns:
1337            dict: environment variables names with their values.
1338        """
1339        if partial_env is None:
1340            if self.env is not None:
1341                return self.env
1342            partial_env = self.config.get("env", None)
1343            if partial_env is None:
1344                partial_env = {}
1345            if set_self_env is None:
1346                set_self_env = True
1347
1348        env = {"PATH": os.environ["PATH"]} if avoid_host_env else os.environ.copy()
1349
1350        default_replace_dict = self.query_abs_dirs()
1351        default_replace_dict["PATH"] = os.environ["PATH"]
1352        if not replace_dict:
1353            replace_dict = default_replace_dict
1354        else:
1355            for key in default_replace_dict:
1356                if key not in replace_dict:
1357                    replace_dict[key] = default_replace_dict[key]
1358        for key in partial_env.keys():
1359            env[key] = partial_env[key] % replace_dict
1360            self.log("ENV: %s is now %s" % (key, env[key]), level=log_level)
1361        for k in purge_env:
1362            if k in env:
1363                del env[k]
1364        if os.name == "nt":
1365            pref_encoding = locale.getpreferredencoding()
1366            for k, v in six.iteritems(env):
1367                # When run locally on Windows machines, some environment
1368                # variables may be unicode.
1369                env[k] = six.ensure_str(v, pref_encoding)
1370        if set_self_env:
1371            self.env = env
1372        return env
1373
1374    def query_exe(
1375        self,
1376        exe_name,
1377        exe_dict="exes",
1378        default=None,
1379        return_type=None,
1380        error_level=FATAL,
1381    ):
1382        """One way to work around PATH rewrites.
1383
1384        By default, return exe_name, and we'll fall through to searching
1385        os.environ["PATH"].
1386        However, if self.config[exe_dict][exe_name] exists, return that.
1387        This lets us override exe paths via config file.
1388
1389        If we need runtime setting, we can build in self.exes support later.
1390
1391        Args:
1392            exe_name (str): name of the executable to search for.
1393            exe_dict(str, optional): name of the dictionary of executables
1394              present in `self.config`. Defaults to `exes`.
1395            default (str, optional): default name of the executable to search
1396              for. Defaults to `exe_name`.
1397            return_type (str, optional): type to which the original return
1398              value will be turn into. Only 'list', 'string' and `None` are
1399              supported. Defaults to `None`.
1400            error_level (str, optional): log level name to use on error.
1401
1402        Returns:
1403            list: in case return_type is 'list'
1404            str: in case return_type is 'string'
1405            None: in case return_type is `None`
1406            Any: if the found executable is not of type list, tuple nor str.
1407        """
1408        if default is None:
1409            default = exe_name
1410        exe = self.config.get(exe_dict, {}).get(exe_name, default)
1411        repl_dict = {}
1412        if hasattr(self.script_obj, "query_abs_dirs"):
1413            # allow for 'make': '%(abs_work_dir)s/...' etc.
1414            dirs = self.script_obj.query_abs_dirs()
1415            repl_dict.update(dirs)
1416        if isinstance(exe, dict):
1417            found = False
1418            # allow for searchable paths of the exe
1419            for name, path in six.iteritems(exe):
1420                if isinstance(path, list) or isinstance(path, tuple):
1421                    path = [x % repl_dict for x in path]
1422                    if all([os.path.exists(section) for section in path]):
1423                        found = True
1424                elif isinstance(path, str):
1425                    path = path % repl_dict
1426                    if os.path.exists(path):
1427                        found = True
1428                else:
1429                    self.log(
1430                        "a exes %s dict's value is not a string, list, or tuple. Got key "
1431                        "%s and value %s" % (exe_name, name, str(path)),
1432                        level=error_level,
1433                    )
1434                if found:
1435                    exe = path
1436                    break
1437            else:
1438                self.log(
1439                    "query_exe was a searchable dict but an existing "
1440                    "path could not be determined. Tried searching in "
1441                    "paths: %s" % (str(exe)),
1442                    level=error_level,
1443                )
1444                return None
1445        elif isinstance(exe, list) or isinstance(exe, tuple):
1446            exe = [x % repl_dict for x in exe]
1447        elif isinstance(exe, str):
1448            exe = exe % repl_dict
1449        else:
1450            self.log(
1451                "query_exe: %s is not a list, tuple, dict, or string: "
1452                "%s!" % (exe_name, str(exe)),
1453                level=error_level,
1454            )
1455            return exe
1456        if return_type == "list":
1457            if isinstance(exe, str):
1458                exe = [exe]
1459        elif return_type == "string":
1460            if isinstance(exe, list):
1461                exe = subprocess.list2cmdline(exe)
1462        elif return_type is not None:
1463            self.log(
1464                "Unknown return_type type %s requested in query_exe!" % return_type,
1465                level=error_level,
1466            )
1467        return exe
1468
1469    def run_command(
1470        self,
1471        command,
1472        cwd=None,
1473        error_list=None,
1474        halt_on_failure=False,
1475        success_codes=None,
1476        env=None,
1477        partial_env=None,
1478        return_type="status",
1479        throw_exception=False,
1480        output_parser=None,
1481        output_timeout=None,
1482        fatal_exit_code=2,
1483        error_level=ERROR,
1484        **kwargs
1485    ):
1486        """Run a command, with logging and error parsing.
1487        TODO: context_lines
1488
1489        error_list example:
1490        [{'regex': re.compile('^Error: LOL J/K'), level=IGNORE},
1491         {'regex': re.compile('^Error:'), level=ERROR, contextLines='5:5'},
1492         {'substr': 'THE WORLD IS ENDING', level=FATAL, contextLines='20:'}
1493        ]
1494        (context_lines isn't written yet)
1495
1496        Args:
1497            command (str | list | tuple): command or sequence of commands to
1498              execute and log.
1499            cwd (str, optional): directory path from where to execute the
1500              command. Defaults to `None`.
1501            error_list (list, optional): list of errors to pass to
1502              `mozharness.base.log.OutputParser`. Defaults to `None`.
1503            halt_on_failure (bool, optional): whether or not to redefine the
1504              log level as `FATAL` on errors. Defaults to False.
1505            success_codes (int, optional): numeric value to compare against
1506              the command return value.
1507            env (dict, optional): key-value of environment values to use to
1508              run the command. Defaults to None.
1509            partial_env (dict, optional): key-value of environment values to
1510              replace from the current environment values. Defaults to None.
1511            return_type (str, optional): if equal to 'num_errors' then the
1512              amount of errors matched by `error_list` is returned. Defaults
1513              to 'status'.
1514            throw_exception (bool, optional): whether or not to raise an
1515              exception if the return value of the command doesn't match
1516              any of the `success_codes`. Defaults to False.
1517            output_parser (OutputParser, optional): lets you provide an
1518              instance of your own OutputParser subclass. Defaults to `OutputParser`.
1519            output_timeout (int): amount of seconds to wait for output before
1520              the process is killed.
1521            fatal_exit_code (int, optional): call `self.fatal` if the return value
1522              of the command is not in `success_codes`. Defaults to 2.
1523            error_level (str, optional): log level name to use on error. Defaults
1524              to `ERROR`.
1525            **kwargs: Arbitrary keyword arguments.
1526
1527        Returns:
1528            int: -1 on error.
1529            Any: `command` return value is returned otherwise.
1530        """
1531        if success_codes is None:
1532            success_codes = [0]
1533        if cwd is not None:
1534            if not os.path.isdir(cwd):
1535                level = error_level
1536                if halt_on_failure:
1537                    level = FATAL
1538                self.log(
1539                    "Can't run command %s in non-existent directory '%s'!"
1540                    % (command, cwd),
1541                    level=level,
1542                )
1543                return -1
1544            self.info("Running command: %s in %s" % (command, cwd))
1545        else:
1546            self.info("Running command: %s" % command)
1547        if isinstance(command, list) or isinstance(command, tuple):
1548            self.info("Copy/paste: %s" % subprocess.list2cmdline(command))
1549        shell = True
1550        if isinstance(command, list) or isinstance(command, tuple):
1551            shell = False
1552        if env is None:
1553            if partial_env:
1554                self.info("Using partial env: %s" % pprint.pformat(partial_env))
1555                env = self.query_env(partial_env=partial_env)
1556        else:
1557            if hasattr(self, "previous_env") and env == self.previous_env:
1558                self.info("Using env: (same as previous command)")
1559            else:
1560                self.info("Using env: %s" % pprint.pformat(env))
1561                self.previous_env = env
1562
1563        if output_parser is None:
1564            parser = OutputParser(
1565                config=self.config, log_obj=self.log_obj, error_list=error_list
1566            )
1567        else:
1568            parser = output_parser
1569
1570        try:
1571            if output_timeout:
1572
1573                def processOutput(line):
1574                    parser.add_lines(line)
1575
1576                def onTimeout():
1577                    self.info(
1578                        "Automation Error: mozprocess timed out after "
1579                        "%s seconds running %s" % (str(output_timeout), str(command))
1580                    )
1581
1582                p = ProcessHandler(
1583                    command,
1584                    shell=shell,
1585                    env=env,
1586                    cwd=cwd,
1587                    storeOutput=False,
1588                    onTimeout=(onTimeout,),
1589                    processOutputLine=[processOutput],
1590                )
1591                self.info(
1592                    "Calling %s with output_timeout %d" % (command, output_timeout)
1593                )
1594                p.run(outputTimeout=output_timeout)
1595                p.wait()
1596                if p.timedOut:
1597                    self.log(
1598                        "timed out after %s seconds of no output" % output_timeout,
1599                        level=error_level,
1600                    )
1601                returncode = int(p.proc.returncode)
1602            else:
1603                p = subprocess.Popen(
1604                    command,
1605                    shell=shell,
1606                    stdout=subprocess.PIPE,
1607                    cwd=cwd,
1608                    stderr=subprocess.STDOUT,
1609                    env=env,
1610                    bufsize=0,
1611                )
1612                loop = True
1613                while loop:
1614                    if p.poll() is not None:
1615                        """Avoid losing the final lines of the log?"""
1616                        loop = False
1617                    while True:
1618                        line = p.stdout.readline()
1619                        if not line:
1620                            break
1621                        parser.add_lines(line)
1622                returncode = p.returncode
1623        except KeyboardInterrupt:
1624            level = error_level
1625            if halt_on_failure:
1626                level = FATAL
1627            self.log(
1628                "Process interrupted by the user, killing process with pid %s" % p.pid,
1629                level=level,
1630            )
1631            p.kill()
1632            return -1
1633        except OSError as e:
1634            level = error_level
1635            if halt_on_failure:
1636                level = FATAL
1637            self.log(
1638                "caught OS error %s: %s while running %s"
1639                % (e.errno, e.strerror, command),
1640                level=level,
1641            )
1642            return -1
1643
1644        return_level = INFO
1645        if returncode not in success_codes:
1646            return_level = error_level
1647            if throw_exception:
1648                raise subprocess.CalledProcessError(returncode, command)
1649        self.log("Return code: %d" % returncode, level=return_level)
1650
1651        if halt_on_failure:
1652            _fail = False
1653            if returncode not in success_codes:
1654                self.log(
1655                    "%s not in success codes: %s" % (returncode, success_codes),
1656                    level=error_level,
1657                )
1658                _fail = True
1659            if parser.num_errors:
1660                self.log("failures found while parsing output", level=error_level)
1661                _fail = True
1662            if _fail:
1663                self.return_code = fatal_exit_code
1664                self.fatal(
1665                    "Halting on failure while running %s" % command,
1666                    exit_code=fatal_exit_code,
1667                )
1668        if return_type == "num_errors":
1669            return parser.num_errors
1670        return returncode
1671
1672    def get_output_from_command(
1673        self,
1674        command,
1675        cwd=None,
1676        halt_on_failure=False,
1677        env=None,
1678        silent=False,
1679        log_level=INFO,
1680        tmpfile_base_path="tmpfile",
1681        return_type="output",
1682        save_tmpfiles=False,
1683        throw_exception=False,
1684        fatal_exit_code=2,
1685        ignore_errors=False,
1686        success_codes=None,
1687    ):
1688        """Similar to run_command, but where run_command is an
1689        os.system(command) analog, get_output_from_command is a `command`
1690        analog.
1691
1692        Less error checking by design, though if we figure out how to
1693        do it without borking the output, great.
1694
1695        TODO: binary mode? silent is kinda like that.
1696        TODO: since p.wait() can take a long time, optionally log something
1697        every N seconds?
1698        TODO: optionally only keep the first or last (N) line(s) of output?
1699        TODO: optionally only return the tmp_stdout_filename?
1700
1701        ignore_errors=True is for the case where a command might produce standard
1702        error output, but you don't particularly care; setting to True will
1703        cause standard error to be logged at DEBUG rather than ERROR
1704
1705        Args:
1706            command (str | list): command or list of commands to
1707              execute and log.
1708            cwd (str, optional): directory path from where to execute the
1709              command. Defaults to `None`.
1710            halt_on_failure (bool, optional): whether or not to redefine the
1711              log level as `FATAL` on error. Defaults to False.
1712            env (dict, optional): key-value of environment values to use to
1713              run the command. Defaults to None.
1714            silent (bool, optional): whether or not to output the stdout of
1715              executing the command. Defaults to False.
1716            log_level (str, optional): log level name to use on normal execution.
1717              Defaults to `INFO`.
1718            tmpfile_base_path (str, optional): base path of the file to which
1719              the output will be writen to. Defaults to 'tmpfile'.
1720            return_type (str, optional): if equal to 'output' then the complete
1721              output of the executed command is returned, otherwise the written
1722              filenames are returned. Defaults to 'output'.
1723            save_tmpfiles (bool, optional): whether or not to save the temporary
1724              files created from the command output. Defaults to False.
1725            throw_exception (bool, optional): whether or not to raise an
1726              exception if the return value of the command is not zero.
1727              Defaults to False.
1728            fatal_exit_code (int, optional): call self.fatal if the return value
1729              of the command match this value.
1730            ignore_errors (bool, optional): whether or not to change the log
1731              level to `ERROR` for the output of stderr. Defaults to False.
1732            success_codes (int, optional): numeric value to compare against
1733              the command return value.
1734
1735        Returns:
1736            None: if the cwd is not a directory.
1737            None: on IOError.
1738            tuple: stdout and stderr filenames.
1739            str: stdout output.
1740        """
1741        if cwd:
1742            if not os.path.isdir(cwd):
1743                level = ERROR
1744                if halt_on_failure:
1745                    level = FATAL
1746                self.log(
1747                    "Can't run command %s in non-existent directory %s!"
1748                    % (command, cwd),
1749                    level=level,
1750                )
1751                return None
1752            self.info("Getting output from command: %s in %s" % (command, cwd))
1753        else:
1754            self.info("Getting output from command: %s" % command)
1755        if isinstance(command, list):
1756            self.info("Copy/paste: %s" % subprocess.list2cmdline(command))
1757        # This could potentially return something?
1758        tmp_stdout = None
1759        tmp_stderr = None
1760        tmp_stdout_filename = "%s_stdout" % tmpfile_base_path
1761        tmp_stderr_filename = "%s_stderr" % tmpfile_base_path
1762        if success_codes is None:
1763            success_codes = [0]
1764
1765        # TODO probably some more elegant solution than 2 similar passes
1766        try:
1767            tmp_stdout = open(tmp_stdout_filename, "w")
1768        except IOError:
1769            level = ERROR
1770            if halt_on_failure:
1771                level = FATAL
1772            self.log(
1773                "Can't open %s for writing!" % tmp_stdout_filename + self.exception(),
1774                level=level,
1775            )
1776            return None
1777        try:
1778            tmp_stderr = open(tmp_stderr_filename, "w")
1779        except IOError:
1780            level = ERROR
1781            if halt_on_failure:
1782                level = FATAL
1783            self.log(
1784                "Can't open %s for writing!" % tmp_stderr_filename + self.exception(),
1785                level=level,
1786            )
1787            return None
1788        shell = True
1789        if isinstance(command, list):
1790            shell = False
1791
1792        p = subprocess.Popen(
1793            command,
1794            shell=shell,
1795            stdout=tmp_stdout,
1796            cwd=cwd,
1797            stderr=tmp_stderr,
1798            env=env,
1799            bufsize=0,
1800        )
1801        # XXX: changed from self.debug to self.log due to this error:
1802        #      TypeError: debug() takes exactly 1 argument (2 given)
1803        self.log(
1804            "Temporary files: %s and %s" % (tmp_stdout_filename, tmp_stderr_filename),
1805            level=DEBUG,
1806        )
1807        p.wait()
1808        tmp_stdout.close()
1809        tmp_stderr.close()
1810        return_level = DEBUG
1811        output = None
1812        if return_type == "output" or not silent:
1813            if os.path.exists(tmp_stdout_filename) and os.path.getsize(
1814                tmp_stdout_filename
1815            ):
1816                output = self.read_from_file(tmp_stdout_filename, verbose=False)
1817                if not silent:
1818                    self.log("Output received:", level=log_level)
1819                    output_lines = output.rstrip().splitlines()
1820                    for line in output_lines:
1821                        if not line or line.isspace():
1822                            continue
1823                        if isinstance(line, binary_type):
1824                            line = line.decode("utf-8")
1825                        self.log(" %s" % line, level=log_level)
1826                    output = "\n".join(output_lines)
1827        if os.path.exists(tmp_stderr_filename) and os.path.getsize(tmp_stderr_filename):
1828            if not ignore_errors:
1829                return_level = ERROR
1830            self.log("Errors received:", level=return_level)
1831            errors = self.read_from_file(tmp_stderr_filename, verbose=False)
1832            for line in errors.rstrip().splitlines():
1833                if not line or line.isspace():
1834                    continue
1835                if isinstance(line, binary_type):
1836                    line = line.decode("utf-8")
1837                self.log(" %s" % line, level=return_level)
1838        elif p.returncode not in success_codes and not ignore_errors:
1839            return_level = ERROR
1840        # Clean up.
1841        if not save_tmpfiles:
1842            self.rmtree(tmp_stderr_filename, log_level=DEBUG)
1843            self.rmtree(tmp_stdout_filename, log_level=DEBUG)
1844        if p.returncode and throw_exception:
1845            raise subprocess.CalledProcessError(p.returncode, command)
1846        self.log("Return code: %d" % p.returncode, level=return_level)
1847        if halt_on_failure and return_level == ERROR:
1848            self.return_code = fatal_exit_code
1849            self.fatal(
1850                "Halting on failure while running %s" % command,
1851                exit_code=fatal_exit_code,
1852            )
1853        # Hm, options on how to return this? I bet often we'll want
1854        # output_lines[0] with no newline.
1855        if return_type != "output":
1856            return (tmp_stdout_filename, tmp_stderr_filename)
1857        else:
1858            return output
1859
1860    def _touch_file(self, file_name, times=None, error_level=FATAL):
1861        """touch a file.
1862
1863        Args:
1864            file_name (str): name of the file to touch.
1865            times (tuple, optional): 2-tuple as specified by `os.utime`_
1866              Defaults to None.
1867            error_level (str, optional): log level name in case of error.
1868              Defaults to `FATAL`.
1869
1870        .. _`os.utime`:
1871           https://docs.python.org/3.4/library/os.html?highlight=os.utime#os.utime
1872        """
1873        self.info("Touching: %s" % file_name)
1874        try:
1875            os.utime(file_name, times)
1876        except OSError:
1877            try:
1878                open(file_name, "w").close()
1879            except IOError as e:
1880                msg = "I/O error(%s): %s" % (e.errno, e.strerror)
1881                self.log(msg, error_level=error_level)
1882        os.utime(file_name, times)
1883
1884    def unpack(
1885        self,
1886        filename,
1887        extract_to,
1888        extract_dirs=None,
1889        error_level=ERROR,
1890        fatal_exit_code=2,
1891        verbose=False,
1892    ):
1893        """The method allows to extract a file regardless of its extension.
1894
1895        Args:
1896            filename (str): filename of the compressed file.
1897            extract_to (str): where to extract the compressed file.
1898            extract_dirs (list, optional): directories inside the archive file to extract.
1899                                           Defaults to `None`.
1900            fatal_exit_code (int, optional): call `self.fatal` if the return value
1901              of the command is not in `success_codes`. Defaults to 2.
1902            verbose (bool, optional): whether or not extracted content should be displayed.
1903                                      Defaults to False.
1904
1905        Raises:
1906            IOError: on `filename` file not found.
1907
1908        """
1909        if not os.path.isfile(filename):
1910            raise IOError("Could not find file to extract: %s" % filename)
1911
1912        if zipfile.is_zipfile(filename):
1913            try:
1914                self.info(
1915                    "Using ZipFile to extract {} to {}".format(filename, extract_to)
1916                )
1917                with zipfile.ZipFile(filename) as bundle:
1918                    for entry in self._filter_entries(bundle.namelist(), extract_dirs):
1919                        if verbose:
1920                            self.info(" %s" % entry)
1921                        bundle.extract(entry, path=extract_to)
1922
1923                        # ZipFile doesn't preserve permissions during extraction:
1924                        # http://bugs.python.org/issue15795
1925                        fname = os.path.realpath(os.path.join(extract_to, entry))
1926                        mode = bundle.getinfo(entry).external_attr >> 16 & 0x1FF
1927                        # Only set permissions if attributes are available. Otherwise all
1928                        # permissions will be removed eg. on Windows.
1929                        if mode:
1930                            os.chmod(fname, mode)
1931            except zipfile.BadZipfile as e:
1932                self.log(
1933                    "%s (%s)" % (str(e), filename),
1934                    level=error_level,
1935                    exit_code=fatal_exit_code,
1936                )
1937
1938        # Bug 1211882 - is_tarfile cannot be trusted for dmg files
1939        elif tarfile.is_tarfile(filename) and not filename.lower().endswith(".dmg"):
1940            try:
1941                self.info(
1942                    "Using TarFile to extract {} to {}".format(filename, extract_to)
1943                )
1944                with tarfile.open(filename) as bundle:
1945                    for entry in self._filter_entries(bundle.getnames(), extract_dirs):
1946                        if verbose:
1947                            self.info(" %s" % entry)
1948                        bundle.extract(entry, path=extract_to)
1949            except tarfile.TarError as e:
1950                self.log(
1951                    "%s (%s)" % (str(e), filename),
1952                    level=error_level,
1953                    exit_code=fatal_exit_code,
1954                )
1955        else:
1956            self.log(
1957                "No extraction method found for: %s" % filename,
1958                level=error_level,
1959                exit_code=fatal_exit_code,
1960            )
1961
1962    def is_taskcluster(self):
1963        """Returns boolean indicating if we're running in TaskCluster."""
1964        # This may need expanding in the future to work on
1965        return "TASKCLUSTER_WORKER_TYPE" in os.environ
1966
1967
1968def PreScriptRun(func):
1969    """Decorator for methods that will be called before script execution.
1970
1971    Each method on a BaseScript having this decorator will be called at the
1972    beginning of BaseScript.run().
1973
1974    The return value is ignored. Exceptions will abort execution.
1975    """
1976    func._pre_run_listener = True
1977    return func
1978
1979
1980def PostScriptRun(func):
1981    """Decorator for methods that will be called after script execution.
1982
1983    This is similar to PreScriptRun except it is called at the end of
1984    execution. The method will always be fired, even if execution fails.
1985    """
1986    func._post_run_listener = True
1987    return func
1988
1989
1990def PreScriptAction(action=None):
1991    """Decorator for methods that will be called at the beginning of each action.
1992
1993    Each method on a BaseScript having this decorator will be called during
1994    BaseScript.run() before an individual action is executed. The method will
1995    receive the action's name as an argument.
1996
1997    If no values are passed to the decorator, it will be applied to every
1998    action. If a string is passed, the decorated function will only be called
1999    for the action of that name.
2000
2001    The return value of the method is ignored. Exceptions will abort execution.
2002    """
2003
2004    def _wrapped(func):
2005        func._pre_action_listener = action
2006        return func
2007
2008    def _wrapped_none(func):
2009        func._pre_action_listener = None
2010        return func
2011
2012    if type(action) == type(_wrapped):
2013        return _wrapped_none(action)
2014
2015    return _wrapped
2016
2017
2018def PostScriptAction(action=None):
2019    """Decorator for methods that will be called at the end of each action.
2020
2021    This behaves similarly to PreScriptAction. It varies in that it is called
2022    after execution of the action.
2023
2024    The decorated method will receive the action name as a positional argument.
2025    It will then receive the following named arguments:
2026
2027        success - Bool indicating whether the action finished successfully.
2028
2029    The decorated method will always be called, even if the action threw an
2030    exception.
2031
2032    The return value is ignored.
2033    """
2034
2035    def _wrapped(func):
2036        func._post_action_listener = action
2037        return func
2038
2039    def _wrapped_none(func):
2040        func._post_action_listener = None
2041        return func
2042
2043    if type(action) == type(_wrapped):
2044        return _wrapped_none(action)
2045
2046    return _wrapped
2047
2048
2049# BaseScript {{{1
2050class BaseScript(ScriptMixin, LogMixin, object):
2051    def __init__(
2052        self,
2053        config_options=None,
2054        ConfigClass=BaseConfig,
2055        default_log_level="info",
2056        **kwargs
2057    ):
2058        self._return_code = 0
2059        super(BaseScript, self).__init__()
2060
2061        self.log_obj = None
2062        self.abs_dirs = None
2063        if config_options is None:
2064            config_options = []
2065        self.summary_list = []
2066        self.failures = []
2067        rw_config = ConfigClass(config_options=config_options, **kwargs)
2068        self.config = rw_config.get_read_only_config()
2069        self.actions = tuple(rw_config.actions)
2070        self.all_actions = tuple(rw_config.all_actions)
2071        self.env = None
2072        self.new_log_obj(default_log_level=default_log_level)
2073        self.script_obj = self
2074
2075        # Indicate we're a source checkout if VCS directory is present at the
2076        # appropriate place. This code will break if this file is ever moved
2077        # to another directory.
2078        self.topsrcdir = None
2079
2080        srcreldir = "testing/mozharness/mozharness/base"
2081        here = os.path.normpath(os.path.dirname(__file__))
2082        if here.replace("\\", "/").endswith(srcreldir):
2083            topsrcdir = os.path.normpath(os.path.join(here, "..", "..", "..", ".."))
2084            hg_dir = os.path.join(topsrcdir, ".hg")
2085            git_dir = os.path.join(topsrcdir, ".git")
2086            if os.path.isdir(hg_dir) or os.path.isdir(git_dir):
2087                self.topsrcdir = topsrcdir
2088
2089        # Set self.config to read-only.
2090        #
2091        # We can create intermediate config info programmatically from
2092        # this in a repeatable way, with logs; this is how we straddle the
2093        # ideal-but-not-user-friendly static config and the
2094        # easy-to-write-hard-to-debug writable config.
2095        #
2096        # To allow for other, script-specific configurations
2097        # (e.g., props json parsing), before locking,
2098        # call self._pre_config_lock().  If needed, this method can
2099        # alter self.config.
2100        self._pre_config_lock(rw_config)
2101        self._config_lock()
2102
2103        self.info("Run as %s" % rw_config.command_line)
2104        if self.config.get("dump_config_hierarchy"):
2105            # we only wish to dump and display what self.config is made up of,
2106            # against the current script + args, without actually running any
2107            # actions
2108            self._dump_config_hierarchy(rw_config.all_cfg_files_and_dicts)
2109        if self.config.get("dump_config"):
2110            self.dump_config(exit_on_finish=True)
2111
2112        # Collect decorated methods. We simply iterate over the attributes of
2113        # the current class instance and look for signatures deposited by
2114        # the decorators.
2115        self._listeners = dict(
2116            pre_run=[],
2117            pre_action=[],
2118            post_action=[],
2119            post_run=[],
2120        )
2121        for k in dir(self):
2122            try:
2123                item = self._getattr(k)
2124            except Exception as e:
2125                item = None
2126                self.warning(
2127                    "BaseScript collecting decorated methods: "
2128                    "failure to get attribute {}: {}".format(k, str(e))
2129                )
2130            if not item:
2131                continue
2132
2133            # We only decorate methods, so ignore other types.
2134            if not inspect.ismethod(item):
2135                continue
2136
2137            if hasattr(item, "_pre_run_listener"):
2138                self._listeners["pre_run"].append(k)
2139
2140            if hasattr(item, "_pre_action_listener"):
2141                self._listeners["pre_action"].append((k, item._pre_action_listener))
2142
2143            if hasattr(item, "_post_action_listener"):
2144                self._listeners["post_action"].append((k, item._post_action_listener))
2145
2146            if hasattr(item, "_post_run_listener"):
2147                self._listeners["post_run"].append(k)
2148
2149    def _getattr(self, name):
2150        # `getattr(self, k)` will call the method `k` for any property
2151        # access. If the property depends upon a module which has not
2152        # been imported at the time the BaseScript initializer is
2153        # executed, this property access will result in an
2154        # Exception. Until Python 3's `inspect.getattr_static` is
2155        # available, the simplest approach is to ignore the specific
2156        # properties which are known to cause issues. Currently
2157        # adb_path and device are ignored since they require the
2158        # availablity of the mozdevice package which is not guaranteed
2159        # when BaseScript is called.
2160        property_list = set(["adb_path", "device"])
2161        if six.PY2:
2162            if name in property_list:
2163                item = None
2164            else:
2165                item = getattr(self, name)
2166        else:
2167            item = inspect.getattr_static(self, name)
2168            if type(item) == property:
2169                item = None
2170            else:
2171                item = getattr(self, name)
2172        return item
2173
2174    def _dump_config_hierarchy(self, cfg_files):
2175        """interpret each config file used.
2176
2177        This will show which keys/values are being added or overwritten by
2178        other config files depending on their hierarchy (when they were added).
2179        """
2180        # go through each config_file. We will start with the lowest and
2181        # print its keys/values that are being used in self.config. If any
2182        # keys/values are present in a config file with a higher precedence,
2183        # ignore those.
2184        dirs = self.query_abs_dirs()
2185        cfg_files_dump_config = {}  # we will dump this to file
2186        # keep track of keys that did not come from a config file
2187        keys_not_from_file = set(self.config.keys())
2188        if not cfg_files:
2189            cfg_files = []
2190        self.info("Total config files: %d" % (len(cfg_files)))
2191        if len(cfg_files):
2192            self.info("cfg files used from lowest precedence to highest:")
2193        for i, (target_file, target_dict) in enumerate(cfg_files):
2194            unique_keys = set(target_dict.keys())
2195            unique_dict = {}
2196            # iterate through the target_dicts remaining 'higher' cfg_files
2197            remaining_cfgs = cfg_files[slice(i + 1, len(cfg_files))]
2198            # where higher == more precedent
2199            for ii, (higher_file, higher_dict) in enumerate(remaining_cfgs):
2200                # now only keep keys/values that are not overwritten by a
2201                # higher config
2202                unique_keys = unique_keys.difference(set(higher_dict.keys()))
2203            # unique_dict we know now has only keys/values that are unique to
2204            # this config file.
2205            unique_dict = dict((key, target_dict.get(key)) for key in unique_keys)
2206            cfg_files_dump_config[target_file] = unique_dict
2207            self.action_message("Config File %d: %s" % (i + 1, target_file))
2208            self.info(pprint.pformat(unique_dict))
2209            # let's also find out which keys/values from self.config are not
2210            # from each target config file dict
2211            keys_not_from_file = keys_not_from_file.difference(set(target_dict.keys()))
2212        not_from_file_dict = dict(
2213            (key, self.config.get(key)) for key in keys_not_from_file
2214        )
2215        cfg_files_dump_config["not_from_cfg_file"] = not_from_file_dict
2216        self.action_message(
2217            "Not from any config file (default_config, " "cmd line options, etc)"
2218        )
2219        self.info(pprint.pformat(not_from_file_dict))
2220
2221        # finally, let's dump this output as JSON and exit early
2222        self.dump_config(
2223            os.path.join(dirs["abs_log_dir"], "localconfigfiles.json"),
2224            cfg_files_dump_config,
2225            console_output=False,
2226            exit_on_finish=True,
2227        )
2228
2229    def _pre_config_lock(self, rw_config):
2230        """This empty method can allow for config checking and manipulation
2231        before the config lock, when overridden in scripts.
2232        """
2233        pass
2234
2235    def _config_lock(self):
2236        """After this point, the config is locked and should not be
2237        manipulated (based on mozharness.base.config.ReadOnlyDict)
2238        """
2239        self.config.lock()
2240
2241    def _possibly_run_method(self, method_name, error_if_missing=False):
2242        """This is here for run()."""
2243        if hasattr(self, method_name) and callable(self._getattr(method_name)):
2244            return getattr(self, method_name)()
2245        elif error_if_missing:
2246            self.error("No such method %s!" % method_name)
2247
2248    def run_action(self, action):
2249        if action not in self.actions:
2250            self.action_message("Skipping %s step." % action)
2251            return
2252
2253        method_name = action.replace("-", "_")
2254        self.action_message("Running %s step." % action)
2255
2256        # An exception during a pre action listener should abort execution.
2257        for fn, target in self._listeners["pre_action"]:
2258            if target is not None and target != action:
2259                continue
2260
2261            try:
2262                self.info("Running pre-action listener: %s" % fn)
2263                method = getattr(self, fn)
2264                method(action)
2265            except Exception:
2266                self.error(
2267                    "Exception during pre-action for %s: %s"
2268                    % (action, traceback.format_exc())
2269                )
2270
2271                for fn, target in self._listeners["post_action"]:
2272                    if target is not None and target != action:
2273                        continue
2274
2275                    try:
2276                        self.info("Running post-action listener: %s" % fn)
2277                        method = getattr(self, fn)
2278                        method(action, success=False)
2279                    except Exception:
2280                        self.error(
2281                            "An additional exception occurred during "
2282                            "post-action for %s: %s" % (action, traceback.format_exc())
2283                        )
2284
2285                self.fatal("Aborting due to exception in pre-action listener.")
2286
2287        # We always run post action listeners, even if the main routine failed.
2288        success = False
2289        try:
2290            self.info("Running main action method: %s" % method_name)
2291            self._possibly_run_method("preflight_%s" % method_name)
2292            self._possibly_run_method(method_name, error_if_missing=True)
2293            self._possibly_run_method("postflight_%s" % method_name)
2294            success = True
2295        finally:
2296            post_success = True
2297            for fn, target in self._listeners["post_action"]:
2298                if target is not None and target != action:
2299                    continue
2300
2301                try:
2302                    self.info("Running post-action listener: %s" % fn)
2303                    method = getattr(self, fn)
2304                    method(action, success=success and self.return_code == 0)
2305                except Exception:
2306                    post_success = False
2307                    self.error(
2308                        "Exception during post-action for %s: %s"
2309                        % (action, traceback.format_exc())
2310                    )
2311
2312            step_result = "success" if success else "failed"
2313            self.action_message("Finished %s step (%s)" % (action, step_result))
2314
2315            if not post_success:
2316                self.fatal("Aborting due to failure in post-action listener.")
2317
2318    def run(self):
2319        """Default run method.
2320        This is the "do everything" method, based on actions and all_actions.
2321
2322        First run self.dump_config() if it exists.
2323        Second, go through the list of all_actions.
2324        If they're in the list of self.actions, try to run
2325        self.preflight_ACTION(), self.ACTION(), and self.postflight_ACTION().
2326
2327        Preflight is sanity checking before doing anything time consuming or
2328        destructive.
2329
2330        Postflight is quick testing for success after an action.
2331
2332        """
2333        for fn in self._listeners["pre_run"]:
2334            try:
2335                self.info("Running pre-run listener: %s" % fn)
2336                method = getattr(self, fn)
2337                method()
2338            except Exception:
2339                self.error(
2340                    "Exception during pre-run listener: %s" % traceback.format_exc()
2341                )
2342
2343                for fn in self._listeners["post_run"]:
2344                    try:
2345                        method = getattr(self, fn)
2346                        method()
2347                    except Exception:
2348                        self.error(
2349                            "An additional exception occurred during a "
2350                            "post-run listener: %s" % traceback.format_exc()
2351                        )
2352
2353                self.fatal("Aborting due to failure in pre-run listener.")
2354
2355        self.dump_config()
2356        try:
2357            for action in self.all_actions:
2358                self.run_action(action)
2359        except Exception:
2360            self.fatal("Uncaught exception: %s" % traceback.format_exc())
2361        finally:
2362            post_success = True
2363            for fn in self._listeners["post_run"]:
2364                try:
2365                    self.info("Running post-run listener: %s" % fn)
2366                    method = getattr(self, fn)
2367                    method()
2368                except Exception:
2369                    post_success = False
2370                    self.error(
2371                        "Exception during post-run listener: %s"
2372                        % traceback.format_exc()
2373                    )
2374
2375            if not post_success:
2376                self.fatal("Aborting due to failure in post-run listener.")
2377
2378        return self.return_code
2379
2380    def run_and_exit(self):
2381        """Runs the script and exits the current interpreter."""
2382        rc = self.run()
2383        if rc != 0:
2384            self.warning("returning nonzero exit status %d" % rc)
2385        sys.exit(rc)
2386
2387    def clobber(self):
2388        """
2389        Delete the working directory
2390        """
2391        dirs = self.query_abs_dirs()
2392        self.rmtree(dirs["abs_work_dir"], error_level=FATAL)
2393
2394    def query_abs_dirs(self):
2395        """We want to be able to determine where all the important things
2396        are.  Absolute paths lend themselves well to this, though I wouldn't
2397        be surprised if this causes some issues somewhere.
2398
2399        This should be overridden in any script that has additional dirs
2400        to query.
2401
2402        The query_* methods tend to set self.VAR variables as their
2403        runtime cache.
2404        """
2405        if self.abs_dirs:
2406            return self.abs_dirs
2407        c = self.config
2408        dirs = {}
2409        dirs["base_work_dir"] = c["base_work_dir"]
2410        dirs["abs_work_dir"] = os.path.join(c["base_work_dir"], c["work_dir"])
2411        dirs["abs_log_dir"] = os.path.join(c["base_work_dir"], c.get("log_dir", "logs"))
2412        if "GECKO_PATH" in os.environ:
2413            dirs["abs_src_dir"] = os.environ["GECKO_PATH"]
2414        self.abs_dirs = dirs
2415        return self.abs_dirs
2416
2417    def dump_config(
2418        self, file_path=None, config=None, console_output=True, exit_on_finish=False
2419    ):
2420        """Dump self.config to localconfig.json"""
2421        config = config or self.config
2422        dirs = self.query_abs_dirs()
2423        if not file_path:
2424            file_path = os.path.join(dirs["abs_log_dir"], "localconfig.json")
2425        self.info("Dumping config to %s." % file_path)
2426        self.mkdir_p(os.path.dirname(file_path))
2427        json_config = json.dumps(config, sort_keys=True, indent=4)
2428        fh = codecs.open(file_path, encoding="utf-8", mode="w+")
2429        fh.write(json_config)
2430        fh.close()
2431        if console_output:
2432            self.info(pprint.pformat(config))
2433        if exit_on_finish:
2434            sys.exit()
2435
2436    # logging {{{2
2437    def new_log_obj(self, default_log_level="info"):
2438        c = self.config
2439        log_dir = os.path.join(c["base_work_dir"], c.get("log_dir", "logs"))
2440        log_config = {
2441            "logger_name": "Simple",
2442            "log_name": "log",
2443            "log_dir": log_dir,
2444            "log_level": default_log_level,
2445            "log_format": "%(asctime)s %(levelname)8s - %(message)s",
2446            "log_to_console": True,
2447            "append_to_log": False,
2448        }
2449        log_type = self.config.get("log_type", "console")
2450        for key in log_config.keys():
2451            value = self.config.get(key, None)
2452            if value is not None:
2453                log_config[key] = value
2454        if log_type == "multi":
2455            self.log_obj = MultiFileLogger(**log_config)
2456        elif log_type == "simple":
2457            self.log_obj = SimpleFileLogger(**log_config)
2458        else:
2459            self.log_obj = ConsoleLogger(**log_config)
2460
2461    def action_message(self, message):
2462        self.info(
2463            "[mozharness: %sZ] %s"
2464            % (datetime.datetime.utcnow().isoformat(" "), message)
2465        )
2466
2467    def summary(self):
2468        """Print out all the summary lines added via add_summary()
2469        throughout the script.
2470
2471        I'd like to revisit how to do this in a prettier fashion.
2472        """
2473        self.action_message("%s summary:" % self.__class__.__name__)
2474        if self.summary_list:
2475            for item in self.summary_list:
2476                try:
2477                    self.log(item["message"], level=item["level"])
2478                except ValueError:
2479                    """log is closed; print as a default. Ran into this
2480                    when calling from __del__()"""
2481                    print("### Log is closed! (%s)" % item["message"])
2482
2483    def add_summary(self, message, level=INFO):
2484        self.summary_list.append({"message": message, "level": level})
2485        # TODO write to a summary-only log?
2486        # Summaries need a lot more love.
2487        self.log(message, level=level)
2488
2489    def summarize_success_count(
2490        self, success_count, total_count, message="%d of %d successful.", level=None
2491    ):
2492        if level is None:
2493            level = INFO
2494            if success_count < total_count:
2495                level = ERROR
2496        self.add_summary(message % (success_count, total_count), level=level)
2497
2498    def get_hash_for_file(self, file_path, hash_type="sha512"):
2499        bs = 65536
2500        hasher = hashlib.new(hash_type)
2501        with open(file_path, "rb") as fh:
2502            buf = fh.read(bs)
2503            while len(buf) > 0:
2504                hasher.update(buf)
2505                buf = fh.read(bs)
2506        return hasher.hexdigest()
2507
2508    @property
2509    def return_code(self):
2510        return self._return_code
2511
2512    @return_code.setter
2513    def return_code(self, code):
2514        old_return_code, self._return_code = self._return_code, code
2515        if old_return_code != code:
2516            self.warning("setting return code to %d" % code)
2517