1# ***** BEGIN LICENSE BLOCK *****
2# This Source Code Form is subject to the terms of the Mozilla Public
3# License, v. 2.0. If a copy of the MPL was not distributed with this file,
4# You can obtain one at http://mozilla.org/MPL/2.0/.
5# ***** END LICENSE BLOCK *****
6"""Generic script objects.
7
8script.py, along with config.py and log.py, represents the core of
9mozharness.
10"""
11
12from __future__ import print_function
13
14import codecs
15import datetime
16import errno
17import fnmatch
18import functools
19import gzip
20import hashlib
21import inspect
22import itertools
23import os
24import platform
25import pprint
26import re
27import shutil
28import socket
29import subprocess
30import sys
31import tarfile
32import time
33import traceback
34import zipfile
35import zlib
36from contextlib import contextmanager
37from io import BytesIO
38
39from six import binary_type
40
41from mozprocess import ProcessHandler
42
43import mozinfo
44from mozharness.base.config import BaseConfig
45from mozharness.base.log import (DEBUG, ERROR, FATAL, INFO, WARNING,
46                                 ConsoleLogger, LogMixin, MultiFileLogger,
47                                 OutputParser, SimpleFileLogger)
48
49try:
50    import httplib
51except ImportError:
52    import http.client as httplib
53try:
54    import simplejson as json
55except ImportError:
56    import json
57try:
58    from urllib2 import quote, urlopen, Request
59except ImportError:
60    from urllib.request import quote, urlopen, Request
61try:
62    import urlparse
63except ImportError:
64    import urllib.parse as urlparse
65if os.name == 'nt':
66    import locale
67    try:
68        import win32file
69        import win32api
70        PYWIN32 = True
71    except ImportError:
72        PYWIN32 = False
73
74try:
75    from urllib2 import HTTPError, URLError
76except ImportError:
77    from urllib.error import HTTPError, URLError
78
79
class ContentLengthMismatch(Exception):
    """Raised when the number of bytes actually downloaded does not match
    the Content-Length announced in the response headers."""
82
83
def platform_name():
    """Return the canonical platform string for the current host.

    Returns:
        str: one of 'linux', 'linux64', 'macosx', 'win32', 'win64'.
        None: when the platform cannot be determined.
    """
    probe = PlatformMixin()

    # Keep the original probe order: linux, then darwin, then windows.
    if probe._is_linux():
        return 'linux64' if probe._is_64_bit() else 'linux'
    if probe._is_darwin():
        return 'macosx'
    if probe._is_windows():
        return 'win64' if probe._is_64_bit() else 'win32'
    return None
99
100
class PlatformMixin(object):
    """Host operating-system and architecture detection helpers."""

    def _is_windows(self):
        """ check if the current operating system is Windows.

        Returns:
            bool: True if the current platform is Windows, False otherwise
        """
        system = platform.system()
        if system in ("Windows", "Microsoft"):
            return True
        if system.startswith("CYGWIN"):
            return True
        # Fall back on os.name for interpreters that report neither; also
        # makes the False case explicit instead of an implicit None.
        return os.name == 'nt'

    def _is_darwin(self):
        """ check if the current operating system is Darwin.

        Returns:
            bool: True if the current platform is Darwin, False otherwise
        """
        # The previous code used `in ("Darwin")`, which is a substring test
        # against the *string* "Darwin" (no tuple), and fell through to an
        # implicit None. Compare explicitly and always return a bool.
        if platform.system() == "Darwin":
            return True
        return sys.platform.startswith("darwin")

    def _is_linux(self):
        """ check if the current operating system is a Linux distribution.

        Returns:
            bool: True if the current platform is a Linux distro, False otherwise
        """
        # Same `in ("Linux")` substring fix as _is_darwin(), and an explicit
        # boolean return instead of an implicit None.
        if platform.system() == "Linux":
            return True
        return sys.platform.startswith("linux")

    def _is_debian(self):
        """ check if the current operating system is explicitly Debian.
        This intentionally doesn't count Debian derivatives like Ubuntu.

        Returns:
            bool: True if the current platform is debian, False otherwise
        """
        if not self._is_linux():
            return False
        # NOTE(review): relies on a LogMixin sibling providing self.info().
        self.info(mozinfo.linux_distro)
        re_debian_distro = re.compile('debian')
        return re_debian_distro.match(mozinfo.linux_distro) is not None

    def _is_redhat_based(self):
        """ check if the current operating system is a Redhat derived Linux distribution.

        Returns:
            bool: True if the current platform is a Redhat Linux distro, False otherwise
        """
        if not self._is_linux():
            return False
        re_redhat_distro = re.compile('Redhat|Fedora|CentOS|Oracle')
        return re_redhat_distro.match(mozinfo.linux_distro) is not None

    def _is_64_bit(self):
        """Return True when the host architecture is 64-bit.

        Returns:
            bool: True for a 64-bit host, False otherwise.
        """
        if self._is_darwin():
            # osx is a special snowflake and to ensure the arch, it is better to use the following
            return sys.maxsize > 2**32  # context: https://docs.python.org/2/library/platform.html
        else:
            # Using machine() gives you the architecture of the host rather
            # than the build type of the Python binary
            return '64' in platform.machine()
170
171
172# ScriptMixin {{{1
173class ScriptMixin(PlatformMixin):
174    """This mixin contains simple filesystem commands and the like.
175
176    It also contains some very special but very complex methods that,
177    together with logging and config, provide the base for all scripts
178    in this harness.
179
180    WARNING !!!
181    This class depends entirely on `LogMixin` methods in such a way that it will
182    only works if a class inherits from both `ScriptMixin` and `LogMixin`
183    simultaneously.
184
185    Depends on self.config of some sort.
186
187    Attributes:
188        env (dict): a mapping object representing the string environment.
189        script_obj (ScriptMixin): reference to a ScriptMixin instance.
190    """
191
192    env = None
193    script_obj = None
194
195    def query_filesize(self, file_path):
196        self.info("Determining filesize for %s" % file_path)
197        length = os.path.getsize(file_path)
198        self.info(" %s" % str(length))
199        return length
200
201    # TODO this should be parallelized with the to-be-written BaseHelper!
202    def query_sha512sum(self, file_path):
203        self.info("Determining sha512sum for %s" % file_path)
204        m = hashlib.sha512()
205        contents = self.read_from_file(file_path, verbose=False,
206                                       open_mode='rb')
207        m.update(contents)
208        sha512 = m.hexdigest()
209        self.info(" %s" % sha512)
210        return sha512
211
    def platform_name(self):
        """ Return the platform name on which the script is running on.
        Returns:
            None: for failure to determine the platform.
            str: The name of the platform (e.g. linux64)
        """
        # Thin wrapper around the module-level platform_name() helper.
        return platform_name()
219
220    # Simple filesystem commands {{{2
221    def mkdir_p(self, path, error_level=ERROR):
222        """ Create a directory if it doesn't exists.
223        This method also logs the creation, error or current existence of the
224        directory to be created.
225
226        Args:
227            path (str): path of the directory to be created.
228            error_level (str): log level name to be used in case of error.
229
230        Returns:
231            None: for sucess.
232            int: -1 on error
233        """
234
235        if not os.path.exists(path):
236            self.info("mkdir: %s" % path)
237            try:
238                os.makedirs(path)
239            except OSError:
240                self.log("Can't create directory %s!" % path,
241                         level=error_level)
242                return -1
243        else:
244            self.debug("mkdir_p: %s Already exists." % path)
245
246    def rmtree(self, path, log_level=INFO, error_level=ERROR,
247               exit_code=-1):
248        """ Delete an entire directory tree and log its result.
249        This method also logs the platform rmtree function, its retries, errors,
250        and current existence of the directory.
251
252        Args:
253            path (str): path to the directory tree root to remove.
254            log_level (str, optional): log level name to for this operation. Defaults
255                                       to `INFO`.
256            error_level (str, optional): log level name to use in case of error.
257                                         Defaults to `ERROR`.
258            exit_code (int, optional): useless parameter, not use here.
259                                       Defaults to -1
260
261        Returns:
262            None: for success
263        """
264
265        self.log("rmtree: %s" % path, level=log_level)
266        error_message = "Unable to remove %s!" % path
267        if self._is_windows():
268            # Call _rmtree_windows() directly, since even checking
269            # os.path.exists(path) will hang if path is longer than MAX_PATH.
270            self.info("Using _rmtree_windows ...")
271            return self.retry(
272                self._rmtree_windows,
273                error_level=error_level,
274                error_message=error_message,
275                args=(path, ),
276                log_level=log_level,
277            )
278        if os.path.exists(path):
279            if os.path.isdir(path):
280                return self.retry(
281                    shutil.rmtree,
282                    error_level=error_level,
283                    error_message=error_message,
284                    retry_exceptions=(OSError, ),
285                    args=(path, ),
286                    log_level=log_level,
287                )
288            else:
289                return self.retry(
290                    os.remove,
291                    error_level=error_level,
292                    error_message=error_message,
293                    retry_exceptions=(OSError, ),
294                    args=(path, ),
295                    log_level=log_level,
296                )
297        else:
298            self.debug("%s doesn't exist." % path)
299
300    def query_msys_path(self, path):
301        """ replaces the Windows harddrive letter path style with a linux
302        path style, e.g. C:// --> /C/
303        Note: method, not used in any script.
304
305        Args:
306            path (str?): path to convert to the linux path style.
307        Returns:
308            str: in case `path` is a string. The result is the path with the new notation.
309            type(path): `path` itself is returned in case `path` is not str type.
310        """
311        if not isinstance(path, basestring):
312            return path
313        path = path.replace("\\", "/")
314
315        def repl(m):
316            return '/%s/' % m.group(1)
317        path = re.sub(r'''^([a-zA-Z]):/''', repl, path)
318        return path
319
    def _rmtree_windows(self, path):
        """ Windows-specific rmtree that handles path lengths longer than MAX_PATH.
            Ported from clobberer.py.

        Args:
            path (str): directory path to remove.

        Returns:
            None: if the path doesn't exists.
            int: the return number of calling `self.run_command`
            int: in case the path specified is not a directory but a file.
                 0 on success, non-zero on error. Note: The returned value
                 is the result of calling `win32file.DeleteFile`
        """

        assert self._is_windows()
        path = os.path.realpath(path)
        # The \\?\ prefix puts the Win32 API into extended-length path mode,
        # lifting the MAX_PATH (260-char) limit.
        full_path = '\\\\?\\' + path
        if not os.path.exists(full_path):
            return
        if not PYWIN32:
            # Without pywin32 fall back to shelling out to del/rmdir.
            if not os.path.isdir(path):
                return self.run_command('del /F /Q "%s"' % path)
            else:
                return self.run_command('rmdir /S /Q "%s"' % path)
        # Make sure directory is writable
        win32file.SetFileAttributesW('\\\\?\\' + path, win32file.FILE_ATTRIBUTE_NORMAL)
        # rmtree() is sometimes invoked directly on a file; delete it and return.
        if not os.path.isdir('\\\\?\\' + path):
            return win32file.DeleteFile('\\\\?\\' + path)

        # Walk the directory, recursing into subdirectories and deleting
        # files one at a time.
        for ffrec in win32api.FindFiles('\\\\?\\' + path + '\\*.*'):
            # FindFiles record: index 0 is the attribute mask, index 8 the name.
            file_attr = ffrec[0]
            name = ffrec[8]
            if name == '.' or name == '..':
                continue
            full_name = os.path.join(path, name)

            if file_attr & win32file.FILE_ATTRIBUTE_DIRECTORY:
                self._rmtree_windows(full_name)
            else:
                try:
                    # Clear read-only/system bits first so DeleteFile can succeed.
                    win32file.SetFileAttributesW(
                        '\\\\?\\' + full_name, win32file.FILE_ATTRIBUTE_NORMAL)
                    win32file.DeleteFile('\\\\?\\' + full_name)
                except Exception:
                    # DeleteFile fails on long paths, del /f /q works just fine
                    self.run_command('del /F /Q "%s"' % full_name)

        win32file.RemoveDirectory('\\\\?\\' + path)
370
371    def get_filename_from_url(self, url):
372        """ parse a filename base on an url.
373
374        Args:
375            url (str): url to parse for the filename
376
377        Returns:
378            str: filename parsed from the url, or `netloc` network location part
379                 of the url.
380        """
381
382        parsed = urlparse.urlsplit(url.rstrip('/'))
383        if parsed.path != '':
384            return parsed.path.rsplit('/', 1)[-1]
385        else:
386            return parsed.netloc
387
388    def _urlopen(self, url, **kwargs):
389        """ open the url `url` using `urllib2`.`
390        This method can be overwritten to extend its complexity
391
392        Args:
393            url (str | urllib.request.Request): url to open
394            kwargs: Arbitrary keyword arguments passed to the `urllib.request.urlopen` function.
395
396        Returns:
397            file-like: file-like object with additional methods as defined in
398                       `urllib.request.urlopen`_.
399            None: None may be returned if no handler handles the request.
400
401        Raises:
402            urllib2.URLError: on errors
403
404        .. urillib.request.urlopen:
405        https://docs.python.org/2/library/urllib2.html#urllib2.urlopen
406        """
407        # http://bugs.python.org/issue13359 - urllib2 does not automatically quote the URL
408        url_quoted = quote(url, safe='%/:=&?~#+!$,;\'@()*[]|')
409        return urlopen(url_quoted, **kwargs)
410
    def fetch_url_into_memory(self, url):
        ''' Downloads a file from a url into memory instead of disk.

        Args:
            url (str): URL path where the file to be downloaded is located.

        Raises:
            IOError: When the url points to a file on disk and cannot be found
            ContentLengthMismatch: When the length of the retrieved content does not match the
                                   Content-Length response header.
            ValueError: When the scheme of a url is not what is expected.

        Returns:
            BytesIO: contents of url
        '''
        self.info('Fetch {} into memory'.format(url))
        parsed_url = urlparse.urlparse(url)

        # Local files: take the expected length from the filesystem, and
        # normalize bare paths into file:// URLs so urlopen() accepts them.
        if parsed_url.scheme in ('', 'file'):
            path = parsed_url.path
            if not os.path.isfile(path):
                raise IOError('Could not find file to extract: {}'.format(url))

            content_length = os.stat(path).st_size

            # In case we're referrencing a file without file://
            if parsed_url.scheme == '':
                url = 'file://%s' % os.path.abspath(url)
                parsed_url = urlparse.urlparse(url)

        request = Request(url)
        # When calling fetch_url_into_memory() you should retry when we raise
        # one of these exceptions:
        # * Bug 1300663 - HTTPError: HTTP Error 404: Not Found
        # * Bug 1300413 - HTTPError: HTTP Error 500: Internal Server Error
        # * Bug 1300943 - HTTPError: HTTP Error 503: Service Unavailable
        # * Bug 1300953 - URLError: <urlopen error [Errno -2] Name or service not known>
        # * Bug 1301594 - URLError: <urlopen error [Errno 10054] An existing connection was ...
        # * Bug 1301597 - URLError: <urlopen error [Errno 8] _ssl.c:504: EOF occurred in ...
        # * Bug 1301855 - URLError: <urlopen error [Errno 60] Operation timed out>
        # * Bug 1302237 - URLError: <urlopen error [Errno 104] Connection reset by peer>
        # * Bug 1301807 - BadStatusLine: ''
        #
        # Bug 1309912 - Adding timeout in hopes to solve blocking on response.read() (bug 1300413)
        response = urlopen(request, timeout=30)

        # For HTTP(S), trust the server-announced length over the local stat.
        # NOTE(review): content_length is only assigned for ''/file and
        # http/https schemes; any other scheme (e.g. ftp) would hit a
        # NameError below -- confirm callers only pass these schemes.
        if parsed_url.scheme in ('http', 'https'):
            content_length = int(response.headers.get('Content-Length'))

        response_body = response.read()
        response_body_size = len(response_body)

        self.info('Content-Length response header: {}'.format(content_length))
        self.info('Bytes received: {}'.format(response_body_size))

        # A short read means a truncated download; surface it so callers
        # (e.g. download_unpack's retry loop) can retry.
        if response_body_size != content_length:
            raise ContentLengthMismatch(
                'The retrieved Content-Length header declares a body length '
                'of {} bytes, while we actually retrieved {} bytes'.format(
                    content_length, response_body_size)
            )

        if response.info().get('Content-Encoding') == 'gzip':
            self.info('Content-Encoding is "gzip", so decompressing response body')
            # See http://www.zlib.net/manual.html#Advanced
            # section "ZEXTERN int ZEXPORT inflateInit2 OF....":
            #   Add 32 to windowBits to enable zlib and gzip decoding with automatic
            #   header detection, or add 16 to decode only the gzip format (the zlib
            #   format will return a Z_DATA_ERROR).
            # Adding 16 since we only wish to support gzip encoding.
            file_contents = zlib.decompress(response_body, zlib.MAX_WBITS | 16)
        else:
            file_contents = response_body

        # Use BytesIO instead of StringIO
        # http://stackoverflow.com/questions/34162017/unzip-buffer-with-python/34162395#34162395
        return BytesIO(file_contents)
488
    def _download_file(self, url, file_name):
        """ Helper function for download_file()
        Additionaly this function logs all exceptions as warnings before
        re-raising them

        Args:
            url (str): string containing the URL with the file location
            file_name (str): name of the file where the downloaded file
                             is written.

        Returns:
            str: filename of the written file on disk

        Raises:
            urllib2.URLError: on incomplete download.
            urllib2.HTTPError: on Http error code
            socket.timeout: on connection timeout
            socket.error: on socket error
        """
        # If our URLs look like files, prefix them with file:// so they can
        # be loaded like URLs.
        if not (url.startswith("http") or url.startswith("file://")):
            if not os.path.isfile(url):
                self.fatal("The file %s does not exist" % url)
            url = 'file://%s' % os.path.abspath(url)

        try:
            f_length = None
            f = self._urlopen(url, timeout=30)

            # Track expected vs. received byte counts so a truncated download
            # can be detected and re-raised as URLError below.
            if f.info().get('content-length') is not None:
                f_length = int(f.info()['content-length'])
                got_length = 0
            if f.info().get('Content-Encoding') == 'gzip':
                # Note, we'll download the full compressed content into its own
                # file, since that allows the gzip library to seek through it.
                # Once downloaded, we'll decompress it into the real target
                # file, and delete the compressed version.
                local_file = open(file_name + '.gz', 'wb')
            else:
                local_file = open(file_name, 'wb')
            # Stream the body in 1 MiB chunks rather than slurping it at once.
            while True:
                block = f.read(1024 ** 2)
                if not block:
                    if f_length is not None and got_length != f_length:
                        raise URLError(
                            "Download incomplete; content-length was %d, "
                            "but only received %d" % (f_length, got_length))
                    break
                local_file.write(block)
                if f_length is not None:
                    got_length += len(block)
            local_file.close()
            if f.info().get('Content-Encoding') == 'gzip':
                # Decompress file into target location, then remove compressed version
                with open(file_name, 'wb') as f_out:
                    # On some execution paths, this could be called with python 2.6
                    # whereby gzip.open(...) cannot be used with a 'with' statement.
                    # So let's do this the python 2.6 way...
                    # NOTE(review): if gzip.open() itself raises, f_in is
                    # unbound and the finally clause raises NameError -- confirm
                    # this path is acceptable.
                    try:
                        f_in = gzip.open(file_name + '.gz', 'rb')
                        shutil.copyfileobj(f_in, f_out)
                    finally:
                        f_in.close()
                os.remove(file_name + '.gz')
            return file_name
        except HTTPError as e:
            self.warning("Server returned status %s %s for %s" % (str(e.code), str(e), url))
            raise
        except URLError as e:
            self.warning("URL Error: %s" % url)

            # Failures due to missing local files won't benefit from retry.
            # Raise the original OSError.
            if isinstance(e.args[0], OSError) and e.args[0].errno == errno.ENOENT:
                raise e.args[0]

            raise
        except socket.timeout as e:
            self.warning("Timed out accessing %s: %s" % (url, str(e)))
            raise
        except socket.error as e:
            self.warning("Socket error when accessing %s: %s" % (url, str(e)))
            raise
573
574    def _retry_download(self, url, error_level, file_name=None, retry_config=None):
575        """ Helper method to retry download methods.
576
577        This method calls `self.retry` on `self._download_file` using the passed
578        parameters if a file_name is specified. If no file is specified, we will
579        instead call `self._urlopen`, which grabs the contents of a url but does
580        not create a file on disk.
581
582        Args:
583            url (str): URL path where the file is located.
584            file_name (str): file_name where the file will be written to.
585            error_level (str): log level to use in case an error occurs.
586            retry_config (dict, optional): key-value pairs to be passed to
587                                           `self.retry`. Defaults to `None`
588
589        Returns:
590            str: `self._download_file` return value is returned
591            unknown: `self.retry` `failure_status` is returned on failure, which
592                     defaults to -1
593        """
594        retry_args = dict(
595            failure_status=None,
596            retry_exceptions=(HTTPError, URLError,
597                              httplib.BadStatusLine,
598                              socket.timeout, socket.error),
599            error_message="Can't download from %s to %s!" % (url, file_name),
600            error_level=error_level,
601        )
602
603        if retry_config:
604            retry_args.update(retry_config)
605
606        download_func = self._urlopen
607        kwargs = {"url": url}
608        if file_name:
609            download_func = self._download_file
610            kwargs = {"url": url, "file_name": file_name}
611
612        return self.retry(
613            download_func,
614            kwargs=kwargs,
615            **retry_args
616        )
617
618    def _filter_entries(self, namelist, extract_dirs):
619        """Filter entries of the archive based on the specified list of to extract dirs."""
620        filter_partial = functools.partial(fnmatch.filter, namelist)
621        entries = itertools.chain(*map(filter_partial, extract_dirs or ['*']))
622
623        for entry in entries:
624            yield entry
625
626    def unzip(self, compressed_file, extract_to, extract_dirs='*', verbose=False):
627        """This method allows to extract a zip file without writing to disk first.
628
629        Args:
630            compressed_file (object): File-like object with the contents of a compressed zip file.
631            extract_to (str): where to extract the compressed file.
632            extract_dirs (list, optional): directories inside the archive file to extract.
633                                           Defaults to '*'.
634            verbose (bool, optional): whether or not extracted content should be displayed.
635                                      Defaults to False.
636
637        Raises:
638            zipfile.BadZipfile: on contents of zipfile being invalid
639        """
640        with zipfile.ZipFile(compressed_file) as bundle:
641            entries = self._filter_entries(bundle.namelist(), extract_dirs)
642
643            for entry in entries:
644                if verbose:
645                    self.info(' {}'.format(entry))
646
647                # Exception to be retried:
648                # Bug 1301645 - BadZipfile: Bad CRC-32 for file ...
649                #    http://stackoverflow.com/questions/5624669/strange-badzipfile-bad-crc-32-problem/5626098#5626098
650                # Bug 1301802 - error: Error -3 while decompressing: invalid stored block lengths
651                bundle.extract(entry, path=extract_to)
652
653                # ZipFile doesn't preserve permissions during extraction:
654                # http://bugs.python.org/issue15795
655                fname = os.path.realpath(os.path.join(extract_to, entry))
656                try:
657                    # getinfo() can raise KeyError
658                    mode = bundle.getinfo(entry).external_attr >> 16 & 0x1FF
659                    # Only set permissions if attributes are available. Otherwise all
660                    # permissions will be removed eg. on Windows.
661                    if mode:
662                        os.chmod(fname, mode)
663
664                except KeyError:
665                    self.warning('{} was not found in the zip file'.format(entry))
666
667    def deflate(self, compressed_file, mode, extract_to='.', *args, **kwargs):
668        """This method allows to extract a compressed file from a tar, tar.bz2 and tar.gz files.
669
670        Args:
671            compressed_file (object): File-like object with the contents of a compressed file.
672            mode (str): string of the form 'filemode[:compression]' (e.g. 'r:gz' or 'r:bz2')
673            extract_to (str, optional): where to extract the compressed file.
674        """
675        t = tarfile.open(fileobj=compressed_file, mode=mode)
676        t.extractall(path=extract_to)
677
678    def download_unpack(self, url, extract_to='.', extract_dirs='*', verbose=False):
679        """Generic method to download and extract a compressed file without writing it to disk first.
680
681        Args:
682            url (str): URL where the file to be downloaded is located.
683            extract_to (str, optional): directory where the downloaded file will
684                                        be extracted to.
685            extract_dirs (list, optional): directories inside the archive to extract.
686                                           Defaults to `*`. It currently only applies to zip files.
687            verbose (bool, optional): whether or not extracted content should be displayed.
688                                      Defaults to False.
689
690        """
691        def _determine_extraction_method_and_kwargs(url):
692            EXTENSION_TO_MIMETYPE = {
693                'bz2': 'application/x-bzip2',
694                'gz':  'application/x-gzip',
695                'tar': 'application/x-tar',
696                'zip': 'application/zip',
697            }
698            MIMETYPES = {
699                'application/x-bzip2': {
700                    'function': self.deflate,
701                    'kwargs': {'mode': 'r:bz2'},
702                },
703                'application/x-gzip': {
704                    'function': self.deflate,
705                    'kwargs': {'mode': 'r:gz'},
706                },
707                'application/x-tar': {
708                    'function': self.deflate,
709                    'kwargs': {'mode': 'r'},
710                },
711                'application/zip': {
712                    'function': self.unzip,
713                },
714                'application/x-zip-compressed': {
715                    'function': self.unzip,
716                },
717            }
718
719            filename = url.split('/')[-1]
720            # XXX: bz2/gz instead of tar.{bz2/gz}
721            extension = filename[filename.rfind('.')+1:]
722            mimetype = EXTENSION_TO_MIMETYPE[extension]
723            self.debug('Mimetype: {}'.format(mimetype))
724
725            function = MIMETYPES[mimetype]['function']
726            kwargs = {
727                'compressed_file': compressed_file,
728                'extract_to': extract_to,
729                'extract_dirs': extract_dirs,
730                'verbose': verbose,
731            }
732            kwargs.update(MIMETYPES[mimetype].get('kwargs', {}))
733
734            return function, kwargs
735
736        # Many scripts overwrite this method and set extract_dirs to None
737        extract_dirs = '*' if extract_dirs is None else extract_dirs
738        self.info('Downloading and extracting to {} these dirs {} from {}'.format(
739            extract_to,
740            ', '.join(extract_dirs),
741            url,
742        ))
743
744        # 1) Let's fetch the file
745        retry_args = dict(
746            retry_exceptions=(
747                HTTPError,
748                URLError,
749                httplib.BadStatusLine,
750                socket.timeout,
751                socket.error,
752                ContentLengthMismatch,
753            ),
754            sleeptime=30,
755            attempts=5,
756            error_message="Can't download from {}".format(url),
757            error_level=FATAL,
758        )
759        compressed_file = self.retry(
760            self.fetch_url_into_memory,
761            kwargs={'url': url},
762            **retry_args
763        )
764
765        # 2) We're guaranteed to have download the file with error_level=FATAL
766        #    Let's unpack the file
767        function, kwargs = _determine_extraction_method_and_kwargs(url)
768        try:
769            function(**kwargs)
770        except zipfile.BadZipfile:
771            # Dump the exception and exit
772            self.exception(level=FATAL)
773
774    def load_json_url(self, url, error_level=None, *args, **kwargs):
775        """ Returns a json object from a url (it retries). """
776        contents = self._retry_download(
777            url=url, error_level=error_level, *args, **kwargs
778        )
779        return json.loads(contents.read())
780
781    # http://www.techniqal.com/blog/2008/07/31/python-file-read-write-with-urllib2/
782    # TODO thinking about creating a transfer object.
783    def download_file(self, url, file_name=None, parent_dir=None,
784                      create_parent_dir=True, error_level=ERROR,
785                      exit_code=3, retry_config=None):
786        """ Python wget.
787        Download the filename at `url` into `file_name` and put it on `parent_dir`.
788        On error log with the specified `error_level`, on fatal exit with `exit_code`.
789        Execute all the above based on `retry_config` parameter.
790
791        Args:
792            url (str): URL path where the file to be downloaded is located.
793            file_name (str, optional): file_name where the file will be written to.
794                                       Defaults to urls' filename.
795            parent_dir (str, optional): directory where the downloaded file will
796                                        be written to. Defaults to current working
797                                        directory
798            create_parent_dir (bool, optional): create the parent directory if it
799                                                doesn't exist. Defaults to `True`
800            error_level (str, optional): log level to use in case an error occurs.
801                                         Defaults to `ERROR`
802            retry_config (dict, optional): key-value pairs to be passed to
803                                          `self.retry`. Defaults to `None`
804
805        Returns:
806            str: filename where the downloaded file was written to.
807            unknown: on failure, `failure_status` is returned.
808        """
809        if not file_name:
810            try:
811                file_name = self.get_filename_from_url(url)
812            except AttributeError:
813                self.log("Unable to get filename from %s; bad url?" % url,
814                         level=error_level, exit_code=exit_code)
815                return
816        if parent_dir:
817            file_name = os.path.join(parent_dir, file_name)
818            if create_parent_dir:
819                self.mkdir_p(parent_dir, error_level=error_level)
820        self.info("Downloading %s to %s" % (url, file_name))
821        status = self._retry_download(
822            url=url,
823            error_level=error_level,
824            file_name=file_name,
825            retry_config=retry_config
826        )
827        if status == file_name:
828            self.info("Downloaded %d bytes." % os.path.getsize(file_name))
829        return status
830
831    def move(self, src, dest, log_level=INFO, error_level=ERROR,
832             exit_code=-1):
833        """ recursively move a file or directory (src) to another location (dest).
834
835        Args:
836            src (str): file or directory path to move.
837            dest (str): file or directory path where to move the content to.
838            log_level (str): log level to use for normal operation. Defaults to
839                                `INFO`
840            error_level (str): log level to use on error. Defaults to `ERROR`
841
842        Returns:
843            int: 0 on success. -1 on error.
844        """
845        self.log("Moving %s to %s" % (src, dest), level=log_level)
846        try:
847            shutil.move(src, dest)
848        # http://docs.python.org/tutorial/errors.html
849        except IOError as e:
850            self.log("IO error: %s" % str(e),
851                     level=error_level, exit_code=exit_code)
852            return -1
853        except shutil.Error as e:
854            # ERROR level ends up reporting the failure to treeherder &
855            # pollutes the failure summary list.
856            self.log("shutil error: %s" % str(e),
857                     level=WARNING, exit_code=exit_code)
858            return -1
859        return 0
860
861    def chmod(self, path, mode):
862        """ change `path` mode to `mode`.
863
864        Args:
865            path (str): path whose mode will be modified.
866            mode (hex): one of the values defined at `stat`_
867
868        .. _stat:
869        https://docs.python.org/2/library/os.html#os.chmod
870        """
871
872        self.info("Chmoding %s to %s" % (path, str(oct(mode))))
873        os.chmod(path, mode)
874
875    def copyfile(
876        self, src, dest, log_level=INFO, error_level=ERROR, copystat=False, compress=False
877    ):
878        """ copy or compress `src` into `dest`.
879
880        Args:
881            src (str): filepath to copy.
882            dest (str): filepath where to move the content to.
883            log_level (str, optional): log level to use for normal operation. Defaults to
884                                      `INFO`
885            error_level (str, optional): log level to use on error. Defaults to `ERROR`
886            copystat (bool, optional): whether or not to copy the files metadata.
887                                       Defaults to `False`.
888            compress (bool, optional): whether or not to compress the destination file.
889                                       Defaults to `False`.
890
891        Returns:
892            int: -1 on error
893            None: on success
894        """
895
896        if compress:
897            self.log("Compressing %s to %s" % (src, dest), level=log_level)
898            try:
899                infile = open(src, "rb")
900                outfile = gzip.open(dest, "wb")
901                outfile.writelines(infile)
902                outfile.close()
903                infile.close()
904            except IOError as e:
905                self.log("Can't compress %s to %s: %s!" % (src, dest, str(e)),
906                         level=error_level)
907                return -1
908        else:
909            self.log("Copying %s to %s" % (src, dest), level=log_level)
910            try:
911                shutil.copyfile(src, dest)
912            except (IOError, shutil.Error) as e:
913                self.log("Can't copy %s to %s: %s!" % (src, dest, str(e)),
914                         level=error_level)
915                return -1
916
917        if copystat:
918            try:
919                shutil.copystat(src, dest)
920            except (IOError, shutil.Error) as e:
921                self.log("Can't copy attributes of %s to %s: %s!" % (src, dest, str(e)),
922                         level=error_level)
923                return -1
924
    def copytree(self, src, dest, overwrite='no_overwrite', log_level=INFO,
                 error_level=ERROR):
        """ An implementation of `shutil.copytree` that allows for `dest` to exist
        and implements different overwrite levels:
        - 'no_overwrite' will keep all(any) existing files in destination tree
        - 'overwrite_if_exists' will only overwrite destination paths that have
                                the same path names relative to the root of the
                                src and destination tree
        - 'clobber' will replace the whole destination tree(clobber) if it exists

        Args:
            src (str): directory path to move.
            dest (str): directory path where to move the content to.
            overwrite (str): string specifying the overwrite level.
            log_level (str, optional): log level to use for normal operation. Defaults to
                                      `INFO`
            error_level (str, optional): log level to use on error. Defaults to `ERROR`

        Returns:
            int: -1 on error
            None: on success
        """

        self.info('copying tree: %s to %s' % (src, dest))
        try:
            if overwrite == 'clobber' or not os.path.exists(dest):
                # Simplest case: remove any existing destination and let
                # shutil do a straight recursive copy.
                self.rmtree(dest)
                shutil.copytree(src, dest)
            elif overwrite == 'no_overwrite' or overwrite == 'overwrite_if_exists':
                files = os.listdir(src)
                for f in files:
                    abs_src_f = os.path.join(src, f)
                    abs_dest_f = os.path.join(dest, f)
                    if not os.path.exists(abs_dest_f):
                        # Destination entry doesn't exist yet, so there is
                        # nothing to preserve: copy it wholesale ('clobber'
                        # recursion is safe for a fresh subtree).
                        if os.path.isdir(abs_src_f):
                            self.mkdir_p(abs_dest_f)
                            self.copytree(abs_src_f, abs_dest_f,
                                          overwrite='clobber')
                        else:
                            shutil.copy2(abs_src_f, abs_dest_f)
                    elif overwrite == 'no_overwrite':  # destination path exists
                        # Recurse into matching directories (files that are
                        # new deeper down can still be copied); anything else
                        # that already exists is left untouched.
                        if os.path.isdir(abs_src_f) and os.path.isdir(abs_dest_f):
                            self.copytree(abs_src_f, abs_dest_f,
                                          overwrite='no_overwrite')
                        else:
                            self.debug('ignoring path: %s as destination: \
                                    %s exists' % (abs_src_f, abs_dest_f))
                    else:  # overwrite == 'overwrite_if_exists' and destination exists
                        self.debug('overwriting: %s with: %s' %
                                   (abs_dest_f, abs_src_f))
                        # Remove the existing entry first so a file can
                        # replace a directory (and vice versa), then recurse.
                        self.rmtree(abs_dest_f)

                        if os.path.isdir(abs_src_f):
                            self.mkdir_p(abs_dest_f)
                            self.copytree(abs_src_f, abs_dest_f,
                                          overwrite='overwrite_if_exists')
                        else:
                            shutil.copy2(abs_src_f, abs_dest_f)
            else:
                self.fatal("%s is not a valid argument for param overwrite" % (overwrite))
        except (IOError, shutil.Error):
            self.exception("There was an error while copying %s to %s!" % (src, dest),
                           level=error_level)
            return -1
989
990    def write_to_file(self, file_path, contents, verbose=True,
991                      open_mode='w', create_parent_dir=False,
992                      error_level=ERROR):
993        """ Write `contents` to `file_path`, according to `open_mode`.
994
995        Args:
996            file_path (str): filepath where the content will be written to.
997            contents (str): content to write to the filepath.
998            verbose (bool, optional): whether or not to log `contents` value.
999                                      Defaults to `True`
1000            open_mode (str, optional): open mode to use for openning the file.
1001                                       Defaults to `w`
1002            create_parent_dir (bool, optional): whether or not to create the
1003                                                parent directory of `file_path`
1004            error_level (str, optional): log level to use on error. Defaults to `ERROR`
1005
1006        Returns:
1007            str: `file_path` on success
1008            None: on error.
1009        """
1010        self.info("Writing to file %s" % file_path)
1011        if verbose:
1012            self.info("Contents:")
1013            for line in contents.splitlines():
1014                self.info(" %s" % line)
1015        if create_parent_dir:
1016            parent_dir = os.path.dirname(file_path)
1017            self.mkdir_p(parent_dir, error_level=error_level)
1018        try:
1019            fh = open(file_path, open_mode)
1020            try:
1021                fh.write(contents)
1022            except UnicodeEncodeError:
1023                fh.write(contents.encode('utf-8', 'replace'))
1024            fh.close()
1025            return file_path
1026        except IOError:
1027            self.log("%s can't be opened for writing!" % file_path,
1028                     level=error_level)
1029
1030    @contextmanager
1031    def opened(self, file_path, verbose=True, open_mode='r',
1032               error_level=ERROR):
1033        """ Create a context manager to use on a with statement.
1034
1035        Args:
1036            file_path (str): filepath of the file to open.
1037            verbose (bool, optional): useless parameter, not used here.
1038                Defaults to True.
1039            open_mode (str, optional): open mode to use for openning the file.
1040                Defaults to `r`
1041            error_level (str, optional): log level name to use on error.
1042                Defaults to `ERROR`
1043
1044        Yields:
1045            tuple: (file object, error) pair. In case of error `None` is yielded
1046                as file object, together with the corresponding error.
1047                If there is no error, `None` is returned as the error.
1048        """
1049        # See opened_w_error in http://www.python.org/dev/peps/pep-0343/
1050        self.info("Reading from file %s" % file_path)
1051        try:
1052            fh = open(file_path, open_mode)
1053        except IOError as err:
1054            self.log("unable to open %s: %s" % (file_path, err.strerror),
1055                     level=error_level)
1056            yield None, err
1057        else:
1058            try:
1059                yield fh, None
1060            finally:
1061                fh.close()
1062
1063    def read_from_file(self, file_path, verbose=True, open_mode='r',
1064                       error_level=ERROR):
1065        """ Use `self.opened` context manager to open a file and read its
1066        content.
1067
1068        Args:
1069            file_path (str): filepath of the file to read.
1070            verbose (bool, optional): whether or not to log the file content.
1071                Defaults to True.
1072            open_mode (str, optional): open mode to use for openning the file.
1073                Defaults to `r`
1074            error_level (str, optional): log level name to use on error.
1075                Defaults to `ERROR`
1076
1077        Returns:
1078            None: on error.
1079            str: file content on success.
1080        """
1081        with self.opened(file_path, verbose, open_mode, error_level) as (fh, err):
1082            if err:
1083                return None
1084            contents = fh.read()
1085            if verbose:
1086                self.info("Contents:")
1087                for line in contents.splitlines():
1088                    self.info(" %s" % line)
1089            return contents
1090
1091    def chdir(self, dir_name):
1092        self.log("Changing directory to %s." % dir_name)
1093        os.chdir(dir_name)
1094
1095    def is_exe(self, fpath):
1096        """
1097        Determine if fpath is a file and if it is executable.
1098        """
1099        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
1100
1101    def which(self, program):
1102        """ OS independent implementation of Unix's which command
1103
1104        Args:
1105            program (str): name or path to the program whose executable is
1106                being searched.
1107
1108        Returns:
1109            None: if the executable was not found.
1110            str: filepath of the executable file.
1111        """
1112        if self._is_windows() and not program.endswith(".exe"):
1113            program += ".exe"
1114        fpath, fname = os.path.split(program)
1115        if fpath:
1116            if self.is_exe(program):
1117                return program
1118        else:
1119            # If the exe file is defined in the configs let's use that
1120            exe = self.query_exe(program)
1121            if self.is_exe(exe):
1122                return exe
1123
1124            # If not defined, let's look for it in the $PATH
1125            env = self.query_env()
1126            for path in env["PATH"].split(os.pathsep):
1127                exe_file = os.path.join(path, program)
1128                if self.is_exe(exe_file):
1129                    return exe_file
1130        return None
1131
1132    # More complex commands {{{2
1133    def retry(self, action, attempts=None, sleeptime=60, max_sleeptime=5 * 60,
1134              retry_exceptions=(Exception, ), good_statuses=None, cleanup=None,
1135              error_level=ERROR, error_message="%(action)s failed after %(attempts)d tries!",
1136              failure_status=-1, log_level=INFO, args=(), kwargs={}):
1137        """ generic retry command. Ported from `util.retry`_
1138
1139        Args:
1140            action (func): callable object to retry.
1141            attempts (int, optinal): maximum number of times to call actions.
1142                Defaults to `self.config.get('global_retries', 5)`
1143            sleeptime (int, optional): number of seconds to wait between
1144                attempts. Defaults to 60 and doubles each retry attempt, to
1145                a maximum of `max_sleeptime'
1146            max_sleeptime (int, optional): maximum value of sleeptime. Defaults
1147                to 5 minutes
1148            retry_exceptions (tuple, optional): Exceptions that should be caught.
1149                If exceptions other than those listed in `retry_exceptions' are
1150                raised from `action', they will be raised immediately. Defaults
1151                to (Exception)
1152            good_statuses (object, optional): return values which, if specified,
1153                will result in retrying if the return value isn't listed.
1154                Defaults to `None`.
1155            cleanup (func, optional): If `cleanup' is provided and callable
1156                it will be called immediately after an Exception is caught.
1157                No arguments will be passed to it. If your cleanup function
1158                requires arguments it is recommended that you wrap it in an
1159                argumentless function.
1160                Defaults to `None`.
1161            error_level (str, optional): log level name in case of error.
1162                Defaults to `ERROR`.
1163            error_message (str, optional): string format to use in case
1164                none of the attempts success. Defaults to
1165                '%(action)s failed after %(attempts)d tries!'
1166            failure_status (int, optional): flag to return in case the retries
1167                were not successfull. Defaults to -1.
1168            log_level (str, optional): log level name to use for normal activity.
1169                Defaults to `INFO`.
1170            args (tuple, optional): positional arguments to pass onto `action`.
1171            kwargs (dict, optional): key-value arguments to pass onto `action`.
1172
1173        Returns:
1174            object: return value of `action`.
1175            int: failure status in case of failure retries.
1176        """
1177        if not callable(action):
1178            self.fatal("retry() called with an uncallable method %s!" % action)
1179        if cleanup and not callable(cleanup):
1180            self.fatal("retry() called with an uncallable cleanup method %s!" % cleanup)
1181        if not attempts:
1182            attempts = self.config.get("global_retries", 5)
1183        if max_sleeptime < sleeptime:
1184            self.debug("max_sleeptime %d less than sleeptime %d" % (
1185                       max_sleeptime, sleeptime))
1186        n = 0
1187        while n <= attempts:
1188            retry = False
1189            n += 1
1190            try:
1191                self.log("retry: Calling %s with args: %s, kwargs: %s, attempt #%d" %
1192                         (action.__name__, str(args), str(kwargs), n), level=log_level)
1193                status = action(*args, **kwargs)
1194                if good_statuses and status not in good_statuses:
1195                    retry = True
1196            except retry_exceptions as e:
1197                retry = True
1198                error_message = "%s\nCaught exception: %s" % (error_message, str(e))
1199                self.log('retry: attempt #%d caught %s exception: %s' %
1200                         (n, type(e).__name__, str(e)), level=INFO)
1201
1202            if not retry:
1203                return status
1204            else:
1205                if cleanup:
1206                    cleanup()
1207                if n == attempts:
1208                    self.log(error_message % {'action': action, 'attempts': n}, level=error_level)
1209                    return failure_status
1210                if sleeptime > 0:
1211                    self.log("retry: Failed, sleeping %d seconds before retrying" %
1212                             sleeptime, level=log_level)
1213                    time.sleep(sleeptime)
1214                    sleeptime = sleeptime * 2
1215                    if sleeptime > max_sleeptime:
1216                        sleeptime = max_sleeptime
1217
1218    def query_env(self, partial_env=None, replace_dict=None,
1219                  purge_env=(),
1220                  set_self_env=None, log_level=DEBUG,
1221                  avoid_host_env=False):
1222        """ Environment query/generation method.
1223        The default, self.query_env(), will look for self.config['env']
1224        and replace any special strings in there ( %(PATH)s ).
1225        It will then store it as self.env for speeding things up later.
1226
1227        If you specify partial_env, partial_env will be used instead of
1228        self.config['env'], and we don't save self.env as it's a one-off.
1229
1230
1231        Args:
1232            partial_env (dict, optional): key-value pairs of the name and value
1233                of different environment variables. Defaults to an empty dictionary.
1234            replace_dict (dict, optional): key-value pairs to replace the old
1235                environment variables.
1236            purge_env (list): environment names to delete from the final
1237                environment dictionary.
1238            set_self_env (boolean, optional): whether or not the environment
1239                variables dictionary should be copied to `self`.
1240                Defaults to True.
1241            log_level (str, optional): log level name to use on normal operation.
1242                Defaults to `DEBUG`.
1243            avoid_host_env (boolean, optional): if set to True, we will not use
1244                any environment variables set on the host except PATH.
1245                Defaults to False.
1246
1247        Returns:
1248            dict: environment variables names with their values.
1249        """
1250        if partial_env is None:
1251            if self.env is not None:
1252                return self.env
1253            partial_env = self.config.get('env', None)
1254            if partial_env is None:
1255                partial_env = {}
1256            if set_self_env is None:
1257                set_self_env = True
1258
1259        env = {'PATH': os.environ['PATH']} if avoid_host_env else os.environ.copy()
1260
1261        default_replace_dict = self.query_abs_dirs()
1262        default_replace_dict['PATH'] = os.environ['PATH']
1263        if not replace_dict:
1264            replace_dict = default_replace_dict
1265        else:
1266            for key in default_replace_dict:
1267                if key not in replace_dict:
1268                    replace_dict[key] = default_replace_dict[key]
1269        for key in partial_env.keys():
1270            env[key] = partial_env[key] % replace_dict
1271            self.log("ENV: %s is now %s" % (key, env[key]), level=log_level)
1272        for k in purge_env:
1273            if k in env:
1274                del env[k]
1275        if os.name == 'nt':
1276            pref_encoding = locale.getpreferredencoding()
1277            for k, v in env.iteritems():
1278                # When run locally on Windows machines, some environment
1279                # variables may be unicode.
1280                if isinstance(v, unicode):
1281                    env[k] = v.encode(pref_encoding)
1282        if set_self_env:
1283            self.env = env
1284        return env
1285
1286    def query_exe(self, exe_name, exe_dict='exes', default=None,
1287                  return_type=None, error_level=FATAL):
1288        """One way to work around PATH rewrites.
1289
1290        By default, return exe_name, and we'll fall through to searching
1291        os.environ["PATH"].
1292        However, if self.config[exe_dict][exe_name] exists, return that.
1293        This lets us override exe paths via config file.
1294
1295        If we need runtime setting, we can build in self.exes support later.
1296
1297        Args:
1298            exe_name (str): name of the executable to search for.
1299            exe_dict(str, optional): name of the dictionary of executables
1300              present in `self.config`. Defaults to `exes`.
1301            default (str, optional): default name of the executable to search
1302              for. Defaults to `exe_name`.
1303            return_type (str, optional): type to which the original return
1304              value will be turn into. Only 'list', 'string' and `None` are
1305              supported. Defaults to `None`.
1306            error_level (str, optional): log level name to use on error.
1307
1308        Returns:
1309            list: in case return_type is 'list'
1310            str: in case return_type is 'string'
1311            None: in case return_type is `None`
1312            Any: if the found executable is not of type list, tuple nor str.
1313        """
1314        if default is None:
1315            default = exe_name
1316        exe = self.config.get(exe_dict, {}).get(exe_name, default)
1317        repl_dict = {}
1318        if hasattr(self.script_obj, 'query_abs_dirs'):
1319            # allow for 'make': '%(abs_work_dir)s/...' etc.
1320            dirs = self.script_obj.query_abs_dirs()
1321            repl_dict.update(dirs)
1322        if isinstance(exe, dict):
1323            found = False
1324            # allow for searchable paths of the exe
1325            for name, path in exe.iteritems():
1326                if isinstance(path, list) or isinstance(path, tuple):
1327                    path = [x % repl_dict for x in path]
1328                    if all([os.path.exists(section) for section in path]):
1329                        found = True
1330                elif isinstance(path, str):
1331                    path = path % repl_dict
1332                    if os.path.exists(path):
1333                        found = True
1334                else:
1335                    self.log("a exes %s dict's value is not a string, list, or tuple. Got key "
1336                             "%s and value %s" % (exe_name, name, str(path)), level=error_level)
1337                if found:
1338                    exe = path
1339                    break
1340            else:
1341                self.log("query_exe was a searchable dict but an existing "
1342                         "path could not be determined. Tried searching in "
1343                         "paths: %s" % (str(exe)), level=error_level)
1344                return None
1345        elif isinstance(exe, list) or isinstance(exe, tuple):
1346            exe = [x % repl_dict for x in exe]
1347        elif isinstance(exe, str):
1348            exe = exe % repl_dict
1349        else:
1350            self.log("query_exe: %s is not a list, tuple, dict, or string: "
1351                     "%s!" % (exe_name, str(exe)), level=error_level)
1352            return exe
1353        if return_type == "list":
1354            if isinstance(exe, str):
1355                exe = [exe]
1356        elif return_type == "string":
1357            if isinstance(exe, list):
1358                exe = subprocess.list2cmdline(exe)
1359        elif return_type is not None:
1360            self.log(
1361                "Unknown return_type type %s requested in query_exe!"
1362                % return_type, level=error_level)
1363        return exe
1364
1365    def run_command(self, command, cwd=None, error_list=None,
1366                    halt_on_failure=False, success_codes=None,
1367                    env=None, partial_env=None, return_type='status',
1368                    throw_exception=False, output_parser=None,
1369                    output_timeout=None, fatal_exit_code=2,
1370                    error_level=ERROR, **kwargs):
1371        """Run a command, with logging and error parsing.
1372        TODO: context_lines
1373
1374        error_list example:
1375        [{'regex': re.compile('^Error: LOL J/K'), level=IGNORE},
1376         {'regex': re.compile('^Error:'), level=ERROR, contextLines='5:5'},
1377         {'substr': 'THE WORLD IS ENDING', level=FATAL, contextLines='20:'}
1378        ]
1379        (context_lines isn't written yet)
1380
1381        Args:
1382            command (str | list | tuple): command or sequence of commands to
1383              execute and log.
1384            cwd (str, optional): directory path from where to execute the
1385              command. Defaults to `None`.
1386            error_list (list, optional): list of errors to pass to
1387              `mozharness.base.log.OutputParser`. Defaults to `None`.
1388            halt_on_failure (bool, optional): whether or not to redefine the
1389              log level as `FATAL` on errors. Defaults to False.
1390            success_codes (int, optional): numeric value to compare against
1391              the command return value.
1392            env (dict, optional): key-value of environment values to use to
1393              run the command. Defaults to None.
1394            partial_env (dict, optional): key-value of environment values to
1395              replace from the current environment values. Defaults to None.
1396            return_type (str, optional): if equal to 'num_errors' then the
1397              amount of errors matched by `error_list` is returned. Defaults
1398              to 'status'.
1399            throw_exception (bool, optional): whether or not to raise an
1400              exception if the return value of the command doesn't match
1401              any of the `success_codes`. Defaults to False.
1402            output_parser (OutputParser, optional): lets you provide an
1403              instance of your own OutputParser subclass. Defaults to `OutputParser`.
1404            output_timeout (int): amount of seconds to wait for output before
1405              the process is killed.
1406            fatal_exit_code (int, optional): call `self.fatal` if the return value
1407              of the command is not in `success_codes`. Defaults to 2.
1408            error_level (str, optional): log level name to use on error. Defaults
1409              to `ERROR`.
1410            **kwargs: Arbitrary keyword arguments.
1411
1412        Returns:
1413            int: -1 on error.
1414            Any: `command` return value is returned otherwise.
1415        """
1416        if success_codes is None:
1417            success_codes = [0]
1418        if cwd is not None:
1419            if not os.path.isdir(cwd):
1420                level = error_level
1421                if halt_on_failure:
1422                    level = FATAL
1423                self.log("Can't run command %s in non-existent directory '%s'!" %
1424                         (command, cwd), level=level)
1425                return -1
1426            self.info("Running command: %s in %s" % (command, cwd))
1427        else:
1428            self.info("Running command: %s" % command)
1429        if isinstance(command, list) or isinstance(command, tuple):
1430            self.info("Copy/paste: %s" % subprocess.list2cmdline(command))
1431        shell = True
1432        if isinstance(command, list) or isinstance(command, tuple):
1433            shell = False
1434        if env is None:
1435            if partial_env:
1436                self.info("Using partial env: %s" % pprint.pformat(partial_env))
1437                env = self.query_env(partial_env=partial_env)
1438        else:
1439            if hasattr(self, 'previous_env') and env == self.previous_env:
1440                self.info("Using env: (same as previous command)")
1441            else:
1442                self.info("Using env: %s" % pprint.pformat(env))
1443                self.previous_env = env
1444
1445        if output_parser is None:
1446            parser = OutputParser(config=self.config, log_obj=self.log_obj,
1447                                  error_list=error_list)
1448        else:
1449            parser = output_parser
1450
1451        try:
1452            if output_timeout:
1453                def processOutput(line):
1454                    parser.add_lines(line)
1455
1456                def onTimeout():
1457                    self.info(
1458                        "Automation Error: mozprocess timed out after "
1459                        "%s seconds running %s" % (str(output_timeout), str(command)))
1460
1461                p = ProcessHandler(command,
1462                                   shell=shell,
1463                                   env=env,
1464                                   cwd=cwd,
1465                                   storeOutput=False,
1466                                   onTimeout=(onTimeout,),
1467                                   processOutputLine=[processOutput])
1468                self.info("Calling %s with output_timeout %d" % (command, output_timeout))
1469                p.run(outputTimeout=output_timeout)
1470                p.wait()
1471                if p.timedOut:
1472                    self.log(
1473                        'timed out after %s seconds of no output' % output_timeout,
1474                        level=error_level
1475                    )
1476                returncode = int(p.proc.returncode)
1477            else:
1478                p = subprocess.Popen(command, shell=shell, stdout=subprocess.PIPE,
1479                                     cwd=cwd, stderr=subprocess.STDOUT, env=env,
1480                                     bufsize=0)
1481                loop = True
1482                while loop:
1483                    if p.poll() is not None:
1484                        """Avoid losing the final lines of the log?"""
1485                        loop = False
1486                    while True:
1487                        line = p.stdout.readline()
1488                        if not line:
1489                            break
1490                        parser.add_lines(line)
1491                returncode = p.returncode
1492        except KeyboardInterrupt:
1493            level = error_level
1494            if halt_on_failure:
1495                level = FATAL
1496            self.log("Process interrupted by the user, killing process with pid %s" % p.pid,
1497                     level=level)
1498            p.kill()
1499            return -1
1500        except OSError as e:
1501            level = error_level
1502            if halt_on_failure:
1503                level = FATAL
1504            self.log('caught OS error %s: %s while running %s' % (e.errno,
1505                     e.strerror, command), level=level)
1506            return -1
1507
1508        return_level = INFO
1509        if returncode not in success_codes:
1510            return_level = error_level
1511            if throw_exception:
1512                raise subprocess.CalledProcessError(returncode, command)
1513        self.log("Return code: %d" % returncode, level=return_level)
1514
1515        if halt_on_failure:
1516            _fail = False
1517            if returncode not in success_codes:
1518                self.log(
1519                    "%s not in success codes: %s" % (returncode, success_codes),
1520                    level=error_level
1521                )
1522                _fail = True
1523            if parser.num_errors:
1524                self.log("failures found while parsing output", level=error_level)
1525                _fail = True
1526            if _fail:
1527                self.return_code = fatal_exit_code
1528                self.fatal("Halting on failure while running %s" % command,
1529                           exit_code=fatal_exit_code)
1530        if return_type == 'num_errors':
1531            return parser.num_errors
1532        return returncode
1533
1534    def get_output_from_command(self, command, cwd=None,
1535                                halt_on_failure=False, env=None,
1536                                silent=False, log_level=INFO,
1537                                tmpfile_base_path='tmpfile',
1538                                return_type='output', save_tmpfiles=False,
1539                                throw_exception=False, fatal_exit_code=2,
1540                                ignore_errors=False, success_codes=None):
1541        """Similar to run_command, but where run_command is an
1542        os.system(command) analog, get_output_from_command is a `command`
1543        analog.
1544
1545        Less error checking by design, though if we figure out how to
1546        do it without borking the output, great.
1547
1548        TODO: binary mode? silent is kinda like that.
1549        TODO: since p.wait() can take a long time, optionally log something
1550        every N seconds?
1551        TODO: optionally only keep the first or last (N) line(s) of output?
1552        TODO: optionally only return the tmp_stdout_filename?
1553
1554        ignore_errors=True is for the case where a command might produce standard
1555        error output, but you don't particularly care; setting to True will
1556        cause standard error to be logged at DEBUG rather than ERROR
1557
1558        Args:
1559            command (str | list): command or list of commands to
1560              execute and log.
1561            cwd (str, optional): directory path from where to execute the
1562              command. Defaults to `None`.
1563            halt_on_failure (bool, optional): whether or not to redefine the
1564              log level as `FATAL` on error. Defaults to False.
1565            env (dict, optional): key-value of environment values to use to
1566              run the command. Defaults to None.
1567            silent (bool, optional): whether or not to output the stdout of
1568              executing the command. Defaults to False.
1569            log_level (str, optional): log level name to use on normal execution.
1570              Defaults to `INFO`.
1571            tmpfile_base_path (str, optional): base path of the file to which
1572              the output will be writen to. Defaults to 'tmpfile'.
1573            return_type (str, optional): if equal to 'output' then the complete
1574              output of the executed command is returned, otherwise the written
1575              filenames are returned. Defaults to 'output'.
1576            save_tmpfiles (bool, optional): whether or not to save the temporary
1577              files created from the command output. Defaults to False.
1578            throw_exception (bool, optional): whether or not to raise an
1579              exception if the return value of the command is not zero.
1580              Defaults to False.
1581            fatal_exit_code (int, optional): call self.fatal if the return value
1582              of the command match this value.
1583            ignore_errors (bool, optional): whether or not to change the log
1584              level to `ERROR` for the output of stderr. Defaults to False.
1585            success_codes (int, optional): numeric value to compare against
1586              the command return value.
1587
1588        Returns:
1589            None: if the cwd is not a directory.
1590            None: on IOError.
1591            tuple: stdout and stderr filenames.
1592            str: stdout output.
1593        """
1594        if cwd:
1595            if not os.path.isdir(cwd):
1596                level = ERROR
1597                if halt_on_failure:
1598                    level = FATAL
1599                self.log("Can't run command %s in non-existent directory %s!" %
1600                         (command, cwd), level=level)
1601                return None
1602            self.info("Getting output from command: %s in %s" % (command, cwd))
1603        else:
1604            self.info("Getting output from command: %s" % command)
1605        if isinstance(command, list):
1606            self.info("Copy/paste: %s" % subprocess.list2cmdline(command))
1607        # This could potentially return something?
1608        tmp_stdout = None
1609        tmp_stderr = None
1610        tmp_stdout_filename = '%s_stdout' % tmpfile_base_path
1611        tmp_stderr_filename = '%s_stderr' % tmpfile_base_path
1612        if success_codes is None:
1613            success_codes = [0]
1614
1615        # TODO probably some more elegant solution than 2 similar passes
1616        try:
1617            tmp_stdout = open(tmp_stdout_filename, 'w')
1618        except IOError:
1619            level = ERROR
1620            if halt_on_failure:
1621                level = FATAL
1622            self.log("Can't open %s for writing!" % tmp_stdout_filename +
1623                     self.exception(), level=level)
1624            return None
1625        try:
1626            tmp_stderr = open(tmp_stderr_filename, 'w')
1627        except IOError:
1628            level = ERROR
1629            if halt_on_failure:
1630                level = FATAL
1631            self.log("Can't open %s for writing!" % tmp_stderr_filename +
1632                     self.exception(), level=level)
1633            return None
1634        shell = True
1635        if isinstance(command, list):
1636            shell = False
1637
1638        p = subprocess.Popen(command, shell=shell, stdout=tmp_stdout,
1639                             cwd=cwd, stderr=tmp_stderr, env=env, bufsize=0)
1640        # XXX: changed from self.debug to self.log due to this error:
1641        #      TypeError: debug() takes exactly 1 argument (2 given)
1642        self.log(
1643            "Temporary files: %s and %s"
1644            % (tmp_stdout_filename, tmp_stderr_filename), level=DEBUG)
1645        p.wait()
1646        tmp_stdout.close()
1647        tmp_stderr.close()
1648        return_level = DEBUG
1649        output = None
1650        if return_type == 'output' or not silent:
1651            if os.path.exists(tmp_stdout_filename) and os.path.getsize(tmp_stdout_filename):
1652                output = self.read_from_file(tmp_stdout_filename,
1653                                             verbose=False)
1654                if not silent:
1655                    self.log("Output received:", level=log_level)
1656                    output_lines = output.rstrip().splitlines()
1657                    for line in output_lines:
1658                        if not line or line.isspace():
1659                            continue
1660                        if isinstance(line, binary_type):
1661                            line = line.decode("utf-8")
1662                        self.log(' %s' % line, level=log_level)
1663                    output = '\n'.join(output_lines)
1664        if os.path.exists(tmp_stderr_filename) and os.path.getsize(tmp_stderr_filename):
1665            if not ignore_errors:
1666                return_level = ERROR
1667            self.log("Errors received:", level=return_level)
1668            errors = self.read_from_file(tmp_stderr_filename,
1669                                         verbose=False)
1670            for line in errors.rstrip().splitlines():
1671                if not line or line.isspace():
1672                    continue
1673                if isinstance(line, binary_type):
1674                    line = line.decode("utf-8")
1675                self.log(' %s' % line, level=return_level)
1676        elif p.returncode not in success_codes and not ignore_errors:
1677            return_level = ERROR
1678        # Clean up.
1679        if not save_tmpfiles:
1680            self.rmtree(tmp_stderr_filename, log_level=DEBUG)
1681            self.rmtree(tmp_stdout_filename, log_level=DEBUG)
1682        if p.returncode and throw_exception:
1683            raise subprocess.CalledProcessError(p.returncode, command)
1684        self.log("Return code: %d" % p.returncode, level=return_level)
1685        if halt_on_failure and return_level == ERROR:
1686            self.return_code = fatal_exit_code
1687            self.fatal("Halting on failure while running %s" % command,
1688                       exit_code=fatal_exit_code)
1689        # Hm, options on how to return this? I bet often we'll want
1690        # output_lines[0] with no newline.
1691        if return_type != 'output':
1692            return (tmp_stdout_filename, tmp_stderr_filename)
1693        else:
1694            return output
1695
1696    def _touch_file(self, file_name, times=None, error_level=FATAL):
1697        """touch a file.
1698
1699        Args:
1700            file_name (str): name of the file to touch.
1701            times (tuple, optional): 2-tuple as specified by `os.utime`_
1702              Defaults to None.
1703            error_level (str, optional): log level name in case of error.
1704              Defaults to `FATAL`.
1705
1706        .. _`os.utime`:
1707           https://docs.python.org/3.4/library/os.html?highlight=os.utime#os.utime
1708        """
1709        self.info("Touching: %s" % file_name)
1710        try:
1711            os.utime(file_name, times)
1712        except OSError:
1713            try:
1714                open(file_name, 'w').close()
1715            except IOError as e:
1716                msg = "I/O error(%s): %s" % (e.errno, e.strerror)
1717                self.log(msg, error_level=error_level)
1718        os.utime(file_name, times)
1719
1720    def unpack(self, filename, extract_to, extract_dirs=None,
1721               error_level=ERROR, fatal_exit_code=2, verbose=False):
1722        """The method allows to extract a file regardless of its extension.
1723
1724        Args:
1725            filename (str): filename of the compressed file.
1726            extract_to (str): where to extract the compressed file.
1727            extract_dirs (list, optional): directories inside the archive file to extract.
1728                                           Defaults to `None`.
1729            fatal_exit_code (int, optional): call `self.fatal` if the return value
1730              of the command is not in `success_codes`. Defaults to 2.
1731            verbose (bool, optional): whether or not extracted content should be displayed.
1732                                      Defaults to False.
1733
1734        Raises:
1735            IOError: on `filename` file not found.
1736
1737        """
1738        if not os.path.isfile(filename):
1739            raise IOError('Could not find file to extract: %s' % filename)
1740
1741        if zipfile.is_zipfile(filename):
1742            try:
1743                self.info('Using ZipFile to extract {} to {}'.format(filename, extract_to))
1744                with zipfile.ZipFile(filename) as bundle:
1745                    for entry in self._filter_entries(bundle.namelist(), extract_dirs):
1746                        if verbose:
1747                            self.info(' %s' % entry)
1748                        bundle.extract(entry, path=extract_to)
1749
1750                        # ZipFile doesn't preserve permissions during extraction:
1751                        # http://bugs.python.org/issue15795
1752                        fname = os.path.realpath(os.path.join(extract_to, entry))
1753                        mode = bundle.getinfo(entry).external_attr >> 16 & 0x1FF
1754                        # Only set permissions if attributes are available. Otherwise all
1755                        # permissions will be removed eg. on Windows.
1756                        if mode:
1757                            os.chmod(fname, mode)
1758            except zipfile.BadZipfile as e:
1759                self.log('%s (%s)' % (e.message, filename),
1760                         level=error_level, exit_code=fatal_exit_code)
1761
1762        # Bug 1211882 - is_tarfile cannot be trusted for dmg files
1763        elif tarfile.is_tarfile(filename) and not filename.lower().endswith('.dmg'):
1764            try:
1765                self.info('Using TarFile to extract {} to {}'.format(filename, extract_to))
1766                with tarfile.open(filename) as bundle:
1767                    for entry in self._filter_entries(bundle.getnames(), extract_dirs):
1768                        if verbose:
1769                            self.info(' %s' % entry)
1770                        bundle.extract(entry, path=extract_to)
1771            except tarfile.TarError as e:
1772                self.log('%s (%s)' % (e.message, filename),
1773                         level=error_level, exit_code=fatal_exit_code)
1774        else:
1775            self.log('No extraction method found for: %s' % filename,
1776                     level=error_level, exit_code=fatal_exit_code)
1777
1778    def is_taskcluster(self):
1779        """Returns boolean indicating if we're running in TaskCluster."""
1780        # This may need expanding in the future to work on
1781        return 'TASKCLUSTER_WORKER_TYPE' in os.environ
1782
1783
def PreScriptRun(func):
    """Mark a method to be invoked before any script action runs.

    BaseScript.run() calls every method carrying this marker at the very
    start of execution. Return values are discarded; a raised exception
    aborts the script.
    """
    func._pre_run_listener = True
    return func
1794
1795
def PostScriptRun(func):
    """Mark a method to be invoked once script execution has finished.

    The counterpart of PreScriptRun: the marked method is called at the
    end of BaseScript.run(), and fires unconditionally — even when the
    run itself failed.
    """
    func._post_run_listener = True
    return func
1804
1805
def PreScriptAction(action=None):
    """Decorator for methods that will be called at the beginning of each action.

    Each method on a BaseScript having this decorator will be called during
    BaseScript.run() before an individual action is executed. The method will
    receive the action's name as an argument.

    If no values are passed to the decorator, it will be applied to every
    action. If a string is passed, the decorated function will only be called
    for the action of that name.

    The return value of the method is ignored. Exceptions will abort execution.
    """
    def _wrapped(func):
        func._pre_action_listener = action
        return func

    # Bare usage (`@PreScriptAction` without parentheses) passes the
    # decorated function itself as `action`. `callable()` replaces the
    # old `type(action) == type(_wrapped)` check, which is the same idea
    # expressed idiomatically.
    if callable(action):
        action._pre_action_listener = None
        return action

    return _wrapped
1831
1832
def PostScriptAction(action=None):
    """Decorator for methods that will be called at the end of each action.

    This behaves similarly to PreScriptAction. It varies in that it is called
    after execution of the action.

    The decorated method will receive the action name as a positional argument.
    It will then receive the following named arguments:

        success - Bool indicating whether the action finished successfully.

    The decorated method will always be called, even if the action threw an
    exception.

    The return value is ignored.
    """
    def _wrapped(func):
        func._post_action_listener = action
        return func

    # Bare usage (`@PostScriptAction` without parentheses) passes the
    # decorated function itself as `action`; `callable()` replaces the old
    # `type(action) == type(_wrapped)` comparison.
    if callable(action):
        action._post_action_listener = None
        return action

    return _wrapped
1861
1862
1863# BaseScript {{{1
1864class BaseScript(ScriptMixin, LogMixin, object):
    def __init__(self, config_options=None, ConfigClass=BaseConfig,
                 default_log_level="info", **kwargs):
        """Build config, logging, and listener registry for a script run.

        Args:
            config_options (list, optional): extra config options handed to
              `ConfigClass`. Defaults to an empty list.
            ConfigClass (type, optional): class used to parse the command
              line and config files. Defaults to `BaseConfig`.
            default_log_level (str, optional): log level name used when
              creating the log object. Defaults to "info".
            **kwargs: forwarded unchanged to `ConfigClass`.
        """
        self._return_code = 0
        super(BaseScript, self).__init__()

        # Collect decorated methods. We simply iterate over the attributes of
        # the current class instance and look for signatures deposited by
        # the decorators.
        self._listeners = dict(
            pre_run=[],
            pre_action=[],
            post_action=[],
            post_run=[],
        )
        for k in dir(self):
            item = getattr(self, k)

            # We only decorate methods, so ignore other types.
            if not inspect.ismethod(item):
                continue

            if hasattr(item, '_pre_run_listener'):
                self._listeners['pre_run'].append(k)

            # Action listeners are stored as (method_name, action_filter);
            # a filter of None means "fires for every action".
            if hasattr(item, '_pre_action_listener'):
                self._listeners['pre_action'].append((
                    k,
                    item._pre_action_listener))

            if hasattr(item, '_post_action_listener'):
                self._listeners['post_action'].append((
                    k,
                    item._post_action_listener))

            if hasattr(item, '_post_run_listener'):
                self._listeners['post_run'].append(k)

        self.log_obj = None
        self.abs_dirs = None
        if config_options is None:
            config_options = []
        self.summary_list = []
        self.failures = []
        rw_config = ConfigClass(config_options=config_options, **kwargs)
        self.config = rw_config.get_read_only_config()
        self.actions = tuple(rw_config.actions)
        self.all_actions = tuple(rw_config.all_actions)
        self.env = None
        self.new_log_obj(default_log_level=default_log_level)
        self.script_obj = self

        # Indicate we're a source checkout if VCS directory is present at the
        # appropriate place. This code will break if this file is ever moved
        # to another directory.
        self.topsrcdir = None

        srcreldir = 'testing/mozharness/mozharness/base'
        here = os.path.normpath(os.path.dirname(__file__))
        if here.replace('\\', '/').endswith(srcreldir):
            topsrcdir = os.path.normpath(os.path.join(here, '..', '..',
                                                      '..', '..'))
            hg_dir = os.path.join(topsrcdir, '.hg')
            git_dir = os.path.join(topsrcdir, '.git')
            if os.path.isdir(hg_dir) or os.path.isdir(git_dir):
                self.topsrcdir = topsrcdir

        # Set self.config to read-only.
        #
        # We can create intermediate config info programmatically from
        # this in a repeatable way, with logs; this is how we straddle the
        # ideal-but-not-user-friendly static config and the
        # easy-to-write-hard-to-debug writable config.
        #
        # To allow for other, script-specific configurations
        # (e.g., props json parsing), before locking,
        # call self._pre_config_lock().  If needed, this method can
        # alter self.config.
        self._pre_config_lock(rw_config)
        self._config_lock()

        self.info("Run as %s" % rw_config.command_line)
        if self.config.get("dump_config_hierarchy"):
            # we only wish to dump and display what self.config is made up of,
            # against the current script + args, without actually running any
            # actions
            self._dump_config_hierarchy(rw_config.all_cfg_files_and_dicts)
        if self.config.get("dump_config"):
            self.dump_config(exit_on_finish=True)
1953
1954    def _dump_config_hierarchy(self, cfg_files):
1955        """ interpret each config file used.
1956
1957        This will show which keys/values are being added or overwritten by
1958        other config files depending on their hierarchy (when they were added).
1959        """
1960        # go through each config_file. We will start with the lowest and
1961        # print its keys/values that are being used in self.config. If any
1962        # keys/values are present in a config file with a higher precedence,
1963        # ignore those.
1964        dirs = self.query_abs_dirs()
1965        cfg_files_dump_config = {}  # we will dump this to file
1966        # keep track of keys that did not come from a config file
1967        keys_not_from_file = set(self.config.keys())
1968        if not cfg_files:
1969            cfg_files = []
1970        self.info("Total config files: %d" % (len(cfg_files)))
1971        if len(cfg_files):
1972            self.info("cfg files used from lowest precedence to highest:")
1973        for i, (target_file, target_dict) in enumerate(cfg_files):
1974            unique_keys = set(target_dict.keys())
1975            unique_dict = {}
1976            # iterate through the target_dicts remaining 'higher' cfg_files
1977            remaining_cfgs = cfg_files[slice(i + 1, len(cfg_files))]
1978            # where higher == more precedent
1979            for ii, (higher_file, higher_dict) in enumerate(remaining_cfgs):
1980                # now only keep keys/values that are not overwritten by a
1981                # higher config
1982                unique_keys = unique_keys.difference(set(higher_dict.keys()))
1983            # unique_dict we know now has only keys/values that are unique to
1984            # this config file.
1985            unique_dict = dict(
1986                (key, target_dict.get(key)) for key in unique_keys
1987            )
1988            cfg_files_dump_config[target_file] = unique_dict
1989            self.action_message("Config File %d: %s" % (i + 1, target_file))
1990            self.info(pprint.pformat(unique_dict))
1991            # let's also find out which keys/values from self.config are not
1992            # from each target config file dict
1993            keys_not_from_file = keys_not_from_file.difference(
1994                set(target_dict.keys())
1995            )
1996        not_from_file_dict = dict(
1997            (key, self.config.get(key)) for key in keys_not_from_file
1998        )
1999        cfg_files_dump_config["not_from_cfg_file"] = not_from_file_dict
2000        self.action_message("Not from any config file (default_config, "
2001                            "cmd line options, etc)")
2002        self.info(pprint.pformat(not_from_file_dict))
2003
2004        # finally, let's dump this output as JSON and exit early
2005        self.dump_config(
2006            os.path.join(dirs['abs_log_dir'], "localconfigfiles.json"),
2007            cfg_files_dump_config, console_output=False, exit_on_finish=True
2008        )
2009
2010    def _pre_config_lock(self, rw_config):
2011        """This empty method can allow for config checking and manipulation
2012        before the config lock, when overridden in scripts.
2013        """
2014        pass
2015
2016    def _config_lock(self):
2017        """After this point, the config is locked and should not be
2018        manipulated (based on mozharness.base.config.ReadOnlyDict)
2019        """
2020        self.config.lock()
2021
2022    def _possibly_run_method(self, method_name, error_if_missing=False):
2023        """This is here for run().
2024        """
2025        if hasattr(self, method_name) and callable(getattr(self, method_name)):
2026            return getattr(self, method_name)()
2027        elif error_if_missing:
2028            self.error("No such method %s!" % method_name)
2029
    def run_action(self, action):
        """Run one action: its pre/post listeners and flight methods.

        Execution order: pre-action listeners, preflight_ACTION, ACTION,
        postflight_ACTION, post-action listeners. A failing pre-action
        listener aborts the script, but fires the post-action listeners
        (with success=False) first. Post-action listeners always run, even
        when the main action method raises.

        Args:
            action (str): the action name; dashes are converted to
              underscores to form the method name. Skipped when not in
              self.actions.
        """
        if action not in self.actions:
            self.action_message("Skipping %s step." % action)
            return

        method_name = action.replace("-", "_")
        self.action_message("Running %s step." % action)

        # An exception during a pre action listener should abort execution.
        for fn, target in self._listeners['pre_action']:
            # A target of None means the listener fires for every action.
            if target is not None and target != action:
                continue

            try:
                self.info("Running pre-action listener: %s" % fn)
                method = getattr(self, fn)
                method(action)
            except Exception:
                self.error("Exception during pre-action for %s: %s" % (
                    action, traceback.format_exc()))

                # Give post-action listeners a chance to clean up before
                # aborting.
                for fn, target in self._listeners['post_action']:
                    if target is not None and target != action:
                        continue

                    try:
                        self.info("Running post-action listener: %s" % fn)
                        method = getattr(self, fn)
                        method(action, success=False)
                    except Exception:
                        self.error("An additional exception occurred during "
                                   "post-action for %s: %s"
                                   % (action, traceback.format_exc()))

                self.fatal("Aborting due to exception in pre-action listener.")

        # We always run post action listeners, even if the main routine failed.
        success = False
        try:
            self.info("Running main action method: %s" % method_name)
            self._possibly_run_method("preflight_%s" % method_name)
            self._possibly_run_method(method_name, error_if_missing=True)
            self._possibly_run_method("postflight_%s" % method_name)
            success = True
        finally:
            post_success = True
            for fn, target in self._listeners['post_action']:
                if target is not None and target != action:
                    continue

                try:
                    self.info("Running post-action listener: %s" % fn)
                    method = getattr(self, fn)
                    # Listeners only see success=True when the action ran
                    # cleanly AND the accumulated return code is still 0.
                    method(action, success=success and self.return_code == 0)
                except Exception:
                    post_success = False
                    self.error("Exception during post-action for %s: %s" % (
                        action, traceback.format_exc()))

            step_result = 'success' if success else 'failed'
            self.action_message("Finished %s step (%s)" % (action, step_result))

            if not post_success:
                self.fatal("Aborting due to failure in post-action listener.")
2094
    def run(self):
        """Default run method.
        This is the "do everything" method, based on actions and all_actions.

        First run self.dump_config() if it exists.
        Second, go through the list of all_actions.
        If they're in the list of self.actions, try to run
        self.preflight_ACTION(), self.ACTION(), and self.postflight_ACTION().

        Preflight is sanity checking before doing anything time consuming or
        destructive.

        Postflight is quick testing for success after an action.

        Returns:
            int: the script's accumulated return code.
        """
        for fn in self._listeners['pre_run']:
            try:
                self.info("Running pre-run listener: %s" % fn)
                method = getattr(self, fn)
                method()
            except Exception:
                self.error("Exception during pre-run listener: %s" %
                           traceback.format_exc())

                # A failed pre-run listener aborts the script, but post-run
                # listeners still fire first so cleanup can happen.
                for fn in self._listeners['post_run']:
                    try:
                        method = getattr(self, fn)
                        method()
                    except Exception:
                        self.error("An additional exception occurred during a "
                                   "post-run listener: %s" % traceback.format_exc())

                self.fatal("Aborting due to failure in pre-run listener.")

        self.dump_config()
        try:
            for action in self.all_actions:
                self.run_action(action)
        except Exception:
            self.fatal("Uncaught exception: %s" % traceback.format_exc())
        finally:
            # Post-run listeners always fire, even when an action raised.
            post_success = True
            for fn in self._listeners['post_run']:
                try:
                    self.info("Running post-run listener: %s" % fn)
                    method = getattr(self, fn)
                    method()
                except Exception:
                    post_success = False
                    self.error("Exception during post-run listener: %s" %
                               traceback.format_exc())

            if not post_success:
                self.fatal("Aborting due to failure in post-run listener.")

        return self.return_code
2151
2152    def run_and_exit(self):
2153        """Runs the script and exits the current interpreter."""
2154        rc = self.run()
2155        if rc != 0:
2156            self.warning("returning nonzero exit status %d" % rc)
2157        sys.exit(rc)
2158
2159    def clobber(self):
2160        """
2161        Delete the working directory
2162        """
2163        dirs = self.query_abs_dirs()
2164        self.rmtree(dirs['abs_work_dir'], error_level=FATAL)
2165
2166    def query_abs_dirs(self):
2167        """We want to be able to determine where all the important things
2168        are.  Absolute paths lend themselves well to this, though I wouldn't
2169        be surprised if this causes some issues somewhere.
2170
2171        This should be overridden in any script that has additional dirs
2172        to query.
2173
2174        The query_* methods tend to set self.VAR variables as their
2175        runtime cache.
2176        """
2177        if self.abs_dirs:
2178            return self.abs_dirs
2179        c = self.config
2180        dirs = {}
2181        dirs['base_work_dir'] = c['base_work_dir']
2182        dirs['abs_work_dir'] = os.path.join(c['base_work_dir'], c['work_dir'])
2183        dirs['abs_log_dir'] = os.path.join(c['base_work_dir'], c.get('log_dir', 'logs'))
2184        if 'GECKO_PATH' in os.environ:
2185            dirs['abs_src_dir'] = os.environ['GECKO_PATH']
2186        self.abs_dirs = dirs
2187        return self.abs_dirs
2188
2189    def dump_config(self, file_path=None, config=None,
2190                    console_output=True, exit_on_finish=False):
2191        """Dump self.config to localconfig.json
2192        """
2193        config = config or self.config
2194        dirs = self.query_abs_dirs()
2195        if not file_path:
2196            file_path = os.path.join(dirs['abs_log_dir'], "localconfig.json")
2197        self.info("Dumping config to %s." % file_path)
2198        self.mkdir_p(os.path.dirname(file_path))
2199        json_config = json.dumps(config, sort_keys=True, indent=4)
2200        fh = codecs.open(file_path, encoding='utf-8', mode='w+')
2201        fh.write(json_config)
2202        fh.close()
2203        if console_output:
2204            self.info(pprint.pformat(config))
2205        if exit_on_finish:
2206            sys.exit()
2207
2208    # logging {{{2
2209    def new_log_obj(self, default_log_level="info"):
2210        c = self.config
2211        log_dir = os.path.join(c['base_work_dir'], c.get('log_dir', 'logs'))
2212        log_config = {
2213            "logger_name": 'Simple',
2214            "log_name": 'log',
2215            "log_dir": log_dir,
2216            "log_level": default_log_level,
2217            "log_format": '%(asctime)s %(levelname)8s - %(message)s',
2218            "log_to_console": True,
2219            "append_to_log": False,
2220        }
2221        log_type = self.config.get("log_type", "console")
2222        for key in log_config.keys():
2223            value = self.config.get(key, None)
2224            if value is not None:
2225                log_config[key] = value
2226        if log_type == "multi":
2227            self.log_obj = MultiFileLogger(**log_config)
2228        elif log_type == "simple":
2229            self.log_obj = SimpleFileLogger(**log_config)
2230        else:
2231            self.log_obj = ConsoleLogger(**log_config)
2232
2233    def action_message(self, message):
2234        self.info("[mozharness: %sZ] %s" % (
2235            datetime.datetime.utcnow().isoformat(' '), message))
2236
2237    def summary(self):
2238        """Print out all the summary lines added via add_summary()
2239        throughout the script.
2240
2241        I'd like to revisit how to do this in a prettier fashion.
2242        """
2243        self.action_message("%s summary:" % self.__class__.__name__)
2244        if self.summary_list:
2245            for item in self.summary_list:
2246                try:
2247                    self.log(item['message'], level=item['level'])
2248                except ValueError:
2249                    """log is closed; print as a default. Ran into this
2250                    when calling from __del__()"""
2251                    print("### Log is closed! (%s)" % item['message'])
2252
2253    def add_summary(self, message, level=INFO):
2254        self.summary_list.append({'message': message, 'level': level})
2255        # TODO write to a summary-only log?
2256        # Summaries need a lot more love.
2257        self.log(message, level=level)
2258
2259    def summarize_success_count(self, success_count, total_count,
2260                                message="%d of %d successful.",
2261                                level=None):
2262        if level is None:
2263            level = INFO
2264            if success_count < total_count:
2265                level = ERROR
2266        self.add_summary(message % (success_count, total_count),
2267                         level=level)
2268
2269    def get_hash_for_file(self, file_path, hash_type="sha512"):
2270        bs = 65536
2271        hasher = hashlib.new(hash_type)
2272        with open(file_path, 'rb') as fh:
2273            buf = fh.read(bs)
2274            while len(buf) > 0:
2275                hasher.update(buf)
2276                buf = fh.read(bs)
2277        return hasher.hexdigest()
2278
2279    @property
2280    def return_code(self):
2281        return self._return_code
2282
2283    @return_code.setter
2284    def return_code(self, code):
2285        old_return_code, self._return_code = self._return_code, code
2286        if old_return_code != code:
2287            self.warning("setting return code to %d" % code)
2288