1# ***** BEGIN LICENSE BLOCK ***** 2# This Source Code Form is subject to the terms of the Mozilla Public 3# License, v. 2.0. If a copy of the MPL was not distributed with this file, 4# You can obtain one at http://mozilla.org/MPL/2.0/. 5# ***** END LICENSE BLOCK ***** 6"""Generic script objects. 7 8script.py, along with config.py and log.py, represents the core of 9mozharness. 10""" 11 12from __future__ import absolute_import, print_function 13 14import codecs 15import datetime 16import errno 17import fnmatch 18import functools 19import gzip 20import hashlib 21import inspect 22import itertools 23import os 24import platform 25import pprint 26import re 27import shutil 28import socket 29import ssl 30import subprocess 31import sys 32import tarfile 33import time 34import traceback 35import zipfile 36import zlib 37from contextlib import contextmanager 38from io import BytesIO 39 40import six 41from six import binary_type 42 43from mozprocess import ProcessHandler 44 45import mozinfo 46from mozharness.base.config import BaseConfig 47from mozharness.base.log import ( 48 DEBUG, 49 ERROR, 50 FATAL, 51 INFO, 52 WARNING, 53 ConsoleLogger, 54 LogMixin, 55 MultiFileLogger, 56 OutputParser, 57 SimpleFileLogger, 58) 59 60try: 61 import httplib 62except ImportError: 63 import http.client as httplib 64try: 65 import simplejson as json 66except ImportError: 67 import json 68try: 69 from urllib2 import quote, urlopen, Request 70except ImportError: 71 from urllib.request import quote, urlopen, Request 72try: 73 import urlparse 74except ImportError: 75 import urllib.parse as urlparse 76if os.name == "nt": 77 import locale 78 79 try: 80 import win32file 81 import win32api 82 83 PYWIN32 = True 84 except ImportError: 85 PYWIN32 = False 86 87try: 88 from urllib2 import HTTPError, URLError 89except ImportError: 90 from urllib.error import HTTPError, URLError 91 92 93class ContentLengthMismatch(Exception): 94 pass 95 96 97def platform_name(): 98 pm = PlatformMixin() 99 100 if pm._is_linux() 
and pm._is_64_bit(): 101 return "linux64" 102 elif pm._is_linux() and not pm._is_64_bit(): 103 return "linux" 104 elif pm._is_darwin(): 105 return "macosx" 106 elif pm._is_windows() and pm._is_64_bit(): 107 return "win64" 108 elif pm._is_windows() and not pm._is_64_bit(): 109 return "win32" 110 else: 111 return None 112 113 114class PlatformMixin(object): 115 def _is_windows(self): 116 """check if the current operating system is Windows. 117 118 Returns: 119 bool: True if the current platform is Windows, False otherwise 120 """ 121 system = platform.system() 122 if system in ("Windows", "Microsoft"): 123 return True 124 if system.startswith("CYGWIN"): 125 return True 126 if os.name == "nt": 127 return True 128 129 def _is_darwin(self): 130 """check if the current operating system is Darwin. 131 132 Returns: 133 bool: True if the current platform is Darwin, False otherwise 134 """ 135 if platform.system() in ("Darwin"): 136 return True 137 if sys.platform.startswith("darwin"): 138 return True 139 140 def _is_linux(self): 141 """check if the current operating system is a Linux distribution. 142 143 Returns: 144 bool: True if the current platform is a Linux distro, False otherwise 145 """ 146 if platform.system() in ("Linux"): 147 return True 148 if sys.platform.startswith("linux"): 149 return True 150 151 def _is_debian(self): 152 """check if the current operating system is explicitly Debian. 153 This intentionally doesn't count Debian derivatives like Ubuntu. 154 155 Returns: 156 bool: True if the current platform is debian, False otherwise 157 """ 158 if not self._is_linux(): 159 return False 160 self.info(mozinfo.linux_distro) 161 re_debian_distro = re.compile("debian") 162 return re_debian_distro.match(mozinfo.linux_distro) is not None 163 164 def _is_redhat_based(self): 165 """check if the current operating system is a Redhat derived Linux distribution. 
166 167 Returns: 168 bool: True if the current platform is a Redhat Linux distro, False otherwise 169 """ 170 if not self._is_linux(): 171 return False 172 re_redhat_distro = re.compile("Redhat|Fedora|CentOS|Oracle") 173 return re_redhat_distro.match(mozinfo.linux_distro) is not None 174 175 def _is_64_bit(self): 176 if self._is_darwin(): 177 # osx is a special snowflake and to ensure the arch, it is better to use the following 178 return ( 179 sys.maxsize > 2 ** 32 180 ) # context: https://docs.python.org/2/library/platform.html 181 else: 182 # Using machine() gives you the architecture of the host rather 183 # than the build type of the Python binary 184 return "64" in platform.machine() 185 186 187# ScriptMixin {{{1 188class ScriptMixin(PlatformMixin): 189 """This mixin contains simple filesystem commands and the like. 190 191 It also contains some very special but very complex methods that, 192 together with logging and config, provide the base for all scripts 193 in this harness. 194 195 WARNING !!! 196 This class depends entirely on `LogMixin` methods in such a way that it will 197 only works if a class inherits from both `ScriptMixin` and `LogMixin` 198 simultaneously. 199 200 Depends on self.config of some sort. 201 202 Attributes: 203 env (dict): a mapping object representing the string environment. 204 script_obj (ScriptMixin): reference to a ScriptMixin instance. 205 """ 206 207 env = None 208 script_obj = None 209 ssl_context = None 210 211 def query_filesize(self, file_path): 212 self.info("Determining filesize for %s" % file_path) 213 length = os.path.getsize(file_path) 214 self.info(" %s" % str(length)) 215 return length 216 217 # TODO this should be parallelized with the to-be-written BaseHelper! 
218 def query_sha512sum(self, file_path): 219 self.info("Determining sha512sum for %s" % file_path) 220 m = hashlib.sha512() 221 contents = self.read_from_file(file_path, verbose=False, open_mode="rb") 222 m.update(contents) 223 sha512 = m.hexdigest() 224 self.info(" %s" % sha512) 225 return sha512 226 227 def platform_name(self): 228 """Return the platform name on which the script is running on. 229 Returns: 230 None: for failure to determine the platform. 231 str: The name of the platform (e.g. linux64) 232 """ 233 return platform_name() 234 235 # Simple filesystem commands {{{2 236 def mkdir_p(self, path, error_level=ERROR): 237 """Create a directory if it doesn't exists. 238 This method also logs the creation, error or current existence of the 239 directory to be created. 240 241 Args: 242 path (str): path of the directory to be created. 243 error_level (str): log level name to be used in case of error. 244 245 Returns: 246 None: for sucess. 247 int: -1 on error 248 """ 249 250 if not os.path.exists(path): 251 self.info("mkdir: %s" % path) 252 try: 253 os.makedirs(path) 254 except OSError: 255 self.log("Can't create directory %s!" % path, level=error_level) 256 return -1 257 else: 258 self.debug("mkdir_p: %s Already exists." % path) 259 260 def rmtree(self, path, log_level=INFO, error_level=ERROR, exit_code=-1): 261 """Delete an entire directory tree and log its result. 262 This method also logs the platform rmtree function, its retries, errors, 263 and current existence of the directory. 264 265 Args: 266 path (str): path to the directory tree root to remove. 267 log_level (str, optional): log level name to for this operation. Defaults 268 to `INFO`. 269 error_level (str, optional): log level name to use in case of error. 270 Defaults to `ERROR`. 271 exit_code (int, optional): useless parameter, not use here. 
272 Defaults to -1 273 274 Returns: 275 None: for success 276 """ 277 278 self.log("rmtree: %s" % path, level=log_level) 279 error_message = "Unable to remove %s!" % path 280 if self._is_windows(): 281 # Call _rmtree_windows() directly, since even checking 282 # os.path.exists(path) will hang if path is longer than MAX_PATH. 283 self.info("Using _rmtree_windows ...") 284 return self.retry( 285 self._rmtree_windows, 286 error_level=error_level, 287 error_message=error_message, 288 args=(path,), 289 log_level=log_level, 290 ) 291 if os.path.exists(path): 292 if os.path.isdir(path): 293 return self.retry( 294 shutil.rmtree, 295 error_level=error_level, 296 error_message=error_message, 297 retry_exceptions=(OSError,), 298 args=(path,), 299 log_level=log_level, 300 ) 301 else: 302 return self.retry( 303 os.remove, 304 error_level=error_level, 305 error_message=error_message, 306 retry_exceptions=(OSError,), 307 args=(path,), 308 log_level=log_level, 309 ) 310 else: 311 self.debug("%s doesn't exist." % path) 312 313 def query_msys_path(self, path): 314 """replaces the Windows harddrive letter path style with a linux 315 path style, e.g. C:// --> /C/ 316 Note: method, not used in any script. 317 318 Args: 319 path (str?): path to convert to the linux path style. 320 Returns: 321 str: in case `path` is a string. The result is the path with the new notation. 322 type(path): `path` itself is returned in case `path` is not str type. 323 """ 324 if not isinstance(path, six.string_types): 325 return path 326 path = path.replace("\\", "/") 327 328 def repl(m): 329 return "/%s/" % m.group(1) 330 331 path = re.sub(r"""^([a-zA-Z]):/""", repl, path) 332 return path 333 334 def _rmtree_windows(self, path): 335 """Windows-specific rmtree that handles path lengths longer than MAX_PATH. 336 Ported from clobberer.py. 337 338 Args: 339 path (str): directory path to remove. 340 341 Returns: 342 None: if the path doesn't exists. 
343 int: the return number of calling `self.run_command` 344 int: in case the path specified is not a directory but a file. 345 0 on success, non-zero on error. Note: The returned value 346 is the result of calling `win32file.DeleteFile` 347 """ 348 349 assert self._is_windows() 350 path = os.path.realpath(path) 351 full_path = "\\\\?\\" + path 352 if not os.path.exists(full_path): 353 return 354 if not PYWIN32: 355 if not os.path.isdir(path): 356 return self.run_command('del /F /Q "%s"' % path) 357 else: 358 return self.run_command('rmdir /S /Q "%s"' % path) 359 # Make sure directory is writable 360 win32file.SetFileAttributesW("\\\\?\\" + path, win32file.FILE_ATTRIBUTE_NORMAL) 361 # Since we call rmtree() with a file, sometimes 362 if not os.path.isdir("\\\\?\\" + path): 363 return win32file.DeleteFile("\\\\?\\" + path) 364 365 for ffrec in win32api.FindFiles("\\\\?\\" + path + "\\*.*"): 366 file_attr = ffrec[0] 367 name = ffrec[8] 368 if name == "." or name == "..": 369 continue 370 full_name = os.path.join(path, name) 371 372 if file_attr & win32file.FILE_ATTRIBUTE_DIRECTORY: 373 self._rmtree_windows(full_name) 374 else: 375 try: 376 win32file.SetFileAttributesW( 377 "\\\\?\\" + full_name, win32file.FILE_ATTRIBUTE_NORMAL 378 ) 379 win32file.DeleteFile("\\\\?\\" + full_name) 380 except Exception: 381 # DeleteFile fails on long paths, del /f /q works just fine 382 self.run_command('del /F /Q "%s"' % full_name) 383 384 win32file.RemoveDirectory("\\\\?\\" + path) 385 386 def get_filename_from_url(self, url): 387 """parse a filename base on an url. 388 389 Args: 390 url (str): url to parse for the filename 391 392 Returns: 393 str: filename parsed from the url, or `netloc` network location part 394 of the url. 
395 """ 396 397 parsed = urlparse.urlsplit(url.rstrip("/")) 398 if parsed.path != "": 399 return parsed.path.rsplit("/", 1)[-1] 400 else: 401 return parsed.netloc 402 403 def _urlopen(self, url, **kwargs): 404 """open the url `url` using `urllib2`.` 405 This method can be overwritten to extend its complexity 406 407 Args: 408 url (str | urllib.request.Request): url to open 409 kwargs: Arbitrary keyword arguments passed to the `urllib.request.urlopen` function. 410 411 Returns: 412 file-like: file-like object with additional methods as defined in 413 `urllib.request.urlopen`_. 414 None: None may be returned if no handler handles the request. 415 416 Raises: 417 urllib2.URLError: on errors 418 419 .. urillib.request.urlopen: 420 https://docs.python.org/2/library/urllib2.html#urllib2.urlopen 421 """ 422 # http://bugs.python.org/issue13359 - urllib2 does not automatically quote the URL 423 url_quoted = quote(url, safe="%/:=&?~#+!$,;'@()*[]|") 424 # windows certificates need to be refreshed (https://bugs.python.org/issue36011) 425 if self.platform_name() in ("win64",) and platform.architecture()[0] in ( 426 "x64", 427 ): 428 if self.ssl_context is None: 429 self.ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS) 430 self.ssl_context.load_default_certs() 431 return urlopen(url_quoted, context=self.ssl_context, **kwargs) 432 else: 433 return urlopen(url_quoted, **kwargs) 434 435 def fetch_url_into_memory(self, url): 436 """Downloads a file from a url into memory instead of disk. 437 438 Args: 439 url (str): URL path where the file to be downloaded is located. 440 441 Raises: 442 IOError: When the url points to a file on disk and cannot be found 443 ContentLengthMismatch: When the length of the retrieved content does not match the 444 Content-Length response header. 445 ValueError: When the scheme of a url is not what is expected. 
446 447 Returns: 448 BytesIO: contents of url 449 """ 450 self.info("Fetch {} into memory".format(url)) 451 parsed_url = urlparse.urlparse(url) 452 453 if parsed_url.scheme in ("", "file"): 454 path = parsed_url.path 455 if not os.path.isfile(path): 456 raise IOError("Could not find file to extract: {}".format(url)) 457 458 content_length = os.stat(path).st_size 459 460 # In case we're referrencing a file without file:// 461 if parsed_url.scheme == "": 462 url = "file://%s" % os.path.abspath(url) 463 parsed_url = urlparse.urlparse(url) 464 465 request = Request(url) 466 # When calling fetch_url_into_memory() you should retry when we raise 467 # one of these exceptions: 468 # * Bug 1300663 - HTTPError: HTTP Error 404: Not Found 469 # * Bug 1300413 - HTTPError: HTTP Error 500: Internal Server Error 470 # * Bug 1300943 - HTTPError: HTTP Error 503: Service Unavailable 471 # * Bug 1300953 - URLError: <urlopen error [Errno -2] Name or service not known> 472 # * Bug 1301594 - URLError: <urlopen error [Errno 10054] An existing connection was ... 473 # * Bug 1301597 - URLError: <urlopen error [Errno 8] _ssl.c:504: EOF occurred in ... 
474 # * Bug 1301855 - URLError: <urlopen error [Errno 60] Operation timed out> 475 # * Bug 1302237 - URLError: <urlopen error [Errno 104] Connection reset by peer> 476 # * Bug 1301807 - BadStatusLine: '' 477 # 478 # Bug 1309912 - Adding timeout in hopes to solve blocking on response.read() (bug 1300413) 479 response = urlopen(request, timeout=30) 480 481 if parsed_url.scheme in ("http", "https"): 482 content_length = int(response.headers.get("Content-Length")) 483 484 response_body = response.read() 485 response_body_size = len(response_body) 486 487 self.info("Content-Length response header: {}".format(content_length)) 488 self.info("Bytes received: {}".format(response_body_size)) 489 490 if response_body_size != content_length: 491 raise ContentLengthMismatch( 492 "The retrieved Content-Length header declares a body length " 493 "of {} bytes, while we actually retrieved {} bytes".format( 494 content_length, response_body_size 495 ) 496 ) 497 498 if response.info().get("Content-Encoding") == "gzip": 499 self.info('Content-Encoding is "gzip", so decompressing response body') 500 # See http://www.zlib.net/manual.html#Advanced 501 # section "ZEXTERN int ZEXPORT inflateInit2 OF....": 502 # Add 32 to windowBits to enable zlib and gzip decoding with automatic 503 # header detection, or add 16 to decode only the gzip format (the zlib 504 # format will return a Z_DATA_ERROR). 505 # Adding 16 since we only wish to support gzip encoding. 
506 file_contents = zlib.decompress(response_body, zlib.MAX_WBITS | 16) 507 else: 508 file_contents = response_body 509 510 # Use BytesIO instead of StringIO 511 # http://stackoverflow.com/questions/34162017/unzip-buffer-with-python/34162395#34162395 512 return BytesIO(file_contents) 513 514 def _download_file(self, url, file_name): 515 """Helper function for download_file() 516 Additionaly this function logs all exceptions as warnings before 517 re-raising them 518 519 Args: 520 url (str): string containing the URL with the file location 521 file_name (str): name of the file where the downloaded file 522 is written. 523 524 Returns: 525 str: filename of the written file on disk 526 527 Raises: 528 urllib2.URLError: on incomplete download. 529 urllib2.HTTPError: on Http error code 530 socket.timeout: on connection timeout 531 socket.error: on socket error 532 """ 533 # If our URLs look like files, prefix them with file:// so they can 534 # be loaded like URLs. 535 if not (url.startswith("http") or url.startswith("file://")): 536 if not os.path.isfile(url): 537 self.fatal("The file %s does not exist" % url) 538 url = "file://%s" % os.path.abspath(url) 539 540 try: 541 f_length = None 542 f = self._urlopen(url, timeout=30) 543 544 if f.info().get("content-length") is not None: 545 f_length = int(f.info()["content-length"]) 546 got_length = 0 547 if f.info().get("Content-Encoding") == "gzip": 548 # Note, we'll download the full compressed content into its own 549 # file, since that allows the gzip library to seek through it. 550 # Once downloaded, we'll decompress it into the real target 551 # file, and delete the compressed version. 
552 local_file = open(file_name + ".gz", "wb") 553 else: 554 local_file = open(file_name, "wb") 555 while True: 556 block = f.read(1024 ** 2) 557 if not block: 558 if f_length is not None and got_length != f_length: 559 raise URLError( 560 "Download incomplete; content-length was %d, " 561 "but only received %d" % (f_length, got_length) 562 ) 563 break 564 local_file.write(block) 565 if f_length is not None: 566 got_length += len(block) 567 local_file.close() 568 if f.info().get("Content-Encoding") == "gzip": 569 # Decompress file into target location, then remove compressed version 570 with open(file_name, "wb") as f_out: 571 # On some execution paths, this could be called with python 2.6 572 # whereby gzip.open(...) cannot be used with a 'with' statement. 573 # So let's do this the python 2.6 way... 574 try: 575 f_in = gzip.open(file_name + ".gz", "rb") 576 shutil.copyfileobj(f_in, f_out) 577 finally: 578 f_in.close() 579 os.remove(file_name + ".gz") 580 return file_name 581 except HTTPError as e: 582 self.warning( 583 "Server returned status %s %s for %s" % (str(e.code), str(e), url) 584 ) 585 raise 586 except URLError as e: 587 self.warning("URL Error: %s" % url) 588 589 # Failures due to missing local files won't benefit from retry. 590 # Raise the original OSError. 591 if isinstance(e.args[0], OSError) and e.args[0].errno == errno.ENOENT: 592 raise e.args[0] 593 594 raise 595 except socket.timeout as e: 596 self.warning("Timed out accessing %s: %s" % (url, str(e))) 597 raise 598 except socket.error as e: 599 self.warning("Socket error when accessing %s: %s" % (url, str(e))) 600 raise 601 602 def _retry_download(self, url, error_level, file_name=None, retry_config=None): 603 """Helper method to retry download methods. 604 605 This method calls `self.retry` on `self._download_file` using the passed 606 parameters if a file_name is specified. 
If no file is specified, we will 607 instead call `self._urlopen`, which grabs the contents of a url but does 608 not create a file on disk. 609 610 Args: 611 url (str): URL path where the file is located. 612 file_name (str): file_name where the file will be written to. 613 error_level (str): log level to use in case an error occurs. 614 retry_config (dict, optional): key-value pairs to be passed to 615 `self.retry`. Defaults to `None` 616 617 Returns: 618 str: `self._download_file` return value is returned 619 unknown: `self.retry` `failure_status` is returned on failure, which 620 defaults to -1 621 """ 622 retry_args = dict( 623 failure_status=None, 624 retry_exceptions=( 625 HTTPError, 626 URLError, 627 httplib.BadStatusLine, 628 socket.timeout, 629 socket.error, 630 ), 631 error_message="Can't download from %s to %s!" % (url, file_name), 632 error_level=error_level, 633 ) 634 635 if retry_config: 636 retry_args.update(retry_config) 637 638 download_func = self._urlopen 639 kwargs = {"url": url} 640 if file_name: 641 download_func = self._download_file 642 kwargs = {"url": url, "file_name": file_name} 643 644 return self.retry(download_func, kwargs=kwargs, **retry_args) 645 646 def _filter_entries(self, namelist, extract_dirs): 647 """Filter entries of the archive based on the specified list of to extract dirs.""" 648 filter_partial = functools.partial(fnmatch.filter, namelist) 649 entries = itertools.chain(*map(filter_partial, extract_dirs or ["*"])) 650 651 for entry in entries: 652 yield entry 653 654 def unzip(self, compressed_file, extract_to, extract_dirs="*", verbose=False): 655 """This method allows to extract a zip file without writing to disk first. 656 657 Args: 658 compressed_file (object): File-like object with the contents of a compressed zip file. 659 extract_to (str): where to extract the compressed file. 660 extract_dirs (list, optional): directories inside the archive file to extract. 661 Defaults to '*'. 
662 verbose (bool, optional): whether or not extracted content should be displayed. 663 Defaults to False. 664 665 Raises: 666 zipfile.BadZipfile: on contents of zipfile being invalid 667 """ 668 with zipfile.ZipFile(compressed_file) as bundle: 669 entries = self._filter_entries(bundle.namelist(), extract_dirs) 670 671 for entry in entries: 672 if verbose: 673 self.info(" {}".format(entry)) 674 675 # Exception to be retried: 676 # Bug 1301645 - BadZipfile: Bad CRC-32 for file ... 677 # http://stackoverflow.com/questions/5624669/strange-badzipfile-bad-crc-32-problem/5626098#5626098 678 # Bug 1301802 - error: Error -3 while decompressing: invalid stored block lengths 679 bundle.extract(entry, path=extract_to) 680 681 # ZipFile doesn't preserve permissions during extraction: 682 # http://bugs.python.org/issue15795 683 fname = os.path.realpath(os.path.join(extract_to, entry)) 684 try: 685 # getinfo() can raise KeyError 686 mode = bundle.getinfo(entry).external_attr >> 16 & 0x1FF 687 # Only set permissions if attributes are available. Otherwise all 688 # permissions will be removed eg. on Windows. 689 if mode: 690 os.chmod(fname, mode) 691 692 except KeyError: 693 self.warning("{} was not found in the zip file".format(entry)) 694 695 def deflate(self, compressed_file, mode, extract_to=".", *args, **kwargs): 696 """This method allows to extract a compressed file from a tar, tar.bz2 and tar.gz files. 697 698 Args: 699 compressed_file (object): File-like object with the contents of a compressed file. 700 mode (str): string of the form 'filemode[:compression]' (e.g. 'r:gz' or 'r:bz2') 701 extract_to (str, optional): where to extract the compressed file. 702 """ 703 t = tarfile.open(fileobj=compressed_file, mode=mode) 704 t.extractall(path=extract_to) 705 706 def download_unpack(self, url, extract_to=".", extract_dirs="*", verbose=False): 707 """Generic method to download and extract a compressed file without writing it to disk first. 
708 709 Args: 710 url (str): URL where the file to be downloaded is located. 711 extract_to (str, optional): directory where the downloaded file will 712 be extracted to. 713 extract_dirs (list, optional): directories inside the archive to extract. 714 Defaults to `*`. It currently only applies to zip files. 715 verbose (bool, optional): whether or not extracted content should be displayed. 716 Defaults to False. 717 718 """ 719 720 def _determine_extraction_method_and_kwargs(url): 721 EXTENSION_TO_MIMETYPE = { 722 "bz2": "application/x-bzip2", 723 "gz": "application/x-gzip", 724 "tar": "application/x-tar", 725 "zip": "application/zip", 726 } 727 MIMETYPES = { 728 "application/x-bzip2": { 729 "function": self.deflate, 730 "kwargs": {"mode": "r:bz2"}, 731 }, 732 "application/x-gzip": { 733 "function": self.deflate, 734 "kwargs": {"mode": "r:gz"}, 735 }, 736 "application/x-tar": { 737 "function": self.deflate, 738 "kwargs": {"mode": "r"}, 739 }, 740 "application/zip": { 741 "function": self.unzip, 742 }, 743 "application/x-zip-compressed": { 744 "function": self.unzip, 745 }, 746 } 747 748 filename = url.split("/")[-1] 749 # XXX: bz2/gz instead of tar.{bz2/gz} 750 extension = filename[filename.rfind(".") + 1 :] 751 mimetype = EXTENSION_TO_MIMETYPE[extension] 752 self.debug("Mimetype: {}".format(mimetype)) 753 754 function = MIMETYPES[mimetype]["function"] 755 kwargs = { 756 "compressed_file": compressed_file, 757 "extract_to": extract_to, 758 "extract_dirs": extract_dirs, 759 "verbose": verbose, 760 } 761 kwargs.update(MIMETYPES[mimetype].get("kwargs", {})) 762 763 return function, kwargs 764 765 # Many scripts overwrite this method and set extract_dirs to None 766 extract_dirs = "*" if extract_dirs is None else extract_dirs 767 self.info( 768 "Downloading and extracting to {} these dirs {} from {}".format( 769 extract_to, 770 ", ".join(extract_dirs), 771 url, 772 ) 773 ) 774 775 # 1) Let's fetch the file 776 retry_args = dict( 777 retry_exceptions=( 778 HTTPError, 
779 URLError, 780 httplib.BadStatusLine, 781 socket.timeout, 782 socket.error, 783 ContentLengthMismatch, 784 ), 785 sleeptime=30, 786 attempts=5, 787 error_message="Can't download from {}".format(url), 788 error_level=FATAL, 789 ) 790 compressed_file = self.retry( 791 self.fetch_url_into_memory, kwargs={"url": url}, **retry_args 792 ) 793 794 # 2) We're guaranteed to have download the file with error_level=FATAL 795 # Let's unpack the file 796 function, kwargs = _determine_extraction_method_and_kwargs(url) 797 try: 798 function(**kwargs) 799 except zipfile.BadZipfile: 800 # Dump the exception and exit 801 self.exception(level=FATAL) 802 803 def load_json_url(self, url, error_level=None, *args, **kwargs): 804 """Returns a json object from a url (it retries).""" 805 contents = self._retry_download( 806 url=url, error_level=error_level, *args, **kwargs 807 ) 808 return json.loads(contents.read()) 809 810 # http://www.techniqal.com/blog/2008/07/31/python-file-read-write-with-urllib2/ 811 # TODO thinking about creating a transfer object. 812 def download_file( 813 self, 814 url, 815 file_name=None, 816 parent_dir=None, 817 create_parent_dir=True, 818 error_level=ERROR, 819 exit_code=3, 820 retry_config=None, 821 ): 822 """Python wget. 823 Download the filename at `url` into `file_name` and put it on `parent_dir`. 824 On error log with the specified `error_level`, on fatal exit with `exit_code`. 825 Execute all the above based on `retry_config` parameter. 826 827 Args: 828 url (str): URL path where the file to be downloaded is located. 829 file_name (str, optional): file_name where the file will be written to. 830 Defaults to urls' filename. 831 parent_dir (str, optional): directory where the downloaded file will 832 be written to. Defaults to current working 833 directory 834 create_parent_dir (bool, optional): create the parent directory if it 835 doesn't exist. Defaults to `True` 836 error_level (str, optional): log level to use in case an error occurs. 
837 Defaults to `ERROR` 838 retry_config (dict, optional): key-value pairs to be passed to 839 `self.retry`. Defaults to `None` 840 841 Returns: 842 str: filename where the downloaded file was written to. 843 unknown: on failure, `failure_status` is returned. 844 """ 845 if not file_name: 846 try: 847 file_name = self.get_filename_from_url(url) 848 except AttributeError: 849 self.log( 850 "Unable to get filename from %s; bad url?" % url, 851 level=error_level, 852 exit_code=exit_code, 853 ) 854 return 855 if parent_dir: 856 file_name = os.path.join(parent_dir, file_name) 857 if create_parent_dir: 858 self.mkdir_p(parent_dir, error_level=error_level) 859 self.info("Downloading %s to %s" % (url, file_name)) 860 status = self._retry_download( 861 url=url, 862 error_level=error_level, 863 file_name=file_name, 864 retry_config=retry_config, 865 ) 866 if status == file_name: 867 self.info("Downloaded %d bytes." % os.path.getsize(file_name)) 868 return status 869 870 def move(self, src, dest, log_level=INFO, error_level=ERROR, exit_code=-1): 871 """recursively move a file or directory (src) to another location (dest). 872 873 Args: 874 src (str): file or directory path to move. 875 dest (str): file or directory path where to move the content to. 876 log_level (str): log level to use for normal operation. Defaults to 877 `INFO` 878 error_level (str): log level to use on error. Defaults to `ERROR` 879 880 Returns: 881 int: 0 on success. -1 on error. 882 """ 883 self.log("Moving %s to %s" % (src, dest), level=log_level) 884 try: 885 shutil.move(src, dest) 886 # http://docs.python.org/tutorial/errors.html 887 except IOError as e: 888 self.log("IO error: %s" % str(e), level=error_level, exit_code=exit_code) 889 return -1 890 except shutil.Error as e: 891 # ERROR level ends up reporting the failure to treeherder & 892 # pollutes the failure summary list. 
893 self.log("shutil error: %s" % str(e), level=WARNING, exit_code=exit_code) 894 return -1 895 return 0 896 897 def chmod(self, path, mode): 898 """change `path` mode to `mode`. 899 900 Args: 901 path (str): path whose mode will be modified. 902 mode (hex): one of the values defined at `stat`_ 903 904 .. _stat: 905 https://docs.python.org/2/library/os.html#os.chmod 906 """ 907 908 self.info("Chmoding %s to %s" % (path, str(oct(mode)))) 909 os.chmod(path, mode) 910 911 def copyfile( 912 self, 913 src, 914 dest, 915 log_level=INFO, 916 error_level=ERROR, 917 copystat=False, 918 compress=False, 919 ): 920 """copy or compress `src` into `dest`. 921 922 Args: 923 src (str): filepath to copy. 924 dest (str): filepath where to move the content to. 925 log_level (str, optional): log level to use for normal operation. Defaults to 926 `INFO` 927 error_level (str, optional): log level to use on error. Defaults to `ERROR` 928 copystat (bool, optional): whether or not to copy the files metadata. 929 Defaults to `False`. 930 compress (bool, optional): whether or not to compress the destination file. 931 Defaults to `False`. 932 933 Returns: 934 int: -1 on error 935 None: on success 936 """ 937 938 if compress: 939 self.log("Compressing %s to %s" % (src, dest), level=log_level) 940 try: 941 infile = open(src, "rb") 942 outfile = gzip.open(dest, "wb") 943 outfile.writelines(infile) 944 outfile.close() 945 infile.close() 946 except IOError as e: 947 self.log( 948 "Can't compress %s to %s: %s!" % (src, dest, str(e)), 949 level=error_level, 950 ) 951 return -1 952 else: 953 self.log("Copying %s to %s" % (src, dest), level=log_level) 954 try: 955 shutil.copyfile(src, dest) 956 except (IOError, shutil.Error) as e: 957 self.log( 958 "Can't copy %s to %s: %s!" % (src, dest, str(e)), level=error_level 959 ) 960 return -1 961 962 if copystat: 963 try: 964 shutil.copystat(src, dest) 965 except (IOError, shutil.Error) as e: 966 self.log( 967 "Can't copy attributes of %s to %s: %s!" 
% (src, dest, str(e)), 968 level=error_level, 969 ) 970 return -1 971 972 def copytree( 973 self, src, dest, overwrite="no_overwrite", log_level=INFO, error_level=ERROR 974 ): 975 """An implementation of `shutil.copytree` that allows for `dest` to exist 976 and implements different overwrite levels: 977 - 'no_overwrite' will keep all(any) existing files in destination tree 978 - 'overwrite_if_exists' will only overwrite destination paths that have 979 the same path names relative to the root of the 980 src and destination tree 981 - 'clobber' will replace the whole destination tree(clobber) if it exists 982 983 Args: 984 src (str): directory path to move. 985 dest (str): directory path where to move the content to. 986 overwrite (str): string specifying the overwrite level. 987 log_level (str, optional): log level to use for normal operation. Defaults to 988 `INFO` 989 error_level (str, optional): log level to use on error. Defaults to `ERROR` 990 991 Returns: 992 int: -1 on error 993 None: on success 994 """ 995 996 self.info("copying tree: %s to %s" % (src, dest)) 997 try: 998 if overwrite == "clobber" or not os.path.exists(dest): 999 self.rmtree(dest) 1000 shutil.copytree(src, dest) 1001 elif overwrite == "no_overwrite" or overwrite == "overwrite_if_exists": 1002 files = os.listdir(src) 1003 for f in files: 1004 abs_src_f = os.path.join(src, f) 1005 abs_dest_f = os.path.join(dest, f) 1006 if not os.path.exists(abs_dest_f): 1007 if os.path.isdir(abs_src_f): 1008 self.mkdir_p(abs_dest_f) 1009 self.copytree(abs_src_f, abs_dest_f, overwrite="clobber") 1010 else: 1011 shutil.copy2(abs_src_f, abs_dest_f) 1012 elif overwrite == "no_overwrite": # destination path exists 1013 if os.path.isdir(abs_src_f) and os.path.isdir(abs_dest_f): 1014 self.copytree( 1015 abs_src_f, abs_dest_f, overwrite="no_overwrite" 1016 ) 1017 else: 1018 self.debug( 1019 "ignoring path: %s as destination: \ 1020 %s exists" 1021 % (abs_src_f, abs_dest_f) 1022 ) 1023 else: # overwrite == 
'overwrite_if_exists' and destination exists 1024 self.debug("overwriting: %s with: %s" % (abs_dest_f, abs_src_f)) 1025 self.rmtree(abs_dest_f) 1026 1027 if os.path.isdir(abs_src_f): 1028 self.mkdir_p(abs_dest_f) 1029 self.copytree( 1030 abs_src_f, abs_dest_f, overwrite="overwrite_if_exists" 1031 ) 1032 else: 1033 shutil.copy2(abs_src_f, abs_dest_f) 1034 else: 1035 self.fatal( 1036 "%s is not a valid argument for param overwrite" % (overwrite) 1037 ) 1038 except (IOError, shutil.Error): 1039 self.exception( 1040 "There was an error while copying %s to %s!" % (src, dest), 1041 level=error_level, 1042 ) 1043 return -1 1044 1045 def write_to_file( 1046 self, 1047 file_path, 1048 contents, 1049 verbose=True, 1050 open_mode="w", 1051 create_parent_dir=False, 1052 error_level=ERROR, 1053 ): 1054 """Write `contents` to `file_path`, according to `open_mode`. 1055 1056 Args: 1057 file_path (str): filepath where the content will be written to. 1058 contents (str): content to write to the filepath. 1059 verbose (bool, optional): whether or not to log `contents` value. 1060 Defaults to `True` 1061 open_mode (str, optional): open mode to use for openning the file. 1062 Defaults to `w` 1063 create_parent_dir (bool, optional): whether or not to create the 1064 parent directory of `file_path` 1065 error_level (str, optional): log level to use on error. Defaults to `ERROR` 1066 1067 Returns: 1068 str: `file_path` on success 1069 None: on error. 
1070 """ 1071 self.info("Writing to file %s" % file_path) 1072 if verbose: 1073 self.info("Contents:") 1074 for line in contents.splitlines(): 1075 self.info(" %s" % line) 1076 if create_parent_dir: 1077 parent_dir = os.path.dirname(file_path) 1078 self.mkdir_p(parent_dir, error_level=error_level) 1079 try: 1080 fh = open(file_path, open_mode) 1081 try: 1082 fh.write(contents) 1083 except UnicodeEncodeError: 1084 fh.write(contents.encode("utf-8", "replace")) 1085 fh.close() 1086 return file_path 1087 except IOError: 1088 self.log("%s can't be opened for writing!" % file_path, level=error_level) 1089 1090 @contextmanager 1091 def opened(self, file_path, verbose=True, open_mode="r", error_level=ERROR): 1092 """Create a context manager to use on a with statement. 1093 1094 Args: 1095 file_path (str): filepath of the file to open. 1096 verbose (bool, optional): useless parameter, not used here. 1097 Defaults to True. 1098 open_mode (str, optional): open mode to use for openning the file. 1099 Defaults to `r` 1100 error_level (str, optional): log level name to use on error. 1101 Defaults to `ERROR` 1102 1103 Yields: 1104 tuple: (file object, error) pair. In case of error `None` is yielded 1105 as file object, together with the corresponding error. 1106 If there is no error, `None` is returned as the error. 1107 """ 1108 # See opened_w_error in http://www.python.org/dev/peps/pep-0343/ 1109 self.info("Reading from file %s" % file_path) 1110 try: 1111 fh = open(file_path, open_mode) 1112 except IOError as err: 1113 self.log( 1114 "unable to open %s: %s" % (file_path, err.strerror), level=error_level 1115 ) 1116 yield None, err 1117 else: 1118 try: 1119 yield fh, None 1120 finally: 1121 fh.close() 1122 1123 def read_from_file(self, file_path, verbose=True, open_mode="r", error_level=ERROR): 1124 """Use `self.opened` context manager to open a file and read its 1125 content. 1126 1127 Args: 1128 file_path (str): filepath of the file to read. 
1129 verbose (bool, optional): whether or not to log the file content. 1130 Defaults to True. 1131 open_mode (str, optional): open mode to use for openning the file. 1132 Defaults to `r` 1133 error_level (str, optional): log level name to use on error. 1134 Defaults to `ERROR` 1135 1136 Returns: 1137 None: on error. 1138 str: file content on success. 1139 """ 1140 with self.opened(file_path, verbose, open_mode, error_level) as (fh, err): 1141 if err: 1142 return None 1143 contents = fh.read() 1144 if verbose: 1145 self.info("Contents:") 1146 for line in contents.splitlines(): 1147 self.info(" %s" % line) 1148 return contents 1149 1150 def chdir(self, dir_name): 1151 self.log("Changing directory to %s." % dir_name) 1152 os.chdir(dir_name) 1153 1154 def is_exe(self, fpath): 1155 """ 1156 Determine if fpath is a file and if it is executable. 1157 """ 1158 return os.path.isfile(fpath) and os.access(fpath, os.X_OK) 1159 1160 def which(self, program): 1161 """OS independent implementation of Unix's which command 1162 1163 Args: 1164 program (str): name or path to the program whose executable is 1165 being searched. 1166 1167 Returns: 1168 None: if the executable was not found. 1169 str: filepath of the executable file. 
    # More complex commands {{{2
    def retry(
        self,
        action,
        attempts=None,
        sleeptime=60,
        max_sleeptime=5 * 60,
        retry_exceptions=(Exception,),
        good_statuses=None,
        cleanup=None,
        error_level=ERROR,
        error_message="%(action)s failed after %(attempts)d tries!",
        failure_status=-1,
        log_level=INFO,
        args=(),
        # NOTE(review): mutable default; safe only because this dict is never
        # mutated in the body — it is only unpacked into the call.
        kwargs={},
    ):
        """generic retry command. Ported from `util.retry`_

        Args:
            action (func): callable object to retry.
            attempts (int, optinal): maximum number of times to call actions.
                Defaults to `self.config.get('global_retries', 5)`
            sleeptime (int, optional): number of seconds to wait between
                attempts. Defaults to 60 and doubles each retry attempt, to
                a maximum of `max_sleeptime'
            max_sleeptime (int, optional): maximum value of sleeptime. Defaults
                to 5 minutes
            retry_exceptions (tuple, optional): Exceptions that should be caught.
                If exceptions other than those listed in `retry_exceptions' are
                raised from `action', they will be raised immediately. Defaults
                to (Exception)
            good_statuses (object, optional): return values which, if specified,
                will result in retrying if the return value isn't listed.
                Defaults to `None`.
            cleanup (func, optional): If `cleanup' is provided and callable
                it will be called immediately after an Exception is caught.
                No arguments will be passed to it. If your cleanup function
                requires arguments it is recommended that you wrap it in an
                argumentless function.
                Defaults to `None`.
            error_level (str, optional): log level name in case of error.
                Defaults to `ERROR`.
            error_message (str, optional): string format to use in case
                none of the attempts success. Defaults to
                '%(action)s failed after %(attempts)d tries!'
            failure_status (int, optional): flag to return in case the retries
                were not successfull. Defaults to -1.
            log_level (str, optional): log level name to use for normal activity.
                Defaults to `INFO`.
            args (tuple, optional): positional arguments to pass onto `action`.
            kwargs (dict, optional): key-value arguments to pass onto `action`.

        Returns:
            object: return value of `action`.
            int: failure status in case of failure retries.
        """
        if not callable(action):
            self.fatal("retry() called with an uncallable method %s!" % action)
        if cleanup and not callable(cleanup):
            self.fatal("retry() called with an uncallable cleanup method %s!" % cleanup)
        if not attempts:
            attempts = self.config.get("global_retries", 5)
        if max_sleeptime < sleeptime:
            self.debug(
                "max_sleeptime %d less than sleeptime %d" % (max_sleeptime, sleeptime)
            )
        # n counts calls made so far; the body returns (success or final
        # failure) before n can exceed `attempts`.
        n = 0
        while n <= attempts:
            retry = False
            n += 1
            try:
                self.log(
                    "retry: Calling %s with args: %s, kwargs: %s, attempt #%d"
                    % (action.__name__, str(args), str(kwargs), n),
                    level=log_level,
                )
                status = action(*args, **kwargs)
                # A status outside good_statuses (when provided) counts as a
                # failed attempt even though no exception was raised.
                if good_statuses and status not in good_statuses:
                    retry = True
            except retry_exceptions as e:
                retry = True
                # Each caught exception's text is appended so the final
                # error message carries the full failure history.
                error_message = "%s\nCaught exception: %s" % (error_message, str(e))
                self.log(
                    "retry: attempt #%d caught %s exception: %s"
                    % (n, type(e).__name__, str(e)),
                    level=INFO,
                )

            if not retry:
                return status
            else:
                if cleanup:
                    cleanup()
                if n == attempts:
                    # Out of attempts: log the accumulated message and give up.
                    self.log(
                        error_message % {"action": action, "attempts": n},
                        level=error_level,
                    )
                    return failure_status
                if sleeptime > 0:
                    self.log(
                        "retry: Failed, sleeping %d seconds before retrying"
                        % sleeptime,
                        level=log_level,
                    )
                    time.sleep(sleeptime)
                    # Exponential backoff, capped at max_sleeptime.
                    sleeptime = sleeptime * 2
                    if sleeptime > max_sleeptime:
                        sleeptime = max_sleeptime
    def query_env(
        self,
        partial_env=None,
        replace_dict=None,
        purge_env=(),
        set_self_env=None,
        log_level=DEBUG,
        avoid_host_env=False,
    ):
        """Environment query/generation method.
        The default, self.query_env(), will look for self.config['env']
        and replace any special strings in there ( %(PATH)s ).
        It will then store it as self.env for speeding things up later.

        If you specify partial_env, partial_env will be used instead of
        self.config['env'], and we don't save self.env as it's a one-off.


        Args:
            partial_env (dict, optional): key-value pairs of the name and value
                of different environment variables. Defaults to an empty dictionary.
            replace_dict (dict, optional): key-value pairs to replace the old
                environment variables.
            purge_env (list): environment names to delete from the final
                environment dictionary.
            set_self_env (boolean, optional): whether or not the environment
                variables dictionary should be copied to `self`.
                Defaults to True.
            log_level (str, optional): log level name to use on normal operation.
                Defaults to `DEBUG`.
            avoid_host_env (boolean, optional): if set to True, we will not use
                any environment variables set on the host except PATH.
                Defaults to False.

        Returns:
            dict: environment variables names with their values.
        """
        if partial_env is None:
            # Cached env is only reused on the default (config-driven) path.
            if self.env is not None:
                return self.env
            partial_env = self.config.get("env", None)
            if partial_env is None:
                partial_env = {}
            if set_self_env is None:
                set_self_env = True

        env = {"PATH": os.environ["PATH"]} if avoid_host_env else os.environ.copy()

        # Substitution values default to the script's absolute dirs plus PATH;
        # caller-supplied replace_dict entries win over the defaults.
        default_replace_dict = self.query_abs_dirs()
        default_replace_dict["PATH"] = os.environ["PATH"]
        if not replace_dict:
            replace_dict = default_replace_dict
        else:
            for key in default_replace_dict:
                if key not in replace_dict:
                    replace_dict[key] = default_replace_dict[key]
        # %-interpolate each partial_env value (e.g. '%(PATH)s') before merge.
        for key in partial_env.keys():
            env[key] = partial_env[key] % replace_dict
            self.log("ENV: %s is now %s" % (key, env[key]), level=log_level)
        # Purge happens after the merge, so it can drop inherited OR merged keys.
        for k in purge_env:
            if k in env:
                del env[k]
        if os.name == "nt":
            pref_encoding = locale.getpreferredencoding()
            for k, v in six.iteritems(env):
                # When run locally on Windows machines, some environment
                # variables may be unicode.
                env[k] = six.ensure_str(v, pref_encoding)
        if set_self_env:
            self.env = env
        return env
    def query_exe(
        self,
        exe_name,
        exe_dict="exes",
        default=None,
        return_type=None,
        error_level=FATAL,
    ):
        """One way to work around PATH rewrites.

        By default, return exe_name, and we'll fall through to searching
        os.environ["PATH"].
        However, if self.config[exe_dict][exe_name] exists, return that.
        This lets us override exe paths via config file.

        If we need runtime setting, we can build in self.exes support later.

        Args:
            exe_name (str): name of the executable to search for.
            exe_dict(str, optional): name of the dictionary of executables
                present in `self.config`. Defaults to `exes`.
            default (str, optional): default name of the executable to search
                for. Defaults to `exe_name`.
            return_type (str, optional): type to which the original return
                value will be turn into. Only 'list', 'string' and `None` are
                supported. Defaults to `None`.
            error_level (str, optional): log level name to use on error.

        Returns:
            list: in case return_type is 'list'
            str: in case return_type is 'string'
            None: in case return_type is `None`
            Any: if the found executable is not of type list, tuple nor str.
        """
        if default is None:
            default = exe_name
        exe = self.config.get(exe_dict, {}).get(exe_name, default)
        repl_dict = {}
        if hasattr(self.script_obj, "query_abs_dirs"):
            # allow for 'make': '%(abs_work_dir)s/...' etc.
            dirs = self.script_obj.query_abs_dirs()
            repl_dict.update(dirs)
        if isinstance(exe, dict):
            found = False
            # allow for searchable paths of the exe
            for name, path in six.iteritems(exe):
                if isinstance(path, list) or isinstance(path, tuple):
                    # A sequence only matches when every component path exists.
                    path = [x % repl_dict for x in path]
                    if all([os.path.exists(section) for section in path]):
                        found = True
                # NOTE(review): under Python 2 this `str` check would miss
                # unicode values — presumably only run under Python 3; confirm.
                elif isinstance(path, str):
                    path = path % repl_dict
                    if os.path.exists(path):
                        found = True
                else:
                    self.log(
                        "a exes %s dict's value is not a string, list, or tuple. Got key "
                        "%s and value %s" % (exe_name, name, str(path)),
                        level=error_level,
                    )
                if found:
                    exe = path
                    break
            # for...else: runs only when the loop completed without `break`,
            # i.e. no candidate path existed.
            else:
                self.log(
                    "query_exe was a searchable dict but an existing "
                    "path could not be determined. Tried searching in "
                    "paths: %s" % (str(exe)),
                    level=error_level,
                )
                return None
        elif isinstance(exe, list) or isinstance(exe, tuple):
            exe = [x % repl_dict for x in exe]
        elif isinstance(exe, str):
            exe = exe % repl_dict
        else:
            self.log(
                "query_exe: %s is not a list, tuple, dict, or string: "
                "%s!" % (exe_name, str(exe)),
                level=error_level,
            )
            return exe
        # Optional coercion of the result shape for the caller's convenience.
        if return_type == "list":
            if isinstance(exe, str):
                exe = [exe]
        elif return_type == "string":
            if isinstance(exe, list):
                exe = subprocess.list2cmdline(exe)
        elif return_type is not None:
            self.log(
                "Unknown return_type type %s requested in query_exe!" % return_type,
                level=error_level,
            )
        return exe
    def run_command(
        self,
        command,
        cwd=None,
        error_list=None,
        halt_on_failure=False,
        success_codes=None,
        env=None,
        partial_env=None,
        return_type="status",
        throw_exception=False,
        output_parser=None,
        output_timeout=None,
        fatal_exit_code=2,
        error_level=ERROR,
        **kwargs
    ):
        """Run a command, with logging and error parsing.
        TODO: context_lines

        error_list example:
        [{'regex': re.compile('^Error: LOL J/K'), level=IGNORE},
         {'regex': re.compile('^Error:'), level=ERROR, contextLines='5:5'},
         {'substr': 'THE WORLD IS ENDING', level=FATAL, contextLines='20:'}
        ]
        (context_lines isn't written yet)

        Args:
            command (str | list | tuple): command or sequence of commands to
                execute and log.
            cwd (str, optional): directory path from where to execute the
                command. Defaults to `None`.
            error_list (list, optional): list of errors to pass to
                `mozharness.base.log.OutputParser`. Defaults to `None`.
            halt_on_failure (bool, optional): whether or not to redefine the
                log level as `FATAL` on errors. Defaults to False.
            success_codes (int, optional): numeric value to compare against
                the command return value.
            env (dict, optional): key-value of environment values to use to
                run the command. Defaults to None.
            partial_env (dict, optional): key-value of environment values to
                replace from the current environment values. Defaults to None.
            return_type (str, optional): if equal to 'num_errors' then the
                amount of errors matched by `error_list` is returned. Defaults
                to 'status'.
            throw_exception (bool, optional): whether or not to raise an
                exception if the return value of the command doesn't match
                any of the `success_codes`. Defaults to False.
            output_parser (OutputParser, optional): lets you provide an
                instance of your own OutputParser subclass. Defaults to `OutputParser`.
            output_timeout (int): amount of seconds to wait for output before
                the process is killed.
            fatal_exit_code (int, optional): call `self.fatal` if the return value
                of the command is not in `success_codes`. Defaults to 2.
            error_level (str, optional): log level name to use on error. Defaults
                to `ERROR`.
            **kwargs: Arbitrary keyword arguments.

        Returns:
            int: -1 on error.
            Any: `command` return value is returned otherwise.
        """
        if success_codes is None:
            success_codes = [0]
        if cwd is not None:
            if not os.path.isdir(cwd):
                level = error_level
                if halt_on_failure:
                    level = FATAL
                self.log(
                    "Can't run command %s in non-existent directory '%s'!"
                    % (command, cwd),
                    level=level,
                )
                return -1
            self.info("Running command: %s in %s" % (command, cwd))
        else:
            self.info("Running command: %s" % command)
        if isinstance(command, list) or isinstance(command, tuple):
            self.info("Copy/paste: %s" % subprocess.list2cmdline(command))
        # A string command goes through the shell; a list/tuple is exec'd
        # directly (shell=False), which avoids shell-quoting issues.
        shell = True
        if isinstance(command, list) or isinstance(command, tuple):
            shell = False
        if env is None:
            if partial_env:
                self.info("Using partial env: %s" % pprint.pformat(partial_env))
                env = self.query_env(partial_env=partial_env)
        else:
            # Avoid re-logging an identical (often huge) env dict.
            if hasattr(self, "previous_env") and env == self.previous_env:
                self.info("Using env: (same as previous command)")
            else:
                self.info("Using env: %s" % pprint.pformat(env))
                self.previous_env = env

        if output_parser is None:
            parser = OutputParser(
                config=self.config, log_obj=self.log_obj, error_list=error_list
            )
        else:
            parser = output_parser

        try:
            if output_timeout:
                # mozprocess path: needed for the no-output watchdog.

                def processOutput(line):
                    parser.add_lines(line)

                def onTimeout():
                    self.info(
                        "Automation Error: mozprocess timed out after "
                        "%s seconds running %s" % (str(output_timeout), str(command))
                    )

                p = ProcessHandler(
                    command,
                    shell=shell,
                    env=env,
                    cwd=cwd,
                    storeOutput=False,
                    onTimeout=(onTimeout,),
                    processOutputLine=[processOutput],
                )
                self.info(
                    "Calling %s with output_timeout %d" % (command, output_timeout)
                )
                p.run(outputTimeout=output_timeout)
                p.wait()
                if p.timedOut:
                    self.log(
                        "timed out after %s seconds of no output" % output_timeout,
                        level=error_level,
                    )
                returncode = int(p.proc.returncode)
            else:
                # Plain subprocess path: stderr folded into stdout, unbuffered,
                # lines streamed into the parser as they arrive.
                p = subprocess.Popen(
                    command,
                    shell=shell,
                    stdout=subprocess.PIPE,
                    cwd=cwd,
                    stderr=subprocess.STDOUT,
                    env=env,
                    bufsize=0,
                )
                loop = True
                while loop:
                    if p.poll() is not None:
                        """Avoid losing the final lines of the log?"""
                        loop = False
                    while True:
                        line = p.stdout.readline()
                        if not line:
                            break
                        parser.add_lines(line)
                returncode = p.returncode
        except KeyboardInterrupt:
            # NOTE(review): if the interrupt fires before ProcessHandler/Popen
            # assigns `p`, the references below raise NameError — confirm.
            level = error_level
            if halt_on_failure:
                level = FATAL
            self.log(
                "Process interrupted by the user, killing process with pid %s" % p.pid,
                level=level,
            )
            p.kill()
            return -1
        except OSError as e:
            level = error_level
            if halt_on_failure:
                level = FATAL
            self.log(
                "caught OS error %s: %s while running %s"
                % (e.errno, e.strerror, command),
                level=level,
            )
            return -1

        return_level = INFO
        if returncode not in success_codes:
            return_level = error_level
            if throw_exception:
                raise subprocess.CalledProcessError(returncode, command)
        self.log("Return code: %d" % returncode, level=return_level)

        if halt_on_failure:
            _fail = False
            if returncode not in success_codes:
                self.log(
                    "%s not in success codes: %s" % (returncode, success_codes),
                    level=error_level,
                )
                _fail = True
            if parser.num_errors:
                self.log("failures found while parsing output", level=error_level)
                _fail = True
            if _fail:
                self.return_code = fatal_exit_code
                self.fatal(
                    "Halting on failure while running %s" % command,
                    exit_code=fatal_exit_code,
                )
        if return_type == "num_errors":
            return parser.num_errors
        return returncode
    def get_output_from_command(
        self,
        command,
        cwd=None,
        halt_on_failure=False,
        env=None,
        silent=False,
        log_level=INFO,
        tmpfile_base_path="tmpfile",
        return_type="output",
        save_tmpfiles=False,
        throw_exception=False,
        fatal_exit_code=2,
        ignore_errors=False,
        success_codes=None,
    ):
        """Similar to run_command, but where run_command is an
        os.system(command) analog, get_output_from_command is a `command`
        analog.

        Less error checking by design, though if we figure out how to
        do it without borking the output, great.

        TODO: binary mode? silent is kinda like that.
        TODO: since p.wait() can take a long time, optionally log something
        every N seconds?
        TODO: optionally only keep the first or last (N) line(s) of output?
        TODO: optionally only return the tmp_stdout_filename?

        ignore_errors=True is for the case where a command might produce standard
        error output, but you don't particularly care; setting to True will
        cause standard error to be logged at DEBUG rather than ERROR

        Args:
            command (str | list): command or list of commands to
                execute and log.
            cwd (str, optional): directory path from where to execute the
                command. Defaults to `None`.
            halt_on_failure (bool, optional): whether or not to redefine the
                log level as `FATAL` on error. Defaults to False.
            env (dict, optional): key-value of environment values to use to
                run the command. Defaults to None.
            silent (bool, optional): whether or not to output the stdout of
                executing the command. Defaults to False.
            log_level (str, optional): log level name to use on normal execution.
                Defaults to `INFO`.
            tmpfile_base_path (str, optional): base path of the file to which
                the output will be writen to. Defaults to 'tmpfile'.
            return_type (str, optional): if equal to 'output' then the complete
                output of the executed command is returned, otherwise the written
                filenames are returned. Defaults to 'output'.
            save_tmpfiles (bool, optional): whether or not to save the temporary
                files created from the command output. Defaults to False.
            throw_exception (bool, optional): whether or not to raise an
                exception if the return value of the command is not zero.
                Defaults to False.
            fatal_exit_code (int, optional): call self.fatal if the return value
                of the command match this value.
            ignore_errors (bool, optional): whether or not to change the log
                level to `ERROR` for the output of stderr. Defaults to False.
            success_codes (int, optional): numeric value to compare against
                the command return value.

        Returns:
            None: if the cwd is not a directory.
            None: on IOError.
            tuple: stdout and stderr filenames.
            str: stdout output.
        """
        if cwd:
            if not os.path.isdir(cwd):
                level = ERROR
                if halt_on_failure:
                    level = FATAL
                self.log(
                    "Can't run command %s in non-existent directory %s!"
                    % (command, cwd),
                    level=level,
                )
                return None
            self.info("Getting output from command: %s in %s" % (command, cwd))
        else:
            self.info("Getting output from command: %s" % command)
        if isinstance(command, list):
            self.info("Copy/paste: %s" % subprocess.list2cmdline(command))
        # This could potentially return something?
        # stdout and stderr are captured to two separate temp files rather
        # than pipes, so a very large output cannot deadlock the child.
        tmp_stdout = None
        tmp_stderr = None
        tmp_stdout_filename = "%s_stdout" % tmpfile_base_path
        tmp_stderr_filename = "%s_stderr" % tmpfile_base_path
        if success_codes is None:
            success_codes = [0]

        # TODO probably some more elegant solution than 2 similar passes
        try:
            tmp_stdout = open(tmp_stdout_filename, "w")
        except IOError:
            level = ERROR
            if halt_on_failure:
                level = FATAL
            self.log(
                "Can't open %s for writing!" % tmp_stdout_filename + self.exception(),
                level=level,
            )
            return None
        try:
            tmp_stderr = open(tmp_stderr_filename, "w")
        except IOError:
            level = ERROR
            if halt_on_failure:
                level = FATAL
            self.log(
                "Can't open %s for writing!" % tmp_stderr_filename + self.exception(),
                level=level,
            )
            return None
        # String commands run through the shell; list commands exec directly.
        shell = True
        if isinstance(command, list):
            shell = False

        p = subprocess.Popen(
            command,
            shell=shell,
            stdout=tmp_stdout,
            cwd=cwd,
            stderr=tmp_stderr,
            env=env,
            bufsize=0,
        )
        # XXX: changed from self.debug to self.log due to this error:
        # TypeError: debug() takes exactly 1 argument (2 given)
        self.log(
            "Temporary files: %s and %s" % (tmp_stdout_filename, tmp_stderr_filename),
            level=DEBUG,
        )
        p.wait()
        tmp_stdout.close()
        tmp_stderr.close()
        return_level = DEBUG
        output = None
        if return_type == "output" or not silent:
            if os.path.exists(tmp_stdout_filename) and os.path.getsize(
                tmp_stdout_filename
            ):
                output = self.read_from_file(tmp_stdout_filename, verbose=False)
                if not silent:
                    self.log("Output received:", level=log_level)
                    output_lines = output.rstrip().splitlines()
                    for line in output_lines:
                        if not line or line.isspace():
                            continue
                        if isinstance(line, binary_type):
                            line = line.decode("utf-8")
                        self.log(" %s" % line, level=log_level)
                    # In the non-silent path the returned output is rebuilt
                    # from the rstripped lines, so trailing whitespace differs
                    # from the raw file content.
                    output = "\n".join(output_lines)
        if os.path.exists(tmp_stderr_filename) and os.path.getsize(tmp_stderr_filename):
            if not ignore_errors:
                return_level = ERROR
            self.log("Errors received:", level=return_level)
            errors = self.read_from_file(tmp_stderr_filename, verbose=False)
            for line in errors.rstrip().splitlines():
                if not line or line.isspace():
                    continue
                if isinstance(line, binary_type):
                    line = line.decode("utf-8")
                self.log(" %s" % line, level=return_level)
        elif p.returncode not in success_codes and not ignore_errors:
            return_level = ERROR
        # Clean up.
        if not save_tmpfiles:
            self.rmtree(tmp_stderr_filename, log_level=DEBUG)
            self.rmtree(tmp_stdout_filename, log_level=DEBUG)
        if p.returncode and throw_exception:
            raise subprocess.CalledProcessError(p.returncode, command)
        self.log("Return code: %d" % p.returncode, level=return_level)
        if halt_on_failure and return_level == ERROR:
            self.return_code = fatal_exit_code
            self.fatal(
                "Halting on failure while running %s" % command,
                exit_code=fatal_exit_code,
            )
        # Hm, options on how to return this? I bet often we'll want
        # output_lines[0] with no newline.
        if return_type != "output":
            return (tmp_stdout_filename, tmp_stderr_filename)
        else:
            return output
_`os.utime`: 1871 https://docs.python.org/3.4/library/os.html?highlight=os.utime#os.utime 1872 """ 1873 self.info("Touching: %s" % file_name) 1874 try: 1875 os.utime(file_name, times) 1876 except OSError: 1877 try: 1878 open(file_name, "w").close() 1879 except IOError as e: 1880 msg = "I/O error(%s): %s" % (e.errno, e.strerror) 1881 self.log(msg, error_level=error_level) 1882 os.utime(file_name, times) 1883 1884 def unpack( 1885 self, 1886 filename, 1887 extract_to, 1888 extract_dirs=None, 1889 error_level=ERROR, 1890 fatal_exit_code=2, 1891 verbose=False, 1892 ): 1893 """The method allows to extract a file regardless of its extension. 1894 1895 Args: 1896 filename (str): filename of the compressed file. 1897 extract_to (str): where to extract the compressed file. 1898 extract_dirs (list, optional): directories inside the archive file to extract. 1899 Defaults to `None`. 1900 fatal_exit_code (int, optional): call `self.fatal` if the return value 1901 of the command is not in `success_codes`. Defaults to 2. 1902 verbose (bool, optional): whether or not extracted content should be displayed. 1903 Defaults to False. 1904 1905 Raises: 1906 IOError: on `filename` file not found. 1907 1908 """ 1909 if not os.path.isfile(filename): 1910 raise IOError("Could not find file to extract: %s" % filename) 1911 1912 if zipfile.is_zipfile(filename): 1913 try: 1914 self.info( 1915 "Using ZipFile to extract {} to {}".format(filename, extract_to) 1916 ) 1917 with zipfile.ZipFile(filename) as bundle: 1918 for entry in self._filter_entries(bundle.namelist(), extract_dirs): 1919 if verbose: 1920 self.info(" %s" % entry) 1921 bundle.extract(entry, path=extract_to) 1922 1923 # ZipFile doesn't preserve permissions during extraction: 1924 # http://bugs.python.org/issue15795 1925 fname = os.path.realpath(os.path.join(extract_to, entry)) 1926 mode = bundle.getinfo(entry).external_attr >> 16 & 0x1FF 1927 # Only set permissions if attributes are available. 
    def is_taskcluster(self):
        """Returns boolean indicating if we're running in TaskCluster.

        Detection keys off the TASKCLUSTER_WORKER_TYPE environment variable,
        which TaskCluster workers set.
        """
        # This may need expanding in the future to work on
        # other worker environments; for now the worker-type variable is
        # treated as a sufficient marker.
        return "TASKCLUSTER_WORKER_TYPE" in os.environ


def PreScriptRun(func):
    """Decorator for methods that will be called before script execution.

    Each method on a BaseScript having this decorator will be called at the
    beginning of BaseScript.run().

    The return value is ignored. Exceptions will abort execution.
    """
    # Tag the function in place; presumably BaseScript.run() scans for this
    # marker attribute at runtime (the discovery code is outside this chunk).
    func._pre_run_listener = True
    return func


def PostScriptRun(func):
    """Decorator for methods that will be called after script execution.

    This is similar to PreScriptRun except it is called at the end of
    execution. The method will always be fired, even if execution fails.
    """
    # Same marker mechanism as PreScriptRun, for the post-run phase.
    func._post_run_listener = True
    return func
1985 """ 1986 func._post_run_listener = True 1987 return func 1988 1989 1990def PreScriptAction(action=None): 1991 """Decorator for methods that will be called at the beginning of each action. 1992 1993 Each method on a BaseScript having this decorator will be called during 1994 BaseScript.run() before an individual action is executed. The method will 1995 receive the action's name as an argument. 1996 1997 If no values are passed to the decorator, it will be applied to every 1998 action. If a string is passed, the decorated function will only be called 1999 for the action of that name. 2000 2001 The return value of the method is ignored. Exceptions will abort execution. 2002 """ 2003 2004 def _wrapped(func): 2005 func._pre_action_listener = action 2006 return func 2007 2008 def _wrapped_none(func): 2009 func._pre_action_listener = None 2010 return func 2011 2012 if type(action) == type(_wrapped): 2013 return _wrapped_none(action) 2014 2015 return _wrapped 2016 2017 2018def PostScriptAction(action=None): 2019 """Decorator for methods that will be called at the end of each action. 2020 2021 This behaves similarly to PreScriptAction. It varies in that it is called 2022 after execution of the action. 2023 2024 The decorated method will receive the action name as a positional argument. 2025 It will then receive the following named arguments: 2026 2027 success - Bool indicating whether the action finished successfully. 2028 2029 The decorated method will always be called, even if the action threw an 2030 exception. 2031 2032 The return value is ignored. 
2033 """ 2034 2035 def _wrapped(func): 2036 func._post_action_listener = action 2037 return func 2038 2039 def _wrapped_none(func): 2040 func._post_action_listener = None 2041 return func 2042 2043 if type(action) == type(_wrapped): 2044 return _wrapped_none(action) 2045 2046 return _wrapped 2047 2048 2049# BaseScript {{{1 2050class BaseScript(ScriptMixin, LogMixin, object): 2051 def __init__( 2052 self, 2053 config_options=None, 2054 ConfigClass=BaseConfig, 2055 default_log_level="info", 2056 **kwargs 2057 ): 2058 self._return_code = 0 2059 super(BaseScript, self).__init__() 2060 2061 self.log_obj = None 2062 self.abs_dirs = None 2063 if config_options is None: 2064 config_options = [] 2065 self.summary_list = [] 2066 self.failures = [] 2067 rw_config = ConfigClass(config_options=config_options, **kwargs) 2068 self.config = rw_config.get_read_only_config() 2069 self.actions = tuple(rw_config.actions) 2070 self.all_actions = tuple(rw_config.all_actions) 2071 self.env = None 2072 self.new_log_obj(default_log_level=default_log_level) 2073 self.script_obj = self 2074 2075 # Indicate we're a source checkout if VCS directory is present at the 2076 # appropriate place. This code will break if this file is ever moved 2077 # to another directory. 2078 self.topsrcdir = None 2079 2080 srcreldir = "testing/mozharness/mozharness/base" 2081 here = os.path.normpath(os.path.dirname(__file__)) 2082 if here.replace("\\", "/").endswith(srcreldir): 2083 topsrcdir = os.path.normpath(os.path.join(here, "..", "..", "..", "..")) 2084 hg_dir = os.path.join(topsrcdir, ".hg") 2085 git_dir = os.path.join(topsrcdir, ".git") 2086 if os.path.isdir(hg_dir) or os.path.isdir(git_dir): 2087 self.topsrcdir = topsrcdir 2088 2089 # Set self.config to read-only. 
2090 # 2091 # We can create intermediate config info programmatically from 2092 # this in a repeatable way, with logs; this is how we straddle the 2093 # ideal-but-not-user-friendly static config and the 2094 # easy-to-write-hard-to-debug writable config. 2095 # 2096 # To allow for other, script-specific configurations 2097 # (e.g., props json parsing), before locking, 2098 # call self._pre_config_lock(). If needed, this method can 2099 # alter self.config. 2100 self._pre_config_lock(rw_config) 2101 self._config_lock() 2102 2103 self.info("Run as %s" % rw_config.command_line) 2104 if self.config.get("dump_config_hierarchy"): 2105 # we only wish to dump and display what self.config is made up of, 2106 # against the current script + args, without actually running any 2107 # actions 2108 self._dump_config_hierarchy(rw_config.all_cfg_files_and_dicts) 2109 if self.config.get("dump_config"): 2110 self.dump_config(exit_on_finish=True) 2111 2112 # Collect decorated methods. We simply iterate over the attributes of 2113 # the current class instance and look for signatures deposited by 2114 # the decorators. 2115 self._listeners = dict( 2116 pre_run=[], 2117 pre_action=[], 2118 post_action=[], 2119 post_run=[], 2120 ) 2121 for k in dir(self): 2122 try: 2123 item = self._getattr(k) 2124 except Exception as e: 2125 item = None 2126 self.warning( 2127 "BaseScript collecting decorated methods: " 2128 "failure to get attribute {}: {}".format(k, str(e)) 2129 ) 2130 if not item: 2131 continue 2132 2133 # We only decorate methods, so ignore other types. 
2134 if not inspect.ismethod(item): 2135 continue 2136 2137 if hasattr(item, "_pre_run_listener"): 2138 self._listeners["pre_run"].append(k) 2139 2140 if hasattr(item, "_pre_action_listener"): 2141 self._listeners["pre_action"].append((k, item._pre_action_listener)) 2142 2143 if hasattr(item, "_post_action_listener"): 2144 self._listeners["post_action"].append((k, item._post_action_listener)) 2145 2146 if hasattr(item, "_post_run_listener"): 2147 self._listeners["post_run"].append(k) 2148 2149 def _getattr(self, name): 2150 # `getattr(self, k)` will call the method `k` for any property 2151 # access. If the property depends upon a module which has not 2152 # been imported at the time the BaseScript initializer is 2153 # executed, this property access will result in an 2154 # Exception. Until Python 3's `inspect.getattr_static` is 2155 # available, the simplest approach is to ignore the specific 2156 # properties which are known to cause issues. Currently 2157 # adb_path and device are ignored since they require the 2158 # availablity of the mozdevice package which is not guaranteed 2159 # when BaseScript is called. 2160 property_list = set(["adb_path", "device"]) 2161 if six.PY2: 2162 if name in property_list: 2163 item = None 2164 else: 2165 item = getattr(self, name) 2166 else: 2167 item = inspect.getattr_static(self, name) 2168 if type(item) == property: 2169 item = None 2170 else: 2171 item = getattr(self, name) 2172 return item 2173 2174 def _dump_config_hierarchy(self, cfg_files): 2175 """interpret each config file used. 2176 2177 This will show which keys/values are being added or overwritten by 2178 other config files depending on their hierarchy (when they were added). 2179 """ 2180 # go through each config_file. We will start with the lowest and 2181 # print its keys/values that are being used in self.config. If any 2182 # keys/values are present in a config file with a higher precedence, 2183 # ignore those. 
2184 dirs = self.query_abs_dirs() 2185 cfg_files_dump_config = {} # we will dump this to file 2186 # keep track of keys that did not come from a config file 2187 keys_not_from_file = set(self.config.keys()) 2188 if not cfg_files: 2189 cfg_files = [] 2190 self.info("Total config files: %d" % (len(cfg_files))) 2191 if len(cfg_files): 2192 self.info("cfg files used from lowest precedence to highest:") 2193 for i, (target_file, target_dict) in enumerate(cfg_files): 2194 unique_keys = set(target_dict.keys()) 2195 unique_dict = {} 2196 # iterate through the target_dicts remaining 'higher' cfg_files 2197 remaining_cfgs = cfg_files[slice(i + 1, len(cfg_files))] 2198 # where higher == more precedent 2199 for ii, (higher_file, higher_dict) in enumerate(remaining_cfgs): 2200 # now only keep keys/values that are not overwritten by a 2201 # higher config 2202 unique_keys = unique_keys.difference(set(higher_dict.keys())) 2203 # unique_dict we know now has only keys/values that are unique to 2204 # this config file. 
2205 unique_dict = dict((key, target_dict.get(key)) for key in unique_keys) 2206 cfg_files_dump_config[target_file] = unique_dict 2207 self.action_message("Config File %d: %s" % (i + 1, target_file)) 2208 self.info(pprint.pformat(unique_dict)) 2209 # let's also find out which keys/values from self.config are not 2210 # from each target config file dict 2211 keys_not_from_file = keys_not_from_file.difference(set(target_dict.keys())) 2212 not_from_file_dict = dict( 2213 (key, self.config.get(key)) for key in keys_not_from_file 2214 ) 2215 cfg_files_dump_config["not_from_cfg_file"] = not_from_file_dict 2216 self.action_message( 2217 "Not from any config file (default_config, " "cmd line options, etc)" 2218 ) 2219 self.info(pprint.pformat(not_from_file_dict)) 2220 2221 # finally, let's dump this output as JSON and exit early 2222 self.dump_config( 2223 os.path.join(dirs["abs_log_dir"], "localconfigfiles.json"), 2224 cfg_files_dump_config, 2225 console_output=False, 2226 exit_on_finish=True, 2227 ) 2228 2229 def _pre_config_lock(self, rw_config): 2230 """This empty method can allow for config checking and manipulation 2231 before the config lock, when overridden in scripts. 2232 """ 2233 pass 2234 2235 def _config_lock(self): 2236 """After this point, the config is locked and should not be 2237 manipulated (based on mozharness.base.config.ReadOnlyDict) 2238 """ 2239 self.config.lock() 2240 2241 def _possibly_run_method(self, method_name, error_if_missing=False): 2242 """This is here for run().""" 2243 if hasattr(self, method_name) and callable(self._getattr(method_name)): 2244 return getattr(self, method_name)() 2245 elif error_if_missing: 2246 self.error("No such method %s!" % method_name) 2247 2248 def run_action(self, action): 2249 if action not in self.actions: 2250 self.action_message("Skipping %s step." % action) 2251 return 2252 2253 method_name = action.replace("-", "_") 2254 self.action_message("Running %s step." 
% action) 2255 2256 # An exception during a pre action listener should abort execution. 2257 for fn, target in self._listeners["pre_action"]: 2258 if target is not None and target != action: 2259 continue 2260 2261 try: 2262 self.info("Running pre-action listener: %s" % fn) 2263 method = getattr(self, fn) 2264 method(action) 2265 except Exception: 2266 self.error( 2267 "Exception during pre-action for %s: %s" 2268 % (action, traceback.format_exc()) 2269 ) 2270 2271 for fn, target in self._listeners["post_action"]: 2272 if target is not None and target != action: 2273 continue 2274 2275 try: 2276 self.info("Running post-action listener: %s" % fn) 2277 method = getattr(self, fn) 2278 method(action, success=False) 2279 except Exception: 2280 self.error( 2281 "An additional exception occurred during " 2282 "post-action for %s: %s" % (action, traceback.format_exc()) 2283 ) 2284 2285 self.fatal("Aborting due to exception in pre-action listener.") 2286 2287 # We always run post action listeners, even if the main routine failed. 
2288 success = False 2289 try: 2290 self.info("Running main action method: %s" % method_name) 2291 self._possibly_run_method("preflight_%s" % method_name) 2292 self._possibly_run_method(method_name, error_if_missing=True) 2293 self._possibly_run_method("postflight_%s" % method_name) 2294 success = True 2295 finally: 2296 post_success = True 2297 for fn, target in self._listeners["post_action"]: 2298 if target is not None and target != action: 2299 continue 2300 2301 try: 2302 self.info("Running post-action listener: %s" % fn) 2303 method = getattr(self, fn) 2304 method(action, success=success and self.return_code == 0) 2305 except Exception: 2306 post_success = False 2307 self.error( 2308 "Exception during post-action for %s: %s" 2309 % (action, traceback.format_exc()) 2310 ) 2311 2312 step_result = "success" if success else "failed" 2313 self.action_message("Finished %s step (%s)" % (action, step_result)) 2314 2315 if not post_success: 2316 self.fatal("Aborting due to failure in post-action listener.") 2317 2318 def run(self): 2319 """Default run method. 2320 This is the "do everything" method, based on actions and all_actions. 2321 2322 First run self.dump_config() if it exists. 2323 Second, go through the list of all_actions. 2324 If they're in the list of self.actions, try to run 2325 self.preflight_ACTION(), self.ACTION(), and self.postflight_ACTION(). 2326 2327 Preflight is sanity checking before doing anything time consuming or 2328 destructive. 2329 2330 Postflight is quick testing for success after an action. 
2331 2332 """ 2333 for fn in self._listeners["pre_run"]: 2334 try: 2335 self.info("Running pre-run listener: %s" % fn) 2336 method = getattr(self, fn) 2337 method() 2338 except Exception: 2339 self.error( 2340 "Exception during pre-run listener: %s" % traceback.format_exc() 2341 ) 2342 2343 for fn in self._listeners["post_run"]: 2344 try: 2345 method = getattr(self, fn) 2346 method() 2347 except Exception: 2348 self.error( 2349 "An additional exception occurred during a " 2350 "post-run listener: %s" % traceback.format_exc() 2351 ) 2352 2353 self.fatal("Aborting due to failure in pre-run listener.") 2354 2355 self.dump_config() 2356 try: 2357 for action in self.all_actions: 2358 self.run_action(action) 2359 except Exception: 2360 self.fatal("Uncaught exception: %s" % traceback.format_exc()) 2361 finally: 2362 post_success = True 2363 for fn in self._listeners["post_run"]: 2364 try: 2365 self.info("Running post-run listener: %s" % fn) 2366 method = getattr(self, fn) 2367 method() 2368 except Exception: 2369 post_success = False 2370 self.error( 2371 "Exception during post-run listener: %s" 2372 % traceback.format_exc() 2373 ) 2374 2375 if not post_success: 2376 self.fatal("Aborting due to failure in post-run listener.") 2377 2378 return self.return_code 2379 2380 def run_and_exit(self): 2381 """Runs the script and exits the current interpreter.""" 2382 rc = self.run() 2383 if rc != 0: 2384 self.warning("returning nonzero exit status %d" % rc) 2385 sys.exit(rc) 2386 2387 def clobber(self): 2388 """ 2389 Delete the working directory 2390 """ 2391 dirs = self.query_abs_dirs() 2392 self.rmtree(dirs["abs_work_dir"], error_level=FATAL) 2393 2394 def query_abs_dirs(self): 2395 """We want to be able to determine where all the important things 2396 are. Absolute paths lend themselves well to this, though I wouldn't 2397 be surprised if this causes some issues somewhere. 2398 2399 This should be overridden in any script that has additional dirs 2400 to query. 
2401 2402 The query_* methods tend to set self.VAR variables as their 2403 runtime cache. 2404 """ 2405 if self.abs_dirs: 2406 return self.abs_dirs 2407 c = self.config 2408 dirs = {} 2409 dirs["base_work_dir"] = c["base_work_dir"] 2410 dirs["abs_work_dir"] = os.path.join(c["base_work_dir"], c["work_dir"]) 2411 dirs["abs_log_dir"] = os.path.join(c["base_work_dir"], c.get("log_dir", "logs")) 2412 if "GECKO_PATH" in os.environ: 2413 dirs["abs_src_dir"] = os.environ["GECKO_PATH"] 2414 self.abs_dirs = dirs 2415 return self.abs_dirs 2416 2417 def dump_config( 2418 self, file_path=None, config=None, console_output=True, exit_on_finish=False 2419 ): 2420 """Dump self.config to localconfig.json""" 2421 config = config or self.config 2422 dirs = self.query_abs_dirs() 2423 if not file_path: 2424 file_path = os.path.join(dirs["abs_log_dir"], "localconfig.json") 2425 self.info("Dumping config to %s." % file_path) 2426 self.mkdir_p(os.path.dirname(file_path)) 2427 json_config = json.dumps(config, sort_keys=True, indent=4) 2428 fh = codecs.open(file_path, encoding="utf-8", mode="w+") 2429 fh.write(json_config) 2430 fh.close() 2431 if console_output: 2432 self.info(pprint.pformat(config)) 2433 if exit_on_finish: 2434 sys.exit() 2435 2436 # logging {{{2 2437 def new_log_obj(self, default_log_level="info"): 2438 c = self.config 2439 log_dir = os.path.join(c["base_work_dir"], c.get("log_dir", "logs")) 2440 log_config = { 2441 "logger_name": "Simple", 2442 "log_name": "log", 2443 "log_dir": log_dir, 2444 "log_level": default_log_level, 2445 "log_format": "%(asctime)s %(levelname)8s - %(message)s", 2446 "log_to_console": True, 2447 "append_to_log": False, 2448 } 2449 log_type = self.config.get("log_type", "console") 2450 for key in log_config.keys(): 2451 value = self.config.get(key, None) 2452 if value is not None: 2453 log_config[key] = value 2454 if log_type == "multi": 2455 self.log_obj = MultiFileLogger(**log_config) 2456 elif log_type == "simple": 2457 self.log_obj = 
SimpleFileLogger(**log_config) 2458 else: 2459 self.log_obj = ConsoleLogger(**log_config) 2460 2461 def action_message(self, message): 2462 self.info( 2463 "[mozharness: %sZ] %s" 2464 % (datetime.datetime.utcnow().isoformat(" "), message) 2465 ) 2466 2467 def summary(self): 2468 """Print out all the summary lines added via add_summary() 2469 throughout the script. 2470 2471 I'd like to revisit how to do this in a prettier fashion. 2472 """ 2473 self.action_message("%s summary:" % self.__class__.__name__) 2474 if self.summary_list: 2475 for item in self.summary_list: 2476 try: 2477 self.log(item["message"], level=item["level"]) 2478 except ValueError: 2479 """log is closed; print as a default. Ran into this 2480 when calling from __del__()""" 2481 print("### Log is closed! (%s)" % item["message"]) 2482 2483 def add_summary(self, message, level=INFO): 2484 self.summary_list.append({"message": message, "level": level}) 2485 # TODO write to a summary-only log? 2486 # Summaries need a lot more love. 2487 self.log(message, level=level) 2488 2489 def summarize_success_count( 2490 self, success_count, total_count, message="%d of %d successful.", level=None 2491 ): 2492 if level is None: 2493 level = INFO 2494 if success_count < total_count: 2495 level = ERROR 2496 self.add_summary(message % (success_count, total_count), level=level) 2497 2498 def get_hash_for_file(self, file_path, hash_type="sha512"): 2499 bs = 65536 2500 hasher = hashlib.new(hash_type) 2501 with open(file_path, "rb") as fh: 2502 buf = fh.read(bs) 2503 while len(buf) > 0: 2504 hasher.update(buf) 2505 buf = fh.read(bs) 2506 return hasher.hexdigest() 2507 2508 @property 2509 def return_code(self): 2510 return self._return_code 2511 2512 @return_code.setter 2513 def return_code(self, code): 2514 old_return_code, self._return_code = self._return_code, code 2515 if old_return_code != code: 2516 self.warning("setting return code to %d" % code) 2517