# -*- coding: utf-8 -*-

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# We don't import all modules at the top for performance reasons. See Bug 1008943

from __future__ import absolute_import, print_function

import errno
import os
import re
import stat
import sys
import time
import warnings
from contextlib import contextmanager

try:
    from six.moves import urllib
except ImportError:
    # six is only used here for the ``urllib`` alias; on Python 3 the standard
    # library exposes the same ``urllib.parse`` / ``urllib.request`` names.
    import urllib.parse
    import urllib.request


__all__ = [
    "extract_tarball",
    "extract_zip",
    "extract",
    "is_url",
    "load",
    "copy_contents",
    "match",
    "move",
    "remove",
    "rmtree",
    "tree",
    "which",
    "NamedTemporaryFile",
    "TemporaryDirectory",
]

# utilities for extracting archives


def extract_tarball(src, dest, ignore=None):
    """Extract a tar archive into ``dest``.

    :param src: path of the archive, opened with ``tarfile.open``
    :param dest: directory the members are extracted into
    :param ignore: optional iterable of patterns (see :func:`match`); members
                   whose name matches any of them are skipped
    :returns: list of the member names that were extracted, in archive order
    """

    import tarfile

    with tarfile.open(src) as bundle:
        namelist = []

        # NOTE(review): member names are not sanitised before extraction, so
        # the archive is assumed to be trusted (no ``../`` or absolute names).
        for m in bundle:
            if ignore and any(match(m.name, i) for i in ignore):
                continue
            bundle.extract(m, path=dest)
            namelist.append(m.name)

    return namelist


def extract_zip(src, dest, ignore=None):
    """Extract a zip archive into ``dest``.

    :param src: path of the archive, or an already opened ``zipfile.ZipFile``
                (which is closed by this function)
    :param dest: directory the members are extracted into
    :param ignore: optional iterable of patterns (see :func:`match`); members
                   whose name matches any of them are skipped
    :returns: the archive's complete name list (ignored members included)
    """

    import zipfile

    if isinstance(src, zipfile.ZipFile):
        bundle = src
    else:
        try:
            bundle = zipfile.ZipFile(src)
        except Exception:
            print("src: %s" % src)
            raise

    # Close the archive even when extraction fails part-way through.
    try:
        namelist = bundle.namelist()

        for name in namelist:
            if ignore and any(match(name, i) for i in ignore):
                continue

            bundle.extract(name, dest)
            filename = os.path.realpath(os.path.join(dest, name))
            # The upper 16 bits of external_attr hold the Unix mode; keep only
            # the nine permission bits.
            mode = bundle.getinfo(name).external_attr >> 16 & 0x1FF
            # Only update permissions if attributes are set. Otherwise fallback
            # to the defaults.
            if mode:
                os.chmod(filename, mode)
    finally:
        bundle.close()
    return namelist


def extract(src, dest=None, ignore=None):
    """
    Takes in a tar or zip file and extracts it to dest

    If dest is not specified, extracts to os.path.dirname(src)

    :param src: path of an existing tar or zip archive
    :param dest: target directory; created if it does not exist
    :param ignore: optional iterable of patterns (see :func:`match`) naming
                   archive members to skip
    :returns: the list of top level files (and directories) that were
              extracted, as paths joined onto ``dest``
    :raises Exception: if ``src`` is neither a tar nor a zip archive
    """

    import zipfile
    import tarfile

    assert os.path.exists(src), "'%s' does not exist" % src

    if dest is None:
        dest = os.path.dirname(src)
    elif not os.path.isdir(dest):
        os.makedirs(dest)
    assert not os.path.isfile(dest), "dest cannot be a file"

    if tarfile.is_tarfile(src):
        namelist = extract_tarball(src, dest, ignore=ignore)
    elif zipfile.is_zipfile(src):
        namelist = extract_zip(src, dest, ignore=ignore)
    else:
        raise Exception("mozfile.extract: no archive format found for '%s'" % src)

    # namelist returns paths with forward slashes even in windows
    top_level_files = [
        os.path.join(dest, name.rstrip("/"))
        for name in namelist
        if "/" not in name.rstrip("/")
    ]

    # namelist doesn't include folders, append these to the list.
    # A set keeps the "already listed?" check cheap for large archives.
    seen = set(top_level_files)
    for name in namelist:
        index = name.find("/")
        if index != -1:
            root = os.path.join(dest, name[:index])
            if root not in seen:
                seen.add(root)
                top_level_files.append(root)

    return top_level_files


# utilities for removal of files and directories


def rmtree(dir):
    """Deprecated wrapper method to remove a directory tree.

    Ensure to update your code to use mozfile.remove() directly

    :param dir: directory to be removed
    """

    warnings.warn(
        "mozfile.rmtree() is deprecated in favor of mozfile.remove()",
        PendingDeprecationWarning,
        stacklevel=2,
    )
    return remove(dir)


def _call_windows_retry(func, args=(), retry_max=5, retry_delay=0.5):
    """
    It's possible to see spurious errors on Windows due to various things
    keeping a handle to the directory open (explorer, virus scanners, etc)
    So we try a few times if it fails with a known error.
    retry_delay is multiplied by the number of failed attempts to increase
    the likelihood of success in subsequent attempts.

    :param func: callable to invoke; its return value is discarded
    :param args: positional arguments passed to ``func``
    :param retry_max: number of retries before the error is re-raised
    :param retry_delay: base delay in seconds between attempts
    :raises OSError: immediately for errors other than EACCES/ENOTEMPTY, or
                     once ``retry_max`` retries have been used up
    """
    retry_count = 0
    while True:
        try:
            func(*args)
        except OSError as e:
            # Error codes are defined in:
            # http://docs.python.org/2/library/errno.html#module-errno
            if e.errno not in (errno.EACCES, errno.ENOTEMPTY):
                raise

            if retry_count == retry_max:
                raise

            retry_count += 1

            print(
                '%s() failed for "%s". Reason: %s (%s). Retrying...'
                % (func.__name__, args, e.strerror, e.errno)
            )
            time.sleep(retry_count * retry_delay)
        else:
            # If no exception has been thrown it should be done
            break


def remove(path):
    """Removes the specified file, link, or directory tree.

    This is a replacement for shutil.rmtree that works better under
    windows. It does the following things:

     - check path access for the current user before trying to remove
     - retry operations on some known errors due to various things keeping
       a handle on file paths - like explorer, virus scanners, etc. The
       known errors are errno.EACCES and errno.ENOTEMPTY, and it will
       retry up to 5 times with a delay of (failed_attempts * 0.5) seconds
       between each attempt.

    Note that no error will be raised if the given path does not exist.

    :param path: path to be removed
    """

    import shutil

    def _call_with_windows_retry(*args, **kwargs):
        try:
            _call_windows_retry(*args, **kwargs)
        except OSError as e:
            # The file or directory to be removed doesn't exist anymore
            if e.errno != errno.ENOENT:
                raise

    def _update_permissions(path):
        """Sets specified permissions depending on filetype"""
        if os.path.islink(path):
            # Path is a symlink which we don't have to modify
            # because it should already have all the needed permissions
            return

        stats = os.stat(path)

        if os.path.isfile(path):
            mode = stats.st_mode | stat.S_IWUSR
        elif os.path.isdir(path):
            mode = stats.st_mode | stat.S_IWUSR | stat.S_IXUSR
        else:
            # Not supported type
            return

        _call_with_windows_retry(os.chmod, (path, mode))

    # lexists() rather than exists(): exists() follows symlinks and reports
    # False for a dangling link, which would then never be removed.
    if not os.path.lexists(path):
        return

    # On Windows, adds '\\\\?\\' to paths which match ^[A-Za-z]:\\.* to access
    # files or directories that exceed MAX_PATH(260) limitation or that ends
    # with a period.
    if (
        sys.platform in ("win32", "cygwin")
        and len(path) >= 3
        and path[1] == ":"
        and path[2] == "\\"
    ):
        path = u"\\\\?\\%s" % path

    if os.path.isfile(path) or os.path.islink(path):
        # Verify the file or link is read/write for the current user
        _update_permissions(path)
        _call_with_windows_retry(os.remove, (path,))

    elif os.path.isdir(path):
        # Verify the directory is read/write/execute for the current user
        _update_permissions(path)

        # We're ensuring that every nested item has writable permission.
        for root, dirs, files in os.walk(path):
            for entry in dirs + files:
                _update_permissions(os.path.join(root, entry))
        _call_with_windows_retry(shutil.rmtree, (path,))


def copy_contents(srcdir, dstdir):
    """
    Copy the contents of the srcdir into the dstdir, preserving
    subdirectories.

    If an existing file of the same name exists in dstdir, it will be overwritten.

    :param srcdir: directory whose contents are copied
    :param dstdir: destination directory; created if missing
    :raises Exception: on the fallback path, with a list of
                       ``(src, dst, reason)`` tuples, one per failed entry
    """
    import shutil

    # dirs_exist_ok was introduced in Python 3.8
    # On earlier versions, or Windows, use the verbose mechanism.
    # We use it on Windows because _call_with_windows_retry doesn't allow
    # named arguments to be passed.
    if sys.version_info >= (3, 8) and os.name != "nt":
        shutil.copytree(srcdir, dstdir, dirs_exist_ok=True)
        return

    names = os.listdir(srcdir)
    if not os.path.isdir(dstdir):
        os.makedirs(dstdir)
    errors = []
    for name in names:
        srcname = os.path.join(srcdir, name)
        dstname = os.path.join(dstdir, name)
        try:
            if os.path.islink(srcname):
                linkto = os.readlink(srcname)
                os.symlink(linkto, dstname)
            elif os.path.isdir(srcname):
                copy_contents(srcname, dstname)
            else:
                _call_windows_retry(shutil.copy2, (srcname, dstname))
        except OSError as why:
            errors.append((srcname, dstname, str(why)))
        except Exception as err:
            # A recursive copy_contents() call reports its failures as
            # Exception(<list of tuples>); merge that list into ours. Any
            # other exception is recorded as a single entry.
            nested = err.args[0] if err.args else None
            if isinstance(nested, list):
                errors.extend(nested)
            else:
                errors.append((srcname, dstname, str(err)))
    try:
        _call_windows_retry(shutil.copystat, (srcdir, dstdir))
    except OSError as why:
        # Errors carrying a Windows error code are ignored here. The
        # ``winerror`` attribute only exists on Windows, hence the getattr.
        if getattr(why, "winerror", None) is None:
            errors.append((srcdir, dstdir, str(why)))
    if errors:
        raise Exception(errors)


def move(src, dst):
    """
    Move a file or directory path.

    This is a replacement for shutil.move that works better under windows,
    retrying operations on some known errors due to various things keeping
    a handle on file paths.

    :param src: path to move
    :param dst: destination path, as accepted by ``shutil.move``
    """
    import shutil

    _call_windows_retry(shutil.move, (src, dst))


def depth(directory):
    """returns the integer depth of a directory or path relative to '/'"""

    directory = os.path.abspath(directory)
    level = 0
    # Peel off one trailing component per iteration until only the root is
    # left (os.path.split then yields an empty remainder).
    while True:
        directory, remainder = os.path.split(directory)
        level += 1
        if not remainder:
            break
    return level


def tree(directory, sort_key=lambda x: x.lower()):
    """Display tree directory structure for `directory`.

    :param directory: root of the tree to render
    :param sort_key: key function used to order the entries of a directory
    :returns: the rendered tree as a single newline-joined string
    """
    vertical_line = u"│"
    item_marker = u"├"
    last_child = u"└"

    retval = []
    indent = []
    last = {}
    top = depth(directory)

    for dirpath, dirnames, filenames in os.walk(directory, topdown=True):

        abspath = os.path.abspath(dirpath)
        basename = os.path.basename(abspath)
        parent = os.path.dirname(abspath)
        level = depth(abspath) - top

        # sort articles of interest; done in place so that os.walk also
        # descends into the sub-directories in sorted order
        for resource in (dirnames, filenames):
            resource[:] = sorted(resource, key=sort_key)

        if level > len(indent):
            indent.append(vertical_line)
        indent = indent[:level]

        if dirnames:
            files_end = item_marker
            last[abspath] = dirnames[-1]
        else:
            files_end = last_child

        if last.get(parent) == os.path.basename(abspath):
            # last directory of parent
            dirpath_mark = last_child
            indent[-1] = " "
        elif not indent:
            dirpath_mark = ""
        else:
            dirpath_mark = item_marker

        # append the directory and piece of tree structure
        # if the top-level entry directory, print as passed
        retval.append(
            "%s%s%s"
            % ("".join(indent[:-1]), dirpath_mark, basename if retval else directory)
        )
        # add the files
        if filenames:
            last_file = filenames[-1]
            retval.extend(
                [
                    (
                        "%s%s%s"
                        % (
                            "".join(indent),
                            files_end if filename == last_file else item_marker,
                            filename,
                        )
                    )
                    for filename in filenames
                ]
            )

    return "\n".join(retval)


def which(cmd, mode=os.F_OK | os.X_OK, path=None, exts=None, extra_search_dirs=()):
    """A wrapper around `shutil.which` to make the behavior on Windows
    consistent with other platforms.

    On non-Windows platforms, this is a direct call to `shutil.which`. On
    Windows, this:

    * Ensures that `cmd` without an extension will be found. Previously it was
      only found if it had an extension in `PATHEXT`.
    * Ensures the absolute path to the binary is returned. Previously if the
      binary was found in `cwd`, a relative path was returned.
    * Checks the Windows registry if shutil.which doesn't come up with anything.

    The arguments are the same as the ones in `shutil.which`. In addition there
    is an `exts` argument that only has an effect on Windows. This is used to
    set a custom value for PATHEXT and is formatted as a list of file
    extensions.

    extra_search_dirs is a convenience argument. If provided, the strings in
    the sequence will be appended to the END of the given `path`.

    :returns: path of the executable that was found, or None
    """
    from shutil import which as shutil_which

    if isinstance(path, (list, tuple)):
        path = os.pathsep.join(path)

    if not path:
        path = os.environ.get("PATH", os.defpath)

    if extra_search_dirs:
        path = os.pathsep.join([path] + list(extra_search_dirs))

    if sys.platform != "win32":
        return shutil_which(cmd, mode=mode, path=path)

    oldexts = os.environ.get("PATHEXT", "")
    # Work on a copy so that a list supplied by the caller is never mutated.
    exts = list(exts) if exts else oldexts.split(os.pathsep)

    # This ensures that `cmd` without any extensions will be found.
    # See: https://bugs.python.org/issue31405
    if "." not in exts:
        exts.append(".")

    os.environ["PATHEXT"] = os.pathsep.join(exts)
    try:
        found = shutil_which(cmd, mode=mode, path=path)
        if found:
            return os.path.abspath(found.rstrip("."))
    finally:
        # Always restore the process-wide PATHEXT that was overridden above.
        if oldexts:
            os.environ["PATHEXT"] = oldexts
        else:
            del os.environ["PATHEXT"]

    # If we've gotten this far, we need to check for registered executables
    # before giving up.
    try:
        import winreg
    except ImportError:
        import _winreg as winreg
    if not cmd.lower().endswith(".exe"):
        cmd += ".exe"
    try:
        ret = winreg.QueryValue(
            winreg.HKEY_LOCAL_MACHINE,
            r"SOFTWARE\Microsoft\Windows\CurrentVersion\App Paths\%s" % cmd,
        )
        return os.path.abspath(ret) if ret else None
    except winreg.error:
        return None


# utilities for temporary resources


class NamedTemporaryFile(object):
    """
    Like tempfile.NamedTemporaryFile except it works on Windows
    in the case where you open the created file a second time.

    This behaves very similarly to tempfile.NamedTemporaryFile but may
    not behave exactly the same. For example, this function does not
    prevent fd inheritance by children.

    Example usage:

    with NamedTemporaryFile() as fh:
        fh.write(b'foobar')

        print('Filename: %s' % fh.name)

    see https://bugzilla.mozilla.org/show_bug.cgi?id=821362
    """

    def __init__(
        self, mode="w+b", bufsize=-1, suffix="", prefix="tmp", dir=None, delete=True
    ):
        """Create the temporary file and open it with ``mode``.

        ``bufsize`` is accepted but not used by this implementation.
        ``suffix``, ``prefix`` and ``dir`` are forwarded to
        ``tempfile.mkstemp``. When ``delete`` is true the file is unlinked
        when the context manager exits or the object is finalized.
        """

        import tempfile

        fd, path = tempfile.mkstemp(suffix, prefix, dir, "t" in mode)
        os.close(fd)

        try:
            self.file = open(path, mode)
        except Exception:
            # Don't leave the file created by mkstemp behind when it cannot
            # be reopened (e.g. because of an invalid mode).
            os.unlink(path)
            raise
        self._path = path
        self._delete = delete
        self._unlinked = False

    def __getattr__(self, k):
        # Anything not defined on the wrapper is delegated to the file object.
        return getattr(self.__dict__["file"], k)

    def __iter__(self):
        return self.__dict__["file"]

    def __enter__(self):
        self.file.__enter__()
        return self

    def __exit__(self, exc, value, tb):
        self.file.__exit__(exc, value, tb)
        if self.__dict__["_delete"]:
            os.unlink(self.__dict__["_path"])
            self._unlinked = True

    def __del__(self):
        state = self.__dict__
        # Nothing to clean up if __init__ never got as far as opening the
        # file, or if __exit__ already removed it.
        if "file" not in state or state.get("_unlinked", True):
            return
        self.file.__exit__(None, None, None)
        if state["_delete"]:
            os.unlink(state["_path"])


@contextmanager
def TemporaryDirectory():
    """
    create a temporary directory using tempfile.mkdtemp, and then clean it up.

    Example usage:
    with TemporaryDirectory() as tmp:
       open(os.path.join(tmp, "a_temp_file"), "w").write("data")

    """

    import tempfile
    import shutil

    tempdir = tempfile.mkdtemp()
    try:
        yield tempdir
    finally:
        shutil.rmtree(tempdir)


# utilities dealing with URLs


def is_url(thing):
    """
    Return True if thing looks like a URL.

    "Looks like a URL" means that urlparse() reports a scheme of at least two
    characters (presumably so that a Windows drive letter such as ``c:`` is
    not mistaken for a scheme).
    """

    return len(urllib.parse.urlparse(thing).scheme) >= 2


def load(resource):
    """
    open a file or URL for reading.  If the passed resource string is not a URL,
    or begins with 'file://', return a ``file``.  Otherwise, return the
    result of urllib.urlopen()
    """

    # handle file URLs separately due to python stdlib limitations
    if resource.startswith("file://"):
        resource = resource[len("file://") :]

    if not is_url(resource):
        # if no scheme is given, it is a file path
        return open(resource)

    return urllib.request.urlopen(resource)


# We can't depend on mozpack.path here, so copy the 'match' function over.

# Compiled regular expressions, keyed by the pattern they were built from.
re_cache = {}
# Python versions < 3.7 return r'\/' for re.escape('/').
if re.escape("/") == "/":
    MATCH_STAR_STAR_RE = re.compile(r"(^|/)\\\*\\\*/")
    MATCH_STAR_STAR_END_RE = re.compile(r"(^|/)\\\*\\\*$")
else:
    MATCH_STAR_STAR_RE = re.compile(r"(^|\\\/)\\\*\\\*\\\/")
    MATCH_STAR_STAR_END_RE = re.compile(r"(^|\\\/)\\\*\\\*$")


def match(path, pattern):
    """
    Return whether the given path matches the given pattern.
    An asterisk can be used to match any string, including the null string, in
    one part of the path:

        ``foo`` matches ``*``, ``f*`` or ``fo*o``

    However, an asterisk matching a subdirectory may not match the null string:

        ``foo/bar`` does *not* match ``foo/*/bar``

    If the pattern matches one of the ancestor directories of the path, the
    path is considered matching:

        ``foo/bar`` matches ``foo``

    Two adjacent asterisks can be used to match files and zero or more
    directories and subdirectories.

        ``foo/bar`` matches ``foo/**/bar``, or ``**/bar``

    An empty pattern matches every path.
    """
    if not pattern:
        return True
    if pattern not in re_cache:
        # Escape the pattern, then turn the escaped wildcards back into regex
        # fragments: "**/" first, a trailing "**" next, single "*" last.
        p = re.escape(pattern)
        p = MATCH_STAR_STAR_RE.sub(r"\1(?:.+/)?", p)
        p = MATCH_STAR_STAR_END_RE.sub(r"(?:\1.+)?", p)
        # The optional "/..." suffix is what lets a pattern match the
        # ancestors of a path.
        p = p.replace(r"\*", "[^/]*") + "(?:/.*)?$"
        re_cache[pattern] = re.compile(p)
    return re_cache[pattern].match(path) is not None