# -*- coding: utf-8 -*- #
# Copyright 2013 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Some general file utilities that can be used by the Cloud SDK."""

from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

import contextlib
import enum
import errno
import hashlib
import io
import logging
import os
import shutil
import stat
import sys
import tempfile
import time

from googlecloudsdk.core import exceptions
from googlecloudsdk.core.util import encoding as encoding_util
from googlecloudsdk.core.util import platforms
from googlecloudsdk.core.util import retry

import six
from six.moves import range  # pylint: disable=redefined-builtin

# Maximum number of attempts used by the retry helpers in this module.
NUM_RETRIES = 10

# WindowsError only exists when running on Windows
try:
  # pylint: disable=invalid-name, We are not defining this name.
  WindowsError
except NameError:
  # pylint: disable=invalid-name, We are not defining this name.
  WindowsError = None


class Error(Exception):
  """Base exception for the file_utils module."""
  pass


class MissingFileError(Error):
  """Error for when a file does not exist."""
  pass


def CopyTree(src, dst):
  """Copies a directory recursively, without copying directory stat info.

  More specifically, behaves like `cp -R` rather than `cp -Rp`, which means that
  the destination directory and its contents will be *writable* and *deletable*.

  (Yes, an omnipotent being can shutil.copytree a directory so read-only that
  they cannot delete it. But they cannot do that with this function.)

  Adapted from shutil.copytree.

  Args:
    src: str, the path to the source directory
    dst: str, the path to the destination directory. Must not already exist and
      be writable.

  Raises:
    shutil.Error: if copying failed for any reason. The error argument is a
      list of (srcname, dstname, message) tuples, one per failed entry.
  """
  # os.makedirs raises if dst already exists; that error is not collected.
  os.makedirs(dst)
  errors = []
  for name in os.listdir(src):
    name = encoding_util.Decode(name)
    srcname = os.path.join(src, name)
    dstname = os.path.join(dst, name)
    try:
      if os.path.isdir(srcname):
        CopyTree(srcname, dstname)
      else:
        # Will raise a SpecialFileError for unsupported file types
        shutil.copy2(srcname, dstname)
    # catch the Error from the recursive copytree so that we can
    # continue with other files
    except shutil.Error as err:
      errors.extend(err.args[0])
    except EnvironmentError as why:
      errors.append((srcname, dstname, six.text_type(why)))
  if errors:
    raise shutil.Error(errors)


def MakeDir(path, mode=0o777):
  """Creates the given directory and its parents and does not fail if it exists.

  Args:
    path: str, The path of the directory to create.
    mode: int, The permissions to give the created directories. 0777 is the
      default mode for os.makedirs(), allowing reading, writing, and listing
      by all users on the machine.

  Raises:
    Error: if the operation fails and we can provide extra information (a file
      already exists at the path, or permission was denied).
    OSError: if the operation fails for any other reason.
  """
  try:
    os.makedirs(path, mode=mode)
  except OSError as ex:
    base_msg = 'Could not create directory [{0}]: '.format(path)
    if ex.errno == errno.EEXIST and os.path.isdir(path):
      # The directory is already there; nothing to do.
      pass
    elif ex.errno == errno.EEXIST and os.path.isfile(path):
      raise Error(base_msg + 'A file exists at that location.\n\n')
    elif ex.errno == errno.EACCES:
      raise Error(
          base_msg + 'Permission denied.\n\n' +
          ('Please verify that you have permissions to write to the parent '
           'directory.'))
    else:
      raise


def _WaitForRetry(retries_left):
  """Sleeps for a period of time based on the retry count.

  The delay grows linearly: 0.2 seconds multiplied by the number of retries
  already consumed (NUM_RETRIES - retries_left).

  Args:
    retries_left: int, The number of retries remaining. Should be in the range
      of NUM_RETRIES - 1 to 0.
  """
  time_to_wait = .1 * (2 * (NUM_RETRIES - retries_left))
  logging.debug('Waiting for retry: [%s]', time_to_wait)
  time.sleep(time_to_wait)


# Values of the `winerror` attribute that _ShouldRetryOperation treats as
# retryable; see its docstring for the meaning of each code.
RETRY_ERROR_CODES = [5, 32, 145]


def _ShouldRetryOperation(func, exc_info):
  """Matches specific error types that should be retried.

  This will retry the following errors:
    WindowsError(5, 'Access is denied'), When trying to delete a readonly file
    WindowsError(32, 'The process cannot access the file because it is being '
      'used by another process'), When a file is in use.
    WindowsError(145, 'The directory is not empty'), When a directory cannot be
      deleted.

  Args:
    func: function, The function that failed.
    exc_info: sys.exc_info(), The current exception state.

  Returns:
    True if the error can be retried or false if we should just fail.
  """
  # os.unlink is the same as os.remove
  if not (func == os.remove or func == os.rmdir or func == os.unlink):
    return False
  # WindowsError is None when not running on Windows (see top of module).
  if not WindowsError:
    return False
  e = exc_info[1]
  return getattr(e, 'winerror', None) in RETRY_ERROR_CODES


def _RetryOperation(exc_info, func, args,
                    retry_test_function=lambda func, exc_info: True):
  """Attempts to retry the failed file operation.

  Args:
    exc_info: sys.exc_info(), The current exception state.
    func: function, The function that failed.
    args: (str, ...), The tuple of args that should be passed to func when
      retrying.
    retry_test_function: The function to call to determine if a retry should be
      attempted.  Takes the function that is being retried as well as the
      current exc_info. By default every failure is retried.

  Returns:
    True if the operation eventually succeeded or False if it continued to fail
    for all retries.
  """
  retries_left = NUM_RETRIES
  # exc_info is refreshed after each failed attempt, so the test function
  # always sees the most recent error.
  while retries_left > 0 and retry_test_function(func, exc_info):
    logging.debug(
        'Retrying file system operation: %s, %s, %s, retries_left=%s',
        func, args, exc_info, retries_left)
    retries_left -= 1
    try:
      _WaitForRetry(retries_left)
      func(*args)
      return True
    # pylint: disable=bare-except, We look at the exception later.
    except:
      exc_info = sys.exc_info()
  return False


def _HandleRemoveError(func, failed_path, exc_info):
  """A function to pass as the onerror arg to shutil.rmtree for handling errors.

  Args:
    func: function, The function that failed.
    failed_path: str, The path of the file the error occurred on.
    exc_info: sys.exc_info(), The current exception state.
  """
  logging.debug('Handling file system error: %s, %s, %s',
                func, failed_path, exc_info)

  # Access denied on Windows. This happens when trying to delete a readonly
  # file. Change the permissions and retry the delete.
  #
  # In python 3.3+, WindowsError is an alias of OSError and exc_info[0] can be
  # a subclass of OSError.
  if (WindowsError and issubclass(exc_info[0], WindowsError) and
      getattr(exc_info[1], 'winerror', None) == 5):
    os.chmod(failed_path, stat.S_IWUSR)

  # Don't remove the trailing comma in the passed arg tuple.  It indicates that
  # it is a tuple of 1, rather than a tuple of characters that will get expanded
  # by *args.
  if not _RetryOperation(exc_info, func, (failed_path,), _ShouldRetryOperation):
    # Always raise the original error.
    exceptions.reraise(exc_info[1], tb=exc_info[2])


def RmTree(path):
  """Calls shutil.rmtree() with error handling to fix Windows problems.

  It also waits (up to NUM_RETRIES times) for the top level directory deletion
  to actually be reflected in the file system before this returns.

  Args:
    path: str, The path to remove.
  """
  # The subdirectories and/or files under dir_path may have file names
  # containing unicode characters. If the arg to shutil.rmtree() is not unicode
  # then any child unicode files will raise an exception. Coercing dir_path to
  # unicode makes shutil.rmtree() play nice with unicode.
  path = six.text_type(path)
  shutil.rmtree(path, onerror=_HandleRemoveError)
  retries_left = NUM_RETRIES
  # If the directory is still visible after all retries, this returns anyway
  # without raising.
  while os.path.isdir(path) and retries_left > 0:
    logging.debug('Waiting for directory to disappear: %s', path)
    retries_left -= 1
    _WaitForRetry(retries_left)


def _DestInSrc(src, dst):
  """Returns True if dst is src itself or is located underneath src.

  Both paths are made absolute and given a trailing separator before the
  prefix comparison, so '/a/bc' is not considered to be inside '/a/b'.

  Args:
    src: str, The source directory path.
    dst: str, The destination directory path.

  Returns:
    bool, True if the absolute dst path starts with the absolute src path.
  """
  # Copied directly from shutil
  src = os.path.abspath(src)
  dst = os.path.abspath(dst)
  if not src.endswith(os.path.sep):
    src += os.path.sep
  if not dst.endswith(os.path.sep):
    dst += os.path.sep
  return dst.startswith(src)


def MoveDir(src, dst):
  """Recursively moves a directory to another location.

  This code is mostly copied from shutil.move(), but has been scoped down to
  specifically handle only directories.  The src must be a directory, and
  the dst must not exist.  It uses functions from this module to be resilient
  against spurious file system errors in Windows.  It will try to do an
  os.rename() of the directory.  If that fails, the tree will be copied to the
  new location and then deleted from the old location.

  Args:
    src: str, The directory path to move.
    dst: str, The path to move the directory to.

  Raises:
    Error: If the src or dst directories are not valid.
  """
  if not os.path.isdir(src):
    raise Error("Source path '{0}' must be a directory".format(src))
  if os.path.exists(dst):
    raise Error("Destination path '{0}' already exists".format(dst))
  if _DestInSrc(src, dst):
    raise Error("Cannot move a directory '{0}' into itself '{1}'."
                .format(src, dst))
  try:
    logging.debug('Attempting to move directory [%s] to [%s]', src, dst)
    try:
      os.rename(src, dst)
    except OSError:
      # Retry the rename unconditionally (default retry test); re-raise the
      # OSError to the outer handler only if every retry fails.
      if not _RetryOperation(sys.exc_info(), os.rename, (src, dst)):
        raise
  except OSError as e:
    logging.debug('Directory rename failed.  Falling back to copy. [%s]', e)
    shutil.copytree(src, dst, symlinks=True)
    RmTree(src)


def FindDirectoryContaining(starting_dir_path, directory_entry_name):
  """Searches directories upwards until it finds one with the given contents.

  This can be used to find the directory above you that contains the given
  entry.  It is useful for things like finding the workspace root you are under
  that contains a configuration directory.

  Args:
    starting_dir_path: str, The path of the directory to start searching
      upwards from.
    directory_entry_name: str, The name of the directory that must be present
      in order to return the current directory.

  Returns:
    str, The full path to the directory (the starting dir itself or one of its
    ancestors) that contains the given entry, or None if the root of the file
    system was hit without finding it.
  """
  prev_path = None
  path = encoding_util.Decode(os.path.realpath(starting_dir_path))
  # os.path.split() of the root returns the root again, which ends the loop.
  while path != prev_path:
    search_dir = os.path.join(path, directory_entry_name)
    if os.path.isdir(search_dir):
      return path
    prev_path = path
    path, _ = os.path.split(path)
  return None


def IsDirAncestorOf(ancestor_directory, path):
  """Returns whether ancestor_directory is an ancestor of path.

  Both paths are resolved with os.path.realpath() before being compared. A
  path that resolves to ancestor_directory itself also returns True.

  Args:
    ancestor_directory: str, path to the directory that is the potential
      ancestor of path
    path: str, path to the file/directory that is a potential descendant of
      ancestor_directory

  Returns:
    bool, whether path has ancestor_directory as an ancestor.

  Raises:
    ValueError: if the given ancestor_directory is not, in fact, a directory.
  """
  if not os.path.isdir(ancestor_directory):
    raise ValueError('[{0}] is not a directory.'.format(ancestor_directory))

  path = encoding_util.Decode(os.path.realpath(path))
  ancestor_directory = encoding_util.Decode(
      os.path.realpath(ancestor_directory))

  try:
    rel = os.path.relpath(path, ancestor_directory)
  except ValueError:  # On Windows, relpath raises for paths on different drives
    return False

  # rel is exactly '..' when path is the parent of ancestor_directory, and
  # starts with '..' + sep when path is anywhere else outside of it.
  return not rel.startswith('..' + os.path.sep) and rel != '..'


def _GetSystemPath():
  """Returns properly encoded system PATH variable string."""
  return encoding_util.GetEncodedValue(os.environ, 'PATH')


def SearchForExecutableOnPath(executable, path=None):
  """Tries to find all 'executable' in the directories listed in the PATH.

  This is mostly copied from distutils.spawn.find_executable() but with a
  few differences.  It does not check the current directory for the
  executable.  We only want to find things that are actually on the path, not
  based on what the CWD is.  It also returns a list of all matching
  executables.  If there are multiple versions of an executable on the path
  it will return all of them at once.

  Args:
    executable: The name of the executable to find
    path: A path to search.  If none, the system PATH will be used.

  Returns:
    A list of full paths to matching executables or an empty list if none
    are found.
  """
  if not path:
    path = _GetSystemPath()
  paths = path.split(os.pathsep)

  matching = []
  for p in paths:
    f = os.path.join(p, executable)
    # Only existence as a regular file is checked here, not executability.
    if os.path.isfile(f):
      matching.append(f)

  return matching


def _FindExecutableOnPath(executable, path, pathext):
  """Internal function to a find an executable.

  Args:
    executable: The name of the executable to find.
    path: A list of directories to search separated by 'os.pathsep'.
    pathext: An iterable of file name extensions to use.

  Returns:
    str, the path to a file on `path` with name `executable` + `p` for
      `p` in `pathext`, or None if no such executable file is found.

  Raises:
    ValueError: invalid input.
  """

  # A bare string would be iterated character by character below, so reject it.
  if isinstance(pathext, six.string_types):
    raise ValueError('_FindExecutableOnPath(..., pathext=\'{0}\') failed '
                     'because pathext must be an iterable of strings, but got '
                     'a string.'.format(pathext))

  # Prioritize preferred extension over earlier in path.
  for ext in pathext:
    for directory in path.split(os.pathsep):
      # Windows can have paths quoted.
      directory = directory.strip('"')
      full = os.path.normpath(os.path.join(directory, executable) + ext)
      # On Windows os.access(full, os.X_OK) is always True.
      if os.path.isfile(full) and os.access(full, os.X_OK):
        return full
  return None


def _PlatformExecutableExtensions(platform):
  """Returns the executable file name extensions to try for the platform.

  Args:
    platform: platforms.OperatingSystem value, The platform to get the
      extensions for.

  Returns:
    (str, ...), The extensions in priority order. For non-Windows platforms
    this includes the empty extension.
  """
  if platform == platforms.OperatingSystem.WINDOWS:
    return ('.exe', '.cmd', '.bat', '.com', '.ps1')
  else:
    return ('', '.sh')


def FindExecutableOnPath(executable, path=None, pathext=None,
                         allow_extensions=False):
  """Searches for `executable` in the directories listed in `path` or $PATH.

  Executable must not contain a directory or (unless allow_extensions is True)
  an extension.

  Args:
    executable: The name of the executable to find.
    path: A list of directories to search separated by 'os.pathsep'.  If None
      then the system PATH is used.
    pathext: An iterable of file name extensions to use.  If None then
      platform specific extensions are used.
    allow_extensions: A boolean flag indicating whether extensions in the
      executable are allowed.

  Returns:
    The path of 'executable' (possibly with a platform-specific extension) if
    found and executable, None if not found.

  Raises:
    ValueError: if executable has a path or an extension, and extensions are
      not allowed, or if there's an internal error.
  """

  if not allow_extensions and os.path.splitext(executable)[1]:
    raise ValueError('FindExecutableOnPath({0},...) failed because first '
                     'argument must not have an extension.'.format(executable))

  if os.path.dirname(executable):
    raise ValueError('FindExecutableOnPath({0},...) failed because first '
                     'argument must not have a path.'.format(executable))

  if path is None:
    effective_path = _GetSystemPath()
  else:
    effective_path = path
  effective_pathext = (pathext if pathext is not None
                       else _PlatformExecutableExtensions(
                           platforms.OperatingSystem.Current()))

  return _FindExecutableOnPath(executable, effective_path,
                               effective_pathext)


def HasWriteAccessInDir(directory):
  """Determines if the current user is able to modify the contents of the dir.

  Args:
    directory: str, The full path of the directory to check.

  Raises:
    ValueError: If the given directory path is not a valid directory.

  Returns:
    True if the current user has write and execute permissions on the
    directory, False otherwise.
  """
  if not os.path.isdir(directory):
    raise ValueError(
        'The given path [{path}] is not a directory.'.format(path=directory))
  # Appending . tests search permissions, especially on windows, by forcing
  # 'directory' to be treated as a directory
  path = os.path.join(directory, '.')
  if not os.access(path, os.X_OK) or not os.access(path, os.W_OK):
    # We can believe os.access() indicating no access.
    return False

  # At this point the only platform and filesystem independent method is to
  # attempt to create or delete a file in the directory.
  #
  # Why? os.access() and os.stat() use the underlying C library on Windows,
  # which doesn't check the correct user and group permissions and almost always
  # results in false positive writability tests.

  # The probe file name includes the pid of the current process.
  path = os.path.join(directory,
                      '.HasWriteAccessInDir{pid}'.format(pid=os.getpid()))
  # while True: should work here, but we limit the retries just in case.
  for _ in range(10):

    try:
      fd = os.open(path, os.O_RDWR | os.O_CREAT, 0o666)
      os.close(fd)
    except OSError as e:
      if e.errno == errno.EACCES:
        # No write access.
        return False
      if e.errno in [errno.ENOTDIR, errno.ENOENT]:
        # The directory has been removed or replaced by a file.
        raise ValueError('The given path [{path}] is not a directory.'.format(
            path=directory))
      raise

    try:
      os.remove(path)
      # Write access.
      return True
    except OSError as e:
      if e.errno == errno.EACCES:
        # No write access.
        return False
      # os.remove() could fail with ENOENT if we're in a race with another
      # process/thread (which just succeeded) or if the directory has been
      # removed.
      if e.errno != errno.ENOENT:
        raise

  # Every attempt lost the create/remove race; report no write access.
  return False


def GetCWD():
  """Returns os.getcwd() properly decoded."""
  return encoding_util.Decode(os.getcwd())


class TemporaryDirectory(object):
  """A class to easily create and dispose of temporary directories.

  Securely creates a directory for temporary use.  This class can be used with
  a context manager (the with statement) to ensure cleanup in exceptional
  situations.
  """

  def __init__(self, change_to=False):
    """Creates the temporary directory.

    Args:
      change_to: bool, True to also change the current working directory to
        the new temporary directory. The previous working directory is
        restored by Close().
    """
    self.__temp_dir = tempfile.mkdtemp()
    # Working directory to restore on Close(); None if it was never changed.
    self._curdir = None
    if change_to:
      self._curdir = GetCWD()
      os.chdir(self.__temp_dir)

  @property
  def path(self):
    """str, The path of the temporary directory, or None once closed."""
    return self.__temp_dir

  def __enter__(self):
    """Returns the path of the temporary directory."""
    return self.path

  def __exit__(self, prev_exc_type, prev_exc_val, prev_exc_trace):
    """Closes the directory, chaining any cleanup error onto a prior error."""
    try:
      self.Close()
    except:  # pylint: disable=bare-except
      exceptions.RaiseWithContext(
          prev_exc_type, prev_exc_val, prev_exc_trace, *sys.exc_info())
    # Always return False so any previous exception will be re-raised.
    return False

  def Close(self):
    """Restores the working directory (if changed) and removes the directory.

    Returns:
      bool, True if the temporary directory was removed by this call, False if
      it had already been closed.
    """
    if self._curdir is not None:
      os.chdir(self._curdir)
    if self.path:
      RmTree(self.path)
      self.__temp_dir = None
      return True
    return False


class Checksum(object):
  """Consistently handles calculating checksums across the Cloud SDK."""

  def __init__(self, algorithm=hashlib.sha256):
    """Creates a new Checksum.

    Args:
      algorithm: a hashing algorithm constructor, a la hashlib.sha256.
    """
    self.__hash = algorithm()
    # Relative paths recorded by AddDirectory().
    self.__files = set()

  def AddContents(self, contents):
    """Adds the given contents to the checksum.

    Args:
      contents: str or bytes, The contents to add.

    Returns:
      self, For method chaining.
    """
    self.__hash.update(six.ensure_binary(contents))
    return self

  def AddFileContents(self, file_path):
    """Adds the contents of the given file to the checksum.

    The file is read in binary mode in 4096 byte chunks.

    Args:
      file_path: str, The file path of the contents to add.

    Returns:
      self, For method chaining.
    """
    with BinaryFileReader(file_path) as fp:
      while True:
        chunk = fp.read(4096)
        if not chunk:
          break
        self.__hash.update(chunk)
    return self

  def AddDirectory(self, dir_path):
    """Adds all files under the given directory to the checksum.

    This adds both the contents of the files as well as their names and
    locations to the checksum.  If the checksums of two directories are equal
    this means they have exactly the same files, and contents.

    Symbolic links are not followed: the link's relative path and its target
    (os.readlink) are added instead of the contents it points to.

    Args:
      dir_path: str, The directory path to add all files from.

    Returns:
      self, For method chaining.
    """
    # The subdirectories and/or files under dir_path may have file names
    # containing unicode characters. If the arg to os.walk() is not unicode then
    # any child unicode files will raise an exception. Coercing dir_path to
    # unicode makes os.walk() play nice with unicode.
    dir_path = six.text_type(dir_path)
    for root, dirs, files in os.walk(dir_path):
      # Sort in place so the traversal (and therefore the digest) does not
      # depend on the order in which the file system lists entries.
      dirs.sort(key=os.path.normcase)
      files.sort(key=os.path.normcase)
      for d in dirs:
        path = os.path.join(root, d)
        # We don't traverse directory links, but add the fact that it was found
        # in the tree.
        if os.path.islink(path):
          relpath = os.path.relpath(path, dir_path)
          self.__files.add(relpath)
          self.AddContents(relpath)
          self.AddContents(os.readlink(path))
      for f in files:
        path = os.path.join(root, f)
        relpath = os.path.relpath(path, dir_path)
        self.__files.add(relpath)
        self.AddContents(relpath)
        if os.path.islink(path):
          self.AddContents(os.readlink(path))
        else:
          self.AddFileContents(path)
    return self

  def HexDigest(self):
    """Gets the hex digest for all content added to this checksum.

    Returns:
      str, The checksum digest as a hex string.
    """
    return self.__hash.hexdigest()

  def Files(self):
    """Gets the set of all files that were discovered when adding a directory.

    Returns:
      {str}, The relative paths of all files that were found when traversing the
      directory tree.
    """
    return self.__files

  @staticmethod
  def FromSingleFile(input_path, algorithm=hashlib.sha256):
    """Creates a Checksum containing one file.

    Args:
      input_path: str, The file path of the contents to add.
      algorithm: a hashing algorithm method, a la hashlib.algorithms

    Returns:
      Checksum, The checksum containing the file.
    """
    return Checksum(algorithm=algorithm).AddFileContents(input_path)

  @staticmethod
  def HashSingleFile(input_path, algorithm=hashlib.sha256):
    """Gets the hex digest of a single file.

    Args:
      input_path: str, The file path of the contents to add.
      algorithm: a hashing algorithm method, a la hashlib.algorithms

    Returns:
      str, The checksum digest of the file as a hex string.
    """
    return Checksum.FromSingleFile(input_path, algorithm=algorithm).HexDigest()


class ChDir(object):
  """Do some things from a certain directory, and reset the directory afterward.
  """

  def __init__(self, directory):
    """Stores the directory to change to; nothing happens until __enter__.

    Args:
      directory: str, The directory to change to while inside the context.
    """
    self.__dir = directory

  def __enter__(self):
    """Remembers the current directory, then changes to the target directory."""
    self.__original_dir = GetCWD()
    os.chdir(self.__dir)
    return self.__dir

  def __exit__(self, typ, value, tb):
    """Changes back to the directory that was current on entry."""
    os.chdir(self.__original_dir)


class FileLockLockingError(Error):
  """Error for when a FileLock could not be acquired."""
  pass


class FileLockTimeoutError(FileLockLockingError):
  """A case of FileLockLockingError."""
  pass


class FileLockUnlockingError(Error):
  """Error for when a FileLock could not be released."""
  pass


class FileLock(object):
  """A file lock for interprocess (not interthread) mutual exclusion.

  At most one FileLock instance may be locked at a time for a given local file
  path. FileLock instances may be used as context objects.
  """

  def __init__(self, path, timeout_secs=None):
    """Constructs the FileLock.

    Args:
      path: str, the path to the file to lock. The directory containing the
        file must already exist when Lock() is called.
      timeout_secs: int, seconds Lock() may wait for the lock to become
        available. If None, Lock() may block forever.
    """
    self._path = path
    self._timeout_secs = timeout_secs
    self._file = None
    self._locked = False
    # Pick the platform specific locking implementation.
    if platforms.OperatingSystem.Current() == platforms.OperatingSystem.WINDOWS:
      self._impl = _WindowsLocking()
    else:
      self._impl = _PosixLocking()

  def Lock(self):
    """Opens and locks the file. A no-op if this FileLock is already locked.

    The lock file is created if it does not already exist.

    Raises:
      FileLockLockingError: if the file could not be opened (or created when
        necessary).
      FileLockTimeoutError: if the file could not be locked before the timeout
        elapsed.
    """
    if self._locked:
      return
    try:
      self._file = FileWriter(self._path)
    except Error as e:
      raise FileLockLockingError(e)

    max_wait_ms = None
    if self._timeout_secs is not None:
      max_wait_ms = 1000 * self._timeout_secs

    # Keep trying the non-blocking lock every 100ms until it succeeds or the
    # Retryer gives up.
    r = retry.Retryer(max_wait_ms=max_wait_ms)
    try:
      r.RetryOnException(self._impl.TryLock, args=[self._file.fileno()],
                         sleep_ms=100)
    except retry.RetryException as e:
      # Don't leave the lock file open if the lock was never acquired.
      self._file.close()
      self._file = None
      raise FileLockTimeoutError(
          'Timed-out waiting to lock file: {0}'.format(self._path))
    else:
      self._locked = True

  def Unlock(self):
    """Unlocks and closes the file.

    A no-op if this object is not locked.

    Raises:
      FileLockUnlockingError: if a problem was encountered when unlocking the
        file. There is no need to retry.
    """
    if not self._locked:
      return
    try:
      self._impl.Unlock(self._file.fileno())
    except IOError as e:
      # We don't expect Unlock() to ever raise an error, but can't be sure.
      raise FileLockUnlockingError(e)
    finally:
      # The file is closed and the state reset even if unlocking failed.
      self._file.close()
      self._file = None
      self._locked = False

  def __enter__(self):
    """Locks and returns this FileLock object."""
    self.Lock()
    return self

  def __exit__(self, exc_type, exc_val, exc_tb):
    """Unlocks, logging any errors encountered."""
    try:
      self.Unlock()
    except Error as e:
      logging.debug('Encountered error unlocking file %s: %s', self._path, e)
    # Have Python re-raise the exception which caused the context to exit, if
    # any.
    return False


# Imports fcntl, which is only available on POSIX.
class _PosixLocking(object):
  """Exclusive, non-blocking file locking on POSIX systems."""

  def TryLock(self, fd):
    """Raises IOError on failure."""
    # pylint: disable=g-import-not-at-top
    import fcntl
    # Exclusive lock, non-blocking
    fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)

  def Unlock(self, fd):
    """Releases the flock() lock held on the given file descriptor."""
    import fcntl  # pylint: disable=g-import-not-at-top
    fcntl.flock(fd, fcntl.LOCK_UN)


# Imports msvcrt, which is only available on Windows.
class _WindowsLocking(object):
  """Exclusive, non-blocking file locking on Windows."""

  def TryLock(self, fd):
    """Raises IOError on failure."""
    # pylint: disable=g-import-not-at-top
    import msvcrt
    # Exclusive lock, non-blocking
    msvcrt.locking(fd, msvcrt.LK_NBLCK, 1)

  def Unlock(self, fd):
    """Releases the one-byte lock taken by TryLock() on the descriptor."""
    import msvcrt  # pylint: disable=g-import-not-at-top
    msvcrt.locking(fd, msvcrt.LK_UNLCK, 1)


@contextlib.contextmanager
def _FileInBinaryMode(file_obj):
  """Context manager to temporarily swap a file to binary mode on Windows.

  On exit, the mode is swapped back to its original mode, whether that was text
  or binary.

  See the 'On Windows...' note in the Python docs for more info about text and
  binary mode:
  https://docs.python.org/2/tutorial/inputoutput.html#reading-and-writing-files

  Args:
    file_obj: File-like object to swap to binary mode.

  Yields:
    None.
  """
  # If file_obj does not define fileno, just pass it through.  For example,
  # this happens for unit tests which replace sys.stdin with StringIO.
  try:
    fd = file_obj.fileno()
  except (AttributeError, io.UnsupportedOperation):
    yield
    return

  if platforms.OperatingSystem.IsWindows():
    # pylint: disable=g-import-not-at-top
    import msvcrt

    try:
      # setmode() returns the previous mode, which is restored on exit.
      old_mode = msvcrt.setmode(fd, os.O_BINARY)
      yield
    finally:
      msvcrt.setmode(fd, old_mode)
  else:
    # On non-Windows platforms, text mode == binary mode, so just yield.
    yield


def WriteStreamBytes(stream, contents):
  """Write the given bytes to the stream.

  Args:
    stream: The raw stream to write to, usually sys.stdout or sys.stderr.
    contents: A byte string to write to the stream.
  """
  if six.PY2:
    with _FileInBinaryMode(stream):
      stream.write(contents)
      # Flush to force content to be written out with the correct mode.
      stream.flush()
  else:
    # This is raw byte stream, but it doesn't exist on PY2.
    stream.buffer.write(contents)


def ReadStdinBytes():
  """Reads raw bytes from sys.stdin without any encoding interpretation.

  Returns:
    bytes, The byte string that was read.
  """
  if six.PY2:
    with _FileInBinaryMode(sys.stdin):
      return sys.stdin.read()
  else:
    # This is raw byte stream, but it doesn't exist on PY2.
    return sys.stdin.buffer.read()


def WriteFileAtomically(file_name, contents):
  """Writes a file to disk safely cross platform.

  Specified directories will be created if they don't exist.

  Note that on Windows, there is no good way to atomically write a file to
  disk, so the file is written directly. On other platforms the contents are
  written to a temporary file in the same directory, which is then renamed to
  file_name.

  Args:
    file_name: The actual file to write to.
    contents: The file contents to write.

  Raises:
    ValueError: file_name is empty or contents is None.
    TypeError: contents is not a valid string.
  """
  if not file_name or contents is None:
    raise ValueError('Empty file_name [{}] or contents [{}].'.format(
        file_name, contents))

  if not isinstance(contents, six.string_types):
    raise TypeError('Invalid contents [{}].'.format(contents))

  dirname = os.path.dirname(file_name)

  # Create the directories, if they dont exist.
  try:
    os.makedirs(dirname)
  except os.error:
    # Deliberately ignore errors here. This usually means that the directory
    # already exists. Other errors will surface from the write calls below.
    pass

  if platforms.OperatingSystem.IsWindows():
    # On Windows, there is no good way to atomically write this file.
    WriteFileContents(file_name, contents, private=True)
  else:
    # This opens files with 0600, which are the correct permissions.
    # delete=False keeps the temporary file on close so it can be renamed.
    # NOTE(review): if the rename fails the temporary file is left behind —
    # confirm whether cleanup is expected here.
    with tempfile.NamedTemporaryFile(
        mode='w', dir=dirname, delete=False) as temp_file:
      temp_file.write(contents)
      # This was a user-submitted patch to fix a race condition that we couldn't
      # reproduce. It may be due to the file being renamed before the OS's
      # buffer flushes to disk.
      temp_file.flush()
      # This pattern atomically writes the file on non-Windows systems.
      os.rename(temp_file.name, file_name)


def GetTreeSizeBytes(path, predicate=None):
  """Returns sum of sizes of not-ignored files under given path, in bytes.

  Args:
    path: str, The root of the directory tree to walk.
    predicate: function, Takes a file path and returns True if the file's size
      should be counted. Default is to count all files.

  Returns:
    int, The total size in bytes of all files accepted by the predicate.
  """
  result = 0
  if predicate is None:
    predicate = lambda x: True
  for directory in os.walk(six.text_type(path)):
    # directory is the (root, dirs, files) tuple produced by os.walk().
    for file_name in directory[2]:
      file_path = os.path.join(directory[0], file_name)
      if predicate(file_path):
        result += os.path.getsize(file_path)
  return result


def GetDirectoryTreeListing(path,
                            include_dirs=False,
                            file_predicate=None,
                            dir_sort_func=None,
                            file_sort_func=None):
  """Yields a generator that list all the files in a directory tree.

  Walks directory tree from path and yields all files that it finds. Will expand
  paths relative to home dir e.g. those that start with '~'.

  Args:
    path: string, base of file tree to walk.
    include_dirs: bool, if true will yield directory names in addition to files.
    file_predicate: function, boolean function to determine which files should
      be included in the output. Default is all files. It is not applied to
      directories.
    dir_sort_func: function, function that will determine order directories are
      processed. It is called with the list of directory names and must sort
      it in place. Default is lexical ordering.
    file_sort_func: function, function that will determine order files are
      processed. It is called with the list of file names and must return them
      in the desired order. Default is lexical ordering.
  Yields:
    Generator: yields all files and directory paths matching supplied criteria.
  """
  if not file_sort_func:
    file_sort_func = sorted
  if file_predicate is None:
    file_predicate = lambda x: True
  if dir_sort_func is None:
    dir_sort_func = lambda x: x.sort()

  for root, dirs, files in os.walk(ExpandHomeDir(six.text_type(path))):
    # Sorting dirs in place also controls the order os.walk() descends.
    dir_sort_func(dirs)
    if include_dirs:
      for dirname in dirs:
        # NOTE(review): directories are yielded as bare names (not joined with
        # root), while files below are yielded as joined paths — confirm this
        # is intended.
        yield dirname
    for file_name in file_sort_func(files):
      file_path = os.path.join(root, file_name)
      if file_predicate(file_path):
        yield file_path


def ReadFileContents(path):
  """Reads the text contents from the given path.

  Args:
    path: str, The file path to read.

  Raises:
    Error: If the file cannot be read.

  Returns:
    str, The text string read from the file.
  """
  try:
    with FileReader(path) as f:
      return f.read()
  except EnvironmentError as e:
    # EnvironmentError is parent of IOError, OSError and WindowsError.
    # Raised when file does not exist or can't be opened/read.
    raise Error('Unable to read file [{0}]: {1}'.format(path, e))


def ReadBinaryFileContents(path):
  """Reads the binary contents from the given path.

  Args:
    path: str, The file path to read.

  Raises:
    Error: If the file cannot be read.

  Returns:
    bytes, The byte string read from the file.
  """
  try:
    with BinaryFileReader(path) as f:
      return f.read()
  except EnvironmentError as e:
    # EnvironmentError is parent of IOError, OSError and WindowsError.
    # Raised when file does not exist or can't be opened/read.
    raise Error('Unable to read file [{0}]: {1}'.format(path, e))


def WriteFileContents(path,
                      contents,
                      overwrite=True,
                      private=False,
                      create_path=True,
                      newline=None):
  """Writes the given text contents to a file at the given path.

  Args:
    path: str, The file path to write to.
    contents: str, The text string to write.
    overwrite: bool, False to error out if the file already exists.
    private: bool, True to make the file have 0o600 permissions.
    create_path: bool, True to create intermediate directories, if needed.
    newline: str, The line ending style to use, or None to use platform default.

  Raises:
    Error: If the file cannot be written.
  """
  try:
    _CheckOverwrite(path, overwrite)
    with FileWriter(
        path, private=private, create_path=create_path, newline=newline) as f:
      # This decode is here because a lot of libraries on Python 2 can return
      # both text or bytes depending on if unicode is present. If you truly
      # pass binary data to this, the decode will fail (as it should). If you
      # pass an ascii string (that you got from json.dumps() for example), this
      # will prevent it from crashing.
      f.write(encoding_util.Decode(contents))
  except EnvironmentError as e:
    # EnvironmentError is parent of IOError, OSError and WindowsError.
    # Raised when file does not exist or can't be opened/read.
1122 raise Error('Unable to write file [{0}]: {1}'.format(path, e)) 1123 1124 1125def WriteBinaryFileContents(path, 1126 contents, 1127 overwrite=True, 1128 private=False, 1129 create_path=True): 1130 """Writes the given binary contents to a file at the given path. 1131 1132 Args: 1133 path: str, The file path to write to. 1134 contents: str, The byte string to write. 1135 overwrite: bool, False to error out if the file already exists. 1136 private: bool, True to make the file have 0o600 permissions. 1137 create_path: bool, True to create intermediate directories, if needed. 1138 1139 Raises: 1140 Error: If the file cannot be written. 1141 """ 1142 try: 1143 _CheckOverwrite(path, overwrite) 1144 with BinaryFileWriter(path, private=private, create_path=create_path) as f: 1145 f.write(contents) 1146 except EnvironmentError as e: 1147 # EnvironmentError is parent of IOError, OSError and WindowsError. 1148 # Raised when file does not exist or can't be opened/read. 1149 raise Error('Unable to write file [{0}]: {1}'.format(path, e)) 1150 1151 1152def _CheckOverwrite(path, overwrite): 1153 if not overwrite and os.path.exists(path): 1154 raise Error( 1155 'File [{0}] already exists and cannot be overwritten'.format(path)) 1156 1157 1158def FileReader(path): 1159 """Opens the given file for text read for use in a 'with' statement. 1160 1161 Args: 1162 path: str, The file path to read from. 1163 1164 Returns: 1165 A file-like object opened for read in text mode. 1166 """ 1167 return _FileOpener(path, 'rt', 'read', encoding='utf-8') 1168 1169 1170def BinaryFileReader(path): 1171 """Opens the given file for binary read for use in a 'with' statement. 1172 1173 Args: 1174 path: str, The file path to read from. 1175 1176 Returns: 1177 A file-like object opened for read in binary mode. 
1178 """ 1179 return _FileOpener(encoding_util.Encode(path, encoding='utf-8'), 'rb', 'read') 1180 1181 1182def FileWriter(path, 1183 private=False, 1184 append=False, 1185 create_path=False, 1186 newline=None): 1187 """Opens the given file for text write for use in a 'with' statement. 1188 1189 Args: 1190 path: str, The file path to write to. 1191 private: bool, True to create or update the file permission to be 0o600. 1192 append: bool, True to append to an existing file. 1193 create_path: bool, True to create intermediate directories, if needed. 1194 newline: str, The line ending style to use, or None to use plaform default. 1195 1196 Returns: 1197 A file-like object opened for write in text mode. 1198 """ 1199 mode = 'at' if append else 'wt' 1200 return _FileOpener( 1201 path, 1202 mode, 1203 'write', 1204 encoding='utf-8', 1205 private=private, 1206 create_path=create_path, 1207 newline=newline) 1208 1209 1210class BinaryFileWriterMode(enum.Enum): 1211 APPEND = 'ab' 1212 MODIFY = 'r+b' 1213 TRUNCATE = 'wb' 1214 1215 1216def BinaryFileWriter(path, 1217 private=False, 1218 mode=BinaryFileWriterMode.TRUNCATE, 1219 create_path=False): 1220 """Opens the given file for binary write for use in a 'with' statement. 1221 1222 Args: 1223 path: str, The file path to write to. 1224 private: bool, True to create or update the file permission to be 0o600. 1225 mode: BinaryFileWriterMode, Determines how to open file for writing. 1226 create_path: bool, True to create intermediate directories, if needed. 1227 1228 Returns: 1229 A file-like object opened for write in binary mode. 
1230 """ 1231 return _FileOpener( 1232 path, mode.value, 'write', private=private, create_path=create_path) 1233 1234 1235def _FileOpener(path, 1236 mode, 1237 verb, 1238 encoding=None, 1239 private=False, 1240 create_path=False, 1241 newline=None): 1242 """Opens a file in various modes and does error handling.""" 1243 if private: 1244 PrivatizeFile(path) 1245 if create_path: 1246 _MakePathToFile(path) 1247 try: 1248 return io.open(path, mode, encoding=encoding, newline=newline) 1249 except EnvironmentError as e: 1250 # EnvironmentError is parent of IOError, OSError and WindowsError. 1251 # Raised when file does not exist or can't be opened/read. 1252 exc_type = Error 1253 if isinstance(e, IOError) and e.errno == errno.ENOENT: 1254 exc_type = MissingFileError 1255 raise exc_type('Unable to {0} file [{1}]: {2}'.format(verb, path, e)) 1256 1257 1258def GetHomeDir(): 1259 """Returns the current user HOME directory path.""" 1260 return ExpandHomeDir('~') 1261 1262 1263def ExpandHomeDir(path): 1264 """Returns path with leading ~<SEP> or ~<USER><SEP> expanded.""" 1265 return encoding_util.Decode(os.path.expanduser(path)) 1266 1267 1268def ExpandHomeAndVars(path): 1269 """Expands ~ and ENV_VARS in path.""" 1270 return encoding_util.Decode(os.path.expandvars(ExpandHomeDir(path))) 1271 1272 1273def NormalizePathFromURL(url): 1274 """Converts url to path string and normalizes path string.""" 1275 url2pathname = six.moves.urllib.request.url2pathname 1276 return os.path.normcase(os.path.normpath(url2pathname(url))) 1277 1278 1279def _MakePathToFile(path, mode=0o777): 1280 parent_dir_path, _ = os.path.split(path) 1281 full_parent_dir_path = os.path.realpath(ExpandHomeDir(parent_dir_path)) 1282 MakeDir(full_parent_dir_path, mode) 1283 1284 1285def PrivatizeFile(path): 1286 """Makes an existing file private or creates a new, empty private file. 1287 1288 In theory it would be better to return the open file descriptor so that it 1289 could be used directly. 
The issue that we would need to pass an encoding to 1290 os.fdopen() and on Python 2. This is not supported. Instead we just create 1291 the empty file and then we will just open it normally later to do the write. 1292 1293 Args: 1294 path: str, The path of the file to create or privatize. 1295 """ 1296 try: 1297 if os.path.exists(path): 1298 os.chmod(path, 0o600) 1299 else: 1300 _MakePathToFile(path, mode=0o700) 1301 flags = os.O_RDWR | os.O_CREAT | os.O_TRUNC 1302 # Accommodate Windows; stolen from python2.6/tempfile.py. 1303 if hasattr(os, 'O_NOINHERIT'): 1304 flags |= os.O_NOINHERIT 1305 1306 fd = os.open(path, flags, 0o600) 1307 os.close(fd) 1308 except EnvironmentError as e: 1309 # EnvironmentError is parent of IOError, OSError and WindowsError. 1310 # Raised when file does not exist or can't be opened/read. 1311 raise Error('Unable to create private file [{0}]: {1}'.format(path, e)) 1312